untested This won't be super slow, but it does need to diff two likely large trees, and since the git-annex branch rarely sits still, it will most likely be run at the beginning of every import. A possible speed improvement would be to only run this when the database did not contain a ContentIdentifier. But that would only speed up imports when there is no new version of a file on the special remote, at most renames of existing files being imported. A better speed improvement would be to record something in the git-annex branch that indicates when an import has been run, and only do the diff if the git-annex branch has a record of a newer import than we've seen before. Then, it would only run when there is in fact new ContentIdentifier information available from a remote. Certainly doable, but didn't want to complicate things yet.
		
			
				
	
	
		
			47 lines
		
	
	
	
		
			1.4 KiB
			
		
	
	
	
		
			Haskell
		
	
	
	
	
	
			
		
		
	
	
			47 lines
		
	
	
	
		
			1.4 KiB
			
		
	
	
	
		
			Haskell
		
	
	
	
	
	
{- Remote content identifier logs.
 | 
						|
 -
 | 
						|
 - Copyright 2019 Joey Hess <id@joeyh.name>
 | 
						|
 -
 | 
						|
 - Licensed under the GNU AGPL version 3 or higher.
 | 
						|
 -}
 | 
						|
 | 
						|
module Logs.ContentIdentifier (
 | 
						|
	module X,
 | 
						|
	recordContentIdentifier,
 | 
						|
	getContentIdentifiers,
 | 
						|
) where
 | 
						|
 | 
						|
import Annex.Common
 | 
						|
import Logs
 | 
						|
import Logs.MapLog
 | 
						|
import Types.Import
 | 
						|
import qualified Annex.Branch
 | 
						|
import Logs.ContentIdentifier.Pure as X
 | 
						|
import qualified Annex
 | 
						|
 | 
						|
import qualified Data.Map as M
 | 
						|
import Data.List.NonEmpty (NonEmpty(..))
 | 
						|
import qualified Data.List.NonEmpty as NonEmpty
 | 
						|
 | 
						|
-- | Records a remote's content identifier and the key that it corresponds to.
--
-- A remote may use multiple content identifiers for the same key over time,
-- so ones that were recorded before are preserved.
--
-- When the content identifier is already in the list recorded for the
-- remote, the remote's log entry is left as it was, rather than gaining
-- a duplicate of the identifier.
recordContentIdentifier :: UUID -> ContentIdentifier -> Key -> Annex ()
recordContentIdentifier u cid k = do
	c <- liftIO currentVectorClock
	config <- Annex.getGitConfig
	Annex.Branch.change (remoteContentIdentifierLogFile config k) $
		buildLog . addcid c . parseLog
  where
	addcid c l
		-- Prepending an identifier that is already listed would
		-- make the entry grow by one duplicate on every call.
		| cid `elem` known = l
		| otherwise = changeMapLog c u (cid :| known) l
	  where
		-- Identifiers previously recorded for this remote, if any.
		known = contentIdentifierList (M.lookup u (simpleMap l))
 | 
						|
 | 
						|
-- | Looks up every content identifier that has been logged for a key,
-- grouped by the UUID of the remote it was recorded for.
getContentIdentifiers :: Key -> Annex [(UUID, [ContentIdentifier])]
getContentIdentifiers k = do
	config <- Annex.getGitConfig
	content <- Annex.Branch.get (remoteContentIdentifierLogFile config k)
	-- Convert each remote's non-empty list to a plain list while still
	-- in the map, then flatten the map to (UUID, list) pairs.
	let cidsByRemote = M.map NonEmpty.toList (simpleMap (parseLog content))
	return (M.toList cidsByRemote)
 |