git-annex/Logs/ContentIdentifier.hs
Joey Hess ee251b2e2e
implement updating the ContentIdentifier db with info from the git-annex branch
untested

This won't be super slow, but it does need to diff two likely large
trees, and since the git-annex branch rarely sits still, it will most
likely be run at the beginning of every import.

A possible speed improvement would be to only run this when the database
did not contain a ContentIdentifier. But that would only speed up
imports when there is no new version of a file on the special remote,
at most renames of existing files being imported.

A better speed improvement would be to record something in the git-annex
branch that indicates when an import has been run, and only do the diff
if the git-annex branch has a record of a newer import than we've seen
before. Then, it would only run when there is in fact new
ContentIdentifier information available from a remote. Certainly doable,
but I didn't want to complicate things yet.
2019-03-06 18:04:30 -04:00

47 lines
1.4 KiB
Haskell

{- Remote content identifier logs.
-
- Copyright 2019 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Logs.ContentIdentifier (
-- Re-export of the module that is imported under the alias X below
-- (Logs.ContentIdentifier.Pure).
module X,
-- Operations defined in this module; both run in the Annex monad and
-- go through Annex.Branch.
recordContentIdentifier,
getContentIdentifiers,
) where
import Annex.Common
import Logs
import Logs.MapLog
import Types.Import
import qualified Annex.Branch
import Logs.ContentIdentifier.Pure as X
import qualified Annex
import qualified Data.Map as M
import Data.List.NonEmpty (NonEmpty(..))
import qualified Data.List.NonEmpty as NonEmpty
-- | Records a remote's content identifier and the key that it corresponds to.
--
-- A remote may use multiple content identifiers for the same key over time,
-- so ones that were recorded before are preserved. A newly seen identifier
-- is placed at the head of that remote's list.
--
-- Recording an identifier that is already in the remote's list leaves the
-- remote's log entry as it was, rather than prepending a duplicate;
-- otherwise the list would grow each time the same identifier was
-- recorded again.
recordContentIdentifier :: UUID -> ContentIdentifier -> Key -> Annex ()
recordContentIdentifier u cid k = do
    c <- liftIO currentVectorClock
    config <- Annex.getGitConfig
    Annex.Branch.change (remoteContentIdentifierLogFile config k) $
        buildLog . addcid c . parseLog
  where
    -- Update the parsed log so that the entry for u includes cid.
    addcid c l
        | cid `elem` known = l
        | otherwise = changeMapLog c u (cid :| known) l
      where
        -- Identifiers already logged for u (empty when u has no entry).
        -- NOTE(review): the membership test above relies on an Eq
        -- instance for ContentIdentifier, which is defined outside
        -- this module.
        known = contentIdentifierList (M.lookup u (simpleMap l))
-- | Looks up every content identifier that has been recorded for a key.
--
-- Each element of the result pairs a UUID from the key's log with the
-- list of content identifiers logged under that UUID.
getContentIdentifiers :: Key -> Annex [(UUID, [ContentIdentifier])]
getContentIdentifiers k = do
    config <- Annex.getGitConfig
    logcontent <- Annex.Branch.get (remoteContentIdentifierLogFile config k)
    -- Convert each NonEmpty value to a plain list while still in the
    -- map, then flatten the map into (uuid, identifiers) pairs.
    let byuuid = M.map NonEmpty.toList (simpleMap (parseLog logcontent))
    return (M.toList byuuid)