implement updating the ContentIdentifier db with info from the git-annex branch

untested

This won't be super slow, but it does need to diff two likely large
trees, and since the git-annex branch rarely sits still, it will most
likely be run at the beginning of every import.

A possible speed improvement would be to only run this when the database
did not contain a ContentIdentifier. But that would only speed up
imports when there is no new version of a file on the special remote,
at most renames of existing files being imported.

A better speed improvement would be to record something in the git-annex
branch that indicates when an import has been run, and only do the diff
if the git-annex branch has a record of a newer import than we've seen
before. Then, it would only run when there is in fact new
ContentIdentifier information available from a remote. Certainly doable,
but didn't want to complicate things yet.
This commit is contained in:
Joey Hess 2019-03-06 18:04:30 -04:00
parent 12e4906657
commit ee251b2e2e
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
4 changed files with 72 additions and 24 deletions

View file

@ -22,7 +22,10 @@ module Database.ContentIdentifier (
recordContentIdentifier,
getContentIdentifiers,
getContentIdentifierKeys,
recordAnnexBranchTree,
getAnnexBranchTree,
ContentIdentifiersId,
AnnexBranchId,
) where
import Database.Types
@ -31,6 +34,8 @@ import Database.Init
import Annex.Locations
import Annex.Common hiding (delete)
import Types.Import
import Git.Types
import Git.Sha
import Database.Persist.Sql hiding (Key)
import Database.Persist.TH
@ -45,6 +50,11 @@ ContentIdentifiers
ContentIdentifiersIndexRemoteKey remote key
ContentIdentifiersIndexRemoteCID remote cid
UniqueRemoteCidKey remote cid key
-- The last git-annex branch tree sha that was used to update
-- ContentIdentifiers
AnnexBranch
tree SRef
UniqueTree tree
|]
{- Opens the database, creating it if it doesn't exist yet.
@ -97,3 +107,15 @@ getContentIdentifierKeys (ContentIdentifierHandle h) u cid =
, ContentIdentifiersRemote ==. u
] []
return $ map (fromIKey . contentIdentifiersKey . entityVal) l
{- Stores the given git-annex branch tree sha in the AnnexBranch table.
 -
 - Every existing AnnexBranch row is deleted first, and then a single row
 - holding the new sha is inserted. The result of insertUnique is
 - discarded. The action is submitted through queueDb. -}
recordAnnexBranchTree :: ContentIdentifierHandle -> Sha -> IO ()
recordAnnexBranchTree handle sha = queueDb handle $ do
    deleteWhere allRows
    void . insertUnique . AnnexBranch $ toSRef sha
  where
    -- An empty filter list selects every row of the table.
    allRows :: [Filter AnnexBranch]
    allRows = []
{- Looks up the git-annex branch tree sha stored in the AnnexBranch table.
 -
 - When the table holds exactly one row, the sha from that row is returned.
 - In every other case (no rows, or more than one), emptyTree is returned. -}
getAnnexBranchTree :: ContentIdentifierHandle -> IO Sha
getAnnexBranchTree (ContentIdentifierHandle h) = H.queryDbQueue h $
    storedTree <$> selectList ([] :: [Filter AnnexBranch]) []
  where
    -- Only a single-row result counts as a recorded tree.
    storedTree [row] = fromSRef (annexBranchTree (entityVal row))
    storedTree _ = emptyTree