optimisation: use graftTree to remember the export branch

Sped up git-annex export in repositories with lots of keys.

The old method read the whole git-annex branch tree into memory; the new one looks up only the branch's top-level tree and grafts the treeish onto it.
This commit is contained in:
Joey Hess 2019-02-22 11:16:22 -04:00
parent 1580ff3866
commit 7af55de83c
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
3 changed files with 9 additions and 10 deletions

View file

@ -24,7 +24,7 @@ module Annex.Branch (
forceCommit, forceCommit,
getBranch, getBranch,
files, files,
graftTreeish, rememberTreeish,
performTransitions, performTransitions,
withIndex, withIndex,
) where ) where
@ -646,17 +646,15 @@ getMergedRefs' = do
- and then removes it. This ensures that the treeish won't get garbage - and then removes it. This ensures that the treeish won't get garbage
- collected, and will always be available as long as the git-annex branch - collected, and will always be available as long as the git-annex branch
- is available. -} - is available. -}
graftTreeish :: Git.Ref -> TopFilePath -> Annex () rememberTreeish :: Git.Ref -> TopFilePath -> Annex ()
graftTreeish treeish graftpoint = lockJournal $ \jl -> do rememberTreeish treeish graftpoint = lockJournal $ \jl -> do
branchref <- getBranch branchref <- getBranch
updateIndex jl branchref updateIndex jl branchref
Git.Tree.Tree t <- inRepo $ origtree <- fromMaybe (giveup "unable to determine git-annex branch tree") <$>
Git.Tree.getTree Git.LsTree.LsTreeRecursive branchref inRepo (Git.Ref.tree branchref)
t' <- inRepo $ Git.Tree.recordTree $ Git.Tree.Tree $ addedt <- inRepo $ Git.Tree.graftTree treeish graftpoint origtree
Git.Tree.RecordedSubTree graftpoint treeish [] : t
c <- inRepo $ Git.Branch.commitTree Git.Branch.AutomaticCommit c <- inRepo $ Git.Branch.commitTree Git.Branch.AutomaticCommit
"graft" [branchref] t' "graft" [branchref] addedt
origtree <- inRepo $ Git.Tree.recordTree (Git.Tree.Tree t)
c' <- inRepo $ Git.Branch.commitTree Git.Branch.AutomaticCommit c' <- inRepo $ Git.Branch.commitTree Git.Branch.AutomaticCommit
"graft cleanup" [c] origtree "graft cleanup" [c] origtree
inRepo $ Git.Branch.update' fullname c' inRepo $ Git.Branch.update' fullname c'

View file

@ -2,6 +2,7 @@ git-annex (7.20190220) UNRELEASED; urgency=medium
* Fix storage of metadata values containing newlines. * Fix storage of metadata values containing newlines.
(Reversion introduced in version 7.20190122.) (Reversion introduced in version 7.20190122.)
* Sped up git-annex export in repositories with lots of keys.
-- Joey Hess <id@joeyh.name> Wed, 20 Feb 2019 14:20:59 -0400 -- Joey Hess <id@joeyh.name> Wed, 20 Feb 2019 14:20:59 -0400

View file

@ -124,7 +124,7 @@ recordExportBeginning remoteuuid newtree = do
buildExportLog buildExportLog
. changeMapLog c ep new . changeMapLog c ep new
. parseExportLog . parseExportLog
Annex.Branch.graftTreeish newtree (asTopFilePath "export.tree") Annex.Branch.rememberTreeish newtree (asTopFilePath "export.tree")
parseExportLog :: L.ByteString -> MapLog ExportParticipants Exported parseExportLog :: L.ByteString -> MapLog ExportParticipants Exported
parseExportLog = parseMapLog exportParticipantsParser exportedParser parseExportLog = parseMapLog exportParticipantsParser exportedParser