From 7af55de83c83963784c423f1a1ddbc265c25c8b5 Mon Sep 17 00:00:00 2001
From: Joey Hess
Date: Fri, 22 Feb 2019 11:16:22 -0400
Subject: [PATCH] optimisation: use graftTree to remember the export branch

Sped up git-annex export in repositories with lots of keys.
Old method read whole git-annex branch tree into memory.
---
 Annex/Branch.hs | 16 +++++++---------
 CHANGELOG       |  1 +
 Logs/Export.hs  |  2 +-
 3 files changed, 9 insertions(+), 10 deletions(-)

diff --git a/Annex/Branch.hs b/Annex/Branch.hs
index b920b4a94f..ef5c8e8584 100644
--- a/Annex/Branch.hs
+++ b/Annex/Branch.hs
@@ -24,7 +24,7 @@ module Annex.Branch (
 	forceCommit,
 	getBranch,
 	files,
-	graftTreeish,
+	rememberTreeish,
 	performTransitions,
 	withIndex,
 ) where
@@ -646,17 +646,15 @@ getMergedRefs' = do
  - and then removes it. This ensures that the treeish won't get garbage
  - collected, and will always be available as long as the git-annex branch
  - is available. -}
-graftTreeish :: Git.Ref -> TopFilePath -> Annex ()
-graftTreeish treeish graftpoint = lockJournal $ \jl -> do
+rememberTreeish :: Git.Ref -> TopFilePath -> Annex ()
+rememberTreeish treeish graftpoint = lockJournal $ \jl -> do
 	branchref <- getBranch
 	updateIndex jl branchref
-	Git.Tree.Tree t <- inRepo $
-		Git.Tree.getTree Git.LsTree.LsTreeRecursive branchref
-	t' <- inRepo $ Git.Tree.recordTree $ Git.Tree.Tree $
-		Git.Tree.RecordedSubTree graftpoint treeish [] : t
+	origtree <- fromMaybe (giveup "unable to determine git-annex branch tree") <$>
+		inRepo (Git.Ref.tree branchref)
+	addedt <- inRepo $ Git.Tree.graftTree treeish graftpoint origtree
 	c <- inRepo $ Git.Branch.commitTree Git.Branch.AutomaticCommit
-		"graft" [branchref] t'
-	origtree <- inRepo $ Git.Tree.recordTree (Git.Tree.Tree t)
+		"graft" [branchref] addedt
 	c' <- inRepo $ Git.Branch.commitTree Git.Branch.AutomaticCommit
 		"graft cleanup" [c] origtree
 	inRepo $ Git.Branch.update' fullname c'
diff --git a/CHANGELOG b/CHANGELOG
index b74d7d1fde..5779bbbeef 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -2,6 +2,7 @@ git-annex (7.20190220) UNRELEASED; urgency=medium
 
   * Fix storage of metadata values containing newlines.
     (Reversion introduced in version 7.20190122.)
+  * Sped up git-annex export in repositories with lots of keys.
 
  -- Joey Hess Wed, 20 Feb 2019 14:20:59 -0400
 
diff --git a/Logs/Export.hs b/Logs/Export.hs
index 74f24412ef..4271517e4e 100644
--- a/Logs/Export.hs
+++ b/Logs/Export.hs
@@ -124,7 +124,7 @@ recordExportBeginning remoteuuid newtree = do
 		buildExportLog
 			. changeMapLog c ep new
 			. parseExportLog
-	Annex.Branch.graftTreeish newtree (asTopFilePath "export.tree")
+	Annex.Branch.rememberTreeish newtree (asTopFilePath "export.tree")
 
 parseExportLog :: L.ByteString -> MapLog ExportParticipants Exported
 parseExportLog = parseMapLog exportParticipantsParser exportedParser