optimise adjustTree when adding many TreeItems
The old code traversed the list of addtreeitems once per subdirectory in the tree, so could get quite slow. Converting to Map lookups sped it up significantly. In my test case, git-annex import used to take about 2 minutes, when calling adjustTree to add back excluded files to the imported tree. This dropped it down to 6 seconds. Of which 4 seconds are the actual enumeration of the contents of the remote, so really only 2 seconds for this. The path prefix map is a bit suboptimal memory-wise, since items get stored in the map once per subdirectory on the path to the item. It would perhaps be better to use a tree data structure. Also it's suboptimal memory-wise that it builds two maps, as well as retaining a reference to addtreeitems. I could not see a way around that though. Sponsored-by: Luke T. Shumaker on Patreon
This commit is contained in:
parent
a6a67f79e7
commit
2c86651180
3 changed files with 57 additions and 16 deletions
|
@ -1,8 +1,7 @@
|
|||
git-annex (10.20231228) UNRELEASED; urgency=medium
|
||||
|
||||
* info: Added "annex sizes of repositories" table to the overall display.
|
||||
* import: Sped up import from special remote when the imported tree is
|
||||
unchanged.
|
||||
* import: Sped up import from special remotes.
|
||||
|
||||
-- Joey Hess <id@joeyh.name> Fri, 29 Dec 2023 11:52:06 -0400
|
||||
|
||||
|
|
68
Git/Tree.hs
68
Git/Tree.hs
|
@ -1,11 +1,12 @@
|
|||
{- git trees
|
||||
-
|
||||
- Copyright 2016-2021 Joey Hess <id@joeyh.name>
|
||||
- Copyright 2016-2023 Joey Hess <id@joeyh.name>
|
||||
-
|
||||
- Licensed under the GNU AGPL version 3 or higher.
|
||||
-}
|
||||
|
||||
{-# LANGUAGE BangPatterns, TypeSynonymInstances, FlexibleInstances #-}
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
|
||||
module Git.Tree (
|
||||
Tree(..),
|
||||
|
@ -231,7 +232,7 @@ adjustTree adjusttreeitem addtreeitems resolveaddconflict removefiles r repo =
|
|||
withMkTreeHandle repo $ \h -> do
|
||||
(l, cleanup) <- liftIO $ lsTreeWithObjects LsTree.LsTreeRecursive r repo
|
||||
(l', _, _) <- go h False [] 1 inTopTree l
|
||||
l'' <- adjustlist h 0 inTopTree (const True) l'
|
||||
l'' <- adjustlist h 0 inTopTree topTreePath l'
|
||||
sha <- liftIO $ mkTree h l''
|
||||
void $ liftIO cleanup
|
||||
return sha
|
||||
|
@ -250,7 +251,7 @@ adjustTree adjusttreeitem addtreeitems resolveaddconflict removefiles r repo =
|
|||
in go h modified (blob:c) depth intree is
|
||||
Just TreeObject -> do
|
||||
(sl, modified, is') <- go h False [] (depth+1) (beneathSubTree i) is
|
||||
sl' <- adjustlist h depth (inTree i) (beneathSubTree i) sl
|
||||
sl' <- adjustlist h depth (inTree i) (gitPath i) sl
|
||||
let slmodified = sl' /= sl
|
||||
subtree <- if modified || slmodified
|
||||
then liftIO $ recordSubTree h $ NewSubTree (LsTree.file i) sl'
|
||||
|
@ -268,16 +269,22 @@ adjustTree adjusttreeitem addtreeitems resolveaddconflict removefiles r repo =
|
|||
_ -> giveup ("unexpected object type \"" ++ decodeBS (LsTree.typeobj i) ++ "\"")
|
||||
| otherwise = return (c, wasmodified, i:is)
|
||||
|
||||
adjustlist h depth ishere underhere l = do
|
||||
let (addhere, rest) = partition ishere addtreeitems
|
||||
adjustlist h depth ishere herepath l = do
|
||||
let addhere = fromMaybe [] $ M.lookup herepath addtreeitempathmap
|
||||
let l' = filter (not . removed) $
|
||||
addoldnew l (map treeItemToTreeContent addhere)
|
||||
let inl i = any (\t -> beneathSubTree t i) l'
|
||||
let (Tree addunderhere) = flattenTree depth $ treeItemsToTree $
|
||||
filter (\i -> underhere i && not (inl i)) rest
|
||||
filter (not . inl) $ if herepath == topTreePath
|
||||
then filter (not . ishere) addtreeitems
|
||||
else fromMaybe [] $
|
||||
M.lookup (subTreePrefix herepath) addtreeitemprefixmap
|
||||
addunderhere' <- liftIO $ mapM (recordSubTree h) addunderhere
|
||||
return (addoldnew l' addunderhere')
|
||||
|
||||
addtreeitempathmap = mkPathMap addtreeitems
|
||||
addtreeitemprefixmap = mkPathPrefixMap addtreeitems
|
||||
|
||||
removeset = S.fromList $ map (P.normalise . gitPath) removefiles
|
||||
removed (TreeBlob f _ _) = S.member (P.normalise (gitPath f)) removeset
|
||||
removed (TreeCommit f _ _) = S.member (P.normalise (gitPath f)) removeset
|
||||
|
@ -355,12 +362,8 @@ graftTree' subtree graftloc basetree repo hdl = go basetree subdirs graftdirs
|
|||
|
||||
subdirs = P.splitDirectories $ gitPath graftloc
|
||||
|
||||
-- For a graftloc of "foo/bar/baz", this generates
|
||||
-- ["foo", "foo/bar", "foo/bar/baz"]
|
||||
graftdirs = map (asTopFilePath . toInternalGitPath) $
|
||||
mkpaths [] subdirs
|
||||
mkpaths _ [] = []
|
||||
mkpaths base (d:rest) = (P.joinPath base P.</> d) : mkpaths (base ++ [d]) rest
|
||||
pathPrefixes subdirs
|
||||
|
||||
{- Assumes the list is ordered, with tree objects coming right before their
|
||||
- contents. -}
|
||||
|
@ -413,13 +416,50 @@ instance GitPath TreeContent where
|
|||
gitPath (TreeCommit f _ _) = gitPath f
|
||||
|
||||
inTopTree :: GitPath t => t -> Bool
|
||||
inTopTree = inTree "."
|
||||
inTopTree = inTree topTreePath
|
||||
|
||||
topTreePath :: RawFilePath
|
||||
topTreePath = "."
|
||||
|
||||
inTree :: (GitPath t, GitPath f) => t -> f -> Bool
|
||||
inTree t f = gitPath t == P.takeDirectory (gitPath f)
|
||||
|
||||
beneathSubTree :: (GitPath t, GitPath f) => t -> f -> Bool
|
||||
beneathSubTree t f = prefix `B.isPrefixOf` P.normalise (gitPath f)
|
||||
beneathSubTree t f = subTreePrefix t `B.isPrefixOf` subTreePath f
|
||||
|
||||
subTreePath :: GitPath t => t -> RawFilePath
|
||||
subTreePath = P.normalise . gitPath
|
||||
|
||||
subTreePrefix :: GitPath t => t -> RawFilePath
|
||||
subTreePrefix t
|
||||
| B.null tp = tp
|
||||
| otherwise = P.addTrailingPathSeparator (P.normalise tp)
|
||||
where
|
||||
tp = gitPath t
|
||||
prefix = if B.null tp then tp else P.addTrailingPathSeparator (P.normalise tp)
|
||||
|
||||
{- Makes a Map where the keys are directories, and the values
|
||||
- are the items located in that directory.
|
||||
-
|
||||
- Values that are not in any subdirectory are placed in
|
||||
- the topTreePath key.
|
||||
-}
|
||||
mkPathMap :: GitPath t => [t] -> M.Map RawFilePath [t]
|
||||
mkPathMap l = M.fromListWith (++) $
|
||||
map (\ti -> (P.takeDirectory (gitPath ti), [ti])) l
|
||||
|
||||
{- Input is eg splitDirectories "foo/bar/baz",
|
||||
- for which it will output ["foo", "foo/bar", "foo/bar/baz"] -}
|
||||
pathPrefixes :: [RawFilePath] -> [RawFilePath]
|
||||
pathPrefixes = go []
|
||||
where
|
||||
go _ [] = []
|
||||
go base (d:rest) = (P.joinPath base P.</> d) : go (base ++ [d]) rest
|
||||
|
||||
{- Makes a Map where the keys are all path prefixes,
|
||||
- and the values are items with that path prefix.
|
||||
-}
|
||||
mkPathPrefixMap :: GitPath t => [t] -> M.Map RawFilePath [t]
|
||||
mkPathPrefixMap l = M.fromListWith (++) $ concatMap go l
|
||||
where
|
||||
go ti = map (\p -> (p, [ti]))
|
||||
(pathPrefixes $ P.splitDirectories $ subTreePath ti)
|
||||
|
|
|
@ -27,6 +27,8 @@ Note that adjustTree is also used with a possibly big list of files to add
|
|||
in Annex.Import.buildImportTrees. No other calls of adjustTree pass files
|
||||
to add.
|
||||
|
||||
> [[done]] --[[Joey]]
|
||||
|
||||
## git merge-tree
|
||||
|
||||
Would it be possible to use `git merge-tree` instead?
|
||||
|
|
Loading…
Reference in a new issue