Sped up view branch construction by 50%
A benchmark in my sound repository with `git-annex view feedtitle=*` took 2:52 wall clock time before and 1:58 after, though it still only used 130% of CPU. This is the same kind of optimisation that is in seekFilteredKeys, though that precaches location logs, while this streams the metadata logs directly into the parser. seekFilteredKeys contains more streaming, to find the annexed files, and this could be further sped up with similar streaming. Sponsored-by: Nicholas Golder-Manning on Patreon
This commit is contained in:
parent
c733ccdf21
commit
826b225ca8
3 changed files with 63 additions and 23 deletions
|
@ -15,12 +15,14 @@ import Types.View
|
||||||
import Types.MetaData
|
import Types.MetaData
|
||||||
import Annex.MetaData
|
import Annex.MetaData
|
||||||
import qualified Annex
|
import qualified Annex
|
||||||
|
import qualified Annex.Branch
|
||||||
import qualified Git
|
import qualified Git
|
||||||
import qualified Git.DiffTree as DiffTree
|
import qualified Git.DiffTree as DiffTree
|
||||||
import qualified Git.Branch
|
import qualified Git.Branch
|
||||||
import qualified Git.LsFiles
|
import qualified Git.LsFiles
|
||||||
import qualified Git.LsTree
|
import qualified Git.LsTree
|
||||||
import qualified Git.Ref
|
import qualified Git.Ref
|
||||||
|
import Git.CatFile
|
||||||
import Git.UpdateIndex
|
import Git.UpdateIndex
|
||||||
import Git.Sha
|
import Git.Sha
|
||||||
import Git.Types
|
import Git.Types
|
||||||
|
@ -29,6 +31,8 @@ import Annex.WorkTree
|
||||||
import Annex.GitOverlay
|
import Annex.GitOverlay
|
||||||
import Annex.Link
|
import Annex.Link
|
||||||
import Annex.CatFile
|
import Annex.CatFile
|
||||||
|
import Annex.Concurrent
|
||||||
|
import Logs
|
||||||
import Logs.MetaData
|
import Logs.MetaData
|
||||||
import Logs.View
|
import Logs.View
|
||||||
import Utility.Glob
|
import Utility.Glob
|
||||||
|
@ -41,6 +45,7 @@ import qualified Data.ByteString as B
|
||||||
import qualified Data.Set as S
|
import qualified Data.Set as S
|
||||||
import qualified Data.Map as M
|
import qualified Data.Map as M
|
||||||
import qualified System.FilePath.ByteString as P
|
import qualified System.FilePath.ByteString as P
|
||||||
|
import Control.Concurrent.Async
|
||||||
import "mtl" Control.Monad.Writer
|
import "mtl" Control.Monad.Writer
|
||||||
|
|
||||||
{- Each visible ViewFilter in a view results in another level of
|
{- Each visible ViewFilter in a view results in another level of
|
||||||
|
@ -435,9 +440,10 @@ applyView' mkviewedfile getfilemetadata view = do
|
||||||
top <- fromRepo Git.repoPath
|
top <- fromRepo Git.repoPath
|
||||||
(l, clean) <- inRepo $ Git.LsFiles.inRepoDetails [] [top]
|
(l, clean) <- inRepo $ Git.LsFiles.inRepoDetails [] [top]
|
||||||
applyView'' mkviewedfile getfilemetadata view l clean $
|
applyView'' mkviewedfile getfilemetadata view l clean $
|
||||||
\go (f, sha, mode) -> do
|
\(f, sha, mode) -> do
|
||||||
topf <- inRepo (toTopFilePath f)
|
topf <- inRepo (toTopFilePath f)
|
||||||
go topf sha (toTreeItemType mode) =<< lookupKey f
|
k <- lookupKey f
|
||||||
|
return (topf, sha, toTreeItemType mode, k)
|
||||||
genViewBranch view
|
genViewBranch view
|
||||||
|
|
||||||
applyView''
|
applyView''
|
||||||
|
@ -446,29 +452,58 @@ applyView''
|
||||||
-> View
|
-> View
|
||||||
-> [t]
|
-> [t]
|
||||||
-> IO Bool
|
-> IO Bool
|
||||||
-> ((TopFilePath -> Sha -> Maybe TreeItemType -> Maybe Key -> Annex ()) -> t -> Annex ())
|
-> (t -> Annex (TopFilePath, Sha, Maybe TreeItemType, Maybe Key))
|
||||||
-> Annex ()
|
-> Annex ()
|
||||||
applyView'' mkviewedfile getfilemetadata view l clean a = do
|
applyView'' mkviewedfile getfilemetadata view l clean conv = do
|
||||||
viewg <- withNewViewIndex gitRepo
|
viewg <- withNewViewIndex gitRepo
|
||||||
withUpdateIndex viewg $ \uh -> do
|
withUpdateIndex viewg $ \uh -> do
|
||||||
forM_ l $ a (go uh)
|
g <- Annex.gitRepo
|
||||||
liftIO $ void clean
|
gc <- Annex.getGitConfig
|
||||||
|
-- Streaming the metadata like this is an optimisation.
|
||||||
|
catObjectStream g $ \mdfeeder mdcloser mdreader -> do
|
||||||
|
tid <- liftIO . async =<< forkState
|
||||||
|
(getmetadata gc mdfeeder mdcloser l)
|
||||||
|
process uh mdreader
|
||||||
|
join (liftIO (wait tid))
|
||||||
|
liftIO $ void clean
|
||||||
where
|
where
|
||||||
genviewedfiles = viewedFiles view mkviewedfile -- enables memoization
|
genviewedfiles = viewedFiles view mkviewedfile -- enables memoization
|
||||||
|
|
||||||
go uh topf _sha _mode (Just k) = do
|
getmetadata _ _ mdcloser [] = liftIO mdcloser
|
||||||
metadata <- getCurrentMetaData k
|
getmetadata gc mdfeeder mdcloser (t:ts) = do
|
||||||
let f = fromRawFilePath $ getTopFilePath topf
|
v@(topf, _sha, _treeitemtype, mkey) <- conv t
|
||||||
let metadata' = getfilemetadata f `unionMetaData` metadata
|
let feed mdlogf = liftIO $ mdfeeder
|
||||||
forM_ (genviewedfiles f metadata') $ \fv -> do
|
(v, Git.Ref.branchFileRef Annex.Branch.fullname mdlogf)
|
||||||
f' <- fromRepo (fromTopFilePath $ asTopFilePath $ toRawFilePath fv)
|
case mkey of
|
||||||
stagesymlink uh f' =<< calcRepo (gitAnnexLink f' k)
|
Just key -> feed (metaDataLogFile gc key)
|
||||||
go uh topf sha (Just treeitemtype) Nothing
|
Nothing
|
||||||
| "." `B.isPrefixOf` getTopFilePath topf =
|
-- Handle toplevel dotfiles that are not
|
||||||
|
-- annexed files by feeding through a query
|
||||||
|
-- for dummy metadata. Calling
|
||||||
|
-- Git.UpdateIndex.streamUpdateIndex'
|
||||||
|
-- here would race with process's calls
|
||||||
|
-- to it.
|
||||||
|
| "." `B.isPrefixOf` getTopFilePath topf ->
|
||||||
|
feed "dummy"
|
||||||
|
| otherwise -> noop
|
||||||
|
getmetadata gc mdfeeder mdcloser ts
|
||||||
|
|
||||||
|
process uh mdreader = liftIO mdreader >>= \case
|
||||||
|
Just ((topf, _, _, Just k), Just mdlog) -> do
|
||||||
|
let metadata = parseCurrentMetaData mdlog
|
||||||
|
let f = fromRawFilePath $ getTopFilePath topf
|
||||||
|
let metadata' = getfilemetadata f `unionMetaData` metadata
|
||||||
|
forM_ (genviewedfiles f metadata') $ \fv -> do
|
||||||
|
f' <- fromRepo (fromTopFilePath $ asTopFilePath $ toRawFilePath fv)
|
||||||
|
stagesymlink uh f' =<< calcRepo (gitAnnexLink f' k)
|
||||||
|
process uh mdreader
|
||||||
|
Just ((topf, sha, Just treeitemtype, Nothing), _) -> do
|
||||||
liftIO $ Git.UpdateIndex.streamUpdateIndex' uh $
|
liftIO $ Git.UpdateIndex.streamUpdateIndex' uh $
|
||||||
pureStreamer $ updateIndexLine sha treeitemtype topf
|
pureStreamer $ updateIndexLine sha treeitemtype topf
|
||||||
go _ _ _ _ _ = noop
|
process uh mdreader
|
||||||
|
Just _ -> process uh mdreader
|
||||||
|
Nothing -> return ()
|
||||||
|
|
||||||
stagesymlink uh f linktarget = do
|
stagesymlink uh f linktarget = do
|
||||||
sha <- hashSymlink linktarget
|
sha <- hashSymlink linktarget
|
||||||
liftIO . Git.UpdateIndex.streamUpdateIndex' uh
|
liftIO . Git.UpdateIndex.streamUpdateIndex' uh
|
||||||
|
@ -490,17 +525,18 @@ updateView view = do
|
||||||
(Git.LsTree.LsTreeLong True)
|
(Git.LsTree.LsTreeLong True)
|
||||||
(viewParentBranch view)
|
(viewParentBranch view)
|
||||||
applyView'' viewedFileFromReference getWorkTreeMetaData view l clean $
|
applyView'' viewedFileFromReference getWorkTreeMetaData view l clean $
|
||||||
\go ti -> do
|
\ti -> do
|
||||||
let ref = Git.Ref.branchFileRef (viewParentBranch view)
|
let ref = Git.Ref.branchFileRef (viewParentBranch view)
|
||||||
(getTopFilePath (Git.LsTree.file ti))
|
(getTopFilePath (Git.LsTree.file ti))
|
||||||
k <- case Git.LsTree.size ti of
|
k <- case Git.LsTree.size ti of
|
||||||
Nothing -> catKey ref
|
Nothing -> catKey ref
|
||||||
Just sz -> catKey' ref sz
|
Just sz -> catKey' ref sz
|
||||||
go
|
return
|
||||||
(Git.LsTree.file ti)
|
( (Git.LsTree.file ti)
|
||||||
(Git.LsTree.sha ti)
|
, (Git.LsTree.sha ti)
|
||||||
(toTreeItemType (Git.LsTree.mode ti))
|
, (toTreeItemType (Git.LsTree.mode ti))
|
||||||
k
|
, k
|
||||||
|
)
|
||||||
oldcommit <- inRepo $ Git.Ref.sha (branchView view)
|
oldcommit <- inRepo $ Git.Ref.sha (branchView view)
|
||||||
oldtree <- maybe (pure Nothing) (inRepo . Git.Ref.tree) oldcommit
|
oldtree <- maybe (pure Nothing) (inRepo . Git.Ref.tree) oldcommit
|
||||||
newtree <- withViewIndex $ inRepo Git.Branch.writeTree
|
newtree <- withViewIndex $ inRepo Git.Branch.writeTree
|
||||||
|
|
|
@ -20,6 +20,7 @@ git-annex (10.20230127) UNRELEASED; urgency=medium
|
||||||
number, the same as git does.
|
number, the same as git does.
|
||||||
* sync: Warn when the adjusted basis ref cannot be found, as happens eg when
|
* sync: Warn when the adjusted basis ref cannot be found, as happens eg when
|
||||||
the user has renamed branches.
|
the user has renamed branches.
|
||||||
|
* Sped up view branch construction by 50%.
|
||||||
|
|
||||||
-- Joey Hess <id@joeyh.name> Mon, 06 Feb 2023 13:39:18 -0400
|
-- Joey Hess <id@joeyh.name> Mon, 06 Feb 2023 13:39:18 -0400
|
||||||
|
|
||||||
|
|
|
@ -18,3 +18,6 @@ It may also be that just improving the precaching of metadata logs
|
||||||
would improve the speed a lot. The streaming precaching of location logs
|
would improve the speed a lot. The streaming precaching of location logs
|
||||||
sped up some commands around 2x before IIRC.
|
sped up some commands around 2x before IIRC.
|
||||||
|
|
||||||
|
> catObjectStream of metadata logs sped view construction up by ~50%.
|
||||||
|
> More streaming should speed it up more; it still uses
|
||||||
|
> lookupKey/catKey once per file. --[[Joey]]
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue