speed up seeking pointer files

This solves the same problem as commit b4d0f6dfc2,
but in a better way that should make processing pointer files maximally
fast. If there is a mixture of pointer files and symlinks, the symlinks
that come before the first pointer file are handled maximally fast, while
the ones after it go via the slightly slower path.
This commit is contained in:
Joey Hess 2020-07-13 14:09:08 -04:00
parent f34edf0011
commit a290792a4f
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38

View file

@ -30,7 +30,7 @@ import Logs.Transfer
import Remote.List import Remote.List
import qualified Remote import qualified Remote
import Annex.CatFile import Annex.CatFile
import Git.CatFile (catObjectStreamLsTree, catObjectStream) import Git.CatFile
import Annex.CurrentBranch import Annex.CurrentBranch
import Annex.Content import Annex.Content
import Annex.Link import Annex.Link
@ -273,16 +273,16 @@ seekFilteredKeys a listfs = do
-- Run here, not in the async, because it could throw an exception -- Run here, not in the async, because it could throw an exception
-- The list should be built lazily. -- The list should be built lazily.
l <- listfs l <- listfs
catObjectStream g $ \feeder closer reader -> do catObjectMetaDataStream g $ \mdfeeder mdcloser mdreader ->
processertid <- liftIO . async =<< forkState catObjectStream g $ \feeder closer reader -> do
(gofeed l matcher feeder closer) processertid <- liftIO . async =<< forkState
goread reader (process matcher feeder mdfeeder mdcloser False l)
join (liftIO (wait processertid)) mdprocessertid <- liftIO . async =<< forkState
(mdprocess matcher mdreader feeder closer)
goread reader
join (liftIO (wait mdprocessertid))
join (liftIO (wait processertid))
where where
gofeed l matcher feeder closer =
forM_ l (process matcher feeder)
`finally` liftIO closer
goread reader = liftIO reader >>= \case goread reader = liftIO reader >>= \case
Just (f, content) -> do Just (f, content) -> do
maybe noop (a f) (parseLinkTargetOrPointerLazy =<< content) maybe noop (a f) (parseLinkTargetOrPointerLazy =<< content)
@ -293,19 +293,37 @@ seekFilteredKeys a listfs = do
whenM (matcher $ MatchingFile $ FileInfo f f) $ whenM (matcher $ MatchingFile $ FileInfo f f) $
liftIO $ feeder (f, sha) liftIO $ feeder (f, sha)
process matcher feeder (f, sha, mode) = case process matcher feeder mdfeeder mdcloser seenpointer ((f, sha, mode):rest) =
Git.toTreeItemType mode of case Git.toTreeItemType mode of
Just Git.TreeSymlink -> Just Git.TreeSymlink -> do
feedmatches matcher feeder f sha -- Once a pointer file has been seen,
Just Git.TreeSubmodule -> return () -- symlinks have to be sent via the
-- metadata processor too. That is slightly
-- slower, but preserves the requested
-- file order.
if seenpointer
then liftIO $ mdfeeder (f, sha)
else feedmatches matcher feeder f sha
process matcher feeder mdfeeder mdcloser seenpointer rest
Just Git.TreeSubmodule ->
process matcher feeder mdfeeder mdcloser seenpointer rest
-- Might be a pointer file, might be other -- Might be a pointer file, might be other
-- file in git, possibly large. Avoid catting -- file in git, possibly large. Avoid catting
-- large files by first looking up the size. -- large files by first looking up the size.
Just _ -> catObjectMetaData sha >>= \case Just _ -> do
Just (_, sz, _) | sz <= maxPointerSz -> liftIO $ mdfeeder (f, sha)
feedmatches matcher feeder f sha process matcher feeder mdfeeder mdcloser True rest
_ -> return () Nothing ->
Nothing -> return () process matcher feeder mdfeeder mdcloser seenpointer rest
process _ _ _ mdcloser _ [] = liftIO $ void mdcloser
-- Consumes the results read from mdreader until it returns Nothing.
-- Each result pairs a file with the looked-up metadata of its object
-- (or Nothing in place of the metadata).
-- Files whose object size is below maxPointerSz are handed to feedmatches;
-- every other result is skipped. Once mdreader returns Nothing, closer is run.
mdprocess matcher mdreader feeder closer = liftIO mdreader >>= \case
Just (f, Just (sha, size, _type))
-- NOTE(review): the lookup this replaced compared with
-- `sz <= maxPointerSz`; confirm that the strict `<` here is intended.
| size < maxPointerSz -> do
feedmatches matcher feeder f sha
mdprocess matcher mdreader feeder closer
-- Either no metadata was returned or the object is too large: skip it.
Just _ -> mdprocess matcher mdreader feeder closer
-- mdreader returned Nothing: run closer and stop.
Nothing -> liftIO $ void closer
seekHelper :: (a -> RawFilePath) -> WarnUnmatchWhen -> ([LsFiles.Options] -> [RawFilePath] -> Git.Repo -> IO ([a], IO Bool)) -> [WorkTreeItem] -> Annex [a] seekHelper :: (a -> RawFilePath) -> WarnUnmatchWhen -> ([LsFiles.Options] -> [RawFilePath] -> Git.Repo -> IO ([a], IO Bool)) -> [WorkTreeItem] -> Annex [a]
seekHelper c ww a l = do seekHelper c ww a l = do