add git ls-tree --long parser

Not yet used, but allows getting the size of items in the tree fairly
cheaply.

I noticed that CmdLine.Seek uses ls-tree and the feeds the files into
another long-running process to check their size. That would be an
example of a place that might be sped up by using this. Although in that
particular case, it only needs to know the size of unlocked files, not
locked. And since enabling --long probably doubles the ls-tree runtime
or more, the overhead of using it there may outwweigh the benefit.
This commit is contained in:
Joey Hess 2021-03-23 12:44:29 -04:00
parent d89a9b0f78
commit a8b837aaef
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
13 changed files with 97 additions and 56 deletions

View file

@ -414,7 +414,9 @@ branchFiles = withIndex $ inRepo branchFiles'
branchFiles' :: Git.Repo -> IO ([RawFilePath], IO Bool) branchFiles' :: Git.Repo -> IO ([RawFilePath], IO Bool)
branchFiles' = Git.Command.pipeNullSplit' $ branchFiles' = Git.Command.pipeNullSplit' $
lsTreeParams Git.LsTree.LsTreeRecursive fullname [Param "--name-only"] lsTreeParams Git.LsTree.LsTreeRecursive (Git.LsTree.LsTreeLong False)
fullname
[Param "--name-only"]
{- Populates the branch's index file with the current branch contents. {- Populates the branch's index file with the current branch contents.
- -

View file

@ -82,7 +82,10 @@ scanUnlockedFiles = whenM (inRepo Git.Ref.headExists <&&> not <$> isBareRepo) $
dropold <- liftIO $ newMVar $ dropold <- liftIO $ newMVar $
Database.Keys.runWriter $ Database.Keys.runWriter $
liftIO . Database.Keys.SQL.dropAllAssociatedFiles liftIO . Database.Keys.SQL.dropAllAssociatedFiles
(l, cleanup) <- inRepo $ Git.LsTree.lsTree Git.LsTree.LsTreeRecursive Git.Ref.headRef (l, cleanup) <- inRepo $ Git.LsTree.lsTree
Git.LsTree.LsTreeRecursive
(Git.LsTree.LsTreeLong False)
Git.Ref.headRef
forM_ l $ \i -> forM_ l $ \i ->
when (isregfile i) $ when (isregfile i) $
maybe noop (add dropold i) maybe noop (add dropold i)

View file

@ -89,7 +89,7 @@ reloadConfigs changedconfigs = do
getConfigs :: Assistant Configs getConfigs :: Assistant Configs
getConfigs = S.fromList . map extract getConfigs = S.fromList . map extract
<$> liftAnnex (inRepo $ LsTree.lsTreeFiles Annex.Branch.fullname files) <$> liftAnnex (inRepo $ LsTree.lsTreeFiles (LsTree.LsTreeLong False) Annex.Branch.fullname files)
where where
files = map (fromRawFilePath . fst) configFilesActions files = map (fromRawFilePath . fst) configFilesActions
extract treeitem = (getTopFilePath $ LsTree.file treeitem, LsTree.sha treeitem) extract treeitem = (getTopFilePath $ LsTree.file treeitem, LsTree.sha treeitem)

View file

@ -279,6 +279,7 @@ withKeyOptions' ko auto mkkeyaction fallbackaction worktreeitems = do
void Annex.Branch.update void Annex.Branch.update
(l, cleanup) <- inRepo $ LsTree.lsTree (l, cleanup) <- inRepo $ LsTree.lsTree
LsTree.LsTreeRecursive LsTree.LsTreeRecursive
(LsTree.LsTreeLong False)
Annex.Branch.fullname Annex.Branch.fullname
let getk f = fmap (,f) (locationLogFileKey config f) let getk f = fmap (,f) (locationLogFileKey config f)
let discard reader = reader >>= \case let discard reader = reader >>= \case
@ -301,7 +302,7 @@ withKeyOptions' ko auto mkkeyaction fallbackaction worktreeitems = do
runbranchkeys bs = do runbranchkeys bs = do
keyaction <- mkkeyaction keyaction <- mkkeyaction
forM_ bs $ \b -> do forM_ bs $ \b -> do
(l, cleanup) <- inRepo $ LsTree.lsTree LsTree.LsTreeRecursive b (l, cleanup) <- inRepo $ LsTree.lsTree LsTree.LsTreeRecursive (LsTree.LsTreeLong False) b
forM_ l $ \i -> catKey (LsTree.sha i) >>= \case forM_ l $ \i -> catKey (LsTree.sha i) >>= \case
Just k -> Just k ->
let bfp = mkActionItem (BranchFilePath b (LsTree.file i), k) let bfp = mkActionItem (BranchFilePath b (LsTree.file i), k)

View file

@ -238,7 +238,10 @@ newtype AllFilled = AllFilled { fromAllFilled :: Bool }
-- remote tracking branch. -- remote tracking branch.
fillExport :: Remote -> ExportHandle -> ExportFiltered Git.Ref -> Maybe (RemoteTrackingBranch, Sha) -> Annex Bool fillExport :: Remote -> ExportHandle -> ExportFiltered Git.Ref -> Maybe (RemoteTrackingBranch, Sha) -> Annex Bool
fillExport r db (ExportFiltered newtree) mtbcommitsha = do fillExport r db (ExportFiltered newtree) mtbcommitsha = do
(l, cleanup) <- inRepo $ Git.LsTree.lsTree Git.LsTree.LsTreeRecursive newtree (l, cleanup) <- inRepo $ Git.LsTree.lsTree
Git.LsTree.LsTreeRecursive
(Git.LsTree.LsTreeLong False)
newtree
cvar <- liftIO $ newMVar (FileUploaded False) cvar <- liftIO $ newMVar (FileUploaded False)
allfilledvar <- liftIO $ newMVar (AllFilled True) allfilledvar <- liftIO $ newMVar (AllFilled True)
commandActions $ commandActions $

View file

@ -589,7 +589,10 @@ getDirStatInfo o dir = do
getTreeStatInfo :: InfoOptions -> Git.Ref -> Annex (Maybe StatInfo) getTreeStatInfo :: InfoOptions -> Git.Ref -> Annex (Maybe StatInfo)
getTreeStatInfo o r = do getTreeStatInfo o r = do
fast <- Annex.getState Annex.fast fast <- Annex.getState Annex.fast
(ls, cleanup) <- inRepo $ LsTree.lsTree LsTree.LsTreeRecursive r (ls, cleanup) <- inRepo $ LsTree.lsTree
LsTree.LsTreeRecursive
(LsTree.LsTreeLong False)
r
(presentdata, referenceddata, repodata) <- go fast ls initial (presentdata, referenceddata, repodata) <- go fast ls initial
ifM (liftIO cleanup) ifM (liftIO cleanup)
( return $ Just $ ( return $ Just $

View file

@ -1,13 +1,14 @@
{- git ls-tree interface {- git ls-tree interface
- -
- Copyright 2011-2020 Joey Hess <id@joeyh.name> - Copyright 2011-2021 Joey Hess <id@joeyh.name>
- -
- Licensed under the GNU AGPL version 3 or higher. - Licensed under the GNU AGPL version 3 or higher.
-} -}
module Git.LsTree ( module Git.LsTree (
TreeItem(..), TreeItem(..),
LsTreeMode(..), LsTreeRecursive(..),
LsTreeLong(..),
lsTree, lsTree,
lsTree', lsTree',
lsTreeStrict, lsTreeStrict,
@ -38,44 +39,55 @@ data TreeItem = TreeItem
{ mode :: FileMode { mode :: FileMode
, typeobj :: S.ByteString , typeobj :: S.ByteString
, sha :: Ref , sha :: Ref
, size :: Maybe FileSize
, file :: TopFilePath , file :: TopFilePath
-- ^ only available when long is used
} deriving (Show) } deriving (Show)
data LsTreeMode = LsTreeRecursive | LsTreeNonRecursive data LsTreeRecursive = LsTreeRecursive | LsTreeNonRecursive
{- Enabling --long also gets the size of tree items.
- This slows down ls-tree some, since it has to look up the size of each
- blob.
-}
data LsTreeLong = LsTreeLong Bool
{- Lists the contents of a tree, with lazy output. -} {- Lists the contents of a tree, with lazy output. -}
lsTree :: LsTreeMode -> Ref -> Repo -> IO ([TreeItem], IO Bool) lsTree :: LsTreeRecursive -> LsTreeLong -> Ref -> Repo -> IO ([TreeItem], IO Bool)
lsTree = lsTree' [] lsTree = lsTree' []
lsTree' :: [CommandParam] -> LsTreeMode -> Ref -> Repo -> IO ([TreeItem], IO Bool) lsTree' :: [CommandParam] -> LsTreeRecursive -> LsTreeLong -> Ref -> Repo -> IO ([TreeItem], IO Bool)
lsTree' ps lsmode t repo = do lsTree' ps recursive long t repo = do
(l, cleanup) <- pipeNullSplit (lsTreeParams lsmode t ps) repo (l, cleanup) <- pipeNullSplit (lsTreeParams recursive long t ps) repo
return (rights (map parseLsTree l), cleanup) return (rights (map (parseLsTree long) l), cleanup)
lsTreeStrict :: LsTreeMode -> Ref -> Repo -> IO [TreeItem] lsTreeStrict :: LsTreeRecursive -> LsTreeLong -> Ref -> Repo -> IO [TreeItem]
lsTreeStrict = lsTreeStrict' [] lsTreeStrict = lsTreeStrict' []
lsTreeStrict' :: [CommandParam] -> LsTreeMode -> Ref -> Repo -> IO [TreeItem] lsTreeStrict' :: [CommandParam] -> LsTreeRecursive -> LsTreeLong -> Ref -> Repo -> IO [TreeItem]
lsTreeStrict' ps lsmode t repo = rights . map parseLsTreeStrict lsTreeStrict' ps recursive long t repo = rights . map (parseLsTreeStrict long)
<$> pipeNullSplitStrict (lsTreeParams lsmode t ps) repo <$> pipeNullSplitStrict (lsTreeParams recursive long t ps) repo
lsTreeParams :: LsTreeMode -> Ref -> [CommandParam] -> [CommandParam] lsTreeParams :: LsTreeRecursive -> LsTreeLong -> Ref -> [CommandParam] -> [CommandParam]
lsTreeParams lsmode r ps = lsTreeParams recursive long r ps =
[ Param "ls-tree" [ Param "ls-tree"
, Param "--full-tree" , Param "--full-tree"
, Param "-z" , Param "-z"
] ++ recursiveparams ++ ps ++ ] ++ recursiveparams ++ longparams ++ ps ++
[ Param "--" [ Param "--"
, File $ fromRef r , File $ fromRef r
] ]
where where
recursiveparams = case lsmode of recursiveparams = case recursive of
LsTreeRecursive -> [ Param "-r" ] LsTreeRecursive -> [ Param "-r" ]
LsTreeNonRecursive -> [] LsTreeNonRecursive -> []
longparams = case long of
LsTreeLong True -> [ Param "--long" ]
LsTreeLong False -> []
{- Lists specified files in a tree. -} {- Lists specified files in a tree. -}
lsTreeFiles :: Ref -> [FilePath] -> Repo -> IO [TreeItem] lsTreeFiles :: LsTreeLong -> Ref -> [FilePath] -> Repo -> IO [TreeItem]
lsTreeFiles t fs repo = rights . map (parseLsTree . L.fromStrict) lsTreeFiles long t fs repo = rights . map (parseLsTree long . L.fromStrict)
<$> pipeNullSplitStrict ps repo <$> pipeNullSplitStrict ps repo
where where
ps = ps =
@ -86,41 +98,54 @@ lsTreeFiles t fs repo = rights . map (parseLsTree . L.fromStrict)
, File $ fromRef t , File $ fromRef t
] ++ map File fs ] ++ map File fs
parseLsTree :: L.ByteString -> Either String TreeItem parseLsTree :: LsTreeLong -> L.ByteString -> Either String TreeItem
parseLsTree b = case A.parse parserLsTree b of parseLsTree long b = case A.parse (parserLsTree long) b of
A.Done _ r -> Right r A.Done _ r -> Right r
A.Fail _ _ err -> Left err A.Fail _ _ err -> Left err
parseLsTreeStrict :: S.ByteString -> Either String TreeItem parseLsTreeStrict :: LsTreeLong -> S.ByteString -> Either String TreeItem
parseLsTreeStrict b = go (AS.parse parserLsTree b) parseLsTreeStrict long b = go (AS.parse (parserLsTree long) b)
where where
go (AS.Done _ r) = Right r go (AS.Done _ r) = Right r
go (AS.Fail _ _ err) = Left err go (AS.Fail _ _ err) = Left err
go (AS.Partial c) = go (c mempty) go (AS.Partial c) = go (c mempty)
{- Parses a line of ls-tree output, in format: {- Parses a line of ls-tree output, in format:
- mode SP type SP sha TAB file - mode SP type SP sha TAB file
- Or long format:
- mode SP type SP sha SPACES size TAB file
- -
- The TAB can also be a space. Git does not use that, but an earlier - The TAB can also be a space. Git does not use that, but an earlier
- version of formatLsTree did, and this keeps parsing what it output - version of formatLsTree did, and this keeps parsing what it output
- working. - working.
- -}
- (The --long format is not currently supported.) -} parserLsTree :: LsTreeLong -> A.Parser TreeItem
parserLsTree :: A.Parser TreeItem parserLsTree long = case long of
parserLsTree = TreeItem LsTreeLong False ->
-- mode startparser <*> pure Nothing <* filesep <*> fileparser
<$> octal LsTreeLong True ->
<* A8.char ' ' startparser <* sizesep <*> sizeparser <* filesep <*> fileparser
-- type where
<*> A8.takeTill (== ' ') startparser = TreeItem
<* A8.char ' ' -- mode
-- sha <$> octal
<*> (Ref <$> A8.takeTill A8.isSpace) <* A8.char ' '
<* A8.space -- type
-- file <*> A8.takeTill (== ' ')
<*> (asTopFilePath . Git.Filename.decode <$> A.takeByteString) <* A8.char ' '
-- sha
<*> (Ref <$> A8.takeTill A8.isSpace)
{- Inverse of parseLsTree -} fileparser = asTopFilePath . Git.Filename.decode <$> A.takeByteString
sizeparser = fmap Just A8.decimal
filesep = A8.space
sizesep = A.many1 A8.space
{- Inverse of parseLsTree. Note that the long output format is not
- generated, so any size information is not included. -}
formatLsTree :: TreeItem -> String formatLsTree :: TreeItem -> String
formatLsTree ti = unwords formatLsTree ti = unwords
[ showOct (mode ti) "" [ showOct (mode ti) ""

View file

@ -351,8 +351,9 @@ verifyTree :: MissingObjects -> Sha -> Repo -> IO Bool
verifyTree missing treesha r verifyTree missing treesha r
| S.member treesha missing = return False | S.member treesha missing = return False
| otherwise = do | otherwise = do
(ls, cleanup) <- pipeNullSplit (LsTree.lsTreeParams LsTree.LsTreeRecursive treesha []) r let nolong = LsTree.LsTreeLong False
let objshas = mapMaybe (LsTree.sha <$$> eitherToMaybe . LsTree.parseLsTree) ls (ls, cleanup) <- pipeNullSplit (LsTree.lsTreeParams LsTree.LsTreeRecursive nolong treesha []) r
let objshas = mapMaybe (LsTree.sha <$$> eitherToMaybe . LsTree.parseLsTree nolong) ls
if any (`S.member` missing) objshas if any (`S.member` missing) objshas
then do then do
void cleanup void cleanup

View file

@ -55,16 +55,17 @@ data TreeContent
deriving (Show, Eq, Ord) deriving (Show, Eq, Ord)
{- Gets the Tree for a Ref. -} {- Gets the Tree for a Ref. -}
getTree :: LsTree.LsTreeMode -> Ref -> Repo -> IO Tree getTree :: LsTree.LsTreeRecursive -> Ref -> Repo -> IO Tree
getTree lstreemode r repo = do getTree recursive r repo = do
(l, cleanup) <- lsTreeWithObjects lstreemode r repo (l, cleanup) <- lsTreeWithObjects recursive r repo
let !t = either (\e -> error ("ls-tree parse error:" ++ e)) id let !t = either (\e -> error ("ls-tree parse error:" ++ e)) id
(extractTree l) (extractTree l)
void cleanup void cleanup
return t return t
lsTreeWithObjects :: LsTree.LsTreeMode -> Ref -> Repo -> IO ([LsTree.TreeItem], IO Bool) lsTreeWithObjects :: LsTree.LsTreeRecursive -> Ref -> Repo -> IO ([LsTree.TreeItem], IO Bool)
lsTreeWithObjects = LsTree.lsTree' [Param "-t"] lsTreeWithObjects recursive =
LsTree.lsTree' [Param "-t"] recursive (LsTree.LsTreeLong False)
newtype MkTreeHandle = MkTreeHandle CoProcess.CoProcessHandle newtype MkTreeHandle = MkTreeHandle CoProcess.CoProcessHandle
@ -140,6 +141,7 @@ treeItemToLsTreeItem (TreeItem f mode sha) = LsTree.TreeItem
Just TreeSubtree -> TreeObject Just TreeSubtree -> TreeObject
_ -> BlobObject _ -> BlobObject
, LsTree.sha = sha , LsTree.sha = sha
, LsTree.size = Nothing
, LsTree.file = f , LsTree.file = f
} }

View file

@ -204,5 +204,5 @@ getExportExcluded u = do
exportExcludedParser :: L.ByteString -> [Git.Tree.TreeItem] exportExcludedParser :: L.ByteString -> [Git.Tree.TreeItem]
exportExcludedParser = map Git.Tree.lsTreeItemToTreeItem exportExcludedParser = map Git.Tree.lsTreeItemToTreeItem
. rights . rights
. map Git.LsTree.parseLsTree . map (Git.LsTree.parseLsTree (Git.LsTree.LsTreeLong False))
. L.split (fromIntegral $ ord '\n') . L.split (fromIntegral $ ord '\n')

View file

@ -97,6 +97,7 @@ withKnownUrls a = do
Annex.Branch.commit =<< Annex.Branch.commitMessage Annex.Branch.commit =<< Annex.Branch.commitMessage
(l, cleanup) <- inRepo $ Git.LsTree.lsTree (l, cleanup) <- inRepo $ Git.LsTree.lsTree
Git.LsTree.LsTreeRecursive Git.LsTree.LsTreeRecursive
(Git.LsTree.LsTreeLong False)
Annex.Branch.fullname Annex.Branch.fullname
g <- Annex.gitRepo g <- Annex.gitRepo
let want = urlLogFileKey . getTopFilePath . Git.LsTree.file let want = urlLogFileKey . getTopFilePath . Git.LsTree.file

View file

@ -243,7 +243,7 @@ getImported :: UUID -> Annex (M.Map BorgArchiveName (Annex [(ImportLocation, (Co
getImported u = M.unions <$> (mapM go . exportedTreeishes =<< getExport u) getImported u = M.unions <$> (mapM go . exportedTreeishes =<< getExport u)
where where
go t = M.fromList . mapMaybe mk go t = M.fromList . mapMaybe mk
<$> inRepo (LsTree.lsTreeStrict LsTree.LsTreeNonRecursive t) <$> inRepo (LsTree.lsTreeStrict LsTree.LsTreeNonRecursive (LsTree.LsTreeLong False) t)
mk ti mk ti
| toTreeItemType (LsTree.mode ti) == Just TreeSubtree = Just | toTreeItemType (LsTree.mode ti) == Just TreeSubtree = Just
@ -255,7 +255,7 @@ getImported u = M.unions <$> (mapM go . exportedTreeishes =<< getExport u)
| otherwise = Nothing | otherwise = Nothing
getcontents archivename t = mapMaybe (mkcontents archivename) getcontents archivename t = mapMaybe (mkcontents archivename)
<$> inRepo (LsTree.lsTreeStrict LsTree.LsTreeRecursive t) <$> inRepo (LsTree.lsTreeStrict LsTree.LsTreeRecursive (LsTree.LsTreeLong False) t)
mkcontents archivename ti = do mkcontents archivename ti = do
let f = ThirdPartyPopulated.fromThirdPartyImportLocation $ let f = ThirdPartyPopulated.fromThirdPartyImportLocation $

View file

@ -1495,7 +1495,7 @@ test_conflict_resolution_symlink_bit = unlessM (hasUnlockedFiles <$> getTestMode
conflictor = "conflictor" conflictor = "conflictor"
check_is_link f what = do check_is_link f what = do
git_annex_expectoutput "find" ["--include=*", f] [fromRawFilePath (Git.FilePath.toInternalGitPath (toRawFilePath f))] git_annex_expectoutput "find" ["--include=*", f] [fromRawFilePath (Git.FilePath.toInternalGitPath (toRawFilePath f))]
l <- annexeval $ Annex.inRepo $ Git.LsTree.lsTreeFiles Git.Ref.headRef [f] l <- annexeval $ Annex.inRepo $ Git.LsTree.lsTreeFiles (Git.LsTree.LsTreeLong False) Git.Ref.headRef [f]
all (\i -> Git.Types.toTreeItemType (Git.LsTree.mode i) == Just Git.Types.TreeSymlink) l all (\i -> Git.Types.toTreeItemType (Git.LsTree.mode i) == Just Git.Types.TreeSymlink) l
@? (what ++ " " ++ f ++ " lost symlink bit after merge: " ++ show l) @? (what ++ " " ++ f ++ " lost symlink bit after merge: " ++ show l)