add git ls-tree --long parser

Not yet used, but allows getting the size of items in the tree fairly
cheaply.

I noticed that CmdLine.Seek uses ls-tree and the feeds the files into
another long-running process to check their size. That would be an
example of a place that might be sped up by using this. Although in that
particular case, it only needs to know the size of unlocked files, not
locked. And since enabling --long probably doubles the ls-tree runtime
or more, the overhead of using it there may outwweigh the benefit.
This commit is contained in:
Joey Hess 2021-03-23 12:44:29 -04:00
parent d89a9b0f78
commit a8b837aaef
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
13 changed files with 97 additions and 56 deletions

View file

@ -414,7 +414,9 @@ branchFiles = withIndex $ inRepo branchFiles'
branchFiles' :: Git.Repo -> IO ([RawFilePath], IO Bool)
branchFiles' = Git.Command.pipeNullSplit' $
lsTreeParams Git.LsTree.LsTreeRecursive fullname [Param "--name-only"]
lsTreeParams Git.LsTree.LsTreeRecursive (Git.LsTree.LsTreeLong False)
fullname
[Param "--name-only"]
{- Populates the branch's index file with the current branch contents.
-

View file

@ -82,7 +82,10 @@ scanUnlockedFiles = whenM (inRepo Git.Ref.headExists <&&> not <$> isBareRepo) $
dropold <- liftIO $ newMVar $
Database.Keys.runWriter $
liftIO . Database.Keys.SQL.dropAllAssociatedFiles
(l, cleanup) <- inRepo $ Git.LsTree.lsTree Git.LsTree.LsTreeRecursive Git.Ref.headRef
(l, cleanup) <- inRepo $ Git.LsTree.lsTree
Git.LsTree.LsTreeRecursive
(Git.LsTree.LsTreeLong False)
Git.Ref.headRef
forM_ l $ \i ->
when (isregfile i) $
maybe noop (add dropold i)

View file

@ -89,7 +89,7 @@ reloadConfigs changedconfigs = do
getConfigs :: Assistant Configs
getConfigs = S.fromList . map extract
<$> liftAnnex (inRepo $ LsTree.lsTreeFiles Annex.Branch.fullname files)
<$> liftAnnex (inRepo $ LsTree.lsTreeFiles (LsTree.LsTreeLong False) Annex.Branch.fullname files)
where
files = map (fromRawFilePath . fst) configFilesActions
extract treeitem = (getTopFilePath $ LsTree.file treeitem, LsTree.sha treeitem)

View file

@ -279,6 +279,7 @@ withKeyOptions' ko auto mkkeyaction fallbackaction worktreeitems = do
void Annex.Branch.update
(l, cleanup) <- inRepo $ LsTree.lsTree
LsTree.LsTreeRecursive
(LsTree.LsTreeLong False)
Annex.Branch.fullname
let getk f = fmap (,f) (locationLogFileKey config f)
let discard reader = reader >>= \case
@ -301,7 +302,7 @@ withKeyOptions' ko auto mkkeyaction fallbackaction worktreeitems = do
runbranchkeys bs = do
keyaction <- mkkeyaction
forM_ bs $ \b -> do
(l, cleanup) <- inRepo $ LsTree.lsTree LsTree.LsTreeRecursive b
(l, cleanup) <- inRepo $ LsTree.lsTree LsTree.LsTreeRecursive (LsTree.LsTreeLong False) b
forM_ l $ \i -> catKey (LsTree.sha i) >>= \case
Just k ->
let bfp = mkActionItem (BranchFilePath b (LsTree.file i), k)

View file

@ -238,7 +238,10 @@ newtype AllFilled = AllFilled { fromAllFilled :: Bool }
-- remote tracking branch.
fillExport :: Remote -> ExportHandle -> ExportFiltered Git.Ref -> Maybe (RemoteTrackingBranch, Sha) -> Annex Bool
fillExport r db (ExportFiltered newtree) mtbcommitsha = do
(l, cleanup) <- inRepo $ Git.LsTree.lsTree Git.LsTree.LsTreeRecursive newtree
(l, cleanup) <- inRepo $ Git.LsTree.lsTree
Git.LsTree.LsTreeRecursive
(Git.LsTree.LsTreeLong False)
newtree
cvar <- liftIO $ newMVar (FileUploaded False)
allfilledvar <- liftIO $ newMVar (AllFilled True)
commandActions $

View file

@ -589,7 +589,10 @@ getDirStatInfo o dir = do
getTreeStatInfo :: InfoOptions -> Git.Ref -> Annex (Maybe StatInfo)
getTreeStatInfo o r = do
fast <- Annex.getState Annex.fast
(ls, cleanup) <- inRepo $ LsTree.lsTree LsTree.LsTreeRecursive r
(ls, cleanup) <- inRepo $ LsTree.lsTree
LsTree.LsTreeRecursive
(LsTree.LsTreeLong False)
r
(presentdata, referenceddata, repodata) <- go fast ls initial
ifM (liftIO cleanup)
( return $ Just $

View file

@ -1,13 +1,14 @@
{- git ls-tree interface
-
- Copyright 2011-2020 Joey Hess <id@joeyh.name>
- Copyright 2011-2021 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Git.LsTree (
TreeItem(..),
LsTreeMode(..),
LsTreeRecursive(..),
LsTreeLong(..),
lsTree,
lsTree',
lsTreeStrict,
@ -38,44 +39,55 @@ data TreeItem = TreeItem
{ mode :: FileMode
, typeobj :: S.ByteString
, sha :: Ref
, size :: Maybe FileSize
, file :: TopFilePath
-- ^ only available when long is used
} deriving (Show)
data LsTreeMode = LsTreeRecursive | LsTreeNonRecursive
data LsTreeRecursive = LsTreeRecursive | LsTreeNonRecursive
{- Enabling --long also gets the size of tree items.
- This slows down ls-tree some, since it has to look up the size of each
- blob.
-}
data LsTreeLong = LsTreeLong Bool
{- Lists the contents of a tree, with lazy output. -}
lsTree :: LsTreeMode -> Ref -> Repo -> IO ([TreeItem], IO Bool)
lsTree :: LsTreeRecursive -> LsTreeLong -> Ref -> Repo -> IO ([TreeItem], IO Bool)
lsTree = lsTree' []
lsTree' :: [CommandParam] -> LsTreeMode -> Ref -> Repo -> IO ([TreeItem], IO Bool)
lsTree' ps lsmode t repo = do
(l, cleanup) <- pipeNullSplit (lsTreeParams lsmode t ps) repo
return (rights (map parseLsTree l), cleanup)
lsTree' :: [CommandParam] -> LsTreeRecursive -> LsTreeLong -> Ref -> Repo -> IO ([TreeItem], IO Bool)
lsTree' ps recursive long t repo = do
(l, cleanup) <- pipeNullSplit (lsTreeParams recursive long t ps) repo
return (rights (map (parseLsTree long) l), cleanup)
lsTreeStrict :: LsTreeMode -> Ref -> Repo -> IO [TreeItem]
lsTreeStrict :: LsTreeRecursive -> LsTreeLong -> Ref -> Repo -> IO [TreeItem]
lsTreeStrict = lsTreeStrict' []
lsTreeStrict' :: [CommandParam] -> LsTreeMode -> Ref -> Repo -> IO [TreeItem]
lsTreeStrict' ps lsmode t repo = rights . map parseLsTreeStrict
<$> pipeNullSplitStrict (lsTreeParams lsmode t ps) repo
lsTreeStrict' :: [CommandParam] -> LsTreeRecursive -> LsTreeLong -> Ref -> Repo -> IO [TreeItem]
lsTreeStrict' ps recursive long t repo = rights . map (parseLsTreeStrict long)
<$> pipeNullSplitStrict (lsTreeParams recursive long t ps) repo
lsTreeParams :: LsTreeMode -> Ref -> [CommandParam] -> [CommandParam]
lsTreeParams lsmode r ps =
lsTreeParams :: LsTreeRecursive -> LsTreeLong -> Ref -> [CommandParam] -> [CommandParam]
lsTreeParams recursive long r ps =
[ Param "ls-tree"
, Param "--full-tree"
, Param "-z"
] ++ recursiveparams ++ ps ++
] ++ recursiveparams ++ longparams ++ ps ++
[ Param "--"
, File $ fromRef r
]
where
recursiveparams = case lsmode of
recursiveparams = case recursive of
LsTreeRecursive -> [ Param "-r" ]
LsTreeNonRecursive -> []
longparams = case long of
LsTreeLong True -> [ Param "--long" ]
LsTreeLong False -> []
{- Lists specified files in a tree. -}
lsTreeFiles :: Ref -> [FilePath] -> Repo -> IO [TreeItem]
lsTreeFiles t fs repo = rights . map (parseLsTree . L.fromStrict)
lsTreeFiles :: LsTreeLong -> Ref -> [FilePath] -> Repo -> IO [TreeItem]
lsTreeFiles long t fs repo = rights . map (parseLsTree long . L.fromStrict)
<$> pipeNullSplitStrict ps repo
where
ps =
@ -86,41 +98,54 @@ lsTreeFiles t fs repo = rights . map (parseLsTree . L.fromStrict)
, File $ fromRef t
] ++ map File fs
parseLsTree :: L.ByteString -> Either String TreeItem
parseLsTree b = case A.parse parserLsTree b of
parseLsTree :: LsTreeLong -> L.ByteString -> Either String TreeItem
parseLsTree long b = case A.parse (parserLsTree long) b of
A.Done _ r -> Right r
A.Fail _ _ err -> Left err
parseLsTreeStrict :: S.ByteString -> Either String TreeItem
parseLsTreeStrict b = go (AS.parse parserLsTree b)
parseLsTreeStrict :: LsTreeLong -> S.ByteString -> Either String TreeItem
parseLsTreeStrict long b = go (AS.parse (parserLsTree long) b)
where
go (AS.Done _ r) = Right r
go (AS.Fail _ _ err) = Left err
go (AS.Partial c) = go (c mempty)
{- Parses a line of ls-tree output, in format:
- mode SP type SP sha TAB file
- mode SP type SP sha TAB file
- Or long format:
- mode SP type SP sha SPACES size TAB file
-
- The TAB can also be a space. Git does not use that, but an earlier
- version of formatLsTree did, and this keeps parsing what it output
- working.
-
- (The --long format is not currently supported.) -}
parserLsTree :: A.Parser TreeItem
parserLsTree = TreeItem
-- mode
<$> octal
<* A8.char ' '
-- type
<*> A8.takeTill (== ' ')
<* A8.char ' '
-- sha
<*> (Ref <$> A8.takeTill A8.isSpace)
<* A8.space
-- file
<*> (asTopFilePath . Git.Filename.decode <$> A.takeByteString)
-}
parserLsTree :: LsTreeLong -> A.Parser TreeItem
parserLsTree long = case long of
LsTreeLong False ->
startparser <*> pure Nothing <* filesep <*> fileparser
LsTreeLong True ->
startparser <* sizesep <*> sizeparser <* filesep <*> fileparser
where
startparser = TreeItem
-- mode
<$> octal
<* A8.char ' '
-- type
<*> A8.takeTill (== ' ')
<* A8.char ' '
-- sha
<*> (Ref <$> A8.takeTill A8.isSpace)
{- Inverse of parseLsTree -}
fileparser = asTopFilePath . Git.Filename.decode <$> A.takeByteString
sizeparser = fmap Just A8.decimal
filesep = A8.space
sizesep = A.many1 A8.space
{- Inverse of parseLsTree. Note that the long output format is not
- generated, so any size information is not included. -}
formatLsTree :: TreeItem -> String
formatLsTree ti = unwords
[ showOct (mode ti) ""

View file

@ -351,8 +351,9 @@ verifyTree :: MissingObjects -> Sha -> Repo -> IO Bool
verifyTree missing treesha r
| S.member treesha missing = return False
| otherwise = do
(ls, cleanup) <- pipeNullSplit (LsTree.lsTreeParams LsTree.LsTreeRecursive treesha []) r
let objshas = mapMaybe (LsTree.sha <$$> eitherToMaybe . LsTree.parseLsTree) ls
let nolong = LsTree.LsTreeLong False
(ls, cleanup) <- pipeNullSplit (LsTree.lsTreeParams LsTree.LsTreeRecursive nolong treesha []) r
let objshas = mapMaybe (LsTree.sha <$$> eitherToMaybe . LsTree.parseLsTree nolong) ls
if any (`S.member` missing) objshas
then do
void cleanup

View file

@ -55,16 +55,17 @@ data TreeContent
deriving (Show, Eq, Ord)
{- Gets the Tree for a Ref. -}
getTree :: LsTree.LsTreeMode -> Ref -> Repo -> IO Tree
getTree lstreemode r repo = do
(l, cleanup) <- lsTreeWithObjects lstreemode r repo
getTree :: LsTree.LsTreeRecursive -> Ref -> Repo -> IO Tree
getTree recursive r repo = do
(l, cleanup) <- lsTreeWithObjects recursive r repo
let !t = either (\e -> error ("ls-tree parse error:" ++ e)) id
(extractTree l)
void cleanup
return t
lsTreeWithObjects :: LsTree.LsTreeMode -> Ref -> Repo -> IO ([LsTree.TreeItem], IO Bool)
lsTreeWithObjects = LsTree.lsTree' [Param "-t"]
lsTreeWithObjects :: LsTree.LsTreeRecursive -> Ref -> Repo -> IO ([LsTree.TreeItem], IO Bool)
lsTreeWithObjects recursive =
LsTree.lsTree' [Param "-t"] recursive (LsTree.LsTreeLong False)
newtype MkTreeHandle = MkTreeHandle CoProcess.CoProcessHandle
@ -140,6 +141,7 @@ treeItemToLsTreeItem (TreeItem f mode sha) = LsTree.TreeItem
Just TreeSubtree -> TreeObject
_ -> BlobObject
, LsTree.sha = sha
, LsTree.size = Nothing
, LsTree.file = f
}

View file

@ -204,5 +204,5 @@ getExportExcluded u = do
exportExcludedParser :: L.ByteString -> [Git.Tree.TreeItem]
exportExcludedParser = map Git.Tree.lsTreeItemToTreeItem
. rights
. map Git.LsTree.parseLsTree
. map (Git.LsTree.parseLsTree (Git.LsTree.LsTreeLong False))
. L.split (fromIntegral $ ord '\n')

View file

@ -97,6 +97,7 @@ withKnownUrls a = do
Annex.Branch.commit =<< Annex.Branch.commitMessage
(l, cleanup) <- inRepo $ Git.LsTree.lsTree
Git.LsTree.LsTreeRecursive
(Git.LsTree.LsTreeLong False)
Annex.Branch.fullname
g <- Annex.gitRepo
let want = urlLogFileKey . getTopFilePath . Git.LsTree.file

View file

@ -243,7 +243,7 @@ getImported :: UUID -> Annex (M.Map BorgArchiveName (Annex [(ImportLocation, (Co
getImported u = M.unions <$> (mapM go . exportedTreeishes =<< getExport u)
where
go t = M.fromList . mapMaybe mk
<$> inRepo (LsTree.lsTreeStrict LsTree.LsTreeNonRecursive t)
<$> inRepo (LsTree.lsTreeStrict LsTree.LsTreeNonRecursive (LsTree.LsTreeLong False) t)
mk ti
| toTreeItemType (LsTree.mode ti) == Just TreeSubtree = Just
@ -255,7 +255,7 @@ getImported u = M.unions <$> (mapM go . exportedTreeishes =<< getExport u)
| otherwise = Nothing
getcontents archivename t = mapMaybe (mkcontents archivename)
<$> inRepo (LsTree.lsTreeStrict LsTree.LsTreeRecursive t)
<$> inRepo (LsTree.lsTreeStrict LsTree.LsTreeRecursive (LsTree.LsTreeLong False) t)
mkcontents archivename ti = do
let f = ThirdPartyPopulated.fromThirdPartyImportLocation $

View file

@ -1495,7 +1495,7 @@ test_conflict_resolution_symlink_bit = unlessM (hasUnlockedFiles <$> getTestMode
conflictor = "conflictor"
check_is_link f what = do
git_annex_expectoutput "find" ["--include=*", f] [fromRawFilePath (Git.FilePath.toInternalGitPath (toRawFilePath f))]
l <- annexeval $ Annex.inRepo $ Git.LsTree.lsTreeFiles Git.Ref.headRef [f]
l <- annexeval $ Annex.inRepo $ Git.LsTree.lsTreeFiles (Git.LsTree.LsTreeLong False) Git.Ref.headRef [f]
all (\i -> Git.Types.toTreeItemType (Git.LsTree.mode i) == Just Git.Types.TreeSymlink) l
@? (what ++ " " ++ f ++ " lost symlink bit after merge: " ++ show l)