From a8b837aaefbde3b8cc0625439d352d28aa3ffdd7 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Tue, 23 Mar 2021 12:44:29 -0400 Subject: [PATCH] add git ls-tree --long parser Not yet used, but allows getting the size of items in the tree fairly cheaply. I noticed that CmdLine.Seek uses ls-tree and the feeds the files into another long-running process to check their size. That would be an example of a place that might be sped up by using this. Although in that particular case, it only needs to know the size of unlocked files, not locked. And since enabling --long probably doubles the ls-tree runtime or more, the overhead of using it there may outwweigh the benefit. --- Annex/Branch.hs | 4 +- Annex/WorkTree.hs | 5 +- Assistant/Threads/ConfigMonitor.hs | 2 +- CmdLine/Seek.hs | 3 +- Command/Export.hs | 5 +- Command/Info.hs | 5 +- Git/LsTree.hs | 103 ++++++++++++++++++----------- Git/Repair.hs | 5 +- Git/Tree.hs | 12 ++-- Logs/Export.hs | 2 +- Logs/Web.hs | 1 + Remote/Borg.hs | 4 +- Test.hs | 2 +- 13 files changed, 97 insertions(+), 56 deletions(-) diff --git a/Annex/Branch.hs b/Annex/Branch.hs index 6b497377a2..165e1f79e5 100644 --- a/Annex/Branch.hs +++ b/Annex/Branch.hs @@ -414,7 +414,9 @@ branchFiles = withIndex $ inRepo branchFiles' branchFiles' :: Git.Repo -> IO ([RawFilePath], IO Bool) branchFiles' = Git.Command.pipeNullSplit' $ - lsTreeParams Git.LsTree.LsTreeRecursive fullname [Param "--name-only"] + lsTreeParams Git.LsTree.LsTreeRecursive (Git.LsTree.LsTreeLong False) + fullname + [Param "--name-only"] {- Populates the branch's index file with the current branch contents. - diff --git a/Annex/WorkTree.hs b/Annex/WorkTree.hs index 7ef88b3ef8..42abde34aa 100644 --- a/Annex/WorkTree.hs +++ b/Annex/WorkTree.hs @@ -82,7 +82,10 @@ scanUnlockedFiles = whenM (inRepo Git.Ref.headExists <&&> not <$> isBareRepo) $ dropold <- liftIO $ newMVar $ Database.Keys.runWriter $ liftIO . Database.Keys.SQL.dropAllAssociatedFiles - (l, cleanup) <- inRepo $ Git.LsTree.lsTree Git.LsTree.LsTreeRecursive Git.Ref.headRef + (l, cleanup) <- inRepo $ Git.LsTree.lsTree + Git.LsTree.LsTreeRecursive + (Git.LsTree.LsTreeLong False) + Git.Ref.headRef forM_ l $ \i -> when (isregfile i) $ maybe noop (add dropold i) diff --git a/Assistant/Threads/ConfigMonitor.hs b/Assistant/Threads/ConfigMonitor.hs index 1e6372c5fc..9f1e03f8d1 100644 --- a/Assistant/Threads/ConfigMonitor.hs +++ b/Assistant/Threads/ConfigMonitor.hs @@ -89,7 +89,7 @@ reloadConfigs changedconfigs = do getConfigs :: Assistant Configs getConfigs = S.fromList . map extract - <$> liftAnnex (inRepo $ LsTree.lsTreeFiles Annex.Branch.fullname files) + <$> liftAnnex (inRepo $ LsTree.lsTreeFiles (LsTree.LsTreeLong False) Annex.Branch.fullname files) where files = map (fromRawFilePath . fst) configFilesActions extract treeitem = (getTopFilePath $ LsTree.file treeitem, LsTree.sha treeitem) diff --git a/CmdLine/Seek.hs b/CmdLine/Seek.hs index 1681e522d3..65156772e3 100644 --- a/CmdLine/Seek.hs +++ b/CmdLine/Seek.hs @@ -279,6 +279,7 @@ withKeyOptions' ko auto mkkeyaction fallbackaction worktreeitems = do void Annex.Branch.update (l, cleanup) <- inRepo $ LsTree.lsTree LsTree.LsTreeRecursive + (LsTree.LsTreeLong False) Annex.Branch.fullname let getk f = fmap (,f) (locationLogFileKey config f) let discard reader = reader >>= \case @@ -301,7 +302,7 @@ withKeyOptions' ko auto mkkeyaction fallbackaction worktreeitems = do runbranchkeys bs = do keyaction <- mkkeyaction forM_ bs $ \b -> do - (l, cleanup) <- inRepo $ LsTree.lsTree LsTree.LsTreeRecursive b + (l, cleanup) <- inRepo $ LsTree.lsTree LsTree.LsTreeRecursive (LsTree.LsTreeLong False) b forM_ l $ \i -> catKey (LsTree.sha i) >>= \case Just k -> let bfp = mkActionItem (BranchFilePath b (LsTree.file i), k) diff --git a/Command/Export.hs b/Command/Export.hs index 6187d8dc51..b94f12c3d2 100644 --- a/Command/Export.hs +++ b/Command/Export.hs @@ -238,7 +238,10 @@ newtype AllFilled = AllFilled { fromAllFilled :: Bool } -- remote tracking branch. fillExport :: Remote -> ExportHandle -> ExportFiltered Git.Ref -> Maybe (RemoteTrackingBranch, Sha) -> Annex Bool fillExport r db (ExportFiltered newtree) mtbcommitsha = do - (l, cleanup) <- inRepo $ Git.LsTree.lsTree Git.LsTree.LsTreeRecursive newtree + (l, cleanup) <- inRepo $ Git.LsTree.lsTree + Git.LsTree.LsTreeRecursive + (Git.LsTree.LsTreeLong False) + newtree cvar <- liftIO $ newMVar (FileUploaded False) allfilledvar <- liftIO $ newMVar (AllFilled True) commandActions $ diff --git a/Command/Info.hs b/Command/Info.hs index f90230cac0..36c7a88cb9 100644 --- a/Command/Info.hs +++ b/Command/Info.hs @@ -589,7 +589,10 @@ getDirStatInfo o dir = do getTreeStatInfo :: InfoOptions -> Git.Ref -> Annex (Maybe StatInfo) getTreeStatInfo o r = do fast <- Annex.getState Annex.fast - (ls, cleanup) <- inRepo $ LsTree.lsTree LsTree.LsTreeRecursive r + (ls, cleanup) <- inRepo $ LsTree.lsTree + LsTree.LsTreeRecursive + (LsTree.LsTreeLong False) + r (presentdata, referenceddata, repodata) <- go fast ls initial ifM (liftIO cleanup) ( return $ Just $ diff --git a/Git/LsTree.hs b/Git/LsTree.hs index 9fb0562124..2c57521c45 100644 --- a/Git/LsTree.hs +++ b/Git/LsTree.hs @@ -1,13 +1,14 @@ {- git ls-tree interface - - - Copyright 2011-2020 Joey Hess + - Copyright 2011-2021 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} module Git.LsTree ( TreeItem(..), - LsTreeMode(..), + LsTreeRecursive(..), + LsTreeLong(..), lsTree, lsTree', lsTreeStrict, @@ -38,44 +39,55 @@ data TreeItem = TreeItem { mode :: FileMode , typeobj :: S.ByteString , sha :: Ref + , size :: Maybe FileSize , file :: TopFilePath + -- ^ only available when long is used } deriving (Show) -data LsTreeMode = LsTreeRecursive | LsTreeNonRecursive +data LsTreeRecursive = LsTreeRecursive | LsTreeNonRecursive + +{- Enabling --long also gets the size of tree items. + - This slows down ls-tree some, since it has to look up the size of each + - blob. + -} +data LsTreeLong = LsTreeLong Bool {- Lists the contents of a tree, with lazy output. -} -lsTree :: LsTreeMode -> Ref -> Repo -> IO ([TreeItem], IO Bool) +lsTree :: LsTreeRecursive -> LsTreeLong -> Ref -> Repo -> IO ([TreeItem], IO Bool) lsTree = lsTree' [] -lsTree' :: [CommandParam] -> LsTreeMode -> Ref -> Repo -> IO ([TreeItem], IO Bool) -lsTree' ps lsmode t repo = do - (l, cleanup) <- pipeNullSplit (lsTreeParams lsmode t ps) repo - return (rights (map parseLsTree l), cleanup) +lsTree' :: [CommandParam] -> LsTreeRecursive -> LsTreeLong -> Ref -> Repo -> IO ([TreeItem], IO Bool) +lsTree' ps recursive long t repo = do + (l, cleanup) <- pipeNullSplit (lsTreeParams recursive long t ps) repo + return (rights (map (parseLsTree long) l), cleanup) -lsTreeStrict :: LsTreeMode -> Ref -> Repo -> IO [TreeItem] +lsTreeStrict :: LsTreeRecursive -> LsTreeLong -> Ref -> Repo -> IO [TreeItem] lsTreeStrict = lsTreeStrict' [] -lsTreeStrict' :: [CommandParam] -> LsTreeMode -> Ref -> Repo -> IO [TreeItem] -lsTreeStrict' ps lsmode t repo = rights . map parseLsTreeStrict - <$> pipeNullSplitStrict (lsTreeParams lsmode t ps) repo +lsTreeStrict' :: [CommandParam] -> LsTreeRecursive -> LsTreeLong -> Ref -> Repo -> IO [TreeItem] +lsTreeStrict' ps recursive long t repo = rights . map (parseLsTreeStrict long) + <$> pipeNullSplitStrict (lsTreeParams recursive long t ps) repo -lsTreeParams :: LsTreeMode -> Ref -> [CommandParam] -> [CommandParam] -lsTreeParams lsmode r ps = +lsTreeParams :: LsTreeRecursive -> LsTreeLong -> Ref -> [CommandParam] -> [CommandParam] +lsTreeParams recursive long r ps = [ Param "ls-tree" , Param "--full-tree" , Param "-z" - ] ++ recursiveparams ++ ps ++ + ] ++ recursiveparams ++ longparams ++ ps ++ [ Param "--" , File $ fromRef r ] where - recursiveparams = case lsmode of + recursiveparams = case recursive of LsTreeRecursive -> [ Param "-r" ] LsTreeNonRecursive -> [] + longparams = case long of + LsTreeLong True -> [ Param "--long" ] + LsTreeLong False -> [] {- Lists specified files in a tree. -} -lsTreeFiles :: Ref -> [FilePath] -> Repo -> IO [TreeItem] -lsTreeFiles t fs repo = rights . map (parseLsTree . L.fromStrict) +lsTreeFiles :: LsTreeLong -> Ref -> [FilePath] -> Repo -> IO [TreeItem] +lsTreeFiles long t fs repo = rights . map (parseLsTree long . L.fromStrict) <$> pipeNullSplitStrict ps repo where ps = @@ -86,41 +98,54 @@ lsTreeFiles t fs repo = rights . map (parseLsTree . L.fromStrict) , File $ fromRef t ] ++ map File fs -parseLsTree :: L.ByteString -> Either String TreeItem -parseLsTree b = case A.parse parserLsTree b of +parseLsTree :: LsTreeLong -> L.ByteString -> Either String TreeItem +parseLsTree long b = case A.parse (parserLsTree long) b of A.Done _ r -> Right r A.Fail _ _ err -> Left err -parseLsTreeStrict :: S.ByteString -> Either String TreeItem -parseLsTreeStrict b = go (AS.parse parserLsTree b) +parseLsTreeStrict :: LsTreeLong -> S.ByteString -> Either String TreeItem +parseLsTreeStrict long b = go (AS.parse (parserLsTree long) b) where go (AS.Done _ r) = Right r go (AS.Fail _ _ err) = Left err go (AS.Partial c) = go (c mempty) {- Parses a line of ls-tree output, in format: - - mode SP type SP sha TAB file + - mode SP type SP sha TAB file + - Or long format: + - mode SP type SP sha SPACES size TAB file - - The TAB can also be a space. Git does not use that, but an earlier - version of formatLsTree did, and this keeps parsing what it output - working. - - - - (The --long format is not currently supported.) -} -parserLsTree :: A.Parser TreeItem -parserLsTree = TreeItem - -- mode - <$> octal - <* A8.char ' ' - -- type - <*> A8.takeTill (== ' ') - <* A8.char ' ' - -- sha - <*> (Ref <$> A8.takeTill A8.isSpace) - <* A8.space - -- file - <*> (asTopFilePath . Git.Filename.decode <$> A.takeByteString) + -} +parserLsTree :: LsTreeLong -> A.Parser TreeItem +parserLsTree long = case long of + LsTreeLong False -> + startparser <*> pure Nothing <* filesep <*> fileparser + LsTreeLong True -> + startparser <* sizesep <*> sizeparser <* filesep <*> fileparser + where + startparser = TreeItem + -- mode + <$> octal + <* A8.char ' ' + -- type + <*> A8.takeTill (== ' ') + <* A8.char ' ' + -- sha + <*> (Ref <$> A8.takeTill A8.isSpace) -{- Inverse of parseLsTree -} + fileparser = asTopFilePath . Git.Filename.decode <$> A.takeByteString + + sizeparser = fmap Just A8.decimal + + filesep = A8.space + + sizesep = A.many1 A8.space + +{- Inverse of parseLsTree. Note that the long output format is not + - generated, so any size information is not included. -} formatLsTree :: TreeItem -> String formatLsTree ti = unwords [ showOct (mode ti) "" diff --git a/Git/Repair.hs b/Git/Repair.hs index 6d9aeec992..d80ebd8b68 100644 --- a/Git/Repair.hs +++ b/Git/Repair.hs @@ -351,8 +351,9 @@ verifyTree :: MissingObjects -> Sha -> Repo -> IO Bool verifyTree missing treesha r | S.member treesha missing = return False | otherwise = do - (ls, cleanup) <- pipeNullSplit (LsTree.lsTreeParams LsTree.LsTreeRecursive treesha []) r - let objshas = mapMaybe (LsTree.sha <$$> eitherToMaybe . LsTree.parseLsTree) ls + let nolong = LsTree.LsTreeLong False + (ls, cleanup) <- pipeNullSplit (LsTree.lsTreeParams LsTree.LsTreeRecursive nolong treesha []) r + let objshas = mapMaybe (LsTree.sha <$$> eitherToMaybe . LsTree.parseLsTree nolong) ls if any (`S.member` missing) objshas then do void cleanup diff --git a/Git/Tree.hs b/Git/Tree.hs index 80a717a884..325508e46d 100644 --- a/Git/Tree.hs +++ b/Git/Tree.hs @@ -55,16 +55,17 @@ data TreeContent deriving (Show, Eq, Ord) {- Gets the Tree for a Ref. -} -getTree :: LsTree.LsTreeMode -> Ref -> Repo -> IO Tree -getTree lstreemode r repo = do - (l, cleanup) <- lsTreeWithObjects lstreemode r repo +getTree :: LsTree.LsTreeRecursive -> Ref -> Repo -> IO Tree +getTree recursive r repo = do + (l, cleanup) <- lsTreeWithObjects recursive r repo let !t = either (\e -> error ("ls-tree parse error:" ++ e)) id (extractTree l) void cleanup return t -lsTreeWithObjects :: LsTree.LsTreeMode -> Ref -> Repo -> IO ([LsTree.TreeItem], IO Bool) -lsTreeWithObjects = LsTree.lsTree' [Param "-t"] +lsTreeWithObjects :: LsTree.LsTreeRecursive -> Ref -> Repo -> IO ([LsTree.TreeItem], IO Bool) +lsTreeWithObjects recursive = + LsTree.lsTree' [Param "-t"] recursive (LsTree.LsTreeLong False) newtype MkTreeHandle = MkTreeHandle CoProcess.CoProcessHandle @@ -140,6 +141,7 @@ treeItemToLsTreeItem (TreeItem f mode sha) = LsTree.TreeItem Just TreeSubtree -> TreeObject _ -> BlobObject , LsTree.sha = sha + , LsTree.size = Nothing , LsTree.file = f } diff --git a/Logs/Export.hs b/Logs/Export.hs index 71bfe66c8a..752be9e673 100644 --- a/Logs/Export.hs +++ b/Logs/Export.hs @@ -204,5 +204,5 @@ getExportExcluded u = do exportExcludedParser :: L.ByteString -> [Git.Tree.TreeItem] exportExcludedParser = map Git.Tree.lsTreeItemToTreeItem . rights - . map Git.LsTree.parseLsTree + . map (Git.LsTree.parseLsTree (Git.LsTree.LsTreeLong False)) . L.split (fromIntegral $ ord '\n') diff --git a/Logs/Web.hs b/Logs/Web.hs index 88cfbc2695..426b0f6396 100644 --- a/Logs/Web.hs +++ b/Logs/Web.hs @@ -97,6 +97,7 @@ withKnownUrls a = do Annex.Branch.commit =<< Annex.Branch.commitMessage (l, cleanup) <- inRepo $ Git.LsTree.lsTree Git.LsTree.LsTreeRecursive + (Git.LsTree.LsTreeLong False) Annex.Branch.fullname g <- Annex.gitRepo let want = urlLogFileKey . getTopFilePath . Git.LsTree.file diff --git a/Remote/Borg.hs b/Remote/Borg.hs index 4e241eb4ac..9d312a5da7 100644 --- a/Remote/Borg.hs +++ b/Remote/Borg.hs @@ -243,7 +243,7 @@ getImported :: UUID -> Annex (M.Map BorgArchiveName (Annex [(ImportLocation, (Co getImported u = M.unions <$> (mapM go . exportedTreeishes =<< getExport u) where go t = M.fromList . mapMaybe mk - <$> inRepo (LsTree.lsTreeStrict LsTree.LsTreeNonRecursive t) + <$> inRepo (LsTree.lsTreeStrict LsTree.LsTreeNonRecursive (LsTree.LsTreeLong False) t) mk ti | toTreeItemType (LsTree.mode ti) == Just TreeSubtree = Just @@ -255,7 +255,7 @@ getImported u = M.unions <$> (mapM go . exportedTreeishes =<< getExport u) | otherwise = Nothing getcontents archivename t = mapMaybe (mkcontents archivename) - <$> inRepo (LsTree.lsTreeStrict LsTree.LsTreeRecursive t) + <$> inRepo (LsTree.lsTreeStrict LsTree.LsTreeRecursive (LsTree.LsTreeLong False) t) mkcontents archivename ti = do let f = ThirdPartyPopulated.fromThirdPartyImportLocation $ diff --git a/Test.hs b/Test.hs index fa04cc8eb3..cd7c0fb1f5 100644 --- a/Test.hs +++ b/Test.hs @@ -1495,7 +1495,7 @@ test_conflict_resolution_symlink_bit = unlessM (hasUnlockedFiles <$> getTestMode conflictor = "conflictor" check_is_link f what = do git_annex_expectoutput "find" ["--include=*", f] [fromRawFilePath (Git.FilePath.toInternalGitPath (toRawFilePath f))] - l <- annexeval $ Annex.inRepo $ Git.LsTree.lsTreeFiles Git.Ref.headRef [f] + l <- annexeval $ Annex.inRepo $ Git.LsTree.lsTreeFiles (Git.LsTree.LsTreeLong False) Git.Ref.headRef [f] all (\i -> Git.Types.toTreeItemType (Git.LsTree.mode i) == Just Git.Types.TreeSymlink) l @? (what ++ " " ++ f ++ " lost symlink bit after merge: " ++ show l)