diff --git a/Git/LsTree.hs b/Git/LsTree.hs index ead501f0dc..cd0d406edf 100644 --- a/Git/LsTree.hs +++ b/Git/LsTree.hs @@ -1,17 +1,17 @@ {- git ls-tree interface - - - Copyright 2011-2019 Joey Hess + - Copyright 2011-2020 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} -{-# LANGUAGE BangPatterns #-} - module Git.LsTree ( TreeItem(..), LsTreeMode(..), lsTree, lsTree', + lsTreeStrict, + lsTreeStrict', lsTreeParams, lsTreeFiles, parseLsTree, @@ -30,6 +30,7 @@ import Data.Either import System.Posix.Types import qualified Data.ByteString as S import qualified Data.ByteString.Lazy as L +import qualified Data.Attoparsec.ByteString as AS import qualified Data.Attoparsec.ByteString.Lazy as A import qualified Data.Attoparsec.ByteString.Char8 as A8 @@ -38,7 +39,7 @@ data TreeItem = TreeItem , typeobj :: S.ByteString , sha :: Ref , file :: TopFilePath - } deriving Show + } deriving (Show) data LsTreeMode = LsTreeRecursive | LsTreeNonRecursive @@ -51,6 +52,13 @@ lsTree' ps lsmode t repo = do (l, cleanup) <- pipeNullSplit (lsTreeParams lsmode t ps) repo return (rights (map parseLsTree l), cleanup) +lsTreeStrict :: LsTreeMode -> Ref -> Repo -> IO [TreeItem] +lsTreeStrict = lsTreeStrict' [] + +lsTreeStrict' :: [CommandParam] -> LsTreeMode -> Ref -> Repo -> IO [TreeItem] +lsTreeStrict' ps lsmode t repo = rights . map parseLsTreeStrict + <$> pipeNullSplitStrict (lsTreeParams lsmode t ps) repo + lsTreeParams :: LsTreeMode -> Ref -> [CommandParam] -> [CommandParam] lsTreeParams lsmode r ps = [ Param "ls-tree" @@ -83,6 +91,13 @@ parseLsTree b = case A.parse parserLsTree b of A.Done _ r -> Right r A.Fail _ _ err -> Left err +parseLsTreeStrict :: S.ByteString -> Either String TreeItem +parseLsTreeStrict b = go (AS.parse parserLsTree b) + where + go (AS.Done _ r) = Right r + go (AS.Fail _ _ err) = Left err + go (AS.Partial c) = go (c mempty) + {- Parses a line of ls-tree output, in format: - mode SP type SP sha TAB file - diff --git a/Git/Types.hs b/Git/Types.hs index 77a52d1e45..73c4fe62de 100644 --- a/Git/Types.hs +++ b/Git/Types.hs @@ -135,7 +135,12 @@ fmtObjectType CommitObject = "commit" fmtObjectType TreeObject = "tree" {- Types of items in a tree. -} -data TreeItemType = TreeFile | TreeExecutable | TreeSymlink | TreeSubmodule +data TreeItemType + = TreeFile + | TreeExecutable + | TreeSymlink + | TreeSubmodule + | TreeSubtree deriving (Eq, Show) {- Git uses magic numbers to denote the type of a tree item. -} @@ -144,6 +149,7 @@ readTreeItemType "100644" = Just TreeFile readTreeItemType "100755" = Just TreeExecutable readTreeItemType "120000" = Just TreeSymlink readTreeItemType "160000" = Just TreeSubmodule +readTreeItemType "040000" = Just TreeSubtree readTreeItemType _ = Nothing fmtTreeItemType :: TreeItemType -> S.ByteString @@ -151,12 +157,14 @@ fmtTreeItemType TreeFile = "100644" fmtTreeItemType TreeExecutable = "100755" fmtTreeItemType TreeSymlink = "120000" fmtTreeItemType TreeSubmodule = "160000" +fmtTreeItemType TreeSubtree = "040000" toTreeItemType :: FileMode -> Maybe TreeItemType toTreeItemType 0o100644 = Just TreeFile toTreeItemType 0o100755 = Just TreeExecutable toTreeItemType 0o120000 = Just TreeSymlink toTreeItemType 0o160000 = Just TreeSubmodule +toTreeItemType 0o040000 = Just TreeSubtree toTreeItemType _ = Nothing fromTreeItemType :: TreeItemType -> FileMode @@ -164,6 +172,7 @@ fromTreeItemType TreeFile = 0o100644 fromTreeItemType TreeExecutable = 0o100755 fromTreeItemType TreeSymlink = 0o120000 fromTreeItemType TreeSubmodule = 0o160000 +fromTreeItemType TreeSubtree = 0o040000 data Commit = Commit { commitTree :: Sha diff --git a/Logs/Export.hs b/Logs/Export.hs index 1c198b7992..50b2ea1378 100644 --- a/Logs/Export.hs +++ b/Logs/Export.hs @@ -1,4 +1,4 @@ -{- git-annex export log +{- git-annex export log (also used to log imports) - - Copyright 2017-2019 Joey Hess - @@ -64,7 +64,6 @@ exportedTreeishes = nub . map exportedTreeish incompleteExportedTreeishes :: [Exported] -> [Git.Ref] incompleteExportedTreeishes = concatMap incompleteExportedTreeish - data ExportParticipants = ExportParticipants { exportFrom :: UUID , exportTo :: UUID diff --git a/Remote/Borg.hs b/Remote/Borg.hs index a1dc10b0f8..0e7dd50e6f 100644 --- a/Remote/Borg.hs +++ b/Remote/Borg.hs @@ -12,6 +12,9 @@ import Types.Remote import Types.Creds import Types.Import import qualified Git +import qualified Git.LsTree as LsTree +import Git.Types (toTreeItemType, TreeItemType(..)) +import Git.FilePath import Config import Config.Cost import Annex.SpecialRemote.Config @@ -21,6 +24,7 @@ import Annex.UUID import Types.ProposedAccepted import Utility.Metered import qualified Remote.Helper.ThirdPartyPopulated as ThirdPartyPopulated +import Logs.Export import Text.Read import Control.Exception (evaluate) @@ -74,7 +78,7 @@ gen r u rc gc rs = do , checkPresentCheap = borgLocal borgrepo , exportActions = exportUnsupported , importActions = ImportActions - { listImportableContents = listImportableContentsM borgrepo + { listImportableContents = listImportableContentsM u borgrepo , importKey = Just ThirdPartyPopulated.importKey , retrieveExportWithContentIdentifier = retrieveExportWithContentIdentifierM borgrepo , checkPresentExportWithContentIdentifier = checkPresentExportWithContentIdentifierM borgrepo @@ -125,19 +129,21 @@ borgSetup _ mu _ c _gc = do borgLocal :: BorgRepo -> Bool borgLocal = notElem ':' --- TODO avoid rescanning archives that have already been scanned --- -- XXX the tree generated by using this does not seem to get grafted into -- the git-annex branch, so would be subject to being lost to GC. -- Is this a general problem affecting importtree too? -listImportableContentsM :: BorgRepo -> Annex (Maybe (ImportableContents (ContentIdentifier, ByteSize))) -listImportableContentsM borgrepo = prompt $ do +listImportableContentsM :: UUID -> BorgRepo -> Annex (Maybe (ImportableContents (ContentIdentifier, ByteSize))) +listImportableContentsM u borgrepo = prompt $ do + imported <- getImported u ls <- withborglist borgrepo "{barchive}{NUL}" $ \as -> - forM as $ \archive -> - let archive' = borgrepo ++ "::" ++ decodeBS' archive - in withborglist archive' "{size}{NUL}{path}{NUL}" $ - liftIO . evaluate . force . parsefilelist archive - return (Just (mkimportablecontents ls)) + forM as $ \archivename -> + case M.lookup archivename imported of + Just getfast -> getfast + Nothing -> + let archive = borgrepo ++ "::" ++ decodeBS' archivename + in withborglist archive "{size}{NUL}{path}{NUL}" $ + liftIO . evaluate . force . parsefilelist archivename + return $ Just $ mkimportablecontents ls where withborglist what format a = do let p = (proc "borg" ["list", what, "--format", format]) @@ -152,10 +158,10 @@ listImportableContentsM borgrepo = prompt $ do forceSuccessProcess p pid a l `finally` cleanup - parsefilelist archive (bsz:f:rest) = case readMaybe (fromRawFilePath bsz) of - Nothing -> parsefilelist archive rest + parsefilelist archivename (bsz:f:rest) = case readMaybe (fromRawFilePath bsz) of + Nothing -> parsefilelist archivename rest Just sz -> - let loc = genImportLocation archive f + let loc = genImportLocation archivename f -- This does a little unncessary work to parse the -- key, which is then thrown away. But, it lets the -- file list be shrank down to only the ones that are @@ -163,19 +169,25 @@ listImportableContentsM borgrepo = prompt $ do -- the rest of the files in memory. in case ThirdPartyPopulated.importKey' loc sz of Just k -> (loc, (borgContentIdentifier, sz)) - : parsefilelist archive rest - Nothing -> parsefilelist archive rest + : parsefilelist archivename rest + Nothing -> parsefilelist archivename rest parsefilelist _ _ = [] -- importableHistory is not used for retrieval, so is not -- populated with old archives. Instead, a tree of archives -- is constructed, by genImportLocation including the archive -- name in the ImportLocation. - mkimportablecontents (l) = ImportableContents + mkimportablecontents l = ImportableContents { importableContents = concat l , importableHistory = [] } - + +-- We do not need a ContentIdentifier in order to retrieve a file from +-- borg; the ImportLocation contains all that's needed. So, this is left +-- empty. +borgContentIdentifier :: ContentIdentifier +borgContentIdentifier = ContentIdentifier mempty + -- Borg does not allow / in the name of an archive, so the archive -- name will always be the first directory in the ImportLocation. -- @@ -193,11 +205,45 @@ extractImportLocation loc = go $ P.splitDirectories $ go (archivename:rest) = (archivename, P.joinPath rest) go _ = giveup $ "Unable to parse import location " ++ fromRawFilePath (fromImportLocation loc) --- We do not need a ContentIdentifier in order to retrieve a file from --- borg; the ImportLocation contains all that's needed. So, this is left --- empty. -borgContentIdentifier :: ContentIdentifier -borgContentIdentifier = ContentIdentifier mempty +-- Since the ImportLocation starts with the archive name, a list of all +-- archive names we've already imported can be found by just listing the +-- last imported tree. And the contents of those archives can be retrieved +-- by listing the subtree recursively, which will likely be quite a lot +-- faster than running borg. +getImported :: UUID -> Annex (M.Map BorgArchiveName (Annex [(ImportLocation, (ContentIdentifier, ByteSize))])) +getImported u = M.unions <$> (mapM go . exportedTreeishes =<< getExport u) + where + go t = M.fromList . mapMaybe mk + <$> inRepo (LsTree.lsTreeStrict LsTree.LsTreeNonRecursive t) + + mk ti + | toTreeItemType (LsTree.mode ti) == Just TreeSubtree = Just + ( getTopFilePath (LsTree.file ti) + , getcontents + (getTopFilePath (LsTree.file ti)) + (LsTree.sha ti) + ) + | otherwise = Nothing + + getcontents archivename t = mapMaybe (mkcontents archivename) + <$> inRepo (LsTree.lsTreeStrict LsTree.LsTreeRecursive t) + + mkcontents archivename ti = do + let f = ThirdPartyPopulated.fromThirdPartyImportLocation $ + mkImportLocation $ getTopFilePath $ LsTree.file ti + k <- deserializeKey' (P.takeFileName f) + return + ( genImportLocation archivename f + , + ( borgContentIdentifier + -- defaulting to 0 size is ok, this size + -- only gets used by + -- ThirdPartyPopulated.importKey, + -- which ignores the size when the key + -- does not have a size. + , fromMaybe 0 (fromKey keySize k) + ) + ) retrieveExportWithContentIdentifierM :: BorgRepo -> ImportLocation -> ContentIdentifier -> FilePath -> Annex Key -> MeterUpdate -> Annex Key retrieveExportWithContentIdentifierM borgrepo loc cid dest k p = error "TODO"