avoid borg list of archives that have been listed before
This makes sync a lot faster in the common case where there's no new backup. There's still room for it to be faster. Currently the old imported tree has to be traversed, to generate the ImportableContents. Which then gets turned around to generate the new imported tree, which is identical. So, it would be possible to just return a "no new imports", or an ImportableContents that has a way to graft in a tree. The latter is probably too far to go to optimise this, unless other things need it. The former might be worth it, but it's already pretty fast, since git ls-tree is pretty fast.
This commit is contained in:
parent
06ef1b7d68
commit
5d8e4a7c74
4 changed files with 98 additions and 29 deletions
|
@ -1,17 +1,17 @@
|
||||||
{- git ls-tree interface
|
{- git ls-tree interface
|
||||||
-
|
-
|
||||||
- Copyright 2011-2019 Joey Hess <id@joeyh.name>
|
- Copyright 2011-2020 Joey Hess <id@joeyh.name>
|
||||||
-
|
-
|
||||||
- Licensed under the GNU AGPL version 3 or higher.
|
- Licensed under the GNU AGPL version 3 or higher.
|
||||||
-}
|
-}
|
||||||
|
|
||||||
{-# LANGUAGE BangPatterns #-}
|
|
||||||
|
|
||||||
module Git.LsTree (
|
module Git.LsTree (
|
||||||
TreeItem(..),
|
TreeItem(..),
|
||||||
LsTreeMode(..),
|
LsTreeMode(..),
|
||||||
lsTree,
|
lsTree,
|
||||||
lsTree',
|
lsTree',
|
||||||
|
lsTreeStrict,
|
||||||
|
lsTreeStrict',
|
||||||
lsTreeParams,
|
lsTreeParams,
|
||||||
lsTreeFiles,
|
lsTreeFiles,
|
||||||
parseLsTree,
|
parseLsTree,
|
||||||
|
@ -30,6 +30,7 @@ import Data.Either
|
||||||
import System.Posix.Types
|
import System.Posix.Types
|
||||||
import qualified Data.ByteString as S
|
import qualified Data.ByteString as S
|
||||||
import qualified Data.ByteString.Lazy as L
|
import qualified Data.ByteString.Lazy as L
|
||||||
|
import qualified Data.Attoparsec.ByteString as AS
|
||||||
import qualified Data.Attoparsec.ByteString.Lazy as A
|
import qualified Data.Attoparsec.ByteString.Lazy as A
|
||||||
import qualified Data.Attoparsec.ByteString.Char8 as A8
|
import qualified Data.Attoparsec.ByteString.Char8 as A8
|
||||||
|
|
||||||
|
@ -38,7 +39,7 @@ data TreeItem = TreeItem
|
||||||
, typeobj :: S.ByteString
|
, typeobj :: S.ByteString
|
||||||
, sha :: Ref
|
, sha :: Ref
|
||||||
, file :: TopFilePath
|
, file :: TopFilePath
|
||||||
} deriving Show
|
} deriving (Show)
|
||||||
|
|
||||||
data LsTreeMode = LsTreeRecursive | LsTreeNonRecursive
|
data LsTreeMode = LsTreeRecursive | LsTreeNonRecursive
|
||||||
|
|
||||||
|
@ -51,6 +52,13 @@ lsTree' ps lsmode t repo = do
|
||||||
(l, cleanup) <- pipeNullSplit (lsTreeParams lsmode t ps) repo
|
(l, cleanup) <- pipeNullSplit (lsTreeParams lsmode t ps) repo
|
||||||
return (rights (map parseLsTree l), cleanup)
|
return (rights (map parseLsTree l), cleanup)
|
||||||
|
|
||||||
|
lsTreeStrict :: LsTreeMode -> Ref -> Repo -> IO [TreeItem]
|
||||||
|
lsTreeStrict = lsTreeStrict' []
|
||||||
|
|
||||||
|
lsTreeStrict' :: [CommandParam] -> LsTreeMode -> Ref -> Repo -> IO [TreeItem]
|
||||||
|
lsTreeStrict' ps lsmode t repo = rights . map parseLsTreeStrict
|
||||||
|
<$> pipeNullSplitStrict (lsTreeParams lsmode t ps) repo
|
||||||
|
|
||||||
lsTreeParams :: LsTreeMode -> Ref -> [CommandParam] -> [CommandParam]
|
lsTreeParams :: LsTreeMode -> Ref -> [CommandParam] -> [CommandParam]
|
||||||
lsTreeParams lsmode r ps =
|
lsTreeParams lsmode r ps =
|
||||||
[ Param "ls-tree"
|
[ Param "ls-tree"
|
||||||
|
@ -83,6 +91,13 @@ parseLsTree b = case A.parse parserLsTree b of
|
||||||
A.Done _ r -> Right r
|
A.Done _ r -> Right r
|
||||||
A.Fail _ _ err -> Left err
|
A.Fail _ _ err -> Left err
|
||||||
|
|
||||||
|
parseLsTreeStrict :: S.ByteString -> Either String TreeItem
|
||||||
|
parseLsTreeStrict b = go (AS.parse parserLsTree b)
|
||||||
|
where
|
||||||
|
go (AS.Done _ r) = Right r
|
||||||
|
go (AS.Fail _ _ err) = Left err
|
||||||
|
go (AS.Partial c) = go (c mempty)
|
||||||
|
|
||||||
{- Parses a line of ls-tree output, in format:
|
{- Parses a line of ls-tree output, in format:
|
||||||
- mode SP type SP sha TAB file
|
- mode SP type SP sha TAB file
|
||||||
-
|
-
|
||||||
|
|
11
Git/Types.hs
11
Git/Types.hs
|
@ -135,7 +135,12 @@ fmtObjectType CommitObject = "commit"
|
||||||
fmtObjectType TreeObject = "tree"
|
fmtObjectType TreeObject = "tree"
|
||||||
|
|
||||||
{- Types of items in a tree. -}
|
{- Types of items in a tree. -}
|
||||||
data TreeItemType = TreeFile | TreeExecutable | TreeSymlink | TreeSubmodule
|
data TreeItemType
|
||||||
|
= TreeFile
|
||||||
|
| TreeExecutable
|
||||||
|
| TreeSymlink
|
||||||
|
| TreeSubmodule
|
||||||
|
| TreeSubtree
|
||||||
deriving (Eq, Show)
|
deriving (Eq, Show)
|
||||||
|
|
||||||
{- Git uses magic numbers to denote the type of a tree item. -}
|
{- Git uses magic numbers to denote the type of a tree item. -}
|
||||||
|
@ -144,6 +149,7 @@ readTreeItemType "100644" = Just TreeFile
|
||||||
readTreeItemType "100755" = Just TreeExecutable
|
readTreeItemType "100755" = Just TreeExecutable
|
||||||
readTreeItemType "120000" = Just TreeSymlink
|
readTreeItemType "120000" = Just TreeSymlink
|
||||||
readTreeItemType "160000" = Just TreeSubmodule
|
readTreeItemType "160000" = Just TreeSubmodule
|
||||||
|
readTreeItemType "040000" = Just TreeSubtree
|
||||||
readTreeItemType _ = Nothing
|
readTreeItemType _ = Nothing
|
||||||
|
|
||||||
fmtTreeItemType :: TreeItemType -> S.ByteString
|
fmtTreeItemType :: TreeItemType -> S.ByteString
|
||||||
|
@ -151,12 +157,14 @@ fmtTreeItemType TreeFile = "100644"
|
||||||
fmtTreeItemType TreeExecutable = "100755"
|
fmtTreeItemType TreeExecutable = "100755"
|
||||||
fmtTreeItemType TreeSymlink = "120000"
|
fmtTreeItemType TreeSymlink = "120000"
|
||||||
fmtTreeItemType TreeSubmodule = "160000"
|
fmtTreeItemType TreeSubmodule = "160000"
|
||||||
|
fmtTreeItemType TreeSubtree = "040000"
|
||||||
|
|
||||||
toTreeItemType :: FileMode -> Maybe TreeItemType
|
toTreeItemType :: FileMode -> Maybe TreeItemType
|
||||||
toTreeItemType 0o100644 = Just TreeFile
|
toTreeItemType 0o100644 = Just TreeFile
|
||||||
toTreeItemType 0o100755 = Just TreeExecutable
|
toTreeItemType 0o100755 = Just TreeExecutable
|
||||||
toTreeItemType 0o120000 = Just TreeSymlink
|
toTreeItemType 0o120000 = Just TreeSymlink
|
||||||
toTreeItemType 0o160000 = Just TreeSubmodule
|
toTreeItemType 0o160000 = Just TreeSubmodule
|
||||||
|
toTreeItemType 0o040000 = Just TreeSubtree
|
||||||
toTreeItemType _ = Nothing
|
toTreeItemType _ = Nothing
|
||||||
|
|
||||||
fromTreeItemType :: TreeItemType -> FileMode
|
fromTreeItemType :: TreeItemType -> FileMode
|
||||||
|
@ -164,6 +172,7 @@ fromTreeItemType TreeFile = 0o100644
|
||||||
fromTreeItemType TreeExecutable = 0o100755
|
fromTreeItemType TreeExecutable = 0o100755
|
||||||
fromTreeItemType TreeSymlink = 0o120000
|
fromTreeItemType TreeSymlink = 0o120000
|
||||||
fromTreeItemType TreeSubmodule = 0o160000
|
fromTreeItemType TreeSubmodule = 0o160000
|
||||||
|
fromTreeItemType TreeSubtree = 0o040000
|
||||||
|
|
||||||
data Commit = Commit
|
data Commit = Commit
|
||||||
{ commitTree :: Sha
|
{ commitTree :: Sha
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
{- git-annex export log
|
{- git-annex export log (also used to log imports)
|
||||||
-
|
-
|
||||||
- Copyright 2017-2019 Joey Hess <id@joeyh.name>
|
- Copyright 2017-2019 Joey Hess <id@joeyh.name>
|
||||||
-
|
-
|
||||||
|
@ -64,7 +64,6 @@ exportedTreeishes = nub . map exportedTreeish
|
||||||
incompleteExportedTreeishes :: [Exported] -> [Git.Ref]
|
incompleteExportedTreeishes :: [Exported] -> [Git.Ref]
|
||||||
incompleteExportedTreeishes = concatMap incompleteExportedTreeish
|
incompleteExportedTreeishes = concatMap incompleteExportedTreeish
|
||||||
|
|
||||||
|
|
||||||
data ExportParticipants = ExportParticipants
|
data ExportParticipants = ExportParticipants
|
||||||
{ exportFrom :: UUID
|
{ exportFrom :: UUID
|
||||||
, exportTo :: UUID
|
, exportTo :: UUID
|
||||||
|
|
|
@ -12,6 +12,9 @@ import Types.Remote
|
||||||
import Types.Creds
|
import Types.Creds
|
||||||
import Types.Import
|
import Types.Import
|
||||||
import qualified Git
|
import qualified Git
|
||||||
|
import qualified Git.LsTree as LsTree
|
||||||
|
import Git.Types (toTreeItemType, TreeItemType(..))
|
||||||
|
import Git.FilePath
|
||||||
import Config
|
import Config
|
||||||
import Config.Cost
|
import Config.Cost
|
||||||
import Annex.SpecialRemote.Config
|
import Annex.SpecialRemote.Config
|
||||||
|
@ -21,6 +24,7 @@ import Annex.UUID
|
||||||
import Types.ProposedAccepted
|
import Types.ProposedAccepted
|
||||||
import Utility.Metered
|
import Utility.Metered
|
||||||
import qualified Remote.Helper.ThirdPartyPopulated as ThirdPartyPopulated
|
import qualified Remote.Helper.ThirdPartyPopulated as ThirdPartyPopulated
|
||||||
|
import Logs.Export
|
||||||
|
|
||||||
import Text.Read
|
import Text.Read
|
||||||
import Control.Exception (evaluate)
|
import Control.Exception (evaluate)
|
||||||
|
@ -74,7 +78,7 @@ gen r u rc gc rs = do
|
||||||
, checkPresentCheap = borgLocal borgrepo
|
, checkPresentCheap = borgLocal borgrepo
|
||||||
, exportActions = exportUnsupported
|
, exportActions = exportUnsupported
|
||||||
, importActions = ImportActions
|
, importActions = ImportActions
|
||||||
{ listImportableContents = listImportableContentsM borgrepo
|
{ listImportableContents = listImportableContentsM u borgrepo
|
||||||
, importKey = Just ThirdPartyPopulated.importKey
|
, importKey = Just ThirdPartyPopulated.importKey
|
||||||
, retrieveExportWithContentIdentifier = retrieveExportWithContentIdentifierM borgrepo
|
, retrieveExportWithContentIdentifier = retrieveExportWithContentIdentifierM borgrepo
|
||||||
, checkPresentExportWithContentIdentifier = checkPresentExportWithContentIdentifierM borgrepo
|
, checkPresentExportWithContentIdentifier = checkPresentExportWithContentIdentifierM borgrepo
|
||||||
|
@ -125,19 +129,21 @@ borgSetup _ mu _ c _gc = do
|
||||||
borgLocal :: BorgRepo -> Bool
|
borgLocal :: BorgRepo -> Bool
|
||||||
borgLocal = notElem ':'
|
borgLocal = notElem ':'
|
||||||
|
|
||||||
-- TODO avoid rescanning archives that have already been scanned
|
|
||||||
--
|
|
||||||
-- XXX the tree generated by using this does not seem to get grafted into
|
-- XXX the tree generated by using this does not seem to get grafted into
|
||||||
-- the git-annex branch, so would be subject to being lost to GC.
|
-- the git-annex branch, so would be subject to being lost to GC.
|
||||||
-- Is this a general problem affecting importtree too?
|
-- Is this a general problem affecting importtree too?
|
||||||
listImportableContentsM :: BorgRepo -> Annex (Maybe (ImportableContents (ContentIdentifier, ByteSize)))
|
listImportableContentsM :: UUID -> BorgRepo -> Annex (Maybe (ImportableContents (ContentIdentifier, ByteSize)))
|
||||||
listImportableContentsM borgrepo = prompt $ do
|
listImportableContentsM u borgrepo = prompt $ do
|
||||||
|
imported <- getImported u
|
||||||
ls <- withborglist borgrepo "{barchive}{NUL}" $ \as ->
|
ls <- withborglist borgrepo "{barchive}{NUL}" $ \as ->
|
||||||
forM as $ \archive ->
|
forM as $ \archivename ->
|
||||||
let archive' = borgrepo ++ "::" ++ decodeBS' archive
|
case M.lookup archivename imported of
|
||||||
in withborglist archive' "{size}{NUL}{path}{NUL}" $
|
Just getfast -> getfast
|
||||||
liftIO . evaluate . force . parsefilelist archive
|
Nothing ->
|
||||||
return (Just (mkimportablecontents ls))
|
let archive = borgrepo ++ "::" ++ decodeBS' archivename
|
||||||
|
in withborglist archive "{size}{NUL}{path}{NUL}" $
|
||||||
|
liftIO . evaluate . force . parsefilelist archivename
|
||||||
|
return $ Just $ mkimportablecontents ls
|
||||||
where
|
where
|
||||||
withborglist what format a = do
|
withborglist what format a = do
|
||||||
let p = (proc "borg" ["list", what, "--format", format])
|
let p = (proc "borg" ["list", what, "--format", format])
|
||||||
|
@ -152,10 +158,10 @@ listImportableContentsM borgrepo = prompt $ do
|
||||||
forceSuccessProcess p pid
|
forceSuccessProcess p pid
|
||||||
a l `finally` cleanup
|
a l `finally` cleanup
|
||||||
|
|
||||||
parsefilelist archive (bsz:f:rest) = case readMaybe (fromRawFilePath bsz) of
|
parsefilelist archivename (bsz:f:rest) = case readMaybe (fromRawFilePath bsz) of
|
||||||
Nothing -> parsefilelist archive rest
|
Nothing -> parsefilelist archivename rest
|
||||||
Just sz ->
|
Just sz ->
|
||||||
let loc = genImportLocation archive f
|
let loc = genImportLocation archivename f
|
||||||
-- This does a little unncessary work to parse the
|
-- This does a little unncessary work to parse the
|
||||||
-- key, which is then thrown away. But, it lets the
|
-- key, which is then thrown away. But, it lets the
|
||||||
-- file list be shrank down to only the ones that are
|
-- file list be shrank down to only the ones that are
|
||||||
|
@ -163,19 +169,25 @@ listImportableContentsM borgrepo = prompt $ do
|
||||||
-- the rest of the files in memory.
|
-- the rest of the files in memory.
|
||||||
in case ThirdPartyPopulated.importKey' loc sz of
|
in case ThirdPartyPopulated.importKey' loc sz of
|
||||||
Just k -> (loc, (borgContentIdentifier, sz))
|
Just k -> (loc, (borgContentIdentifier, sz))
|
||||||
: parsefilelist archive rest
|
: parsefilelist archivename rest
|
||||||
Nothing -> parsefilelist archive rest
|
Nothing -> parsefilelist archivename rest
|
||||||
parsefilelist _ _ = []
|
parsefilelist _ _ = []
|
||||||
|
|
||||||
-- importableHistory is not used for retrieval, so is not
|
-- importableHistory is not used for retrieval, so is not
|
||||||
-- populated with old archives. Instead, a tree of archives
|
-- populated with old archives. Instead, a tree of archives
|
||||||
-- is constructed, by genImportLocation including the archive
|
-- is constructed, by genImportLocation including the archive
|
||||||
-- name in the ImportLocation.
|
-- name in the ImportLocation.
|
||||||
mkimportablecontents (l) = ImportableContents
|
mkimportablecontents l = ImportableContents
|
||||||
{ importableContents = concat l
|
{ importableContents = concat l
|
||||||
, importableHistory = []
|
, importableHistory = []
|
||||||
}
|
}
|
||||||
|
|
||||||
|
-- We do not need a ContentIdentifier in order to retrieve a file from
|
||||||
|
-- borg; the ImportLocation contains all that's needed. So, this is left
|
||||||
|
-- empty.
|
||||||
|
borgContentIdentifier :: ContentIdentifier
|
||||||
|
borgContentIdentifier = ContentIdentifier mempty
|
||||||
|
|
||||||
-- Borg does not allow / in the name of an archive, so the archive
|
-- Borg does not allow / in the name of an archive, so the archive
|
||||||
-- name will always be the first directory in the ImportLocation.
|
-- name will always be the first directory in the ImportLocation.
|
||||||
--
|
--
|
||||||
|
@ -193,11 +205,45 @@ extractImportLocation loc = go $ P.splitDirectories $
|
||||||
go (archivename:rest) = (archivename, P.joinPath rest)
|
go (archivename:rest) = (archivename, P.joinPath rest)
|
||||||
go _ = giveup $ "Unable to parse import location " ++ fromRawFilePath (fromImportLocation loc)
|
go _ = giveup $ "Unable to parse import location " ++ fromRawFilePath (fromImportLocation loc)
|
||||||
|
|
||||||
-- We do not need a ContentIdentifier in order to retrieve a file from
|
-- Since the ImportLocation starts with the archive name, a list of all
|
||||||
-- borg; the ImportLocation contains all that's needed. So, this is left
|
-- archive names we've already imported can be found by just listing the
|
||||||
-- empty.
|
-- last imported tree. And the contents of those archives can be retrieved
|
||||||
borgContentIdentifier :: ContentIdentifier
|
-- by listing the subtree recursively, which will likely be quite a lot
|
||||||
borgContentIdentifier = ContentIdentifier mempty
|
-- faster than running borg.
|
||||||
|
getImported :: UUID -> Annex (M.Map BorgArchiveName (Annex [(ImportLocation, (ContentIdentifier, ByteSize))]))
|
||||||
|
getImported u = M.unions <$> (mapM go . exportedTreeishes =<< getExport u)
|
||||||
|
where
|
||||||
|
go t = M.fromList . mapMaybe mk
|
||||||
|
<$> inRepo (LsTree.lsTreeStrict LsTree.LsTreeNonRecursive t)
|
||||||
|
|
||||||
|
mk ti
|
||||||
|
| toTreeItemType (LsTree.mode ti) == Just TreeSubtree = Just
|
||||||
|
( getTopFilePath (LsTree.file ti)
|
||||||
|
, getcontents
|
||||||
|
(getTopFilePath (LsTree.file ti))
|
||||||
|
(LsTree.sha ti)
|
||||||
|
)
|
||||||
|
| otherwise = Nothing
|
||||||
|
|
||||||
|
getcontents archivename t = mapMaybe (mkcontents archivename)
|
||||||
|
<$> inRepo (LsTree.lsTreeStrict LsTree.LsTreeRecursive t)
|
||||||
|
|
||||||
|
mkcontents archivename ti = do
|
||||||
|
let f = ThirdPartyPopulated.fromThirdPartyImportLocation $
|
||||||
|
mkImportLocation $ getTopFilePath $ LsTree.file ti
|
||||||
|
k <- deserializeKey' (P.takeFileName f)
|
||||||
|
return
|
||||||
|
( genImportLocation archivename f
|
||||||
|
,
|
||||||
|
( borgContentIdentifier
|
||||||
|
-- defaulting to 0 size is ok, this size
|
||||||
|
-- only gets used by
|
||||||
|
-- ThirdPartyPopulated.importKey,
|
||||||
|
-- which ignores the size when the key
|
||||||
|
-- does not have a size.
|
||||||
|
, fromMaybe 0 (fromKey keySize k)
|
||||||
|
)
|
||||||
|
)
|
||||||
|
|
||||||
retrieveExportWithContentIdentifierM :: BorgRepo -> ImportLocation -> ContentIdentifier -> FilePath -> Annex Key -> MeterUpdate -> Annex Key
|
retrieveExportWithContentIdentifierM :: BorgRepo -> ImportLocation -> ContentIdentifier -> FilePath -> Annex Key -> MeterUpdate -> Annex Key
|
||||||
retrieveExportWithContentIdentifierM borgrepo loc cid dest k p = error "TODO"
|
retrieveExportWithContentIdentifierM borgrepo loc cid dest k p = error "TODO"
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue