optimisation for borg

Skip needing to list importable contents when unchanged since last time.
This commit is contained in:
Joey Hess 2020-12-22 14:35:02 -04:00
parent e1ac42be77
commit 4f9969d0a1
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
9 changed files with 32 additions and 21 deletions

View file

@ -660,12 +660,15 @@ makeImportMatcher r = load preferredContentKeylessTokens >>= \case
- would delete the files. - would delete the files.
- -
- Throws exception if unable to contact the remote. - Throws exception if unable to contact the remote.
- Returns Nothing when there is no change since last time.
-} -}
getImportableContents :: Remote -> ImportTreeConfig -> CheckGitIgnore -> FileMatcher Annex -> Annex (ImportableContents (ContentIdentifier, ByteSize)) getImportableContents :: Remote -> ImportTreeConfig -> CheckGitIgnore -> FileMatcher Annex -> Annex (Maybe (ImportableContents (ContentIdentifier, ByteSize)))
getImportableContents r importtreeconfig ci matcher = do getImportableContents r importtreeconfig ci matcher = do
importable <- Remote.listImportableContents (Remote.importActions r) Remote.listImportableContents (Remote.importActions r) >>= \case
dbhandle <- Export.openDb (Remote.uuid r) Just importable -> do
filterunwanted dbhandle importable dbhandle <- Export.openDb (Remote.uuid r)
Just <$> filterunwanted dbhandle importable
Nothing -> return Nothing
where where
filterunwanted dbhandle ic = ImportableContents filterunwanted dbhandle ic = ImportableContents
<$> filterM (wanted dbhandle) (importableContents ic) <$> filterM (wanted dbhandle) (importableContents ic)

View file

@ -325,13 +325,13 @@ seekRemote remote branch msubdir importcontent ci = do
listContents :: Remote -> ImportTreeConfig -> CheckGitIgnore -> TVar (Maybe (ImportableContents (ContentIdentifier, Remote.ByteSize))) -> CommandStart listContents :: Remote -> ImportTreeConfig -> CheckGitIgnore -> TVar (Maybe (ImportableContents (ContentIdentifier, Remote.ByteSize))) -> CommandStart
listContents remote importtreeconfig ci tvar = starting "list" ai si $ listContents remote importtreeconfig ci tvar = starting "list" ai si $
listContents' remote importtreeconfig ci $ \importable -> do listContents' remote importtreeconfig ci $ \importable -> do
liftIO $ atomically $ writeTVar tvar (Just importable) liftIO $ atomically $ writeTVar tvar importable
next $ return True next $ return True
where where
ai = ActionItemOther (Just (Remote.name remote)) ai = ActionItemOther (Just (Remote.name remote))
si = SeekInput [] si = SeekInput []
listContents' :: Remote -> ImportTreeConfig -> CheckGitIgnore -> (ImportableContents (ContentIdentifier, Remote.ByteSize) -> Annex a) -> Annex a listContents' :: Remote -> ImportTreeConfig -> CheckGitIgnore -> (Maybe (ImportableContents (ContentIdentifier, Remote.ByteSize)) -> Annex a) -> Annex a
listContents' remote importtreeconfig ci a = listContents' remote importtreeconfig ci a =
makeImportMatcher remote >>= \case makeImportMatcher remote >>= \case
Right matcher -> tryNonAsync (getImportableContents remote importtreeconfig ci matcher) >>= \case Right matcher -> tryNonAsync (getImportableContents remote importtreeconfig ci matcher) >>= \case

View file

@ -495,13 +495,14 @@ importThirdPartyPopulated remote =
void $ includeCommandAction $ starting "list" ai si $ void $ includeCommandAction $ starting "list" ai si $
Command.Import.listContents' remote ImportTree (CheckGitIgnore False) go Command.Import.listContents' remote ImportTree (CheckGitIgnore False) go
where where
go importable = importKeys remote ImportTree False True importable >>= \case go (Just importable) = importKeys remote ImportTree False True importable >>= \case
Just importablekeys -> do Just importablekeys -> do
(_imported, updatestate) <- recordImportTree remote ImportTree importablekeys (_imported, updatestate) <- recordImportTree remote ImportTree importablekeys
next $ do next $ do
updatestate updatestate
return True return True
Nothing -> next $ return False Nothing -> next $ return False
go Nothing = next $ return True -- unchanged from before
ai = ActionItemOther (Just (Remote.name remote)) ai = ActionItemOther (Just (Remote.name remote))
si = SeekInput [] si = SeekInput []

View file

@ -286,9 +286,9 @@ renameExportM serial adir _k old new = do
, File newloc , File newloc
] ]
listImportableContentsM :: AndroidSerial -> AndroidPath -> Annex (ImportableContents (ContentIdentifier, ByteSize)) listImportableContentsM :: AndroidSerial -> AndroidPath -> Annex (Maybe (ImportableContents (ContentIdentifier, ByteSize)))
listImportableContentsM serial adir = adbfind >>= \case listImportableContentsM serial adir = adbfind >>= \case
Just ls -> return $ ImportableContents (mapMaybe mk ls) [] Just ls -> return $ Just $ ImportableContents (mapMaybe mk ls) []
Nothing -> giveup "adb find failed" Nothing -> giveup "adb find failed"
where where
adbfind = adbShell serial adbfind = adbShell serial

View file

@ -26,6 +26,7 @@ import Utility.Metered
import qualified Remote.Helper.ThirdPartyPopulated as ThirdPartyPopulated import qualified Remote.Helper.ThirdPartyPopulated as ThirdPartyPopulated
import Logs.Export import Logs.Export
import Data.Either
import Text.Read import Text.Read
import Control.Exception (evaluate) import Control.Exception (evaluate)
import Control.DeepSeq import Control.DeepSeq
@ -122,8 +123,6 @@ borgSetup _ mu _ c _gc = do
-- persistant state, so it can vary between hosts. -- persistant state, so it can vary between hosts.
gitConfigSpecialRemote u c [("borgrepo", borgrepo)] gitConfigSpecialRemote u c [("borgrepo", borgrepo)]
-- TODO: untrusted by default, but allow overriding that
return (c, u) return (c, u)
borgLocal :: BorgRepo -> Bool borgLocal :: BorgRepo -> Bool
@ -132,18 +131,20 @@ borgLocal = notElem ':'
-- XXX the tree generated by using this does not seem to get grafted into -- XXX the tree generated by using this does not seem to get grafted into
-- the git-annex branch, so would be subject to being lost to GC. -- the git-annex branch, so would be subject to being lost to GC.
-- Is this a general problem affecting importtree too? -- Is this a general problem affecting importtree too?
listImportableContentsM :: UUID -> BorgRepo -> Annex (ImportableContents (ContentIdentifier, ByteSize)) listImportableContentsM :: UUID -> BorgRepo -> Annex (Maybe (ImportableContents (ContentIdentifier, ByteSize)))
listImportableContentsM u borgrepo = prompt $ do listImportableContentsM u borgrepo = prompt $ do
imported <- getImported u imported <- getImported u
ls <- withborglist borgrepo "{barchive}{NUL}" $ \as -> ls <- withborglist borgrepo "{barchive}{NUL}" $ \as ->
forM as $ \archivename -> forM as $ \archivename ->
case M.lookup archivename imported of case M.lookup archivename imported of
Just getfast -> getfast Just getfast -> return $ Left getfast
Nothing -> Nothing -> Right <$>
let archive = borgrepo ++ "::" ++ decodeBS' archivename let archive = borgrepo ++ "::" ++ decodeBS' archivename
in withborglist archive "{size}{NUL}{path}{NUL}" $ in withborglist archive "{size}{NUL}{path}{NUL}" $
liftIO . evaluate . force . parsefilelist archivename liftIO . evaluate . force . parsefilelist archivename
return $ mkimportablecontents ls if all isLeft ls
then return Nothing -- unchanged since last time, avoid work
else Just . mkimportablecontents <$> mapM (either id pure) ls
where where
withborglist what format a = do withborglist what format a = do
let p = (proc "borg" ["list", what, "--format", format]) let p = (proc "borg" ["list", what, "--format", format])

View file

@ -337,11 +337,11 @@ removeExportLocation topdir loc =
mkExportLocation loc' mkExportLocation loc'
in go (upFrom loc') =<< tryIO (removeDirectory p) in go (upFrom loc') =<< tryIO (removeDirectory p)
listImportableContentsM :: RawFilePath -> Annex (ImportableContents (ContentIdentifier, ByteSize)) listImportableContentsM :: RawFilePath -> Annex (Maybe (ImportableContents (ContentIdentifier, ByteSize)))
listImportableContentsM dir = liftIO $ do listImportableContentsM dir = liftIO $ do
l <- dirContentsRecursive (fromRawFilePath dir) l <- dirContentsRecursive (fromRawFilePath dir)
l' <- mapM (go . toRawFilePath) l l' <- mapM (go . toRawFilePath) l
return $ ImportableContents (catMaybes l') [] return $ Just $ ImportableContents (catMaybes l') []
where where
go f = do go f = do
st <- R.getFileStatus f st <- R.getFileStatus f

View file

@ -550,13 +550,14 @@ renameExportS3 hv r rs info k src dest = Just <$> go
srcobject = T.pack $ bucketExportLocation info src srcobject = T.pack $ bucketExportLocation info src
dstobject = T.pack $ bucketExportLocation info dest dstobject = T.pack $ bucketExportLocation info dest
listImportableContentsS3 :: S3HandleVar -> Remote -> S3Info -> Annex (ImportableContents (ContentIdentifier, ByteSize)) listImportableContentsS3 :: S3HandleVar -> Remote -> S3Info -> Annex (Maybe (ImportableContents (ContentIdentifier, ByteSize)))
listImportableContentsS3 hv r info = listImportableContentsS3 hv r info =
withS3Handle hv $ \case withS3Handle hv $ \case
Nothing -> giveup $ needS3Creds (uuid r) Nothing -> giveup $ needS3Creds (uuid r)
Just h -> liftIO $ runResourceT $ Just h -> Just <$> go h
extractFromResourceT =<< startlist h
where where
go h = liftIO $ runResourceT $ extractFromResourceT =<< startlist h
startlist h startlist h
| versioning info = do | versioning info = do
rsp <- sendS3Handle h $ rsp <- sendS3Handle h $

View file

@ -283,7 +283,8 @@ data ImportActions a = ImportActions
-- remote. -- remote.
-- --
-- Throws exception on failure to access the remote. -- Throws exception on failure to access the remote.
{ listImportableContents :: a (ImportableContents (ContentIdentifier, ByteSize)) -- May return Nothing when the remote is unchanged since last time.
{ listImportableContents :: a (Maybe (ImportableContents (ContentIdentifier, ByteSize)))
-- Generates a Key (of any type) for the file stored on the -- Generates a Key (of any type) for the file stored on the
-- remote at the ImportLocation. Does not download the file -- remote at the ImportLocation. Does not download the file
-- from the remote. -- from the remote.

View file

@ -62,3 +62,7 @@ So either keep the borg special remote as untrusted, and use such borg
commands to delete old archives as needed, or avoid using `borg delete` commands to delete old archives as needed, or avoid using `borg delete`
and `borg prune`, and then the remote can safely be made semitrusted or and `borg prune`, and then the remote can safely be made semitrusted or
trusted. trusted.
Also, if you do choose to delete old archives, make sure to never reuse
that archive name for a new archive. git-annex may think it's the same
archive it saw before, and not notice the change.