optimisation for borg

Skip needing to list importable contents when unchanged since last time.
This commit is contained in:
Joey Hess 2020-12-22 14:35:02 -04:00
parent e1ac42be77
commit 4f9969d0a1
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
9 changed files with 32 additions and 21 deletions

View file

@ -660,12 +660,15 @@ makeImportMatcher r = load preferredContentKeylessTokens >>= \case
- would delete the files.
-
- Throws exception if unable to contact the remote.
- Returns Nothing when there is no change since last time.
-}
getImportableContents :: Remote -> ImportTreeConfig -> CheckGitIgnore -> FileMatcher Annex -> Annex (ImportableContents (ContentIdentifier, ByteSize))
getImportableContents :: Remote -> ImportTreeConfig -> CheckGitIgnore -> FileMatcher Annex -> Annex (Maybe (ImportableContents (ContentIdentifier, ByteSize)))
getImportableContents r importtreeconfig ci matcher = do
importable <- Remote.listImportableContents (Remote.importActions r)
dbhandle <- Export.openDb (Remote.uuid r)
filterunwanted dbhandle importable
Remote.listImportableContents (Remote.importActions r) >>= \case
Just importable -> do
dbhandle <- Export.openDb (Remote.uuid r)
Just <$> filterunwanted dbhandle importable
Nothing -> return Nothing
where
filterunwanted dbhandle ic = ImportableContents
<$> filterM (wanted dbhandle) (importableContents ic)

View file

@ -325,13 +325,13 @@ seekRemote remote branch msubdir importcontent ci = do
listContents :: Remote -> ImportTreeConfig -> CheckGitIgnore -> TVar (Maybe (ImportableContents (ContentIdentifier, Remote.ByteSize))) -> CommandStart
listContents remote importtreeconfig ci tvar = starting "list" ai si $
listContents' remote importtreeconfig ci $ \importable -> do
liftIO $ atomically $ writeTVar tvar (Just importable)
liftIO $ atomically $ writeTVar tvar importable
next $ return True
where
ai = ActionItemOther (Just (Remote.name remote))
si = SeekInput []
listContents' :: Remote -> ImportTreeConfig -> CheckGitIgnore -> (ImportableContents (ContentIdentifier, Remote.ByteSize) -> Annex a) -> Annex a
listContents' :: Remote -> ImportTreeConfig -> CheckGitIgnore -> (Maybe (ImportableContents (ContentIdentifier, Remote.ByteSize)) -> Annex a) -> Annex a
listContents' remote importtreeconfig ci a =
makeImportMatcher remote >>= \case
Right matcher -> tryNonAsync (getImportableContents remote importtreeconfig ci matcher) >>= \case

View file

@ -495,13 +495,14 @@ importThirdPartyPopulated remote =
void $ includeCommandAction $ starting "list" ai si $
Command.Import.listContents' remote ImportTree (CheckGitIgnore False) go
where
go importable = importKeys remote ImportTree False True importable >>= \case
go (Just importable) = importKeys remote ImportTree False True importable >>= \case
Just importablekeys -> do
(_imported, updatestate) <- recordImportTree remote ImportTree importablekeys
next $ do
updatestate
return True
Nothing -> next $ return False
go Nothing = next $ return True -- unchanged from before
ai = ActionItemOther (Just (Remote.name remote))
si = SeekInput []

View file

@ -286,9 +286,9 @@ renameExportM serial adir _k old new = do
, File newloc
]
listImportableContentsM :: AndroidSerial -> AndroidPath -> Annex (ImportableContents (ContentIdentifier, ByteSize))
listImportableContentsM :: AndroidSerial -> AndroidPath -> Annex (Maybe (ImportableContents (ContentIdentifier, ByteSize)))
listImportableContentsM serial adir = adbfind >>= \case
Just ls -> return $ ImportableContents (mapMaybe mk ls) []
Just ls -> return $ Just $ ImportableContents (mapMaybe mk ls) []
Nothing -> giveup "adb find failed"
where
adbfind = adbShell serial

View file

@ -26,6 +26,7 @@ import Utility.Metered
import qualified Remote.Helper.ThirdPartyPopulated as ThirdPartyPopulated
import Logs.Export
import Data.Either
import Text.Read
import Control.Exception (evaluate)
import Control.DeepSeq
@ -122,8 +123,6 @@ borgSetup _ mu _ c _gc = do
-- persistant state, so it can vary between hosts.
gitConfigSpecialRemote u c [("borgrepo", borgrepo)]
-- TODO: untrusted by default, but allow overriding that
return (c, u)
borgLocal :: BorgRepo -> Bool
@ -132,18 +131,20 @@ borgLocal = notElem ':'
-- XXX the tree generated by using this does not seem to get grafted into
-- the git-annex branch, so would be subject to being lost to GC.
-- Is this a general problem affecting importtree too?
listImportableContentsM :: UUID -> BorgRepo -> Annex (ImportableContents (ContentIdentifier, ByteSize))
listImportableContentsM :: UUID -> BorgRepo -> Annex (Maybe (ImportableContents (ContentIdentifier, ByteSize)))
listImportableContentsM u borgrepo = prompt $ do
imported <- getImported u
ls <- withborglist borgrepo "{barchive}{NUL}" $ \as ->
forM as $ \archivename ->
case M.lookup archivename imported of
Just getfast -> getfast
Nothing ->
Just getfast -> return $ Left getfast
Nothing -> Right <$>
let archive = borgrepo ++ "::" ++ decodeBS' archivename
in withborglist archive "{size}{NUL}{path}{NUL}" $
liftIO . evaluate . force . parsefilelist archivename
return $ mkimportablecontents ls
if all isLeft ls
then return Nothing -- unchanged since last time, avoid work
else Just . mkimportablecontents <$> mapM (either id pure) ls
where
withborglist what format a = do
let p = (proc "borg" ["list", what, "--format", format])

View file

@ -337,11 +337,11 @@ removeExportLocation topdir loc =
mkExportLocation loc'
in go (upFrom loc') =<< tryIO (removeDirectory p)
listImportableContentsM :: RawFilePath -> Annex (ImportableContents (ContentIdentifier, ByteSize))
listImportableContentsM :: RawFilePath -> Annex (Maybe (ImportableContents (ContentIdentifier, ByteSize)))
listImportableContentsM dir = liftIO $ do
l <- dirContentsRecursive (fromRawFilePath dir)
l' <- mapM (go . toRawFilePath) l
return $ ImportableContents (catMaybes l') []
return $ Just $ ImportableContents (catMaybes l') []
where
go f = do
st <- R.getFileStatus f

View file

@ -550,13 +550,14 @@ renameExportS3 hv r rs info k src dest = Just <$> go
srcobject = T.pack $ bucketExportLocation info src
dstobject = T.pack $ bucketExportLocation info dest
listImportableContentsS3 :: S3HandleVar -> Remote -> S3Info -> Annex (ImportableContents (ContentIdentifier, ByteSize))
listImportableContentsS3 :: S3HandleVar -> Remote -> S3Info -> Annex (Maybe (ImportableContents (ContentIdentifier, ByteSize)))
listImportableContentsS3 hv r info =
withS3Handle hv $ \case
Nothing -> giveup $ needS3Creds (uuid r)
Just h -> liftIO $ runResourceT $
extractFromResourceT =<< startlist h
Just h -> Just <$> go h
where
go h = liftIO $ runResourceT $ extractFromResourceT =<< startlist h
startlist h
| versioning info = do
rsp <- sendS3Handle h $

View file

@ -283,7 +283,8 @@ data ImportActions a = ImportActions
-- remote.
--
-- Throws exception on failure to access the remote.
{ listImportableContents :: a (ImportableContents (ContentIdentifier, ByteSize))
-- May return Nothing when the remote is unchanged since last time.
{ listImportableContents :: a (Maybe (ImportableContents (ContentIdentifier, ByteSize)))
-- Generates a Key (of any type) for the file stored on the
-- remote at the ImportLocation. Does not download the file
-- from the remote.

View file

@ -62,3 +62,7 @@ So either keep the borg special remote as untrusted, and use such borg
commands to delete old archives as needed, or avoid using `borg delete`
and `borg prune`, and then the remote can safely be made semitrusted or
trusted.
Also, if you do choose to delete old archives, make sure to never reuse
that archive name for a new archive. git-annex may think it's the same
archive it saw before, and not notice the change.