Merge branch 'borgchunks'
This commit is contained in:
commit
022bb6174c
14 changed files with 306 additions and 92 deletions
224
Annex/Import.hs
224
Annex/Import.hs
|
@ -1,6 +1,6 @@
|
|||
{- git-annex import from remotes
|
||||
-
|
||||
- Copyright 2019-2020 Joey Hess <id@joeyh.name>
|
||||
- Copyright 2019-2021 Joey Hess <id@joeyh.name>
|
||||
-
|
||||
- Licensed under the GNU AGPL version 3 or higher.
|
||||
-}
|
||||
|
@ -98,7 +98,7 @@ buildImportCommit
|
|||
:: Remote
|
||||
-> ImportTreeConfig
|
||||
-> ImportCommitConfig
|
||||
-> ImportableContents (Either Sha Key)
|
||||
-> ImportableContentsChunkable Annex (Either Sha Key)
|
||||
-> Annex (Maybe Ref)
|
||||
buildImportCommit remote importtreeconfig importcommitconfig importable =
|
||||
case importCommitTracking importcommitconfig of
|
||||
|
@ -123,7 +123,7 @@ buildImportCommit remote importtreeconfig importcommitconfig importable =
|
|||
recordImportTree
|
||||
:: Remote
|
||||
-> ImportTreeConfig
|
||||
-> ImportableContents (Either Sha Key)
|
||||
-> ImportableContentsChunkable Annex (Either Sha Key)
|
||||
-> Annex (History Sha, Annex ())
|
||||
recordImportTree remote importtreeconfig importable = do
|
||||
imported@(History finaltree _) <- buildImportTrees basetree subdir importable
|
||||
|
@ -264,25 +264,75 @@ buildImportCommit' remote importcommitconfig mtrackingcommit imported@(History t
|
|||
buildImportTrees
|
||||
:: Ref
|
||||
-> Maybe TopFilePath
|
||||
-> ImportableContents (Either Sha Key)
|
||||
-> ImportableContentsChunkable Annex (Either Sha Key)
|
||||
-> Annex (History Sha)
|
||||
buildImportTrees basetree msubdir importable = History
|
||||
<$> (buildtree (importableContents importable) =<< Annex.gitRepo)
|
||||
<*> buildhistory
|
||||
buildImportTrees basetree msubdir (ImportableContentsComplete importable) = do
|
||||
repo <- Annex.gitRepo
|
||||
withMkTreeHandle repo $ buildImportTrees' basetree msubdir importable
|
||||
buildImportTrees basetree msubdir importable@(ImportableContentsChunked {}) = do
|
||||
repo <- Annex.gitRepo
|
||||
withMkTreeHandle repo $ \hdl ->
|
||||
History
|
||||
<$> go hdl
|
||||
<*> buildImportTreesHistory basetree msubdir
|
||||
(importableHistoryComplete importable) hdl
|
||||
where
|
||||
go hdl = do
|
||||
tree <- gochunks [] (importableContentsChunk importable) hdl
|
||||
importtree <- liftIO $ recordTree' hdl tree
|
||||
graftImportTree basetree msubdir importtree hdl
|
||||
|
||||
gochunks l c hdl = do
|
||||
let subdir = importChunkSubDir $ importableContentsSubDir c
|
||||
-- Full directory prefix where the sub tree is located.
|
||||
let fullprefix = asTopFilePath $ case msubdir of
|
||||
Nothing -> subdir
|
||||
Just d -> getTopFilePath d Posix.</> subdir
|
||||
Tree ts <- convertImportTree (Just fullprefix) $
|
||||
map (\(p, i) -> (mkImportLocation p, i))
|
||||
(importableContentsSubTree c)
|
||||
-- Record this subtree before getting next chunk, this
|
||||
-- avoids buffering all the chunks into memory.
|
||||
tc <- liftIO $ recordSubTree hdl $
|
||||
NewSubTree (asTopFilePath subdir) ts
|
||||
importableContentsNextChunk c >>= \case
|
||||
Nothing -> return (Tree (tc:l))
|
||||
Just c' -> gochunks (tc:l) c' hdl
|
||||
|
||||
buildImportTrees'
|
||||
:: Ref
|
||||
-> Maybe TopFilePath
|
||||
-> ImportableContents (Either Sha Key)
|
||||
-> MkTreeHandle
|
||||
-> Annex (History Sha)
|
||||
buildImportTrees' basetree msubdir importable hdl = History
|
||||
<$> buildImportTree basetree msubdir (importableContents importable) hdl
|
||||
<*> buildImportTreesHistory basetree msubdir (importableHistory importable) hdl
|
||||
|
||||
buildImportTree
|
||||
:: Ref
|
||||
-> Maybe TopFilePath
|
||||
-> [(ImportLocation, Either Sha Key)]
|
||||
-> MkTreeHandle
|
||||
-> Annex Sha
|
||||
buildImportTree basetree msubdir ls hdl = do
|
||||
importtree <- liftIO . recordTree' hdl =<< convertImportTree msubdir ls
|
||||
graftImportTree basetree msubdir importtree hdl
|
||||
|
||||
graftImportTree
|
||||
:: Ref
|
||||
-> Maybe TopFilePath
|
||||
-> Sha
|
||||
-> MkTreeHandle
|
||||
-> Annex Sha
|
||||
graftImportTree basetree msubdir tree hdl = case msubdir of
|
||||
Nothing -> return tree
|
||||
Just subdir -> inRepo $ \repo ->
|
||||
graftTree' tree subdir basetree repo hdl
|
||||
|
||||
convertImportTree :: Maybe TopFilePath -> [(ImportLocation, Either Sha Key)] -> Annex Tree
|
||||
convertImportTree msubdir ls = treeItemsToTree <$> mapM mktreeitem ls
|
||||
where
|
||||
buildhistory = S.fromList
|
||||
<$> mapM (buildImportTrees basetree msubdir)
|
||||
(importableHistory importable)
|
||||
|
||||
buildtree ls repo = withMkTreeHandle repo $ \hdl -> do
|
||||
importtree <- liftIO . recordTree' hdl
|
||||
. treeItemsToTree
|
||||
=<< mapM mktreeitem ls
|
||||
case msubdir of
|
||||
Nothing -> return importtree
|
||||
Just subdir -> liftIO $
|
||||
graftTree' importtree subdir basetree repo hdl
|
||||
|
||||
mktreeitem (loc, v) = case v of
|
||||
Right k -> do
|
||||
relf <- fromRepo $ fromTopFilePath topf
|
||||
|
@ -297,6 +347,15 @@ buildImportTrees basetree msubdir importable = History
|
|||
topf = asTopFilePath $
|
||||
maybe lf (\sd -> getTopFilePath sd P.</> lf) msubdir
|
||||
|
||||
buildImportTreesHistory
|
||||
:: Ref
|
||||
-> Maybe TopFilePath
|
||||
-> [ImportableContents (Either Sha Key)]
|
||||
-> MkTreeHandle
|
||||
-> Annex (S.Set (History Sha))
|
||||
buildImportTreesHistory basetree msubdir history hdl = S.fromList
|
||||
<$> mapM (\ic -> buildImportTrees' basetree msubdir ic hdl) history
|
||||
|
||||
canImportKeys :: Remote -> Bool -> Bool
|
||||
canImportKeys remote importcontent =
|
||||
importcontent || isJust (Remote.importKey ia)
|
||||
|
@ -324,8 +383,8 @@ importKeys
|
|||
-> ImportTreeConfig
|
||||
-> Bool
|
||||
-> Bool
|
||||
-> ImportableContents (ContentIdentifier, ByteSize)
|
||||
-> Annex (Maybe (ImportableContents (Either Sha Key)))
|
||||
-> ImportableContentsChunkable Annex (ContentIdentifier, ByteSize)
|
||||
-> Annex (Maybe (ImportableContentsChunkable Annex (Either Sha Key)))
|
||||
importKeys remote importtreeconfig importcontent thirdpartypopulated importablecontents = do
|
||||
unless (canImportKeys remote importcontent) $
|
||||
giveup "This remote does not support importing without downloading content."
|
||||
|
@ -339,40 +398,82 @@ importKeys remote importtreeconfig importcontent thirdpartypopulated importablec
|
|||
-- When concurrency is enabled, this set is needed to
|
||||
-- avoid two threads both importing the same content identifier.
|
||||
importing <- liftIO $ newTVarIO S.empty
|
||||
withExclusiveLock gitAnnexContentIdentifierLock $
|
||||
bracket CIDDb.openDb CIDDb.closeDb $ \db -> do
|
||||
CIDDb.needsUpdateFromLog db
|
||||
>>= maybe noop (CIDDb.updateFromLog db)
|
||||
(run (go False cidmap importing importablecontents db))
|
||||
withciddb $ \db -> do
|
||||
CIDDb.needsUpdateFromLog db
|
||||
>>= maybe noop (CIDDb.updateFromLog db)
|
||||
(prepclock (run cidmap importing db))
|
||||
where
|
||||
-- When not importing content, reuse the same vector
|
||||
-- clock for all state that's recorded. This can save
|
||||
-- a little bit of disk space. Individual file downloads
|
||||
-- while downloading take too long for this optimisation
|
||||
-- to be safe to do.
|
||||
run a
|
||||
prepclock a
|
||||
| importcontent = a
|
||||
| otherwise = reuseVectorClockWhile a
|
||||
|
||||
go oldversion cidmap importing (ImportableContents l h) db = do
|
||||
withciddb = withExclusiveLock gitAnnexContentIdentifierLock .
|
||||
bracket CIDDb.openDb CIDDb.closeDb
|
||||
|
||||
run cidmap importing db = do
|
||||
largematcher <- largeFilesMatcher
|
||||
case importablecontents of
|
||||
ImportableContentsComplete ic ->
|
||||
go False largematcher cidmap importing db ic >>= return . \case
|
||||
Nothing -> Nothing
|
||||
Just v -> Just $ ImportableContentsComplete v
|
||||
ImportableContentsChunked {} -> do
|
||||
c <- gochunked db (importableContentsChunk importablecontents)
|
||||
gohistory largematcher cidmap importing db (importableHistoryComplete importablecontents) >>= return . \case
|
||||
Nothing -> Nothing
|
||||
Just h -> Just $ ImportableContentsChunked
|
||||
{ importableContentsChunk = c
|
||||
, importableHistoryComplete = h
|
||||
}
|
||||
|
||||
go oldversion largematcher cidmap importing db (ImportableContents l h) = do
|
||||
jobs <- forM l $ \i ->
|
||||
if thirdpartypopulated
|
||||
then thirdpartypopulatedimport cidmap db i
|
||||
then Left <$> thirdpartypopulatedimport db i
|
||||
else startimport cidmap importing db i oldversion largematcher
|
||||
l' <- liftIO $ forM jobs $
|
||||
either pure (atomically . takeTMVar)
|
||||
if any isNothing l'
|
||||
then return Nothing
|
||||
else do
|
||||
h' <- mapM (\ic -> go True cidmap importing ic db) h
|
||||
if any isNothing h'
|
||||
then return Nothing
|
||||
else return $ Just $
|
||||
ImportableContents
|
||||
(catMaybes l')
|
||||
(catMaybes h')
|
||||
else gohistory largematcher cidmap importing db h >>= return . \case
|
||||
Nothing -> Nothing
|
||||
Just h' -> Just $ ImportableContents (catMaybes l') h'
|
||||
|
||||
gohistory largematcher cidmap importing db h = do
|
||||
h' <- mapM (go True largematcher cidmap importing db) h
|
||||
if any isNothing h'
|
||||
then return Nothing
|
||||
else return $ Just $ catMaybes h'
|
||||
|
||||
gochunked db c
|
||||
-- Downloading cannot be done when chunked, since only
|
||||
-- the first chunk is processed before returning.
|
||||
| importcontent = error "importKeys does not support downloading chunked import"
|
||||
-- Chunked import is currently only used by thirdpartypopulated
|
||||
-- remotes.
|
||||
| not thirdpartypopulated = error "importKeys does not support chunked import when not thirdpartypopulated"
|
||||
| otherwise = do
|
||||
l <- forM (importableContentsSubTree c) $ \(loc, i) -> do
|
||||
let loc' = importableContentsChunkFullLocation (importableContentsSubDir c) loc
|
||||
thirdpartypopulatedimport db (loc', i) >>= return . \case
|
||||
Just (_loc, k) -> Just (loc, k)
|
||||
Nothing -> Nothing
|
||||
return $ ImportableContentsChunk
|
||||
{ importableContentsSubDir = importableContentsSubDir c
|
||||
, importableContentsSubTree = catMaybes l
|
||||
, importableContentsNextChunk =
|
||||
importableContentsNextChunk c >>= \case
|
||||
Nothing -> return Nothing
|
||||
Just c' -> withciddb $ \db' ->
|
||||
prepclock $
|
||||
Just <$> gochunked db' c'
|
||||
}
|
||||
|
||||
waitstart importing cid = liftIO $ atomically $ do
|
||||
s <- readTVar importing
|
||||
if S.member cid s
|
||||
|
@ -418,19 +519,19 @@ importKeys remote importtreeconfig importcontent thirdpartypopulated importablec
|
|||
importaction
|
||||
return (Right job)
|
||||
|
||||
thirdpartypopulatedimport cidmap db (loc, (cid, sz)) =
|
||||
thirdpartypopulatedimport db (loc, (cid, sz)) =
|
||||
case Remote.importKey ia of
|
||||
Nothing -> return $ Left Nothing
|
||||
Nothing -> return Nothing
|
||||
Just importkey ->
|
||||
tryNonAsync (importkey loc cid sz nullMeterUpdate) >>= \case
|
||||
Right (Just k) -> do
|
||||
recordcidkey cidmap db cid k
|
||||
logChange k (Remote.uuid remote) InfoPresent
|
||||
return $ Left $ Just (loc, Right k)
|
||||
Right Nothing -> return $ Left Nothing
|
||||
recordcidkey' db cid k
|
||||
logChange k (Remote.uuid remote) InfoPresent
|
||||
return $ Just (loc, Right k)
|
||||
Right Nothing -> return Nothing
|
||||
Left e -> do
|
||||
warning (show e)
|
||||
return $ Left Nothing
|
||||
return Nothing
|
||||
|
||||
importordownload cidmap db (loc, (cid, sz)) largematcher= do
|
||||
f <- locworktreefile loc
|
||||
|
@ -603,6 +704,8 @@ importKeys remote importtreeconfig importcontent thirdpartypopulated importablec
|
|||
recordcidkey cidmap db cid k = do
|
||||
liftIO $ atomically $ modifyTVar' cidmap $
|
||||
M.insert cid k
|
||||
recordcidkey' db cid k
|
||||
recordcidkey' db cid k = do
|
||||
liftIO $ CIDDb.recordContentIdentifier db rs cid k
|
||||
CIDLog.recordContentIdentifier rs cid k
|
||||
|
||||
|
@ -675,18 +778,38 @@ makeImportMatcher r = load preferredContentKeylessTokens >>= \case
|
|||
- Throws exception if unable to contact the remote.
|
||||
- Returns Nothing when there is no change since last time.
|
||||
-}
|
||||
getImportableContents :: Remote -> ImportTreeConfig -> CheckGitIgnore -> FileMatcher Annex -> Annex (Maybe (ImportableContents (ContentIdentifier, ByteSize)))
|
||||
getImportableContents :: Remote -> ImportTreeConfig -> CheckGitIgnore -> FileMatcher Annex -> Annex (Maybe (ImportableContentsChunkable Annex (ContentIdentifier, ByteSize)))
|
||||
getImportableContents r importtreeconfig ci matcher = do
|
||||
Remote.listImportableContents (Remote.importActions r) >>= \case
|
||||
Just importable -> do
|
||||
dbhandle <- Export.openDb (Remote.uuid r)
|
||||
Just <$> filterunwanted dbhandle importable
|
||||
Just (ImportableContentsComplete ic) -> do
|
||||
dbhandle <- opendbhandle
|
||||
Just . ImportableContentsComplete
|
||||
<$> filterunwanted dbhandle ic
|
||||
Just (c@(ImportableContentsChunked {})) -> do
|
||||
dbhandle <- opendbhandle
|
||||
Just <$> filterunwantedchunked dbhandle c
|
||||
Nothing -> return Nothing
|
||||
where
|
||||
filterunwanted dbhandle ic = ImportableContents
|
||||
<$> filterM (wanted dbhandle) (importableContents ic)
|
||||
<*> mapM (filterunwanted dbhandle) (importableHistory ic)
|
||||
|
||||
filterunwantedchunked dbhandle c = ImportableContentsChunked
|
||||
<$> filterunwantedchunk dbhandle (importableContentsChunk c)
|
||||
<*> mapM (filterunwanted dbhandle) (importableHistoryComplete c)
|
||||
|
||||
filterunwantedchunk dbhandle c = ImportableContentsChunk
|
||||
<$> pure (importableContentsSubDir c)
|
||||
<*> filterM (wantedunder dbhandle (importableContentsSubDir c))
|
||||
(importableContentsSubTree c)
|
||||
<*> pure (
|
||||
importableContentsNextChunk c >>= \case
|
||||
Nothing -> return Nothing
|
||||
Just c' -> Just <$> filterunwantedchunk dbhandle c'
|
||||
)
|
||||
|
||||
opendbhandle = Export.openDb (Remote.uuid r)
|
||||
|
||||
wanted dbhandle (loc, (_cid, sz))
|
||||
| ingitdir = pure False
|
||||
| otherwise =
|
||||
|
@ -697,6 +820,9 @@ getImportableContents r importtreeconfig ci matcher = do
|
|||
matches = matchesImportLocation matcher loc sz
|
||||
isknown = isKnownImportLocation dbhandle loc
|
||||
notignored = notIgnoredImportLocation importtreeconfig ci loc
|
||||
|
||||
wantedunder dbhandle root (loc, v) =
|
||||
wanted dbhandle (importableContentsChunkFullLocation root loc, v)
|
||||
|
||||
isKnownImportLocation :: Export.ExportHandle -> ImportLocation -> Annex Bool
|
||||
isKnownImportLocation dbhandle loc = liftIO $
|
||||
|
|
|
@ -6,6 +6,8 @@ git-annex (8.20210904) UNRELEASED; urgency=medium
|
|||
* Bug fix: Git configs such as annex.verify were incorrectly overriding
|
||||
per-remote git configs such as remote.name.annex-verify.
|
||||
(Reversion in version 4.20130323)
|
||||
* borg: Significantly improved memory use when a borg repository
|
||||
contains many archives.
|
||||
* borg: Avoid trying to extract xattrs, ACLS, and bsdflags when
|
||||
retrieving from a borg repository.
|
||||
* Sped up git-annex smudge --clean by 25%.
|
||||
|
@ -15,7 +17,6 @@ git-annex (8.20210904) UNRELEASED; urgency=medium
|
|||
incrementally verified, when used on NTFS and perhaps other filesystems.
|
||||
* reinject: Fix crash when reinjecting a file from outside the repository.
|
||||
(Reversion in version 8.20210621)
|
||||
* borg: Some improvements to memory use when importing a lot of archives.
|
||||
* Avoid cursor jitter when updating progress display.
|
||||
|
||||
-- Joey Hess <id@joeyh.name> Fri, 03 Sep 2021 12:02:55 -0400
|
||||
|
|
|
@ -346,7 +346,7 @@ seekRemote remote branch msubdir importcontent ci = do
|
|||
|
||||
fromtrackingbranch a = inRepo $ a (fromRemoteTrackingBranch tb)
|
||||
|
||||
listContents :: Remote -> ImportTreeConfig -> CheckGitIgnore -> TVar (Maybe (ImportableContents (ContentIdentifier, Remote.ByteSize))) -> CommandStart
|
||||
listContents :: Remote -> ImportTreeConfig -> CheckGitIgnore -> TVar (Maybe (ImportableContentsChunkable Annex (ContentIdentifier, Remote.ByteSize))) -> CommandStart
|
||||
listContents remote importtreeconfig ci tvar = starting "list" ai si $
|
||||
listContents' remote importtreeconfig ci $ \importable -> do
|
||||
liftIO $ atomically $ writeTVar tvar importable
|
||||
|
@ -355,7 +355,7 @@ listContents remote importtreeconfig ci tvar = starting "list" ai si $
|
|||
ai = ActionItemOther (Just (Remote.name remote))
|
||||
si = SeekInput []
|
||||
|
||||
listContents' :: Remote -> ImportTreeConfig -> CheckGitIgnore -> (Maybe (ImportableContents (ContentIdentifier, Remote.ByteSize)) -> Annex a) -> Annex a
|
||||
listContents' :: Remote -> ImportTreeConfig -> CheckGitIgnore -> (Maybe (ImportableContentsChunkable Annex (ContentIdentifier, Remote.ByteSize)) -> Annex a) -> Annex a
|
||||
listContents' remote importtreeconfig ci a =
|
||||
makeImportMatcher remote >>= \case
|
||||
Right matcher -> tryNonAsync (getImportableContents remote importtreeconfig ci matcher) >>= \case
|
||||
|
@ -368,7 +368,7 @@ listContents' remote importtreeconfig ci a =
|
|||
, err
|
||||
]
|
||||
|
||||
commitRemote :: Remote -> Branch -> RemoteTrackingBranch -> Maybe Sha -> ImportTreeConfig -> ImportCommitConfig -> ImportableContents (Either Sha Key) -> CommandStart
|
||||
commitRemote :: Remote -> Branch -> RemoteTrackingBranch -> Maybe Sha -> ImportTreeConfig -> ImportCommitConfig -> ImportableContentsChunkable Annex (Either Sha Key) -> CommandStart
|
||||
commitRemote remote branch tb trackingcommit importtreeconfig importcommitconfig importable =
|
||||
starting "update" ai si $ do
|
||||
importcommit <- buildImportCommit remote importtreeconfig importcommitconfig importable
|
||||
|
|
|
@ -13,6 +13,7 @@ module Git.Tree (
|
|||
getTree,
|
||||
recordTree,
|
||||
recordTree',
|
||||
recordSubTree,
|
||||
TreeItem(..),
|
||||
treeItemsToTree,
|
||||
treeItemToLsTreeItem,
|
||||
|
@ -21,6 +22,7 @@ module Git.Tree (
|
|||
graftTree,
|
||||
graftTree',
|
||||
withMkTreeHandle,
|
||||
MkTreeHandle,
|
||||
treeMode,
|
||||
) where
|
||||
|
||||
|
|
|
@ -288,9 +288,10 @@ renameExportM serial adir _k old new = do
|
|||
, File newloc
|
||||
]
|
||||
|
||||
listImportableContentsM :: AndroidSerial -> AndroidPath -> Annex (Maybe (ImportableContents (ContentIdentifier, ByteSize)))
|
||||
listImportableContentsM :: AndroidSerial -> AndroidPath -> Annex (Maybe (ImportableContentsChunkable Annex (ContentIdentifier, ByteSize)))
|
||||
listImportableContentsM serial adir = adbfind >>= \case
|
||||
Just ls -> return $ Just $ ImportableContents (mapMaybe mk ls) []
|
||||
Just ls -> return $ Just $ ImportableContentsComplete $
|
||||
ImportableContents (mapMaybe mk ls) []
|
||||
Nothing -> giveup "adb find failed"
|
||||
where
|
||||
adbfind = adbShell serial
|
||||
|
|
|
@ -162,20 +162,21 @@ borgRepoLocalPath r@(BorgRepo p)
|
|||
| borgLocal r && not (null p) = Just p
|
||||
| otherwise = Nothing
|
||||
|
||||
listImportableContentsM :: UUID -> BorgRepo -> ParsedRemoteConfig -> Annex (Maybe (ImportableContents (ContentIdentifier, ByteSize)))
|
||||
listImportableContentsM :: UUID -> BorgRepo -> ParsedRemoteConfig -> Annex (Maybe (ImportableContentsChunkable Annex (ContentIdentifier, ByteSize)))
|
||||
listImportableContentsM u borgrepo c = prompt $ do
|
||||
imported <- getImported u
|
||||
ls <- withborglist (locBorgRepo borgrepo) Nothing formatarchivelist $ \as ->
|
||||
forM (filter (not . S.null) as) $ \archivename ->
|
||||
case M.lookup archivename imported of
|
||||
Just getfast -> return $ Left (archivename, getfast)
|
||||
Nothing -> Right <$>
|
||||
return $ case M.lookup archivename imported of
|
||||
Just getlist -> Left (archivename, getlist)
|
||||
Nothing ->
|
||||
let archive = borgArchive borgrepo archivename
|
||||
in withborglist archive subdir formatfilelist $
|
||||
getlist = withborglist archive subdir formatfilelist $
|
||||
liftIO . evaluate . force . parsefilelist archivename
|
||||
in Right (archivename, getlist)
|
||||
if all isLeft ls && M.null (M.difference imported (M.fromList (lefts ls)))
|
||||
then return Nothing -- unchanged since last time, avoid work
|
||||
else Just . mkimportablecontents <$> mapM (either snd pure) ls
|
||||
else Just <$> mkimportablecontents (map (either id id) ls)
|
||||
where
|
||||
withborglist what addparam format a = do
|
||||
environ <- liftIO getEnvironment
|
||||
|
@ -210,7 +211,7 @@ listImportableContentsM u borgrepo c = prompt $ do
|
|||
parsefilelist archivename (bsz:f:extra:rest) = case readMaybe (fromRawFilePath bsz) of
|
||||
Nothing -> parsefilelist archivename rest
|
||||
Just sz ->
|
||||
let loc = genImportLocation archivename f
|
||||
let loc = genImportLocation f
|
||||
-- borg list reports hard links as 0 byte files,
|
||||
-- with the extra field set to " link to ".
|
||||
-- When the annex object is a hard link to
|
||||
|
@ -234,12 +235,27 @@ listImportableContentsM u borgrepo c = prompt $ do
|
|||
|
||||
-- importableHistory is not used for retrieval, so is not
|
||||
-- populated with old archives. Instead, a tree of archives
|
||||
-- is constructed, by genImportLocation including the archive
|
||||
-- name in the ImportLocation.
|
||||
mkimportablecontents l = ImportableContents
|
||||
{ importableContents = concat l
|
||||
, importableHistory = []
|
||||
}
|
||||
-- is constructed, with a subtree for each archive.
|
||||
mkimportablecontents [] = return $ ImportableContentsComplete $
|
||||
ImportableContents
|
||||
{ importableContents = []
|
||||
, importableHistory = []
|
||||
}
|
||||
mkimportablecontents (l:ls) = ImportableContentsChunked
|
||||
<$> mkimportablecontentschunk l ls
|
||||
<*> pure []
|
||||
|
||||
mkimportablecontentschunk (archivename, getlist) rest = do
|
||||
l <- getlist
|
||||
return $ ImportableContentsChunk
|
||||
{ importableContentsSubDir =
|
||||
genImportChunkSubDir archivename
|
||||
, importableContentsSubTree = l
|
||||
, importableContentsNextChunk = case rest of
|
||||
(getlist':rest') -> Just
|
||||
<$> mkimportablecontentschunk getlist' rest'
|
||||
[] -> return Nothing
|
||||
}
|
||||
|
||||
-- We do not need a ContentIdentifier in order to retrieve a file from
|
||||
-- borg; the ImportLocation contains all that's needed. So, this is left
|
||||
|
@ -247,15 +263,20 @@ listImportableContentsM u borgrepo c = prompt $ do
|
|||
borgContentIdentifier :: ContentIdentifier
|
||||
borgContentIdentifier = ContentIdentifier mempty
|
||||
|
||||
-- Convert a path file a borg archive to a path that can be used as an
|
||||
-- ImportLocation. The archive name gets used as a subdirectory,
|
||||
-- which this path is inside.
|
||||
--
|
||||
-- Borg does not allow / in the name of an archive, so the archive
|
||||
-- name will always be the first directory in the ImportLocation.
|
||||
--
|
||||
-- Paths in a borg archive are always relative, not absolute, so the use of
|
||||
-- </> to combine the archive name with the path will always work.
|
||||
genImportLocation :: BorgArchiveName -> RawFilePath -> ImportLocation
|
||||
genImportLocation archivename p =
|
||||
ThirdPartyPopulated.mkThirdPartyImportLocation $
|
||||
archivename P.</> p
|
||||
-- This scheme also relies on the fact that paths in a borg archive are
|
||||
-- always relative, not absolute.
|
||||
genImportLocation :: RawFilePath -> RawFilePath
|
||||
genImportLocation = fromImportLocation . ThirdPartyPopulated.mkThirdPartyImportLocation
|
||||
|
||||
genImportChunkSubDir :: BorgArchiveName -> ImportChunkSubDir
|
||||
genImportChunkSubDir = ImportChunkSubDir . fromImportLocation . ThirdPartyPopulated.mkThirdPartyImportLocation
|
||||
|
||||
extractImportLocation :: ImportLocation -> (BorgArchiveName, RawFilePath)
|
||||
extractImportLocation loc = go $ P.splitDirectories $
|
||||
|
@ -269,7 +290,7 @@ extractImportLocation loc = go $ P.splitDirectories $
|
|||
-- last imported tree. And the contents of those archives can be retrieved
|
||||
-- by listing the subtree recursively, which will likely be quite a lot
|
||||
-- faster than running borg.
|
||||
getImported :: UUID -> Annex (M.Map BorgArchiveName (Annex [(ImportLocation, (ContentIdentifier, ByteSize))]))
|
||||
getImported :: UUID -> Annex (M.Map BorgArchiveName (Annex [(RawFilePath, (ContentIdentifier, ByteSize))]))
|
||||
getImported u = M.unions <$> (mapM go . exportedTreeishes =<< getExport u)
|
||||
where
|
||||
go t = M.fromList . mapMaybe mk
|
||||
|
@ -278,21 +299,19 @@ getImported u = M.unions <$> (mapM go . exportedTreeishes =<< getExport u)
|
|||
mk ti
|
||||
| toTreeItemType (LsTree.mode ti) == Just TreeSubtree = Just
|
||||
( getTopFilePath (LsTree.file ti)
|
||||
, getcontents
|
||||
(getTopFilePath (LsTree.file ti))
|
||||
(LsTree.sha ti)
|
||||
, getcontents (LsTree.sha ti)
|
||||
)
|
||||
| otherwise = Nothing
|
||||
|
||||
getcontents archivename t = mapMaybe (mkcontents archivename)
|
||||
getcontents t = mapMaybe mkcontents
|
||||
<$> inRepo (LsTree.lsTreeStrict LsTree.LsTreeRecursive (LsTree.LsTreeLong False) t)
|
||||
|
||||
mkcontents archivename ti = do
|
||||
mkcontents ti = do
|
||||
let f = ThirdPartyPopulated.fromThirdPartyImportLocation $
|
||||
mkImportLocation $ getTopFilePath $ LsTree.file ti
|
||||
k <- fileKey (P.takeFileName f)
|
||||
return
|
||||
( genImportLocation archivename f
|
||||
( genImportLocation f
|
||||
,
|
||||
( borgContentIdentifier
|
||||
-- defaulting to 0 size is ok, this size
|
||||
|
|
|
@ -351,11 +351,12 @@ removeExportLocation topdir loc =
|
|||
mkExportLocation loc'
|
||||
in go (upFrom loc') =<< tryIO (removeDirectory p)
|
||||
|
||||
listImportableContentsM :: RawFilePath -> Annex (Maybe (ImportableContents (ContentIdentifier, ByteSize)))
|
||||
listImportableContentsM :: RawFilePath -> Annex (Maybe (ImportableContentsChunkable Annex (ContentIdentifier, ByteSize)))
|
||||
listImportableContentsM dir = liftIO $ do
|
||||
l <- dirContentsRecursive (fromRawFilePath dir)
|
||||
l' <- mapM (go . toRawFilePath) l
|
||||
return $ Just $ ImportableContents (catMaybes l') []
|
||||
return $ Just $ ImportableContentsComplete $
|
||||
ImportableContents (catMaybes l') []
|
||||
where
|
||||
go f = do
|
||||
st <- R.getFileStatus f
|
||||
|
|
|
@ -47,10 +47,10 @@ fromThirdPartyImportLocation =
|
|||
-- find only those ImportLocations that are annex object files.
|
||||
-- All other ImportLocations are ignored.
|
||||
importKey :: ImportLocation -> ContentIdentifier -> ByteSize -> MeterUpdate -> Annex (Maybe Key)
|
||||
importKey loc _cid sz _ = return $ importKey' loc (Just sz)
|
||||
importKey loc _cid sz _ = return $ importKey' (fromImportLocation loc) (Just sz)
|
||||
|
||||
importKey' :: ImportLocation -> Maybe ByteSize -> Maybe Key
|
||||
importKey' loc msz = case fileKey f of
|
||||
importKey' :: RawFilePath -> Maybe ByteSize -> Maybe Key
|
||||
importKey' p msz = case fileKey f of
|
||||
Just k
|
||||
-- Annex objects always are in a subdirectory with the same
|
||||
-- name as the filename. If this is not the case for the file
|
||||
|
@ -82,5 +82,4 @@ importKey' loc msz = case fileKey f of
|
|||
_ -> Just k
|
||||
Nothing -> Nothing
|
||||
where
|
||||
p = fromImportLocation loc
|
||||
f = P.takeFileName p
|
||||
|
|
|
@ -549,13 +549,15 @@ renameExportS3 hv r rs info k src dest = Just <$> go
|
|||
srcobject = T.pack $ bucketExportLocation info src
|
||||
dstobject = T.pack $ bucketExportLocation info dest
|
||||
|
||||
listImportableContentsS3 :: S3HandleVar -> Remote -> S3Info -> Annex (Maybe (ImportableContents (ContentIdentifier, ByteSize)))
|
||||
listImportableContentsS3 :: S3HandleVar -> Remote -> S3Info -> Annex (Maybe (ImportableContentsChunkable Annex (ContentIdentifier, ByteSize)))
|
||||
listImportableContentsS3 hv r info =
|
||||
withS3Handle hv $ \case
|
||||
Nothing -> giveup $ needS3Creds (uuid r)
|
||||
Just h -> Just <$> go h
|
||||
where
|
||||
go h = liftIO $ runResourceT $ extractFromResourceT =<< startlist h
|
||||
go h = do
|
||||
ic <- liftIO $ runResourceT $ extractFromResourceT =<< startlist h
|
||||
return (ImportableContentsComplete ic)
|
||||
|
||||
startlist h
|
||||
| versioning info = do
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
{- git-annex import types
|
||||
-
|
||||
- Copyright 2019 Joey Hess <id@joeyh.name>
|
||||
- Copyright 2019-2021 Joey Hess <id@joeyh.name>
|
||||
-
|
||||
- Licensed under the GNU AGPL version 3 or higher.
|
||||
-}
|
||||
|
@ -13,6 +13,7 @@ import qualified Data.ByteString as S
|
|||
import Data.Char
|
||||
import Control.DeepSeq
|
||||
import GHC.Generics
|
||||
import qualified System.FilePath.Posix.ByteString as Posix
|
||||
|
||||
import Types.Export
|
||||
import Utility.QuickCheck
|
||||
|
@ -69,3 +70,34 @@ data ImportableContents info = ImportableContents
|
|||
deriving (Show, Generic)
|
||||
|
||||
instance NFData info => NFData (ImportableContents info)
|
||||
|
||||
{- ImportableContents, but it can be chunked into subtrees to avoid
|
||||
- all needing to fit in memory at the same time. -}
|
||||
data ImportableContentsChunkable m info
|
||||
= ImportableContentsComplete (ImportableContents info)
|
||||
-- ^ Used when not chunking
|
||||
| ImportableContentsChunked
|
||||
{ importableContentsChunk :: ImportableContentsChunk m info
|
||||
, importableHistoryComplete :: [ImportableContents info]
|
||||
-- ^ Chunking the history is not supported
|
||||
}
|
||||
|
||||
{- A chunk of ImportableContents, which is the entire content of a subtree
|
||||
- of the main tree. Nested subtrees are not allowed. -}
|
||||
data ImportableContentsChunk m info = ImportableContentsChunk
|
||||
{ importableContentsSubDir :: ImportChunkSubDir
|
||||
, importableContentsSubTree :: [(RawFilePath, info)]
|
||||
-- ^ locations are relative to importableContentsSubDir
|
||||
, importableContentsNextChunk :: m (Maybe (ImportableContentsChunk m info))
|
||||
-- ^ Continuation to get the next chunk.
|
||||
-- Returns Nothing when there are no more chunks.
|
||||
}
|
||||
|
||||
newtype ImportChunkSubDir = ImportChunkSubDir { importChunkSubDir :: RawFilePath }
|
||||
|
||||
importableContentsChunkFullLocation
|
||||
:: ImportChunkSubDir
|
||||
-> RawFilePath
|
||||
-> ImportLocation
|
||||
importableContentsChunkFullLocation (ImportChunkSubDir root) loc =
|
||||
mkImportLocation $ Posix.combine root loc
|
||||
|
|
|
@ -309,7 +309,7 @@ data ImportActions a = ImportActions
|
|||
--
|
||||
-- Throws exception on failure to access the remote.
|
||||
-- May return Nothing when the remote is unchanged since last time.
|
||||
{ listImportableContents :: a (Maybe (ImportableContents (ContentIdentifier, ByteSize)))
|
||||
{ listImportableContents :: a (Maybe (ImportableContentsChunkable a (ContentIdentifier, ByteSize)))
|
||||
-- Generates a Key (of any type) for the file stored on the
|
||||
-- remote at the ImportLocation. Does not download the file
|
||||
-- from the remote.
|
||||
|
@ -322,7 +322,7 @@ data ImportActions a = ImportActions
|
|||
-- since the ContentIdentifier was generated.
|
||||
--
|
||||
-- When it returns nothing, the file at the ImportLocation
|
||||
-- not by included in the imported tree.
|
||||
-- will not be included in the imported tree.
|
||||
--
|
||||
-- When the remote is thirdPartyPopulated, this should check if the
|
||||
-- file stored on the remote is the content of an annex object,
|
||||
|
|
|
@ -3,3 +3,5 @@ memory, then got OOM-killed.
|
|||
|
||||
I don't know if this is a memory leak or just trying to load too much, but it seems like this is a thing you should be able to do on
|
||||
a machine with 64G of RAM.
|
||||
|
||||
> [[fixed|done]] --[[Joey]]
|
||||
|
|
|
@ -0,0 +1,9 @@
|
|||
[[!comment format=mdwn
|
||||
username="joey"
|
||||
subject="""comment 12"""
|
||||
date="2021-10-08T17:06:05Z"
|
||||
content="""
|
||||
I've fixed this problem, my test case tops out at 160 mb now, and adding more
|
||||
archives to the borg repo no longer increases memory use. Memory use is now
|
||||
proportional to the number of annexed objects in a borg archive.
|
||||
"""]]
|
20
doc/todo/avoid_storing_contentidentifier_log_for_borg.mdwn
Normal file
20
doc/todo/avoid_storing_contentidentifier_log_for_borg.mdwn
Normal file
|
@ -0,0 +1,20 @@
|
|||
Borg uses an empty ContentIdentifier for everything; it does not need to
|
||||
record anything. But that empty value gets stored in the log for each key
|
||||
that is stored in borg. This unncessarily bloats the size of the git-annex
|
||||
branch, by one content identifier per key stored in borg.
|
||||
|
||||
I think that it also slows down importing many archives from borg,
|
||||
because for each of them it has to record the content identifier,
|
||||
which is always the same, but still results in a db write.
|
||||
|
||||
Omitting storing any ContentIdentifier would break code such as
|
||||
Remote.Helper.ExportImport's retrieveKeyFileFromImport.
|
||||
|
||||
If the borg Remote could indicate with a flag that it does not use
|
||||
ContentIdentifiers, then code like that could pass it a null
|
||||
ContentIdentifier without needing to read it from the db.
|
||||
|
||||
Annex.Import uses getContentIdentifierKeys, but only when it's not
|
||||
thirdpartypopulated. So this change would not break that for borg,
|
||||
but a clean way to handle that would be to make it also return a null
|
||||
ContentIdentifier when the remote has the flag set. --[[Joey]]
|
Loading…
Reference in a new issue