Merge branch 'borgchunks'
This commit is contained in:
commit
022bb6174c
14 changed files with 306 additions and 92 deletions
224
Annex/Import.hs
224
Annex/Import.hs
|
@ -1,6 +1,6 @@
|
||||||
{- git-annex import from remotes
|
{- git-annex import from remotes
|
||||||
-
|
-
|
||||||
- Copyright 2019-2020 Joey Hess <id@joeyh.name>
|
- Copyright 2019-2021 Joey Hess <id@joeyh.name>
|
||||||
-
|
-
|
||||||
- Licensed under the GNU AGPL version 3 or higher.
|
- Licensed under the GNU AGPL version 3 or higher.
|
||||||
-}
|
-}
|
||||||
|
@ -98,7 +98,7 @@ buildImportCommit
|
||||||
:: Remote
|
:: Remote
|
||||||
-> ImportTreeConfig
|
-> ImportTreeConfig
|
||||||
-> ImportCommitConfig
|
-> ImportCommitConfig
|
||||||
-> ImportableContents (Either Sha Key)
|
-> ImportableContentsChunkable Annex (Either Sha Key)
|
||||||
-> Annex (Maybe Ref)
|
-> Annex (Maybe Ref)
|
||||||
buildImportCommit remote importtreeconfig importcommitconfig importable =
|
buildImportCommit remote importtreeconfig importcommitconfig importable =
|
||||||
case importCommitTracking importcommitconfig of
|
case importCommitTracking importcommitconfig of
|
||||||
|
@ -123,7 +123,7 @@ buildImportCommit remote importtreeconfig importcommitconfig importable =
|
||||||
recordImportTree
|
recordImportTree
|
||||||
:: Remote
|
:: Remote
|
||||||
-> ImportTreeConfig
|
-> ImportTreeConfig
|
||||||
-> ImportableContents (Either Sha Key)
|
-> ImportableContentsChunkable Annex (Either Sha Key)
|
||||||
-> Annex (History Sha, Annex ())
|
-> Annex (History Sha, Annex ())
|
||||||
recordImportTree remote importtreeconfig importable = do
|
recordImportTree remote importtreeconfig importable = do
|
||||||
imported@(History finaltree _) <- buildImportTrees basetree subdir importable
|
imported@(History finaltree _) <- buildImportTrees basetree subdir importable
|
||||||
|
@ -264,25 +264,75 @@ buildImportCommit' remote importcommitconfig mtrackingcommit imported@(History t
|
||||||
buildImportTrees
|
buildImportTrees
|
||||||
:: Ref
|
:: Ref
|
||||||
-> Maybe TopFilePath
|
-> Maybe TopFilePath
|
||||||
-> ImportableContents (Either Sha Key)
|
-> ImportableContentsChunkable Annex (Either Sha Key)
|
||||||
-> Annex (History Sha)
|
-> Annex (History Sha)
|
||||||
buildImportTrees basetree msubdir importable = History
|
buildImportTrees basetree msubdir (ImportableContentsComplete importable) = do
|
||||||
<$> (buildtree (importableContents importable) =<< Annex.gitRepo)
|
repo <- Annex.gitRepo
|
||||||
<*> buildhistory
|
withMkTreeHandle repo $ buildImportTrees' basetree msubdir importable
|
||||||
|
buildImportTrees basetree msubdir importable@(ImportableContentsChunked {}) = do
|
||||||
|
repo <- Annex.gitRepo
|
||||||
|
withMkTreeHandle repo $ \hdl ->
|
||||||
|
History
|
||||||
|
<$> go hdl
|
||||||
|
<*> buildImportTreesHistory basetree msubdir
|
||||||
|
(importableHistoryComplete importable) hdl
|
||||||
|
where
|
||||||
|
go hdl = do
|
||||||
|
tree <- gochunks [] (importableContentsChunk importable) hdl
|
||||||
|
importtree <- liftIO $ recordTree' hdl tree
|
||||||
|
graftImportTree basetree msubdir importtree hdl
|
||||||
|
|
||||||
|
gochunks l c hdl = do
|
||||||
|
let subdir = importChunkSubDir $ importableContentsSubDir c
|
||||||
|
-- Full directory prefix where the sub tree is located.
|
||||||
|
let fullprefix = asTopFilePath $ case msubdir of
|
||||||
|
Nothing -> subdir
|
||||||
|
Just d -> getTopFilePath d Posix.</> subdir
|
||||||
|
Tree ts <- convertImportTree (Just fullprefix) $
|
||||||
|
map (\(p, i) -> (mkImportLocation p, i))
|
||||||
|
(importableContentsSubTree c)
|
||||||
|
-- Record this subtree before getting next chunk, this
|
||||||
|
-- avoids buffering all the chunks into memory.
|
||||||
|
tc <- liftIO $ recordSubTree hdl $
|
||||||
|
NewSubTree (asTopFilePath subdir) ts
|
||||||
|
importableContentsNextChunk c >>= \case
|
||||||
|
Nothing -> return (Tree (tc:l))
|
||||||
|
Just c' -> gochunks (tc:l) c' hdl
|
||||||
|
|
||||||
|
buildImportTrees'
|
||||||
|
:: Ref
|
||||||
|
-> Maybe TopFilePath
|
||||||
|
-> ImportableContents (Either Sha Key)
|
||||||
|
-> MkTreeHandle
|
||||||
|
-> Annex (History Sha)
|
||||||
|
buildImportTrees' basetree msubdir importable hdl = History
|
||||||
|
<$> buildImportTree basetree msubdir (importableContents importable) hdl
|
||||||
|
<*> buildImportTreesHistory basetree msubdir (importableHistory importable) hdl
|
||||||
|
|
||||||
|
buildImportTree
|
||||||
|
:: Ref
|
||||||
|
-> Maybe TopFilePath
|
||||||
|
-> [(ImportLocation, Either Sha Key)]
|
||||||
|
-> MkTreeHandle
|
||||||
|
-> Annex Sha
|
||||||
|
buildImportTree basetree msubdir ls hdl = do
|
||||||
|
importtree <- liftIO . recordTree' hdl =<< convertImportTree msubdir ls
|
||||||
|
graftImportTree basetree msubdir importtree hdl
|
||||||
|
|
||||||
|
graftImportTree
|
||||||
|
:: Ref
|
||||||
|
-> Maybe TopFilePath
|
||||||
|
-> Sha
|
||||||
|
-> MkTreeHandle
|
||||||
|
-> Annex Sha
|
||||||
|
graftImportTree basetree msubdir tree hdl = case msubdir of
|
||||||
|
Nothing -> return tree
|
||||||
|
Just subdir -> inRepo $ \repo ->
|
||||||
|
graftTree' tree subdir basetree repo hdl
|
||||||
|
|
||||||
|
convertImportTree :: Maybe TopFilePath -> [(ImportLocation, Either Sha Key)] -> Annex Tree
|
||||||
|
convertImportTree msubdir ls = treeItemsToTree <$> mapM mktreeitem ls
|
||||||
where
|
where
|
||||||
buildhistory = S.fromList
|
|
||||||
<$> mapM (buildImportTrees basetree msubdir)
|
|
||||||
(importableHistory importable)
|
|
||||||
|
|
||||||
buildtree ls repo = withMkTreeHandle repo $ \hdl -> do
|
|
||||||
importtree <- liftIO . recordTree' hdl
|
|
||||||
. treeItemsToTree
|
|
||||||
=<< mapM mktreeitem ls
|
|
||||||
case msubdir of
|
|
||||||
Nothing -> return importtree
|
|
||||||
Just subdir -> liftIO $
|
|
||||||
graftTree' importtree subdir basetree repo hdl
|
|
||||||
|
|
||||||
mktreeitem (loc, v) = case v of
|
mktreeitem (loc, v) = case v of
|
||||||
Right k -> do
|
Right k -> do
|
||||||
relf <- fromRepo $ fromTopFilePath topf
|
relf <- fromRepo $ fromTopFilePath topf
|
||||||
|
@ -297,6 +347,15 @@ buildImportTrees basetree msubdir importable = History
|
||||||
topf = asTopFilePath $
|
topf = asTopFilePath $
|
||||||
maybe lf (\sd -> getTopFilePath sd P.</> lf) msubdir
|
maybe lf (\sd -> getTopFilePath sd P.</> lf) msubdir
|
||||||
|
|
||||||
|
buildImportTreesHistory
|
||||||
|
:: Ref
|
||||||
|
-> Maybe TopFilePath
|
||||||
|
-> [ImportableContents (Either Sha Key)]
|
||||||
|
-> MkTreeHandle
|
||||||
|
-> Annex (S.Set (History Sha))
|
||||||
|
buildImportTreesHistory basetree msubdir history hdl = S.fromList
|
||||||
|
<$> mapM (\ic -> buildImportTrees' basetree msubdir ic hdl) history
|
||||||
|
|
||||||
canImportKeys :: Remote -> Bool -> Bool
|
canImportKeys :: Remote -> Bool -> Bool
|
||||||
canImportKeys remote importcontent =
|
canImportKeys remote importcontent =
|
||||||
importcontent || isJust (Remote.importKey ia)
|
importcontent || isJust (Remote.importKey ia)
|
||||||
|
@ -324,8 +383,8 @@ importKeys
|
||||||
-> ImportTreeConfig
|
-> ImportTreeConfig
|
||||||
-> Bool
|
-> Bool
|
||||||
-> Bool
|
-> Bool
|
||||||
-> ImportableContents (ContentIdentifier, ByteSize)
|
-> ImportableContentsChunkable Annex (ContentIdentifier, ByteSize)
|
||||||
-> Annex (Maybe (ImportableContents (Either Sha Key)))
|
-> Annex (Maybe (ImportableContentsChunkable Annex (Either Sha Key)))
|
||||||
importKeys remote importtreeconfig importcontent thirdpartypopulated importablecontents = do
|
importKeys remote importtreeconfig importcontent thirdpartypopulated importablecontents = do
|
||||||
unless (canImportKeys remote importcontent) $
|
unless (canImportKeys remote importcontent) $
|
||||||
giveup "This remote does not support importing without downloading content."
|
giveup "This remote does not support importing without downloading content."
|
||||||
|
@ -339,40 +398,82 @@ importKeys remote importtreeconfig importcontent thirdpartypopulated importablec
|
||||||
-- When concurrency is enabled, this set is needed to
|
-- When concurrency is enabled, this set is needed to
|
||||||
-- avoid two threads both importing the same content identifier.
|
-- avoid two threads both importing the same content identifier.
|
||||||
importing <- liftIO $ newTVarIO S.empty
|
importing <- liftIO $ newTVarIO S.empty
|
||||||
withExclusiveLock gitAnnexContentIdentifierLock $
|
withciddb $ \db -> do
|
||||||
bracket CIDDb.openDb CIDDb.closeDb $ \db -> do
|
CIDDb.needsUpdateFromLog db
|
||||||
CIDDb.needsUpdateFromLog db
|
>>= maybe noop (CIDDb.updateFromLog db)
|
||||||
>>= maybe noop (CIDDb.updateFromLog db)
|
(prepclock (run cidmap importing db))
|
||||||
(run (go False cidmap importing importablecontents db))
|
|
||||||
where
|
where
|
||||||
-- When not importing content, reuse the same vector
|
-- When not importing content, reuse the same vector
|
||||||
-- clock for all state that's recorded. This can save
|
-- clock for all state that's recorded. This can save
|
||||||
-- a little bit of disk space. Individual file downloads
|
-- a little bit of disk space. Individual file downloads
|
||||||
-- while downloading take too long for this optimisation
|
-- while downloading take too long for this optimisation
|
||||||
-- to be safe to do.
|
-- to be safe to do.
|
||||||
run a
|
prepclock a
|
||||||
| importcontent = a
|
| importcontent = a
|
||||||
| otherwise = reuseVectorClockWhile a
|
| otherwise = reuseVectorClockWhile a
|
||||||
|
|
||||||
go oldversion cidmap importing (ImportableContents l h) db = do
|
withciddb = withExclusiveLock gitAnnexContentIdentifierLock .
|
||||||
|
bracket CIDDb.openDb CIDDb.closeDb
|
||||||
|
|
||||||
|
run cidmap importing db = do
|
||||||
largematcher <- largeFilesMatcher
|
largematcher <- largeFilesMatcher
|
||||||
|
case importablecontents of
|
||||||
|
ImportableContentsComplete ic ->
|
||||||
|
go False largematcher cidmap importing db ic >>= return . \case
|
||||||
|
Nothing -> Nothing
|
||||||
|
Just v -> Just $ ImportableContentsComplete v
|
||||||
|
ImportableContentsChunked {} -> do
|
||||||
|
c <- gochunked db (importableContentsChunk importablecontents)
|
||||||
|
gohistory largematcher cidmap importing db (importableHistoryComplete importablecontents) >>= return . \case
|
||||||
|
Nothing -> Nothing
|
||||||
|
Just h -> Just $ ImportableContentsChunked
|
||||||
|
{ importableContentsChunk = c
|
||||||
|
, importableHistoryComplete = h
|
||||||
|
}
|
||||||
|
|
||||||
|
go oldversion largematcher cidmap importing db (ImportableContents l h) = do
|
||||||
jobs <- forM l $ \i ->
|
jobs <- forM l $ \i ->
|
||||||
if thirdpartypopulated
|
if thirdpartypopulated
|
||||||
then thirdpartypopulatedimport cidmap db i
|
then Left <$> thirdpartypopulatedimport db i
|
||||||
else startimport cidmap importing db i oldversion largematcher
|
else startimport cidmap importing db i oldversion largematcher
|
||||||
l' <- liftIO $ forM jobs $
|
l' <- liftIO $ forM jobs $
|
||||||
either pure (atomically . takeTMVar)
|
either pure (atomically . takeTMVar)
|
||||||
if any isNothing l'
|
if any isNothing l'
|
||||||
then return Nothing
|
then return Nothing
|
||||||
else do
|
else gohistory largematcher cidmap importing db h >>= return . \case
|
||||||
h' <- mapM (\ic -> go True cidmap importing ic db) h
|
Nothing -> Nothing
|
||||||
if any isNothing h'
|
Just h' -> Just $ ImportableContents (catMaybes l') h'
|
||||||
then return Nothing
|
|
||||||
else return $ Just $
|
|
||||||
ImportableContents
|
|
||||||
(catMaybes l')
|
|
||||||
(catMaybes h')
|
|
||||||
|
|
||||||
|
gohistory largematcher cidmap importing db h = do
|
||||||
|
h' <- mapM (go True largematcher cidmap importing db) h
|
||||||
|
if any isNothing h'
|
||||||
|
then return Nothing
|
||||||
|
else return $ Just $ catMaybes h'
|
||||||
|
|
||||||
|
gochunked db c
|
||||||
|
-- Downloading cannot be done when chunked, since only
|
||||||
|
-- the first chunk is processed before returning.
|
||||||
|
| importcontent = error "importKeys does not support downloading chunked import"
|
||||||
|
-- Chunked import is currently only used by thirdpartypopulated
|
||||||
|
-- remotes.
|
||||||
|
| not thirdpartypopulated = error "importKeys does not support chunked import when not thirdpartypopulated"
|
||||||
|
| otherwise = do
|
||||||
|
l <- forM (importableContentsSubTree c) $ \(loc, i) -> do
|
||||||
|
let loc' = importableContentsChunkFullLocation (importableContentsSubDir c) loc
|
||||||
|
thirdpartypopulatedimport db (loc', i) >>= return . \case
|
||||||
|
Just (_loc, k) -> Just (loc, k)
|
||||||
|
Nothing -> Nothing
|
||||||
|
return $ ImportableContentsChunk
|
||||||
|
{ importableContentsSubDir = importableContentsSubDir c
|
||||||
|
, importableContentsSubTree = catMaybes l
|
||||||
|
, importableContentsNextChunk =
|
||||||
|
importableContentsNextChunk c >>= \case
|
||||||
|
Nothing -> return Nothing
|
||||||
|
Just c' -> withciddb $ \db' ->
|
||||||
|
prepclock $
|
||||||
|
Just <$> gochunked db' c'
|
||||||
|
}
|
||||||
|
|
||||||
waitstart importing cid = liftIO $ atomically $ do
|
waitstart importing cid = liftIO $ atomically $ do
|
||||||
s <- readTVar importing
|
s <- readTVar importing
|
||||||
if S.member cid s
|
if S.member cid s
|
||||||
|
@ -418,19 +519,19 @@ importKeys remote importtreeconfig importcontent thirdpartypopulated importablec
|
||||||
importaction
|
importaction
|
||||||
return (Right job)
|
return (Right job)
|
||||||
|
|
||||||
thirdpartypopulatedimport cidmap db (loc, (cid, sz)) =
|
thirdpartypopulatedimport db (loc, (cid, sz)) =
|
||||||
case Remote.importKey ia of
|
case Remote.importKey ia of
|
||||||
Nothing -> return $ Left Nothing
|
Nothing -> return Nothing
|
||||||
Just importkey ->
|
Just importkey ->
|
||||||
tryNonAsync (importkey loc cid sz nullMeterUpdate) >>= \case
|
tryNonAsync (importkey loc cid sz nullMeterUpdate) >>= \case
|
||||||
Right (Just k) -> do
|
Right (Just k) -> do
|
||||||
recordcidkey cidmap db cid k
|
recordcidkey' db cid k
|
||||||
logChange k (Remote.uuid remote) InfoPresent
|
logChange k (Remote.uuid remote) InfoPresent
|
||||||
return $ Left $ Just (loc, Right k)
|
return $ Just (loc, Right k)
|
||||||
Right Nothing -> return $ Left Nothing
|
Right Nothing -> return Nothing
|
||||||
Left e -> do
|
Left e -> do
|
||||||
warning (show e)
|
warning (show e)
|
||||||
return $ Left Nothing
|
return Nothing
|
||||||
|
|
||||||
importordownload cidmap db (loc, (cid, sz)) largematcher= do
|
importordownload cidmap db (loc, (cid, sz)) largematcher= do
|
||||||
f <- locworktreefile loc
|
f <- locworktreefile loc
|
||||||
|
@ -603,6 +704,8 @@ importKeys remote importtreeconfig importcontent thirdpartypopulated importablec
|
||||||
recordcidkey cidmap db cid k = do
|
recordcidkey cidmap db cid k = do
|
||||||
liftIO $ atomically $ modifyTVar' cidmap $
|
liftIO $ atomically $ modifyTVar' cidmap $
|
||||||
M.insert cid k
|
M.insert cid k
|
||||||
|
recordcidkey' db cid k
|
||||||
|
recordcidkey' db cid k = do
|
||||||
liftIO $ CIDDb.recordContentIdentifier db rs cid k
|
liftIO $ CIDDb.recordContentIdentifier db rs cid k
|
||||||
CIDLog.recordContentIdentifier rs cid k
|
CIDLog.recordContentIdentifier rs cid k
|
||||||
|
|
||||||
|
@ -675,18 +778,38 @@ makeImportMatcher r = load preferredContentKeylessTokens >>= \case
|
||||||
- Throws exception if unable to contact the remote.
|
- Throws exception if unable to contact the remote.
|
||||||
- Returns Nothing when there is no change since last time.
|
- Returns Nothing when there is no change since last time.
|
||||||
-}
|
-}
|
||||||
getImportableContents :: Remote -> ImportTreeConfig -> CheckGitIgnore -> FileMatcher Annex -> Annex (Maybe (ImportableContents (ContentIdentifier, ByteSize)))
|
getImportableContents :: Remote -> ImportTreeConfig -> CheckGitIgnore -> FileMatcher Annex -> Annex (Maybe (ImportableContentsChunkable Annex (ContentIdentifier, ByteSize)))
|
||||||
getImportableContents r importtreeconfig ci matcher = do
|
getImportableContents r importtreeconfig ci matcher = do
|
||||||
Remote.listImportableContents (Remote.importActions r) >>= \case
|
Remote.listImportableContents (Remote.importActions r) >>= \case
|
||||||
Just importable -> do
|
Just (ImportableContentsComplete ic) -> do
|
||||||
dbhandle <- Export.openDb (Remote.uuid r)
|
dbhandle <- opendbhandle
|
||||||
Just <$> filterunwanted dbhandle importable
|
Just . ImportableContentsComplete
|
||||||
|
<$> filterunwanted dbhandle ic
|
||||||
|
Just (c@(ImportableContentsChunked {})) -> do
|
||||||
|
dbhandle <- opendbhandle
|
||||||
|
Just <$> filterunwantedchunked dbhandle c
|
||||||
Nothing -> return Nothing
|
Nothing -> return Nothing
|
||||||
where
|
where
|
||||||
filterunwanted dbhandle ic = ImportableContents
|
filterunwanted dbhandle ic = ImportableContents
|
||||||
<$> filterM (wanted dbhandle) (importableContents ic)
|
<$> filterM (wanted dbhandle) (importableContents ic)
|
||||||
<*> mapM (filterunwanted dbhandle) (importableHistory ic)
|
<*> mapM (filterunwanted dbhandle) (importableHistory ic)
|
||||||
|
|
||||||
|
filterunwantedchunked dbhandle c = ImportableContentsChunked
|
||||||
|
<$> filterunwantedchunk dbhandle (importableContentsChunk c)
|
||||||
|
<*> mapM (filterunwanted dbhandle) (importableHistoryComplete c)
|
||||||
|
|
||||||
|
filterunwantedchunk dbhandle c = ImportableContentsChunk
|
||||||
|
<$> pure (importableContentsSubDir c)
|
||||||
|
<*> filterM (wantedunder dbhandle (importableContentsSubDir c))
|
||||||
|
(importableContentsSubTree c)
|
||||||
|
<*> pure (
|
||||||
|
importableContentsNextChunk c >>= \case
|
||||||
|
Nothing -> return Nothing
|
||||||
|
Just c' -> Just <$> filterunwantedchunk dbhandle c'
|
||||||
|
)
|
||||||
|
|
||||||
|
opendbhandle = Export.openDb (Remote.uuid r)
|
||||||
|
|
||||||
wanted dbhandle (loc, (_cid, sz))
|
wanted dbhandle (loc, (_cid, sz))
|
||||||
| ingitdir = pure False
|
| ingitdir = pure False
|
||||||
| otherwise =
|
| otherwise =
|
||||||
|
@ -697,6 +820,9 @@ getImportableContents r importtreeconfig ci matcher = do
|
||||||
matches = matchesImportLocation matcher loc sz
|
matches = matchesImportLocation matcher loc sz
|
||||||
isknown = isKnownImportLocation dbhandle loc
|
isknown = isKnownImportLocation dbhandle loc
|
||||||
notignored = notIgnoredImportLocation importtreeconfig ci loc
|
notignored = notIgnoredImportLocation importtreeconfig ci loc
|
||||||
|
|
||||||
|
wantedunder dbhandle root (loc, v) =
|
||||||
|
wanted dbhandle (importableContentsChunkFullLocation root loc, v)
|
||||||
|
|
||||||
isKnownImportLocation :: Export.ExportHandle -> ImportLocation -> Annex Bool
|
isKnownImportLocation :: Export.ExportHandle -> ImportLocation -> Annex Bool
|
||||||
isKnownImportLocation dbhandle loc = liftIO $
|
isKnownImportLocation dbhandle loc = liftIO $
|
||||||
|
|
|
@ -6,6 +6,8 @@ git-annex (8.20210904) UNRELEASED; urgency=medium
|
||||||
* Bug fix: Git configs such as annex.verify were incorrectly overriding
|
* Bug fix: Git configs such as annex.verify were incorrectly overriding
|
||||||
per-remote git configs such as remote.name.annex-verify.
|
per-remote git configs such as remote.name.annex-verify.
|
||||||
(Reversion in version 4.20130323)
|
(Reversion in version 4.20130323)
|
||||||
|
* borg: Significantly improved memory use when a borg repository
|
||||||
|
contains many archives.
|
||||||
* borg: Avoid trying to extract xattrs, ACLS, and bsdflags when
|
* borg: Avoid trying to extract xattrs, ACLS, and bsdflags when
|
||||||
retrieving from a borg repository.
|
retrieving from a borg repository.
|
||||||
* Sped up git-annex smudge --clean by 25%.
|
* Sped up git-annex smudge --clean by 25%.
|
||||||
|
@ -15,7 +17,6 @@ git-annex (8.20210904) UNRELEASED; urgency=medium
|
||||||
incrementally verified, when used on NTFS and perhaps other filesystems.
|
incrementally verified, when used on NTFS and perhaps other filesystems.
|
||||||
* reinject: Fix crash when reinjecting a file from outside the repository.
|
* reinject: Fix crash when reinjecting a file from outside the repository.
|
||||||
(Reversion in version 8.20210621)
|
(Reversion in version 8.20210621)
|
||||||
* borg: Some improvements to memory use when importing a lot of archives.
|
|
||||||
* Avoid cursor jitter when updating progress display.
|
* Avoid cursor jitter when updating progress display.
|
||||||
|
|
||||||
-- Joey Hess <id@joeyh.name> Fri, 03 Sep 2021 12:02:55 -0400
|
-- Joey Hess <id@joeyh.name> Fri, 03 Sep 2021 12:02:55 -0400
|
||||||
|
|
|
@ -346,7 +346,7 @@ seekRemote remote branch msubdir importcontent ci = do
|
||||||
|
|
||||||
fromtrackingbranch a = inRepo $ a (fromRemoteTrackingBranch tb)
|
fromtrackingbranch a = inRepo $ a (fromRemoteTrackingBranch tb)
|
||||||
|
|
||||||
listContents :: Remote -> ImportTreeConfig -> CheckGitIgnore -> TVar (Maybe (ImportableContents (ContentIdentifier, Remote.ByteSize))) -> CommandStart
|
listContents :: Remote -> ImportTreeConfig -> CheckGitIgnore -> TVar (Maybe (ImportableContentsChunkable Annex (ContentIdentifier, Remote.ByteSize))) -> CommandStart
|
||||||
listContents remote importtreeconfig ci tvar = starting "list" ai si $
|
listContents remote importtreeconfig ci tvar = starting "list" ai si $
|
||||||
listContents' remote importtreeconfig ci $ \importable -> do
|
listContents' remote importtreeconfig ci $ \importable -> do
|
||||||
liftIO $ atomically $ writeTVar tvar importable
|
liftIO $ atomically $ writeTVar tvar importable
|
||||||
|
@ -355,7 +355,7 @@ listContents remote importtreeconfig ci tvar = starting "list" ai si $
|
||||||
ai = ActionItemOther (Just (Remote.name remote))
|
ai = ActionItemOther (Just (Remote.name remote))
|
||||||
si = SeekInput []
|
si = SeekInput []
|
||||||
|
|
||||||
listContents' :: Remote -> ImportTreeConfig -> CheckGitIgnore -> (Maybe (ImportableContents (ContentIdentifier, Remote.ByteSize)) -> Annex a) -> Annex a
|
listContents' :: Remote -> ImportTreeConfig -> CheckGitIgnore -> (Maybe (ImportableContentsChunkable Annex (ContentIdentifier, Remote.ByteSize)) -> Annex a) -> Annex a
|
||||||
listContents' remote importtreeconfig ci a =
|
listContents' remote importtreeconfig ci a =
|
||||||
makeImportMatcher remote >>= \case
|
makeImportMatcher remote >>= \case
|
||||||
Right matcher -> tryNonAsync (getImportableContents remote importtreeconfig ci matcher) >>= \case
|
Right matcher -> tryNonAsync (getImportableContents remote importtreeconfig ci matcher) >>= \case
|
||||||
|
@ -368,7 +368,7 @@ listContents' remote importtreeconfig ci a =
|
||||||
, err
|
, err
|
||||||
]
|
]
|
||||||
|
|
||||||
commitRemote :: Remote -> Branch -> RemoteTrackingBranch -> Maybe Sha -> ImportTreeConfig -> ImportCommitConfig -> ImportableContents (Either Sha Key) -> CommandStart
|
commitRemote :: Remote -> Branch -> RemoteTrackingBranch -> Maybe Sha -> ImportTreeConfig -> ImportCommitConfig -> ImportableContentsChunkable Annex (Either Sha Key) -> CommandStart
|
||||||
commitRemote remote branch tb trackingcommit importtreeconfig importcommitconfig importable =
|
commitRemote remote branch tb trackingcommit importtreeconfig importcommitconfig importable =
|
||||||
starting "update" ai si $ do
|
starting "update" ai si $ do
|
||||||
importcommit <- buildImportCommit remote importtreeconfig importcommitconfig importable
|
importcommit <- buildImportCommit remote importtreeconfig importcommitconfig importable
|
||||||
|
|
|
@ -13,6 +13,7 @@ module Git.Tree (
|
||||||
getTree,
|
getTree,
|
||||||
recordTree,
|
recordTree,
|
||||||
recordTree',
|
recordTree',
|
||||||
|
recordSubTree,
|
||||||
TreeItem(..),
|
TreeItem(..),
|
||||||
treeItemsToTree,
|
treeItemsToTree,
|
||||||
treeItemToLsTreeItem,
|
treeItemToLsTreeItem,
|
||||||
|
@ -21,6 +22,7 @@ module Git.Tree (
|
||||||
graftTree,
|
graftTree,
|
||||||
graftTree',
|
graftTree',
|
||||||
withMkTreeHandle,
|
withMkTreeHandle,
|
||||||
|
MkTreeHandle,
|
||||||
treeMode,
|
treeMode,
|
||||||
) where
|
) where
|
||||||
|
|
||||||
|
|
|
@ -288,9 +288,10 @@ renameExportM serial adir _k old new = do
|
||||||
, File newloc
|
, File newloc
|
||||||
]
|
]
|
||||||
|
|
||||||
listImportableContentsM :: AndroidSerial -> AndroidPath -> Annex (Maybe (ImportableContents (ContentIdentifier, ByteSize)))
|
listImportableContentsM :: AndroidSerial -> AndroidPath -> Annex (Maybe (ImportableContentsChunkable Annex (ContentIdentifier, ByteSize)))
|
||||||
listImportableContentsM serial adir = adbfind >>= \case
|
listImportableContentsM serial adir = adbfind >>= \case
|
||||||
Just ls -> return $ Just $ ImportableContents (mapMaybe mk ls) []
|
Just ls -> return $ Just $ ImportableContentsComplete $
|
||||||
|
ImportableContents (mapMaybe mk ls) []
|
||||||
Nothing -> giveup "adb find failed"
|
Nothing -> giveup "adb find failed"
|
||||||
where
|
where
|
||||||
adbfind = adbShell serial
|
adbfind = adbShell serial
|
||||||
|
|
|
@ -162,20 +162,21 @@ borgRepoLocalPath r@(BorgRepo p)
|
||||||
| borgLocal r && not (null p) = Just p
|
| borgLocal r && not (null p) = Just p
|
||||||
| otherwise = Nothing
|
| otherwise = Nothing
|
||||||
|
|
||||||
listImportableContentsM :: UUID -> BorgRepo -> ParsedRemoteConfig -> Annex (Maybe (ImportableContents (ContentIdentifier, ByteSize)))
|
listImportableContentsM :: UUID -> BorgRepo -> ParsedRemoteConfig -> Annex (Maybe (ImportableContentsChunkable Annex (ContentIdentifier, ByteSize)))
|
||||||
listImportableContentsM u borgrepo c = prompt $ do
|
listImportableContentsM u borgrepo c = prompt $ do
|
||||||
imported <- getImported u
|
imported <- getImported u
|
||||||
ls <- withborglist (locBorgRepo borgrepo) Nothing formatarchivelist $ \as ->
|
ls <- withborglist (locBorgRepo borgrepo) Nothing formatarchivelist $ \as ->
|
||||||
forM (filter (not . S.null) as) $ \archivename ->
|
forM (filter (not . S.null) as) $ \archivename ->
|
||||||
case M.lookup archivename imported of
|
return $ case M.lookup archivename imported of
|
||||||
Just getfast -> return $ Left (archivename, getfast)
|
Just getlist -> Left (archivename, getlist)
|
||||||
Nothing -> Right <$>
|
Nothing ->
|
||||||
let archive = borgArchive borgrepo archivename
|
let archive = borgArchive borgrepo archivename
|
||||||
in withborglist archive subdir formatfilelist $
|
getlist = withborglist archive subdir formatfilelist $
|
||||||
liftIO . evaluate . force . parsefilelist archivename
|
liftIO . evaluate . force . parsefilelist archivename
|
||||||
|
in Right (archivename, getlist)
|
||||||
if all isLeft ls && M.null (M.difference imported (M.fromList (lefts ls)))
|
if all isLeft ls && M.null (M.difference imported (M.fromList (lefts ls)))
|
||||||
then return Nothing -- unchanged since last time, avoid work
|
then return Nothing -- unchanged since last time, avoid work
|
||||||
else Just . mkimportablecontents <$> mapM (either snd pure) ls
|
else Just <$> mkimportablecontents (map (either id id) ls)
|
||||||
where
|
where
|
||||||
withborglist what addparam format a = do
|
withborglist what addparam format a = do
|
||||||
environ <- liftIO getEnvironment
|
environ <- liftIO getEnvironment
|
||||||
|
@ -210,7 +211,7 @@ listImportableContentsM u borgrepo c = prompt $ do
|
||||||
parsefilelist archivename (bsz:f:extra:rest) = case readMaybe (fromRawFilePath bsz) of
|
parsefilelist archivename (bsz:f:extra:rest) = case readMaybe (fromRawFilePath bsz) of
|
||||||
Nothing -> parsefilelist archivename rest
|
Nothing -> parsefilelist archivename rest
|
||||||
Just sz ->
|
Just sz ->
|
||||||
let loc = genImportLocation archivename f
|
let loc = genImportLocation f
|
||||||
-- borg list reports hard links as 0 byte files,
|
-- borg list reports hard links as 0 byte files,
|
||||||
-- with the extra field set to " link to ".
|
-- with the extra field set to " link to ".
|
||||||
-- When the annex object is a hard link to
|
-- When the annex object is a hard link to
|
||||||
|
@ -234,12 +235,27 @@ listImportableContentsM u borgrepo c = prompt $ do
|
||||||
|
|
||||||
-- importableHistory is not used for retrieval, so is not
|
-- importableHistory is not used for retrieval, so is not
|
||||||
-- populated with old archives. Instead, a tree of archives
|
-- populated with old archives. Instead, a tree of archives
|
||||||
-- is constructed, by genImportLocation including the archive
|
-- is constructed, with a subtree for each archive.
|
||||||
-- name in the ImportLocation.
|
mkimportablecontents [] = return $ ImportableContentsComplete $
|
||||||
mkimportablecontents l = ImportableContents
|
ImportableContents
|
||||||
{ importableContents = concat l
|
{ importableContents = []
|
||||||
, importableHistory = []
|
, importableHistory = []
|
||||||
}
|
}
|
||||||
|
mkimportablecontents (l:ls) = ImportableContentsChunked
|
||||||
|
<$> mkimportablecontentschunk l ls
|
||||||
|
<*> pure []
|
||||||
|
|
||||||
|
mkimportablecontentschunk (archivename, getlist) rest = do
|
||||||
|
l <- getlist
|
||||||
|
return $ ImportableContentsChunk
|
||||||
|
{ importableContentsSubDir =
|
||||||
|
genImportChunkSubDir archivename
|
||||||
|
, importableContentsSubTree = l
|
||||||
|
, importableContentsNextChunk = case rest of
|
||||||
|
(getlist':rest') -> Just
|
||||||
|
<$> mkimportablecontentschunk getlist' rest'
|
||||||
|
[] -> return Nothing
|
||||||
|
}
|
||||||
|
|
||||||
-- We do not need a ContentIdentifier in order to retrieve a file from
|
-- We do not need a ContentIdentifier in order to retrieve a file from
|
||||||
-- borg; the ImportLocation contains all that's needed. So, this is left
|
-- borg; the ImportLocation contains all that's needed. So, this is left
|
||||||
|
@ -247,15 +263,20 @@ listImportableContentsM u borgrepo c = prompt $ do
|
||||||
borgContentIdentifier :: ContentIdentifier
|
borgContentIdentifier :: ContentIdentifier
|
||||||
borgContentIdentifier = ContentIdentifier mempty
|
borgContentIdentifier = ContentIdentifier mempty
|
||||||
|
|
||||||
|
-- Convert a path file a borg archive to a path that can be used as an
|
||||||
|
-- ImportLocation. The archive name gets used as a subdirectory,
|
||||||
|
-- which this path is inside.
|
||||||
|
--
|
||||||
-- Borg does not allow / in the name of an archive, so the archive
|
-- Borg does not allow / in the name of an archive, so the archive
|
||||||
-- name will always be the first directory in the ImportLocation.
|
-- name will always be the first directory in the ImportLocation.
|
||||||
--
|
--
|
||||||
-- Paths in a borg archive are always relative, not absolute, so the use of
|
-- This scheme also relies on the fact that paths in a borg archive are
|
||||||
-- </> to combine the archive name with the path will always work.
|
-- always relative, not absolute.
|
||||||
genImportLocation :: BorgArchiveName -> RawFilePath -> ImportLocation
|
genImportLocation :: RawFilePath -> RawFilePath
|
||||||
genImportLocation archivename p =
|
genImportLocation = fromImportLocation . ThirdPartyPopulated.mkThirdPartyImportLocation
|
||||||
ThirdPartyPopulated.mkThirdPartyImportLocation $
|
|
||||||
archivename P.</> p
|
genImportChunkSubDir :: BorgArchiveName -> ImportChunkSubDir
|
||||||
|
genImportChunkSubDir = ImportChunkSubDir . fromImportLocation . ThirdPartyPopulated.mkThirdPartyImportLocation
|
||||||
|
|
||||||
extractImportLocation :: ImportLocation -> (BorgArchiveName, RawFilePath)
|
extractImportLocation :: ImportLocation -> (BorgArchiveName, RawFilePath)
|
||||||
extractImportLocation loc = go $ P.splitDirectories $
|
extractImportLocation loc = go $ P.splitDirectories $
|
||||||
|
@ -269,7 +290,7 @@ extractImportLocation loc = go $ P.splitDirectories $
|
||||||
-- last imported tree. And the contents of those archives can be retrieved
|
-- last imported tree. And the contents of those archives can be retrieved
|
||||||
-- by listing the subtree recursively, which will likely be quite a lot
|
-- by listing the subtree recursively, which will likely be quite a lot
|
||||||
-- faster than running borg.
|
-- faster than running borg.
|
||||||
getImported :: UUID -> Annex (M.Map BorgArchiveName (Annex [(ImportLocation, (ContentIdentifier, ByteSize))]))
|
getImported :: UUID -> Annex (M.Map BorgArchiveName (Annex [(RawFilePath, (ContentIdentifier, ByteSize))]))
|
||||||
getImported u = M.unions <$> (mapM go . exportedTreeishes =<< getExport u)
|
getImported u = M.unions <$> (mapM go . exportedTreeishes =<< getExport u)
|
||||||
where
|
where
|
||||||
go t = M.fromList . mapMaybe mk
|
go t = M.fromList . mapMaybe mk
|
||||||
|
@ -278,21 +299,19 @@ getImported u = M.unions <$> (mapM go . exportedTreeishes =<< getExport u)
|
||||||
mk ti
|
mk ti
|
||||||
| toTreeItemType (LsTree.mode ti) == Just TreeSubtree = Just
|
| toTreeItemType (LsTree.mode ti) == Just TreeSubtree = Just
|
||||||
( getTopFilePath (LsTree.file ti)
|
( getTopFilePath (LsTree.file ti)
|
||||||
, getcontents
|
, getcontents (LsTree.sha ti)
|
||||||
(getTopFilePath (LsTree.file ti))
|
|
||||||
(LsTree.sha ti)
|
|
||||||
)
|
)
|
||||||
| otherwise = Nothing
|
| otherwise = Nothing
|
||||||
|
|
||||||
getcontents archivename t = mapMaybe (mkcontents archivename)
|
getcontents t = mapMaybe mkcontents
|
||||||
<$> inRepo (LsTree.lsTreeStrict LsTree.LsTreeRecursive (LsTree.LsTreeLong False) t)
|
<$> inRepo (LsTree.lsTreeStrict LsTree.LsTreeRecursive (LsTree.LsTreeLong False) t)
|
||||||
|
|
||||||
mkcontents archivename ti = do
|
mkcontents ti = do
|
||||||
let f = ThirdPartyPopulated.fromThirdPartyImportLocation $
|
let f = ThirdPartyPopulated.fromThirdPartyImportLocation $
|
||||||
mkImportLocation $ getTopFilePath $ LsTree.file ti
|
mkImportLocation $ getTopFilePath $ LsTree.file ti
|
||||||
k <- fileKey (P.takeFileName f)
|
k <- fileKey (P.takeFileName f)
|
||||||
return
|
return
|
||||||
( genImportLocation archivename f
|
( genImportLocation f
|
||||||
,
|
,
|
||||||
( borgContentIdentifier
|
( borgContentIdentifier
|
||||||
-- defaulting to 0 size is ok, this size
|
-- defaulting to 0 size is ok, this size
|
||||||
|
|
|
@ -351,11 +351,12 @@ removeExportLocation topdir loc =
|
||||||
mkExportLocation loc'
|
mkExportLocation loc'
|
||||||
in go (upFrom loc') =<< tryIO (removeDirectory p)
|
in go (upFrom loc') =<< tryIO (removeDirectory p)
|
||||||
|
|
||||||
listImportableContentsM :: RawFilePath -> Annex (Maybe (ImportableContents (ContentIdentifier, ByteSize)))
|
listImportableContentsM :: RawFilePath -> Annex (Maybe (ImportableContentsChunkable Annex (ContentIdentifier, ByteSize)))
|
||||||
listImportableContentsM dir = liftIO $ do
|
listImportableContentsM dir = liftIO $ do
|
||||||
l <- dirContentsRecursive (fromRawFilePath dir)
|
l <- dirContentsRecursive (fromRawFilePath dir)
|
||||||
l' <- mapM (go . toRawFilePath) l
|
l' <- mapM (go . toRawFilePath) l
|
||||||
return $ Just $ ImportableContents (catMaybes l') []
|
return $ Just $ ImportableContentsComplete $
|
||||||
|
ImportableContents (catMaybes l') []
|
||||||
where
|
where
|
||||||
go f = do
|
go f = do
|
||||||
st <- R.getFileStatus f
|
st <- R.getFileStatus f
|
||||||
|
|
|
@ -47,10 +47,10 @@ fromThirdPartyImportLocation =
|
||||||
-- find only those ImportLocations that are annex object files.
|
-- find only those ImportLocations that are annex object files.
|
||||||
-- All other ImportLocations are ignored.
|
-- All other ImportLocations are ignored.
|
||||||
importKey :: ImportLocation -> ContentIdentifier -> ByteSize -> MeterUpdate -> Annex (Maybe Key)
|
importKey :: ImportLocation -> ContentIdentifier -> ByteSize -> MeterUpdate -> Annex (Maybe Key)
|
||||||
importKey loc _cid sz _ = return $ importKey' loc (Just sz)
|
importKey loc _cid sz _ = return $ importKey' (fromImportLocation loc) (Just sz)
|
||||||
|
|
||||||
importKey' :: ImportLocation -> Maybe ByteSize -> Maybe Key
|
importKey' :: RawFilePath -> Maybe ByteSize -> Maybe Key
|
||||||
importKey' loc msz = case fileKey f of
|
importKey' p msz = case fileKey f of
|
||||||
Just k
|
Just k
|
||||||
-- Annex objects always are in a subdirectory with the same
|
-- Annex objects always are in a subdirectory with the same
|
||||||
-- name as the filename. If this is not the case for the file
|
-- name as the filename. If this is not the case for the file
|
||||||
|
@ -82,5 +82,4 @@ importKey' loc msz = case fileKey f of
|
||||||
_ -> Just k
|
_ -> Just k
|
||||||
Nothing -> Nothing
|
Nothing -> Nothing
|
||||||
where
|
where
|
||||||
p = fromImportLocation loc
|
|
||||||
f = P.takeFileName p
|
f = P.takeFileName p
|
||||||
|
|
|
@ -549,13 +549,15 @@ renameExportS3 hv r rs info k src dest = Just <$> go
|
||||||
srcobject = T.pack $ bucketExportLocation info src
|
srcobject = T.pack $ bucketExportLocation info src
|
||||||
dstobject = T.pack $ bucketExportLocation info dest
|
dstobject = T.pack $ bucketExportLocation info dest
|
||||||
|
|
||||||
listImportableContentsS3 :: S3HandleVar -> Remote -> S3Info -> Annex (Maybe (ImportableContents (ContentIdentifier, ByteSize)))
|
listImportableContentsS3 :: S3HandleVar -> Remote -> S3Info -> Annex (Maybe (ImportableContentsChunkable Annex (ContentIdentifier, ByteSize)))
|
||||||
listImportableContentsS3 hv r info =
|
listImportableContentsS3 hv r info =
|
||||||
withS3Handle hv $ \case
|
withS3Handle hv $ \case
|
||||||
Nothing -> giveup $ needS3Creds (uuid r)
|
Nothing -> giveup $ needS3Creds (uuid r)
|
||||||
Just h -> Just <$> go h
|
Just h -> Just <$> go h
|
||||||
where
|
where
|
||||||
go h = liftIO $ runResourceT $ extractFromResourceT =<< startlist h
|
go h = do
|
||||||
|
ic <- liftIO $ runResourceT $ extractFromResourceT =<< startlist h
|
||||||
|
return (ImportableContentsComplete ic)
|
||||||
|
|
||||||
startlist h
|
startlist h
|
||||||
| versioning info = do
|
| versioning info = do
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
{- git-annex import types
|
{- git-annex import types
|
||||||
-
|
-
|
||||||
- Copyright 2019 Joey Hess <id@joeyh.name>
|
- Copyright 2019-2021 Joey Hess <id@joeyh.name>
|
||||||
-
|
-
|
||||||
- Licensed under the GNU AGPL version 3 or higher.
|
- Licensed under the GNU AGPL version 3 or higher.
|
||||||
-}
|
-}
|
||||||
|
@ -13,6 +13,7 @@ import qualified Data.ByteString as S
|
||||||
import Data.Char
|
import Data.Char
|
||||||
import Control.DeepSeq
|
import Control.DeepSeq
|
||||||
import GHC.Generics
|
import GHC.Generics
|
||||||
|
import qualified System.FilePath.Posix.ByteString as Posix
|
||||||
|
|
||||||
import Types.Export
|
import Types.Export
|
||||||
import Utility.QuickCheck
|
import Utility.QuickCheck
|
||||||
|
@ -69,3 +70,34 @@ data ImportableContents info = ImportableContents
|
||||||
deriving (Show, Generic)
|
deriving (Show, Generic)
|
||||||
|
|
||||||
instance NFData info => NFData (ImportableContents info)
|
instance NFData info => NFData (ImportableContents info)
|
||||||
|
|
||||||
|
{- ImportableContents, but it can be chunked into subtrees to avoid
|
||||||
|
- all needing to fit in memory at the same time. -}
|
||||||
|
data ImportableContentsChunkable m info
|
||||||
|
= ImportableContentsComplete (ImportableContents info)
|
||||||
|
-- ^ Used when not chunking
|
||||||
|
| ImportableContentsChunked
|
||||||
|
{ importableContentsChunk :: ImportableContentsChunk m info
|
||||||
|
, importableHistoryComplete :: [ImportableContents info]
|
||||||
|
-- ^ Chunking the history is not supported
|
||||||
|
}
|
||||||
|
|
||||||
|
{- A chunk of ImportableContents, which is the entire content of a subtree
|
||||||
|
- of the main tree. Nested subtrees are not allowed. -}
|
||||||
|
data ImportableContentsChunk m info = ImportableContentsChunk
|
||||||
|
{ importableContentsSubDir :: ImportChunkSubDir
|
||||||
|
, importableContentsSubTree :: [(RawFilePath, info)]
|
||||||
|
-- ^ locations are relative to importableContentsSubDir
|
||||||
|
, importableContentsNextChunk :: m (Maybe (ImportableContentsChunk m info))
|
||||||
|
-- ^ Continuation to get the next chunk.
|
||||||
|
-- Returns Nothing when there are no more chunks.
|
||||||
|
}
|
||||||
|
|
||||||
|
newtype ImportChunkSubDir = ImportChunkSubDir { importChunkSubDir :: RawFilePath }
|
||||||
|
|
||||||
|
importableContentsChunkFullLocation
|
||||||
|
:: ImportChunkSubDir
|
||||||
|
-> RawFilePath
|
||||||
|
-> ImportLocation
|
||||||
|
importableContentsChunkFullLocation (ImportChunkSubDir root) loc =
|
||||||
|
mkImportLocation $ Posix.combine root loc
|
||||||
|
|
|
@ -309,7 +309,7 @@ data ImportActions a = ImportActions
|
||||||
--
|
--
|
||||||
-- Throws exception on failure to access the remote.
|
-- Throws exception on failure to access the remote.
|
||||||
-- May return Nothing when the remote is unchanged since last time.
|
-- May return Nothing when the remote is unchanged since last time.
|
||||||
{ listImportableContents :: a (Maybe (ImportableContents (ContentIdentifier, ByteSize)))
|
{ listImportableContents :: a (Maybe (ImportableContentsChunkable a (ContentIdentifier, ByteSize)))
|
||||||
-- Generates a Key (of any type) for the file stored on the
|
-- Generates a Key (of any type) for the file stored on the
|
||||||
-- remote at the ImportLocation. Does not download the file
|
-- remote at the ImportLocation. Does not download the file
|
||||||
-- from the remote.
|
-- from the remote.
|
||||||
|
@ -322,7 +322,7 @@ data ImportActions a = ImportActions
|
||||||
-- since the ContentIdentifier was generated.
|
-- since the ContentIdentifier was generated.
|
||||||
--
|
--
|
||||||
-- When it returns nothing, the file at the ImportLocation
|
-- When it returns nothing, the file at the ImportLocation
|
||||||
-- not by included in the imported tree.
|
-- will not be included in the imported tree.
|
||||||
--
|
--
|
||||||
-- When the remote is thirdPartyPopulated, this should check if the
|
-- When the remote is thirdPartyPopulated, this should check if the
|
||||||
-- file stored on the remote is the content of an annex object,
|
-- file stored on the remote is the content of an annex object,
|
||||||
|
|
|
@ -3,3 +3,5 @@ memory, then got OOM-killed.
|
||||||
|
|
||||||
I don't know if this is a memory leak or just trying to load too much, but it seems like this is a thing you should be able to do on
|
I don't know if this is a memory leak or just trying to load too much, but it seems like this is a thing you should be able to do on
|
||||||
a machine with 64G of RAM.
|
a machine with 64G of RAM.
|
||||||
|
|
||||||
|
> [[fixed|done]] --[[Joey]]
|
||||||
|
|
|
@ -0,0 +1,9 @@
|
||||||
|
[[!comment format=mdwn
|
||||||
|
username="joey"
|
||||||
|
subject="""comment 12"""
|
||||||
|
date="2021-10-08T17:06:05Z"
|
||||||
|
content="""
|
||||||
|
I've fixed this problem, my test case tops out at 160 mb now, and adding more
|
||||||
|
archives to the borg repo no longer increases memory use. Memory use is now
|
||||||
|
proportional to the number of annexed objects in a borg archive.
|
||||||
|
"""]]
|
20
doc/todo/avoid_storing_contentidentifier_log_for_borg.mdwn
Normal file
20
doc/todo/avoid_storing_contentidentifier_log_for_borg.mdwn
Normal file
|
@ -0,0 +1,20 @@
|
||||||
|
Borg uses an empty ContentIdentifier for everything; it does not need to
|
||||||
|
record anything. But that empty value gets stored in the log for each key
|
||||||
|
that is stored in borg. This unncessarily bloats the size of the git-annex
|
||||||
|
branch, by one content identifier per key stored in borg.
|
||||||
|
|
||||||
|
I think that it also slows down importing many archives from borg,
|
||||||
|
because for each of them it has to record the content identifier,
|
||||||
|
which is always the same, but still results in a db write.
|
||||||
|
|
||||||
|
Omitting storing any ContentIdentifier would break code such as
|
||||||
|
Remote.Helper.ExportImport's retrieveKeyFileFromImport.
|
||||||
|
|
||||||
|
If the borg Remote could indicate with a flag that it does not use
|
||||||
|
ContentIdentifiers, then code like that could pass it a null
|
||||||
|
ContentIdentifier without needing to read it from the db.
|
||||||
|
|
||||||
|
Annex.Import uses getContentIdentifierKeys, but only when it's not
|
||||||
|
thirdpartypopulated. So this change would not break that for borg,
|
||||||
|
but a clean way to handle that would be to make it also return a null
|
||||||
|
ContentIdentifier when the remote has the flag set. --[[Joey]]
|
Loading…
Reference in a new issue