Improve memory use of --all when using annex.private
This does not improve Annex.Branch.files at all, since it still uses ++ to
combine the lists, which forces all but the last one to be buffered in memory.
But when there are a lot of files in the private journal, it does prevent
--all (or a bare repo) from buffering the filenames in memory.
See commit 653b719472
for prior discussion of
this buffering.
Sponsored-by: Graham Spencer on Patreon
This commit is contained in:
parent
18f902efa9
commit
0da1d40cd4
3 changed files with 42 additions and 25 deletions
|
@ -1,6 +1,6 @@
|
||||||
{- management of the git-annex branch
|
{- management of the git-annex branch
|
||||||
-
|
-
|
||||||
- Copyright 2011-2022 Joey Hess <id@joeyh.name>
|
- Copyright 2011-2023 Joey Hess <id@joeyh.name>
|
||||||
-
|
-
|
||||||
- Licensed under the GNU AGPL version 3 or higher.
|
- Licensed under the GNU AGPL version 3 or higher.
|
||||||
-}
|
-}
|
||||||
|
@ -597,21 +597,24 @@ files = do
|
||||||
then return Nothing
|
then return Nothing
|
||||||
else do
|
else do
|
||||||
(bfs, cleanup) <- branchFiles
|
(bfs, cleanup) <- branchFiles
|
||||||
|
jfs <- journalledFiles
|
||||||
|
pjfs <- journalledFilesPrivate
|
||||||
-- ++ forces the content of the first list to be
|
-- ++ forces the content of the first list to be
|
||||||
-- buffered in memory, so use journalledFiles,
|
-- buffered in memory, so use journalledFiles,
|
||||||
-- which should be much smaller most of the time.
|
-- which should be much smaller most of the time.
|
||||||
-- branchFiles will stream as the list is consumed.
|
-- branchFiles will stream as the list is consumed.
|
||||||
l <- (++) <$> journalledFiles <*> pure bfs
|
let l = jfs ++ pjfs ++ bfs
|
||||||
return (Just (l, cleanup))
|
return (Just (l, cleanup))
|
||||||
|
|
||||||
{- Lists all files currently in the journal. There may be duplicates in
|
{- Lists all files currently in the journal, but not files in the private
|
||||||
- the list when using a private journal. -}
|
- journal. -}
|
||||||
journalledFiles :: Annex [RawFilePath]
|
journalledFiles :: Annex [RawFilePath]
|
||||||
journalledFiles = ifM privateUUIDsKnown
|
journalledFiles = getJournalledFilesStale gitAnnexJournalDir
|
||||||
( (++)
|
|
||||||
<$> getJournalledFilesStale gitAnnexPrivateJournalDir
|
journalledFilesPrivate :: Annex [RawFilePath]
|
||||||
<*> getJournalledFilesStale gitAnnexJournalDir
|
journalledFilesPrivate = ifM privateUUIDsKnown
|
||||||
, getJournalledFilesStale gitAnnexJournalDir
|
( getJournalledFilesStale gitAnnexPrivateJournalDir
|
||||||
|
, return []
|
||||||
)
|
)
|
||||||
|
|
||||||
{- Files in the branch, not including any from journalled changes,
|
{- Files in the branch, not including any from journalled changes,
|
||||||
|
@ -992,8 +995,11 @@ overBranchFileContents' select go st = do
|
||||||
-- This can cause the action to be run a
|
-- This can cause the action to be run a
|
||||||
-- second time with a file it already ran on.
|
-- second time with a file it already ran on.
|
||||||
| otherwise -> liftIO (tryTakeMVar buf) >>= \case
|
| otherwise -> liftIO (tryTakeMVar buf) >>= \case
|
||||||
Nothing -> drain buf =<< journalledFiles
|
Nothing -> do
|
||||||
Just fs -> drain buf fs
|
jfs <- journalledFiles
|
||||||
|
pjfs <- journalledFilesPrivate
|
||||||
|
drain buf jfs pjfs
|
||||||
|
Just (jfs, pjfs) -> drain buf jfs pjfs
|
||||||
catObjectStreamLsTree l (select' . getTopFilePath . Git.LsTree.file) g go'
|
catObjectStreamLsTree l (select' . getTopFilePath . Git.LsTree.file) g go'
|
||||||
`finally` liftIO (void cleanup)
|
`finally` liftIO (void cleanup)
|
||||||
where
|
where
|
||||||
|
@ -1007,9 +1013,9 @@ overBranchFileContents' select go st = do
|
||||||
PossiblyStaleJournalledContent journalledcontent ->
|
PossiblyStaleJournalledContent journalledcontent ->
|
||||||
Just (fromMaybe mempty branchcontent <> journalledcontent)
|
Just (fromMaybe mempty branchcontent <> journalledcontent)
|
||||||
|
|
||||||
drain buf fs = case getnext fs of
|
drain buf fs pfs = case getnext fs pfs of
|
||||||
Just (v, f, fs') -> do
|
Just (v, f, fs', pfs') -> do
|
||||||
liftIO $ putMVar buf fs'
|
liftIO $ putMVar buf (fs', pfs')
|
||||||
content <- getJournalFileStale (GetPrivate True) f >>= \case
|
content <- getJournalFileStale (GetPrivate True) f >>= \case
|
||||||
NoJournalledContent -> return Nothing
|
NoJournalledContent -> return Nothing
|
||||||
JournalledContent journalledcontent ->
|
JournalledContent journalledcontent ->
|
||||||
|
@ -1022,13 +1028,16 @@ overBranchFileContents' select go st = do
|
||||||
return (Just (content <> journalledcontent))
|
return (Just (content <> journalledcontent))
|
||||||
return (Just (v, f, content))
|
return (Just (v, f, content))
|
||||||
Nothing -> do
|
Nothing -> do
|
||||||
liftIO $ putMVar buf []
|
liftIO $ putMVar buf ([], [])
|
||||||
return Nothing
|
return Nothing
|
||||||
|
|
||||||
getnext [] = Nothing
|
getnext [] [] = Nothing
|
||||||
getnext (f:fs) = case select f of
|
getnext (f:fs) pfs = case select f of
|
||||||
Nothing -> getnext fs
|
Nothing -> getnext fs pfs
|
||||||
Just v -> Just (v, f, fs)
|
Just v -> Just (v, f, fs, pfs)
|
||||||
|
getnext [] (pf:pfs) = case select pf of
|
||||||
|
Nothing -> getnext [] pfs
|
||||||
|
Just v -> Just (v, pf, [], pfs)
|
||||||
|
|
||||||
{- Check if the git-annex branch has been updated from the oldtree.
|
{- Check if the git-annex branch has been updated from the oldtree.
|
||||||
- If so, returns the tuple of the old and new trees. -}
|
- If so, returns the tuple of the old and new trees. -}
|
||||||
|
|
|
@ -4,6 +4,7 @@ git-annex (10.20230927) UNRELEASED; urgency=medium
|
||||||
* Fix crash of enableremote when the special remote has embedcreds=yes.
|
* Fix crash of enableremote when the special remote has embedcreds=yes.
|
||||||
* importfeed: Use caching database to avoid needing to list urls
|
* importfeed: Use caching database to avoid needing to list urls
|
||||||
on every run, and avoid using too much memory.
|
on every run, and avoid using too much memory.
|
||||||
|
* Improve memory use of --all when using annex.private.
|
||||||
|
|
||||||
-- Joey Hess <id@joeyh.name> Tue, 10 Oct 2023 13:17:31 -0400
|
-- Joey Hess <id@joeyh.name> Tue, 10 Oct 2023 13:17:31 -0400
|
||||||
|
|
||||||
|
|
|
@ -1,16 +1,23 @@
|
||||||
Using --all, or running in a bare repo, as well as
|
`git annex unused --from=$remote` and `git annex info $remote`
|
||||||
`git annex unused` and `git annex info` all end up buffering the list of
|
buffer the list of keys that have uncommitted journalled changes
|
||||||
all keys that have uncommitted journalled changes in memory.
|
in memory. This is due to Annex.Branch.files, which reads all the
|
||||||
This is due to Annex.Branch.files's call to getJournalledFilesStale which
|
files in the journal into a buffer.
|
||||||
reads all the files in the directory into a buffer.
|
|
||||||
|
|
||||||
Note that the list of keys in the branch *does* stream in, so this
|
Note that the list of keys in the branch *does* stream in, so this
|
||||||
is only really a problem when using annex.alwayscommit=false to build
|
is only really a problem when using annex.alwayscommit=false to build
|
||||||
up big git-annex branch commits via the journal.
|
up big git-annex branch commits via the journal, or when using annex.private,
|
||||||
|
since the private journal can build up a lot of keys in it.
|
||||||
|
|
||||||
An attempt at making it stream via unsafeInterleaveIO failed miserably
|
An attempt at making it stream via unsafeInterleaveIO failed miserably
|
||||||
and that is not the right approach. This would be a good place to use
|
and that is not the right approach. This would be a good place to use
|
||||||
ResourceT, but it might need some changes to the Annex monad to allow
|
ResourceT, but it might need some changes to the Annex monad to allow
|
||||||
combining the two. --[[Joey]]
|
combining the two. --[[Joey]]
|
||||||
|
|
||||||
|
> This used to also affect --all and using git-annex in a bare repo, but
|
||||||
|
> that was avoided by using the overBranchFileContents interface. This
|
||||||
|
> suggests that changing to that interface in unused and info would be a
|
||||||
|
> solution.
|
||||||
|
|
||||||
[[!tag confirmed]]
|
[[!tag confirmed]]
|
||||||
|
|
||||||
|
[[!meta title="improve memory usage of unused and info when the journal contains a lot of files"]]
|
||||||
|
|
Loading…
Reference in a new issue