avoid --all buffering list of all keys
In Annex.Branch.branch, the (++) was killing laziness. Rewrote so it streams lazily. filterM also kills laziness, so made loggedKeys use a Unchecked type, and check if the key is dead in the seek loop. Note that loggedKeysFor still buffers, so git-annex info <remote> and git-annex unused --from remote still use more memory than necessary. Also removed some unused functions from Annex.Journal.
This commit is contained in:
parent
a8c91ce69a
commit
bea0ad220a
6 changed files with 52 additions and 44 deletions
|
@ -34,6 +34,7 @@ import qualified Data.Map as M
|
||||||
import Data.Function
|
import Data.Function
|
||||||
import Data.Char
|
import Data.Char
|
||||||
import Control.Concurrent (threadDelay)
|
import Control.Concurrent (threadDelay)
|
||||||
|
import System.IO.Unsafe (unsafeInterleaveIO)
|
||||||
|
|
||||||
import Annex.Common
|
import Annex.Common
|
||||||
import Annex.BranchState
|
import Annex.BranchState
|
||||||
|
@ -333,19 +334,33 @@ commitIndex' jl branchref message basemessage retrynum parents = do
|
||||||
let racemessage = basemessage ++ " (recovery from race #" ++ show retrynum' ++ "; expected commit parent " ++ show branchref ++ " but found " ++ show lostrefs ++ " )"
|
let racemessage = basemessage ++ " (recovery from race #" ++ show retrynum' ++ "; expected commit parent " ++ show branchref ++ " but found " ++ show lostrefs ++ " )"
|
||||||
commitIndex' jl committedref racemessage basemessage retrynum' [committedref]
|
commitIndex' jl committedref racemessage basemessage retrynum' [committedref]
|
||||||
|
|
||||||
{- Lists all files on the branch. There may be duplicates in the list. -}
|
{- Lists all files on the branch. including ones in the journal
|
||||||
|
- that have not been committed yet. There may be duplicates in the list.
|
||||||
|
- Streams lazily. -}
|
||||||
files :: Annex [FilePath]
|
files :: Annex [FilePath]
|
||||||
files = do
|
files = do
|
||||||
update
|
update
|
||||||
(++)
|
withIndex $ do
|
||||||
<$> branchFiles
|
g <- gitRepo
|
||||||
<*> getJournalledFilesStale
|
withJournalHandle (go g)
|
||||||
|
where
|
||||||
|
go g jh = readDirectory jh >>= \case
|
||||||
|
Nothing -> branchFiles' g
|
||||||
|
Just file
|
||||||
|
| dirCruft file -> go g jh
|
||||||
|
| otherwise -> do
|
||||||
|
let branchfile = fileJournal file
|
||||||
|
rest <- unsafeInterleaveIO (go g jh)
|
||||||
|
return (branchfile:rest)
|
||||||
|
|
||||||
{- Files in the branch, not including any from journalled changes,
|
{- Files in the branch, not including any from journalled changes,
|
||||||
- and without updating the branch. -}
|
- and without updating the branch. -}
|
||||||
branchFiles :: Annex [FilePath]
|
branchFiles :: Annex [FilePath]
|
||||||
branchFiles = withIndex $ inRepo $ Git.Command.pipeNullSplitZombie $
|
branchFiles = withIndex $ inRepo branchFiles'
|
||||||
lsTreeParams fullname [Param "--name-only"]
|
|
||||||
|
branchFiles' :: Git.Repo -> IO [FilePath]
|
||||||
|
branchFiles' = Git.Command.pipeNullSplitZombie
|
||||||
|
(lsTreeParams fullname [Param "--name-only"])
|
||||||
|
|
||||||
{- Populates the branch's index file with the current branch contents.
|
{- Populates the branch's index file with the current branch contents.
|
||||||
-
|
-
|
||||||
|
|
|
@ -55,27 +55,6 @@ getJournalFileStale :: FilePath -> Annex (Maybe String)
|
||||||
getJournalFileStale file = inRepo $ \g -> catchMaybeIO $
|
getJournalFileStale file = inRepo $ \g -> catchMaybeIO $
|
||||||
readFileStrict $ journalFile file g
|
readFileStrict $ journalFile file g
|
||||||
|
|
||||||
{- List of files that have updated content in the journal. -}
|
|
||||||
getJournalledFiles :: JournalLocked -> Annex [FilePath]
|
|
||||||
getJournalledFiles jl = map fileJournal <$> getJournalFiles jl
|
|
||||||
|
|
||||||
getJournalledFilesStale :: Annex [FilePath]
|
|
||||||
getJournalledFilesStale = map fileJournal <$> getJournalFilesStale
|
|
||||||
|
|
||||||
{- List of existing journal files. -}
|
|
||||||
getJournalFiles :: JournalLocked -> Annex [FilePath]
|
|
||||||
getJournalFiles _jl = getJournalFilesStale
|
|
||||||
|
|
||||||
{- List of existing journal files, but without locking, may miss new ones
|
|
||||||
- just being added, or may have false positives if the journal is staged
|
|
||||||
- as it is run. -}
|
|
||||||
getJournalFilesStale :: Annex [FilePath]
|
|
||||||
getJournalFilesStale = do
|
|
||||||
g <- gitRepo
|
|
||||||
fs <- liftIO $ catchDefaultIO [] $
|
|
||||||
getDirectoryContents $ gitAnnexJournalDir g
|
|
||||||
return $ filter (`notElem` [".", ".."]) fs
|
|
||||||
|
|
||||||
withJournalHandle :: (DirectoryHandle -> IO a) -> Annex a
|
withJournalHandle :: (DirectoryHandle -> IO a) -> Annex a
|
||||||
withJournalHandle a = do
|
withJournalHandle a = do
|
||||||
d <- fromRepo gitAnnexJournalDir
|
d <- fromRepo gitAnnexJournalDir
|
||||||
|
|
|
@ -23,6 +23,7 @@ git-annex (6.20180410) UNRELEASED; urgency=medium
|
||||||
* Assistant: Fix installation of menus, icons, etc when run
|
* Assistant: Fix installation of menus, icons, etc when run
|
||||||
from within runshell.
|
from within runshell.
|
||||||
* import: Avoid buffering all filenames to be imported in memory.
|
* import: Avoid buffering all filenames to be imported in memory.
|
||||||
|
* Improve memory use and speed of --all, by not buffering list of all keys.
|
||||||
|
|
||||||
-- Joey Hess <id@joeyh.name> Mon, 09 Apr 2018 14:03:28 -0400
|
-- Joey Hess <id@joeyh.name> Mon, 09 Apr 2018 14:03:28 -0400
|
||||||
|
|
||||||
|
|
|
@ -196,14 +196,14 @@ withKeyOptions' ko auto mkkeyaction fallbackaction params = do
|
||||||
giveup "Cannot use --auto in a bare repository"
|
giveup "Cannot use --auto in a bare repository"
|
||||||
case (null params, ko) of
|
case (null params, ko) of
|
||||||
(True, Nothing)
|
(True, Nothing)
|
||||||
| bare -> noauto $ runkeyaction loggedKeys
|
| bare -> noauto $ runkeyaction finishCheck loggedKeys
|
||||||
| otherwise -> fallbackaction params
|
| otherwise -> fallbackaction params
|
||||||
(False, Nothing) -> fallbackaction params
|
(False, Nothing) -> fallbackaction params
|
||||||
(True, Just WantAllKeys) -> noauto $ runkeyaction loggedKeys
|
(True, Just WantAllKeys) -> noauto $ runkeyaction finishCheck loggedKeys
|
||||||
(True, Just WantUnusedKeys) -> noauto $ runkeyaction unusedKeys'
|
(True, Just WantUnusedKeys) -> noauto $ runkeyaction (pure . Just) unusedKeys'
|
||||||
(True, Just WantFailedTransfers) -> noauto runfailedtransfers
|
(True, Just WantFailedTransfers) -> noauto runfailedtransfers
|
||||||
(True, Just (WantSpecificKey k)) -> noauto $ runkeyaction (return [k])
|
(True, Just (WantSpecificKey k)) -> noauto $ runkeyaction (pure . Just) (return [k])
|
||||||
(True, Just WantIncompleteKeys) -> noauto $ runkeyaction incompletekeys
|
(True, Just WantIncompleteKeys) -> noauto $ runkeyaction (pure . Just) incompletekeys
|
||||||
(True, Just (WantBranchKeys bs)) -> noauto $ runbranchkeys bs
|
(True, Just (WantBranchKeys bs)) -> noauto $ runbranchkeys bs
|
||||||
(False, Just _) -> giveup "Can only specify one of file names, --all, --branch, --unused, --failed, --key, or --incomplete"
|
(False, Just _) -> giveup "Can only specify one of file names, --all, --branch, --unused, --failed, --key, or --incomplete"
|
||||||
where
|
where
|
||||||
|
@ -211,10 +211,11 @@ withKeyOptions' ko auto mkkeyaction fallbackaction params = do
|
||||||
| auto = giveup "Cannot use --auto with --all or --branch or --unused or --key or --incomplete"
|
| auto = giveup "Cannot use --auto with --all or --branch or --unused or --key or --incomplete"
|
||||||
| otherwise = a
|
| otherwise = a
|
||||||
incompletekeys = staleKeysPrune gitAnnexTmpObjectDir True
|
incompletekeys = staleKeysPrune gitAnnexTmpObjectDir True
|
||||||
runkeyaction getks = do
|
runkeyaction checker getks = do
|
||||||
keyaction <- mkkeyaction
|
keyaction <- mkkeyaction
|
||||||
ks <- getks
|
ks <- getks
|
||||||
forM_ ks $ \k -> keyaction k (mkActionItem k)
|
forM_ ks $ checker >=> maybe noop
|
||||||
|
(\k -> keyaction k (mkActionItem k))
|
||||||
runbranchkeys bs = do
|
runbranchkeys bs = do
|
||||||
keyaction <- mkkeyaction
|
keyaction <- mkkeyaction
|
||||||
forM_ bs $ \b -> do
|
forM_ bs $ \b -> do
|
||||||
|
|
|
@ -100,7 +100,8 @@ checkRemoteUnused name refspec = go =<< fromJust <$> Remote.byNameWithUUID (Just
|
||||||
showAction "checking for unused data"
|
showAction "checking for unused data"
|
||||||
_ <- check "" (remoteUnusedMsg r) (remoteunused r) 0
|
_ <- check "" (remoteUnusedMsg r) (remoteunused r) 0
|
||||||
next $ return True
|
next $ return True
|
||||||
remoteunused r = excludeReferenced refspec <=< loggedKeysFor $ Remote.uuid r
|
remoteunused r = excludeReferenced refspec
|
||||||
|
<=< loggedKeysFor $ Remote.uuid r
|
||||||
|
|
||||||
check :: FilePath -> ([(Int, Key)] -> String) -> Annex [Key] -> Int -> Annex Int
|
check :: FilePath -> ([(Int, Key)] -> String) -> Annex [Key] -> Int -> Annex Int
|
||||||
check file msg a c = do
|
check file msg a c = do
|
||||||
|
|
|
@ -8,7 +8,7 @@
|
||||||
- Repositories record their UUID and the date when they --get or --drop
|
- Repositories record their UUID and the date when they --get or --drop
|
||||||
- a value.
|
- a value.
|
||||||
-
|
-
|
||||||
- Copyright 2010-2015 Joey Hess <id@joeyh.name>
|
- Copyright 2010-2018 Joey Hess <id@joeyh.name>
|
||||||
-
|
-
|
||||||
- Licensed under the GNU GPL version 3 or higher.
|
- Licensed under the GNU GPL version 3 or higher.
|
||||||
-}
|
-}
|
||||||
|
@ -23,6 +23,8 @@ module Logs.Location (
|
||||||
isKnownKey,
|
isKnownKey,
|
||||||
checkDead,
|
checkDead,
|
||||||
setDead,
|
setDead,
|
||||||
|
Unchecked,
|
||||||
|
finishCheck,
|
||||||
loggedKeys,
|
loggedKeys,
|
||||||
loggedKeysFor,
|
loggedKeysFor,
|
||||||
) where
|
) where
|
||||||
|
@ -114,24 +116,33 @@ setDead' l = l
|
||||||
Unknown -> Unknown
|
Unknown -> Unknown
|
||||||
}
|
}
|
||||||
|
|
||||||
|
data Unchecked a = Unchecked (Annex (Maybe a))
|
||||||
|
|
||||||
|
finishCheck :: Unchecked a -> Annex (Maybe a)
|
||||||
|
finishCheck (Unchecked a) = a
|
||||||
|
|
||||||
{- Finds all keys that have location log information.
|
{- Finds all keys that have location log information.
|
||||||
- (There may be duplicate keys in the list.)
|
- (There may be duplicate keys in the list.)
|
||||||
-
|
-
|
||||||
- Keys that have been marked as dead are not included.
|
- Keys that have been marked as dead are not included.
|
||||||
-}
|
-}
|
||||||
loggedKeys :: Annex [Key]
|
loggedKeys :: Annex [Unchecked Key]
|
||||||
loggedKeys = loggedKeys' (not <$$> checkDead)
|
loggedKeys = loggedKeys' (not <$$> checkDead)
|
||||||
|
|
||||||
{- Note that sel should be strict, to avoid the filterM building many
|
loggedKeys' :: (Key -> Annex Bool) -> Annex [Unchecked Key]
|
||||||
- thunks. -}
|
loggedKeys' check = mapMaybe (defercheck <$$> locationLogFileKey)
|
||||||
loggedKeys' :: (Key -> Annex Bool) -> Annex [Key]
|
<$> Annex.Branch.files
|
||||||
loggedKeys' sel = filterM sel =<<
|
where
|
||||||
(mapMaybe locationLogFileKey <$> Annex.Branch.files)
|
defercheck k = Unchecked $ ifM (check k)
|
||||||
|
( return (Just k)
|
||||||
|
, return Nothing
|
||||||
|
)
|
||||||
|
|
||||||
{- Finds all keys that have location log information indicating
|
{- Finds all keys that have location log information indicating
|
||||||
- they are present for the specified repository. -}
|
- they are present for the specified repository.
|
||||||
|
-}
|
||||||
loggedKeysFor :: UUID -> Annex [Key]
|
loggedKeysFor :: UUID -> Annex [Key]
|
||||||
loggedKeysFor u = loggedKeys' isthere
|
loggedKeysFor u = catMaybes <$> (mapM finishCheck =<< loggedKeys' isthere)
|
||||||
where
|
where
|
||||||
isthere k = do
|
isthere k = do
|
||||||
us <- loggedLocations k
|
us <- loggedLocations k
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue