avoid --all buffering list of all keys
In Annex.Branch.branch, the (++) was killing laziness. Rewrote so it streams lazily. filterM also kills laziness, so made loggedKeys use a Unchecked type, and check if the key is dead in the seek loop. Note that loggedKeysFor still buffers, so git-annex info <remote> and git-annex unused --from remote still use more memory than necessary. Also removed some unused functions from Annex.Journal.
This commit is contained in:
parent
a8c91ce69a
commit
bea0ad220a
6 changed files with 52 additions and 44 deletions
|
@ -34,6 +34,7 @@ import qualified Data.Map as M
|
|||
import Data.Function
|
||||
import Data.Char
|
||||
import Control.Concurrent (threadDelay)
|
||||
import System.IO.Unsafe (unsafeInterleaveIO)
|
||||
|
||||
import Annex.Common
|
||||
import Annex.BranchState
|
||||
|
@ -333,19 +334,33 @@ commitIndex' jl branchref message basemessage retrynum parents = do
|
|||
let racemessage = basemessage ++ " (recovery from race #" ++ show retrynum' ++ "; expected commit parent " ++ show branchref ++ " but found " ++ show lostrefs ++ " )"
|
||||
commitIndex' jl committedref racemessage basemessage retrynum' [committedref]
|
||||
|
||||
{- Lists all files on the branch. There may be duplicates in the list. -}
|
||||
{- Lists all files on the branch. including ones in the journal
|
||||
- that have not been committed yet. There may be duplicates in the list.
|
||||
- Streams lazily. -}
|
||||
files :: Annex [FilePath]
|
||||
files = do
|
||||
update
|
||||
(++)
|
||||
<$> branchFiles
|
||||
<*> getJournalledFilesStale
|
||||
withIndex $ do
|
||||
g <- gitRepo
|
||||
withJournalHandle (go g)
|
||||
where
|
||||
go g jh = readDirectory jh >>= \case
|
||||
Nothing -> branchFiles' g
|
||||
Just file
|
||||
| dirCruft file -> go g jh
|
||||
| otherwise -> do
|
||||
let branchfile = fileJournal file
|
||||
rest <- unsafeInterleaveIO (go g jh)
|
||||
return (branchfile:rest)
|
||||
|
||||
{- Files in the branch, not including any from journalled changes,
|
||||
- and without updating the branch. -}
|
||||
branchFiles :: Annex [FilePath]
|
||||
branchFiles = withIndex $ inRepo $ Git.Command.pipeNullSplitZombie $
|
||||
lsTreeParams fullname [Param "--name-only"]
|
||||
branchFiles = withIndex $ inRepo branchFiles'
|
||||
|
||||
branchFiles' :: Git.Repo -> IO [FilePath]
|
||||
branchFiles' = Git.Command.pipeNullSplitZombie
|
||||
(lsTreeParams fullname [Param "--name-only"])
|
||||
|
||||
{- Populates the branch's index file with the current branch contents.
|
||||
-
|
||||
|
|
|
@ -55,27 +55,6 @@ getJournalFileStale :: FilePath -> Annex (Maybe String)
|
|||
getJournalFileStale file = inRepo $ \g -> catchMaybeIO $
|
||||
readFileStrict $ journalFile file g
|
||||
|
||||
{- List of files that have updated content in the journal. -}
|
||||
getJournalledFiles :: JournalLocked -> Annex [FilePath]
|
||||
getJournalledFiles jl = map fileJournal <$> getJournalFiles jl
|
||||
|
||||
getJournalledFilesStale :: Annex [FilePath]
|
||||
getJournalledFilesStale = map fileJournal <$> getJournalFilesStale
|
||||
|
||||
{- List of existing journal files. -}
|
||||
getJournalFiles :: JournalLocked -> Annex [FilePath]
|
||||
getJournalFiles _jl = getJournalFilesStale
|
||||
|
||||
{- List of existing journal files, but without locking, may miss new ones
|
||||
- just being added, or may have false positives if the journal is staged
|
||||
- as it is run. -}
|
||||
getJournalFilesStale :: Annex [FilePath]
|
||||
getJournalFilesStale = do
|
||||
g <- gitRepo
|
||||
fs <- liftIO $ catchDefaultIO [] $
|
||||
getDirectoryContents $ gitAnnexJournalDir g
|
||||
return $ filter (`notElem` [".", ".."]) fs
|
||||
|
||||
withJournalHandle :: (DirectoryHandle -> IO a) -> Annex a
|
||||
withJournalHandle a = do
|
||||
d <- fromRepo gitAnnexJournalDir
|
||||
|
|
|
@ -23,6 +23,7 @@ git-annex (6.20180410) UNRELEASED; urgency=medium
|
|||
* Assistant: Fix installation of menus, icons, etc when run
|
||||
from within runshell.
|
||||
* import: Avoid buffering all filenames to be imported in memory.
|
||||
* Improve memory use and speed of --all, by not buffering list of all keys.
|
||||
|
||||
-- Joey Hess <id@joeyh.name> Mon, 09 Apr 2018 14:03:28 -0400
|
||||
|
||||
|
|
|
@ -196,14 +196,14 @@ withKeyOptions' ko auto mkkeyaction fallbackaction params = do
|
|||
giveup "Cannot use --auto in a bare repository"
|
||||
case (null params, ko) of
|
||||
(True, Nothing)
|
||||
| bare -> noauto $ runkeyaction loggedKeys
|
||||
| bare -> noauto $ runkeyaction finishCheck loggedKeys
|
||||
| otherwise -> fallbackaction params
|
||||
(False, Nothing) -> fallbackaction params
|
||||
(True, Just WantAllKeys) -> noauto $ runkeyaction loggedKeys
|
||||
(True, Just WantUnusedKeys) -> noauto $ runkeyaction unusedKeys'
|
||||
(True, Just WantAllKeys) -> noauto $ runkeyaction finishCheck loggedKeys
|
||||
(True, Just WantUnusedKeys) -> noauto $ runkeyaction (pure . Just) unusedKeys'
|
||||
(True, Just WantFailedTransfers) -> noauto runfailedtransfers
|
||||
(True, Just (WantSpecificKey k)) -> noauto $ runkeyaction (return [k])
|
||||
(True, Just WantIncompleteKeys) -> noauto $ runkeyaction incompletekeys
|
||||
(True, Just (WantSpecificKey k)) -> noauto $ runkeyaction (pure . Just) (return [k])
|
||||
(True, Just WantIncompleteKeys) -> noauto $ runkeyaction (pure . Just) incompletekeys
|
||||
(True, Just (WantBranchKeys bs)) -> noauto $ runbranchkeys bs
|
||||
(False, Just _) -> giveup "Can only specify one of file names, --all, --branch, --unused, --failed, --key, or --incomplete"
|
||||
where
|
||||
|
@ -211,10 +211,11 @@ withKeyOptions' ko auto mkkeyaction fallbackaction params = do
|
|||
| auto = giveup "Cannot use --auto with --all or --branch or --unused or --key or --incomplete"
|
||||
| otherwise = a
|
||||
incompletekeys = staleKeysPrune gitAnnexTmpObjectDir True
|
||||
runkeyaction getks = do
|
||||
runkeyaction checker getks = do
|
||||
keyaction <- mkkeyaction
|
||||
ks <- getks
|
||||
forM_ ks $ \k -> keyaction k (mkActionItem k)
|
||||
forM_ ks $ checker >=> maybe noop
|
||||
(\k -> keyaction k (mkActionItem k))
|
||||
runbranchkeys bs = do
|
||||
keyaction <- mkkeyaction
|
||||
forM_ bs $ \b -> do
|
||||
|
|
|
@ -100,7 +100,8 @@ checkRemoteUnused name refspec = go =<< fromJust <$> Remote.byNameWithUUID (Just
|
|||
showAction "checking for unused data"
|
||||
_ <- check "" (remoteUnusedMsg r) (remoteunused r) 0
|
||||
next $ return True
|
||||
remoteunused r = excludeReferenced refspec <=< loggedKeysFor $ Remote.uuid r
|
||||
remoteunused r = excludeReferenced refspec
|
||||
<=< loggedKeysFor $ Remote.uuid r
|
||||
|
||||
check :: FilePath -> ([(Int, Key)] -> String) -> Annex [Key] -> Int -> Annex Int
|
||||
check file msg a c = do
|
||||
|
|
|
@ -8,7 +8,7 @@
|
|||
- Repositories record their UUID and the date when they --get or --drop
|
||||
- a value.
|
||||
-
|
||||
- Copyright 2010-2015 Joey Hess <id@joeyh.name>
|
||||
- Copyright 2010-2018 Joey Hess <id@joeyh.name>
|
||||
-
|
||||
- Licensed under the GNU GPL version 3 or higher.
|
||||
-}
|
||||
|
@ -23,6 +23,8 @@ module Logs.Location (
|
|||
isKnownKey,
|
||||
checkDead,
|
||||
setDead,
|
||||
Unchecked,
|
||||
finishCheck,
|
||||
loggedKeys,
|
||||
loggedKeysFor,
|
||||
) where
|
||||
|
@ -114,24 +116,33 @@ setDead' l = l
|
|||
Unknown -> Unknown
|
||||
}
|
||||
|
||||
data Unchecked a = Unchecked (Annex (Maybe a))
|
||||
|
||||
finishCheck :: Unchecked a -> Annex (Maybe a)
|
||||
finishCheck (Unchecked a) = a
|
||||
|
||||
{- Finds all keys that have location log information.
|
||||
- (There may be duplicate keys in the list.)
|
||||
-
|
||||
- Keys that have been marked as dead are not included.
|
||||
-}
|
||||
loggedKeys :: Annex [Key]
|
||||
loggedKeys :: Annex [Unchecked Key]
|
||||
loggedKeys = loggedKeys' (not <$$> checkDead)
|
||||
|
||||
{- Note that sel should be strict, to avoid the filterM building many
|
||||
- thunks. -}
|
||||
loggedKeys' :: (Key -> Annex Bool) -> Annex [Key]
|
||||
loggedKeys' sel = filterM sel =<<
|
||||
(mapMaybe locationLogFileKey <$> Annex.Branch.files)
|
||||
loggedKeys' :: (Key -> Annex Bool) -> Annex [Unchecked Key]
|
||||
loggedKeys' check = mapMaybe (defercheck <$$> locationLogFileKey)
|
||||
<$> Annex.Branch.files
|
||||
where
|
||||
defercheck k = Unchecked $ ifM (check k)
|
||||
( return (Just k)
|
||||
, return Nothing
|
||||
)
|
||||
|
||||
{- Finds all keys that have location log information indicating
|
||||
- they are present for the specified repository. -}
|
||||
- they are present for the specified repository.
|
||||
-}
|
||||
loggedKeysFor :: UUID -> Annex [Key]
|
||||
loggedKeysFor u = loggedKeys' isthere
|
||||
loggedKeysFor u = catMaybes <$> (mapM finishCheck =<< loggedKeys' isthere)
|
||||
where
|
||||
isthere k = do
|
||||
us <- loggedLocations k
|
||||
|
|
Loading…
Reference in a new issue