avoid counting size of keys that are in the journal twice
In calcRepoSizes and also git-annex info, when a key was in the journal, it was passed to the callback twice, so the calculated size was wrong.
This commit is contained in:
parent
467d80101a
commit
5afbea25e7
6 changed files with 25 additions and 20 deletions
|
@ -994,29 +994,30 @@ data UnmergedBranches t
|
|||
- The action is passed a callback that it can repeatedly call to read
|
||||
- the next file and its contents. When there are no more files, the
|
||||
- callback will return Nothing.
|
||||
-
|
||||
- In some cases the callback may return the same file more than once,
|
||||
- with different content. This happens rarely, only when the journal
|
||||
- contains additional information, and the last version of the
|
||||
- file it returns is the most current one.
|
||||
-}
|
||||
overBranchFileContents
|
||||
:: (RawFilePath -> Maybe v)
|
||||
-> Bool
|
||||
-- ^ When there are new files in the journal that have not yet
|
||||
-- been committed to the branch, should those files be omitted?
|
||||
-- When this is False, the callback is run on each journalled file
|
||||
-- at the end, and so may be run more than once on the same file.
|
||||
-> (Annex (Maybe (v, RawFilePath, Maybe L.ByteString)) -> Annex a)
|
||||
-> Annex (UnmergedBranches a)
|
||||
overBranchFileContents select go = do
|
||||
overBranchFileContents select omitnewjournalledfiles go = do
|
||||
st <- update
|
||||
v <- overBranchFileContents' select go st
|
||||
v <- overBranchFileContents' select omitnewjournalledfiles go st
|
||||
return $ if not (null (unmergedRefs st))
|
||||
then UnmergedBranches v
|
||||
else NoUnmergedBranches v
|
||||
|
||||
overBranchFileContents'
|
||||
:: (RawFilePath -> Maybe v)
|
||||
-> Bool
|
||||
-> (Annex (Maybe (v, RawFilePath, Maybe L.ByteString)) -> Annex a)
|
||||
-> BranchState
|
||||
-> Annex a
|
||||
overBranchFileContents' select go st = do
|
||||
overBranchFileContents' select omitnewjournalledfiles go st = do
|
||||
g <- Annex.gitRepo
|
||||
(l, cleanup) <- inRepo $ Git.LsTree.lsTree
|
||||
Git.LsTree.LsTreeRecursive
|
||||
|
@ -1029,9 +1030,10 @@ overBranchFileContents' select go st = do
|
|||
content' <- checkjournal f content
|
||||
return (Just (v, f, content'))
|
||||
Nothing
|
||||
| journalIgnorable st -> return Nothing
|
||||
| journalIgnorable st || omitnewjournalledfiles ->
|
||||
return Nothing
|
||||
-- The journal did not get committed to the
|
||||
-- branch, and may contain files that
|
||||
-- branch, and may contain new files that
|
||||
-- are not present in the branch, which
|
||||
-- need to be provided to the action still.
|
||||
-- This can cause the action to be run a
|
||||
|
|
|
@ -16,7 +16,8 @@ import Logs.UUID
|
|||
import qualified Data.Map.Strict as M
|
||||
|
||||
{- Sum up the sizes of all keys in all repositories, from the information
|
||||
- in the git-annex branch. Can be slow.
|
||||
- in the git-annex branch. New keys that only appear in the journal are
|
||||
- not included. Can be slow.
|
||||
-
|
||||
- The map includes the UUIDs of all known repositories, including
|
||||
- repositories that are empty.
|
||||
|
@ -25,7 +26,7 @@ calcRepoSizes :: Annex (M.Map UUID RepoSize)
|
|||
calcRepoSizes = do
|
||||
knownuuids <- M.keys <$> uuidDescMap
|
||||
let startmap = M.fromList $ map (\u -> (u, RepoSize 0)) knownuuids
|
||||
overLocationLogs startmap accum >>= \case
|
||||
overLocationLogs True startmap accum >>= \case
|
||||
UnmergedBranches m -> return m
|
||||
NoUnmergedBranches m -> return m
|
||||
where
|
||||
|
|
|
@ -284,7 +284,7 @@ withKeyOptions' ko auto mkkeyaction fallbackaction worktreeitems = do
|
|||
let discard reader = reader >>= \case
|
||||
Nothing -> noop
|
||||
Just _ -> discard reader
|
||||
overLocationLogs' ()
|
||||
overLocationLogs' False ()
|
||||
(\reader cont -> checktimelimit (discard reader) cont)
|
||||
(\k _ () -> keyaction Nothing (SeekInput [], k, mkActionItem k))
|
||||
>>= \case
|
||||
|
|
|
@ -641,7 +641,7 @@ cachedAllRepoData = do
|
|||
Just _ -> return s
|
||||
Nothing -> do
|
||||
matcher <- lift getKeyOnlyMatcher
|
||||
r <- lift $ overLocationLogs (emptyKeyInfo, mempty) $ \k locs (d, rd) -> do
|
||||
r <- lift $ overLocationLogs True (emptyKeyInfo, mempty) $ \k locs (d, rd) -> do
|
||||
ifM (matchOnKey matcher k)
|
||||
( do
|
||||
alivelocs <- snd
|
||||
|
|
|
@ -187,7 +187,7 @@ updateFromLog db@(ImportFeedDbHandle h) (oldtree, currtree)
|
|||
-- When initially populating the database, this
|
||||
-- is faster than diffing from the empty tree
|
||||
-- and looking up every log file.
|
||||
scanbranch = Annex.Branch.overBranchFileContents toscan goscan >>= \case
|
||||
scanbranch = Annex.Branch.overBranchFileContents toscan False goscan >>= \case
|
||||
Annex.Branch.NoUnmergedBranches () -> return ()
|
||||
Annex.Branch.UnmergedBranches () -> scandiff
|
||||
|
||||
|
|
|
@ -219,15 +219,17 @@ loggedKeysFor' u = loggedKeys' isthere
|
|||
return there
|
||||
|
||||
{- This is much faster than loggedKeys. -}
|
||||
overLocationLogs :: v -> (Key -> [UUID] -> v -> Annex v) -> Annex (Annex.Branch.UnmergedBranches v)
|
||||
overLocationLogs v = overLocationLogs' v (flip const)
|
||||
overLocationLogs :: Bool -> v -> (Key -> [UUID] -> v -> Annex v) -> Annex (Annex.Branch.UnmergedBranches v)
|
||||
overLocationLogs omitnewjournalledfiles v =
|
||||
overLocationLogs' omitnewjournalledfiles v (flip const)
|
||||
|
||||
overLocationLogs'
|
||||
:: v
|
||||
:: Bool
|
||||
-> v
|
||||
-> (Annex (Maybe (Key, RawFilePath, Maybe L.ByteString)) -> Annex v -> Annex v)
|
||||
-> (Key -> [UUID] -> v -> Annex v)
|
||||
-> Annex (Annex.Branch.UnmergedBranches v)
|
||||
overLocationLogs' iv discarder keyaction = do
|
||||
overLocationLogs' omitnewjournalledfiles iv discarder keyaction = do
|
||||
config <- Annex.getGitConfig
|
||||
clusters <- getClusters
|
||||
|
||||
|
@ -245,7 +247,7 @@ overLocationLogs' iv discarder keyaction = do
|
|||
)
|
||||
Nothing -> return v
|
||||
|
||||
Annex.Branch.overBranchFileContents getk (go iv)
|
||||
Annex.Branch.overBranchFileContents getk omitnewjournalledfiles (go iv)
|
||||
|
||||
-- Cannot import Logs.Cluster due to a cycle.
|
||||
-- Annex.clusters gets populated when starting up git-annex.
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue