optimise
03cb2c8ece
put a cat-file into the fast
bloomfilter generation path. Instead, add another bloom filter which diffs
from the work tree to the index.
Also, pull the sha of the changed object out of the diffs, and cat that
object directly, rather than indirecting through the filename.
Finally, removed some hacks that are unncessary thanks to the worktree to
index diff.
This commit is contained in:
parent
b26776d92f
commit
3320870bad
2 changed files with 54 additions and 52 deletions
|
@ -30,8 +30,8 @@ import Annex.Link
|
||||||
import Annex.CatFile
|
import Annex.CatFile
|
||||||
import Types.Key
|
import Types.Key
|
||||||
import Types.RefSpec
|
import Types.RefSpec
|
||||||
import Git.FilePath
|
|
||||||
import Git.Types
|
import Git.Types
|
||||||
|
import Git.Sha
|
||||||
import Logs.View (is_branchView)
|
import Logs.View (is_branchView)
|
||||||
import Annex.BloomFilter
|
import Annex.BloomFilter
|
||||||
|
|
||||||
|
@ -158,28 +158,23 @@ dropMsg' s = "\nTo remove unwanted data: git-annex dropunused" ++ s ++ " NUMBER\
|
||||||
-
|
-
|
||||||
- Strategy:
|
- Strategy:
|
||||||
-
|
-
|
||||||
- * Build a bloom filter of all keys referenced by symlinks. This
|
- Pass keys through 3 bloom filters in order, only creating each bloom
|
||||||
- is the fastest one to build and will filter out most keys.
|
- filter on demand if the previous one didn't filter out all keys.
|
||||||
- * If keys remain, build a second bloom filter of keys referenced by
|
-
|
||||||
- branches maching the RefSpec.
|
- 1. All keys referenced by files in the work tree.
|
||||||
- * The list is streamed through these bloom filters lazily, so both will
|
- This is the fastest one to build and will filter out most keys.
|
||||||
- exist at the same time. This means that twice the memory is used,
|
- 2. All keys in the diff from the work tree to the index.
|
||||||
- but they're relatively small, so the added complexity of using a
|
- 3. All keys in the diffs between the index and branches matching the
|
||||||
- mutable bloom filter does not seem worthwhile.
|
- RefSpec. (This can take quite a while).
|
||||||
- * Generating the second bloom filter can take quite a while, since
|
|
||||||
- it needs enumerating all keys in all git branches. But, the common
|
|
||||||
- case, if the second filter is needed, is for some keys to be globally
|
|
||||||
- unused, and in that case, no short-circuit is possible.
|
|
||||||
- Short-circuiting if the first filter filters all the keys handles the
|
|
||||||
- other common case.
|
|
||||||
-}
|
-}
|
||||||
excludeReferenced :: RefSpec -> [Key] -> Annex [Key]
|
excludeReferenced :: RefSpec -> [Key] -> Annex [Key]
|
||||||
excludeReferenced refspec ks = runfilter firstlevel ks >>= runfilter secondlevel
|
excludeReferenced refspec ks =
|
||||||
|
runfilter withKeysReferencedM ks
|
||||||
|
>>= runfilter withKeysReferencedDiffIndex
|
||||||
|
>>= runfilter (withKeysReferencedDiffGitRefs refspec)
|
||||||
where
|
where
|
||||||
runfilter _ [] = return [] -- optimisation
|
runfilter _ [] = return [] -- optimisation
|
||||||
runfilter a l = bloomFilter l <$> genBloomFilter a
|
runfilter a l = bloomFilter l <$> genBloomFilter a
|
||||||
firstlevel = withKeysReferencedM
|
|
||||||
secondlevel = withKeysReferencedInGit refspec
|
|
||||||
|
|
||||||
{- Given an initial value, folds it with each key referenced by
|
{- Given an initial value, folds it with each key referenced by
|
||||||
- files in the working tree. -}
|
- files in the working tree. -}
|
||||||
|
@ -218,7 +213,6 @@ withKeysReferenced' mdir initial a = do
|
||||||
mk <- getM id
|
mk <- getM id
|
||||||
[ isAnnexLink f
|
[ isAnnexLink f
|
||||||
, liftIO (isPointerFile f)
|
, liftIO (isPointerFile f)
|
||||||
, catKeyFile f
|
|
||||||
]
|
]
|
||||||
case mk of
|
case mk of
|
||||||
Nothing -> go v fs
|
Nothing -> go v fs
|
||||||
|
@ -226,53 +220,56 @@ withKeysReferenced' mdir initial a = do
|
||||||
!v' <- a k f v
|
!v' <- a k f v
|
||||||
go v' fs
|
go v' fs
|
||||||
|
|
||||||
withKeysReferencedInGit :: RefSpec -> (Key -> Annex ()) -> Annex ()
|
withKeysReferencedDiffGitRefs :: RefSpec -> (Key -> Annex ()) -> Annex ()
|
||||||
withKeysReferencedInGit refspec a = do
|
withKeysReferencedDiffGitRefs refspec a = do
|
||||||
current <- inRepo Git.Branch.currentUnsafe
|
rs <- relevantrefs <$> inRepo (Git.Command.pipeReadStrict [Param "show-ref"])
|
||||||
shaHead <- maybe (return Nothing) (inRepo . Git.Ref.sha) current
|
shaHead <- maybe (return Nothing) (inRepo . Git.Ref.sha)
|
||||||
rs <- relevantrefs (shaHead, current)
|
=<< inRepo Git.Branch.currentUnsafe
|
||||||
<$> inRepo (Git.Command.pipeReadStrict [Param "show-ref"])
|
let haveHead = any (\(shaRef, _) -> Just shaRef == shaHead) rs
|
||||||
usedrefs <- applyRefSpec refspec rs (getreflog rs)
|
let rs' = map snd (nubRefs rs)
|
||||||
forM_ usedrefs $
|
usedrefs <- applyRefSpec refspec rs' (getreflog rs')
|
||||||
withKeysReferencedInGitRef a
|
forM_ (if haveHead then usedrefs else Git.Ref.headRef : usedrefs) $
|
||||||
|
withKeysReferencedDiffGitRef a
|
||||||
where
|
where
|
||||||
relevantrefs headRef = addHead headRef .
|
relevantrefs = map (\(r, h) -> (Git.Ref r, Git.Ref h)) .
|
||||||
filter ourbranches .
|
filter ourbranches .
|
||||||
map (separate (== ' ')) .
|
map (separate (== ' ')) .
|
||||||
lines
|
lines
|
||||||
nubRefs = map (Git.Ref . snd) . nubBy (\(x, _) (y, _) -> x == y)
|
nubRefs = nubBy (\(x, _) (y, _) -> x == y)
|
||||||
ourbranchend = '/' : Git.fromRef Annex.Branch.name
|
ourbranchend = '/' : Git.fromRef Annex.Branch.name
|
||||||
ourbranches (_, b) = not (ourbranchend `isSuffixOf` b)
|
ourbranches (_, b) = not (ourbranchend `isSuffixOf` b)
|
||||||
&& not ("refs/synced/" `isPrefixOf` b)
|
&& not ("refs/synced/" `isPrefixOf` b)
|
||||||
&& not (is_branchView (Git.Ref b))
|
&& not (is_branchView (Git.Ref b))
|
||||||
addHead headRef refs = case headRef of
|
|
||||||
-- if HEAD diverges from all branches (except the branch it
|
|
||||||
-- points to), run the actions on staged keys (and keys
|
|
||||||
-- that are only present in the work tree if the repo is
|
|
||||||
-- non bare)
|
|
||||||
(Just (Git.Ref x), Just (Git.Ref b))
|
|
||||||
| all (\(x',b') -> x /= x' || b == b') refs ->
|
|
||||||
Git.Ref.headRef
|
|
||||||
: nubRefs (filter ((/= x) . fst) refs)
|
|
||||||
_ -> nubRefs refs
|
|
||||||
getreflog rs = inRepo $ Git.RefLog.getMulti rs
|
getreflog rs = inRepo $ Git.RefLog.getMulti rs
|
||||||
|
|
||||||
{- Runs an action on keys referenced in the given Git reference which
|
{- Runs an action on keys referenced in the given Git reference which
|
||||||
- differ from those referenced in the work tree. -}
|
- differ from those referenced in the index. -}
|
||||||
withKeysReferencedInGitRef :: (Key -> Annex ()) -> Git.Ref -> Annex ()
|
withKeysReferencedDiffGitRef :: (Key -> Annex ()) -> Git.Ref -> Annex ()
|
||||||
withKeysReferencedInGitRef a ref = do
|
withKeysReferencedDiffGitRef a ref = do
|
||||||
showAction $ "checking " ++ Git.Ref.describe ref
|
showAction $ "checking " ++ Git.Ref.describe ref
|
||||||
bare <- isBareRepo
|
withKeysReferencedDiff a
|
||||||
(ts,clean) <- inRepo $ if bare
|
(inRepo $ DiffTree.diffIndex ref)
|
||||||
then DiffTree.diffIndex ref
|
DiffTree.srcsha
|
||||||
else DiffTree.diffWorkTree ref
|
|
||||||
let lookAtWorkingTree = not bare && ref == Git.Ref.headRef
|
{- Runs an action on keys referenced in the index which differ from the
|
||||||
forM_ ts $ tKey lookAtWorkingTree >=> maybe noop a
|
- work tree. -}
|
||||||
|
withKeysReferencedDiffIndex :: (Key -> Annex ()) -> Annex ()
|
||||||
|
withKeysReferencedDiffIndex a = unlessM (isBareRepo) $
|
||||||
|
withKeysReferencedDiff a
|
||||||
|
(inRepo $ DiffTree.diffFiles [])
|
||||||
|
DiffTree.srcsha
|
||||||
|
|
||||||
|
withKeysReferencedDiff :: (Key -> Annex ()) -> (Annex ([DiffTree.DiffTreeItem], IO Bool)) -> (DiffTree.DiffTreeItem -> Sha) -> Annex ()
|
||||||
|
withKeysReferencedDiff a getdiff extractsha = do
|
||||||
|
(ds, clean) <- getdiff
|
||||||
|
forM_ ds go
|
||||||
liftIO $ void clean
|
liftIO $ void clean
|
||||||
where
|
where
|
||||||
tKey True = lookupFile . getTopFilePath . DiffTree.file
|
go d = do
|
||||||
tKey False = parseLinkOrPointer
|
let sha = extractsha d
|
||||||
<$$> catFile ref . getTopFilePath . DiffTree.file
|
unless (sha == nullSha) $
|
||||||
|
(parseLinkOrPointer <$> catObject sha)
|
||||||
|
>>= maybe noop a
|
||||||
|
|
||||||
data UnusedMaps = UnusedMaps
|
data UnusedMaps = UnusedMaps
|
||||||
{ unusedMap :: UnusedMap
|
{ unusedMap :: UnusedMap
|
||||||
|
|
|
@ -12,6 +12,7 @@ module Git.DiffTree (
|
||||||
diffTreeRecursive,
|
diffTreeRecursive,
|
||||||
diffIndex,
|
diffIndex,
|
||||||
diffWorkTree,
|
diffWorkTree,
|
||||||
|
diffFiles,
|
||||||
diffLog,
|
diffLog,
|
||||||
) where
|
) where
|
||||||
|
|
||||||
|
@ -66,6 +67,10 @@ diffIndex' ref params repo =
|
||||||
, return ([], return True)
|
, return ([], return True)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
{- Diff between the index and work tree. -}
|
||||||
|
diffFiles :: [CommandParam] -> Repo -> IO ([DiffTreeItem], IO Bool)
|
||||||
|
diffFiles = getdiff (Param "diff-files")
|
||||||
|
|
||||||
{- Runs git log in --raw mode to get the changes that were made in
|
{- Runs git log in --raw mode to get the changes that were made in
|
||||||
- a particular commit. The output format is adjusted to be the same
|
- a particular commit. The output format is adjusted to be the same
|
||||||
- as diff-tree --raw._-}
|
- as diff-tree --raw._-}
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue