move, copy: Sped up seeking for annexed files to operate on by a factor of nearly 2x.
This commit is contained in:
parent
00865cdae8
commit
d732ef1a89
5 changed files with 37 additions and 30 deletions
|
@ -8,6 +8,8 @@ git-annex (8.20200720.2) UNRELEASED; urgency=medium
|
||||||
some weird inheriting of ssh FDs by sshd. Bug was introduced in
|
some weird inheriting of ssh FDs by sshd. Bug was introduced in
|
||||||
git-annex version 7.20200202.7.
|
git-annex version 7.20200202.7.
|
||||||
* Fix a bug in find --branch in the previous version.
|
* Fix a bug in find --branch in the previous version.
|
||||||
|
* move, copy: Sped up seeking for annexed files to operate on by a factor
|
||||||
|
of nearly 2x.
|
||||||
|
|
||||||
-- Joey Hess <id@joeyh.name> Tue, 21 Jul 2020 12:58:30 -0400
|
-- Joey Hess <id@joeyh.name> Tue, 21 Jul 2020 12:58:30 -0400
|
||||||
|
|
||||||
|
|
|
@ -57,8 +57,11 @@ seek o = startConcurrency commandStages $ do
|
||||||
|
|
||||||
seeker = AnnexedFileSeeker
|
seeker = AnnexedFileSeeker
|
||||||
{ startAction = start o
|
{ startAction = start o
|
||||||
, checkContentPresent = Nothing
|
, checkContentPresent = case fromToOptions o of
|
||||||
, usesLocationLog = False
|
Right (FromRemote _) -> Just False
|
||||||
|
Right (ToRemote _) -> Just True
|
||||||
|
Left ToHere -> Just False
|
||||||
|
, usesLocationLog = True
|
||||||
}
|
}
|
||||||
|
|
||||||
{- A copy is just a move that does not delete the source file.
|
{- A copy is just a move that does not delete the source file.
|
||||||
|
|
|
@ -55,11 +55,6 @@ data RemoveWhen = RemoveSafe | RemoveNever
|
||||||
|
|
||||||
seek :: MoveOptions -> CommandSeek
|
seek :: MoveOptions -> CommandSeek
|
||||||
seek o = startConcurrency stages $ do
|
seek o = startConcurrency stages $ do
|
||||||
let seeker = AnnexedFileSeeker
|
|
||||||
{ startAction = start (fromToOptions o) (removeWhen o)
|
|
||||||
, checkContentPresent = Nothing
|
|
||||||
, usesLocationLog = False
|
|
||||||
}
|
|
||||||
case batchOption o of
|
case batchOption o of
|
||||||
NoBatch -> withKeyOptions (keyOptions o) False seeker
|
NoBatch -> withKeyOptions (keyOptions o) False seeker
|
||||||
(commandAction . startKey (fromToOptions o) (removeWhen o))
|
(commandAction . startKey (fromToOptions o) (removeWhen o))
|
||||||
|
@ -67,6 +62,14 @@ seek o = startConcurrency stages $ do
|
||||||
=<< workTreeItems ww (moveFiles o)
|
=<< workTreeItems ww (moveFiles o)
|
||||||
Batch fmt -> batchAnnexedFilesMatching fmt seeker
|
Batch fmt -> batchAnnexedFilesMatching fmt seeker
|
||||||
where
|
where
|
||||||
|
seeker = AnnexedFileSeeker
|
||||||
|
{ startAction = start (fromToOptions o) (removeWhen o)
|
||||||
|
, checkContentPresent = case fromToOptions o of
|
||||||
|
Right (FromRemote _) -> Nothing
|
||||||
|
Right (ToRemote _) -> Just True
|
||||||
|
Left ToHere -> Nothing
|
||||||
|
, usesLocationLog = True
|
||||||
|
}
|
||||||
stages = case fromToOptions o of
|
stages = case fromToOptions o of
|
||||||
Right (FromRemote _) -> downloadStages
|
Right (FromRemote _) -> downloadStages
|
||||||
Right (ToRemote _) -> commandStages
|
Right (ToRemote _) -> commandStages
|
||||||
|
@ -103,9 +106,8 @@ describeMoveAction _ = "move"
|
||||||
toStart :: RemoveWhen -> AssociatedFile -> Key -> ActionItem -> Remote -> CommandStart
|
toStart :: RemoveWhen -> AssociatedFile -> Key -> ActionItem -> Remote -> CommandStart
|
||||||
toStart removewhen afile key ai dest = do
|
toStart removewhen afile key ai dest = do
|
||||||
u <- getUUID
|
u <- getUUID
|
||||||
ishere <- inAnnex key
|
if u == Remote.uuid dest
|
||||||
if not ishere || u == Remote.uuid dest
|
then stop
|
||||||
then stop -- not here, so nothing to do
|
|
||||||
else toStart' dest removewhen afile key ai
|
else toStart' dest removewhen afile key ai
|
||||||
|
|
||||||
toStart' :: Remote -> RemoveWhen -> AssociatedFile -> Key -> ActionItem -> CommandStart
|
toStart' :: Remote -> RemoveWhen -> AssociatedFile -> Key -> ActionItem -> CommandStart
|
||||||
|
@ -188,11 +190,8 @@ toPerform dest removewhen key afile fastcheck isthere =
|
||||||
return False
|
return False
|
||||||
|
|
||||||
fromStart :: RemoveWhen -> AssociatedFile -> Key -> ActionItem -> Remote -> CommandStart
|
fromStart :: RemoveWhen -> AssociatedFile -> Key -> ActionItem -> Remote -> CommandStart
|
||||||
fromStart removewhen afile key ai src = case removewhen of
|
fromStart removewhen afile key ai src =
|
||||||
RemoveNever -> stopUnless (not <$> inAnnex key) go
|
stopUnless (fromOk src key) $
|
||||||
RemoveSafe -> go
|
|
||||||
where
|
|
||||||
go = stopUnless (fromOk src key) $
|
|
||||||
starting (describeMoveAction removewhen) (OnlyActionOn key ai) $
|
starting (describeMoveAction removewhen) (OnlyActionOn key ai) $
|
||||||
fromPerform src removewhen key afile
|
fromPerform src removewhen key afile
|
||||||
|
|
||||||
|
@ -247,11 +246,8 @@ fromPerform src removewhen key afile = do
|
||||||
- When moving, the content is removed from all the reachable remotes that
|
- When moving, the content is removed from all the reachable remotes that
|
||||||
- it can safely be removed from. -}
|
- it can safely be removed from. -}
|
||||||
toHereStart :: RemoveWhen -> AssociatedFile -> Key -> ActionItem -> CommandStart
|
toHereStart :: RemoveWhen -> AssociatedFile -> Key -> ActionItem -> CommandStart
|
||||||
toHereStart removewhen afile key ai = case removewhen of
|
toHereStart removewhen afile key ai =
|
||||||
RemoveNever -> stopUnless (not <$> inAnnex key) go
|
startingNoMessage (OnlyActionOn key ai) $ do
|
||||||
RemoveSafe -> go
|
|
||||||
where
|
|
||||||
go = startingNoMessage (OnlyActionOn key ai) $ do
|
|
||||||
rs <- Remote.keyPossibilities key
|
rs <- Remote.keyPossibilities key
|
||||||
forM_ rs $ \r ->
|
forM_ rs $ \r ->
|
||||||
includeCommandAction $
|
includeCommandAction $
|
||||||
|
|
9
doc/todo/faster_key_lookup_for_limits.mdwn
Normal file
9
doc/todo/faster_key_lookup_for_limits.mdwn
Normal file
|
@ -0,0 +1,9 @@
|
||||||
|
As part of the work in [[precache_logs_for_speed_with_cat-file_--buffer]],
|
||||||
|
key lookups are now done twice as fast as before.
|
||||||
|
|
||||||
|
But limits that look up keys still do their own key lookup, before the key
|
||||||
|
is looked up efficiently. Avoiding that would speed up --in etc, probably
|
||||||
|
another 1.5x-2x speedup when such limits are used. What that optimisation
|
||||||
|
needs is a way to tell if the current limit needs the key or not. If it
|
||||||
|
does, then match on it after getting the key (and precaching the location
|
||||||
|
log for limits that need that), otherwise before getting the key.
|
|
@ -33,10 +33,13 @@ and precache them.
|
||||||
> > > * `sync --content` 2x speedup!
|
> > > * `sync --content` 2x speedup!
|
||||||
> > > * `fsck --fast` 1.5x speedup
|
> > > * `fsck --fast` 1.5x speedup
|
||||||
> > > * `whereis` 1.5x speedup
|
> > > * `whereis` 1.5x speedup
|
||||||
|
> > > * `copy --to --fast` twenty-five percent or so speedup
|
||||||
|
> > > * `copy --to` 2x speedup
|
||||||
|
> > > * `copy --from` 2x speedup
|
||||||
> > >
|
> > >
|
||||||
> > > Still todo:
|
> > > For copy benchmarks, note that both repos had all files.
|
||||||
> > >
|
> > >
|
||||||
> > > * move, copy, drop, and mirror were left not using the location log caching yet
|
> > > [[done]]
|
||||||
|
|
||||||
Another thing that the same cat-file --buffer approach could be used with
|
Another thing that the same cat-file --buffer approach could be used with
|
||||||
is to cat the annex links. Git.LsFiles.inRepoDetails provides the Sha
|
is to cat the annex links. Git.LsFiles.inRepoDetails provides the Sha
|
||||||
|
@ -52,10 +55,4 @@ Some calls to lookupKey remain, and the above could
|
||||||
be used to remove them and make it faster. The ones in Annex.View and
|
be used to remove them and make it faster. The ones in Annex.View and
|
||||||
Command.Unused seem most likely to be able to be converted.
|
Command.Unused seem most likely to be able to be converted.
|
||||||
|
|
||||||
Also, limits that look up keys still do a key lookup, before the key is
|
See also [[faster_key_lookup_for_limits]]
|
||||||
looked up efficiently. (Before these changes, the same key lookup was done
|
|
||||||
2x too..) Avoiding that would speed up --in etc, probably another 1.5x-2x
|
|
||||||
speedup when such limits are used. What that optimisation needs is a way to
|
|
||||||
tell if the current limit needs the key or not. If it does, then match on
|
|
||||||
it after getting the key (and precaching the location log for limits that
|
|
||||||
need that), otherwise before getting the key.
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue