move, copy: Sped up seeking for annexed files to operate on by a factor of nearly 2x.
This commit is contained in:
parent
00865cdae8
commit
d732ef1a89
5 changed files with 37 additions and 30 deletions
|
@ -8,6 +8,8 @@ git-annex (8.20200720.2) UNRELEASED; urgency=medium
|
|||
some weird inheriting of ssh FDs by sshd. Bug was introduced in
|
||||
git-annex version 7.20200202.7.
|
||||
* Fix a bug in find --branch in the previous version.
|
||||
* move, copy: Sped up seeking for annexed files to operate on by a factor
|
||||
of nearly 2x.
|
||||
|
||||
-- Joey Hess <id@joeyh.name> Tue, 21 Jul 2020 12:58:30 -0400
|
||||
|
||||
|
|
|
@ -57,8 +57,11 @@ seek o = startConcurrency commandStages $ do
|
|||
|
||||
seeker = AnnexedFileSeeker
|
||||
{ startAction = start o
|
||||
, checkContentPresent = Nothing
|
||||
, usesLocationLog = False
|
||||
, checkContentPresent = case fromToOptions o of
|
||||
Right (FromRemote _) -> Just False
|
||||
Right (ToRemote _) -> Just True
|
||||
Left ToHere -> Just False
|
||||
, usesLocationLog = True
|
||||
}
|
||||
|
||||
{- A copy is just a move that does not delete the source file.
|
||||
|
|
|
@ -55,11 +55,6 @@ data RemoveWhen = RemoveSafe | RemoveNever
|
|||
|
||||
seek :: MoveOptions -> CommandSeek
|
||||
seek o = startConcurrency stages $ do
|
||||
let seeker = AnnexedFileSeeker
|
||||
{ startAction = start (fromToOptions o) (removeWhen o)
|
||||
, checkContentPresent = Nothing
|
||||
, usesLocationLog = False
|
||||
}
|
||||
case batchOption o of
|
||||
NoBatch -> withKeyOptions (keyOptions o) False seeker
|
||||
(commandAction . startKey (fromToOptions o) (removeWhen o))
|
||||
|
@ -67,6 +62,14 @@ seek o = startConcurrency stages $ do
|
|||
=<< workTreeItems ww (moveFiles o)
|
||||
Batch fmt -> batchAnnexedFilesMatching fmt seeker
|
||||
where
|
||||
seeker = AnnexedFileSeeker
|
||||
{ startAction = start (fromToOptions o) (removeWhen o)
|
||||
, checkContentPresent = case fromToOptions o of
|
||||
Right (FromRemote _) -> Nothing
|
||||
Right (ToRemote _) -> Just True
|
||||
Left ToHere -> Nothing
|
||||
, usesLocationLog = True
|
||||
}
|
||||
stages = case fromToOptions o of
|
||||
Right (FromRemote _) -> downloadStages
|
||||
Right (ToRemote _) -> commandStages
|
||||
|
@ -103,9 +106,8 @@ describeMoveAction _ = "move"
|
|||
toStart :: RemoveWhen -> AssociatedFile -> Key -> ActionItem -> Remote -> CommandStart
|
||||
toStart removewhen afile key ai dest = do
|
||||
u <- getUUID
|
||||
ishere <- inAnnex key
|
||||
if not ishere || u == Remote.uuid dest
|
||||
then stop -- not here, so nothing to do
|
||||
if u == Remote.uuid dest
|
||||
then stop
|
||||
else toStart' dest removewhen afile key ai
|
||||
|
||||
toStart' :: Remote -> RemoveWhen -> AssociatedFile -> Key -> ActionItem -> CommandStart
|
||||
|
@ -188,11 +190,8 @@ toPerform dest removewhen key afile fastcheck isthere =
|
|||
return False
|
||||
|
||||
fromStart :: RemoveWhen -> AssociatedFile -> Key -> ActionItem -> Remote -> CommandStart
|
||||
fromStart removewhen afile key ai src = case removewhen of
|
||||
RemoveNever -> stopUnless (not <$> inAnnex key) go
|
||||
RemoveSafe -> go
|
||||
where
|
||||
go = stopUnless (fromOk src key) $
|
||||
fromStart removewhen afile key ai src =
|
||||
stopUnless (fromOk src key) $
|
||||
starting (describeMoveAction removewhen) (OnlyActionOn key ai) $
|
||||
fromPerform src removewhen key afile
|
||||
|
||||
|
@ -247,11 +246,8 @@ fromPerform src removewhen key afile = do
|
|||
- When moving, the content is removed from all the reachable remotes that
|
||||
- it can safely be removed from. -}
|
||||
toHereStart :: RemoveWhen -> AssociatedFile -> Key -> ActionItem -> CommandStart
|
||||
toHereStart removewhen afile key ai = case removewhen of
|
||||
RemoveNever -> stopUnless (not <$> inAnnex key) go
|
||||
RemoveSafe -> go
|
||||
where
|
||||
go = startingNoMessage (OnlyActionOn key ai) $ do
|
||||
toHereStart removewhen afile key ai =
|
||||
startingNoMessage (OnlyActionOn key ai) $ do
|
||||
rs <- Remote.keyPossibilities key
|
||||
forM_ rs $ \r ->
|
||||
includeCommandAction $
|
||||
|
|
9
doc/todo/faster_key_lookup_for_limits.mdwn
Normal file
9
doc/todo/faster_key_lookup_for_limits.mdwn
Normal file
|
@ -0,0 +1,9 @@
|
|||
As part of the work in [[precache_logs_for_speed_with_cat-file_--buffer]],
|
||||
key lookups are now done twice as fast as before.
|
||||
|
||||
But limits that look up keys still do a separate key lookup before the key
|
||||
is looked up efficiently. Avoiding that would speed up `--in` etc., probably
|
||||
by another 1.5x-2x when such limits are used. What that optimisation
|
||||
needs is a way to tell if the current limit needs the key or not. If it
|
||||
does, then match on it after getting the key (and precaching the location
|
||||
log for limits that need that), otherwise before getting the key.
|
|
@ -33,10 +33,13 @@ and precache them.
|
|||
> > > * `sync --content` 2x speedup!
|
||||
> > > * `fsck --fast` 1.5x speedup
|
||||
> > > * `whereis` 1.5x speedup
|
||||
> > > * `copy --to --fast` 25% or so speedup
|
||||
> > > * `copy --to` 2x speedup
|
||||
> > > * `copy --from` 2x speedup
|
||||
> > >
|
||||
> > > Still todo:
|
||||
> > >
|
||||
> > > * move, copy, drop, and mirror were left not using the location log caching yet
|
||||
> > > For copy benchmarks, note that both repos had all files.
|
||||
> > >
|
||||
> > > [[done]]
|
||||
|
||||
Another thing that the same cat-file --buffer approach could be used with
|
||||
is to cat the annex links. Git.LsFiles.inRepoDetails provides the Sha
|
||||
|
@ -52,10 +55,4 @@ Some calls to lookupKey remain, and the above could
|
|||
be used to remove them and make it faster. The ones in Annex.View and
|
||||
Command.Unused seem most likely to be able to be converted.
|
||||
|
||||
Also, limits that look up keys still do a key lookup, before the key is
|
||||
looked up efficiently. (Before these changes, the same key lookup was done
|
||||
2x too..) Avoiding that would speed up --in etc, probably another 1.5x-2x
|
||||
speedup when such limits are used. What that optimisation needs is a way to
|
||||
tell if the current limit needs the key or not. If it does, then match on
|
||||
it after getting the key (and precaching the location log for limits that
|
||||
need that), otherwise before getting the key.
|
||||
See also [[faster_key_lookup_for_limits]].
|
||||
|
|
Loading…
Reference in a new issue