drop performance improvements

Sped up seeking files to drop by 2x, and also some performance
improvements to checking numcopies.

Interestingly, the seek speedup is not due to precaching; I think it is
due to calling getParsed earlier.

Annex.Drop had to be changed to check inAnnex itself, since that check was
removed from Command.Drop.startLocal. All other users of Command.Drop
already checked inAnnex themselves.

This commit was sponsored by Ryan Newton on Patreon.
This commit is contained in:
Joey Hess 2020-07-24 13:27:30 -04:00
parent a01aa214be
commit 18f1fb5841
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
4 changed files with 40 additions and 37 deletions

View file

@ -17,6 +17,7 @@ import qualified Command.Drop
import Command
import Annex.Wanted
import Annex.SpecialRemote.Config
import Annex.Content
import qualified Database.Keys
import Git.FilePath
@ -118,7 +119,8 @@ handleDropsFrom locs rs reason fromhere key afile preverified runner = do
)
dropl fs n = checkdrop fs n Nothing $ \numcopies ->
Command.Drop.startLocal afile ai numcopies key preverified
stopUnless (inAnnex key) $
Command.Drop.startLocal afile ai numcopies key preverified
dropr fs r n = checkdrop fs n (Just $ Remote.uuid r) $ \numcopies ->
Command.Drop.startRemote afile ai numcopies key r

View file

@ -8,8 +8,9 @@ git-annex (8.20200720.2) UNRELEASED; urgency=medium
some weird inheriting of ssh FDs by sshd. Bug was introduced in
git-annex version 7.20200202.7.
* Fix a bug in find --branch in the previous version.
* move, copy: Sped up seeking for annexed files to operate on by a factor
of nearly 2x.
* move, copy: Some performance improvements.
* drop: Sped up seeking files to drop by 2x, and also some performance
improvements to checking numcopies.
-- Joey Hess <id@joeyh.name> Tue, 21 Jul 2020 12:58:30 -0400

View file

@ -1,6 +1,6 @@
{- git-annex command
-
- Copyright 2010 Joey Hess <id@joeyh.name>
- Copyright 2010-2020 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
@ -52,53 +52,55 @@ parseDropFromOption = parseRemoteOption <$> strOption
)
seek :: DropOptions -> CommandSeek
seek o = startConcurrency commandStages $
seek o = startConcurrency commandStages $ do
from <- case dropFrom o of
Nothing -> pure Nothing
Just f -> getParsed f >>= \remote -> do
u <- getUUID
if Remote.uuid remote == u
then pure Nothing
else pure (Just remote)
let seeker = AnnexedFileSeeker
{ startAction = start o from
, checkContentPresent = case from of
Nothing -> Just True
Just _ -> Nothing
, usesLocationLog = True
}
case batchOption o of
Batch fmt -> batchAnnexedFilesMatching fmt seeker
NoBatch -> withKeyOptions (keyOptions o) (autoMode o) seeker
(commandAction . startKeys o)
(commandAction . startKeys o from)
(withFilesInGitAnnex ww seeker)
=<< workTreeItems ww (dropFiles o)
where
ww = WarnUnmatchLsFiles
seeker = AnnexedFileSeeker
{ startAction = start o
, checkContentPresent = Nothing
, usesLocationLog = False
}
start :: DropOptions -> RawFilePath -> Key -> CommandStart
start o file key = start' o key afile ai
start :: DropOptions -> Maybe Remote -> RawFilePath -> Key -> CommandStart
start o from file key = start' o from key afile ai
where
afile = AssociatedFile (Just file)
ai = mkActionItem (key, afile)
start' :: DropOptions -> Key -> AssociatedFile -> ActionItem -> CommandStart
start' o key afile ai = do
from <- maybe (pure Nothing) (Just <$$> getParsed) (dropFrom o)
start' :: DropOptions -> Maybe Remote -> Key -> AssociatedFile -> ActionItem -> CommandStart
start' o from key afile ai =
checkDropAuto (autoMode o) from afile key $ \numcopies ->
stopUnless (want from) $
stopUnless want $
case from of
Nothing -> startLocal afile ai numcopies key []
Just remote -> do
u <- getUUID
if Remote.uuid remote == u
then startLocal afile ai numcopies key []
else startRemote afile ai numcopies key remote
where
want from
| autoMode o = wantDrop False (Remote.uuid <$> from) (Just key) afile
| otherwise = return True
Just remote -> startRemote afile ai numcopies key remote
where
want
| autoMode o = wantDrop False (Remote.uuid <$> from) (Just key) afile
| otherwise = return True
startKeys :: DropOptions -> (Key, ActionItem) -> CommandStart
startKeys o (key, ai) = start' o key (AssociatedFile Nothing) ai
startKeys :: DropOptions -> Maybe Remote -> (Key, ActionItem) -> CommandStart
startKeys o from (key, ai) = start' o from key (AssociatedFile Nothing) ai
startLocal :: AssociatedFile -> ActionItem -> NumCopies -> Key -> [VerifiedCopy] -> CommandStart
startLocal afile ai numcopies key preverified =
stopUnless (inAnnex key) $
starting "drop" (OnlyActionOn key ai) $
performLocal key afile numcopies preverified
starting "drop" (OnlyActionOn key ai) $
performLocal key afile numcopies preverified
startRemote :: AssociatedFile -> ActionItem -> NumCopies -> Key -> Remote -> CommandStart
startRemote afile ai numcopies key remote =

View file

@ -33,11 +33,9 @@ and precache them.
> > > * `sync --content` 2x speedup!
> > > * `fsck --fast` 1.5x speedup
> > > * `whereis` 1.5x speedup
> > > * `copy --to --fast` twenty-five percent or so speedup
> > > * `copy --to` 2x speedup
> > > * `copy --from` 2x speedup
> > >
> > > For copy benchmarks, note that both repos had all files.
> > >
> > > move, copy, and drop probably are also faster, but the work will
> > > dominate
> > >
> > > [[done]]