From 18f1fb584179bf95a9bc5e70f8601f06070e2515 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Fri, 24 Jul 2020 13:27:30 -0400 Subject: [PATCH] drop performance improvements Sped up seeking files to drop by 2x, and also some performance improvements to checking numcopies. Interestingly, the seek speedup is not due to precaching, but I think is due to calling getParsed earlier. Annex.Drop had to be changed to check inAnnex there, since it was removed from Command.Drop. All other users of Command.Drop already checked inAnnex themselves. This commit was sponsored by Ryan Newton on Patreon. --- Annex/Drop.hs | 4 +- CHANGELOG | 5 +- Command/Drop.hs | 60 ++++++++++--------- ...logs_for_speed_with_cat-file_--buffer.mdwn | 8 +-- 4 files changed, 40 insertions(+), 37 deletions(-) diff --git a/Annex/Drop.hs b/Annex/Drop.hs index af603ac981..57ca15bc3e 100644 --- a/Annex/Drop.hs +++ b/Annex/Drop.hs @@ -17,6 +17,7 @@ import qualified Command.Drop import Command import Annex.Wanted import Annex.SpecialRemote.Config +import Annex.Content import qualified Database.Keys import Git.FilePath @@ -118,7 +119,8 @@ handleDropsFrom locs rs reason fromhere key afile preverified runner = do ) dropl fs n = checkdrop fs n Nothing $ \numcopies -> - Command.Drop.startLocal afile ai numcopies key preverified + stopUnless (inAnnex key) $ + Command.Drop.startLocal afile ai numcopies key preverified dropr fs r n = checkdrop fs n (Just $ Remote.uuid r) $ \numcopies -> Command.Drop.startRemote afile ai numcopies key r diff --git a/CHANGELOG b/CHANGELOG index 6dc53d6b57..d7b353ae77 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -8,8 +8,9 @@ git-annex (8.20200720.2) UNRELEASED; urgency=medium some weird inheriting of ssh FDs by sshd. Bug was introduced in git-annex version 7.20200202.7. * Fix a bug in find --branch in the previous version. - * move, copy: Sped up seeking for annexed files to operate on by a factor - of nearly 2x. + * move, copy: Some performance improvements. + * drop: Sped up seeking files to drop by 2x, and also some performance + improvements to checking numcopies. -- Joey Hess Tue, 21 Jul 2020 12:58:30 -0400 diff --git a/Command/Drop.hs b/Command/Drop.hs index ccac908adb..c81b727c1c 100644 --- a/Command/Drop.hs +++ b/Command/Drop.hs @@ -1,6 +1,6 @@ {- git-annex command - - - Copyright 2010 Joey Hess + - Copyright 2010-2020 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -52,53 +52,55 @@ parseDropFromOption = parseRemoteOption <$> strOption ) seek :: DropOptions -> CommandSeek -seek o = startConcurrency commandStages $ +seek o = startConcurrency commandStages $ do + from <- case dropFrom o of + Nothing -> pure Nothing + Just f -> getParsed f >>= \remote -> do + u <- getUUID + if Remote.uuid remote == u + then pure Nothing + else pure (Just remote) + let seeker = AnnexedFileSeeker + { startAction = start o from + , checkContentPresent = case from of + Nothing -> Just True + Just _ -> Nothing + , usesLocationLog = True + } case batchOption o of Batch fmt -> batchAnnexedFilesMatching fmt seeker NoBatch -> withKeyOptions (keyOptions o) (autoMode o) seeker - (commandAction . startKeys o) + (commandAction . startKeys o from) (withFilesInGitAnnex ww seeker) =<< workTreeItems ww (dropFiles o) where ww = WarnUnmatchLsFiles - seeker = AnnexedFileSeeker - { startAction = start o - , checkContentPresent = Nothing - , usesLocationLog = False - } - -start :: DropOptions -> RawFilePath -> Key -> CommandStart -start o file key = start' o key afile ai +start :: DropOptions -> Maybe Remote -> RawFilePath -> Key -> CommandStart +start o from file key = start' o from key afile ai where afile = AssociatedFile (Just file) ai = mkActionItem (key, afile) -start' :: DropOptions -> Key -> AssociatedFile -> ActionItem -> CommandStart -start' o key afile ai = do - from <- maybe (pure Nothing) (Just <$$> getParsed) (dropFrom o) +start' :: DropOptions -> Maybe Remote -> Key -> AssociatedFile -> ActionItem -> CommandStart +start' o from key afile ai = checkDropAuto (autoMode o) from afile key $ \numcopies -> - stopUnless (want from) $ + stopUnless want $ case from of Nothing -> startLocal afile ai numcopies key [] - Just remote -> do - u <- getUUID - if Remote.uuid remote == u - then startLocal afile ai numcopies key [] - else startRemote afile ai numcopies key remote - where - want from - | autoMode o = wantDrop False (Remote.uuid <$> from) (Just key) afile - | otherwise = return True + Just remote -> startRemote afile ai numcopies key remote + where + want + | autoMode o = wantDrop False (Remote.uuid <$> from) (Just key) afile + | otherwise = return True -startKeys :: DropOptions -> (Key, ActionItem) -> CommandStart -startKeys o (key, ai) = start' o key (AssociatedFile Nothing) ai +startKeys :: DropOptions -> Maybe Remote -> (Key, ActionItem) -> CommandStart +startKeys o from (key, ai) = start' o from key (AssociatedFile Nothing) ai startLocal :: AssociatedFile -> ActionItem -> NumCopies -> Key -> [VerifiedCopy] -> CommandStart startLocal afile ai numcopies key preverified = - stopUnless (inAnnex key) $ - starting "drop" (OnlyActionOn key ai) $ - performLocal key afile numcopies preverified + starting "drop" (OnlyActionOn key ai) $ + performLocal key afile numcopies preverified startRemote :: AssociatedFile -> ActionItem -> NumCopies -> Key -> Remote -> CommandStart startRemote afile ai numcopies key remote = diff --git a/doc/todo/precache_logs_for_speed_with_cat-file_--buffer.mdwn b/doc/todo/precache_logs_for_speed_with_cat-file_--buffer.mdwn index da6962b65b..6ab4b7887a 100644 --- a/doc/todo/precache_logs_for_speed_with_cat-file_--buffer.mdwn +++ b/doc/todo/precache_logs_for_speed_with_cat-file_--buffer.mdwn @@ -33,11 +33,9 @@ and precache them. > > > * `sync --content` 2x speedup! > > > * `fsck --fast` 1.5x speedup > > > * `whereis` 1.5x speedup -> > > * `copy --to --fast` twenty-five percent or so speedup -> > > * `copy --to` 2x speedup -> > > * `copy --from` 2x speedup -> > > -> > > For copy benchmarks, note that both repos had all files. +> > > +> > > move, copy, and drop probably are also faster, but the work will +> > > dominate > > > > > > [[done]]