From 04cc470201a0f7b9eb1aa42b202ae56db36b6f4e Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Mon, 17 Jun 2019 14:58:02 -0400 Subject: [PATCH] run download checksum verification in separate job pool get, move, copy, sync: When -J or annex.jobs has enabled concurrency, checksum verification uses a separate job pool than is used for downloads, to keep bandwidth saturated. Not yet done for upload checksum verification, but that only affects remotes on local disks. --- CHANGELOG | 10 +++++---- Command.hs | 19 +++++++++++++++++ Command/Get.hs | 2 +- Command/Move.hs | 2 +- Command/Sync.hs | 3 ++- Types/Concurrency.hs | 3 +++ Types/WorkerPool.hs | 7 +++++- doc/todo/parallel_possibilities.mdwn | 32 +++++----------------------- 8 files changed, 43 insertions(+), 35 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index c72d2173da..7d2f1453a7 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,9 +1,11 @@ git-annex (7.20190616) UNRELEASED; urgency=medium - * When running multiple concurrent actions, the cleanup phase is run - in a separate queue than the main action queue. This can make some - commands faster, because less time is spent on bookkeeping in - between each file transfer. + * get, move, copy, sync: When -J or annex.jobs has enabled concurrency, + checksum verification uses a separate job pool than is used for + downloads, to keep bandwidth saturated. + * Other commands also run their cleanup phase using a separate job pool + than their perform phase, which may make some of them somewhat faster + when running concurrently as well. -- Joey Hess Sat, 15 Jun 2019 12:38:25 -0400 diff --git a/Command.hs b/Command.hs index 455e59bc08..a699f552f8 100644 --- a/Command.hs +++ b/Command.hs @@ -28,6 +28,7 @@ import Config import Utility.Daemon import Types.Transfer import Types.ActionItem +import Types.WorkerPool {- Generates a normal Command -} command :: String -> CommandSection -> String -> CmdParamsDesc -> (CmdParamsDesc -> CommandParser) -> Command @@ -104,6 +105,24 @@ stop = return Nothing stopUnless :: Annex Bool -> Annex (Maybe a) -> Annex (Maybe a) stopUnless c a = ifM c ( a , stop ) +{- This can be used in the perform stage to run the action that is the bulk + - of the work to do in that stage. If the action succeeds, then any actions + - run after it will be scheduled as if they were run in the cleanup stage + - instead of the perform stage. + - + - This is not needed for a perform stage that uses `next` to run the + - cleanup stage action. But sometimes a larger action is being built up + - and it's not practical to separate out the cleanup stage part from the + - rest of the action. + -} +performJob :: Observable a => Annex a -> Annex a +performJob a = do + r <- a + if observeBool r + then changeStageTo CleanupStage + else noop + return r + {- When acting on a failed transfer, stops unless it was in the specified - direction. -} checkFailedTransferDirection :: ActionItem -> Direction -> Annex (Maybe a) -> Annex (Maybe a) diff --git a/Command/Get.hs b/Command/Get.hs index a68e60ffc3..d67bab5774 100644 --- a/Command/Get.hs +++ b/Command/Get.hs @@ -108,7 +108,7 @@ getKey' key afile = dispatch | Remote.hasKeyCheap r = either (const False) id <$> Remote.hasKey r key | otherwise = return True - docopy r witness = getViaTmp (Remote.retrievalSecurityPolicy r) (RemoteVerify r) key $ \dest -> + docopy r witness = performJob $ getViaTmp (Remote.retrievalSecurityPolicy r) (RemoteVerify r) key $ \dest -> download (Remote.uuid r) key afile stdRetry (\p -> do showAction $ "from " ++ Remote.name r diff --git a/Command/Move.hs b/Command/Move.hs index 7020a27371..04d5b87856 100644 --- a/Command/Move.hs +++ b/Command/Move.hs @@ -208,7 +208,7 @@ fromPerform src removewhen key afile = do where go = notifyTransfer Download afile $ download (Remote.uuid src) key afile stdRetry $ \p -> - getViaTmp (Remote.retrievalSecurityPolicy src) (RemoteVerify src) key $ \t -> + performJob $ getViaTmp (Remote.retrievalSecurityPolicy src) (RemoteVerify src) key $ \t -> Remote.retrieveKeyFile src key afile t p dispatch _ _ False = stop -- failed dispatch RemoveNever _ True = next $ return True -- copy complete diff --git a/Command/Sync.hs b/Command/Sync.hs index 77287fda22..31e7bc15e6 100644 --- a/Command/Sync.hs +++ b/Command/Sync.hs @@ -692,7 +692,8 @@ syncFile ebloom rs af k = do , return [] ) get have = includeCommandAction $ starting "get" ai $ - next $ getKey' k af have + stopUnless (getKey' k af have) $ + next $ return True wantput r | Remote.readonly r || remoteAnnexReadOnly (Remote.gitconfig r) = return False diff --git a/Types/Concurrency.hs b/Types/Concurrency.hs index fa58798640..3eab7b71e6 100644 --- a/Types/Concurrency.hs +++ b/Types/Concurrency.hs @@ -7,6 +7,9 @@ module Types.Concurrency where import Utility.PartialPrelude +-- Note that Concurrent 1 is not the same as NonConcurrent; +-- the former specifies 1 job of each particular kind, but there can be +-- more than one kind of job running concurrently. data Concurrency = NonConcurrent | Concurrent Int | ConcurrentPerCpu parseConcurrency :: String -> Maybe Concurrency diff --git a/Types/WorkerPool.hs b/Types/WorkerPool.hs index a2b7131bf4..6ded904ee3 100644 --- a/Types/WorkerPool.hs +++ b/Types/WorkerPool.hs @@ -14,6 +14,7 @@ import Control.Concurrent.Async data WorkerPool t = UnallocatedWorkerPool | WorkerPool [Worker t] + deriving (Show) -- | A worker can either be idle or running an Async action. -- And it is used for some stage. @@ -21,9 +22,13 @@ data Worker t = IdleWorker t WorkerStage | ActiveWorker (Async t) WorkerStage +instance Show (Worker t) where + show (IdleWorker _ s) = "IdleWorker " ++ show s + show (ActiveWorker _ s) = "ActiveWorker " ++ show s + -- | These correspond to CommandPerform and CommandCleanup. data WorkerStage = PerformStage | CleanupStage - deriving (Eq) + deriving (Show, Eq) workerStage :: Worker t -> WorkerStage workerStage (IdleWorker _ s) = s diff --git a/doc/todo/parallel_possibilities.mdwn b/doc/todo/parallel_possibilities.mdwn index b222699013..1742f84f1a 100644 --- a/doc/todo/parallel_possibilities.mdwn +++ b/doc/todo/parallel_possibilities.mdwn @@ -8,30 +8,8 @@ are still some things that could be improved, tracked here: supported; it might be good to have `--jobs=cpus-1` to leave a spare cpu to avoid contention, or `--jobs=remotes*2` to run 2 jobs per remote. -* Parallelism is often used when the user wants to full saturate the pipe - to a remote, since having some extra transfers running avoid being - delayed while git-annex runs cleanup actions, checksum verification, - and other non-transfer stuff. - - But, the user will sometimes be disappointed, because every job - can still end up stuck doing checksum verification at the same time, - so the pipe to the remote is not saturated. - - Now that cleanup actions don't occupy space in the main worker queue, - all that needs to be done is make checksum verification be done as the - cleanup action. Currently, it's bundled into the same action that - transfers content. - - > Had a closer look at moving the checksum verification to cleanup, - > and it's really quite difficult to do. Things like runTransfer - > and pickRemote expect to be able to run the entire transfer action, - > including verification, and if it fails may retry it or try to - > transfer from a different remote instead. - > - > It feels like inverting all that control to move verification to - > cleanup would introduce a lot of complexity if it's even possible to do - > cleanly at all. - > - > Makes me wonder about just calling changeStageTo once the transfer - > is complete and before verification. Feels like a hack, but I think it - > would just work. +* Checksum verification is done in the cleanup stage job pool now for + `git-annex get`, and `git-annex move --from` etc. But only for downloads. + When an upload involves checksum verification, eg `git annex move --to` a + removable drive, that checksum verification is done inside Remote.Git, + and still runs in the perform stage job pool.