git-annex/Command/Mirror.hs
Joey Hess 579d9b60c1
improve concurrency of move/copy --from --to
Use separate stages for download and upload. In the common case where
it downloads the file from one remote and then uploads to the other,
those are by far the most expensive operations, and there's a decent
chance the two remotes bottleneck on different resources.

Suppose it's being run with -J2 and a bunch of 10 mb files. Two threads
will be started both downloading from the src remote. They will probably
finish at the same time. Then two threads will be started uploading to
the dst remote. They will probably take the same time as well. Before
this change, it would alternate back and forth, bottlenecking on src and dst.
With this change, as soon as the two threads start uploading to dst, two
more threads are able to start, downloading from src. So bandwidth to
both remotes is saturated more often.

Other commands that use transferStages only send in one direction at a
time. So the worker threads for the other direction will sit idle, and
there will be no change in their behavior.

Sponsored-by: Dartmouth College's DANDI project
2023-01-24 13:59:39 -04:00

90 lines
2.8 KiB
Haskell

{- git-annex command
-
- Copyright 2013 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Command.Mirror where
import Command
import qualified Command.Move
import qualified Command.Drop
import qualified Command.Get
import qualified Remote
import Annex.Content
import Annex.NumCopies
import Types.Transfer
cmd :: Command
cmd = withAnnexOptions [jobsOption, jsonOptions, jsonProgressOption, annexedMatchingOptions] $
command "mirror" SectionCommon
"mirror content of files to/from another repository"
paramPaths (seek <--< optParser)
data MirrorOptions = MirrorOptions
{ mirrorFiles :: CmdParams
, fromToOptions :: FromToOptions
, keyOptions :: Maybe KeyOptions
}
optParser :: CmdParamsDesc -> Parser MirrorOptions
optParser desc = MirrorOptions
<$> cmdParams desc
<*> parseFromToOptions
<*> optional (parseKeyOptions <|> parseFailedTransfersOption)
instance DeferredParseClass MirrorOptions where
finishParse v = MirrorOptions
<$> pure (mirrorFiles v)
<*> finishParse (fromToOptions v)
<*> pure (keyOptions v)
seek :: MirrorOptions -> CommandSeek
seek o = startConcurrency stages $
withKeyOptions (keyOptions o) False seeker
(commandAction . startKey o (AssociatedFile Nothing))
(withFilesInGitAnnex ww seeker)
=<< workTreeItems ww (mirrorFiles o)
where
stages = case fromToOptions o of
FromRemote _ -> transferStages
ToRemote _ -> commandStages
ww = WarnUnmatchLsFiles
seeker = AnnexedFileSeeker
{ startAction = start o
, checkContentPresent = Nothing
, usesLocationLog = True
}
start :: MirrorOptions -> SeekInput -> RawFilePath -> Key -> CommandStart
start o si file k = startKey o afile (si, k, ai)
where
afile = AssociatedFile (Just file)
ai = mkActionItem (k, afile)
startKey :: MirrorOptions -> AssociatedFile -> (SeekInput, Key, ActionItem) -> CommandStart
startKey o afile (si, key, ai) = case fromToOptions o of
ToRemote r -> checkFailedTransferDirection ai Upload $ ifM (inAnnex key)
( Command.Move.toStart Command.Move.RemoveNever afile key ai si =<< getParsed r
, do
(numcopies, mincopies) <- getSafestNumMinCopies afile key
Command.Drop.startRemote pcc afile ai si numcopies mincopies key (Command.Drop.DroppingUnused False)
=<< getParsed r
)
FromRemote r -> checkFailedTransferDirection ai Download $ do
haskey <- flip Remote.hasKey key =<< getParsed r
case haskey of
Left _ -> stop
Right True -> ifM (inAnnex key)
( stop
, Command.Get.start' (return True) Nothing key afile ai si
)
Right False -> ifM (inAnnex key)
( do
(numcopies, mincopies) <- getSafestNumMinCopies afile key
Command.Drop.startLocal pcc afile ai si numcopies mincopies key [] (Command.Drop.DroppingUnused False)
, stop
)
where
pcc = Command.Drop.PreferredContentChecked False