git-annex/Command/Get.hs
Joey Hess 579d9b60c1
improve concurrency of move/copy --from --to
Use separate stages for download and upload. In the common case where
it downloads the file from one remote and then uploads to the other,
those are by far the most expensive operations, and there's a decent
chance the two remotes bottleneck on different resources.

Suppose it's being run with -J2 and a bunch of 10 mb files. Two threads
will be started both downloading from the src remote. They will probably
finish at the same time. Then two threads will be started uploading to
the dst remote. They will probably take the same time as well. Before
this change, it would alternate back and forth, bottlenecking on src and dst.
With this change, as soon as the two threads start uploading to dst, two
more threads are able to start, downloading from src. So bandwidth to
both remotes is saturated more often.

Other commands that use transferStages only send in one direction at a
time. So the worker threads for the other direction will sit idle, and
there will be no change in their behavior.

Sponsored-by: Dartmouth College's DANDI project
2023-01-24 13:59:39 -04:00

121 lines
3.8 KiB
Haskell

{- git-annex command
-
- Copyright 2010-2023 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Command.Get where
import Command
import qualified Remote
import Annex.Transfer
import Annex.NumCopies
import Annex.Wanted
import qualified Command.Move
import Logs.Location
cmd :: Command
cmd = withAnnexOptions [jobsOption, jsonOptions, jsonProgressOption, annexedMatchingOptions] $
command "get" SectionCommon
"make content of annexed files available"
paramPaths (seek <$$> optParser)
data GetOptions = GetOptions
{ getFiles :: CmdParams
, getFrom :: Maybe (DeferredParse Remote)
, autoMode :: Bool
, keyOptions :: Maybe KeyOptions
, batchOption :: BatchMode
}
optParser :: CmdParamsDesc -> Parser GetOptions
optParser desc = GetOptions
<$> cmdParams desc
<*> optional (parseRemoteOption <$> parseFromOption)
<*> parseAutoOption
<*> optional (parseIncompleteOption <|> parseKeyOptions <|> parseFailedTransfersOption)
<*> parseBatchOption True
seek :: GetOptions -> CommandSeek
seek o = startConcurrency transferStages $ do
from <- maybe (pure Nothing) (Just <$$> getParsed) (getFrom o)
let seeker = AnnexedFileSeeker
{ startAction = start o from
, checkContentPresent = Just False
, usesLocationLog = True
}
case batchOption o of
NoBatch -> withKeyOptions (keyOptions o) (autoMode o) seeker
(commandAction . startKeys from)
(withFilesInGitAnnex ww seeker)
=<< workTreeItems ww (getFiles o)
Batch fmt -> batchOnly (keyOptions o) (getFiles o) $
batchAnnexed fmt seeker (startKeys from)
where
ww = WarnUnmatchLsFiles
start :: GetOptions -> Maybe Remote -> SeekInput -> RawFilePath -> Key -> CommandStart
start o from si file key = start' expensivecheck from key afile ai si
where
afile = AssociatedFile (Just file)
ai = mkActionItem (key, afile)
expensivecheck
| autoMode o = numCopiesCheck file key (<)
<||> wantGet False (Just key) afile
| otherwise = return True
startKeys :: Maybe Remote -> (SeekInput, Key, ActionItem) -> CommandStart
startKeys from (si, key, ai) = checkFailedTransferDirection ai Download $
start' (return True) from key (AssociatedFile Nothing) ai si
start' :: Annex Bool -> Maybe Remote -> Key -> AssociatedFile -> ActionItem -> SeekInput -> CommandStart
start' expensivecheck from key afile ai si =
stopUnless expensivecheck $
case from of
Nothing -> go $ perform key afile
Just src ->
stopUnless (Command.Move.fromOk src key) $
go $ Command.Move.fromPerform src Command.Move.RemoveNever key afile
where
go = starting "get" (OnlyActionOn key ai) si
perform :: Key -> AssociatedFile -> CommandPerform
perform key afile = stopUnless (getKey key afile) $
next $ return True -- no cleanup needed
{- Try to find a copy of the file in one of the remotes,
- and copy it to here. -}
getKey :: Key -> AssociatedFile -> Annex Bool
getKey key afile = getKey' key afile =<< Remote.keyPossibilities key
getKey' :: Key -> AssociatedFile -> [Remote] -> Annex Bool
getKey' key afile = dispatch
where
dispatch [] = do
showNote "not available"
showlocs []
return False
dispatch remotes = notifyTransfer Download afile $ \witness -> do
ok <- pickRemote remotes $ \r -> ifM (probablyPresent r)
( docopy r witness
, return False
)
if ok
then return ok
else do
Remote.showTriedRemotes remotes
showlocs (map Remote.uuid remotes)
return False
showlocs exclude = Remote.showLocations False key exclude
"No other repository is known to contain the file."
-- This check is to avoid an ugly message if a remote is a
-- drive that is not mounted.
probablyPresent r
| Remote.hasKeyCheap r =
either (const False) id <$> Remote.hasKey r key
| otherwise = return True
docopy r witness = do
showAction $ "from " ++ Remote.name r
logStatusAfter key $
download r key afile stdRetry witness