579d9b60c1
Use separate stages for download and upload. In the common case where it downloads the file from one remote and then uploads to the other, those are by far the most expensive operations, and there's a decent chance the two remotes bottleneck on different resources. Suppose it's being run with -J2 and a bunch of 10 mb files. Two threads will be started both downloading from the src remote. They will probably finish at the same time. Then two threads will be started uploading to the dst remote. They will probably take the same time as well. Before this change, it would alternate back and forth, bottlenecking on src and dst. With this change, as soon as the two threads start uploading to dst, two more threads are able to start, downloading from src. So bandwidth to both remotes is saturated more often. Other commands that use transferStages only send in one direction at a time. So the worker threads for the other direction will sit idle, and there will be no change in their behavior. Sponsored-by: Dartmouth College's DANDI project
121 lines
3.8 KiB
Haskell
121 lines
3.8 KiB
Haskell
{- git-annex command
|
|
-
|
|
- Copyright 2010-2023 Joey Hess <id@joeyh.name>
|
|
-
|
|
- Licensed under the GNU AGPL version 3 or higher.
|
|
-}
|
|
|
|
module Command.Get where
|
|
|
|
import Command
|
|
import qualified Remote
|
|
import Annex.Transfer
|
|
import Annex.NumCopies
|
|
import Annex.Wanted
|
|
import qualified Command.Move
|
|
import Logs.Location
|
|
|
|
cmd :: Command
|
|
cmd = withAnnexOptions [jobsOption, jsonOptions, jsonProgressOption, annexedMatchingOptions] $
|
|
command "get" SectionCommon
|
|
"make content of annexed files available"
|
|
paramPaths (seek <$$> optParser)
|
|
|
|
data GetOptions = GetOptions
|
|
{ getFiles :: CmdParams
|
|
, getFrom :: Maybe (DeferredParse Remote)
|
|
, autoMode :: Bool
|
|
, keyOptions :: Maybe KeyOptions
|
|
, batchOption :: BatchMode
|
|
}
|
|
|
|
optParser :: CmdParamsDesc -> Parser GetOptions
|
|
optParser desc = GetOptions
|
|
<$> cmdParams desc
|
|
<*> optional (parseRemoteOption <$> parseFromOption)
|
|
<*> parseAutoOption
|
|
<*> optional (parseIncompleteOption <|> parseKeyOptions <|> parseFailedTransfersOption)
|
|
<*> parseBatchOption True
|
|
|
|
seek :: GetOptions -> CommandSeek
|
|
seek o = startConcurrency transferStages $ do
|
|
from <- maybe (pure Nothing) (Just <$$> getParsed) (getFrom o)
|
|
let seeker = AnnexedFileSeeker
|
|
{ startAction = start o from
|
|
, checkContentPresent = Just False
|
|
, usesLocationLog = True
|
|
}
|
|
case batchOption o of
|
|
NoBatch -> withKeyOptions (keyOptions o) (autoMode o) seeker
|
|
(commandAction . startKeys from)
|
|
(withFilesInGitAnnex ww seeker)
|
|
=<< workTreeItems ww (getFiles o)
|
|
Batch fmt -> batchOnly (keyOptions o) (getFiles o) $
|
|
batchAnnexed fmt seeker (startKeys from)
|
|
where
|
|
ww = WarnUnmatchLsFiles
|
|
|
|
start :: GetOptions -> Maybe Remote -> SeekInput -> RawFilePath -> Key -> CommandStart
|
|
start o from si file key = start' expensivecheck from key afile ai si
|
|
where
|
|
afile = AssociatedFile (Just file)
|
|
ai = mkActionItem (key, afile)
|
|
expensivecheck
|
|
| autoMode o = numCopiesCheck file key (<)
|
|
<||> wantGet False (Just key) afile
|
|
| otherwise = return True
|
|
|
|
startKeys :: Maybe Remote -> (SeekInput, Key, ActionItem) -> CommandStart
|
|
startKeys from (si, key, ai) = checkFailedTransferDirection ai Download $
|
|
start' (return True) from key (AssociatedFile Nothing) ai si
|
|
|
|
start' :: Annex Bool -> Maybe Remote -> Key -> AssociatedFile -> ActionItem -> SeekInput -> CommandStart
|
|
start' expensivecheck from key afile ai si =
|
|
stopUnless expensivecheck $
|
|
case from of
|
|
Nothing -> go $ perform key afile
|
|
Just src ->
|
|
stopUnless (Command.Move.fromOk src key) $
|
|
go $ Command.Move.fromPerform src Command.Move.RemoveNever key afile
|
|
where
|
|
go = starting "get" (OnlyActionOn key ai) si
|
|
|
|
perform :: Key -> AssociatedFile -> CommandPerform
|
|
perform key afile = stopUnless (getKey key afile) $
|
|
next $ return True -- no cleanup needed
|
|
|
|
{- Try to find a copy of the file in one of the remotes,
|
|
- and copy it to here. -}
|
|
getKey :: Key -> AssociatedFile -> Annex Bool
|
|
getKey key afile = getKey' key afile =<< Remote.keyPossibilities key
|
|
|
|
getKey' :: Key -> AssociatedFile -> [Remote] -> Annex Bool
|
|
getKey' key afile = dispatch
|
|
where
|
|
dispatch [] = do
|
|
showNote "not available"
|
|
showlocs []
|
|
return False
|
|
dispatch remotes = notifyTransfer Download afile $ \witness -> do
|
|
ok <- pickRemote remotes $ \r -> ifM (probablyPresent r)
|
|
( docopy r witness
|
|
, return False
|
|
)
|
|
if ok
|
|
then return ok
|
|
else do
|
|
Remote.showTriedRemotes remotes
|
|
showlocs (map Remote.uuid remotes)
|
|
return False
|
|
showlocs exclude = Remote.showLocations False key exclude
|
|
"No other repository is known to contain the file."
|
|
-- This check is to avoid an ugly message if a remote is a
|
|
-- drive that is not mounted.
|
|
probablyPresent r
|
|
| Remote.hasKeyCheap r =
|
|
either (const False) id <$> Remote.hasKey r key
|
|
| otherwise = return True
|
|
docopy r witness = do
|
|
showAction $ "from " ++ Remote.name r
|
|
logStatusAfter key $
|
|
download r key afile stdRetry witness
|