git-annex/Types/Transfer.hs
Joey Hess 579d9b60c1
improve concurrency of move/copy --from --to
Use separate stages for download and upload. In the common case where
it downloads the file from one remote and then uploads to the other,
those are by far the most expensive operations, and there's a decent
chance the two remotes bottleneck on different resources.

Suppose it's being run with -J2 and a bunch of 10 mb files. Two threads
will be started both downloading from the src remote. They will probably
finish at the same time. Then two threads will be started uploading to
the dst remote. They will probably take the same time as well. Before
this change, it would alternate back and forth, bottlenecking on src and dst.
With this change, as soon as the two threads start uploading to dst, two
more threads are able to start, downloading from src. So bandwidth to
both remotes is saturated more often.

Other commands that use transferStages only send in one direction at a
time. So the worker threads for the other direction will sit idle, and
there will be no change in their behavior.

Sponsored-by: Dartmouth College's DANDI project
2023-01-24 13:59:39 -04:00

99 lines
2.5 KiB
Haskell

{- git-annex transfer types
-
- Copyright 2012 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE FlexibleInstances #-}
module Types.Transfer (
module Types.Transfer,
module Types.Direction
) where
import Types
import Types.Remote (Verification(..))
import Types.Key
import Types.Direction
import Utility.PID
import Utility.QuickCheck
import Utility.Url
import Utility.FileSystemEncoding
import Data.Time.Clock.POSIX
import Control.Concurrent
import Control.Applicative
import Prelude
{- Enough information to uniquely identify a transfer. -}
data Transfer = Transfer
{ transferDirection :: Direction
, transferUUID :: UUID
, transferKeyData :: KeyData
}
deriving (Eq, Ord, Show, Read)
transferKey :: Transfer -> Key
transferKey = mkKey . const . transferKeyData
{- Information about a Transfer, stored in the transfer information file.
-
- Note that the associatedFile may not correspond to a file in the local
- git repository. It's some file, possibly relative to some directory,
- of some repository, that was acted on to initiate the transfer.
-}
data TransferInfo = TransferInfo
{ startedTime :: Maybe POSIXTime
, transferPid :: Maybe PID
, transferTid :: Maybe ThreadId
, transferRemote :: Maybe Remote
, bytesComplete :: Maybe Integer
, associatedFile :: AssociatedFile
, transferPaused :: Bool
}
deriving (Show, Eq, Ord)
stubTransferInfo :: TransferInfo
stubTransferInfo = TransferInfo Nothing Nothing Nothing Nothing Nothing (AssociatedFile Nothing) False
instance Arbitrary TransferInfo where
arbitrary = TransferInfo
<$> arbitrary
<*> arbitrary
<*> pure Nothing -- cannot generate a ThreadID
<*> pure Nothing -- remote not needed
<*> arbitrary
<*> arbitrary
<*> arbitrary
class Observable a where
observeBool :: a -> Bool
observeFailure :: a
instance Observable Bool where
observeBool = id
observeFailure = False
instance Observable (Bool, Verification) where
observeBool = fst
observeFailure = (False, UnVerified)
instance Observable (Either e Bool) where
observeBool (Left _) = False
observeBool (Right b) = b
observeFailure = Right False
instance Observable (Maybe a) where
observeBool (Just _) = True
observeBool Nothing = False
observeFailure = Nothing
class Transferrable t where
descTransfrerrable :: t -> Maybe String
instance Transferrable AssociatedFile where
descTransfrerrable (AssociatedFile af) = fromRawFilePath <$> af
instance Transferrable URLString where
descTransfrerrable = Just