get -J: Improve distribution of jobs amoung remotes when there are more jobs than remotes.

It was distributing jobs to remotes that were not being used by any other
job. But, suppose that there are only 2 remotes, and -J10. In such a case,
the first 2 downloads would be distributed amoung the 2 remotes, but
the other 8 would all go to remote #1. Improved by keeping a counter
of how many jobs are assigned to a remote, and prefer remotes with fewer
jobs.

Note use of Data.Map.Strict to avoid blowing up space. I kept the
bang-patterns as-is, although probably not needed with Data.Map.Strict.

This commit was sponsored by Jack Hill on Patreon.
This commit is contained in:
Joey Hess 2017-03-08 14:49:30 -04:00
parent fde5dbff2b
commit 0534152685
No known key found for this signature in database
GPG key ID: C910D9222512E3C7
3 changed files with 13 additions and 11 deletions

View file

@ -136,7 +136,7 @@ data AnnexState = AnnexState
, existinghooks :: M.Map Git.Hook.Hook Bool
, desktopnotify :: DesktopNotify
, workers :: [Either AnnexState (Async AnnexState)]
, activeremotes :: MVar (S.Set (Types.Remote.RemoteA Annex))
, activeremotes :: MVar (M.Map (Types.Remote.RemoteA Annex) Integer)
, keysdbhandle :: Maybe Keys.DbHandle
, cachedcurrentbranch :: Maybe Git.Branch
, cachedgitenv :: Maybe [(String, String)]
@ -144,7 +144,7 @@ data AnnexState = AnnexState
newState :: GitConfig -> Git.Repo -> IO AnnexState
newState c r = do
emptyactiveremotes <- newMVar S.empty
emptyactiveremotes <- newMVar M.empty
return $ AnnexState
{ repo = r
, repoadjustment = return

View file

@ -32,7 +32,8 @@ import qualified Types.Remote as Remote
import Types.Concurrency
import Control.Concurrent
import qualified Data.Set as S
import qualified Data.Map.Strict as M
import Data.Ord
class Observable a where
observeBool :: a -> Bool
@ -218,7 +219,7 @@ pickRemote l a = go l =<< Annex.getState Annex.concurrency
go rs (Concurrent n) | n > 1 = do
mv <- Annex.getState Annex.activeremotes
active <- liftIO $ takeMVar mv
let rs' = sortBy (inactiveFirst active) rs
let rs' = sortBy (lessActiveFirst active) rs
goconcurrent mv active rs'
go (r:rs) _ = do
ok <- a r
@ -229,11 +230,11 @@ pickRemote l a = go l =<< Annex.getState Annex.concurrency
liftIO $ putMVar mv active
return observeFailure
goconcurrent mv active (r:rs) = do
let !active' = S.insert r active
let !active' = M.insertWith (+) r 1 active
liftIO $ putMVar mv active'
let getnewactive = do
active'' <- liftIO $ takeMVar mv
let !active''' = S.delete r active''
let !active''' = M.update (\n -> if n > 1 then Just (n-1) else Nothing) r active''
return active'''
let removeactive = liftIO . putMVar mv =<< getnewactive
ok <- a r `onException` removeactive
@ -246,11 +247,10 @@ pickRemote l a = go l =<< Annex.getState Annex.concurrency
-- Re-sort the remaining rs
-- because other threads could have
-- been assigned them in the meantime.
let rs' = sortBy (inactiveFirst active'') rs
let rs' = sortBy (lessActiveFirst active'') rs
goconcurrent mv active'' rs'
inactiveFirst :: S.Set Remote -> Remote -> Remote -> Ordering
inactiveFirst active a b
| Remote.cost a == Remote.cost b =
if a `S.member` active then GT else LT
lessActiveFirst :: M.Map Remote Integer -> Remote -> Remote -> Ordering
lessActiveFirst active a b
| Remote.cost a == Remote.cost b = comparing (`M.lookup` active) a b
| otherwise = compare a b

View file

@ -7,6 +7,8 @@ git-annex (6.20170301.2) UNRELEASED; urgency=medium
so any system ssh will be preferred over it.
* assistant: Add 1/200th second delay between checking each file
in the full transfer scan, to avoid using too much CPU.
* get -J: Improve distribution of jobs amoung remotes when there are more
jobs than remotes.
-- Joey Hess <id@joeyh.name> Thu, 02 Mar 2017 12:51:40 -0400