get -J: Improve distribution of jobs amoung remotes when there are more jobs than remotes.
It was distributing jobs to remotes that were not being used by any other job. But, suppose that there are only 2 remotes, and -J10. In such a case, the first 2 downloads would be distributed amoung the 2 remotes, but the other 8 would all go to remote #1. Improved by keeping a counter of how many jobs are assigned to a remote, and prefer remotes with fewer jobs. Note use of Data.Map.Strict to avoid blowing up space. I kept the bang-patterns as-is, although probably not needed with Data.Map.Strict. This commit was sponsored by Jack Hill on Patreon.
This commit is contained in:
parent
fde5dbff2b
commit
0534152685
3 changed files with 13 additions and 11 deletions
4
Annex.hs
4
Annex.hs
|
@ -136,7 +136,7 @@ data AnnexState = AnnexState
|
|||
, existinghooks :: M.Map Git.Hook.Hook Bool
|
||||
, desktopnotify :: DesktopNotify
|
||||
, workers :: [Either AnnexState (Async AnnexState)]
|
||||
, activeremotes :: MVar (S.Set (Types.Remote.RemoteA Annex))
|
||||
, activeremotes :: MVar (M.Map (Types.Remote.RemoteA Annex) Integer)
|
||||
, keysdbhandle :: Maybe Keys.DbHandle
|
||||
, cachedcurrentbranch :: Maybe Git.Branch
|
||||
, cachedgitenv :: Maybe [(String, String)]
|
||||
|
@ -144,7 +144,7 @@ data AnnexState = AnnexState
|
|||
|
||||
newState :: GitConfig -> Git.Repo -> IO AnnexState
|
||||
newState c r = do
|
||||
emptyactiveremotes <- newMVar S.empty
|
||||
emptyactiveremotes <- newMVar M.empty
|
||||
return $ AnnexState
|
||||
{ repo = r
|
||||
, repoadjustment = return
|
||||
|
|
|
@ -32,7 +32,8 @@ import qualified Types.Remote as Remote
|
|||
import Types.Concurrency
|
||||
|
||||
import Control.Concurrent
|
||||
import qualified Data.Set as S
|
||||
import qualified Data.Map.Strict as M
|
||||
import Data.Ord
|
||||
|
||||
class Observable a where
|
||||
observeBool :: a -> Bool
|
||||
|
@ -218,7 +219,7 @@ pickRemote l a = go l =<< Annex.getState Annex.concurrency
|
|||
go rs (Concurrent n) | n > 1 = do
|
||||
mv <- Annex.getState Annex.activeremotes
|
||||
active <- liftIO $ takeMVar mv
|
||||
let rs' = sortBy (inactiveFirst active) rs
|
||||
let rs' = sortBy (lessActiveFirst active) rs
|
||||
goconcurrent mv active rs'
|
||||
go (r:rs) _ = do
|
||||
ok <- a r
|
||||
|
@ -229,11 +230,11 @@ pickRemote l a = go l =<< Annex.getState Annex.concurrency
|
|||
liftIO $ putMVar mv active
|
||||
return observeFailure
|
||||
goconcurrent mv active (r:rs) = do
|
||||
let !active' = S.insert r active
|
||||
let !active' = M.insertWith (+) r 1 active
|
||||
liftIO $ putMVar mv active'
|
||||
let getnewactive = do
|
||||
active'' <- liftIO $ takeMVar mv
|
||||
let !active''' = S.delete r active''
|
||||
let !active''' = M.update (\n -> if n > 1 then Just (n-1) else Nothing) r active''
|
||||
return active'''
|
||||
let removeactive = liftIO . putMVar mv =<< getnewactive
|
||||
ok <- a r `onException` removeactive
|
||||
|
@ -246,11 +247,10 @@ pickRemote l a = go l =<< Annex.getState Annex.concurrency
|
|||
-- Re-sort the remaining rs
|
||||
-- because other threads could have
|
||||
-- been assigned them in the meantime.
|
||||
let rs' = sortBy (inactiveFirst active'') rs
|
||||
let rs' = sortBy (lessActiveFirst active'') rs
|
||||
goconcurrent mv active'' rs'
|
||||
|
||||
inactiveFirst :: S.Set Remote -> Remote -> Remote -> Ordering
|
||||
inactiveFirst active a b
|
||||
| Remote.cost a == Remote.cost b =
|
||||
if a `S.member` active then GT else LT
|
||||
lessActiveFirst :: M.Map Remote Integer -> Remote -> Remote -> Ordering
|
||||
lessActiveFirst active a b
|
||||
| Remote.cost a == Remote.cost b = comparing (`M.lookup` active) a b
|
||||
| otherwise = compare a b
|
||||
|
|
|
@ -7,6 +7,8 @@ git-annex (6.20170301.2) UNRELEASED; urgency=medium
|
|||
so any system ssh will be preferred over it.
|
||||
* assistant: Add 1/200th second delay between checking each file
|
||||
in the full transfer scan, to avoid using too much CPU.
|
||||
* get -J: Improve distribution of jobs amoung remotes when there are more
|
||||
jobs than remotes.
|
||||
|
||||
-- Joey Hess <id@joeyh.name> Thu, 02 Mar 2017 12:51:40 -0400
|
||||
|
||||
|
|
Loading…
Reference in a new issue