annex.jobs=cpus etc
Added the ability to run one job per CPU (core), by setting annex.jobs=cpus, or using option --jobs=cpus or -Jcpus. Built with future expansion in mind, including not defaulting matching on Concurrency so more constructors can later be added, and using "cpu" instead of "0".
This commit is contained in:
parent
459bbd9005
commit
82186ca58f
20 changed files with 105 additions and 32 deletions
|
@ -189,8 +189,9 @@ prepSocket socketfile sshhost sshparams = do
|
|||
let socketlock = socket2lock socketfile
|
||||
|
||||
Annex.getState Annex.concurrency >>= \case
|
||||
NonConcurrent -> return ()
|
||||
Concurrent {} -> makeconnection socketlock
|
||||
_ -> return ()
|
||||
ConcurrentPerCpu -> makeconnection socketlock
|
||||
|
||||
lockFileCached socketlock
|
||||
where
|
||||
|
|
|
@ -247,20 +247,29 @@ pickRemote l a = debugLocks $ go l =<< Annex.getState Annex.concurrency
|
|||
where
|
||||
go [] _ = return observeFailure
|
||||
go (r:[]) _ = a r
|
||||
go rs (Concurrent n) | n > 1 = do
|
||||
mv <- Annex.getState Annex.activeremotes
|
||||
active <- liftIO $ takeMVar mv
|
||||
let rs' = sortBy (lessActiveFirst active) rs
|
||||
goconcurrent mv active rs'
|
||||
go (r:rs) _ = do
|
||||
go rs NonConcurrent = gononconcurrent rs
|
||||
go rs (Concurrent n)
|
||||
| n <= 1 = gononconcurrent rs
|
||||
| otherwise = goconcurrent rs
|
||||
go rs ConcurrentPerCpu = goconcurrent rs
|
||||
|
||||
gononconcurrent [] = return observeFailure
|
||||
gononconcurrent (r:rs) = do
|
||||
ok <- a r
|
||||
if observeBool ok
|
||||
then return ok
|
||||
else go rs NonConcurrent
|
||||
goconcurrent mv active [] = do
|
||||
else gononconcurrent rs
|
||||
|
||||
goconcurrent rs = do
|
||||
mv <- Annex.getState Annex.activeremotes
|
||||
active <- liftIO $ takeMVar mv
|
||||
let rs' = sortBy (lessActiveFirst active) rs
|
||||
goconcurrent' mv active rs'
|
||||
|
||||
goconcurrent' mv active [] = do
|
||||
liftIO $ putMVar mv active
|
||||
return observeFailure
|
||||
goconcurrent mv active (r:rs) = do
|
||||
goconcurrent' mv active (r:rs) = do
|
||||
let !active' = M.insertWith (+) r 1 active
|
||||
liftIO $ putMVar mv active'
|
||||
let getnewactive = do
|
||||
|
@ -279,7 +288,7 @@ pickRemote l a = debugLocks $ go l =<< Annex.getState Annex.concurrency
|
|||
-- because other threads could have
|
||||
-- been assigned them in the meantime.
|
||||
let rs' = sortBy (lessActiveFirst active'') rs
|
||||
goconcurrent mv active'' rs'
|
||||
goconcurrent' mv active'' rs'
|
||||
|
||||
lessActiveFirst :: M.Map Remote Integer -> Remote -> Remote -> Ordering
|
||||
lessActiveFirst active a b
|
||||
|
|
|
@ -3,6 +3,8 @@ git-annex (7.20190508) UNRELEASED; urgency=medium
|
|||
* Fixed bug that caused git-annex to fail to add a file when another
|
||||
git-annex process cleaned up the temp directory it was using.
|
||||
* Makefile: Added install-completions to install target.
|
||||
* Added the ability to run one job per CPU (core), by setting
|
||||
annex.jobs=cpus, or using option --jobs=cpus or -Jcpus.
|
||||
|
||||
-- Joey Hess <id@joeyh.name> Mon, 06 May 2019 13:52:02 -0400
|
||||
|
||||
|
|
|
@ -22,6 +22,7 @@ import Control.Concurrent
|
|||
import Control.Concurrent.Async
|
||||
import Control.Concurrent.STM
|
||||
import Control.Exception (throwIO)
|
||||
import GHC.Conc
|
||||
import Data.Either
|
||||
import qualified Data.Map.Strict as M
|
||||
import qualified System.Console.Regions as Regions
|
||||
|
@ -51,9 +52,14 @@ performCommandAction Command { cmdcheck = c, cmdname = name } seek cont = do
|
|||
- This should only be run in the seek stage.
|
||||
-}
|
||||
commandAction :: CommandStart -> Annex ()
|
||||
commandAction a = go =<< Annex.getState Annex.concurrency
|
||||
commandAction a = Annex.getState Annex.concurrency >>= \case
|
||||
NonConcurrent -> run
|
||||
Concurrent n -> runconcurrent n
|
||||
ConcurrentPerCpu -> runconcurrent =<< liftIO getNumProcessors
|
||||
where
|
||||
go (Concurrent n) = do
|
||||
run = void $ includeCommandAction a
|
||||
|
||||
runconcurrent n = do
|
||||
ws <- Annex.getState Annex.workers
|
||||
(st, ws') <- if null ws
|
||||
then do
|
||||
|
@ -71,8 +77,6 @@ commandAction a = go =<< Annex.getState Annex.concurrency
|
|||
w <- liftIO $ async
|
||||
$ snd <$> Annex.run st (inOwnConsoleRegion (Annex.output st) run)
|
||||
Annex.changeState $ \s -> s { Annex.workers = Right w:ws' }
|
||||
go NonConcurrent = run
|
||||
run = void $ includeCommandAction a
|
||||
|
||||
commandActions :: [CommandStart] -> Annex ()
|
||||
commandActions = mapM_ commandAction
|
||||
|
@ -170,18 +174,23 @@ allowConcurrentOutput :: Annex a -> Annex a
|
|||
allowConcurrentOutput a = do
|
||||
fromcmdline <- Annex.getState Annex.concurrency
|
||||
fromgitcfg <- annexJobs <$> Annex.getGitConfig
|
||||
let usegitcfg = Annex.changeState $
|
||||
\c -> c { Annex.concurrency = fromgitcfg }
|
||||
case (fromcmdline, fromgitcfg) of
|
||||
(NonConcurrent, NonConcurrent) -> a
|
||||
(Concurrent n, _) -> goconcurrent n
|
||||
(Concurrent n, _) -> do
|
||||
raisecapabilitiesto n
|
||||
goconcurrent
|
||||
(ConcurrentPerCpu, _) -> goconcurrent
|
||||
(NonConcurrent, Concurrent n) -> do
|
||||
Annex.changeState $
|
||||
\c -> c { Annex.concurrency = fromgitcfg }
|
||||
goconcurrent n
|
||||
usegitcfg
|
||||
raisecapabilitiesto n
|
||||
goconcurrent
|
||||
(NonConcurrent, ConcurrentPerCpu) -> do
|
||||
usegitcfg
|
||||
goconcurrent
|
||||
where
|
||||
goconcurrent n = do
|
||||
c <- liftIO getNumCapabilities
|
||||
when (n > c) $
|
||||
liftIO $ setNumCapabilities n
|
||||
goconcurrent = do
|
||||
withMessageState $ \s -> case outputType s of
|
||||
NormalOutput -> ifM (liftIO concurrentOutputSupported)
|
||||
( Regions.displayConsoleRegions $
|
||||
|
@ -190,13 +199,21 @@ allowConcurrentOutput a = do
|
|||
)
|
||||
_ -> goconcurrent' False
|
||||
goconcurrent' b = bracket_ (setup b) cleanup a
|
||||
|
||||
setup = setconcurrentoutputenabled
|
||||
|
||||
cleanup = do
|
||||
finishCommandActions
|
||||
setconcurrentoutputenabled False
|
||||
|
||||
setconcurrentoutputenabled b = Annex.changeState $ \s ->
|
||||
s { Annex.output = (Annex.output s) { concurrentOutputEnabled = b } }
|
||||
|
||||
raisecapabilitiesto n = do
|
||||
c <- liftIO getNumCapabilities
|
||||
when (n > c) $
|
||||
liftIO $ setNumCapabilities n
|
||||
|
||||
{- Ensures that only one thread processes a key at a time.
|
||||
- Other threads will block until it's done. -}
|
||||
onlyActionOn :: Key -> CommandStart -> CommandStart
|
||||
|
@ -212,7 +229,9 @@ onlyActionOn' :: Key -> Annex a -> Annex a
|
|||
onlyActionOn' k a = go =<< Annex.getState Annex.concurrency
|
||||
where
|
||||
go NonConcurrent = a
|
||||
go (Concurrent _) = do
|
||||
go (Concurrent _) = goconcurrent
|
||||
go ConcurrentPerCpu = goconcurrent
|
||||
goconcurrent = do
|
||||
tv <- Annex.getState Annex.activekeys
|
||||
bracket (setup tv) id (const a)
|
||||
setup tv = liftIO $ do
|
||||
|
|
|
@ -366,14 +366,15 @@ jsonProgressOption =
|
|||
jobsOption :: [GlobalOption]
|
||||
jobsOption =
|
||||
[ globalSetter set $
|
||||
option auto
|
||||
( long "jobs" <> short 'J' <> metavar paramNumber
|
||||
option (maybeReader parseConcurrency)
|
||||
( long "jobs" <> short 'J'
|
||||
<> metavar (paramNumber `paramOr` "cpu")
|
||||
<> help "enable concurrent jobs"
|
||||
<> hidden
|
||||
)
|
||||
]
|
||||
where
|
||||
set n = Annex.changeState $ \s -> s { Annex.concurrency = Concurrent n }
|
||||
set v = Annex.changeState $ \s -> s { Annex.concurrency = v }
|
||||
|
||||
timeLimitOption :: [GlobalOption]
|
||||
timeLimitOption =
|
||||
|
|
|
@ -262,10 +262,12 @@ implicitMessage = whenM (implicitMessages <$> Annex.getState Annex.output)
|
|||
- the user.
|
||||
-}
|
||||
prompt :: Annex a -> Annex a
|
||||
prompt a = debugLocks $ go =<< Annex.getState Annex.concurrency
|
||||
prompt a = debugLocks $ Annex.getState Annex.concurrency >>= \case
|
||||
NonConcurrent -> a
|
||||
(Concurrent _) -> goconcurrent
|
||||
ConcurrentPerCpu -> goconcurrent
|
||||
where
|
||||
go NonConcurrent = a
|
||||
go (Concurrent {}) = withMessageState $ \s -> do
|
||||
goconcurrent = withMessageState $ \s -> do
|
||||
let l = promptLock s
|
||||
bracketIO
|
||||
(takeMVar l)
|
||||
|
|
|
@ -5,4 +5,11 @@
|
|||
|
||||
module Types.Concurrency where
|
||||
|
||||
data Concurrency = NonConcurrent | Concurrent Int
|
||||
import Utility.PartialPrelude
|
||||
|
||||
data Concurrency = NonConcurrent | Concurrent Int | ConcurrentPerCpu
|
||||
|
||||
parseConcurrency :: String -> Maybe Concurrency
|
||||
parseConcurrency "cpus" = Just ConcurrentPerCpu
|
||||
parseConcurrency "cpu" = Just ConcurrentPerCpu
|
||||
parseConcurrency s = Concurrent <$> readish s
|
||||
|
|
|
@ -177,7 +177,8 @@ extractGitConfig r = GitConfig
|
|||
, annexAllowUnverifiedDownloads = (== Just "ACKTHPPT") $
|
||||
getmaybe (annex "security.allow-unverified-downloads")
|
||||
, annexMaxExtensionLength = getmayberead (annex "maxextensionlength")
|
||||
, annexJobs = maybe NonConcurrent Concurrent $ getmayberead (annex "jobs")
|
||||
, annexJobs = fromMaybe NonConcurrent $
|
||||
parseConcurrency =<< getmaybe (annex "jobs")
|
||||
, annexCacheCreds = getbool (annex "cachecreds") True
|
||||
, coreSymlinks = getbool "core.symlinks" True
|
||||
, coreSharedRepository = getSharedRepository r
|
||||
|
|
|
@ -56,6 +56,8 @@ annexed content, and other symlinks.
|
|||
Adds multiple files in parallel. This may be faster.
|
||||
For example: `-J4`
|
||||
|
||||
Setting this to "cpus" will run one job per CPU core.
|
||||
|
||||
* `--update` `-u`
|
||||
|
||||
Like `git add --update`, this does not add new files, but any updates
|
||||
|
|
|
@ -78,6 +78,8 @@ be used to get better filenames.
|
|||
Enables parallel downloads when multiple urls are being added.
|
||||
For example: `-J4`
|
||||
|
||||
Setting this to "cpus" will run one job per CPU core.
|
||||
|
||||
* `--batch`
|
||||
|
||||
Enables batch mode, in which lines containing urls to add are read from
|
||||
|
|
|
@ -34,6 +34,8 @@ Copies the content of files from or to another remote.
|
|||
Enables parallel transfers with up to the specified number of jobs
|
||||
running at once. For example: `-J10`
|
||||
|
||||
Setting this to "cpus" will run one job per CPU core.
|
||||
|
||||
* `--auto`
|
||||
|
||||
Rather than copying all files, only copy files that don't yet have
|
||||
|
|
|
@ -77,6 +77,8 @@ safe to do so.
|
|||
when git-annex has to contact remotes to check if it can drop files.
|
||||
For example: `-J4`
|
||||
|
||||
Setting this to "cpus" will run one job per CPU core.
|
||||
|
||||
* `--batch`
|
||||
|
||||
Enables batch mode, in which lines containing names of files to drop
|
||||
|
|
|
@ -93,6 +93,8 @@ With parameters, only the specified files are checked.
|
|||
|
||||
Runs multiple fsck jobs in parallel. For example: `-J4`
|
||||
|
||||
Setting this to "cpus" will run one job per CPU core.
|
||||
|
||||
* `--json`
|
||||
|
||||
Enable JSON output. This is intended to be parsed by programs that use
|
||||
|
|
|
@ -33,6 +33,8 @@ or transferring them from some kind of key-value store.
|
|||
Enables parallel download with up to the specified number of jobs
|
||||
running at once. For example: `-J10`
|
||||
|
||||
Setting this to "cpus" will run one job per CPU core.
|
||||
|
||||
When files can be downloaded from multiple remotes, enabling parallel
|
||||
downloads will split the load between the remotes. For example, if
|
||||
the files are available on remotes A and B, then one file will be
|
||||
|
|
|
@ -141,6 +141,8 @@ and `--reinject-duplicates` documentation below.
|
|||
Imports multiple files in parallel. This may be faster.
|
||||
For example: `-J4`
|
||||
|
||||
Setting this to "cpus" will run one job per CPU core.
|
||||
|
||||
* `--json`
|
||||
|
||||
Enable JSON output. This is intended to be parsed by programs that use
|
||||
|
|
|
@ -36,6 +36,8 @@ contents. Use [[git-annex-sync]](1) for that.
|
|||
Enables parallel transfers with up to the specified number of jobs
|
||||
running at once. For example: `-J10`
|
||||
|
||||
Setting this to "cpus" will run one job per CPU core.
|
||||
|
||||
* `--all` `-A`
|
||||
|
||||
Mirror all objects stored in the git annex, not only objects used by
|
||||
|
|
|
@ -40,6 +40,8 @@ Moves the content of files from or to another remote.
|
|||
Enables parallel transfers with up to the specified number of jobs
|
||||
running at once. For example: `-J10`
|
||||
|
||||
Setting this to "cpus" will run one job per CPU core.
|
||||
|
||||
* `--all` `-A`
|
||||
|
||||
Rather than specifying a filename or path to move, this option can be
|
||||
|
|
|
@ -112,6 +112,8 @@ by running "git annex sync" on the remote.
|
|||
Enables parallel syncing with up to the specified number of jobs
|
||||
running at once. For example: `-J10`
|
||||
|
||||
Setting this to "cpus" will run one job per CPU core.
|
||||
|
||||
When there are multiple git remotes, pushes will be made to them in
|
||||
parallel. Pulls are not done in parallel because that tends to be
|
||||
less efficient. When --content is synced, the files are processed
|
||||
|
|
|
@ -952,6 +952,8 @@ Here are all the supported configuration settings.
|
|||
Only git-annex commands that support the --jobs option will
|
||||
use this.
|
||||
|
||||
Setting this to "cpus" will run one job per CPU core.
|
||||
|
||||
* `annex.queuesize`
|
||||
|
||||
git-annex builds a queue of git commands, in order to combine similar
|
||||
|
|
|
@ -1 +1,10 @@
|
|||
Can you add a global config flag to tell parallelizable commands to use all available cores? Often I forget to add -JN when it would have sped things up.
|
||||
|
||||
> Added as --jobs=cpus / annex.jobs=cpus. This will allow
|
||||
> later expansion, perhaps `--jobs=cpus-1` to leave a spare core
|
||||
> or `--jobs=remotes*2` to run two jobs per remote, or things like that.
|
||||
>
|
||||
> It's a bit more typing than -J0, but since it can be configured once in
|
||||
> annex.jobs, that seemed an acceptable tradeoff to future proof it.
|
||||
>
|
||||
> [[done]] --[[Joey]]
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue