annex.jobs=cpus etc
Added the ability to run one job per CPU (core), by setting annex.jobs=cpus, or using option --jobs=cpus or -Jcpus. Built with future expansion in mind, including not defaulting matching on Concurrency so more constructors can later be added, and using "cpu" instead of "0".
This commit is contained in:
parent
459bbd9005
commit
82186ca58f
20 changed files with 105 additions and 32 deletions
|
@ -189,8 +189,9 @@ prepSocket socketfile sshhost sshparams = do
|
||||||
let socketlock = socket2lock socketfile
|
let socketlock = socket2lock socketfile
|
||||||
|
|
||||||
Annex.getState Annex.concurrency >>= \case
|
Annex.getState Annex.concurrency >>= \case
|
||||||
|
NonConcurrent -> return ()
|
||||||
Concurrent {} -> makeconnection socketlock
|
Concurrent {} -> makeconnection socketlock
|
||||||
_ -> return ()
|
ConcurrentPerCpu -> makeconnection socketlock
|
||||||
|
|
||||||
lockFileCached socketlock
|
lockFileCached socketlock
|
||||||
where
|
where
|
||||||
|
|
|
@ -247,20 +247,29 @@ pickRemote l a = debugLocks $ go l =<< Annex.getState Annex.concurrency
|
||||||
where
|
where
|
||||||
go [] _ = return observeFailure
|
go [] _ = return observeFailure
|
||||||
go (r:[]) _ = a r
|
go (r:[]) _ = a r
|
||||||
go rs (Concurrent n) | n > 1 = do
|
go rs NonConcurrent = gononconcurrent rs
|
||||||
mv <- Annex.getState Annex.activeremotes
|
go rs (Concurrent n)
|
||||||
active <- liftIO $ takeMVar mv
|
| n <= 1 = gononconcurrent rs
|
||||||
let rs' = sortBy (lessActiveFirst active) rs
|
| otherwise = goconcurrent rs
|
||||||
goconcurrent mv active rs'
|
go rs ConcurrentPerCpu = goconcurrent rs
|
||||||
go (r:rs) _ = do
|
|
||||||
|
gononconcurrent [] = return observeFailure
|
||||||
|
gononconcurrent (r:rs) = do
|
||||||
ok <- a r
|
ok <- a r
|
||||||
if observeBool ok
|
if observeBool ok
|
||||||
then return ok
|
then return ok
|
||||||
else go rs NonConcurrent
|
else gononconcurrent rs
|
||||||
goconcurrent mv active [] = do
|
|
||||||
|
goconcurrent rs = do
|
||||||
|
mv <- Annex.getState Annex.activeremotes
|
||||||
|
active <- liftIO $ takeMVar mv
|
||||||
|
let rs' = sortBy (lessActiveFirst active) rs
|
||||||
|
goconcurrent' mv active rs'
|
||||||
|
|
||||||
|
goconcurrent' mv active [] = do
|
||||||
liftIO $ putMVar mv active
|
liftIO $ putMVar mv active
|
||||||
return observeFailure
|
return observeFailure
|
||||||
goconcurrent mv active (r:rs) = do
|
goconcurrent' mv active (r:rs) = do
|
||||||
let !active' = M.insertWith (+) r 1 active
|
let !active' = M.insertWith (+) r 1 active
|
||||||
liftIO $ putMVar mv active'
|
liftIO $ putMVar mv active'
|
||||||
let getnewactive = do
|
let getnewactive = do
|
||||||
|
@ -279,7 +288,7 @@ pickRemote l a = debugLocks $ go l =<< Annex.getState Annex.concurrency
|
||||||
-- because other threads could have
|
-- because other threads could have
|
||||||
-- been assigned them in the meantime.
|
-- been assigned them in the meantime.
|
||||||
let rs' = sortBy (lessActiveFirst active'') rs
|
let rs' = sortBy (lessActiveFirst active'') rs
|
||||||
goconcurrent mv active'' rs'
|
goconcurrent' mv active'' rs'
|
||||||
|
|
||||||
lessActiveFirst :: M.Map Remote Integer -> Remote -> Remote -> Ordering
|
lessActiveFirst :: M.Map Remote Integer -> Remote -> Remote -> Ordering
|
||||||
lessActiveFirst active a b
|
lessActiveFirst active a b
|
||||||
|
|
|
@ -3,6 +3,8 @@ git-annex (7.20190508) UNRELEASED; urgency=medium
|
||||||
* Fixed bug that caused git-annex to fail to add a file when another
|
* Fixed bug that caused git-annex to fail to add a file when another
|
||||||
git-annex process cleaned up the temp directory it was using.
|
git-annex process cleaned up the temp directory it was using.
|
||||||
* Makefile: Added install-completions to install target.
|
* Makefile: Added install-completions to install target.
|
||||||
|
* Added the ability to run one job per CPU (core), by setting
|
||||||
|
annex.jobs=cpus, or using option --jobs=cpus or -Jcpus.
|
||||||
|
|
||||||
-- Joey Hess <id@joeyh.name> Mon, 06 May 2019 13:52:02 -0400
|
-- Joey Hess <id@joeyh.name> Mon, 06 May 2019 13:52:02 -0400
|
||||||
|
|
||||||
|
|
|
@ -22,6 +22,7 @@ import Control.Concurrent
|
||||||
import Control.Concurrent.Async
|
import Control.Concurrent.Async
|
||||||
import Control.Concurrent.STM
|
import Control.Concurrent.STM
|
||||||
import Control.Exception (throwIO)
|
import Control.Exception (throwIO)
|
||||||
|
import GHC.Conc
|
||||||
import Data.Either
|
import Data.Either
|
||||||
import qualified Data.Map.Strict as M
|
import qualified Data.Map.Strict as M
|
||||||
import qualified System.Console.Regions as Regions
|
import qualified System.Console.Regions as Regions
|
||||||
|
@ -51,9 +52,14 @@ performCommandAction Command { cmdcheck = c, cmdname = name } seek cont = do
|
||||||
- This should only be run in the seek stage.
|
- This should only be run in the seek stage.
|
||||||
-}
|
-}
|
||||||
commandAction :: CommandStart -> Annex ()
|
commandAction :: CommandStart -> Annex ()
|
||||||
commandAction a = go =<< Annex.getState Annex.concurrency
|
commandAction a = Annex.getState Annex.concurrency >>= \case
|
||||||
|
NonConcurrent -> run
|
||||||
|
Concurrent n -> runconcurrent n
|
||||||
|
ConcurrentPerCpu -> runconcurrent =<< liftIO getNumProcessors
|
||||||
where
|
where
|
||||||
go (Concurrent n) = do
|
run = void $ includeCommandAction a
|
||||||
|
|
||||||
|
runconcurrent n = do
|
||||||
ws <- Annex.getState Annex.workers
|
ws <- Annex.getState Annex.workers
|
||||||
(st, ws') <- if null ws
|
(st, ws') <- if null ws
|
||||||
then do
|
then do
|
||||||
|
@ -71,8 +77,6 @@ commandAction a = go =<< Annex.getState Annex.concurrency
|
||||||
w <- liftIO $ async
|
w <- liftIO $ async
|
||||||
$ snd <$> Annex.run st (inOwnConsoleRegion (Annex.output st) run)
|
$ snd <$> Annex.run st (inOwnConsoleRegion (Annex.output st) run)
|
||||||
Annex.changeState $ \s -> s { Annex.workers = Right w:ws' }
|
Annex.changeState $ \s -> s { Annex.workers = Right w:ws' }
|
||||||
go NonConcurrent = run
|
|
||||||
run = void $ includeCommandAction a
|
|
||||||
|
|
||||||
commandActions :: [CommandStart] -> Annex ()
|
commandActions :: [CommandStart] -> Annex ()
|
||||||
commandActions = mapM_ commandAction
|
commandActions = mapM_ commandAction
|
||||||
|
@ -170,18 +174,23 @@ allowConcurrentOutput :: Annex a -> Annex a
|
||||||
allowConcurrentOutput a = do
|
allowConcurrentOutput a = do
|
||||||
fromcmdline <- Annex.getState Annex.concurrency
|
fromcmdline <- Annex.getState Annex.concurrency
|
||||||
fromgitcfg <- annexJobs <$> Annex.getGitConfig
|
fromgitcfg <- annexJobs <$> Annex.getGitConfig
|
||||||
|
let usegitcfg = Annex.changeState $
|
||||||
|
\c -> c { Annex.concurrency = fromgitcfg }
|
||||||
case (fromcmdline, fromgitcfg) of
|
case (fromcmdline, fromgitcfg) of
|
||||||
(NonConcurrent, NonConcurrent) -> a
|
(NonConcurrent, NonConcurrent) -> a
|
||||||
(Concurrent n, _) -> goconcurrent n
|
(Concurrent n, _) -> do
|
||||||
|
raisecapabilitiesto n
|
||||||
|
goconcurrent
|
||||||
|
(ConcurrentPerCpu, _) -> goconcurrent
|
||||||
(NonConcurrent, Concurrent n) -> do
|
(NonConcurrent, Concurrent n) -> do
|
||||||
Annex.changeState $
|
usegitcfg
|
||||||
\c -> c { Annex.concurrency = fromgitcfg }
|
raisecapabilitiesto n
|
||||||
goconcurrent n
|
goconcurrent
|
||||||
|
(NonConcurrent, ConcurrentPerCpu) -> do
|
||||||
|
usegitcfg
|
||||||
|
goconcurrent
|
||||||
where
|
where
|
||||||
goconcurrent n = do
|
goconcurrent = do
|
||||||
c <- liftIO getNumCapabilities
|
|
||||||
when (n > c) $
|
|
||||||
liftIO $ setNumCapabilities n
|
|
||||||
withMessageState $ \s -> case outputType s of
|
withMessageState $ \s -> case outputType s of
|
||||||
NormalOutput -> ifM (liftIO concurrentOutputSupported)
|
NormalOutput -> ifM (liftIO concurrentOutputSupported)
|
||||||
( Regions.displayConsoleRegions $
|
( Regions.displayConsoleRegions $
|
||||||
|
@ -190,13 +199,21 @@ allowConcurrentOutput a = do
|
||||||
)
|
)
|
||||||
_ -> goconcurrent' False
|
_ -> goconcurrent' False
|
||||||
goconcurrent' b = bracket_ (setup b) cleanup a
|
goconcurrent' b = bracket_ (setup b) cleanup a
|
||||||
|
|
||||||
setup = setconcurrentoutputenabled
|
setup = setconcurrentoutputenabled
|
||||||
|
|
||||||
cleanup = do
|
cleanup = do
|
||||||
finishCommandActions
|
finishCommandActions
|
||||||
setconcurrentoutputenabled False
|
setconcurrentoutputenabled False
|
||||||
|
|
||||||
setconcurrentoutputenabled b = Annex.changeState $ \s ->
|
setconcurrentoutputenabled b = Annex.changeState $ \s ->
|
||||||
s { Annex.output = (Annex.output s) { concurrentOutputEnabled = b } }
|
s { Annex.output = (Annex.output s) { concurrentOutputEnabled = b } }
|
||||||
|
|
||||||
|
raisecapabilitiesto n = do
|
||||||
|
c <- liftIO getNumCapabilities
|
||||||
|
when (n > c) $
|
||||||
|
liftIO $ setNumCapabilities n
|
||||||
|
|
||||||
{- Ensures that only one thread processes a key at a time.
|
{- Ensures that only one thread processes a key at a time.
|
||||||
- Other threads will block until it's done. -}
|
- Other threads will block until it's done. -}
|
||||||
onlyActionOn :: Key -> CommandStart -> CommandStart
|
onlyActionOn :: Key -> CommandStart -> CommandStart
|
||||||
|
@ -212,7 +229,9 @@ onlyActionOn' :: Key -> Annex a -> Annex a
|
||||||
onlyActionOn' k a = go =<< Annex.getState Annex.concurrency
|
onlyActionOn' k a = go =<< Annex.getState Annex.concurrency
|
||||||
where
|
where
|
||||||
go NonConcurrent = a
|
go NonConcurrent = a
|
||||||
go (Concurrent _) = do
|
go (Concurrent _) = goconcurrent
|
||||||
|
go ConcurrentPerCpu = goconcurrent
|
||||||
|
goconcurrent = do
|
||||||
tv <- Annex.getState Annex.activekeys
|
tv <- Annex.getState Annex.activekeys
|
||||||
bracket (setup tv) id (const a)
|
bracket (setup tv) id (const a)
|
||||||
setup tv = liftIO $ do
|
setup tv = liftIO $ do
|
||||||
|
|
|
@ -366,14 +366,15 @@ jsonProgressOption =
|
||||||
jobsOption :: [GlobalOption]
|
jobsOption :: [GlobalOption]
|
||||||
jobsOption =
|
jobsOption =
|
||||||
[ globalSetter set $
|
[ globalSetter set $
|
||||||
option auto
|
option (maybeReader parseConcurrency)
|
||||||
( long "jobs" <> short 'J' <> metavar paramNumber
|
( long "jobs" <> short 'J'
|
||||||
|
<> metavar (paramNumber `paramOr` "cpu")
|
||||||
<> help "enable concurrent jobs"
|
<> help "enable concurrent jobs"
|
||||||
<> hidden
|
<> hidden
|
||||||
)
|
)
|
||||||
]
|
]
|
||||||
where
|
where
|
||||||
set n = Annex.changeState $ \s -> s { Annex.concurrency = Concurrent n }
|
set v = Annex.changeState $ \s -> s { Annex.concurrency = v }
|
||||||
|
|
||||||
timeLimitOption :: [GlobalOption]
|
timeLimitOption :: [GlobalOption]
|
||||||
timeLimitOption =
|
timeLimitOption =
|
||||||
|
|
|
@ -262,10 +262,12 @@ implicitMessage = whenM (implicitMessages <$> Annex.getState Annex.output)
|
||||||
- the user.
|
- the user.
|
||||||
-}
|
-}
|
||||||
prompt :: Annex a -> Annex a
|
prompt :: Annex a -> Annex a
|
||||||
prompt a = debugLocks $ go =<< Annex.getState Annex.concurrency
|
prompt a = debugLocks $ Annex.getState Annex.concurrency >>= \case
|
||||||
|
NonConcurrent -> a
|
||||||
|
(Concurrent _) -> goconcurrent
|
||||||
|
ConcurrentPerCpu -> goconcurrent
|
||||||
where
|
where
|
||||||
go NonConcurrent = a
|
goconcurrent = withMessageState $ \s -> do
|
||||||
go (Concurrent {}) = withMessageState $ \s -> do
|
|
||||||
let l = promptLock s
|
let l = promptLock s
|
||||||
bracketIO
|
bracketIO
|
||||||
(takeMVar l)
|
(takeMVar l)
|
||||||
|
|
|
@ -5,4 +5,11 @@
|
||||||
|
|
||||||
module Types.Concurrency where
|
module Types.Concurrency where
|
||||||
|
|
||||||
data Concurrency = NonConcurrent | Concurrent Int
|
import Utility.PartialPrelude
|
||||||
|
|
||||||
|
data Concurrency = NonConcurrent | Concurrent Int | ConcurrentPerCpu
|
||||||
|
|
||||||
|
parseConcurrency :: String -> Maybe Concurrency
|
||||||
|
parseConcurrency "cpus" = Just ConcurrentPerCpu
|
||||||
|
parseConcurrency "cpu" = Just ConcurrentPerCpu
|
||||||
|
parseConcurrency s = Concurrent <$> readish s
|
||||||
|
|
|
@ -177,7 +177,8 @@ extractGitConfig r = GitConfig
|
||||||
, annexAllowUnverifiedDownloads = (== Just "ACKTHPPT") $
|
, annexAllowUnverifiedDownloads = (== Just "ACKTHPPT") $
|
||||||
getmaybe (annex "security.allow-unverified-downloads")
|
getmaybe (annex "security.allow-unverified-downloads")
|
||||||
, annexMaxExtensionLength = getmayberead (annex "maxextensionlength")
|
, annexMaxExtensionLength = getmayberead (annex "maxextensionlength")
|
||||||
, annexJobs = maybe NonConcurrent Concurrent $ getmayberead (annex "jobs")
|
, annexJobs = fromMaybe NonConcurrent $
|
||||||
|
parseConcurrency =<< getmaybe (annex "jobs")
|
||||||
, annexCacheCreds = getbool (annex "cachecreds") True
|
, annexCacheCreds = getbool (annex "cachecreds") True
|
||||||
, coreSymlinks = getbool "core.symlinks" True
|
, coreSymlinks = getbool "core.symlinks" True
|
||||||
, coreSharedRepository = getSharedRepository r
|
, coreSharedRepository = getSharedRepository r
|
||||||
|
|
|
@ -56,6 +56,8 @@ annexed content, and other symlinks.
|
||||||
Adds multiple files in parallel. This may be faster.
|
Adds multiple files in parallel. This may be faster.
|
||||||
For example: `-J4`
|
For example: `-J4`
|
||||||
|
|
||||||
|
Setting this to "cpus" will run one job per CPU core.
|
||||||
|
|
||||||
* `--update` `-u`
|
* `--update` `-u`
|
||||||
|
|
||||||
Like `git add --update`, this does not add new files, but any updates
|
Like `git add --update`, this does not add new files, but any updates
|
||||||
|
|
|
@ -78,6 +78,8 @@ be used to get better filenames.
|
||||||
Enables parallel downloads when multiple urls are being added.
|
Enables parallel downloads when multiple urls are being added.
|
||||||
For example: `-J4`
|
For example: `-J4`
|
||||||
|
|
||||||
|
Setting this to "cpus" will run one job per CPU core.
|
||||||
|
|
||||||
* `--batch`
|
* `--batch`
|
||||||
|
|
||||||
Enables batch mode, in which lines containing urls to add are read from
|
Enables batch mode, in which lines containing urls to add are read from
|
||||||
|
|
|
@ -34,6 +34,8 @@ Copies the content of files from or to another remote.
|
||||||
Enables parallel transfers with up to the specified number of jobs
|
Enables parallel transfers with up to the specified number of jobs
|
||||||
running at once. For example: `-J10`
|
running at once. For example: `-J10`
|
||||||
|
|
||||||
|
Setting this to "cpus" will run one job per CPU core.
|
||||||
|
|
||||||
* `--auto`
|
* `--auto`
|
||||||
|
|
||||||
Rather than copying all files, only copy files that don't yet have
|
Rather than copying all files, only copy files that don't yet have
|
||||||
|
|
|
@ -77,6 +77,8 @@ safe to do so.
|
||||||
when git-annex has to contact remotes to check if it can drop files.
|
when git-annex has to contact remotes to check if it can drop files.
|
||||||
For example: `-J4`
|
For example: `-J4`
|
||||||
|
|
||||||
|
Setting this to "cpus" will run one job per CPU core.
|
||||||
|
|
||||||
* `--batch`
|
* `--batch`
|
||||||
|
|
||||||
Enables batch mode, in which lines containing names of files to drop
|
Enables batch mode, in which lines containing names of files to drop
|
||||||
|
|
|
@ -93,6 +93,8 @@ With parameters, only the specified files are checked.
|
||||||
|
|
||||||
Runs multiple fsck jobs in parallel. For example: `-J4`
|
Runs multiple fsck jobs in parallel. For example: `-J4`
|
||||||
|
|
||||||
|
Setting this to "cpus" will run one job per CPU core.
|
||||||
|
|
||||||
* `--json`
|
* `--json`
|
||||||
|
|
||||||
Enable JSON output. This is intended to be parsed by programs that use
|
Enable JSON output. This is intended to be parsed by programs that use
|
||||||
|
|
|
@ -33,6 +33,8 @@ or transferring them from some kind of key-value store.
|
||||||
Enables parallel download with up to the specified number of jobs
|
Enables parallel download with up to the specified number of jobs
|
||||||
running at once. For example: `-J10`
|
running at once. For example: `-J10`
|
||||||
|
|
||||||
|
Setting this to "cpus" will run one job per CPU core.
|
||||||
|
|
||||||
When files can be downloaded from multiple remotes, enabling parallel
|
When files can be downloaded from multiple remotes, enabling parallel
|
||||||
downloads will split the load between the remotes. For example, if
|
downloads will split the load between the remotes. For example, if
|
||||||
the files are available on remotes A and B, then one file will be
|
the files are available on remotes A and B, then one file will be
|
||||||
|
|
|
@ -141,6 +141,8 @@ and `--reinject-duplicates` documentation below.
|
||||||
Imports multiple files in parallel. This may be faster.
|
Imports multiple files in parallel. This may be faster.
|
||||||
For example: `-J4`
|
For example: `-J4`
|
||||||
|
|
||||||
|
Setting this to "cpus" will run one job per CPU core.
|
||||||
|
|
||||||
* `--json`
|
* `--json`
|
||||||
|
|
||||||
Enable JSON output. This is intended to be parsed by programs that use
|
Enable JSON output. This is intended to be parsed by programs that use
|
||||||
|
|
|
@ -36,6 +36,8 @@ contents. Use [[git-annex-sync]](1) for that.
|
||||||
Enables parallel transfers with up to the specified number of jobs
|
Enables parallel transfers with up to the specified number of jobs
|
||||||
running at once. For example: `-J10`
|
running at once. For example: `-J10`
|
||||||
|
|
||||||
|
Setting this to "cpus" will run one job per CPU core.
|
||||||
|
|
||||||
* `--all` `-A`
|
* `--all` `-A`
|
||||||
|
|
||||||
Mirror all objects stored in the git annex, not only objects used by
|
Mirror all objects stored in the git annex, not only objects used by
|
||||||
|
|
|
@ -40,6 +40,8 @@ Moves the content of files from or to another remote.
|
||||||
Enables parallel transfers with up to the specified number of jobs
|
Enables parallel transfers with up to the specified number of jobs
|
||||||
running at once. For example: `-J10`
|
running at once. For example: `-J10`
|
||||||
|
|
||||||
|
Setting this to "cpus" will run one job per CPU core.
|
||||||
|
|
||||||
* `--all` `-A`
|
* `--all` `-A`
|
||||||
|
|
||||||
Rather than specifying a filename or path to move, this option can be
|
Rather than specifying a filename or path to move, this option can be
|
||||||
|
|
|
@ -112,6 +112,8 @@ by running "git annex sync" on the remote.
|
||||||
Enables parallel syncing with up to the specified number of jobs
|
Enables parallel syncing with up to the specified number of jobs
|
||||||
running at once. For example: `-J10`
|
running at once. For example: `-J10`
|
||||||
|
|
||||||
|
Setting this to "cpus" will run one job per CPU core.
|
||||||
|
|
||||||
When there are multiple git remotes, pushes will be made to them in
|
When there are multiple git remotes, pushes will be made to them in
|
||||||
parallel. Pulls are not done in parallel because that tends to be
|
parallel. Pulls are not done in parallel because that tends to be
|
||||||
less efficient. When --content is synced, the files are processed
|
less efficient. When --content is synced, the files are processed
|
||||||
|
|
|
@ -952,6 +952,8 @@ Here are all the supported configuration settings.
|
||||||
Only git-annex commands that support the --jobs option will
|
Only git-annex commands that support the --jobs option will
|
||||||
use this.
|
use this.
|
||||||
|
|
||||||
|
Setting this to "cpus" will run one job per CPU core.
|
||||||
|
|
||||||
* `annex.queuesize`
|
* `annex.queuesize`
|
||||||
|
|
||||||
git-annex builds a queue of git commands, in order to combine similar
|
git-annex builds a queue of git commands, in order to combine similar
|
||||||
|
|
|
@ -1 +1,10 @@
|
||||||
Can you add a global config flag to tell parallelizable commands to use all available cores? Often I forget to add -JN when it would have sped things up.
|
Can you add a global config flag to tell parallelizable commands to use all available cores? Often I forget to add -JN when it would have sped things up.
|
||||||
|
|
||||||
|
> Added as --jobs=cpus / annex.jobs=cpus. This will allow
|
||||||
|
> later expansion, perhaps `--jobs=cpus-1` to leave a spare core
|
||||||
|
> or `--jobs=remotes*2` to run two jobs per remote, or things like that.
|
||||||
|
>
|
||||||
|
> It's a bit more typing than -J0, but since it can be configured once in
|
||||||
|
> annex.jobs, that seemed an acceptable tradeoff to future proof it.
|
||||||
|
>
|
||||||
|
> [[done]] --[[Joey]]
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue