annex.jobs=cpus etc

Added the ability to run one job per CPU (core), by setting annex.jobs=cpus,
or using option --jobs=cpus or -Jcpus.

Built with future expansion in mind, including not defaulting matching on
Concurrency so more constructors can later be added, and using "cpu"
instead of "0".
This commit is contained in:
Joey Hess 2019-05-10 13:24:31 -04:00
parent 459bbd9005
commit 82186ca58f
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
20 changed files with 105 additions and 32 deletions

View file

@ -189,8 +189,9 @@ prepSocket socketfile sshhost sshparams = do
let socketlock = socket2lock socketfile let socketlock = socket2lock socketfile
Annex.getState Annex.concurrency >>= \case Annex.getState Annex.concurrency >>= \case
NonConcurrent -> return ()
Concurrent {} -> makeconnection socketlock Concurrent {} -> makeconnection socketlock
_ -> return () ConcurrentPerCpu -> makeconnection socketlock
lockFileCached socketlock lockFileCached socketlock
where where

View file

@ -247,20 +247,29 @@ pickRemote l a = debugLocks $ go l =<< Annex.getState Annex.concurrency
where where
go [] _ = return observeFailure go [] _ = return observeFailure
go (r:[]) _ = a r go (r:[]) _ = a r
go rs (Concurrent n) | n > 1 = do go rs NonConcurrent = gononconcurrent rs
mv <- Annex.getState Annex.activeremotes go rs (Concurrent n)
active <- liftIO $ takeMVar mv | n <= 1 = gononconcurrent rs
let rs' = sortBy (lessActiveFirst active) rs | otherwise = goconcurrent rs
goconcurrent mv active rs' go rs ConcurrentPerCpu = goconcurrent rs
go (r:rs) _ = do
gononconcurrent [] = return observeFailure
gononconcurrent (r:rs) = do
ok <- a r ok <- a r
if observeBool ok if observeBool ok
then return ok then return ok
else go rs NonConcurrent else gononconcurrent rs
goconcurrent mv active [] = do
goconcurrent rs = do
mv <- Annex.getState Annex.activeremotes
active <- liftIO $ takeMVar mv
let rs' = sortBy (lessActiveFirst active) rs
goconcurrent' mv active rs'
goconcurrent' mv active [] = do
liftIO $ putMVar mv active liftIO $ putMVar mv active
return observeFailure return observeFailure
goconcurrent mv active (r:rs) = do goconcurrent' mv active (r:rs) = do
let !active' = M.insertWith (+) r 1 active let !active' = M.insertWith (+) r 1 active
liftIO $ putMVar mv active' liftIO $ putMVar mv active'
let getnewactive = do let getnewactive = do
@ -279,7 +288,7 @@ pickRemote l a = debugLocks $ go l =<< Annex.getState Annex.concurrency
-- because other threads could have -- because other threads could have
-- been assigned them in the meantime. -- been assigned them in the meantime.
let rs' = sortBy (lessActiveFirst active'') rs let rs' = sortBy (lessActiveFirst active'') rs
goconcurrent mv active'' rs' goconcurrent' mv active'' rs'
lessActiveFirst :: M.Map Remote Integer -> Remote -> Remote -> Ordering lessActiveFirst :: M.Map Remote Integer -> Remote -> Remote -> Ordering
lessActiveFirst active a b lessActiveFirst active a b

View file

@ -3,6 +3,8 @@ git-annex (7.20190508) UNRELEASED; urgency=medium
* Fixed bug that caused git-annex to fail to add a file when another * Fixed bug that caused git-annex to fail to add a file when another
git-annex process cleaned up the temp directory it was using. git-annex process cleaned up the temp directory it was using.
* Makefile: Added install-completions to install target. * Makefile: Added install-completions to install target.
* Added the ability to run one job per CPU (core), by setting
annex.jobs=cpus, or using option --jobs=cpus or -Jcpus.
-- Joey Hess <id@joeyh.name> Mon, 06 May 2019 13:52:02 -0400 -- Joey Hess <id@joeyh.name> Mon, 06 May 2019 13:52:02 -0400

View file

@ -22,6 +22,7 @@ import Control.Concurrent
import Control.Concurrent.Async import Control.Concurrent.Async
import Control.Concurrent.STM import Control.Concurrent.STM
import Control.Exception (throwIO) import Control.Exception (throwIO)
import GHC.Conc
import Data.Either import Data.Either
import qualified Data.Map.Strict as M import qualified Data.Map.Strict as M
import qualified System.Console.Regions as Regions import qualified System.Console.Regions as Regions
@ -51,9 +52,14 @@ performCommandAction Command { cmdcheck = c, cmdname = name } seek cont = do
- This should only be run in the seek stage. - This should only be run in the seek stage.
-} -}
commandAction :: CommandStart -> Annex () commandAction :: CommandStart -> Annex ()
commandAction a = go =<< Annex.getState Annex.concurrency commandAction a = Annex.getState Annex.concurrency >>= \case
NonConcurrent -> run
Concurrent n -> runconcurrent n
ConcurrentPerCpu -> runconcurrent =<< liftIO getNumProcessors
where where
go (Concurrent n) = do run = void $ includeCommandAction a
runconcurrent n = do
ws <- Annex.getState Annex.workers ws <- Annex.getState Annex.workers
(st, ws') <- if null ws (st, ws') <- if null ws
then do then do
@ -71,8 +77,6 @@ commandAction a = go =<< Annex.getState Annex.concurrency
w <- liftIO $ async w <- liftIO $ async
$ snd <$> Annex.run st (inOwnConsoleRegion (Annex.output st) run) $ snd <$> Annex.run st (inOwnConsoleRegion (Annex.output st) run)
Annex.changeState $ \s -> s { Annex.workers = Right w:ws' } Annex.changeState $ \s -> s { Annex.workers = Right w:ws' }
go NonConcurrent = run
run = void $ includeCommandAction a
commandActions :: [CommandStart] -> Annex () commandActions :: [CommandStart] -> Annex ()
commandActions = mapM_ commandAction commandActions = mapM_ commandAction
@ -170,18 +174,23 @@ allowConcurrentOutput :: Annex a -> Annex a
allowConcurrentOutput a = do allowConcurrentOutput a = do
fromcmdline <- Annex.getState Annex.concurrency fromcmdline <- Annex.getState Annex.concurrency
fromgitcfg <- annexJobs <$> Annex.getGitConfig fromgitcfg <- annexJobs <$> Annex.getGitConfig
let usegitcfg = Annex.changeState $
\c -> c { Annex.concurrency = fromgitcfg }
case (fromcmdline, fromgitcfg) of case (fromcmdline, fromgitcfg) of
(NonConcurrent, NonConcurrent) -> a (NonConcurrent, NonConcurrent) -> a
(Concurrent n, _) -> goconcurrent n (Concurrent n, _) -> do
raisecapabilitiesto n
goconcurrent
(ConcurrentPerCpu, _) -> goconcurrent
(NonConcurrent, Concurrent n) -> do (NonConcurrent, Concurrent n) -> do
Annex.changeState $ usegitcfg
\c -> c { Annex.concurrency = fromgitcfg } raisecapabilitiesto n
goconcurrent n goconcurrent
(NonConcurrent, ConcurrentPerCpu) -> do
usegitcfg
goconcurrent
where where
goconcurrent n = do goconcurrent = do
c <- liftIO getNumCapabilities
when (n > c) $
liftIO $ setNumCapabilities n
withMessageState $ \s -> case outputType s of withMessageState $ \s -> case outputType s of
NormalOutput -> ifM (liftIO concurrentOutputSupported) NormalOutput -> ifM (liftIO concurrentOutputSupported)
( Regions.displayConsoleRegions $ ( Regions.displayConsoleRegions $
@ -190,13 +199,21 @@ allowConcurrentOutput a = do
) )
_ -> goconcurrent' False _ -> goconcurrent' False
goconcurrent' b = bracket_ (setup b) cleanup a goconcurrent' b = bracket_ (setup b) cleanup a
setup = setconcurrentoutputenabled setup = setconcurrentoutputenabled
cleanup = do cleanup = do
finishCommandActions finishCommandActions
setconcurrentoutputenabled False setconcurrentoutputenabled False
setconcurrentoutputenabled b = Annex.changeState $ \s -> setconcurrentoutputenabled b = Annex.changeState $ \s ->
s { Annex.output = (Annex.output s) { concurrentOutputEnabled = b } } s { Annex.output = (Annex.output s) { concurrentOutputEnabled = b } }
raisecapabilitiesto n = do
c <- liftIO getNumCapabilities
when (n > c) $
liftIO $ setNumCapabilities n
{- Ensures that only one thread processes a key at a time. {- Ensures that only one thread processes a key at a time.
- Other threads will block until it's done. -} - Other threads will block until it's done. -}
onlyActionOn :: Key -> CommandStart -> CommandStart onlyActionOn :: Key -> CommandStart -> CommandStart
@ -212,7 +229,9 @@ onlyActionOn' :: Key -> Annex a -> Annex a
onlyActionOn' k a = go =<< Annex.getState Annex.concurrency onlyActionOn' k a = go =<< Annex.getState Annex.concurrency
where where
go NonConcurrent = a go NonConcurrent = a
go (Concurrent _) = do go (Concurrent _) = goconcurrent
go ConcurrentPerCpu = goconcurrent
goconcurrent = do
tv <- Annex.getState Annex.activekeys tv <- Annex.getState Annex.activekeys
bracket (setup tv) id (const a) bracket (setup tv) id (const a)
setup tv = liftIO $ do setup tv = liftIO $ do

View file

@ -366,14 +366,15 @@ jsonProgressOption =
jobsOption :: [GlobalOption] jobsOption :: [GlobalOption]
jobsOption = jobsOption =
[ globalSetter set $ [ globalSetter set $
option auto option (maybeReader parseConcurrency)
( long "jobs" <> short 'J' <> metavar paramNumber ( long "jobs" <> short 'J'
<> metavar (paramNumber `paramOr` "cpu")
<> help "enable concurrent jobs" <> help "enable concurrent jobs"
<> hidden <> hidden
) )
] ]
where where
set n = Annex.changeState $ \s -> s { Annex.concurrency = Concurrent n } set v = Annex.changeState $ \s -> s { Annex.concurrency = v }
timeLimitOption :: [GlobalOption] timeLimitOption :: [GlobalOption]
timeLimitOption = timeLimitOption =

View file

@ -262,10 +262,12 @@ implicitMessage = whenM (implicitMessages <$> Annex.getState Annex.output)
- the user. - the user.
-} -}
prompt :: Annex a -> Annex a prompt :: Annex a -> Annex a
prompt a = debugLocks $ go =<< Annex.getState Annex.concurrency prompt a = debugLocks $ Annex.getState Annex.concurrency >>= \case
NonConcurrent -> a
(Concurrent _) -> goconcurrent
ConcurrentPerCpu -> goconcurrent
where where
go NonConcurrent = a goconcurrent = withMessageState $ \s -> do
go (Concurrent {}) = withMessageState $ \s -> do
let l = promptLock s let l = promptLock s
bracketIO bracketIO
(takeMVar l) (takeMVar l)

View file

@ -5,4 +5,11 @@
module Types.Concurrency where module Types.Concurrency where
data Concurrency = NonConcurrent | Concurrent Int import Utility.PartialPrelude
data Concurrency = NonConcurrent | Concurrent Int | ConcurrentPerCpu
parseConcurrency :: String -> Maybe Concurrency
parseConcurrency "cpus" = Just ConcurrentPerCpu
parseConcurrency "cpu" = Just ConcurrentPerCpu
parseConcurrency s = Concurrent <$> readish s

View file

@ -177,7 +177,8 @@ extractGitConfig r = GitConfig
, annexAllowUnverifiedDownloads = (== Just "ACKTHPPT") $ , annexAllowUnverifiedDownloads = (== Just "ACKTHPPT") $
getmaybe (annex "security.allow-unverified-downloads") getmaybe (annex "security.allow-unverified-downloads")
, annexMaxExtensionLength = getmayberead (annex "maxextensionlength") , annexMaxExtensionLength = getmayberead (annex "maxextensionlength")
, annexJobs = maybe NonConcurrent Concurrent $ getmayberead (annex "jobs") , annexJobs = fromMaybe NonConcurrent $
parseConcurrency =<< getmaybe (annex "jobs")
, annexCacheCreds = getbool (annex "cachecreds") True , annexCacheCreds = getbool (annex "cachecreds") True
, coreSymlinks = getbool "core.symlinks" True , coreSymlinks = getbool "core.symlinks" True
, coreSharedRepository = getSharedRepository r , coreSharedRepository = getSharedRepository r

View file

@ -56,6 +56,8 @@ annexed content, and other symlinks.
Adds multiple files in parallel. This may be faster. Adds multiple files in parallel. This may be faster.
For example: `-J4` For example: `-J4`
Setting this to "cpus" will run one job per CPU core.
* `--update` `-u` * `--update` `-u`
Like `git add --update`, this does not add new files, but any updates Like `git add --update`, this does not add new files, but any updates

View file

@ -78,6 +78,8 @@ be used to get better filenames.
Enables parallel downloads when multiple urls are being added. Enables parallel downloads when multiple urls are being added.
For example: `-J4` For example: `-J4`
Setting this to "cpus" will run one job per CPU core.
* `--batch` * `--batch`
Enables batch mode, in which lines containing urls to add are read from Enables batch mode, in which lines containing urls to add are read from

View file

@ -34,6 +34,8 @@ Copies the content of files from or to another remote.
Enables parallel transfers with up to the specified number of jobs Enables parallel transfers with up to the specified number of jobs
running at once. For example: `-J10` running at once. For example: `-J10`
Setting this to "cpus" will run one job per CPU core.
* `--auto` * `--auto`
Rather than copying all files, only copy files that don't yet have Rather than copying all files, only copy files that don't yet have

View file

@ -77,6 +77,8 @@ safe to do so.
when git-annex has to contact remotes to check if it can drop files. when git-annex has to contact remotes to check if it can drop files.
For example: `-J4` For example: `-J4`
Setting this to "cpus" will run one job per CPU core.
* `--batch` * `--batch`
Enables batch mode, in which lines containing names of files to drop Enables batch mode, in which lines containing names of files to drop

View file

@ -93,6 +93,8 @@ With parameters, only the specified files are checked.
Runs multiple fsck jobs in parallel. For example: `-J4` Runs multiple fsck jobs in parallel. For example: `-J4`
Setting this to "cpus" will run one job per CPU core.
* `--json` * `--json`
Enable JSON output. This is intended to be parsed by programs that use Enable JSON output. This is intended to be parsed by programs that use

View file

@ -33,6 +33,8 @@ or transferring them from some kind of key-value store.
Enables parallel download with up to the specified number of jobs Enables parallel download with up to the specified number of jobs
running at once. For example: `-J10` running at once. For example: `-J10`
Setting this to "cpus" will run one job per CPU core.
When files can be downloaded from multiple remotes, enabling parallel When files can be downloaded from multiple remotes, enabling parallel
downloads will split the load between the remotes. For example, if downloads will split the load between the remotes. For example, if
the files are available on remotes A and B, then one file will be the files are available on remotes A and B, then one file will be

View file

@ -141,6 +141,8 @@ and `--reinject-duplicates` documentation below.
Imports multiple files in parallel. This may be faster. Imports multiple files in parallel. This may be faster.
For example: `-J4` For example: `-J4`
Setting this to "cpus" will run one job per CPU core.
* `--json` * `--json`
Enable JSON output. This is intended to be parsed by programs that use Enable JSON output. This is intended to be parsed by programs that use

View file

@ -36,6 +36,8 @@ contents. Use [[git-annex-sync]](1) for that.
Enables parallel transfers with up to the specified number of jobs Enables parallel transfers with up to the specified number of jobs
running at once. For example: `-J10` running at once. For example: `-J10`
Setting this to "cpus" will run one job per CPU core.
* `--all` `-A` * `--all` `-A`
Mirror all objects stored in the git annex, not only objects used by Mirror all objects stored in the git annex, not only objects used by

View file

@ -40,6 +40,8 @@ Moves the content of files from or to another remote.
Enables parallel transfers with up to the specified number of jobs Enables parallel transfers with up to the specified number of jobs
running at once. For example: `-J10` running at once. For example: `-J10`
Setting this to "cpus" will run one job per CPU core.
* `--all` `-A` * `--all` `-A`
Rather than specifying a filename or path to move, this option can be Rather than specifying a filename or path to move, this option can be

View file

@ -112,6 +112,8 @@ by running "git annex sync" on the remote.
Enables parallel syncing with up to the specified number of jobs Enables parallel syncing with up to the specified number of jobs
running at once. For example: `-J10` running at once. For example: `-J10`
Setting this to "cpus" will run one job per CPU core.
When there are multiple git remotes, pushes will be made to them in When there are multiple git remotes, pushes will be made to them in
parallel. Pulls are not done in parallel because that tends to be parallel. Pulls are not done in parallel because that tends to be
less efficient. When --content is synced, the files are processed less efficient. When --content is synced, the files are processed

View file

@ -952,6 +952,8 @@ Here are all the supported configuration settings.
Only git-annex commands that support the --jobs option will Only git-annex commands that support the --jobs option will
use this. use this.
Setting this to "cpus" will run one job per CPU core.
* `annex.queuesize` * `annex.queuesize`
git-annex builds a queue of git commands, in order to combine similar git-annex builds a queue of git commands, in order to combine similar

View file

@ -1 +1,10 @@
Can you add a global config flag to tell parallelizable commands to use all available cores? Often I forget to add -JN when it would have sped things up. Can you add a global config flag to tell parallelizable commands to use all available cores? Often I forget to add -JN when it would have sped things up.
> Added as --jobs=cpus / annex.jobs=cpus. This will allow
> later expansion, perhaps `--jobs=cpus-1` to leave a spare core
> or `--jobs=remotes*2` to run two jobs per remote, or things like that.
>
> It's a bit more typing than -J0, but since it can be configured once in
> annex.jobs, that seemed an acceptable tradeoff to future proof it.
>
> [[done]] --[[Joey]]