annex.jobs=cpus etc

Added the ability to run one job per CPU (core), by setting annex.jobs=cpus,
or using option --jobs=cpus or -Jcpus.

Built with future expansion in mind, including not defaulting matching on
Concurrency so more constructors can later be added, and using "cpu"
instead of "0".
This commit is contained in:
Joey Hess 2019-05-10 13:24:31 -04:00
parent 459bbd9005
commit 82186ca58f
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
20 changed files with 105 additions and 32 deletions

View file

@ -189,8 +189,9 @@ prepSocket socketfile sshhost sshparams = do
let socketlock = socket2lock socketfile
Annex.getState Annex.concurrency >>= \case
NonConcurrent -> return ()
Concurrent {} -> makeconnection socketlock
_ -> return ()
ConcurrentPerCpu -> makeconnection socketlock
lockFileCached socketlock
where

View file

@ -247,20 +247,29 @@ pickRemote l a = debugLocks $ go l =<< Annex.getState Annex.concurrency
where
go [] _ = return observeFailure
go (r:[]) _ = a r
go rs (Concurrent n) | n > 1 = do
mv <- Annex.getState Annex.activeremotes
active <- liftIO $ takeMVar mv
let rs' = sortBy (lessActiveFirst active) rs
goconcurrent mv active rs'
go (r:rs) _ = do
go rs NonConcurrent = gononconcurrent rs
go rs (Concurrent n)
| n <= 1 = gononconcurrent rs
| otherwise = goconcurrent rs
go rs ConcurrentPerCpu = goconcurrent rs
gononconcurrent [] = return observeFailure
gononconcurrent (r:rs) = do
ok <- a r
if observeBool ok
then return ok
else go rs NonConcurrent
goconcurrent mv active [] = do
else gononconcurrent rs
goconcurrent rs = do
mv <- Annex.getState Annex.activeremotes
active <- liftIO $ takeMVar mv
let rs' = sortBy (lessActiveFirst active) rs
goconcurrent' mv active rs'
goconcurrent' mv active [] = do
liftIO $ putMVar mv active
return observeFailure
goconcurrent mv active (r:rs) = do
goconcurrent' mv active (r:rs) = do
let !active' = M.insertWith (+) r 1 active
liftIO $ putMVar mv active'
let getnewactive = do
@ -279,7 +288,7 @@ pickRemote l a = debugLocks $ go l =<< Annex.getState Annex.concurrency
-- because other threads could have
-- been assigned them in the meantime.
let rs' = sortBy (lessActiveFirst active'') rs
goconcurrent mv active'' rs'
goconcurrent' mv active'' rs'
lessActiveFirst :: M.Map Remote Integer -> Remote -> Remote -> Ordering
lessActiveFirst active a b

View file

@ -3,6 +3,8 @@ git-annex (7.20190508) UNRELEASED; urgency=medium
* Fixed bug that caused git-annex to fail to add a file when another
git-annex process cleaned up the temp directory it was using.
* Makefile: Added install-completions to install target.
* Added the ability to run one job per CPU (core), by setting
annex.jobs=cpus, or using option --jobs=cpus or -Jcpus.
-- Joey Hess <id@joeyh.name> Mon, 06 May 2019 13:52:02 -0400

View file

@ -22,6 +22,7 @@ import Control.Concurrent
import Control.Concurrent.Async
import Control.Concurrent.STM
import Control.Exception (throwIO)
import GHC.Conc
import Data.Either
import qualified Data.Map.Strict as M
import qualified System.Console.Regions as Regions
@ -51,9 +52,14 @@ performCommandAction Command { cmdcheck = c, cmdname = name } seek cont = do
- This should only be run in the seek stage.
-}
commandAction :: CommandStart -> Annex ()
commandAction a = go =<< Annex.getState Annex.concurrency
commandAction a = Annex.getState Annex.concurrency >>= \case
NonConcurrent -> run
Concurrent n -> runconcurrent n
ConcurrentPerCpu -> runconcurrent =<< liftIO getNumProcessors
where
go (Concurrent n) = do
run = void $ includeCommandAction a
runconcurrent n = do
ws <- Annex.getState Annex.workers
(st, ws') <- if null ws
then do
@ -71,8 +77,6 @@ commandAction a = go =<< Annex.getState Annex.concurrency
w <- liftIO $ async
$ snd <$> Annex.run st (inOwnConsoleRegion (Annex.output st) run)
Annex.changeState $ \s -> s { Annex.workers = Right w:ws' }
go NonConcurrent = run
run = void $ includeCommandAction a
commandActions :: [CommandStart] -> Annex ()
commandActions = mapM_ commandAction
@ -170,18 +174,23 @@ allowConcurrentOutput :: Annex a -> Annex a
allowConcurrentOutput a = do
fromcmdline <- Annex.getState Annex.concurrency
fromgitcfg <- annexJobs <$> Annex.getGitConfig
let usegitcfg = Annex.changeState $
\c -> c { Annex.concurrency = fromgitcfg }
case (fromcmdline, fromgitcfg) of
(NonConcurrent, NonConcurrent) -> a
(Concurrent n, _) -> goconcurrent n
(Concurrent n, _) -> do
raisecapabilitiesto n
goconcurrent
(ConcurrentPerCpu, _) -> goconcurrent
(NonConcurrent, Concurrent n) -> do
Annex.changeState $
\c -> c { Annex.concurrency = fromgitcfg }
goconcurrent n
usegitcfg
raisecapabilitiesto n
goconcurrent
(NonConcurrent, ConcurrentPerCpu) -> do
usegitcfg
goconcurrent
where
goconcurrent n = do
c <- liftIO getNumCapabilities
when (n > c) $
liftIO $ setNumCapabilities n
goconcurrent = do
withMessageState $ \s -> case outputType s of
NormalOutput -> ifM (liftIO concurrentOutputSupported)
( Regions.displayConsoleRegions $
@ -190,13 +199,21 @@ allowConcurrentOutput a = do
)
_ -> goconcurrent' False
goconcurrent' b = bracket_ (setup b) cleanup a
setup = setconcurrentoutputenabled
cleanup = do
finishCommandActions
setconcurrentoutputenabled False
setconcurrentoutputenabled b = Annex.changeState $ \s ->
s { Annex.output = (Annex.output s) { concurrentOutputEnabled = b } }
raisecapabilitiesto n = do
c <- liftIO getNumCapabilities
when (n > c) $
liftIO $ setNumCapabilities n
{- Ensures that only one thread processes a key at a time.
- Other threads will block until it's done. -}
onlyActionOn :: Key -> CommandStart -> CommandStart
@ -212,7 +229,9 @@ onlyActionOn' :: Key -> Annex a -> Annex a
onlyActionOn' k a = go =<< Annex.getState Annex.concurrency
where
go NonConcurrent = a
go (Concurrent _) = do
go (Concurrent _) = goconcurrent
go ConcurrentPerCpu = goconcurrent
goconcurrent = do
tv <- Annex.getState Annex.activekeys
bracket (setup tv) id (const a)
setup tv = liftIO $ do

View file

@ -366,14 +366,15 @@ jsonProgressOption =
jobsOption :: [GlobalOption]
jobsOption =
[ globalSetter set $
option auto
( long "jobs" <> short 'J' <> metavar paramNumber
option (maybeReader parseConcurrency)
( long "jobs" <> short 'J'
<> metavar (paramNumber `paramOr` "cpu")
<> help "enable concurrent jobs"
<> hidden
)
]
where
set n = Annex.changeState $ \s -> s { Annex.concurrency = Concurrent n }
set v = Annex.changeState $ \s -> s { Annex.concurrency = v }
timeLimitOption :: [GlobalOption]
timeLimitOption =

View file

@ -262,10 +262,12 @@ implicitMessage = whenM (implicitMessages <$> Annex.getState Annex.output)
- the user.
-}
prompt :: Annex a -> Annex a
prompt a = debugLocks $ go =<< Annex.getState Annex.concurrency
prompt a = debugLocks $ Annex.getState Annex.concurrency >>= \case
NonConcurrent -> a
(Concurrent _) -> goconcurrent
ConcurrentPerCpu -> goconcurrent
where
go NonConcurrent = a
go (Concurrent {}) = withMessageState $ \s -> do
goconcurrent = withMessageState $ \s -> do
let l = promptLock s
bracketIO
(takeMVar l)

View file

@ -5,4 +5,11 @@
module Types.Concurrency where
data Concurrency = NonConcurrent | Concurrent Int
import Utility.PartialPrelude
data Concurrency = NonConcurrent | Concurrent Int | ConcurrentPerCpu
parseConcurrency :: String -> Maybe Concurrency
parseConcurrency "cpus" = Just ConcurrentPerCpu
parseConcurrency "cpu" = Just ConcurrentPerCpu
parseConcurrency s = Concurrent <$> readish s

View file

@ -177,7 +177,8 @@ extractGitConfig r = GitConfig
, annexAllowUnverifiedDownloads = (== Just "ACKTHPPT") $
getmaybe (annex "security.allow-unverified-downloads")
, annexMaxExtensionLength = getmayberead (annex "maxextensionlength")
, annexJobs = maybe NonConcurrent Concurrent $ getmayberead (annex "jobs")
, annexJobs = fromMaybe NonConcurrent $
parseConcurrency =<< getmaybe (annex "jobs")
, annexCacheCreds = getbool (annex "cachecreds") True
, coreSymlinks = getbool "core.symlinks" True
, coreSharedRepository = getSharedRepository r

View file

@ -56,6 +56,8 @@ annexed content, and other symlinks.
Adds multiple files in parallel. This may be faster.
For example: `-J4`
Setting this to "cpus" will run one job per CPU core.
* `--update` `-u`
Like `git add --update`, this does not add new files, but any updates

View file

@ -78,6 +78,8 @@ be used to get better filenames.
Enables parallel downloads when multiple urls are being added.
For example: `-J4`
Setting this to "cpus" will run one job per CPU core.
* `--batch`
Enables batch mode, in which lines containing urls to add are read from

View file

@ -34,6 +34,8 @@ Copies the content of files from or to another remote.
Enables parallel transfers with up to the specified number of jobs
running at once. For example: `-J10`
Setting this to "cpus" will run one job per CPU core.
* `--auto`
Rather than copying all files, only copy files that don't yet have

View file

@ -77,6 +77,8 @@ safe to do so.
when git-annex has to contact remotes to check if it can drop files.
For example: `-J4`
Setting this to "cpus" will run one job per CPU core.
* `--batch`
Enables batch mode, in which lines containing names of files to drop

View file

@ -93,6 +93,8 @@ With parameters, only the specified files are checked.
Runs multiple fsck jobs in parallel. For example: `-J4`
Setting this to "cpus" will run one job per CPU core.
* `--json`
Enable JSON output. This is intended to be parsed by programs that use

View file

@ -33,6 +33,8 @@ or transferring them from some kind of key-value store.
Enables parallel download with up to the specified number of jobs
running at once. For example: `-J10`
Setting this to "cpus" will run one job per CPU core.
When files can be downloaded from multiple remotes, enabling parallel
downloads will split the load between the remotes. For example, if
the files are available on remotes A and B, then one file will be

View file

@ -141,6 +141,8 @@ and `--reinject-duplicates` documentation below.
Imports multiple files in parallel. This may be faster.
For example: `-J4`
Setting this to "cpus" will run one job per CPU core.
* `--json`
Enable JSON output. This is intended to be parsed by programs that use

View file

@ -36,6 +36,8 @@ contents. Use [[git-annex-sync]](1) for that.
Enables parallel transfers with up to the specified number of jobs
running at once. For example: `-J10`
Setting this to "cpus" will run one job per CPU core.
* `--all` `-A`
Mirror all objects stored in the git annex, not only objects used by

View file

@ -40,6 +40,8 @@ Moves the content of files from or to another remote.
Enables parallel transfers with up to the specified number of jobs
running at once. For example: `-J10`
Setting this to "cpus" will run one job per CPU core.
* `--all` `-A`
Rather than specifying a filename or path to move, this option can be

View file

@ -112,6 +112,8 @@ by running "git annex sync" on the remote.
Enables parallel syncing with up to the specified number of jobs
running at once. For example: `-J10`
Setting this to "cpus" will run one job per CPU core.
When there are multiple git remotes, pushes will be made to them in
parallel. Pulls are not done in parallel because that tends to be
less efficient. When --content is synced, the files are processed

View file

@ -952,6 +952,8 @@ Here are all the supported configuration settings.
Only git-annex commands that support the --jobs option will
use this.
Setting this to "cpus" will run one job per CPU core.
* `annex.queuesize`
git-annex builds a queue of git commands, in order to combine similar

View file

@ -1 +1,10 @@
Can you add a global config flag to tell parallelizable commands to use all available cores? Often I forget to add -JN when it would have sped things up.
> Added as --jobs=cpus / annex.jobs=cpus. This will allow
> later expansion, perhaps `--jobs=cpus-1` to leave a spare core
> or `--jobs=remotes*2` to run two jobs per remote, or things like that.
>
> It's a bit more typing than -J0, but since it can be configured once in
> annex.jobs, that seemed an acceptable tradeoff to future proof it.
>
> [[done]] --[[Joey]]