test: Added --jobs option

Default to the number of CPU cores, which seems about optimal on my laptop. Using one more saves me 2 seconds actually. Better packing of workers improves speed significantly. In 2 tests runs, I saw segfaulting workers despite my attempt to work around that issue. So detect when a worker does, and re-run it. Removed installSignalHandlers again, because I was seeing an error "lost signal due to full pipe", which I guess was somehow caused by using it. Sponsored-by: Dartmouth College's Datalad project
2022-03-16 14:42:07 -04:00 · 2022-03-16 14:42:07 -04:00 · 025c18128b
commit 025c18128b
parent be31a8a3d2
6 changed files with 77 additions and 24 deletions
--- a/2
+++ b/2
@ -19,6 +19,8 @@ git-annex (10.20220223) UNRELEASED; urgency=medium
    standalone linux tarball or OSX app.
  * Fix build with aeson 2.0.
    Thanks, sternenseemann for the patch.
  * test: Runs tests in parallel to speed up the test suite.
  * test: Added --jobs option.
 -- Joey Hess <id@joeyh.name>  Wed, 23 Feb 2022 14:14:09 -0400
--- a/Test.hs
+++ b/Test.hs
@ -11,13 +11,14 @@ module Test where
 import Types.Test
 import Types.RepoVersion
 import Types.Concurrency
 import Test.Framework
 import Options.Applicative.Types
 import Test.Tasty
 import Test.Tasty.HUnit
 import Test.Tasty.QuickCheck
-import Options.Applicative (switch, long, help, internal)
+import Options.Applicative (switch, long, short, help, internal, maybeReader, option)
 import qualified Data.Map as M
 import qualified Data.ByteString.Lazy.UTF8 as BU8
@ -90,7 +91,7 @@ import qualified Utility.Gpg
 optParser :: Parser TestOptions
 optParser = TestOptions
-	<$> snd (tastyParser (tests 1 False True mempty))
+	<$> snd (tastyParser (tests 1 False True (TestOptions mempty False False Nothing mempty)))
 	<*> switch
 		( long "keep-failures"
 		<> help "preserve repositories on test failure"
@ -99,6 +100,11 @@ optParser = TestOptions
 		( long "fakessh"
 		<> internal
 		)
 	<*> optional (option (maybeReader parseConcurrency)
 		( long "jobs"
 		<> short 'J'
 		<> help "number of concurrent jobs"
 		))
 	<*> cmdParams "non-options are for internal use only"
 runner :: TestOptions -> IO ()
--- a/Test/Framework.hs
+++ b/Test/Framework.hs
@ -19,12 +19,15 @@ import Test.Tasty.Ingredients.ConsoleReporter
 import Options.Applicative.Types
 import Control.Concurrent
 import Control.Concurrent.Async
 import Control.Concurrent.STM
 import System.Environment (getArgs)
 import System.Console.Concurrent
 import System.Console.ANSI
 import GHC.Conc
 import Common
 import Types.Test
 import Types.Concurrency
 import qualified Annex
 import qualified Annex.UUID
@ -676,13 +679,35 @@ make_writeable d = void $
 - prevents some failures to clean up after the test suite.
 -}
 parallelTestRunner :: TestOptions -> (Int -> Bool -> Bool -> TestOptions -> [TestTree]) -> IO ()
-parallelTestRunner opts mkts
+parallelTestRunner opts mkts = do
 	numjobs <- case concurrentJobs opts of
 		Just NonConcurrent -> pure 1
 		Just (Concurrent n) -> pure n
 		Just ConcurrentPerCpu -> getNumProcessors
 		Nothing -> getNumProcessors
 	parallelTestRunner' numjobs opts mkts
 parallelTestRunner' :: Int -> TestOptions -> (Int -> Bool -> Bool -> TestOptions -> [TestTree]) -> IO ()
 parallelTestRunner' numjobs opts mkts
 	| fakeSsh opts = runFakeSsh (internalData opts)
 	| otherwise = go =<< Utility.Env.getEnv subenv
  where
 	numparts = 1
 	subenv = "GIT_ANNEX_TEST_SUBPROCESS"
-	go Nothing = do
+	-- Make more parts than there are jobs, because some parts
 	-- are larger, and this allows the smaller parts to be packed
 	-- in more efficiently, speeding up the test suite overall.
 	numparts = numjobs * 2
 	worker rs nvar a = do
 		(n, m) <- atomically $ do
 			(n, m) <- readTVar nvar
 			writeTVar nvar (n+1, m)
 			return (n, m)
 		if n > m
 			then return rs
 			else do
 				r <- a n
 				worker (r:rs) nvar a
 	go Nothing = withConcurrentOutput $ do
 		ensuredir tmpdir
 		crippledfilesystem <- fst <$> Annex.Init.probeCrippledFileSystem'
 			(toRawFilePath tmpdir)
@ -700,7 +725,7 @@ parallelTestRunner opts mkts
 		let ps = if useColor (lookupOption (tastyOptionSet opts)) termcolor
 			then "--color=always":args
 			else "--color=never":args
-		exitcodes <- withConcurrentOutput $ forConcurrently [1..length ts] $ \n -> do
+		let runone n = do
 			let subdir = tmpdir </> show n
 			ensuredir subdir
 			let p = (proc pp ps)
@ -708,11 +733,21 @@ parallelTestRunner opts mkts
 				, cwd = Just subdir
 				}
 			(_, _, _, pid) <- createProcessConcurrent p
-			waitForProcess pid
+			ret <- waitForProcess pid
 			-- Work around this strange issue
 			-- https://github.com/UnkindPartition/tasty/issues/326
 			-- when other workaround does not work.
 			if ret == ExitFailure (-11)
 				then runone n
 				else return ret
 		nvar <- newTVarIO (1, length ts)
 		exitcodes <- forConcurrently [1..numjobs] $ \_ -> 
 			worker [] nvar runone
 		let exitcodes' = concat exitcodes
 		unless (keepFailuresOption opts) finalCleanup
-		if all (== ExitSuccess) exitcodes
+		if all (== ExitSuccess) exitcodes'
 			then exitSuccess
-			else case (filter (/= ExitFailure 1) exitcodes) of
+			else case (filter (/= ExitFailure 1) exitcodes') of
 				[] -> do
 					putStrLn "  (Failures above could be due to a bug in git-annex, or an incompatibility"
 					putStrLn "   with utilities, such as git, installed on this system.)"
@ -732,7 +767,6 @@ parallelTestRunner opts mkts
 					]
 				, ts !! (n - 1)
 				]
 			installSignalHandlers
 			case tryIngredients ingredients (tastyOptionSet opts) t of
 				Nothing -> error "No tests found!?"
 				Just act -> ifM act
--- a/Types/Test.hs
+++ b/Types/Test.hs
@ -1,6 +1,6 @@
 {- git-annex test data types.
 -
- - Copyright 2011-2017 Joey Hess <id@joeyh.name>
+ - Copyright 2011-2022 Joey Hess <id@joeyh.name>
 -
 - Licensed under the GNU AGPL version 3 or higher.
 -}
@ -8,27 +8,16 @@
 module Types.Test where
 import Test.Tasty.Options
 import Data.Monoid
 import qualified Data.Semigroup as Sem
 import Prelude
 import Types.Concurrency
 import Types.Command
 data TestOptions = TestOptions
 	{ tastyOptionSet :: OptionSet
 	, keepFailuresOption :: Bool
 	, fakeSsh :: Bool
 	, concurrentJobs :: Maybe Concurrency
 	, internalData :: CmdParams
 	}
 instance Sem.Semigroup TestOptions where
 	a <> b = TestOptions
 		(tastyOptionSet a <> tastyOptionSet b)
 		(keepFailuresOption a || keepFailuresOption b)
 		(fakeSsh a || fakeSsh b)
 		(internalData a <> internalData b)
 instance Monoid TestOptions where
 	mempty = TestOptions mempty False False mempty
 type TestRunner = TestOptions -> IO ()
--- a/doc/git-annex-test.mdwn
+++ b/doc/git-annex-test.mdwn
@ -20,6 +20,11 @@ or to verify your local installation of git-annex.
 There are several options, provided by Haskell's tasty test
 framework. Pass --help for details about those.
 * `--jobs=N` `-JN`
  How many tests to run in parallel. The default is "cpus", which will
  runs one job per CPU core.
 * `--keep-failures`
  When there are test failures, leave the `.t` directory populated with
--- a/doc/todo/speed_up_34standalone_build34_and47or_tests/comment_9_2253c6916a1c6e4f1393b8656617aa10._comment
+++ b/doc/todo/speed_up_34standalone_build34_and47or_tests/comment_9_2253c6916a1c6e4f1393b8656617aa10._comment
@ -0,0 +1,17 @@
 [[!comment format=mdwn
 username="joey"
 subject="""comment 9"""
 date="2022-03-16T17:55:52Z"
 content="""
 I've finished up parallelizing git-annex test.
 Splitting up the test groups further and improved scheduling sped it up more.
 On my laptop, it's dropped from 444 to 334 to now 289 seconds.
 Also, the `-J` option is now supported by git-annex test, so you can experiment
 to find the number of jobs where it runs fastest in your particular situation.
 The default is one job per CPU core.
 My guess is that on NFS, it's not CPU bound but is network latency bound,
 and so a rather high -J value like -J10 may behave better.
 """]]