test: Added --jobs option

Default to the number of CPU cores, which seems about optimal on my laptop. Using one more saves me 2 seconds actually. Better packing of workers improves speed significantly. In 2 test runs, I saw segfaulting workers despite my attempt to work around that issue. So detect when a worker does, and re-run it. Removed installSignalHandlers again, because I was seeing an error "lost signal due to full pipe", which I guess was somehow caused by using it. Sponsored-by: Dartmouth College's Datalad project
2022-03-16 14:42:07 -04:00 · 2022-03-16 14:42:07 -04:00 · 025c18128b
commit 025c18128b
parent be31a8a3d2
6 changed files with 77 additions and 24 deletions
--- a/2
+++ b/2
@ -19,6 +19,8 @@ git-annex (10.20220223) UNRELEASED; urgency=medium
    standalone linux tarball or OSX app.
  * Fix build with aeson 2.0.
    Thanks, sternenseemann for the patch.
+  * test: Runs tests in parallel to speed up the test suite.
+  * test: Added --jobs option.

 -- Joey Hess <id@joeyh.name>  Wed, 23 Feb 2022 14:14:09 -0400

--- a/Test.hs
+++ b/Test.hs
@ -11,13 +11,14 @@ module Test where

 import Types.Test
 import Types.RepoVersion
+import Types.Concurrency
 import Test.Framework
 import Options.Applicative.Types

 import Test.Tasty
 import Test.Tasty.HUnit
 import Test.Tasty.QuickCheck
-import Options.Applicative (switch, long, help, internal)
+import Options.Applicative (switch, long, short, help, internal, maybeReader, option)

 import qualified Data.Map as M
 import qualified Data.ByteString.Lazy.UTF8 as BU8
@ -90,7 +91,7 @@ import qualified Utility.Gpg

 optParser :: Parser TestOptions
 optParser = TestOptions
-	<$> snd (tastyParser (tests 1 False True mempty))
+	<$> snd (tastyParser (tests 1 False True (TestOptions mempty False False Nothing mempty)))
 	<*> switch
 		( long "keep-failures"
 		<> help "preserve repositories on test failure"
@ -99,6 +100,11 @@ optParser = TestOptions
 		( long "fakessh"
 		<> internal
 		)
+	<*> optional (option (maybeReader parseConcurrency)
+		( long "jobs"
+		<> short 'J'
+		<> help "number of concurrent jobs"
+		))
 	<*> cmdParams "non-options are for internal use only"

 runner :: TestOptions -> IO ()
--- a/Test/Framework.hs
+++ b/Test/Framework.hs
@ -19,12 +19,15 @@ import Test.Tasty.Ingredients.ConsoleReporter
 import Options.Applicative.Types
 import Control.Concurrent
 import Control.Concurrent.Async
+import Control.Concurrent.STM
 import System.Environment (getArgs)
 import System.Console.Concurrent
 import System.Console.ANSI
+import GHC.Conc

 import Common
 import Types.Test
+import Types.Concurrency

 import qualified Annex
 import qualified Annex.UUID
@ -676,13 +679,35 @@ make_writeable d = void $
 - prevents some failures to clean up after the test suite.
 -}
 parallelTestRunner :: TestOptions -> (Int -> Bool -> Bool -> TestOptions -> [TestTree]) -> IO ()
-parallelTestRunner opts mkts
+parallelTestRunner opts mkts = do
+	numjobs <- case concurrentJobs opts of -- choose the worker count, then delegate to parallelTestRunner'
+		Just NonConcurrent -> pure 1
+		Just (Concurrent n) -> pure (max 1 n) -- clamp: with n < 1 no worker starts and the run would still exit successfully
+		Just ConcurrentPerCpu -> getNumProcessors
+		Nothing -> getNumProcessors -- no --jobs given: default to one job per processor
+	parallelTestRunner' numjobs opts mkts
+
+parallelTestRunner' :: Int -> TestOptions -> (Int -> Bool -> Bool -> TestOptions -> [TestTree]) -> IO ()
+parallelTestRunner' numjobs opts mkts
 	| fakeSsh opts = runFakeSsh (internalData opts)
 	| otherwise = go =<< Utility.Env.getEnv subenv
  where
-	numparts = 1
 	subenv = "GIT_ANNEX_TEST_SUBPROCESS"
-	go Nothing = do
+	-- Make more parts than there are jobs, because some parts
+	-- are larger, and this allows the smaller parts to be packed
+	-- in more efficiently, speeding up the test suite overall.
+	numparts = numjobs * 2
+	worker rs nvar a = do
+		(n, m) <- atomically $ do
+			(n, m) <- readTVar nvar
+			writeTVar nvar (n+1, m)
+			return (n, m)
+		if n > m
+			then return rs
+			else do
+				r <- a n
+				worker (r:rs) nvar a
+	go Nothing = withConcurrentOutput $ do
 		ensuredir tmpdir
 		crippledfilesystem <- fst <$> Annex.Init.probeCrippledFileSystem'
 			(toRawFilePath tmpdir)
@ -700,7 +725,7 @@ parallelTestRunner opts mkts
 		let ps = if useColor (lookupOption (tastyOptionSet opts)) termcolor
 			then "--color=always":args
 			else "--color=never":args
-		exitcodes <- withConcurrentOutput $ forConcurrently [1..length ts] $ \n -> do
+		let runone n = do
 			let subdir = tmpdir </> show n
 			ensuredir subdir
 			let p = (proc pp ps)
@ -708,11 +733,21 @@ parallelTestRunner opts mkts
 				, cwd = Just subdir
 				}
 			(_, _, _, pid) <- createProcessConcurrent p
-			waitForProcess pid
+			ret <- waitForProcess pid
+			-- Work around this strange issue
+			-- https://github.com/UnkindPartition/tasty/issues/326
+			-- when other workaround does not work.
+			if ret == ExitFailure (-11)
+				then runone n
+				else return ret
+		nvar <- newTVarIO (1, length ts)
+		exitcodes <- forConcurrently [1..numjobs] $ \_ -> 
+			worker [] nvar runone
+		let exitcodes' = concat exitcodes
 		unless (keepFailuresOption opts) finalCleanup
-		if all (== ExitSuccess) exitcodes
+		if all (== ExitSuccess) exitcodes'
 			then exitSuccess
-			else case (filter (/= ExitFailure 1) exitcodes) of
+			else case (filter (/= ExitFailure 1) exitcodes') of
 				[] -> do
 					putStrLn "  (Failures above could be due to a bug in git-annex, or an incompatibility"
 					putStrLn "   with utilities, such as git, installed on this system.)"
@ -732,7 +767,6 @@ parallelTestRunner opts mkts
 					]
 				, ts !! (n - 1)
 				]
-			installSignalHandlers
 			case tryIngredients ingredients (tastyOptionSet opts) t of
 				Nothing -> error "No tests found!?"
 				Just act -> ifM act
--- a/Types/Test.hs
+++ b/Types/Test.hs
@ -1,6 +1,6 @@
 {- git-annex test data types.
 -
- - Copyright 2011-2017 Joey Hess <id@joeyh.name>
+ - Copyright 2011-2022 Joey Hess <id@joeyh.name>
 -
 - Licensed under the GNU AGPL version 3 or higher.
 -}
@ -8,27 +8,16 @@
 module Types.Test where

 import Test.Tasty.Options
-import Data.Monoid
-import qualified Data.Semigroup as Sem
-import Prelude

+import Types.Concurrency
 import Types.Command

 data TestOptions = TestOptions
 	{ tastyOptionSet :: OptionSet
 	, keepFailuresOption :: Bool
 	, fakeSsh :: Bool
+	, concurrentJobs :: Maybe Concurrency -- ^ value of --jobs; Nothing when not given (runner then uses one job per processor)
 	, internalData :: CmdParams
 	}

-instance Sem.Semigroup TestOptions where
-	a <> b = TestOptions
-		(tastyOptionSet a <> tastyOptionSet b)
-		(keepFailuresOption a || keepFailuresOption b)
-		(fakeSsh a || fakeSsh b)
-		(internalData a <> internalData b)
-
-instance Monoid TestOptions where
-	mempty = TestOptions mempty False False mempty
-
 type TestRunner = TestOptions -> IO ()
--- a/doc/git-annex-test.mdwn
+++ b/doc/git-annex-test.mdwn
@ -20,6 +20,11 @@ or to verify your local installation of git-annex.
 There are several options, provided by Haskell's tasty test
 framework. Pass --help for details about those.

+* `--jobs=N` `-JN`
+
+  How many tests to run in parallel. The default is "cpus", which will
+  run one job per CPU core.
+
 * `--keep-failures`

  When there are test failures, leave the `.t` directory populated with
--- a/doc/todo/speed_up_34standalone_build34_and47or_tests/comment_9_2253c6916a1c6e4f1393b8656617aa10._comment
+++ b/doc/todo/speed_up_34standalone_build34_and47or_tests/comment_9_2253c6916a1c6e4f1393b8656617aa10._comment
@ -0,0 +1,17 @@
+[[!comment format=mdwn
+ username="joey"
+ subject="""comment 9"""
+ date="2022-03-16T17:55:52Z"
+ content="""
+I've finished up parallelizing git-annex test.
+
+Splitting up the test groups further and improving the scheduling sped it up more.
+On my laptop, it's dropped from 444 to 334 to now 289 seconds.
+
+Also, the `-J` option is now supported by git-annex test, so you can experiment
+to find the number of jobs where it runs fastest in your particular situation.
+The default is one job per CPU core.
+
+My guess is that on NFS, it's not CPU bound but is network latency bound,
+and so a rather high -J value like -J10 may behave better.
+"""]]