parallelize git-annex test for 25% speedup

Note the very weird workaround for what appears to be some kind of tasty bug, which causes a segfault. This is not new to this modification, I was seeing a segfault before at least intermittently when limiting git-annex test -p to only run a single test group. Also, the path from one test repo to a remote test repo used to be "../../foo", which somehow broke when moving the test repos from .t to .t/N. I don't actually quite understand how it used to work, but "../foo" seems correct and works in the new situation. Test output from the concurrent processes is not yet serialized. Should be easy to do using concurrent-output. More test groups will probably make the speedup larger. It would probably be best to have a larger number of test groups and divvy them amoung subprocesses numbered based on the number of CPU cores, perhaps times 2 or 3. Sponsored-by: Dartmouth College's Datalad project
2022-03-14 15:24:37 -04:00 · 2022-03-14 15:24:37 -04:00 · 8d14ce8f38
commit 8d14ce8f38
parent 62e6c6afb9
3 changed files with 132 additions and 80 deletions
--- a/Test.hs
+++ b/Test.hs
@ -15,16 +15,12 @@ import Test.Framework
 import Options.Applicative.Types

 import Test.Tasty
-import Test.Tasty.Runners
 import Test.Tasty.HUnit
 import Test.Tasty.QuickCheck
-import Test.Tasty.Ingredients.Rerun
-import Test.Tasty.Options
 import Options.Applicative (switch, long, help, internal)

 import qualified Data.Map as M
 import qualified Data.ByteString.Lazy.UTF8 as BU8
-import System.Environment
 import Control.Concurrent.STM hiding (check)

 import Common
@ -60,12 +56,9 @@ import qualified Config.Cost
 import qualified Crypto
 import qualified Database.Keys
 import qualified Annex.WorkTree
-import qualified Annex.Init
 import qualified Annex.CatFile
-import qualified Annex.Path
 import qualified Annex.VectorClock
 import qualified Annex.VariantFile
-import qualified Annex.AdjustedBranch
 import qualified Annex.View
 import qualified Annex.View.ViewedFile
 import qualified Logs.View
@ -78,8 +71,6 @@ import qualified Utility.Verifiable
 import qualified Utility.Process
 import qualified Utility.Misc
 import qualified Utility.InodeCache
-import qualified Utility.Env
-import qualified Utility.Env.Set
 import qualified Utility.Matcher
 import qualified Utility.Hash
 import qualified Utility.Scheduled
@ -99,7 +90,7 @@ import qualified Utility.Gpg

 optParser :: Parser TestOptions
 optParser = TestOptions
-	<$> snd tastyParser
+	<$> snd (tastyParser (tests False True mempty))
 	<*> switch
 		( long "keep-failures"
 		<> help "preserve repositories on test failure"
@ -110,62 +101,12 @@ optParser = TestOptions
 		)
 	<*> cmdParams "non-options are for internal use only"

-tastyParser :: ([String], Parser Test.Tasty.Options.OptionSet)
-#if MIN_VERSION_tasty(1,3,0)
-tastyParser = go
-#else
-tastyParser = ([], go)
-#endif
-  where
- 	go = suiteOptionParser ingredients (tests False True mempty)
-
 runner :: TestOptions -> IO ()
-runner opts
-	| fakeSsh opts = runFakeSsh (internalData opts)
-	| otherwise = runsubprocesstests =<< Utility.Env.getEnv subenv
-  where
-	-- Run git-annex test in a subprocess, so that any files
-	-- it may open will be closed before running finalCleanup.
-	-- This should prevent most failures to clean up after the test
-	-- suite.
-	subenv = "GIT_ANNEX_TEST_SUBPROCESS"
-	runsubprocesstests Nothing = do
-		let warnings = fst tastyParser
-		unless (null warnings) $ do
-			hPutStrLn stderr "warnings from tasty:"
-			mapM_ (hPutStrLn stderr) warnings
-		pp <- Annex.Path.programPath
-		Utility.Env.Set.setEnv subenv "1" True
-		ps <- getArgs
-		exitcode <- withCreateProcess (proc pp ps) $
-			\_ _ _ pid -> waitForProcess pid
-		unless (keepFailuresOption opts) finalCleanup
-		exitWith exitcode
-	runsubprocesstests (Just _) = isolateGitConfig $ do
-		ensuretmpdir
-		crippledfilesystem <- fst <$> Annex.Init.probeCrippledFileSystem'
-			(toRawFilePath tmpdir)
-			Nothing Nothing False
-		adjustedbranchok <- Annex.AdjustedBranch.isGitVersionSupported
-		case tryIngredients ingredients (tastyOptionSet opts) (tests crippledfilesystem adjustedbranchok opts) of
-			Nothing -> error "No tests found!?"
-			Just act -> ifM act
-				( exitSuccess
-				, do
-					putStrLn "  (Failures above could be due to a bug in git-annex, or an incompatibility"
-					putStrLn "   with utilities, such as git, installed on this system.)"
-					exitFailure
-				)
+runner opts = parallelTestRunner opts tests

-ingredients :: [Ingredient]
-ingredients =
-	[ listingTests
-	, rerunningTests [consoleTestReporter]
-	]
-
-tests :: Bool -> Bool -> TestOptions -> TestTree
+tests :: Bool -> Bool -> TestOptions -> [TestTree]
 tests crippledfilesystem adjustedbranchok opts = 
-	testGroup "Tests" $ properties 
+	properties 
 		: withTestMode remotetestmode Nothing testRemotes
 		: map (\(d, te) -> withTestMode te (Just initTests) (unitTests d)) testmodes
  where
@ -1228,11 +1169,11 @@ test_union_merge_regression =
 			withtmpclonerepo $ \r3 -> do
 				forM_ [r1, r2, r3] $ \r -> indir r $ do
 					when (r /= r1) $
-						git "remote" ["add", "r1", "../../" ++ r1] "remote add"
+						git "remote" ["add", "r1", "../" ++ r1] "remote add"
 					when (r /= r2) $
-						git "remote" ["add", "r2", "../../" ++ r2] "remote add"
+						git "remote" ["add", "r2", "../" ++ r2] "remote add"
 					when (r /= r3) $
-						git "remote" ["add", "r3", "../../" ++ r3] "remote add"
+						git "remote" ["add", "r3", "../" ++ r3] "remote add"
 					git_annex "get" [annexedfile] "get"
 					git "remote" ["rm", "origin"] "remote rm"
 				forM_ [r3, r2, r1] $ \r -> indir r $
--- a/Test/Framework.hs
+++ b/Test/Framework.hs
@ -1,6 +1,6 @@
 {- git-annex test suite framework
 -
- - Copyright 2010-2021 Joey Hess <id@joeyh.name>
+ - Copyright 2010-2022 Joey Hess <id@joeyh.name>
 -
 - Licensed under the GNU AGPL version 3 or higher.
 -}
@ -12,7 +12,13 @@ module Test.Framework where
 import Test.Tasty
 import Test.Tasty.Runners
 import Test.Tasty.HUnit
+import Test.Tasty.QuickCheck
+import Test.Tasty.Options
+import Test.Tasty.Ingredients.Rerun
+import Options.Applicative.Types
 import Control.Concurrent
+import Control.Concurrent.Async
+import System.Environment (getArgs)

 import Common
 import Types.Test
@ -39,6 +45,7 @@ import qualified Annex.Link
 import qualified Annex.Path
 import qualified Annex.Action
 import qualified Annex.AdjustedBranch
+import qualified Annex.Init
 import qualified Utility.Process
 import qualified Utility.Process.Transcript
 import qualified Utility.Env
@ -166,7 +173,7 @@ indir dir a = do
 	-- any type of error and change back to currdir before
 	-- rethrowing.
 	r <- bracket_
-		(changeToTmpDir dir)
+		(changeToTopDir dir)
 		(setCurrentDirectory currdir)
 		(tryNonAsync a)
 	case r of
@ -179,7 +186,6 @@ adjustedbranchsupported repo = indir repo $ Annex.AdjustedBranch.isGitVersionSup
 setuprepo :: FilePath -> IO FilePath
 setuprepo dir = do
 	cleanup dir
-	ensuretmpdir
 	git "init" ["-q", dir] "git init"
 	configrepo dir
 	return dir
@ -199,7 +205,6 @@ newCloneRepoConfig = CloneRepoConfig
 clonerepo :: FilePath -> FilePath -> CloneRepoConfig -> IO FilePath
 clonerepo old new cfg = do
 	cleanup new
-	ensuretmpdir
 	let cloneparams = catMaybes
 		[ Just "-q"
 		, if bareClone cfg then Just "--bare" else Nothing
@ -236,11 +241,11 @@ configrepo dir = indir dir $ do
 	git "config" ["annex.largefiles", "exclude=" ++ ingitfile]
 		"git config annex.largefiles"

-ensuretmpdir :: IO ()
-ensuretmpdir = do
-	e <- doesDirectoryExist tmpdir
+ensuredir :: FilePath -> IO ()
+ensuredir d = do
+	e <- doesDirectoryExist d
 	unless e $
-		createDirectory tmpdir
+		createDirectory d
 	
 {- Prevent global git configs from affecting the test suite. -}
 isolateGitConfig :: IO a -> IO a
@ -508,8 +513,8 @@ setupTestMode = do
 		git "commit" ["--allow-empty", "-m", "empty"] "git commit failed"
 		git_annex "adjust" ["--unlock"] "git annex adjust failed"

-changeToTmpDir :: FilePath -> IO ()
-changeToTmpDir t = do
+changeToTopDir :: FilePath -> IO ()
+changeToTopDir t = do
 	topdir <- Utility.Env.getEnvDefault "TOPDIR" (error "TOPDIR not set")
 	setCurrentDirectory $ topdir ++ "/" ++ t

@ -527,7 +532,7 @@ newmainrepodir :: IO FilePath
 newmainrepodir = go (0 :: Int)
  where
 	go n = do
-		let d = tmpdir </> "main" ++ show n
+		let d = "main" ++ show n
 		ifM (doesDirectoryExist d)
 			( go $ n + 1
 			, do
@ -539,7 +544,7 @@ tmprepodir :: IO FilePath
 tmprepodir = go (0 :: Int)
  where
 	go n = do
-		let d = tmpdir </> "tmprepo" ++ show n
+		let d = "tmprepo" ++ show n
 		ifM (doesDirectoryExist d)
 			( go $ n + 1
 			, return d
@ -638,9 +643,9 @@ origBranch = maybe "foo"
 pair :: FilePath -> FilePath -> Assertion
 pair r1 r2 = forM_ [r1, r2] $ \r -> indir r $ do
 	when (r /= r1) $
-		git "remote" ["add", "r1", "../../" ++ r1] "remote add"
+		git "remote" ["add", "r1", "../" ++ r1] "remote add"
 	when (r /= r2) $
-		git "remote" ["add", "r2", "../../" ++ r2] "remote add"
+		git "remote" ["add", "r2", "../" ++ r2] "remote add"


 {- Runs a query in the current repository, but first makes the repository
@ -663,3 +668,92 @@ make_writeable :: FilePath -> IO ()
 make_writeable d = void $
 	Utility.Process.Transcript.processTranscript
 		"chmod" ["-R", "u+w", d] Nothing
+
+{- Tests each TestTree in parallel, and exits with succcess/failure.
+ -
+ - Tasty supports parallel tests, but this does not use it, because
+ - many tests need to be run in test repos, and chdir would not be 
+ - thread safe. Instead, this starts one child process for each TestTree.
+ -
+ - An added benefit of using child processes is that any files they may
+ - leave open are closed before finalCleanup is run at the end. This
+ - prevents some failures to clean up after the test suite.
+ -}
+parallelTestRunner :: TestOptions -> (Bool -> Bool -> TestOptions -> [TestTree]) -> IO ()
+parallelTestRunner opts mkts
+	| fakeSsh opts = runFakeSsh (internalData opts)
+	| otherwise = go =<< Utility.Env.getEnv subenv
+  where
+	subenv = "GIT_ANNEX_TEST_SUBPROCESS"
+	go Nothing = do
+		ensuredir tmpdir
+		crippledfilesystem <- fst <$> Annex.Init.probeCrippledFileSystem'
+			(toRawFilePath tmpdir)
+			Nothing Nothing False
+		adjustedbranchok <- Annex.AdjustedBranch.isGitVersionSupported
+		let ts = mkts crippledfilesystem adjustedbranchok opts
+		let warnings = fst (tastyParser ts)
+		unless (null warnings) $ do
+			hPutStrLn stderr "warnings from tasty:"
+			mapM_ (hPutStrLn stderr) warnings
+		environ <- Utility.Env.getEnvironment
+		ps <- getArgs
+		pp <- Annex.Path.programPath
+		exitcodes <- forConcurrently [1..length ts] $ \n -> do
+			let subdir = tmpdir </> show n
+			ensuredir subdir
+			let p = (proc pp ps)
+				{ env = Just ((subenv, show (n, crippledfilesystem, adjustedbranchok)):environ)
+				, cwd = Just subdir
+				}
+			withCreateProcess p $
+				\_ _ _ pid -> waitForProcess pid
+		unless (keepFailuresOption opts) finalCleanup
+		if all (== ExitSuccess) exitcodes
+			then exitSuccess
+			else case (filter (/= ExitFailure 1) exitcodes) of
+				[] -> do
+					putStrLn "  (Failures above could be due to a bug in git-annex, or an incompatibility"
+					putStrLn "   with utilities, such as git, installed on this system.)"
+					exitFailure
+				v -> do
+					putStrLn $ "  Test subprocesses exited with unexpected exit codes: " ++ show v
+					exitFailure
+	go (Just subenvval) = case readish subenvval of
+		Nothing -> error ("Bad " ++ subenv)
+		Just (n, crippledfilesystem, adjustedbranchok) -> isolateGitConfig $ do
+			let ts = mkts crippledfilesystem adjustedbranchok opts
+			let t = topLevelTestGroup 
+				-- This group is needed to avoid what
+				-- seems to be a tasty bug which causes a
+				-- segfault.
+				[ testGroup "Tasty" 
+					[ testProperty "tasty self-check" True
+					]
+				, ts !! (n - 1)
+				]
+			case tryIngredients ingredients (tastyOptionSet opts) t of
+				Nothing -> error "No tests found!?"
+				Just act -> ifM act
+					( exitSuccess
+					, exitFailure
+							)
+
+topLevelTestGroup :: [TestTree] -> TestTree
+topLevelTestGroup = testGroup "Tests"
+
+tastyParser :: [TestTree] -> ([String], Parser Test.Tasty.Options.OptionSet)
+#if MIN_VERSION_tasty(1,3,0)
+tastyParser ts = go
+#else
+tastyParser ts = ([], go)
+#endif
+  where
+ 	go = suiteOptionParser ingredients (topLevelTestGroup ts)
+
+ingredients :: [Ingredient]
+ingredients =
+	[ listingTests
+	, rerunningTests [consoleTestReporter]
+	]
+
--- a/doc/todo/speed_up_34standalone_build34_and47or_tests/comment_8_b06360071e05df18b895322382357b61._comment
+++ b/doc/todo/speed_up_34standalone_build34_and47or_tests/comment_8_b06360071e05df18b895322382357b61._comment
@ -0,0 +1,17 @@
+[[!comment format=mdwn
+ username="joey"
+ subject="""comment 8"""
+ date="2022-03-14T18:52:09Z"
+ content="""
+I have implemented parallelism as described in comment 7. 
+
+Currently there are 5 child processes, and the test runtime
+dropped from 444 to 334 seconds on my laptop.
+Splitting up the test groups further, so there are more child
+processes will probably improve that more.
+Remains to be seen if it helps on NFS much..
+
+The `git-annex test` output is currently a mess, it needs to be serialized.
+Ran out of time to do that today, but the speed improvement is worth
+temporarily ugly output.
+"""]]