parallelize git-annex test for 25% speedup

Note the very weird workaround for what appears to be some kind of tasty
bug, which causes a segfault. This is not new to this modification,
I was seeing a segfault before at least intermittently when limiting
git-annex test -p to only run a single test group.

Also, the path from one test repo to a remote test repo used to be
"../../foo", which somehow broke when moving the test repos from .t to
.t/N. I don't actually quite understand how it used to work, but
"../foo" seems correct and works in the new situation.

Test output from the concurrent processes is not yet serialized.
Should be easy to do using concurrent-output.

More test groups will probably make the speedup larger. It would
probably be best to have a larger number of test groups and divvy them
amoung subprocesses numbered based on the number of CPU cores, perhaps
times 2 or 3.

Sponsored-by: Dartmouth College's Datalad project
This commit is contained in:
Joey Hess 2022-03-14 15:24:37 -04:00
parent 62e6c6afb9
commit 8d14ce8f38
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
3 changed files with 132 additions and 80 deletions

73
Test.hs
View file

@ -15,16 +15,12 @@ import Test.Framework
import Options.Applicative.Types
import Test.Tasty
import Test.Tasty.Runners
import Test.Tasty.HUnit
import Test.Tasty.QuickCheck
import Test.Tasty.Ingredients.Rerun
import Test.Tasty.Options
import Options.Applicative (switch, long, help, internal)
import qualified Data.Map as M
import qualified Data.ByteString.Lazy.UTF8 as BU8
import System.Environment
import Control.Concurrent.STM hiding (check)
import Common
@ -60,12 +56,9 @@ import qualified Config.Cost
import qualified Crypto
import qualified Database.Keys
import qualified Annex.WorkTree
import qualified Annex.Init
import qualified Annex.CatFile
import qualified Annex.Path
import qualified Annex.VectorClock
import qualified Annex.VariantFile
import qualified Annex.AdjustedBranch
import qualified Annex.View
import qualified Annex.View.ViewedFile
import qualified Logs.View
@ -78,8 +71,6 @@ import qualified Utility.Verifiable
import qualified Utility.Process
import qualified Utility.Misc
import qualified Utility.InodeCache
import qualified Utility.Env
import qualified Utility.Env.Set
import qualified Utility.Matcher
import qualified Utility.Hash
import qualified Utility.Scheduled
@ -99,7 +90,7 @@ import qualified Utility.Gpg
optParser :: Parser TestOptions
optParser = TestOptions
<$> snd tastyParser
<$> snd (tastyParser (tests False True mempty))
<*> switch
( long "keep-failures"
<> help "preserve repositories on test failure"
@ -110,62 +101,12 @@ optParser = TestOptions
)
<*> cmdParams "non-options are for internal use only"
tastyParser :: ([String], Parser Test.Tasty.Options.OptionSet)
#if MIN_VERSION_tasty(1,3,0)
tastyParser = go
#else
tastyParser = ([], go)
#endif
where
go = suiteOptionParser ingredients (tests False True mempty)
runner :: TestOptions -> IO ()
runner opts
| fakeSsh opts = runFakeSsh (internalData opts)
| otherwise = runsubprocesstests =<< Utility.Env.getEnv subenv
where
-- Run git-annex test in a subprocess, so that any files
-- it may open will be closed before running finalCleanup.
-- This should prevent most failures to clean up after the test
-- suite.
subenv = "GIT_ANNEX_TEST_SUBPROCESS"
runsubprocesstests Nothing = do
let warnings = fst tastyParser
unless (null warnings) $ do
hPutStrLn stderr "warnings from tasty:"
mapM_ (hPutStrLn stderr) warnings
pp <- Annex.Path.programPath
Utility.Env.Set.setEnv subenv "1" True
ps <- getArgs
exitcode <- withCreateProcess (proc pp ps) $
\_ _ _ pid -> waitForProcess pid
unless (keepFailuresOption opts) finalCleanup
exitWith exitcode
runsubprocesstests (Just _) = isolateGitConfig $ do
ensuretmpdir
crippledfilesystem <- fst <$> Annex.Init.probeCrippledFileSystem'
(toRawFilePath tmpdir)
Nothing Nothing False
adjustedbranchok <- Annex.AdjustedBranch.isGitVersionSupported
case tryIngredients ingredients (tastyOptionSet opts) (tests crippledfilesystem adjustedbranchok opts) of
Nothing -> error "No tests found!?"
Just act -> ifM act
( exitSuccess
, do
putStrLn " (Failures above could be due to a bug in git-annex, or an incompatibility"
putStrLn " with utilities, such as git, installed on this system.)"
exitFailure
)
runner opts = parallelTestRunner opts tests
ingredients :: [Ingredient]
ingredients =
[ listingTests
, rerunningTests [consoleTestReporter]
]
tests :: Bool -> Bool -> TestOptions -> TestTree
tests :: Bool -> Bool -> TestOptions -> [TestTree]
tests crippledfilesystem adjustedbranchok opts =
testGroup "Tests" $ properties
properties
: withTestMode remotetestmode Nothing testRemotes
: map (\(d, te) -> withTestMode te (Just initTests) (unitTests d)) testmodes
where
@ -1228,11 +1169,11 @@ test_union_merge_regression =
withtmpclonerepo $ \r3 -> do
forM_ [r1, r2, r3] $ \r -> indir r $ do
when (r /= r1) $
git "remote" ["add", "r1", "../../" ++ r1] "remote add"
git "remote" ["add", "r1", "../" ++ r1] "remote add"
when (r /= r2) $
git "remote" ["add", "r2", "../../" ++ r2] "remote add"
git "remote" ["add", "r2", "../" ++ r2] "remote add"
when (r /= r3) $
git "remote" ["add", "r3", "../../" ++ r3] "remote add"
git "remote" ["add", "r3", "../" ++ r3] "remote add"
git_annex "get" [annexedfile] "get"
git "remote" ["rm", "origin"] "remote rm"
forM_ [r3, r2, r1] $ \r -> indir r $

View file

@ -1,6 +1,6 @@
{- git-annex test suite framework
-
- Copyright 2010-2021 Joey Hess <id@joeyh.name>
- Copyright 2010-2022 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
@ -12,7 +12,13 @@ module Test.Framework where
import Test.Tasty
import Test.Tasty.Runners
import Test.Tasty.HUnit
import Test.Tasty.QuickCheck
import Test.Tasty.Options
import Test.Tasty.Ingredients.Rerun
import Options.Applicative.Types
import Control.Concurrent
import Control.Concurrent.Async
import System.Environment (getArgs)
import Common
import Types.Test
@ -39,6 +45,7 @@ import qualified Annex.Link
import qualified Annex.Path
import qualified Annex.Action
import qualified Annex.AdjustedBranch
import qualified Annex.Init
import qualified Utility.Process
import qualified Utility.Process.Transcript
import qualified Utility.Env
@ -166,7 +173,7 @@ indir dir a = do
-- any type of error and change back to currdir before
-- rethrowing.
r <- bracket_
(changeToTmpDir dir)
(changeToTopDir dir)
(setCurrentDirectory currdir)
(tryNonAsync a)
case r of
@ -179,7 +186,6 @@ adjustedbranchsupported repo = indir repo $ Annex.AdjustedBranch.isGitVersionSup
setuprepo :: FilePath -> IO FilePath
setuprepo dir = do
cleanup dir
ensuretmpdir
git "init" ["-q", dir] "git init"
configrepo dir
return dir
@ -199,7 +205,6 @@ newCloneRepoConfig = CloneRepoConfig
clonerepo :: FilePath -> FilePath -> CloneRepoConfig -> IO FilePath
clonerepo old new cfg = do
cleanup new
ensuretmpdir
let cloneparams = catMaybes
[ Just "-q"
, if bareClone cfg then Just "--bare" else Nothing
@ -236,11 +241,11 @@ configrepo dir = indir dir $ do
git "config" ["annex.largefiles", "exclude=" ++ ingitfile]
"git config annex.largefiles"
ensuretmpdir :: IO ()
ensuretmpdir = do
e <- doesDirectoryExist tmpdir
ensuredir :: FilePath -> IO ()
ensuredir d = do
e <- doesDirectoryExist d
unless e $
createDirectory tmpdir
createDirectory d
{- Prevent global git configs from affecting the test suite. -}
isolateGitConfig :: IO a -> IO a
@ -508,8 +513,8 @@ setupTestMode = do
git "commit" ["--allow-empty", "-m", "empty"] "git commit failed"
git_annex "adjust" ["--unlock"] "git annex adjust failed"
changeToTmpDir :: FilePath -> IO ()
changeToTmpDir t = do
changeToTopDir :: FilePath -> IO ()
changeToTopDir t = do
topdir <- Utility.Env.getEnvDefault "TOPDIR" (error "TOPDIR not set")
setCurrentDirectory $ topdir ++ "/" ++ t
@ -527,7 +532,7 @@ newmainrepodir :: IO FilePath
newmainrepodir = go (0 :: Int)
where
go n = do
let d = tmpdir </> "main" ++ show n
let d = "main" ++ show n
ifM (doesDirectoryExist d)
( go $ n + 1
, do
@ -539,7 +544,7 @@ tmprepodir :: IO FilePath
tmprepodir = go (0 :: Int)
where
go n = do
let d = tmpdir </> "tmprepo" ++ show n
let d = "tmprepo" ++ show n
ifM (doesDirectoryExist d)
( go $ n + 1
, return d
@ -638,9 +643,9 @@ origBranch = maybe "foo"
pair :: FilePath -> FilePath -> Assertion
pair r1 r2 = forM_ [r1, r2] $ \r -> indir r $ do
when (r /= r1) $
git "remote" ["add", "r1", "../../" ++ r1] "remote add"
git "remote" ["add", "r1", "../" ++ r1] "remote add"
when (r /= r2) $
git "remote" ["add", "r2", "../../" ++ r2] "remote add"
git "remote" ["add", "r2", "../" ++ r2] "remote add"
{- Runs a query in the current repository, but first makes the repository
@ -663,3 +668,92 @@ make_writeable :: FilePath -> IO ()
make_writeable d = void $
Utility.Process.Transcript.processTranscript
"chmod" ["-R", "u+w", d] Nothing
{- Tests each TestTree in parallel, and exits with succcess/failure.
-
- Tasty supports parallel tests, but this does not use it, because
- many tests need to be run in test repos, and chdir would not be
- thread safe. Instead, this starts one child process for each TestTree.
-
- An added benefit of using child processes is that any files they may
- leave open are closed before finalCleanup is run at the end. This
- prevents some failures to clean up after the test suite.
-}
parallelTestRunner :: TestOptions -> (Bool -> Bool -> TestOptions -> [TestTree]) -> IO ()
parallelTestRunner opts mkts
| fakeSsh opts = runFakeSsh (internalData opts)
| otherwise = go =<< Utility.Env.getEnv subenv
where
subenv = "GIT_ANNEX_TEST_SUBPROCESS"
go Nothing = do
ensuredir tmpdir
crippledfilesystem <- fst <$> Annex.Init.probeCrippledFileSystem'
(toRawFilePath tmpdir)
Nothing Nothing False
adjustedbranchok <- Annex.AdjustedBranch.isGitVersionSupported
let ts = mkts crippledfilesystem adjustedbranchok opts
let warnings = fst (tastyParser ts)
unless (null warnings) $ do
hPutStrLn stderr "warnings from tasty:"
mapM_ (hPutStrLn stderr) warnings
environ <- Utility.Env.getEnvironment
ps <- getArgs
pp <- Annex.Path.programPath
exitcodes <- forConcurrently [1..length ts] $ \n -> do
let subdir = tmpdir </> show n
ensuredir subdir
let p = (proc pp ps)
{ env = Just ((subenv, show (n, crippledfilesystem, adjustedbranchok)):environ)
, cwd = Just subdir
}
withCreateProcess p $
\_ _ _ pid -> waitForProcess pid
unless (keepFailuresOption opts) finalCleanup
if all (== ExitSuccess) exitcodes
then exitSuccess
else case (filter (/= ExitFailure 1) exitcodes) of
[] -> do
putStrLn " (Failures above could be due to a bug in git-annex, or an incompatibility"
putStrLn " with utilities, such as git, installed on this system.)"
exitFailure
v -> do
putStrLn $ " Test subprocesses exited with unexpected exit codes: " ++ show v
exitFailure
go (Just subenvval) = case readish subenvval of
Nothing -> error ("Bad " ++ subenv)
Just (n, crippledfilesystem, adjustedbranchok) -> isolateGitConfig $ do
let ts = mkts crippledfilesystem adjustedbranchok opts
let t = topLevelTestGroup
-- This group is needed to avoid what
-- seems to be a tasty bug which causes a
-- segfault.
[ testGroup "Tasty"
[ testProperty "tasty self-check" True
]
, ts !! (n - 1)
]
case tryIngredients ingredients (tastyOptionSet opts) t of
Nothing -> error "No tests found!?"
Just act -> ifM act
( exitSuccess
, exitFailure
)
topLevelTestGroup :: [TestTree] -> TestTree
topLevelTestGroup = testGroup "Tests"
tastyParser :: [TestTree] -> ([String], Parser Test.Tasty.Options.OptionSet)
#if MIN_VERSION_tasty(1,3,0)
tastyParser ts = go
#else
tastyParser ts = ([], go)
#endif
where
go = suiteOptionParser ingredients (topLevelTestGroup ts)
ingredients :: [Ingredient]
ingredients =
[ listingTests
, rerunningTests [consoleTestReporter]
]

View file

@ -0,0 +1,17 @@
[[!comment format=mdwn
username="joey"
subject="""comment 8"""
date="2022-03-14T18:52:09Z"
content="""
I have implemented parallelism as described in comment 7.
Currently there are 5 child processes, and the test runtime
dropped from 444 to 334 seconds on my laptop.
Splitting up the test groups further, so there are more child
processes will probably improve that more.
Remains to be seen if it helps on NFS much..
The `git-annex test` output is currently a mess, it needs to be serialized.
Ran out of time to do that today, but the speed improvement is worth
temporarily ugly output.
"""]]