test: Avoid most situations involving failure to delete test directories

By forking a worker process and only deleting the test directory once it exits. This way, if a test leaves files open, they'll get closed when the worker exits, so avoiding failure to delete open files on Windows, and failure to delete directories due to NFS lock files. If a test leaves a git worker process running, the closed pipes should cause the worker to exit too, also avoiding the problem there. The 10 second sleep ought to give plenty of time for such worker processes to exit, although this is of course a race. Finally, even if test directory fails to be deleted still, it won't appear as if the last test in the test suite failed; the error will be displayed at the very end. This commit was supported by the NSF-funded DataLad project.
2017-08-14 16:27:35 -04:00 · 2017-08-14 16:27:35 -04:00 · 8526cd7c92
commit 8526cd7c92
parent af6068525a
3 changed files with 41 additions and 22 deletions
--- a/3
+++ b/3
@ -7,6 +7,9 @@ git-annex (6.20170521) UNRELEASED; urgency=medium
    branch. This is a dangerous environment variable; use with caution.
  * Fix a git-annex test failure when run on NFS due to NFS lock files
    preventing directory removal.
+  * test: Avoid most situations involving failure to delete test
+    directories, by forking a worker process and only deleting the test
+    directory once it exits.

 -- Joey Hess <id@joeyh.name>  Sat, 17 Jun 2017 13:02:24 -0400

--- a/Test.hs
+++ b/Test.hs
@ -34,6 +34,7 @@ import Options.Applicative (switch, long, help, internal)
 import qualified Data.Map as M
 import qualified Data.Aeson
 import qualified Data.ByteString.Lazy.UTF8 as BU8
+import System.Environment

 import Common
 import CmdLine.GitAnnex.Options
@ -127,8 +128,23 @@ runner = Just go
  where
 	go opts
 		| fakeSsh opts = runFakeSsh (internalData opts)
-		| otherwise = runtests opts
-	runtests opts = isolateGitConfig $ do
+		| otherwise = runsubprocesstests opts
+			=<< Utility.Env.getEnv subenv
+	
+	-- Run git-annex test in a subprocess, so that any files
+	-- it may open will be closed before running finalCleanup.
+	-- This should prevent most failures to clean up after the test
+	-- suite.
+	subenv = "GIT_ANNEX_TEST_SUBPROCESS"
+	runsubprocesstests opts Nothing = do
+		pp <- Annex.Path.programPath
+		Utility.Env.setEnv subenv "1" True
+		ps <- getArgs
+		(Nothing, Nothing, Nothing, pid) <-createProcess (proc pp ps)
+		exitcode <- waitForProcess pid
+		unless (keepFailuresOption opts) finalCleanup
+		exitWith exitcode
+	runsubprocesstests opts (Just _) = isolateGitConfig $ do
 		ensuretmpdir
 		crippledfilesystem <- Annex.Init.probeCrippledFileSystem' tmpdir
 		case tryIngredients ingredients (tastyOptionSet opts) (tests crippledfilesystem opts) of
@ -136,7 +152,7 @@ runner = Just go
 			Just act -> ifM act
 				( exitSuccess
 				, do
-					putStrLn "  (This could be due to a bug in git-annex, or an incompatibility"
+					putStrLn "  (Failures above could be due to a bug in git-annex, or an incompatibility"
 					putStrLn "   with utilities, such as git, installed on this system.)"
 					exitFailure
 				)
@ -1915,20 +1931,24 @@ isolateGitConfig a = Utility.Tmp.withTmpDir "testhome" $ \tmphome -> do
 	a

 cleanup :: FilePath -> IO ()
-cleanup = cleanup' False
-
-cleanup' :: Bool -> FilePath -> IO ()
-cleanup' final dir = whenM (doesDirectoryExist dir) $ do
+cleanup dir = whenM (doesDirectoryExist dir) $ do
 	Command.Uninit.prepareRemoveAnnexDir' dir
-	-- This sometimes fails on Windows, due to some files
-	-- being still opened by a subprocess.
-	catchIO (removeDirectoryRecursive dir) $ \e ->
-		when final $ do
-			print e
-			putStrLn "sleeping 10 seconds and will retry directory cleanup"
-			Utility.ThreadScheduler.threadDelaySeconds (Utility.ThreadScheduler.Seconds 10)
-			whenM (doesDirectoryExist dir) $
-				removeDirectoryRecursive dir
+	-- This can fail if files in the directory are still open by a
+	-- subprocess.
+	void $ tryIO $ removeDirectoryRecursive dir
+
+finalCleanup :: IO ()
+finalCleanup = whenM (doesDirectoryExist tmpdir) $ do
+	Utility.Misc.reapZombies
+	Command.Uninit.prepareRemoveAnnexDir' tmpdir
+	catchIO (removeDirectoryRecursive tmpdir) $ \e -> do
+		print e
+		putStrLn "sleeping 10 seconds and will retry directory cleanup"
+		Utility.ThreadScheduler.threadDelaySeconds $
+			Utility.ThreadScheduler.Seconds 10
+		whenM (doesDirectoryExist tmpdir) $ do
+			Utility.Misc.reapZombies
+			removeDirectoryRecursive tmpdir
 	
 checklink :: FilePath -> Assertion
 checklink f =
@ -2086,11 +2106,7 @@ withTestMode testmode = withResource prepare release . const
 			Just act -> unlessM act $
 				error "init tests failed! cannot continue"
 		return ()
-	release _
-		| keepFailures testmode = void $ tryIO $ do
-			cleanup' True mainrepodir
-			removeDirectory tmpdir
-		| otherwise = cleanup' True tmpdir
+	release _ = cleanup mainrepodir

 setTestMode :: TestMode -> IO ()
 setTestMode testmode = do
--- a/Utility/Misc.hs
+++ b/Utility/Misc.hs
@ -112,7 +112,7 @@ hGetSomeString h sz = do
 	peekbytes :: Int -> Ptr Word8 -> IO [Word8]
 	peekbytes len buf = mapM (peekElemOff buf) [0..pred len]

-{- Reaps any zombie git processes. 
+{- Reaps any zombie processes that may be hanging around.
 -
 - Warning: Not thread safe. Anything that was expecting to wait
 - on a process and get back an exit status is going to be confused