new improved benchmark command that can benchmark anything git-annex does

2019-01-04 13:43:53 -04:00 · 2019-01-04 13:43:53 -04:00 · 11d6e2e260
commit 11d6e2e260
parent 3b3d31583b
11 changed files with 184 additions and 123 deletions
--- a/Benchmark.hs
+++ b/Benchmark.hs
@ -0,0 +1,53 @@
+{- git-annex benchmark infrastructure
+ -
+ - Copyright 2019 Joey Hess <id@joeyh.name>
+ -
+ - Licensed under the GNU AGPL version 3 or higher.
+ -}
+
+module Benchmark where
+
+import Common
+import Types.Benchmark
+import Types.Command
+import CmdLine.Action
+import CmdLine
+import CmdLine.GitAnnex.Options
+import qualified Annex
+import qualified Annex.Branch
+import Annex.Action
+
+import qualified Options.Applicative as O
+
+{- Given a list of all git-annex Commands, and the user's input,
+ - generates an IO action to benchmark that runs the specified
+ - commands. -}
+mkGenerator :: MkBenchmarkGenerator
+mkGenerator cmds userinput = do
+	-- Get the git-annex branch updated, to avoid the overhead of doing
+	-- so skewing the runtime of the first action that will be
+	-- benchmarked.
+	Annex.Branch.commit "benchmarking"
+	Annex.Branch.update
+	l <- mapM parsesubcommand $ split [";"] userinput
+	return $ do
+		forM_ l $ \(cmd, seek, st) ->
+			-- The cmd is run for benchmarking without startup or
+			-- shutdown actions.
+			Annex.eval st $ performCommandAction cmd seek noop
+		-- Since the cmd will be run many times, some zombie
+		-- processes that normally only occur once per command
+		-- will build up; reap them.
+		reapZombies
+  where
+	-- Simplified versio of CmdLine.dispatch, without support for fuzzy
+	-- matching or out-of-repo commands.
+	parsesubcommand ps = do
+		(cmd, seek, globalconfig) <- liftIO $ O.handleParseResult $
+			parseCmd "git-annex" "benchmarking" gitAnnexGlobalOptions ps cmds cmdparser
+		-- Make an entirely separate Annex state for each subcommand,
+		-- and prepare it to run the cmd.
+		st <- liftIO . Annex.new =<< Annex.getState Annex.repo
+		((), st') <- liftIO $ Annex.run st $
+			prepRunCommand cmd globalconfig
+		return (cmd, seek, st')
--- a/5
+++ b/5
@ -12,7 +12,10 @@ git-annex (7.20181212) UNRELEASED; urgency=medium
    used so it will also work with v7 unlocked pointer files.
  * Fix doubled progress display when downloading an url when -J is used.
  * importfeed: Better error message when downloading the feed fails.
-  * Optimised timestamp parser is 10x faster.
+  * Some optimisations, including a 10x faster timestamp parser,
+    and improved parsing and serialization of git-annex branch data.
+  * The benchmark command, which only had some old benchmarking of the sqlite
+    databases before, now allows benchmarking any other git-annex commands.

 -- Joey Hess <id@joeyh.name>  Tue, 18 Dec 2018 12:24:52 -0400

--- a/CmdLine.hs
+++ b/CmdLine.hs
@ -8,6 +8,8 @@
 module CmdLine (
 	dispatch,
 	usage,
+	parseCmd,
+	prepRunCommand,
 ) where

 import qualified Options.Applicative as O
@ -39,13 +41,7 @@ dispatch fuzzyok allargs allcmds globaloptions fields getgitrepo progname progde
 			(cmd, seek, globalconfig) <- parsewith False cmdparser
 				(\a -> inRepo $ a . Just)
 				(liftIO . O.handleParseResult)
-			when (cmdnomessages cmd) $ do
-				Annex.setOutput QuietOutput
-				Annex.changeState $ \s -> s 
-					{ Annex.output = (Annex.output s) { implicitMessages = False } }
-			getParsed globalconfig
-			whenM (annexDebug <$> Annex.getGitConfig) $
-				liftIO enableDebugOutput
+			prepRunCommand cmd globalconfig
 			startup
 			performCommandAction cmd seek $
 				shutdown $ cmdnocommit cmd
@ -123,3 +119,13 @@ findCmd fuzzyok argv cmds
 	inexactcmds = case name of
 		Nothing -> []
 		Just n -> Git.AutoCorrect.fuzzymatches n cmdname cmds
+
+prepRunCommand :: Command -> GlobalSetter -> Annex ()
+prepRunCommand cmd globalconfig = do
+	when (cmdnomessages cmd) $ do
+		Annex.setOutput QuietOutput
+		Annex.changeState $ \s -> s 
+			{ Annex.output = (Annex.output s) { implicitMessages = False } }
+	getParsed globalconfig
+	whenM (annexDebug <$> Annex.getGitConfig) $
+		liftIO enableDebugOutput
--- a/CmdLine/GitAnnex.hs
+++ b/CmdLine/GitAnnex.hs
@ -1,6 +1,6 @@
 {- git-annex main program
 -
- - Copyright 2010-2015 Joey Hess <id@joeyh.name>
+ - Copyright 2010-2019 Joey Hess <id@joeyh.name>
 -
 - Licensed under the GNU GPL version 3 or higher.
 -}
@ -16,6 +16,7 @@ import Utility.Env
 import Annex.Ssh
 import Annex.Multicast
 import Types.Test
+import Types.Benchmark

 import qualified Command.Help
 import qualified Command.Add
@ -123,8 +124,8 @@ import qualified Command.TestRemote
 import qualified Command.Benchmark
 #endif

-cmds :: Parser TestOptions -> TestRunner -> [Command]
-cmds testoptparser testrunner = 
+cmds :: Parser TestOptions -> TestRunner -> MkBenchmarkGenerator -> [Command]
+cmds testoptparser testrunner mkbenchmarkgenerator = 
 	[ Command.Help.cmd
 	, Command.Add.cmd
 	, Command.Get.cmd
@ -229,15 +230,16 @@ cmds testoptparser testrunner =
 	, Command.FuzzTest.cmd
 	, Command.TestRemote.cmd
 #ifdef WITH_BENCHMARK
-	, Command.Benchmark.cmd
+	, Command.Benchmark.cmd $
+		mkbenchmarkgenerator $ cmds testoptparser testrunner (\_ _ -> return noop)
 #endif
 	]

-run :: Parser TestOptions -> TestRunner -> [String] -> IO ()
-run testoptparser testrunner args = go envmodes
+run :: Parser TestOptions -> TestRunner -> MkBenchmarkGenerator -> [String] -> IO ()
+run testoptparser testrunner mkbenchmarkgenerator args = go envmodes
  where
 	go [] = dispatch True args 
-		(cmds testoptparser testrunner)
+		(cmds testoptparser testrunner mkbenchmarkgenerator)
 		gitAnnexGlobalOptions [] Git.CurrentRepo.get
 		"git-annex"
 		"manage files with git, without checking their contents in"
--- a/Command/Benchmark.hs
+++ b/Command/Benchmark.hs
@ -1,6 +1,6 @@
 {- git-annex benchmark
 -
- - Copyright 2016 Joey Hess <id@joeyh.name>
+ - Copyright 2016-2019 Joey Hess <id@joeyh.name>
 -
 - Licensed under the GNU GPL version 3 or higher.
 -}
@ -10,114 +10,26 @@
 module Command.Benchmark where

 import Command
-import Database.Types
-import qualified Database.Keys.SQL as SQL
-import qualified Database.Queue as H
-import Utility.Tmp
-import Git.FilePath
+import Types.Benchmark

 import Criterion.Main
-import Criterion.Internal (runAndAnalyse)
-import Criterion.Monad
-import Control.Monad.IO.Class (liftIO)
-import Control.Monad
-import Control.DeepSeq
-import System.FilePath
-import System.Random
+import Criterion.Main.Options (parseWith, Mode)

-cmd :: Command
-cmd = noRepo (withParams benchmark) $
-	dontCheck repoExists $
-		command "benchmark" SectionTesting
-			"run benchmarks"
+cmd :: BenchmarkGenerator -> Command
+cmd generator = command "benchmark" SectionTesting
+	"benchmark git-annex commands"
 	paramNothing
-			(withParams (liftIO . benchmark))
+	(seek generator <$$> optParser)

-benchmark :: CmdParams -> IO ()
-benchmark _ = withTmpDirIn "." "benchmark" $ \tmpdir -> do
-	-- benchmark different sizes of databases
-	dbs <- mapM (benchDb tmpdir)
-		[ 1000
-		, 10000
-		-- , 100000
-		]
-	runCriterion $
-		bgroup "keys database" $ flip concatMap dbs $ \db ->
-			[ getAssociatedFilesHitBench db
-			, getAssociatedFilesMissBench db
-			, getAssociatedKeyHitBench db
-			, getAssociatedKeyMissBench db
-			, addAssociatedFileOldBench db
-			, addAssociatedFileNewBench db
-			]
+data BenchmarkOptions = BenchmarkOptions CmdParams Mode

-getAssociatedFilesHitBench :: BenchDb -> Benchmark
-getAssociatedFilesHitBench ( BenchDb h num) = bench ("getAssociatedFiles from " ++ show num ++ " (hit)") $ nfIO $ do
-	n <- getStdRandom (randomR (1,num))
-	SQL.getAssociatedFiles (keyN n) (SQL.ReadHandle h)
+optParser :: CmdParamsDesc -> Parser BenchmarkOptions
+optParser desc = BenchmarkOptions
+	<$> cmdParams desc
+	-- parse criterion's options
+	<*> parseWith defaultConfig

-getAssociatedFilesMissBench :: BenchDb -> Benchmark
-getAssociatedFilesMissBench ( BenchDb h num) = bench ("getAssociatedFiles from " ++ show num ++ " (miss)") $ nfIO $
-	SQL.getAssociatedFiles keyMiss (SQL.ReadHandle h)
-
-getAssociatedKeyHitBench :: BenchDb -> Benchmark
-getAssociatedKeyHitBench (BenchDb h num) = bench ("getAssociatedKey from " ++ show num ++ " (hit)") $ nfIO $ do
-	n <- getStdRandom (randomR (1,num))
-	SQL.getAssociatedKey (fileN n) (SQL.ReadHandle h)
-
-getAssociatedKeyMissBench :: BenchDb -> Benchmark
-getAssociatedKeyMissBench (BenchDb h num) = bench ("getAssociatedKey from " ++ show num ++ " (miss)") $ nfIO $
-	SQL.getAssociatedKey fileMiss (SQL.ReadHandle h)
-
-addAssociatedFileOldBench :: BenchDb -> Benchmark
-addAssociatedFileOldBench ( BenchDb h num) = bench ("addAssociatedFile to " ++ show num ++ " (old)") $ nfIO $ do
-	n <- getStdRandom (randomR (1,num))
-	SQL.addAssociatedFile (keyN n) (fileN n) (SQL.WriteHandle h)
-	H.flushDbQueue h
-
-addAssociatedFileNewBench :: BenchDb -> Benchmark
-addAssociatedFileNewBench ( BenchDb h num) = bench ("addAssociatedFile to " ++ show num ++ " (new)") $ nfIO $ do
-	n <- getStdRandom (randomR (1,num))
-	SQL.addAssociatedFile (keyN n) (fileN (n+1)) (SQL.WriteHandle h)
-	H.flushDbQueue h
-
-populateAssociatedFiles :: H.DbQueue -> Int -> IO ()
-populateAssociatedFiles h num = do
-	forM_ [1..num] $ \n ->
-		SQL.addAssociatedFile (keyN n) (fileN n) (SQL.WriteHandle h)
-	H.flushDbQueue h
-
-keyN :: Int -> IKey
-keyN n = IKey ("key" ++ show n)
-
-fileN :: Int -> TopFilePath
-fileN n = asTopFilePath ("file" ++ show n)
-
-keyMiss :: IKey
-keyMiss = keyN 0 -- 0 is never stored
-
-fileMiss :: TopFilePath
-fileMiss = fileN 0 -- 0 is never stored
-
-data BenchDb = BenchDb H.DbQueue Int
-
-benchDb :: FilePath -> Int -> IO BenchDb
-benchDb tmpdir num = do
-	putStrLn $ "setting up database with " ++ show num
-	H.initDb f SQL.createTables
-	h <- H.openDbQueue f SQL.containedTable
-	populateAssociatedFiles h num
-	return (BenchDb h num)
-  where
-	f = tmpdir </> "db" ++ show num
-
-instance NFData TopFilePath where
-	rnf = rnf . getTopFilePath
-
-instance NFData IKey where
-	rnf (IKey s) = rnf s
-	
-- can't use Criterion's defaultMain here because it looks at
-- command-line parameters
-runCriterion :: Benchmark -> IO ()
-runCriterion = withConfig defaultConfig . runAndAnalyse (const True)
+seek :: BenchmarkGenerator -> BenchmarkOptions -> CommandSeek
+seek generator (BenchmarkOptions ps mode) = do
+	runner <- generator ps
+	liftIO $ runMode mode [ bench (unwords ps) $ nfIO runner ]
--- a/Test/Framework.hs
+++ b/Test/Framework.hs
@ -64,8 +64,13 @@ git_annex' command params = do
 	-- catch all errors, including normally fatal errors
 	try run ::IO (Either SomeException ())
  where
-	run = GitAnnex.run dummyTestOptParser (\_ -> noop) (command:"-q":params)
+	run = GitAnnex.run dummyTestOptParser
+		dummyTestRunner
+		dummyBenchmarkGenerator
+		(command:"-q":params)
 	dummyTestOptParser = pure mempty
+	dummyTestRunner _ = noop
+	dummyBenchmarkGenerator _ _ = return noop

 {- Runs git-annex and returns its output. -}
 git_annex_output :: String -> [String] -> IO String
--- a/Types/Benchmark.hs
+++ b/Types/Benchmark.hs
@ -0,0 +1,15 @@
+{- git-annex benchmark data types.
+ -
+ - Copyright 2019 Joey Hess <id@joeyh.name>
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+module Types.Benchmark where
+
+import Annex
+import Types.Command
+
+type BenchmarkGenerator = [String] -> Annex (IO ())
+
+type MkBenchmarkGenerator = [Command] -> BenchmarkGenerator
--- a/doc/git-annex-benchmark.mdwn
+++ b/doc/git-annex-benchmark.mdwn
@ -0,0 +1,60 @@
+# NAME
+
+git-annex benchmark - benchmark git-annex commands
+
+# SYNOPSIS
+
+git annex benchmark [criterionopts] -- commmand [; command]
+
+# DESCRIPTION
+
+When git-annex is built with benchmarking support, this command can be used
+to benchmark any other git-annex command. For example "git annex benchmark -- get ."
+will benchmark "git annex get".
+
+The command being benchmarked is run in the current git-annex repository.
+It does not run just once; the benchmarking process will run it several
+times to get a statistically meaningful result.
+
+When benchmarking an action like "git annex get", the first run will
+often do much more than subseqent runs. To make the benchmark repeat an
+action like getting a file each time, additional command can be listed,
+separated by ';'. (Note that ';' needs to be escaped from the shell.)
+The combined script will be run repeatedly by the benchmark. An example
+of using this:
+
+	git annex benchmark -- get . ';' drop .
+
+Note that git-annex benchmark does not fork new git-annex processes when
+benchmarking; it calls the command to benchmark internally, and so avoids
+git-annex's startup overhead. (So don't try to use it to optimise git-annex
+startup.) 
+
+# OPTIONS
+
+Before the "--" any of the criterion library's command-line options can be
+used.
+
+Any options that git-annex usually accepts can be included after the
+command to benchmark.
+
+# OUTPUT
+
+The output of the commands being benchmarked goes to standard output and
+standard error as usual. It's often a good idea to sink it to /dev/null to
+avoid the display of the output skewing the benchmark results. Of course
+--quiet can also be used to avoid most git-annex output, as long as you
+don't want to benchmark the generation of that output.
+
+The benchmark report is output to standard output by default, although
+criterion options can be used to redirect it to a file.
+
+# SEE ALSO
+
+[[git-annex]](1)
+
+# AUTHOR
+
+Joey Hess <id@joeyh.name>
+
+Warning: Automatically converted into a man page by mdwn2man. Edit with care.
--- a/doc/git-annex.mdwn
+++ b/doc/git-annex.mdwn
@ -731,6 +731,8 @@ subdirectories).
  This runs git-annex's built-in benchmarks, if it was built with
  benchmarking support.
  
+  See [[git-annex-benchmark]](1) for details.
+
 # COMMON OPTIONS

 These common options are accepted by all git-annex commands, and
--- a/git-annex.cabal
+++ b/git-annex.cabal
@ -663,6 +663,7 @@ Executable git-annex
    Backend.URL
    Backend.Utilities
    Backend.WORM
+    Benchmark
    Build.BundledPrograms
    Build.Configure
    Build.DesktopFile
@ -949,6 +950,7 @@ Executable git-annex
    Types.AdjustedBranch
    Types.Availability
    Types.Backend
+    Types.Benchmark
    Types.BranchState
    Types.CleanupActions
    Types.Command
--- a/git-annex.hs
+++ b/git-annex.hs
@ -15,6 +15,7 @@ import qualified CmdLine.GitAnnex
 import qualified CmdLine.GitAnnexShell
 import qualified CmdLine.GitRemoteTorAnnex
 import qualified Test
+import qualified Benchmark
 import Utility.FileSystemEncoding

 #ifdef mingw32_HOST_OS
@ -34,7 +35,7 @@ main = withSocketsDo $ do
 	run ps n = case takeFileName n of
 		"git-annex-shell" -> CmdLine.GitAnnexShell.run ps
 		"git-remote-tor-annex" -> CmdLine.GitRemoteTorAnnex.run ps
-		_  -> CmdLine.GitAnnex.run Test.optParser Test.runner ps
+		_  -> CmdLine.GitAnnex.run Test.optParser Test.runner Benchmark.mkGenerator ps

 #ifdef mingw32_HOST_OS
 {- On Windows, if HOME is not set, probe it and set it.