From 11d6e2e260d70ba99e35464c19c2b2772ce9efaa Mon Sep 17 00:00:00 2001
From: Joey Hess <joeyh@joeyh.name>
Date: Fri, 4 Jan 2019 13:43:53 -0400
Subject: [PATCH] new improved benchmark command that can benchmark anything
 git-annex does

---
 Benchmark.hs                 |  53 +++++++++++++++
 CHANGELOG                    |   5 +-
 CmdLine.hs                   |  20 ++++--
 CmdLine/GitAnnex.hs          |  16 +++--
 Command/Benchmark.hs         | 124 +++++------------------------------
 Test/Framework.hs            |   7 +-
 Types/Benchmark.hs           |  15 +++++
 doc/git-annex-benchmark.mdwn |  60 +++++++++++++++++
 doc/git-annex.mdwn           |   2 +
 git-annex.cabal              |   2 +
 git-annex.hs                 |   3 +-
 11 files changed, 184 insertions(+), 123 deletions(-)
 create mode 100644 Benchmark.hs
 create mode 100644 Types/Benchmark.hs
 create mode 100644 doc/git-annex-benchmark.mdwn

diff --git a/Benchmark.hs b/Benchmark.hs
new file mode 100644
index 0000000000..35ae9ef11b
--- /dev/null
+++ b/Benchmark.hs
@@ -0,0 +1,53 @@
+{- git-annex benchmark infrastructure
+ -
+ - Copyright 2019 Joey Hess <id@joeyh.name>
+ -
+ - Licensed under the GNU AGPL version 3 or higher.
+ -}
+
+module Benchmark where
+
+import Common
+import Types.Benchmark
+import Types.Command
+import CmdLine.Action
+import CmdLine
+import CmdLine.GitAnnex.Options
+import qualified Annex
+import qualified Annex.Branch
+import Annex.Action
+
+import qualified Options.Applicative as O
+
+{- Given a list of all git-annex Commands, and the user's input,
+ - generates an IO action to benchmark that runs the specified
+ - commands. -}
+mkGenerator :: MkBenchmarkGenerator
+mkGenerator cmds userinput = do
+	-- Get the git-annex branch updated, to avoid the overhead of doing
+	-- so skewing the runtime of the first action that will be
+	-- benchmarked.
+	Annex.Branch.commit "benchmarking"
+	Annex.Branch.update
+	l <- mapM parsesubcommand $ split [";"] userinput
+	return $ do
+		forM_ l $ \(cmd, seek, st) ->
+			-- The cmd is run for benchmarking without startup or
+			-- shutdown actions.
+			Annex.eval st $ performCommandAction cmd seek noop
+		-- Since the cmd will be run many times, some zombie
+		-- processes that normally only occur once per command
+		-- will build up; reap them.
+		reapZombies
+  where
+	-- Simplified version of CmdLine.dispatch, without support for fuzzy
+	-- matching or out-of-repo commands.
+	parsesubcommand ps = do
+		(cmd, seek, globalconfig) <- liftIO $ O.handleParseResult $
+			parseCmd "git-annex" "benchmarking" gitAnnexGlobalOptions ps cmds cmdparser
+		-- Make an entirely separate Annex state for each subcommand,
+		-- and prepare it to run the cmd.
+		st <- liftIO . Annex.new =<< Annex.getState Annex.repo
+		((), st') <- liftIO $ Annex.run st $
+			prepRunCommand cmd globalconfig
+		return (cmd, seek, st')
diff --git a/CHANGELOG b/CHANGELOG
index 3854dcd3f9..ba10efdca6 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -12,7 +12,10 @@ git-annex (7.20181212) UNRELEASED; urgency=medium
     used so it will also work with v7 unlocked pointer files.
   * Fix doubled progress display when downloading an url when -J is used.
   * importfeed: Better error message when downloading the feed fails.
-  * Optimised timestamp parser is 10x faster.
+  * Some optimisations, including a 10x faster timestamp parser,
+    and improved parsing and serialization of git-annex branch data.
+  * The benchmark command, which only had some old benchmarking of the sqlite
+    databases before, now allows benchmarking any other git-annex commands.
 
  -- Joey Hess <id@joeyh.name>  Tue, 18 Dec 2018 12:24:52 -0400
 
diff --git a/CmdLine.hs b/CmdLine.hs
index c9de90ec07..036463fd5c 100644
--- a/CmdLine.hs
+++ b/CmdLine.hs
@@ -8,6 +8,8 @@
 module CmdLine (
 	dispatch,
 	usage,
+	parseCmd,
+	prepRunCommand,
 ) where
 
 import qualified Options.Applicative as O
@@ -39,13 +41,7 @@ dispatch fuzzyok allargs allcmds globaloptions fields getgitrepo progname progde
 			(cmd, seek, globalconfig) <- parsewith False cmdparser
 				(\a -> inRepo $ a . Just)
 				(liftIO . O.handleParseResult)
-			when (cmdnomessages cmd) $ do
-				Annex.setOutput QuietOutput
-				Annex.changeState $ \s -> s 
-					{ Annex.output = (Annex.output s) { implicitMessages = False } }
-			getParsed globalconfig
-			whenM (annexDebug <$> Annex.getGitConfig) $
-				liftIO enableDebugOutput
+			prepRunCommand cmd globalconfig
 			startup
 			performCommandAction cmd seek $
 				shutdown $ cmdnocommit cmd
@@ -123,3 +119,13 @@ findCmd fuzzyok argv cmds
 	inexactcmds = case name of
 		Nothing -> []
 		Just n -> Git.AutoCorrect.fuzzymatches n cmdname cmds
+
+prepRunCommand :: Command -> GlobalSetter -> Annex ()
+prepRunCommand cmd globalconfig = do
+	when (cmdnomessages cmd) $ do
+		Annex.setOutput QuietOutput
+		Annex.changeState $ \s -> s 
+			{ Annex.output = (Annex.output s) { implicitMessages = False } }
+	getParsed globalconfig
+	whenM (annexDebug <$> Annex.getGitConfig) $
+		liftIO enableDebugOutput
diff --git a/CmdLine/GitAnnex.hs b/CmdLine/GitAnnex.hs
index 11354684f9..f1bb96d3f9 100644
--- a/CmdLine/GitAnnex.hs
+++ b/CmdLine/GitAnnex.hs
@@ -1,6 +1,6 @@
 {- git-annex main program
  -
- - Copyright 2010-2015 Joey Hess <id@joeyh.name>
+ - Copyright 2010-2019 Joey Hess <id@joeyh.name>
  -
  - Licensed under the GNU GPL version 3 or higher.
  -}
@@ -16,6 +16,7 @@ import Utility.Env
 import Annex.Ssh
 import Annex.Multicast
 import Types.Test
+import Types.Benchmark
 
 import qualified Command.Help
 import qualified Command.Add
@@ -123,8 +124,8 @@ import qualified Command.TestRemote
 import qualified Command.Benchmark
 #endif
 
-cmds :: Parser TestOptions -> TestRunner -> [Command]
-cmds testoptparser testrunner = 
+cmds :: Parser TestOptions -> TestRunner -> MkBenchmarkGenerator -> [Command]
+cmds testoptparser testrunner mkbenchmarkgenerator = 
 	[ Command.Help.cmd
 	, Command.Add.cmd
 	, Command.Get.cmd
@@ -229,15 +230,16 @@ cmds testoptparser testrunner =
 	, Command.FuzzTest.cmd
 	, Command.TestRemote.cmd
 #ifdef WITH_BENCHMARK
-	, Command.Benchmark.cmd
+	, Command.Benchmark.cmd $
+		mkbenchmarkgenerator $ cmds testoptparser testrunner (\_ _ -> return noop)
 #endif
 	]
 
-run :: Parser TestOptions -> TestRunner -> [String] -> IO ()
-run testoptparser testrunner args = go envmodes
+run :: Parser TestOptions -> TestRunner -> MkBenchmarkGenerator -> [String] -> IO ()
+run testoptparser testrunner mkbenchmarkgenerator args = go envmodes
   where
 	go [] = dispatch True args 
-		(cmds testoptparser testrunner)
+		(cmds testoptparser testrunner mkbenchmarkgenerator)
 		gitAnnexGlobalOptions [] Git.CurrentRepo.get
 		"git-annex"
 		"manage files with git, without checking their contents in"
diff --git a/Command/Benchmark.hs b/Command/Benchmark.hs
index bcfecc2dc1..e1e2cf84f7 100644
--- a/Command/Benchmark.hs
+++ b/Command/Benchmark.hs
@@ -1,6 +1,6 @@
 {- git-annex benchmark
  -
- - Copyright 2016 Joey Hess <id@joeyh.name>
+ - Copyright 2016-2019 Joey Hess <id@joeyh.name>
  -
  - Licensed under the GNU GPL version 3 or higher.
  -}
@@ -10,114 +10,26 @@
 module Command.Benchmark where
 
 import Command
-import Database.Types
-import qualified Database.Keys.SQL as SQL
-import qualified Database.Queue as H
-import Utility.Tmp
-import Git.FilePath
+import Types.Benchmark
 
 import Criterion.Main
-import Criterion.Internal (runAndAnalyse)
-import Criterion.Monad
-import Control.Monad.IO.Class (liftIO)
-import Control.Monad
-import Control.DeepSeq
-import System.FilePath
-import System.Random
+import Criterion.Main.Options (parseWith, Mode)
 
-cmd :: Command
-cmd = noRepo (withParams benchmark) $
-	dontCheck repoExists $
-		command "benchmark" SectionTesting
-			"run benchmarks"
-			paramNothing
-			(withParams (liftIO . benchmark))
+cmd :: BenchmarkGenerator -> Command
+cmd generator = command "benchmark" SectionTesting
+	"benchmark git-annex commands"
+	paramNothing
+	(seek generator <$$> optParser)
 
-benchmark :: CmdParams -> IO ()
-benchmark _ = withTmpDirIn "." "benchmark" $ \tmpdir -> do
-	-- benchmark different sizes of databases
-	dbs <- mapM (benchDb tmpdir)
-		[ 1000
-		, 10000
-		-- , 100000
-		]
-	runCriterion $
-		bgroup "keys database" $ flip concatMap dbs $ \db ->
-			[ getAssociatedFilesHitBench db
-			, getAssociatedFilesMissBench db
-			, getAssociatedKeyHitBench db
-			, getAssociatedKeyMissBench db
-			, addAssociatedFileOldBench db
-			, addAssociatedFileNewBench db
-			]
+data BenchmarkOptions = BenchmarkOptions CmdParams Mode
 
-getAssociatedFilesHitBench :: BenchDb -> Benchmark
-getAssociatedFilesHitBench ( BenchDb h num) = bench ("getAssociatedFiles from " ++ show num ++ " (hit)") $ nfIO $ do
-	n <- getStdRandom (randomR (1,num))
-	SQL.getAssociatedFiles (keyN n) (SQL.ReadHandle h)
+optParser :: CmdParamsDesc -> Parser BenchmarkOptions
+optParser desc = BenchmarkOptions
+	<$> cmdParams desc
+	-- parse criterion's options
+	<*> parseWith defaultConfig
 
-getAssociatedFilesMissBench :: BenchDb -> Benchmark
-getAssociatedFilesMissBench ( BenchDb h num) = bench ("getAssociatedFiles from " ++ show num ++ " (miss)") $ nfIO $
-	SQL.getAssociatedFiles keyMiss (SQL.ReadHandle h)
-
-getAssociatedKeyHitBench :: BenchDb -> Benchmark
-getAssociatedKeyHitBench (BenchDb h num) = bench ("getAssociatedKey from " ++ show num ++ " (hit)") $ nfIO $ do
-	n <- getStdRandom (randomR (1,num))
-	SQL.getAssociatedKey (fileN n) (SQL.ReadHandle h)
-
-getAssociatedKeyMissBench :: BenchDb -> Benchmark
-getAssociatedKeyMissBench (BenchDb h num) = bench ("getAssociatedKey from " ++ show num ++ " (miss)") $ nfIO $
-	SQL.getAssociatedKey fileMiss (SQL.ReadHandle h)
-
-addAssociatedFileOldBench :: BenchDb -> Benchmark
-addAssociatedFileOldBench ( BenchDb h num) = bench ("addAssociatedFile to " ++ show num ++ " (old)") $ nfIO $ do
-	n <- getStdRandom (randomR (1,num))
-	SQL.addAssociatedFile (keyN n) (fileN n) (SQL.WriteHandle h)
-	H.flushDbQueue h
-
-addAssociatedFileNewBench :: BenchDb -> Benchmark
-addAssociatedFileNewBench ( BenchDb h num) = bench ("addAssociatedFile to " ++ show num ++ " (new)") $ nfIO $ do
-	n <- getStdRandom (randomR (1,num))
-	SQL.addAssociatedFile (keyN n) (fileN (n+1)) (SQL.WriteHandle h)
-	H.flushDbQueue h
-
-populateAssociatedFiles :: H.DbQueue -> Int -> IO ()
-populateAssociatedFiles h num = do
-	forM_ [1..num] $ \n ->
-		SQL.addAssociatedFile (keyN n) (fileN n) (SQL.WriteHandle h)
-	H.flushDbQueue h
-
-keyN :: Int -> IKey
-keyN n = IKey ("key" ++ show n)
-
-fileN :: Int -> TopFilePath
-fileN n = asTopFilePath ("file" ++ show n)
-
-keyMiss :: IKey
-keyMiss = keyN 0 -- 0 is never stored
-
-fileMiss :: TopFilePath
-fileMiss = fileN 0 -- 0 is never stored
-
-data BenchDb = BenchDb H.DbQueue Int
-
-benchDb :: FilePath -> Int -> IO BenchDb
-benchDb tmpdir num = do
-	putStrLn $ "setting up database with " ++ show num
-	H.initDb f SQL.createTables
-	h <- H.openDbQueue f SQL.containedTable
-	populateAssociatedFiles h num
-	return (BenchDb h num)
-  where
-	f = tmpdir </> "db" ++ show num
-
-instance NFData TopFilePath where
-	rnf = rnf . getTopFilePath
-
-instance NFData IKey where
-	rnf (IKey s) = rnf s
-	
--- can't use Criterion's defaultMain here because it looks at
--- command-line parameters
-runCriterion :: Benchmark -> IO ()
-runCriterion = withConfig defaultConfig . runAndAnalyse (const True)
+seek :: BenchmarkGenerator -> BenchmarkOptions -> CommandSeek
+seek generator (BenchmarkOptions ps mode) = do
+	runner <- generator ps
+	liftIO $ runMode mode [ bench (unwords ps) $ nfIO runner ]
diff --git a/Test/Framework.hs b/Test/Framework.hs
index 01b70b0be6..a4202ec8fc 100644
--- a/Test/Framework.hs
+++ b/Test/Framework.hs
@@ -64,8 +64,13 @@ git_annex' command params = do
 	-- catch all errors, including normally fatal errors
 	try run ::IO (Either SomeException ())
   where
-	run = GitAnnex.run dummyTestOptParser (\_ -> noop) (command:"-q":params)
+	run = GitAnnex.run dummyTestOptParser
+		dummyTestRunner
+		dummyBenchmarkGenerator
+		(command:"-q":params)
 	dummyTestOptParser = pure mempty
+	dummyTestRunner _ = noop
+	dummyBenchmarkGenerator _ _ = return noop
 
 {- Runs git-annex and returns its output. -}
 git_annex_output :: String -> [String] -> IO String
diff --git a/Types/Benchmark.hs b/Types/Benchmark.hs
new file mode 100644
index 0000000000..ae8e4f1e09
--- /dev/null
+++ b/Types/Benchmark.hs
@@ -0,0 +1,15 @@
+{- git-annex benchmark data types.
+ -
+ - Copyright 2019 Joey Hess <id@joeyh.name>
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+module Types.Benchmark where
+
+import Annex
+import Types.Command
+
+type BenchmarkGenerator = [String] -> Annex (IO ())
+
+type MkBenchmarkGenerator = [Command] -> BenchmarkGenerator
diff --git a/doc/git-annex-benchmark.mdwn b/doc/git-annex-benchmark.mdwn
new file mode 100644
index 0000000000..72bbc38572
--- /dev/null
+++ b/doc/git-annex-benchmark.mdwn
@@ -0,0 +1,60 @@
+# NAME
+
+git-annex benchmark - benchmark git-annex commands
+
+# SYNOPSIS
+
+git annex benchmark [criterionopts] -- command [; command]
+
+# DESCRIPTION
+
+When git-annex is built with benchmarking support, this command can be used
+to benchmark any other git-annex command. For example "git annex benchmark -- get ."
+will benchmark "git annex get".
+
+The command being benchmarked is run in the current git-annex repository.
+It does not run just once; the benchmarking process will run it several
+times to get a statistically meaningful result.
+
+When benchmarking an action like "git annex get", the first run will
+often do much more than subsequent runs. To make the benchmark repeat an
+action like getting a file each time, additional commands can be listed,
+separated by ';'. (Note that ';' needs to be escaped from the shell.)
+The combined script will be run repeatedly by the benchmark. An example
+of using this:
+
+	git annex benchmark -- get . ';' drop .
+
+Note that git-annex benchmark does not fork new git-annex processes when
+benchmarking; it calls the command to benchmark internally, and so avoids
+git-annex's startup overhead. (So don't try to use it to optimise git-annex
+startup.) 
+
+# OPTIONS
+
+Before the "--" any of the criterion library's command-line options can be
+used.
+
+Any options that git-annex usually accepts can be included after the
+command to benchmark.
+
+# OUTPUT
+
+The output of the commands being benchmarked goes to standard output and
+standard error as usual. It's often a good idea to sink it to /dev/null to
+avoid the display of the output skewing the benchmark results. Of course
+--quiet can also be used to avoid most git-annex output, as long as you
+don't want to benchmark the generation of that output.
+
+The benchmark report is output to standard output by default, although
+criterion options can be used to redirect it to a file.
+
+# SEE ALSO
+
+[[git-annex]](1)
+
+# AUTHOR
+
+Joey Hess <id@joeyh.name>
+
+Warning: Automatically converted into a man page by mdwn2man. Edit with care.
diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn
index 095f2a8169..5295110269 100644
--- a/doc/git-annex.mdwn
+++ b/doc/git-annex.mdwn
@@ -730,6 +730,8 @@ subdirectories).
 
   This runs git-annex's built-in benchmarks, if it was built with
   benchmarking support.
+  
+  See [[git-annex-benchmark]](1) for details.
 
 # COMMON OPTIONS
 
diff --git a/git-annex.cabal b/git-annex.cabal
index 8725593171..6af2430150 100644
--- a/git-annex.cabal
+++ b/git-annex.cabal
@@ -663,6 +663,7 @@ Executable git-annex
     Backend.URL
     Backend.Utilities
     Backend.WORM
+    Benchmark
     Build.BundledPrograms
     Build.Configure
     Build.DesktopFile
@@ -949,6 +950,7 @@ Executable git-annex
     Types.AdjustedBranch
     Types.Availability
     Types.Backend
+    Types.Benchmark
     Types.BranchState
     Types.CleanupActions
     Types.Command
diff --git a/git-annex.hs b/git-annex.hs
index e9e8e7bc38..67ed4d27b8 100644
--- a/git-annex.hs
+++ b/git-annex.hs
@@ -15,6 +15,7 @@ import qualified CmdLine.GitAnnex
 import qualified CmdLine.GitAnnexShell
 import qualified CmdLine.GitRemoteTorAnnex
 import qualified Test
+import qualified Benchmark
 import Utility.FileSystemEncoding
 
 #ifdef mingw32_HOST_OS
@@ -34,7 +35,7 @@ main = withSocketsDo $ do
 	run ps n = case takeFileName n of
 		"git-annex-shell" -> CmdLine.GitAnnexShell.run ps
 		"git-remote-tor-annex" -> CmdLine.GitRemoteTorAnnex.run ps
-		_  -> CmdLine.GitAnnex.run Test.optParser Test.runner ps
+		_  -> CmdLine.GitAnnex.run Test.optParser Test.runner Benchmark.mkGenerator ps
 
 #ifdef mingw32_HOST_OS
 {- On Windows, if HOME is not set, probe it and set it.