From 11d6e2e260d70ba99e35464c19c2b2772ce9efaa Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Fri, 4 Jan 2019 13:43:53 -0400 Subject: [PATCH] new improved benchmark command that can benchmark anything git-annex does --- Benchmark.hs | 53 +++++++++++++++ CHANGELOG | 5 +- CmdLine.hs | 20 ++++-- CmdLine/GitAnnex.hs | 16 +++-- Command/Benchmark.hs | 124 +++++------------------------------ Test/Framework.hs | 7 +- Types/Benchmark.hs | 15 +++++ doc/git-annex-benchmark.mdwn | 60 +++++++++++++++++ doc/git-annex.mdwn | 2 + git-annex.cabal | 2 + git-annex.hs | 3 +- 11 files changed, 184 insertions(+), 123 deletions(-) create mode 100644 Benchmark.hs create mode 100644 Types/Benchmark.hs create mode 100644 doc/git-annex-benchmark.mdwn diff --git a/Benchmark.hs b/Benchmark.hs new file mode 100644 index 0000000000..35ae9ef11b --- /dev/null +++ b/Benchmark.hs @@ -0,0 +1,53 @@ +{- git-annex benchmark infrastructure + - + - Copyright 2019 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +module Benchmark where + +import Common +import Types.Benchmark +import Types.Command +import CmdLine.Action +import CmdLine +import CmdLine.GitAnnex.Options +import qualified Annex +import qualified Annex.Branch +import Annex.Action + +import qualified Options.Applicative as O + +{- Given a list of all git-annex Commands, and the user's input, + - generates an IO action to benchmark that runs the specified + - commands. -} +mkGenerator :: MkBenchmarkGenerator +mkGenerator cmds userinput = do + -- Get the git-annex branch updated, to avoid the overhead of doing + -- so skewing the runtime of the first action that will be + -- benchmarked. + Annex.Branch.commit "benchmarking" + Annex.Branch.update + l <- mapM parsesubcommand $ split [";"] userinput + return $ do + forM_ l $ \(cmd, seek, st) -> + -- The cmd is run for benchmarking without startup or + -- shutdown actions. + Annex.eval st $ performCommandAction cmd seek noop + -- Since the cmd will be run many times, some zombie + -- processes that normally only occur once per command + -- will build up; reap them. + reapZombies + where + -- Simplified versio of CmdLine.dispatch, without support for fuzzy + -- matching or out-of-repo commands. + parsesubcommand ps = do + (cmd, seek, globalconfig) <- liftIO $ O.handleParseResult $ + parseCmd "git-annex" "benchmarking" gitAnnexGlobalOptions ps cmds cmdparser + -- Make an entirely separate Annex state for each subcommand, + -- and prepare it to run the cmd. + st <- liftIO . Annex.new =<< Annex.getState Annex.repo + ((), st') <- liftIO $ Annex.run st $ + prepRunCommand cmd globalconfig + return (cmd, seek, st') diff --git a/CHANGELOG b/CHANGELOG index 3854dcd3f9..ba10efdca6 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -12,7 +12,10 @@ git-annex (7.20181212) UNRELEASED; urgency=medium used so it will also work with v7 unlocked pointer files. * Fix doubled progress display when downloading an url when -J is used. * importfeed: Better error message when downloading the feed fails. - * Optimised timestamp parser is 10x faster. + * Some optimisations, including a 10x faster timestamp parser, + and improved parsing and serialization of git-annex branch data. + * The benchmark command, which only had some old benchmarking of the sqlite + databases before, now allows benchmarking any other git-annex commands. -- Joey Hess Tue, 18 Dec 2018 12:24:52 -0400 diff --git a/CmdLine.hs b/CmdLine.hs index c9de90ec07..036463fd5c 100644 --- a/CmdLine.hs +++ b/CmdLine.hs @@ -8,6 +8,8 @@ module CmdLine ( dispatch, usage, + parseCmd, + prepRunCommand, ) where import qualified Options.Applicative as O @@ -39,13 +41,7 @@ dispatch fuzzyok allargs allcmds globaloptions fields getgitrepo progname progde (cmd, seek, globalconfig) <- parsewith False cmdparser (\a -> inRepo $ a . Just) (liftIO . O.handleParseResult) - when (cmdnomessages cmd) $ do - Annex.setOutput QuietOutput - Annex.changeState $ \s -> s - { Annex.output = (Annex.output s) { implicitMessages = False } } - getParsed globalconfig - whenM (annexDebug <$> Annex.getGitConfig) $ - liftIO enableDebugOutput + prepRunCommand cmd globalconfig startup performCommandAction cmd seek $ shutdown $ cmdnocommit cmd @@ -123,3 +119,13 @@ findCmd fuzzyok argv cmds inexactcmds = case name of Nothing -> [] Just n -> Git.AutoCorrect.fuzzymatches n cmdname cmds + +prepRunCommand :: Command -> GlobalSetter -> Annex () +prepRunCommand cmd globalconfig = do + when (cmdnomessages cmd) $ do + Annex.setOutput QuietOutput + Annex.changeState $ \s -> s + { Annex.output = (Annex.output s) { implicitMessages = False } } + getParsed globalconfig + whenM (annexDebug <$> Annex.getGitConfig) $ + liftIO enableDebugOutput diff --git a/CmdLine/GitAnnex.hs b/CmdLine/GitAnnex.hs index 11354684f9..f1bb96d3f9 100644 --- a/CmdLine/GitAnnex.hs +++ b/CmdLine/GitAnnex.hs @@ -1,6 +1,6 @@ {- git-annex main program - - - Copyright 2010-2015 Joey Hess + - Copyright 2010-2019 Joey Hess - - Licensed under the GNU GPL version 3 or higher. -} @@ -16,6 +16,7 @@ import Utility.Env import Annex.Ssh import Annex.Multicast import Types.Test +import Types.Benchmark import qualified Command.Help import qualified Command.Add @@ -123,8 +124,8 @@ import qualified Command.TestRemote import qualified Command.Benchmark #endif -cmds :: Parser TestOptions -> TestRunner -> [Command] -cmds testoptparser testrunner = +cmds :: Parser TestOptions -> TestRunner -> MkBenchmarkGenerator -> [Command] +cmds testoptparser testrunner mkbenchmarkgenerator = [ Command.Help.cmd , Command.Add.cmd , Command.Get.cmd @@ -229,15 +230,16 @@ cmds testoptparser testrunner = , Command.FuzzTest.cmd , Command.TestRemote.cmd #ifdef WITH_BENCHMARK - , Command.Benchmark.cmd + , Command.Benchmark.cmd $ + mkbenchmarkgenerator $ cmds testoptparser testrunner (\_ _ -> return noop) #endif ] -run :: Parser TestOptions -> TestRunner -> [String] -> IO () -run testoptparser testrunner args = go envmodes +run :: Parser TestOptions -> TestRunner -> MkBenchmarkGenerator -> [String] -> IO () +run testoptparser testrunner mkbenchmarkgenerator args = go envmodes where go [] = dispatch True args - (cmds testoptparser testrunner) + (cmds testoptparser testrunner mkbenchmarkgenerator) gitAnnexGlobalOptions [] Git.CurrentRepo.get "git-annex" "manage files with git, without checking their contents in" diff --git a/Command/Benchmark.hs b/Command/Benchmark.hs index bcfecc2dc1..e1e2cf84f7 100644 --- a/Command/Benchmark.hs +++ b/Command/Benchmark.hs @@ -1,6 +1,6 @@ {- git-annex benchmark - - - Copyright 2016 Joey Hess + - Copyright 2016-2019 Joey Hess - - Licensed under the GNU GPL version 3 or higher. -} @@ -10,114 +10,26 @@ module Command.Benchmark where import Command -import Database.Types -import qualified Database.Keys.SQL as SQL -import qualified Database.Queue as H -import Utility.Tmp -import Git.FilePath +import Types.Benchmark import Criterion.Main -import Criterion.Internal (runAndAnalyse) -import Criterion.Monad -import Control.Monad.IO.Class (liftIO) -import Control.Monad -import Control.DeepSeq -import System.FilePath -import System.Random +import Criterion.Main.Options (parseWith, Mode) -cmd :: Command -cmd = noRepo (withParams benchmark) $ - dontCheck repoExists $ - command "benchmark" SectionTesting - "run benchmarks" - paramNothing - (withParams (liftIO . benchmark)) +cmd :: BenchmarkGenerator -> Command +cmd generator = command "benchmark" SectionTesting + "benchmark git-annex commands" + paramNothing + (seek generator <$$> optParser) -benchmark :: CmdParams -> IO () -benchmark _ = withTmpDirIn "." "benchmark" $ \tmpdir -> do - -- benchmark different sizes of databases - dbs <- mapM (benchDb tmpdir) - [ 1000 - , 10000 - -- , 100000 - ] - runCriterion $ - bgroup "keys database" $ flip concatMap dbs $ \db -> - [ getAssociatedFilesHitBench db - , getAssociatedFilesMissBench db - , getAssociatedKeyHitBench db - , getAssociatedKeyMissBench db - , addAssociatedFileOldBench db - , addAssociatedFileNewBench db - ] +data BenchmarkOptions = BenchmarkOptions CmdParams Mode -getAssociatedFilesHitBench :: BenchDb -> Benchmark -getAssociatedFilesHitBench ( BenchDb h num) = bench ("getAssociatedFiles from " ++ show num ++ " (hit)") $ nfIO $ do - n <- getStdRandom (randomR (1,num)) - SQL.getAssociatedFiles (keyN n) (SQL.ReadHandle h) +optParser :: CmdParamsDesc -> Parser BenchmarkOptions +optParser desc = BenchmarkOptions + <$> cmdParams desc + -- parse criterion's options + <*> parseWith defaultConfig -getAssociatedFilesMissBench :: BenchDb -> Benchmark -getAssociatedFilesMissBench ( BenchDb h num) = bench ("getAssociatedFiles from " ++ show num ++ " (miss)") $ nfIO $ - SQL.getAssociatedFiles keyMiss (SQL.ReadHandle h) - -getAssociatedKeyHitBench :: BenchDb -> Benchmark -getAssociatedKeyHitBench (BenchDb h num) = bench ("getAssociatedKey from " ++ show num ++ " (hit)") $ nfIO $ do - n <- getStdRandom (randomR (1,num)) - SQL.getAssociatedKey (fileN n) (SQL.ReadHandle h) - -getAssociatedKeyMissBench :: BenchDb -> Benchmark -getAssociatedKeyMissBench (BenchDb h num) = bench ("getAssociatedKey from " ++ show num ++ " (miss)") $ nfIO $ - SQL.getAssociatedKey fileMiss (SQL.ReadHandle h) - -addAssociatedFileOldBench :: BenchDb -> Benchmark -addAssociatedFileOldBench ( BenchDb h num) = bench ("addAssociatedFile to " ++ show num ++ " (old)") $ nfIO $ do - n <- getStdRandom (randomR (1,num)) - SQL.addAssociatedFile (keyN n) (fileN n) (SQL.WriteHandle h) - H.flushDbQueue h - -addAssociatedFileNewBench :: BenchDb -> Benchmark -addAssociatedFileNewBench ( BenchDb h num) = bench ("addAssociatedFile to " ++ show num ++ " (new)") $ nfIO $ do - n <- getStdRandom (randomR (1,num)) - SQL.addAssociatedFile (keyN n) (fileN (n+1)) (SQL.WriteHandle h) - H.flushDbQueue h - -populateAssociatedFiles :: H.DbQueue -> Int -> IO () -populateAssociatedFiles h num = do - forM_ [1..num] $ \n -> - SQL.addAssociatedFile (keyN n) (fileN n) (SQL.WriteHandle h) - H.flushDbQueue h - -keyN :: Int -> IKey -keyN n = IKey ("key" ++ show n) - -fileN :: Int -> TopFilePath -fileN n = asTopFilePath ("file" ++ show n) - -keyMiss :: IKey -keyMiss = keyN 0 -- 0 is never stored - -fileMiss :: TopFilePath -fileMiss = fileN 0 -- 0 is never stored - -data BenchDb = BenchDb H.DbQueue Int - -benchDb :: FilePath -> Int -> IO BenchDb -benchDb tmpdir num = do - putStrLn $ "setting up database with " ++ show num - H.initDb f SQL.createTables - h <- H.openDbQueue f SQL.containedTable - populateAssociatedFiles h num - return (BenchDb h num) - where - f = tmpdir "db" ++ show num - -instance NFData TopFilePath where - rnf = rnf . getTopFilePath - -instance NFData IKey where - rnf (IKey s) = rnf s - --- can't use Criterion's defaultMain here because it looks at --- command-line parameters -runCriterion :: Benchmark -> IO () -runCriterion = withConfig defaultConfig . runAndAnalyse (const True) +seek :: BenchmarkGenerator -> BenchmarkOptions -> CommandSeek +seek generator (BenchmarkOptions ps mode) = do + runner <- generator ps + liftIO $ runMode mode [ bench (unwords ps) $ nfIO runner ] diff --git a/Test/Framework.hs b/Test/Framework.hs index 01b70b0be6..a4202ec8fc 100644 --- a/Test/Framework.hs +++ b/Test/Framework.hs @@ -64,8 +64,13 @@ git_annex' command params = do -- catch all errors, including normally fatal errors try run ::IO (Either SomeException ()) where - run = GitAnnex.run dummyTestOptParser (\_ -> noop) (command:"-q":params) + run = GitAnnex.run dummyTestOptParser + dummyTestRunner + dummyBenchmarkGenerator + (command:"-q":params) dummyTestOptParser = pure mempty + dummyTestRunner _ = noop + dummyBenchmarkGenerator _ _ = return noop {- Runs git-annex and returns its output. -} git_annex_output :: String -> [String] -> IO String diff --git a/Types/Benchmark.hs b/Types/Benchmark.hs new file mode 100644 index 0000000000..ae8e4f1e09 --- /dev/null +++ b/Types/Benchmark.hs @@ -0,0 +1,15 @@ +{- git-annex benchmark data types. + - + - Copyright 2019 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Types.Benchmark where + +import Annex +import Types.Command + +type BenchmarkGenerator = [String] -> Annex (IO ()) + +type MkBenchmarkGenerator = [Command] -> BenchmarkGenerator diff --git a/doc/git-annex-benchmark.mdwn b/doc/git-annex-benchmark.mdwn new file mode 100644 index 0000000000..72bbc38572 --- /dev/null +++ b/doc/git-annex-benchmark.mdwn @@ -0,0 +1,60 @@ +# NAME + +git-annex benchmark - benchmark git-annex commands + +# SYNOPSIS + +git annex benchmark [criterionopts] -- commmand [; command] + +# DESCRIPTION + +When git-annex is built with benchmarking support, this command can be used +to benchmark any other git-annex command. For example "git annex benchmark -- get ." +will benchmark "git annex get". + +The command being benchmarked is run in the current git-annex repository. +It does not run just once; the benchmarking process will run it several +times to get a statistically meaningful result. + +When benchmarking an action like "git annex get", the first run will +often do much more than subseqent runs. To make the benchmark repeat an +action like getting a file each time, additional command can be listed, +separated by ';'. (Note that ';' needs to be escaped from the shell.) +The combined script will be run repeatedly by the benchmark. An example +of using this: + + git annex benchmark -- get . ';' drop . + +Note that git-annex benchmark does not fork new git-annex processes when +benchmarking; it calls the command to benchmark internally, and so avoids +git-annex's startup overhead. (So don't try to use it to optimise git-annex +startup.) + +# OPTIONS + +Before the "--" any of the criterion library's command-line options can be +used. + +Any options that git-annex usually accepts can be included after the +command to benchmark. + +# OUTPUT + +The output of the commands being benchmarked goes to standard output and +standard error as usual. It's often a good idea to sink it to /dev/null to +avoid the display of the output skewing the benchmark results. Of course +--quiet can also be used to avoid most git-annex output, as long as you +don't want to benchmark the generation of that output. + +The benchmark report is output to standard output by default, although +criterion options can be used to redirect it to a file. + +# SEE ALSO + +[[git-annex]](1) + +# AUTHOR + +Joey Hess + +Warning: Automatically converted into a man page by mdwn2man. Edit with care. diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn index 095f2a8169..5295110269 100644 --- a/doc/git-annex.mdwn +++ b/doc/git-annex.mdwn @@ -730,6 +730,8 @@ subdirectories). This runs git-annex's built-in benchmarks, if it was built with benchmarking support. + + See [[git-annex-benchmark]](1) for details. # COMMON OPTIONS diff --git a/git-annex.cabal b/git-annex.cabal index 8725593171..6af2430150 100644 --- a/git-annex.cabal +++ b/git-annex.cabal @@ -663,6 +663,7 @@ Executable git-annex Backend.URL Backend.Utilities Backend.WORM + Benchmark Build.BundledPrograms Build.Configure Build.DesktopFile @@ -949,6 +950,7 @@ Executable git-annex Types.AdjustedBranch Types.Availability Types.Backend + Types.Benchmark Types.BranchState Types.CleanupActions Types.Command diff --git a/git-annex.hs b/git-annex.hs index e9e8e7bc38..67ed4d27b8 100644 --- a/git-annex.hs +++ b/git-annex.hs @@ -15,6 +15,7 @@ import qualified CmdLine.GitAnnex import qualified CmdLine.GitAnnexShell import qualified CmdLine.GitRemoteTorAnnex import qualified Test +import qualified Benchmark import Utility.FileSystemEncoding #ifdef mingw32_HOST_OS @@ -34,7 +35,7 @@ main = withSocketsDo $ do run ps n = case takeFileName n of "git-annex-shell" -> CmdLine.GitAnnexShell.run ps "git-remote-tor-annex" -> CmdLine.GitRemoteTorAnnex.run ps - _ -> CmdLine.GitAnnex.run Test.optParser Test.runner ps + _ -> CmdLine.GitAnnex.run Test.optParser Test.runner Benchmark.mkGenerator ps #ifdef mingw32_HOST_OS {- On Windows, if HOME is not set, probe it and set it.