new improved benchmark command that can benchmark anything git-annex does

This commit is contained in:
Joey Hess 2019-01-04 13:43:53 -04:00
parent 3b3d31583b
commit 11d6e2e260
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
11 changed files with 184 additions and 123 deletions

53
Benchmark.hs Normal file
View file

@ -0,0 +1,53 @@
{- git-annex benchmark infrastructure
-
- Copyright 2019 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Benchmark where
import Common
import Types.Benchmark
import Types.Command
import CmdLine.Action
import CmdLine
import CmdLine.GitAnnex.Options
import qualified Annex
import qualified Annex.Branch
import Annex.Action
import qualified Options.Applicative as O
{- Given a list of all git-annex Commands, and the user's input,
- generates an IO action to benchmark that runs the specified
- commands. -}
mkGenerator :: MkBenchmarkGenerator
mkGenerator cmds userinput = do
-- Get the git-annex branch updated, to avoid the overhead of doing
-- so skewing the runtime of the first action that will be
-- benchmarked.
Annex.Branch.commit "benchmarking"
Annex.Branch.update
l <- mapM parsesubcommand $ split [";"] userinput
return $ do
forM_ l $ \(cmd, seek, st) ->
-- The cmd is run for benchmarking without startup or
-- shutdown actions.
Annex.eval st $ performCommandAction cmd seek noop
-- Since the cmd will be run many times, some zombie
-- processes that normally only occur once per command
-- will build up; reap them.
reapZombies
where
-- Simplified versio of CmdLine.dispatch, without support for fuzzy
-- matching or out-of-repo commands.
parsesubcommand ps = do
(cmd, seek, globalconfig) <- liftIO $ O.handleParseResult $
parseCmd "git-annex" "benchmarking" gitAnnexGlobalOptions ps cmds cmdparser
-- Make an entirely separate Annex state for each subcommand,
-- and prepare it to run the cmd.
st <- liftIO . Annex.new =<< Annex.getState Annex.repo
((), st') <- liftIO $ Annex.run st $
prepRunCommand cmd globalconfig
return (cmd, seek, st')

View file

@ -12,7 +12,10 @@ git-annex (7.20181212) UNRELEASED; urgency=medium
used so it will also work with v7 unlocked pointer files.
* Fix doubled progress display when downloading an url when -J is used.
* importfeed: Better error message when downloading the feed fails.
* Optimised timestamp parser is 10x faster.
* Some optimisations, including a 10x faster timestamp parser,
and improved parsing and serialization of git-annex branch data.
* The benchmark command, which only had some old benchmarking of the sqlite
databases before, now allows benchmarking any other git-annex commands.
-- Joey Hess <id@joeyh.name> Tue, 18 Dec 2018 12:24:52 -0400

View file

@ -8,6 +8,8 @@
module CmdLine (
dispatch,
usage,
parseCmd,
prepRunCommand,
) where
import qualified Options.Applicative as O
@ -39,13 +41,7 @@ dispatch fuzzyok allargs allcmds globaloptions fields getgitrepo progname progde
(cmd, seek, globalconfig) <- parsewith False cmdparser
(\a -> inRepo $ a . Just)
(liftIO . O.handleParseResult)
when (cmdnomessages cmd) $ do
Annex.setOutput QuietOutput
Annex.changeState $ \s -> s
{ Annex.output = (Annex.output s) { implicitMessages = False } }
getParsed globalconfig
whenM (annexDebug <$> Annex.getGitConfig) $
liftIO enableDebugOutput
prepRunCommand cmd globalconfig
startup
performCommandAction cmd seek $
shutdown $ cmdnocommit cmd
@ -123,3 +119,13 @@ findCmd fuzzyok argv cmds
inexactcmds = case name of
Nothing -> []
Just n -> Git.AutoCorrect.fuzzymatches n cmdname cmds
prepRunCommand :: Command -> GlobalSetter -> Annex ()
prepRunCommand cmd globalconfig = do
when (cmdnomessages cmd) $ do
Annex.setOutput QuietOutput
Annex.changeState $ \s -> s
{ Annex.output = (Annex.output s) { implicitMessages = False } }
getParsed globalconfig
whenM (annexDebug <$> Annex.getGitConfig) $
liftIO enableDebugOutput

View file

@ -1,6 +1,6 @@
{- git-annex main program
-
- Copyright 2010-2015 Joey Hess <id@joeyh.name>
- Copyright 2010-2019 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
@ -16,6 +16,7 @@ import Utility.Env
import Annex.Ssh
import Annex.Multicast
import Types.Test
import Types.Benchmark
import qualified Command.Help
import qualified Command.Add
@ -123,8 +124,8 @@ import qualified Command.TestRemote
import qualified Command.Benchmark
#endif
cmds :: Parser TestOptions -> TestRunner -> [Command]
cmds testoptparser testrunner =
cmds :: Parser TestOptions -> TestRunner -> MkBenchmarkGenerator -> [Command]
cmds testoptparser testrunner mkbenchmarkgenerator =
[ Command.Help.cmd
, Command.Add.cmd
, Command.Get.cmd
@ -229,15 +230,16 @@ cmds testoptparser testrunner =
, Command.FuzzTest.cmd
, Command.TestRemote.cmd
#ifdef WITH_BENCHMARK
, Command.Benchmark.cmd
, Command.Benchmark.cmd $
mkbenchmarkgenerator $ cmds testoptparser testrunner (\_ _ -> return noop)
#endif
]
run :: Parser TestOptions -> TestRunner -> [String] -> IO ()
run testoptparser testrunner args = go envmodes
run :: Parser TestOptions -> TestRunner -> MkBenchmarkGenerator -> [String] -> IO ()
run testoptparser testrunner mkbenchmarkgenerator args = go envmodes
where
go [] = dispatch True args
(cmds testoptparser testrunner)
(cmds testoptparser testrunner mkbenchmarkgenerator)
gitAnnexGlobalOptions [] Git.CurrentRepo.get
"git-annex"
"manage files with git, without checking their contents in"

View file

@ -1,6 +1,6 @@
{- git-annex benchmark
-
- Copyright 2016 Joey Hess <id@joeyh.name>
- Copyright 2016-2019 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
@ -10,114 +10,26 @@
module Command.Benchmark where
import Command
import Database.Types
import qualified Database.Keys.SQL as SQL
import qualified Database.Queue as H
import Utility.Tmp
import Git.FilePath
import Types.Benchmark
import Criterion.Main
import Criterion.Internal (runAndAnalyse)
import Criterion.Monad
import Control.Monad.IO.Class (liftIO)
import Control.Monad
import Control.DeepSeq
import System.FilePath
import System.Random
import Criterion.Main.Options (parseWith, Mode)
cmd :: Command
cmd = noRepo (withParams benchmark) $
dontCheck repoExists $
command "benchmark" SectionTesting
"run benchmarks"
cmd :: BenchmarkGenerator -> Command
cmd generator = command "benchmark" SectionTesting
"benchmark git-annex commands"
paramNothing
(withParams (liftIO . benchmark))
(seek generator <$$> optParser)
benchmark :: CmdParams -> IO ()
benchmark _ = withTmpDirIn "." "benchmark" $ \tmpdir -> do
-- benchmark different sizes of databases
dbs <- mapM (benchDb tmpdir)
[ 1000
, 10000
-- , 100000
]
runCriterion $
bgroup "keys database" $ flip concatMap dbs $ \db ->
[ getAssociatedFilesHitBench db
, getAssociatedFilesMissBench db
, getAssociatedKeyHitBench db
, getAssociatedKeyMissBench db
, addAssociatedFileOldBench db
, addAssociatedFileNewBench db
]
data BenchmarkOptions = BenchmarkOptions CmdParams Mode
getAssociatedFilesHitBench :: BenchDb -> Benchmark
getAssociatedFilesHitBench ( BenchDb h num) = bench ("getAssociatedFiles from " ++ show num ++ " (hit)") $ nfIO $ do
n <- getStdRandom (randomR (1,num))
SQL.getAssociatedFiles (keyN n) (SQL.ReadHandle h)
optParser :: CmdParamsDesc -> Parser BenchmarkOptions
optParser desc = BenchmarkOptions
<$> cmdParams desc
-- parse criterion's options
<*> parseWith defaultConfig
getAssociatedFilesMissBench :: BenchDb -> Benchmark
getAssociatedFilesMissBench ( BenchDb h num) = bench ("getAssociatedFiles from " ++ show num ++ " (miss)") $ nfIO $
SQL.getAssociatedFiles keyMiss (SQL.ReadHandle h)
getAssociatedKeyHitBench :: BenchDb -> Benchmark
getAssociatedKeyHitBench (BenchDb h num) = bench ("getAssociatedKey from " ++ show num ++ " (hit)") $ nfIO $ do
n <- getStdRandom (randomR (1,num))
SQL.getAssociatedKey (fileN n) (SQL.ReadHandle h)
getAssociatedKeyMissBench :: BenchDb -> Benchmark
getAssociatedKeyMissBench (BenchDb h num) = bench ("getAssociatedKey from " ++ show num ++ " (miss)") $ nfIO $
SQL.getAssociatedKey fileMiss (SQL.ReadHandle h)
addAssociatedFileOldBench :: BenchDb -> Benchmark
addAssociatedFileOldBench ( BenchDb h num) = bench ("addAssociatedFile to " ++ show num ++ " (old)") $ nfIO $ do
n <- getStdRandom (randomR (1,num))
SQL.addAssociatedFile (keyN n) (fileN n) (SQL.WriteHandle h)
H.flushDbQueue h
addAssociatedFileNewBench :: BenchDb -> Benchmark
addAssociatedFileNewBench ( BenchDb h num) = bench ("addAssociatedFile to " ++ show num ++ " (new)") $ nfIO $ do
n <- getStdRandom (randomR (1,num))
SQL.addAssociatedFile (keyN n) (fileN (n+1)) (SQL.WriteHandle h)
H.flushDbQueue h
populateAssociatedFiles :: H.DbQueue -> Int -> IO ()
populateAssociatedFiles h num = do
forM_ [1..num] $ \n ->
SQL.addAssociatedFile (keyN n) (fileN n) (SQL.WriteHandle h)
H.flushDbQueue h
keyN :: Int -> IKey
keyN n = IKey ("key" ++ show n)
fileN :: Int -> TopFilePath
fileN n = asTopFilePath ("file" ++ show n)
keyMiss :: IKey
keyMiss = keyN 0 -- 0 is never stored
fileMiss :: TopFilePath
fileMiss = fileN 0 -- 0 is never stored
data BenchDb = BenchDb H.DbQueue Int
benchDb :: FilePath -> Int -> IO BenchDb
benchDb tmpdir num = do
putStrLn $ "setting up database with " ++ show num
H.initDb f SQL.createTables
h <- H.openDbQueue f SQL.containedTable
populateAssociatedFiles h num
return (BenchDb h num)
where
f = tmpdir </> "db" ++ show num
instance NFData TopFilePath where
rnf = rnf . getTopFilePath
instance NFData IKey where
rnf (IKey s) = rnf s
-- can't use Criterion's defaultMain here because it looks at
-- command-line parameters
runCriterion :: Benchmark -> IO ()
runCriterion = withConfig defaultConfig . runAndAnalyse (const True)
seek :: BenchmarkGenerator -> BenchmarkOptions -> CommandSeek
seek generator (BenchmarkOptions ps mode) = do
runner <- generator ps
liftIO $ runMode mode [ bench (unwords ps) $ nfIO runner ]

View file

@ -64,8 +64,13 @@ git_annex' command params = do
-- catch all errors, including normally fatal errors
try run ::IO (Either SomeException ())
where
run = GitAnnex.run dummyTestOptParser (\_ -> noop) (command:"-q":params)
run = GitAnnex.run dummyTestOptParser
dummyTestRunner
dummyBenchmarkGenerator
(command:"-q":params)
dummyTestOptParser = pure mempty
dummyTestRunner _ = noop
dummyBenchmarkGenerator _ _ = return noop
{- Runs git-annex and returns its output. -}
git_annex_output :: String -> [String] -> IO String

15
Types/Benchmark.hs Normal file
View file

@ -0,0 +1,15 @@
{- git-annex benchmark data types.
-
- Copyright 2019 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Types.Benchmark where
import Annex
import Types.Command
type BenchmarkGenerator = [String] -> Annex (IO ())
type MkBenchmarkGenerator = [Command] -> BenchmarkGenerator

View file

@ -0,0 +1,60 @@
# NAME
git-annex benchmark - benchmark git-annex commands
# SYNOPSIS
git annex benchmark [criterionopts] -- commmand [; command]
# DESCRIPTION
When git-annex is built with benchmarking support, this command can be used
to benchmark any other git-annex command. For example "git annex benchmark -- get ."
will benchmark "git annex get".
The command being benchmarked is run in the current git-annex repository.
It does not run just once; the benchmarking process will run it several
times to get a statistically meaningful result.
When benchmarking an action like "git annex get", the first run will
often do much more than subseqent runs. To make the benchmark repeat an
action like getting a file each time, additional command can be listed,
separated by ';'. (Note that ';' needs to be escaped from the shell.)
The combined script will be run repeatedly by the benchmark. An example
of using this:
git annex benchmark -- get . ';' drop .
Note that git-annex benchmark does not fork new git-annex processes when
benchmarking; it calls the command to benchmark internally, and so avoids
git-annex's startup overhead. (So don't try to use it to optimise git-annex
startup.)
# OPTIONS
Before the "--" any of the criterion library's command-line options can be
used.
Any options that git-annex usually accepts can be included after the
command to benchmark.
# OUTPUT
The output of the commands being benchmarked goes to standard output and
standard error as usual. It's often a good idea to sink it to /dev/null to
avoid the display of the output skewing the benchmark results. Of course
--quiet can also be used to avoid most git-annex output, as long as you
don't want to benchmark the generation of that output.
The benchmark report is output to standard output by default, although
criterion options can be used to redirect it to a file.
# SEE ALSO
[[git-annex]](1)
# AUTHOR
Joey Hess <id@joeyh.name>
Warning: Automatically converted into a man page by mdwn2man. Edit with care.

View file

@ -731,6 +731,8 @@ subdirectories).
This runs git-annex's built-in benchmarks, if it was built with
benchmarking support.
See [[git-annex-benchmark]](1) for details.
# COMMON OPTIONS
These common options are accepted by all git-annex commands, and

View file

@ -663,6 +663,7 @@ Executable git-annex
Backend.URL
Backend.Utilities
Backend.WORM
Benchmark
Build.BundledPrograms
Build.Configure
Build.DesktopFile
@ -949,6 +950,7 @@ Executable git-annex
Types.AdjustedBranch
Types.Availability
Types.Backend
Types.Benchmark
Types.BranchState
Types.CleanupActions
Types.Command

View file

@ -15,6 +15,7 @@ import qualified CmdLine.GitAnnex
import qualified CmdLine.GitAnnexShell
import qualified CmdLine.GitRemoteTorAnnex
import qualified Test
import qualified Benchmark
import Utility.FileSystemEncoding
#ifdef mingw32_HOST_OS
@ -34,7 +35,7 @@ main = withSocketsDo $ do
run ps n = case takeFileName n of
"git-annex-shell" -> CmdLine.GitAnnexShell.run ps
"git-remote-tor-annex" -> CmdLine.GitRemoteTorAnnex.run ps
_ -> CmdLine.GitAnnex.run Test.optParser Test.runner ps
_ -> CmdLine.GitAnnex.run Test.optParser Test.runner Benchmark.mkGenerator ps
#ifdef mingw32_HOST_OS
{- On Windows, if HOME is not set, probe it and set it.