new improved benchmark command that can benchmark anything git-annex does

This commit is contained in:
Joey Hess 2019-01-04 13:43:53 -04:00
parent 3b3d31583b
commit 11d6e2e260
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
11 changed files with 184 additions and 123 deletions

53
Benchmark.hs Normal file
View file

@ -0,0 +1,53 @@
{- git-annex benchmark infrastructure
-
- Copyright 2019 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Benchmark where
import Common
import Types.Benchmark
import Types.Command
import CmdLine.Action
import CmdLine
import CmdLine.GitAnnex.Options
import qualified Annex
import qualified Annex.Branch
import Annex.Action
import qualified Options.Applicative as O
{- Given a list of all git-annex Commands, and the user's input,
- generates an IO action to benchmark that runs the specified
- commands. -}
mkGenerator :: MkBenchmarkGenerator
mkGenerator cmds userinput = do
-- Get the git-annex branch updated, to avoid the overhead of doing
-- so skewing the runtime of the first action that will be
-- benchmarked.
Annex.Branch.commit "benchmarking"
Annex.Branch.update
l <- mapM parsesubcommand $ split [";"] userinput
return $ do
forM_ l $ \(cmd, seek, st) ->
-- The cmd is run for benchmarking without startup or
-- shutdown actions.
Annex.eval st $ performCommandAction cmd seek noop
-- Since the cmd will be run many times, some zombie
-- processes that normally only occur once per command
-- will build up; reap them.
reapZombies
where
-- Simplified versio of CmdLine.dispatch, without support for fuzzy
-- matching or out-of-repo commands.
parsesubcommand ps = do
(cmd, seek, globalconfig) <- liftIO $ O.handleParseResult $
parseCmd "git-annex" "benchmarking" gitAnnexGlobalOptions ps cmds cmdparser
-- Make an entirely separate Annex state for each subcommand,
-- and prepare it to run the cmd.
st <- liftIO . Annex.new =<< Annex.getState Annex.repo
((), st') <- liftIO $ Annex.run st $
prepRunCommand cmd globalconfig
return (cmd, seek, st')

View file

@ -12,7 +12,10 @@ git-annex (7.20181212) UNRELEASED; urgency=medium
used so it will also work with v7 unlocked pointer files. used so it will also work with v7 unlocked pointer files.
* Fix doubled progress display when downloading an url when -J is used. * Fix doubled progress display when downloading an url when -J is used.
* importfeed: Better error message when downloading the feed fails. * importfeed: Better error message when downloading the feed fails.
* Optimised timestamp parser is 10x faster. * Some optimisations, including a 10x faster timestamp parser,
and improved parsing and serialization of git-annex branch data.
* The benchmark command, which only had some old benchmarking of the sqlite
databases before, now allows benchmarking any other git-annex commands.
-- Joey Hess <id@joeyh.name> Tue, 18 Dec 2018 12:24:52 -0400 -- Joey Hess <id@joeyh.name> Tue, 18 Dec 2018 12:24:52 -0400

View file

@ -8,6 +8,8 @@
module CmdLine ( module CmdLine (
dispatch, dispatch,
usage, usage,
parseCmd,
prepRunCommand,
) where ) where
import qualified Options.Applicative as O import qualified Options.Applicative as O
@ -39,13 +41,7 @@ dispatch fuzzyok allargs allcmds globaloptions fields getgitrepo progname progde
(cmd, seek, globalconfig) <- parsewith False cmdparser (cmd, seek, globalconfig) <- parsewith False cmdparser
(\a -> inRepo $ a . Just) (\a -> inRepo $ a . Just)
(liftIO . O.handleParseResult) (liftIO . O.handleParseResult)
when (cmdnomessages cmd) $ do prepRunCommand cmd globalconfig
Annex.setOutput QuietOutput
Annex.changeState $ \s -> s
{ Annex.output = (Annex.output s) { implicitMessages = False } }
getParsed globalconfig
whenM (annexDebug <$> Annex.getGitConfig) $
liftIO enableDebugOutput
startup startup
performCommandAction cmd seek $ performCommandAction cmd seek $
shutdown $ cmdnocommit cmd shutdown $ cmdnocommit cmd
@ -123,3 +119,13 @@ findCmd fuzzyok argv cmds
inexactcmds = case name of inexactcmds = case name of
Nothing -> [] Nothing -> []
Just n -> Git.AutoCorrect.fuzzymatches n cmdname cmds Just n -> Git.AutoCorrect.fuzzymatches n cmdname cmds
prepRunCommand :: Command -> GlobalSetter -> Annex ()
prepRunCommand cmd globalconfig = do
when (cmdnomessages cmd) $ do
Annex.setOutput QuietOutput
Annex.changeState $ \s -> s
{ Annex.output = (Annex.output s) { implicitMessages = False } }
getParsed globalconfig
whenM (annexDebug <$> Annex.getGitConfig) $
liftIO enableDebugOutput

View file

@ -1,6 +1,6 @@
{- git-annex main program {- git-annex main program
- -
- Copyright 2010-2015 Joey Hess <id@joeyh.name> - Copyright 2010-2019 Joey Hess <id@joeyh.name>
- -
- Licensed under the GNU GPL version 3 or higher. - Licensed under the GNU GPL version 3 or higher.
-} -}
@ -16,6 +16,7 @@ import Utility.Env
import Annex.Ssh import Annex.Ssh
import Annex.Multicast import Annex.Multicast
import Types.Test import Types.Test
import Types.Benchmark
import qualified Command.Help import qualified Command.Help
import qualified Command.Add import qualified Command.Add
@ -123,8 +124,8 @@ import qualified Command.TestRemote
import qualified Command.Benchmark import qualified Command.Benchmark
#endif #endif
cmds :: Parser TestOptions -> TestRunner -> [Command] cmds :: Parser TestOptions -> TestRunner -> MkBenchmarkGenerator -> [Command]
cmds testoptparser testrunner = cmds testoptparser testrunner mkbenchmarkgenerator =
[ Command.Help.cmd [ Command.Help.cmd
, Command.Add.cmd , Command.Add.cmd
, Command.Get.cmd , Command.Get.cmd
@ -229,15 +230,16 @@ cmds testoptparser testrunner =
, Command.FuzzTest.cmd , Command.FuzzTest.cmd
, Command.TestRemote.cmd , Command.TestRemote.cmd
#ifdef WITH_BENCHMARK #ifdef WITH_BENCHMARK
, Command.Benchmark.cmd , Command.Benchmark.cmd $
mkbenchmarkgenerator $ cmds testoptparser testrunner (\_ _ -> return noop)
#endif #endif
] ]
run :: Parser TestOptions -> TestRunner -> [String] -> IO () run :: Parser TestOptions -> TestRunner -> MkBenchmarkGenerator -> [String] -> IO ()
run testoptparser testrunner args = go envmodes run testoptparser testrunner mkbenchmarkgenerator args = go envmodes
where where
go [] = dispatch True args go [] = dispatch True args
(cmds testoptparser testrunner) (cmds testoptparser testrunner mkbenchmarkgenerator)
gitAnnexGlobalOptions [] Git.CurrentRepo.get gitAnnexGlobalOptions [] Git.CurrentRepo.get
"git-annex" "git-annex"
"manage files with git, without checking their contents in" "manage files with git, without checking their contents in"

View file

@ -1,6 +1,6 @@
{- git-annex benchmark {- git-annex benchmark
- -
- Copyright 2016 Joey Hess <id@joeyh.name> - Copyright 2016-2019 Joey Hess <id@joeyh.name>
- -
- Licensed under the GNU GPL version 3 or higher. - Licensed under the GNU GPL version 3 or higher.
-} -}
@ -10,114 +10,26 @@
module Command.Benchmark where module Command.Benchmark where
import Command import Command
import Database.Types import Types.Benchmark
import qualified Database.Keys.SQL as SQL
import qualified Database.Queue as H
import Utility.Tmp
import Git.FilePath
import Criterion.Main import Criterion.Main
import Criterion.Internal (runAndAnalyse) import Criterion.Main.Options (parseWith, Mode)
import Criterion.Monad
import Control.Monad.IO.Class (liftIO)
import Control.Monad
import Control.DeepSeq
import System.FilePath
import System.Random
cmd :: Command cmd :: BenchmarkGenerator -> Command
cmd = noRepo (withParams benchmark) $ cmd generator = command "benchmark" SectionTesting
dontCheck repoExists $ "benchmark git-annex commands"
command "benchmark" SectionTesting
"run benchmarks"
paramNothing paramNothing
(withParams (liftIO . benchmark)) (seek generator <$$> optParser)
benchmark :: CmdParams -> IO () data BenchmarkOptions = BenchmarkOptions CmdParams Mode
benchmark _ = withTmpDirIn "." "benchmark" $ \tmpdir -> do
-- benchmark different sizes of databases
dbs <- mapM (benchDb tmpdir)
[ 1000
, 10000
-- , 100000
]
runCriterion $
bgroup "keys database" $ flip concatMap dbs $ \db ->
[ getAssociatedFilesHitBench db
, getAssociatedFilesMissBench db
, getAssociatedKeyHitBench db
, getAssociatedKeyMissBench db
, addAssociatedFileOldBench db
, addAssociatedFileNewBench db
]
getAssociatedFilesHitBench :: BenchDb -> Benchmark optParser :: CmdParamsDesc -> Parser BenchmarkOptions
getAssociatedFilesHitBench ( BenchDb h num) = bench ("getAssociatedFiles from " ++ show num ++ " (hit)") $ nfIO $ do optParser desc = BenchmarkOptions
n <- getStdRandom (randomR (1,num)) <$> cmdParams desc
SQL.getAssociatedFiles (keyN n) (SQL.ReadHandle h) -- parse criterion's options
<*> parseWith defaultConfig
getAssociatedFilesMissBench :: BenchDb -> Benchmark seek :: BenchmarkGenerator -> BenchmarkOptions -> CommandSeek
getAssociatedFilesMissBench ( BenchDb h num) = bench ("getAssociatedFiles from " ++ show num ++ " (miss)") $ nfIO $ seek generator (BenchmarkOptions ps mode) = do
SQL.getAssociatedFiles keyMiss (SQL.ReadHandle h) runner <- generator ps
liftIO $ runMode mode [ bench (unwords ps) $ nfIO runner ]
getAssociatedKeyHitBench :: BenchDb -> Benchmark
getAssociatedKeyHitBench (BenchDb h num) = bench ("getAssociatedKey from " ++ show num ++ " (hit)") $ nfIO $ do
n <- getStdRandom (randomR (1,num))
SQL.getAssociatedKey (fileN n) (SQL.ReadHandle h)
getAssociatedKeyMissBench :: BenchDb -> Benchmark
getAssociatedKeyMissBench (BenchDb h num) = bench ("getAssociatedKey from " ++ show num ++ " (miss)") $ nfIO $
SQL.getAssociatedKey fileMiss (SQL.ReadHandle h)
addAssociatedFileOldBench :: BenchDb -> Benchmark
addAssociatedFileOldBench ( BenchDb h num) = bench ("addAssociatedFile to " ++ show num ++ " (old)") $ nfIO $ do
n <- getStdRandom (randomR (1,num))
SQL.addAssociatedFile (keyN n) (fileN n) (SQL.WriteHandle h)
H.flushDbQueue h
addAssociatedFileNewBench :: BenchDb -> Benchmark
addAssociatedFileNewBench ( BenchDb h num) = bench ("addAssociatedFile to " ++ show num ++ " (new)") $ nfIO $ do
n <- getStdRandom (randomR (1,num))
SQL.addAssociatedFile (keyN n) (fileN (n+1)) (SQL.WriteHandle h)
H.flushDbQueue h
populateAssociatedFiles :: H.DbQueue -> Int -> IO ()
populateAssociatedFiles h num = do
forM_ [1..num] $ \n ->
SQL.addAssociatedFile (keyN n) (fileN n) (SQL.WriteHandle h)
H.flushDbQueue h
keyN :: Int -> IKey
keyN n = IKey ("key" ++ show n)
fileN :: Int -> TopFilePath
fileN n = asTopFilePath ("file" ++ show n)
keyMiss :: IKey
keyMiss = keyN 0 -- 0 is never stored
fileMiss :: TopFilePath
fileMiss = fileN 0 -- 0 is never stored
data BenchDb = BenchDb H.DbQueue Int
benchDb :: FilePath -> Int -> IO BenchDb
benchDb tmpdir num = do
putStrLn $ "setting up database with " ++ show num
H.initDb f SQL.createTables
h <- H.openDbQueue f SQL.containedTable
populateAssociatedFiles h num
return (BenchDb h num)
where
f = tmpdir </> "db" ++ show num
instance NFData TopFilePath where
rnf = rnf . getTopFilePath
instance NFData IKey where
rnf (IKey s) = rnf s
-- can't use Criterion's defaultMain here because it looks at
-- command-line parameters
runCriterion :: Benchmark -> IO ()
runCriterion = withConfig defaultConfig . runAndAnalyse (const True)

View file

@ -64,8 +64,13 @@ git_annex' command params = do
-- catch all errors, including normally fatal errors -- catch all errors, including normally fatal errors
try run ::IO (Either SomeException ()) try run ::IO (Either SomeException ())
where where
run = GitAnnex.run dummyTestOptParser (\_ -> noop) (command:"-q":params) run = GitAnnex.run dummyTestOptParser
dummyTestRunner
dummyBenchmarkGenerator
(command:"-q":params)
dummyTestOptParser = pure mempty dummyTestOptParser = pure mempty
dummyTestRunner _ = noop
dummyBenchmarkGenerator _ _ = return noop
{- Runs git-annex and returns its output. -} {- Runs git-annex and returns its output. -}
git_annex_output :: String -> [String] -> IO String git_annex_output :: String -> [String] -> IO String

15
Types/Benchmark.hs Normal file
View file

@ -0,0 +1,15 @@
{- git-annex benchmark data types.
-
- Copyright 2019 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Types.Benchmark where
import Annex
import Types.Command
type BenchmarkGenerator = [String] -> Annex (IO ())
type MkBenchmarkGenerator = [Command] -> BenchmarkGenerator

View file

@ -0,0 +1,60 @@
# NAME
git-annex benchmark - benchmark git-annex commands
# SYNOPSIS
git annex benchmark [criterionopts] -- commmand [; command]
# DESCRIPTION
When git-annex is built with benchmarking support, this command can be used
to benchmark any other git-annex command. For example "git annex benchmark -- get ."
will benchmark "git annex get".
The command being benchmarked is run in the current git-annex repository.
It does not run just once; the benchmarking process will run it several
times to get a statistically meaningful result.
When benchmarking an action like "git annex get", the first run will
often do much more than subseqent runs. To make the benchmark repeat an
action like getting a file each time, additional command can be listed,
separated by ';'. (Note that ';' needs to be escaped from the shell.)
The combined script will be run repeatedly by the benchmark. An example
of using this:
git annex benchmark -- get . ';' drop .
Note that git-annex benchmark does not fork new git-annex processes when
benchmarking; it calls the command to benchmark internally, and so avoids
git-annex's startup overhead. (So don't try to use it to optimise git-annex
startup.)
# OPTIONS
Before the "--" any of the criterion library's command-line options can be
used.
Any options that git-annex usually accepts can be included after the
command to benchmark.
# OUTPUT
The output of the commands being benchmarked goes to standard output and
standard error as usual. It's often a good idea to sink it to /dev/null to
avoid the display of the output skewing the benchmark results. Of course
--quiet can also be used to avoid most git-annex output, as long as you
don't want to benchmark the generation of that output.
The benchmark report is output to standard output by default, although
criterion options can be used to redirect it to a file.
# SEE ALSO
[[git-annex]](1)
# AUTHOR
Joey Hess <id@joeyh.name>
Warning: Automatically converted into a man page by mdwn2man. Edit with care.

View file

@ -731,6 +731,8 @@ subdirectories).
This runs git-annex's built-in benchmarks, if it was built with This runs git-annex's built-in benchmarks, if it was built with
benchmarking support. benchmarking support.
See [[git-annex-benchmark]](1) for details.
# COMMON OPTIONS # COMMON OPTIONS
These common options are accepted by all git-annex commands, and These common options are accepted by all git-annex commands, and

View file

@ -663,6 +663,7 @@ Executable git-annex
Backend.URL Backend.URL
Backend.Utilities Backend.Utilities
Backend.WORM Backend.WORM
Benchmark
Build.BundledPrograms Build.BundledPrograms
Build.Configure Build.Configure
Build.DesktopFile Build.DesktopFile
@ -949,6 +950,7 @@ Executable git-annex
Types.AdjustedBranch Types.AdjustedBranch
Types.Availability Types.Availability
Types.Backend Types.Backend
Types.Benchmark
Types.BranchState Types.BranchState
Types.CleanupActions Types.CleanupActions
Types.Command Types.Command

View file

@ -15,6 +15,7 @@ import qualified CmdLine.GitAnnex
import qualified CmdLine.GitAnnexShell import qualified CmdLine.GitAnnexShell
import qualified CmdLine.GitRemoteTorAnnex import qualified CmdLine.GitRemoteTorAnnex
import qualified Test import qualified Test
import qualified Benchmark
import Utility.FileSystemEncoding import Utility.FileSystemEncoding
#ifdef mingw32_HOST_OS #ifdef mingw32_HOST_OS
@ -34,7 +35,7 @@ main = withSocketsDo $ do
run ps n = case takeFileName n of run ps n = case takeFileName n of
"git-annex-shell" -> CmdLine.GitAnnexShell.run ps "git-annex-shell" -> CmdLine.GitAnnexShell.run ps
"git-remote-tor-annex" -> CmdLine.GitRemoteTorAnnex.run ps "git-remote-tor-annex" -> CmdLine.GitRemoteTorAnnex.run ps
_ -> CmdLine.GitAnnex.run Test.optParser Test.runner ps _ -> CmdLine.GitAnnex.run Test.optParser Test.runner Benchmark.mkGenerator ps
#ifdef mingw32_HOST_OS #ifdef mingw32_HOST_OS
{- On Windows, if HOME is not set, probe it and set it. {- On Windows, if HOME is not set, probe it and set it.