From 25f912de5b116bc1a50904750ecf6b9cb1c72da3 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Tue, 29 Oct 2019 15:16:15 -0400 Subject: [PATCH] benchmark: Add --databases to benchmark sqlite databases Rescued from commit 11d6e2e260d70ba99e35464c19c2b2772ce9efaa which removed db benchmarks in favor of benchmarking arbitrary git-annex commands. Which is nice and general, but microbenchmarks are useful too. --- CHANGELOG | 6 ++ Command/Benchmark.hs | 32 ++++++---- Database/Benchmark.hs | 120 +++++++++++++++++++++++++++++++++++ Database/Types.hs | 4 ++ Git/FilePath.hs | 7 +- Types/Benchmark.hs | 11 ++++ Types/Key.hs | 20 ++++-- doc/git-annex-benchmark.mdwn | 10 ++- git-annex.cabal | 1 + 9 files changed, 190 insertions(+), 21 deletions(-) create mode 100644 Database/Benchmark.hs diff --git a/CHANGELOG b/CHANGELOG index 4b19b76896..22c69752e1 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,3 +1,9 @@ +git-annex (7.20191025) UNRELEASED; urgency=medium + + * benchmark: Add --databases to benchmark sqlite databases. + + -- Joey Hess Tue, 29 Oct 2019 15:13:03 -0400 + git-annex (7.20191024) upstream; urgency=medium * Changed git add/git commit -a default behavior back to what it was diff --git a/Command/Benchmark.hs b/Command/Benchmark.hs index 95418c1c0c..7ecbf338d4 100644 --- a/Command/Benchmark.hs +++ b/Command/Benchmark.hs @@ -11,10 +11,11 @@ module Command.Benchmark where import Command import Types.Benchmark - #ifdef WITH_BENCHMARK +import Database.Benchmark + import Criterion.Main -import Criterion.Main.Options (parseWith, Mode) +import Criterion.Main.Options (parseWith) #endif cmd :: BenchmarkGenerator -> Command @@ -23,20 +24,26 @@ cmd generator = command "benchmark" SectionTesting paramNothing (seek generator <$$> optParser) -#ifndef WITH_BENCHMARK -type Mode = () -#endif - -data BenchmarkOptions = BenchmarkOptions CmdParams Mode +data BenchmarkOptions + = BenchmarkOptions CmdParams CriterionMode + | BenchmarkDatabases CriterionMode optParser :: CmdParamsDesc -> Parser BenchmarkOptions -optParser desc = BenchmarkOptions - <$> cmdParams desc +optParser desc = benchmarkoptions <|> benchmarkdatabases + where + benchmarkoptions = BenchmarkOptions + <$> cmdParams desc + <*> criterionopts + benchmarkdatabases = BenchmarkDatabases + <$> criterionopts + <* flag' () + ( long "databases" + <> help "benchmark sqlite databases" + ) #ifdef WITH_BENCHMARK - -- parse criterion's options - <*> parseWith defaultConfig + criterionopts = parseWith defaultConfig #else - <*> pure () + criterionopts = pure () #endif seek :: BenchmarkGenerator -> BenchmarkOptions -> CommandSeek @@ -44,6 +51,7 @@ seek :: BenchmarkGenerator -> BenchmarkOptions -> CommandSeek seek generator (BenchmarkOptions ps mode) = do runner <- generator ps liftIO $ runMode mode [ bench (unwords ps) $ nfIO runner ] +seek _ (BenchmarkDatabases mode) = benchmarkDbs mode #else seek _ _ = giveup "git-annex is not built with benchmarking support" #endif diff --git a/Database/Benchmark.hs b/Database/Benchmark.hs new file mode 100644 index 0000000000..7f0929757f --- /dev/null +++ b/Database/Benchmark.hs @@ -0,0 +1,120 @@ +{- git-annex database benchmarks + - + - Copyright 2016-2019 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +{-# LANGUAGE CPP #-} +{-# LANGUAGE OverloadedStrings #-} + +module Database.Benchmark (benchmarkDbs) where + +import Annex.Common +import Types.Benchmark +#ifdef WITH_BENCHMARK +import qualified Database.Keys.SQL as SQL +import qualified Database.Queue as H +import Database.Init +import Database.Types +import Utility.Tmp.Dir +import Git.FilePath +import Types.Key + +import Criterion.Main +import Control.Monad.IO.Class (liftIO) +import qualified Data.ByteString.Char8 as B8 +import System.Random +#endif + +benchmarkDbs :: CriterionMode -> Annex () +#ifdef WITH_BENCHMARK +benchmarkDbs mode = withTmpDirIn "." "benchmark" $ \tmpdir -> do + -- benchmark different sizes of databases + dbs <- mapM (benchDb tmpdir) + [ 1000 + , 10000 + -- , 100000 + ] + liftIO $ runMode mode + [ bgroup "keys database" $ flip concatMap dbs $ \db -> + [ getAssociatedFilesHitBench db + , getAssociatedFilesMissBench db + , getAssociatedKeyHitBench db + , getAssociatedKeyMissBench db + , addAssociatedFileOldBench db + , addAssociatedFileNewBench db + ] + ] +#else +benchmarkDbs _ = error "not built with criterion, cannot benchmark" +#endif + +#ifdef WITH_BENCHMARK + +getAssociatedFilesHitBench :: BenchDb -> Benchmark +getAssociatedFilesHitBench (BenchDb h num) = bench ("getAssociatedFiles from " ++ show num ++ " (hit)") $ nfIO $ do + n <- getStdRandom (randomR (1,num)) + SQL.getAssociatedFiles (toIKey (keyN n)) (SQL.ReadHandle h) + +getAssociatedFilesMissBench :: BenchDb -> Benchmark +getAssociatedFilesMissBench (BenchDb h num) = bench ("getAssociatedFiles from " ++ show num ++ " (miss)") $ nfIO $ + SQL.getAssociatedFiles (toIKey keyMiss) (SQL.ReadHandle h) + +getAssociatedKeyHitBench :: BenchDb -> Benchmark +getAssociatedKeyHitBench (BenchDb h num) = bench ("getAssociatedKey from " ++ show num ++ " (hit)") $ nfIO $ do + n <- getStdRandom (randomR (1,num)) + -- fromIKey because this ends up being used to get a Key + map fromIKey <$> SQL.getAssociatedKey (fileN n) (SQL.ReadHandle h) + +getAssociatedKeyMissBench :: BenchDb -> Benchmark +getAssociatedKeyMissBench (BenchDb h num) = bench ("getAssociatedKey from " ++ show num ++ " (miss)") $ nfIO $ + -- fromIKey because this ends up being used to get a Key + map fromIKey <$> SQL.getAssociatedKey fileMiss (SQL.ReadHandle h) + +addAssociatedFileOldBench :: BenchDb -> Benchmark +addAssociatedFileOldBench (BenchDb h num) = bench ("addAssociatedFile to " ++ show num ++ " (old)") $ nfIO $ do + n <- getStdRandom (randomR (1,num)) + SQL.addAssociatedFile (toIKey (keyN n)) (fileN n) (SQL.WriteHandle h) + H.flushDbQueue h + +addAssociatedFileNewBench :: BenchDb -> Benchmark +addAssociatedFileNewBench (BenchDb h num) = bench ("addAssociatedFile to " ++ show num ++ " (new)") $ nfIO $ do + n <- getStdRandom (randomR (1,num)) + SQL.addAssociatedFile (toIKey (keyN n)) (fileN (n+1)) (SQL.WriteHandle h) + H.flushDbQueue h + +populateAssociatedFiles :: H.DbQueue -> Int -> IO () +populateAssociatedFiles h num = do + forM_ [1..num] $ \n -> + SQL.addAssociatedFile (toIKey (keyN n)) (fileN n) (SQL.WriteHandle h) + H.flushDbQueue h + +keyN :: Int -> Key +keyN n = stubKey + { keyName = B8.pack $ "key" ++ show n + , keyVariety = OtherKey "BENCH" + } + +fileN :: Int -> TopFilePath +fileN n = asTopFilePath ("file" ++ show n) + +keyMiss :: Key +keyMiss = keyN 0 -- 0 is never stored + +fileMiss :: TopFilePath +fileMiss = fileN 0 -- 0 is never stored + +data BenchDb = BenchDb H.DbQueue Int + +benchDb :: FilePath -> Int -> Annex BenchDb +benchDb tmpdir num = do + liftIO $ putStrLn $ "setting up database with " ++ show num + initDb db SQL.createTables + h <- liftIO $ H.openDbQueue H.MultiWriter db SQL.containedTable + liftIO $ populateAssociatedFiles h num + return (BenchDb h num) + where + db = tmpdir show num "db" + +#endif /* WITH_BENCHMARK */ diff --git a/Database/Types.hs b/Database/Types.hs index f08cf4e9d8..e86fd30601 100644 --- a/Database/Types.hs +++ b/Database/Types.hs @@ -17,6 +17,7 @@ import Data.Maybe import Data.Char import qualified Data.ByteString as S import qualified Data.Text as T +import Control.DeepSeq import Utility.PartialPrelude import Key @@ -41,6 +42,9 @@ derivePersistField "SKey" -- work when it's used in any kind of complex data structure. newtype IKey = IKey String +instance NFData IKey where + rnf (IKey s) = rnf s + instance Read IKey where readsPrec _ s = [(IKey s, "")] diff --git a/Git/FilePath.hs b/Git/FilePath.hs index a394e1ccd6..f0c3b69ed7 100644 --- a/Git/FilePath.hs +++ b/Git/FilePath.hs @@ -11,6 +11,7 @@ -} {-# LANGUAGE CPP #-} +{-# LANGUAGE DeriveGeneric #-} module Git.FilePath ( TopFilePath, @@ -30,10 +31,14 @@ import Common import Git import qualified System.FilePath.Posix +import GHC.Generics +import Control.DeepSeq {- A FilePath, relative to the top of the git repository. -} newtype TopFilePath = TopFilePath { getTopFilePath :: FilePath } - deriving (Show, Eq, Ord) + deriving (Show, Eq, Ord, Generic) + +instance NFData TopFilePath {- A file in a branch or other treeish. -} data BranchFilePath = BranchFilePath Ref TopFilePath diff --git a/Types/Benchmark.hs b/Types/Benchmark.hs index 379382ddc3..050b725745 100644 --- a/Types/Benchmark.hs +++ b/Types/Benchmark.hs @@ -5,11 +5,22 @@ - Licensed under the GNU AGPL version 3 or higher. -} +{-# LANGUAGE CPP #-} + module Types.Benchmark where import Annex import Types.Command +#ifdef WITH_BENCHMARK +import Criterion.Main.Options (Mode) +#endif type BenchmarkGenerator = [String] -> Annex (IO ()) type MkBenchmarkGenerator = [Command] -> BenchmarkGenerator + +#ifdef WITH_BENCHMARK +type CriterionMode = Mode +#else +type CriterionMode = () +#endif diff --git a/Types/Key.hs b/Types/Key.hs index 400d0ecbb7..0d751bd736 100644 --- a/Types/Key.hs +++ b/Types/Key.hs @@ -5,7 +5,7 @@ - Licensed under the GNU AGPL version 3 or higher. -} -{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE OverloadedStrings, DeriveGeneric #-} module Types.Key where @@ -13,6 +13,8 @@ import qualified Data.ByteString as S import qualified Data.ByteString.Char8 as S8 import System.Posix.Types import Data.Monoid +import GHC.Generics +import Control.DeepSeq import Prelude {- A Key has a unique name, which is derived from a particular backend, @@ -24,7 +26,9 @@ data Key = Key , keyMtime :: Maybe EpochTime , keyChunkSize :: Maybe Integer , keyChunkNum :: Maybe Integer - } deriving (Eq, Ord, Read, Show) + } deriving (Eq, Ord, Read, Show, Generic) + +instance NFData Key {- A filename may be associated with a Key. -} newtype AssociatedFile = AssociatedFile (Maybe FilePath) @@ -46,15 +50,21 @@ data KeyVariety -- Some repositories may contain keys of other varieties, -- which can still be processed to some extent. | OtherKey S.ByteString - deriving (Eq, Ord, Read, Show) + deriving (Eq, Ord, Read, Show, Generic) + +instance NFData KeyVariety {- Some varieties of keys may contain an extension at the end of the - keyName -} newtype HasExt = HasExt Bool - deriving (Eq, Ord, Read, Show) + deriving (Eq, Ord, Read, Show, Generic) + +instance NFData HasExt newtype HashSize = HashSize Int - deriving (Eq, Ord, Read, Show) + deriving (Eq, Ord, Read, Show, Generic) + +instance NFData HashSize hasExt :: KeyVariety -> Bool hasExt (SHA2Key _ (HasExt b)) = b diff --git a/doc/git-annex-benchmark.mdwn b/doc/git-annex-benchmark.mdwn index 72bbc38572..66f9c65258 100644 --- a/doc/git-annex-benchmark.mdwn +++ b/doc/git-annex-benchmark.mdwn @@ -4,12 +4,13 @@ git-annex benchmark - benchmark git-annex commands # SYNOPSIS -git annex benchmark [criterionopts] -- commmand [; command] +git annex benchmark [criterionopts] ( -- commmand [; command] | --databases ) # DESCRIPTION When git-annex is built with benchmarking support, this command can be used -to benchmark any other git-annex command. For example "git annex benchmark -- get ." +to benchmark any other git-annex command. For example +"git annex benchmark -- get ." will benchmark "git annex get". The command being benchmarked is run in the current git-annex repository. @@ -18,7 +19,7 @@ times to get a statistically meaningful result. When benchmarking an action like "git annex get", the first run will often do much more than subseqent runs. To make the benchmark repeat an -action like getting a file each time, additional command can be listed, +action like getting a file each time, additional commands can be listed, separated by ';'. (Note that ';' needs to be escaped from the shell.) The combined script will be run repeatedly by the benchmark. An example of using this: @@ -38,6 +39,9 @@ used. Any options that git-annex usually accepts can be included after the command to benchmark. +The --databases option benchmark's git-annex's use of sqlite databases, +instead of a command. + # OUTPUT The output of the commands being benchmarked goes to standard output and diff --git a/git-annex.cabal b/git-annex.cabal index 430910a0ec..1f18d6dca1 100644 --- a/git-annex.cabal +++ b/git-annex.cabal @@ -813,6 +813,7 @@ Executable git-annex Config.Smudge Creds Crypto + Database.Benchmark Database.ContentIdentifier Database.Export Database.Fsck