benchmark: Add --databases to benchmark sqlite databases

Rescued from commit 11d6e2e260 which removed
db benchmarks in favor of benchmarking arbitrary git-annex commands. Which
is nice and general, but microbenchmarks are useful too.
This commit is contained in:
Joey Hess 2019-10-29 15:16:15 -04:00
parent 27e10fdbd7
commit 25f912de5b
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
9 changed files with 190 additions and 21 deletions

View file

@ -1,3 +1,9 @@
git-annex (7.20191025) UNRELEASED; urgency=medium
* benchmark: Add --databases to benchmark sqlite databases.
-- Joey Hess <id@joeyh.name> Tue, 29 Oct 2019 15:13:03 -0400
git-annex (7.20191024) upstream; urgency=medium
* Changed git add/git commit -a default behavior back to what it was

View file

@ -11,10 +11,11 @@ module Command.Benchmark where
import Command
import Types.Benchmark
#ifdef WITH_BENCHMARK
import Database.Benchmark
import Criterion.Main
import Criterion.Main.Options (parseWith, Mode)
import Criterion.Main.Options (parseWith)
#endif
cmd :: BenchmarkGenerator -> Command
@ -23,20 +24,26 @@ cmd generator = command "benchmark" SectionTesting
paramNothing
(seek generator <$$> optParser)
#ifndef WITH_BENCHMARK
type Mode = ()
#endif
data BenchmarkOptions = BenchmarkOptions CmdParams Mode
data BenchmarkOptions
= BenchmarkOptions CmdParams CriterionMode
| BenchmarkDatabases CriterionMode
optParser :: CmdParamsDesc -> Parser BenchmarkOptions
optParser desc = BenchmarkOptions
<$> cmdParams desc
optParser desc = benchmarkoptions <|> benchmarkdatabases
where
benchmarkoptions = BenchmarkOptions
<$> cmdParams desc
<*> criterionopts
benchmarkdatabases = BenchmarkDatabases
<$> criterionopts
<* flag' ()
( long "databases"
<> help "benchmark sqlite databases"
)
#ifdef WITH_BENCHMARK
-- parse criterion's options
<*> parseWith defaultConfig
criterionopts = parseWith defaultConfig
#else
<*> pure ()
criterionopts = pure ()
#endif
seek :: BenchmarkGenerator -> BenchmarkOptions -> CommandSeek
@ -44,6 +51,7 @@ seek :: BenchmarkGenerator -> BenchmarkOptions -> CommandSeek
seek generator (BenchmarkOptions ps mode) = do
runner <- generator ps
liftIO $ runMode mode [ bench (unwords ps) $ nfIO runner ]
seek _ (BenchmarkDatabases mode) = benchmarkDbs mode
#else
seek _ _ = giveup "git-annex is not built with benchmarking support"
#endif

120
Database/Benchmark.hs Normal file
View file

@ -0,0 +1,120 @@
{- git-annex database benchmarks
-
- Copyright 2016-2019 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE OverloadedStrings #-}
module Database.Benchmark (benchmarkDbs) where
import Annex.Common
import Types.Benchmark
#ifdef WITH_BENCHMARK
import qualified Database.Keys.SQL as SQL
import qualified Database.Queue as H
import Database.Init
import Database.Types
import Utility.Tmp.Dir
import Git.FilePath
import Types.Key
import Criterion.Main
import Control.Monad.IO.Class (liftIO)
import qualified Data.ByteString.Char8 as B8
import System.Random
#endif
benchmarkDbs :: CriterionMode -> Annex ()
#ifdef WITH_BENCHMARK
benchmarkDbs mode = withTmpDirIn "." "benchmark" $ \tmpdir -> do
-- benchmark different sizes of databases
dbs <- mapM (benchDb tmpdir)
[ 1000
, 10000
-- , 100000
]
liftIO $ runMode mode
[ bgroup "keys database" $ flip concatMap dbs $ \db ->
[ getAssociatedFilesHitBench db
, getAssociatedFilesMissBench db
, getAssociatedKeyHitBench db
, getAssociatedKeyMissBench db
, addAssociatedFileOldBench db
, addAssociatedFileNewBench db
]
]
#else
benchmarkDbs _ = error "not built with criterion, cannot benchmark"
#endif
#ifdef WITH_BENCHMARK
getAssociatedFilesHitBench :: BenchDb -> Benchmark
getAssociatedFilesHitBench (BenchDb h num) = bench ("getAssociatedFiles from " ++ show num ++ " (hit)") $ nfIO $ do
n <- getStdRandom (randomR (1,num))
SQL.getAssociatedFiles (toIKey (keyN n)) (SQL.ReadHandle h)
getAssociatedFilesMissBench :: BenchDb -> Benchmark
getAssociatedFilesMissBench (BenchDb h num) = bench ("getAssociatedFiles from " ++ show num ++ " (miss)") $ nfIO $
SQL.getAssociatedFiles (toIKey keyMiss) (SQL.ReadHandle h)
getAssociatedKeyHitBench :: BenchDb -> Benchmark
getAssociatedKeyHitBench (BenchDb h num) = bench ("getAssociatedKey from " ++ show num ++ " (hit)") $ nfIO $ do
n <- getStdRandom (randomR (1,num))
-- fromIKey because this ends up being used to get a Key
map fromIKey <$> SQL.getAssociatedKey (fileN n) (SQL.ReadHandle h)
getAssociatedKeyMissBench :: BenchDb -> Benchmark
getAssociatedKeyMissBench (BenchDb h num) = bench ("getAssociatedKey from " ++ show num ++ " (miss)") $ nfIO $
-- fromIKey because this ends up being used to get a Key
map fromIKey <$> SQL.getAssociatedKey fileMiss (SQL.ReadHandle h)
addAssociatedFileOldBench :: BenchDb -> Benchmark
addAssociatedFileOldBench (BenchDb h num) = bench ("addAssociatedFile to " ++ show num ++ " (old)") $ nfIO $ do
n <- getStdRandom (randomR (1,num))
SQL.addAssociatedFile (toIKey (keyN n)) (fileN n) (SQL.WriteHandle h)
H.flushDbQueue h
addAssociatedFileNewBench :: BenchDb -> Benchmark
addAssociatedFileNewBench (BenchDb h num) = bench ("addAssociatedFile to " ++ show num ++ " (new)") $ nfIO $ do
n <- getStdRandom (randomR (1,num))
SQL.addAssociatedFile (toIKey (keyN n)) (fileN (n+1)) (SQL.WriteHandle h)
H.flushDbQueue h
populateAssociatedFiles :: H.DbQueue -> Int -> IO ()
populateAssociatedFiles h num = do
forM_ [1..num] $ \n ->
SQL.addAssociatedFile (toIKey (keyN n)) (fileN n) (SQL.WriteHandle h)
H.flushDbQueue h
keyN :: Int -> Key
keyN n = stubKey
{ keyName = B8.pack $ "key" ++ show n
, keyVariety = OtherKey "BENCH"
}
fileN :: Int -> TopFilePath
fileN n = asTopFilePath ("file" ++ show n)
keyMiss :: Key
keyMiss = keyN 0 -- 0 is never stored
fileMiss :: TopFilePath
fileMiss = fileN 0 -- 0 is never stored
data BenchDb = BenchDb H.DbQueue Int
benchDb :: FilePath -> Int -> Annex BenchDb
benchDb tmpdir num = do
liftIO $ putStrLn $ "setting up database with " ++ show num
initDb db SQL.createTables
h <- liftIO $ H.openDbQueue H.MultiWriter db SQL.containedTable
liftIO $ populateAssociatedFiles h num
return (BenchDb h num)
where
db = tmpdir </> show num </> "db"
#endif /* WITH_BENCHMARK */

View file

@ -17,6 +17,7 @@ import Data.Maybe
import Data.Char
import qualified Data.ByteString as S
import qualified Data.Text as T
import Control.DeepSeq
import Utility.PartialPrelude
import Key
@ -41,6 +42,9 @@ derivePersistField "SKey"
-- work when it's used in any kind of complex data structure.
newtype IKey = IKey String
instance NFData IKey where
rnf (IKey s) = rnf s
instance Read IKey where
readsPrec _ s = [(IKey s, "")]

View file

@ -11,6 +11,7 @@
-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE DeriveGeneric #-}
module Git.FilePath (
TopFilePath,
@ -30,10 +31,14 @@ import Common
import Git
import qualified System.FilePath.Posix
import GHC.Generics
import Control.DeepSeq
{- A FilePath, relative to the top of the git repository. -}
newtype TopFilePath = TopFilePath { getTopFilePath :: FilePath }
deriving (Show, Eq, Ord)
deriving (Show, Eq, Ord, Generic)
instance NFData TopFilePath
{- A file in a branch or other treeish. -}
data BranchFilePath = BranchFilePath Ref TopFilePath

View file

@ -5,11 +5,22 @@
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE CPP #-}
module Types.Benchmark where
import Annex
import Types.Command
#ifdef WITH_BENCHMARK
import Criterion.Main.Options (Mode)
#endif
type BenchmarkGenerator = [String] -> Annex (IO ())
type MkBenchmarkGenerator = [Command] -> BenchmarkGenerator
#ifdef WITH_BENCHMARK
type CriterionMode = Mode
#else
type CriterionMode = ()
#endif

View file

@ -5,7 +5,7 @@
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE OverloadedStrings, DeriveGeneric #-}
module Types.Key where
@ -13,6 +13,8 @@ import qualified Data.ByteString as S
import qualified Data.ByteString.Char8 as S8
import System.Posix.Types
import Data.Monoid
import GHC.Generics
import Control.DeepSeq
import Prelude
{- A Key has a unique name, which is derived from a particular backend,
@ -24,7 +26,9 @@ data Key = Key
, keyMtime :: Maybe EpochTime
, keyChunkSize :: Maybe Integer
, keyChunkNum :: Maybe Integer
} deriving (Eq, Ord, Read, Show)
} deriving (Eq, Ord, Read, Show, Generic)
instance NFData Key
{- A filename may be associated with a Key. -}
newtype AssociatedFile = AssociatedFile (Maybe FilePath)
@ -46,15 +50,21 @@ data KeyVariety
-- Some repositories may contain keys of other varieties,
-- which can still be processed to some extent.
| OtherKey S.ByteString
deriving (Eq, Ord, Read, Show)
deriving (Eq, Ord, Read, Show, Generic)
instance NFData KeyVariety
{- Some varieties of keys may contain an extension at the end of the
- keyName -}
newtype HasExt = HasExt Bool
deriving (Eq, Ord, Read, Show)
deriving (Eq, Ord, Read, Show, Generic)
instance NFData HasExt
newtype HashSize = HashSize Int
deriving (Eq, Ord, Read, Show)
deriving (Eq, Ord, Read, Show, Generic)
instance NFData HashSize
hasExt :: KeyVariety -> Bool
hasExt (SHA2Key _ (HasExt b)) = b

View file

@ -4,12 +4,13 @@ git-annex benchmark - benchmark git-annex commands
# SYNOPSIS
git annex benchmark [criterionopts] -- commmand [; command]
git annex benchmark [criterionopts] ( -- commmand [; command] | --databases )
# DESCRIPTION
When git-annex is built with benchmarking support, this command can be used
to benchmark any other git-annex command. For example "git annex benchmark -- get ."
to benchmark any other git-annex command. For example
"git annex benchmark -- get ."
will benchmark "git annex get".
The command being benchmarked is run in the current git-annex repository.
@ -18,7 +19,7 @@ times to get a statistically meaningful result.
When benchmarking an action like "git annex get", the first run will
often do much more than subseqent runs. To make the benchmark repeat an
action like getting a file each time, additional command can be listed,
action like getting a file each time, additional commands can be listed,
separated by ';'. (Note that ';' needs to be escaped from the shell.)
The combined script will be run repeatedly by the benchmark. An example
of using this:
@ -38,6 +39,9 @@ used.
Any options that git-annex usually accepts can be included after the
command to benchmark.
The --databases option benchmark's git-annex's use of sqlite databases,
instead of a command.
# OUTPUT
The output of the commands being benchmarked goes to standard output and

View file

@ -813,6 +813,7 @@ Executable git-annex
Config.Smudge
Creds
Crypto
Database.Benchmark
Database.ContentIdentifier
Database.Export
Database.Fsck