benchmark: Add --databases to benchmark sqlite databases

Rescued from commit 11d6e2e260 which removed
db benchmarks in favor of benchmarking arbitrary git-annex commands. Which
is nice and general, but microbenchmarks are useful too.
This commit is contained in:
Joey Hess 2019-10-29 15:16:15 -04:00
parent 27e10fdbd7
commit 25f912de5b
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
9 changed files with 190 additions and 21 deletions

View file

@ -1,3 +1,9 @@
git-annex (7.20191025) UNRELEASED; urgency=medium
* benchmark: Add --databases to benchmark sqlite databases.
-- Joey Hess <id@joeyh.name> Tue, 29 Oct 2019 15:13:03 -0400
git-annex (7.20191024) upstream; urgency=medium git-annex (7.20191024) upstream; urgency=medium
* Changed git add/git commit -a default behavior back to what it was * Changed git add/git commit -a default behavior back to what it was

View file

@ -11,10 +11,11 @@ module Command.Benchmark where
import Command import Command
import Types.Benchmark import Types.Benchmark
#ifdef WITH_BENCHMARK #ifdef WITH_BENCHMARK
import Database.Benchmark
import Criterion.Main import Criterion.Main
import Criterion.Main.Options (parseWith, Mode) import Criterion.Main.Options (parseWith)
#endif #endif
cmd :: BenchmarkGenerator -> Command cmd :: BenchmarkGenerator -> Command
@ -23,20 +24,26 @@ cmd generator = command "benchmark" SectionTesting
paramNothing paramNothing
(seek generator <$$> optParser) (seek generator <$$> optParser)
#ifndef WITH_BENCHMARK data BenchmarkOptions
type Mode = () = BenchmarkOptions CmdParams CriterionMode
#endif | BenchmarkDatabases CriterionMode
data BenchmarkOptions = BenchmarkOptions CmdParams Mode
optParser :: CmdParamsDesc -> Parser BenchmarkOptions optParser :: CmdParamsDesc -> Parser BenchmarkOptions
optParser desc = BenchmarkOptions optParser desc = benchmarkoptions <|> benchmarkdatabases
<$> cmdParams desc where
benchmarkoptions = BenchmarkOptions
<$> cmdParams desc
<*> criterionopts
benchmarkdatabases = BenchmarkDatabases
<$> criterionopts
<* flag' ()
( long "databases"
<> help "benchmark sqlite databases"
)
#ifdef WITH_BENCHMARK #ifdef WITH_BENCHMARK
-- parse criterion's options criterionopts = parseWith defaultConfig
<*> parseWith defaultConfig
#else #else
<*> pure () criterionopts = pure ()
#endif #endif
seek :: BenchmarkGenerator -> BenchmarkOptions -> CommandSeek seek :: BenchmarkGenerator -> BenchmarkOptions -> CommandSeek
@ -44,6 +51,7 @@ seek :: BenchmarkGenerator -> BenchmarkOptions -> CommandSeek
seek generator (BenchmarkOptions ps mode) = do seek generator (BenchmarkOptions ps mode) = do
runner <- generator ps runner <- generator ps
liftIO $ runMode mode [ bench (unwords ps) $ nfIO runner ] liftIO $ runMode mode [ bench (unwords ps) $ nfIO runner ]
seek _ (BenchmarkDatabases mode) = benchmarkDbs mode
#else #else
seek _ _ = giveup "git-annex is not built with benchmarking support" seek _ _ = giveup "git-annex is not built with benchmarking support"
#endif #endif

120
Database/Benchmark.hs Normal file
View file

@ -0,0 +1,120 @@
{- git-annex database benchmarks
-
- Copyright 2016-2019 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE OverloadedStrings #-}
module Database.Benchmark (benchmarkDbs) where
import Annex.Common
import Types.Benchmark
#ifdef WITH_BENCHMARK
import qualified Database.Keys.SQL as SQL
import qualified Database.Queue as H
import Database.Init
import Database.Types
import Utility.Tmp.Dir
import Git.FilePath
import Types.Key
import Criterion.Main
import Control.Monad.IO.Class (liftIO)
import qualified Data.ByteString.Char8 as B8
import System.Random
#endif
benchmarkDbs :: CriterionMode -> Annex ()
#ifdef WITH_BENCHMARK
benchmarkDbs mode = withTmpDirIn "." "benchmark" $ \tmpdir -> do
-- benchmark different sizes of databases
dbs <- mapM (benchDb tmpdir)
[ 1000
, 10000
-- , 100000
]
liftIO $ runMode mode
[ bgroup "keys database" $ flip concatMap dbs $ \db ->
[ getAssociatedFilesHitBench db
, getAssociatedFilesMissBench db
, getAssociatedKeyHitBench db
, getAssociatedKeyMissBench db
, addAssociatedFileOldBench db
, addAssociatedFileNewBench db
]
]
#else
benchmarkDbs _ = error "not built with criterion, cannot benchmark"
#endif
#ifdef WITH_BENCHMARK
getAssociatedFilesHitBench :: BenchDb -> Benchmark
getAssociatedFilesHitBench (BenchDb h num) = bench ("getAssociatedFiles from " ++ show num ++ " (hit)") $ nfIO $ do
n <- getStdRandom (randomR (1,num))
SQL.getAssociatedFiles (toIKey (keyN n)) (SQL.ReadHandle h)
getAssociatedFilesMissBench :: BenchDb -> Benchmark
getAssociatedFilesMissBench (BenchDb h num) = bench ("getAssociatedFiles from " ++ show num ++ " (miss)") $ nfIO $
SQL.getAssociatedFiles (toIKey keyMiss) (SQL.ReadHandle h)
getAssociatedKeyHitBench :: BenchDb -> Benchmark
getAssociatedKeyHitBench (BenchDb h num) = bench ("getAssociatedKey from " ++ show num ++ " (hit)") $ nfIO $ do
n <- getStdRandom (randomR (1,num))
-- fromIKey because this ends up being used to get a Key
map fromIKey <$> SQL.getAssociatedKey (fileN n) (SQL.ReadHandle h)
getAssociatedKeyMissBench :: BenchDb -> Benchmark
getAssociatedKeyMissBench (BenchDb h num) = bench ("getAssociatedKey from " ++ show num ++ " (miss)") $ nfIO $
-- fromIKey because this ends up being used to get a Key
map fromIKey <$> SQL.getAssociatedKey fileMiss (SQL.ReadHandle h)
addAssociatedFileOldBench :: BenchDb -> Benchmark
addAssociatedFileOldBench (BenchDb h num) = bench ("addAssociatedFile to " ++ show num ++ " (old)") $ nfIO $ do
n <- getStdRandom (randomR (1,num))
SQL.addAssociatedFile (toIKey (keyN n)) (fileN n) (SQL.WriteHandle h)
H.flushDbQueue h
addAssociatedFileNewBench :: BenchDb -> Benchmark
addAssociatedFileNewBench (BenchDb h num) = bench ("addAssociatedFile to " ++ show num ++ " (new)") $ nfIO $ do
n <- getStdRandom (randomR (1,num))
SQL.addAssociatedFile (toIKey (keyN n)) (fileN (n+1)) (SQL.WriteHandle h)
H.flushDbQueue h
populateAssociatedFiles :: H.DbQueue -> Int -> IO ()
populateAssociatedFiles h num = do
forM_ [1..num] $ \n ->
SQL.addAssociatedFile (toIKey (keyN n)) (fileN n) (SQL.WriteHandle h)
H.flushDbQueue h
keyN :: Int -> Key
keyN n = stubKey
{ keyName = B8.pack $ "key" ++ show n
, keyVariety = OtherKey "BENCH"
}
fileN :: Int -> TopFilePath
fileN n = asTopFilePath ("file" ++ show n)
keyMiss :: Key
keyMiss = keyN 0 -- 0 is never stored
fileMiss :: TopFilePath
fileMiss = fileN 0 -- 0 is never stored
data BenchDb = BenchDb H.DbQueue Int
benchDb :: FilePath -> Int -> Annex BenchDb
benchDb tmpdir num = do
liftIO $ putStrLn $ "setting up database with " ++ show num
initDb db SQL.createTables
h <- liftIO $ H.openDbQueue H.MultiWriter db SQL.containedTable
liftIO $ populateAssociatedFiles h num
return (BenchDb h num)
where
db = tmpdir </> show num </> "db"
#endif /* WITH_BENCHMARK */

View file

@ -17,6 +17,7 @@ import Data.Maybe
import Data.Char import Data.Char
import qualified Data.ByteString as S import qualified Data.ByteString as S
import qualified Data.Text as T import qualified Data.Text as T
import Control.DeepSeq
import Utility.PartialPrelude import Utility.PartialPrelude
import Key import Key
@ -41,6 +42,9 @@ derivePersistField "SKey"
-- work when it's used in any kind of complex data structure. -- work when it's used in any kind of complex data structure.
newtype IKey = IKey String newtype IKey = IKey String
instance NFData IKey where
rnf (IKey s) = rnf s
instance Read IKey where instance Read IKey where
readsPrec _ s = [(IKey s, "")] readsPrec _ s = [(IKey s, "")]

View file

@ -11,6 +11,7 @@
-} -}
{-# LANGUAGE CPP #-} {-# LANGUAGE CPP #-}
{-# LANGUAGE DeriveGeneric #-}
module Git.FilePath ( module Git.FilePath (
TopFilePath, TopFilePath,
@ -30,10 +31,14 @@ import Common
import Git import Git
import qualified System.FilePath.Posix import qualified System.FilePath.Posix
import GHC.Generics
import Control.DeepSeq
{- A FilePath, relative to the top of the git repository. -} {- A FilePath, relative to the top of the git repository. -}
newtype TopFilePath = TopFilePath { getTopFilePath :: FilePath } newtype TopFilePath = TopFilePath { getTopFilePath :: FilePath }
deriving (Show, Eq, Ord) deriving (Show, Eq, Ord, Generic)
instance NFData TopFilePath
{- A file in a branch or other treeish. -} {- A file in a branch or other treeish. -}
data BranchFilePath = BranchFilePath Ref TopFilePath data BranchFilePath = BranchFilePath Ref TopFilePath

View file

@ -5,11 +5,22 @@
- Licensed under the GNU AGPL version 3 or higher. - Licensed under the GNU AGPL version 3 or higher.
-} -}
{-# LANGUAGE CPP #-}
module Types.Benchmark where module Types.Benchmark where
import Annex import Annex
import Types.Command import Types.Command
#ifdef WITH_BENCHMARK
import Criterion.Main.Options (Mode)
#endif
type BenchmarkGenerator = [String] -> Annex (IO ()) type BenchmarkGenerator = [String] -> Annex (IO ())
type MkBenchmarkGenerator = [Command] -> BenchmarkGenerator type MkBenchmarkGenerator = [Command] -> BenchmarkGenerator
#ifdef WITH_BENCHMARK
type CriterionMode = Mode
#else
type CriterionMode = ()
#endif

View file

@ -5,7 +5,7 @@
- Licensed under the GNU AGPL version 3 or higher. - Licensed under the GNU AGPL version 3 or higher.
-} -}
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings, DeriveGeneric #-}
module Types.Key where module Types.Key where
@ -13,6 +13,8 @@ import qualified Data.ByteString as S
import qualified Data.ByteString.Char8 as S8 import qualified Data.ByteString.Char8 as S8
import System.Posix.Types import System.Posix.Types
import Data.Monoid import Data.Monoid
import GHC.Generics
import Control.DeepSeq
import Prelude import Prelude
{- A Key has a unique name, which is derived from a particular backend, {- A Key has a unique name, which is derived from a particular backend,
@ -24,7 +26,9 @@ data Key = Key
, keyMtime :: Maybe EpochTime , keyMtime :: Maybe EpochTime
, keyChunkSize :: Maybe Integer , keyChunkSize :: Maybe Integer
, keyChunkNum :: Maybe Integer , keyChunkNum :: Maybe Integer
} deriving (Eq, Ord, Read, Show) } deriving (Eq, Ord, Read, Show, Generic)
instance NFData Key
{- A filename may be associated with a Key. -} {- A filename may be associated with a Key. -}
newtype AssociatedFile = AssociatedFile (Maybe FilePath) newtype AssociatedFile = AssociatedFile (Maybe FilePath)
@ -46,15 +50,21 @@ data KeyVariety
-- Some repositories may contain keys of other varieties, -- Some repositories may contain keys of other varieties,
-- which can still be processed to some extent. -- which can still be processed to some extent.
| OtherKey S.ByteString | OtherKey S.ByteString
deriving (Eq, Ord, Read, Show) deriving (Eq, Ord, Read, Show, Generic)
instance NFData KeyVariety
{- Some varieties of keys may contain an extension at the end of the {- Some varieties of keys may contain an extension at the end of the
- keyName -} - keyName -}
newtype HasExt = HasExt Bool newtype HasExt = HasExt Bool
deriving (Eq, Ord, Read, Show) deriving (Eq, Ord, Read, Show, Generic)
instance NFData HasExt
newtype HashSize = HashSize Int newtype HashSize = HashSize Int
deriving (Eq, Ord, Read, Show) deriving (Eq, Ord, Read, Show, Generic)
instance NFData HashSize
hasExt :: KeyVariety -> Bool hasExt :: KeyVariety -> Bool
hasExt (SHA2Key _ (HasExt b)) = b hasExt (SHA2Key _ (HasExt b)) = b

View file

@ -4,12 +4,13 @@ git-annex benchmark - benchmark git-annex commands
# SYNOPSIS # SYNOPSIS
git annex benchmark [criterionopts] -- commmand [; command] git annex benchmark [criterionopts] ( -- commmand [; command] | --databases )
# DESCRIPTION # DESCRIPTION
When git-annex is built with benchmarking support, this command can be used When git-annex is built with benchmarking support, this command can be used
to benchmark any other git-annex command. For example "git annex benchmark -- get ." to benchmark any other git-annex command. For example
"git annex benchmark -- get ."
will benchmark "git annex get". will benchmark "git annex get".
The command being benchmarked is run in the current git-annex repository. The command being benchmarked is run in the current git-annex repository.
@ -18,7 +19,7 @@ times to get a statistically meaningful result.
When benchmarking an action like "git annex get", the first run will When benchmarking an action like "git annex get", the first run will
often do much more than subseqent runs. To make the benchmark repeat an often do much more than subseqent runs. To make the benchmark repeat an
action like getting a file each time, additional command can be listed, action like getting a file each time, additional commands can be listed,
separated by ';'. (Note that ';' needs to be escaped from the shell.) separated by ';'. (Note that ';' needs to be escaped from the shell.)
The combined script will be run repeatedly by the benchmark. An example The combined script will be run repeatedly by the benchmark. An example
of using this: of using this:
@ -38,6 +39,9 @@ used.
Any options that git-annex usually accepts can be included after the Any options that git-annex usually accepts can be included after the
command to benchmark. command to benchmark.
The --databases option benchmark's git-annex's use of sqlite databases,
instead of a command.
# OUTPUT # OUTPUT
The output of the commands being benchmarked goes to standard output and The output of the commands being benchmarked goes to standard output and

View file

@ -813,6 +813,7 @@ Executable git-annex
Config.Smudge Config.Smudge
Creds Creds
Crypto Crypto
Database.Benchmark
Database.ContentIdentifier Database.ContentIdentifier
Database.Export Database.Export
Database.Fsck Database.Fsck