benchmark: Add --databases to benchmark sqlite databases
Rescued from commit 11d6e2e260
which removed
db benchmarks in favor of benchmarking arbitrary git-annex commands. Which
is nice and general, but microbenchmarks are useful too.
This commit is contained in:
parent
27e10fdbd7
commit
25f912de5b
9 changed files with 190 additions and 21 deletions
|
@ -1,3 +1,9 @@
|
|||
git-annex (7.20191025) UNRELEASED; urgency=medium
|
||||
|
||||
* benchmark: Add --databases to benchmark sqlite databases.
|
||||
|
||||
-- Joey Hess <id@joeyh.name> Tue, 29 Oct 2019 15:13:03 -0400
|
||||
|
||||
git-annex (7.20191024) upstream; urgency=medium
|
||||
|
||||
* Changed git add/git commit -a default behavior back to what it was
|
||||
|
|
|
@ -11,10 +11,11 @@ module Command.Benchmark where
|
|||
|
||||
import Command
|
||||
import Types.Benchmark
|
||||
|
||||
#ifdef WITH_BENCHMARK
|
||||
import Database.Benchmark
|
||||
|
||||
import Criterion.Main
|
||||
import Criterion.Main.Options (parseWith, Mode)
|
||||
import Criterion.Main.Options (parseWith)
|
||||
#endif
|
||||
|
||||
cmd :: BenchmarkGenerator -> Command
|
||||
|
@ -23,20 +24,26 @@ cmd generator = command "benchmark" SectionTesting
|
|||
paramNothing
|
||||
(seek generator <$$> optParser)
|
||||
|
||||
#ifndef WITH_BENCHMARK
|
||||
type Mode = ()
|
||||
#endif
|
||||
|
||||
data BenchmarkOptions = BenchmarkOptions CmdParams Mode
|
||||
data BenchmarkOptions
|
||||
= BenchmarkOptions CmdParams CriterionMode
|
||||
| BenchmarkDatabases CriterionMode
|
||||
|
||||
optParser :: CmdParamsDesc -> Parser BenchmarkOptions
|
||||
optParser desc = BenchmarkOptions
|
||||
optParser desc = benchmarkoptions <|> benchmarkdatabases
|
||||
where
|
||||
benchmarkoptions = BenchmarkOptions
|
||||
<$> cmdParams desc
|
||||
<*> criterionopts
|
||||
benchmarkdatabases = BenchmarkDatabases
|
||||
<$> criterionopts
|
||||
<* flag' ()
|
||||
( long "databases"
|
||||
<> help "benchmark sqlite databases"
|
||||
)
|
||||
#ifdef WITH_BENCHMARK
|
||||
-- parse criterion's options
|
||||
<*> parseWith defaultConfig
|
||||
criterionopts = parseWith defaultConfig
|
||||
#else
|
||||
<*> pure ()
|
||||
criterionopts = pure ()
|
||||
#endif
|
||||
|
||||
seek :: BenchmarkGenerator -> BenchmarkOptions -> CommandSeek
|
||||
|
@ -44,6 +51,7 @@ seek :: BenchmarkGenerator -> BenchmarkOptions -> CommandSeek
|
|||
seek generator (BenchmarkOptions ps mode) = do
|
||||
runner <- generator ps
|
||||
liftIO $ runMode mode [ bench (unwords ps) $ nfIO runner ]
|
||||
seek _ (BenchmarkDatabases mode) = benchmarkDbs mode
|
||||
#else
|
||||
seek _ _ = giveup "git-annex is not built with benchmarking support"
|
||||
#endif
|
||||
|
|
120
Database/Benchmark.hs
Normal file
120
Database/Benchmark.hs
Normal file
|
@ -0,0 +1,120 @@
|
|||
{- git-annex database benchmarks
|
||||
-
|
||||
- Copyright 2016-2019 Joey Hess <id@joeyh.name>
|
||||
-
|
||||
- Licensed under the GNU AGPL version 3 or higher.
|
||||
-}
|
||||
|
||||
{-# LANGUAGE CPP #-}
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
|
||||
module Database.Benchmark (benchmarkDbs) where
|
||||
|
||||
import Annex.Common
|
||||
import Types.Benchmark
|
||||
#ifdef WITH_BENCHMARK
|
||||
import qualified Database.Keys.SQL as SQL
|
||||
import qualified Database.Queue as H
|
||||
import Database.Init
|
||||
import Database.Types
|
||||
import Utility.Tmp.Dir
|
||||
import Git.FilePath
|
||||
import Types.Key
|
||||
|
||||
import Criterion.Main
|
||||
import Control.Monad.IO.Class (liftIO)
|
||||
import qualified Data.ByteString.Char8 as B8
|
||||
import System.Random
|
||||
#endif
|
||||
|
||||
benchmarkDbs :: CriterionMode -> Annex ()
|
||||
#ifdef WITH_BENCHMARK
|
||||
benchmarkDbs mode = withTmpDirIn "." "benchmark" $ \tmpdir -> do
|
||||
-- benchmark different sizes of databases
|
||||
dbs <- mapM (benchDb tmpdir)
|
||||
[ 1000
|
||||
, 10000
|
||||
-- , 100000
|
||||
]
|
||||
liftIO $ runMode mode
|
||||
[ bgroup "keys database" $ flip concatMap dbs $ \db ->
|
||||
[ getAssociatedFilesHitBench db
|
||||
, getAssociatedFilesMissBench db
|
||||
, getAssociatedKeyHitBench db
|
||||
, getAssociatedKeyMissBench db
|
||||
, addAssociatedFileOldBench db
|
||||
, addAssociatedFileNewBench db
|
||||
]
|
||||
]
|
||||
#else
|
||||
benchmarkDbs _ = error "not built with criterion, cannot benchmark"
|
||||
#endif
|
||||
|
||||
#ifdef WITH_BENCHMARK
|
||||
|
||||
getAssociatedFilesHitBench :: BenchDb -> Benchmark
|
||||
getAssociatedFilesHitBench (BenchDb h num) = bench ("getAssociatedFiles from " ++ show num ++ " (hit)") $ nfIO $ do
|
||||
n <- getStdRandom (randomR (1,num))
|
||||
SQL.getAssociatedFiles (toIKey (keyN n)) (SQL.ReadHandle h)
|
||||
|
||||
getAssociatedFilesMissBench :: BenchDb -> Benchmark
|
||||
getAssociatedFilesMissBench (BenchDb h num) = bench ("getAssociatedFiles from " ++ show num ++ " (miss)") $ nfIO $
|
||||
SQL.getAssociatedFiles (toIKey keyMiss) (SQL.ReadHandle h)
|
||||
|
||||
getAssociatedKeyHitBench :: BenchDb -> Benchmark
|
||||
getAssociatedKeyHitBench (BenchDb h num) = bench ("getAssociatedKey from " ++ show num ++ " (hit)") $ nfIO $ do
|
||||
n <- getStdRandom (randomR (1,num))
|
||||
-- fromIKey because this ends up being used to get a Key
|
||||
map fromIKey <$> SQL.getAssociatedKey (fileN n) (SQL.ReadHandle h)
|
||||
|
||||
getAssociatedKeyMissBench :: BenchDb -> Benchmark
|
||||
getAssociatedKeyMissBench (BenchDb h num) = bench ("getAssociatedKey from " ++ show num ++ " (miss)") $ nfIO $
|
||||
-- fromIKey because this ends up being used to get a Key
|
||||
map fromIKey <$> SQL.getAssociatedKey fileMiss (SQL.ReadHandle h)
|
||||
|
||||
addAssociatedFileOldBench :: BenchDb -> Benchmark
|
||||
addAssociatedFileOldBench (BenchDb h num) = bench ("addAssociatedFile to " ++ show num ++ " (old)") $ nfIO $ do
|
||||
n <- getStdRandom (randomR (1,num))
|
||||
SQL.addAssociatedFile (toIKey (keyN n)) (fileN n) (SQL.WriteHandle h)
|
||||
H.flushDbQueue h
|
||||
|
||||
addAssociatedFileNewBench :: BenchDb -> Benchmark
|
||||
addAssociatedFileNewBench (BenchDb h num) = bench ("addAssociatedFile to " ++ show num ++ " (new)") $ nfIO $ do
|
||||
n <- getStdRandom (randomR (1,num))
|
||||
SQL.addAssociatedFile (toIKey (keyN n)) (fileN (n+1)) (SQL.WriteHandle h)
|
||||
H.flushDbQueue h
|
||||
|
||||
populateAssociatedFiles :: H.DbQueue -> Int -> IO ()
|
||||
populateAssociatedFiles h num = do
|
||||
forM_ [1..num] $ \n ->
|
||||
SQL.addAssociatedFile (toIKey (keyN n)) (fileN n) (SQL.WriteHandle h)
|
||||
H.flushDbQueue h
|
||||
|
||||
keyN :: Int -> Key
|
||||
keyN n = stubKey
|
||||
{ keyName = B8.pack $ "key" ++ show n
|
||||
, keyVariety = OtherKey "BENCH"
|
||||
}
|
||||
|
||||
fileN :: Int -> TopFilePath
|
||||
fileN n = asTopFilePath ("file" ++ show n)
|
||||
|
||||
keyMiss :: Key
|
||||
keyMiss = keyN 0 -- 0 is never stored
|
||||
|
||||
fileMiss :: TopFilePath
|
||||
fileMiss = fileN 0 -- 0 is never stored
|
||||
|
||||
data BenchDb = BenchDb H.DbQueue Int
|
||||
|
||||
benchDb :: FilePath -> Int -> Annex BenchDb
|
||||
benchDb tmpdir num = do
|
||||
liftIO $ putStrLn $ "setting up database with " ++ show num
|
||||
initDb db SQL.createTables
|
||||
h <- liftIO $ H.openDbQueue H.MultiWriter db SQL.containedTable
|
||||
liftIO $ populateAssociatedFiles h num
|
||||
return (BenchDb h num)
|
||||
where
|
||||
db = tmpdir </> show num </> "db"
|
||||
|
||||
#endif /* WITH_BENCHMARK */
|
|
@ -17,6 +17,7 @@ import Data.Maybe
|
|||
import Data.Char
|
||||
import qualified Data.ByteString as S
|
||||
import qualified Data.Text as T
|
||||
import Control.DeepSeq
|
||||
|
||||
import Utility.PartialPrelude
|
||||
import Key
|
||||
|
@ -41,6 +42,9 @@ derivePersistField "SKey"
|
|||
-- work when it's used in any kind of complex data structure.
|
||||
newtype IKey = IKey String
|
||||
|
||||
instance NFData IKey where
|
||||
rnf (IKey s) = rnf s
|
||||
|
||||
instance Read IKey where
|
||||
readsPrec _ s = [(IKey s, "")]
|
||||
|
||||
|
|
|
@ -11,6 +11,7 @@
|
|||
-}
|
||||
|
||||
{-# LANGUAGE CPP #-}
|
||||
{-# LANGUAGE DeriveGeneric #-}
|
||||
|
||||
module Git.FilePath (
|
||||
TopFilePath,
|
||||
|
@ -30,10 +31,14 @@ import Common
|
|||
import Git
|
||||
|
||||
import qualified System.FilePath.Posix
|
||||
import GHC.Generics
|
||||
import Control.DeepSeq
|
||||
|
||||
{- A FilePath, relative to the top of the git repository. -}
|
||||
newtype TopFilePath = TopFilePath { getTopFilePath :: FilePath }
|
||||
deriving (Show, Eq, Ord)
|
||||
deriving (Show, Eq, Ord, Generic)
|
||||
|
||||
instance NFData TopFilePath
|
||||
|
||||
{- A file in a branch or other treeish. -}
|
||||
data BranchFilePath = BranchFilePath Ref TopFilePath
|
||||
|
|
|
@ -5,11 +5,22 @@
|
|||
- Licensed under the GNU AGPL version 3 or higher.
|
||||
-}
|
||||
|
||||
{-# LANGUAGE CPP #-}
|
||||
|
||||
module Types.Benchmark where
|
||||
|
||||
import Annex
|
||||
import Types.Command
|
||||
#ifdef WITH_BENCHMARK
|
||||
import Criterion.Main.Options (Mode)
|
||||
#endif
|
||||
|
||||
type BenchmarkGenerator = [String] -> Annex (IO ())
|
||||
|
||||
type MkBenchmarkGenerator = [Command] -> BenchmarkGenerator
|
||||
|
||||
#ifdef WITH_BENCHMARK
|
||||
type CriterionMode = Mode
|
||||
#else
|
||||
type CriterionMode = ()
|
||||
#endif
|
||||
|
|
20
Types/Key.hs
20
Types/Key.hs
|
@ -5,7 +5,7 @@
|
|||
- Licensed under the GNU AGPL version 3 or higher.
|
||||
-}
|
||||
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
{-# LANGUAGE OverloadedStrings, DeriveGeneric #-}
|
||||
|
||||
module Types.Key where
|
||||
|
||||
|
@ -13,6 +13,8 @@ import qualified Data.ByteString as S
|
|||
import qualified Data.ByteString.Char8 as S8
|
||||
import System.Posix.Types
|
||||
import Data.Monoid
|
||||
import GHC.Generics
|
||||
import Control.DeepSeq
|
||||
import Prelude
|
||||
|
||||
{- A Key has a unique name, which is derived from a particular backend,
|
||||
|
@ -24,7 +26,9 @@ data Key = Key
|
|||
, keyMtime :: Maybe EpochTime
|
||||
, keyChunkSize :: Maybe Integer
|
||||
, keyChunkNum :: Maybe Integer
|
||||
} deriving (Eq, Ord, Read, Show)
|
||||
} deriving (Eq, Ord, Read, Show, Generic)
|
||||
|
||||
instance NFData Key
|
||||
|
||||
{- A filename may be associated with a Key. -}
|
||||
newtype AssociatedFile = AssociatedFile (Maybe FilePath)
|
||||
|
@ -46,15 +50,21 @@ data KeyVariety
|
|||
-- Some repositories may contain keys of other varieties,
|
||||
-- which can still be processed to some extent.
|
||||
| OtherKey S.ByteString
|
||||
deriving (Eq, Ord, Read, Show)
|
||||
deriving (Eq, Ord, Read, Show, Generic)
|
||||
|
||||
instance NFData KeyVariety
|
||||
|
||||
{- Some varieties of keys may contain an extension at the end of the
|
||||
- keyName -}
|
||||
newtype HasExt = HasExt Bool
|
||||
deriving (Eq, Ord, Read, Show)
|
||||
deriving (Eq, Ord, Read, Show, Generic)
|
||||
|
||||
instance NFData HasExt
|
||||
|
||||
newtype HashSize = HashSize Int
|
||||
deriving (Eq, Ord, Read, Show)
|
||||
deriving (Eq, Ord, Read, Show, Generic)
|
||||
|
||||
instance NFData HashSize
|
||||
|
||||
hasExt :: KeyVariety -> Bool
|
||||
hasExt (SHA2Key _ (HasExt b)) = b
|
||||
|
|
|
@ -4,12 +4,13 @@ git-annex benchmark - benchmark git-annex commands
|
|||
|
||||
# SYNOPSIS
|
||||
|
||||
git annex benchmark [criterionopts] -- commmand [; command]
|
||||
git annex benchmark [criterionopts] ( -- commmand [; command] | --databases )
|
||||
|
||||
# DESCRIPTION
|
||||
|
||||
When git-annex is built with benchmarking support, this command can be used
|
||||
to benchmark any other git-annex command. For example "git annex benchmark -- get ."
|
||||
to benchmark any other git-annex command. For example
|
||||
"git annex benchmark -- get ."
|
||||
will benchmark "git annex get".
|
||||
|
||||
The command being benchmarked is run in the current git-annex repository.
|
||||
|
@ -18,7 +19,7 @@ times to get a statistically meaningful result.
|
|||
|
||||
When benchmarking an action like "git annex get", the first run will
|
||||
often do much more than subseqent runs. To make the benchmark repeat an
|
||||
action like getting a file each time, additional command can be listed,
|
||||
action like getting a file each time, additional commands can be listed,
|
||||
separated by ';'. (Note that ';' needs to be escaped from the shell.)
|
||||
The combined script will be run repeatedly by the benchmark. An example
|
||||
of using this:
|
||||
|
@ -38,6 +39,9 @@ used.
|
|||
Any options that git-annex usually accepts can be included after the
|
||||
command to benchmark.
|
||||
|
||||
The --databases option benchmark's git-annex's use of sqlite databases,
|
||||
instead of a command.
|
||||
|
||||
# OUTPUT
|
||||
|
||||
The output of the commands being benchmarked goes to standard output and
|
||||
|
|
|
@ -813,6 +813,7 @@ Executable git-annex
|
|||
Config.Smudge
|
||||
Creds
|
||||
Crypto
|
||||
Database.Benchmark
|
||||
Database.ContentIdentifier
|
||||
Database.Export
|
||||
Database.Fsck
|
||||
|
|
Loading…
Reference in a new issue