improve benchmark --databases

* benchmark: Changed --databases to take a parameter specifying the size
  of the database to benchmark.
* benchmark --databases: Display size of the populated database.
* benchmark --databases: Improve the "addAssociatedFile to (new)"
  benchmark to really add new values, not overwriting old values.
This commit is contained in:
Joey Hess 2019-11-21 17:25:20 -04:00
parent 8ea5f3ff99
commit 25ba8156bc
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
4 changed files with 29 additions and 24 deletions

View file

@ -10,6 +10,11 @@ git-annex (7.20191115) UNRELEASED; urgency=medium
* sync, assistant: Pull and push from git-lfs remotes. * sync, assistant: Pull and push from git-lfs remotes.
* Fix bug that made bare repos be treated as non-bare when --git-dir * Fix bug that made bare repos be treated as non-bare when --git-dir
was used. was used.
* benchmark: Changed --databases to take a parameter specifying the size
of the database to benchmark.
* benchmark --databases: Display size of the populated database.
* benchmark --databases: Improve the "addAssociatedFile to (new)"
benchmark to really add new values, not overwriting old values.
-- Joey Hess <id@joeyh.name> Fri, 15 Nov 2019 11:57:19 -0400 -- Joey Hess <id@joeyh.name> Fri, 15 Nov 2019 11:57:19 -0400

View file

@ -26,7 +26,7 @@ cmd generator = command "benchmark" SectionTesting
data BenchmarkOptions data BenchmarkOptions
= BenchmarkOptions CmdParams CriterionMode = BenchmarkOptions CmdParams CriterionMode
| BenchmarkDatabases CriterionMode | BenchmarkDatabases CriterionMode Integer
optParser :: CmdParamsDesc -> Parser BenchmarkOptions optParser :: CmdParamsDesc -> Parser BenchmarkOptions
optParser desc = benchmarkoptions <|> benchmarkdatabases optParser desc = benchmarkoptions <|> benchmarkdatabases
@ -36,10 +36,11 @@ optParser desc = benchmarkoptions <|> benchmarkdatabases
<*> criterionopts <*> criterionopts
benchmarkdatabases = BenchmarkDatabases benchmarkdatabases = BenchmarkDatabases
<$> criterionopts <$> criterionopts
<* flag' () <*> option auto
( long "databases" ( long "databases"
<> metavar paramNumber
<> help "benchmark sqlite databases" <> help "benchmark sqlite databases"
) )
#ifdef WITH_BENCHMARK #ifdef WITH_BENCHMARK
criterionopts = parseWith defaultConfig criterionopts = parseWith defaultConfig
#else #else
@ -51,7 +52,7 @@ seek :: BenchmarkGenerator -> BenchmarkOptions -> CommandSeek
seek generator (BenchmarkOptions ps mode) = do seek generator (BenchmarkOptions ps mode) = do
runner <- generator ps runner <- generator ps
liftIO $ runMode mode [ bench (unwords ps) $ nfIO runner ] liftIO $ runMode mode [ bench (unwords ps) $ nfIO runner ]
seek _ (BenchmarkDatabases mode) = benchmarkDbs mode seek _ (BenchmarkDatabases mode n) = benchmarkDbs mode n
#else #else
seek _ _ = giveup "git-annex is not built with benchmarking support" seek _ _ = giveup "git-annex is not built with benchmarking support"
#endif #endif

View file

@ -20,6 +20,7 @@ import Database.Types
import Utility.Tmp.Dir import Utility.Tmp.Dir
import Git.FilePath import Git.FilePath
import Types.Key import Types.Key
import Utility.DataUnits
import Criterion.Main import Criterion.Main
import Control.Monad.IO.Class (liftIO) import Control.Monad.IO.Class (liftIO)
@ -27,17 +28,12 @@ import qualified Data.ByteString.Char8 as B8
import System.Random import System.Random
#endif #endif
benchmarkDbs :: CriterionMode -> Annex () benchmarkDbs :: CriterionMode -> Integer -> Annex ()
#ifdef WITH_BENCHMARK #ifdef WITH_BENCHMARK
benchmarkDbs mode = withTmpDirIn "." "benchmark" $ \tmpdir -> do benchmarkDbs mode n = withTmpDirIn "." "benchmark" $ \tmpdir -> do
-- benchmark different sizes of databases db <- benchDb tmpdir n
dbs <- mapM (benchDb tmpdir)
[ 1000
, 10000
-- , 100000
]
liftIO $ runMode mode liftIO $ runMode mode
[ bgroup "keys database" $ flip concatMap dbs $ \db -> [ bgroup "keys database"
[ getAssociatedFilesHitBench db [ getAssociatedFilesHitBench db
, getAssociatedFilesMissBench db , getAssociatedFilesMissBench db
, getAssociatedKeyHitBench db , getAssociatedKeyHitBench db
@ -81,22 +77,22 @@ addAssociatedFileOldBench (BenchDb h num) = bench ("addAssociatedFile to " ++ sh
addAssociatedFileNewBench :: BenchDb -> Benchmark addAssociatedFileNewBench :: BenchDb -> Benchmark
addAssociatedFileNewBench (BenchDb h num) = bench ("addAssociatedFile to " ++ show num ++ " (new)") $ nfIO $ do addAssociatedFileNewBench (BenchDb h num) = bench ("addAssociatedFile to " ++ show num ++ " (new)") $ nfIO $ do
n <- getStdRandom (randomR (1,num)) n <- getStdRandom (randomR (1,num))
SQL.addAssociatedFile (toIKey (keyN n)) (fileN (n+1)) (SQL.WriteHandle h) SQL.addAssociatedFile (toIKey (keyN n)) (fileN (num+n)) (SQL.WriteHandle h)
H.flushDbQueue h H.flushDbQueue h
populateAssociatedFiles :: H.DbQueue -> Int -> IO () populateAssociatedFiles :: H.DbQueue -> Integer -> IO ()
populateAssociatedFiles h num = do populateAssociatedFiles h num = do
forM_ [1..num] $ \n -> forM_ [1..num] $ \n ->
SQL.addAssociatedFile (toIKey (keyN n)) (fileN n) (SQL.WriteHandle h) SQL.addAssociatedFile (toIKey (keyN n)) (fileN n) (SQL.WriteHandle h)
H.flushDbQueue h H.flushDbQueue h
keyN :: Int -> Key keyN :: Integer -> Key
keyN n = stubKey keyN n = stubKey
{ keyName = B8.pack $ "key" ++ show n { keyName = B8.pack $ "key" ++ show n
, keyVariety = OtherKey "BENCH" , keyVariety = OtherKey "BENCH"
} }
fileN :: Int -> TopFilePath fileN :: Integer -> TopFilePath
fileN n = asTopFilePath ("file" ++ show n) fileN n = asTopFilePath ("file" ++ show n)
keyMiss :: Key keyMiss :: Key
@ -105,14 +101,17 @@ keyMiss = keyN 0 -- 0 is never stored
fileMiss :: TopFilePath fileMiss :: TopFilePath
fileMiss = fileN 0 -- 0 is never stored fileMiss = fileN 0 -- 0 is never stored
data BenchDb = BenchDb H.DbQueue Int data BenchDb = BenchDb H.DbQueue Integer
benchDb :: FilePath -> Int -> Annex BenchDb benchDb :: FilePath -> Integer -> Annex BenchDb
benchDb tmpdir num = do benchDb tmpdir num = do
liftIO $ putStrLn $ "setting up database with " ++ show num liftIO $ putStrLn $ "setting up database with " ++ show num ++ " items"
initDb db SQL.createTables initDb db SQL.createTables
h <- liftIO $ H.openDbQueue H.MultiWriter db SQL.containedTable h <- liftIO $ H.openDbQueue H.MultiWriter db SQL.containedTable
liftIO $ populateAssociatedFiles h num liftIO $ populateAssociatedFiles h num
sz <- liftIO $ getFileSize db
liftIO $ putStrLn $ "size of database on disk: " ++
roughSize storageUnits False sz
return (BenchDb h num) return (BenchDb h num)
where where
db = tmpdir </> show num </> "db" db = tmpdir </> show num </> "db"

View file

@ -4,7 +4,7 @@ git-annex benchmark - benchmark git-annex commands
# SYNOPSIS # SYNOPSIS
git annex benchmark [criterionopts] ( -- command [; command] | --databases ) git annex benchmark [criterionopts] ( -- command [; command] | --databases=N )
# DESCRIPTION # DESCRIPTION
@ -39,8 +39,8 @@ used.
Any options that git-annex usually accepts can be included after the Any options that git-annex usually accepts can be included after the
command to benchmark. command to benchmark.
The --databases option benchmarks git-annex's use of sqlite databases, The --databases=N option benchmarks git-annex's use of sqlite databases,
instead of a command. instead of a command. N is the number of items to benchmark.
# OUTPUT # OUTPUT