improve benchmark --databases

* benchmark: Changed --databases to take a parameter specifiying the size
  of the database to benchmark.
* benchmark --databases: Display size of the populated database.
* benchmark --databases: Improve the "addAssociatedFile to (new)"
  benchmark to really add new values, not overwriting old values.
This commit is contained in:
Joey Hess 2019-11-21 17:25:20 -04:00
parent 8ea5f3ff99
commit 25ba8156bc
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
4 changed files with 29 additions and 24 deletions

View file

@ -10,6 +10,11 @@ git-annex (7.20191115) UNRELEASED; urgency=medium
* sync, assistant: Pull and push from git-lfs remotes.
* Fix bug that made bare repos be treated as non-bare when --git-dir
was used.
* benchmark: Changed --databases to take a parameter specifiying the size
of the database to benchmark.
* benchmark --databases: Display size of the populated database.
* benchmark --databases: Improve the "addAssociatedFile to (new)"
benchmark to really add new values, not overwriting old values.
-- Joey Hess <id@joeyh.name> Fri, 15 Nov 2019 11:57:19 -0400

View file

@ -26,7 +26,7 @@ cmd generator = command "benchmark" SectionTesting
data BenchmarkOptions
= BenchmarkOptions CmdParams CriterionMode
| BenchmarkDatabases CriterionMode
| BenchmarkDatabases CriterionMode Integer
optParser :: CmdParamsDesc -> Parser BenchmarkOptions
optParser desc = benchmarkoptions <|> benchmarkdatabases
@ -36,8 +36,9 @@ optParser desc = benchmarkoptions <|> benchmarkdatabases
<*> criterionopts
benchmarkdatabases = BenchmarkDatabases
<$> criterionopts
<* flag' ()
<*> option auto
( long "databases"
<> metavar paramNumber
<> help "benchmark sqlite databases"
)
#ifdef WITH_BENCHMARK
@ -51,7 +52,7 @@ seek :: BenchmarkGenerator -> BenchmarkOptions -> CommandSeek
seek generator (BenchmarkOptions ps mode) = do
runner <- generator ps
liftIO $ runMode mode [ bench (unwords ps) $ nfIO runner ]
seek _ (BenchmarkDatabases mode) = benchmarkDbs mode
seek _ (BenchmarkDatabases mode n) = benchmarkDbs mode n
#else
seek _ _ = giveup "git-annex is not built with benchmarking support"
#endif

View file

@ -20,6 +20,7 @@ import Database.Types
import Utility.Tmp.Dir
import Git.FilePath
import Types.Key
import Utility.DataUnits
import Criterion.Main
import Control.Monad.IO.Class (liftIO)
@ -27,17 +28,12 @@ import qualified Data.ByteString.Char8 as B8
import System.Random
#endif
benchmarkDbs :: CriterionMode -> Annex ()
benchmarkDbs :: CriterionMode -> Integer -> Annex ()
#ifdef WITH_BENCHMARK
benchmarkDbs mode = withTmpDirIn "." "benchmark" $ \tmpdir -> do
-- benchmark different sizes of databases
dbs <- mapM (benchDb tmpdir)
[ 1000
, 10000
-- , 100000
]
benchmarkDbs mode n = withTmpDirIn "." "benchmark" $ \tmpdir -> do
db <- benchDb tmpdir n
liftIO $ runMode mode
[ bgroup "keys database" $ flip concatMap dbs $ \db ->
[ bgroup "keys database"
[ getAssociatedFilesHitBench db
, getAssociatedFilesMissBench db
, getAssociatedKeyHitBench db
@ -81,22 +77,22 @@ addAssociatedFileOldBench (BenchDb h num) = bench ("addAssociatedFile to " ++ sh
addAssociatedFileNewBench :: BenchDb -> Benchmark
addAssociatedFileNewBench (BenchDb h num) = bench ("addAssociatedFile to " ++ show num ++ " (new)") $ nfIO $ do
n <- getStdRandom (randomR (1,num))
SQL.addAssociatedFile (toIKey (keyN n)) (fileN (n+1)) (SQL.WriteHandle h)
SQL.addAssociatedFile (toIKey (keyN n)) (fileN (num+n)) (SQL.WriteHandle h)
H.flushDbQueue h
populateAssociatedFiles :: H.DbQueue -> Int -> IO ()
populateAssociatedFiles :: H.DbQueue -> Integer -> IO ()
populateAssociatedFiles h num = do
forM_ [1..num] $ \n ->
SQL.addAssociatedFile (toIKey (keyN n)) (fileN n) (SQL.WriteHandle h)
H.flushDbQueue h
keyN :: Int -> Key
keyN :: Integer -> Key
keyN n = stubKey
{ keyName = B8.pack $ "key" ++ show n
, keyVariety = OtherKey "BENCH"
}
fileN :: Int -> TopFilePath
fileN :: Integer -> TopFilePath
fileN n = asTopFilePath ("file" ++ show n)
keyMiss :: Key
@ -105,14 +101,17 @@ keyMiss = keyN 0 -- 0 is never stored
fileMiss :: TopFilePath
fileMiss = fileN 0 -- 0 is never stored
data BenchDb = BenchDb H.DbQueue Int
data BenchDb = BenchDb H.DbQueue Integer
benchDb :: FilePath -> Int -> Annex BenchDb
benchDb :: FilePath -> Integer -> Annex BenchDb
benchDb tmpdir num = do
liftIO $ putStrLn $ "setting up database with " ++ show num
liftIO $ putStrLn $ "setting up database with " ++ show num ++ " items"
initDb db SQL.createTables
h <- liftIO $ H.openDbQueue H.MultiWriter db SQL.containedTable
liftIO $ populateAssociatedFiles h num
sz <- liftIO $ getFileSize db
liftIO $ putStrLn $ "size of database on disk: " ++
roughSize storageUnits False sz
return (BenchDb h num)
where
db = tmpdir </> show num </> "db"

View file

@ -4,7 +4,7 @@ git-annex benchmark - benchmark git-annex commands
# SYNOPSIS
git annex benchmark [criterionopts] ( -- commmand [; command] | --databases )
git annex benchmark [criterionopts] ( -- commmand [; command] | --databases=N )
# DESCRIPTION
@ -39,8 +39,8 @@ used.
Any options that git-annex usually accepts can be included after the
command to benchmark.
The --databases option benchmark's git-annex's use of sqlite databases,
instead of a command.
The --databases=N option benchmark's git-annex's use of sqlite databases,
instead of a command. N is the number of items to benchmark.
# OUTPUT