commit new transaction after 60 seconds

Database.Handle can now be given a CommitPolicy, making it easy to specify
transaction granularity.

Benchmarking the old git-annex incremental fsck that flips sticky bits
to the new that uses sqlite, running in a repo with 37000 annexed files,
both from cold cache:

old: 6m6.906s
new: 6m26.913s

This commit was sponsored by TasLUG.
This commit is contained in:
Joey Hess 2015-02-16 16:48:19 -04:00
parent d2766df914
commit 7d36e7d18d
3 changed files with 36 additions and 12 deletions

View file

@ -443,9 +443,7 @@ withFsckDb (StartIncremental h) a = a h
withFsckDb NonIncremental _ = noop withFsckDb NonIncremental _ = noop
recordFsckTime :: Incremental -> Key -> Annex () recordFsckTime :: Incremental -> Key -> Annex ()
recordFsckTime inc key = withFsckDb inc $ \h -> liftIO $ do recordFsckTime inc key = withFsckDb inc $ \h -> liftIO $ FsckDb.addDb h key
FsckDb.addDb h key
FsckDb.commitDb h
{- Records the start time of an incremental fsck. {- Records the start time of an incremental fsck.
- -

View file

@ -40,7 +40,6 @@ share [mkPersist sqlSettings, mkMigrate "migrateFsck"] [persistLowerCase|
Fscked Fscked
key SKey key SKey
UniqueKey key UniqueKey key
deriving Show
|] |]
{- The database is removed when starting a new incremental fsck pass. -} {- The database is removed when starting a new incremental fsck pass. -}
@ -62,7 +61,7 @@ openDb = do
liftIO $ H.openDb db liftIO $ H.openDb db
addDb :: H.DbHandle -> Key -> IO () addDb :: H.DbHandle -> Key -> IO ()
addDb h = void . H.runDb h . insert . Fscked . toSKey addDb h = void . H.runDb' h commitPolicy . insert . Fscked . toSKey
inDb :: H.DbHandle -> Key -> IO Bool inDb :: H.DbHandle -> Key -> IO Bool
inDb h k = H.runDb h $ do inDb h k = H.runDb h $ do
@ -70,3 +69,10 @@ inDb h k = H.runDb h $ do
where_ (r ^. FsckedKey ==. val (toSKey k)) where_ (r ^. FsckedKey ==. val (toSKey k))
return (r ^. FsckedKey) return (r ^. FsckedKey)
return $ not $ null r return $ not $ null r
{- Bundle up addDb transactions and commit after 60 seconds.
- This is a balance between resuming where the last incremental
- fsck left off, and making too many commits which slows down the fsck
- of lots of small or not present files. -}
commitPolicy :: H.CommitPolicy
commitPolicy = H.CommitAfterSeconds 60

View file

@ -9,6 +9,8 @@ module Database.Handle (
DbHandle, DbHandle,
openDb, openDb,
runDb, runDb,
CommitPolicy(..),
runDb',
commitDb, commitDb,
closeDb, closeDb,
) where ) where
@ -17,15 +19,17 @@ import Utility.Exception
import Database.Persist.Sqlite (runSqlite) import Database.Persist.Sqlite (runSqlite)
import Database.Esqueleto hiding (Key) import Database.Esqueleto hiding (Key)
import Control.Monad
import Control.Monad.IO.Class (liftIO) import Control.Monad.IO.Class (liftIO)
import Control.Concurrent import Control.Concurrent
import Control.Concurrent.Async import Control.Concurrent.Async
import Control.Exception (throwIO) import Control.Exception (throwIO)
import qualified Data.Text as T import qualified Data.Text as T
import Data.Time.Clock
{- A DbHandle is a reference to a worker thread that communicates with {- A DbHandle is a reference to a worker thread that communicates with
- the database. It has a MVar which Jobs are submitted to. -} - the database. It has a MVar which Jobs are submitted to. -}
data DbHandle = DbHandle (Async ()) (MVar Job) data DbHandle = DbHandle (Async ()) (MVar Job) (MVar UTCTime)
data Job = RunJob (SqlPersistM ()) | CommitJob | CloseJob data Job = RunJob (SqlPersistM ()) | CommitJob | CloseJob
@ -33,7 +37,8 @@ openDb :: FilePath -> IO DbHandle
openDb db = do openDb db = do
jobs <- newEmptyMVar jobs <- newEmptyMVar
worker <- async (workerThread (T.pack db) jobs) worker <- async (workerThread (T.pack db) jobs)
return $ DbHandle worker jobs t <- newMVar =<< getCurrentTime
return $ DbHandle worker jobs t
workerThread :: T.Text -> MVar Job -> IO () workerThread :: T.Text -> MVar Job -> IO ()
workerThread db jobs = go workerThread db jobs = go
@ -50,26 +55,41 @@ workerThread db jobs = go
CommitJob -> return CommitJob CommitJob -> return CommitJob
CloseJob -> return CloseJob CloseJob -> return CloseJob
{- Runs an action using the DbHandle. {- Runs an action using the DbHandle.
- -
- Note that the action is not run by the calling thread, but by a - Note that the action is not run by the calling thread, but by a
- worker thread. Exceptions are propigated to the calling thread. - worker thread. Exceptions are propigated to the calling thread.
- -
- Note that only one action can be run at a time against a given DbHandle. - Only one action can be run at a time against a given DbHandle.
- If called concurrently, this will block until it is able to run. - If called concurrently, this will block until it is able to run.
-} -}
runDb :: DbHandle -> SqlPersistM a -> IO a runDb :: DbHandle -> SqlPersistM a -> IO a
runDb (DbHandle _ jobs) a = do runDb h = runDb' h CommitManually
data CommitPolicy = CommitManually | CommitAfterSeconds Int
runDb' :: DbHandle -> CommitPolicy -> SqlPersistM a -> IO a
runDb' h@(DbHandle _ jobs t) pol a = do
res <- newEmptyMVar res <- newEmptyMVar
putMVar jobs $ RunJob $ liftIO . putMVar res =<< tryNonAsync a putMVar jobs $ RunJob $ liftIO . putMVar res =<< tryNonAsync a
either throwIO return =<< takeMVar res r <- either throwIO return =<< takeMVar res
case pol of
CommitManually -> return ()
CommitAfterSeconds n -> do
now <- getCurrentTime
prev <- takeMVar t
putMVar t now
when (diffUTCTime now prev > fromIntegral n) $
commitDb h
return r
{- Commits any transaction that was created by the previous calls to runDb, {- Commits any transaction that was created by the previous calls to runDb,
- and starts a new transaction. -} - and starts a new transaction. -}
commitDb :: DbHandle -> IO () commitDb :: DbHandle -> IO ()
commitDb (DbHandle _ jobs) = putMVar jobs CommitJob commitDb (DbHandle _ jobs _) = putMVar jobs CommitJob
closeDb :: DbHandle -> IO () closeDb :: DbHandle -> IO ()
closeDb (DbHandle worker jobs) = do closeDb (DbHandle worker jobs _) = do
putMVar jobs CloseJob putMVar jobs CloseJob
wait worker wait worker