use WAL mode to ensure read from db always works, even when it's being written to

Also, moved the database to a subdir, as there are multiple files.

This seems to work well with concurrent fscks, although they still do
redundant work due to the commit granularity. Occasionally two writes will
conflict, and one is then deferred and happens later.

Except, with 3 concurrent fscks, I got failures:

git-annex: user error (SQLite3 returned ErrorBusy while attempting to perform prepare "SELECT \"fscked\".\"key\"\nFROM \"fscked\"\nWHERE \"fscked\".\"key\" = ?\n": database is locked)

Argh!!!
This commit is contained in:
Joey Hess 2015-02-18 15:54:24 -04:00
parent 823cc9b800
commit af254615b2
3 changed files with 50 additions and 20 deletions

View file

@ -21,7 +21,8 @@ module Database.Fsck (
import Database.Types import Database.Types
import qualified Database.Handle as H import qualified Database.Handle as H
import Locations import Locations
import Utility.Directory import Utility.PosixFiles
import Utility.Exception
import Annex import Annex
import Types.Key import Types.Key
import Types.UUID import Types.UUID
@ -34,6 +35,7 @@ import Control.Monad
import Control.Monad.IfElse import Control.Monad.IfElse
import Control.Monad.IO.Class (liftIO) import Control.Monad.IO.Class (liftIO)
import System.Directory import System.Directory
import System.FilePath
import Data.Maybe import Data.Maybe
import Control.Applicative import Control.Applicative
@ -56,20 +58,27 @@ Fscked
newPass :: UUID -> Annex Bool newPass :: UUID -> Annex Bool
newPass u = isJust <$> tryExclusiveLock (gitAnnexFsckDbLock u) go newPass u = isJust <$> tryExclusiveLock (gitAnnexFsckDbLock u) go
where where
go = liftIO. nukeFile =<< fromRepo (gitAnnexFsckDb u) go = liftIO . void . tryIO . removeDirectoryRecursive
=<< fromRepo (gitAnnexFsckDbDir u)
{- Opens the database, creating it atomically if it doesn't exist yet. -} {- Opens the database, creating it atomically if it doesn't exist yet. -}
openDb :: UUID -> Annex FsckHandle openDb :: UUID -> Annex FsckHandle
openDb u = do openDb u = do
db <- fromRepo (gitAnnexFsckDb u) dbdir <- fromRepo (gitAnnexFsckDbDir u)
let db = dbdir </> "db"
unlessM (liftIO $ doesFileExist db) $ do unlessM (liftIO $ doesFileExist db) $ do
let newdb = db ++ ".new" let tmpdbdir = dbdir ++ ".tmp"
h <- liftIO $ H.openDb newdb let tmpdb = tmpdbdir </> "db"
void $ liftIO $ H.commitDb h $ liftIO $ do
void $ runMigrationSilent migrateFsck createDirectoryIfMissing True tmpdbdir
liftIO $ H.closeDb h h <- H.initDb tmpdb $ void $
setAnnexFilePerm newdb runMigrationSilent migrateFsck
liftIO $ renameFile newdb db H.closeDb h
setAnnexDirPerm tmpdbdir
setAnnexFilePerm tmpdb
liftIO $ do
void $ tryIO $ removeDirectoryRecursive dbdir
rename tmpdbdir dbdir
lockFileShared =<< fromRepo (gitAnnexFsckDbLock u) lockFileShared =<< fromRepo (gitAnnexFsckDbLock u)
h <- liftIO $ H.openDb db h <- liftIO $ H.openDb db
return $ FsckHandle h u return $ FsckHandle h u

View file

@ -9,6 +9,7 @@
module Database.Handle ( module Database.Handle (
DbHandle, DbHandle,
initDb,
openDb, openDb,
queryDb, queryDb,
closeDb, closeDb,
@ -22,6 +23,7 @@ import Utility.Exception
import Messages import Messages
import Database.Persist.Sqlite import Database.Persist.Sqlite
import qualified Database.Sqlite as Sqlite
import Control.Monad import Control.Monad
import Control.Monad.IO.Class (liftIO) import Control.Monad.IO.Class (liftIO)
import Control.Concurrent import Control.Concurrent
@ -33,6 +35,29 @@ import qualified Data.Text as T
- the database. It has a MVar which Jobs are submitted to. -} - the database. It has a MVar which Jobs are submitted to. -}
data DbHandle = DbHandle (Async ()) (MVar Job) (MVar DbQueue) data DbHandle = DbHandle (Async ()) (MVar Job) (MVar DbQueue)
{- Ensures that the database is initialized. Pass the migration action for
- the database.
-
- The database is put into WAL mode, to prevent readers from blocking
- writers, and prevent a writer from blocking readers.
-}
initDb :: FilePath -> SqlPersistM () -> IO DbHandle
initDb db migration = do
enableWAL db
h <- openDb db
either throwIO (const $ return ()) =<< commitDb h migration
return h
enableWAL :: FilePath -> IO ()
enableWAL db = do
conn <- Sqlite.open (T.pack db)
stmt <- Sqlite.prepare conn (T.pack "PRAGMA journal_mode=WAL;")
void $ Sqlite.step stmt
void $ Sqlite.finalize stmt
Sqlite.close conn
{- Opens the database, but does not perform any migrations. Only use
- if the database is known to exist and have the right tables. -}
openDb :: FilePath -> IO DbHandle openDb :: FilePath -> IO DbHandle
openDb db = do openDb db = do
jobs <- newEmptyMVar jobs <- newEmptyMVar
@ -120,12 +145,8 @@ queueDb h@(DbHandle _ _ qvar) maxsz a = do
then do then do
r <- commitDb h qa' r <- commitDb h qa'
case r of case r of
Left e -> do Left e -> enqueue 0
print ("commit deferred", e) Right _ -> putMVar qvar emptyDbQueue
enqueue 0
Right _ -> do
print "commit made"
putMVar qvar emptyDbQueue
else enqueue sz' else enqueue sz'
{- If flushing the queue fails, this could be because there is another {- If flushing the queue fails, this could be because there is another

View file

@ -29,7 +29,7 @@ module Locations (
gitAnnexBadLocation, gitAnnexBadLocation,
gitAnnexUnusedLog, gitAnnexUnusedLog,
gitAnnexFsckState, gitAnnexFsckState,
gitAnnexFsckDb, gitAnnexFsckDbDir,
gitAnnexFsckDbLock, gitAnnexFsckDbLock,
gitAnnexFsckResultsLog, gitAnnexFsckResultsLog,
gitAnnexScheduleState, gitAnnexScheduleState,
@ -229,9 +229,9 @@ gitAnnexFsckDir u r = gitAnnexDir r </> "fsck" </> fromUUID u
gitAnnexFsckState :: UUID -> Git.Repo -> FilePath gitAnnexFsckState :: UUID -> Git.Repo -> FilePath
gitAnnexFsckState u r = gitAnnexFsckDir u r </> "state" gitAnnexFsckState u r = gitAnnexFsckDir u r </> "state"
{- Database used to record fsck info. -} {- Directory containing database used to record fsck info. -}
gitAnnexFsckDb :: UUID -> Git.Repo -> FilePath gitAnnexFsckDbDir :: UUID -> Git.Repo -> FilePath
gitAnnexFsckDb u r = gitAnnexFsckDir u r </> "fsck.db" gitAnnexFsckDbDir u r = gitAnnexFsckDir u r </> "db"
{- Lock file for the fsck database. -} {- Lock file for the fsck database. -}
gitAnnexFsckDbLock :: UUID -> Git.Repo -> FilePath gitAnnexFsckDbLock :: UUID -> Git.Repo -> FilePath