use WAL mode to ensure read from db always works, even when it's being written to

Also, moved the database to a subdir, as there are multiple files.

This seems to work well with concurrent fscks, although they still do
redundant work due to the commit granularity. Occasionally two writes will
conflict, and one is then deferred and happens later.

Except, with 3 concurrent fscks, I got failures:

git-annex: user error (SQLite3 returned ErrorBusy while attempting to perform prepare "SELECT \"fscked\".\"key\"\nFROM \"fscked\"\nWHERE \"fscked\".\"key\" = ?\n": database is locked)

Argh!!!
This commit is contained in:
Joey Hess 2015-02-18 15:54:24 -04:00
parent 823cc9b800
commit af254615b2
3 changed files with 50 additions and 20 deletions

View file

@ -21,7 +21,8 @@ module Database.Fsck (
import Database.Types
import qualified Database.Handle as H
import Locations
import Utility.Directory
import Utility.PosixFiles
import Utility.Exception
import Annex
import Types.Key
import Types.UUID
@ -34,6 +35,7 @@ import Control.Monad
import Control.Monad.IfElse
import Control.Monad.IO.Class (liftIO)
import System.Directory
import System.FilePath
import Data.Maybe
import Control.Applicative
@ -56,20 +58,27 @@ Fscked
newPass :: UUID -> Annex Bool
newPass u = isJust <$> tryExclusiveLock (gitAnnexFsckDbLock u) go
where
go = liftIO. nukeFile =<< fromRepo (gitAnnexFsckDb u)
go = liftIO . void . tryIO . removeDirectoryRecursive
=<< fromRepo (gitAnnexFsckDbDir u)
{- Opens the database, creating it atomically if it doesn't exist yet. -}
openDb :: UUID -> Annex FsckHandle
openDb u = do
db <- fromRepo (gitAnnexFsckDb u)
dbdir <- fromRepo (gitAnnexFsckDbDir u)
let db = dbdir </> "db"
unlessM (liftIO $ doesFileExist db) $ do
let newdb = db ++ ".new"
h <- liftIO $ H.openDb newdb
void $ liftIO $ H.commitDb h $
void $ runMigrationSilent migrateFsck
liftIO $ H.closeDb h
setAnnexFilePerm newdb
liftIO $ renameFile newdb db
let tmpdbdir = dbdir ++ ".tmp"
let tmpdb = tmpdbdir </> "db"
liftIO $ do
createDirectoryIfMissing True tmpdbdir
h <- H.initDb tmpdb $ void $
runMigrationSilent migrateFsck
H.closeDb h
setAnnexDirPerm tmpdbdir
setAnnexFilePerm tmpdb
liftIO $ do
void $ tryIO $ removeDirectoryRecursive dbdir
rename tmpdbdir dbdir
lockFileShared =<< fromRepo (gitAnnexFsckDbLock u)
h <- liftIO $ H.openDb db
return $ FsckHandle h u

View file

@ -9,6 +9,7 @@
module Database.Handle (
DbHandle,
initDb,
openDb,
queryDb,
closeDb,
@ -22,6 +23,7 @@ import Utility.Exception
import Messages
import Database.Persist.Sqlite
import qualified Database.Sqlite as Sqlite
import Control.Monad
import Control.Monad.IO.Class (liftIO)
import Control.Concurrent
@ -33,6 +35,29 @@ import qualified Data.Text as T
- the database. It has a MVar which Jobs are submitted to. -}
data DbHandle = DbHandle (Async ()) (MVar Job) (MVar DbQueue)
{- Ensures that the database is initialized. Pass the migration action for
- the database.
-
- The database is put into WAL mode, to prevent readers from blocking
- writers, and prevent a writer from blocking readers.
-}
initDb :: FilePath -> SqlPersistM () -> IO DbHandle
initDb db migration = do
enableWAL db
h <- openDb db
either throwIO (const $ return ()) =<< commitDb h migration
return h
enableWAL :: FilePath -> IO ()
enableWAL db = do
conn <- Sqlite.open (T.pack db)
stmt <- Sqlite.prepare conn (T.pack "PRAGMA journal_mode=WAL;")
void $ Sqlite.step stmt
void $ Sqlite.finalize stmt
Sqlite.close conn
{- Opens the database, but does not perform any migrations. Only use
- if the database is known to exist and have the right tables. -}
openDb :: FilePath -> IO DbHandle
openDb db = do
jobs <- newEmptyMVar
@ -120,12 +145,8 @@ queueDb h@(DbHandle _ _ qvar) maxsz a = do
then do
r <- commitDb h qa'
case r of
Left e -> do
print ("commit deferred", e)
enqueue 0
Right _ -> do
print "commit made"
putMVar qvar emptyDbQueue
Left e -> enqueue 0
Right _ -> putMVar qvar emptyDbQueue
else enqueue sz'
{- If flushing the queue fails, this could be because there is another

View file

@ -29,7 +29,7 @@ module Locations (
gitAnnexBadLocation,
gitAnnexUnusedLog,
gitAnnexFsckState,
gitAnnexFsckDb,
gitAnnexFsckDbDir,
gitAnnexFsckDbLock,
gitAnnexFsckResultsLog,
gitAnnexScheduleState,
@ -229,9 +229,9 @@ gitAnnexFsckDir u r = gitAnnexDir r </> "fsck" </> fromUUID u
gitAnnexFsckState :: UUID -> Git.Repo -> FilePath
gitAnnexFsckState u r = gitAnnexFsckDir u r </> "state"
{- Database used to record fsck info. -}
gitAnnexFsckDb :: UUID -> Git.Repo -> FilePath
gitAnnexFsckDb u r = gitAnnexFsckDir u r </> "fsck.db"
{- Directory containing database used to record fsck info. -}
gitAnnexFsckDbDir :: UUID -> Git.Repo -> FilePath
gitAnnexFsckDbDir u r = gitAnnexFsckDir u r </> "db"
{- Lock file for the fsck database. -}
gitAnnexFsckDbLock :: UUID -> Git.Repo -> FilePath