diff --git a/Database/Fsck.hs b/Database/Fsck.hs index d9416927bb..20b4878e39 100644 --- a/Database/Fsck.hs +++ b/Database/Fsck.hs @@ -34,6 +34,7 @@ import Annex.LockFile import Database.Persist.TH import Database.Esqueleto hiding (Key) +import Data.Time.Clock data FsckHandle = FsckHandle H.DbHandle UUID @@ -84,11 +85,18 @@ closeDb (FsckHandle h u) = do unlockFile =<< fromRepo (gitAnnexFsckDbLock u) addDb :: FsckHandle -> Key -> IO () -addDb (FsckHandle h _) k = H.queueDb h 1000 $ +addDb (FsckHandle h _) k = H.queueDb h checkcommit $ void $ insertUnique $ Fscked sk where sk = toSKey k + -- commit queue after 1000 files or 5 minutes, whichever comes first + checkcommit sz lastcommittime + | sz > 1000 = return True + | otherwise = do + now <- getCurrentTime + return $ diffUTCTime lastcommittime now > 300 + inDb :: FsckHandle -> Key -> IO Bool inDb (FsckHandle h _) = H.queryDb h . inDb' . toSKey diff --git a/Database/Handle.hs b/Database/Handle.hs index dc3363e482..1fd9f78346 100644 --- a/Database/Handle.hs +++ b/Database/Handle.hs @@ -20,6 +20,7 @@ module Database.Handle ( ) where import Utility.Exception +import Utility.Monad import Messages import Database.Persist.Sqlite @@ -33,6 +34,7 @@ import qualified Data.Text as T import Control.Monad.Trans.Resource (runResourceT) import Control.Monad.Logger (runNoLoggingT) import Data.List +import Data.Time.Clock {- A DbHandle is a reference to a worker thread that communicates with - the database. It has a MVar which Jobs are submitted to. -} @@ -64,7 +66,7 @@ openDb :: FilePath -> TableName -> IO DbHandle openDb db tablename = do jobs <- newEmptyMVar worker <- async (workerThread (T.pack db) tablename jobs) - q <- newMVar emptyDbQueue + q <- newMVar =<< emptyDbQueue return $ DbHandle worker jobs q data Job @@ -145,16 +147,19 @@ closeDb h@(DbHandle worker jobs _) = do type Size = Int -{- A queue of actions to perform, with a count of the number of actions - - queued. -} -data DbQueue = DbQueue Size (SqlPersistM ()) +type LastCommitTime = UTCTime -emptyDbQueue :: DbQueue -emptyDbQueue = DbQueue 0 (return ()) +{- A queue of actions to perform, with a count of the number of actions + - queued, and a last commit time. -} +data DbQueue = DbQueue Size LastCommitTime (SqlPersistM ()) + +emptyDbQueue :: IO DbQueue +emptyDbQueue = do + now <- getCurrentTime + return $ DbQueue 0 now (return ()) {- Queues a change to be made to the database. It will be buffered - - to be committed later, unless the queue gets larger than the specified - - size. + - to be committed later, unless the commitchecker action returns true. - - (Be sure to call closeDb or flushQueueDb to ensure the change - gets committed.) @@ -164,25 +169,32 @@ emptyDbQueue = DbQueue 0 (return ()) - process, the transaction is put back in the queue. This solves - the sqlite multiple writer problem. -} -queueDb :: DbHandle -> Size -> SqlPersistM () -> IO () -queueDb h@(DbHandle _ _ qvar) maxsz a = do - DbQueue sz qa <- takeMVar qvar +queueDb + :: DbHandle + -> (Size -> LastCommitTime -> IO Bool) + -> SqlPersistM () + -> IO () +queueDb h@(DbHandle _ _ qvar) commitchecker a = do + DbQueue sz lastcommittime qa <- takeMVar qvar let !sz' = sz + 1 let qa' = qa >> a - let enqueue newsz = putMVar qvar (DbQueue newsz qa') - if sz' > maxsz - then do + let enqueue = putMVar qvar + ifM (commitchecker sz' lastcommittime) + ( do r <- commitDb h qa' case r of - Left _ -> enqueue 0 - Right _ -> putMVar qvar emptyDbQueue - else enqueue sz' + Left _ -> enqueue $ DbQueue sz' lastcommittime qa' + Right _ -> do + now <- getCurrentTime + enqueue $ DbQueue 0 now (return ()) + , enqueue $ DbQueue sz' lastcommittime qa' + ) {- If flushing the queue fails, this could be because there is another - writer to the database. Retry repeatedly for up to 10 seconds. -} flushQueueDb :: DbHandle -> IO () flushQueueDb h@(DbHandle _ _ qvar) = do - DbQueue sz qa <- takeMVar qvar + DbQueue sz _ qa <- takeMVar qvar when (sz > 0) $ robustly Nothing 100 (commitDb h qa) where diff --git a/debian/changelog b/debian/changelog index 579c4bcd2f..bfcd6725a2 100644 --- a/debian/changelog +++ b/debian/changelog @@ -6,6 +6,9 @@ git-annex (5.20150732) UNRELEASED; urgency=medium * fsck: Commit incremental fsck database when --time-limit is reached. Previously, some of the last files fscked did not make it into the database when using --time-limit. + * fsck: Commit incremental fsck database after every 1000 files + fscked, or every 5 minutes, whichever comes first. Previously, + commits were made every 1000 files fscked. -- Joey Hess Fri, 31 Jul 2015 12:31:39 -0400 diff --git a/doc/bugs/incremental___40__continued__41___fsck_start_froms_beginning__44___rechecks_files_already_checked_.mdwn b/doc/bugs/incremental___40__continued__41___fsck_start_froms_beginning__44___rechecks_files_already_checked_.mdwn index 001284821a..8cc989a0b6 100644 --- a/doc/bugs/incremental___40__continued__41___fsck_start_froms_beginning__44___rechecks_files_already_checked_.mdwn +++ b/doc/bugs/incremental___40__continued__41___fsck_start_froms_beginning__44___rechecks_files_already_checked_.mdwn @@ -21,3 +21,14 @@ Also tried with `docker/ubuntu:latest` using a clean install from https://downlo git-annex: 5.20150522-gb199d65 Linux: 3.16.0-43-generic #58-Ubuntu SMP Fri Jun 19 11:04:02 UTC 2015 x86_64 x86_64 x86_64 GNU/Linux +> I've adjusted the timing of the fsck checkpoints used in resumes some. +> Now it makes one every 5 minutes, or every 1000 files, whichever comes +> first. I could make this tunable, but I don't think it's worth adding the +> complexity; this heuristic should work pretty well. +> +> Another approach would be to catch sigint and commit the fsck database +> then, as is now done when --time-limit interrupts a fsck run. +> But, I am leery of complicating git-annex with signal handling, +> so I've not done that currently. +> +> Also, documented this in fsck's man page. [[done]] --[[Joey]] diff --git a/doc/git-annex-fsck.mdwn b/doc/git-annex-fsck.mdwn index 73c401eb35..68c824c91e 100644 --- a/doc/git-annex-fsck.mdwn +++ b/doc/git-annex-fsck.mdwn @@ -37,7 +37,12 @@ With parameters, only the specified files are checked. * `--more` - Continue the last incremental fsck pass, where it left off. + Resume the last incremental fsck pass, where it left off. + + Resuming may redundantly check some files that were checked + before. Any files that fsck found problems with before will be re-checked + on resume. Also, checkpoints are made every 1000 files or every 5 minutes + during a fsck, and it resumes from the last checkpoint. * `--incremental-schedule=time`