fsck: Commit incremental fsck database after every 1000 files fscked, or every 5 minutes, whichever comes first.
Previously, commits were made only every 1000 files fscked, with no time-based checkpoint. Also, improve the docs.
parent 9dfe03dbcd
commit bc4129cc77
5 changed files with 59 additions and 20 deletions
Database/Fsck.hs
@@ -34,6 +34,7 @@ import Annex.LockFile
 import Database.Persist.TH
 import Database.Esqueleto hiding (Key)
+import Data.Time.Clock
 
 data FsckHandle = FsckHandle H.DbHandle UUID
 
@@ -84,11 +85,18 @@ closeDb (FsckHandle h u) = do
     unlockFile =<< fromRepo (gitAnnexFsckDbLock u)
 
 addDb :: FsckHandle -> Key -> IO ()
-addDb (FsckHandle h _) k = H.queueDb h 1000 $
+addDb (FsckHandle h _) k = H.queueDb h checkcommit $
     void $ insertUnique $ Fscked sk
   where
     sk = toSKey k
+
+    -- commit queue after 1000 files or 5 minutes, whichever comes first
+    checkcommit sz lastcommittime
+        | sz > 1000 = return True
+        | otherwise = do
+            now <- getCurrentTime
+            return $ diffUTCTime lastcommittime now > 300
 
 inDb :: FsckHandle -> Key -> IO Bool
 inDb (FsckHandle h _) = H.queryDb h . inDb' . toSKey
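The checkpoint policy added above is small enough to restate on its own. The following is an illustrative, self-contained sketch, not git-annex's actual code; the names `shouldCommit` and `queuedCount` are mine. Note that `diffUTCTime a b` computes `a - b` in seconds, so the time elapsed since the last commit is `diffUTCTime now lastCommitTime`:

    import Data.Time.Clock

    -- Illustrative sketch of the "1000 files or 5 minutes" checkpoint
    -- policy; not git-annex's actual code or API.
    shouldCommit :: Int -> UTCTime -> IO Bool
    shouldCommit queuedCount lastCommitTime
        | queuedCount > 1000 = return True
        | otherwise = do
            now <- getCurrentTime
            -- diffUTCTime is (later - earlier), in seconds
            return $ diffUTCTime now lastCommitTime > 300

    main :: IO ()
    main = do
        start <- getCurrentTime
        print =<< shouldCommit 1500 start  -- True: over the file threshold
        print =<< shouldCommit 10 start    -- False: neither threshold reached

In the change itself, the equivalent predicate is `checkcommit`, which is handed to `H.queueDb`; queueDb calls it with the current queue size and the time of the last commit.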
Database/Handle.hs
@@ -20,6 +20,7 @@ module Database.Handle (
 ) where
 
 import Utility.Exception
+import Utility.Monad
 import Messages
 
 import Database.Persist.Sqlite
@@ -33,6 +34,7 @@ import qualified Data.Text as T
 import Control.Monad.Trans.Resource (runResourceT)
 import Control.Monad.Logger (runNoLoggingT)
 import Data.List
+import Data.Time.Clock
 
 {- A DbHandle is a reference to a worker thread that communicates with
  - the database. It has a MVar which Jobs are submitted to. -}
@@ -64,7 +66,7 @@ openDb :: FilePath -> TableName -> IO DbHandle
 openDb db tablename = do
     jobs <- newEmptyMVar
     worker <- async (workerThread (T.pack db) tablename jobs)
-    q <- newMVar emptyDbQueue
+    q <- newMVar =<< emptyDbQueue
     return $ DbHandle worker jobs q
 
 data Job
@@ -145,16 +147,19 @@ closeDb h@(DbHandle worker jobs _) = do
 
 type Size = Int
 
-{- A queue of actions to perform, with a count of the number of actions
- - queued. -}
-data DbQueue = DbQueue Size (SqlPersistM ())
+type LastCommitTime = UTCTime
 
-emptyDbQueue :: DbQueue
-emptyDbQueue = DbQueue 0 (return ())
+{- A queue of actions to perform, with a count of the number of actions
+ - queued, and a last commit time. -}
+data DbQueue = DbQueue Size LastCommitTime (SqlPersistM ())
+
+emptyDbQueue :: IO DbQueue
+emptyDbQueue = do
+    now <- getCurrentTime
+    return $ DbQueue 0 now (return ())
 
 {- Queues a change to be made to the database. It will be buffered
- - to be committed later, unless the queue gets larger than the specified
- - size.
+ - to be committed later, unless the commitchecker action returns true.
  -
  - (Be sure to call closeDb or flushQueueDb to ensure the change
  - gets committed.)
@@ -164,25 +169,32 @@ emptyDbQueue = DbQueue 0 (return ())
  - process, the transaction is put back in the queue. This solves
  - the sqlite multiple writer problem.
  -}
-queueDb :: DbHandle -> Size -> SqlPersistM () -> IO ()
-queueDb h@(DbHandle _ _ qvar) maxsz a = do
-    DbQueue sz qa <- takeMVar qvar
+queueDb
+    :: DbHandle
+    -> (Size -> LastCommitTime -> IO Bool)
+    -> SqlPersistM ()
+    -> IO ()
+queueDb h@(DbHandle _ _ qvar) commitchecker a = do
+    DbQueue sz lastcommittime qa <- takeMVar qvar
     let !sz' = sz + 1
     let qa' = qa >> a
-    let enqueue newsz = putMVar qvar (DbQueue newsz qa')
-    if sz' > maxsz
-        then do
+    let enqueue = putMVar qvar
+    ifM (commitchecker sz' lastcommittime)
+        ( do
             r <- commitDb h qa'
             case r of
-                Left _ -> enqueue 0
-                Right _ -> putMVar qvar emptyDbQueue
-        else enqueue sz'
+                Left _ -> enqueue $ DbQueue sz' lastcommittime qa'
+                Right _ -> do
+                    now <- getCurrentTime
+                    enqueue $ DbQueue 0 now (return ())
+        , enqueue $ DbQueue sz' lastcommittime qa'
+        )
 
 {- If flushing the queue fails, this could be because there is another
  - writer to the database. Retry repeatedly for up to 10 seconds. -}
 flushQueueDb :: DbHandle -> IO ()
 flushQueueDb h@(DbHandle _ _ qvar) = do
-    DbQueue sz qa <- takeMVar qvar
+    DbQueue sz _ qa <- takeMVar qvar
     when (sz > 0) $
         robustly Nothing 100 (commitDb h qa)
   where
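To make the new data flow easier to follow, here is a stripped-down, compilable sketch of the same buffering pattern: a queue value holding a count, the time of the last flush, and the pending action, flushed only when a caller-supplied checker says so. The names (`Queue`, `queueAction`, `checker`) and the use of plain `IO ()` in place of `SqlPersistM ()` are simplifications, not git-annex's API:

    {-# LANGUAGE BangPatterns #-}
    import Control.Concurrent.MVar
    import Data.Time.Clock

    -- Simplified stand-in for DbQueue: queued-action count, time of the
    -- last flush, and the buffered action itself. Not git-annex code.
    data Queue = Queue Int UTCTime (IO ())

    newQueue :: IO (MVar Queue)
    newQueue = do
        now <- getCurrentTime
        newMVar (Queue 0 now (return ()))

    -- Buffer an action; run the whole buffer when the checker says to.
    queueAction :: MVar Queue -> (Int -> UTCTime -> IO Bool) -> IO () -> IO ()
    queueAction qvar shouldFlush a = do
        Queue sz lastflush buffered <- takeMVar qvar
        let !sz' = sz + 1
        let buffered' = buffered >> a
        flush <- shouldFlush sz' lastflush
        if flush
            then do
                buffered'
                now <- getCurrentTime
                putMVar qvar (Queue 0 now (return ()))
            else putMVar qvar (Queue sz' lastflush buffered')

    -- Flush after more than 2 queued actions, or 5 minutes, whichever
    -- comes first (a toy version of the fsck checkpoint policy).
    checker :: Int -> UTCTime -> IO Bool
    checker sz lastflush
        | sz > 2 = return True
        | otherwise = do
            now <- getCurrentTime
            return (diffUTCTime now lastflush > 300)

    main :: IO ()
    main = do
        q <- newQueue
        mapM_ (queueAction q checker . print) [1 .. 5 :: Int]

Two things the real `queueDb` does that this sketch omits: on a failed commit it puts the buffered transaction back on the queue (the sqlite multiple-writer case), and anything still buffered at the end has to be committed explicitly, which is what `flushQueueDb` and `closeDb` are for.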
debian/changelog (vendored): 3 changes
@@ -6,6 +6,9 @@ git-annex (5.20150732) UNRELEASED; urgency=medium
   * fsck: Commit incremental fsck database when --time-limit is reached.
     Previously, some of the last files fscked did not make it into the
     database when using --time-limit.
+  * fsck: Commit incremental fsck database after every 1000 files
+    fscked, or every 5 minutes, whichever comes first. Previously,
+    commits were made every 1000 files fscked.
 
  -- Joey Hess <id@joeyh.name>  Fri, 31 Jul 2015 12:31:39 -0400
 
@@ -21,3 +21,14 @@ Also tried with `docker/ubuntu:latest` using a clean install from https://downlo
 git-annex: 5.20150522-gb199d65
 Linux: 3.16.0-43-generic #58-Ubuntu SMP Fri Jun 19 11:04:02 UTC 2015 x86_64 x86_64 x86_64 GNU/Linux
 
+> I've adjusted the timing of the fsck checkpoints used in resumes some.
+> Now it makes one every 5 minutes, or every 1000 files, whichever comes
+> first. I could make this tunable, but I don't think it's worth adding the
+> complexity; this heuristic should work pretty well.
+>
+> Another approach would be to catch sigint and commit the fsck database
+> then, as is now done when --time-limit interrupts a fsck run.
+> But, I am leery of complicating git-annex with signal handling,
+> so I've not done that currently.
+>
+> Also, documented this in fsck's man page. [[done]] --[[Joey]]
@@ -37,7 +37,12 @@ With parameters, only the specified files are checked.
 
 * `--more`
 
-  Continue the last incremental fsck pass, where it left off.
+  Resume the last incremental fsck pass, where it left off.
+
+  Resuming may redundantly check some files that were checked
+  before. Any files that fsck found problems with before will be re-checked
+  on resume. Also, checkpoints are made every 1000 files or every 5 minutes
+  during a fsck, and it resumes from the last checkpoint.
 
 * `--incremental-schedule=time`
 
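As a usage illustration of the resume behaviour described in this hunk (the command lines are illustrative; `--incremental` starts a new incremental pass and `--more` is the option documented above):

    git annex fsck --incremental    # start an incremental pass; it may be interrupted
    git annex fsck --more           # pick up again from the last checkpoint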