fsck: Commit incremental fsck database after every 1000 files fscked, or every 5 minutes, whichever comes first.
Previously, commits were made only every 1000 files fscked, with no time-based checkpoint. Also, improve the docs.
parent 9dfe03dbcd
commit bc4129cc77
5 changed files with 59 additions and 20 deletions
Database/Fsck.hs
@@ -34,6 +34,7 @@ import Annex.LockFile
 import Database.Persist.TH
 import Database.Esqueleto hiding (Key)
+import Data.Time.Clock
 
 data FsckHandle = FsckHandle H.DbHandle UUID
 
@@ -84,11 +85,18 @@ closeDb (FsckHandle h u) = do
     unlockFile =<< fromRepo (gitAnnexFsckDbLock u)
 
 addDb :: FsckHandle -> Key -> IO ()
-addDb (FsckHandle h _) k = H.queueDb h 1000 $
+addDb (FsckHandle h _) k = H.queueDb h checkcommit $
     void $ insertUnique $ Fscked sk
   where
     sk = toSKey k
+
+    -- commit queue after 1000 files or 5 minutes, whichever comes first
+    checkcommit sz lastcommittime
+        | sz > 1000 = return True
+        | otherwise = do
+            now <- getCurrentTime
+            return $ diffUTCTime lastcommittime now > 300
 
 inDb :: FsckHandle -> Key -> IO Bool
 inDb (FsckHandle h _) = H.queryDb h . inDb' . toSKey
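The checkpoint policy added above is small enough to restate on its own. The following is an illustrative, self-contained sketch, not git-annex's actual code; the names `shouldCommit` and `queuedCount` are mine. Note that `diffUTCTime a b` computes `a - b` in seconds, so the time elapsed since the last commit is `diffUTCTime now lastCommitTime`:

    import Data.Time.Clock

    -- Illustrative sketch of the "1000 files or 5 minutes" checkpoint
    -- policy; not git-annex's actual code or API.
    shouldCommit :: Int -> UTCTime -> IO Bool
    shouldCommit queuedCount lastCommitTime
        | queuedCount > 1000 = return True
        | otherwise = do
            now <- getCurrentTime
            -- diffUTCTime is (later - earlier), in seconds
            return $ diffUTCTime now lastCommitTime > 300

    main :: IO ()
    main = do
        start <- getCurrentTime
        print =<< shouldCommit 1500 start  -- True: over the file threshold
        print =<< shouldCommit 10 start    -- False: neither threshold reached

In the change itself, the equivalent predicate is `checkcommit`, which is handed to `H.queueDb`; queueDb calls it with the current queue size and the time of the last commit.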
Database/Handle.hs
@@ -20,6 +20,7 @@ module Database.Handle (
 ) where
 
 import Utility.Exception
+import Utility.Monad
 import Messages
 
 import Database.Persist.Sqlite
@@ -33,6 +34,7 @@ import qualified Data.Text as T
 import Control.Monad.Trans.Resource (runResourceT)
 import Control.Monad.Logger (runNoLoggingT)
 import Data.List
+import Data.Time.Clock
 
 {- A DbHandle is a reference to a worker thread that communicates with
  - the database. It has a MVar which Jobs are submitted to. -}
@@ -64,7 +66,7 @@ openDb :: FilePath -> TableName -> IO DbHandle
 openDb db tablename = do
     jobs <- newEmptyMVar
     worker <- async (workerThread (T.pack db) tablename jobs)
-    q <- newMVar emptyDbQueue
+    q <- newMVar =<< emptyDbQueue
     return $ DbHandle worker jobs q
 
 data Job
@@ -145,16 +147,19 @@ closeDb h@(DbHandle worker jobs _) = do
 
 type Size = Int
 
-{- A queue of actions to perform, with a count of the number of actions
- - queued. -}
-data DbQueue = DbQueue Size (SqlPersistM ())
+type LastCommitTime = UTCTime
 
-emptyDbQueue :: DbQueue
-emptyDbQueue = DbQueue 0 (return ())
+{- A queue of actions to perform, with a count of the number of actions
+ - queued, and a last commit time. -}
+data DbQueue = DbQueue Size LastCommitTime (SqlPersistM ())
+
+emptyDbQueue :: IO DbQueue
+emptyDbQueue = do
+    now <- getCurrentTime
+    return $ DbQueue 0 now (return ())
 
 {- Queues a change to be made to the database. It will be buffered
- - to be committed later, unless the queue gets larger than the specified
- - size.
+ - to be committed later, unless the commitchecker action returns true.
  -
  - (Be sure to call closeDb or flushQueueDb to ensure the change
  - gets committed.)
@@ -164,25 +169,32 @@ emptyDbQueue = DbQueue 0 (return ())
  - process, the transaction is put back in the queue. This solves
  - the sqlite multiple writer problem.
  -}
-queueDb :: DbHandle -> Size -> SqlPersistM () -> IO ()
-queueDb h@(DbHandle _ _ qvar) maxsz a = do
-    DbQueue sz qa <- takeMVar qvar
+queueDb
+    :: DbHandle
+    -> (Size -> LastCommitTime -> IO Bool)
+    -> SqlPersistM ()
+    -> IO ()
+queueDb h@(DbHandle _ _ qvar) commitchecker a = do
+    DbQueue sz lastcommittime qa <- takeMVar qvar
     let !sz' = sz + 1
     let qa' = qa >> a
-    let enqueue newsz = putMVar qvar (DbQueue newsz qa')
-    if sz' > maxsz
-        then do
+    let enqueue = putMVar qvar
+    ifM (commitchecker sz' lastcommittime)
+        ( do
             r <- commitDb h qa'
             case r of
-                Left _ -> enqueue 0
-                Right _ -> putMVar qvar emptyDbQueue
-        else enqueue sz'
+                Left _ -> enqueue $ DbQueue sz' lastcommittime qa'
+                Right _ -> do
+                    now <- getCurrentTime
+                    enqueue $ DbQueue 0 now (return ())
+        , enqueue $ DbQueue sz' lastcommittime qa'
+        )
 
 {- If flushing the queue fails, this could be because there is another
  - writer to the database. Retry repeatedly for up to 10 seconds. -}
 flushQueueDb :: DbHandle -> IO ()
 flushQueueDb h@(DbHandle _ _ qvar) = do
-    DbQueue sz qa <- takeMVar qvar
+    DbQueue sz _ qa <- takeMVar qvar
     when (sz > 0) $
         robustly Nothing 100 (commitDb h qa)
   where
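To make the new data flow easier to follow, here is a stripped-down, compilable sketch of the same buffering pattern: a queue value holding a count, the time of the last flush, and the pending action, flushed only when a caller-supplied checker says so. The names (`Queue`, `queueAction`, `checker`) and the use of plain `IO ()` in place of `SqlPersistM ()` are simplifications, not git-annex's API:

    {-# LANGUAGE BangPatterns #-}
    import Control.Concurrent.MVar
    import Data.Time.Clock

    -- Simplified stand-in for DbQueue: queued-action count, time of the
    -- last flush, and the buffered action itself. Not git-annex code.
    data Queue = Queue Int UTCTime (IO ())

    newQueue :: IO (MVar Queue)
    newQueue = do
        now <- getCurrentTime
        newMVar (Queue 0 now (return ()))

    -- Buffer an action; run the whole buffer when the checker says to.
    queueAction :: MVar Queue -> (Int -> UTCTime -> IO Bool) -> IO () -> IO ()
    queueAction qvar shouldFlush a = do
        Queue sz lastflush buffered <- takeMVar qvar
        let !sz' = sz + 1
        let buffered' = buffered >> a
        flush <- shouldFlush sz' lastflush
        if flush
            then do
                buffered'
                now <- getCurrentTime
                putMVar qvar (Queue 0 now (return ()))
            else putMVar qvar (Queue sz' lastflush buffered')

    -- Flush after more than 2 queued actions, or 5 minutes, whichever
    -- comes first (a toy version of the fsck checkpoint policy).
    checker :: Int -> UTCTime -> IO Bool
    checker sz lastflush
        | sz > 2 = return True
        | otherwise = do
            now <- getCurrentTime
            return (diffUTCTime now lastflush > 300)

    main :: IO ()
    main = do
        q <- newQueue
        mapM_ (queueAction q checker . print) [1 .. 5 :: Int]

Two things the real `queueDb` does that this sketch omits: on a failed commit it puts the buffered transaction back on the queue (the sqlite multiple-writer case), and anything still buffered at the end has to be committed explicitly, which is what `flushQueueDb` and `closeDb` are for.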
debian/changelog (vendored): 3 changes
@@ -6,6 +6,9 @@ git-annex (5.20150732) UNRELEASED; urgency=medium
   * fsck: Commit incremental fsck database when --time-limit is reached.
     Previously, some of the last files fscked did not make it into the
     database when using --time-limit.
+  * fsck: Commit incremental fsck database after every 1000 files
+    fscked, or every 5 minutes, whichever comes first. Previously,
+    commits were made every 1000 files fscked.
 
  -- Joey Hess <id@joeyh.name>  Fri, 31 Jul 2015 12:31:39 -0400
 
@@ -21,3 +21,14 @@ Also tried with `docker/ubuntu:latest` using a clean install from https://downlo
 git-annex: 5.20150522-gb199d65
 Linux: 3.16.0-43-generic #58-Ubuntu SMP Fri Jun 19 11:04:02 UTC 2015 x86_64 x86_64 x86_64 GNU/Linux
 
+> I've adjusted the timing of the fsck checkpoints used in resumes some.
+> Now it makes one every 5 minutes, or every 1000 files, whichever comes
+> first. I could make this tunable, but I don't think it's worth adding the
+> complexity; this heuristic should work pretty well.
+>
+> Another approach would be to catch sigint and commit the fsck database
+> then, as is now done when --time-limit interrupts a fsck run.
+> But, I am leery of complicating git-annex with signal handling,
+> so I've not done that currently.
+>
+> Also, documented this in fsck's man page. [[done]] --[[Joey]]
@@ -37,7 +37,12 @@ With parameters, only the specified files are checked.
 
 * `--more`
 
-  Continue the last incremental fsck pass, where it left off.
+  Resume the last incremental fsck pass, where it left off.
+
+  Resuming may redundantly check some files that were checked
+  before. Any files that fsck found problems with before will be re-checked
+  on resume. Also, checkpoints are made every 1000 files or every 5 minutes
+  during a fsck, and it resumes from the last checkpoint.
 
 * `--incremental-schedule=time`
 
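As a usage illustration of the resume behaviour described in this hunk (the command lines are illustrative; `--incremental` starts a new incremental pass and `--more` is the option documented above):

    git annex fsck --incremental    # start an incremental pass; it may be interrupted
    git annex fsck --more           # pick up again from the last checkpoint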