2015-02-16 19:08:29 +00:00
|
|
|
{- Sqlite database used for incremental fsck.
|
|
|
|
-
|
|
|
|
- Copyright 2015 Joey Hess <id@joeyh.name>
|
|
|
|
-:
|
|
|
|
- Licensed under the GNU GPL version 3 or higher.
|
|
|
|
-}
|
|
|
|
|
|
|
|
{-# LANGUAGE QuasiQuotes, TypeFamilies, TemplateHaskell #-}
|
|
|
|
{-# LANGUAGE OverloadedStrings, GADTs, FlexibleContexts #-}
|
2015-02-18 21:30:07 +00:00
|
|
|
{-# LANGUAGE MultiParamTypeClasses, GeneralizedNewtypeDeriving #-}
|
2015-02-22 20:57:19 +00:00
|
|
|
{-# LANGUAGE RankNTypes #-}
|
2015-02-16 19:08:29 +00:00
|
|
|
|
|
|
|
module Database.Fsck (
|
2015-02-17 21:08:11 +00:00
|
|
|
FsckHandle,
|
2015-02-16 19:08:29 +00:00
|
|
|
newPass,
|
|
|
|
openDb,
|
2015-02-17 17:04:22 +00:00
|
|
|
closeDb,
|
2015-02-16 19:08:29 +00:00
|
|
|
addDb,
|
|
|
|
inDb,
|
|
|
|
FsckedId,
|
|
|
|
) where
|
|
|
|
|
|
|
|
import Database.Types
|
2015-12-23 18:59:58 +00:00
|
|
|
import qualified Database.Queue as H
|
2017-02-13 21:30:28 +00:00
|
|
|
import Database.Init
|
2016-01-20 20:36:33 +00:00
|
|
|
import Annex.Locations
|
2015-02-18 19:54:24 +00:00
|
|
|
import Utility.Exception
|
2016-01-20 20:36:33 +00:00
|
|
|
import Annex.Common
|
2015-02-17 17:04:22 +00:00
|
|
|
import Annex.LockFile
|
2015-02-16 19:08:29 +00:00
|
|
|
|
|
|
|
import Database.Persist.TH
|
|
|
|
import Database.Esqueleto hiding (Key)
|
2015-07-31 20:42:15 +00:00
|
|
|
import Data.Time.Clock
|
2015-02-16 19:08:29 +00:00
|
|
|
|
2015-12-23 18:59:58 +00:00
|
|
|
data FsckHandle = FsckHandle H.DbQueue UUID
|
2015-02-17 21:08:11 +00:00
|
|
|
|
2015-02-16 19:08:29 +00:00
|
|
|
{- Each key stored in the database has already been fscked as part
|
|
|
|
- of the latest incremental fsck pass. -}
|
|
|
|
share [mkPersist sqlSettings, mkMigrate "migrateFsck"] [persistLowerCase|
|
|
|
|
Fscked
|
|
|
|
key SKey
|
|
|
|
UniqueKey key
|
|
|
|
|]
|
|
|
|
|
2015-02-17 17:04:22 +00:00
|
|
|
{- The database is removed when starting a new incremental fsck pass.
|
|
|
|
-
|
|
|
|
- This may fail, if other fsck processes are currently running using the
|
|
|
|
- database. Removing the database in that situation would lead to crashes
|
2015-04-19 04:38:29 +00:00
|
|
|
- or unknown behavior.
|
2015-02-17 17:04:22 +00:00
|
|
|
-}
|
2015-02-17 21:08:11 +00:00
|
|
|
newPass :: UUID -> Annex Bool
|
|
|
|
newPass u = isJust <$> tryExclusiveLock (gitAnnexFsckDbLock u) go
|
2015-02-17 17:04:22 +00:00
|
|
|
where
|
2015-02-18 19:54:24 +00:00
|
|
|
go = liftIO . void . tryIO . removeDirectoryRecursive
|
|
|
|
=<< fromRepo (gitAnnexFsckDbDir u)
|
2015-02-16 19:08:29 +00:00
|
|
|
|
2015-12-07 17:42:03 +00:00
|
|
|
{- Opens the database, creating it if it doesn't exist yet. -}
|
2015-02-17 21:08:11 +00:00
|
|
|
openDb :: UUID -> Annex FsckHandle
|
|
|
|
openDb u = do
|
2015-02-18 19:54:24 +00:00
|
|
|
dbdir <- fromRepo (gitAnnexFsckDbDir u)
|
|
|
|
let db = dbdir </> "db"
|
2015-02-16 19:08:29 +00:00
|
|
|
unlessM (liftIO $ doesFileExist db) $ do
|
2017-02-13 21:30:28 +00:00
|
|
|
initDb db $ void $
|
|
|
|
runMigrationSilent migrateFsck
|
2015-05-18 20:23:07 +00:00
|
|
|
lockFileCached =<< fromRepo (gitAnnexFsckDbLock u)
|
2015-12-23 18:59:58 +00:00
|
|
|
h <- liftIO $ H.openDbQueue db "fscked"
|
2015-02-17 21:08:11 +00:00
|
|
|
return $ FsckHandle h u
|
2015-02-16 19:08:29 +00:00
|
|
|
|
2015-02-17 21:08:11 +00:00
|
|
|
closeDb :: FsckHandle -> Annex ()
|
|
|
|
closeDb (FsckHandle h u) = do
|
2015-12-23 18:59:58 +00:00
|
|
|
liftIO $ H.closeDbQueue h
|
2015-02-17 21:08:11 +00:00
|
|
|
unlockFile =<< fromRepo (gitAnnexFsckDbLock u)
|
2015-02-17 17:04:22 +00:00
|
|
|
|
2015-02-17 21:08:11 +00:00
|
|
|
addDb :: FsckHandle -> Key -> IO ()
|
2015-07-31 20:42:15 +00:00
|
|
|
addDb (FsckHandle h _) k = H.queueDb h checkcommit $
|
2015-02-22 18:08:26 +00:00
|
|
|
void $ insertUnique $ Fscked sk
|
allow for concurrent incremental fsck processes again (sorta)
Sqlite doesn't support multiple concurrent writers
at all. One of them will fail to write. It's not even possible to have two
processes building up separate transactions at the same time. Before using
sqlite, incremental fsck could work perfectly well with multiple fsck
processes running concurrently. I'd like to keep that working.
My partial solution, so far, is to make git-annex buffer writes, and every
so often send them all to sqlite at once, in a transaction. So most of the
time, nothing is writing to the database. (And if it gets unlucky and
a write fails due to a collision with another writer, it can just wait and
retry the write later.) This lets multiple processes write to the database
successfully.
But, for the purposes of concurrent, incremental fsck, it's not ideal.
Each process doesn't immediately learn of files that another process has
checked. So they'll tend to do redundant work.
Only way I can see to improve this is to use some other mechanism for
short-term IPC between the fsck processes. Not yet done.
----
Also, make addDb check if an item is in the database already, and not try
to re-add it. That fixes an intermittent crash with
"SQLite3 returned ErrorConstraint while attempting to perform step."
I am not 100% sure why; it only started happening when I moved write
buffering into the queue. It seemed to generally happen on the same file
each time, so could just be due to multiple files having the same key.
However, I doubt my sound repo has many duplicate keys, and I suspect
something else is going on.
----
Updated benchmark, with the 1000 item queue: 6m33.808s
2015-02-17 20:39:35 +00:00
|
|
|
where
|
|
|
|
sk = toSKey k
|
2015-02-16 19:08:29 +00:00
|
|
|
|
2015-07-31 20:42:15 +00:00
|
|
|
-- commit queue after 1000 files or 5 minutes, whichever comes first
|
|
|
|
checkcommit sz lastcommittime
|
|
|
|
| sz > 1000 = return True
|
|
|
|
| otherwise = do
|
|
|
|
now <- getCurrentTime
|
|
|
|
return $ diffUTCTime lastcommittime now > 300
|
|
|
|
|
2015-12-23 18:59:58 +00:00
|
|
|
{- Doesn't know about keys that were just added with addDb. -}
|
2015-02-17 21:08:11 +00:00
|
|
|
inDb :: FsckHandle -> Key -> IO Bool
|
2015-12-23 18:59:58 +00:00
|
|
|
inDb (FsckHandle h _) = H.queryDbQueue h . inDb' . toSKey
|
allow for concurrent incremental fsck processes again (sorta)
Sqlite doesn't support multiple concurrent writers
at all. One of them will fail to write. It's not even possible to have two
processes building up separate transactions at the same time. Before using
sqlite, incremental fsck could work perfectly well with multiple fsck
processes running concurrently. I'd like to keep that working.
My partial solution, so far, is to make git-annex buffer writes, and every
so often send them all to sqlite at once, in a transaction. So most of the
time, nothing is writing to the database. (And if it gets unlucky and
a write fails due to a collision with another writer, it can just wait and
retry the write later.) This lets multiple processes write to the database
successfully.
But, for the purposes of concurrent, incremental fsck, it's not ideal.
Each process doesn't immediately learn of files that another process has
checked. So they'll tend to do redundant work.
Only way I can see to improve this is to use some other mechanism for
short-term IPC between the fsck processes. Not yet done.
----
Also, make addDb check if an item is in the database already, and not try
to re-add it. That fixes an intermittent crash with
"SQLite3 returned ErrorConstraint while attempting to perform step."
I am not 100% sure why; it only started happening when I moved write
buffering into the queue. It seemed to generally happen on the same file
each time, so could just be due to multiple files having the same key.
However, I doubt my sound repo has many duplicate keys, and I suspect
something else is going on.
----
Updated benchmark, with the 1000 item queue: 6m33.808s
2015-02-17 20:39:35 +00:00
|
|
|
|
|
|
|
inDb' :: SKey -> SqlPersistM Bool
|
|
|
|
inDb' sk = do
|
2015-02-16 19:08:29 +00:00
|
|
|
r <- select $ from $ \r -> do
|
allow for concurrent incremental fsck processes again (sorta)
Sqlite doesn't support multiple concurrent writers
at all. One of them will fail to write. It's not even possible to have two
processes building up separate transactions at the same time. Before using
sqlite, incremental fsck could work perfectly well with multiple fsck
processes running concurrently. I'd like to keep that working.
My partial solution, so far, is to make git-annex buffer writes, and every
so often send them all to sqlite at once, in a transaction. So most of the
time, nothing is writing to the database. (And if it gets unlucky and
a write fails due to a collision with another writer, it can just wait and
retry the write later.) This lets multiple processes write to the database
successfully.
But, for the purposes of concurrent, incremental fsck, it's not ideal.
Each process doesn't immediately learn of files that another process has
checked. So they'll tend to do redundant work.
Only way I can see to improve this is to use some other mechanism for
short-term IPC between the fsck processes. Not yet done.
----
Also, make addDb check if an item is in the database already, and not try
to re-add it. That fixes an intermittent crash with
"SQLite3 returned ErrorConstraint while attempting to perform step."
I am not 100% sure why; it only started happening when I moved write
buffering into the queue. It seemed to generally happen on the same file
each time, so could just be due to multiple files having the same key.
However, I doubt my sound repo has many duplicate keys, and I suspect
something else is going on.
----
Updated benchmark, with the 1000 item queue: 6m33.808s
2015-02-17 20:39:35 +00:00
|
|
|
where_ (r ^. FsckedKey ==. val sk)
|
2015-02-16 19:08:29 +00:00
|
|
|
return (r ^. FsckedKey)
|
|
|
|
return $ not $ null r
|