2015-02-16 19:08:29 +00:00
|
|
|
{- Sqlite database used for incremental fsck.
|
|
|
|
-
|
|
|
|
- Copyright 2015 Joey Hess <id@joeyh.name>
|
|
|
|
-:
|
|
|
|
- Licensed under the GNU GPL version 3 or higher.
|
|
|
|
-}
|
|
|
|
|
|
|
|
{-# LANGUAGE QuasiQuotes, TypeFamilies, TemplateHaskell #-}
|
|
|
|
{-# LANGUAGE OverloadedStrings, GADTs, FlexibleContexts #-}
|
|
|
|
|
|
|
|
module Database.Fsck (
|
2015-02-17 21:08:11 +00:00
|
|
|
FsckHandle,
|
2015-02-16 19:08:29 +00:00
|
|
|
newPass,
|
|
|
|
openDb,
|
2015-02-17 17:04:22 +00:00
|
|
|
closeDb,
|
2015-02-16 19:08:29 +00:00
|
|
|
addDb,
|
|
|
|
inDb,
|
|
|
|
FsckedId,
|
|
|
|
) where
|
|
|
|
|
|
|
|
import Database.Types
|
|
|
|
import qualified Database.Handle as H
|
|
|
|
import Locations
|
2015-02-18 19:54:24 +00:00
|
|
|
import Utility.PosixFiles
|
|
|
|
import Utility.Exception
|
2015-02-16 19:08:29 +00:00
|
|
|
import Annex
|
|
|
|
import Types.Key
|
2015-02-17 21:08:11 +00:00
|
|
|
import Types.UUID
|
2015-02-16 19:08:29 +00:00
|
|
|
import Annex.Perms
|
2015-02-17 17:04:22 +00:00
|
|
|
import Annex.LockFile
|
2015-02-16 19:08:29 +00:00
|
|
|
|
|
|
|
import Database.Persist.TH
|
|
|
|
import Database.Esqueleto hiding (Key)
|
|
|
|
import Control.Monad
|
|
|
|
import Control.Monad.IfElse
|
|
|
|
import Control.Monad.IO.Class (liftIO)
|
|
|
|
import System.Directory
|
2015-02-18 19:54:24 +00:00
|
|
|
import System.FilePath
|
2015-02-17 17:04:22 +00:00
|
|
|
import Data.Maybe
|
|
|
|
import Control.Applicative
|
2015-02-16 19:08:29 +00:00
|
|
|
|
2015-02-17 21:08:11 +00:00
|
|
|
data FsckHandle = FsckHandle H.DbHandle UUID
|
|
|
|
|
2015-02-16 19:08:29 +00:00
|
|
|
{- Each key stored in the database has already been fscked as part
|
|
|
|
- of the latest incremental fsck pass. -}
|
|
|
|
share [mkPersist sqlSettings, mkMigrate "migrateFsck"] [persistLowerCase|
|
|
|
|
Fscked
|
|
|
|
key SKey
|
|
|
|
UniqueKey key
|
|
|
|
|]
|
|
|
|
|
2015-02-17 17:04:22 +00:00
|
|
|
{- The database is removed when starting a new incremental fsck pass.
|
|
|
|
-
|
|
|
|
- This may fail, if other fsck processes are currently running using the
|
|
|
|
- database. Removing the database in that situation would lead to crashes
|
|
|
|
- or undefined behavior.
|
|
|
|
-}
|
2015-02-17 21:08:11 +00:00
|
|
|
newPass :: UUID -> Annex Bool
|
|
|
|
newPass u = isJust <$> tryExclusiveLock (gitAnnexFsckDbLock u) go
|
2015-02-17 17:04:22 +00:00
|
|
|
where
|
2015-02-18 19:54:24 +00:00
|
|
|
go = liftIO . void . tryIO . removeDirectoryRecursive
|
|
|
|
=<< fromRepo (gitAnnexFsckDbDir u)
|
2015-02-16 19:08:29 +00:00
|
|
|
|
|
|
|
{- Opens the database, creating it atomically if it doesn't exist yet. -}
|
2015-02-17 21:08:11 +00:00
|
|
|
openDb :: UUID -> Annex FsckHandle
|
|
|
|
openDb u = do
|
2015-02-18 19:54:24 +00:00
|
|
|
dbdir <- fromRepo (gitAnnexFsckDbDir u)
|
|
|
|
let db = dbdir </> "db"
|
2015-02-16 19:08:29 +00:00
|
|
|
unlessM (liftIO $ doesFileExist db) $ do
|
2015-02-18 19:54:24 +00:00
|
|
|
let tmpdbdir = dbdir ++ ".tmp"
|
|
|
|
let tmpdb = tmpdbdir </> "db"
|
|
|
|
liftIO $ do
|
|
|
|
createDirectoryIfMissing True tmpdbdir
|
|
|
|
h <- H.initDb tmpdb $ void $
|
|
|
|
runMigrationSilent migrateFsck
|
|
|
|
H.closeDb h
|
|
|
|
setAnnexDirPerm tmpdbdir
|
|
|
|
setAnnexFilePerm tmpdb
|
|
|
|
liftIO $ do
|
|
|
|
void $ tryIO $ removeDirectoryRecursive dbdir
|
|
|
|
rename tmpdbdir dbdir
|
2015-02-17 21:08:11 +00:00
|
|
|
lockFileShared =<< fromRepo (gitAnnexFsckDbLock u)
|
|
|
|
h <- liftIO $ H.openDb db
|
|
|
|
return $ FsckHandle h u
|
2015-02-16 19:08:29 +00:00
|
|
|
|
2015-02-17 21:08:11 +00:00
|
|
|
closeDb :: FsckHandle -> Annex ()
|
|
|
|
closeDb (FsckHandle h u) = do
|
2015-02-17 17:04:22 +00:00
|
|
|
liftIO $ H.closeDb h
|
2015-02-17 21:08:11 +00:00
|
|
|
unlockFile =<< fromRepo (gitAnnexFsckDbLock u)
|
2015-02-17 17:04:22 +00:00
|
|
|
|
2015-02-17 21:08:11 +00:00
|
|
|
addDb :: FsckHandle -> Key -> IO ()
|
|
|
|
addDb (FsckHandle h _) k = H.queueDb h 1000 $
|
allow for concurrent incremental fsck processes again (sorta)
Sqlite doesn't support multiple concurrent writers
at all. One of them will fail to write. It's not even possible to have two
processes building up separate transactions at the same time. Before using
sqlite, incremental fsck could work perfectly well with multiple fsck
processes running concurrently. I'd like to keep that working.
My partial solution, so far, is to make git-annex buffer writes, and every
so often send them all to sqlite at once, in a transaction. So most of the
time, nothing is writing to the database. (And if it gets unlucky and
a write fails due to a collision with another writer, it can just wait and
retry the write later.) This lets multiple processes write to the database
successfully.
But, for the purposes of concurrent, incremental fsck, it's not ideal.
Each process doesn't immediately learn of files that another process has
checked. So they'll tend to do redundant work.
Only way I can see to improve this is to use some other mechanism for
short-term IPC between the fsck processes. Not yet done.
----
Also, make addDb check if an item is in the database already, and not try
to re-add it. That fixes an intermittent crash with
"SQLite3 returned ErrorConstraint while attempting to perform step."
I am not 100% sure why; it only started happening when I moved write
buffering into the queue. It seemed to generally happen on the same file
each time, so could just be due to multiple files having the same key.
However, I doubt my sound repo has many duplicate keys, and I suspect
something else is going on.
----
Updated benchmark, with the 1000 item queue: 6m33.808s
2015-02-17 20:39:35 +00:00
|
|
|
unlessM (inDb' sk) $
|
|
|
|
insert_ $ Fscked sk
|
|
|
|
where
|
|
|
|
sk = toSKey k
|
2015-02-16 19:08:29 +00:00
|
|
|
|
2015-02-17 21:08:11 +00:00
|
|
|
inDb :: FsckHandle -> Key -> IO Bool
|
2015-02-18 18:11:27 +00:00
|
|
|
inDb (FsckHandle h _) = H.queryDb h . inDb' . toSKey
|
allow for concurrent incremental fsck processes again (sorta)
Sqlite doesn't support multiple concurrent writers
at all. One of them will fail to write. It's not even possible to have two
processes building up separate transactions at the same time. Before using
sqlite, incremental fsck could work perfectly well with multiple fsck
processes running concurrently. I'd like to keep that working.
My partial solution, so far, is to make git-annex buffer writes, and every
so often send them all to sqlite at once, in a transaction. So most of the
time, nothing is writing to the database. (And if it gets unlucky and
a write fails due to a collision with another writer, it can just wait and
retry the write later.) This lets multiple processes write to the database
successfully.
But, for the purposes of concurrent, incremental fsck, it's not ideal.
Each process doesn't immediately learn of files that another process has
checked. So they'll tend to do redundant work.
Only way I can see to improve this is to use some other mechanism for
short-term IPC between the fsck processes. Not yet done.
----
Also, make addDb check if an item is in the database already, and not try
to re-add it. That fixes an intermittent crash with
"SQLite3 returned ErrorConstraint while attempting to perform step."
I am not 100% sure why; it only started happening when I moved write
buffering into the queue. It seemed to generally happen on the same file
each time, so could just be due to multiple files having the same key.
However, I doubt my sound repo has many duplicate keys, and I suspect
something else is going on.
----
Updated benchmark, with the 1000 item queue: 6m33.808s
2015-02-17 20:39:35 +00:00
|
|
|
|
|
|
|
inDb' :: SKey -> SqlPersistM Bool
|
|
|
|
inDb' sk = do
|
2015-02-16 19:08:29 +00:00
|
|
|
r <- select $ from $ \r -> do
|
allow for concurrent incremental fsck processes again (sorta)
Sqlite doesn't support multiple concurrent writers
at all. One of them will fail to write. It's not even possible to have two
processes building up separate transactions at the same time. Before using
sqlite, incremental fsck could work perfectly well with multiple fsck
processes running concurrently. I'd like to keep that working.
My partial solution, so far, is to make git-annex buffer writes, and every
so often send them all to sqlite at once, in a transaction. So most of the
time, nothing is writing to the database. (And if it gets unlucky and
a write fails due to a collision with another writer, it can just wait and
retry the write later.) This lets multiple processes write to the database
successfully.
But, for the purposes of concurrent, incremental fsck, it's not ideal.
Each process doesn't immediately learn of files that another process has
checked. So they'll tend to do redundant work.
Only way I can see to improve this is to use some other mechanism for
short-term IPC between the fsck processes. Not yet done.
----
Also, make addDb check if an item is in the database already, and not try
to re-add it. That fixes an intermittent crash with
"SQLite3 returned ErrorConstraint while attempting to perform step."
I am not 100% sure why; it only started happening when I moved write
buffering into the queue. It seemed to generally happen on the same file
each time, so could just be due to multiple files having the same key.
However, I doubt my sound repo has many duplicate keys, and I suspect
something else is going on.
----
Updated benchmark, with the 1000 item queue: 6m33.808s
2015-02-17 20:39:35 +00:00
|
|
|
where_ (r ^. FsckedKey ==. val sk)
|
2015-02-16 19:08:29 +00:00
|
|
|
return (r ^. FsckedKey)
|
|
|
|
return $ not $ null r
|