2015-02-16 19:08:29 +00:00
|
|
|
{- Sqlite database used for incremental fsck.
|
|
|
|
-
|
2019-10-29 16:28:01 +00:00
|
|
|
- Copyright 2015-2019 Joey Hess <id@joeyh.name>
|
2015-02-16 19:08:29 +00:00
|
|
|
-
|
2019-03-13 19:48:14 +00:00
|
|
|
- Licensed under the GNU AGPL version 3 or higher.
|
2015-02-16 19:08:29 +00:00
|
|
|
-}
|
|
|
|
|
2020-02-04 17:53:00 +00:00
|
|
|
{-# LANGUAGE CPP #-}
|
2015-02-16 19:08:29 +00:00
|
|
|
{-# LANGUAGE QuasiQuotes, TypeFamilies, TemplateHaskell #-}
|
|
|
|
{-# LANGUAGE OverloadedStrings, GADTs, FlexibleContexts #-}
|
2015-02-18 21:30:07 +00:00
|
|
|
{-# LANGUAGE MultiParamTypeClasses, GeneralizedNewtypeDeriving #-}
|
2020-11-07 18:09:17 +00:00
|
|
|
{-# LANGUAGE DataKinds, FlexibleInstances #-}
|
2015-02-22 20:57:19 +00:00
|
|
|
{-# LANGUAGE RankNTypes #-}
|
2019-07-30 16:49:37 +00:00
|
|
|
{-# LANGUAGE UndecidableInstances #-}
|
2020-02-04 17:53:00 +00:00
|
|
|
#if MIN_VERSION_persistent_template(2,8,0)
|
2020-02-04 16:03:30 +00:00
|
|
|
{-# LANGUAGE DerivingStrategies #-}
|
|
|
|
{-# LANGUAGE StandaloneDeriving #-}
|
2020-02-04 17:53:00 +00:00
|
|
|
#endif
|
2015-02-16 19:08:29 +00:00
|
|
|
|
|
|
|
module Database.Fsck (
|
2015-02-17 21:08:11 +00:00
|
|
|
FsckHandle,
|
2015-02-16 19:08:29 +00:00
|
|
|
newPass,
|
|
|
|
openDb,
|
2015-02-17 17:04:22 +00:00
|
|
|
closeDb,
|
2015-02-16 19:08:29 +00:00
|
|
|
addDb,
|
|
|
|
inDb,
|
|
|
|
FsckedId,
|
|
|
|
) where
|
|
|
|
|
|
|
|
import Database.Types
|
2015-12-23 18:59:58 +00:00
|
|
|
import qualified Database.Queue as H
|
2017-02-13 21:30:28 +00:00
|
|
|
import Database.Init
|
2016-01-20 20:36:33 +00:00
|
|
|
import Annex.Locations
|
2015-02-18 19:54:24 +00:00
|
|
|
import Utility.Exception
|
2016-01-20 20:36:33 +00:00
|
|
|
import Annex.Common
|
2015-02-17 17:04:22 +00:00
|
|
|
import Annex.LockFile
|
2020-11-05 22:45:37 +00:00
|
|
|
import qualified Utility.RawFilePath as R
|
2015-02-16 19:08:29 +00:00
|
|
|
|
2018-11-04 20:46:39 +00:00
|
|
|
import Database.Persist.Sql hiding (Key)
|
2015-02-16 19:08:29 +00:00
|
|
|
import Database.Persist.TH
|
2015-07-31 20:42:15 +00:00
|
|
|
import Data.Time.Clock
|
2020-11-05 22:45:37 +00:00
|
|
|
import qualified System.FilePath.ByteString as P
|
2015-02-16 19:08:29 +00:00
|
|
|
|
2015-12-23 18:59:58 +00:00
|
|
|
{- Handle to an open fsck database: the database queue, paired with the
 - UUID that was passed to openDb. -}
data FsckHandle = FsckHandle H.DbQueue UUID
|
2015-02-17 21:08:11 +00:00
|
|
|
|
2015-02-16 19:08:29 +00:00
|
|
|
{- Schema of the fsck database.
 -
 - A key that is stored in the database has already been fscked as part
 - of the latest incremental fsck pass. -}
share
    [ mkPersist sqlSettings
    , mkMigrate "migrateFsck"
    ]
    [persistLowerCase|
Fscked
  key Key
  FsckedKeyIndex key
|]
|
|
|
|
|
2015-02-17 17:04:22 +00:00
|
|
|
{- Starts a new incremental fsck pass, by removing the fsck database.
 -
 - (The old fsck database used before v8 is removed too.)
 -
 - The removal is attempted under an exclusive lock of the fsck database
 - lock file. It may fail, if other fsck processes are currently running
 - using the database; removing the database in that situation would
 - lead to crashes or unknown behavior. The result is True only when
 - tryExclusiveLock yielded a value.
 -}
newPass :: UUID -> Annex Bool
newPass u = do
    locked <- tryExclusiveLock (gitAnnexFsckDbLock u) removeDbs
    return (isJust locked)
  where
    -- Current database directory first, then the old one.
    removeDbs = mapM_ removeDb
        [ gitAnnexFsckDbDir u
        , gitAnnexFsckDbDirOld u
        ]
    -- The result of tryIO is discarded, so removal is best-effort.
    removeDb getdir = do
        dir <- fromRepo getdir
        liftIO $ void $ tryIO $
            removeDirectoryRecursive (fromRawFilePath dir)
|
2015-02-16 19:08:29 +00:00
|
|
|
|
2015-12-07 17:42:03 +00:00
|
|
|
{- Opens the fsck database for a UUID.
 -
 - When the database file does not exist yet, it is created first, by
 - running the migrateFsck migration. The fsck database lock file is
 - locked (via lockFileCached) before the queue is opened; closeDb
 - unlocks it again.
 -}
openDb :: UUID -> Annex FsckHandle
openDb u = do
    db <- (P.</> "db") <$> fromRepo (gitAnnexFsckDbDir u)
    unlessM (liftIO (R.doesPathExist db)) $
        initDb db (void (runMigrationSilent migrateFsck))
    lockfile <- fromRepo (gitAnnexFsckDbLock u)
    lockFileCached lockfile
    queue <- liftIO (H.openDbQueue H.MultiWriter db "fscked")
    return (FsckHandle queue u)
|
2015-02-16 19:08:29 +00:00
|
|
|
|
2015-02-17 21:08:11 +00:00
|
|
|
{- Closes the database queue of a handle, and then unlocks the fsck
 - database lock file of the handle's UUID. -}
closeDb :: FsckHandle -> Annex ()
closeDb (FsckHandle queue u) = do
    liftIO (H.closeDbQueue queue)
    lockfile <- fromRepo (gitAnnexFsckDbLock u)
    unlockFile lockfile
|
2015-02-17 17:04:22 +00:00
|
|
|
|
2015-02-17 21:08:11 +00:00
|
|
|
{- Queues a write that records a key as fscked.
 -
 - Sqlite doesn't support multiple concurrent writers, so writes are
 - buffered in the queue and sent to the database in batches. That lets
 - several fsck processes write to the same database, at the price that
 - a process does not immediately learn of keys that another process
 - has checked.
 -
 - insertUnique is used so that a key that is already in the database
 - is not re-added; re-adding it used to crash intermittently with
 - "SQLite3 returned ErrorConstraint while attempting to perform step."
 -}
addDb :: FsckHandle -> Key -> IO ()
addDb (FsckHandle queue _) key =
    H.queueDb queue commitDue (void (insertUnique (Fscked key)))
  where
    -- Commit queue after 1000 changes or 5 minutes, whichever comes
    -- first. Committing based on time means that an interrupted
    -- incremental fsck does not lose much work.
    commitDue queuesize lastcommit
        | queuesize > 1000 = return True
        | otherwise = do
            now <- getCurrentTime
            return (now `diffUTCTime` lastcommit > 300)
|
2015-07-31 20:42:15 +00:00
|
|
|
|
2015-12-23 18:59:58 +00:00
|
|
|
{- Checks if a key is recorded in the database as fscked.
 -
 - Keys that were just added with addDb are not known about yet. -}
inDb :: FsckHandle -> Key -> IO Bool
inDb (FsckHandle queue _) key = H.queryDbQueue queue (inDb' key)
|
allow for concurrent incremental fsck processes again (sorta)
Sqlite doesn't support multiple concurrent writers
at all. One of them will fail to write. It's not even possible to have two
processes building up separate transactions at the same time. Before using
sqlite, incremental fsck could work perfectly well with multiple fsck
processes running concurrently. I'd like to keep that working.
My partial solution, so far, is to make git-annex buffer writes, and every
so often send them all to sqlite at once, in a transaction. So most of the
time, nothing is writing to the database. (And if it gets unlucky and
a write fails due to a collision with another writer, it can just wait and
retry the write later.) This lets multiple processes write to the database
successfully.
But, for the purposes of concurrent, incremental fsck, it's not ideal.
Each process doesn't immediately learn of files that another process has
checked. So they'll tend to do redundant work.
Only way I can see to improve this is to use some other mechanism for
short-term IPC between the fsck processes. Not yet done.
----
Also, make addDb check if an item is in the database already, and not try
to re-add it. That fixes an intermittent crash with
"SQLite3 returned ErrorConstraint while attempting to perform step."
I am not 100% sure why; it only started happening when I moved write
buffering into the queue. It seemed to generally happen on the same file
each time, so could just be due to multiple files having the same key.
However, I doubt my sound repo has many duplicate keys, and I suspect
something else is going on.
----
Updated benchmark, with the 1000 item queue: 6m33.808s
2015-02-17 20:39:35 +00:00
|
|
|
|
2019-10-29 16:28:01 +00:00
|
|
|
{- The query behind inDb: True when at least one Fscked row has the
 - key. -}
inDb' :: Key -> SqlPersistM Bool
inDb' k = not . null <$> selectList [FsckedKey ==. k] []
|