git-annex/Database/Fsck.hs
Joey Hess e60766543f
add annex.dbdir (WIP)
WIP: This is mostly complete, but there is a problem: createDirectoryUnder
throws an error when annex.dbdir is set to outside the git repo.

annex.dbdir is a workaround for filesystems where sqlite does not work,
due to eg, the filesystem not properly supporting locking.

It's intended to be set before initializing the repository. Changing it
in an existing repository can be done, but would be the same as making a
new repository and moving all the annexed objects into it. While the
databases get recreated from the git-annex branch in that situation, any
information that is in the databases but not stored in the branch gets
lost. It may be that no information ever gets stored in the databases
that cannot be reconstructed from the branch, but I have not verified
that.

Sponsored-by: Dartmouth College's Datalad project
2022-08-11 16:58:53 -04:00

109 lines
3.1 KiB
Haskell

{- Sqlite database used for incremental fsck.
-
- Copyright 2015-2019 Joey Hess <id@joeyh.name>
-:
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE QuasiQuotes, TypeFamilies, TemplateHaskell #-}
{-# LANGUAGE OverloadedStrings, GADTs, FlexibleContexts #-}
{-# LANGUAGE MultiParamTypeClasses, GeneralizedNewtypeDeriving #-}
{-# LANGUAGE DataKinds, FlexibleInstances #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE UndecidableInstances #-}
#if MIN_VERSION_persistent_template(2,8,0)
{-# LANGUAGE DerivingStrategies #-}
{-# LANGUAGE StandaloneDeriving #-}
#endif
module Database.Fsck (
FsckHandle,
newPass,
openDb,
closeDb,
addDb,
inDb,
FsckedId,
) where
import Database.Types
import qualified Database.Queue as H
import Database.Init
import Annex.Locations
import Utility.Exception
import Annex.Common
import Annex.LockFile
import qualified Utility.RawFilePath as R
import Database.Persist.Sql hiding (Key)
import Database.Persist.TH
import Data.Time.Clock
import qualified System.FilePath.ByteString as P
data FsckHandle = FsckHandle H.DbQueue UUID
{- Each key stored in the database has already been fscked as part
- of the latest incremental fsck pass. -}
share [mkPersist sqlSettings, mkMigrate "migrateFsck"] [persistLowerCase|
Fscked
key Key
FsckedKeyIndex key
|]
{- The database is removed when starting a new incremental fsck pass.
-
- (The old fsck database used before v8 is also removed here.)
-
- This may fail, if other fsck processes are currently running using the
- database. Removing the database in that situation would lead to crashes
- or unknown behavior.
-}
newPass :: UUID -> Annex Bool
newPass u = do
lck <- calcRepo' (gitAnnexFsckDbLock u)
isJust <$> tryExclusiveLock lck go
where
go = do
removedb =<< calcRepo' (gitAnnexFsckDbDir u)
removedb =<< calcRepo' (gitAnnexFsckDbDirOld u)
removedb = liftIO . void . tryIO . removeDirectoryRecursive . fromRawFilePath
{- Opens the database, creating it if it doesn't exist yet. -}
openDb :: UUID -> Annex FsckHandle
openDb u = do
dbdir <- calcRepo' (gitAnnexFsckDbDir u)
let db = dbdir P.</> "db"
unlessM (liftIO $ R.doesPathExist db) $ do
initDb db $ void $
runMigrationSilent migrateFsck
lockFileCached =<< calcRepo' (gitAnnexFsckDbLock u)
h <- liftIO $ H.openDbQueue db "fscked"
return $ FsckHandle h u
closeDb :: FsckHandle -> Annex ()
closeDb (FsckHandle h u) = do
liftIO $ H.closeDbQueue h
unlockFile =<< calcRepo' (gitAnnexFsckDbLock u)
addDb :: FsckHandle -> Key -> IO ()
addDb (FsckHandle h _) k = H.queueDb h checkcommit $
void $ insertUnique $ Fscked k
where
-- Commit queue after 1000 changes or 5 minutes, whichever comes first.
-- The time based commit allows for an incremental fsck to be
-- interrupted and not lose much work.
checkcommit sz lastcommittime
| sz > 1000 = return True
| otherwise = do
now <- getCurrentTime
return $ diffUTCTime now lastcommittime > 300
{- Doesn't know about keys that were just added with addDb. -}
inDb :: FsckHandle -> Key -> IO Bool
inDb (FsckHandle h _) = H.queryDbQueue h . inDb'
inDb' :: Key -> SqlPersistM Bool
inDb' k = do
r <- selectList [FsckedKey ==. k] []
return $ not $ null r