git-annex/Database/Fsck.hs
Joey Hess 4224fae71f
optimise read and write for Keys database (untested)
Writes are optimised by queueing up multiple writes when possible.
The queue is flushed after the Annex monad action finishes. That makes it
happen on program termination, and also whenever a nested Annex monad action
finishes.

Reads are optimised by checking once (per AnnexState) if the database
exists. If the database doesn't exist yet, all reads return mempty.

Reads also cause queued writes to be flushed, so reads will always be
consistent with writes (as long as they're made inside the same Annex monad).
A future optimisation path would be to determine when that's not necessary,
which is probably most of the time, and avoid flushing unncessarily.

Design notes for this commit:

- separate reads from writes
- reuse a handle which is left open until program
  exit or until the MVar goes out of scope (and autoclosed then)
- writes are queued
  - queue is flushed periodically
  - immediate queue flush before any read
  - auto-flush queue when database handle is garbage collected
  - flush queue on exit from Annex monad
    (Note that this may happen repeatedly for a single database connection;
    or a connection may be reused for multiple Annex monad actions,
    possibly even concurrent ones.)
- if database does not exist (or is empty) the handle
  is not opened by reads; reads instead return empty results
- writes open the handle if it was not open previously
2015-12-23 19:18:52 -04:00

110 lines
2.9 KiB
Haskell

{- Sqlite database used for incremental fsck.
-
- Copyright 2015 Joey Hess <id@joeyh.name>
-:
- Licensed under the GNU GPL version 3 or higher.
-}
{-# LANGUAGE QuasiQuotes, TypeFamilies, TemplateHaskell #-}
{-# LANGUAGE OverloadedStrings, GADTs, FlexibleContexts #-}
{-# LANGUAGE MultiParamTypeClasses, GeneralizedNewtypeDeriving #-}
{-# LANGUAGE RankNTypes #-}
module Database.Fsck (
FsckHandle,
newPass,
openDb,
closeDb,
addDb,
inDb,
FsckedId,
) where
import Database.Types
import qualified Database.Queue as H
import Locations
import Utility.PosixFiles
import Utility.Exception
import Common
import Annex
import Types.Key
import Types.UUID
import Annex.Perms
import Annex.LockFile
import Messages
import Database.Persist.TH
import Database.Esqueleto hiding (Key)
import Data.Time.Clock
data FsckHandle = FsckHandle H.DbQueue UUID
{- Each key stored in the database has already been fscked as part
- of the latest incremental fsck pass. -}
share [mkPersist sqlSettings, mkMigrate "migrateFsck"] [persistLowerCase|
Fscked
key SKey
UniqueKey key
|]
{- The database is removed when starting a new incremental fsck pass.
-
- This may fail, if other fsck processes are currently running using the
- database. Removing the database in that situation would lead to crashes
- or unknown behavior.
-}
newPass :: UUID -> Annex Bool
newPass u = isJust <$> tryExclusiveLock (gitAnnexFsckDbLock u) go
where
go = liftIO . void . tryIO . removeDirectoryRecursive
=<< fromRepo (gitAnnexFsckDbDir u)
{- Opens the database, creating it if it doesn't exist yet. -}
openDb :: UUID -> Annex FsckHandle
openDb u = do
dbdir <- fromRepo (gitAnnexFsckDbDir u)
let db = dbdir </> "db"
unlessM (liftIO $ doesFileExist db) $ do
let tmpdbdir = dbdir ++ ".tmp"
let tmpdb = tmpdbdir </> "db"
liftIO $ do
createDirectoryIfMissing True tmpdbdir
H.initDb tmpdb $ void $
runMigrationSilent migrateFsck
setAnnexDirPerm tmpdbdir
setAnnexFilePerm tmpdb
liftIO $ do
void $ tryIO $ removeDirectoryRecursive dbdir
rename tmpdbdir dbdir
lockFileCached =<< fromRepo (gitAnnexFsckDbLock u)
h <- liftIO $ H.openDbQueue db "fscked"
return $ FsckHandle h u
closeDb :: FsckHandle -> Annex ()
closeDb (FsckHandle h u) = do
liftIO $ H.closeDbQueue h
unlockFile =<< fromRepo (gitAnnexFsckDbLock u)
addDb :: FsckHandle -> Key -> IO ()
addDb (FsckHandle h _) k = H.queueDb h checkcommit $
void $ insertUnique $ Fscked sk
where
sk = toSKey k
-- commit queue after 1000 files or 5 minutes, whichever comes first
checkcommit sz lastcommittime
| sz > 1000 = return True
| otherwise = do
now <- getCurrentTime
return $ diffUTCTime lastcommittime now > 300
{- Doesn't know about keys that were just added with addDb. -}
inDb :: FsckHandle -> Key -> IO Bool
inDb (FsckHandle h _) = H.queryDbQueue h . inDb' . toSKey
inDb' :: SKey -> SqlPersistM Bool
inDb' sk = do
r <- select $ from $ \r -> do
where_ (r ^. FsckedKey ==. val sk)
return (r ^. FsckedKey)
return $ not $ null r