git-annex/Database/Fsck.hs
Joey Hess 6ab14710fc
fix consistency bug reading from export database
The export database has writes made to it and then expects to read back
the same data immediately. But, the way that Database.Handle does
writes, in order to support multiple writers, makes that not work, due
to caching issues. This resulted in export re-uploading files it had
already successfully renamed into place.

Fixed by allowing databases to be opened in MultiWriter or SingleWriter
mode. The export database only needs to support a single writer; it does
not make sense for multiple exports to run at the same time to the same
special remote.

All other databases still use MultiWriter mode. And by inspection,
nothing else in git-annex seems to be relying on being able to
immediately query for changes that were just written to the database.

This commit was supported by the NSF-funded DataLad project.
2017-09-06 17:19:07 -04:00

96 lines
2.6 KiB
Haskell

{- Sqlite database used for incremental fsck.
-
- Copyright 2015 Joey Hess <id@joeyh.name>
-:
- Licensed under the GNU GPL version 3 or higher.
-}
{-# LANGUAGE QuasiQuotes, TypeFamilies, TemplateHaskell #-}
{-# LANGUAGE OverloadedStrings, GADTs, FlexibleContexts #-}
{-# LANGUAGE MultiParamTypeClasses, GeneralizedNewtypeDeriving #-}
{-# LANGUAGE RankNTypes #-}
module Database.Fsck (
FsckHandle,
newPass,
openDb,
closeDb,
addDb,
inDb,
FsckedId,
) where
import Database.Types
import qualified Database.Queue as H
import Database.Init
import Annex.Locations
import Utility.Exception
import Annex.Common
import Annex.LockFile
import Database.Persist.TH
import Database.Esqueleto hiding (Key)
import Data.Time.Clock
data FsckHandle = FsckHandle H.DbQueue UUID
{- Each key stored in the database has already been fscked as part
- of the latest incremental fsck pass. -}
share [mkPersist sqlSettings, mkMigrate "migrateFsck"] [persistLowerCase|
Fscked
key SKey
UniqueKey key
|]
{- The database is removed when starting a new incremental fsck pass.
-
- This may fail, if other fsck processes are currently running using the
- database. Removing the database in that situation would lead to crashes
- or unknown behavior.
-}
newPass :: UUID -> Annex Bool
newPass u = isJust <$> tryExclusiveLock (gitAnnexFsckDbLock u) go
where
go = liftIO . void . tryIO . removeDirectoryRecursive
=<< fromRepo (gitAnnexFsckDbDir u)
{- Opens the database, creating it if it doesn't exist yet. -}
openDb :: UUID -> Annex FsckHandle
openDb u = do
dbdir <- fromRepo (gitAnnexFsckDbDir u)
let db = dbdir </> "db"
unlessM (liftIO $ doesFileExist db) $ do
initDb db $ void $
runMigrationSilent migrateFsck
lockFileCached =<< fromRepo (gitAnnexFsckDbLock u)
h <- liftIO $ H.openDbQueue H.MultiWriter db "fscked"
return $ FsckHandle h u
closeDb :: FsckHandle -> Annex ()
closeDb (FsckHandle h u) = do
liftIO $ H.closeDbQueue h
unlockFile =<< fromRepo (gitAnnexFsckDbLock u)
addDb :: FsckHandle -> Key -> IO ()
addDb (FsckHandle h _) k = H.queueDb h checkcommit $
void $ insertUnique $ Fscked sk
where
sk = toSKey k
-- commit queue after 1000 files or 5 minutes, whichever comes first
checkcommit sz lastcommittime
| sz > 1000 = return True
| otherwise = do
now <- getCurrentTime
return $ diffUTCTime lastcommittime now > 300
{- Doesn't know about keys that were just added with addDb. -}
inDb :: FsckHandle -> Key -> IO Bool
inDb (FsckHandle h _) = H.queryDbQueue h . inDb' . toSKey
inDb' :: SKey -> SqlPersistM Bool
inDb' sk = do
r <- select $ from $ \r -> do
where_ (r ^. FsckedKey ==. val sk)
return (r ^. FsckedKey)
return $ not $ null r