git-annex/Database/Fsck.hs
Joey Hess eb6f6ff9b8
speed up keys database writes
There seems to be no reason to check the time here. I think it was
inherited from code in Database.Fsck, which does have a reason to commit
every few minutes. Removing that syscall speeds up a git-annex init
in a repo with 100000 annexed files by about 3 seconds.

Sponsored-by: Dartmouth College's Datalad project
2021-05-31 15:01:00 -04:00

107 lines
3.1 KiB
Haskell

{- Sqlite database used for incremental fsck.
-
- Copyright 2015-2019 Joey Hess <id@joeyh.name>
-:
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE QuasiQuotes, TypeFamilies, TemplateHaskell #-}
{-# LANGUAGE OverloadedStrings, GADTs, FlexibleContexts #-}
{-# LANGUAGE MultiParamTypeClasses, GeneralizedNewtypeDeriving #-}
{-# LANGUAGE DataKinds, FlexibleInstances #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE UndecidableInstances #-}
#if MIN_VERSION_persistent_template(2,8,0)
{-# LANGUAGE DerivingStrategies #-}
{-# LANGUAGE StandaloneDeriving #-}
#endif
module Database.Fsck (
FsckHandle,
newPass,
openDb,
closeDb,
addDb,
inDb,
FsckedId,
) where
import Database.Types
import qualified Database.Queue as H
import Database.Init
import Annex.Locations
import Utility.Exception
import Annex.Common
import Annex.LockFile
import qualified Utility.RawFilePath as R
import Database.Persist.Sql hiding (Key)
import Database.Persist.TH
import Data.Time.Clock
import qualified System.FilePath.ByteString as P
data FsckHandle = FsckHandle H.DbQueue UUID
{- Each key stored in the database has already been fscked as part
- of the latest incremental fsck pass. -}
share [mkPersist sqlSettings, mkMigrate "migrateFsck"] [persistLowerCase|
Fscked
key Key
FsckedKeyIndex key
|]
{- The database is removed when starting a new incremental fsck pass.
-
- (The old fsck database used before v8 is also removed here.)
-
- This may fail, if other fsck processes are currently running using the
- database. Removing the database in that situation would lead to crashes
- or unknown behavior.
-}
newPass :: UUID -> Annex Bool
newPass u = isJust <$> tryExclusiveLock (gitAnnexFsckDbLock u) go
where
go = do
removedb =<< fromRepo (gitAnnexFsckDbDir u)
removedb =<< fromRepo (gitAnnexFsckDbDirOld u)
removedb = liftIO . void . tryIO . removeDirectoryRecursive . fromRawFilePath
{- Opens the database, creating it if it doesn't exist yet. -}
openDb :: UUID -> Annex FsckHandle
openDb u = do
dbdir <- fromRepo (gitAnnexFsckDbDir u)
let db = dbdir P.</> "db"
unlessM (liftIO $ R.doesPathExist db) $ do
initDb db $ void $
runMigrationSilent migrateFsck
lockFileCached =<< fromRepo (gitAnnexFsckDbLock u)
h <- liftIO $ H.openDbQueue H.MultiWriter db "fscked"
return $ FsckHandle h u
closeDb :: FsckHandle -> Annex ()
closeDb (FsckHandle h u) = do
liftIO $ H.closeDbQueue h
unlockFile =<< fromRepo (gitAnnexFsckDbLock u)
addDb :: FsckHandle -> Key -> IO ()
addDb (FsckHandle h _) k = H.queueDb h checkcommit $
void $ insertUnique $ Fscked k
where
-- Commit queue after 1000 changes or 5 minutes, whichever comes first.
-- The time based commit allows for an incremental fsck to be
-- interrupted and not lose much work.
checkcommit sz lastcommittime
| sz > 1000 = return True
| otherwise = do
now <- getCurrentTime
return $ diffUTCTime now lastcommittime > 300
{- Doesn't know about keys that were just added with addDb. -}
inDb :: FsckHandle -> Key -> IO Bool
inDb (FsckHandle h _) = H.queryDbQueue h . inDb'
inDb' :: Key -> SqlPersistM Bool
inDb' k = do
r <- selectList [FsckedKey ==. k] []
return $ not $ null r