fsck: Multiple incremental fscks of different repos (some remote) can now be in progress at the same time in the same repo without it getting confused about which files have been checked for which remotes.

This commit is contained in:
Joey Hess 2015-02-17 17:08:11 -04:00
parent a3370ac459
commit 3414229354
4 changed files with 63 additions and 51 deletions

View file

@ -67,15 +67,16 @@ fsckOptions =
seek :: CommandSeek seek :: CommandSeek
seek ps = do seek ps = do
from <- getOptionField fsckFromOption Remote.byNameWithUUID from <- getOptionField fsckFromOption Remote.byNameWithUUID
i <- getIncremental u <- maybe getUUID (pure . Remote.uuid) from
i <- getIncremental u
withKeyOptions withKeyOptions
(\k -> startKey i k =<< getNumCopies) (\k -> startKey i k =<< getNumCopies)
(withFilesInGit $ whenAnnexed $ start from i) (withFilesInGit $ whenAnnexed $ start from i)
ps ps
withFsckDb i FsckDb.closeDb withFsckDb i FsckDb.closeDb
getIncremental :: Annex Incremental getIncremental :: UUID -> Annex Incremental
getIncremental = do getIncremental u = do
i <- maybe (return False) (checkschedule . parseDuration) i <- maybe (return False) (checkschedule . parseDuration)
=<< Annex.getField (optionName incrementalScheduleOption) =<< Annex.getField (optionName incrementalScheduleOption)
starti <- Annex.getFlag (optionName startIncrementalOption) starti <- Annex.getFlag (optionName startIncrementalOption)
@ -86,27 +87,27 @@ getIncremental = do
(False ,False, True) -> contIncremental (False ,False, True) -> contIncremental
(True, False, False) -> (True, False, False) ->
maybe startIncremental (const contIncremental) maybe startIncremental (const contIncremental)
=<< getStartTime =<< getStartTime u
_ -> error "Specify only one of --incremental, --more, or --incremental-schedule" _ -> error "Specify only one of --incremental, --more, or --incremental-schedule"
where where
startIncremental = do startIncremental = do
recordStartTime recordStartTime u
ifM FsckDb.newPass ifM (FsckDb.newPass u)
( StartIncremental <$> FsckDb.openDb ( StartIncremental <$> FsckDb.openDb u
, error "Cannot start a new --incremental fsck pass; another fsck process is already running." , error "Cannot start a new --incremental fsck pass; another fsck process is already running."
) )
contIncremental = ContIncremental <$> FsckDb.openDb contIncremental = ContIncremental <$> FsckDb.openDb u
checkschedule Nothing = error "bad --incremental-schedule value" checkschedule Nothing = error "bad --incremental-schedule value"
checkschedule (Just delta) = do checkschedule (Just delta) = do
Annex.addCleanup FsckCleanup $ do Annex.addCleanup FsckCleanup $ do
v <- getStartTime v <- getStartTime u
case v of case v of
Nothing -> noop Nothing -> noop
Just started -> do Just started -> do
now <- liftIO getPOSIXTime now <- liftIO getPOSIXTime
when (now - realToFrac started >= durationToPOSIXTime delta) when (now - realToFrac started >= durationToPOSIXTime delta) $
resetStartTime resetStartTime u
return True return True
start :: Maybe Remote -> Incremental -> FilePath -> Key -> CommandStart start :: Maybe Remote -> Incremental -> FilePath -> Key -> CommandStart
@ -420,7 +421,7 @@ badContentRemote remote key = do
return $ (if ok then "dropped from " else "failed to drop from ") return $ (if ok then "dropped from " else "failed to drop from ")
++ Remote.name remote ++ Remote.name remote
data Incremental = StartIncremental FsckDb.DbHandle | ContIncremental FsckDb.DbHandle | NonIncremental data Incremental = StartIncremental FsckDb.FsckHandle | ContIncremental FsckDb.FsckHandle | NonIncremental
runFsck :: Incremental -> FilePath -> Key -> Annex Bool -> CommandStart runFsck :: Incremental -> FilePath -> Key -> Annex Bool -> CommandStart
runFsck inc file key a = ifM (needFsck inc key) runFsck inc file key a = ifM (needFsck inc key)
@ -439,7 +440,7 @@ needFsck :: Incremental -> Key -> Annex Bool
needFsck (ContIncremental h) key = liftIO $ not <$> FsckDb.inDb h key needFsck (ContIncremental h) key = liftIO $ not <$> FsckDb.inDb h key
needFsck _ _ = return True needFsck _ _ = return True
withFsckDb :: Incremental -> (FsckDb.DbHandle -> Annex ()) -> Annex () withFsckDb :: Incremental -> (FsckDb.FsckHandle -> Annex ()) -> Annex ()
withFsckDb (ContIncremental h) a = a h withFsckDb (ContIncremental h) a = a h
withFsckDb (StartIncremental h) a = a h withFsckDb (StartIncremental h) a = a h
withFsckDb NonIncremental _ = noop withFsckDb NonIncremental _ = noop
@ -455,9 +456,9 @@ recordFsckTime inc key = withFsckDb inc $ \h -> liftIO $ FsckDb.addDb h key
- (This is not possible to do on Windows, and so the timestamp in - (This is not possible to do on Windows, and so the timestamp in
- the file will only be equal or greater than the modification time.) - the file will only be equal or greater than the modification time.)
-} -}
recordStartTime :: Annex () recordStartTime :: UUID -> Annex ()
recordStartTime = do recordStartTime u = do
f <- fromRepo gitAnnexFsckState f <- fromRepo (gitAnnexFsckState u)
createAnnexDirectory $ parentDir f createAnnexDirectory $ parentDir f
liftIO $ do liftIO $ do
nukeFile f nukeFile f
@ -472,13 +473,13 @@ recordStartTime = do
showTime :: POSIXTime -> String showTime :: POSIXTime -> String
showTime = show showTime = show
resetStartTime :: Annex () resetStartTime :: UUID -> Annex ()
resetStartTime = liftIO . nukeFile =<< fromRepo gitAnnexFsckState resetStartTime u = liftIO . nukeFile =<< fromRepo (gitAnnexFsckState u)
{- Gets the incremental fsck start time. -} {- Gets the incremental fsck start time. -}
getStartTime :: Annex (Maybe EpochTime) getStartTime :: UUID -> Annex (Maybe EpochTime)
getStartTime = do getStartTime u = do
f <- fromRepo gitAnnexFsckState f <- fromRepo (gitAnnexFsckState u)
liftIO $ catchDefaultIO Nothing $ do liftIO $ catchDefaultIO Nothing $ do
timestamp <- modificationTime <$> getFileStatus f timestamp <- modificationTime <$> getFileStatus f
let fromstatus = Just (realToFrac timestamp) let fromstatus = Just (realToFrac timestamp)

View file

@ -9,11 +9,10 @@
{-# LANGUAGE OverloadedStrings, GADTs, FlexibleContexts #-} {-# LANGUAGE OverloadedStrings, GADTs, FlexibleContexts #-}
module Database.Fsck ( module Database.Fsck (
FsckHandle,
newPass, newPass,
openDb, openDb,
closeDb, closeDb,
H.commitDb,
H.DbHandle,
addDb, addDb,
inDb, inDb,
FsckedId, FsckedId,
@ -25,6 +24,7 @@ import Locations
import Utility.Directory import Utility.Directory
import Annex import Annex
import Types.Key import Types.Key
import Types.UUID
import Annex.Perms import Annex.Perms
import Annex.LockFile import Annex.LockFile
@ -37,6 +37,8 @@ import System.Directory
import Data.Maybe import Data.Maybe
import Control.Applicative import Control.Applicative
data FsckHandle = FsckHandle H.DbHandle UUID
{- Each key stored in the database has already been fscked as part {- Each key stored in the database has already been fscked as part
- of the latest incremental fsck pass. -} - of the latest incremental fsck pass. -}
share [mkPersist sqlSettings, mkMigrate "migrateFsck"] [persistLowerCase| share [mkPersist sqlSettings, mkMigrate "migrateFsck"] [persistLowerCase|
@ -51,15 +53,15 @@ Fscked
- database. Removing the database in that situation would lead to crashes - database. Removing the database in that situation would lead to crashes
- or undefined behavior. - or undefined behavior.
-} -}
newPass :: Annex Bool newPass :: UUID -> Annex Bool
newPass = isJust <$> tryExclusiveLock gitAnnexFsckDbLock go newPass u = isJust <$> tryExclusiveLock (gitAnnexFsckDbLock u) go
where where
go = liftIO. nukeFile =<< fromRepo gitAnnexFsckDb go = liftIO. nukeFile =<< fromRepo (gitAnnexFsckDb u)
{- Opens the database, creating it atomically if it doesn't exist yet. -} {- Opens the database, creating it atomically if it doesn't exist yet. -}
openDb :: Annex H.DbHandle openDb :: UUID -> Annex FsckHandle
openDb = do openDb u = do
db <- fromRepo gitAnnexFsckDb db <- fromRepo (gitAnnexFsckDb u)
unlessM (liftIO $ doesFileExist db) $ do unlessM (liftIO $ doesFileExist db) $ do
let newdb = db ++ ".new" let newdb = db ++ ".new"
h <- liftIO $ H.openDb newdb h <- liftIO $ H.openDb newdb
@ -68,23 +70,24 @@ openDb = do
liftIO $ H.closeDb h liftIO $ H.closeDb h
setAnnexFilePerm newdb setAnnexFilePerm newdb
liftIO $ renameFile newdb db liftIO $ renameFile newdb db
lockFileShared =<< fromRepo gitAnnexFsckDbLock lockFileShared =<< fromRepo (gitAnnexFsckDbLock u)
liftIO $ H.openDb db h <- liftIO $ H.openDb db
return $ FsckHandle h u
closeDb :: H.DbHandle -> Annex () closeDb :: FsckHandle -> Annex ()
closeDb h = do closeDb (FsckHandle h u) = do
liftIO $ H.closeDb h liftIO $ H.closeDb h
unlockFile =<< fromRepo gitAnnexFsckDbLock unlockFile =<< fromRepo (gitAnnexFsckDbLock u)
addDb :: H.DbHandle -> Key -> IO () addDb :: FsckHandle -> Key -> IO ()
addDb h k = H.queueDb h 1000 $ addDb (FsckHandle h _) k = H.queueDb h 1000 $
unlessM (inDb' sk) $ unlessM (inDb' sk) $
insert_ $ Fscked sk insert_ $ Fscked sk
where where
sk = toSKey k sk = toSKey k
inDb :: H.DbHandle -> Key -> IO Bool inDb :: FsckHandle -> Key -> IO Bool
inDb h = H.runDb h . inDb' . toSKey inDb (FsckHandle h _) = H.runDb h . inDb' . toSKey
inDb' :: SKey -> SqlPersistM Bool inDb' :: SKey -> SqlPersistM Bool
inDb' sk = do inDb' sk = do

View file

@ -29,6 +29,8 @@ module Locations (
gitAnnexBadLocation, gitAnnexBadLocation,
gitAnnexUnusedLog, gitAnnexUnusedLog,
gitAnnexFsckState, gitAnnexFsckState,
gitAnnexFsckDb,
gitAnnexFsckDbLock,
gitAnnexFsckResultsLog, gitAnnexFsckResultsLog,
gitAnnexScheduleState, gitAnnexScheduleState,
gitAnnexTransferDir, gitAnnexTransferDir,
@ -57,8 +59,6 @@ module Locations (
gitAnnexSshDir, gitAnnexSshDir,
gitAnnexRemotesDir, gitAnnexRemotesDir,
gitAnnexAssistantDefaultDir, gitAnnexAssistantDefaultDir,
gitAnnexFsckDb,
gitAnnexFsckDbLock,
isLinkToAnnex, isLinkToAnnex,
HashLevels(..), HashLevels(..),
hashDirMixed, hashDirMixed,
@ -220,9 +220,22 @@ gitAnnexBadLocation key r = gitAnnexBadDir r </> keyFile key
gitAnnexUnusedLog :: FilePath -> Git.Repo -> FilePath gitAnnexUnusedLog :: FilePath -> Git.Repo -> FilePath
gitAnnexUnusedLog prefix r = gitAnnexDir r </> (prefix ++ "unused") gitAnnexUnusedLog prefix r = gitAnnexDir r </> (prefix ++ "unused")
{- .git/annex/fsckstate is used to store information about incremental fscks. -} {- .git/annex/fsck/uuid/ is used to store information about incremental
gitAnnexFsckState :: Git.Repo -> FilePath - fscks. -}
gitAnnexFsckState r = gitAnnexDir r </> "fsckstate" gitAnnexFsckDir :: UUID -> Git.Repo -> FilePath
gitAnnexFsckDir u r = gitAnnexDir r </> "fsck" </> fromUUID u
{- State file for the incremental fsck of the repository with the given UUID; the fsck's start time is recorded in and read back from this file. -}
gitAnnexFsckState :: UUID -> Git.Repo -> FilePath
gitAnnexFsckState u r = gitAnnexFsckDir u r </> "state"
{- Database used to record fsck info. -}
gitAnnexFsckDb :: UUID -> Git.Repo -> FilePath
gitAnnexFsckDb u r = gitAnnexFsckDir u r </> "fsck.db"
{- Lock file for the fsck database. -}
gitAnnexFsckDbLock :: UUID -> Git.Repo -> FilePath
gitAnnexFsckDbLock u r = gitAnnexFsckDir u r </> "fsck.lck"
{- .git/annex/fsckresults/uuid is used to store results of git fscks -} {- .git/annex/fsckresults/uuid is used to store results of git fscks -}
gitAnnexFsckResultsLog :: UUID -> Git.Repo -> FilePath gitAnnexFsckResultsLog :: UUID -> Git.Repo -> FilePath
@ -342,14 +355,6 @@ gitAnnexRemotesDir r = addTrailingPathSeparator $ gitAnnexDir r </> "remotes"
gitAnnexAssistantDefaultDir :: FilePath gitAnnexAssistantDefaultDir :: FilePath
gitAnnexAssistantDefaultDir = "annex" gitAnnexAssistantDefaultDir = "annex"
{- Database used to record fsck info. -}
gitAnnexFsckDb :: Git.Repo -> FilePath
gitAnnexFsckDb r = gitAnnexDir r </> "fsck.db"
{- Lock file for the fsck database. -}
gitAnnexFsckDbLock :: Git.Repo -> FilePath
gitAnnexFsckDbLock r = gitAnnexDir r </> "fsck.dbl"
{- Checks a symlink target to see if it appears to point to annexed content. {- Checks a symlink target to see if it appears to point to annexed content.
- -
- We only look at paths inside the .git directory, and not at the .git - We only look at paths inside the .git directory, and not at the .git

3
debian/changelog vendored
View file

@ -36,6 +36,9 @@ git-annex (5.20150206) UNRELEASED; urgency=medium
of abusing the sticky bit. Existing sticky bits are ignored, of abusing the sticky bit. Existing sticky bits are ignored,
incremental fscks started by old versions won't be resumed by incremental fscks started by old versions won't be resumed by
this version. this version.
* fsck: Multiple incremental fscks of different repos (some remote)
can now be in progress at the same time in the same repo without it
getting confused about which files have been checked for which remotes.
-- Joey Hess <id@joeyh.name> Fri, 06 Feb 2015 13:57:08 -0400 -- Joey Hess <id@joeyh.name> Fri, 06 Feb 2015 13:57:08 -0400