fsck: Multiple incremental fscks of different repos (some remote) can now be in progress at the same time in the same repo without it getting confused about which files have been checked for which remotes.

This commit is contained in:
Joey Hess 2015-02-17 17:08:11 -04:00
parent a3370ac459
commit 3414229354
4 changed files with 63 additions and 51 deletions

View file

@ -67,15 +67,16 @@ fsckOptions =
{- Entry point for the fsck command.
 -
 - Looks up the remote named by the from option (if any), and picks the
 - UUID that the incremental fsck state is kept under: the remote's UUID
 - when a remote was given, otherwise the UUID returned by getUUID.
 - The incremental mode is then determined for that UUID, the selected
 - keys or files are fscked, and finally the fsck database (when one was
 - opened for an incremental fsck) is closed. -}
seek :: CommandSeek
seek ps = do
from <- getOptionField fsckFromOption Remote.byNameWithUUID
i <- getIncremental
u <- maybe getUUID (pure . Remote.uuid) from
i <- getIncremental u
withKeyOptions
(\k -> startKey i k =<< getNumCopies)
(withFilesInGit $ whenAnnexed $ start from i)
ps
{- Runs after all keys/files have been processed; a no-op for a
 - non-incremental fsck. -}
withFsckDb i FsckDb.closeDb
getIncremental :: Annex Incremental
getIncremental = do
getIncremental :: UUID -> Annex Incremental
getIncremental u = do
i <- maybe (return False) (checkschedule . parseDuration)
=<< Annex.getField (optionName incrementalScheduleOption)
starti <- Annex.getFlag (optionName startIncrementalOption)
@ -86,27 +87,27 @@ getIncremental = do
(False ,False, True) -> contIncremental
(True, False, False) ->
maybe startIncremental (const contIncremental)
=<< getStartTime
=<< getStartTime u
_ -> error "Specify only one of --incremental, --more, or --incremental-schedule"
where
startIncremental = do
recordStartTime
ifM FsckDb.newPass
( StartIncremental <$> FsckDb.openDb
recordStartTime u
ifM (FsckDb.newPass u)
( StartIncremental <$> FsckDb.openDb u
, error "Cannot start a new --incremental fsck pass; another fsck process is already running."
)
contIncremental = ContIncremental <$> FsckDb.openDb
contIncremental = ContIncremental <$> FsckDb.openDb u
checkschedule Nothing = error "bad --incremental-schedule value"
checkschedule (Just delta) = do
Annex.addCleanup FsckCleanup $ do
v <- getStartTime
v <- getStartTime u
case v of
Nothing -> noop
Just started -> do
now <- liftIO getPOSIXTime
when (now - realToFrac started >= durationToPOSIXTime delta)
resetStartTime
when (now - realToFrac started >= durationToPOSIXTime delta) $
resetStartTime u
return True
start :: Maybe Remote -> Incremental -> FilePath -> Key -> CommandStart
@ -420,7 +421,7 @@ badContentRemote remote key = do
return $ (if ok then "dropped from " else "failed to drop from ")
++ Remote.name remote
{- The mode fsck is running in.
 -
 - StartIncremental and ContIncremental both carry the handle of the
 - opened fsck database; NonIncremental carries no database. -}
data Incremental = StartIncremental FsckDb.DbHandle | ContIncremental FsckDb.DbHandle | NonIncremental
data Incremental = StartIncremental FsckDb.FsckHandle | ContIncremental FsckDb.FsckHandle | NonIncremental
runFsck :: Incremental -> FilePath -> Key -> Annex Bool -> CommandStart
runFsck inc file key a = ifM (needFsck inc key)
@ -439,7 +440,7 @@ needFsck :: Incremental -> Key -> Annex Bool
needFsck (ContIncremental h) key = liftIO $ not <$> FsckDb.inDb h key
needFsck _ _ = return True
{- Runs an action on the fsck database handle carried by an incremental
 - fsck (whether it is being started or continued).
 - Does nothing when the fsck is not incremental. -}
withFsckDb :: Incremental -> (FsckDb.DbHandle -> Annex ()) -> Annex ()
withFsckDb :: Incremental -> (FsckDb.FsckHandle -> Annex ()) -> Annex ()
withFsckDb (ContIncremental h) a = a h
withFsckDb (StartIncremental h) a = a h
withFsckDb NonIncremental _ = noop
@ -455,9 +456,9 @@ recordFsckTime inc key = withFsckDb inc $ \h -> liftIO $ FsckDb.addDb h key
- (This is not possible to do on Windows, and so the timestamp in
- the file will only be equal or greater than the modification time.)
-}
recordStartTime :: Annex ()
recordStartTime = do
f <- fromRepo gitAnnexFsckState
recordStartTime :: UUID -> Annex ()
recordStartTime u = do
f <- fromRepo (gitAnnexFsckState u)
createAnnexDirectory $ parentDir f
liftIO $ do
nukeFile f
@ -472,13 +473,13 @@ recordStartTime = do
showTime :: POSIXTime -> String
showTime = show
{- Forgets the recorded incremental fsck start time, by running nukeFile
 - on the fsck state file (the file located by gitAnnexFsckState).
 -
 - NOTE(review): nukeFile is presumably a delete that tolerates a missing
 - file -- confirm in Utility.Directory. -}
resetStartTime :: Annex ()
resetStartTime = liftIO . nukeFile =<< fromRepo gitAnnexFsckState
resetStartTime :: UUID -> Annex ()
resetStartTime u = liftIO . nukeFile =<< fromRepo (gitAnnexFsckState u)
{- Gets the incremental fsck start time. -}
getStartTime :: Annex (Maybe EpochTime)
getStartTime = do
f <- fromRepo gitAnnexFsckState
getStartTime :: UUID -> Annex (Maybe EpochTime)
getStartTime u = do
f <- fromRepo (gitAnnexFsckState u)
liftIO $ catchDefaultIO Nothing $ do
timestamp <- modificationTime <$> getFileStatus f
let fromstatus = Just (realToFrac timestamp)

View file

@ -9,11 +9,10 @@
{-# LANGUAGE OverloadedStrings, GADTs, FlexibleContexts #-}
module Database.Fsck (
FsckHandle,
newPass,
openDb,
closeDb,
H.commitDb,
H.DbHandle,
addDb,
inDb,
FsckedId,
@ -25,6 +24,7 @@ import Locations
import Utility.Directory
import Annex
import Types.Key
import Types.UUID
import Annex.Perms
import Annex.LockFile
@ -37,6 +37,8 @@ import System.Directory
import Data.Maybe
import Control.Applicative
{- Handle to an opened fsck database, paired with the UUID the database
 - was opened for. The UUID is kept so that closeDb can find the matching
 - lock file to unlock. -}
data FsckHandle = FsckHandle H.DbHandle UUID
{- Each key stored in the database has already been fscked as part
- of the latest incremental fsck pass. -}
share [mkPersist sqlSettings, mkMigrate "migrateFsck"] [persistLowerCase|
@ -51,15 +53,15 @@ Fscked
- database. Removing the database in that situation would lead to crashes
- or undefined behavior.
-}
newPass :: Annex Bool
newPass = isJust <$> tryExclusiveLock gitAnnexFsckDbLock go
newPass :: UUID -> Annex Bool
{- The result is True when tryExclusiveLock yields a Just value.
 - NOTE(review): presumably that means the exclusive lock on the fsck
 - database lock file was obtained and go was run -- confirm against
 - Annex.LockFile. -}
newPass u = isJust <$> tryExclusiveLock (gitAnnexFsckDbLock u) go
where
{- Runs nukeFile on the fsck database file. -}
go = liftIO. nukeFile =<< fromRepo gitAnnexFsckDb
go = liftIO. nukeFile =<< fromRepo (gitAnnexFsckDb u)
{- Opens the database, creating it atomically if it doesn't exist yet. -}
openDb :: Annex H.DbHandle
openDb = do
db <- fromRepo gitAnnexFsckDb
openDb :: UUID -> Annex FsckHandle
openDb u = do
db <- fromRepo (gitAnnexFsckDb u)
unlessM (liftIO $ doesFileExist db) $ do
let newdb = db ++ ".new"
h <- liftIO $ H.openDb newdb
@ -68,23 +70,24 @@ openDb = do
liftIO $ H.closeDb h
setAnnexFilePerm newdb
liftIO $ renameFile newdb db
lockFileShared =<< fromRepo gitAnnexFsckDbLock
liftIO $ H.openDb db
lockFileShared =<< fromRepo (gitAnnexFsckDbLock u)
h <- liftIO $ H.openDb db
return $ FsckHandle h u
{- Closes the fsck database, and then unlocks the fsck database lock
 - file (the same lock file that openDb takes a shared lock on). -}
closeDb :: H.DbHandle -> Annex ()
closeDb h = do
closeDb :: FsckHandle -> Annex ()
closeDb (FsckHandle h u) = do
liftIO $ H.closeDb h
unlockFile =<< fromRepo gitAnnexFsckDbLock
unlockFile =<< fromRepo (gitAnnexFsckDbLock u)
{- Records a key in the fsck database, unless it is already present.
 -
 - The insert is handed to H.queueDb rather than run directly.
 - NOTE(review): the 1000 passed to H.queueDb is presumably the number of
 - queued changes after which the queue is committed -- confirm against
 - Database.Handle. -}
addDb :: H.DbHandle -> Key -> IO ()
addDb h k = H.queueDb h 1000 $
addDb :: FsckHandle -> Key -> IO ()
addDb (FsckHandle h _) k = H.queueDb h 1000 $
unlessM (inDb' sk) $
insert_ $ Fscked sk
where
sk = toSKey k
{- Checks whether a key is present in the fsck database.
 - The key is converted with toSKey and looked up with inDb', run
 - against the database via H.runDb. -}
inDb :: H.DbHandle -> Key -> IO Bool
inDb h = H.runDb h . inDb' . toSKey
inDb :: FsckHandle -> Key -> IO Bool
inDb (FsckHandle h _) = H.runDb h . inDb' . toSKey
inDb' :: SKey -> SqlPersistM Bool
inDb' sk = do

View file

@ -29,6 +29,8 @@ module Locations (
gitAnnexBadLocation,
gitAnnexUnusedLog,
gitAnnexFsckState,
gitAnnexFsckDb,
gitAnnexFsckDbLock,
gitAnnexFsckResultsLog,
gitAnnexScheduleState,
gitAnnexTransferDir,
@ -57,8 +59,6 @@ module Locations (
gitAnnexSshDir,
gitAnnexRemotesDir,
gitAnnexAssistantDefaultDir,
gitAnnexFsckDb,
gitAnnexFsckDbLock,
isLinkToAnnex,
HashLevels(..),
hashDirMixed,
@ -220,9 +220,22 @@ gitAnnexBadLocation key r = gitAnnexBadDir r </> keyFile key
gitAnnexUnusedLog :: FilePath -> Git.Repo -> FilePath
gitAnnexUnusedLog prefix r = gitAnnexDir r </> (prefix ++ "unused")
{- .git/annex/fsckstate is used to store information about incremental fscks. -}
gitAnnexFsckState :: Git.Repo -> FilePath
gitAnnexFsckState r = gitAnnexDir r </> "fsckstate"
{- .git/annex/fsck/uuid/ is used to store information about incremental
 - fscks. There is one such directory per UUID, so the fsck state kept
 - for one UUID is separate from the state kept for any other. -}
gitAnnexFsckDir :: UUID -> Git.Repo -> FilePath
gitAnnexFsckDir u r = gitAnnexDir r </> "fsck" </> fromUUID u
{- State file, inside the per-UUID fsck directory, that is used to store
 - information about incremental fscks. The incremental fsck start time
 - is recorded in and read back from this file. -}
gitAnnexFsckState :: UUID -> Git.Repo -> FilePath
gitAnnexFsckState u r = gitAnnexFsckDir u r </> "state"
{- Database used to record fsck info. It lives inside the per-UUID fsck
 - directory, so each UUID has its own database. -}
gitAnnexFsckDb :: UUID -> Git.Repo -> FilePath
gitAnnexFsckDb u r = gitAnnexFsckDir u r </> "fsck.db"
{- Lock file for the fsck database. It lives inside the per-UUID fsck
 - directory, next to the database it protects. -}
gitAnnexFsckDbLock :: UUID -> Git.Repo -> FilePath
gitAnnexFsckDbLock u r = gitAnnexFsckDir u r </> "fsck.lck"
{- .git/annex/fsckresults/uuid is used to store results of git fscks -}
gitAnnexFsckResultsLog :: UUID -> Git.Repo -> FilePath
@ -342,14 +355,6 @@ gitAnnexRemotesDir r = addTrailingPathSeparator $ gitAnnexDir r </> "remotes"
gitAnnexAssistantDefaultDir :: FilePath
gitAnnexAssistantDefaultDir = "annex"
{- Database used to record fsck info. -}
gitAnnexFsckDb :: Git.Repo -> FilePath
gitAnnexFsckDb r = gitAnnexDir r </> "fsck.db"
{- Lock file for the fsck database. -}
gitAnnexFsckDbLock :: Git.Repo -> FilePath
gitAnnexFsckDbLock r = gitAnnexDir r </> "fsck.dbl"
{- Checks a symlink target to see if it appears to point to annexed content.
-
- We only look at paths inside the .git directory, and not at the .git

3
debian/changelog vendored
View file

@ -36,6 +36,9 @@ git-annex (5.20150206) UNRELEASED; urgency=medium
of abusing the sticky bit. Existing sticky bits are ignored,
incremental fscks started by old versions won't be resumed by
this version.
* fsck: Multiple incremental fscks of different repos (some remote)
can now be in progress at the same time in the same repo without it
getting confused about which files have been checked for which remotes.
-- Joey Hess <id@joeyh.name> Fri, 06 Feb 2015 13:57:08 -0400