fsck: Added --distributed and --expire options, for distributed fsck.

This commit is contained in:
Joey Hess 2015-04-01 17:53:16 -04:00
parent 724a94a6d4
commit 656fc1c881
7 changed files with 206 additions and 64 deletions

View file

@ -22,6 +22,7 @@ import Annex.Direct
import Annex.Perms import Annex.Perms
import Annex.Link import Annex.Link
import Logs.Location import Logs.Location
import Logs.Presence
import Logs.Trust import Logs.Trust
import Config.NumCopies import Config.NumCopies
import Annex.UUID import Annex.UUID
@ -38,6 +39,7 @@ import Data.Time.Clock.POSIX
import Data.Time import Data.Time
import System.Posix.Types (EpochTime) import System.Posix.Types (EpochTime)
import System.Locale import System.Locale
import qualified Data.Map as M
cmd :: [Command] cmd :: [Command]
cmd = [withOptions fsckOptions $ command "fsck" paramPaths seek cmd = [withOptions fsckOptions $ command "fsck" paramPaths seek
@ -56,12 +58,22 @@ incrementalScheduleOption :: Option
incrementalScheduleOption = fieldOption [] "incremental-schedule" paramTime incrementalScheduleOption = fieldOption [] "incremental-schedule" paramTime
"schedule incremental fscking" "schedule incremental fscking"
distributedOption :: Option
distributedOption = flagOption [] "distributed" "distributed fsck mode"
expireOption :: Option
expireOption = fieldOption [] "expire"
(paramRepeating $ paramOptional paramRemote ++ ":" ++ paramTime)
"distributed expire mode"
fsckOptions :: [Option] fsckOptions :: [Option]
fsckOptions = fsckOptions =
[ fsckFromOption [ fsckFromOption
, startIncrementalOption , startIncrementalOption
, moreIncrementalOption , moreIncrementalOption
, incrementalScheduleOption , incrementalScheduleOption
, distributedOption
, expireOption
] ++ keyOptions ++ annexedMatchingOptions ] ++ keyOptions ++ annexedMatchingOptions
seek :: CommandSeek seek :: CommandSeek
@ -69,62 +81,28 @@ seek ps = do
from <- getOptionField fsckFromOption Remote.byNameWithUUID from <- getOptionField fsckFromOption Remote.byNameWithUUID
u <- maybe getUUID (pure . Remote.uuid) from u <- maybe getUUID (pure . Remote.uuid) from
i <- getIncremental u i <- getIncremental u
d <- getDistributed
withKeyOptions False withKeyOptions False
(\k -> startKey i k =<< getNumCopies) (\k -> startKey i d k =<< getNumCopies)
(withFilesInGit $ whenAnnexed $ start from i) (withFilesInGit $ whenAnnexed $ start from i d)
ps ps
withFsckDb i FsckDb.closeDb withFsckDb i FsckDb.closeDb
getIncremental :: UUID -> Annex Incremental start :: Maybe Remote -> Incremental -> Distributed -> FilePath -> Key -> CommandStart
getIncremental u = do start from inc dist file key = do
i <- maybe (return False) (checkschedule . parseDuration)
=<< Annex.getField (optionName incrementalScheduleOption)
starti <- Annex.getFlag (optionName startIncrementalOption)
morei <- Annex.getFlag (optionName moreIncrementalOption)
case (i, starti, morei) of
(False, False, False) -> return NonIncremental
(False, True, False) -> startIncremental
(False ,False, True) -> contIncremental
(True, False, False) ->
maybe startIncremental (const contIncremental)
=<< getStartTime u
_ -> error "Specify only one of --incremental, --more, or --incremental-schedule"
where
startIncremental = do
recordStartTime u
ifM (FsckDb.newPass u)
( StartIncremental <$> FsckDb.openDb u
, error "Cannot start a new --incremental fsck pass; another fsck process is already running."
)
contIncremental = ContIncremental <$> FsckDb.openDb u
checkschedule Nothing = error "bad --incremental-schedule value"
checkschedule (Just delta) = do
Annex.addCleanup FsckCleanup $ do
v <- getStartTime u
case v of
Nothing -> noop
Just started -> do
now <- liftIO getPOSIXTime
when (now - realToFrac started >= durationToPOSIXTime delta) $
resetStartTime u
return True
start :: Maybe Remote -> Incremental -> FilePath -> Key -> CommandStart
start from inc file key = do
v <- Backend.getBackend file key v <- Backend.getBackend file key
case v of case v of
Nothing -> stop Nothing -> stop
Just backend -> do Just backend -> do
numcopies <- getFileNumCopies file numcopies <- getFileNumCopies file
case from of case from of
Nothing -> go $ perform key file backend numcopies Nothing -> go $ perform dist key file backend numcopies
Just r -> go $ performRemote key file backend numcopies r Just r -> go $ performRemote dist key file backend numcopies r
where where
go = runFsck inc file key go = runFsck inc file key
perform :: Key -> FilePath -> Backend -> NumCopies -> Annex Bool perform :: Distributed -> Key -> FilePath -> Backend -> NumCopies -> Annex Bool
perform key file backend numcopies = check perform dist key file backend numcopies = check
-- order matters -- order matters
[ fixLink key file [ fixLink key file
, verifyLocationLog key file , verifyLocationLog key file
@ -132,13 +110,14 @@ perform key file backend numcopies = check
, verifyDirectMode key file , verifyDirectMode key file
, checkKeySize key , checkKeySize key
, checkBackend backend key (Just file) , checkBackend backend key (Just file)
, checkDistributed dist key Nothing
, checkKeyNumCopies key file numcopies , checkKeyNumCopies key file numcopies
] ]
{- To fsck a remote, the content is retrieved to a tmp file, {- To fsck a remote, the content is retrieved to a tmp file,
- and checked locally. -} - and checked locally. -}
performRemote :: Key -> FilePath -> Backend -> NumCopies -> Remote -> Annex Bool performRemote :: Distributed -> Key -> FilePath -> Backend -> NumCopies -> Remote -> Annex Bool
performRemote key file backend numcopies remote = performRemote dist key file backend numcopies remote =
dispatch =<< Remote.hasKey remote key dispatch =<< Remote.hasKey remote key
where where
dispatch (Left err) = do dispatch (Left err) = do
@ -157,6 +136,7 @@ performRemote key file backend numcopies remote =
[ verifyLocationLogRemote key file remote present [ verifyLocationLogRemote key file remote present
, checkKeySizeRemote key remote localcopy , checkKeySizeRemote key remote localcopy
, checkBackendRemote backend key remote localcopy , checkBackendRemote backend key remote localcopy
, checkDistributed dist key (Just $ Remote.uuid remote)
, checkKeyNumCopies key file numcopies , checkKeyNumCopies key file numcopies
] ]
withtmp a = do withtmp a = do
@ -177,18 +157,19 @@ performRemote key file backend numcopies remote =
) )
dummymeter _ = noop dummymeter _ = noop
startKey :: Incremental -> Key -> NumCopies -> CommandStart startKey :: Incremental -> Distributed -> Key -> NumCopies -> CommandStart
startKey inc key numcopies = startKey inc dist key numcopies =
case Backend.maybeLookupBackendName (Types.Key.keyBackendName key) of case Backend.maybeLookupBackendName (Types.Key.keyBackendName key) of
Nothing -> stop Nothing -> stop
Just backend -> runFsck inc (key2file key) key $ Just backend -> runFsck inc (key2file key) key $
performKey key backend numcopies performKey dist key backend numcopies
performKey :: Key -> Backend -> NumCopies -> Annex Bool performKey :: Distributed -> Key -> Backend -> NumCopies -> Annex Bool
performKey key backend numcopies = check performKey dist key backend numcopies = check
[ verifyLocationLog key (key2file key) [ verifyLocationLog key (key2file key)
, checkKeySize key , checkKeySize key
, checkBackend backend key Nothing , checkBackend backend key Nothing
, checkDistributed dist key Nothing
, checkKeyNumCopies key (key2file key) numcopies , checkKeyNumCopies key (key2file key) numcopies
] ]
@ -421,8 +402,6 @@ badContentRemote remote key = do
return $ (if ok then "dropped from " else "failed to drop from ") return $ (if ok then "dropped from " else "failed to drop from ")
++ Remote.name remote ++ Remote.name remote
data Incremental = StartIncremental FsckDb.FsckHandle | ContIncremental FsckDb.FsckHandle | NonIncremental
runFsck :: Incremental -> FilePath -> Key -> Annex Bool -> CommandStart runFsck :: Incremental -> FilePath -> Key -> Annex Bool -> CommandStart
runFsck inc file key a = ifM (needFsck inc key) runFsck inc file key a = ifM (needFsck inc key)
( do ( do
@ -497,3 +476,106 @@ getStartTime u = do
#else #else
fromfile >= fromstatus fromfile >= fromstatus
#endif #endif
data Incremental = StartIncremental FsckDb.FsckHandle | ContIncremental FsckDb.FsckHandle | NonIncremental
getIncremental :: UUID -> Annex Incremental
getIncremental u = do
i <- maybe (return False) (checkschedule . parseDuration)
=<< Annex.getField (optionName incrementalScheduleOption)
starti <- getOptionFlag startIncrementalOption
morei <- getOptionFlag moreIncrementalOption
case (i, starti, morei) of
(False, False, False) -> return NonIncremental
(False, True, False) -> startIncremental
(False ,False, True) -> contIncremental
(True, False, False) ->
maybe startIncremental (const contIncremental)
=<< getStartTime u
_ -> error "Specify only one of --incremental, --more, or --incremental-schedule"
where
startIncremental = do
recordStartTime u
ifM (FsckDb.newPass u)
( StartIncremental <$> FsckDb.openDb u
, error "Cannot start a new --incremental fsck pass; another fsck process is already running."
)
contIncremental = ContIncremental <$> FsckDb.openDb u
checkschedule Nothing = error "bad --incremental-schedule value"
checkschedule (Just delta) = do
Annex.addCleanup FsckCleanup $ do
v <- getStartTime u
case v of
Nothing -> noop
Just started -> do
now <- liftIO getPOSIXTime
when (now - realToFrac started >= durationToPOSIXTime delta) $
resetStartTime u
return True
data Distributed
= NonDistributed
| Distributed POSIXTime
| DistributedExpire POSIXTime (M.Map (Maybe UUID) (Maybe POSIXTime))
deriving (Show)
getDistributed :: Annex Distributed
getDistributed = go =<< getOptionField expireOption parseexpire
where
go (Just m) = DistributedExpire <$> liftIO getPOSIXTime <*> pure m
go Nothing = ifM (getOptionFlag distributedOption)
( Distributed <$> liftIO getPOSIXTime
, return NonDistributed
)
parseexpire Nothing = return Nothing
parseexpire (Just s) = do
now <- liftIO getPOSIXTime
Just . M.fromList <$> mapM (parseexpire' now) (words s)
parseexpire' now s = case separate (== ':') s of
(t, []) -> return (Nothing, parsetime now t)
(n, t) -> do
r <- Remote.nameToUUID n
return (Just r, parsetime now t)
parsetime _ "never" = Nothing
parsetime now s = case parseDuration s of
Nothing -> error $ "bad expire time: " ++ s
Just d -> Just (now - durationToPOSIXTime d)
checkDistributed :: Distributed -> Key -> Maybe UUID -> Annex Bool
checkDistributed d k mu = do
go d
return True
where
go NonDistributed = noop
-- This is called after fsck has checked the key's content, so
-- if the key is present in the annex now, we just need to update
-- the location log with the timestamp of the start of the fsck.
--
-- Note that reusing this timestamp means that the same log line
-- is generated for each key, which keeps the size increase
-- of the git-annex branch down.
go (Distributed ts) = whenM (inAnnex k) $ do
u <- maybe getUUID return mu
logChange' (logThen ts) k u InfoPresent
-- Get the location log for the key, and expire all entries
-- that are older than their uuid's listed expiration date.
-- (Except for the local repository.)
go (DistributedExpire ts m) = do
ls <- locationLog k
hereu <- getUUID
forM_ ls $ \l -> do
let u = toUUID (info l)
unless (u == hereu) $
case lookupexpire u of
Just (Just expiretime)
| date l < expiretime ->
logChange' (logThen ts) k u InfoMissing
_ -> noop
where
lookupexpire u = headMaybe $ catMaybes $
map (`M.lookup` m) [Just u, Nothing]

View file

@ -17,8 +17,10 @@ module Logs.Location (
LogStatus(..), LogStatus(..),
logStatus, logStatus,
logChange, logChange,
logChange',
loggedLocations, loggedLocations,
loggedLocationsHistorical, loggedLocationsHistorical,
locationLog,
loggedKeys, loggedKeys,
loggedKeysFor, loggedKeysFor,
) where ) where
@ -39,24 +41,32 @@ logStatus key s = do
{- Log a change in the presence of a key's value in a repository. -} {- Log a change in the presence of a key's value in a repository. -}
logChange :: Key -> UUID -> LogStatus -> Annex () logChange :: Key -> UUID -> LogStatus -> Annex ()
logChange key (UUID u) s = do logChange = logChange' logNow
logChange' :: (LogStatus -> String -> Annex LogLine) -> Key -> UUID -> LogStatus -> Annex ()
logChange' mklog key (UUID u) s = do
config <- Annex.getGitConfig config <- Annex.getGitConfig
addLog (locationLogFile config key) =<< logNow s u addLog (locationLogFile config key) =<< mklog s u
logChange _ NoUUID _ = noop logChange' _ _ NoUUID _ = noop
{- Returns a list of repository UUIDs that, according to the log, have {- Returns a list of repository UUIDs that, according to the log, have
- the value of a key. -} - the value of a key. -}
loggedLocations :: Key -> Annex [UUID] loggedLocations :: Key -> Annex [UUID]
loggedLocations = getLoggedLocations currentLog loggedLocations = getLoggedLocations currentLogInfo
{- Gets the location log on a particular date. -} {- Gets the location log on a particular date. -}
loggedLocationsHistorical :: RefDate -> Key -> Annex [UUID] loggedLocationsHistorical :: RefDate -> Key -> Annex [UUID]
loggedLocationsHistorical = getLoggedLocations . historicalLog loggedLocationsHistorical = getLoggedLocations . historicalLogInfo
getLoggedLocations :: (FilePath -> Annex [String]) -> Key -> Annex [UUID] getLoggedLocations :: (FilePath -> Annex [String]) -> Key -> Annex [UUID]
getLoggedLocations getter key = do getLoggedLocations getter key = do
config <- Annex.getGitConfig config <- Annex.getGitConfig
map toUUID <$> (getter . locationLogFile config) key map toUUID <$> getter (locationLogFile config key)
locationLog :: Key -> Annex [LogLine]
locationLog key = do
config <- Annex.getGitConfig
currentLog (locationLogFile config key)
{- Finds all keys that have location log information. {- Finds all keys that have location log information.
- (There may be duplicate keys in the list.) -} - (There may be duplicate keys in the list.) -}

View file

@ -16,8 +16,10 @@ module Logs.Presence (
addLog, addLog,
readLog, readLog,
logNow, logNow,
logThen,
currentLog, currentLog,
historicalLog currentLogInfo,
historicalLogInfo,
) where ) where
import Data.Time.Clock.POSIX import Data.Time.Clock.POSIX
@ -42,15 +44,21 @@ logNow s i = do
now <- liftIO getPOSIXTime now <- liftIO getPOSIXTime
return $ LogLine now s i return $ LogLine now s i
logThen :: POSIXTime -> LogStatus -> String -> Annex LogLine
logThen t s i = return $ LogLine t s i
{- Reads a log and returns only the info that is still in effect. -} {- Reads a log and returns only the info that is still in effect. -}
currentLog :: FilePath -> Annex [String] currentLogInfo :: FilePath -> Annex [String]
currentLog file = map info . filterPresent <$> readLog file currentLogInfo file = map info <$> currentLog file
currentLog :: FilePath -> Annex [LogLine]
currentLog file = filterPresent <$> readLog file
{- Reads a historical version of a log and returns the info that was in {- Reads a historical version of a log and returns the info that was in
- effect at that time. - effect at that time.
- -
- The date is formatted as shown in gitrevisions man page. - The date is formatted as shown in gitrevisions man page.
-} -}
historicalLog :: RefDate -> FilePath -> Annex [String] historicalLogInfo :: RefDate -> FilePath -> Annex [String]
historicalLog refdate file = map info . filterPresent . parseLog historicalLogInfo refdate file = map info . filterPresent . parseLog
<$> Annex.Branch.getHistorical refdate file <$> Annex.Branch.getHistorical refdate file

View file

@ -43,7 +43,7 @@ getUrls key = do
where where
go [] = return [] go [] = return []
go (l:ls) = do go (l:ls) = do
us <- currentLog l us <- currentLogInfo l
if null us if null us
then go ls then go ls
else return us else return us

2
debian/changelog vendored
View file

@ -20,6 +20,8 @@ git-annex (5.20150328) UNRELEASED; urgency=medium
multiple files. multiple files.
* import: --deduplicate and --cleanduplicates now output the keys * import: --deduplicate and --cleanduplicates now output the keys
corresponding to duplicated files they process. corresponding to duplicated files they process.
* fsck: Added --distributed and --expire options,
for distributed fsck.
-- Joey Hess <id@joeyh.name> Fri, 27 Mar 2015 16:04:43 -0400 -- Joey Hess <id@joeyh.name> Fri, 27 Mar 2015 16:04:43 -0400

View file

@ -134,6 +134,8 @@ It will probably take just a few hours to code.
With that change, the server can check for files that not enough clients With that change, the server can check for files that not enough clients
have verified they have recently, and distribute them to more clients. have verified they have recently, and distribute them to more clients.
(This is now implemented.)
Note that bad actors can lie about this verification; it's not a proof they Note that bad actors can lie about this verification; it's not a proof they
still have the file. But, a bad actor could prove they have a file, and still have the file. But, a bad actor could prove they have a file, and
refuse to give it back if the IA needed to restore the backup, too. refuse to give it back if the IA needed to restore the backup, too.

View file

@ -53,6 +53,44 @@ With parameters, only the specified files are checked.
git annex fsck --incremental-schedule 30d --time-limit 5h git annex fsck --incremental-schedule 30d --time-limit 5h
* `--distributed`
Normally, fsck only fixes the git-annex location logs when an inconsistecy
is detected. In distributed mode, each file that is checked will result
in a location log update noting the time that it was present.
This is useful in situations where repositories cannot be trusted to
continue to exist. By running a periodic distributed fsck, those
repositories can verify that they still exist and that the information
about their contents is still accurate.
This is not the default mode, because each distributed fsck increases
the size of the git-annex branch. While it takes care to log identical
location tracking lines for all keys, which will delta-compress well,
there is still overhead in committing the changes. If this causes
the git-annex branch to grow too big, it can be pruned using
[[git-annex-forget]](1)
* `--expire="[repository:]time`..."
This option makes the fsck check for location logs of the specified
repository that have not been updated by a distributed fsck within the
specified time period. Such stale location logs are then thrown out, so
git-annex will no longer think that a repository contains data, if it is
not participating in distributed fscking.
The repository can be specified using the name of a remote,
or the description or uuid of the repository. If a time is specified
without a repository, it is used as the default value for all
repositories. Note that location logs for the current repository are
never expired, since they can be verified directly.
The time is in the form "60d" or "1y". A time of "never" will disable
expiration.
Note that a remote can always run `fsck` later on to re-update the
location log if it was expired in error.
* `--numcopies=N` * `--numcopies=N`
Override the normally configured number of copies. Override the normally configured number of copies.