fsck: Added --distributed and --expire options, for distributed fsck.

This commit is contained in:
Joey Hess 2015-04-01 17:53:16 -04:00
parent 724a94a6d4
commit 656fc1c881
7 changed files with 206 additions and 64 deletions

View file

@ -22,6 +22,7 @@ import Annex.Direct
import Annex.Perms
import Annex.Link
import Logs.Location
import Logs.Presence
import Logs.Trust
import Config.NumCopies
import Annex.UUID
@ -38,6 +39,7 @@ import Data.Time.Clock.POSIX
import Data.Time
import System.Posix.Types (EpochTime)
import System.Locale
import qualified Data.Map as M
cmd :: [Command]
cmd = [withOptions fsckOptions $ command "fsck" paramPaths seek
@ -56,12 +58,22 @@ incrementalScheduleOption :: Option
incrementalScheduleOption = fieldOption [] "incremental-schedule" paramTime
"schedule incremental fscking"
distributedOption :: Option
distributedOption = flagOption [] "distributed" "distributed fsck mode"
expireOption :: Option
expireOption = fieldOption [] "expire"
(paramRepeating $ paramOptional paramRemote ++ ":" ++ paramTime)
"distributed expire mode"
fsckOptions :: [Option]
fsckOptions =
[ fsckFromOption
, startIncrementalOption
, moreIncrementalOption
, incrementalScheduleOption
, distributedOption
, expireOption
] ++ keyOptions ++ annexedMatchingOptions
seek :: CommandSeek
@ -69,62 +81,28 @@ seek ps = do
from <- getOptionField fsckFromOption Remote.byNameWithUUID
u <- maybe getUUID (pure . Remote.uuid) from
i <- getIncremental u
d <- getDistributed
withKeyOptions False
(\k -> startKey i k =<< getNumCopies)
(withFilesInGit $ whenAnnexed $ start from i)
(\k -> startKey i d k =<< getNumCopies)
(withFilesInGit $ whenAnnexed $ start from i d)
ps
withFsckDb i FsckDb.closeDb
getIncremental :: UUID -> Annex Incremental
getIncremental u = do
i <- maybe (return False) (checkschedule . parseDuration)
=<< Annex.getField (optionName incrementalScheduleOption)
starti <- Annex.getFlag (optionName startIncrementalOption)
morei <- Annex.getFlag (optionName moreIncrementalOption)
case (i, starti, morei) of
(False, False, False) -> return NonIncremental
(False, True, False) -> startIncremental
(False ,False, True) -> contIncremental
(True, False, False) ->
maybe startIncremental (const contIncremental)
=<< getStartTime u
_ -> error "Specify only one of --incremental, --more, or --incremental-schedule"
where
startIncremental = do
recordStartTime u
ifM (FsckDb.newPass u)
( StartIncremental <$> FsckDb.openDb u
, error "Cannot start a new --incremental fsck pass; another fsck process is already running."
)
contIncremental = ContIncremental <$> FsckDb.openDb u
checkschedule Nothing = error "bad --incremental-schedule value"
checkschedule (Just delta) = do
Annex.addCleanup FsckCleanup $ do
v <- getStartTime u
case v of
Nothing -> noop
Just started -> do
now <- liftIO getPOSIXTime
when (now - realToFrac started >= durationToPOSIXTime delta) $
resetStartTime u
return True
start :: Maybe Remote -> Incremental -> FilePath -> Key -> CommandStart
start from inc file key = do
start :: Maybe Remote -> Incremental -> Distributed -> FilePath -> Key -> CommandStart
start from inc dist file key = do
v <- Backend.getBackend file key
case v of
Nothing -> stop
Just backend -> do
numcopies <- getFileNumCopies file
case from of
Nothing -> go $ perform key file backend numcopies
Just r -> go $ performRemote key file backend numcopies r
Nothing -> go $ perform dist key file backend numcopies
Just r -> go $ performRemote dist key file backend numcopies r
where
go = runFsck inc file key
perform :: Key -> FilePath -> Backend -> NumCopies -> Annex Bool
perform key file backend numcopies = check
perform :: Distributed -> Key -> FilePath -> Backend -> NumCopies -> Annex Bool
perform dist key file backend numcopies = check
-- order matters
[ fixLink key file
, verifyLocationLog key file
@ -132,13 +110,14 @@ perform key file backend numcopies = check
, verifyDirectMode key file
, checkKeySize key
, checkBackend backend key (Just file)
, checkDistributed dist key Nothing
, checkKeyNumCopies key file numcopies
]
{- To fsck a remote, the content is retrieved to a tmp file,
- and checked locally. -}
performRemote :: Key -> FilePath -> Backend -> NumCopies -> Remote -> Annex Bool
performRemote key file backend numcopies remote =
performRemote :: Distributed -> Key -> FilePath -> Backend -> NumCopies -> Remote -> Annex Bool
performRemote dist key file backend numcopies remote =
dispatch =<< Remote.hasKey remote key
where
dispatch (Left err) = do
@ -157,6 +136,7 @@ performRemote key file backend numcopies remote =
[ verifyLocationLogRemote key file remote present
, checkKeySizeRemote key remote localcopy
, checkBackendRemote backend key remote localcopy
, checkDistributed dist key (Just $ Remote.uuid remote)
, checkKeyNumCopies key file numcopies
]
withtmp a = do
@ -177,18 +157,19 @@ performRemote key file backend numcopies remote =
)
dummymeter _ = noop
startKey :: Incremental -> Key -> NumCopies -> CommandStart
startKey inc key numcopies =
startKey :: Incremental -> Distributed -> Key -> NumCopies -> CommandStart
startKey inc dist key numcopies =
case Backend.maybeLookupBackendName (Types.Key.keyBackendName key) of
Nothing -> stop
Just backend -> runFsck inc (key2file key) key $
performKey key backend numcopies
performKey dist key backend numcopies
performKey :: Key -> Backend -> NumCopies -> Annex Bool
performKey key backend numcopies = check
performKey :: Distributed -> Key -> Backend -> NumCopies -> Annex Bool
performKey dist key backend numcopies = check
[ verifyLocationLog key (key2file key)
, checkKeySize key
, checkBackend backend key Nothing
, checkDistributed dist key Nothing
, checkKeyNumCopies key (key2file key) numcopies
]
@ -421,8 +402,6 @@ badContentRemote remote key = do
return $ (if ok then "dropped from " else "failed to drop from ")
++ Remote.name remote
data Incremental = StartIncremental FsckDb.FsckHandle | ContIncremental FsckDb.FsckHandle | NonIncremental
runFsck :: Incremental -> FilePath -> Key -> Annex Bool -> CommandStart
runFsck inc file key a = ifM (needFsck inc key)
( do
@ -497,3 +476,106 @@ getStartTime u = do
#else
fromfile >= fromstatus
#endif
data Incremental = StartIncremental FsckDb.FsckHandle | ContIncremental FsckDb.FsckHandle | NonIncremental
getIncremental :: UUID -> Annex Incremental
getIncremental u = do
i <- maybe (return False) (checkschedule . parseDuration)
=<< Annex.getField (optionName incrementalScheduleOption)
starti <- getOptionFlag startIncrementalOption
morei <- getOptionFlag moreIncrementalOption
case (i, starti, morei) of
(False, False, False) -> return NonIncremental
(False, True, False) -> startIncremental
(False ,False, True) -> contIncremental
(True, False, False) ->
maybe startIncremental (const contIncremental)
=<< getStartTime u
_ -> error "Specify only one of --incremental, --more, or --incremental-schedule"
where
startIncremental = do
recordStartTime u
ifM (FsckDb.newPass u)
( StartIncremental <$> FsckDb.openDb u
, error "Cannot start a new --incremental fsck pass; another fsck process is already running."
)
contIncremental = ContIncremental <$> FsckDb.openDb u
checkschedule Nothing = error "bad --incremental-schedule value"
checkschedule (Just delta) = do
Annex.addCleanup FsckCleanup $ do
v <- getStartTime u
case v of
Nothing -> noop
Just started -> do
now <- liftIO getPOSIXTime
when (now - realToFrac started >= durationToPOSIXTime delta) $
resetStartTime u
return True
data Distributed
= NonDistributed
| Distributed POSIXTime
| DistributedExpire POSIXTime (M.Map (Maybe UUID) (Maybe POSIXTime))
deriving (Show)
getDistributed :: Annex Distributed
getDistributed = go =<< getOptionField expireOption parseexpire
where
go (Just m) = DistributedExpire <$> liftIO getPOSIXTime <*> pure m
go Nothing = ifM (getOptionFlag distributedOption)
( Distributed <$> liftIO getPOSIXTime
, return NonDistributed
)
parseexpire Nothing = return Nothing
parseexpire (Just s) = do
now <- liftIO getPOSIXTime
Just . M.fromList <$> mapM (parseexpire' now) (words s)
parseexpire' now s = case separate (== ':') s of
(t, []) -> return (Nothing, parsetime now t)
(n, t) -> do
r <- Remote.nameToUUID n
return (Just r, parsetime now t)
parsetime _ "never" = Nothing
parsetime now s = case parseDuration s of
Nothing -> error $ "bad expire time: " ++ s
Just d -> Just (now - durationToPOSIXTime d)
checkDistributed :: Distributed -> Key -> Maybe UUID -> Annex Bool
checkDistributed d k mu = do
go d
return True
where
go NonDistributed = noop
-- This is called after fsck has checked the key's content, so
-- if the key is present in the annex now, we just need to update
-- the location log with the timestamp of the start of the fsck.
--
-- Note that reusing this timestamp means that the same log line
-- is generated for each key, which keeps the size increase
-- of the git-annex branch down.
go (Distributed ts) = whenM (inAnnex k) $ do
u <- maybe getUUID return mu
logChange' (logThen ts) k u InfoPresent
-- Get the location log for the key, and expire all entries
-- that are older than their uuid's listed expiration date.
-- (Except for the local repository.)
go (DistributedExpire ts m) = do
ls <- locationLog k
hereu <- getUUID
forM_ ls $ \l -> do
let u = toUUID (info l)
unless (u == hereu) $
case lookupexpire u of
Just (Just expiretime)
| date l < expiretime ->
logChange' (logThen ts) k u InfoMissing
_ -> noop
where
lookupexpire u = headMaybe $ catMaybes $
map (`M.lookup` m) [Just u, Nothing]

View file

@ -17,8 +17,10 @@ module Logs.Location (
LogStatus(..),
logStatus,
logChange,
logChange',
loggedLocations,
loggedLocationsHistorical,
locationLog,
loggedKeys,
loggedKeysFor,
) where
@ -39,24 +41,32 @@ logStatus key s = do
{- Log a change in the presence of a key's value in a repository. -}
logChange :: Key -> UUID -> LogStatus -> Annex ()
logChange key (UUID u) s = do
logChange = logChange' logNow
logChange' :: (LogStatus -> String -> Annex LogLine) -> Key -> UUID -> LogStatus -> Annex ()
logChange' mklog key (UUID u) s = do
config <- Annex.getGitConfig
addLog (locationLogFile config key) =<< logNow s u
logChange _ NoUUID _ = noop
addLog (locationLogFile config key) =<< mklog s u
logChange' _ _ NoUUID _ = noop
{- Returns a list of repository UUIDs that, according to the log, have
- the value of a key. -}
loggedLocations :: Key -> Annex [UUID]
loggedLocations = getLoggedLocations currentLog
loggedLocations = getLoggedLocations currentLogInfo
{- Gets the location log on a particular date. -}
loggedLocationsHistorical :: RefDate -> Key -> Annex [UUID]
loggedLocationsHistorical = getLoggedLocations . historicalLog
loggedLocationsHistorical = getLoggedLocations . historicalLogInfo
getLoggedLocations :: (FilePath -> Annex [String]) -> Key -> Annex [UUID]
getLoggedLocations getter key = do
config <- Annex.getGitConfig
map toUUID <$> (getter . locationLogFile config) key
map toUUID <$> getter (locationLogFile config key)
locationLog :: Key -> Annex [LogLine]
locationLog key = do
config <- Annex.getGitConfig
currentLog (locationLogFile config key)
{- Finds all keys that have location log information.
- (There may be duplicate keys in the list.) -}

View file

@ -16,8 +16,10 @@ module Logs.Presence (
addLog,
readLog,
logNow,
logThen,
currentLog,
historicalLog
currentLogInfo,
historicalLogInfo,
) where
import Data.Time.Clock.POSIX
@ -42,15 +44,21 @@ logNow s i = do
now <- liftIO getPOSIXTime
return $ LogLine now s i
logThen :: POSIXTime -> LogStatus -> String -> Annex LogLine
logThen t s i = return $ LogLine t s i
{- Reads a log and returns only the info that is still in effect. -}
currentLog :: FilePath -> Annex [String]
currentLog file = map info . filterPresent <$> readLog file
currentLogInfo :: FilePath -> Annex [String]
currentLogInfo file = map info <$> currentLog file
currentLog :: FilePath -> Annex [LogLine]
currentLog file = filterPresent <$> readLog file
{- Reads a historical version of a log and returns the info that was in
- effect at that time.
-
- The date is formatted as shown in gitrevisions man page.
-}
historicalLog :: RefDate -> FilePath -> Annex [String]
historicalLog refdate file = map info . filterPresent . parseLog
historicalLogInfo :: RefDate -> FilePath -> Annex [String]
historicalLogInfo refdate file = map info . filterPresent . parseLog
<$> Annex.Branch.getHistorical refdate file

View file

@ -43,7 +43,7 @@ getUrls key = do
where
go [] = return []
go (l:ls) = do
us <- currentLog l
us <- currentLogInfo l
if null us
then go ls
else return us

2
debian/changelog vendored
View file

@ -20,6 +20,8 @@ git-annex (5.20150328) UNRELEASED; urgency=medium
multiple files.
* import: --deduplicate and --cleanduplicates now output the keys
corresponding to duplicated files they process.
* fsck: Added --distributed and --expire options,
for distributed fsck.
-- Joey Hess <id@joeyh.name> Fri, 27 Mar 2015 16:04:43 -0400

View file

@ -134,6 +134,8 @@ It will probably take just a few hours to code.
With that change, the server can check for files that not enough clients
have verified they have recently, and distribute them to more clients.
(This is now implemented.)
Note that bad actors can lie about this verification; it's not a proof they
still have the file. But, a bad actor could prove they have a file, and
refuse to give it back if the IA needed to restore the backup, too.

View file

@ -53,6 +53,44 @@ With parameters, only the specified files are checked.
git annex fsck --incremental-schedule 30d --time-limit 5h
* `--distributed`
Normally, fsck only fixes the git-annex location logs when an inconsistecy
is detected. In distributed mode, each file that is checked will result
in a location log update noting the time that it was present.
This is useful in situations where repositories cannot be trusted to
continue to exist. By running a periodic distributed fsck, those
repositories can verify that they still exist and that the information
about their contents is still accurate.
This is not the default mode, because each distributed fsck increases
the size of the git-annex branch. While it takes care to log identical
location tracking lines for all keys, which will delta-compress well,
there is still overhead in committing the changes. If this causes
the git-annex branch to grow too big, it can be pruned using
[[git-annex-forget]](1)
* `--expire="[repository:]time`..."
This option makes the fsck check for location logs of the specified
repository that have not been updated by a distributed fsck within the
specified time period. Such stale location logs are then thrown out, so
git-annex will no longer think that a repository contains data, if it is
not participating in distributed fscking.
The repository can be specified using the name of a remote,
or the description or uuid of the repository. If a time is specified
without a repository, it is used as the default value for all
repositories. Note that location logs for the current repository are
never expired, since they can be verified directly.
The time is in the form "60d" or "1y". A time of "never" will disable
expiration.
Note that a remote can always run `fsck` later on to re-update the
location log if it was expired in error.
* `--numcopies=N`
Override the normally configured number of copies.