fsck: Added --distributed and --expire options, for distributed fsck.
parent 724a94a6d4
commit 656fc1c881

7 changed files with 206 additions and 64 deletions
186 Command/Fsck.hs

@@ -22,6 +22,7 @@ import Annex.Direct
import Annex.Perms
import Annex.Link
import Logs.Location
import Logs.Presence
import Logs.Trust
import Config.NumCopies
import Annex.UUID

@@ -38,6 +39,7 @@ import Data.Time.Clock.POSIX
import Data.Time
import System.Posix.Types (EpochTime)
import System.Locale
import qualified Data.Map as M

cmd :: [Command]
cmd = [withOptions fsckOptions $ command "fsck" paramPaths seek

@@ -56,12 +58,22 @@ incrementalScheduleOption :: Option
incrementalScheduleOption = fieldOption [] "incremental-schedule" paramTime
	"schedule incremental fscking"

distributedOption :: Option
distributedOption = flagOption [] "distributed" "distributed fsck mode"

expireOption :: Option
expireOption = fieldOption [] "expire"
	(paramRepeating $ paramOptional paramRemote ++ ":" ++ paramTime)
	"distributed expire mode"

fsckOptions :: [Option]
fsckOptions =
	[ fsckFromOption
	, startIncrementalOption
	, moreIncrementalOption
	, incrementalScheduleOption
	, distributedOption
	, expireOption
	] ++ keyOptions ++ annexedMatchingOptions

seek :: CommandSeek

@@ -69,62 +81,28 @@ seek ps = do
	from <- getOptionField fsckFromOption Remote.byNameWithUUID
	u <- maybe getUUID (pure . Remote.uuid) from
	i <- getIncremental u
	d <- getDistributed
	withKeyOptions False
		(\k -> startKey i k =<< getNumCopies)
		(withFilesInGit $ whenAnnexed $ start from i)
		(\k -> startKey i d k =<< getNumCopies)
		(withFilesInGit $ whenAnnexed $ start from i d)
		ps
	withFsckDb i FsckDb.closeDb

getIncremental :: UUID -> Annex Incremental
getIncremental u = do
	i <- maybe (return False) (checkschedule . parseDuration)
		=<< Annex.getField (optionName incrementalScheduleOption)
	starti <- Annex.getFlag (optionName startIncrementalOption)
	morei <- Annex.getFlag (optionName moreIncrementalOption)
	case (i, starti, morei) of
		(False, False, False) -> return NonIncremental
		(False, True, False) -> startIncremental
		(False ,False, True) -> contIncremental
		(True, False, False) ->
			maybe startIncremental (const contIncremental)
				=<< getStartTime u
		_ -> error "Specify only one of --incremental, --more, or --incremental-schedule"
  where
	startIncremental = do
		recordStartTime u
		ifM (FsckDb.newPass u)
			( StartIncremental <$> FsckDb.openDb u
			, error "Cannot start a new --incremental fsck pass; another fsck process is already running."
			)
	contIncremental = ContIncremental <$> FsckDb.openDb u

	checkschedule Nothing = error "bad --incremental-schedule value"
	checkschedule (Just delta) = do
		Annex.addCleanup FsckCleanup $ do
			v <- getStartTime u
			case v of
				Nothing -> noop
				Just started -> do
					now <- liftIO getPOSIXTime
					when (now - realToFrac started >= durationToPOSIXTime delta) $
						resetStartTime u
		return True

start :: Maybe Remote -> Incremental -> FilePath -> Key -> CommandStart
start from inc file key = do
start :: Maybe Remote -> Incremental -> Distributed -> FilePath -> Key -> CommandStart
start from inc dist file key = do
	v <- Backend.getBackend file key
	case v of
		Nothing -> stop
		Just backend -> do
			numcopies <- getFileNumCopies file
			case from of
				Nothing -> go $ perform key file backend numcopies
				Just r -> go $ performRemote key file backend numcopies r
				Nothing -> go $ perform dist key file backend numcopies
				Just r -> go $ performRemote dist key file backend numcopies r
  where
	go = runFsck inc file key

perform :: Key -> FilePath -> Backend -> NumCopies -> Annex Bool
perform key file backend numcopies = check
perform :: Distributed -> Key -> FilePath -> Backend -> NumCopies -> Annex Bool
perform dist key file backend numcopies = check
	-- order matters
	[ fixLink key file
	, verifyLocationLog key file

@@ -132,13 +110,14 @@ perform key file backend numcopies = check
	, verifyDirectMode key file
	, checkKeySize key
	, checkBackend backend key (Just file)
	, checkDistributed dist key Nothing
	, checkKeyNumCopies key file numcopies
	]

{- To fsck a remote, the content is retrieved to a tmp file,
 - and checked locally. -}
performRemote :: Key -> FilePath -> Backend -> NumCopies -> Remote -> Annex Bool
performRemote key file backend numcopies remote =
performRemote :: Distributed -> Key -> FilePath -> Backend -> NumCopies -> Remote -> Annex Bool
performRemote dist key file backend numcopies remote =
	dispatch =<< Remote.hasKey remote key
  where
	dispatch (Left err) = do

@@ -157,6 +136,7 @@ performRemote key file backend numcopies remote =
		[ verifyLocationLogRemote key file remote present
		, checkKeySizeRemote key remote localcopy
		, checkBackendRemote backend key remote localcopy
		, checkDistributed dist key (Just $ Remote.uuid remote)
		, checkKeyNumCopies key file numcopies
		]
	withtmp a = do

@@ -177,18 +157,19 @@ performRemote key file backend numcopies remote =
		)
	dummymeter _ = noop

startKey :: Incremental -> Key -> NumCopies -> CommandStart
startKey inc key numcopies =
startKey :: Incremental -> Distributed -> Key -> NumCopies -> CommandStart
startKey inc dist key numcopies =
	case Backend.maybeLookupBackendName (Types.Key.keyBackendName key) of
		Nothing -> stop
		Just backend -> runFsck inc (key2file key) key $
			performKey key backend numcopies
			performKey dist key backend numcopies

performKey :: Key -> Backend -> NumCopies -> Annex Bool
performKey key backend numcopies = check
performKey :: Distributed -> Key -> Backend -> NumCopies -> Annex Bool
performKey dist key backend numcopies = check
	[ verifyLocationLog key (key2file key)
	, checkKeySize key
	, checkBackend backend key Nothing
	, checkDistributed dist key Nothing
	, checkKeyNumCopies key (key2file key) numcopies
	]

@@ -421,8 +402,6 @@ badContentRemote remote key = do
	return $ (if ok then "dropped from " else "failed to drop from ")
		++ Remote.name remote

data Incremental = StartIncremental FsckDb.FsckHandle | ContIncremental FsckDb.FsckHandle | NonIncremental

runFsck :: Incremental -> FilePath -> Key -> Annex Bool -> CommandStart
runFsck inc file key a = ifM (needFsck inc key)
	( do

@@ -497,3 +476,106 @@ getStartTime u = do
#else
		fromfile >= fromstatus
#endif

data Incremental = StartIncremental FsckDb.FsckHandle | ContIncremental FsckDb.FsckHandle | NonIncremental

getIncremental :: UUID -> Annex Incremental
getIncremental u = do
	i <- maybe (return False) (checkschedule . parseDuration)
		=<< Annex.getField (optionName incrementalScheduleOption)
	starti <- getOptionFlag startIncrementalOption
	morei <- getOptionFlag moreIncrementalOption
	case (i, starti, morei) of
		(False, False, False) -> return NonIncremental
		(False, True, False) -> startIncremental
		(False ,False, True) -> contIncremental
		(True, False, False) ->
			maybe startIncremental (const contIncremental)
				=<< getStartTime u
		_ -> error "Specify only one of --incremental, --more, or --incremental-schedule"
  where
	startIncremental = do
		recordStartTime u
		ifM (FsckDb.newPass u)
			( StartIncremental <$> FsckDb.openDb u
			, error "Cannot start a new --incremental fsck pass; another fsck process is already running."
			)
	contIncremental = ContIncremental <$> FsckDb.openDb u

	checkschedule Nothing = error "bad --incremental-schedule value"
	checkschedule (Just delta) = do
		Annex.addCleanup FsckCleanup $ do
			v <- getStartTime u
			case v of
				Nothing -> noop
				Just started -> do
					now <- liftIO getPOSIXTime
					when (now - realToFrac started >= durationToPOSIXTime delta) $
						resetStartTime u
		return True

data Distributed
	= NonDistributed
	| Distributed POSIXTime
	| DistributedExpire POSIXTime (M.Map (Maybe UUID) (Maybe POSIXTime))
	deriving (Show)

getDistributed :: Annex Distributed
getDistributed = go =<< getOptionField expireOption parseexpire
  where
	go (Just m) = DistributedExpire <$> liftIO getPOSIXTime <*> pure m
	go Nothing = ifM (getOptionFlag distributedOption)
		( Distributed <$> liftIO getPOSIXTime
		, return NonDistributed
		)

	parseexpire Nothing = return Nothing
	parseexpire (Just s) = do
		now <- liftIO getPOSIXTime
		Just . M.fromList <$> mapM (parseexpire' now) (words s)
	parseexpire' now s = case separate (== ':') s of
		(t, []) -> return (Nothing, parsetime now t)
		(n, t) -> do
			r <- Remote.nameToUUID n
			return (Just r, parsetime now t)
	parsetime _ "never" = Nothing
	parsetime now s = case parseDuration s of
		Nothing -> error $ "bad expire time: " ++ s
		Just d -> Just (now - durationToPOSIXTime d)

checkDistributed :: Distributed -> Key -> Maybe UUID -> Annex Bool
checkDistributed d k mu = do
	go d
	return True
  where
	go NonDistributed = noop

	-- This is called after fsck has checked the key's content, so
	-- if the key is present in the annex now, we just need to update
	-- the location log with the timestamp of the start of the fsck.
	--
	-- Note that reusing this timestamp means that the same log line
	-- is generated for each key, which keeps the size increase
	-- of the git-annex branch down.
	go (Distributed ts) = whenM (inAnnex k) $ do
		u <- maybe getUUID return mu
		logChange' (logThen ts) k u InfoPresent

	-- Get the location log for the key, and expire all entries
	-- that are older than their uuid's listed expiration date.
	-- (Except for the local repository.)
	go (DistributedExpire ts m) = do
		ls <- locationLog k
		hereu <- getUUID
		forM_ ls $ \l -> do
			let u = toUUID (info l)
			unless (u == hereu) $
				case lookupexpire u of
					Just (Just expiretime)
						| date l < expiretime ->
							logChange' (logThen ts) k u InfoMissing
					_ -> noop
	  where
		lookupexpire u = headMaybe $ catMaybes $
			map (`M.lookup` m) [Just u, Nothing]

Logs/Location.hs

@@ -17,8 +17,10 @@ module Logs.Location (
	LogStatus(..),
	logStatus,
	logChange,
	logChange',
	loggedLocations,
	loggedLocationsHistorical,
	locationLog,
	loggedKeys,
	loggedKeysFor,
) where

@@ -39,24 +41,32 @@ logStatus key s = do

{- Log a change in the presence of a key's value in a repository. -}
logChange :: Key -> UUID -> LogStatus -> Annex ()
logChange key (UUID u) s = do
logChange = logChange' logNow

logChange' :: (LogStatus -> String -> Annex LogLine) -> Key -> UUID -> LogStatus -> Annex ()
logChange' mklog key (UUID u) s = do
	config <- Annex.getGitConfig
	addLog (locationLogFile config key) =<< logNow s u
logChange _ NoUUID _ = noop
	addLog (locationLogFile config key) =<< mklog s u
logChange' _ _ NoUUID _ = noop

{- Returns a list of repository UUIDs that, according to the log, have
 - the value of a key. -}
loggedLocations :: Key -> Annex [UUID]
loggedLocations = getLoggedLocations currentLog
loggedLocations = getLoggedLocations currentLogInfo

{- Gets the location log on a particular date. -}
loggedLocationsHistorical :: RefDate -> Key -> Annex [UUID]
loggedLocationsHistorical = getLoggedLocations . historicalLog
loggedLocationsHistorical = getLoggedLocations . historicalLogInfo

getLoggedLocations :: (FilePath -> Annex [String]) -> Key -> Annex [UUID]
getLoggedLocations getter key = do
	config <- Annex.getGitConfig
	map toUUID <$> (getter . locationLogFile config) key
	map toUUID <$> getter (locationLogFile config key)

locationLog :: Key -> Annex [LogLine]
locationLog key = do
	config <- Annex.getGitConfig
	currentLog (locationLogFile config key)

{- Finds all keys that have location log information.
 - (There may be duplicate keys in the list.) -}

Logs/Presence.hs

@@ -16,8 +16,10 @@ module Logs.Presence (
	addLog,
	readLog,
	logNow,
	logThen,
	currentLog,
	historicalLog
	currentLogInfo,
	historicalLogInfo,
) where

import Data.Time.Clock.POSIX

@@ -42,15 +44,21 @@ logNow s i = do
	now <- liftIO getPOSIXTime
	return $ LogLine now s i

logThen :: POSIXTime -> LogStatus -> String -> Annex LogLine
logThen t s i = return $ LogLine t s i

{- Reads a log and returns only the info that is still in effect. -}
currentLog :: FilePath -> Annex [String]
currentLog file = map info . filterPresent <$> readLog file
currentLogInfo :: FilePath -> Annex [String]
currentLogInfo file = map info <$> currentLog file

currentLog :: FilePath -> Annex [LogLine]
currentLog file = filterPresent <$> readLog file

{- Reads a historical version of a log and returns the info that was in
 - effect at that time.
 -
 - The date is formatted as shown in gitrevisions man page.
 -}
historicalLog :: RefDate -> FilePath -> Annex [String]
historicalLog refdate file = map info . filterPresent . parseLog
historicalLogInfo :: RefDate -> FilePath -> Annex [String]
historicalLogInfo refdate file = map info . filterPresent . parseLog
	<$> Annex.Branch.getHistorical refdate file

Logs/Web.hs

@@ -43,7 +43,7 @@ getUrls key = do
  where
	go [] = return []
	go (l:ls) = do
		us <- currentLog l
		us <- currentLogInfo l
		if null us
			then go ls
			else return us
2 debian/changelog

@@ -20,6 +20,8 @@ git-annex (5.20150328) UNRELEASED; urgency=medium
    multiple files.
  * import: --deduplicate and --cleanduplicates now output the keys
    corresponding to duplicated files they process.
  * fsck: Added --distributed and --expire options,
    for distributed fsck.

 -- Joey Hess <id@joeyh.name>  Fri, 27 Mar 2015 16:04:43 -0400


@@ -134,6 +134,8 @@ It will probably take just a few hours to code.
With that change, the server can check for files that not enough clients
have verified they have recently, and distribute them to more clients.

(This is now implemented.)

Note that bad actors can lie about this verification; it's not a proof they
still have the file. But, a bad actor could prove they have a file, and
refuse to give it back if the IA needed to restore the backup, too.

@@ -53,6 +53,44 @@ With parameters, only the specified files are checked.

	git annex fsck --incremental-schedule 30d --time-limit 5h

* `--distributed`

  Normally, fsck only fixes the git-annex location logs when an inconsistency
  is detected. In distributed mode, each file that is checked will result
  in a location log update noting the time that it was present.

  This is useful in situations where repositories cannot be trusted to
  continue to exist. By running a periodic distributed fsck, those
  repositories can verify that they still exist and that the information
  about their contents is still accurate.

  This is not the default mode, because each distributed fsck increases
  the size of the git-annex branch. While it takes care to log identical
  location tracking lines for all keys, which will delta-compress well,
  there is still overhead in committing the changes. If this causes
  the git-annex branch to grow too big, it can be pruned using
  [[git-annex-forget]](1).
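
  For example, a repository taking part in distributed fscking might
  periodically run something like the following (an illustrative
  invocation; how often to run it is left to the user):

	git annex fsck --distributed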

* `--expire="[repository:]time ..."`

  This option makes the fsck check for location logs of the specified
  repository that have not been updated by a distributed fsck within the
  specified time period. Such stale location logs are then thrown out, so
  git-annex will no longer think that a repository contains data if it is
  not participating in distributed fscking.

  The repository can be specified using the name of a remote,
  or the description or uuid of the repository. If a time is specified
  without a repository, it is used as the default value for all
  repositories. Note that location logs for the current repository are
  never expired, since they can be verified directly.

  The time is in the form "60d" or "1y". A time of "never" will disable
  expiration.

  Note that a remote can always run `fsck` later on to re-update the
  location log if it was expired in error.
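
  As an illustration (the remote name "laptop" is only a placeholder), a
  central repository could expire location information that distributed
  fscks have not refreshed recently with:

	git annex fsck --expire="laptop:30d 90d"

  Read this as: expire entries for the laptop remote once they are more
  than 30 days old, and entries for any other repository once they are
  more than 90 days old.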

* `--numcopies=N`

  Override the normally configured number of copies.