fsck --from

Fscking a remote is now supported. It's done by retrieving
the contents of the specified files from the remote, and checking them,
so can be an expensive operation.

(Several optimisations are possible, to speed it up, of course.. This is
the slow and stupid remote fsck to start with.)

Still, if the remote is a special remote, or a git repository that you
cannot run fsck in locally, it's nice to have the ability to fsck it.

If you have any directory special remotes, now would be a good time to
fsck them, in case you were hit by the data loss bug fixed in the
previous release!
This commit is contained in:
Joey Hess 2012-01-19 15:24:05 -04:00
parent d36525e974
commit 90319afa41
8 changed files with 131 additions and 44 deletions

View file

@ -9,7 +9,6 @@ module Backend.SHA (backends) where
import Common.Annex
import qualified Annex
import Annex.Content
import Types.Backend
import Types.Key
import qualified Build.SysConfig as SysConfig
@ -97,18 +96,14 @@ keyValueE size file = keyValue size file >>= maybe (return Nothing) addE
| otherwise = naiveextension
{- A key's checksum is checked during fsck. -}
checkKeyChecksum :: SHASize -> Key -> Annex Bool
checkKeyChecksum size key = do
checkKeyChecksum :: SHASize -> Key -> FilePath -> Annex Bool
checkKeyChecksum size key file = do
fast <- Annex.getState Annex.fast
file <- inRepo $ gitAnnexLocation key
present <- liftIO $ doesFileExist file
if not present || fast
then return True
else check =<< shaN size file
else check <$> shaN size file
where
check s
| s == dropExtension (keyName key) = return True
| otherwise = do
dest <- moveBad key
warning $ "Bad file content; moved to " ++ dest
return False
| s == dropExtension (keyName key) = True
| otherwise = False

View file

@ -87,7 +87,7 @@ cleanupRemote key remote ok = do
-- better safe than sorry: assume the remote dropped the key
-- even if it seemed to fail; the failure could have occurred
-- after it really dropped it
Remote.logStatus remote key False
Remote.logStatus remote key InfoMissing
return ok
{- Checks specified remotes to verify that enough copies of a key exist to

View file

@ -20,20 +20,31 @@ import Annex.UUID
import Utility.DataUnits
import Utility.FileMode
import Config
import qualified Option
def :: [Command]
def = [command "fsck" paramPaths seek "check for problems"]
def = [withOptions options $ command "fsck" paramPaths seek
"check for problems"]
fromOption :: Option
fromOption = Option.field ['f'] "from" paramRemote "check remote"
options :: [Option]
options = [fromOption]
seek :: [CommandSeek]
seek =
[ withNumCopies $ \n -> whenAnnexed $ start n
[ withField fromOption Remote.byName $ \from ->
withNumCopies $ \n -> whenAnnexed $ start from n
, withBarePresentKeys startBare
]
start :: Maybe Int -> FilePath -> (Key, Backend) -> CommandStart
start numcopies file (key, backend) = do
start :: Maybe Remote -> Maybe Int -> FilePath -> (Key, Backend) -> CommandStart
start from numcopies file (key, backend) = do
showStart "fsck" file
next $ perform key file backend numcopies
case from of
Nothing -> next $ perform key file backend numcopies
Just r -> next $ performRemote key file backend numcopies r
perform :: Key -> FilePath -> Backend -> Maybe Int -> CommandPerform
perform key file backend numcopies = check
@ -44,6 +55,27 @@ perform key file backend numcopies = check
, checkKeyNumCopies key file numcopies
]
{- To fsck a remote, the content is retrieved to a tmp file,
- and checked locally. -}
performRemote :: Key -> FilePath -> Backend -> Maybe Int -> Remote -> CommandPerform
performRemote key file backend numcopies remote = withTmp key $ \tmpfile -> do
v <- Remote.hasKey remote key
case v of
Left err -> do
showNote err
stop
Right True -> do
copied <- Remote.retrieveKeyFile remote key tmpfile
if copied then go True (Just tmpfile) else go False Nothing
Right False -> go False Nothing
where
go present localcopy = check
[ verifyLocationLogRemote key file remote present
, checkKeySizeRemote key remote localcopy
, checkBackendRemote backend key remote localcopy
, checkKeyNumCopies key file numcopies
]
{- To fsck a bare repository, fsck each key in the location log. -}
withBarePresentKeys :: (Key -> CommandStart) -> CommandSeek
withBarePresentKeys a params = isBareRepo >>= go
@ -93,26 +125,33 @@ verifyLocationLog key desc = do
preventWrite (parentDir f)
u <- getUUID
uuids <- Remote.keyLocations key
verifyLocationLog' key desc present u (logChange key u)
verifyLocationLogRemote :: Key -> String -> Remote -> Bool -> Annex Bool
verifyLocationLogRemote key desc remote present =
verifyLocationLog' key desc present (Remote.uuid remote)
(Remote.logStatus remote key)
verifyLocationLog' :: Key -> String -> Bool -> UUID -> (LogStatus -> Annex ()) -> Annex Bool
verifyLocationLog' key desc present u bad = do
uuids <- Remote.keyLocations key
case (present, u `elem` uuids) of
(True, False) -> do
fix u InfoPresent
fix InfoPresent
-- There is no data loss, so do not fail.
return True
(False, True) -> do
fix u InfoMissing
fix InfoMissing
warning $
"** Based on the location log, " ++ desc
++ "\n** was expected to be present, " ++
"but its content is missing."
return False
_ -> return True
where
fix u s = do
fix s = do
showNote "fixing location log"
logChange key u s
bad s
{- The size of the data for a key is checked against the size encoded in
- the key's metadata, if available. -}
@ -120,26 +159,49 @@ checkKeySize :: Key -> Annex Bool
checkKeySize key = do
file <- inRepo $ gitAnnexLocation key
present <- liftIO $ doesFileExist file
case (present, Types.Key.keySize key) of
(_, Nothing) -> return True
(False, _) -> return True
(True, Just size) -> do
stat <- liftIO $ getFileStatus file
let size' = fromIntegral (fileSize stat)
if size == size'
then return True
else do
dest <- moveBad key
warning $ "Bad file size (" ++
compareSizes storageUnits True size size' ++
"); moved to " ++ dest
return False
if present
then checkKeySize' key file badContent
else return True
checkKeySizeRemote :: Key -> Remote -> Maybe FilePath -> Annex Bool
checkKeySizeRemote _ _ Nothing = return True
checkKeySizeRemote key remote (Just file) = checkKeySize' key file
(badContentRemote remote)
checkKeySize' :: Key -> FilePath -> (Key -> Annex String) -> Annex Bool
checkKeySize' key file bad = case Types.Key.keySize key of
Nothing -> return True
Just size -> do
stat <- liftIO $ getFileStatus file
let size' = fromIntegral (fileSize stat)
if size == size'
then return True
else do
msg <- bad key
warning $ "Bad file size (" ++
compareSizes storageUnits True size size' ++
"); " ++ msg
return False
checkBackend :: Backend -> Key -> Annex Bool
checkBackend backend key = case Types.Backend.fsckKey backend of
checkBackend backend key = do
file <- inRepo (gitAnnexLocation key)
checkBackend' backend key (Just file) badContent
checkBackendRemote :: Backend -> Key -> Remote -> Maybe FilePath -> Annex Bool
checkBackendRemote backend key remote localcopy =
checkBackend' backend key localcopy (badContentRemote remote)
checkBackend' :: Backend -> Key -> Maybe FilePath -> (Key -> Annex String) -> Annex Bool
checkBackend' _ _ Nothing _ = return True
checkBackend' backend key (Just file) bad = case Types.Backend.fsckKey backend of
Nothing -> return True
Just a -> a key
Just a -> do
ok <- a key file
unless ok $ do
msg <- bad key
warning $ "Bad file content; " ++ msg
return ok
checkKeyNumCopies :: Key -> FilePath -> Maybe Int -> Annex Bool
checkKeyNumCopies key file numcopies = do
@ -168,3 +230,19 @@ missingNote file present needed untrusted =
missingNote file present needed [] ++
"\nThe following untrusted locations may also have copies: " ++
"\n" ++ untrusted
{- Bad content is moved aside. -}
badContent :: Key -> Annex String
badContent key = do
dest <- moveBad key
return $ "moved to " ++ dest
badContentRemote :: Remote -> Key -> Annex String
badContentRemote remote key = do
ok <- Remote.removeKey remote key
-- better safe than sorry: assume the remote dropped the key
-- even if it seemed to fail; the failure could have occurred
-- after it really dropped it
Remote.logStatus remote key InfoMissing
return $ (if ok then "dropped from " else "failed to drop from ")
++ Remote.name remote

View file

@ -15,6 +15,7 @@ import Annex.Content
import qualified Remote
import Annex.UUID
import qualified Option
import Logs.Presence
def :: [Command]
def = [withOptions options $ command "move" paramPaths seek
@ -97,7 +98,7 @@ toPerform dest move key = moveLock move key $ do
Right True -> finish
where
finish = do
Remote.logStatus dest key True
Remote.logStatus dest key InfoPresent
if move
then do
whenM (inAnnex key) $ removeAnnex key

View file

@ -212,7 +212,5 @@ forceTrust level remotename = do
- in the local repo, not on the remote. The process of transferring the
- key to the remote, or removing the key from it *may* log the change
- on the remote, but this cannot always be relied on. -}
logStatus :: Remote -> Key -> Bool -> Annex ()
logStatus remote key present = logChange key (uuid remote) status
where
status = if present then InfoPresent else InfoMissing
logStatus :: Remote -> Key -> LogStatus -> Annex ()
logStatus remote key present = logChange key (uuid remote) present

View file

@ -17,7 +17,7 @@ data BackendA a = Backend {
-- converts a filename to a key
getKey :: FilePath -> a (Maybe Key),
-- called during fsck to check a key, if the backend has its own checks
fsckKey :: Maybe (Key -> a Bool)
fsckKey :: Maybe (Key -> FilePath -> a Bool)
}
instance Show (BackendA a) where

13
debian/changelog vendored
View file

@ -1,3 +1,16 @@
git-annex (3.20120117) UNRELEASED; urgency=low
* fsck --from: Fscking a remote is now supported. It's done by retrieving
the contents of the specified files from the remote, and checking them,
so can be an expensive operation. Still, if the remote is a special
remote, or a git repository that you cannot run fsck in locally, it's
nice to have the ability to fsck it.
* If you have any directory special remotes, now would be a good time to
fsck them, in case you were hit by the data loss bug fixed in the
previous release!
-- Joey Hess <joeyh@debian.org> Thu, 19 Jan 2012 15:12:03 -0400
git-annex (3.20120116) unstable; urgency=medium
* Fix data loss bug in directory special remote, when moving a file

View file

@ -212,6 +212,8 @@ subdirectories).
To avoid expensive checksum calculations, specify --fast
To check a remote to fsck, specify --from.
* unused
Checks the annex for data that does not correspond to any files present