fsck --from

Fscking a remote is now supported. It's done by retrieving
the contents of the specified files from the remote, and checking them,
so it can be an expensive operation.

(Several optimisations are possible to speed it up, of course. This is
the slow and stupid remote fsck to start with.)

Still, if the remote is a special remote, or a git repository that you
cannot run fsck in locally, it's nice to have the ability to fsck it.

If you have any directory special remotes, now would be a good time to
fsck them, in case you were hit by the data loss bug fixed in the
previous release!
This commit is contained in:
Joey Hess 2012-01-19 15:24:05 -04:00
parent d36525e974
commit 90319afa41
8 changed files with 131 additions and 44 deletions

View file

@ -9,7 +9,6 @@ module Backend.SHA (backends) where
import Common.Annex import Common.Annex
import qualified Annex import qualified Annex
import Annex.Content
import Types.Backend import Types.Backend
import Types.Key import Types.Key
import qualified Build.SysConfig as SysConfig import qualified Build.SysConfig as SysConfig
@ -97,18 +96,14 @@ keyValueE size file = keyValue size file >>= maybe (return Nothing) addE
| otherwise = naiveextension | otherwise = naiveextension
{- A key's checksum is checked during fsck. -} {- A key's checksum is checked during fsck. -}
checkKeyChecksum :: SHASize -> Key -> Annex Bool checkKeyChecksum :: SHASize -> Key -> FilePath -> Annex Bool
checkKeyChecksum size key = do checkKeyChecksum size key file = do
fast <- Annex.getState Annex.fast fast <- Annex.getState Annex.fast
file <- inRepo $ gitAnnexLocation key
present <- liftIO $ doesFileExist file present <- liftIO $ doesFileExist file
if not present || fast if not present || fast
then return True then return True
else check =<< shaN size file else check <$> shaN size file
where where
check s check s
| s == dropExtension (keyName key) = return True | s == dropExtension (keyName key) = True
| otherwise = do | otherwise = False
dest <- moveBad key
warning $ "Bad file content; moved to " ++ dest
return False

View file

@ -87,7 +87,7 @@ cleanupRemote key remote ok = do
-- better safe than sorry: assume the remote dropped the key -- better safe than sorry: assume the remote dropped the key
-- even if it seemed to fail; the failure could have occurred -- even if it seemed to fail; the failure could have occurred
-- after it really dropped it -- after it really dropped it
Remote.logStatus remote key False Remote.logStatus remote key InfoMissing
return ok return ok
{- Checks specified remotes to verify that enough copies of a key exist to {- Checks specified remotes to verify that enough copies of a key exist to

View file

@ -20,20 +20,31 @@ import Annex.UUID
import Utility.DataUnits import Utility.DataUnits
import Utility.FileMode import Utility.FileMode
import Config import Config
import qualified Option
def :: [Command] def :: [Command]
def = [command "fsck" paramPaths seek "check for problems"] def = [withOptions options $ command "fsck" paramPaths seek
"check for problems"]
fromOption :: Option
fromOption = Option.field ['f'] "from" paramRemote "check remote"
options :: [Option]
options = [fromOption]
seek :: [CommandSeek] seek :: [CommandSeek]
seek = seek =
[ withNumCopies $ \n -> whenAnnexed $ start n [ withField fromOption Remote.byName $ \from ->
withNumCopies $ \n -> whenAnnexed $ start from n
, withBarePresentKeys startBare , withBarePresentKeys startBare
] ]
start :: Maybe Int -> FilePath -> (Key, Backend) -> CommandStart start :: Maybe Remote -> Maybe Int -> FilePath -> (Key, Backend) -> CommandStart
start numcopies file (key, backend) = do start from numcopies file (key, backend) = do
showStart "fsck" file showStart "fsck" file
next $ perform key file backend numcopies case from of
Nothing -> next $ perform key file backend numcopies
Just r -> next $ performRemote key file backend numcopies r
perform :: Key -> FilePath -> Backend -> Maybe Int -> CommandPerform perform :: Key -> FilePath -> Backend -> Maybe Int -> CommandPerform
perform key file backend numcopies = check perform key file backend numcopies = check
@ -44,6 +55,27 @@ perform key file backend numcopies = check
, checkKeyNumCopies key file numcopies , checkKeyNumCopies key file numcopies
] ]
{- To fsck a remote, the content is retrieved to a tmp file,
- and checked locally. -}
performRemote :: Key -> FilePath -> Backend -> Maybe Int -> Remote -> CommandPerform
performRemote key file backend numcopies remote = withTmp key $ \tmpfile -> do
v <- Remote.hasKey remote key
case v of
Left err -> do
showNote err
stop
Right True -> do
copied <- Remote.retrieveKeyFile remote key tmpfile
if copied then go True (Just tmpfile) else go False Nothing
Right False -> go False Nothing
where
go present localcopy = check
[ verifyLocationLogRemote key file remote present
, checkKeySizeRemote key remote localcopy
, checkBackendRemote backend key remote localcopy
, checkKeyNumCopies key file numcopies
]
{- To fsck a bare repository, fsck each key in the location log. -} {- To fsck a bare repository, fsck each key in the location log. -}
withBarePresentKeys :: (Key -> CommandStart) -> CommandSeek withBarePresentKeys :: (Key -> CommandStart) -> CommandSeek
withBarePresentKeys a params = isBareRepo >>= go withBarePresentKeys a params = isBareRepo >>= go
@ -93,26 +125,33 @@ verifyLocationLog key desc = do
preventWrite (parentDir f) preventWrite (parentDir f)
u <- getUUID u <- getUUID
uuids <- Remote.keyLocations key verifyLocationLog' key desc present u (logChange key u)
verifyLocationLogRemote :: Key -> String -> Remote -> Bool -> Annex Bool
verifyLocationLogRemote key desc remote present =
verifyLocationLog' key desc present (Remote.uuid remote)
(Remote.logStatus remote key)
verifyLocationLog' :: Key -> String -> Bool -> UUID -> (LogStatus -> Annex ()) -> Annex Bool
verifyLocationLog' key desc present u bad = do
uuids <- Remote.keyLocations key
case (present, u `elem` uuids) of case (present, u `elem` uuids) of
(True, False) -> do (True, False) -> do
fix u InfoPresent fix InfoPresent
-- There is no data loss, so do not fail. -- There is no data loss, so do not fail.
return True return True
(False, True) -> do (False, True) -> do
fix u InfoMissing fix InfoMissing
warning $ warning $
"** Based on the location log, " ++ desc "** Based on the location log, " ++ desc
++ "\n** was expected to be present, " ++ ++ "\n** was expected to be present, " ++
"but its content is missing." "but its content is missing."
return False return False
_ -> return True _ -> return True
where where
fix u s = do fix s = do
showNote "fixing location log" showNote "fixing location log"
logChange key u s bad s
{- The size of the data for a key is checked against the size encoded in {- The size of the data for a key is checked against the size encoded in
- the key's metadata, if available. -} - the key's metadata, if available. -}
@ -120,26 +159,49 @@ checkKeySize :: Key -> Annex Bool
checkKeySize key = do checkKeySize key = do
file <- inRepo $ gitAnnexLocation key file <- inRepo $ gitAnnexLocation key
present <- liftIO $ doesFileExist file present <- liftIO $ doesFileExist file
case (present, Types.Key.keySize key) of if present
(_, Nothing) -> return True then checkKeySize' key file badContent
(False, _) -> return True else return True
(True, Just size) -> do
stat <- liftIO $ getFileStatus file
let size' = fromIntegral (fileSize stat)
if size == size'
then return True
else do
dest <- moveBad key
warning $ "Bad file size (" ++
compareSizes storageUnits True size size' ++
"); moved to " ++ dest
return False
checkKeySizeRemote :: Key -> Remote -> Maybe FilePath -> Annex Bool
checkKeySizeRemote _ _ Nothing = return True
checkKeySizeRemote key remote (Just file) = checkKeySize' key file
(badContentRemote remote)
checkKeySize' :: Key -> FilePath -> (Key -> Annex String) -> Annex Bool
checkKeySize' key file bad = case Types.Key.keySize key of
Nothing -> return True
Just size -> do
stat <- liftIO $ getFileStatus file
let size' = fromIntegral (fileSize stat)
if size == size'
then return True
else do
msg <- bad key
warning $ "Bad file size (" ++
compareSizes storageUnits True size size' ++
"); " ++ msg
return False
checkBackend :: Backend -> Key -> Annex Bool checkBackend :: Backend -> Key -> Annex Bool
checkBackend backend key = case Types.Backend.fsckKey backend of checkBackend backend key = do
file <- inRepo (gitAnnexLocation key)
checkBackend' backend key (Just file) badContent
checkBackendRemote :: Backend -> Key -> Remote -> Maybe FilePath -> Annex Bool
checkBackendRemote backend key remote localcopy =
checkBackend' backend key localcopy (badContentRemote remote)
checkBackend' :: Backend -> Key -> Maybe FilePath -> (Key -> Annex String) -> Annex Bool
checkBackend' _ _ Nothing _ = return True
checkBackend' backend key (Just file) bad = case Types.Backend.fsckKey backend of
Nothing -> return True Nothing -> return True
Just a -> a key Just a -> do
ok <- a key file
unless ok $ do
msg <- bad key
warning $ "Bad file content; " ++ msg
return ok
checkKeyNumCopies :: Key -> FilePath -> Maybe Int -> Annex Bool checkKeyNumCopies :: Key -> FilePath -> Maybe Int -> Annex Bool
checkKeyNumCopies key file numcopies = do checkKeyNumCopies key file numcopies = do
@ -168,3 +230,19 @@ missingNote file present needed untrusted =
missingNote file present needed [] ++ missingNote file present needed [] ++
"\nThe following untrusted locations may also have copies: " ++ "\nThe following untrusted locations may also have copies: " ++
"\n" ++ untrusted "\n" ++ untrusted
{- Bad content is moved aside. -}
badContent :: Key -> Annex String
badContent key = do
dest <- moveBad key
return $ "moved to " ++ dest
badContentRemote :: Remote -> Key -> Annex String
badContentRemote remote key = do
ok <- Remote.removeKey remote key
-- better safe than sorry: assume the remote dropped the key
-- even if it seemed to fail; the failure could have occurred
-- after it really dropped it
Remote.logStatus remote key InfoMissing
return $ (if ok then "dropped from " else "failed to drop from ")
++ Remote.name remote

View file

@ -15,6 +15,7 @@ import Annex.Content
import qualified Remote import qualified Remote
import Annex.UUID import Annex.UUID
import qualified Option import qualified Option
import Logs.Presence
def :: [Command] def :: [Command]
def = [withOptions options $ command "move" paramPaths seek def = [withOptions options $ command "move" paramPaths seek
@ -97,7 +98,7 @@ toPerform dest move key = moveLock move key $ do
Right True -> finish Right True -> finish
where where
finish = do finish = do
Remote.logStatus dest key True Remote.logStatus dest key InfoPresent
if move if move
then do then do
whenM (inAnnex key) $ removeAnnex key whenM (inAnnex key) $ removeAnnex key

View file

@ -212,7 +212,5 @@ forceTrust level remotename = do
- in the local repo, not on the remote. The process of transferring the - in the local repo, not on the remote. The process of transferring the
- key to the remote, or removing the key from it *may* log the change - key to the remote, or removing the key from it *may* log the change
- on the remote, but this cannot always be relied on. -} - on the remote, but this cannot always be relied on. -}
logStatus :: Remote -> Key -> Bool -> Annex () logStatus :: Remote -> Key -> LogStatus -> Annex ()
logStatus remote key present = logChange key (uuid remote) status logStatus remote key present = logChange key (uuid remote) present
where
status = if present then InfoPresent else InfoMissing

View file

@ -17,7 +17,7 @@ data BackendA a = Backend {
-- converts a filename to a key -- converts a filename to a key
getKey :: FilePath -> a (Maybe Key), getKey :: FilePath -> a (Maybe Key),
-- called during fsck to check a key, if the backend has its own checks -- called during fsck to check a key, if the backend has its own checks
fsckKey :: Maybe (Key -> a Bool) fsckKey :: Maybe (Key -> FilePath -> a Bool)
} }
instance Show (BackendA a) where instance Show (BackendA a) where

13
debian/changelog vendored
View file

@ -1,3 +1,16 @@
git-annex (3.20120117) UNRELEASED; urgency=low
* fsck --from: Fscking a remote is now supported. It's done by retrieving
the contents of the specified files from the remote, and checking them,
so it can be an expensive operation. Still, if the remote is a special
remote, or a git repository that you cannot run fsck in locally, it's
nice to have the ability to fsck it.
* If you have any directory special remotes, now would be a good time to
fsck them, in case you were hit by the data loss bug fixed in the
previous release!
-- Joey Hess <joeyh@debian.org> Thu, 19 Jan 2012 15:12:03 -0400
git-annex (3.20120116) unstable; urgency=medium git-annex (3.20120116) unstable; urgency=medium
* Fix data loss bug in directory special remote, when moving a file * Fix data loss bug in directory special remote, when moving a file

View file

@ -212,6 +212,8 @@ subdirectories).
To avoid expensive checksum calculations, specify --fast To avoid expensive checksum calculations, specify --fast
To fsck a remote instead of the local repository, specify --from.
* unused * unused
Checks the annex for data that does not correspond to any files present Checks the annex for data that does not correspond to any files present