fsck improvements

* fsck: Check if annex.numcopies is satisfied.
* fsck: Verify the sha1 of files when the SHA1 backend is used.
* fsck: Verify the size of files when the WORM backend is used.
* fsck: Allow specifying individual files to fsk if fscking everything
  is not desired.
* fsck: Fix bug, introduced in 0.04, in detection of unused data.
This commit is contained in:
Joey Hess 2010-11-13 14:59:27 -04:00
parent d4d65a3c92
commit 5fa25a812a
15 changed files with 236 additions and 31 deletions

View file

@ -23,6 +23,7 @@ module Backend (
retrieveKeyFile, retrieveKeyFile,
removeKey, removeKey,
hasKey, hasKey,
fsckKey,
lookupFile, lookupFile,
chooseBackends chooseBackends
) where ) where
@ -113,6 +114,10 @@ hasKey key = do
bs <- Annex.supportedBackends bs <- Annex.supportedBackends
(Internals.hasKey (lookupBackendName bs $ backendName key)) key (Internals.hasKey (lookupBackendName bs $ backendName key)) key
{- Checks a key's backend for problems. -}
fsckKey :: Backend -> Key -> Annex Bool
fsckKey backend key = (Internals.fsckKey backend) key
{- Looks up the key and backend corresponding to an annexed file, {- Looks up the key and backend corresponding to an annexed file,
- by examining what the file symlinks to. -} - by examining what the file symlinks to. -}
lookupFile :: FilePath -> Annex (Maybe (Key, Backend)) lookupFile :: FilePath -> Annex (Maybe (Key, Backend))

View file

@ -4,15 +4,15 @@
- it relies on the file contents in .git/annex/ in this repo, - it relies on the file contents in .git/annex/ in this repo,
- and other accessible repos. - and other accessible repos.
- -
- This is an abstract backend; getKey has to be implemented to complete - This is an abstract backend; name, getKey and fsckKey have to be implemented
- it. - to complete it.
- -
- Copyright 2010 Joey Hess <joey@kitenet.net> - Copyright 2010 Joey Hess <joey@kitenet.net>
- -
- Licensed under the GNU GPL version 3 or higher. - Licensed under the GNU GPL version 3 or higher.
-} -}
module Backend.File (backend) where module Backend.File (backend, checkKey) where
import Control.Monad.State import Control.Monad.State
import System.Directory import System.Directory
@ -34,7 +34,8 @@ backend = Backend {
storeFileKey = dummyStore, storeFileKey = dummyStore,
retrieveKeyFile = copyKeyFile, retrieveKeyFile = copyKeyFile,
removeKey = checkRemoveKey, removeKey = checkRemoveKey,
hasKey = checkKeyFile hasKey = checkKeyFile,
fsckKey = mustProvide
} }
mustProvide :: a mustProvide :: a
@ -97,14 +98,12 @@ checkRemoveKey key = do
if (force) if (force)
then return True then return True
else do else do
g <- Annex.gitRepo
remotes <- Remotes.keyPossibilities key remotes <- Remotes.keyPossibilities key
let numcopies = read $ Git.configGet g config "1" numcopies <- getNumCopies
if (numcopies > length remotes) if (numcopies > length remotes)
then notEnoughCopies numcopies (length remotes) [] then notEnoughCopies numcopies (length remotes) []
else findcopies numcopies 0 remotes [] else findcopies numcopies 0 remotes []
where where
config = "annex.numcopies"
findcopies need have [] bad = findcopies need have [] bad =
if (have >= need) if (have >= need)
then return True then return True
@ -147,3 +146,33 @@ showTriedRemotes [] = return ()
showTriedRemotes remotes = showTriedRemotes remotes =
showLongNote $ "I was unable to access these remotes: " ++ showLongNote $ "I was unable to access these remotes: " ++
(Remotes.list remotes) (Remotes.list remotes)
getNumCopies :: Annex Int
getNumCopies = do
g <- Annex.gitRepo
return $ read $ Git.configGet g config "1"
where
config = "annex.numcopies"
{- This is used to check that numcopies is satisfied for the key on fsck.
- This trusts the location log, and so checks all keys, even those with
- data not present in the current annex.
-
- The passed action is first run to allow backends deriving this one
- to do their own checks.
-}
checkKey :: (Key -> Annex Bool) -> Key -> Annex Bool
checkKey a key = do
a_ok <- a key
copies_ok <- checkKeyNumCopies key
return $ a_ok && copies_ok
checkKeyNumCopies :: Key -> Annex Bool
checkKeyNumCopies key = do
remotes <- Remotes.keyPossibilities key
numcopies <- getNumCopies
if (length remotes < numcopies)
then do
showLongNote $ "only " ++ show (length remotes) ++ " of " ++ show numcopies ++ " copies"
return False
else return True

View file

@ -11,24 +11,51 @@ import Control.Monad.State
import Data.String.Utils import Data.String.Utils
import System.Cmd.Utils import System.Cmd.Utils
import System.IO import System.IO
import System.Directory
import qualified Backend.File import qualified Backend.File
import TypeInternals import TypeInternals
import Messages import Messages
import qualified Annex
import Locations
import Core
backend :: Backend backend :: Backend
backend = Backend.File.backend { backend = Backend.File.backend {
name = "SHA1", name = "SHA1",
getKey = keyValue getKey = keyValue,
fsckKey = Backend.File.checkKey checkKeySHA1
} }
-- checksum the file to get its key sha1 :: FilePath -> Annex String
keyValue :: FilePath -> Annex (Maybe Key) sha1 file = do
keyValue file = do
showNote "checksum..." showNote "checksum..."
liftIO $ pOpen ReadFromPipe "sha1sum" [file] $ \h -> do liftIO $ pOpen ReadFromPipe "sha1sum" [file] $ \h -> do
line <- hGetLine h line <- hGetLine h
let bits = split " " line let bits = split " " line
if (null bits) if (null bits)
then error "sha1sum parse error" then error "sha1sum parse error"
else return $ Just $ Key ((name backend), bits !! 0) else return $ bits !! 0
-- A key is a sha1 of its contents.
keyValue :: FilePath -> Annex (Maybe Key)
keyValue file = do
s <- sha1 file
return $ Just $ Key ((name backend), s)
-- A key's sha1 is checked during fsck.
checkKeySHA1 :: Key -> Annex Bool
checkKeySHA1 key = do
g <- Annex.gitRepo
let file = annexLocation g key
present <- liftIO $ doesFileExist file
if (not present)
then return True
else do
s <- sha1 file
if (s == keyName key)
then return True
else do
dest <- moveBad key
showNote $ "bad file content (moved to "++dest++")"
return False

View file

@ -20,8 +20,13 @@ backend = Backend {
getKey = keyValue, getKey = keyValue,
storeFileKey = dummyStore, storeFileKey = dummyStore,
retrieveKeyFile = downloadUrl, retrieveKeyFile = downloadUrl,
-- allow keys to be removed; presumably they can always be
-- downloaded again
removeKey = dummyOk, removeKey = dummyOk,
hasKey = dummyOk -- similarly, keys are always assumed to be out there on the web
hasKey = dummyOk,
-- and nothing needed to fsck
fsckKey = dummyOk
} }
-- cannot generate url from filename -- cannot generate url from filename
@ -32,7 +37,6 @@ keyValue _ = return Nothing
dummyStore :: FilePath -> Key -> Annex Bool dummyStore :: FilePath -> Key -> Annex Bool
dummyStore _ _ = return False dummyStore _ _ = return False
-- allow keys to be removed; presumably they can always be downloaded again
dummyOk :: Key -> Annex Bool dummyOk :: Key -> Annex Bool
dummyOk _ = return True dummyOk _ = return True

View file

@ -10,14 +10,22 @@ module Backend.WORM (backend) where
import Control.Monad.State import Control.Monad.State
import System.FilePath import System.FilePath
import System.Posix.Files import System.Posix.Files
import System.Posix.Types
import System.Directory
import Data.String.Utils
import qualified Backend.File import qualified Backend.File
import TypeInternals import TypeInternals
import Locations
import qualified Annex
import Core
import Messages
backend :: Backend backend :: Backend
backend = Backend.File.backend { backend = Backend.File.backend {
name = "WORM", name = "WORM",
getKey = keyValue getKey = keyValue,
fsckKey = Backend.File.checkKey checkKeySize
} }
-- The key is formed from the file size, modification time, and the -- The key is formed from the file size, modification time, and the
@ -36,3 +44,27 @@ keyValue file = do
(show $ fileSize stat) (show $ fileSize stat)
base = takeFileName file base = takeFileName file
sep = ":" sep = ":"
{- Extracts the file size from a key. -}
keySize :: Key -> FileOffset
keySize key = read $ section !! 2
where
section = split ":" (keyName key)
{- The size of the data for a key is checked against the size encoded in
- the key. Note that the modification time is not checked. -}
checkKeySize :: Key -> Annex Bool
checkKeySize key = do
g <- Annex.gitRepo
let file = annexLocation g key
present <- liftIO $ doesFileExist file
if (not present)
then return True
else do
s <- liftIO $ getFileStatus file
if (fileSize s == keySize key)
then return True
else do
dest <- moveBad key
showNote $ "bad file size (moved to "++dest++")"
return False

View file

@ -59,14 +59,14 @@ subCmds =
"sets annexed content for a key using a temp file" "sets annexed content for a key using a temp file"
, SubCommand "fix" path Command.Fix.seek , SubCommand "fix" path Command.Fix.seek
"fix up symlinks to point to annexed content" "fix up symlinks to point to annexed content"
, SubCommand "fsck" nothing Command.Fsck.seek , SubCommand "fsck" maybepath Command.Fsck.seek
"check annex for problems" "check for problems"
] ]
where where
path = "PATH ..." path = "PATH ..."
maybepath = "[PATH ...]"
key = "KEY ..." key = "KEY ..."
desc = "DESCRIPTION" desc = "DESCRIPTION"
nothing = ""
-- Each dashed command-line option results in generation of an action -- Each dashed command-line option results in generation of an action
-- in the Annex monad that performs the necessary setting. -- in the Annex monad that performs the necessary setting.

View file

@ -146,6 +146,16 @@ withTempFile a params = return $ map a params
withNothing :: SubCmdSeekNothing withNothing :: SubCmdSeekNothing
withNothing a _ = return [a] withNothing a _ = return [a]
{- Default to acting on all files matching the seek action if
- none are specified. -}
withAll :: SubCmdSeekStrings -> SubCmdSeekStrings
withAll w a params = do
if null params
then do
g <- Annex.gitRepo
w a [Git.workTree g]
else w a params
{- filter out files from the state directory -} {- filter out files from the state directory -}
notState :: FilePath -> Bool notState :: FilePath -> Bool
notState f = stateLoc /= take (length stateLoc) f notState f = stateLoc /= take (length stateLoc) f

View file

@ -13,9 +13,10 @@ import Command
import Types import Types
import Core import Core
import Messages import Messages
import qualified Command.FsckFile
seek :: [SubCmdSeek] seek :: [SubCmdSeek]
seek = [withNothing start] seek = [withNothing start, withAll withFilesInGit Command.FsckFile.start]
{- Checks the whole annex for problems. -} {- Checks the whole annex for problems. -}
start :: SubCmdStart start :: SubCmdStart
@ -26,11 +27,9 @@ start = do
perform :: SubCmdPerform perform :: SubCmdPerform
perform = do perform = do
ok <- checkUnused ok <- checkUnused
if (ok) if ok
then return $ Just $ return True then return $ Just $ return True
else do else return Nothing
showLongNote "Possible problems detected."
return Nothing
checkUnused :: Annex Bool checkUnused :: Annex Bool
checkUnused = do checkUnused = do

33
Command/FsckFile.hs Normal file
View file

@ -0,0 +1,33 @@
{- git-annex command
-
- Copyright 2010 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Command.FsckFile where
import Command
import qualified Backend
import Types
import Messages
seek :: [SubCmdSeek]
seek = [withFilesInGit start]
{- Checks a file's backend data for problems. -}
start :: SubCmdStartString
start file = isAnnexed file $ \(key, backend) -> do
inbackend <- Backend.hasKey key
if (not inbackend)
then return Nothing
else do
showStart "fsck" file
return $ Just $ perform key backend
perform :: Key -> Backend -> SubCmdPerform
perform key backend = do
success <- Backend.fsckKey backend key
if (success)
then return $ Just $ return True
else return Nothing

18
Core.hs
View file

@ -14,6 +14,7 @@ import System.Path
import Control.Monad (when, unless, filterM) import Control.Monad (when, unless, filterM)
import System.Posix.Files import System.Posix.Files
import Data.Maybe import Data.Maybe
import System.FilePath
import Types import Types
import Locations import Locations
@ -201,6 +202,16 @@ fromAnnex key dest = do
renameFile file dest renameFile file dest
removeDirectory dir removeDirectory dir
{- Moves a key out of .git/annex/objects/ into .git/annex/bad, and
- returns the directory it was moved to. -}
moveBad :: Key -> Annex FilePath
moveBad key = do
g <- Annex.gitRepo
let src = parentDir $ annexLocation g key
let dest = annexBadLocation g
liftIO $ renameDirectory src dest
return dest
{- List of keys whose content exists in .git/annex/objects/ -} {- List of keys whose content exists in .git/annex/objects/ -}
getKeysPresent :: Annex [Key] getKeysPresent :: Annex [Key]
getKeysPresent = do getKeysPresent = do
@ -209,11 +220,12 @@ getKeysPresent = do
getKeysPresent' :: FilePath -> Annex [Key] getKeysPresent' :: FilePath -> Annex [Key]
getKeysPresent' dir = do getKeysPresent' dir = do
contents <- liftIO $ getDirectoryContents dir contents <- liftIO $ getDirectoryContents dir
files <- liftIO $ filterM isreg contents files <- liftIO $ filterM present contents
return $ map fileKey files return $ map fileKey files
where where
isreg f = do present d = do
s <- getFileStatus $ dir ++ "/" ++ f s <- getFileStatus $ dir ++ "/" ++ d ++ "/"
++ (takeFileName d)
return $ isRegularFile s return $ isRegularFile s
{- List of keys referenced by symlinks in the git repo. -} {- List of keys referenced by symlinks in the git repo. -}

View file

@ -13,6 +13,7 @@ module Locations (
annexLocation, annexLocation,
annexLocationRelative, annexLocationRelative,
annexTmpLocation, annexTmpLocation,
annexBadLocation,
annexDir, annexDir,
annexObjectDir, annexObjectDir,
@ -59,6 +60,10 @@ annexObjectDir r = annexDir r ++ "/objects"
annexTmpLocation :: Git.Repo -> FilePath annexTmpLocation :: Git.Repo -> FilePath
annexTmpLocation r = annexDir r ++ "/tmp/" annexTmpLocation r = annexDir r ++ "/tmp/"
{- .git-annex/bad is used for bad files found during fsck -}
annexBadLocation :: Git.Repo -> FilePath
annexBadLocation r = annexDir r ++ "/bad/"
{- Converts a key into a filename fragment. {- Converts a key into a filename fragment.
- -
- Escape "/" in the key name, to keep a flat tree of files and avoid - Escape "/" in the key name, to keep a flat tree of files and avoid

View file

@ -75,7 +75,9 @@ data Backend = Backend {
-- removes a key -- removes a key
removeKey :: Key -> Annex Bool, removeKey :: Key -> Annex Bool,
-- checks if a backend is storing the content of a key -- checks if a backend is storing the content of a key
hasKey :: Key -> Annex Bool hasKey :: Key -> Annex Bool,
-- called during fsck to check a key
fsckKey :: Key -> Annex Bool
} }
instance Show Backend where instance Show Backend where

11
debian/changelog vendored
View file

@ -1,3 +1,14 @@
git-annex (0.06) UNRELEASED; urgency=low
* fsck: Check if annex.numcopies is satisfied.
* fsck: Verify the sha1 of files when the SHA1 backend is used.
* fsck: Verify the size of files when the WORM backend is used.
* fsck: Allow specifying individual files to fsk if fscking everything
is not desired.
* fsck: Fix bug, introduced in 0.04, in detection of unused data.
-- Joey Hess <joeyh@debian.org> Sat, 13 Nov 2010 14:08:58 -0400
git-annex (0.05) unstable; urgency=low git-annex (0.05) unstable; urgency=low
* Optimize both pre-commit and lock subcommands to not call git diff * Optimize both pre-commit and lock subcommands to not call git diff

View file

@ -159,10 +159,12 @@ Many git-annex subcommands will stage changes for later `git commit` by you.
git annex setkey --key=1287765018:3 /tmp/file git annex setkey --key=1287765018:3 /tmp/file
* fsck * fsck [path ...]
This subcommand checks the whole annex for consistency, and warns With no parameters, this subcommand checks the whole annex for consistency,
about any problems found. and warns about any problems found.
With parameters, only the specified files are checked.
# OPTIONS # OPTIONS

View file

@ -276,3 +276,37 @@ significantly for really big files. To make SHA1 the detault, just
add something like this to `.gitattributes`: add something like this to `.gitattributes`:
* git-annex-backend=SHA1 * git-annex-backend=SHA1
## fsck: verifying your data
You can use the fsck subcommand to check for problems in your data.
What can be checked depends on the [[backend|backends]] you've used to store
the data. For example, when you use the SHA1 backend, fsck will verify that
the checksums of your files are good. Fsck also checks that the annex.numcopies
setting is satisfied for all files, and it warns about any dangling values
in `.git/annex/objects/`.
# git annex fsck
fsck (checking for unused data...) (checking files...) ok
Fsck checks the entire repository for problems by default. But you can
also specify the files to check.
This is particularly useful if you're using sha1 and don't want to spend
a long time checksumming everything.
# git annex fsck my_cool_big_file
fsck my_cool_big_file (checksum..) ok
## fsck: When things go wrong
Fsck never deletes possibly bad data; instead it will be moved to
`.git/annex/bad/` for you to review. Here is a sample of what fsck
might say about a badly messed up annex:
# git annex fsck
fsck (checking for unused data...)
Some annexed data is no longer pointed to by any files in the repository.
If this data is no longer needed, it can be removed using git-annex dropkey:
WORM:1289672605:3:file
(checking files...)