fsck improvements
* fsck: Check if annex.numcopies is satisfied. * fsck: Verify the sha1 of files when the SHA1 backend is used. * fsck: Verify the size of files when the WORM backend is used. * fsck: Allow specifying individual files to fsck if fscking everything is not desired. * fsck: Fix bug, introduced in 0.04, in detection of unused data.
This commit is contained in:
parent
d4d65a3c92
commit
5fa25a812a
15 changed files with 236 additions and 31 deletions
|
@ -23,6 +23,7 @@ module Backend (
|
|||
retrieveKeyFile,
|
||||
removeKey,
|
||||
hasKey,
|
||||
fsckKey,
|
||||
lookupFile,
|
||||
chooseBackends
|
||||
) where
|
||||
|
@ -113,6 +114,10 @@ hasKey key = do
|
|||
bs <- Annex.supportedBackends
|
||||
(Internals.hasKey (lookupBackendName bs $ backendName key)) key
|
||||
|
||||
{- Asks the given backend to check a key for problems, by running
 - that backend's own fsckKey action on the key. -}
fsckKey :: Backend -> Key -> Annex Bool
fsckKey backend = Internals.fsckKey backend
|
||||
|
||||
{- Looks up the key and backend corresponding to an annexed file,
|
||||
- by examining what the file symlinks to. -}
|
||||
lookupFile :: FilePath -> Annex (Maybe (Key, Backend))
|
||||
|
|
|
@ -4,15 +4,15 @@
|
|||
- it relies on the file contents in .git/annex/ in this repo,
|
||||
- and other accessible repos.
|
||||
-
|
||||
- This is an abstract backend; getKey has to be implemented to complete
|
||||
- it.
|
||||
- This is an abstract backend; name, getKey and fsckKey have to be implemented
|
||||
- to complete it.
|
||||
-
|
||||
- Copyright 2010 Joey Hess <joey@kitenet.net>
|
||||
-
|
||||
- Licensed under the GNU GPL version 3 or higher.
|
||||
-}
|
||||
|
||||
module Backend.File (backend) where
|
||||
module Backend.File (backend, checkKey) where
|
||||
|
||||
import Control.Monad.State
|
||||
import System.Directory
|
||||
|
@ -34,7 +34,8 @@ backend = Backend {
|
|||
storeFileKey = dummyStore,
|
||||
retrieveKeyFile = copyKeyFile,
|
||||
removeKey = checkRemoveKey,
|
||||
hasKey = checkKeyFile
|
||||
hasKey = checkKeyFile,
|
||||
fsckKey = mustProvide
|
||||
}
|
||||
|
||||
mustProvide :: a
|
||||
|
@ -97,14 +98,12 @@ checkRemoveKey key = do
|
|||
if (force)
|
||||
then return True
|
||||
else do
|
||||
g <- Annex.gitRepo
|
||||
remotes <- Remotes.keyPossibilities key
|
||||
let numcopies = read $ Git.configGet g config "1"
|
||||
numcopies <- getNumCopies
|
||||
if (numcopies > length remotes)
|
||||
then notEnoughCopies numcopies (length remotes) []
|
||||
else findcopies numcopies 0 remotes []
|
||||
where
|
||||
config = "annex.numcopies"
|
||||
findcopies need have [] bad =
|
||||
if (have >= need)
|
||||
then return True
|
||||
|
@ -147,3 +146,33 @@ showTriedRemotes [] = return ()
|
|||
showTriedRemotes remotes =
|
||||
showLongNote $ "I was unable to access these remotes: " ++
|
||||
(Remotes.list remotes)
|
||||
|
||||
{- Looks up the annex.numcopies setting in the git configuration of the
 - current repository, passing "1" to Git.configGet as the default.
 -
 - NOTE(review): the value is parsed with read, so a setting that is
 - not a number will fail when the result is used. -}
getNumCopies :: Annex Int
getNumCopies = do
    repo <- Annex.gitRepo
    let setting = Git.configGet repo "annex.numcopies" "1"
    return (read setting)
|
||||
|
||||
{- The fsck check shared by backends that derive from this one.
 -
 - The passed action runs first, so a deriving backend can do its own
 - checks. Then numcopies is checked for the key. That check trusts the
 - location log, and so covers every key, even those whose data is not
 - present in the current annex.
 -
 - Both checks are always run; the result is True only when both pass.
 -}
checkKey :: (Key -> Annex Bool) -> Key -> Annex Bool
checkKey a key = liftM2 (&&) (a key) (checkKeyNumCopies key)
|
||||
|
||||
{- Compares the number of remotes returned by Remotes.keyPossibilities
 - for a key against the numcopies setting. When there are too few, a
 - note saying how many copies were found is shown and False is returned. -}
checkKeyNumCopies :: Key -> Annex Bool
checkKeyNumCopies key = do
    remotes <- Remotes.keyPossibilities key
    numcopies <- getNumCopies
    let have = length remotes
    if have >= numcopies
        then return True
        else do
            showLongNote $ "only " ++ show have ++ " of " ++ show numcopies ++ " copies"
            return False
|
||||
|
|
|
@ -11,24 +11,51 @@ import Control.Monad.State
|
|||
import Data.String.Utils
|
||||
import System.Cmd.Utils
|
||||
import System.IO
|
||||
import System.Directory
|
||||
|
||||
import qualified Backend.File
|
||||
import TypeInternals
|
||||
import Messages
|
||||
import qualified Annex
|
||||
import Locations
|
||||
import Core
|
||||
|
||||
backend :: Backend
|
||||
backend = Backend.File.backend {
|
||||
name = "SHA1",
|
||||
getKey = keyValue
|
||||
getKey = keyValue,
|
||||
fsckKey = Backend.File.checkKey checkKeySHA1
|
||||
}
|
||||
|
||||
-- checksum the file to get its key
|
||||
keyValue :: FilePath -> Annex (Maybe Key)
|
||||
keyValue file = do
|
||||
sha1 :: FilePath -> Annex String
|
||||
sha1 file = do
|
||||
showNote "checksum..."
|
||||
liftIO $ pOpen ReadFromPipe "sha1sum" [file] $ \h -> do
|
||||
line <- hGetLine h
|
||||
let bits = split " " line
|
||||
if (null bits)
|
||||
then error "sha1sum parse error"
|
||||
else return $ Just $ Key ((name backend), bits !! 0)
|
||||
else return $ bits !! 0
|
||||
|
||||
-- The key for a file pairs this backend's name with the sha1 of the
-- file's contents.
keyValue :: FilePath -> Annex (Maybe Key)
keyValue file = sha1 file >>= \checksum ->
    return $ Just $ Key (name backend, checksum)
|
||||
|
||||
-- fsck check for this backend: when the key's content is present in the
-- annex, it is checksummed again and compared with the key's name.
-- Content that does not match is moved aside with moveBad. A key whose
-- content is not present passes the check.
checkKeySHA1 :: Key -> Annex Bool
checkKeySHA1 key = do
    repo <- Annex.gitRepo
    let content = annexLocation repo key
    exists <- liftIO $ doesFileExist content
    if exists
        then verify content
        else return True
  where
    verify content = do
        checksum <- sha1 content
        if checksum == keyName key
            then return True
            else do
                dest <- moveBad key
                showNote $ "bad file content (moved to "++dest++")"
                return False
|
||||
|
|
|
@ -20,8 +20,13 @@ backend = Backend {
|
|||
getKey = keyValue,
|
||||
storeFileKey = dummyStore,
|
||||
retrieveKeyFile = downloadUrl,
|
||||
-- allow keys to be removed; presumably they can always be
|
||||
-- downloaded again
|
||||
removeKey = dummyOk,
|
||||
hasKey = dummyOk
|
||||
-- similarly, keys are always assumed to be out there on the web
|
||||
hasKey = dummyOk,
|
||||
-- and nothing needed to fsck
|
||||
fsckKey = dummyOk
|
||||
}
|
||||
|
||||
-- cannot generate url from filename
|
||||
|
@ -32,7 +37,6 @@ keyValue _ = return Nothing
|
|||
dummyStore :: FilePath -> Key -> Annex Bool
|
||||
dummyStore _ _ = return False
|
||||
|
||||
-- allow keys to be removed; presumably they can always be downloaded again
|
||||
dummyOk :: Key -> Annex Bool
|
||||
dummyOk _ = return True
|
||||
|
||||
|
|
|
@ -10,14 +10,22 @@ module Backend.WORM (backend) where
|
|||
import Control.Monad.State
|
||||
import System.FilePath
|
||||
import System.Posix.Files
|
||||
import System.Posix.Types
|
||||
import System.Directory
|
||||
import Data.String.Utils
|
||||
|
||||
import qualified Backend.File
|
||||
import TypeInternals
|
||||
import Locations
|
||||
import qualified Annex
|
||||
import Core
|
||||
import Messages
|
||||
|
||||
backend :: Backend
|
||||
backend = Backend.File.backend {
|
||||
name = "WORM",
|
||||
getKey = keyValue
|
||||
getKey = keyValue,
|
||||
fsckKey = Backend.File.checkKey checkKeySize
|
||||
}
|
||||
|
||||
-- The key is formed from the file size, modification time, and the
|
||||
|
@ -36,3 +44,27 @@ keyValue file = do
|
|||
(show $ fileSize stat)
|
||||
base = takeFileName file
|
||||
sep = ":"
|
||||
|
||||
{- Extracts the file size from a key.
 -
 - The key name is split on ":". keyValue emits the size immediately
 - before the file's base name, and a key name looks like
 - "1289672605:3:file", so the size is the second field (index 1),
 - not the third, which holds the file name.
 -
 - Fails with an error if the key name has fewer than two fields.
 - NOTE(review): read still fails if the size field is not a number.
 -}
keySize :: Key -> FileOffset
keySize key = case split ":" (keyName key) of
    (_mtime:size:_) -> read size
    _ -> error $ "WORM key has no size field: " ++ keyName key
|
||||
|
||||
{- fsck check for this backend: when the key's content is present in
 - the annex, its size on disk is compared with the size encoded in the
 - key. Content of the wrong size is moved aside with moveBad. A key
 - whose content is not present passes the check.
 -
 - Note that the modification time is not checked. -}
checkKeySize :: Key -> Annex Bool
checkKeySize key = do
    repo <- Annex.gitRepo
    let content = annexLocation repo key
    exists <- liftIO $ doesFileExist content
    if exists
        then do
            status <- liftIO $ getFileStatus content
            sizeMatches (fileSize status == keySize key)
        else return True
  where
    sizeMatches True = return True
    sizeMatches False = do
        dest <- moveBad key
        showNote $ "bad file size (moved to "++dest++")"
        return False
|
||||
|
|
|
@ -59,14 +59,14 @@ subCmds =
|
|||
"sets annexed content for a key using a temp file"
|
||||
, SubCommand "fix" path Command.Fix.seek
|
||||
"fix up symlinks to point to annexed content"
|
||||
, SubCommand "fsck" nothing Command.Fsck.seek
|
||||
"check annex for problems"
|
||||
, SubCommand "fsck" maybepath Command.Fsck.seek
|
||||
"check for problems"
|
||||
]
|
||||
where
|
||||
path = "PATH ..."
|
||||
maybepath = "[PATH ...]"
|
||||
key = "KEY ..."
|
||||
desc = "DESCRIPTION"
|
||||
nothing = ""
|
||||
|
||||
-- Each dashed command-line option results in generation of an action
|
||||
-- in the Annex monad that performs the necessary setting.
|
||||
|
|
10
Command.hs
10
Command.hs
|
@ -146,6 +146,16 @@ withTempFile a params = return $ map a params
|
|||
withNothing :: SubCmdSeekNothing
|
||||
withNothing a _ = return [a]
|
||||
|
||||
{- Wraps a seek action so that it has something to act on even when no
 - parameters were given: in that case it is handed the work tree of
 - the git repository. Otherwise the parameters are passed through
 - unchanged. -}
withAll :: SubCmdSeekStrings -> SubCmdSeekStrings
withAll w a params
    | null params = do
        g <- Annex.gitRepo
        w a [Git.workTree g]
    | otherwise = w a params
|
||||
|
||||
{- filter out files from the state directory -}
|
||||
notState :: FilePath -> Bool
|
||||
notState f = stateLoc /= take (length stateLoc) f
|
||||
|
|
|
@ -13,9 +13,10 @@ import Command
|
|||
import Types
|
||||
import Core
|
||||
import Messages
|
||||
import qualified Command.FsckFile
|
||||
|
||||
seek :: [SubCmdSeek]
|
||||
seek = [withNothing start]
|
||||
seek = [withNothing start, withAll withFilesInGit Command.FsckFile.start]
|
||||
|
||||
{- Checks the whole annex for problems. -}
|
||||
start :: SubCmdStart
|
||||
|
@ -26,11 +27,9 @@ start = do
|
|||
perform :: SubCmdPerform
|
||||
perform = do
|
||||
ok <- checkUnused
|
||||
if (ok)
|
||||
if ok
|
||||
then return $ Just $ return True
|
||||
else do
|
||||
showLongNote "Possible problems detected."
|
||||
return Nothing
|
||||
else return Nothing
|
||||
|
||||
checkUnused :: Annex Bool
|
||||
checkUnused = do
|
||||
|
|
33
Command/FsckFile.hs
Normal file
33
Command/FsckFile.hs
Normal file
|
@ -0,0 +1,33 @@
|
|||
{- git-annex command
|
||||
-
|
||||
- Copyright 2010 Joey Hess <joey@kitenet.net>
|
||||
-
|
||||
- Licensed under the GNU GPL version 3 or higher.
|
||||
-}
|
||||
|
||||
module Command.FsckFile where
|
||||
|
||||
import Command
|
||||
import qualified Backend
|
||||
import Types
|
||||
import Messages
|
||||
|
||||
seek :: [SubCmdSeek]
|
||||
seek = [withFilesInGit start]
|
||||
|
||||
{- Starts a fsck of one annexed file. Nothing is done (and nothing is
 - shown) when Backend.hasKey reports that the key is not in its
 - backend; otherwise the file is announced and the backend check is
 - queued up. -}
start :: SubCmdStartString
start file = isAnnexed file $ \(key, backend) -> do
    inbackend <- Backend.hasKey key
    if inbackend
        then do
            showStart "fsck" file
            return $ Just $ perform key backend
        else return Nothing
|
||||
|
||||
{- Runs the backend's fsck check on the key. A passing check yields a
 - cleanup action that simply succeeds; a failing check yields Nothing. -}
perform :: Key -> Backend -> SubCmdPerform
perform key backend = do
    success <- Backend.fsckKey backend key
    return $ if success
        then Just (return True)
        else Nothing
|
18
Core.hs
18
Core.hs
|
@ -14,6 +14,7 @@ import System.Path
|
|||
import Control.Monad (when, unless, filterM)
|
||||
import System.Posix.Files
|
||||
import Data.Maybe
|
||||
import System.FilePath
|
||||
|
||||
import Types
|
||||
import Locations
|
||||
|
@ -201,6 +202,16 @@ fromAnnex key dest = do
|
|||
renameFile file dest
|
||||
removeDirectory dir
|
||||
|
||||
{- Moves a key's directory out of .git/annex/objects/ into its own
 - subdirectory of .git/annex/bad/, and returns the directory it was
 - moved to.
 -
 - Each key gets a separate destination, named after its content file.
 - Renaming straight onto the bad directory itself would turn the first
 - bad key into that directory, and would fail for every later key,
 - since the target would then already exist and not be empty.
 -
 - NOTE(review): still fails if this same key was moved to bad before
 - and its old directory has not been cleaned up.
 -}
moveBad :: Key -> Annex FilePath
moveBad key = do
    g <- Annex.gitRepo
    let content = annexLocation g key
    let src = parentDir content
    let baddir = annexBadLocation g
    -- annexBadLocation ends in "/", so plain concatenation is enough.
    let dest = baddir ++ takeFileName content
    liftIO $ do
        -- The bad directory may not exist yet on the first use.
        createDirectoryIfMissing True baddir
        renameDirectory src dest
    return dest
|
||||
|
||||
{- List of keys whose content exists in .git/annex/objects/ -}
|
||||
getKeysPresent :: Annex [Key]
|
||||
getKeysPresent = do
|
||||
|
@ -209,11 +220,12 @@ getKeysPresent = do
|
|||
getKeysPresent' :: FilePath -> Annex [Key]
|
||||
getKeysPresent' dir = do
|
||||
contents <- liftIO $ getDirectoryContents dir
|
||||
files <- liftIO $ filterM isreg contents
|
||||
files <- liftIO $ filterM present contents
|
||||
return $ map fileKey files
|
||||
where
|
||||
isreg f = do
|
||||
s <- getFileStatus $ dir ++ "/" ++ f
|
||||
present d = do
|
||||
s <- getFileStatus $ dir ++ "/" ++ d ++ "/"
|
||||
++ (takeFileName d)
|
||||
return $ isRegularFile s
|
||||
|
||||
{- List of keys referenced by symlinks in the git repo. -}
|
||||
|
|
|
@ -13,6 +13,7 @@ module Locations (
|
|||
annexLocation,
|
||||
annexLocationRelative,
|
||||
annexTmpLocation,
|
||||
annexBadLocation,
|
||||
annexDir,
|
||||
annexObjectDir,
|
||||
|
||||
|
@ -59,6 +60,10 @@ annexObjectDir r = annexDir r ++ "/objects"
|
|||
annexTmpLocation :: Git.Repo -> FilePath
|
||||
annexTmpLocation r = annexDir r ++ "/tmp/"
|
||||
|
||||
{- .git/annex/bad is used for bad files found during fsck -}
|
||||
annexBadLocation :: Git.Repo -> FilePath
|
||||
annexBadLocation r = annexDir r ++ "/bad/"
|
||||
|
||||
{- Converts a key into a filename fragment.
|
||||
-
|
||||
- Escape "/" in the key name, to keep a flat tree of files and avoid
|
||||
|
|
|
@ -75,7 +75,9 @@ data Backend = Backend {
|
|||
-- removes a key
|
||||
removeKey :: Key -> Annex Bool,
|
||||
-- checks if a backend is storing the content of a key
|
||||
hasKey :: Key -> Annex Bool
|
||||
hasKey :: Key -> Annex Bool,
|
||||
-- called during fsck to check a key
|
||||
fsckKey :: Key -> Annex Bool
|
||||
}
|
||||
|
||||
instance Show Backend where
|
||||
|
|
11
debian/changelog
vendored
11
debian/changelog
vendored
|
@ -1,3 +1,14 @@
|
|||
git-annex (0.06) UNRELEASED; urgency=low
|
||||
|
||||
* fsck: Check if annex.numcopies is satisfied.
|
||||
* fsck: Verify the sha1 of files when the SHA1 backend is used.
|
||||
* fsck: Verify the size of files when the WORM backend is used.
|
||||
* fsck: Allow specifying individual files to fsck if fscking everything
|
||||
is not desired.
|
||||
* fsck: Fix bug, introduced in 0.04, in detection of unused data.
|
||||
|
||||
-- Joey Hess <joeyh@debian.org> Sat, 13 Nov 2010 14:08:58 -0400
|
||||
|
||||
git-annex (0.05) unstable; urgency=low
|
||||
|
||||
* Optimize both pre-commit and lock subcommands to not call git diff
|
||||
|
|
|
@ -159,10 +159,12 @@ Many git-annex subcommands will stage changes for later `git commit` by you.
|
|||
|
||||
git annex setkey --key=1287765018:3 /tmp/file
|
||||
|
||||
* fsck
|
||||
* fsck [path ...]
|
||||
|
||||
This subcommand checks the whole annex for consistency, and warns
|
||||
about any problems found.
|
||||
With no parameters, this subcommand checks the whole annex for consistency,
|
||||
and warns about any problems found.
|
||||
|
||||
With parameters, only the specified files are checked.
|
||||
|
||||
# OPTIONS
|
||||
|
||||
|
|
|
@ -276,3 +276,37 @@ significantly for really big files. To make SHA1 the default, just
|
|||
add something like this to `.gitattributes`:
|
||||
|
||||
* git-annex-backend=SHA1
|
||||
|
||||
## fsck: verifying your data
|
||||
|
||||
You can use the fsck subcommand to check for problems in your data.
|
||||
What can be checked depends on the [[backend|backends]] you've used to store
|
||||
the data. For example, when you use the SHA1 backend, fsck will verify that
|
||||
the checksums of your files are good. Fsck also checks that the annex.numcopies
|
||||
setting is satisfied for all files, and it warns about any dangling values
|
||||
in `.git/annex/objects/`.
|
||||
|
||||
# git annex fsck
|
||||
fsck (checking for unused data...) (checking files...) ok
|
||||
|
||||
Fsck checks the entire repository for problems by default. But you can
|
||||
also specify the files to check.
|
||||
This is particularly useful if you're using sha1 and don't want to spend
|
||||
a long time checksumming everything.
|
||||
|
||||
# git annex fsck my_cool_big_file
|
||||
fsck my_cool_big_file (checksum...) ok
|
||||
|
||||
## fsck: When things go wrong
|
||||
|
||||
Fsck never deletes possibly bad data; instead it will be moved to
|
||||
`.git/annex/bad/` for you to review. Here is a sample of what fsck
|
||||
might say about a badly messed up annex:
|
||||
|
||||
# git annex fsck
|
||||
fsck (checking for unused data...)
|
||||
Some annexed data is no longer pointed to by any files in the repository.
|
||||
If this data is no longer needed, it can be removed using git-annex dropkey:
|
||||
WORM:1289672605:3:file
|
||||
(checking files...)
|
||||
|
||||
|
|
Loading…
Reference in a new issue