fsck improvements

* fsck: Check if annex.numcopies is satisfied.
* fsck: Verify the sha1 of files when the SHA1 backend is used.
* fsck: Verify the size of files when the WORM backend is used.
* fsck: Allow specifying individual files to fsk if fscking everything
  is not desired.
* fsck: Fix bug, introduced in 0.04, in detection of unused data.
This commit is contained in:
Joey Hess 2010-11-13 14:59:27 -04:00
parent d4d65a3c92
commit 5fa25a812a
15 changed files with 236 additions and 31 deletions

View file

@ -4,15 +4,15 @@
- it relies on the file contents in .git/annex/ in this repo,
- and other accessible repos.
-
- This is an abstract backend; getKey has to be implemented to complete
- it.
- This is an abstract backend; name, getKey and fsckKey have to be implemented
- to complete it.
-
- Copyright 2010 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Backend.File (backend) where
module Backend.File (backend, checkKey) where
import Control.Monad.State
import System.Directory
@ -34,7 +34,8 @@ backend = Backend {
storeFileKey = dummyStore,
retrieveKeyFile = copyKeyFile,
removeKey = checkRemoveKey,
hasKey = checkKeyFile
hasKey = checkKeyFile,
fsckKey = mustProvide
}
mustProvide :: a
@ -97,14 +98,12 @@ checkRemoveKey key = do
if (force)
then return True
else do
g <- Annex.gitRepo
remotes <- Remotes.keyPossibilities key
let numcopies = read $ Git.configGet g config "1"
numcopies <- getNumCopies
if (numcopies > length remotes)
then notEnoughCopies numcopies (length remotes) []
else findcopies numcopies 0 remotes []
where
config = "annex.numcopies"
findcopies need have [] bad =
if (have >= need)
then return True
@ -147,3 +146,33 @@ showTriedRemotes [] = return ()
showTriedRemotes remotes =
showLongNote $ "I was unable to access these remotes: " ++
(Remotes.list remotes)
getNumCopies :: Annex Int
getNumCopies = do
g <- Annex.gitRepo
return $ read $ Git.configGet g config "1"
where
config = "annex.numcopies"
{- This is used to check that numcopies is satisfied for the key on fsck.
- This trusts the location log, and so checks all keys, even those with
- data not present in the current annex.
-
- The passed action is first run to allow backends deriving this one
- to do their own checks.
-}
checkKey :: (Key -> Annex Bool) -> Key -> Annex Bool
checkKey a key = do
a_ok <- a key
copies_ok <- checkKeyNumCopies key
return $ a_ok && copies_ok
checkKeyNumCopies :: Key -> Annex Bool
checkKeyNumCopies key = do
remotes <- Remotes.keyPossibilities key
numcopies <- getNumCopies
if (length remotes < numcopies)
then do
showLongNote $ "only " ++ show (length remotes) ++ " of " ++ show numcopies ++ " copies"
return False
else return True

View file

@ -11,24 +11,51 @@ import Control.Monad.State
import Data.String.Utils
import System.Cmd.Utils
import System.IO
import System.Directory
import qualified Backend.File
import TypeInternals
import Messages
import qualified Annex
import Locations
import Core
backend :: Backend
backend = Backend.File.backend {
name = "SHA1",
getKey = keyValue
getKey = keyValue,
fsckKey = Backend.File.checkKey checkKeySHA1
}
-- checksum the file to get its key
keyValue :: FilePath -> Annex (Maybe Key)
keyValue file = do
sha1 :: FilePath -> Annex String
sha1 file = do
showNote "checksum..."
liftIO $ pOpen ReadFromPipe "sha1sum" [file] $ \h -> do
line <- hGetLine h
let bits = split " " line
if (null bits)
then error "sha1sum parse error"
else return $ Just $ Key ((name backend), bits !! 0)
else return $ bits !! 0
-- A key is a sha1 of its contents.
keyValue :: FilePath -> Annex (Maybe Key)
keyValue file = do
s <- sha1 file
return $ Just $ Key ((name backend), s)
-- A key's sha1 is checked during fsck.
checkKeySHA1 :: Key -> Annex Bool
checkKeySHA1 key = do
g <- Annex.gitRepo
let file = annexLocation g key
present <- liftIO $ doesFileExist file
if (not present)
then return True
else do
s <- sha1 file
if (s == keyName key)
then return True
else do
dest <- moveBad key
showNote $ "bad file content (moved to "++dest++")"
return False

View file

@ -20,8 +20,13 @@ backend = Backend {
getKey = keyValue,
storeFileKey = dummyStore,
retrieveKeyFile = downloadUrl,
-- allow keys to be removed; presumably they can always be
-- downloaded again
removeKey = dummyOk,
hasKey = dummyOk
-- similarly, keys are always assumed to be out there on the web
hasKey = dummyOk,
-- and nothing needed to fsck
fsckKey = dummyOk
}
-- cannot generate url from filename
@ -32,7 +37,6 @@ keyValue _ = return Nothing
dummyStore :: FilePath -> Key -> Annex Bool
dummyStore _ _ = return False
-- allow keys to be removed; presumably they can always be downloaded again
dummyOk :: Key -> Annex Bool
dummyOk _ = return True

View file

@ -10,14 +10,22 @@ module Backend.WORM (backend) where
import Control.Monad.State
import System.FilePath
import System.Posix.Files
import System.Posix.Types
import System.Directory
import Data.String.Utils
import qualified Backend.File
import TypeInternals
import Locations
import qualified Annex
import Core
import Messages
backend :: Backend
backend = Backend.File.backend {
name = "WORM",
getKey = keyValue
getKey = keyValue,
fsckKey = Backend.File.checkKey checkKeySize
}
-- The key is formed from the file size, modification time, and the
@ -36,3 +44,27 @@ keyValue file = do
(show $ fileSize stat)
base = takeFileName file
sep = ":"
{- Extracts the file size from a key. -}
keySize :: Key -> FileOffset
keySize key = read $ section !! 2
where
section = split ":" (keyName key)
{- The size of the data for a key is checked against the size encoded in
- the key. Note that the modification time is not checked. -}
checkKeySize :: Key -> Annex Bool
checkKeySize key = do
g <- Annex.gitRepo
let file = annexLocation g key
present <- liftIO $ doesFileExist file
if (not present)
then return True
else do
s <- liftIO $ getFileStatus file
if (fileSize s == keySize key)
then return True
else do
dest <- moveBad key
showNote $ "bad file size (moved to "++dest++")"
return False