git-annex/Command/Fsck.hs

484 lines
15 KiB
Haskell
Raw Normal View History

2010-11-06 21:06:19 +00:00
{- git-annex command
-
- Copyright 2010-2011 Joey Hess <joey@kitenet.net>
2010-11-06 21:06:19 +00:00
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Command.Fsck where
2011-10-05 20:02:51 +00:00
import Common.Annex
2010-11-06 21:06:19 +00:00
import Command
import qualified Annex
import qualified Annex.Queue
import qualified Remote
import qualified Types.Backend
import qualified Types.Key
import qualified Backend
2011-10-04 04:40:47 +00:00
import Annex.Content
2013-01-08 16:41:09 +00:00
import Annex.Content.Direct
import Annex.Perms
2011-10-15 20:21:08 +00:00
import Logs.Location
import Logs.Trust
import Annex.UUID
2011-07-06 00:36:43 +00:00
import Utility.DataUnits
2011-09-23 22:13:24 +00:00
import Utility.FileMode
import Config
import qualified Option
import Types.Key
import Utility.HumanTime
2010-11-06 21:06:19 +00:00
2012-09-25 18:16:34 +00:00
import System.Posix.Process (getProcessID)
import Data.Time.Clock.POSIX
import Data.Time
import System.Posix.Types (EpochTime)
import System.Locale
def :: [Command]
2013-01-06 19:42:49 +00:00
def = [withOptions options $ command "fsck" paramPaths seek
"check for problems"]
fromOption :: Option
fromOption = Option.field ['f'] "from" paramRemote "check remote"
startIncrementalOption :: Option
startIncrementalOption = Option.flag ['S'] "incremental" "start an incremental fsck"
moreIncrementalOption :: Option
moreIncrementalOption = Option.flag ['m'] "more" "continue an incremental fsck"
2012-09-25 23:43:33 +00:00
incrementalScheduleOption :: Option
incrementalScheduleOption = Option.field [] "incremental-schedule" paramTime
"schedule incremental fscking"
options :: [Option]
options =
[ fromOption
, startIncrementalOption
, moreIncrementalOption
2012-09-25 23:43:33 +00:00
, incrementalScheduleOption
]
seek :: [CommandSeek]
seek =
[ withField fromOption Remote.byName $ \from ->
2012-09-25 19:06:33 +00:00
withIncremental $ \i -> withFilesInGit $ whenAnnexed $ start from i
, withIncremental $ \i -> withBarePresentKeys $ startBare i
]
2010-11-15 22:22:50 +00:00
2012-09-25 19:06:33 +00:00
withIncremental :: (Incremental -> CommandSeek) -> CommandSeek
withIncremental = withValue $ do
2012-09-25 23:43:33 +00:00
i <- maybe (return False) (checkschedule . parseDuration)
=<< Annex.getField (Option.name incrementalScheduleOption)
starti <- Annex.getFlag (Option.name startIncrementalOption)
morei <- Annex.getFlag (Option.name moreIncrementalOption)
case (i, starti, morei) of
(False, False, False) -> return NonIncremental
(False, True, _) -> startIncremental
(False ,False, True) -> ContIncremental <$> getStartTime
(True, _, _) ->
maybe startIncremental (return . ContIncremental . Just)
=<< getStartTime
2012-11-12 05:05:04 +00:00
where
startIncremental = do
recordStartTime
return StartIncremental
checkschedule Nothing = error "bad --incremental-schedule value"
checkschedule (Just delta) = do
Annex.addCleanup "" $ do
v <- getStartTime
case v of
Nothing -> noop
Just started -> do
now <- liftIO getPOSIXTime
when (now - realToFrac started >= delta) $
resetStartTime
return True
start :: Maybe Remote -> Incremental -> FilePath -> (Key, Backend) -> CommandStart
start from inc file (key, backend) = do
numcopies <- numCopies file
case from of
2012-09-25 19:06:33 +00:00
Nothing -> go $ perform key file backend numcopies
Just r -> go $ performRemote key file backend numcopies r
2012-11-12 05:05:04 +00:00
where
go = runFsck inc file key
2010-11-15 22:22:50 +00:00
2012-09-25 19:06:33 +00:00
perform :: Key -> FilePath -> Backend -> Maybe Int -> Annex Bool
perform key file backend numcopies = check
2011-10-29 20:45:06 +00:00
-- order matters
[ fixLink key file
, verifyLocationLog key file
2011-10-29 20:45:06 +00:00
, checkKeySize key
, checkBackend backend key
, checkKeyNumCopies key file numcopies
2011-10-29 20:45:06 +00:00
]
{- To fsck a remote, the content is retrieved to a tmp file,
- and checked locally. -}
2012-09-25 19:06:33 +00:00
performRemote :: Key -> FilePath -> Backend -> Maybe Int -> Remote -> Annex Bool
performRemote key file backend numcopies remote =
dispatch =<< Remote.hasKey remote key
2012-11-12 05:05:04 +00:00
where
dispatch (Left err) = do
showNote err
return False
dispatch (Right True) = withtmp $ \tmpfile ->
ifM (getfile tmpfile)
( go True (Just tmpfile)
, go True Nothing
)
dispatch (Right False) = go False Nothing
go present localcopy = check
[ verifyLocationLogRemote key file remote present
, checkKeySizeRemote key remote localcopy
, checkBackendRemote backend key remote localcopy
, checkKeyNumCopies key file numcopies
]
withtmp a = do
pid <- liftIO getProcessID
t <- fromRepo gitAnnexTmpDir
createAnnexDirectory t
let tmp = t </> "fsck" ++ show pid ++ "." ++ keyFile key
let cleanup = liftIO $ catchIO (removeFile tmp) (const noop)
cleanup
cleanup `after` a tmp
getfile tmp =
ifM (Remote.retrieveKeyFileCheap remote key tmp)
( return True
, ifM (Annex.getState Annex.fast)
( return False
, Remote.retrieveKeyFile remote key Nothing tmp
)
2012-11-12 05:05:04 +00:00
)
{- To fsck a bare repository, fsck each key in the location log. -}
withBarePresentKeys :: (Key -> CommandStart) -> CommandSeek
2011-10-29 22:47:53 +00:00
withBarePresentKeys a params = isBareRepo >>= go
2012-11-12 05:05:04 +00:00
where
go False = return []
go True = do
unless (null params) $
error "fsck should be run without parameters in a bare repository"
map a <$> loggedKeys
2012-09-25 19:06:33 +00:00
startBare :: Incremental -> Key -> CommandStart
startBare inc key = case Backend.maybeLookupBackendName (Types.Key.keyBackendName key) of
Nothing -> stop
2012-09-25 19:06:33 +00:00
Just backend -> runFsck inc (key2file key) key $ performBare key backend
{- Note that numcopies cannot be checked in a bare repository, because
- getting the numcopies value requires a working copy with .gitattributes
- files. -}
2012-09-25 19:06:33 +00:00
performBare :: Key -> Backend -> Annex Bool
performBare key backend = check
[ verifyLocationLog key (key2file key)
, checkKeySize key
, checkBackend backend key
]
2012-09-25 19:06:33 +00:00
check :: [Annex Bool] -> Annex Bool
check cs = all id <$> sequence cs
2013-01-06 19:42:49 +00:00
{- Checks that the file's symlink points correctly to the content.
-
- In direct mode, there is only a symlink when the content is not present.
-}
fixLink :: Key -> FilePath -> Annex Bool
fixLink key file = do
want <- calcGitLink file key
2013-01-06 19:42:49 +00:00
have <- liftIO $ catchMaybeIO $ readSymbolicLink file
maybe noop (go want) have
return True
where
go want have = when (want /= have) $ do
{- Version 3.20120227 had a bug that could cause content
- to be stored in the wrong hash directory. Clean up
- after the bug by moving the content.
-}
whenM (liftIO $ doesFileExist file) $
unlessM (inAnnex key) $ do
2012-06-12 15:32:06 +00:00
showNote "fixing content location"
dir <- liftIO $ parentDir <$> absPath file
let content = absPathFrom dir have
liftIO $ allowWrite (parentDir content)
moveAnnex key content
2012-06-12 15:32:06 +00:00
showNote "fixing link"
liftIO $ createDirectoryIfMissing True (parentDir file)
liftIO $ removeFile file
liftIO $ createSymbolicLink want file
Annex.Queue.addCommand "add" [Param "--force", Param "--"] [file]
{- Checks that the location log reflects the current status of the key,
2012-12-13 04:45:27 +00:00
- in this repository only. -}
verifyLocationLog :: Key -> String -> Annex Bool
verifyLocationLog key desc = do
present <- inAnnex key
2013-01-06 19:42:49 +00:00
direct <- isDirect
u <- getUUID
2013-01-06 19:42:49 +00:00
{- Since we're checking that a key's file is present, throw
- in a permission fixup here too. -}
when (present && not direct) $ do
file <- inRepo $ gitAnnexLocation key
freezeContent file
freezeContentDir file
2013-01-06 19:42:49 +00:00
{- In direct mode, modified files will show up as not present,
- but that is expected and not something to do anything about. -}
if (direct && not present)
then return True
else verifyLocationLog' key desc present u (logChange key u)
verifyLocationLogRemote :: Key -> String -> Remote -> Bool -> Annex Bool
verifyLocationLogRemote key desc remote present =
verifyLocationLog' key desc present (Remote.uuid remote)
(Remote.logStatus remote key)
verifyLocationLog' :: Key -> String -> Bool -> UUID -> (LogStatus -> Annex ()) -> Annex Bool
verifyLocationLog' key desc present u bad = do
uuids <- Remote.keyLocations key
case (present, u `elem` uuids) of
(True, False) -> do
fix InfoPresent
-- There is no data loss, so do not fail.
return True
(False, True) -> do
fix InfoMissing
warning $
"** Based on the location log, " ++ desc
++ "\n** was expected to be present, " ++
"but its content is missing."
return False
_ -> return True
2012-11-12 05:05:04 +00:00
where
fix s = do
showNote "fixing location log"
bad s
{- The size of the data for a key is checked against the size encoded in
2013-01-06 19:42:49 +00:00
- the key's metadata, if available.
-
- Not checked in direct mode, because files can be changed directly.
-}
checkKeySize :: Key -> Annex Bool
2013-01-06 19:42:49 +00:00
checkKeySize key = ifM isDirect
( return True
, do
file <- inRepo $ gitAnnexLocation key
ifM (liftIO $ doesFileExist file)
( checkKeySizeOr badContent key file
, return True
)
)
checkKeySizeRemote :: Key -> Remote -> Maybe FilePath -> Annex Bool
checkKeySizeRemote _ _ Nothing = return True
checkKeySizeRemote key remote (Just file) =
checkKeySizeOr (badContentRemote remote) key file
checkKeySizeOr :: (Key -> Annex String) -> Key -> FilePath -> Annex Bool
checkKeySizeOr bad key file = case Types.Key.keySize key of
Nothing -> return True
Just size -> do
size' <- fromIntegral . fileSize
2012-06-12 15:32:06 +00:00
<$> liftIO (getFileStatus file)
comparesizes size size'
2012-11-12 05:05:04 +00:00
where
comparesizes a b = do
let same = a == b
unless same $ badsize a b
return same
badsize a b = do
msg <- bad key
warning $ concat
[ "Bad file size ("
, compareSizes storageUnits True a b
, "); "
, msg
]
2013-01-06 19:42:49 +00:00
{- Runs the backend specific check on a key's content.
-
2013-01-08 16:41:09 +00:00
- In direct mode this is not done if the file has clearly been modified,
- because modification of direct mode files is allowed. It's still done
- if the file does not appear modified, to catch disk corruption, etc.
-}
2011-12-31 08:11:39 +00:00
checkBackend :: Backend -> Key -> Annex Bool
2013-01-08 16:41:09 +00:00
checkBackend backend key = do
file <- inRepo $ gitAnnexLocation key
ifM isDirect
( ifM (goodContent key file)
( checkBackendOr' (badContentDirect file) backend key file
(goodContent key file)
2013-01-08 16:41:09 +00:00
, return True
)
, checkBackendOr badContent backend key file
)
checkBackendRemote :: Backend -> Key -> Remote -> Maybe FilePath -> Annex Bool
checkBackendRemote backend key remote = maybe (return True) go
2012-11-12 05:05:04 +00:00
where
go file = checkBackendOr (badContentRemote remote) backend key file
checkBackendOr :: (Key -> Annex String) -> Backend -> Key -> FilePath -> Annex Bool
checkBackendOr bad backend key file =
checkBackendOr' bad backend key file (return True)
checkBackendOr' :: (Key -> Annex String) -> Backend -> Key -> FilePath -> Annex Bool -> Annex Bool
checkBackendOr' bad backend key file postcheck =
case Types.Backend.fsckKey backend of
Nothing -> return True
Just a -> do
ok <- a key file
ifM postcheck
( do
unless ok $ do
msg <- bad key
warning $ "Bad file content; " ++ msg
return ok
, return True
)
2011-10-29 20:45:06 +00:00
checkKeyNumCopies :: Key -> FilePath -> Maybe Int -> Annex Bool
checkKeyNumCopies key file numcopies = do
needed <- getNumCopies numcopies
(untrustedlocations, safelocations) <- trustPartition UnTrusted =<< Remote.keyLocations key
let present = length safelocations
if present < needed
then do
ppuuids <- Remote.prettyPrintUUIDs "untrusted" untrustedlocations
2011-10-29 20:45:06 +00:00
warning $ missingNote file present needed ppuuids
return False
else return True
missingNote :: String -> Int -> Int -> String -> String
missingNote file 0 _ [] =
"** No known copies exist of " ++ file
missingNote file 0 _ untrusted =
"Only these untrusted locations may have copies of " ++ file ++
"\n" ++ untrusted ++
"Back it up to trusted locations with git-annex copy."
missingNote file present needed [] =
"Only " ++ show present ++ " of " ++ show needed ++
" trustworthy copies exist of " ++ file ++
"\nBack it up with git-annex copy."
missingNote file present needed untrusted =
missingNote file present needed [] ++
"\nThe following untrusted locations may also have copies: " ++
"\n" ++ untrusted
{- Bad content is moved aside. -}
badContent :: Key -> Annex String
badContent key = do
dest <- moveBad key
return $ "moved to " ++ dest
{- Bad content is left where it is, but we touch the file, so it'll be
- committed to a new key. -}
badContentDirect :: FilePath -> Key -> Annex String
badContentDirect file key = do
void $ liftIO $ catchMaybeIO $ touchFile file
logStatus key InfoMissing
return $ "left in place for you to examine"
badContentRemote :: Remote -> Key -> Annex String
badContentRemote remote key = do
ok <- Remote.removeKey remote key
-- better safe than sorry: assume the remote dropped the key
-- even if it seemed to fail; the failure could have occurred
-- after it really dropped it
Remote.logStatus remote key InfoMissing
return $ (if ok then "dropped from " else "failed to drop from ")
++ Remote.name remote
2012-09-25 17:22:12 +00:00
data Incremental = StartIncremental | ContIncremental (Maybe EpochTime) | NonIncremental
2012-09-25 19:06:33 +00:00
deriving (Eq)
runFsck :: Incremental -> FilePath -> Key -> Annex Bool -> CommandStart
runFsck inc file key a = ifM (needFsck inc key)
( do
showStart "fsck" file
next $ do
ok <- a
when ok $
recordFsckTime key
next $ return ok
, stop
)
2012-09-25 19:06:33 +00:00
{- Check if a key needs to be fscked, with support for incremental fscks. -}
needFsck :: Incremental -> Key -> Annex Bool
needFsck (ContIncremental Nothing) _ = return True
needFsck (ContIncremental starttime) key = do
2012-09-25 19:06:33 +00:00
fscktime <- getFsckTime key
return $ fscktime < starttime
needFsck _ _ = return True
2012-09-25 19:06:33 +00:00
{- To record the time that a key was last fscked, without
2012-09-25 18:16:34 +00:00
- modifying its mtime, we set the timestamp of its parent directory.
- Each annexed file is the only thing in its directory, so this is fine.
-
- To record that the file was fscked, the directory's sticky bit is set.
- (None of the normal unix behaviors of the sticky bit should matter, so
- we can reuse this permission bit.)
-
- Note that this relies on the parent directory being deleted when a file
2012-09-25 19:06:33 +00:00
- is dropped. That way, if it's later added back, the fsck record
2012-09-25 18:16:34 +00:00
- won't still be present.
-}
2012-09-25 19:06:33 +00:00
recordFsckTime :: Key -> Annex ()
recordFsckTime key = do
parent <- parentDir <$> inRepo (gitAnnexLocation key)
liftIO $ void $ tryIO $ do
touchFile parent
setSticky parent
getFsckTime :: Key -> Annex (Maybe EpochTime)
getFsckTime key = do
parent <- parentDir <$> inRepo (gitAnnexLocation key)
liftIO $ catchDefaultIO Nothing $ do
s <- getFileStatus parent
return $ if isSticky $ fileMode s
then Just $ modificationTime s
else Nothing
2012-09-25 18:16:34 +00:00
{- Records the start time of an interactive fsck.
2012-09-25 18:16:34 +00:00
-
- To guard against time stamp damange (for example, if an annex directory
- is copied without -a), the fsckstate file contains a time that should
- be identical to its modification time. -}
recordStartTime :: Annex ()
2012-09-25 18:16:34 +00:00
recordStartTime = do
f <- fromRepo gitAnnexFsckState
2012-09-25 19:06:33 +00:00
createAnnexDirectory $ parentDir f
2012-09-25 18:16:34 +00:00
liftIO $ do
nukeFile f
h <- openFile f WriteMode
t <- modificationTime <$> getFileStatus f
hPutStr h $ showTime $ realToFrac t
hClose h
2012-11-12 05:05:04 +00:00
where
showTime :: POSIXTime -> String
showTime = show
2012-09-25 18:16:34 +00:00
resetStartTime :: Annex ()
resetStartTime = liftIO . nukeFile =<< fromRepo gitAnnexFsckState
2012-09-25 18:16:34 +00:00
{- Gets the incremental fsck start time. -}
getStartTime :: Annex (Maybe EpochTime)
getStartTime = do
f <- fromRepo gitAnnexFsckState
liftIO $ catchDefaultIO Nothing $ do
timestamp <- modificationTime <$> getFileStatus f
t <- readishTime <$> readFile f
return $ if Just (realToFrac timestamp) == t
then Just timestamp
else Nothing
2012-11-12 05:05:04 +00:00
where
readishTime :: String -> Maybe POSIXTime
readishTime s = utcTimeToPOSIXSeconds <$>
parseTime defaultTimeLocale "%s%Qs" s