merge improved fsck types from git-repair and some associated changes

This commit is contained in:
Joey Hess 2013-11-30 14:29:11 -04:00
parent a8e8bd4360
commit 6edac746f0
6 changed files with 57 additions and 46 deletions

View file

@ -58,7 +58,7 @@ runRepair u mrmt destructiverepair = do
ok <- if u == myu ok <- if u == myu
then localrepair fsckresults then localrepair fsckresults
else remoterepair fsckresults else remoterepair fsckresults
liftAnnex $ writeFsckResults u Nothing liftAnnex $ clearFsckResults u
debug [ "Repaired", show u, show ok ] debug [ "Repaired", show u, show ok ]
return ok return ok

View file

@ -26,6 +26,7 @@ import Utility.NotificationBroadcaster
import Config import Config
import Utility.HumanTime import Utility.HumanTime
import Git.Repair import Git.Repair
import Git.Index
import Data.Time.Clock.POSIX import Data.Time.Clock.POSIX
import qualified Data.Set as S import qualified Data.Set as S
@ -43,7 +44,7 @@ sanityCheckerStartupThread startupdelay = namedThreadUnchecked "SanityCheckerSta
ifM (not <$> liftAnnex (inRepo (checkIndex S.empty))) ifM (not <$> liftAnnex (inRepo (checkIndex S.empty)))
( do ( do
notice ["corrupt index file found at startup; removing and restaging"] notice ["corrupt index file found at startup; removing and restaging"]
liftAnnex $ inRepo nukeIndex liftAnnex $ inRepo $ nukeFile . indexFile
{- Normally the startup scan avoids re-staging files, {- Normally the startup scan avoids re-staging files,
- but with the index deleted, everything needs to be - but with the index deleted, everything needs to be
- restaged. -} - restaged. -}

View file

@ -6,11 +6,12 @@
-} -}
module Git.Fsck ( module Git.Fsck (
FsckResults, FsckResults(..),
MissingObjects, MissingObjects,
findBroken, findBroken,
foundBroken, foundBroken,
findMissing, findMissing,
knownMissing,
) where ) where
import Common import Common
@ -23,9 +24,7 @@ import qualified Data.Set as S
type MissingObjects = S.Set Sha type MissingObjects = S.Set Sha
{- If fsck succeeded, Just a set of missing objects it found. data FsckResults = FsckFoundMissing MissingObjects | FsckFailed
- If it failed, Nothing. -}
type FsckResults = Maybe MissingObjects
{- Runs fsck to find some of the broken objects in the repository. {- Runs fsck to find some of the broken objects in the repository.
- May not find all broken objects, if fsck fails on bad data in some of - May not find all broken objects, if fsck fails on bad data in some of
@ -42,8 +41,8 @@ findBroken batchmode r = do
let objs = findShas output let objs = findShas output
badobjs <- findMissing objs r badobjs <- findMissing objs r
if S.null badobjs && not fsckok if S.null badobjs && not fsckok
then return Nothing then return FsckFailed
else return $ Just badobjs else return $ FsckFoundMissing badobjs
where where
(command, params) = ("git", fsckParams r) (command, params) = ("git", fsckParams r)
(command', params') (command', params')
@ -51,8 +50,12 @@ findBroken batchmode r = do
| otherwise = (command, params) | otherwise = (command, params)
foundBroken :: FsckResults -> Bool foundBroken :: FsckResults -> Bool
foundBroken Nothing = True foundBroken FsckFailed = True
foundBroken (Just s) = not (S.null s) foundBroken (FsckFoundMissing s) = not (S.null s)
knownMissing :: FsckResults -> MissingObjects
knownMissing FsckFailed = S.empty
knownMissing (FsckFoundMissing s) = s
{- Finds objects that are missing from the git repsitory, or are corrupt. {- Finds objects that are missing from the git repsitory, or are corrupt.
- -

View file

@ -7,6 +7,8 @@
module Git.Index where module Git.Index where
import Common
import Git
import Utility.Env import Utility.Env
{- Forces git to use the specified index file. {- Forces git to use the specified index file.
@ -19,9 +21,12 @@ import Utility.Env
override :: FilePath -> IO (IO ()) override :: FilePath -> IO (IO ())
override index = do override index = do
res <- getEnv var res <- getEnv var
setEnv var index True void $ setEnv var index True
return $ reset res return $ void $ reset res
where where
var = "GIT_INDEX_FILE" var = "GIT_INDEX_FILE"
reset (Just v) = setEnv var v True reset (Just v) = setEnv var v True
reset _ = unsetEnv var reset _ = unsetEnv var
indexFile :: Repo -> FilePath
indexFile r = localGitDir r </> "index"

View file

@ -15,7 +15,6 @@ module Git.Repair (
removeTrackingBranches, removeTrackingBranches,
checkIndex, checkIndex,
missingIndex, missingIndex,
nukeIndex,
emptyGoodCommits, emptyGoodCommits,
) where ) where
@ -26,6 +25,7 @@ import Git.Objects
import Git.Sha import Git.Sha
import Git.Types import Git.Types
import Git.Fsck import Git.Fsck
import Git.Index
import qualified Git.Config as Config import qualified Git.Config as Config
import qualified Git.Construct as Construct import qualified Git.Construct as Construct
import qualified Git.LsTree as LsTree import qualified Git.LsTree as LsTree
@ -43,16 +43,16 @@ import qualified Data.ByteString.Lazy as L
import Data.Tuple.Utils import Data.Tuple.Utils
{- Given a set of bad objects found by git fsck, which may not {- Given a set of bad objects found by git fsck, which may not
- be complete, finds and removes all corrupt objects, and - be complete, finds and removes all corrupt objects,
- returns a list of missing objects, which need to be - and returns missing objects.
- found elsewhere to finish recovery.
-} -}
cleanCorruptObjects :: FsckResults -> Repo -> IO (Maybe MissingObjects) cleanCorruptObjects :: FsckResults -> Repo -> IO FsckResults
cleanCorruptObjects fsckresults r = do cleanCorruptObjects fsckresults r = do
void $ explodePacks r void $ explodePacks r
objs <- listLooseObjectShas r objs <- listLooseObjectShas r
mapM_ (tryIO . allowRead . looseObjectFile r) objs
bad <- findMissing objs r bad <- findMissing objs r
void $ removeLoose r $ S.union bad (fromMaybe S.empty fsckresults) void $ removeLoose r $ S.union bad (knownMissing fsckresults)
-- Rather than returning the loose objects that were removed, re-run -- Rather than returning the loose objects that were removed, re-run
-- fsck. Other missing objects may have been in the packs, -- fsck. Other missing objects may have been in the packs,
-- and this way fsck will find them. -- and this way fsck will find them.
@ -98,20 +98,17 @@ explodePacks r = do
- If another clone of the repository exists locally, which might not be a - If another clone of the repository exists locally, which might not be a
- remote of the repo being repaired, its path can be passed as a reference - remote of the repo being repaired, its path can be passed as a reference
- repository. - repository.
- Can also be run with Nothing, if it's not known which objects are
- missing, just that some are. (Ie, fsck failed badly.)
-} -}
retrieveMissingObjects :: Maybe MissingObjects -> Maybe FilePath -> Repo -> IO (Maybe MissingObjects) retrieveMissingObjects :: FsckResults -> Maybe FilePath -> Repo -> IO FsckResults
retrieveMissingObjects missing referencerepo r retrieveMissingObjects missing referencerepo r
| missing == Just S.empty = return $ Just S.empty | not (foundBroken missing) = return missing
| otherwise = withTmpDir "tmprepo" $ \tmpdir -> do | otherwise = withTmpDir "tmprepo" $ \tmpdir -> do
unlessM (boolSystem "git" [Params "init", File tmpdir]) $ unlessM (boolSystem "git" [Params "init", File tmpdir]) $
error $ "failed to create temp repository in " ++ tmpdir error $ "failed to create temp repository in " ++ tmpdir
tmpr <- Config.read =<< Construct.fromAbsPath tmpdir tmpr <- Config.read =<< Construct.fromAbsPath tmpdir
stillmissing <- pullremotes tmpr (remotes r) fetchrefstags missing stillmissing <- pullremotes tmpr (remotes r) fetchrefstags missing
if stillmissing == Just S.empty if S.null (knownMissing stillmissing)
then return $ Just S.empty then return stillmissing
else pullremotes tmpr (remotes r) fetchallrefs stillmissing else pullremotes tmpr (remotes r) fetchallrefs stillmissing
where where
pullremotes tmpr [] fetchrefs stillmissing = case referencerepo of pullremotes tmpr [] fetchrefs stillmissing = case referencerepo of
@ -121,12 +118,12 @@ retrieveMissingObjects missing referencerepo r
void $ explodePacks tmpr void $ explodePacks tmpr
void $ copyObjects tmpr r void $ copyObjects tmpr r
case stillmissing of case stillmissing of
Nothing -> return $ Just S.empty FsckFailed -> return $ FsckFailed
Just s -> Just <$> findMissing (S.toList s) r FsckFoundMissing s -> FsckFoundMissing <$> findMissing (S.toList s) r
, return stillmissing , return stillmissing
) )
pullremotes tmpr (rmt:rmts) fetchrefs ms pullremotes tmpr (rmt:rmts) fetchrefs ms
| ms == Just S.empty = return $ Just S.empty | not (foundBroken ms) = return ms
| otherwise = do | otherwise = do
putStrLn $ "Trying to recover missing objects from remote " ++ repoDescribe rmt ++ "." putStrLn $ "Trying to recover missing objects from remote " ++ repoDescribe rmt ++ "."
ifM (fetchfrom (repoLocation rmt) fetchrefs tmpr) ifM (fetchfrom (repoLocation rmt) fetchrefs tmpr)
@ -134,10 +131,10 @@ retrieveMissingObjects missing referencerepo r
void $ explodePacks tmpr void $ explodePacks tmpr
void $ copyObjects tmpr r void $ copyObjects tmpr r
case ms of case ms of
Nothing -> pullremotes tmpr rmts fetchrefs ms FsckFailed -> pullremotes tmpr rmts fetchrefs ms
Just s -> do FsckFoundMissing s -> do
stillmissing <- findMissing (S.toList s) r stillmissing <- findMissing (S.toList s) r
pullremotes tmpr rmts fetchrefs (Just stillmissing) pullremotes tmpr rmts fetchrefs (FsckFoundMissing stillmissing)
, pullremotes tmpr rmts fetchrefs ms , pullremotes tmpr rmts fetchrefs ms
) )
fetchfrom fetchurl ps = runBool $ fetchfrom fetchurl ps = runBool $
@ -368,7 +365,7 @@ rewriteIndex missing r
| otherwise = do | otherwise = do
(bad, good, cleanup) <- partitionIndex missing r (bad, good, cleanup) <- partitionIndex missing r
unless (null bad) $ do unless (null bad) $ do
nukeIndex r nukeFile (indexFile r)
UpdateIndex.streamUpdateIndex r UpdateIndex.streamUpdateIndex r
=<< (catMaybes <$> mapM reinject good) =<< (catMaybes <$> mapM reinject good)
void cleanup void cleanup
@ -380,9 +377,6 @@ rewriteIndex missing r
UpdateIndex.stageFile sha blobtype file r UpdateIndex.stageFile sha blobtype file r
reinject _ = return Nothing reinject _ = return Nothing
nukeIndex :: Repo -> IO ()
nukeIndex r = nukeFile (localGitDir r </> "index")
newtype GoodCommits = GoodCommits (S.Set Sha) newtype GoodCommits = GoodCommits (S.Set Sha)
emptyGoodCommits :: GoodCommits emptyGoodCommits :: GoodCommits
@ -423,6 +417,9 @@ preRepair g = do
nukeFile headfile nukeFile headfile
writeFile headfile "ref: refs/heads/master" writeFile headfile "ref: refs/heads/master"
explodePackedRefsFile g explodePackedRefsFile g
unless (repoIsLocalBare g) $ do
let f = indexFile g
void $ tryIO $ allowWrite f
where where
headfile = localGitDir g </> "HEAD" headfile = localGitDir g </> "HEAD"
validhead s = "ref: refs/" `isPrefixOf` s || isJust (extractSha s) validhead s = "ref: refs/" `isPrefixOf` s || isJust (extractSha s)
@ -449,7 +446,7 @@ runRepair' fsckresult forced referencerepo g = do
missing <- cleanCorruptObjects fsckresult g missing <- cleanCorruptObjects fsckresult g
stillmissing <- retrieveMissingObjects missing referencerepo g stillmissing <- retrieveMissingObjects missing referencerepo g
case stillmissing of case stillmissing of
Just s FsckFoundMissing s
| S.null s -> if repoIsLocalBare g | S.null s -> if repoIsLocalBare g
then successfulfinish S.empty [] then successfulfinish S.empty []
else ifM (checkIndex S.empty g) else ifM (checkIndex S.empty g)
@ -471,13 +468,13 @@ runRepair' fsckresult forced referencerepo g = do
, "missing objects could not be recovered!" , "missing objects could not be recovered!"
] ]
unsuccessfulfinish s unsuccessfulfinish s
Nothing FsckFailed
| forced -> ifM (pure (repoIsLocalBare g) <||> checkIndex S.empty g) | forced -> ifM (pure (repoIsLocalBare g) <||> checkIndex S.empty g)
( do ( do
missing' <- cleanCorruptObjects Nothing g missing' <- cleanCorruptObjects FsckFailed g
case missing' of case missing' of
Nothing -> return (False, S.empty, []) FsckFailed -> return (False, S.empty, [])
Just stillmissing' -> continuerepairs stillmissing' FsckFoundMissing stillmissing' -> continuerepairs stillmissing'
, corruptedindex , corruptedindex
) )
| otherwise -> unsuccessfulfinish S.empty | otherwise -> unsuccessfulfinish S.empty
@ -517,7 +514,7 @@ runRepair' fsckresult forced referencerepo g = do
return (True, stillmissing, modifiedbranches) return (True, stillmissing, modifiedbranches)
corruptedindex = do corruptedindex = do
nukeIndex g nukeFile (indexFile g)
-- The corrupted index can prevent fsck from finding other -- The corrupted index can prevent fsck from finding other
-- problems, so re-run repair. -- problems, so re-run repair.
fsckresult' <- findBroken False g fsckresult' <- findBroken False g

View file

@ -7,7 +7,8 @@
module Logs.FsckResults ( module Logs.FsckResults (
writeFsckResults, writeFsckResults,
readFsckResults readFsckResults,
clearFsckResults,
) where ) where
import Common.Annex import Common.Annex
@ -22,8 +23,8 @@ writeFsckResults u fsckresults = do
logfile <- fromRepo $ gitAnnexFsckResultsLog u logfile <- fromRepo $ gitAnnexFsckResultsLog u
liftIO $ liftIO $
case fsckresults of case fsckresults of
Nothing -> store S.empty logfile FsckFailed -> store S.empty logfile
Just s FsckFoundMissing s
| S.null s -> nukeFile logfile | S.null s -> nukeFile logfile
| otherwise -> store s logfile | otherwise -> store s logfile
where where
@ -35,9 +36,13 @@ writeFsckResults u fsckresults = do
readFsckResults :: UUID -> Annex FsckResults readFsckResults :: UUID -> Annex FsckResults
readFsckResults u = do readFsckResults u = do
logfile <- fromRepo $ gitAnnexFsckResultsLog u logfile <- fromRepo $ gitAnnexFsckResultsLog u
liftIO $ catchDefaultIO (Just S.empty) $ liftIO $ catchDefaultIO (FsckFoundMissing S.empty) $
deserialize <$> readFile logfile deserialize <$> readFile logfile
where where
deserialize l = deserialize l =
let s = S.fromList $ map Ref $ lines l let s = S.fromList $ map Ref $ lines l
in if S.null s then Nothing else Just s in if S.null s then FsckFailed else FsckFoundMissing s
clearFsckResults :: UUID -> Annex ()
clearFsckResults = liftIO . nukeFile <=< fromRepo . gitAnnexFsckResultsLog