merge improved fsck types from git-repair and some associated changes

This commit is contained in:
Joey Hess 2013-11-30 14:29:11 -04:00
parent a8e8bd4360
commit 6edac746f0
6 changed files with 57 additions and 46 deletions

View file

@ -58,7 +58,7 @@ runRepair u mrmt destructiverepair = do
ok <- if u == myu
then localrepair fsckresults
else remoterepair fsckresults
liftAnnex $ writeFsckResults u Nothing
liftAnnex $ clearFsckResults u
debug [ "Repaired", show u, show ok ]
return ok

View file

@ -26,6 +26,7 @@ import Utility.NotificationBroadcaster
import Config
import Utility.HumanTime
import Git.Repair
import Git.Index
import Data.Time.Clock.POSIX
import qualified Data.Set as S
@ -43,7 +44,7 @@ sanityCheckerStartupThread startupdelay = namedThreadUnchecked "SanityCheckerSta
ifM (not <$> liftAnnex (inRepo (checkIndex S.empty)))
( do
notice ["corrupt index file found at startup; removing and restaging"]
liftAnnex $ inRepo nukeIndex
liftAnnex $ inRepo $ nukeFile . indexFile
{- Normally the startup scan avoids re-staging files,
- but with the index deleted, everything needs to be
- restaged. -}

View file

@ -6,11 +6,12 @@
-}
module Git.Fsck (
FsckResults,
FsckResults(..),
MissingObjects,
findBroken,
foundBroken,
findMissing,
knownMissing,
) where
import Common
@ -23,9 +24,7 @@ import qualified Data.Set as S
type MissingObjects = S.Set Sha
{- If fsck succeeded, Just a set of missing objects it found.
- If it failed, Nothing. -}
type FsckResults = Maybe MissingObjects
data FsckResults = FsckFoundMissing MissingObjects | FsckFailed
{- Runs fsck to find some of the broken objects in the repository.
- May not find all broken objects, if fsck fails on bad data in some of
@ -42,8 +41,8 @@ findBroken batchmode r = do
let objs = findShas output
badobjs <- findMissing objs r
if S.null badobjs && not fsckok
then return Nothing
else return $ Just badobjs
then return FsckFailed
else return $ FsckFoundMissing badobjs
where
(command, params) = ("git", fsckParams r)
(command', params')
@ -51,8 +50,12 @@ findBroken batchmode r = do
| otherwise = (command, params)
foundBroken :: FsckResults -> Bool
foundBroken Nothing = True
foundBroken (Just s) = not (S.null s)
foundBroken FsckFailed = True
foundBroken (FsckFoundMissing s) = not (S.null s)
knownMissing :: FsckResults -> MissingObjects
knownMissing FsckFailed = S.empty
knownMissing (FsckFoundMissing s) = s
{- Finds objects that are missing from the git repsitory, or are corrupt.
-

View file

@ -7,6 +7,8 @@
module Git.Index where
import Common
import Git
import Utility.Env
{- Forces git to use the specified index file.
@ -19,9 +21,12 @@ import Utility.Env
override :: FilePath -> IO (IO ())
override index = do
res <- getEnv var
setEnv var index True
return $ reset res
void $ setEnv var index True
return $ void $ reset res
where
var = "GIT_INDEX_FILE"
reset (Just v) = setEnv var v True
reset _ = unsetEnv var
indexFile :: Repo -> FilePath
indexFile r = localGitDir r </> "index"

View file

@ -15,7 +15,6 @@ module Git.Repair (
removeTrackingBranches,
checkIndex,
missingIndex,
nukeIndex,
emptyGoodCommits,
) where
@ -26,6 +25,7 @@ import Git.Objects
import Git.Sha
import Git.Types
import Git.Fsck
import Git.Index
import qualified Git.Config as Config
import qualified Git.Construct as Construct
import qualified Git.LsTree as LsTree
@ -43,16 +43,16 @@ import qualified Data.ByteString.Lazy as L
import Data.Tuple.Utils
{- Given a set of bad objects found by git fsck, which may not
- be complete, finds and removes all corrupt objects, and
- returns a list of missing objects, which need to be
- found elsewhere to finish recovery.
- be complete, finds and removes all corrupt objects,
- and returns missing objects.
-}
cleanCorruptObjects :: FsckResults -> Repo -> IO (Maybe MissingObjects)
cleanCorruptObjects :: FsckResults -> Repo -> IO FsckResults
cleanCorruptObjects fsckresults r = do
void $ explodePacks r
objs <- listLooseObjectShas r
mapM_ (tryIO . allowRead . looseObjectFile r) objs
bad <- findMissing objs r
void $ removeLoose r $ S.union bad (fromMaybe S.empty fsckresults)
void $ removeLoose r $ S.union bad (knownMissing fsckresults)
-- Rather than returning the loose objects that were removed, re-run
-- fsck. Other missing objects may have been in the packs,
-- and this way fsck will find them.
@ -98,20 +98,17 @@ explodePacks r = do
- If another clone of the repository exists locally, which might not be a
- remote of the repo being repaired, its path can be passed as a reference
- repository.
- Can also be run with Nothing, if it's not known which objects are
- missing, just that some are. (Ie, fsck failed badly.)
-}
retrieveMissingObjects :: Maybe MissingObjects -> Maybe FilePath -> Repo -> IO (Maybe MissingObjects)
retrieveMissingObjects :: FsckResults -> Maybe FilePath -> Repo -> IO FsckResults
retrieveMissingObjects missing referencerepo r
| missing == Just S.empty = return $ Just S.empty
| not (foundBroken missing) = return missing
| otherwise = withTmpDir "tmprepo" $ \tmpdir -> do
unlessM (boolSystem "git" [Params "init", File tmpdir]) $
error $ "failed to create temp repository in " ++ tmpdir
tmpr <- Config.read =<< Construct.fromAbsPath tmpdir
stillmissing <- pullremotes tmpr (remotes r) fetchrefstags missing
if stillmissing == Just S.empty
then return $ Just S.empty
if S.null (knownMissing stillmissing)
then return stillmissing
else pullremotes tmpr (remotes r) fetchallrefs stillmissing
where
pullremotes tmpr [] fetchrefs stillmissing = case referencerepo of
@ -121,12 +118,12 @@ retrieveMissingObjects missing referencerepo r
void $ explodePacks tmpr
void $ copyObjects tmpr r
case stillmissing of
Nothing -> return $ Just S.empty
Just s -> Just <$> findMissing (S.toList s) r
FsckFailed -> return $ FsckFailed
FsckFoundMissing s -> FsckFoundMissing <$> findMissing (S.toList s) r
, return stillmissing
)
pullremotes tmpr (rmt:rmts) fetchrefs ms
| ms == Just S.empty = return $ Just S.empty
| not (foundBroken ms) = return ms
| otherwise = do
putStrLn $ "Trying to recover missing objects from remote " ++ repoDescribe rmt ++ "."
ifM (fetchfrom (repoLocation rmt) fetchrefs tmpr)
@ -134,10 +131,10 @@ retrieveMissingObjects missing referencerepo r
void $ explodePacks tmpr
void $ copyObjects tmpr r
case ms of
Nothing -> pullremotes tmpr rmts fetchrefs ms
Just s -> do
FsckFailed -> pullremotes tmpr rmts fetchrefs ms
FsckFoundMissing s -> do
stillmissing <- findMissing (S.toList s) r
pullremotes tmpr rmts fetchrefs (Just stillmissing)
pullremotes tmpr rmts fetchrefs (FsckFoundMissing stillmissing)
, pullremotes tmpr rmts fetchrefs ms
)
fetchfrom fetchurl ps = runBool $
@ -368,7 +365,7 @@ rewriteIndex missing r
| otherwise = do
(bad, good, cleanup) <- partitionIndex missing r
unless (null bad) $ do
nukeIndex r
nukeFile (indexFile r)
UpdateIndex.streamUpdateIndex r
=<< (catMaybes <$> mapM reinject good)
void cleanup
@ -380,9 +377,6 @@ rewriteIndex missing r
UpdateIndex.stageFile sha blobtype file r
reinject _ = return Nothing
nukeIndex :: Repo -> IO ()
nukeIndex r = nukeFile (localGitDir r </> "index")
newtype GoodCommits = GoodCommits (S.Set Sha)
emptyGoodCommits :: GoodCommits
@ -423,6 +417,9 @@ preRepair g = do
nukeFile headfile
writeFile headfile "ref: refs/heads/master"
explodePackedRefsFile g
unless (repoIsLocalBare g) $ do
let f = indexFile g
void $ tryIO $ allowWrite f
where
headfile = localGitDir g </> "HEAD"
validhead s = "ref: refs/" `isPrefixOf` s || isJust (extractSha s)
@ -449,7 +446,7 @@ runRepair' fsckresult forced referencerepo g = do
missing <- cleanCorruptObjects fsckresult g
stillmissing <- retrieveMissingObjects missing referencerepo g
case stillmissing of
Just s
FsckFoundMissing s
| S.null s -> if repoIsLocalBare g
then successfulfinish S.empty []
else ifM (checkIndex S.empty g)
@ -471,13 +468,13 @@ runRepair' fsckresult forced referencerepo g = do
, "missing objects could not be recovered!"
]
unsuccessfulfinish s
Nothing
FsckFailed
| forced -> ifM (pure (repoIsLocalBare g) <||> checkIndex S.empty g)
( do
missing' <- cleanCorruptObjects Nothing g
missing' <- cleanCorruptObjects FsckFailed g
case missing' of
Nothing -> return (False, S.empty, [])
Just stillmissing' -> continuerepairs stillmissing'
FsckFailed -> return (False, S.empty, [])
FsckFoundMissing stillmissing' -> continuerepairs stillmissing'
, corruptedindex
)
| otherwise -> unsuccessfulfinish S.empty
@ -517,7 +514,7 @@ runRepair' fsckresult forced referencerepo g = do
return (True, stillmissing, modifiedbranches)
corruptedindex = do
nukeIndex g
nukeFile (indexFile g)
-- The corrupted index can prevent fsck from finding other
-- problems, so re-run repair.
fsckresult' <- findBroken False g

View file

@ -7,7 +7,8 @@
module Logs.FsckResults (
writeFsckResults,
readFsckResults
readFsckResults,
clearFsckResults,
) where
import Common.Annex
@ -22,8 +23,8 @@ writeFsckResults u fsckresults = do
logfile <- fromRepo $ gitAnnexFsckResultsLog u
liftIO $
case fsckresults of
Nothing -> store S.empty logfile
Just s
FsckFailed -> store S.empty logfile
FsckFoundMissing s
| S.null s -> nukeFile logfile
| otherwise -> store s logfile
where
@ -35,9 +36,13 @@ writeFsckResults u fsckresults = do
readFsckResults :: UUID -> Annex FsckResults
readFsckResults u = do
logfile <- fromRepo $ gitAnnexFsckResultsLog u
liftIO $ catchDefaultIO (Just S.empty) $
liftIO $ catchDefaultIO (FsckFoundMissing S.empty) $
deserialize <$> readFile logfile
where
deserialize l =
let s = S.fromList $ map Ref $ lines l
in if S.null s then Nothing else Just s
in if S.null s then FsckFailed else FsckFoundMissing s
clearFsckResults :: UUID -> Annex ()
clearFsckResults = liftIO . nukeFile <=< fromRepo . gitAnnexFsckResultsLog