merge from git-repair

This commit is contained in:
Joey Hess 2013-11-20 18:31:00 -04:00
parent a1f8621efc
commit 7dbb702edd
4 changed files with 136 additions and 71 deletions

View file

@ -40,7 +40,7 @@ type FsckResults = Maybe MissingObjects
findBroken :: Bool -> Repo -> IO FsckResults findBroken :: Bool -> Repo -> IO FsckResults
findBroken batchmode r = do findBroken batchmode r = do
(output, fsckok) <- processTranscript command' (toCommand params') Nothing (output, fsckok) <- processTranscript command' (toCommand params') Nothing
let objs = parseFsckOutput output let objs = findShas output
badobjs <- findMissing objs r badobjs <- findMissing objs r
if S.null badobjs && not fsckok if S.null badobjs && not fsckok
then return Nothing then return Nothing
@ -65,7 +65,7 @@ findMissing objs r = go objs [] =<< start
where where
start = catFileStart' False r start = catFileStart' False r
go [] c h = do go [] c h = do
catFileStop h void $ tryIO $ catFileStop h
return $ S.fromList c return $ S.fromList c
go (o:os) c h = do go (o:os) c h = do
v <- tryIO $ isNothing <$> catObjectDetails h o v <- tryIO $ isNothing <$> catObjectDetails h o
@ -76,11 +76,11 @@ findMissing objs r = go objs [] =<< start
Right True -> go os (o:c) h Right True -> go os (o:c) h
Right False -> go os c h Right False -> go os c h
parseFsckOutput :: String -> [Sha] findShas :: String -> [Sha]
parseFsckOutput = catMaybes . map extractSha . concat . map words . lines findShas = catMaybes . map extractSha . concat . map words . lines
fsckParams :: Repo -> [CommandParam] fsckParams :: Repo -> [CommandParam]
fsckParams = gitCommandLine fsckParams = gitCommandLine $
[ Param "fsck" [ Param "fsck"
, Param "--no-dangling" , Param "--no-dangling"
, Param "--no-reflogs" , Param "--no-reflogs"

View file

@ -9,6 +9,7 @@ module Git.Objects where
import Common import Common
import Git import Git
import Git.Sha
objectsDir :: Repo -> FilePath objectsDir :: Repo -> FilePath
objectsDir r = localGitDir r </> "objects" objectsDir r = localGitDir r </> "objects"
@ -16,12 +17,17 @@ objectsDir r = localGitDir r </> "objects"
packDir :: Repo -> FilePath packDir :: Repo -> FilePath
packDir r = objectsDir r </> "pack" packDir r = objectsDir r </> "pack"
packIdxFile :: FilePath -> FilePath
packIdxFile = flip replaceExtension "idx"
listPackFiles :: Repo -> IO [FilePath] listPackFiles :: Repo -> IO [FilePath]
listPackFiles r = filter (".pack" `isSuffixOf`) listPackFiles r = filter (".pack" `isSuffixOf`)
<$> catchDefaultIO [] (dirContents $ packDir r) <$> catchDefaultIO [] (dirContents $ packDir r)
packIdxFile :: FilePath -> FilePath listLooseObjectShas :: Repo -> IO [Sha]
packIdxFile = flip replaceExtension "idx" listLooseObjectShas r = catchDefaultIO [] $
mapMaybe (extractSha . concat . reverse . take 2 . reverse . splitDirectories)
<$> dirContentsRecursiveSkipping (== "pack") (objectsDir r)
looseObjectFile :: Repo -> Sha -> FilePath looseObjectFile :: Repo -> Sha -> FilePath
looseObjectFile r sha = objectsDir r </> prefix </> rest looseObjectFile r sha = objectsDir r </> prefix </> rest

View file

@ -36,6 +36,7 @@ import qualified Git.UpdateIndex as UpdateIndex
import qualified Git.Branch as Branch import qualified Git.Branch as Branch
import Utility.Tmp import Utility.Tmp
import Utility.Rsync import Utility.Rsync
import Utility.FileMode
import qualified Data.Set as S import qualified Data.Set as S
import qualified Data.ByteString.Lazy as L import qualified Data.ByteString.Lazy as L
@ -53,17 +54,15 @@ import Data.Tuple.Utils
- To remove corrupt objects, unpack all packs, and remove the packs - To remove corrupt objects, unpack all packs, and remove the packs
- (to handle corrupt packs), and remove loose object files. - (to handle corrupt packs), and remove loose object files.
-} -}
cleanCorruptObjects :: FsckResults -> Repo -> IO MissingObjects cleanCorruptObjects :: FsckResults -> Repo -> IO (Maybe MissingObjects)
cleanCorruptObjects mmissing r = check mmissing cleanCorruptObjects mmissing r = check mmissing
where where
check Nothing = do check Nothing = do
putStrLn "git fsck found a problem but no specific broken objects. Perhaps a corrupt pack file?" putStrLn "git fsck found a problem but no specific broken objects. Perhaps a corrupt pack file?"
ifM (explodePacks r) void $ explodePacks r
( retry S.empty retry 0 S.empty
, return S.empty
)
check (Just bad) check (Just bad)
| S.null bad = return S.empty | S.null bad = return $ Just S.empty
| otherwise = do | otherwise = do
putStrLn $ unwords putStrLn $ unwords
[ "git fsck found" [ "git fsck found"
@ -73,25 +72,38 @@ cleanCorruptObjects mmissing r = check mmissing
exploded <- explodePacks r exploded <- explodePacks r
removed <- removeLoose r bad removed <- removeLoose r bad
if exploded || removed if exploded || removed
then retry bad then retry (S.size bad) bad
else return bad else return $ Just bad
retry oldbad = do retry numremoved oldbad = do
putStrLn "Re-running git fsck to see if it finds more problems." putStrLn "Re-running git fsck to see if it finds more problems."
v <- findBroken False r v <- findBroken False r
case v of case v of
Nothing -> do Nothing
| numremoved > 0 -> do
hPutStrLn stderr $ unwords hPutStrLn stderr $ unwords
[ "git fsck found a problem, which was not corrected after removing" [ "git fsck found a problem, which was not corrected after removing"
, show (S.size oldbad) , show numremoved
, "corrupt objects." , "corrupt objects."
] ]
return S.empty return Nothing
| otherwise -> do
hPutStrLn stderr "Repacking all objects, to try to flush out unknown corrupt ones."
void $ runBool
[ Param "repack"
, Param "-a"
] r
void $ runBool
[ Param "prune-packed"
] r
s <- S.fromList <$> listLooseObjectShas r
void $ removeLoose r s
retry (S.size s) S.empty
Just newbad -> do Just newbad -> do
removed <- removeLoose r newbad removed <- removeLoose r newbad
let s = S.union oldbad newbad let s = S.union oldbad newbad
if not removed || s == oldbad if not removed || s == oldbad
then return s then return $ Just s
else retry s else retry (S.size newbad) s
removeLoose :: Repo -> MissingObjects -> IO Bool removeLoose :: Repo -> MissingObjects -> IO Bool
removeLoose r s = do removeLoose r s = do
@ -100,9 +112,9 @@ removeLoose r s = do
if (count > 0) if (count > 0)
then do then do
putStrLn $ unwords putStrLn $ unwords
[ "removing" [ "Removing"
, show count , show count
, "corrupt loose objects" , "corrupt loose objects."
] ]
mapM_ nukeFile fs mapM_ nukeFile fs
return True return True
@ -118,13 +130,13 @@ explodePacks r = do
mapM_ go packs mapM_ go packs
return True return True
where where
go packfile = do go packfile = withTmpFileIn (localGitDir r) "pack" $ \tmp _ -> do
moveFile packfile tmp
nukeFile $ packIdxFile packfile
-- May fail, if pack file is corrupt. -- May fail, if pack file is corrupt.
void $ tryIO $ void $ tryIO $
pipeWrite [Param "unpack-objects"] r $ \h -> pipeWrite [Param "unpack-objects", Param "-r"] r $ \h ->
L.hPut h =<< L.readFile packfile L.hPut h =<< L.readFile tmp
nukeFile packfile
nukeFile $ packIdxFile packfile
{- Try to retrieve a set of missing objects, from the remotes of a {- Try to retrieve a set of missing objects, from the remotes of a
- repository. Returns any that could not be retreived. - repository. Returns any that could not be retreived.
@ -132,43 +144,53 @@ explodePacks r = do
- If another clone of the repository exists locally, which might not be a - If another clone of the repository exists locally, which might not be a
- remote of the repo being repaired, its path can be passed as a reference - remote of the repo being repaired, its path can be passed as a reference
- repository. - repository.
- Can also be run with Nothing, if it's not known which objects are
- missing, just that some are. (Ie, fsck failed badly.)
-} -}
retrieveMissingObjects :: MissingObjects -> Maybe FilePath -> Repo -> IO MissingObjects retrieveMissingObjects :: Maybe MissingObjects -> Maybe FilePath -> Repo -> IO (Maybe MissingObjects)
retrieveMissingObjects missing referencerepo r retrieveMissingObjects missing referencerepo r
| S.null missing = return missing | missing == Just S.empty = return $ Just S.empty
| otherwise = withTmpDir "tmprepo" $ \tmpdir -> do | otherwise = withTmpDir "tmprepo" $ \tmpdir -> do
unlessM (boolSystem "git" [Params "init", File tmpdir]) $ unlessM (boolSystem "git" [Params "init", File tmpdir]) $
error $ "failed to create temp repository in " ++ tmpdir error $ "failed to create temp repository in " ++ tmpdir
tmpr <- Config.read =<< Construct.fromAbsPath tmpdir tmpr <- Config.read =<< Construct.fromAbsPath tmpdir
stillmissing <- pullremotes tmpr (remotes r) fetchrefstags missing stillmissing <- pullremotes tmpr (remotes r) fetchrefstags missing
if S.null stillmissing if stillmissing == Just S.empty
then return stillmissing then return $ Just S.empty
else pullremotes tmpr (remotes r) fetchallrefs stillmissing else pullremotes tmpr (remotes r) fetchallrefs stillmissing
where where
pullremotes tmpr [] fetchrefs stillmissing = case referencerepo of pullremotes tmpr [] fetchrefs stillmissing = case referencerepo of
Nothing -> return stillmissing Nothing -> return stillmissing
Just p -> ifM (fetchfrom p fetchrefs tmpr) Just p -> ifM (fetchfrom p fetchrefs tmpr)
( do ( do
void $ explodePacks tmpr
void $ copyObjects tmpr r void $ copyObjects tmpr r
findMissing (S.toList stillmissing) r case stillmissing of
Nothing -> return $ Just S.empty
Just s -> Just <$> findMissing (S.toList s) r
, return stillmissing , return stillmissing
) )
pullremotes tmpr (rmt:rmts) fetchrefs s pullremotes tmpr (rmt:rmts) fetchrefs ms
| S.null s = return s | ms == Just S.empty = return $ Just S.empty
| otherwise = do | otherwise = do
putStrLn $ "Trying to recover missing objects from remote " ++ repoDescribe rmt putStrLn $ "Trying to recover missing objects from remote " ++ repoDescribe rmt ++ "."
ifM (fetchfrom (repoLocation rmt) fetchrefs tmpr) ifM (fetchfrom (repoLocation rmt) fetchrefs tmpr)
( do ( do
void $ explodePacks tmpr
void $ copyObjects tmpr r void $ copyObjects tmpr r
case ms of
Nothing -> pullremotes tmpr rmts fetchrefs ms
Just s -> do
stillmissing <- findMissing (S.toList s) r stillmissing <- findMissing (S.toList s) r
pullremotes tmpr rmts fetchrefs stillmissing pullremotes tmpr rmts fetchrefs (Just stillmissing)
, do , do
putStrLn $ unwords putStrLn $ unwords
[ "failed to fetch from remote" [ "failed to fetch from remote"
, repoDescribe rmt , repoDescribe rmt
, "(will continue without it, but making this remote available may improve recovery)" , "(will continue without it, but making this remote available may improve recovery)"
] ]
pullremotes tmpr rmts fetchrefs s pullremotes tmpr rmts fetchrefs ms
) )
fetchfrom fetchurl ps = runBool $ fetchfrom fetchurl ps = runBool $
[ Param "fetch" [ Param "fetch"
@ -182,7 +204,7 @@ retrieveMissingObjects missing referencerepo r
fetchallrefs = [ Param "+*:*" ] fetchallrefs = [ Param "+*:*" ]
{- Copies all objects from the src repository to the dest repository. {- Copies all objects from the src repository to the dest repository.
- This is done using rsync, so it copies all missing object, and all - This is done using rsync, so it copies all missing objects, and all
- objects they rely on. -} - objects they rely on. -}
copyObjects :: Repo -> Repo -> IO Bool copyObjects :: Repo -> Repo -> IO Bool
copyObjects srcr destr = rsync copyObjects srcr destr = rsync
@ -245,7 +267,8 @@ removeTrackingBranches missing goodcommits r =
getAllRefs :: Repo -> IO [Ref] getAllRefs :: Repo -> IO [Ref]
getAllRefs r = do getAllRefs r = do
packedrs <- mapMaybe parsePacked . lines packedrs <- mapMaybe parsePacked . lines
<$> catchDefaultIO "" (readFile $ packedRefsFile r) <$> catchDefaultIO ""
(readFileStrictAnyEncoding $ packedRefsFile r)
loosers <- map toref <$> dirContentsRecursive refdir loosers <- map toref <$> dirContentsRecursive refdir
return $ packedrs ++ loosers return $ packedrs ++ loosers
where where
@ -275,7 +298,7 @@ nukeBranchRef b r = void $ usegit <||> byhand
nukeFile $ localGitDir r </> show b nukeFile $ localGitDir r </> show b
whenM (doesFileExist packedrefs) $ whenM (doesFileExist packedrefs) $
withTmpFile "packed-refs" $ \tmp h -> do withTmpFile "packed-refs" $ \tmp h -> do
ls <- lines <$> readFile packedrefs ls <- lines <$> readFileStrictAnyEncoding packedrefs
hPutStr h $ unlines $ hPutStr h $ unlines $
filter (not . skiprefline) ls filter (not . skiprefline) ls
hClose h hClose h
@ -444,9 +467,27 @@ displayList items header
| numitems > 10 = take 10 items ++ ["(and " ++ show (numitems - 10) ++ " more)"] | numitems > 10 = take 10 items ++ ["(and " ++ show (numitems - 10) ++ " more)"]
| otherwise = items | otherwise = items
{- Fix problems that would prevent repair from working at all
-
- A missing or corrupt .git/HEAD makes git not treat the repository as a
- git repo. If there is a git repo in a parent directory, it may move up
- the tree and use that one instead. So, cannot use `git show-ref HEAD` to
- test it.
-}
preRepair :: Repo -> IO ()
preRepair g = do
void $ tryIO $ allowRead headfile
unlessM (validhead <$> catchDefaultIO "" (readFileStrictAnyEncoding headfile)) $ do
nukeFile headfile
writeFile headfile "ref: refs/heads/master"
where
headfile = localGitDir g </> "HEAD"
validhead s = "ref: refs/" `isPrefixOf` s || isJust (extractSha s)
{- Put it all together. -} {- Put it all together. -}
runRepair :: Bool -> Repo -> IO (Bool, MissingObjects, [Branch]) runRepair :: Bool -> Repo -> IO (Bool, MissingObjects, [Branch])
runRepair forced g = do runRepair forced g = do
preRepair g
putStrLn "Running git fsck ..." putStrLn "Running git fsck ..."
fsckresult <- findBroken False g fsckresult <- findBroken False g
if foundBroken fsckresult if foundBroken fsckresult
@ -455,32 +496,42 @@ runRepair forced g = do
putStrLn "No problems found." putStrLn "No problems found."
return (True, S.empty, []) return (True, S.empty, [])
successfulRepair :: (Bool, MissingObjects, [Branch]) -> Bool
successfulRepair = fst3
runRepairOf :: FsckResults -> Bool -> Maybe FilePath -> Repo -> IO (Bool, MissingObjects, [Branch]) runRepairOf :: FsckResults -> Bool -> Maybe FilePath -> Repo -> IO (Bool, MissingObjects, [Branch])
runRepairOf fsckresult forced referencerepo g = do runRepairOf fsckresult forced referencerepo g = do
missing <- cleanCorruptObjects fsckresult g missing <- cleanCorruptObjects fsckresult g
stillmissing <- retrieveMissingObjects missing referencerepo g stillmissing <- retrieveMissingObjects missing referencerepo g
if S.null stillmissing case stillmissing of
then if repoIsLocalBare g Just s
then successfulfinish stillmissing [] | S.null s -> if repoIsLocalBare g
else ifM (checkIndex stillmissing g) then successfulfinish S.empty []
( successfulfinish stillmissing [] else ifM (checkIndex S.empty g)
( successfulfinish s []
, do , do
putStrLn "No missing objects found, but the index file is corrupt!" putStrLn "No missing objects found, but the index file is corrupt!"
if forced if forced
then corruptedindex then corruptedindex
else needforce stillmissing else needforce S.empty
) )
| otherwise -> if forced
then continuerepairs s
else do else do
putStrLn $ unwords putStrLn $ unwords
[ show (S.size stillmissing) [ show (S.size s)
, "missing objects could not be recovered!" , "missing objects could not be recovered!"
] ]
if forced unsuccessfulfinish s
then continuerepairs stillmissing Nothing
else unsuccessfulfinish stillmissing | forced -> ifM (pure (repoIsLocalBare g) <||> checkIndex S.empty g)
( do
fsckresult' <- findBroken False g
case fsckresult' of
Nothing -> do
putStrLn "Unable to fully recover; cannot find missing objects."
return (False, S.empty, [])
Just stillmissing' -> continuerepairs stillmissing'
, corruptedindex
)
| otherwise -> unsuccessfulfinish S.empty
where where
continuerepairs stillmissing = do continuerepairs stillmissing = do
(remotebranches, goodcommits) <- removeTrackingBranches stillmissing emptyGoodCommits g (remotebranches, goodcommits) <- removeTrackingBranches stillmissing emptyGoodCommits g
@ -528,8 +579,7 @@ runRepairOf fsckresult forced referencerepo g = do
successfulfinish stillmissing modifiedbranches = do successfulfinish stillmissing modifiedbranches = do
mapM_ putStrLn mapM_ putStrLn
[ "Successfully recovered repository!" [ "Successfully recovered repository!"
, "You should run \"git fsck\" to make sure, but it looks like" , "You should run \"git fsck\" to make sure, but it looks like everything was recovered ok."
, "everything was recovered ok."
] ]
return (True, stillmissing, modifiedbranches) return (True, stillmissing, modifiedbranches)
unsuccessfulfinish stillmissing = do unsuccessfulfinish stillmissing = do
@ -542,3 +592,6 @@ runRepairOf fsckresult forced referencerepo g = do
needforce stillmissing = do needforce stillmissing = do
putStrLn "To force a recovery to a usable state, retry with the --force parameter." putStrLn "To force a recovery to a usable state, retry with the --force parameter."
return (False, stillmissing, []) return (False, stillmissing, [])
successfulRepair :: (Bool, MissingObjects, [Branch]) -> Bool
successfulRepair = fst3

6
debian/changelog vendored
View file

@ -1,3 +1,9 @@
git-annex (5.20131121) UNRELEASED; urgency=low
* Futher improvements to git repair.
-- Joey Hess <joeyh@debian.org> Wed, 20 Nov 2013 18:30:47 -0400
git-annex (5.20131120) unstable; urgency=low git-annex (5.20131120) unstable; urgency=low
* Fix Debian package to not try to run test suite, since haskell-tasty * Fix Debian package to not try to run test suite, since haskell-tasty