implemented removal of corrupt tracking branches
Oh, git, you made this so hard. Not determining if a branch pointed to some corrupt object, that was easy, but dealing with corrupt branches using git plumbing is a PITA.
This commit is contained in:
parent
6d8250c255
commit
fcd91be6f0
5 changed files with 195 additions and 24 deletions
|
@ -7,7 +7,8 @@
|
|||
|
||||
module Git.Fsck (
|
||||
findBroken,
|
||||
findMissing
|
||||
findMissing,
|
||||
MissingObjects
|
||||
) where
|
||||
|
||||
import Common
|
||||
|
@ -18,6 +19,8 @@ import Git.CatFile
|
|||
|
||||
import qualified Data.Set as S
|
||||
|
||||
type MissingObjects = S.Set Sha
|
||||
|
||||
{- Runs fsck to find some of the broken objects in the repository.
|
||||
- May not find all broken objects, if fsck fails on bad data in some of
|
||||
- the broken objects it does find. If the fsck fails generally without
|
||||
|
@ -28,7 +31,7 @@ import qualified Data.Set as S
|
|||
- to be a git sha. Not all such shas are of broken objects, so ask git
|
||||
- to try to cat the object, and see if it fails.
|
||||
-}
|
||||
findBroken :: Repo -> IO (Maybe (S.Set Sha))
|
||||
findBroken :: Repo -> IO (Maybe MissingObjects)
|
||||
findBroken r = do
|
||||
(output, fsckok) <- processTranscript "git" (toCommand $ fsckParams r) Nothing
|
||||
let objs = parseFsckOutput output
|
||||
|
@ -39,7 +42,7 @@ findBroken r = do
|
|||
|
||||
{- Finds objects that are missing from the git repsitory, or are corrupt.
|
||||
- Note that catting a corrupt object will cause cat-file to crash. -}
|
||||
findMissing :: [Sha] -> Repo -> IO (S.Set Sha)
|
||||
findMissing :: [Sha] -> Repo -> IO MissingObjects
|
||||
findMissing objs r = go objs [] =<< start
|
||||
where
|
||||
start = catFileStart' False r
|
||||
|
|
|
@ -8,6 +8,7 @@
|
|||
module Git.LsTree (
|
||||
TreeItem(..),
|
||||
lsTree,
|
||||
lsTreeParams,
|
||||
lsTreeFiles,
|
||||
parseLsTree
|
||||
) where
|
||||
|
@ -33,9 +34,11 @@ data TreeItem = TreeItem
|
|||
{- Lists the complete contents of a tree, recursing into sub-trees,
|
||||
- with lazy output. -}
|
||||
lsTree :: Ref -> Repo -> IO [TreeItem]
|
||||
lsTree t repo = map parseLsTree <$> pipeNullSplitZombie ps repo
|
||||
where
|
||||
ps = [Params "ls-tree --full-tree -z -r --", File $ show t]
|
||||
lsTree t repo = map parseLsTree
|
||||
<$> pipeNullSplitZombie (lsTreeParams t) repo
|
||||
|
||||
lsTreeParams :: Ref -> [CommandParam]
|
||||
lsTreeParams t = [ Params "ls-tree --full-tree -z -r --", File $ show t ]
|
||||
|
||||
{- Lists specified files in a tree. -}
|
||||
lsTreeFiles :: Ref -> [FilePath] -> Repo -> IO [TreeItem]
|
||||
|
|
|
@ -10,6 +10,7 @@ module Git.RecoverRepository (
|
|||
retrieveMissingObjects,
|
||||
resetLocalBranches,
|
||||
removeTrackingBranches,
|
||||
emptyGoodCommits,
|
||||
) where
|
||||
|
||||
import Common
|
||||
|
@ -17,12 +18,12 @@ import Git
|
|||
import Git.Command
|
||||
import Git.Fsck
|
||||
import Git.Objects
|
||||
import Git.HashObject
|
||||
import Git.Types
|
||||
import Git.Sha
|
||||
import qualified Git.Config
|
||||
import qualified Git.Construct
|
||||
import qualified Git.LsTree as LsTree
|
||||
import qualified Git.Ref as Ref
|
||||
import Utility.Tmp
|
||||
import Utility.Monad
|
||||
import Utility.Rsync
|
||||
|
||||
import qualified Data.Set as S
|
||||
|
@ -39,7 +40,7 @@ import System.Log.Logger
|
|||
- To remove corrupt objects, unpack all packs, and remove the packs
|
||||
- (to handle corrupt packs), and remove loose object files.
|
||||
-}
|
||||
cleanCorruptObjects :: Repo -> IO (S.Set Sha)
|
||||
cleanCorruptObjects :: Repo -> IO MissingObjects
|
||||
cleanCorruptObjects r = do
|
||||
notice "Running git fsck ..."
|
||||
check =<< findBroken r
|
||||
|
@ -79,7 +80,7 @@ cleanCorruptObjects r = do
|
|||
then return s
|
||||
else retry s
|
||||
|
||||
removeLoose :: Repo -> S.Set Sha -> IO Bool
|
||||
removeLoose :: Repo -> MissingObjects -> IO Bool
|
||||
removeLoose r s = do
|
||||
let fs = map (looseObjectFile r) (S.toList s)
|
||||
count <- length <$> filterM doesFileExist fs
|
||||
|
@ -115,7 +116,7 @@ explodePacks r = do
|
|||
{- Try to retrieve a set of missing objects, from the remotes of a
|
||||
- repository. Returns any that could not be retreived.
|
||||
-}
|
||||
retrieveMissingObjects :: S.Set Sha -> Repo -> IO (S.Set Sha)
|
||||
retrieveMissingObjects :: MissingObjects -> Repo -> IO MissingObjects
|
||||
retrieveMissingObjects missing r
|
||||
| S.null missing = return missing
|
||||
| otherwise = withTmpDir "tmprepo" $ \tmpdir -> do
|
||||
|
@ -127,7 +128,7 @@ retrieveMissingObjects missing r
|
|||
then return stillmissing
|
||||
else pullremotes tmpr (remotes r) fetchallrefs stillmissing
|
||||
where
|
||||
pullremotes tmpr [] _ stillmissing = return stillmissing
|
||||
pullremotes _tmpr [] _ stillmissing = return stillmissing
|
||||
pullremotes tmpr (rmt:rmts) fetchrefs s
|
||||
| S.null s = return s
|
||||
| otherwise = do
|
||||
|
@ -170,16 +171,170 @@ copyObjects srcr destr = rsync
|
|||
- local branches to point to an old commit before the missing
|
||||
- objects.
|
||||
-}
|
||||
resetLocalBranches :: S.Set Sha -> Repo -> IO [Branch]
|
||||
resetLocalBranches missing r = do
|
||||
resetLocalBranches :: MissingObjects -> GoodCommits -> Repo -> IO [Branch]
|
||||
resetLocalBranches missing goodcommits r = do
|
||||
error "TODO"
|
||||
|
||||
{- To deal with missing objects that cannot be recovered, removes
|
||||
- any remote tracking branches that reference them.
|
||||
- any remote tracking branches that reference them. Returns a list of
|
||||
- all removed branches.
|
||||
-}
|
||||
removeTrackingBranches :: S.Set Sha -> Repo -> IO [Branch]
|
||||
removeTrackingBranches missing r = do
|
||||
error "TODO"
|
||||
removeTrackingBranches :: MissingObjects -> GoodCommits -> Repo -> IO ([Branch], GoodCommits)
|
||||
removeTrackingBranches missing goodcommits r =
|
||||
go [] goodcommits =<< filter istrackingbranch <$> getAllRefs r
|
||||
where
|
||||
istrackingbranch b = "refs/remotes/" `isPrefixOf` show b
|
||||
go removed gcs [] = return (removed, gcs)
|
||||
go removed gcs (b:bs) = do
|
||||
(ok, gcs') <- verifyCommit missing gcs b r
|
||||
if ok
|
||||
then go removed gcs' bs
|
||||
else do
|
||||
nukeBranchRef b r
|
||||
go (b:removed) gcs' bs
|
||||
|
||||
{- Gets all refs, including ones that are corrupt.
|
||||
- git show-ref does not output refs to commits that are directly
|
||||
- corrupted, so it is not used.
|
||||
-}
|
||||
getAllRefs :: Repo -> IO [Ref]
|
||||
getAllRefs r = do
|
||||
packedrs <- mapMaybe parsePacked . lines
|
||||
<$> catchDefaultIO "" (readFile $ packedRefsFile r)
|
||||
loosers <- map toref <$> dirContentsRecursive (localGitDir r </> "refs")
|
||||
return $ packedrs ++ loosers
|
||||
where
|
||||
refdir = localGitDir r </> "refs"
|
||||
toref = Ref . relPathDirToFile (localGitDir r)
|
||||
|
||||
packedRefsFile :: Repo -> FilePath
|
||||
packedRefsFile r = localGitDir r </> "packed-refs"
|
||||
|
||||
parsePacked :: String -> Maybe Ref
|
||||
parsePacked l = case words l of
|
||||
(sha:ref:[])
|
||||
| isJust (extractSha sha) -> Just $ Ref ref
|
||||
_ -> Nothing
|
||||
|
||||
{- git-branch -d cannot be used to remove a branch that is directly
|
||||
- pointing to a corrupt commit. However, it's tried first. -}
|
||||
nukeBranchRef :: Branch -> Repo -> IO ()
|
||||
nukeBranchRef b r = void $ usegit <||> byhand
|
||||
where
|
||||
usegit = runBool
|
||||
[ Param "branch"
|
||||
, Params "-r -d"
|
||||
, Param $ show $ Ref.base b
|
||||
] r
|
||||
byhand = do
|
||||
nukeFile $ localGitDir r </> show b
|
||||
whenM (doesFileExist packedrefs) $
|
||||
withTmpFile "packed-refs" $ \tmp h -> do
|
||||
ls <- lines <$> readFile packedrefs
|
||||
hPutStr h $ unlines $
|
||||
filter (not . skiprefline) ls
|
||||
hClose h
|
||||
renameFile tmp packedrefs
|
||||
return True
|
||||
skiprefline l = case parsePacked l of
|
||||
Just packedref
|
||||
| packedref == b -> True
|
||||
_ -> False
|
||||
packedrefs = packedRefsFile r
|
||||
|
||||
{- Finds the most recent commit to a branch that does not need any
|
||||
- of the missing objects. If the input branch is good as-is, returns it.
|
||||
- Otherwise, tries to traverse the commits in the branch to find one
|
||||
- that is ok (might fail, if one of them is corrupt).
|
||||
-}
|
||||
findUncorruptedCommit :: MissingObjects -> GoodCommits -> Branch -> Repo -> IO (Maybe Sha, GoodCommits)
|
||||
findUncorruptedCommit missing goodcommits branch r = do
|
||||
(ok, goodcommits') <- verifyCommit missing goodcommits branch r
|
||||
if ok
|
||||
then return (Just branch, goodcommits')
|
||||
else do
|
||||
(ls, cleanup) <- pipeNullSplit
|
||||
[ Param "log"
|
||||
, Param "--format=%H"
|
||||
, Param (show branch)
|
||||
] r
|
||||
cleanup `after` findfirst goodcommits (catMaybes $ map extractSha ls)
|
||||
where
|
||||
findfirst gcs [] = return (Nothing, gcs)
|
||||
findfirst gcs (c:cs) = do
|
||||
(ok, gcs') <- verifyCommit missing gcs c r
|
||||
if ok
|
||||
then return (Just c, gcs')
|
||||
else findfirst gcs' cs
|
||||
|
||||
{- Looks through the reflog to find an old version of a branch that
|
||||
- does not need any of the missing objects.
|
||||
-}
|
||||
findOldBranch :: MissingObjects -> GoodCommits -> Branch -> Repo -> IO (Maybe Sha, GoodCommits)
|
||||
findOldBranch missing goodcommits branch r = error "TODO"
|
||||
|
||||
{- Verifies tha none of the missing objects in the set are used by
|
||||
- the commit. Also adds to a set of commit shas that have been verified to
|
||||
- be good, which can be passed into subsequent calls to avoid
|
||||
- redundant work when eg, chasing down branches to find the first
|
||||
- uncorrupted commit. -}
|
||||
verifyCommit :: MissingObjects -> GoodCommits -> Sha -> Repo -> IO (Bool, GoodCommits)
|
||||
verifyCommit missing goodcommits commit r
|
||||
| checkGoodCommit commit goodcommits = return (True, goodcommits)
|
||||
| otherwise = do
|
||||
(ls, cleanup) <- pipeNullSplit
|
||||
[ Param "log"
|
||||
, Param "--format=%H %T"
|
||||
, Param (show commit)
|
||||
] r
|
||||
let committrees = map parse ls
|
||||
if any isNothing committrees
|
||||
then do
|
||||
void cleanup
|
||||
return (False, goodcommits)
|
||||
else do
|
||||
let cts = catMaybes committrees
|
||||
ifM (cleanup <&&> check cts)
|
||||
( return (True, addGoodCommits (map fst cts) goodcommits)
|
||||
, return (False, goodcommits)
|
||||
)
|
||||
where
|
||||
parse l = case words l of
|
||||
(commitsha:treesha:[]) -> (,)
|
||||
<$> extractSha commitsha
|
||||
<*> extractSha treesha
|
||||
_ -> Nothing
|
||||
check [] = return False
|
||||
check ((commit, tree):rest)
|
||||
| checkGoodCommit commit goodcommits = return True
|
||||
| otherwise = verifyTree missing tree r <&&> check rest
|
||||
|
||||
{- Verifies that a tree is good, including all trees and blobs
|
||||
- referenced by it. -}
|
||||
verifyTree :: MissingObjects -> Sha -> Repo -> IO Bool
|
||||
verifyTree missing treesha r
|
||||
| S.member treesha missing = return False
|
||||
| otherwise = do
|
||||
(ls, cleanup) <- pipeNullSplit (LsTree.lsTreeParams treesha) r
|
||||
let objshas = map (extractSha . LsTree.sha . LsTree.parseLsTree) ls
|
||||
if any isNothing objshas || any (`S.member` missing) (catMaybes objshas)
|
||||
then do
|
||||
void cleanup
|
||||
return False
|
||||
-- as long as ls-tree succeeded, we're good
|
||||
else cleanup
|
||||
|
||||
newtype GoodCommits = GoodCommits (S.Set Sha)
|
||||
|
||||
emptyGoodCommits :: GoodCommits
|
||||
emptyGoodCommits = GoodCommits S.empty
|
||||
|
||||
checkGoodCommit :: Sha -> GoodCommits -> Bool
|
||||
checkGoodCommit sha (GoodCommits s) = S.member sha s
|
||||
|
||||
addGoodCommits :: [Sha] -> GoodCommits -> GoodCommits
|
||||
addGoodCommits shas (GoodCommits s) = GoodCommits $
|
||||
S.union s (S.fromList shas)
|
||||
|
||||
notice :: String -> IO ()
|
||||
notice = noticeM "RecoverRepository"
|
||||
|
|
|
@ -15,9 +15,19 @@ It does by deleting all corrupt objects, and retreiving all missing
|
|||
objects that it can from the remotes of the repository.
|
||||
|
||||
If that is not sufficient to fully recover the repository, it can also
|
||||
reset branches back to commits before the corruption happened. It will only
|
||||
do this if run with the --force option, since that rewrites history
|
||||
and throws out missing data.
|
||||
reset branches back to commits before the corruption happened, and delete
|
||||
branches that are no longer available due to the lost data. It will only
|
||||
do this if run with the `--force` option, since that rewrites history
|
||||
and throws out missing data. Note that the `--force` option never touches
|
||||
tags, even if they are no longer usable due to missing data.
|
||||
|
||||
After running this command, you will probably want to run `git fsck` to
|
||||
verify it fixed the repository. Note that fsck may still complain about
|
||||
objects referenced by the reflog, if they were unable to be recovered.
|
||||
Use `git fsck --no-reflogs` to skip such objects.
|
||||
|
||||
Since this command unpacks all packs in the repository, you may want to
|
||||
run `git gc` afterwards.
|
||||
|
||||
# AUTHOR
|
||||
|
||||
|
|
|
@ -59,14 +59,14 @@ main = do
|
|||
]
|
||||
if forced
|
||||
then do
|
||||
remotebranches <- Git.RecoverRepository.removeTrackingBranches stillmissing g
|
||||
(remotebranches, goodcommits) <- Git.RecoverRepository.removeTrackingBranches stillmissing Git.RecoverRepository.emptyGoodCommits g
|
||||
unless (null remotebranches) $
|
||||
putStrLn $ unwords
|
||||
[ "removed"
|
||||
, show (length remotebranches)
|
||||
, "remote tracking branches that referred to missing objects"
|
||||
]
|
||||
localbranches <- Git.RecoverRepository.resetLocalBranches stillmissing g
|
||||
localbranches <- Git.RecoverRepository.resetLocalBranches stillmissing goodcommits g
|
||||
unless (null localbranches) $ do
|
||||
putStrLn "Reset these local branches to old versions before the missing objects were committed:"
|
||||
putStr $ unlines $ map show localbranches
|
||||
|
|
Loading…
Reference in a new issue