implemented removal of corrupt tracking branches
Oh, git, you made this so hard. Not determining if a branch pointed to some corrupt object, that was easy, but dealing with corrupt branches using git plumbing is a PITA.
This commit is contained in:
parent
6d8250c255
commit
fcd91be6f0
5 changed files with 195 additions and 24 deletions
|
@ -7,7 +7,8 @@
|
||||||
|
|
||||||
module Git.Fsck (
|
module Git.Fsck (
|
||||||
findBroken,
|
findBroken,
|
||||||
findMissing
|
findMissing,
|
||||||
|
MissingObjects
|
||||||
) where
|
) where
|
||||||
|
|
||||||
import Common
|
import Common
|
||||||
|
@ -18,6 +19,8 @@ import Git.CatFile
|
||||||
|
|
||||||
import qualified Data.Set as S
|
import qualified Data.Set as S
|
||||||
|
|
||||||
|
type MissingObjects = S.Set Sha
|
||||||
|
|
||||||
{- Runs fsck to find some of the broken objects in the repository.
|
{- Runs fsck to find some of the broken objects in the repository.
|
||||||
- May not find all broken objects, if fsck fails on bad data in some of
|
- May not find all broken objects, if fsck fails on bad data in some of
|
||||||
- the broken objects it does find. If the fsck fails generally without
|
- the broken objects it does find. If the fsck fails generally without
|
||||||
|
@ -28,7 +31,7 @@ import qualified Data.Set as S
|
||||||
- to be a git sha. Not all such shas are of broken objects, so ask git
|
- to be a git sha. Not all such shas are of broken objects, so ask git
|
||||||
- to try to cat the object, and see if it fails.
|
- to try to cat the object, and see if it fails.
|
||||||
-}
|
-}
|
||||||
findBroken :: Repo -> IO (Maybe (S.Set Sha))
|
findBroken :: Repo -> IO (Maybe MissingObjects)
|
||||||
findBroken r = do
|
findBroken r = do
|
||||||
(output, fsckok) <- processTranscript "git" (toCommand $ fsckParams r) Nothing
|
(output, fsckok) <- processTranscript "git" (toCommand $ fsckParams r) Nothing
|
||||||
let objs = parseFsckOutput output
|
let objs = parseFsckOutput output
|
||||||
|
@ -39,7 +42,7 @@ findBroken r = do
|
||||||
|
|
||||||
{- Finds objects that are missing from the git repsitory, or are corrupt.
|
{- Finds objects that are missing from the git repsitory, or are corrupt.
|
||||||
- Note that catting a corrupt object will cause cat-file to crash. -}
|
- Note that catting a corrupt object will cause cat-file to crash. -}
|
||||||
findMissing :: [Sha] -> Repo -> IO (S.Set Sha)
|
findMissing :: [Sha] -> Repo -> IO MissingObjects
|
||||||
findMissing objs r = go objs [] =<< start
|
findMissing objs r = go objs [] =<< start
|
||||||
where
|
where
|
||||||
start = catFileStart' False r
|
start = catFileStart' False r
|
||||||
|
|
|
@ -8,6 +8,7 @@
|
||||||
module Git.LsTree (
|
module Git.LsTree (
|
||||||
TreeItem(..),
|
TreeItem(..),
|
||||||
lsTree,
|
lsTree,
|
||||||
|
lsTreeParams,
|
||||||
lsTreeFiles,
|
lsTreeFiles,
|
||||||
parseLsTree
|
parseLsTree
|
||||||
) where
|
) where
|
||||||
|
@ -33,9 +34,11 @@ data TreeItem = TreeItem
|
||||||
{- Lists the complete contents of a tree, recursing into sub-trees,
|
{- Lists the complete contents of a tree, recursing into sub-trees,
|
||||||
- with lazy output. -}
|
- with lazy output. -}
|
||||||
lsTree :: Ref -> Repo -> IO [TreeItem]
|
lsTree :: Ref -> Repo -> IO [TreeItem]
|
||||||
lsTree t repo = map parseLsTree <$> pipeNullSplitZombie ps repo
|
lsTree t repo = map parseLsTree
|
||||||
where
|
<$> pipeNullSplitZombie (lsTreeParams t) repo
|
||||||
ps = [Params "ls-tree --full-tree -z -r --", File $ show t]
|
|
||||||
|
lsTreeParams :: Ref -> [CommandParam]
|
||||||
|
lsTreeParams t = [ Params "ls-tree --full-tree -z -r --", File $ show t ]
|
||||||
|
|
||||||
{- Lists specified files in a tree. -}
|
{- Lists specified files in a tree. -}
|
||||||
lsTreeFiles :: Ref -> [FilePath] -> Repo -> IO [TreeItem]
|
lsTreeFiles :: Ref -> [FilePath] -> Repo -> IO [TreeItem]
|
||||||
|
|
|
@ -10,6 +10,7 @@ module Git.RecoverRepository (
|
||||||
retrieveMissingObjects,
|
retrieveMissingObjects,
|
||||||
resetLocalBranches,
|
resetLocalBranches,
|
||||||
removeTrackingBranches,
|
removeTrackingBranches,
|
||||||
|
emptyGoodCommits,
|
||||||
) where
|
) where
|
||||||
|
|
||||||
import Common
|
import Common
|
||||||
|
@ -17,12 +18,12 @@ import Git
|
||||||
import Git.Command
|
import Git.Command
|
||||||
import Git.Fsck
|
import Git.Fsck
|
||||||
import Git.Objects
|
import Git.Objects
|
||||||
import Git.HashObject
|
import Git.Sha
|
||||||
import Git.Types
|
|
||||||
import qualified Git.Config
|
import qualified Git.Config
|
||||||
import qualified Git.Construct
|
import qualified Git.Construct
|
||||||
|
import qualified Git.LsTree as LsTree
|
||||||
|
import qualified Git.Ref as Ref
|
||||||
import Utility.Tmp
|
import Utility.Tmp
|
||||||
import Utility.Monad
|
|
||||||
import Utility.Rsync
|
import Utility.Rsync
|
||||||
|
|
||||||
import qualified Data.Set as S
|
import qualified Data.Set as S
|
||||||
|
@ -39,7 +40,7 @@ import System.Log.Logger
|
||||||
- To remove corrupt objects, unpack all packs, and remove the packs
|
- To remove corrupt objects, unpack all packs, and remove the packs
|
||||||
- (to handle corrupt packs), and remove loose object files.
|
- (to handle corrupt packs), and remove loose object files.
|
||||||
-}
|
-}
|
||||||
cleanCorruptObjects :: Repo -> IO (S.Set Sha)
|
cleanCorruptObjects :: Repo -> IO MissingObjects
|
||||||
cleanCorruptObjects r = do
|
cleanCorruptObjects r = do
|
||||||
notice "Running git fsck ..."
|
notice "Running git fsck ..."
|
||||||
check =<< findBroken r
|
check =<< findBroken r
|
||||||
|
@ -79,7 +80,7 @@ cleanCorruptObjects r = do
|
||||||
then return s
|
then return s
|
||||||
else retry s
|
else retry s
|
||||||
|
|
||||||
removeLoose :: Repo -> S.Set Sha -> IO Bool
|
removeLoose :: Repo -> MissingObjects -> IO Bool
|
||||||
removeLoose r s = do
|
removeLoose r s = do
|
||||||
let fs = map (looseObjectFile r) (S.toList s)
|
let fs = map (looseObjectFile r) (S.toList s)
|
||||||
count <- length <$> filterM doesFileExist fs
|
count <- length <$> filterM doesFileExist fs
|
||||||
|
@ -115,7 +116,7 @@ explodePacks r = do
|
||||||
{- Try to retrieve a set of missing objects, from the remotes of a
|
{- Try to retrieve a set of missing objects, from the remotes of a
|
||||||
- repository. Returns any that could not be retreived.
|
- repository. Returns any that could not be retreived.
|
||||||
-}
|
-}
|
||||||
retrieveMissingObjects :: S.Set Sha -> Repo -> IO (S.Set Sha)
|
retrieveMissingObjects :: MissingObjects -> Repo -> IO MissingObjects
|
||||||
retrieveMissingObjects missing r
|
retrieveMissingObjects missing r
|
||||||
| S.null missing = return missing
|
| S.null missing = return missing
|
||||||
| otherwise = withTmpDir "tmprepo" $ \tmpdir -> do
|
| otherwise = withTmpDir "tmprepo" $ \tmpdir -> do
|
||||||
|
@ -127,7 +128,7 @@ retrieveMissingObjects missing r
|
||||||
then return stillmissing
|
then return stillmissing
|
||||||
else pullremotes tmpr (remotes r) fetchallrefs stillmissing
|
else pullremotes tmpr (remotes r) fetchallrefs stillmissing
|
||||||
where
|
where
|
||||||
pullremotes tmpr [] _ stillmissing = return stillmissing
|
pullremotes _tmpr [] _ stillmissing = return stillmissing
|
||||||
pullremotes tmpr (rmt:rmts) fetchrefs s
|
pullremotes tmpr (rmt:rmts) fetchrefs s
|
||||||
| S.null s = return s
|
| S.null s = return s
|
||||||
| otherwise = do
|
| otherwise = do
|
||||||
|
@ -170,16 +171,170 @@ copyObjects srcr destr = rsync
|
||||||
- local branches to point to an old commit before the missing
|
- local branches to point to an old commit before the missing
|
||||||
- objects.
|
- objects.
|
||||||
-}
|
-}
|
||||||
resetLocalBranches :: S.Set Sha -> Repo -> IO [Branch]
|
resetLocalBranches :: MissingObjects -> GoodCommits -> Repo -> IO [Branch]
|
||||||
resetLocalBranches missing r = do
|
resetLocalBranches missing goodcommits r = do
|
||||||
error "TODO"
|
error "TODO"
|
||||||
|
|
||||||
{- To deal with missing objects that cannot be recovered, removes
|
{- To deal with missing objects that cannot be recovered, removes
|
||||||
- any remote tracking branches that reference them.
|
- any remote tracking branches that reference them. Returns a list of
|
||||||
|
- all removed branches.
|
||||||
-}
|
-}
|
||||||
removeTrackingBranches :: S.Set Sha -> Repo -> IO [Branch]
|
removeTrackingBranches :: MissingObjects -> GoodCommits -> Repo -> IO ([Branch], GoodCommits)
|
||||||
removeTrackingBranches missing r = do
|
removeTrackingBranches missing goodcommits r =
|
||||||
error "TODO"
|
go [] goodcommits =<< filter istrackingbranch <$> getAllRefs r
|
||||||
|
where
|
||||||
|
istrackingbranch b = "refs/remotes/" `isPrefixOf` show b
|
||||||
|
go removed gcs [] = return (removed, gcs)
|
||||||
|
go removed gcs (b:bs) = do
|
||||||
|
(ok, gcs') <- verifyCommit missing gcs b r
|
||||||
|
if ok
|
||||||
|
then go removed gcs' bs
|
||||||
|
else do
|
||||||
|
nukeBranchRef b r
|
||||||
|
go (b:removed) gcs' bs
|
||||||
|
|
||||||
|
{- Gets all refs, including ones that are corrupt.
|
||||||
|
- git show-ref does not output refs to commits that are directly
|
||||||
|
- corrupted, so it is not used.
|
||||||
|
-}
|
||||||
|
getAllRefs :: Repo -> IO [Ref]
|
||||||
|
getAllRefs r = do
|
||||||
|
packedrs <- mapMaybe parsePacked . lines
|
||||||
|
<$> catchDefaultIO "" (readFile $ packedRefsFile r)
|
||||||
|
loosers <- map toref <$> dirContentsRecursive (localGitDir r </> "refs")
|
||||||
|
return $ packedrs ++ loosers
|
||||||
|
where
|
||||||
|
refdir = localGitDir r </> "refs"
|
||||||
|
toref = Ref . relPathDirToFile (localGitDir r)
|
||||||
|
|
||||||
|
packedRefsFile :: Repo -> FilePath
|
||||||
|
packedRefsFile r = localGitDir r </> "packed-refs"
|
||||||
|
|
||||||
|
parsePacked :: String -> Maybe Ref
|
||||||
|
parsePacked l = case words l of
|
||||||
|
(sha:ref:[])
|
||||||
|
| isJust (extractSha sha) -> Just $ Ref ref
|
||||||
|
_ -> Nothing
|
||||||
|
|
||||||
|
{- git-branch -d cannot be used to remove a branch that is directly
|
||||||
|
- pointing to a corrupt commit. However, it's tried first. -}
|
||||||
|
nukeBranchRef :: Branch -> Repo -> IO ()
|
||||||
|
nukeBranchRef b r = void $ usegit <||> byhand
|
||||||
|
where
|
||||||
|
usegit = runBool
|
||||||
|
[ Param "branch"
|
||||||
|
, Params "-r -d"
|
||||||
|
, Param $ show $ Ref.base b
|
||||||
|
] r
|
||||||
|
byhand = do
|
||||||
|
nukeFile $ localGitDir r </> show b
|
||||||
|
whenM (doesFileExist packedrefs) $
|
||||||
|
withTmpFile "packed-refs" $ \tmp h -> do
|
||||||
|
ls <- lines <$> readFile packedrefs
|
||||||
|
hPutStr h $ unlines $
|
||||||
|
filter (not . skiprefline) ls
|
||||||
|
hClose h
|
||||||
|
renameFile tmp packedrefs
|
||||||
|
return True
|
||||||
|
skiprefline l = case parsePacked l of
|
||||||
|
Just packedref
|
||||||
|
| packedref == b -> True
|
||||||
|
_ -> False
|
||||||
|
packedrefs = packedRefsFile r
|
||||||
|
|
||||||
|
{- Finds the most recent commit to a branch that does not need any
|
||||||
|
- of the missing objects. If the input branch is good as-is, returns it.
|
||||||
|
- Otherwise, tries to traverse the commits in the branch to find one
|
||||||
|
- that is ok (might fail, if one of them is corrupt).
|
||||||
|
-}
|
||||||
|
findUncorruptedCommit :: MissingObjects -> GoodCommits -> Branch -> Repo -> IO (Maybe Sha, GoodCommits)
|
||||||
|
findUncorruptedCommit missing goodcommits branch r = do
|
||||||
|
(ok, goodcommits') <- verifyCommit missing goodcommits branch r
|
||||||
|
if ok
|
||||||
|
then return (Just branch, goodcommits')
|
||||||
|
else do
|
||||||
|
(ls, cleanup) <- pipeNullSplit
|
||||||
|
[ Param "log"
|
||||||
|
, Param "--format=%H"
|
||||||
|
, Param (show branch)
|
||||||
|
] r
|
||||||
|
cleanup `after` findfirst goodcommits (catMaybes $ map extractSha ls)
|
||||||
|
where
|
||||||
|
findfirst gcs [] = return (Nothing, gcs)
|
||||||
|
findfirst gcs (c:cs) = do
|
||||||
|
(ok, gcs') <- verifyCommit missing gcs c r
|
||||||
|
if ok
|
||||||
|
then return (Just c, gcs')
|
||||||
|
else findfirst gcs' cs
|
||||||
|
|
||||||
|
{- Looks through the reflog to find an old version of a branch that
|
||||||
|
- does not need any of the missing objects.
|
||||||
|
-}
|
||||||
|
findOldBranch :: MissingObjects -> GoodCommits -> Branch -> Repo -> IO (Maybe Sha, GoodCommits)
|
||||||
|
findOldBranch missing goodcommits branch r = error "TODO"
|
||||||
|
|
||||||
|
{- Verifies tha none of the missing objects in the set are used by
|
||||||
|
- the commit. Also adds to a set of commit shas that have been verified to
|
||||||
|
- be good, which can be passed into subsequent calls to avoid
|
||||||
|
- redundant work when eg, chasing down branches to find the first
|
||||||
|
- uncorrupted commit. -}
|
||||||
|
verifyCommit :: MissingObjects -> GoodCommits -> Sha -> Repo -> IO (Bool, GoodCommits)
|
||||||
|
verifyCommit missing goodcommits commit r
|
||||||
|
| checkGoodCommit commit goodcommits = return (True, goodcommits)
|
||||||
|
| otherwise = do
|
||||||
|
(ls, cleanup) <- pipeNullSplit
|
||||||
|
[ Param "log"
|
||||||
|
, Param "--format=%H %T"
|
||||||
|
, Param (show commit)
|
||||||
|
] r
|
||||||
|
let committrees = map parse ls
|
||||||
|
if any isNothing committrees
|
||||||
|
then do
|
||||||
|
void cleanup
|
||||||
|
return (False, goodcommits)
|
||||||
|
else do
|
||||||
|
let cts = catMaybes committrees
|
||||||
|
ifM (cleanup <&&> check cts)
|
||||||
|
( return (True, addGoodCommits (map fst cts) goodcommits)
|
||||||
|
, return (False, goodcommits)
|
||||||
|
)
|
||||||
|
where
|
||||||
|
parse l = case words l of
|
||||||
|
(commitsha:treesha:[]) -> (,)
|
||||||
|
<$> extractSha commitsha
|
||||||
|
<*> extractSha treesha
|
||||||
|
_ -> Nothing
|
||||||
|
check [] = return False
|
||||||
|
check ((commit, tree):rest)
|
||||||
|
| checkGoodCommit commit goodcommits = return True
|
||||||
|
| otherwise = verifyTree missing tree r <&&> check rest
|
||||||
|
|
||||||
|
{- Verifies that a tree is good, including all trees and blobs
|
||||||
|
- referenced by it. -}
|
||||||
|
verifyTree :: MissingObjects -> Sha -> Repo -> IO Bool
|
||||||
|
verifyTree missing treesha r
|
||||||
|
| S.member treesha missing = return False
|
||||||
|
| otherwise = do
|
||||||
|
(ls, cleanup) <- pipeNullSplit (LsTree.lsTreeParams treesha) r
|
||||||
|
let objshas = map (extractSha . LsTree.sha . LsTree.parseLsTree) ls
|
||||||
|
if any isNothing objshas || any (`S.member` missing) (catMaybes objshas)
|
||||||
|
then do
|
||||||
|
void cleanup
|
||||||
|
return False
|
||||||
|
-- as long as ls-tree succeeded, we're good
|
||||||
|
else cleanup
|
||||||
|
|
||||||
|
newtype GoodCommits = GoodCommits (S.Set Sha)
|
||||||
|
|
||||||
|
emptyGoodCommits :: GoodCommits
|
||||||
|
emptyGoodCommits = GoodCommits S.empty
|
||||||
|
|
||||||
|
checkGoodCommit :: Sha -> GoodCommits -> Bool
|
||||||
|
checkGoodCommit sha (GoodCommits s) = S.member sha s
|
||||||
|
|
||||||
|
addGoodCommits :: [Sha] -> GoodCommits -> GoodCommits
|
||||||
|
addGoodCommits shas (GoodCommits s) = GoodCommits $
|
||||||
|
S.union s (S.fromList shas)
|
||||||
|
|
||||||
notice :: String -> IO ()
|
notice :: String -> IO ()
|
||||||
notice = noticeM "RecoverRepository"
|
notice = noticeM "RecoverRepository"
|
||||||
|
|
|
@ -15,9 +15,19 @@ It does by deleting all corrupt objects, and retreiving all missing
|
||||||
objects that it can from the remotes of the repository.
|
objects that it can from the remotes of the repository.
|
||||||
|
|
||||||
If that is not sufficient to fully recover the repository, it can also
|
If that is not sufficient to fully recover the repository, it can also
|
||||||
reset branches back to commits before the corruption happened. It will only
|
reset branches back to commits before the corruption happened, and delete
|
||||||
do this if run with the --force option, since that rewrites history
|
branches that are no longer available due to the lost data. It will only
|
||||||
and throws out missing data.
|
do this if run with the `--force` option, since that rewrites history
|
||||||
|
and throws out missing data. Note that the `--force` option never touches
|
||||||
|
tags, even if they are no longer usable due to missing data.
|
||||||
|
|
||||||
|
After running this command, you will probably want to run `git fsck` to
|
||||||
|
verify it fixed the repository. Note that fsck may still complain about
|
||||||
|
objects referenced by the reflog, if they were unable to be recovered.
|
||||||
|
Use `git fsck --no-reflogs` to skip such objects.
|
||||||
|
|
||||||
|
Since this command unpacks all packs in the repository, you may want to
|
||||||
|
run `git gc` afterwards.
|
||||||
|
|
||||||
# AUTHOR
|
# AUTHOR
|
||||||
|
|
||||||
|
|
|
@ -59,14 +59,14 @@ main = do
|
||||||
]
|
]
|
||||||
if forced
|
if forced
|
||||||
then do
|
then do
|
||||||
remotebranches <- Git.RecoverRepository.removeTrackingBranches stillmissing g
|
(remotebranches, goodcommits) <- Git.RecoverRepository.removeTrackingBranches stillmissing Git.RecoverRepository.emptyGoodCommits g
|
||||||
unless (null remotebranches) $
|
unless (null remotebranches) $
|
||||||
putStrLn $ unwords
|
putStrLn $ unwords
|
||||||
[ "removed"
|
[ "removed"
|
||||||
, show (length remotebranches)
|
, show (length remotebranches)
|
||||||
, "remote tracking branches that referred to missing objects"
|
, "remote tracking branches that referred to missing objects"
|
||||||
]
|
]
|
||||||
localbranches <- Git.RecoverRepository.resetLocalBranches stillmissing g
|
localbranches <- Git.RecoverRepository.resetLocalBranches stillmissing goodcommits g
|
||||||
unless (null localbranches) $ do
|
unless (null localbranches) $ do
|
||||||
putStrLn "Reset these local branches to old versions before the missing objects were committed:"
|
putStrLn "Reset these local branches to old versions before the missing objects were committed:"
|
||||||
putStr $ unlines $ map show localbranches
|
putStr $ unlines $ map show localbranches
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue