git-annex/Git/RecoverRepository.hs
Joey Hess fcd91be6f0 implemented removal of corrupt tracking branches
Oh, git, you made this so hard. Not determining if a branch pointed to some
corrupt object, that was easy, but dealing with corrupt branches using git
plumbing is a PITA.
2013-10-21 15:28:06 -04:00

340 lines
10 KiB
Haskell

{- git repository recovery
-
- Copyright 2013 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Git.RecoverRepository (
cleanCorruptObjects,
retrieveMissingObjects,
resetLocalBranches,
removeTrackingBranches,
emptyGoodCommits,
) where
import Common
import Git
import Git.Command
import Git.Fsck
import Git.Objects
import Git.Sha
import qualified Git.Config
import qualified Git.Construct
import qualified Git.LsTree as LsTree
import qualified Git.Ref as Ref
import Utility.Tmp
import Utility.Rsync
import qualified Data.Set as S
import qualified Data.ByteString.Lazy as L
import System.Log.Logger
{- Finds and removes corrupt objects from the repository, returning a list
- of all such objects, which need to be found elsewhere to finish
- recovery.
-
- Strategy: Run git fsck, remove objects it identifies as corrupt,
- and repeat until git fsck finds no new objects.
-
- To remove corrupt objects, unpack all packs, and remove the packs
- (to handle corrupt packs), and remove loose object files.
-}
cleanCorruptObjects :: Repo -> IO MissingObjects
cleanCorruptObjects r = do
notice "Running git fsck ..."
check =<< findBroken r
where
check Nothing = do
notice "git fsck found a problem but no specific broken objects. Perhaps a corrupt pack file?"
ifM (explodePacks r)
( retry S.empty
, return S.empty
)
check (Just bad)
| S.null bad = return S.empty
| otherwise = do
notice $ unwords
[ "git fsck found"
, show (S.size bad)
, "broken objects."
]
exploded <- explodePacks r
removed <- removeLoose r bad
if exploded || removed
then retry bad
else return bad
retry oldbad = do
notice "Re-running git fsck to see if it finds more problems."
v <- findBroken r
case v of
Nothing -> error $ unwords
[ "git fsck found a problem, which was not corrected after removing"
, show (S.size oldbad)
, "corrupt objects."
]
Just newbad -> do
removed <- removeLoose r newbad
let s = S.union oldbad newbad
if not removed || s == oldbad
then return s
else retry s
removeLoose :: Repo -> MissingObjects -> IO Bool
removeLoose r s = do
let fs = map (looseObjectFile r) (S.toList s)
count <- length <$> filterM doesFileExist fs
if (count > 0)
then do
notice $ unwords
[ "removing"
, show count
, "corrupt loose objects"
]
mapM_ nukeFile fs
return True
else return False
explodePacks :: Repo -> IO Bool
explodePacks r = do
packs <- listPackFiles r
if null packs
then return False
else do
notice "Unpacking all pack files."
mapM_ go packs
return True
where
go packfile = do
-- May fail, if pack file is corrupt.
void $ tryIO $
pipeWrite [Param "unpack-objects"] r $ \h ->
L.hPut h =<< L.readFile packfile
nukeFile packfile
nukeFile $ packIdxFile packfile
{- Try to retrieve a set of missing objects, from the remotes of a
- repository. Returns any that could not be retreived.
-}
retrieveMissingObjects :: MissingObjects -> Repo -> IO MissingObjects
retrieveMissingObjects missing r
| S.null missing = return missing
| otherwise = withTmpDir "tmprepo" $ \tmpdir -> do
unlessM (boolSystem "git" [Params "init", File tmpdir]) $
error $ "failed to create temp repository in " ++ tmpdir
tmpr <- Git.Config.read =<< Git.Construct.fromAbsPath tmpdir
stillmissing <- pullremotes tmpr (remotes r) fetchrefstags missing
if S.null stillmissing
then return stillmissing
else pullremotes tmpr (remotes r) fetchallrefs stillmissing
where
pullremotes _tmpr [] _ stillmissing = return stillmissing
pullremotes tmpr (rmt:rmts) fetchrefs s
| S.null s = return s
| otherwise = do
notice $ "Trying to recover missing objects from remote " ++ repoDescribe rmt
ifM (fetchsome rmt fetchrefs tmpr)
( do
void $ copyObjects tmpr r
stillmissing <- findMissing (S.toList s) r
pullremotes tmpr rmts fetchrefs stillmissing
, do
notice $ unwords
[ "failed to fetch from remote"
, repoDescribe rmt
, "(will continue without it, but making this remote available may improve recovery)"
]
pullremotes tmpr rmts fetchrefs s
)
fetchsome rmt ps = runBool $
[ Param "fetch"
, Param (repoLocation rmt)
, Params "--force --update-head-ok --quiet"
] ++ ps
-- fetch refs and tags
fetchrefstags = [ Param "+refs/heads/*:refs/heads/*", Param "--tags"]
-- Fetch all available refs (more likely to fail,
-- as the remote may have refs it refuses to send).
fetchallrefs = [ Param "+*:*" ]
{- Copies all objects from the src repository to the dest repository.
- This is done using rsync, so it copies all missing object, and all
- objects they rely on. -}
copyObjects :: Repo -> Repo -> IO Bool
copyObjects srcr destr = rsync
[ Param "-qr"
, File $ addTrailingPathSeparator $ objectsDir srcr
, File $ addTrailingPathSeparator $ objectsDir destr
]
{- To deal with missing objects that cannot be recovered, resets any
- local branches to point to an old commit before the missing
- objects.
-}
resetLocalBranches :: MissingObjects -> GoodCommits -> Repo -> IO [Branch]
resetLocalBranches missing goodcommits r = do
error "TODO"
{- To deal with missing objects that cannot be recovered, removes
- any remote tracking branches that reference them. Returns a list of
- all removed branches.
-}
removeTrackingBranches :: MissingObjects -> GoodCommits -> Repo -> IO ([Branch], GoodCommits)
removeTrackingBranches missing goodcommits r =
go [] goodcommits =<< filter istrackingbranch <$> getAllRefs r
where
istrackingbranch b = "refs/remotes/" `isPrefixOf` show b
go removed gcs [] = return (removed, gcs)
go removed gcs (b:bs) = do
(ok, gcs') <- verifyCommit missing gcs b r
if ok
then go removed gcs' bs
else do
nukeBranchRef b r
go (b:removed) gcs' bs
{- Gets all refs, including ones that are corrupt.
- git show-ref does not output refs to commits that are directly
- corrupted, so it is not used.
-}
getAllRefs :: Repo -> IO [Ref]
getAllRefs r = do
packedrs <- mapMaybe parsePacked . lines
<$> catchDefaultIO "" (readFile $ packedRefsFile r)
loosers <- map toref <$> dirContentsRecursive (localGitDir r </> "refs")
return $ packedrs ++ loosers
where
refdir = localGitDir r </> "refs"
toref = Ref . relPathDirToFile (localGitDir r)
packedRefsFile :: Repo -> FilePath
packedRefsFile r = localGitDir r </> "packed-refs"
parsePacked :: String -> Maybe Ref
parsePacked l = case words l of
(sha:ref:[])
| isJust (extractSha sha) -> Just $ Ref ref
_ -> Nothing
{- git-branch -d cannot be used to remove a branch that is directly
- pointing to a corrupt commit. However, it's tried first. -}
nukeBranchRef :: Branch -> Repo -> IO ()
nukeBranchRef b r = void $ usegit <||> byhand
where
usegit = runBool
[ Param "branch"
, Params "-r -d"
, Param $ show $ Ref.base b
] r
byhand = do
nukeFile $ localGitDir r </> show b
whenM (doesFileExist packedrefs) $
withTmpFile "packed-refs" $ \tmp h -> do
ls <- lines <$> readFile packedrefs
hPutStr h $ unlines $
filter (not . skiprefline) ls
hClose h
renameFile tmp packedrefs
return True
skiprefline l = case parsePacked l of
Just packedref
| packedref == b -> True
_ -> False
packedrefs = packedRefsFile r
{- Finds the most recent commit to a branch that does not need any
- of the missing objects. If the input branch is good as-is, returns it.
- Otherwise, tries to traverse the commits in the branch to find one
- that is ok (might fail, if one of them is corrupt).
-}
findUncorruptedCommit :: MissingObjects -> GoodCommits -> Branch -> Repo -> IO (Maybe Sha, GoodCommits)
findUncorruptedCommit missing goodcommits branch r = do
(ok, goodcommits') <- verifyCommit missing goodcommits branch r
if ok
then return (Just branch, goodcommits')
else do
(ls, cleanup) <- pipeNullSplit
[ Param "log"
, Param "--format=%H"
, Param (show branch)
] r
cleanup `after` findfirst goodcommits (catMaybes $ map extractSha ls)
where
findfirst gcs [] = return (Nothing, gcs)
findfirst gcs (c:cs) = do
(ok, gcs') <- verifyCommit missing gcs c r
if ok
then return (Just c, gcs')
else findfirst gcs' cs
{- Looks through the reflog to find an old version of a branch that
- does not need any of the missing objects.
-}
findOldBranch :: MissingObjects -> GoodCommits -> Branch -> Repo -> IO (Maybe Sha, GoodCommits)
findOldBranch missing goodcommits branch r = error "TODO"
{- Verifies tha none of the missing objects in the set are used by
- the commit. Also adds to a set of commit shas that have been verified to
- be good, which can be passed into subsequent calls to avoid
- redundant work when eg, chasing down branches to find the first
- uncorrupted commit. -}
verifyCommit :: MissingObjects -> GoodCommits -> Sha -> Repo -> IO (Bool, GoodCommits)
verifyCommit missing goodcommits commit r
| checkGoodCommit commit goodcommits = return (True, goodcommits)
| otherwise = do
(ls, cleanup) <- pipeNullSplit
[ Param "log"
, Param "--format=%H %T"
, Param (show commit)
] r
let committrees = map parse ls
if any isNothing committrees
then do
void cleanup
return (False, goodcommits)
else do
let cts = catMaybes committrees
ifM (cleanup <&&> check cts)
( return (True, addGoodCommits (map fst cts) goodcommits)
, return (False, goodcommits)
)
where
parse l = case words l of
(commitsha:treesha:[]) -> (,)
<$> extractSha commitsha
<*> extractSha treesha
_ -> Nothing
check [] = return False
check ((commit, tree):rest)
| checkGoodCommit commit goodcommits = return True
| otherwise = verifyTree missing tree r <&&> check rest
{- Verifies that a tree is good, including all trees and blobs
- referenced by it. -}
verifyTree :: MissingObjects -> Sha -> Repo -> IO Bool
verifyTree missing treesha r
| S.member treesha missing = return False
| otherwise = do
(ls, cleanup) <- pipeNullSplit (LsTree.lsTreeParams treesha) r
let objshas = map (extractSha . LsTree.sha . LsTree.parseLsTree) ls
if any isNothing objshas || any (`S.member` missing) (catMaybes objshas)
then do
void cleanup
return False
-- as long as ls-tree succeeded, we're good
else cleanup
newtype GoodCommits = GoodCommits (S.Set Sha)
emptyGoodCommits :: GoodCommits
emptyGoodCommits = GoodCommits S.empty
checkGoodCommit :: Sha -> GoodCommits -> Bool
checkGoodCommit sha (GoodCommits s) = S.member sha s
addGoodCommits :: [Sha] -> GoodCommits -> GoodCommits
addGoodCommits shas (GoodCommits s) = GoodCommits $
S.union s (S.fromList shas)
notice :: String -> IO ()
notice = noticeM "RecoverRepository"