495 lines
16 KiB
Haskell
495 lines
16 KiB
Haskell
{- git repository recovery
|
|
-
|
|
- Copyright 2013 Joey Hess <joey@kitenet.net>
|
|
-
|
|
- Licensed under the GNU GPL version 3 or higher.
|
|
-}
|
|
|
|
module Git.Repair (
|
|
runRepair,
|
|
runRepairOf,
|
|
cleanCorruptObjects,
|
|
retrieveMissingObjects,
|
|
resetLocalBranches,
|
|
removeTrackingBranches,
|
|
rewriteIndex,
|
|
checkIndex,
|
|
emptyGoodCommits,
|
|
) where
|
|
|
|
import Common
|
|
import Git
|
|
import Git.Command
|
|
import Git.Objects
|
|
import Git.Sha
|
|
import Git.Types
|
|
import Git.Fsck
|
|
import qualified Git.Config as Config
|
|
import qualified Git.Construct as Construct
|
|
import qualified Git.LsTree as LsTree
|
|
import qualified Git.LsFiles as LsFiles
|
|
import qualified Git.Ref as Ref
|
|
import qualified Git.RefLog as RefLog
|
|
import qualified Git.UpdateIndex as UpdateIndex
|
|
import qualified Git.Branch as Branch
|
|
import Utility.Tmp
|
|
import Utility.Rsync
|
|
|
|
import qualified Data.Set as S
|
|
import qualified Data.ByteString.Lazy as L
|
|
import Data.Tuple.Utils
|
|
|
|
{- Given a set of bad objects found by git fsck, removes all
|
|
- corrupt objects, and returns a list of missing objects,
|
|
- which need to be found elsewhere to finish recovery.
|
|
-
|
|
- Since git fsck may crash on corrupt objects, and so not
|
|
- report the full set of corrupt or missing objects,
|
|
- this removes corrupt objects, and re-runs fsck, until it
|
|
- stabalizes.
|
|
-
|
|
- To remove corrupt objects, unpack all packs, and remove the packs
|
|
- (to handle corrupt packs), and remove loose object files.
|
|
-}
|
|
cleanCorruptObjects :: FsckResults -> Repo -> IO MissingObjects
|
|
cleanCorruptObjects mmissing r = check mmissing
|
|
where
|
|
check Nothing = do
|
|
putStrLn "git fsck found a problem but no specific broken objects. Perhaps a corrupt pack file?"
|
|
ifM (explodePacks r)
|
|
( retry S.empty
|
|
, return S.empty
|
|
)
|
|
check (Just bad)
|
|
| S.null bad = return S.empty
|
|
| otherwise = do
|
|
putStrLn $ unwords
|
|
[ "git fsck found"
|
|
, show (S.size bad)
|
|
, "broken objects."
|
|
]
|
|
exploded <- explodePacks r
|
|
removed <- removeLoose r bad
|
|
if exploded || removed
|
|
then retry bad
|
|
else return bad
|
|
retry oldbad = do
|
|
putStrLn "Re-running git fsck to see if it finds more problems."
|
|
v <- findBroken False r
|
|
case v of
|
|
Nothing -> error $ unwords
|
|
[ "git fsck found a problem, which was not corrected after removing"
|
|
, show (S.size oldbad)
|
|
, "corrupt objects."
|
|
]
|
|
Just newbad -> do
|
|
removed <- removeLoose r newbad
|
|
let s = S.union oldbad newbad
|
|
if not removed || s == oldbad
|
|
then return s
|
|
else retry s
|
|
|
|
removeLoose :: Repo -> MissingObjects -> IO Bool
|
|
removeLoose r s = do
|
|
let fs = map (looseObjectFile r) (S.toList s)
|
|
count <- length <$> filterM doesFileExist fs
|
|
if (count > 0)
|
|
then do
|
|
putStrLn $ unwords
|
|
[ "removing"
|
|
, show count
|
|
, "corrupt loose objects"
|
|
]
|
|
mapM_ nukeFile fs
|
|
return True
|
|
else return False
|
|
|
|
explodePacks :: Repo -> IO Bool
|
|
explodePacks r = do
|
|
packs <- listPackFiles r
|
|
if null packs
|
|
then return False
|
|
else do
|
|
putStrLn "Unpacking all pack files."
|
|
mapM_ go packs
|
|
return True
|
|
where
|
|
go packfile = do
|
|
-- May fail, if pack file is corrupt.
|
|
void $ tryIO $
|
|
pipeWrite [Param "unpack-objects"] r $ \h ->
|
|
L.hPut h =<< L.readFile packfile
|
|
nukeFile packfile
|
|
nukeFile $ packIdxFile packfile
|
|
|
|
{- Try to retrieve a set of missing objects, from the remotes of a
|
|
- repository. Returns any that could not be retreived.
|
|
-}
|
|
retrieveMissingObjects :: MissingObjects -> Repo -> IO MissingObjects
|
|
retrieveMissingObjects missing r
|
|
| S.null missing = return missing
|
|
| otherwise = withTmpDir "tmprepo" $ \tmpdir -> do
|
|
unlessM (boolSystem "git" [Params "init", File tmpdir]) $
|
|
error $ "failed to create temp repository in " ++ tmpdir
|
|
tmpr <- Config.read =<< Construct.fromAbsPath tmpdir
|
|
stillmissing <- pullremotes tmpr (remotes r) fetchrefstags missing
|
|
if S.null stillmissing
|
|
then return stillmissing
|
|
else pullremotes tmpr (remotes r) fetchallrefs stillmissing
|
|
where
|
|
pullremotes _tmpr [] _ stillmissing = return stillmissing
|
|
pullremotes tmpr (rmt:rmts) fetchrefs s
|
|
| S.null s = return s
|
|
| otherwise = do
|
|
putStrLn $ "Trying to recover missing objects from remote " ++ repoDescribe rmt
|
|
ifM (fetchsome rmt fetchrefs tmpr)
|
|
( do
|
|
void $ copyObjects tmpr r
|
|
stillmissing <- findMissing (S.toList s) r
|
|
pullremotes tmpr rmts fetchrefs stillmissing
|
|
, do
|
|
putStrLn $ unwords
|
|
[ "failed to fetch from remote"
|
|
, repoDescribe rmt
|
|
, "(will continue without it, but making this remote available may improve recovery)"
|
|
]
|
|
pullremotes tmpr rmts fetchrefs s
|
|
)
|
|
fetchsome rmt ps = runBool $
|
|
[ Param "fetch"
|
|
, Param (repoLocation rmt)
|
|
, Params "--force --update-head-ok --quiet"
|
|
] ++ ps
|
|
-- fetch refs and tags
|
|
fetchrefstags = [ Param "+refs/heads/*:refs/heads/*", Param "--tags"]
|
|
-- Fetch all available refs (more likely to fail,
|
|
-- as the remote may have refs it refuses to send).
|
|
fetchallrefs = [ Param "+*:*" ]
|
|
|
|
{- Copies all objects from the src repository to the dest repository.
|
|
- This is done using rsync, so it copies all missing object, and all
|
|
- objects they rely on. -}
|
|
copyObjects :: Repo -> Repo -> IO Bool
|
|
copyObjects srcr destr = rsync
|
|
[ Param "-qr"
|
|
, File $ addTrailingPathSeparator $ objectsDir srcr
|
|
, File $ addTrailingPathSeparator $ objectsDir destr
|
|
]
|
|
|
|
{- To deal with missing objects that cannot be recovered, resets any
|
|
- local branches to point to an old commit before the missing
|
|
- objects. Returns all branches that were changed, and deleted.
|
|
-}
|
|
resetLocalBranches :: MissingObjects -> GoodCommits -> Repo -> IO ([Branch], [Branch], GoodCommits)
|
|
resetLocalBranches missing goodcommits r =
|
|
go [] [] goodcommits =<< filter islocalbranch <$> getAllRefs r
|
|
where
|
|
islocalbranch b = "refs/heads/" `isPrefixOf` show b
|
|
go changed deleted gcs [] = return (changed, deleted, gcs)
|
|
go changed deleted gcs (b:bs) = do
|
|
(mc, gcs') <- findUncorruptedCommit missing gcs b r
|
|
case mc of
|
|
Just c
|
|
| c == b -> go changed deleted gcs' bs
|
|
| otherwise -> do
|
|
reset b c
|
|
go (b:changed) deleted gcs' bs
|
|
Nothing -> do
|
|
nukeBranchRef b r
|
|
go changed (b:deleted) gcs' bs
|
|
reset b c = do
|
|
nukeBranchRef b r
|
|
void $ runBool
|
|
[ Param "branch"
|
|
, Param (show $ Ref.base b)
|
|
, Param (show c)
|
|
] r
|
|
|
|
{- To deal with missing objects that cannot be recovered, removes
|
|
- any remote tracking branches that reference them. Returns a list of
|
|
- all removed branches.
|
|
-}
|
|
removeTrackingBranches :: MissingObjects -> GoodCommits -> Repo -> IO ([Branch], GoodCommits)
|
|
removeTrackingBranches missing goodcommits r =
|
|
go [] goodcommits =<< filter istrackingbranch <$> getAllRefs r
|
|
where
|
|
istrackingbranch b = "refs/remotes/" `isPrefixOf` show b
|
|
go removed gcs [] = return (removed, gcs)
|
|
go removed gcs (b:bs) = do
|
|
(ok, gcs') <- verifyCommit missing gcs b r
|
|
if ok
|
|
then go removed gcs' bs
|
|
else do
|
|
nukeBranchRef b r
|
|
go (b:removed) gcs' bs
|
|
|
|
{- Gets all refs, including ones that are corrupt.
|
|
- git show-ref does not output refs to commits that are directly
|
|
- corrupted, so it is not used.
|
|
-}
|
|
getAllRefs :: Repo -> IO [Ref]
|
|
getAllRefs r = do
|
|
packedrs <- mapMaybe parsePacked . lines
|
|
<$> catchDefaultIO "" (readFile $ packedRefsFile r)
|
|
loosers <- map toref <$> dirContentsRecursive refdir
|
|
return $ packedrs ++ loosers
|
|
where
|
|
refdir = localGitDir r </> "refs"
|
|
toref = Ref . relPathDirToFile (localGitDir r)
|
|
|
|
packedRefsFile :: Repo -> FilePath
|
|
packedRefsFile r = localGitDir r </> "packed-refs"
|
|
|
|
parsePacked :: String -> Maybe Ref
|
|
parsePacked l = case words l of
|
|
(sha:ref:[])
|
|
| isJust (extractSha sha) -> Just $ Ref ref
|
|
_ -> Nothing
|
|
|
|
{- git-branch -d cannot be used to remove a branch that is directly
|
|
- pointing to a corrupt commit. However, it's tried first. -}
|
|
nukeBranchRef :: Branch -> Repo -> IO ()
|
|
nukeBranchRef b r = void $ usegit <||> byhand
|
|
where
|
|
usegit = runBool
|
|
[ Param "branch"
|
|
, Params "-r -d"
|
|
, Param $ show $ Ref.base b
|
|
] r
|
|
byhand = do
|
|
nukeFile $ localGitDir r </> show b
|
|
whenM (doesFileExist packedrefs) $
|
|
withTmpFile "packed-refs" $ \tmp h -> do
|
|
ls <- lines <$> readFile packedrefs
|
|
hPutStr h $ unlines $
|
|
filter (not . skiprefline) ls
|
|
hClose h
|
|
renameFile tmp packedrefs
|
|
return True
|
|
skiprefline l = case parsePacked l of
|
|
Just packedref
|
|
| packedref == b -> True
|
|
_ -> False
|
|
packedrefs = packedRefsFile r
|
|
|
|
{- Finds the most recent commit to a branch that does not need any
|
|
- of the missing objects. If the input branch is good as-is, returns it.
|
|
- Otherwise, tries to traverse the commits in the branch to find one
|
|
- that is ok. That might fail, if one of them is corrupt, or if an object
|
|
- at the root of the branch is missing. Finally, looks for an old version
|
|
- of the branch from the reflog.
|
|
-}
|
|
findUncorruptedCommit :: MissingObjects -> GoodCommits -> Branch -> Repo -> IO (Maybe Sha, GoodCommits)
|
|
findUncorruptedCommit missing goodcommits branch r = do
|
|
(ok, goodcommits') <- verifyCommit missing goodcommits branch r
|
|
if ok
|
|
then return (Just branch, goodcommits')
|
|
else do
|
|
(ls, cleanup) <- pipeNullSplit
|
|
[ Param "log"
|
|
, Param "-z"
|
|
, Param "--format=%H"
|
|
, Param (show branch)
|
|
] r
|
|
let branchshas = catMaybes $ map extractSha ls
|
|
reflogshas <- RefLog.get branch r
|
|
-- XXX Could try a bit harder here, and look
|
|
-- for uncorrupted old commits in branches in the
|
|
-- reflog.
|
|
cleanup `after` findfirst goodcommits (branchshas ++ reflogshas)
|
|
where
|
|
findfirst gcs [] = return (Nothing, gcs)
|
|
findfirst gcs (c:cs) = do
|
|
(ok, gcs') <- verifyCommit missing gcs c r
|
|
if ok
|
|
then return (Just c, gcs')
|
|
else findfirst gcs' cs
|
|
|
|
{- Verifies tha none of the missing objects in the set are used by
|
|
- the commit. Also adds to a set of commit shas that have been verified to
|
|
- be good, which can be passed into subsequent calls to avoid
|
|
- redundant work when eg, chasing down branches to find the first
|
|
- uncorrupted commit. -}
|
|
verifyCommit :: MissingObjects -> GoodCommits -> Sha -> Repo -> IO (Bool, GoodCommits)
|
|
verifyCommit missing goodcommits commit r
|
|
| checkGoodCommit commit goodcommits = return (True, goodcommits)
|
|
| otherwise = do
|
|
(ls, cleanup) <- pipeNullSplit
|
|
[ Param "log"
|
|
, Param "-z"
|
|
, Param "--format=%H %T"
|
|
, Param (show commit)
|
|
] r
|
|
let committrees = map parse ls
|
|
if any isNothing committrees || null committrees
|
|
then do
|
|
void cleanup
|
|
return (False, goodcommits)
|
|
else do
|
|
let cts = catMaybes committrees
|
|
ifM (cleanup <&&> check cts)
|
|
( return (True, addGoodCommits (map fst cts) goodcommits)
|
|
, return (False, goodcommits)
|
|
)
|
|
where
|
|
parse l = case words l of
|
|
(commitsha:treesha:[]) -> (,)
|
|
<$> extractSha commitsha
|
|
<*> extractSha treesha
|
|
_ -> Nothing
|
|
check [] = return True
|
|
check ((c, t):rest)
|
|
| checkGoodCommit c goodcommits = return True
|
|
| otherwise = verifyTree missing t r <&&> check rest
|
|
|
|
{- Verifies that a tree is good, including all trees and blobs
|
|
- referenced by it. -}
|
|
verifyTree :: MissingObjects -> Sha -> Repo -> IO Bool
|
|
verifyTree missing treesha r
|
|
| S.member treesha missing = return False
|
|
| otherwise = do
|
|
(ls, cleanup) <- pipeNullSplit (LsTree.lsTreeParams treesha) r
|
|
let objshas = map (extractSha . LsTree.sha . LsTree.parseLsTree) ls
|
|
if any isNothing objshas || any (`S.member` missing) (catMaybes objshas)
|
|
then do
|
|
void cleanup
|
|
return False
|
|
-- as long as ls-tree succeeded, we're good
|
|
else cleanup
|
|
|
|
{- Checks that the index file only refers to objects that are not missing. -}
|
|
checkIndex :: MissingObjects -> Repo -> IO Bool
|
|
checkIndex missing r = do
|
|
(bad, _good, cleanup) <- partitionIndex missing r
|
|
if null bad
|
|
then cleanup
|
|
else do
|
|
void cleanup
|
|
return False
|
|
|
|
partitionIndex :: MissingObjects -> Repo -> IO ([LsFiles.StagedDetails], [LsFiles.StagedDetails], IO Bool)
|
|
partitionIndex missing r = do
|
|
(indexcontents, cleanup) <- LsFiles.stagedDetails [repoPath r] r
|
|
let (bad, good) = partition ismissing indexcontents
|
|
return (bad, good, cleanup)
|
|
where
|
|
getblob (_file, Just sha, Just _mode) = Just sha
|
|
getblob _ = Nothing
|
|
ismissing = maybe False (`S.member` missing) . getblob
|
|
|
|
{- Rewrites the index file, removing from it any files whose blobs are
|
|
- missing. Returns the list of affected files. -}
|
|
rewriteIndex :: MissingObjects -> Repo -> IO [FilePath]
|
|
rewriteIndex missing r
|
|
| repoIsLocalBare r = return []
|
|
| otherwise = do
|
|
(bad, good, cleanup) <- partitionIndex missing r
|
|
unless (null bad) $ do
|
|
nukeFile (localGitDir r </> "index")
|
|
UpdateIndex.streamUpdateIndex r
|
|
=<< (catMaybes <$> mapM reinject good)
|
|
void cleanup
|
|
return $ map fst3 bad
|
|
where
|
|
reinject (file, Just sha, Just mode) = case toBlobType mode of
|
|
Nothing -> return Nothing
|
|
Just blobtype -> Just <$>
|
|
UpdateIndex.stageFile sha blobtype file r
|
|
reinject _ = return Nothing
|
|
|
|
newtype GoodCommits = GoodCommits (S.Set Sha)
|
|
|
|
emptyGoodCommits :: GoodCommits
|
|
emptyGoodCommits = GoodCommits S.empty
|
|
|
|
checkGoodCommit :: Sha -> GoodCommits -> Bool
|
|
checkGoodCommit sha (GoodCommits s) = S.member sha s
|
|
|
|
addGoodCommits :: [Sha] -> GoodCommits -> GoodCommits
|
|
addGoodCommits shas (GoodCommits s) = GoodCommits $
|
|
S.union s (S.fromList shas)
|
|
|
|
displayList :: [String] -> String -> IO ()
|
|
displayList items header
|
|
| null items = return ()
|
|
| otherwise = do
|
|
putStrLn header
|
|
putStr $ unlines $ map (\i -> "\t" ++ i) truncateditems
|
|
where
|
|
numitems = length items
|
|
truncateditems
|
|
| numitems > 10 = take 10 items ++ ["(and " ++ show (numitems - 10) ++ " more)"]
|
|
| otherwise = items
|
|
|
|
{- Put it all together. -}
|
|
runRepair :: Bool -> Repo -> IO (Bool, MissingObjects, [Branch])
|
|
runRepair forced g = do
|
|
putStrLn "Running git fsck ..."
|
|
fsckresult <- findBroken False g
|
|
if foundBroken fsckresult
|
|
then runRepairOf fsckresult forced g
|
|
else do
|
|
putStrLn "No problems found."
|
|
return (True, S.empty, [])
|
|
runRepairOf :: FsckResults -> Bool -> Repo -> IO (Bool, MissingObjects, [Branch])
|
|
runRepairOf fsckresult forced g = do
|
|
missing <- cleanCorruptObjects fsckresult g
|
|
stillmissing <- retrieveMissingObjects missing g
|
|
if S.null stillmissing
|
|
then successfulfinish stillmissing []
|
|
else do
|
|
putStrLn $ unwords
|
|
[ show (S.size stillmissing)
|
|
, "missing objects could not be recovered!"
|
|
]
|
|
if forced
|
|
then continuerepairs stillmissing
|
|
else unsuccessfulfinish stillmissing
|
|
where
|
|
continuerepairs stillmissing = do
|
|
(remotebranches, goodcommits) <- removeTrackingBranches stillmissing emptyGoodCommits g
|
|
unless (null remotebranches) $
|
|
putStrLn $ unwords
|
|
[ "removed"
|
|
, show (length remotebranches)
|
|
, "remote tracking branches that referred to missing objects"
|
|
]
|
|
(resetbranches, deletedbranches, _) <- resetLocalBranches stillmissing goodcommits g
|
|
displayList (map show resetbranches)
|
|
"Reset these local branches to old versions before the missing objects were committed:"
|
|
displayList (map show deletedbranches)
|
|
"Deleted these local branches, which could not be recovered due to missing objects:"
|
|
deindexedfiles <- rewriteIndex stillmissing g
|
|
displayList deindexedfiles
|
|
"Removed these missing files from the index. You should look at what files are present in your working tree and git add them back to the index when appropriate."
|
|
let modifiedbranches = resetbranches ++ deletedbranches
|
|
if null resetbranches && null deletedbranches
|
|
then successfulfinish stillmissing modifiedbranches
|
|
else do
|
|
unless (repoIsLocalBare g) $ do
|
|
mcurr <- Branch.currentUnsafe g
|
|
case mcurr of
|
|
Nothing -> return ()
|
|
Just curr -> when (any (== curr) modifiedbranches) $ do
|
|
putStrLn $ unwords
|
|
[ "You currently have"
|
|
, show curr
|
|
, "checked out. You may have staged changes in the index that can be committed to recover the lost state of this branch!"
|
|
]
|
|
putStrLn "Successfully recovered repository!"
|
|
putStrLn "Please carefully check that the changes mentioned above are ok.."
|
|
return (True, stillmissing, modifiedbranches)
|
|
successfulfinish stillmissing modifiedbranches = do
|
|
mapM_ putStrLn
|
|
[ "Successfully recovered repository!"
|
|
, "You should run \"git fsck\" to make sure, but it looks like"
|
|
, "everything was recovered ok."
|
|
]
|
|
return (True, stillmissing, modifiedbranches)
|
|
unsuccessfulfinish stillmissing = do
|
|
if repoIsLocalBare g
|
|
then do
|
|
putStrLn "If you have a clone of this bare repository, you should add it as a remote of this repository, and re-run git-recover-repository."
|
|
putStrLn "If there are no clones of this repository, you can instead run git-recover-repository with the --force parameter to force recovery to a possibly usable state."
|
|
else putStrLn "To force a recovery to a usable state, run this command again with the --force parameter."
|
|
return (False, stillmissing, [])
|