git-annex/Git/RecoverRepository.hs

341 lines
10 KiB
Haskell
Raw Normal View History

2013-10-20 21:50:51 +00:00
{- git repository recovery
-
- Copyright 2013 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Git.RecoverRepository (
cleanCorruptObjects,
retrieveMissingObjects,
resetLocalBranches,
removeTrackingBranches,
emptyGoodCommits,
2013-10-20 21:50:51 +00:00
) where
import Common
import Git
import Git.Command
import Git.Fsck
import Git.Objects
import Git.Sha
2013-10-20 21:50:51 +00:00
import qualified Git.Config
import qualified Git.Construct
import qualified Git.LsTree as LsTree
import qualified Git.Ref as Ref
2013-10-20 21:50:51 +00:00
import Utility.Tmp
import Utility.Rsync
import qualified Data.Set as S
import qualified Data.ByteString.Lazy as L
import System.Log.Logger
{- Finds and removes corrupt objects from the repository, returning a list
- of all such objects, which need to be found elsewhere to finish
- recovery.
-
- Strategy: Run git fsck, remove objects it identifies as corrupt,
- and repeat until git fsck finds no new objects.
-
- To remove corrupt objects, unpack all packs, and remove the packs
- (to handle corrupt packs), and remove loose object files.
-}
cleanCorruptObjects :: Repo -> IO MissingObjects
2013-10-20 21:50:51 +00:00
cleanCorruptObjects r = do
notice "Running git fsck ..."
check =<< findBroken r
where
check Nothing = do
notice "git fsck found a problem but no specific broken objects. Perhaps a corrupt pack file?"
ifM (explodePacks r)
( retry S.empty
, return S.empty
)
2013-10-20 21:50:51 +00:00
check (Just bad)
| S.null bad = return S.empty
| otherwise = do
notice $ unwords
[ "git fsck found"
, show (S.size bad)
, "broken objects."
2013-10-20 21:50:51 +00:00
]
exploded <- explodePacks r
removed <- removeLoose r bad
if exploded || removed
then retry bad
else return bad
2013-10-20 21:50:51 +00:00
retry oldbad = do
notice "Re-running git fsck to see if it finds more problems."
v <- findBroken r
case v of
Nothing -> error $ unwords
[ "git fsck found a problem, which was not corrected after removing"
, show (S.size oldbad)
, "corrupt objects."
]
Just newbad -> do
removed <- removeLoose r newbad
2013-10-20 21:50:51 +00:00
let s = S.union oldbad newbad
if not removed || s == oldbad
2013-10-20 21:50:51 +00:00
then return s
else retry s
removeLoose :: Repo -> MissingObjects -> IO Bool
2013-10-20 21:50:51 +00:00
removeLoose r s = do
let fs = map (looseObjectFile r) (S.toList s)
count <- length <$> filterM doesFileExist fs
if (count > 0)
then do
notice $ unwords
[ "removing"
, show count
, "corrupt loose objects"
]
mapM_ nukeFile fs
return True
else return False
2013-10-20 21:50:51 +00:00
explodePacks :: Repo -> IO Bool
explodePacks r = do
packs <- listPackFiles r
if null packs
then return False
else do
notice "Unpacking all pack files."
mapM_ go packs
return True
2013-10-20 21:50:51 +00:00
where
go packfile = do
-- May fail, if pack file is corrupt.
void $ tryIO $
pipeWrite [Param "unpack-objects"] r $ \h ->
L.hPut h =<< L.readFile packfile
nukeFile packfile
nukeFile $ packIdxFile packfile
{- Try to retrieve a set of missing objects, from the remotes of a
- repository. Returns any that could not be retreived.
-}
retrieveMissingObjects :: MissingObjects -> Repo -> IO MissingObjects
2013-10-20 21:50:51 +00:00
retrieveMissingObjects missing r
| S.null missing = return missing
| otherwise = withTmpDir "tmprepo" $ \tmpdir -> do
unlessM (boolSystem "git" [Params "init", File tmpdir]) $
error $ "failed to create temp repository in " ++ tmpdir
tmpr <- Git.Config.read =<< Git.Construct.fromAbsPath tmpdir
stillmissing <- pullremotes tmpr (remotes r) fetchrefstags missing
if S.null stillmissing
then return stillmissing
else pullremotes tmpr (remotes r) fetchallrefs stillmissing
where
pullremotes _tmpr [] _ stillmissing = return stillmissing
2013-10-20 21:50:51 +00:00
pullremotes tmpr (rmt:rmts) fetchrefs s
| S.null s = return s
| otherwise = do
notice $ "Trying to recover missing objects from remote " ++ repoDescribe rmt
ifM (fetchsome rmt fetchrefs tmpr)
( do
void $ copyObjects tmpr r
stillmissing <- findMissing (S.toList s) r
pullremotes tmpr rmts fetchrefs stillmissing
, do
notice $ unwords
[ "failed to fetch from remote"
, repoDescribe rmt
, "(will continue without it, but making this remote available may improve recovery)"
]
pullremotes tmpr rmts fetchrefs s
)
fetchsome rmt ps = runBool $
[ Param "fetch"
, Param (repoLocation rmt)
, Params "--force --update-head-ok --quiet"
] ++ ps
-- fetch refs and tags
fetchrefstags = [ Param "+refs/heads/*:refs/heads/*", Param "--tags"]
-- Fetch all available refs (more likely to fail,
-- as the remote may have refs it refuses to send).
fetchallrefs = [ Param "+*:*" ]
{- Copies all objects from the src repository to the dest repository.
- This is done using rsync, so it copies all missing object, and all
- objects they rely on. -}
copyObjects :: Repo -> Repo -> IO Bool
copyObjects srcr destr = rsync
[ Param "-qr"
, File $ addTrailingPathSeparator $ objectsDir srcr
, File $ addTrailingPathSeparator $ objectsDir destr
]
{- To deal with missing objects that cannot be recovered, resets any
- local branches to point to an old commit before the missing
- objects.
-}
resetLocalBranches :: MissingObjects -> GoodCommits -> Repo -> IO [Branch]
resetLocalBranches missing goodcommits r = do
2013-10-20 21:50:51 +00:00
error "TODO"
{- To deal with missing objects that cannot be recovered, removes
- any remote tracking branches that reference them. Returns a list of
- all removed branches.
2013-10-20 21:50:51 +00:00
-}
removeTrackingBranches :: MissingObjects -> GoodCommits -> Repo -> IO ([Branch], GoodCommits)
removeTrackingBranches missing goodcommits r =
go [] goodcommits =<< filter istrackingbranch <$> getAllRefs r
where
istrackingbranch b = "refs/remotes/" `isPrefixOf` show b
go removed gcs [] = return (removed, gcs)
go removed gcs (b:bs) = do
(ok, gcs') <- verifyCommit missing gcs b r
if ok
then go removed gcs' bs
else do
nukeBranchRef b r
go (b:removed) gcs' bs
{- Gets all refs, including ones that are corrupt.
- git show-ref does not output refs to commits that are directly
- corrupted, so it is not used.
-}
getAllRefs :: Repo -> IO [Ref]
getAllRefs r = do
packedrs <- mapMaybe parsePacked . lines
<$> catchDefaultIO "" (readFile $ packedRefsFile r)
loosers <- map toref <$> dirContentsRecursive (localGitDir r </> "refs")
return $ packedrs ++ loosers
where
refdir = localGitDir r </> "refs"
toref = Ref . relPathDirToFile (localGitDir r)
packedRefsFile :: Repo -> FilePath
packedRefsFile r = localGitDir r </> "packed-refs"
parsePacked :: String -> Maybe Ref
parsePacked l = case words l of
(sha:ref:[])
| isJust (extractSha sha) -> Just $ Ref ref
_ -> Nothing
{- git-branch -d cannot be used to remove a branch that is directly
- pointing to a corrupt commit. However, it's tried first. -}
nukeBranchRef :: Branch -> Repo -> IO ()
nukeBranchRef b r = void $ usegit <||> byhand
where
usegit = runBool
[ Param "branch"
, Params "-r -d"
, Param $ show $ Ref.base b
] r
byhand = do
nukeFile $ localGitDir r </> show b
whenM (doesFileExist packedrefs) $
withTmpFile "packed-refs" $ \tmp h -> do
ls <- lines <$> readFile packedrefs
hPutStr h $ unlines $
filter (not . skiprefline) ls
hClose h
renameFile tmp packedrefs
return True
skiprefline l = case parsePacked l of
Just packedref
| packedref == b -> True
_ -> False
packedrefs = packedRefsFile r
{- Finds the most recent commit to a branch that does not need any
- of the missing objects. If the input branch is good as-is, returns it.
- Otherwise, tries to traverse the commits in the branch to find one
- that is ok (might fail, if one of them is corrupt).
-}
findUncorruptedCommit :: MissingObjects -> GoodCommits -> Branch -> Repo -> IO (Maybe Sha, GoodCommits)
findUncorruptedCommit missing goodcommits branch r = do
(ok, goodcommits') <- verifyCommit missing goodcommits branch r
if ok
then return (Just branch, goodcommits')
else do
(ls, cleanup) <- pipeNullSplit
[ Param "log"
, Param "--format=%H"
, Param (show branch)
] r
cleanup `after` findfirst goodcommits (catMaybes $ map extractSha ls)
where
findfirst gcs [] = return (Nothing, gcs)
findfirst gcs (c:cs) = do
(ok, gcs') <- verifyCommit missing gcs c r
if ok
then return (Just c, gcs')
else findfirst gcs' cs
{- Looks through the reflog to find an old version of a branch that
- does not need any of the missing objects.
-}
findOldBranch :: MissingObjects -> GoodCommits -> Branch -> Repo -> IO (Maybe Sha, GoodCommits)
findOldBranch missing goodcommits branch r = error "TODO"
{- Verifies tha none of the missing objects in the set are used by
- the commit. Also adds to a set of commit shas that have been verified to
- be good, which can be passed into subsequent calls to avoid
- redundant work when eg, chasing down branches to find the first
- uncorrupted commit. -}
verifyCommit :: MissingObjects -> GoodCommits -> Sha -> Repo -> IO (Bool, GoodCommits)
verifyCommit missing goodcommits commit r
| checkGoodCommit commit goodcommits = return (True, goodcommits)
| otherwise = do
(ls, cleanup) <- pipeNullSplit
[ Param "log"
, Param "--format=%H %T"
, Param (show commit)
] r
let committrees = map parse ls
if any isNothing committrees
then do
void cleanup
return (False, goodcommits)
else do
let cts = catMaybes committrees
ifM (cleanup <&&> check cts)
( return (True, addGoodCommits (map fst cts) goodcommits)
, return (False, goodcommits)
)
where
parse l = case words l of
(commitsha:treesha:[]) -> (,)
<$> extractSha commitsha
<*> extractSha treesha
_ -> Nothing
check [] = return False
check ((commit, tree):rest)
| checkGoodCommit commit goodcommits = return True
| otherwise = verifyTree missing tree r <&&> check rest
{- Verifies that a tree is good, including all trees and blobs
- referenced by it. -}
verifyTree :: MissingObjects -> Sha -> Repo -> IO Bool
verifyTree missing treesha r
| S.member treesha missing = return False
| otherwise = do
(ls, cleanup) <- pipeNullSplit (LsTree.lsTreeParams treesha) r
let objshas = map (extractSha . LsTree.sha . LsTree.parseLsTree) ls
if any isNothing objshas || any (`S.member` missing) (catMaybes objshas)
then do
void cleanup
return False
-- as long as ls-tree succeeded, we're good
else cleanup
newtype GoodCommits = GoodCommits (S.Set Sha)
emptyGoodCommits :: GoodCommits
emptyGoodCommits = GoodCommits S.empty
checkGoodCommit :: Sha -> GoodCommits -> Bool
checkGoodCommit sha (GoodCommits s) = S.member sha s
addGoodCommits :: [Sha] -> GoodCommits -> GoodCommits
addGoodCommits shas (GoodCommits s) = GoodCommits $
S.union s (S.fromList shas)
2013-10-20 21:50:51 +00:00
notice :: String -> IO ()
notice = noticeM "RecoverRepository"