2013-10-20 21:50:51 +00:00
{- git repository recovery
-
2021-06-29 16:57:19 +00:00
- Copyright 2013 - 2021 Joey Hess < id @ joeyh . name >
2013-10-20 21:50:51 +00:00
-
2019-03-13 19:48:14 +00:00
- Licensed under the GNU AGPL version 3 or higher .
2013-10-20 21:50:51 +00:00
- }
2020-11-24 16:38:12 +00:00
{- # LANGUAGE OverloadedStrings # -}
2013-10-23 16:58:01 +00:00
module Git.Repair (
runRepair ,
2013-10-23 18:43:58 +00:00
runRepairOf ,
2014-07-21 22:42:58 +00:00
removeBadBranches ,
2013-11-19 21:08:57 +00:00
successfulRepair ,
2013-10-20 21:50:51 +00:00
cleanCorruptObjects ,
resetLocalBranches ,
2013-10-23 16:58:01 +00:00
checkIndex ,
2013-12-10 19:45:22 +00:00
checkIndexFast ,
2013-11-13 18:39:26 +00:00
missingIndex ,
2013-10-21 19:28:06 +00:00
emptyGoodCommits ,
2013-12-10 20:17:49 +00:00
isTrackingBranch ,
2013-10-20 21:50:51 +00:00
) where
import Common
import Git
import Git.Command
import Git.Objects
2013-10-21 19:28:06 +00:00
import Git.Sha
2013-10-22 16:58:04 +00:00
import Git.Types
2013-10-23 16:21:59 +00:00
import Git.Fsck
2013-11-30 18:29:11 +00:00
import Git.Index
2021-06-29 16:57:19 +00:00
import Git.Env
2013-10-23 16:21:59 +00:00
import qualified Git.Config as Config
import qualified Git.Construct as Construct
2013-10-21 19:28:06 +00:00
import qualified Git.LsTree as LsTree
2013-10-22 16:58:04 +00:00
import qualified Git.LsFiles as LsFiles
2013-10-21 19:28:06 +00:00
import qualified Git.Ref as Ref
2013-10-21 20:41:46 +00:00
import qualified Git.RefLog as RefLog
2013-10-22 16:58:04 +00:00
import qualified Git.UpdateIndex as UpdateIndex
2013-10-23 16:21:59 +00:00
import qualified Git.Branch as Branch
2020-10-28 19:40:50 +00:00
import Utility.Directory.Create
2017-12-31 20:08:31 +00:00
import Utility.Tmp.Dir
2013-10-20 21:50:51 +00:00
import Utility.Rsync
2013-11-20 22:31:00 +00:00
import Utility.FileMode
2020-10-29 14:33:12 +00:00
import qualified Utility.RawFilePath as R
2013-10-20 21:50:51 +00:00
import qualified Data.Set as S
import qualified Data.ByteString.Lazy as L
2020-10-28 19:40:50 +00:00
import qualified System.FilePath.ByteString as P
2013-10-20 21:50:51 +00:00
2013-11-22 00:07:44 +00:00
{- Given a set of bad objects found by git fsck, which may not
2014-03-10 20:27:22 +00:00
- be complete , finds and removes all corrupt objects . - }
cleanCorruptObjects :: FsckResults -> Repo -> IO ()
2013-11-22 00:07:44 +00:00
cleanCorruptObjects fsckresults r = do
void $ explodePacks r
2014-03-10 20:27:22 +00:00
mapM_ removeLoose ( S . toList $ knownMissing fsckresults )
mapM_ removeBad =<< listLooseObjectShas r
where
2020-11-05 22:45:37 +00:00
removeLoose s = removeWhenExistsWith R . removeLink ( looseObjectFile r s )
2014-03-10 20:27:22 +00:00
removeBad s = do
2020-11-05 22:45:37 +00:00
void $ tryIO $ allowRead $ looseObjectFile r s
2014-03-10 20:27:22 +00:00
whenM ( isMissing s r ) $
removeLoose s
2013-10-20 21:50:51 +00:00
2021-06-29 16:57:19 +00:00
{- Explodes all pack files to loose objects, and deletes the pack files.
2014-02-24 23:36:58 +00:00
-
2021-06-29 16:57:19 +00:00
- git unpack - objects will not unpack objects from a pack file that are
- in the git repo . So , GIT_OBJECT_DIRECTORY is pointed to a temporary
- directory , and the loose objects then are moved into place , before
- deleting the pack files .
2014-02-24 23:36:58 +00:00
-
2021-06-29 16:57:19 +00:00
- Also , that prevents unpack - objects from possibly looking at corrupt
2014-02-24 23:36:58 +00:00
- pack files to see if they contain an object , while unpacking a
- non - corrupt pack file .
- }
2013-10-20 23:42:17 +00:00
explodePacks :: Repo -> IO Bool
2014-02-24 23:36:58 +00:00
explodePacks r = go =<< listPackFiles r
2013-10-20 21:50:51 +00:00
where
2014-02-24 23:36:58 +00:00
go [] = return False
go packs = withTmpDir " packs " $ \ tmpdir -> do
2021-06-29 16:57:19 +00:00
r' <- addGitEnv r " GIT_OBJECT_DIRECTORY " tmpdir
2014-02-24 23:36:58 +00:00
putStrLn " Unpacking all pack files. "
forM_ packs $ \ packfile -> do
2021-06-29 16:57:19 +00:00
-- Just in case permissions are messed up.
allowRead ( toRawFilePath packfile )
2014-02-24 23:36:58 +00:00
-- May fail, if pack file is corrupt.
void $ tryIO $
2021-06-29 16:57:19 +00:00
pipeWrite [ Param " unpack-objects " , Param " -r " ] r' $ \ h ->
L . hPut h =<< L . readFile packfile
objs <- dirContentsRecursive tmpdir
forM_ objs $ \ objfile -> do
f <- relPathDirToFile
( toRawFilePath tmpdir )
( toRawFilePath objfile )
let dest = objectsDir r P .</> f
createDirectoryIfMissing True
( fromRawFilePath ( parentDir dest ) )
2022-06-22 20:47:34 +00:00
moveFile ( toRawFilePath objfile ) dest
2021-06-29 16:57:19 +00:00
forM_ packs $ \ packfile -> do
let f = toRawFilePath packfile
removeWhenExistsWith R . removeLink f
removeWhenExistsWith R . removeLink ( packIdxFile f )
2014-02-24 23:36:58 +00:00
return True
2013-10-20 21:50:51 +00:00
{- Try to retrieve a set of missing objects, from the remotes of a
- repository . Returns any that could not be retreived .
2013-11-20 22:31:00 +00:00
-
2013-10-27 19:38:59 +00:00
- If another clone of the repository exists locally , which might not be a
- remote of the repo being repaired , its path can be passed as a reference
- repository .
2013-10-20 21:50:51 +00:00
- }
2013-11-30 18:29:11 +00:00
retrieveMissingObjects :: FsckResults -> Maybe FilePath -> Repo -> IO FsckResults
2013-10-27 19:38:59 +00:00
retrieveMissingObjects missing referencerepo r
2013-11-30 18:29:11 +00:00
| not ( foundBroken missing ) = return missing
2013-10-20 21:50:51 +00:00
| otherwise = withTmpDir " tmprepo " $ \ tmpdir -> do
2015-06-01 17:52:23 +00:00
unlessM ( boolSystem " git " [ Param " init " , File tmpdir ] ) $
2021-12-09 18:36:54 +00:00
giveup $ " failed to create temp repository in " ++ tmpdir
avoid making absolute git remote path relative
When a git remote is configured with an absolute path, use that path,
rather than making it relative. If it's configured with a relative path,
use that.
Git.Construct.fromPath changed to preserve the path as-is,
rather than making it absolute. And Annex.new changed to not
convert the path to relative. Instead, Git.CurrentRepo.get
generates a relative path.
A few things that used fromAbsPath unncessarily were changed in passing to
use fromPath instead. I'm seeing fromAbsPath as a security check,
while before it was being used in some cases when the path was
known absolute already. It may be that fromAbsPath is not really needed,
but only git-annex-shell uses it now, and I'm not 100% sure that there's
not some input that would cause a relative path to be used, opening a
security hole, without the security check. So left it as-is.
Test suite passes and strace shows the configured remote url is used
unchanged in the path into it. I can't be 100% sure there's not some code
somewhere that takes an absolute path to the repo and converts it to
relative and uses it, but it seems pretty unlikely that the code paths used
for a git remote would call such code. One place I know of is gitAnnexLink,
but I'm pretty sure that git remotes never deal with annex symlinks. If
that did get called, it generates a path relative to cwd, which would have
been wrong before this change as well, when operating on a remote.
2021-02-08 17:18:01 +00:00
tmpr <- Config . read =<< Construct . fromPath ( toRawFilePath tmpdir )
2021-06-29 17:21:21 +00:00
let repoconfig r' = fromRawFilePath ( localGitDir r' P .</> " config " )
2021-06-29 17:15:15 +00:00
whenM ( doesFileExist ( repoconfig r ) ) $
2021-06-29 17:21:21 +00:00
L . readFile ( repoconfig r ) >>= L . writeFile ( repoconfig tmpr )
2018-01-09 19:36:56 +00:00
rs <- Construct . fromRemotes r
stillmissing <- pullremotes tmpr rs fetchrefstags missing
2013-11-30 18:29:11 +00:00
if S . null ( knownMissing stillmissing )
then return stillmissing
2018-01-09 19:36:56 +00:00
else pullremotes tmpr rs fetchallrefs stillmissing
2013-10-20 21:50:51 +00:00
where
2013-10-27 19:38:59 +00:00
pullremotes tmpr [] fetchrefs stillmissing = case referencerepo of
Nothing -> return stillmissing
Just p -> ifM ( fetchfrom p fetchrefs tmpr )
( do
2013-11-20 22:31:00 +00:00
void $ explodePacks tmpr
2013-10-27 19:38:59 +00:00
void $ copyObjects tmpr r
2013-11-20 22:31:00 +00:00
case stillmissing of
2013-11-30 18:29:11 +00:00
FsckFailed -> return $ FsckFailed
2014-03-12 19:18:43 +00:00
FsckFoundMissing s t -> FsckFoundMissing
<$> findMissing ( S . toList s ) r
<*> pure t
2013-10-27 19:38:59 +00:00
, return stillmissing
)
2013-11-20 22:31:00 +00:00
pullremotes tmpr ( rmt : rmts ) fetchrefs ms
2013-11-30 18:29:11 +00:00
| not ( foundBroken ms ) = return ms
2020-05-04 19:32:06 +00:00
| otherwise = case remoteName rmt of
Just n -> do
putStrLn $ " Trying to recover missing objects from remote " ++ n ++ " . "
ifM ( fetchfrom n fetchrefs tmpr )
( do
void $ explodePacks tmpr
void $ copyObjects tmpr r
case ms of
FsckFailed -> pullremotes tmpr rmts fetchrefs ms
FsckFoundMissing s t -> do
stillmissing <- findMissing ( S . toList s ) r
pullremotes tmpr rmts fetchrefs ( FsckFoundMissing stillmissing t )
, pullremotes tmpr rmts fetchrefs ms
)
Nothing -> pullremotes tmpr rmts fetchrefs ms
fetchfrom loc ps fetchr = runBool ps' fetchr'
2014-10-12 18:27:46 +00:00
where
ps' =
[ Param " fetch "
2020-05-04 19:32:06 +00:00
, Param loc
2015-06-01 17:52:23 +00:00
, Param " --force "
, Param " --update-head-ok "
, Param " --quiet "
2014-10-12 18:27:46 +00:00
] ++ ps
fetchr' = fetchr { gitGlobalOpts = gitGlobalOpts fetchr ++ nogc }
nogc = [ Param " -c " , Param " gc.auto=0 " ]
2013-10-20 21:50:51 +00:00
-- fetch refs and tags
fetchrefstags = [ Param " +refs/heads/*:refs/heads/* " , Param " --tags " ]
-- Fetch all available refs (more likely to fail,
-- as the remote may have refs it refuses to send).
fetchallrefs = [ Param " +*:* " ]
{- Copies all objects from the src repository to the dest repository.
2013-11-20 22:31:00 +00:00
- This is done using rsync , so it copies all missing objects , and all
2013-10-20 21:50:51 +00:00
- objects they rely on . - }
copyObjects :: Repo -> Repo -> IO Bool
copyObjects srcr destr = rsync
[ Param " -qr "
2020-11-05 22:45:37 +00:00
, File $ addTrailingPathSeparator $ fromRawFilePath $ objectsDir srcr
, File $ addTrailingPathSeparator $ fromRawFilePath $ objectsDir destr
2013-10-20 21:50:51 +00:00
]
{- To deal with missing objects that cannot be recovered, resets any
- local branches to point to an old commit before the missing
2013-10-21 20:19:00 +00:00
- objects . Returns all branches that were changed , and deleted .
2013-10-20 21:50:51 +00:00
- }
2013-10-21 20:19:00 +00:00
resetLocalBranches :: MissingObjects -> GoodCommits -> Repo -> IO ( [ Branch ] , [ Branch ] , GoodCommits )
resetLocalBranches missing goodcommits r =
go [] [] goodcommits =<< filter islocalbranch <$> getAllRefs r
where
2014-02-19 05:09:17 +00:00
islocalbranch b = " refs/heads/ " ` isPrefixOf ` fromRef b
2013-10-21 20:19:00 +00:00
go changed deleted gcs [] = return ( changed , deleted , gcs )
go changed deleted gcs ( b : bs ) = do
( mc , gcs' ) <- findUncorruptedCommit missing gcs b r
case mc of
Just c
| c == b -> go changed deleted gcs' bs
| otherwise -> do
reset b c
go ( b : changed ) deleted gcs' bs
Nothing -> do
2013-10-21 20:41:46 +00:00
nukeBranchRef b r
go changed ( b : deleted ) gcs' bs
2013-10-21 20:19:00 +00:00
reset b c = do
nukeBranchRef b r
void $ runBool
[ Param " branch "
2014-02-19 05:09:17 +00:00
, Param ( fromRef $ Ref . base b )
, Param ( fromRef c )
2013-10-21 20:19:00 +00:00
] r
2013-10-20 21:50:51 +00:00
2013-12-10 20:17:49 +00:00
isTrackingBranch :: Ref -> Bool
2014-02-19 05:09:17 +00:00
isTrackingBranch b = " refs/remotes/ " ` isPrefixOf ` fromRef b
2013-12-10 20:17:49 +00:00
2013-10-20 21:50:51 +00:00
{- To deal with missing objects that cannot be recovered, removes
2013-12-10 20:17:49 +00:00
- any branches ( filtered by a predicate ) that reference them
- Returns a list of all removed branches .
2013-10-20 21:50:51 +00:00
- }
2014-07-21 22:42:58 +00:00
removeBadBranches :: ( Ref -> Bool ) -> Repo -> IO [ Branch ]
removeBadBranches removablebranch r = fst <$> removeBadBranches' removablebranch S . empty emptyGoodCommits r
removeBadBranches' :: ( Ref -> Bool ) -> MissingObjects -> GoodCommits -> Repo -> IO ( [ Branch ] , GoodCommits )
removeBadBranches' removablebranch missing goodcommits r =
2013-12-10 20:17:49 +00:00
go [] goodcommits =<< filter removablebranch <$> getAllRefs r
2013-10-21 19:28:06 +00:00
where
go removed gcs [] = return ( removed , gcs )
go removed gcs ( b : bs ) = do
( ok , gcs' ) <- verifyCommit missing gcs b r
if ok
then go removed gcs' bs
else do
nukeBranchRef b r
go ( b : removed ) gcs' bs
2014-07-21 22:42:58 +00:00
badBranches :: MissingObjects -> Repo -> IO [ Branch ]
badBranches missing r = filterM isbad =<< getAllRefs r
where
isbad b = not . fst <$> verifyCommit missing emptyGoodCommits b r
2013-10-21 19:28:06 +00:00
{- Gets all refs, including ones that are corrupt.
- git show - ref does not output refs to commits that are directly
- corrupted , so it is not used .
2013-11-22 00:07:44 +00:00
-
- Relies on packed refs being exploded before it's called .
2013-10-21 19:28:06 +00:00
- }
getAllRefs :: Repo -> IO [ Ref ]
2019-12-09 17:49:05 +00:00
getAllRefs r = getAllRefs' ( fromRawFilePath ( localGitDir r ) </> " refs " )
2015-01-06 19:31:24 +00:00
getAllRefs' :: FilePath -> IO [ Ref ]
getAllRefs' refdir = do
let topsegs = length ( splitPath refdir ) - 1
2021-08-11 00:45:02 +00:00
let toref = Ref . encodeBS . joinPath . drop topsegs . splitPath
2015-01-06 19:31:24 +00:00
map toref <$> dirContentsRecursive refdir
2013-10-21 19:28:06 +00:00
2013-11-22 00:07:44 +00:00
explodePackedRefsFile :: Repo -> IO ()
explodePackedRefsFile r = do
let f = packedRefsFile r
2020-11-24 16:38:12 +00:00
let f' = toRawFilePath f
2013-11-22 00:07:44 +00:00
whenM ( doesFileExist f ) $ do
rs <- mapMaybe parsePacked . lines
2020-11-24 16:38:12 +00:00
<$> catchDefaultIO " " ( safeReadFile f' )
2013-11-22 00:07:44 +00:00
forM_ rs makeref
2020-11-24 16:38:12 +00:00
removeWhenExistsWith R . removeLink f'
2013-11-22 00:07:44 +00:00
where
makeref ( sha , ref ) = do
2020-10-28 19:40:50 +00:00
let gitd = localGitDir r
let dest = gitd P .</> fromRef' ref
let dest' = fromRawFilePath dest
2022-08-12 16:45:46 +00:00
createDirectoryUnder [ gitd ] ( parentDir dest )
2020-10-28 19:40:50 +00:00
unlessM ( doesFileExist dest' ) $
writeFile dest' ( fromRef sha )
2013-11-22 00:07:44 +00:00
2013-10-21 19:28:06 +00:00
packedRefsFile :: Repo -> FilePath
2019-12-09 17:49:05 +00:00
packedRefsFile r = fromRawFilePath ( localGitDir r ) </> " packed-refs "
2013-10-21 19:28:06 +00:00
2013-11-22 00:07:44 +00:00
parsePacked :: String -> Maybe ( Sha , Ref )
2013-10-21 19:28:06 +00:00
parsePacked l = case words l of
( sha : ref : [] )
2021-08-11 00:45:02 +00:00
| isJust ( extractSha ( encodeBS sha ) ) && Ref . legal True ref ->
Just ( Ref ( encodeBS sha ) , Ref ( encodeBS ref ) )
2013-10-21 19:28:06 +00:00
_ -> Nothing
{- git - branch - d cannot be used to remove a branch that is directly
2013-11-22 00:07:44 +00:00
- pointing to a corrupt commit . - }
2013-10-21 19:28:06 +00:00
nukeBranchRef :: Branch -> Repo -> IO ()
2020-10-29 14:33:12 +00:00
nukeBranchRef b r = removeWhenExistsWith R . removeLink $ localGitDir r P .</> fromRef' b
2013-10-21 19:28:06 +00:00
{- Finds the most recent commit to a branch that does not need any
- of the missing objects . If the input branch is good as - is , returns it .
- Otherwise , tries to traverse the commits in the branch to find one
2013-10-21 20:41:46 +00:00
- that is ok . That might fail , if one of them is corrupt , or if an object
- at the root of the branch is missing . Finally , looks for an old version
- of the branch from the reflog .
2013-10-21 19:28:06 +00:00
- }
findUncorruptedCommit :: MissingObjects -> GoodCommits -> Branch -> Repo -> IO ( Maybe Sha , GoodCommits )
findUncorruptedCommit missing goodcommits branch r = do
( ok , goodcommits' ) <- verifyCommit missing goodcommits branch r
if ok
then return ( Just branch , goodcommits' )
else do
2020-04-07 17:27:11 +00:00
( ls , cleanup ) <- pipeNullSplit'
2013-10-21 19:28:06 +00:00
[ Param " log "
2013-10-21 20:19:00 +00:00
, Param " -z "
2013-10-21 19:28:06 +00:00
, Param " --format=%H "
2014-02-19 05:09:17 +00:00
, Param ( fromRef branch )
2013-10-21 19:28:06 +00:00
] r
2020-04-07 17:27:11 +00:00
let branchshas = catMaybes $ map extractSha ls
2013-10-21 20:41:46 +00:00
reflogshas <- RefLog . get branch r
-- XXX Could try a bit harder here, and look
-- for uncorrupted old commits in branches in the
-- reflog.
cleanup ` after ` findfirst goodcommits ( branchshas ++ reflogshas )
2013-10-21 19:28:06 +00:00
where
findfirst gcs [] = return ( Nothing , gcs )
findfirst gcs ( c : cs ) = do
( ok , gcs' ) <- verifyCommit missing gcs c r
if ok
then return ( Just c , gcs' )
else findfirst gcs' cs
2014-03-12 19:18:43 +00:00
{- Verifies that none of the missing objects in the set are used by
2013-10-21 19:28:06 +00:00
- the commit . Also adds to a set of commit shas that have been verified to
- be good , which can be passed into subsequent calls to avoid
- redundant work when eg , chasing down branches to find the first
2022-05-04 15:32:23 +00:00
- uncorrupted commit .
-
- When the sha is not a commit but some other git object , returns
- true , but does not add it to the set .
- }
2013-10-21 19:28:06 +00:00
verifyCommit :: MissingObjects -> GoodCommits -> Sha -> Repo -> IO ( Bool , GoodCommits )
verifyCommit missing goodcommits commit r
| checkGoodCommit commit goodcommits = return ( True , goodcommits )
| otherwise = do
( ls , cleanup ) <- pipeNullSplit
[ Param " log "
2013-10-21 20:19:00 +00:00
, Param " -z "
2013-10-21 19:28:06 +00:00
, Param " --format=%H %T "
2014-02-19 05:09:17 +00:00
, Param ( fromRef commit )
2013-10-21 19:28:06 +00:00
] r
2019-12-05 15:40:10 +00:00
let committrees = map ( parse . decodeBL ) ls
2022-05-04 15:32:23 +00:00
-- git log on an object that is not a commit will
-- succeed without any output
if null committrees
then ifM cleanup
( return ( True , goodcommits )
, return ( False , goodcommits )
)
else if any isNothing committrees
then do
void cleanup
return ( False , goodcommits )
else do
let cts = catMaybes committrees
ifM ( cleanup <&&> check cts )
( return ( True , addGoodCommits ( map fst cts ) goodcommits )
, return ( False , goodcommits )
)
2013-10-21 19:28:06 +00:00
where
parse l = case words l of
( commitsha : treesha : [] ) -> ( , )
2021-08-11 00:45:02 +00:00
<$> extractSha ( encodeBS commitsha )
<*> extractSha ( encodeBS treesha )
2013-10-21 19:28:06 +00:00
_ -> Nothing
2013-10-21 20:19:00 +00:00
check [] = return True
2013-10-22 18:52:17 +00:00
check ( ( c , t ) : rest )
| checkGoodCommit c goodcommits = return True
| otherwise = verifyTree missing t r <&&> check rest
2013-10-21 19:28:06 +00:00
{- Verifies that a tree is good, including all trees and blobs
- referenced by it . - }
verifyTree :: MissingObjects -> Sha -> Repo -> IO Bool
verifyTree missing treesha r
| S . member treesha missing = return False
| otherwise = do
2021-03-23 16:44:29 +00:00
let nolong = LsTree . LsTreeLong False
( ls , cleanup ) <- pipeNullSplit ( LsTree . lsTreeParams LsTree . LsTreeRecursive nolong treesha [] ) r
let objshas = mapMaybe ( LsTree . sha <$$> eitherToMaybe . LsTree . parseLsTree nolong ) ls
2016-01-01 19:56:24 +00:00
if any ( ` S . member ` missing ) objshas
2013-10-21 19:28:06 +00:00
then do
void cleanup
return False
-- as long as ls-tree succeeded, we're good
else cleanup
2013-11-13 17:41:02 +00:00
{- Checks that the index file only refers to objects that are not missing,
2013-11-13 18:39:26 +00:00
- and is not itself corrupt . Note that a missing index file is not
- considered a problem ( repo may be new ) . - }
2013-12-10 19:40:01 +00:00
checkIndex :: Repo -> IO Bool
checkIndex r = do
2013-12-10 19:45:22 +00:00
( bad , _good , cleanup ) <- partitionIndex r
2013-11-13 18:39:26 +00:00
if null bad
then cleanup
else do
void cleanup
return False
2013-12-10 19:45:22 +00:00
{- Does not check every object the index refers to, but only that the index
- itself is not corrupt . - }
checkIndexFast :: Repo -> IO Bool
checkIndexFast r = do
2019-12-09 17:49:05 +00:00
( indexcontents , cleanup ) <- LsFiles . stagedDetails [ repoPath r ] r
2013-12-10 19:45:22 +00:00
length indexcontents ` seq ` cleanup
2013-11-13 18:39:26 +00:00
missingIndex :: Repo -> IO Bool
2019-12-09 17:49:05 +00:00
missingIndex r = not <$> doesFileExist ( fromRawFilePath ( localGitDir r ) </> " index " )
2013-10-23 16:58:01 +00:00
2013-12-10 19:40:01 +00:00
{- Finds missing and ok files staged in the index. -}
partitionIndex :: Repo -> IO ( [ LsFiles . StagedDetails ] , [ LsFiles . StagedDetails ] , IO Bool )
partitionIndex r = do
2019-12-09 17:49:05 +00:00
( indexcontents , cleanup ) <- LsFiles . stagedDetails [ repoPath r ] r
2020-07-08 18:54:29 +00:00
l <- forM indexcontents $ \ i @ ( _file , sha , _mode , _stagenum ) ->
( , ) <$> isMissing sha r <*> pure i
2013-12-10 19:45:22 +00:00
let ( bad , good ) = partition fst l
2013-12-10 19:40:01 +00:00
return ( map snd bad , map snd good , cleanup )
2013-10-23 16:58:01 +00:00
2013-10-22 16:58:04 +00:00
{- Rewrites the index file, removing from it any files whose blobs are
- missing . Returns the list of affected files . - }
2013-12-10 19:40:01 +00:00
rewriteIndex :: Repo -> IO [ FilePath ]
rewriteIndex r
2013-10-22 16:58:04 +00:00
| repoIsLocalBare r = return []
| otherwise = do
2013-12-10 19:40:01 +00:00
( bad , good , cleanup ) <- partitionIndex r
2013-10-22 18:52:17 +00:00
unless ( null bad ) $ do
2020-11-05 22:45:37 +00:00
removeWhenExistsWith R . removeLink ( indexFile r )
2013-10-22 16:58:04 +00:00
UpdateIndex . streamUpdateIndex r
2013-10-22 18:52:17 +00:00
=<< ( catMaybes <$> mapM reinject good )
2013-10-22 16:58:04 +00:00
void cleanup
2020-07-08 18:54:29 +00:00
return $ map ( \ ( file , _ , _ , _ ) -> fromRawFilePath file ) bad
2013-10-22 16:58:04 +00:00
where
2020-07-08 18:54:29 +00:00
reinject ( file , sha , mode , _ ) = case toTreeItemType mode of
2013-10-22 16:58:04 +00:00
Nothing -> return Nothing
2018-05-14 18:22:44 +00:00
Just treeitemtype -> Just <$>
2019-12-05 15:40:10 +00:00
UpdateIndex . stageFile sha treeitemtype ( fromRawFilePath file ) r
2013-10-22 16:58:04 +00:00
2013-10-21 19:28:06 +00:00
newtype GoodCommits = GoodCommits ( S . Set Sha )
emptyGoodCommits :: GoodCommits
emptyGoodCommits = GoodCommits S . empty
checkGoodCommit :: Sha -> GoodCommits -> Bool
checkGoodCommit sha ( GoodCommits s ) = S . member sha s
addGoodCommits :: [ Sha ] -> GoodCommits -> GoodCommits
addGoodCommits shas ( GoodCommits s ) = GoodCommits $
S . union s ( S . fromList shas )
2013-10-20 21:50:51 +00:00
2013-10-23 16:21:59 +00:00
displayList :: [ String ] -> String -> IO ()
displayList items header
| null items = return ()
| otherwise = do
putStrLn header
putStr $ unlines $ map ( \ i -> " \ t " ++ i ) truncateditems
where
2014-10-09 18:53:13 +00:00
numitems = length items
2013-10-23 16:21:59 +00:00
truncateditems
| numitems > 10 = take 10 items ++ [ " (and " ++ show ( numitems - 10 ) ++ " more) " ]
| otherwise = items
2013-11-20 22:31:00 +00:00
{- Fix problems that would prevent repair from working at all
-
- A missing or corrupt . git / HEAD makes git not treat the repository as a
- git repo . If there is a git repo in a parent directory , it may move up
- the tree and use that one instead . So , cannot use ` git show - ref HEAD ` to
- test it .
2013-11-22 00:07:44 +00:00
-
- Explode the packed refs file , to simplify dealing with refs , and because
- fsck can complain about bad refs in it .
2013-11-20 22:31:00 +00:00
- }
preRepair :: Repo -> IO ()
preRepair g = do
2013-11-22 00:07:44 +00:00
unlessM ( validhead <$> catchDefaultIO " " ( safeReadFile headfile ) ) $ do
2020-11-24 16:38:12 +00:00
removeWhenExistsWith R . removeLink headfile
writeFile ( fromRawFilePath headfile ) " ref: refs/heads/master "
2013-11-22 00:07:44 +00:00
explodePackedRefsFile g
2020-11-05 22:45:37 +00:00
unless ( repoIsLocalBare g ) $
void $ tryIO $ allowWrite $ indexFile g
2013-11-20 22:31:00 +00:00
where
2020-11-24 16:38:12 +00:00
headfile = localGitDir g P .</> " HEAD "
2020-04-07 17:27:11 +00:00
validhead s = " ref: refs/ " ` isPrefixOf ` s
2021-08-11 00:45:02 +00:00
|| isJust ( extractSha ( encodeBS s ) )
2013-11-20 22:31:00 +00:00
2013-10-23 16:21:59 +00:00
{- Put it all together. -}
2013-12-10 20:17:49 +00:00
runRepair :: ( Ref -> Bool ) -> Bool -> Repo -> IO ( Bool , [ Branch ] )
runRepair removablebranch forced g = do
2013-11-20 22:31:00 +00:00
preRepair g
2013-10-23 16:21:59 +00:00
putStrLn " Running git fsck ... "
2021-06-30 21:57:49 +00:00
fsckresult <- findBroken False False g
2013-10-23 17:13:40 +00:00
if foundBroken fsckresult
2021-01-12 01:57:35 +00:00
then do
putStrLn " Fsck found problems, attempting repair. "
runRepair' removablebranch fsckresult forced Nothing g
2013-10-23 16:21:59 +00:00
else do
2021-01-12 01:57:35 +00:00
putStrLn " Fsck found no problems. Checking for broken branches. "
2014-07-21 22:42:58 +00:00
bad <- badBranches S . empty g
if null bad
then do
putStrLn " No problems found. "
return ( True , [] )
2021-01-12 01:57:35 +00:00
else do
putStrLn " Found problems, attempting repair. "
runRepair' removablebranch fsckresult forced Nothing g
2013-10-27 19:38:59 +00:00
2013-12-10 20:17:49 +00:00
runRepairOf :: FsckResults -> ( Ref -> Bool ) -> Bool -> Maybe FilePath -> Repo -> IO ( Bool , [ Branch ] )
runRepairOf fsckresult removablebranch forced referencerepo g = do
2013-11-22 00:13:55 +00:00
preRepair g
2013-12-10 20:17:49 +00:00
runRepair' removablebranch fsckresult forced referencerepo g
2013-11-22 00:13:55 +00:00
2013-12-10 20:17:49 +00:00
runRepair' :: ( Ref -> Bool ) -> FsckResults -> Bool -> Maybe FilePath -> Repo -> IO ( Bool , [ Branch ] )
runRepair' removablebranch fsckresult forced referencerepo g = do
2014-03-10 20:27:22 +00:00
cleanCorruptObjects fsckresult g
2021-06-30 21:57:49 +00:00
missing <- findBroken False False g
2013-10-27 19:38:59 +00:00
stillmissing <- retrieveMissingObjects missing referencerepo g
2013-11-20 22:31:00 +00:00
case stillmissing of
2014-03-12 19:18:43 +00:00
FsckFoundMissing s t
2013-11-20 22:31:00 +00:00
| S . null s -> if repoIsLocalBare g
2014-07-21 22:42:58 +00:00
then checkbadbranches s
2013-12-10 19:40:01 +00:00
else ifM ( checkIndex g )
2014-07-21 22:42:58 +00:00
( checkbadbranches s
2013-11-20 22:31:00 +00:00
, do
putStrLn " No missing objects found, but the index file is corrupt! "
if forced
then corruptedindex
2013-12-10 19:45:22 +00:00
else needforce
2013-11-20 22:31:00 +00:00
)
| otherwise -> if forced
2013-12-10 19:40:01 +00:00
then ifM ( checkIndex g )
2014-03-12 19:18:43 +00:00
( forcerepair s t
2013-11-20 23:34:30 +00:00
, corruptedindex
)
2013-11-20 22:31:00 +00:00
else do
putStrLn $ unwords
[ show ( S . size s )
, " missing objects could not be recovered! "
]
2013-12-10 19:40:01 +00:00
unsuccessfulfinish
2013-11-30 18:29:11 +00:00
FsckFailed
2013-12-10 19:40:01 +00:00
| forced -> ifM ( pure ( repoIsLocalBare g ) <||> checkIndex g )
2013-11-20 22:31:00 +00:00
( do
2014-03-10 20:27:22 +00:00
cleanCorruptObjects FsckFailed g
2021-06-30 21:57:49 +00:00
stillmissing' <- findBroken False False g
2014-03-12 19:18:43 +00:00
case stillmissing' of
2013-12-10 19:45:22 +00:00
FsckFailed -> return ( False , [] )
2014-03-12 19:18:43 +00:00
FsckFoundMissing s t -> forcerepair s t
2013-11-20 22:31:00 +00:00
, corruptedindex
)
2013-12-10 19:40:01 +00:00
| otherwise -> unsuccessfulfinish
2013-10-23 17:13:40 +00:00
where
2014-03-12 19:18:43 +00:00
repairbranches missing = do
2014-07-21 22:42:58 +00:00
( removedbranches , goodcommits ) <- removeBadBranches' removablebranch missing emptyGoodCommits g
2013-12-10 20:17:49 +00:00
let remotebranches = filter isTrackingBranch removedbranches
2013-10-23 17:13:40 +00:00
unless ( null remotebranches ) $
2013-10-23 16:21:59 +00:00
putStrLn $ unwords
2013-11-22 00:07:44 +00:00
[ " Removed "
2013-10-23 17:13:40 +00:00
, show ( length remotebranches )
2013-11-22 00:07:44 +00:00
, " remote tracking branches that referred to missing objects. "
2013-10-23 16:21:59 +00:00
]
2014-03-12 19:18:43 +00:00
( resetbranches , deletedbranches , _ ) <- resetLocalBranches missing goodcommits g
2014-02-19 05:09:17 +00:00
displayList ( map fromRef resetbranches )
2013-10-23 17:13:40 +00:00
" Reset these local branches to old versions before the missing objects were committed: "
2014-02-19 05:09:17 +00:00
displayList ( map fromRef deletedbranches )
2013-10-23 17:13:40 +00:00
" Deleted these local branches, which could not be recovered due to missing objects: "
2014-03-12 19:18:43 +00:00
return ( resetbranches ++ deletedbranches )
2014-07-21 22:42:58 +00:00
checkbadbranches missing = do
bad <- badBranches missing g
case ( null bad , forced ) of
( True , _ ) -> successfulfinish []
( False , False ) -> do
displayList ( map fromRef bad )
" Some git branches refer to missing objects: "
unsuccessfulfinish
( False , True ) -> successfulfinish =<< repairbranches missing
2014-03-12 19:18:43 +00:00
forcerepair missing fscktruncated = do
modifiedbranches <- repairbranches missing
2013-12-10 19:40:01 +00:00
deindexedfiles <- rewriteIndex g
2013-10-23 17:13:40 +00:00
displayList deindexedfiles
" Removed these missing files from the index. You should look at what files are present in your working tree and git add them back to the index when appropriate. "
2014-03-12 19:18:43 +00:00
-- When the fsck results were truncated, try
-- fscking again, and as long as different
-- missing objects are found, continue
-- the repair process.
if fscktruncated
then do
2021-06-30 21:57:49 +00:00
fsckresult' <- findBroken False False g
2014-03-12 19:18:43 +00:00
case fsckresult' of
FsckFailed -> do
putStrLn " git fsck is failing "
return ( False , modifiedbranches )
FsckFoundMissing s _
| S . null s -> successfulfinish modifiedbranches
| S . null ( s ` S . difference ` missing ) -> do
2013-10-23 17:13:40 +00:00
putStrLn $ unwords
2014-03-12 19:18:43 +00:00
[ show ( S . size s )
, " missing objects could not be recovered! "
2013-10-23 17:13:40 +00:00
]
2014-03-12 19:18:43 +00:00
return ( False , modifiedbranches )
| otherwise -> do
( ok , modifiedbranches' ) <- runRepairOf fsckresult' removablebranch forced referencerepo g
return ( ok , modifiedbranches ++ modifiedbranches' )
else successfulfinish modifiedbranches
2013-11-13 17:41:02 +00:00
corruptedindex = do
2020-11-05 22:45:37 +00:00
removeWhenExistsWith R . removeLink ( indexFile g )
2013-11-19 21:15:35 +00:00
-- The corrupted index can prevent fsck from finding other
-- problems, so re-run repair.
2021-06-30 21:57:49 +00:00
fsckresult' <- findBroken False False g
2013-12-10 20:17:49 +00:00
result <- runRepairOf fsckresult' removablebranch forced referencerepo g
2013-11-13 17:41:02 +00:00
putStrLn " Removed the corrupted index file. You should look at what files are present in your working tree and git add them back to the index when appropriate. "
2013-11-19 21:15:35 +00:00
return result
2013-11-13 17:41:02 +00:00
2014-03-12 19:18:43 +00:00
successfulfinish modifiedbranches
| null modifiedbranches = do
mapM_ putStrLn
[ " Successfully recovered repository! "
, " You should run \ " git fsck \ " to make sure, but it looks like everything was recovered ok. "
]
return ( True , modifiedbranches )
| otherwise = do
unless ( repoIsLocalBare g ) $ do
mcurr <- Branch . currentUnsafe g
case mcurr of
Nothing -> return ()
Just curr -> when ( any ( == curr ) modifiedbranches ) $ do
putStrLn $ unwords
[ " You currently have "
, fromRef curr
, " checked out. You may have staged changes in the index that can be committed to recover the lost state of this branch! "
]
putStrLn " Successfully recovered repository! "
putStrLn " Please carefully check that the changes mentioned above are ok.. "
return ( True , modifiedbranches )
2013-12-10 19:40:01 +00:00
unsuccessfulfinish = do
2013-10-23 17:13:40 +00:00
if repoIsLocalBare g
then do
2013-11-18 17:24:55 +00:00
putStrLn " If you have a clone of this bare repository, you should add it as a remote of this repository, and retry. "
putStrLn " If there are no clones of this repository, you can instead retry with the --force parameter to force recovery to a possibly usable state. "
2013-12-10 19:40:01 +00:00
return ( False , [] )
2013-12-10 19:45:22 +00:00
else needforce
2013-12-10 19:40:01 +00:00
needforce = do
2013-11-18 17:24:55 +00:00
putStrLn " To force a recovery to a usable state, retry with the --force parameter. "
2013-12-10 19:40:01 +00:00
return ( False , [] )
2013-11-20 22:31:00 +00:00
2013-12-10 19:48:24 +00:00
successfulRepair :: ( Bool , [ Branch ] ) -> Bool
successfulRepair = fst
2013-11-22 00:07:44 +00:00
2020-11-24 16:38:12 +00:00
safeReadFile :: RawFilePath -> IO String
2013-11-22 00:07:44 +00:00
safeReadFile f = do
2020-11-24 16:38:12 +00:00
allowRead f
readFileStrict ( fromRawFilePath f )