From 3e61749d08bb26cf5fc94baf9e9ecc0f30165b93 Mon Sep 17 00:00:00 2001
From: Joey Hess
Date: Tue, 22 Oct 2013 12:58:04 -0400
Subject: [PATCH] index file recovery

---
 Git/RecoverRepository.hs                    | 34 +++++++++++++++++++++
 Git/Types.hs                                |  9 ++++++
 Git/UpdateIndex.hs                          |  8 +++++
 doc/design/assistant/disaster_recovery.mdwn |  7 ++---
 doc/git-recover-repository.mdwn             | 11 ++++---
 git-recover-repository.hs                   | 28 +++++++++++++----
 6 files changed, 81 insertions(+), 16 deletions(-)

diff --git a/Git/RecoverRepository.hs b/Git/RecoverRepository.hs
index 8dce04f1e4..c2cad53f2c 100644
--- a/Git/RecoverRepository.hs
+++ b/Git/RecoverRepository.hs
@@ -10,6 +11,7 @@ module Git.RecoverRepository (
 	retrieveMissingObjects,
 	resetLocalBranches,
 	removeTrackingBranches,
+	rewriteIndex,
 	emptyGoodCommits,
 ) where
 
@@ -19,17 +20,21 @@ import Git.Command
 import Git.Fsck
 import Git.Objects
 import Git.Sha
+import Git.Types
 import qualified Git.Config
 import qualified Git.Construct
 import qualified Git.LsTree as LsTree
+import qualified Git.LsFiles as LsFiles
 import qualified Git.Ref as Ref
 import qualified Git.RefLog as RefLog
+import qualified Git.UpdateIndex as UpdateIndex
 import Utility.Tmp
 import Utility.Rsync
 
 import qualified Data.Set as S
 import qualified Data.ByteString.Lazy as L
 import System.Log.Logger
+import Data.Tuple.Utils
 
 {- Finds and removes corrupt objects from the repository, returning a list
  - of all such objects, which need to be found elsewhere to finish
@@ -349,6 +354,35 @@ verifyTree missing treesha r
 		-- as long as ls-tree succeeded, we're good
 		else cleanup
 
+{- Rewrites the index file, removing from it any files whose blobs are
+ - missing. Returns the list of affected files.
+ -
+ - When repoIsLocalBare holds, nothing is done and the list is empty. The
+ - index is only deleted and rebuilt if some staged blob is missing. -}
+rewriteIndex :: MissingObjects -> Repo -> IO [FilePath]
+rewriteIndex missing r
+	| repoIsLocalBare r = return []
+	| otherwise = do
+		(indexcontents, cleanup) <- LsFiles.stagedDetails [Git.repoPath r] r
+		let (bad, good) = partition ismissing indexcontents
+		unless (null bad) $ do
+			nukeFile (localGitDir r </> "index")
+			UpdateIndex.streamUpdateIndex r
+				=<< (catMaybes <$> mapM reinject good)
+		void cleanup
+		return $ map fst3 bad
+  where
+	getblob (_file, Just sha, Just _mode) = Just sha
+	getblob _ = Nothing
+	ismissing = maybe False (`S.member` missing) . getblob
+	-- Entries lacking a sha or mode, or whose mode toBlobType does not
+	-- recognise, are not written back when the index is rebuilt.
+	reinject (file, Just sha, Just mode) = case toBlobType mode of
+		Nothing -> return Nothing
+		Just blobtype -> Just <$>
+			UpdateIndex.stageFile sha blobtype file r
+	reinject _ = return Nothing
+
 newtype GoodCommits = GoodCommits (S.Set Sha)
 
 emptyGoodCommits :: GoodCommits
diff --git a/Git/Types.hs b/Git/Types.hs
index 4765aad6c9..abfb99f9fe 100644
--- a/Git/Types.hs
+++ b/Git/Types.hs
@@ -9,6 +10,7 @@ module Git.Types where
 
 import Network.URI
 import qualified Data.Map as M
+import System.Posix.Types
 
 {- Support repositories on local disk, and repositories accessed via an URL.
  -
@@ -81,3 +82,11 @@ readBlobType "100644" = Just FileBlob
 readBlobType "100755" = Just ExecutableBlob
 readBlobType "120000" = Just SymlinkBlob
 readBlobType _ = Nothing
+
+{- Like readBlobType, but takes the mode as a numeric FileMode rather
+ - than as a string. Any mode other than these three yields Nothing. -}
+toBlobType :: FileMode -> Maybe BlobType
+toBlobType 0o100644 = Just FileBlob
+toBlobType 0o100755 = Just ExecutableBlob
+toBlobType 0o120000 = Just SymlinkBlob
+toBlobType _ = Nothing
diff --git a/Git/UpdateIndex.hs b/Git/UpdateIndex.hs
index 5d07e20112..3b33ac8469 100644
--- a/Git/UpdateIndex.hs
+++ b/Git/UpdateIndex.hs
@@ -13,6 +13,7 @@ module Git.UpdateIndex (
 	streamUpdateIndex,
 	lsTree,
 	updateIndexLine,
+	stageFile,
 	unstageFile,
 	stageSymlink
 ) where
@@ -61,6 +62,13 @@ updateIndexLine :: Sha -> BlobType -> TopFilePath -> String
 updateIndexLine sha filetype file =
 	show filetype ++ " blob " ++ show sha ++ "\t" ++ indexPath file
 
+{- A streamer that stages a file, as a blob of the given type with the
+ - given sha. The path is first converted with toTopFilePath. -}
+stageFile :: Sha -> BlobType -> FilePath -> Repo -> IO Streamer
+stageFile sha filetype file repo = do
+	p <- toTopFilePath file repo
+	return $ pureStreamer $ updateIndexLine sha filetype p
+
 {- A streamer that removes a file from the index. -}
 unstageFile :: FilePath -> Repo -> IO Streamer
 unstageFile file repo = do
diff --git a/doc/design/assistant/disaster_recovery.mdwn b/doc/design/assistant/disaster_recovery.mdwn
index 4b3d2a2903..c630a72b47 100644
--- a/doc/design/assistant/disaster_recovery.mdwn
+++ b/doc/design/assistant/disaster_recovery.mdwn
@@ -148,8 +148,5 @@ that was found for it.
 if none was found. **done**
 * (Decided not to touch tags.)
 
-TODO: The index file can still refer to objects that were missing.
-This prevents git commit from working. And simply re-staging things doesn't
-seem to help; git sees the sha is "known" and does not re-add it,
-apparently. So, need to do something to clean up the index, while ideally
-not losing any staged changes.
+The index file can still refer to objects that were missing.
+Rewrite to remove them. **done**
diff --git a/doc/git-recover-repository.mdwn b/doc/git-recover-repository.mdwn
index f437e5688b..dac4a310b3 100644
--- a/doc/git-recover-repository.mdwn
+++ b/doc/git-recover-repository.mdwn
@@ -15,11 +15,12 @@ It does by deleting all corrupt objects, and retreiving all missing objects
 that it can from the remotes of the repository.
 
 If that is not sufficient to fully recover the repository, it can also
-reset branches back to commits before the corruption happened, and delete
-branches that are no longer available due to the lost data. It will only
-do this if run with the `--force` option, since that rewrites history
-and throws out missing data. Note that the `--force` option never touches
-tags, even if they are no longer usable due to missing data.
+reset branches back to commits before the corruption happened, delete
+branches that are no longer available due to the lost data, and remove any
+missing files from the index. It will only do this if run with the
+`--force` option, since that rewrites history and throws out missing data.
+Note that the `--force` option never touches tags, even if they are no
+longer usable due to missing data.
 
 After running this command, you will probably want to run `git fsck` to
 verify it fixed the repository. Note that fsck may still complain about
diff --git a/git-recover-repository.hs b/git-recover-repository.hs
index 21852e376a..998ab802c6 100644
--- a/git-recover-repository.hs
+++ b/git-recover-repository.hs
@@ -68,12 +68,13 @@ main = do
 				, "remote tracking branches that referred to missing objects"
 				]
 			(resetbranches, deletedbranches, _) <- Git.RecoverRepository.resetLocalBranches stillmissing goodcommits g
-			unless (null resetbranches) $ do
-				putStrLn "Reset these local branches to old versions before the missing objects were committed:"
-				putStr $ unlines $ map show resetbranches
-			unless (null deletedbranches) $ do
-				putStrLn "Deleted these local branches, which could not be recovered due to missing objects:"
-				putStr $ unlines $ map show deletedbranches
+			printList (map show resetbranches)
+				"Reset these local branches to old versions before the missing objects were committed:"
+			printList (map show deletedbranches)
+				"Deleted these local branches, which could not be recovered due to missing objects:"
+			deindexedfiles <- Git.RecoverRepository.rewriteIndex stillmissing g
+			printList deindexedfiles
+				"Removed these missing files from the index. You should look at what files are present in your working tree and git add them back to the index when appropriate."
 			mcurr <- Git.Branch.currentUnsafe g
 			case mcurr of
 				Nothing -> return ()
@@ -84,3 +85,18 @@ main = do
 					, "checked out. You may have staged changes in the index that can be committed to recover the lost state of this branch!"
 					]
 		else putStrLn "To force a recovery to a usable state, run this command again with the --force parameter."
+
+{- Prints the header, followed by the items, one per tab-indented line.
+ - Prints nothing at all when there are no items. When there are more than
+ - 10 items, only the first 10 are listed, then a count of the rest. -}
+printList :: [String] -> String -> IO ()
+printList items header
+	| null items = return ()
+	| otherwise = do
+		putStrLn header
+		putStr $ unlines $ map ('\t' :) truncateditems
+  where
+	numitems = length items
+	truncateditems
+		| numitems > 10 = take 10 items ++ ["(and " ++ show (numitems - 10) ++ " more)"]
+		| otherwise = items