From d5eb85acf474edd86ab0e7bbdd2c41f1e84c7a7b Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 23 Oct 2013 12:21:59 -0400 Subject: [PATCH] add repair command --- Command/Repair.hs | 25 +++++++++ Git/RecoverRepository.hs | 103 ++++++++++++++++++++++++++++++++------ GitAnnex.hs | 2 + debian/changelog | 2 + doc/git-annex.mdwn | 34 ++++++++++++- git-recover-repository.hs | 75 ++------------------------- 6 files changed, 154 insertions(+), 87 deletions(-) create mode 100644 Command/Repair.hs diff --git a/Command/Repair.hs b/Command/Repair.hs new file mode 100644 index 0000000000..0e655588b7 --- /dev/null +++ b/Command/Repair.hs @@ -0,0 +1,25 @@ +{- git-annex command + - + - Copyright 2013 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Command.Repair where + +import Common.Annex +import Command +import qualified Annex +import Git.RecoverRepository (runRecovery) + +def :: [Command] +def = [noCommit $ dontCheck repoExists $ + command "repair" paramNothing seek SectionMaintenance "recover broken git repository"] + +seek :: [CommandSeek] +seek = [withNothing start] + +start :: CommandStart +start = next $ next $ do + force <- Annex.getState Annex.force + inRepo $ runRecovery force diff --git a/Git/RecoverRepository.hs b/Git/RecoverRepository.hs index f591bd6b2e..1ae817fbc1 100644 --- a/Git/RecoverRepository.hs +++ b/Git/RecoverRepository.hs @@ -6,6 +6,7 @@ -} module Git.RecoverRepository ( + runRecovery, cleanCorruptObjects, retrieveMissingObjects, resetLocalBranches, @@ -17,23 +18,23 @@ module Git.RecoverRepository ( import Common import Git import Git.Command -import Git.Fsck import Git.Objects import Git.Sha import Git.Types -import qualified Git.Config -import qualified Git.Construct +import Git.Fsck +import qualified Git.Config as Config +import qualified Git.Construct as Construct import qualified Git.LsTree as LsTree import qualified Git.LsFiles as LsFiles import qualified Git.Ref as Ref import qualified Git.RefLog as RefLog import qualified Git.UpdateIndex as UpdateIndex +import qualified Git.Branch as Branch import Utility.Tmp import Utility.Rsync import qualified Data.Set as S import qualified Data.ByteString.Lazy as L -import System.Log.Logger import Data.Tuple.Utils {- Given a set of bad objects found by git fsck, removes all @@ -52,7 +53,7 @@ cleanCorruptObjects :: FsckResults -> Repo -> IO MissingObjects cleanCorruptObjects mmissing r = check mmissing where check Nothing = do - notice "git fsck found a problem but no specific broken objects. Perhaps a corrupt pack file?" + putStrLn "git fsck found a problem but no specific broken objects. Perhaps a corrupt pack file?" ifM (explodePacks r) ( retry S.empty , return S.empty @@ -60,7 +61,7 @@ cleanCorruptObjects mmissing r = check mmissing check (Just bad) | S.null bad = return S.empty | otherwise = do - notice $ unwords + putStrLn $ unwords [ "git fsck found" , show (S.size bad) , "broken objects." @@ -71,7 +72,7 @@ cleanCorruptObjects mmissing r = check mmissing then retry bad else return bad retry oldbad = do - notice "Re-running git fsck to see if it finds more problems." + putStrLn "Re-running git fsck to see if it finds more problems." v <- findBroken False r case v of Nothing -> error $ unwords @@ -92,7 +93,7 @@ removeLoose r s = do count <- length <$> filterM doesFileExist fs if (count > 0) then do - notice $ unwords + putStrLn $ unwords [ "removing" , show count , "corrupt loose objects" @@ -107,7 +108,7 @@ explodePacks r = do if null packs then return False else do - notice "Unpacking all pack files." + putStrLn "Unpacking all pack files." mapM_ go packs return True where @@ -128,7 +129,7 @@ retrieveMissingObjects missing r | otherwise = withTmpDir "tmprepo" $ \tmpdir -> do unlessM (boolSystem "git" [Params "init", File tmpdir]) $ error $ "failed to create temp repository in " ++ tmpdir - tmpr <- Git.Config.read =<< Git.Construct.fromAbsPath tmpdir + tmpr <- Config.read =<< Construct.fromAbsPath tmpdir stillmissing <- pullremotes tmpr (remotes r) fetchrefstags missing if S.null stillmissing then return stillmissing @@ -138,14 +139,14 @@ retrieveMissingObjects missing r pullremotes tmpr (rmt:rmts) fetchrefs s | S.null s = return s | otherwise = do - notice $ "Trying to recover missing objects from remote " ++ repoDescribe rmt + putStrLn $ "Trying to recover missing objects from remote " ++ repoDescribe rmt ifM (fetchsome rmt fetchrefs tmpr) ( do void $ copyObjects tmpr r stillmissing <- findMissing (S.toList s) r pullremotes tmpr rmts fetchrefs stillmissing , do - notice $ unwords + putStrLn $ unwords [ "failed to fetch from remote" , repoDescribe rmt , "(will continue without it, but making this remote available may improve recovery)" @@ -360,7 +361,7 @@ rewriteIndex :: MissingObjects -> Repo -> IO [FilePath] rewriteIndex missing r | repoIsLocalBare r = return [] | otherwise = do - (indexcontents, cleanup) <- LsFiles.stagedDetails [Git.repoPath r] r + (indexcontents, cleanup) <- LsFiles.stagedDetails [repoPath r] r let (bad, good) = partition ismissing indexcontents unless (null bad) $ do nukeFile (localGitDir r "index") @@ -390,5 +391,77 @@ addGoodCommits :: [Sha] -> GoodCommits -> GoodCommits addGoodCommits shas (GoodCommits s) = GoodCommits $ S.union s (S.fromList shas) -notice :: String -> IO () -notice = noticeM "RecoverRepository" +displayList :: [String] -> String -> IO () +displayList items header + | null items = return () + | otherwise = do + putStrLn header + putStr $ unlines $ map (\i -> "\t" ++ i) truncateditems + where + numitems = length items + truncateditems + | numitems > 10 = take 10 items ++ ["(and " ++ show (numitems - 10) ++ " more)"] + | otherwise = items + +{- Put it all together. -} +runRecovery :: Bool -> Repo -> IO Bool +runRecovery forced g = do + putStrLn "Running git fsck ..." + fsckresult <- findBroken False g + missing <- cleanCorruptObjects fsckresult g + stillmissing <- retrieveMissingObjects missing g + if S.null stillmissing + then successfulfinish + else do + putStrLn $ unwords + [ show (S.size stillmissing) + , "missing objects could not be recovered!" + ] + if forced + then do + (remotebranches, goodcommits) <- removeTrackingBranches stillmissing emptyGoodCommits g + unless (null remotebranches) $ + putStrLn $ unwords + [ "removed" + , show (length remotebranches) + , "remote tracking branches that referred to missing objects" + ] + (resetbranches, deletedbranches, _) <- resetLocalBranches stillmissing goodcommits g + displayList (map show resetbranches) + "Reset these local branches to old versions before the missing objects were committed:" + displayList (map show deletedbranches) + "Deleted these local branches, which could not be recovered due to missing objects:" + deindexedfiles <- rewriteIndex stillmissing g + displayList deindexedfiles + "Removed these missing files from the index. You should look at what files are present in your working tree and git add them back to the index when appropriate." + if null resetbranches && null deletedbranches + then successfulfinish + else do + unless (repoIsLocalBare g) $ do + mcurr <- Branch.currentUnsafe g + case mcurr of + Nothing -> return () + Just curr -> when (any (== curr) (resetbranches ++ deletedbranches)) $ do + putStrLn $ unwords + [ "You currently have" + , show curr + , "checked out. You may have staged changes in the index that can be committed to recover the lost state of this branch!" + ] + putStrLn "Successfully recovered repository!" + putStrLn "Please carefully check that the changes mentioned above are ok.." + return True + else do + if repoIsLocalBare g + then do + putStrLn "If you have a clone of this bare repository, you should add it as a remote of this repository, and re-run git-recover-repository." + putStrLn "If there are no clones of this repository, you can instead run git-recover-repository with the --force parameter to force recovery to a possibly usable state." + else putStrLn "To force a recovery to a usable state, run this command again with the --force parameter." + return False + where + successfulfinish = do + mapM_ putStrLn + [ "Successfully recovered repository!" + , "You should run \"git fsck\" to make sure, but it looks like" + , "everything was recovered ok." + ] + return True diff --git a/GitAnnex.hs b/GitAnnex.hs index 36fe6aa838..1001d7b5a0 100644 --- a/GitAnnex.hs +++ b/GitAnnex.hs @@ -34,6 +34,7 @@ import qualified Command.Describe import qualified Command.InitRemote import qualified Command.EnableRemote import qualified Command.Fsck +import qualified Command.Repair import qualified Command.Unused import qualified Command.DropUnused import qualified Command.AddUnused @@ -130,6 +131,7 @@ cmds = concat , Command.ReKey.def , Command.Fix.def , Command.Fsck.def + , Command.Repair.def , Command.Unused.def , Command.DropUnused.def , Command.AddUnused.def diff --git a/debian/changelog b/debian/changelog index 51dbba396e..d0faccd40b 100644 --- a/debian/changelog +++ b/debian/changelog @@ -5,6 +5,8 @@ git-annex (4.20131003) UNRELEASED; urgency=low * The assitant can now run scheduled incremental fsck jobs on the local repository and remotes. These can be configured using vicfg or with the webapp. + * repair: New command, which can repair damaged git repositories + (even ones not using git-annex). * Automatically and safely detect and recover from dangling .git/annex/index.lock files, which would prevent git from committing to the git-annex branch, eg after a crash. diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn index 18c10be0ca..506c7bb639 100644 --- a/doc/git-annex.mdwn +++ b/doc/git-annex.mdwn @@ -444,7 +444,8 @@ subdirectories). * `fsck [path ...]` With no parameters, this command checks the whole annex for consistency, - and warns about or fixes any problems found. + and warns about or fixes any problems found. This is a good compliment to + `git fsck`. With parameters, only the specified files are checked. @@ -529,6 +530,37 @@ subdirectories). git-annex have forgotten their old history. (You may need to force git to push the branch to any git repositories not running git-annex. +* `repair` + + This can repair many of the problems with git repositories that `git fsck` + detects, but does not itself fix. It's useful if a repository has become + badly damaged. One way this can happen is if a repisitory used by git-annex + is on a removable drive that gets unplugged at the wrong time. + + This command can actually be used inside git repositories that do not + use git-annex at all; when used in a repository using git-annex, it + does additional repairs of the git-annex branch. + + It works by deleting any corrupt objects from the git repository, and + retriving all missing objects it can from the remotes of the repository. + + If that is not sufficient to fully recover the repository, it can also + reset branches back to commits before the corruption happened, delete + branches that are no longer available due to the lost data, and remove any + missing files from the index. It will only do this if run with the + `--force` option, since that rewrites history and throws out missing data. + Note that the `--force` option never touches tags, even if they are no + longer usable due to missing data. + + After running this command, you will probably want to run `git fsck` to + verify it fixed the repository. Note that fsck may still complain about + objects referenced by the reflog, or the stash, if they were unable to be + recovered. This command does not try to clean up either the reflog or the + stash. + + It is also a good idea to run `git annex fsck --fast` after this command, + to make sure that the git-annex branch reflects reality. + # QUERY COMMANDS * `version` diff --git a/git-recover-repository.hs b/git-recover-repository.hs index d2249a433b..9f1ffb4237 100644 --- a/git-recover-repository.hs +++ b/git-recover-repository.hs @@ -6,10 +6,6 @@ -} import System.Environment -import System.Log.Logger -import System.Log.Formatter -import System.Log.Handler (setFormatter) -import System.Log.Handler.Simple import qualified Data.Set as S import Common @@ -34,75 +30,12 @@ parseArgs = do parse "--force" = True parse _ = usage -enableDebugOutput :: IO () -enableDebugOutput = do - s <- setFormatter - <$> streamHandler stderr NOTICE - <*> pure (simpleLogFormatter "$msg") - updateGlobalLogger rootLoggerName (setLevel DEBUG . setHandlers [s]) - main :: IO () main = do - enableDebugOutput forced <- parseArgs g <- Git.Config.read =<< Git.CurrentRepo.get - putStrLn "Running git fsck ..." - fsckresult <- Git.Fsck.findBroken False g - missing <- Git.RecoverRepository.cleanCorruptObjects fsckresult g - stillmissing <- Git.RecoverRepository.retrieveMissingObjects missing g - if S.null stillmissing - then putStr $ unlines - [ "Successfully recovered repository!" - , "You should run \"git fsck\" to make sure, but it looks like" - , "everything was recovered ok." - ] - else do - putStrLn $ unwords - [ show (S.size stillmissing) - , "missing objects could not be recovered!" - ] - if forced - then do - (remotebranches, goodcommits) <- Git.RecoverRepository.removeTrackingBranches stillmissing Git.RecoverRepository.emptyGoodCommits g - unless (null remotebranches) $ - putStrLn $ unwords - [ "removed" - , show (length remotebranches) - , "remote tracking branches that referred to missing objects" - ] - (resetbranches, deletedbranches, _) <- Git.RecoverRepository.resetLocalBranches stillmissing goodcommits g - printList (map show resetbranches) - "Reset these local branches to old versions before the missing objects were committed:" - printList (map show deletedbranches) - "Deleted these local branches, which could not be recovered due to missing objects:" - deindexedfiles <- Git.RecoverRepository.rewriteIndex stillmissing g - printList deindexedfiles - "Removed these missing files from the index. You should look at what files are present in your working tree and git add them back to the index when appropriate." - unless (Git.repoIsLocalBare g) $ do - mcurr <- Git.Branch.currentUnsafe g - case mcurr of - Nothing -> return () - Just curr -> when (any (== curr) (resetbranches ++ deletedbranches)) $ do - putStrLn $ unwords - [ "You currently have" - , show curr - , "checked out. You may have staged changes in the index that can be committed to recover the lost state of this branch!" - ] - else if Git.repoIsLocalBare g - then do - putStrLn "If you have a clone of this bare repository, you should add it as a remote of this repository, and re-run git-recover-repository." - putStrLn "If there are no clones of this repository, you can instead run git-recover-repository with the --force parameter to force recovery to a possibly usable state." - else putStrLn "To force a recovery to a usable state, run this command again with the --force parameter." - -printList :: [String] -> String -> IO () -printList items header - | null items = return () - | otherwise = do - putStrLn header - putStr $ unlines $ map (\i -> "\t" ++ i) truncateditems - where - numitems = length items - truncateditems - | numitems > 10 = take 10 items ++ ["(and " ++ show (numitems - 10) ++ " more)"] - | otherwise = items + ifM (Git.RecoverRepository.runRecovery forced g) + ( exitSuccess + , exitFailure + )