add repair command

This commit is contained in:
Joey Hess 2013-10-23 12:21:59 -04:00
parent 6b7f1baa6a
commit d5eb85acf4
6 changed files with 154 additions and 87 deletions

25
Command/Repair.hs Normal file
View file

@ -0,0 +1,25 @@
{- git-annex command
-
- Copyright 2013 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Command.Repair where
import Common.Annex
import Command
import qualified Annex
import Git.RecoverRepository (runRecovery)
def :: [Command]
def = [noCommit $ dontCheck repoExists $
command "repair" paramNothing seek SectionMaintenance "recover broken git repository"]
seek :: [CommandSeek]
seek = [withNothing start]
start :: CommandStart
start = next $ next $ do
force <- Annex.getState Annex.force
inRepo $ runRecovery force

View file

@ -6,6 +6,7 @@
-} -}
module Git.RecoverRepository ( module Git.RecoverRepository (
runRecovery,
cleanCorruptObjects, cleanCorruptObjects,
retrieveMissingObjects, retrieveMissingObjects,
resetLocalBranches, resetLocalBranches,
@ -17,23 +18,23 @@ module Git.RecoverRepository (
import Common import Common
import Git import Git
import Git.Command import Git.Command
import Git.Fsck
import Git.Objects import Git.Objects
import Git.Sha import Git.Sha
import Git.Types import Git.Types
import qualified Git.Config import Git.Fsck
import qualified Git.Construct import qualified Git.Config as Config
import qualified Git.Construct as Construct
import qualified Git.LsTree as LsTree import qualified Git.LsTree as LsTree
import qualified Git.LsFiles as LsFiles import qualified Git.LsFiles as LsFiles
import qualified Git.Ref as Ref import qualified Git.Ref as Ref
import qualified Git.RefLog as RefLog import qualified Git.RefLog as RefLog
import qualified Git.UpdateIndex as UpdateIndex import qualified Git.UpdateIndex as UpdateIndex
import qualified Git.Branch as Branch
import Utility.Tmp import Utility.Tmp
import Utility.Rsync import Utility.Rsync
import qualified Data.Set as S import qualified Data.Set as S
import qualified Data.ByteString.Lazy as L import qualified Data.ByteString.Lazy as L
import System.Log.Logger
import Data.Tuple.Utils import Data.Tuple.Utils
{- Given a set of bad objects found by git fsck, removes all {- Given a set of bad objects found by git fsck, removes all
@ -52,7 +53,7 @@ cleanCorruptObjects :: FsckResults -> Repo -> IO MissingObjects
cleanCorruptObjects mmissing r = check mmissing cleanCorruptObjects mmissing r = check mmissing
where where
check Nothing = do check Nothing = do
notice "git fsck found a problem but no specific broken objects. Perhaps a corrupt pack file?" putStrLn "git fsck found a problem but no specific broken objects. Perhaps a corrupt pack file?"
ifM (explodePacks r) ifM (explodePacks r)
( retry S.empty ( retry S.empty
, return S.empty , return S.empty
@ -60,7 +61,7 @@ cleanCorruptObjects mmissing r = check mmissing
check (Just bad) check (Just bad)
| S.null bad = return S.empty | S.null bad = return S.empty
| otherwise = do | otherwise = do
notice $ unwords putStrLn $ unwords
[ "git fsck found" [ "git fsck found"
, show (S.size bad) , show (S.size bad)
, "broken objects." , "broken objects."
@ -71,7 +72,7 @@ cleanCorruptObjects mmissing r = check mmissing
then retry bad then retry bad
else return bad else return bad
retry oldbad = do retry oldbad = do
notice "Re-running git fsck to see if it finds more problems." putStrLn "Re-running git fsck to see if it finds more problems."
v <- findBroken False r v <- findBroken False r
case v of case v of
Nothing -> error $ unwords Nothing -> error $ unwords
@ -92,7 +93,7 @@ removeLoose r s = do
count <- length <$> filterM doesFileExist fs count <- length <$> filterM doesFileExist fs
if (count > 0) if (count > 0)
then do then do
notice $ unwords putStrLn $ unwords
[ "removing" [ "removing"
, show count , show count
, "corrupt loose objects" , "corrupt loose objects"
@ -107,7 +108,7 @@ explodePacks r = do
if null packs if null packs
then return False then return False
else do else do
notice "Unpacking all pack files." putStrLn "Unpacking all pack files."
mapM_ go packs mapM_ go packs
return True return True
where where
@ -128,7 +129,7 @@ retrieveMissingObjects missing r
| otherwise = withTmpDir "tmprepo" $ \tmpdir -> do | otherwise = withTmpDir "tmprepo" $ \tmpdir -> do
unlessM (boolSystem "git" [Params "init", File tmpdir]) $ unlessM (boolSystem "git" [Params "init", File tmpdir]) $
error $ "failed to create temp repository in " ++ tmpdir error $ "failed to create temp repository in " ++ tmpdir
tmpr <- Git.Config.read =<< Git.Construct.fromAbsPath tmpdir tmpr <- Config.read =<< Construct.fromAbsPath tmpdir
stillmissing <- pullremotes tmpr (remotes r) fetchrefstags missing stillmissing <- pullremotes tmpr (remotes r) fetchrefstags missing
if S.null stillmissing if S.null stillmissing
then return stillmissing then return stillmissing
@ -138,14 +139,14 @@ retrieveMissingObjects missing r
pullremotes tmpr (rmt:rmts) fetchrefs s pullremotes tmpr (rmt:rmts) fetchrefs s
| S.null s = return s | S.null s = return s
| otherwise = do | otherwise = do
notice $ "Trying to recover missing objects from remote " ++ repoDescribe rmt putStrLn $ "Trying to recover missing objects from remote " ++ repoDescribe rmt
ifM (fetchsome rmt fetchrefs tmpr) ifM (fetchsome rmt fetchrefs tmpr)
( do ( do
void $ copyObjects tmpr r void $ copyObjects tmpr r
stillmissing <- findMissing (S.toList s) r stillmissing <- findMissing (S.toList s) r
pullremotes tmpr rmts fetchrefs stillmissing pullremotes tmpr rmts fetchrefs stillmissing
, do , do
notice $ unwords putStrLn $ unwords
[ "failed to fetch from remote" [ "failed to fetch from remote"
, repoDescribe rmt , repoDescribe rmt
, "(will continue without it, but making this remote available may improve recovery)" , "(will continue without it, but making this remote available may improve recovery)"
@ -360,7 +361,7 @@ rewriteIndex :: MissingObjects -> Repo -> IO [FilePath]
rewriteIndex missing r rewriteIndex missing r
| repoIsLocalBare r = return [] | repoIsLocalBare r = return []
| otherwise = do | otherwise = do
(indexcontents, cleanup) <- LsFiles.stagedDetails [Git.repoPath r] r (indexcontents, cleanup) <- LsFiles.stagedDetails [repoPath r] r
let (bad, good) = partition ismissing indexcontents let (bad, good) = partition ismissing indexcontents
unless (null bad) $ do unless (null bad) $ do
nukeFile (localGitDir r </> "index") nukeFile (localGitDir r </> "index")
@ -390,5 +391,77 @@ addGoodCommits :: [Sha] -> GoodCommits -> GoodCommits
addGoodCommits shas (GoodCommits s) = GoodCommits $ addGoodCommits shas (GoodCommits s) = GoodCommits $
S.union s (S.fromList shas) S.union s (S.fromList shas)
notice :: String -> IO () displayList :: [String] -> String -> IO ()
notice = noticeM "RecoverRepository" displayList items header
| null items = return ()
| otherwise = do
putStrLn header
putStr $ unlines $ map (\i -> "\t" ++ i) truncateditems
where
numitems = length items
truncateditems
| numitems > 10 = take 10 items ++ ["(and " ++ show (numitems - 10) ++ " more)"]
| otherwise = items
{- Put it all together. -}
runRecovery :: Bool -> Repo -> IO Bool
runRecovery forced g = do
putStrLn "Running git fsck ..."
fsckresult <- findBroken False g
missing <- cleanCorruptObjects fsckresult g
stillmissing <- retrieveMissingObjects missing g
if S.null stillmissing
then successfulfinish
else do
putStrLn $ unwords
[ show (S.size stillmissing)
, "missing objects could not be recovered!"
]
if forced
then do
(remotebranches, goodcommits) <- removeTrackingBranches stillmissing emptyGoodCommits g
unless (null remotebranches) $
putStrLn $ unwords
[ "removed"
, show (length remotebranches)
, "remote tracking branches that referred to missing objects"
]
(resetbranches, deletedbranches, _) <- resetLocalBranches stillmissing goodcommits g
displayList (map show resetbranches)
"Reset these local branches to old versions before the missing objects were committed:"
displayList (map show deletedbranches)
"Deleted these local branches, which could not be recovered due to missing objects:"
deindexedfiles <- rewriteIndex stillmissing g
displayList deindexedfiles
"Removed these missing files from the index. You should look at what files are present in your working tree and git add them back to the index when appropriate."
if null resetbranches && null deletedbranches
then successfulfinish
else do
unless (repoIsLocalBare g) $ do
mcurr <- Branch.currentUnsafe g
case mcurr of
Nothing -> return ()
Just curr -> when (any (== curr) (resetbranches ++ deletedbranches)) $ do
putStrLn $ unwords
[ "You currently have"
, show curr
, "checked out. You may have staged changes in the index that can be committed to recover the lost state of this branch!"
]
putStrLn "Successfully recovered repository!"
putStrLn "Please carefully check that the changes mentioned above are ok.."
return True
else do
if repoIsLocalBare g
then do
putStrLn "If you have a clone of this bare repository, you should add it as a remote of this repository, and re-run git-recover-repository."
putStrLn "If there are no clones of this repository, you can instead run git-recover-repository with the --force parameter to force recovery to a possibly usable state."
else putStrLn "To force a recovery to a usable state, run this command again with the --force parameter."
return False
where
successfulfinish = do
mapM_ putStrLn
[ "Successfully recovered repository!"
, "You should run \"git fsck\" to make sure, but it looks like"
, "everything was recovered ok."
]
return True

View file

@ -34,6 +34,7 @@ import qualified Command.Describe
import qualified Command.InitRemote import qualified Command.InitRemote
import qualified Command.EnableRemote import qualified Command.EnableRemote
import qualified Command.Fsck import qualified Command.Fsck
import qualified Command.Repair
import qualified Command.Unused import qualified Command.Unused
import qualified Command.DropUnused import qualified Command.DropUnused
import qualified Command.AddUnused import qualified Command.AddUnused
@ -130,6 +131,7 @@ cmds = concat
, Command.ReKey.def , Command.ReKey.def
, Command.Fix.def , Command.Fix.def
, Command.Fsck.def , Command.Fsck.def
, Command.Repair.def
, Command.Unused.def , Command.Unused.def
, Command.DropUnused.def , Command.DropUnused.def
, Command.AddUnused.def , Command.AddUnused.def

2
debian/changelog vendored
View file

@ -5,6 +5,8 @@ git-annex (4.20131003) UNRELEASED; urgency=low
* The assitant can now run scheduled incremental fsck jobs on the local * The assitant can now run scheduled incremental fsck jobs on the local
repository and remotes. These can be configured using vicfg or with the repository and remotes. These can be configured using vicfg or with the
webapp. webapp.
* repair: New command, which can repair damaged git repositories
(even ones not using git-annex).
* Automatically and safely detect and recover from dangling * Automatically and safely detect and recover from dangling
.git/annex/index.lock files, which would prevent git from .git/annex/index.lock files, which would prevent git from
committing to the git-annex branch, eg after a crash. committing to the git-annex branch, eg after a crash.

View file

@ -444,7 +444,8 @@ subdirectories).
* `fsck [path ...]` * `fsck [path ...]`
With no parameters, this command checks the whole annex for consistency, With no parameters, this command checks the whole annex for consistency,
and warns about or fixes any problems found. and warns about or fixes any problems found. This is a good compliment to
`git fsck`.
With parameters, only the specified files are checked. With parameters, only the specified files are checked.
@ -529,6 +530,37 @@ subdirectories).
git-annex have forgotten their old history. (You may need to force git-annex have forgotten their old history. (You may need to force
git to push the branch to any git repositories not running git-annex. git to push the branch to any git repositories not running git-annex.
* `repair`
This can repair many of the problems with git repositories that `git fsck`
detects, but does not itself fix. It's useful if a repository has become
badly damaged. One way this can happen is if a repisitory used by git-annex
is on a removable drive that gets unplugged at the wrong time.
This command can actually be used inside git repositories that do not
use git-annex at all; when used in a repository using git-annex, it
does additional repairs of the git-annex branch.
It works by deleting any corrupt objects from the git repository, and
retriving all missing objects it can from the remotes of the repository.
If that is not sufficient to fully recover the repository, it can also
reset branches back to commits before the corruption happened, delete
branches that are no longer available due to the lost data, and remove any
missing files from the index. It will only do this if run with the
`--force` option, since that rewrites history and throws out missing data.
Note that the `--force` option never touches tags, even if they are no
longer usable due to missing data.
After running this command, you will probably want to run `git fsck` to
verify it fixed the repository. Note that fsck may still complain about
objects referenced by the reflog, or the stash, if they were unable to be
recovered. This command does not try to clean up either the reflog or the
stash.
It is also a good idea to run `git annex fsck --fast` after this command,
to make sure that the git-annex branch reflects reality.
# QUERY COMMANDS # QUERY COMMANDS
* `version` * `version`

View file

@ -6,10 +6,6 @@
-} -}
import System.Environment import System.Environment
import System.Log.Logger
import System.Log.Formatter
import System.Log.Handler (setFormatter)
import System.Log.Handler.Simple
import qualified Data.Set as S import qualified Data.Set as S
import Common import Common
@ -34,75 +30,12 @@ parseArgs = do
parse "--force" = True parse "--force" = True
parse _ = usage parse _ = usage
enableDebugOutput :: IO ()
enableDebugOutput = do
s <- setFormatter
<$> streamHandler stderr NOTICE
<*> pure (simpleLogFormatter "$msg")
updateGlobalLogger rootLoggerName (setLevel DEBUG . setHandlers [s])
main :: IO () main :: IO ()
main = do main = do
enableDebugOutput
forced <- parseArgs forced <- parseArgs
g <- Git.Config.read =<< Git.CurrentRepo.get g <- Git.Config.read =<< Git.CurrentRepo.get
putStrLn "Running git fsck ..." ifM (Git.RecoverRepository.runRecovery forced g)
fsckresult <- Git.Fsck.findBroken False g ( exitSuccess
missing <- Git.RecoverRepository.cleanCorruptObjects fsckresult g , exitFailure
stillmissing <- Git.RecoverRepository.retrieveMissingObjects missing g )
if S.null stillmissing
then putStr $ unlines
[ "Successfully recovered repository!"
, "You should run \"git fsck\" to make sure, but it looks like"
, "everything was recovered ok."
]
else do
putStrLn $ unwords
[ show (S.size stillmissing)
, "missing objects could not be recovered!"
]
if forced
then do
(remotebranches, goodcommits) <- Git.RecoverRepository.removeTrackingBranches stillmissing Git.RecoverRepository.emptyGoodCommits g
unless (null remotebranches) $
putStrLn $ unwords
[ "removed"
, show (length remotebranches)
, "remote tracking branches that referred to missing objects"
]
(resetbranches, deletedbranches, _) <- Git.RecoverRepository.resetLocalBranches stillmissing goodcommits g
printList (map show resetbranches)
"Reset these local branches to old versions before the missing objects were committed:"
printList (map show deletedbranches)
"Deleted these local branches, which could not be recovered due to missing objects:"
deindexedfiles <- Git.RecoverRepository.rewriteIndex stillmissing g
printList deindexedfiles
"Removed these missing files from the index. You should look at what files are present in your working tree and git add them back to the index when appropriate."
unless (Git.repoIsLocalBare g) $ do
mcurr <- Git.Branch.currentUnsafe g
case mcurr of
Nothing -> return ()
Just curr -> when (any (== curr) (resetbranches ++ deletedbranches)) $ do
putStrLn $ unwords
[ "You currently have"
, show curr
, "checked out. You may have staged changes in the index that can be committed to recover the lost state of this branch!"
]
else if Git.repoIsLocalBare g
then do
putStrLn "If you have a clone of this bare repository, you should add it as a remote of this repository, and re-run git-recover-repository."
putStrLn "If there are no clones of this repository, you can instead run git-recover-repository with the --force parameter to force recovery to a possibly usable state."
else putStrLn "To force a recovery to a usable state, run this command again with the --force parameter."
printList :: [String] -> String -> IO ()
printList items header
| null items = return ()
| otherwise = do
putStrLn header
putStr $ unlines $ map (\i -> "\t" ++ i) truncateditems
where
numitems = length items
truncateditems
| numitems > 10 = take 10 items ++ ["(and " ++ show (numitems - 10) ++ " more)"]
| otherwise = items