Assistant, repair: Fix ignoring of git fsck errors due to duplicate file entries in tree objects.
This commit is contained in:
parent
632e9c93f0
commit
2ad7b00e29
3 changed files with 91 additions and 19 deletions
|
@ -1,3 +1,10 @@
|
||||||
|
git-annex (6.20161028) UNRELEASED; urgency=medium
|
||||||
|
|
||||||
|
* Assistant, repair: Fix ignoring of git fsck errors due to
|
||||||
|
duplicate file entries in tree objects.
|
||||||
|
|
||||||
|
-- Joey Hess <id@joeyh.name> Mon, 31 Oct 2016 13:59:47 -0400
|
||||||
|
|
||||||
git-annex (6.20161027) unstable; urgency=medium
|
git-annex (6.20161027) unstable; urgency=medium
|
||||||
|
|
||||||
* lock, smudge: Fix edge cases where data loss could occur in v6 mode
|
* lock, smudge: Fix edge cases where data loss could occur in v6 mode
|
||||||
|
|
69
Git/Fsck.hs
69
Git/Fsck.hs
|
@ -5,6 +5,8 @@
|
||||||
- Licensed under the GNU GPL version 3 or higher.
|
- Licensed under the GNU GPL version 3 or higher.
|
||||||
-}
|
-}
|
||||||
|
|
||||||
|
{-# LANGUAGE BangPatterns #-}
|
||||||
|
|
||||||
module Git.Fsck (
|
module Git.Fsck (
|
||||||
FsckResults(..),
|
FsckResults(..),
|
||||||
MissingObjects,
|
MissingObjects,
|
||||||
|
@ -25,8 +27,6 @@ import qualified Git.Version
|
||||||
import qualified Data.Set as S
|
import qualified Data.Set as S
|
||||||
import Control.Concurrent.Async
|
import Control.Concurrent.Async
|
||||||
|
|
||||||
type MissingObjects = S.Set Sha
|
|
||||||
|
|
||||||
data FsckResults
|
data FsckResults
|
||||||
= FsckFoundMissing
|
= FsckFoundMissing
|
||||||
{ missingObjects :: MissingObjects
|
{ missingObjects :: MissingObjects
|
||||||
|
@ -35,6 +35,25 @@ data FsckResults
|
||||||
| FsckFailed
|
| FsckFailed
|
||||||
deriving (Show)
|
deriving (Show)
|
||||||
|
|
||||||
|
data FsckOutput
|
||||||
|
= FsckOutput MissingObjects Truncated
|
||||||
|
| NoFsckOutput
|
||||||
|
| AllDuplicateEntriesWarning
|
||||||
|
|
||||||
|
type MissingObjects = S.Set Sha
|
||||||
|
|
||||||
|
type Truncated = Bool
|
||||||
|
|
||||||
|
instance Monoid FsckOutput where
|
||||||
|
mempty = NoFsckOutput
|
||||||
|
mappend (FsckOutput s1 t1) (FsckOutput s2 t2) = FsckOutput (S.union s1 s2) (t1 || t2)
|
||||||
|
mappend (FsckOutput s t) _ = FsckOutput s t
|
||||||
|
mappend _ (FsckOutput s t) = FsckOutput s t
|
||||||
|
mappend NoFsckOutput NoFsckOutput = NoFsckOutput
|
||||||
|
mappend AllDuplicateEntriesWarning AllDuplicateEntriesWarning = AllDuplicateEntriesWarning
|
||||||
|
mappend AllDuplicateEntriesWarning NoFsckOutput = AllDuplicateEntriesWarning
|
||||||
|
mappend NoFsckOutput AllDuplicateEntriesWarning = AllDuplicateEntriesWarning
|
||||||
|
|
||||||
{- Runs fsck to find some of the broken objects in the repository.
|
{- Runs fsck to find some of the broken objects in the repository.
|
||||||
- May not find all broken objects, if fsck fails on bad data in some of
|
- May not find all broken objects, if fsck fails on bad data in some of
|
||||||
- the broken objects it does find.
|
- the broken objects it does find.
|
||||||
|
@ -58,18 +77,24 @@ findBroken batchmode r = do
|
||||||
{ std_out = CreatePipe
|
{ std_out = CreatePipe
|
||||||
, std_err = CreatePipe
|
, std_err = CreatePipe
|
||||||
}
|
}
|
||||||
(bad1, bad2) <- concurrently
|
(o1, o2) <- concurrently
|
||||||
(readMissingObjs maxobjs r supportsNoDangling (stdoutHandle p))
|
(parseFsckOutput maxobjs r supportsNoDangling (stdoutHandle p))
|
||||||
(readMissingObjs maxobjs r supportsNoDangling (stderrHandle p))
|
(parseFsckOutput maxobjs r supportsNoDangling (stderrHandle p))
|
||||||
fsckok <- checkSuccessProcess pid
|
fsckok <- checkSuccessProcess pid
|
||||||
let truncated = S.size bad1 == maxobjs || S.size bad1 == maxobjs
|
case mappend o1 o2 of
|
||||||
let badobjs = S.union bad1 bad2
|
FsckOutput badobjs truncated
|
||||||
|
| S.null badobjs && not fsckok -> return FsckFailed
|
||||||
if S.null badobjs && not fsckok
|
| otherwise -> return $ FsckFoundMissing badobjs truncated
|
||||||
then return FsckFailed
|
NoFsckOutput
|
||||||
else return $ FsckFoundMissing badobjs truncated
|
| not fsckok -> return FsckFailed
|
||||||
|
| otherwise -> return noproblem
|
||||||
|
-- If all fsck output was duplicateEntries warnings,
|
||||||
|
-- the repository is not broken, it just has some unusual
|
||||||
|
-- tree objects in it. So ignore nonzero exit status.
|
||||||
|
AllDuplicateEntriesWarning -> return noproblem
|
||||||
where
|
where
|
||||||
maxobjs = 10000
|
maxobjs = 10000
|
||||||
|
noproblem = FsckFoundMissing S.empty False
|
||||||
|
|
||||||
foundBroken :: FsckResults -> Bool
|
foundBroken :: FsckResults -> Bool
|
||||||
foundBroken FsckFailed = True
|
foundBroken FsckFailed = True
|
||||||
|
@ -87,10 +112,18 @@ knownMissing (FsckFoundMissing s _) = s
|
||||||
findMissing :: [Sha] -> Repo -> IO MissingObjects
|
findMissing :: [Sha] -> Repo -> IO MissingObjects
|
||||||
findMissing objs r = S.fromList <$> filterM (`isMissing` r) objs
|
findMissing objs r = S.fromList <$> filterM (`isMissing` r) objs
|
||||||
|
|
||||||
readMissingObjs :: Int -> Repo -> Bool -> Handle -> IO MissingObjects
|
parseFsckOutput :: Int -> Repo -> Bool -> Handle -> IO FsckOutput
|
||||||
readMissingObjs maxobjs r supportsNoDangling h = do
|
parseFsckOutput maxobjs r supportsNoDangling h = do
|
||||||
objs <- take maxobjs . findShas supportsNoDangling <$> hGetContents h
|
ls <- lines <$> hGetContents h
|
||||||
findMissing objs r
|
if null ls
|
||||||
|
then return NoFsckOutput
|
||||||
|
else if all ("duplicateEntries" `isInfixOf`) ls
|
||||||
|
then return AllDuplicateEntriesWarning
|
||||||
|
else do
|
||||||
|
let shas = findShas supportsNoDangling ls
|
||||||
|
let !truncated = length shas > maxobjs
|
||||||
|
missingobjs <- findMissing (take maxobjs shas) r
|
||||||
|
return $ FsckOutput missingobjs truncated
|
||||||
|
|
||||||
isMissing :: Sha -> Repo -> IO Bool
|
isMissing :: Sha -> Repo -> IO Bool
|
||||||
isMissing s r = either (const True) (const False) <$> tryIO dump
|
isMissing s r = either (const True) (const False) <$> tryIO dump
|
||||||
|
@ -100,12 +133,10 @@ isMissing s r = either (const True) (const False) <$> tryIO dump
|
||||||
, Param (fromRef s)
|
, Param (fromRef s)
|
||||||
] r
|
] r
|
||||||
|
|
||||||
findShas :: Bool -> String -> [Sha]
|
findShas :: Bool -> [String] -> [Sha]
|
||||||
findShas supportsNoDangling = catMaybes . map extractSha . concat . map words . filter wanted . lines
|
findShas supportsNoDangling = catMaybes . map extractSha . concat . map words . filter wanted
|
||||||
where
|
where
|
||||||
wanted l
|
wanted l
|
||||||
-- Skip lines like "error in tree <sha>: duplicateEntries: contains duplicate file entries"
|
|
||||||
| "duplicateEntries" `isInfixOf` l = False
|
|
||||||
| supportsNoDangling = True
|
| supportsNoDangling = True
|
||||||
| otherwise = not ("dangling " `isPrefixOf` l)
|
| otherwise = not ("dangling " `isPrefixOf` l)
|
||||||
|
|
||||||
|
|
|
@ -0,0 +1,34 @@
|
||||||
|
[[!comment format=mdwn
|
||||||
|
username="joey"
|
||||||
|
subject="""comment 1"""
|
||||||
|
date="2016-10-31T17:07:43Z"
|
||||||
|
content="""
|
||||||
|
This was supposed to be dealt with in version 6.20161027. But, I see now
|
||||||
|
that the parser I thought was triggering on those missing object lines,
|
||||||
|
was not really (`extractSha` does not match in this case because of the
|
||||||
|
colon after the sha).
|
||||||
|
|
||||||
|
Instead, the problem seems to be that `git fsck` is exiting nonzero. So it
|
||||||
|
assumes that fsck is failing without printing out any shas, which is a
|
||||||
|
condition that calls for repairs.
|
||||||
|
|
||||||
|
About all I can think to do is, if fsck outputs "duplicateEntries" and
|
||||||
|
no other lines at all, and exits nonzero, treat this as a success.
|
||||||
|
This risks ignoring other reasons fsck might exit nonzero, but hopefully
|
||||||
|
it would output something else in such a case. I've implemented this.
|
||||||
|
|
||||||
|
---
|
||||||
|
|
||||||
|
I am interested in getting at the root cause of the problem of
|
||||||
|
duplicate directory entries. It seems pretty likely to result from using
|
||||||
|
adjusted branches.
|
||||||
|
|
||||||
|
It would be useful to get more information about the trees that fsck
|
||||||
|
is warning about; are they part of existing or past adjusted branches
|
||||||
|
or not? Are they merge commits?
|
||||||
|
|
||||||
|
(Previously: [[forum/how_to_disaster_recovery]])
|
||||||
|
|
||||||
|
Leaving this bug open since we really need to get at the root cause of
|
||||||
|
the problem.
|
||||||
|
"""]]
|
Loading…
Add table
Add a link
Reference in a new issue