fully fix fsck memory use by iterative fscking

Not very well tested, but I'm sure it doesn't eg, loop forever.
This commit is contained in:
Joey Hess 2014-03-12 15:18:43 -04:00
parent 475bf70af6
commit 67f09bca6d
4 changed files with 108 additions and 57 deletions

View file

@ -28,7 +28,12 @@ import Control.Concurrent.Async
type MissingObjects = S.Set Sha
data FsckResults = FsckFoundMissing MissingObjects | FsckFailed
data FsckResults
= FsckFoundMissing
{ missingObjects :: MissingObjects
, missingObjectsTruncated :: Bool
}
| FsckFailed
deriving (Show)
{- Runs fsck to find some of the broken objects in the repository.
@ -55,22 +60,25 @@ findBroken batchmode r = do
, std_err = CreatePipe
}
(bad1, bad2) <- concurrently
(readMissingObjs r supportsNoDangling (stdoutHandle p))
(readMissingObjs r supportsNoDangling (stderrHandle p))
(readMissingObjs maxobjs r supportsNoDangling (stdoutHandle p))
(readMissingObjs maxobjs r supportsNoDangling (stderrHandle p))
fsckok <- checkSuccessProcess pid
let truncated = S.size bad1 == maxobjs || S.size bad1 == maxobjs
let badobjs = S.union bad1 bad2
if S.null badobjs && not fsckok
then return FsckFailed
else return $ FsckFoundMissing badobjs
else return $ FsckFoundMissing badobjs truncated
where
maxobjs = 10000
foundBroken :: FsckResults -> Bool
foundBroken FsckFailed = True
foundBroken (FsckFoundMissing s) = not (S.null s)
foundBroken (FsckFoundMissing s _) = not (S.null s)
knownMissing :: FsckResults -> MissingObjects
knownMissing FsckFailed = S.empty
knownMissing (FsckFoundMissing s) = s
knownMissing (FsckFoundMissing s _) = s
{- Finds objects that are missing from the git repsitory, or are corrupt.
-
@ -80,9 +88,9 @@ knownMissing (FsckFoundMissing s) = s
findMissing :: [Sha] -> Repo -> IO MissingObjects
findMissing objs r = S.fromList <$> filterM (`isMissing` r) objs
readMissingObjs :: Repo -> Bool -> Handle -> IO MissingObjects
readMissingObjs r supportsNoDangling h = do
objs <- findShas supportsNoDangling <$> hGetContents h
readMissingObjs :: Int -> Repo -> Bool -> Handle -> IO MissingObjects
readMissingObjs maxobjs r supportsNoDangling h = do
objs <- take maxobjs . findShas supportsNoDangling <$> hGetContents h
findMissing objs r
isMissing :: Sha -> Repo -> IO Bool

View file

@ -1,7 +1,6 @@
{- git repository recovery
import qualified Data.Set as S
-
- Copyright 2013 Joey Hess <joey@kitenet.net>
- Copyright 2013-2014 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
@ -115,7 +114,9 @@ retrieveMissingObjects missing referencerepo r
void $ copyObjects tmpr r
case stillmissing of
FsckFailed -> return $ FsckFailed
FsckFoundMissing s -> FsckFoundMissing <$> findMissing (S.toList s) r
FsckFoundMissing s t -> FsckFoundMissing
<$> findMissing (S.toList s) r
<*> pure t
, return stillmissing
)
pullremotes tmpr (rmt:rmts) fetchrefs ms
@ -128,9 +129,9 @@ retrieveMissingObjects missing referencerepo r
void $ copyObjects tmpr r
case ms of
FsckFailed -> pullremotes tmpr rmts fetchrefs ms
FsckFoundMissing s -> do
FsckFoundMissing s t -> do
stillmissing <- findMissing (S.toList s) r
pullremotes tmpr rmts fetchrefs (FsckFoundMissing stillmissing)
pullremotes tmpr rmts fetchrefs (FsckFoundMissing stillmissing t)
, pullremotes tmpr rmts fetchrefs ms
)
fetchfrom fetchurl ps = runBool $
@ -278,7 +279,7 @@ findUncorruptedCommit missing goodcommits branch r = do
then return (Just c, gcs')
else findfirst gcs' cs
{- Verifies tha none of the missing objects in the set are used by
{- Verifies that none of the missing objects in the set are used by
- the commit. Also adds to a set of commit shas that have been verified to
- be good, which can be passed into subsequent calls to avoid
- redundant work when eg, chasing down branches to find the first
@ -452,7 +453,7 @@ runRepair' removablebranch fsckresult forced referencerepo g = do
missing <- findBroken False g
stillmissing <- retrieveMissingObjects missing referencerepo g
case stillmissing of
FsckFoundMissing s
FsckFoundMissing s t
| S.null s -> if repoIsLocalBare g
then successfulfinish []
else ifM (checkIndex g)
@ -465,7 +466,7 @@ runRepair' removablebranch fsckresult forced referencerepo g = do
)
| otherwise -> if forced
then ifM (checkIndex g)
( continuerepairs s
( forcerepair s t
, corruptedindex
)
else do
@ -478,17 +479,16 @@ runRepair' removablebranch fsckresult forced referencerepo g = do
| forced -> ifM (pure (repoIsLocalBare g) <||> checkIndex g)
( do
cleanCorruptObjects FsckFailed g
missing' <- findBroken False g
case missing' of
stillmissing' <- findBroken False g
case stillmissing' of
FsckFailed -> return (False, [])
FsckFoundMissing stillmissing' ->
continuerepairs stillmissing'
FsckFoundMissing s t -> forcerepair s t
, corruptedindex
)
| otherwise -> unsuccessfulfinish
where
continuerepairs stillmissing = do
(removedbranches, goodcommits) <- removeBadBranches removablebranch stillmissing emptyGoodCommits g
repairbranches missing = do
(removedbranches, goodcommits) <- removeBadBranches removablebranch missing emptyGoodCommits g
let remotebranches = filter isTrackingBranch removedbranches
unless (null remotebranches) $
putStrLn $ unwords
@ -496,32 +496,43 @@ runRepair' removablebranch fsckresult forced referencerepo g = do
, show (length remotebranches)
, "remote tracking branches that referred to missing objects."
]
(resetbranches, deletedbranches, _) <- resetLocalBranches stillmissing goodcommits g
(resetbranches, deletedbranches, _) <- resetLocalBranches missing goodcommits g
displayList (map fromRef resetbranches)
"Reset these local branches to old versions before the missing objects were committed:"
displayList (map fromRef deletedbranches)
"Deleted these local branches, which could not be recovered due to missing objects:"
return (resetbranches ++ deletedbranches)
forcerepair missing fscktruncated = do
modifiedbranches <- repairbranches missing
deindexedfiles <- rewriteIndex g
displayList deindexedfiles
"Removed these missing files from the index. You should look at what files are present in your working tree and git add them back to the index when appropriate."
let modifiedbranches = resetbranches ++ deletedbranches
if null resetbranches && null deletedbranches
then successfulfinish modifiedbranches
else do
unless (repoIsLocalBare g) $ do
mcurr <- Branch.currentUnsafe g
case mcurr of
Nothing -> return ()
Just curr -> when (any (== curr) modifiedbranches) $ do
-- When the fsck results were truncated, try
-- fscking again, and as long as different
-- missing objects are found, continue
-- the repair process.
if fscktruncated
then do
fsckresult' <- findBroken False g
case fsckresult' of
FsckFailed -> do
putStrLn "git fsck is failing"
return (False, modifiedbranches)
FsckFoundMissing s _
| S.null s -> successfulfinish modifiedbranches
| S.null (s `S.difference` missing) -> do
putStrLn $ unwords
[ "You currently have"
, fromRef curr
, "checked out. You may have staged changes in the index that can be committed to recover the lost state of this branch!"
[ show (S.size s)
, "missing objects could not be recovered!"
]
putStrLn "Successfully recovered repository!"
putStrLn "Please carefully check that the changes mentioned above are ok.."
return (True, modifiedbranches)
return (False, modifiedbranches)
| otherwise -> do
(ok, modifiedbranches') <- runRepairOf fsckresult' removablebranch forced referencerepo g
return (ok, modifiedbranches++modifiedbranches')
else successfulfinish modifiedbranches
corruptedindex = do
nukeFile (indexFile g)
-- The corrupted index can prevent fsck from finding other
@ -531,12 +542,28 @@ runRepair' removablebranch fsckresult forced referencerepo g = do
putStrLn "Removed the corrupted index file. You should look at what files are present in your working tree and git add them back to the index when appropriate."
return result
successfulfinish modifiedbranches = do
mapM_ putStrLn
[ "Successfully recovered repository!"
, "You should run \"git fsck\" to make sure, but it looks like everything was recovered ok."
]
return (True, modifiedbranches)
successfulfinish modifiedbranches
| null modifiedbranches = do
mapM_ putStrLn
[ "Successfully recovered repository!"
, "You should run \"git fsck\" to make sure, but it looks like everything was recovered ok."
]
return (True, modifiedbranches)
| otherwise = do
unless (repoIsLocalBare g) $ do
mcurr <- Branch.currentUnsafe g
case mcurr of
Nothing -> return ()
Just curr -> when (any (== curr) modifiedbranches) $ do
putStrLn $ unwords
[ "You currently have"
, fromRef curr
, "checked out. You may have staged changes in the index that can be committed to recover the lost state of this branch!"
]
putStrLn "Successfully recovered repository!"
putStrLn "Please carefully check that the changes mentioned above are ok.."
return (True, modifiedbranches)
unsuccessfulfinish = do
if repoIsLocalBare g
then do

View file

@ -23,25 +23,31 @@ writeFsckResults u fsckresults = do
logfile <- fromRepo $ gitAnnexFsckResultsLog u
liftIO $
case fsckresults of
FsckFailed -> store S.empty logfile
FsckFoundMissing s
FsckFailed -> store S.empty False logfile
FsckFoundMissing s t
| S.null s -> nukeFile logfile
| otherwise -> store s logfile
| otherwise -> store s t logfile
where
store s logfile = do
store s t logfile = do
createDirectoryIfMissing True (parentDir logfile)
liftIO $ viaTmp writeFile logfile $ serialize s
serialize = unlines . map fromRef . S.toList
liftIO $ viaTmp writeFile logfile $ serialize s t
serialize s t =
let ls = map fromRef (S.toList s)
in if t
then unlines ("truncated":ls)
else unlines ls
readFsckResults :: UUID -> Annex FsckResults
readFsckResults u = do
logfile <- fromRepo $ gitAnnexFsckResultsLog u
liftIO $ catchDefaultIO (FsckFoundMissing S.empty) $
deserialize <$> readFile logfile
liftIO $ catchDefaultIO (FsckFoundMissing S.empty False) $
deserialize . lines <$> readFile logfile
where
deserialize l =
let s = S.fromList $ map Ref $ lines l
in if S.null s then FsckFailed else FsckFoundMissing s
deserialize ("truncated":ls) = deserialize' ls True
deserialize ls = deserialize' ls False
deserialize' ls t =
let s = S.fromList $ map Ref ls
in if S.null s then FsckFailed else FsckFoundMissing s t
clearFsckResults :: UUID -> Annex ()
clearFsckResults = liftIO . nukeFile <=< fromRepo . gitAnnexFsckResultsLog

View file

@ -18,3 +18,13 @@ So I tried to follow your advice here and increase the stack:
git-annex: Most RTS options are disabled. Link with -rtsopts to enable them.
I wasn't sure what to do next, so any help would be appreciated.
> Now only 20k problem shas max (more likely 10k) are collected from fsck,
> so it won't use much memory (60 mb or so). If it had to truncate
> shas from fsck, it will re-run fsck after the repair process,
> which should either find no problems left (common when eg when all missing shas
> were able to be fetched from remotes), or find a new set of problem
> shas, which it can feed back through the repair process.
>
> If the repository is very large, this means more work, but it shouldn't
> run out of memory now. [[fixed|done]] --[[Joey]]