fully fix fsck memory use by iterative fscking

Not very well tested, but I'm sure it doesn't eg, loop forever.
This commit is contained in:
Joey Hess 2014-03-12 15:18:43 -04:00
parent 475bf70af6
commit 67f09bca6d
4 changed files with 108 additions and 57 deletions

View file

@ -28,7 +28,12 @@ import Control.Concurrent.Async
type MissingObjects = S.Set Sha type MissingObjects = S.Set Sha
data FsckResults = FsckFoundMissing MissingObjects | FsckFailed data FsckResults
= FsckFoundMissing
{ missingObjects :: MissingObjects
, missingObjectsTruncated :: Bool
}
| FsckFailed
deriving (Show) deriving (Show)
{- Runs fsck to find some of the broken objects in the repository. {- Runs fsck to find some of the broken objects in the repository.
@ -55,22 +60,25 @@ findBroken batchmode r = do
, std_err = CreatePipe , std_err = CreatePipe
} }
(bad1, bad2) <- concurrently (bad1, bad2) <- concurrently
(readMissingObjs r supportsNoDangling (stdoutHandle p)) (readMissingObjs maxobjs r supportsNoDangling (stdoutHandle p))
(readMissingObjs r supportsNoDangling (stderrHandle p)) (readMissingObjs maxobjs r supportsNoDangling (stderrHandle p))
fsckok <- checkSuccessProcess pid fsckok <- checkSuccessProcess pid
let truncated = S.size bad1 == maxobjs || S.size bad1 == maxobjs
let badobjs = S.union bad1 bad2 let badobjs = S.union bad1 bad2
if S.null badobjs && not fsckok if S.null badobjs && not fsckok
then return FsckFailed then return FsckFailed
else return $ FsckFoundMissing badobjs else return $ FsckFoundMissing badobjs truncated
where
maxobjs = 10000
foundBroken :: FsckResults -> Bool foundBroken :: FsckResults -> Bool
foundBroken FsckFailed = True foundBroken FsckFailed = True
foundBroken (FsckFoundMissing s) = not (S.null s) foundBroken (FsckFoundMissing s _) = not (S.null s)
knownMissing :: FsckResults -> MissingObjects knownMissing :: FsckResults -> MissingObjects
knownMissing FsckFailed = S.empty knownMissing FsckFailed = S.empty
knownMissing (FsckFoundMissing s) = s knownMissing (FsckFoundMissing s _) = s
{- Finds objects that are missing from the git repsitory, or are corrupt. {- Finds objects that are missing from the git repsitory, or are corrupt.
- -
@ -80,9 +88,9 @@ knownMissing (FsckFoundMissing s) = s
findMissing :: [Sha] -> Repo -> IO MissingObjects findMissing :: [Sha] -> Repo -> IO MissingObjects
findMissing objs r = S.fromList <$> filterM (`isMissing` r) objs findMissing objs r = S.fromList <$> filterM (`isMissing` r) objs
readMissingObjs :: Repo -> Bool -> Handle -> IO MissingObjects readMissingObjs :: Int -> Repo -> Bool -> Handle -> IO MissingObjects
readMissingObjs r supportsNoDangling h = do readMissingObjs maxobjs r supportsNoDangling h = do
objs <- findShas supportsNoDangling <$> hGetContents h objs <- take maxobjs . findShas supportsNoDangling <$> hGetContents h
findMissing objs r findMissing objs r
isMissing :: Sha -> Repo -> IO Bool isMissing :: Sha -> Repo -> IO Bool

View file

@ -1,7 +1,6 @@
{- git repository recovery {- git repository recovery
import qualified Data.Set as S
- -
- Copyright 2013 Joey Hess <joey@kitenet.net> - Copyright 2013-2014 Joey Hess <joey@kitenet.net>
- -
- Licensed under the GNU GPL version 3 or higher. - Licensed under the GNU GPL version 3 or higher.
-} -}
@ -115,7 +114,9 @@ retrieveMissingObjects missing referencerepo r
void $ copyObjects tmpr r void $ copyObjects tmpr r
case stillmissing of case stillmissing of
FsckFailed -> return $ FsckFailed FsckFailed -> return $ FsckFailed
FsckFoundMissing s -> FsckFoundMissing <$> findMissing (S.toList s) r FsckFoundMissing s t -> FsckFoundMissing
<$> findMissing (S.toList s) r
<*> pure t
, return stillmissing , return stillmissing
) )
pullremotes tmpr (rmt:rmts) fetchrefs ms pullremotes tmpr (rmt:rmts) fetchrefs ms
@ -128,9 +129,9 @@ retrieveMissingObjects missing referencerepo r
void $ copyObjects tmpr r void $ copyObjects tmpr r
case ms of case ms of
FsckFailed -> pullremotes tmpr rmts fetchrefs ms FsckFailed -> pullremotes tmpr rmts fetchrefs ms
FsckFoundMissing s -> do FsckFoundMissing s t -> do
stillmissing <- findMissing (S.toList s) r stillmissing <- findMissing (S.toList s) r
pullremotes tmpr rmts fetchrefs (FsckFoundMissing stillmissing) pullremotes tmpr rmts fetchrefs (FsckFoundMissing stillmissing t)
, pullremotes tmpr rmts fetchrefs ms , pullremotes tmpr rmts fetchrefs ms
) )
fetchfrom fetchurl ps = runBool $ fetchfrom fetchurl ps = runBool $
@ -278,7 +279,7 @@ findUncorruptedCommit missing goodcommits branch r = do
then return (Just c, gcs') then return (Just c, gcs')
else findfirst gcs' cs else findfirst gcs' cs
{- Verifies tha none of the missing objects in the set are used by {- Verifies that none of the missing objects in the set are used by
- the commit. Also adds to a set of commit shas that have been verified to - the commit. Also adds to a set of commit shas that have been verified to
- be good, which can be passed into subsequent calls to avoid - be good, which can be passed into subsequent calls to avoid
- redundant work when eg, chasing down branches to find the first - redundant work when eg, chasing down branches to find the first
@ -452,7 +453,7 @@ runRepair' removablebranch fsckresult forced referencerepo g = do
missing <- findBroken False g missing <- findBroken False g
stillmissing <- retrieveMissingObjects missing referencerepo g stillmissing <- retrieveMissingObjects missing referencerepo g
case stillmissing of case stillmissing of
FsckFoundMissing s FsckFoundMissing s t
| S.null s -> if repoIsLocalBare g | S.null s -> if repoIsLocalBare g
then successfulfinish [] then successfulfinish []
else ifM (checkIndex g) else ifM (checkIndex g)
@ -465,7 +466,7 @@ runRepair' removablebranch fsckresult forced referencerepo g = do
) )
| otherwise -> if forced | otherwise -> if forced
then ifM (checkIndex g) then ifM (checkIndex g)
( continuerepairs s ( forcerepair s t
, corruptedindex , corruptedindex
) )
else do else do
@ -478,17 +479,16 @@ runRepair' removablebranch fsckresult forced referencerepo g = do
| forced -> ifM (pure (repoIsLocalBare g) <||> checkIndex g) | forced -> ifM (pure (repoIsLocalBare g) <||> checkIndex g)
( do ( do
cleanCorruptObjects FsckFailed g cleanCorruptObjects FsckFailed g
missing' <- findBroken False g stillmissing' <- findBroken False g
case missing' of case stillmissing' of
FsckFailed -> return (False, []) FsckFailed -> return (False, [])
FsckFoundMissing stillmissing' -> FsckFoundMissing s t -> forcerepair s t
continuerepairs stillmissing'
, corruptedindex , corruptedindex
) )
| otherwise -> unsuccessfulfinish | otherwise -> unsuccessfulfinish
where where
continuerepairs stillmissing = do repairbranches missing = do
(removedbranches, goodcommits) <- removeBadBranches removablebranch stillmissing emptyGoodCommits g (removedbranches, goodcommits) <- removeBadBranches removablebranch missing emptyGoodCommits g
let remotebranches = filter isTrackingBranch removedbranches let remotebranches = filter isTrackingBranch removedbranches
unless (null remotebranches) $ unless (null remotebranches) $
putStrLn $ unwords putStrLn $ unwords
@ -496,32 +496,43 @@ runRepair' removablebranch fsckresult forced referencerepo g = do
, show (length remotebranches) , show (length remotebranches)
, "remote tracking branches that referred to missing objects." , "remote tracking branches that referred to missing objects."
] ]
(resetbranches, deletedbranches, _) <- resetLocalBranches stillmissing goodcommits g (resetbranches, deletedbranches, _) <- resetLocalBranches missing goodcommits g
displayList (map fromRef resetbranches) displayList (map fromRef resetbranches)
"Reset these local branches to old versions before the missing objects were committed:" "Reset these local branches to old versions before the missing objects were committed:"
displayList (map fromRef deletedbranches) displayList (map fromRef deletedbranches)
"Deleted these local branches, which could not be recovered due to missing objects:" "Deleted these local branches, which could not be recovered due to missing objects:"
return (resetbranches ++ deletedbranches)
forcerepair missing fscktruncated = do
modifiedbranches <- repairbranches missing
deindexedfiles <- rewriteIndex g deindexedfiles <- rewriteIndex g
displayList deindexedfiles displayList deindexedfiles
"Removed these missing files from the index. You should look at what files are present in your working tree and git add them back to the index when appropriate." "Removed these missing files from the index. You should look at what files are present in your working tree and git add them back to the index when appropriate."
let modifiedbranches = resetbranches ++ deletedbranches
if null resetbranches && null deletedbranches -- When the fsck results were truncated, try
then successfulfinish modifiedbranches -- fscking again, and as long as different
else do -- missing objects are found, continue
unless (repoIsLocalBare g) $ do -- the repair process.
mcurr <- Branch.currentUnsafe g if fscktruncated
case mcurr of then do
Nothing -> return () fsckresult' <- findBroken False g
Just curr -> when (any (== curr) modifiedbranches) $ do case fsckresult' of
FsckFailed -> do
putStrLn "git fsck is failing"
return (False, modifiedbranches)
FsckFoundMissing s _
| S.null s -> successfulfinish modifiedbranches
| S.null (s `S.difference` missing) -> do
putStrLn $ unwords putStrLn $ unwords
[ "You currently have" [ show (S.size s)
, fromRef curr , "missing objects could not be recovered!"
, "checked out. You may have staged changes in the index that can be committed to recover the lost state of this branch!"
] ]
putStrLn "Successfully recovered repository!" return (False, modifiedbranches)
putStrLn "Please carefully check that the changes mentioned above are ok.." | otherwise -> do
return (True, modifiedbranches) (ok, modifiedbranches') <- runRepairOf fsckresult' removablebranch forced referencerepo g
return (ok, modifiedbranches++modifiedbranches')
else successfulfinish modifiedbranches
corruptedindex = do corruptedindex = do
nukeFile (indexFile g) nukeFile (indexFile g)
-- The corrupted index can prevent fsck from finding other -- The corrupted index can prevent fsck from finding other
@ -531,12 +542,28 @@ runRepair' removablebranch fsckresult forced referencerepo g = do
putStrLn "Removed the corrupted index file. You should look at what files are present in your working tree and git add them back to the index when appropriate." putStrLn "Removed the corrupted index file. You should look at what files are present in your working tree and git add them back to the index when appropriate."
return result return result
successfulfinish modifiedbranches = do successfulfinish modifiedbranches
mapM_ putStrLn | null modifiedbranches = do
[ "Successfully recovered repository!" mapM_ putStrLn
, "You should run \"git fsck\" to make sure, but it looks like everything was recovered ok." [ "Successfully recovered repository!"
] , "You should run \"git fsck\" to make sure, but it looks like everything was recovered ok."
return (True, modifiedbranches) ]
return (True, modifiedbranches)
| otherwise = do
unless (repoIsLocalBare g) $ do
mcurr <- Branch.currentUnsafe g
case mcurr of
Nothing -> return ()
Just curr -> when (any (== curr) modifiedbranches) $ do
putStrLn $ unwords
[ "You currently have"
, fromRef curr
, "checked out. You may have staged changes in the index that can be committed to recover the lost state of this branch!"
]
putStrLn "Successfully recovered repository!"
putStrLn "Please carefully check that the changes mentioned above are ok.."
return (True, modifiedbranches)
unsuccessfulfinish = do unsuccessfulfinish = do
if repoIsLocalBare g if repoIsLocalBare g
then do then do

View file

@ -23,25 +23,31 @@ writeFsckResults u fsckresults = do
logfile <- fromRepo $ gitAnnexFsckResultsLog u logfile <- fromRepo $ gitAnnexFsckResultsLog u
liftIO $ liftIO $
case fsckresults of case fsckresults of
FsckFailed -> store S.empty logfile FsckFailed -> store S.empty False logfile
FsckFoundMissing s FsckFoundMissing s t
| S.null s -> nukeFile logfile | S.null s -> nukeFile logfile
| otherwise -> store s logfile | otherwise -> store s t logfile
where where
store s logfile = do store s t logfile = do
createDirectoryIfMissing True (parentDir logfile) createDirectoryIfMissing True (parentDir logfile)
liftIO $ viaTmp writeFile logfile $ serialize s liftIO $ viaTmp writeFile logfile $ serialize s t
serialize = unlines . map fromRef . S.toList serialize s t =
let ls = map fromRef (S.toList s)
in if t
then unlines ("truncated":ls)
else unlines ls
readFsckResults :: UUID -> Annex FsckResults readFsckResults :: UUID -> Annex FsckResults
readFsckResults u = do readFsckResults u = do
logfile <- fromRepo $ gitAnnexFsckResultsLog u logfile <- fromRepo $ gitAnnexFsckResultsLog u
liftIO $ catchDefaultIO (FsckFoundMissing S.empty) $ liftIO $ catchDefaultIO (FsckFoundMissing S.empty False) $
deserialize <$> readFile logfile deserialize . lines <$> readFile logfile
where where
deserialize l = deserialize ("truncated":ls) = deserialize' ls True
let s = S.fromList $ map Ref $ lines l deserialize ls = deserialize' ls False
in if S.null s then FsckFailed else FsckFoundMissing s deserialize' ls t =
let s = S.fromList $ map Ref ls
in if S.null s then FsckFailed else FsckFoundMissing s t
clearFsckResults :: UUID -> Annex () clearFsckResults :: UUID -> Annex ()
clearFsckResults = liftIO . nukeFile <=< fromRepo . gitAnnexFsckResultsLog clearFsckResults = liftIO . nukeFile <=< fromRepo . gitAnnexFsckResultsLog

View file

@ -18,3 +18,13 @@ So I tried to follow your advice here and increase the stack:
git-annex: Most RTS options are disabled. Link with -rtsopts to enable them. git-annex: Most RTS options are disabled. Link with -rtsopts to enable them.
I wasn't sure what to do next, so any help would be appreciated. I wasn't sure what to do next, so any help would be appreciated.
> Now only 20k problem shas max (more likely 10k) are collected from fsck,
> so it won't use much memory (60 mb or so). If it had to truncate
> shas from fsck, it will re-run fsck after the repair process,
> which should either find no problems left (common when eg when all missing shas
> were able to be fetched from remotes), or find a new set of problem
> shas, which it can feed back through the repair process.
>
> If the repository is very large, this means more work, but it shouldn't
> run out of memory now. [[fixed|done]] --[[Joey]]