2013-10-20 21:50:51 +00:00
|
|
|
{- git fsck interface
|
|
|
|
-
|
2015-01-21 16:50:09 +00:00
|
|
|
- Copyright 2013 Joey Hess <id@joeyh.name>
|
2013-10-20 21:50:51 +00:00
|
|
|
-
|
2019-03-13 19:48:14 +00:00
|
|
|
- Licensed under the GNU AGPL version 3 or higher.
|
2013-10-20 21:50:51 +00:00
|
|
|
-}
|
|
|
|
|
2019-07-05 19:09:37 +00:00
|
|
|
{-# LANGUAGE BangPatterns #-}
|
2016-10-31 18:00:37 +00:00
|
|
|
|
2013-10-20 21:50:51 +00:00
|
|
|
module Git.Fsck (
|
2013-11-30 18:29:11 +00:00
|
|
|
FsckResults(..),
|
2013-10-22 20:02:52 +00:00
|
|
|
MissingObjects,
|
2013-10-20 21:50:51 +00:00
|
|
|
findBroken,
|
2013-10-22 20:02:52 +00:00
|
|
|
foundBroken,
|
2013-10-21 19:28:06 +00:00
|
|
|
findMissing,
|
2013-12-10 19:40:01 +00:00
|
|
|
isMissing,
|
2013-11-30 18:29:11 +00:00
|
|
|
knownMissing,
|
2013-10-20 21:50:51 +00:00
|
|
|
) where
|
|
|
|
|
|
|
|
import Common
|
|
|
|
import Git
|
|
|
|
import Git.Command
|
|
|
|
import Git.Sha
|
2013-10-22 18:39:45 +00:00
|
|
|
import Utility.Batch
|
2013-10-20 21:50:51 +00:00
|
|
|
|
|
|
|
import qualified Data.Set as S
|
2014-03-12 17:54:29 +00:00
|
|
|
import Control.Concurrent.Async
|
Fix build with ghc 8.4+, which broke due to the Semigroup Monoid change
https://prime.haskell.org/wiki/Libraries/Proposals/SemigroupMonoid
I am not happy with the fragile pile of CPP boilerplate required to support
ghc back to 7.0, which git-annex still targets for both the android build
and the standalone build targeting old linux kernels. It makes me unlikely
to want to use Semigroup more in git-annex, because the benefit of the
abstraction is swamped by the ugliness. I actually considered ripping out
all the Semigroup instances, but some are needed to use
optparse-applicative.
The problem, I think, is they made this transition on too fast a timeline.
(Although ironically, work on it started in 2015 or earlier!)
In particular, Debian oldstable is not out of security support, and it's
not possible to follow the simpler workarounds documented on the wiki and
have it build on oldstable (because the semigroups package in it is too
old).
I have only tested this build with ghc 8.2.2, not the newer and older
versions that branches of the CPP support. So there could be typos, we'll
see.
This commit was sponsored by Brock Spratlen on Patreon.
2018-05-30 16:28:43 +00:00
|
|
|
import qualified Data.Semigroup as Sem
|
|
|
|
import Prelude
|
2013-10-20 21:50:51 +00:00
|
|
|
|
2014-03-12 19:18:43 +00:00
|
|
|
{- The result of running fsck on a repository.
 -
 - FsckFoundMissing carries the set of objects that were found to be
 - missing or corrupt (which may be empty when there is no problem), and
 - a flag that is True when more candidate shas were seen than were
 - checked, so the set may be incomplete.
 -
 - FsckFailed is used when fsck exited unsuccessfully and no bad objects
 - could be identified.
 -}
data FsckResults
    = FsckFoundMissing
        { missingObjects :: MissingObjects
        , missingObjectsTruncated :: Bool
        }
    | FsckFailed
    deriving Show
|
2013-10-22 20:02:52 +00:00
|
|
|
|
2016-10-31 18:00:37 +00:00
|
|
|
{- What was learned from one stream of fsck's output. -}
data FsckOutput
    -- Some output was seen that was not only duplicateEntries warnings;
    -- holds the objects found to be missing, and whether the list of
    -- candidate shas had to be truncated.
    = FsckOutput MissingObjects Truncated
    -- Nothing at all was output.
    | NoFsckOutput
    -- Every line that was output mentioned duplicateEntries.
    | AllDuplicateEntriesWarning
|
|
|
|
|
|
|
|
{- The set of shas of objects found to be missing from the repository,
 - or corrupt. -}
type MissingObjects = S.Set Sha
|
|
|
|
|
|
|
|
{- True when more candidate shas were seen in fsck's output than the
 - limit on how many to check, so the set of missing objects may be
 - incomplete. -}
type Truncated = Bool
|
|
|
|
|
Fix build with ghc 8.4+, which broke due to the Semigroup Monoid change
https://prime.haskell.org/wiki/Libraries/Proposals/SemigroupMonoid
I am not happy with the fragile pile of CPP boilerplate required to support
ghc back to 7.0, which git-annex still targets for both the android build
and the standalone build targeting old linux kernels. It makes me unlikely
to want to use Semigroup more in git-annex, because the benefit of the
abstraction is swamped by the ugliness. I actually considered ripping out
all the Semigroup instances, but some are needed to use
optparse-applicative.
The problem, I think, is they made this transition on too fast a timeline.
(Although ironically, work on it started in 2015 or earlier!)
In particular, Debian oldstable is not out of security support, and it's
not possible to follow the simpler workarounds documented on the wiki and
have it build on oldstable (because the semigroups package in it is too
old).
I have only tested this build with ghc 8.2.2, not the newer and older
versions that branches of the CPP support. So there could be typos, we'll
see.
This commit was sponsored by Brock Spratlen on Patreon.
2018-05-30 16:28:43 +00:00
|
|
|
{- Combines the results parsed from two streams of fsck output.
 -
 - Two FsckOutputs are merged: the sets of missing objects are unioned,
 - and the result is truncated if either side was. A FsckOutput wins over
 - either of the other constructors. Otherwise, AllDuplicateEntriesWarning
 - wins over NoFsckOutput.
 -}
appendFsckOutput :: FsckOutput -> FsckOutput -> FsckOutput
appendFsckOutput a b = case (a, b) of
    (FsckOutput s1 t1, FsckOutput s2 t2) ->
        FsckOutput (s1 `S.union` s2) (t1 || t2)
    (FsckOutput s t, _) -> FsckOutput s t
    (_, FsckOutput s t) -> FsckOutput s t
    (NoFsckOutput, NoFsckOutput) -> NoFsckOutput
    (AllDuplicateEntriesWarning, _) -> AllDuplicateEntriesWarning
    (_, AllDuplicateEntriesWarning) -> AllDuplicateEntriesWarning
|
|
|
|
|
|
|
|
{- Outputs are combined with appendFsckOutput. -}
instance Sem.Semigroup FsckOutput where
    a <> b = appendFsckOutput a b

{- NoFsckOutput is the identity: appending it to any other output
 - yields that other output. -}
instance Monoid FsckOutput where
    mempty = NoFsckOutput
|
|
|
|
|
2013-10-20 21:50:51 +00:00
|
|
|
{- Runs fsck to find some of the broken objects in the repository.
|
|
|
|
- May not find all broken objects, if fsck fails on bad data in some of
|
2013-10-22 20:02:52 +00:00
|
|
|
- the broken objects it does find.
|
2013-10-20 21:50:51 +00:00
|
|
|
-
|
|
|
|
- Strategy: Rather than parsing fsck's current specific output,
|
|
|
|
- look for anything in its output (both stdout and stderr) that appears
|
|
|
|
- to be a git sha. Not all such shas are of broken objects, so ask git
|
|
|
|
- to try to cat the object, and see if it fails.
|
|
|
|
-}
|
2013-10-22 20:02:52 +00:00
|
|
|
findBroken :: Bool -> Repo -> IO FsckResults
findBroken batchmode r = do
    -- In batch mode, the command is rewritten by toBatchCommand;
    -- otherwise git fsck is run directly.
    (cmd, ps) <- if batchmode
        then toBatchCommand gitfsck
        else return gitfsck
    withCreateProcess (fsckprocess cmd ps) collect
  where
    gitfsck = ("git", fsckParams r)

    -- Both stdout and stderr are captured, since both are scanned
    -- for shas.
    fsckprocess cmd ps = (proc cmd (toCommand ps))
        { std_out = CreatePipe
        , std_err = CreatePipe
        }

    collect _ (Just outh) (Just errh) pid = do
        -- Read both pipes at the same time.
        (fromout, fromerr) <- concurrently
            (parseFsckOutput maxobjs r outh pid)
            (parseFsckOutput maxobjs r errh pid)
        fsckok <- checkSuccessProcess pid
        case fromout `mappend` fromerr of
            FsckOutput badobjs truncated
                -- fsck failed, but no bad object could be
                -- pinned down.
                | S.null badobjs && not fsckok -> return FsckFailed
                | otherwise -> return $ FsckFoundMissing badobjs truncated
            NoFsckOutput
                | fsckok -> return noproblem
                | otherwise -> return FsckFailed
            -- When fsck output nothing but duplicateEntries
            -- warnings, the repository is not broken; it only
            -- contains some unusual tree objects. So a nonzero
            -- exit status is ignored here.
            AllDuplicateEntriesWarning -> return noproblem
    collect _ _ _ _ = error "internal"

    -- Limit on how many shas from each stream get checked.
    maxobjs = 10000

    noproblem = FsckFoundMissing S.empty False
|
2013-10-20 21:50:51 +00:00
|
|
|
|
2013-10-22 20:02:52 +00:00
|
|
|
{- True when fsck failed outright, or when it found at least one
 - missing object. -}
foundBroken :: FsckResults -> Bool
foundBroken results = case results of
    FsckFailed -> True
    FsckFoundMissing missing _ -> not (S.null missing)
|
2013-11-30 18:29:11 +00:00
|
|
|
|
|
|
|
{- The objects that fsck found to be missing. When fsck failed, none
 - are known, so the set is empty. -}
knownMissing :: FsckResults -> MissingObjects
knownMissing results = case results of
    FsckFoundMissing missing _ -> missing
    FsckFailed -> S.empty
|
2013-10-22 20:02:52 +00:00
|
|
|
|
2013-10-20 21:50:51 +00:00
|
|
|
{- Finds objects that are missing from the git repository, or are corrupt.
|
2013-10-22 18:39:45 +00:00
|
|
|
-
|
2013-11-21 04:43:30 +00:00
|
|
|
- This does not use git cat-file --batch, because catting a corrupt
|
2013-12-10 19:40:01 +00:00
|
|
|
- object can cause it to crash, or to report incorrect size information.
|
2013-10-22 18:39:45 +00:00
|
|
|
-}
|
2013-10-21 19:28:06 +00:00
|
|
|
findMissing :: [Sha] -> Repo -> IO MissingObjects
findMissing objs r = do
    -- Each object is checked individually with isMissing.
    missing <- filterM (\sha -> isMissing sha r) objs
    return (S.fromList missing)
|
|
|
|
|
2020-11-19 20:21:17 +00:00
|
|
|
{- Reads all lines from a handle connected to the fsck process, and
 - classifies them.
 -
 - No lines at all yields NoFsckOutput. When every line mentions
 - duplicateEntries, yields AllDuplicateEntriesWarning. Otherwise, the
 - shas found in the lines are checked (at most maxobjs of them) to see
 - which are missing, and it is noted whether there were more shas than
 - that limit.
 -}
parseFsckOutput :: Int -> Repo -> Handle -> ProcessHandle -> IO FsckOutput
parseFsckOutput maxobjs r h pid = classify =<< readall []
  where
    -- Lines are accumulated in reverse, and put back in order once
    -- there are no more to read.
    readall acc = hGetLineUntilExitOrEOF pid h >>= maybe
        (return (reverse acc))
        (\l -> readall (l:acc))

    classify ls
        | null ls = return NoFsckOutput
        | all ("duplicateEntries" `isInfixOf`) ls =
            return AllDuplicateEntriesWarning
        | otherwise = do
            let shas = findShas ls
            -- Forced now, rather than left as a thunk inside
            -- the result.
            let !truncated = length shas > maxobjs
            missingobjs <- findMissing (take maxobjs shas) r
            return $ FsckOutput missingobjs truncated
|
2014-03-10 19:12:54 +00:00
|
|
|
|
2013-12-10 19:40:01 +00:00
|
|
|
{- Checks a single object by running git show on it quietly. The object
 - is considered missing when that throws an IO exception. -}
isMissing :: Sha -> Repo -> IO Bool
isMissing s r = do
    shown <- tryIO $ runQuiet
        [ Param "show"
        , Param (fromRef s)
        ] r
    return $ case shown of
        Left _ -> True
        Right _ -> False
|
2013-10-20 21:50:51 +00:00
|
|
|
|
2019-09-11 20:10:25 +00:00
|
|
|
{- Extracts everything that looks like a sha from the words of the
 - given lines. Lines starting with "dangling " are skipped. -}
findShas :: [String] -> [Sha]
findShas ls =
    [ sha
    | l <- ls
    , not ("dangling " `isPrefixOf` l)
    , w <- words l
    -- Words that extractSha does not accept are dropped.
    , Just sha <- [extractSha (encodeBS' w)]
    ]
|
|
|
|
|
|
|
|
{- The git command line used to run fsck in a repository, with
 - dangling objects and reflogs excluded. -}
fsckParams :: Repo -> [CommandParam]
fsckParams = gitCommandLine
    [ Param "fsck"
    , Param "--no-dangling"
    , Param "--no-reflogs"
    ]
|