diff --git a/Annex/VectorClock.hs b/Annex/VectorClock.hs index 951145ca14..5e9e2cb89b 100644 --- a/Annex/VectorClock.hs +++ b/Annex/VectorClock.hs @@ -17,6 +17,7 @@ import Prelude import Utility.Env import Utility.TimeStamp import Utility.QuickCheck +import qualified Data.Attoparsec.ByteString.Lazy as A -- | Some very old logs did not have any time stamp at all; -- Unknown is used for those. @@ -44,3 +45,6 @@ formatVectorClock (VectorClock t) = show t parseVectorClock :: String -> Maybe VectorClock parseVectorClock t = VectorClock <$> parsePOSIXTime t + +vectorClockParser :: A.Parser VectorClock +vectorClockParser = VectorClock <$> parserPOSIXTime diff --git a/Logs/Line.hs b/Logs/Line.hs index a7e17190e2..716ee8be87 100644 --- a/Logs/Line.hs +++ b/Logs/Line.hs @@ -36,6 +36,12 @@ DAMAGE. module Logs.Line where +import Common + +import qualified Data.Attoparsec.ByteString.Lazy as A +import Data.Attoparsec.ByteString.Char8 (isEndOfLine) +import qualified Data.DList as D + -- This is the same as Data.List.lines, with \r added. -- This works around some versions of git-annex which wrote \r -- into git-annex branch files on Windows. Those \r's sometimes @@ -49,3 +55,27 @@ splitLines s = cons (case break (\c -> c == '\n' || c == '\r') s of _:s'' -> splitLines s'')) where cons ~(h, t) = h : t + +{- Applies a parser to each line of a log file. + - + - If the parser fails to parse a line, that line is skipped, instead of + - the overall parse failing. This is generally a good idea in case a newer + - version of git-annex somehow changed the format of the log file. + - + - Any combination of \r and \n are taken to be the end of the line. + - (Some versions of git-annex on Windows wrote \r into git-annex branch + - files, and multiple \r's sometimes accumulated.) + - + - The parser does not itself need to avoid parsing beyond the end of line; + - this is implemented only pass the content of a line to the parser. + -} +parseLogLines :: A.Parser a -> A.Parser [a] +parseLogLines parser = go D.empty + where + go dl = do + line <- A.takeTill isEndOfLine + A.skipWhile isEndOfLine + let dl' = case A.parseOnly parser line of + Left _ -> dl + Right v -> D.snoc dl v + (A.endOfInput *> return (D.toList dl')) <|> go dl' diff --git a/Logs/Presence/Pure.hs b/Logs/Presence/Pure.hs index 4411fa0176..776ae42018 100644 --- a/Logs/Presence/Pure.hs +++ b/Logs/Presence/Pure.hs @@ -15,6 +15,8 @@ import Utility.QuickCheck import qualified Data.Map as M import qualified Data.ByteString.Lazy as L import qualified Data.ByteString as S +import qualified Data.Attoparsec.ByteString.Lazy as A +import Data.Attoparsec.ByteString.Char8 (char, anyChar) import Data.ByteString.Builder newtype LogInfo = LogInfo { fromLogInfo :: S.ByteString } @@ -32,17 +34,25 @@ instance Show LogLine where data LogStatus = InfoPresent | InfoMissing | InfoDead deriving (Eq, Show, Bounded, Enum) -{- Parses a log file. Unparseable lines are ignored. -} parseLog :: L.ByteString -> [LogLine] -parseLog = mapMaybe parseline . splitLines . decodeBL - where - parseline l = LogLine - <$> parseVectorClock c - <*> parseStatus s - <*> pure (LogInfo (encodeBS rest)) - where - (c, pastc) = separate (== ' ') l - (s, rest) = separate (== ' ') pastc +parseLog = fromMaybe [] . A.maybeResult . A.parse (logParser <* A.endOfInput) + +logParser :: A.Parser [LogLine] +logParser = parseLogLines $ LogLine + <$> vectorClockParser + <* char ' ' + <*> statusParser + <* char ' ' + <*> (LogInfo <$> A.takeByteString) + +statusParser :: A.Parser LogStatus +statusParser = do + c <- anyChar + case c of + '1' -> return InfoPresent + '0' -> return InfoMissing + 'X' -> return InfoDead + _ -> fail "unknown status character" parseStatus :: String -> Maybe LogStatus parseStatus "1" = Just InfoPresent