attoparsec parser for presence logs
This commit is contained in:
parent
bfc9039ead
commit
ef8ddaa713
3 changed files with 54 additions and 10 deletions
|
@ -17,6 +17,7 @@ import Prelude
|
|||
import Utility.Env
|
||||
import Utility.TimeStamp
|
||||
import Utility.QuickCheck
|
||||
import qualified Data.Attoparsec.ByteString.Lazy as A
|
||||
|
||||
-- | Some very old logs did not have any time stamp at all;
|
||||
-- Unknown is used for those.
|
||||
|
@ -44,3 +45,6 @@ formatVectorClock (VectorClock t) = show t
|
|||
|
||||
parseVectorClock :: String -> Maybe VectorClock
|
||||
parseVectorClock t = VectorClock <$> parsePOSIXTime t
|
||||
|
||||
vectorClockParser :: A.Parser VectorClock
|
||||
vectorClockParser = VectorClock <$> parserPOSIXTime
|
||||
|
|
30
Logs/Line.hs
30
Logs/Line.hs
|
@ -36,6 +36,12 @@ DAMAGE.
|
|||
|
||||
module Logs.Line where
|
||||
|
||||
import Common
|
||||
|
||||
import qualified Data.Attoparsec.ByteString.Lazy as A
|
||||
import Data.Attoparsec.ByteString.Char8 (isEndOfLine)
|
||||
import qualified Data.DList as D
|
||||
|
||||
-- This is the same as Data.List.lines, with \r added.
|
||||
-- This works around some versions of git-annex which wrote \r
|
||||
-- into git-annex branch files on Windows. Those \r's sometimes
|
||||
|
@ -49,3 +55,27 @@ splitLines s = cons (case break (\c -> c == '\n' || c == '\r') s of
|
|||
_:s'' -> splitLines s''))
|
||||
where
|
||||
cons ~(h, t) = h : t
|
||||
|
||||
{- Applies a parser to each line of a log file.
|
||||
-
|
||||
- If the parser fails to parse a line, that line is skipped, instead of
|
||||
- the overall parse failing. This is generally a good idea in case a newer
|
||||
- version of git-annex somehow changed the format of the log file.
|
||||
-
|
||||
- Any combination of \r and \n are taken to be the end of the line.
|
||||
- (Some versions of git-annex on Windows wrote \r into git-annex branch
|
||||
- files, and multiple \r's sometimes accumulated.)
|
||||
-
|
||||
- The parser does not itself need to avoid parsing beyond the end of line;
|
||||
- this is implemented only pass the content of a line to the parser.
|
||||
-}
|
||||
parseLogLines :: A.Parser a -> A.Parser [a]
|
||||
parseLogLines parser = go D.empty
|
||||
where
|
||||
go dl = do
|
||||
line <- A.takeTill isEndOfLine
|
||||
A.skipWhile isEndOfLine
|
||||
let dl' = case A.parseOnly parser line of
|
||||
Left _ -> dl
|
||||
Right v -> D.snoc dl v
|
||||
(A.endOfInput *> return (D.toList dl')) <|> go dl'
|
||||
|
|
|
@ -15,6 +15,8 @@ import Utility.QuickCheck
|
|||
import qualified Data.Map as M
|
||||
import qualified Data.ByteString.Lazy as L
|
||||
import qualified Data.ByteString as S
|
||||
import qualified Data.Attoparsec.ByteString.Lazy as A
|
||||
import Data.Attoparsec.ByteString.Char8 (char, anyChar)
|
||||
import Data.ByteString.Builder
|
||||
|
||||
newtype LogInfo = LogInfo { fromLogInfo :: S.ByteString }
|
||||
|
@ -32,17 +34,25 @@ instance Show LogLine where
|
|||
data LogStatus = InfoPresent | InfoMissing | InfoDead
|
||||
deriving (Eq, Show, Bounded, Enum)
|
||||
|
||||
{- Parses a log file. Unparseable lines are ignored. -}
|
||||
parseLog :: L.ByteString -> [LogLine]
|
||||
parseLog = mapMaybe parseline . splitLines . decodeBL
|
||||
where
|
||||
parseline l = LogLine
|
||||
<$> parseVectorClock c
|
||||
<*> parseStatus s
|
||||
<*> pure (LogInfo (encodeBS rest))
|
||||
where
|
||||
(c, pastc) = separate (== ' ') l
|
||||
(s, rest) = separate (== ' ') pastc
|
||||
parseLog = fromMaybe [] . A.maybeResult . A.parse (logParser <* A.endOfInput)
|
||||
|
||||
logParser :: A.Parser [LogLine]
|
||||
logParser = parseLogLines $ LogLine
|
||||
<$> vectorClockParser
|
||||
<* char ' '
|
||||
<*> statusParser
|
||||
<* char ' '
|
||||
<*> (LogInfo <$> A.takeByteString)
|
||||
|
||||
statusParser :: A.Parser LogStatus
|
||||
statusParser = do
|
||||
c <- anyChar
|
||||
case c of
|
||||
'1' -> return InfoPresent
|
||||
'0' -> return InfoMissing
|
||||
'X' -> return InfoDead
|
||||
_ -> fail "unknown status character"
|
||||
|
||||
parseStatus :: String -> Maybe LogStatus
|
||||
parseStatus "1" = Just InfoPresent
|
||||
|
|
Loading…
Reference in a new issue