git pkt-line format
Git uses pkt-line in the pack and http protocols, and for the long-running filter processes protocol as well. This should be a quite efficient parser and builder since it uses attoparsec and bytestring-builder. This adds a dependency on attoparsec, but it's a free dependency because eg aeson depends on attoparsec and git-annex depends on aeson. This commit was supported by the NSF-funded DataLad project.
This commit is contained in:
parent
816e0e356b
commit
1272ccdf1e
3 changed files with 139 additions and 0 deletions
136
Git/Protocol/PktLine.hs
Normal file
136
Git/Protocol/PktLine.hs
Normal file
|
@ -0,0 +1,136 @@
|
||||||
|
{- Git pkt-line format, as documented in
|
||||||
|
- git/Documentation/technical/protocol-common.txt
|
||||||
|
-
|
||||||
|
- Copyright 2018 Joey Hess <id@joeyh.name>
|
||||||
|
-
|
||||||
|
- Licensed under the GNU GPL version 3 or higher.
|
||||||
|
-}
|
||||||
|
|
||||||
|
{-# LANGUAGE OverloadedStrings #-}
|
||||||
|
|
||||||
|
module Git.Protocol.PktLine (
|
||||||
|
PktLine,
|
||||||
|
flushPkt,
|
||||||
|
textPktLine,
|
||||||
|
pktLineText,
|
||||||
|
streamPktLine,
|
||||||
|
encodePktLine,
|
||||||
|
parsePktLine,
|
||||||
|
decodePktLine,
|
||||||
|
splitPktLine,
|
||||||
|
) where
|
||||||
|
|
||||||
|
import qualified Data.ByteString as S
|
||||||
|
import qualified Data.ByteString.Lazy as L
|
||||||
|
import Data.Attoparsec.ByteString as A
|
||||||
|
import Data.Attoparsec.ByteString.Char8 as A
|
||||||
|
import Data.Attoparsec.ByteString.Lazy as AL
|
||||||
|
import Data.ByteString.Builder
|
||||||
|
import qualified Data.Text as T
|
||||||
|
import qualified Data.Text.Encoding as E
|
||||||
|
import qualified Data.Text.Encoding.Error as E
|
||||||
|
import Data.Monoid
|
||||||
|
import Data.Word
|
||||||
|
|
||||||
|
-- | A pkt-line encodes a variable length binary string with a maximum size.
|
||||||
|
--
|
||||||
|
-- This module only exports smart constructors for legal pkt-lines.
|
||||||
|
newtype PktLine = PktLine S.ByteString
|
||||||
|
deriving (Show)
|
||||||
|
|
||||||
|
-- | Maximum data that can be contained in a pkt-line, not
|
||||||
|
-- including the 4 byte length header.
|
||||||
|
maxPktLineContent :: Word16
|
||||||
|
maxPktLineContent = 65516
|
||||||
|
|
||||||
|
-- | Maximum length of a pkt-line, including the 4 byte size header.
|
||||||
|
maxPktLineLength :: Word16
|
||||||
|
maxPktLineLength = 65520
|
||||||
|
|
||||||
|
-- | The flush-pkt, a special case in the protocol that is often used to
|
||||||
|
-- eg, signal the end of a stream of binary data.
|
||||||
|
flushPkt :: PktLine
|
||||||
|
flushPkt = PktLine S.empty
|
||||||
|
|
||||||
|
-- | Encodes a Text as a PktLine. Fails if the Text it too large.
|
||||||
|
--
|
||||||
|
-- A trailing newline is included after it, as the protocol recommends
|
||||||
|
-- doing for non-binary data.
|
||||||
|
textPktLine :: T.Text -> Maybe PktLine
|
||||||
|
textPktLine t =
|
||||||
|
let b = E.encodeUtf8 t <> "\n"
|
||||||
|
in if S.length b > fromIntegral maxPktLineContent
|
||||||
|
then Nothing
|
||||||
|
else Just (PktLine b)
|
||||||
|
|
||||||
|
-- | Extracts Text from a PktLine. Any trailing newline is removed.
|
||||||
|
pktLineText :: PktLine -> Either E.UnicodeException T.Text
|
||||||
|
pktLineText (PktLine b) = case E.decodeUtf8' b of
|
||||||
|
Left e -> Left e
|
||||||
|
Right t ->
|
||||||
|
let (t', end) = T.splitAt (T.length t - 1) t
|
||||||
|
in if end == "\n"
|
||||||
|
then Right t'
|
||||||
|
else Right t
|
||||||
|
|
||||||
|
-- | Creates a stream of PktLines encoding a lazy ByteString of any size.
|
||||||
|
-- Note that the stream is not terminated with a flushPkt.
|
||||||
|
streamPktLine :: L.ByteString -> [PktLine]
|
||||||
|
streamPktLine l =
|
||||||
|
let (chunk, rest) = L.splitAt (fromIntegral maxPktLineContent) l
|
||||||
|
pktline = PktLine $ mconcat $ L.toChunks chunk
|
||||||
|
in if L.null rest
|
||||||
|
then pktline : []
|
||||||
|
else pktline : streamPktLine rest
|
||||||
|
|
||||||
|
-- | ByteString builder for a pkt-line.
|
||||||
|
encodePktLine :: PktLine -> Builder
|
||||||
|
encodePktLine (PktLine b)
|
||||||
|
-- Avoid sending an empty pkt-line; send a flush-pkt instead.
|
||||||
|
| S.null b = "0000"
|
||||||
|
| otherwise = lengthheader <> byteString b
|
||||||
|
where
|
||||||
|
-- The length header is always 4 bytes long, and includes
|
||||||
|
-- itself in its length.
|
||||||
|
lengthheader = word16HexFixed (fromIntegral (S.length b) + 4)
|
||||||
|
|
||||||
|
-- | Attoparsec parser for a pkt-line.
|
||||||
|
parsePktLine :: Parser PktLine
|
||||||
|
parsePktLine = do
|
||||||
|
len <- parseLengthHeader
|
||||||
|
if len == 0
|
||||||
|
then do
|
||||||
|
endOfInput
|
||||||
|
return flushPkt
|
||||||
|
else if len < 4
|
||||||
|
-- It's impossible for a pkt-line to be less than
|
||||||
|
-- 4 bytes long, since the length header is 4 bytes.
|
||||||
|
then fail "invalid pkt-line length"
|
||||||
|
else PktLine <$> A.take (len - 4)
|
||||||
|
|
||||||
|
parseLengthHeader :: Parser Int
|
||||||
|
parseLengthHeader = do
|
||||||
|
-- Attoparsec's hexidecimal parser will consume any amount
|
||||||
|
-- of hex, but the length header is limited to 4 bytes, so
|
||||||
|
-- take those and parse only them.
|
||||||
|
h <- A.take 4
|
||||||
|
-- Require all 4 bytes to be hexidecimal by using endOfInput.
|
||||||
|
case parseOnly (hexadecimal <* endOfInput) h of
|
||||||
|
Left e -> fail e
|
||||||
|
Right len
|
||||||
|
| len > fromIntegral maxPktLineLength ->
|
||||||
|
fail "pkt-line too long"
|
||||||
|
| otherwise -> return len
|
||||||
|
|
||||||
|
-- | The ByteString must contain only a pkt-line with no additional data
|
||||||
|
-- or this will fail.
|
||||||
|
decodePktLine :: S.ByteString -> Either String PktLine
|
||||||
|
decodePktLine = parseOnly (parsePktLine <* endOfInput)
|
||||||
|
|
||||||
|
-- | Split the next PktLine from a lazy ByteString, returning it and the
|
||||||
|
-- remainder of the ByteString.
|
||||||
|
splitPktLine :: L.ByteString -> Either String (PktLine, L.ByteString)
|
||||||
|
splitPktLine = go . AL.parse parsePktLine
|
||||||
|
where
|
||||||
|
go (AL.Done rest p) = Right (p, rest)
|
||||||
|
go (AL.Fail _ _ e) = Left e
|
1
debian/control
vendored
1
debian/control
vendored
|
@ -12,6 +12,7 @@ Build-Depends:
|
||||||
libghc-pcre-light-dev,
|
libghc-pcre-light-dev,
|
||||||
libghc-cryptonite-dev,
|
libghc-cryptonite-dev,
|
||||||
libghc-memory-dev,
|
libghc-memory-dev,
|
||||||
|
libghc-attoparsec-dev,
|
||||||
libghc-sandi-dev,
|
libghc-sandi-dev,
|
||||||
libghc-utf8-string-dev,
|
libghc-utf8-string-dev,
|
||||||
libghc-aws-dev (>= 0.9.2-2~),
|
libghc-aws-dev (>= 0.9.2-2~),
|
||||||
|
|
|
@ -364,6 +364,7 @@ Executable git-annex
|
||||||
cryptonite,
|
cryptonite,
|
||||||
memory,
|
memory,
|
||||||
split,
|
split,
|
||||||
|
attoparsec,
|
||||||
QuickCheck (>= 2.1),
|
QuickCheck (>= 2.1),
|
||||||
tasty (>= 0.7),
|
tasty (>= 0.7),
|
||||||
tasty-hunit,
|
tasty-hunit,
|
||||||
|
@ -846,6 +847,7 @@ Executable git-annex
|
||||||
Git.LsTree
|
Git.LsTree
|
||||||
Git.Merge
|
Git.Merge
|
||||||
Git.Objects
|
Git.Objects
|
||||||
|
Git.Protocol.PktLine
|
||||||
Git.Queue
|
Git.Queue
|
||||||
Git.Ref
|
Git.Ref
|
||||||
Git.RefLog
|
Git.RefLog
|
||||||
|
|
Loading…
Reference in a new issue