git pkt-line format
Git uses pkt-line in the pack and http protocols, and for the long-running filter processes protocol as well. This should be a quite efficient parser and builder since it uses attoparsec and bytestring-builder. This adds a dependency on attoparsec, but it's a free dependency because eg aeson depends on attoparsec and git-annex depends on aeson. This commit was supported by the NSF-funded DataLad project.
This commit is contained in:
parent
816e0e356b
commit
1272ccdf1e
3 changed files with 139 additions and 0 deletions
136
Git/Protocol/PktLine.hs
Normal file
136
Git/Protocol/PktLine.hs
Normal file
|
@ -0,0 +1,136 @@
|
|||
{- Git pkt-line format, as documented in
|
||||
- git/Documentation/technical/protocol-common.txt
|
||||
-
|
||||
- Copyright 2018 Joey Hess <id@joeyh.name>
|
||||
-
|
||||
- Licensed under the GNU GPL version 3 or higher.
|
||||
-}
|
||||
|
||||
{-# LANGUAGE OverloadedStrings #-}
|
||||
|
||||
module Git.Protocol.PktLine (
|
||||
PktLine,
|
||||
flushPkt,
|
||||
textPktLine,
|
||||
pktLineText,
|
||||
streamPktLine,
|
||||
encodePktLine,
|
||||
parsePktLine,
|
||||
decodePktLine,
|
||||
splitPktLine,
|
||||
) where
|
||||
|
||||
import qualified Data.ByteString as S
|
||||
import qualified Data.ByteString.Lazy as L
|
||||
import Data.Attoparsec.ByteString as A
|
||||
import Data.Attoparsec.ByteString.Char8 as A
|
||||
import Data.Attoparsec.ByteString.Lazy as AL
|
||||
import Data.ByteString.Builder
|
||||
import qualified Data.Text as T
|
||||
import qualified Data.Text.Encoding as E
|
||||
import qualified Data.Text.Encoding.Error as E
|
||||
import Data.Monoid
|
||||
import Data.Word
|
||||
|
||||
-- | A pkt-line encodes a variable length binary string with a maximum size.
|
||||
--
|
||||
-- This module only exports smart constructors for legal pkt-lines.
|
||||
newtype PktLine = PktLine S.ByteString
|
||||
deriving (Show)
|
||||
|
||||
-- | Maximum data that can be contained in a pkt-line, not
|
||||
-- including the 4 byte length header.
|
||||
maxPktLineContent :: Word16
|
||||
maxPktLineContent = 65516
|
||||
|
||||
-- | Maximum length of a pkt-line, including the 4 byte size header.
|
||||
maxPktLineLength :: Word16
|
||||
maxPktLineLength = 65520
|
||||
|
||||
-- | The flush-pkt, a special case in the protocol that is often used to
|
||||
-- eg, signal the end of a stream of binary data.
|
||||
flushPkt :: PktLine
|
||||
flushPkt = PktLine S.empty
|
||||
|
||||
-- | Encodes a Text as a PktLine. Fails if the Text it too large.
|
||||
--
|
||||
-- A trailing newline is included after it, as the protocol recommends
|
||||
-- doing for non-binary data.
|
||||
textPktLine :: T.Text -> Maybe PktLine
|
||||
textPktLine t =
|
||||
let b = E.encodeUtf8 t <> "\n"
|
||||
in if S.length b > fromIntegral maxPktLineContent
|
||||
then Nothing
|
||||
else Just (PktLine b)
|
||||
|
||||
-- | Extracts Text from a PktLine. Any trailing newline is removed.
|
||||
pktLineText :: PktLine -> Either E.UnicodeException T.Text
|
||||
pktLineText (PktLine b) = case E.decodeUtf8' b of
|
||||
Left e -> Left e
|
||||
Right t ->
|
||||
let (t', end) = T.splitAt (T.length t - 1) t
|
||||
in if end == "\n"
|
||||
then Right t'
|
||||
else Right t
|
||||
|
||||
-- | Creates a stream of PktLines encoding a lazy ByteString of any size.
|
||||
-- Note that the stream is not terminated with a flushPkt.
|
||||
streamPktLine :: L.ByteString -> [PktLine]
|
||||
streamPktLine l =
|
||||
let (chunk, rest) = L.splitAt (fromIntegral maxPktLineContent) l
|
||||
pktline = PktLine $ mconcat $ L.toChunks chunk
|
||||
in if L.null rest
|
||||
then pktline : []
|
||||
else pktline : streamPktLine rest
|
||||
|
||||
-- | ByteString builder for a pkt-line.
|
||||
encodePktLine :: PktLine -> Builder
|
||||
encodePktLine (PktLine b)
|
||||
-- Avoid sending an empty pkt-line; send a flush-pkt instead.
|
||||
| S.null b = "0000"
|
||||
| otherwise = lengthheader <> byteString b
|
||||
where
|
||||
-- The length header is always 4 bytes long, and includes
|
||||
-- itself in its length.
|
||||
lengthheader = word16HexFixed (fromIntegral (S.length b) + 4)
|
||||
|
||||
-- | Attoparsec parser for a pkt-line.
|
||||
parsePktLine :: Parser PktLine
|
||||
parsePktLine = do
|
||||
len <- parseLengthHeader
|
||||
if len == 0
|
||||
then do
|
||||
endOfInput
|
||||
return flushPkt
|
||||
else if len < 4
|
||||
-- It's impossible for a pkt-line to be less than
|
||||
-- 4 bytes long, since the length header is 4 bytes.
|
||||
then fail "invalid pkt-line length"
|
||||
else PktLine <$> A.take (len - 4)
|
||||
|
||||
parseLengthHeader :: Parser Int
|
||||
parseLengthHeader = do
|
||||
-- Attoparsec's hexidecimal parser will consume any amount
|
||||
-- of hex, but the length header is limited to 4 bytes, so
|
||||
-- take those and parse only them.
|
||||
h <- A.take 4
|
||||
-- Require all 4 bytes to be hexidecimal by using endOfInput.
|
||||
case parseOnly (hexadecimal <* endOfInput) h of
|
||||
Left e -> fail e
|
||||
Right len
|
||||
| len > fromIntegral maxPktLineLength ->
|
||||
fail "pkt-line too long"
|
||||
| otherwise -> return len
|
||||
|
||||
-- | The ByteString must contain only a pkt-line with no additional data
|
||||
-- or this will fail.
|
||||
decodePktLine :: S.ByteString -> Either String PktLine
|
||||
decodePktLine = parseOnly (parsePktLine <* endOfInput)
|
||||
|
||||
-- | Split the next PktLine from a lazy ByteString, returning it and the
|
||||
-- remainder of the ByteString.
|
||||
splitPktLine :: L.ByteString -> Either String (PktLine, L.ByteString)
|
||||
splitPktLine = go . AL.parse parsePktLine
|
||||
where
|
||||
go (AL.Done rest p) = Right (p, rest)
|
||||
go (AL.Fail _ _ e) = Left e
|
1
debian/control
vendored
1
debian/control
vendored
|
@ -12,6 +12,7 @@ Build-Depends:
|
|||
libghc-pcre-light-dev,
|
||||
libghc-cryptonite-dev,
|
||||
libghc-memory-dev,
|
||||
libghc-attoparsec-dev,
|
||||
libghc-sandi-dev,
|
||||
libghc-utf8-string-dev,
|
||||
libghc-aws-dev (>= 0.9.2-2~),
|
||||
|
|
|
@ -364,6 +364,7 @@ Executable git-annex
|
|||
cryptonite,
|
||||
memory,
|
||||
split,
|
||||
attoparsec,
|
||||
QuickCheck (>= 2.1),
|
||||
tasty (>= 0.7),
|
||||
tasty-hunit,
|
||||
|
@ -846,6 +847,7 @@ Executable git-annex
|
|||
Git.LsTree
|
||||
Git.Merge
|
||||
Git.Objects
|
||||
Git.Protocol.PktLine
|
||||
Git.Queue
|
||||
Git.Ref
|
||||
Git.RefLog
|
||||
|
|
Loading…
Reference in a new issue