From eb4fb388bd450b89435c62282442f614f2d542de Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Thu, 11 Jul 2024 15:47:16 -0400 Subject: [PATCH] only base64 non-utf8 --- P2P/Http/Types.hs | 53 +++++++++++++------ doc/design/p2p_protocol_over_http/draft1.mdwn | 10 ++-- doc/git-annex-p2phttp.mdwn | 7 +++ 3 files changed, 51 insertions(+), 19 deletions(-) diff --git a/P2P/Http/Types.hs b/P2P/Http/Types.hs index d4cd980b37..ed8f9ad9d7 100644 --- a/P2P/Http/Types.hs +++ b/P2P/Http/Types.hs @@ -41,7 +41,7 @@ instance APIVersion V2 where protocolVersion _ = P2P.ProtocolVersion 2 instance APIVersion V1 where protocolVersion _ = P2P.ProtocolVersion 1 instance APIVersion V0 where protocolVersion _ = P2P.ProtocolVersion 0 --- Keys, UUIDs, and filenames are base64 encoded since Servant uses +-- Keys, UUIDs, and filenames can be base64 encoded since Servant uses -- Text and so needs UTF-8. newtype B64Key = B64Key Key deriving (Show) @@ -52,7 +52,32 @@ newtype B64FilePath = B64FilePath RawFilePath newtype B64UUID t = B64UUID { fromB64UUID :: UUID } deriving (Show, Ord, Eq, Generic, NFData) --- Phantom types for B64UIID +encodeB64Text :: B.ByteString -> T.Text +encodeB64Text b = case TE.decodeUtf8' b of + Right t + | (snd <$> B.unsnoc b) == Just closebracket + && (fst <$> B.uncons b) == Just openbracket -> + b64wrapped + | otherwise -> t + Left _ -> b64wrapped + where + b64wrapped = TE.decodeUtf8Lenient $ "[" <> B64.encode b <> "]" + openbracket = fromIntegral (ord '[') + closebracket = fromIntegral (ord ']') + +decodeB64Text :: T.Text -> Either T.Text B.ByteString +decodeB64Text t = + case T.unsnoc t of + Just (t', lastc) | lastc == ']' -> + case T.uncons t' of + Just (firstc, t'') | firstc == '[' -> + case B64.decode (TE.encodeUtf8 t'') of + Right b -> Right b + Left _ -> Left "unable to base64 decode [] wrapped value" + _ -> Right (TE.encodeUtf8 t) + _ -> Right (TE.encodeUtf8 t) + +-- Phantom types. 
data ClientSide data ServerSide data Bypass @@ -163,33 +188,31 @@ parseAPIVersion v need t | otherwise = Left "bad version" instance ToHttpApiData B64Key where - toUrlPiece (B64Key k) = TE.decodeUtf8Lenient $ - B64.encode (serializeKey' k) + toUrlPiece (B64Key k) = encodeB64Text (serializeKey' k) instance FromHttpApiData B64Key where - parseUrlPiece t = case B64.decode (TE.encodeUtf8 t) of - Left _ -> Left "unable to base64 decode key" + parseUrlPiece t = case decodeB64Text t of Right b -> maybe (Left "key parse error") (Right . B64Key) (deserializeKey' b) + Left err -> Left err instance ToHttpApiData (B64UUID t) where - toUrlPiece (B64UUID u) = TE.decodeUtf8Lenient $ - B64.encode (fromUUID u) + toUrlPiece (B64UUID u) = encodeB64Text (fromUUID u) instance FromHttpApiData (B64UUID t) where - parseUrlPiece t = case B64.decode (TE.encodeUtf8 t) of - Left _ -> Left "unable to base64 decode UUID" + parseUrlPiece t = case decodeB64Text t of Right b -> case toUUID b of u@(UUID _) -> Right (B64UUID u) NoUUID -> Left "empty UUID" + Left err -> Left err instance ToHttpApiData B64FilePath where - toUrlPiece (B64FilePath f) = TE.decodeUtf8Lenient $ B64.encode f + toUrlPiece (B64FilePath f) = encodeB64Text f instance FromHttpApiData B64FilePath where - parseUrlPiece t = case B64.decode (TE.encodeUtf8 t) of - Left _ -> Left "unable to base64 decode filename" + parseUrlPiece t = case decodeB64Text t of Right b -> Right (B64FilePath b) + Left err -> Left err instance ToHttpApiData Offset where toUrlPiece (Offset (P2P.Offset n)) = T.pack (show n) @@ -292,7 +315,7 @@ instance FromJSON PutOffsetResultPlus where <*> v .: "plusuuids" instance FromJSON (B64UUID t) where - parseJSON (String t) = case B64.decode (TE.encodeUtf8 t) of + parseJSON (String t) = case decodeB64Text t of Right s -> pure (B64UUID (toUUID s)) Left _ -> mempty parseJSON _ = mempty @@ -300,7 +323,7 @@ instance FromJSON (B64UUID t) where instance ToJSON LockResult where toJSON (LockResult v (Just (B64UUID lck))) = 
object [ "locked" .= v - , "lockid" .= TE.decodeUtf8Lenient (B64.encode (fromUUID lck)) + , "lockid" .= encodeB64Text (fromUUID lck) ] toJSON (LockResult v Nothing) = object [ "locked" .= v diff --git a/doc/design/p2p_protocol_over_http/draft1.mdwn b/doc/design/p2p_protocol_over_http/draft1.mdwn index 683d1a5ab9..3988e38875 100644 --- a/doc/design/p2p_protocol_over_http/draft1.mdwn +++ b/doc/design/p2p_protocol_over_http/draft1.mdwn @@ -11,11 +11,13 @@ repository might. But this protocol requires that UTF-8 be used throughout, except where bodies use `Content-Type: application/octet-stream`. -So, all git-annex keys, uuids, and filenames in this protocol are -[base64url](https://datatracker.ietf.org/doc/html/rfc4648#section-5) encoded. +So this protocol allows using +[base64url](https://datatracker.ietf.org/doc/html/rfc4648#section-5) +encoding for such values. Any key, filename, or UUID wrapped in square +brackets is a base64url encoded value. +For example, "[Zm9v]" is the same as "foo". -Examples in this document use non-base64url-encoded values to show the -underlying data. +A filename like "[foo]" will need to itself be encoded that way: "[W2Zvb10=]" ## authentication diff --git a/doc/git-annex-p2phttp.mdwn b/doc/git-annex-p2phttp.mdwn index 655084e37b..323ec7006a 100644 --- a/doc/git-annex-p2phttp.mdwn +++ b/doc/git-annex-p2phttp.mdwn @@ -12,6 +12,13 @@ This allows a git-annex repository to be accessed over HTTP. It is the git-annex equivilant of git-http-backend(1), for serving a repository over HTTP with write access for authenticated users. +As well as serving the git-annex HTTP API, this server provides a +convenient way to download the content of any key, by using the path +"/git-annex/$uuid/key/$key". For example: + + $ curl http://localhost:8080/git-annex/f11773f0-11e1-45b2-9805-06db16768efe/key/SHA256E-s6--5891b5b522d5df086d0ff0b110fbd9d21bb4fc7163af34d08286a2e846f6be03 + hello + # OPTIONS * `--jobs=N`