use colon not space to delimit content identifier list

InodeCache serializes to a value with spaces, and it seems likely other
things will too, and we want to avoid unnecessary base64 encoding of content
identifiers when possible.
This commit is contained in:
Joey Hess 2019-02-21 13:45:16 -04:00
parent 1f6339ade7
commit 56137ce0d2
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
2 changed files with 6 additions and 6 deletions

View file

@ -30,10 +30,10 @@ buildContentIdentifierList :: [ContentIdentifier] -> Builder
buildContentIdentifierList l = case l of
[] -> mempty
[c] -> buildcid c
(c:cs) -> buildcid c <> charUtf8 ' ' <> buildContentIdentifierList cs
(c:cs) -> buildcid c <> charUtf8 ':' <> buildContentIdentifierList cs
where
buildcid (ContentIdentifier c)
| S8.any (`elem` [' ', '\r', '\n']) c || "!" `S8.isPrefixOf` c =
| S8.any (`elem` [':', '\r', '\n']) c || "!" `S8.isPrefixOf` c =
charUtf8 '!' <> byteString (toB64' c)
| otherwise = byteString c
@ -44,7 +44,7 @@ parseContentIdentifierList :: A.Parser [ContentIdentifier]
parseContentIdentifierList = reverse . catMaybes <$> valueparser []
where
valueparser l = do
b <- A8.takeWhile (/= ' ')
b <- A8.takeWhile (/= ':')
let cid = if "!" `S8.isPrefixOf` b
then ContentIdentifier <$> fromB64Maybe' (S.drop 1 b)
else Just $ ContentIdentifier b

View file

@ -287,11 +287,11 @@ These log files store per-remote content identifiers for keys.
A given key may have any number of content identifiers.
The format is a timestamp, followed by the uuid of the remote,
followed by the content identifiers. If a content identifier
contains any whitespace (including \r or \n), it will be base64
followed by the content identifiers which are separated by colons.
If a content identifier contains a colon or \r or \n, it will be base64
encoded. Base64 encoded values are indicated by prefixing them with "!".
1287290776.765152s e605dca6-446a-11e0-8b2a-002170d25c55 5248916 5250378
1287290776.765152s e605dca6-446a-11e0-8b2a-002170d25c55 5248916:5250378
## `aaa/bbb/*.log.cnk`