From 9862d64bf90de645ef1acfbf862c5b340475aacf Mon Sep 17 00:00:00 2001
From: Joey Hess <joeyh@joeyh.name>
Date: Wed, 1 Nov 2023 13:09:42 -0400
Subject: [PATCH] bring back "bag of bytes" handling for ciphers

Fixes test suite failure with LANG=C caused by commit
3742263c99180d1391e4fd51724aae52d6d02137

Sponsored-By: the NIH-funded NICEMAN (ReproNim TR&D3) project
---
 Remote/Helper/Encryptable.hs                  | 34 ++++++++++++++++---
 ..._52185ae4ebdfcb61840444e3ef1e0404._comment | 25 ++++++++++++++
 2 files changed, 54 insertions(+), 5 deletions(-)
 create mode 100644 doc/bugs/fresh_test_fails__58___hPut__58___invalid_argument_/comment_2_52185ae4ebdfcb61840444e3ef1e0404._comment

diff --git a/Remote/Helper/Encryptable.hs b/Remote/Helper/Encryptable.hs
index e5f31de691..8e3e0a3f00 100644
--- a/Remote/Helper/Encryptable.hs
+++ b/Remote/Helper/Encryptable.hs
@@ -28,6 +28,8 @@ module Remote.Helper.Encryptable (
 
 import qualified Data.Map as M
 import qualified Data.Set as S
+import qualified Data.ByteString as B
+import Data.Word
 import Control.Concurrent.STM
 
 import Annex.Common
@@ -271,7 +273,7 @@ storeCipher cip = case cip of
 	(EncryptedCipher t _ ks) -> addcipher t . storekeys ks cipherkeysField
 	(SharedPubKeyCipher t ks) -> addcipher t . storekeys ks pubkeysField
   where
-	addcipher t = M.insert cipherField (Accepted (decodeBS (toB64 (encodeBS t))))
+	addcipher t = M.insert cipherField (Accepted (toB64bs t))
 	storekeys (KeyIds l) n = M.insert n (Accepted (intercalate "," l))
 
 {- Extracts an StorableCipher from a remote's configuration. -}
@@ -280,13 +282,13 @@ extractCipher c = case (getRemoteConfigValue cipherField c,
 			(getRemoteConfigValue cipherkeysField c <|> getRemoteConfigValue pubkeysField c),
 			getRemoteConfigValue encryptionField c) of
 	(Just t, Just ks, Just HybridEncryption) ->
-		Just $ EncryptedCipher (decodeBS (fromB64 (encodeBS t))) Hybrid (readkeys ks)
+		Just $ EncryptedCipher (fromB64bs t) Hybrid (readkeys ks)
 	(Just t, Just ks, Just PubKeyEncryption) ->
-		Just $ EncryptedCipher (decodeBS (fromB64 (encodeBS t))) PubKey (readkeys ks)
+		Just $ EncryptedCipher (fromB64bs t) PubKey (readkeys ks)
 	(Just t, Just ks, Just SharedPubKeyEncryption) ->
-		Just $ SharedPubKeyCipher (decodeBS (fromB64 (encodeBS t))) (readkeys ks)
+		Just $ SharedPubKeyCipher (fromB64bs t) (readkeys ks)
 	(Just t, Nothing, Just SharedEncryption) ->
-		Just $ SharedCipher (decodeBS (fromB64 (encodeBS t)))
+		Just $ SharedCipher (fromB64bs t)
 	_ -> Nothing
   where
 	readkeys = KeyIds . splitc ','
@@ -320,3 +322,25 @@ describeCipher c = case c of
 	(SharedPubKeyCipher _ ks) -> showkeys ks
   where
 	showkeys (KeyIds { keyIds = ks }) = "to gpg keys: " ++ unwords ks
+
+{- toB64bs and fromB64bs apply toB64 and fromB64 to a String, treating
+ - each Char as a single byte. Not using encodeBS because these "Strings"
+ - are really bags of bytes and are not encoded with the filesystem encoding.
+ - So this hack is needed to work on all locales and roundtrip cleanly. -}
+toB64bs :: String -> String
+toB64bs = w82s . B.unpack . toB64 . B.pack . s2w8
+
+fromB64bs :: String -> String -- counterpart of toB64bs, running fromB64 instead
+fromB64bs = w82s . B.unpack . fromB64 . B.pack . s2w8
+
+c2w8 :: Char -> Word8 -- the Char's code point as a byte
+c2w8 = fromIntegral . fromEnum -- fromIntegral wraps code points above 255 modulo 256
+
+w82c :: Word8 -> Char -- the Char whose code point equals the byte (0-255)
+w82c = toEnum . fromIntegral
+
+s2w8 :: String -> [Word8] -- one Word8 per Char, via c2w8
+s2w8 = map c2w8
+
+w82s :: [Word8] -> String -- one Char per Word8, via w82c
+w82s = map w82c
diff --git a/doc/bugs/fresh_test_fails__58___hPut__58___invalid_argument_/comment_2_52185ae4ebdfcb61840444e3ef1e0404._comment b/doc/bugs/fresh_test_fails__58___hPut__58___invalid_argument_/comment_2_52185ae4ebdfcb61840444e3ef1e0404._comment
new file mode 100644
index 0000000000..ea1469e6cd
--- /dev/null
+++ b/doc/bugs/fresh_test_fails__58___hPut__58___invalid_argument_/comment_2_52185ae4ebdfcb61840444e3ef1e0404._comment
@@ -0,0 +1,25 @@
+[[!comment format=mdwn
+ username="joey"
+ subject="""comment 2"""
+ date="2023-11-01T16:53:48Z"
+ content="""
+Will probably need to revert the Remote/Helper/Encryptable.hs part of that
+commit. 
+
+What is happening here is that encodeBS fails when run on the String from
+a SharedPubKeyCipher. That String comes from Utility.Gpg.genRandom and is
+literally a bunch of random bytes, so it's not encoded with the filesystem
+encoding. It really ought to be a ByteString of course, but since it's
+not, anything involving encoding it fails.
+
+That's why the old code had this comment:
+
+	{- Not using Utility.Base64 because these "Strings" are really
+	 - bags of bytes and that would convert to unicode and not round-trip
+	 - cleanly. -}
+
+And converted that String to a ByteString via `B.pack . s2w8`, which avoids this problem.
+
+What an ugly thing. Really ought to be fixed to use ByteString throughout. 
+But for now, let's revert.
+"""]]