diff --git a/CHANGELOG b/CHANGELOG index b9701f3546..126a80dc69 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -7,6 +7,8 @@ git-annex (6.20180410) UNRELEASED; urgency=medium Note that it's still allowed to move the content of a file from one repository to another when numcopies is not satisfied, as long as the move does not result in there being fewer copies. + * Fix mangling of --json output of utf-8 characters when not + running in a utf-8 locale. -- Joey Hess Mon, 09 Apr 2018 14:03:28 -0400 diff --git a/Command/Info.hs b/Command/Info.hs index 6ec6a1f71e..cc9c1b5fe0 100644 --- a/Command/Info.hs +++ b/Command/Info.hs @@ -11,9 +11,8 @@ module Command.Info where import "mtl" Control.Monad.State.Strict import qualified Data.Map.Strict as M -import qualified Data.Text as T +import qualified Data.Vector as V import Data.Ord -import Data.Aeson hiding (json) import Command import qualified Git @@ -34,6 +33,7 @@ import Config import Git.Config (boolConfig) import qualified Git.LsTree as LsTree import Utility.Percentage +import Utility.Aeson hiding (json) import Types.Transfer import Logs.Transfer import Types.Key @@ -283,7 +283,7 @@ simpleStat desc getval = stat desc $ json id getval nostat :: Stat nostat = return Nothing -json :: ToJSON j => (j -> String) -> StatState j -> String -> StatState String +json :: ToJSON' j => (j -> String) -> StatState j -> String -> StatState String json fmt a desc = do j <- a lift $ maybeShowJSON $ JSONChunk [(desc, j)] @@ -422,7 +422,7 @@ transfer_list :: Stat transfer_list = stat desc $ nojson $ lift $ do uuidmap <- Remote.remoteMap id ts <- getTransfers - maybeShowJSON $ JSONChunk [(desc, map (uncurry jsonify) ts)] + maybeShowJSON $ JSONChunk [(desc, V.fromList $ map (uncurry jsonify) ts)] return $ if null ts then "none" else multiLine $ @@ -438,11 +438,11 @@ transfer_list = stat desc $ nojson $ lift $ do , maybe (fromUUID $ transferUUID t) Remote.name $ M.lookup (transferUUID t) uuidmap ] - jsonify t i = object $ map (\(k, v) -> (T.pack k, v)) $ - [ ("transfer", toJSON (formatDirection (transferDirection t))) - , ("key", toJSON (key2file (transferKey t))) - , ("file", toJSON afile) - , ("remote", toJSON (fromUUID (transferUUID t))) + jsonify t i = object $ map (\(k, v) -> (packString k, v)) $ + [ ("transfer", toJSON' (formatDirection (transferDirection t))) + , ("key", toJSON' (transferKey t)) + , ("file", toJSON' afile) + , ("remote", toJSON' (fromUUID (transferUUID t))) ] where AssociatedFile afile = associatedFile i @@ -476,10 +476,13 @@ numcopies_stats :: Stat numcopies_stats = stat "numcopies stats" $ json fmt $ calc <$> (maybe M.empty numCopiesVarianceMap <$> cachedNumCopiesStats) where - calc = map (\(variance, count) -> (show variance, count)) + calc = V.fromList + . map (\(variance, count) -> (show variance, count)) . sortBy (flip (comparing fst)) . M.toList - fmt = multiLine . map (\(variance, count) -> "numcopies " ++ variance ++ ": " ++ show count) + fmt = multiLine + . map (\(variance, count) -> "numcopies " ++ variance ++ ": " ++ show count) + . V.toList reposizes_stats :: Stat reposizes_stats = stat desc $ nojson $ do diff --git a/Command/MetaData.hs b/Command/MetaData.hs index ef3f1da9a5..282b7fda05 100644 --- a/Command/MetaData.hs +++ b/Command/MetaData.hs @@ -14,12 +14,12 @@ import Logs.MetaData import Annex.WorkTree import Messages.JSON (JSONActionItem(..)) import Types.Messages +import Utility.Aeson import qualified Data.Set as S import qualified Data.Map as M import qualified Data.Text as T import qualified Data.ByteString.Lazy.UTF8 as BU -import Data.Aeson import Control.Concurrent cmd :: Command @@ -115,7 +115,7 @@ perform c o k = case getSet o of cleanup :: Key -> CommandCleanup cleanup k = do m <- getCurrentMetaData k - let Object o = toJSON (MetaDataFields m) + let Object o = toJSON' (MetaDataFields m) maybeShowJSON $ AesonObject o showLongNote $ unlines $ concatMap showmeta $ map unwrapmeta (fromMetaData m) @@ -129,8 +129,8 @@ cleanup k = do newtype MetaDataFields = MetaDataFields MetaData deriving (Show) -instance ToJSON MetaDataFields where - toJSON (MetaDataFields m) = object [ (fieldsField, toJSON m) ] +instance ToJSON' MetaDataFields where + toJSON' (MetaDataFields m) = object [ (fieldsField, toJSON' m) ] instance FromJSON MetaDataFields where parseJSON (Object v) = do diff --git a/Command/Whereis.hs b/Command/Whereis.hs index fddb2b5c6e..b14e231c17 100644 --- a/Command/Whereis.hs +++ b/Command/Whereis.hs @@ -15,6 +15,7 @@ import Remote.Web (getWebUrls) import Annex.UUID import qualified Data.Map as M +import qualified Data.Vector as V cmd :: Command cmd = noCommit $ withGlobalOptions [jsonOptions, annexedMatchingOptions] $ @@ -77,7 +78,7 @@ perform remotemap key = do untrustedheader = "The following untrusted locations may also have copies:\n" ppwhereis h ls urls = do descm <- uuidDescriptions - let urlvals = map (\(u, us) -> (u, Just us)) $ + let urlvals = map (\(u, us) -> (u, Just (V.fromList us))) $ filter (\(u,_) -> u `elem` ls) urls prettyPrintUUIDsWith (Just "urls") h descm (const Nothing) urlvals diff --git a/Key.hs b/Key.hs index 44e9acea4e..ade012a4ba 100644 --- a/Key.hs +++ b/Key.hs @@ -22,7 +22,6 @@ module Key ( prop_isomorphic_key_decode ) where -import Data.Aeson import Data.Char import qualified Data.Text as T @@ -30,6 +29,7 @@ import Common import Types.Key import Utility.QuickCheck import Utility.Bloom +import Utility.Aeson import qualified Utility.SimpleProtocol as Proto stubKey :: Key @@ -155,8 +155,8 @@ instance Hashable Key where hashIO32 = hashIO32 . key2file hashIO64 = hashIO64 . key2file -instance ToJSON Key where - toJSON = toJSON . key2file +instance ToJSON' Key where + toJSON' = toJSON' . key2file instance FromJSON Key where parseJSON (String t) = maybe mempty pure $ file2key $ T.unpack t diff --git a/Messages/JSON.hs b/Messages/JSON.hs index 897eb8cbfe..e63a263e3f 100644 --- a/Messages/JSON.hs +++ b/Messages/JSON.hs @@ -26,12 +26,10 @@ module Messages.JSON ( JSONActionItem(..), ) where -import Data.Aeson import Control.Applicative import qualified Data.Map as M -import qualified Data.Text as T import qualified Data.Vector as V -import qualified Data.ByteString.Lazy as B +import qualified Data.ByteString.Lazy as L import qualified Data.HashMap.Strict as HM import System.IO import System.IO.Unsafe (unsafePerformIO) @@ -44,6 +42,7 @@ import Types.Messages import Key import Utility.Metered import Utility.Percentage +import Utility.Aeson -- A global lock to avoid concurrent threads emitting json at the same time. {-# NOINLINE emitLock #-} @@ -53,7 +52,7 @@ emitLock = unsafePerformIO $ newMVar () emit :: Object -> IO () emit o = do takeMVar emitLock - B.hPut stdout (encode o) + L.hPut stdout (encode o) putStr "\n" putMVar emitLock () @@ -67,7 +66,7 @@ none = id start :: String -> Maybe FilePath -> Maybe Key -> JSONBuilder start command file key _ = Just (o, False) where - Object o = toJSON $ JSONActionItem + Object o = toJSON' $ JSONActionItem { itemCommand = Just command , itemKey = key , itemFile = file @@ -75,7 +74,7 @@ start command file key _ = Just (o, False) } end :: Bool -> JSONBuilder -end b (Just (o, _)) = Just (HM.insert "success" (toJSON b) o, True) +end b (Just (o, _)) = Just (HM.insert "success" (toJSON' b) o, True) end _ Nothing = Nothing finalize :: JSONOptions -> Object -> Object @@ -91,32 +90,32 @@ addErrorMessage msg o = where combinearray (Array new) (Array old) = Array (old <> new) combinearray new _old = new - v = Array $ V.fromList $ map (String . T.pack) msg + v = Array $ V.fromList $ map (String . packString) msg note :: String -> JSONBuilder note _ Nothing = Nothing -note s (Just (o, e)) = Just (HM.insertWith combinelines "note" (toJSON s) o, e) +note s (Just (o, e)) = Just (HM.insertWith combinelines "note" (toJSON' s) o, e) where combinelines (String new) (String old) = - String (old <> T.pack "\n" <> new) + String (old <> "\n" <> new) combinelines new _old = new info :: String -> JSONBuilder info s _ = Just (o, True) where - Object o = object ["info" .= toJSON s] + Object o = object ["info" .= toJSON' s] data JSONChunk v where AesonObject :: Object -> JSONChunk Object - JSONChunk :: ToJSON v => [(String, v)] -> JSONChunk [(String, v)] + JSONChunk :: ToJSON' v => [(String, v)] -> JSONChunk [(String, v)] add :: JSONChunk v -> JSONBuilder add v (Just (o, e)) = Just (HM.union o' o, e) where Object o' = case v of AesonObject ao -> Object ao - JSONChunk l -> object (map mkPair l) - mkPair (s, d) = (T.pack s, toJSON d) + JSONChunk l -> object $ map mkPair l + mkPair (s, d) = (packString s, toJSON' d) add _ Nothing = Nothing complete :: JSONChunk v -> JSONBuilder @@ -145,8 +144,8 @@ data DualDisp = DualDisp , dispJson :: String } -instance ToJSON DualDisp where - toJSON = toJSON . dispJson +instance ToJSON' DualDisp where + toJSON' = toJSON' . dispJson instance Show DualDisp where show = dispNormal @@ -156,10 +155,10 @@ instance Show DualDisp where -- serialization of Map, which uses "[key, value]". data ObjectMap a = ObjectMap { fromObjectMap :: M.Map String a } -instance ToJSON a => ToJSON (ObjectMap a) where - toJSON (ObjectMap m) = object $ map go $ M.toList m +instance ToJSON' a => ToJSON' (ObjectMap a) where + toJSON' (ObjectMap m) = object $ map go $ M.toList m where - go (k, v) = (T.pack k, toJSON v) + go (k, v) = (packString k, toJSON' v) -- An item that a git-annex command acts on, and displays a JSON object about. data JSONActionItem a = JSONActionItem @@ -170,13 +169,13 @@ data JSONActionItem a = JSONActionItem } deriving (Show) -instance ToJSON (JSONActionItem a) where - toJSON i = object $ catMaybes +instance ToJSON' (JSONActionItem a) where + toJSON' i = object $ catMaybes [ Just $ "command" .= itemCommand i , case itemKey i of Nothing -> Nothing - Just k -> Just $ "key" .= toJSON k - , Just $ "file" .= itemFile i + Just k -> Just $ "key" .= toJSON' k + , Just $ "file" .= toJSON' (itemFile i) -- itemAdded is not included; must be added later by 'add' ] diff --git a/Remote.hs b/Remote.hs index 8d826712c1..29f59a7bf8 100644 --- a/Remote.hs +++ b/Remote.hs @@ -57,9 +57,8 @@ module Remote ( ) where import Data.Ord -import Data.Aeson import qualified Data.Map as M -import qualified Data.Text as T +import qualified Data.Vector as V import Annex.Common import Types.Remote @@ -74,6 +73,7 @@ import Config import Config.DynamicConfig import Git.Types (RemoteName) import qualified Git +import Utility.Aeson {- Map from UUIDs of Remotes to a calculated value. -} remoteMap :: (Remote -> v) -> Annex (M.Map UUID v) @@ -197,7 +197,7 @@ prettyPrintUUIDsDescs header descm uuids = {- An optional field can be included in the list of UUIDs. -} prettyPrintUUIDsWith - :: ToJSON v + :: ToJSON' v => Maybe String -> String -> M.Map UUID RemoteName @@ -206,7 +206,7 @@ prettyPrintUUIDsWith -> Annex String prettyPrintUUIDsWith optfield header descm showval uuidvals = do hereu <- getUUID - maybeShowJSON $ JSONChunk [(header, map (jsonify hereu) uuidvals)] + maybeShowJSON $ JSONChunk [(header, V.fromList $ map (jsonify hereu) uuidvals)] return $ unwords $ map (\u -> "\t" ++ prettify hereu u ++ "\n") uuidvals where finddescription u = M.findWithDefault "" u descm @@ -224,11 +224,11 @@ prettyPrintUUIDsWith optfield header descm showval uuidvals = do Nothing -> s Just val -> val ++ ": " ++ s jsonify hereu (u, optval) = object $ catMaybes - [ Just (T.pack "uuid", toJSON $ fromUUID u) - , Just (T.pack "description", toJSON $ finddescription u) - , Just (T.pack "here", toJSON $ hereu == u) + [ Just (packString "uuid", toJSON' $ fromUUID u) + , Just (packString "description", toJSON' $ finddescription u) + , Just (packString "here", toJSON' $ hereu == u) , case (optfield, optval) of - (Just field, Just val) -> Just (T.pack field, toJSON val) + (Just field, Just val) -> Just (packString field, toJSON' val) _ -> Nothing ] diff --git a/Remote/Tahoe.hs b/Remote/Tahoe.hs index 0091f27ba3..6423fefdb1 100644 --- a/Remote/Tahoe.hs +++ b/Remote/Tahoe.hs @@ -23,7 +23,7 @@ module Remote.Tahoe (remote) where import qualified Data.Map as M -import Data.Aeson +import Utility.Aeson import Data.ByteString.Lazy.UTF8 (fromString) import Control.Concurrent.STM diff --git a/Test.hs b/Test.hs index b0f4186563..9bd43bbe69 100644 --- a/Test.hs +++ b/Test.hs @@ -21,7 +21,6 @@ import Test.Tasty.Ingredients.Rerun import Options.Applicative (switch, long, help, internal) import qualified Data.Map as M -import qualified Data.Aeson import qualified Data.ByteString.Lazy.UTF8 as BU8 import System.Environment @@ -83,6 +82,7 @@ import qualified Utility.HumanTime import qualified Utility.Base64 import qualified Utility.Tmp.Dir import qualified Utility.FileSystemEncoding +import qualified Utility.Aeson #ifndef mingw32_HOST_OS import qualified Remote.Helper.Encryptable import qualified Types.Crypto @@ -971,7 +971,7 @@ test_merge = intmpclonerepo $ test_info :: Assertion test_info = intmpclonerepo $ do json <- BU8.fromString <$> git_annex_output "info" ["--json"] - case Data.Aeson.eitherDecode json :: Either String Data.Aeson.Value of + case Utility.Aeson.eitherDecode json :: Either String Utility.Aeson.Value of Right _ -> return () Left e -> assertFailure e diff --git a/Types/Messages.hs b/Types/Messages.hs index d45174bb71..8ca60651f6 100644 --- a/Types/Messages.hs +++ b/Types/Messages.hs @@ -9,7 +9,7 @@ module Types.Messages where -import qualified Data.Aeson as Aeson +import qualified Utility.Aeson as Aeson import Control.Concurrent #ifdef WITH_CONCURRENTOUTPUT diff --git a/Types/MetaData.hs b/Types/MetaData.hs index 279aacbb84..e05a8f72ec 100644 --- a/Types/MetaData.hs +++ b/Types/MetaData.hs @@ -43,6 +43,7 @@ module Types.MetaData ( import Common import Utility.Base64 import Utility.QuickCheck +import Utility.Aeson import qualified Data.Text as T import qualified Data.Set as S @@ -50,15 +51,14 @@ import qualified Data.Map as M import qualified Data.HashMap.Strict as HM import Data.Char import qualified Data.CaseInsensitive as CI -import Data.Aeson newtype MetaData = MetaData (M.Map MetaField (S.Set MetaValue)) deriving (Show, Eq, Ord) -instance ToJSON MetaData where - toJSON (MetaData m) = object $ map go (M.toList m) +instance ToJSON' MetaData where + toJSON' (MetaData m) = object $ map go (M.toList m) where - go (MetaField f, s) = (T.pack (CI.original f), toJSON s) + go (MetaField f, s) = (packString (CI.original f), toJSON' s) instance FromJSON MetaData where parseJSON (Object o) = do @@ -82,8 +82,8 @@ newtype MetaField = MetaField (CI.CI String) data MetaValue = MetaValue CurrentlySet String deriving (Read, Show) -instance ToJSON MetaValue where - toJSON (MetaValue _ v) = toJSON v +instance ToJSON' MetaValue where + toJSON' (MetaValue _ v) = toJSON' v instance FromJSON MetaValue where parseJSON (String v) = return $ MetaValue (CurrentlySet True) (T.unpack v) diff --git a/Utility/Aeson.hs b/Utility/Aeson.hs new file mode 100644 index 0000000000..7147e516bb --- /dev/null +++ b/Utility/Aeson.hs @@ -0,0 +1,86 @@ +{- GHC File system encoding support for Aeson. + - + - Import instead of Data.Aeson + - + - Copyright 2018 Joey Hess + - + - License: BSD-2-clause + -} + +{-# LANGUAGE FlexibleInstances, TypeSynonymInstances #-} + +module Utility.Aeson ( + module X, + ToJSON'(..), + encode, + packString, +) where + +import Data.Aeson as X hiding (ToJSON, toJSON, encode) +import Data.Aeson hiding (encode) +import qualified Data.Aeson +import qualified Data.Text as T +import qualified Data.Text.Encoding as T +import qualified Data.ByteString.Lazy as L +import qualified Data.ByteString as S +import qualified Data.Set +import qualified Data.Vector +import Prelude + +import Utility.FileSystemEncoding + +-- | Use this instead of Data.Aeson.encode to make sure that the +-- below String instance is used. +encode :: ToJSON' a => a -> L.ByteString +encode = Data.Aeson.encode . toJSON' + +-- | Aeson has an unfortunate ToJSON instance for Char and [Char] +-- which does not support Strings containing UTF8 characters +-- encoded using the filesystem encoding when run in a non-utf8 locale. +-- +-- Since we can't replace that with a instance that does the right +-- thing, instead here's a new class that handles String right. +class ToJSON' a where + toJSON' :: a -> Value + +instance ToJSON' String where + toJSON' = toJSON . packString + +-- | Pack a String to Text, correctly handling the filesystem encoding. +-- +-- Use this instead of Data.Text.pack. +-- +-- Note that if the string contains invalid UTF8 characters not using +-- the FileSystemEncoding, this is the same as Data.Text.pack. +packString :: String -> T.Text +packString s = case T.decodeUtf8' (S.concat $ L.toChunks $ encodeBS s) of + Right t -> t + Left _ -> T.pack s + +-- | An instance for lists cannot be included as it would overlap with +-- the String instance. Instead, you can use a Vector. +instance ToJSON' s => ToJSON' (Data.Vector.Vector s) where + toJSON' = toJSON . map toJSON' . Data.Vector.toList + +-- Aeson generates the same JSON for a Set as for a list. +instance ToJSON' s => ToJSON' (Data.Set.Set s) where + toJSON' = toJSON . map toJSON' . Data.Set.toList + +instance (ToJSON' a, ToJSON a) => ToJSON' (Maybe a) where + toJSON' (Just a) = toJSON (Just (toJSON' a)) + toJSON' v@Nothing = toJSON v + +instance (ToJSON' a, ToJSON a, ToJSON' b, ToJSON b) => ToJSON' (a, b) where + toJSON' (a, b) = toJSON ((toJSON' a, toJSON' b)) + +instance ToJSON' Bool where + toJSON' = toJSON + +instance ToJSON' Integer where + toJSON' = toJSON + +instance ToJSON' Object where + toJSON' = toJSON + +instance ToJSON' Value where + toJSON' = toJSON diff --git a/doc/bugs/json_should_be_utf8_regardless_of_locale.mdwn b/doc/bugs/json_should_be_utf8_regardless_of_locale.mdwn index 66d0de2244..ac7275a77a 100644 --- a/doc/bugs/json_should_be_utf8_regardless_of_locale.mdwn +++ b/doc/bugs/json_should_be_utf8_regardless_of_locale.mdwn @@ -2,6 +2,25 @@ json is defined as always utf-8. However, when LANG=C, git-annex --json currently outputs "file":"���������" instead of "file":"äöü東" for that utf-8 filename. --[[Joey]] -(Note that git-annex can operate on non-utf8 filenames; it's not defined -what the json contains then, which might or might not be considered a bug -but this is not about that.) +This can also affect keys when they contain some non-utf8 from eg the +extension. And metadata keys and values can contain non-utf8 and also get +converted to json with similar results. + +Note that git-annex can operate on non-utf8 filenames and keys; +it's not defined what the json contains then, and it currently contains +similar garbage. + +This happens because aeson's instance of ToJSON for Char uses +Text.singleton, and Text does not handle ghc's filesystem encoding +for String. Instead it defaults to `\65533` for each byte encoded with the +filesystem encoding. + +So, git-annex will need to convert filenames and keys and anything else +that might use the filesystem encoding to Text itself in some +way that does respect the filesystem encoding. Ie, use encodeBS to convert +it to a ByteString and then Data.Text.Encoding.decodeUtf8. + +> [[done]] that. --[[Joey]] + +What about git-annex commands that take json as input, +when run in a non-utf8 locale? Tested that, it is handled ok. --[[Joey]] diff --git a/git-annex.cabal b/git-annex.cabal index 4aa5bfcb8e..d2b2a06ffc 100644 --- a/git-annex.cabal +++ b/git-annex.cabal @@ -999,6 +999,7 @@ Executable git-annex Upgrade.V3 Upgrade.V4 Upgrade.V5 + Utility.Aeson Utility.Applicative Utility.AuthToken Utility.Base64