fix mojibake reversion in display of utf8

When displaying a ByteString like "💕", safeOutput operates on
individual bytes like "\240\159\146\149" and isControl '\146' = True,
so it got truncated to just "\240".

So, only treat the low control characters, and DEL, as control
characters.

Also split Utility.Terminal out of Utility.SafeOutput. The former needs
win32, but Utility.SafeOutput is used by Control.Exception, which is
used by Setup.

Sponsored-by: Nicholas Golder-Manning on Patreon
This commit is contained in:
Joey Hess 2023-04-12 13:48:21 -04:00
parent c50aa21d5f
commit a576fc3b12
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
7 changed files with 61 additions and 42 deletions

View file

@ -11,55 +11,27 @@
module Utility.SafeOutput (
safeOutput,
IsTerminal(..),
checkIsTerminal,
safeOutputChar,
) where
import Data.Char
import qualified Data.ByteString as S
import System.IO
#ifdef mingw32_HOST_OS
import System.Win32.MinTTY (isMinTTYHandle)
import System.Win32.File
import System.Win32.Types
import Graphics.Win32.Misc
import Control.Exception
#endif
-- Class of types that support safeOutput, which takes a value and
-- returns a value of the same type. The instances in this module
-- implement it by filtering the value's characters or bytes.
class SafeOutputtable t where
safeOutput :: t -> t
-- Filters a String character by character, keeping only the characters
-- for which the predicate returns True.
-- NOTE(review): two bindings of safeOutput appear below; presumably they
-- are the before/after lines of this diff rendered without -/+ markers
-- -- confirm against the raw diff.
instance SafeOutputtable String where
safeOutput = filter safeChar
safeOutput = filter safeOutputChar
-- Filters a ByteString byte by byte: each byte is converted to a Char
-- with chr . fromIntegral and tested on its own, so the bytes of a
-- multi-byte UTF-8 sequence are judged individually, not decoded first.
-- NOTE(review): two bindings of safeOutput appear below; presumably they
-- are the before/after lines of this diff rendered without -/+ markers
-- -- confirm against the raw diff.
instance SafeOutputtable S.ByteString where
safeOutput = S.filter (safeChar . chr . fromIntegral)
safeOutput = S.filter (safeOutputChar . chr . fromIntegral)
-- Predicate used by the safeOutput instances: True means the character
-- is kept, False means it is filtered out. Guards are tried top to
-- bottom: non-control characters, newline and tab are kept; DEL is
-- dropped; any other character with a code above 31 is kept; what
-- remains (the low control characters) is dropped.
-- NOTE(review): the safeChar signature/head lines and the isControl guard
-- look like the pre-change lines of this diff rendered without -/+
-- markers -- confirm against the raw diff.
safeChar :: Char -> Bool
safeChar c
safeOutputChar :: Char -> Bool
safeOutputChar c
| not (isControl c) = True
| c == '\n' = True
| c == '\t' = True
| c == '\DEL' = False
-- Keeps codes 128-159 too: isControl is True for them (e.g. '\146'), but
-- such bytes occur inside UTF-8 sequences when filtering a ByteString.
| ord c > 31 = True
| otherwise = False
-- Wrapper around the Bool result returned by checkIsTerminal.
newtype IsTerminal = IsTerminal Bool
-- | Check whether a handle is connected to a terminal.
--
-- On non-Windows hosts this is the result of 'hIsTerminalDevice'.
-- On Windows, when 'hIsTerminalDevice' reports False, the Win32 standard
-- output handle is additionally tested with 'isMinTTYHandle', and that
-- result is returned instead.
--
-- NOTE(review): the Windows fallback always looks up the standard output
-- handle, whatever handle was passed in -- confirm callers only pass
-- handles for which that is appropriate.
checkIsTerminal :: Handle -> IO IsTerminal
checkIsTerminal h = do
#ifndef mingw32_HOST_OS
    b <- hIsTerminalDevice h
    return (IsTerminal b)
#else
    b <- hIsTerminalDevice h
    if b
        then return (IsTerminal b)
        else do
            -- If the standard output handle cannot be obtained, use the
            -- null handle, which is reported as "not a terminal" below.
            h' <- getStdHandle sTD_OUTPUT_HANDLE
                `catch` \(_ :: IOError) ->
                    return nullHANDLE
            -- Test the Win32 HANDLE that was just looked up (h'), not the
            -- Haskell Handle argument (h).
            if h' == nullHANDLE
                then return (IsTerminal False)
                else do
                    -- b is known to be False in this branch, so the answer
                    -- must come from the MinTTY check.
                    b' <- isMinTTYHandle h'
                    return (IsTerminal b')
#endif