Always use filesystem encoding for all file and handle reads and writes.

This is a big scary change. I have convinced myself it should be safe. I
hope!
This commit is contained in:
Joey Hess 2016-12-24 14:46:31 -04:00
parent c89a9e6ca5
commit 8484c0c197
No known key found for this signature in database
GPG key ID: C910D9222512E3C7
48 changed files with 75 additions and 109 deletions

View file

@ -1,6 +1,6 @@
{- GHC File system encoding handling.
-
- Copyright 2012-2014 Joey Hess <id@joeyh.name>
- Copyright 2012-2016 Joey Hess <id@joeyh.name>
-
- License: BSD-2-clause
-}
@ -9,7 +9,7 @@
{-# OPTIONS_GHC -fno-warn-tabs #-}
module Utility.FileSystemEncoding (
fileEncoding,
useFileSystemEncoding,
withFilePath,
md5FilePath,
decodeBS,
@ -19,7 +19,6 @@ module Utility.FileSystemEncoding (
encodeW8NUL,
decodeW8NUL,
truncateFilePath,
setConsoleEncoding,
) where
import qualified GHC.Foreign as GHC
@ -39,19 +38,30 @@ import qualified Data.ByteString.Lazy.UTF8 as L8
import Utility.Exception
{- Sets a Handle to use the filesystem encoding. This causes data
- written or read from it to be encoded/decoded the same
- as ghc 7.4 does to filenames etc. This special encoding
- allows "arbitrary undecodable bytes to be round-tripped through it".
{- Makes all subsequent Handles that are opened, as well as stdio Handles,
- use the filesystem encoding, instead of the encoding of the current
- locale.
-
- The filesystem encoding allows "arbitrary undecodable bytes to be
- round-tripped through it". This avoids encoded failures when data is not
- encoded matching the current locale.
-
- Note that code can still use hSetEncoding to change the encoding of a
- Handle. This only affects the default encoding.
-}
fileEncoding :: Handle -> IO ()
useFileSystemEncoding :: IO ()
useFileSystemEncoding = do
#ifndef mingw32_HOST_OS
fileEncoding h = hSetEncoding h =<< Encoding.getFileSystemEncoding
e <- Encoding.getFileSystemEncoding
#else
{- The file system encoding does not work well on Windows,
- and Windows only has utf FilePaths anyway. -}
fileEncoding h = hSetEncoding h Encoding.utf8
{- The file system encoding does not work well on Windows,
- and Windows only has utf FilePaths anyway. -}
let e = Encoding.utf8
#endif
hSetEncoding stdin e
hSetEncoding stdout e
hSetEncoding stderr e
Encoding.setLocaleEncoding e
{- Marshal a Haskell FilePath into a NUL terminated C string using temporary
- storage. The FilePath is encoded using the filesystem encoding,
@ -165,10 +175,3 @@ truncateFilePath n = reverse . go [] n . L8.fromString
else go (c:coll) (cnt - x') (L8.drop 1 bs)
_ -> coll
#endif
{- This avoids ghc's output layer crashing on invalid encoded characters in
- filenames when printing them out. -}
setConsoleEncoding :: IO ()
setConsoleEncoding = do
fileEncoding stdout
fileEncoding stderr