Always use filesystem encoding for all file and handle reads and writes.
This is a big scary change. I have convinced myself it should be safe. I hope!
This commit is contained in:
parent
c89a9e6ca5
commit
8484c0c197
48 changed files with 75 additions and 109 deletions
|
@ -1,6 +1,6 @@
|
|||
{- GHC File system encoding handling.
|
||||
-
|
||||
- Copyright 2012-2014 Joey Hess <id@joeyh.name>
|
||||
- Copyright 2012-2016 Joey Hess <id@joeyh.name>
|
||||
-
|
||||
- License: BSD-2-clause
|
||||
-}
|
||||
|
@ -9,7 +9,7 @@
|
|||
{-# OPTIONS_GHC -fno-warn-tabs #-}
|
||||
|
||||
module Utility.FileSystemEncoding (
|
||||
fileEncoding,
|
||||
useFileSystemEncoding,
|
||||
withFilePath,
|
||||
md5FilePath,
|
||||
decodeBS,
|
||||
|
@ -19,7 +19,6 @@ module Utility.FileSystemEncoding (
|
|||
encodeW8NUL,
|
||||
decodeW8NUL,
|
||||
truncateFilePath,
|
||||
setConsoleEncoding,
|
||||
) where
|
||||
|
||||
import qualified GHC.Foreign as GHC
|
||||
|
@ -39,19 +38,30 @@ import qualified Data.ByteString.Lazy.UTF8 as L8
|
|||
|
||||
import Utility.Exception
|
||||
|
||||
{- Sets a Handle to use the filesystem encoding. This causes data
|
||||
- written or read from it to be encoded/decoded the same
|
||||
- as ghc 7.4 does to filenames etc. This special encoding
|
||||
- allows "arbitrary undecodable bytes to be round-tripped through it".
|
||||
{- Makes all subsequent Handles that are opened, as well as stdio Handles,
|
||||
- use the filesystem encoding, instead of the encoding of the current
|
||||
- locale.
|
||||
-
|
||||
- The filesystem encoding allows "arbitrary undecodable bytes to be
|
||||
- round-tripped through it". This avoids encoded failures when data is not
|
||||
- encoded matching the current locale.
|
||||
-
|
||||
- Note that code can still use hSetEncoding to change the encoding of a
|
||||
- Handle. This only affects the default encoding.
|
||||
-}
|
||||
fileEncoding :: Handle -> IO ()
|
||||
useFileSystemEncoding :: IO ()
|
||||
useFileSystemEncoding = do
|
||||
#ifndef mingw32_HOST_OS
|
||||
fileEncoding h = hSetEncoding h =<< Encoding.getFileSystemEncoding
|
||||
e <- Encoding.getFileSystemEncoding
|
||||
#else
|
||||
{- The file system encoding does not work well on Windows,
|
||||
- and Windows only has utf FilePaths anyway. -}
|
||||
fileEncoding h = hSetEncoding h Encoding.utf8
|
||||
{- The file system encoding does not work well on Windows,
|
||||
- and Windows only has utf FilePaths anyway. -}
|
||||
let e = Encoding.utf8
|
||||
#endif
|
||||
hSetEncoding stdin e
|
||||
hSetEncoding stdout e
|
||||
hSetEncoding stderr e
|
||||
Encoding.setLocaleEncoding e
|
||||
|
||||
{- Marshal a Haskell FilePath into a NUL terminated C string using temporary
|
||||
- storage. The FilePath is encoded using the filesystem encoding,
|
||||
|
@ -165,10 +175,3 @@ truncateFilePath n = reverse . go [] n . L8.fromString
|
|||
else go (c:coll) (cnt - x') (L8.drop 1 bs)
|
||||
_ -> coll
|
||||
#endif
|
||||
|
||||
{- This avoids ghc's output layer crashing on invalid encoded characters in
|
||||
- filenames when printing them out. -}
|
||||
setConsoleEncoding :: IO ()
|
||||
setConsoleEncoding = do
|
||||
fileEncoding stdout
|
||||
fileEncoding stderr
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue