Windows: Fix some filename encoding bugs.
http://git-annex.branchable.com/bugs/Unicode_file_names_ignored_on_Windows/ Not a complete fix yet.
This commit is contained in:
parent
2f52f727c0
commit
1052eeface
8 changed files with 86 additions and 8 deletions
|
@ -1,14 +1,17 @@
|
|||
{- GHC File system encoding handling.
|
||||
-
|
||||
- Copyright 2012-2013 Joey Hess <joey@kitenet.net>
|
||||
- Copyright 2012-2014 Joey Hess <joey@kitenet.net>
|
||||
-
|
||||
- Licensed under the GNU GPL version 3 or higher.
|
||||
-}
|
||||
|
||||
{-# LANGUAGE CPP #-}
|
||||
|
||||
module Utility.FileSystemEncoding (
|
||||
fileEncoding,
|
||||
withFilePath,
|
||||
md5FilePath,
|
||||
decodeBS,
|
||||
decodeW8,
|
||||
encodeW8,
|
||||
truncateFilePath,
|
||||
|
@ -22,13 +25,24 @@ import System.IO.Unsafe
|
|||
import qualified Data.Hash.MD5 as MD5
|
||||
import Data.Word
|
||||
import Data.Bits.Utils
|
||||
import qualified Data.ByteString.Lazy as L
|
||||
#ifdef mingw32_HOST_OS
|
||||
import qualified Data.ByteString.Lazy.UTF8 as L8
|
||||
#endif
|
||||
|
||||
{- Sets a Handle to use the filesystem encoding. This causes data
|
||||
- written or read from it to be encoded/decoded the same
|
||||
- as ghc 7.4 does to filenames etc. This special encoding
|
||||
- allows "arbitrary undecodable bytes to be round-tripped through it". -}
|
||||
- allows "arbitrary undecodable bytes to be round-tripped through it".
|
||||
-}
|
||||
fileEncoding :: Handle -> IO ()
|
||||
#ifndef mingw32_HOST_OS
|
||||
fileEncoding h = hSetEncoding h =<< Encoding.getFileSystemEncoding
|
||||
#else
|
||||
{- The file system encoding does not work well on Windows,
|
||||
- and Windows only has utf FilePaths anyway. -}
|
||||
fileEncoding h = hSetEncoding h Encoding.utf8
|
||||
#endif
|
||||
|
||||
{- Marshal a Haskell FilePath into a NUL terminated C string using temporary
|
||||
- storage. The FilePath is encoded using the filesystem encoding,
|
||||
|
@ -60,6 +74,16 @@ _encodeFilePath fp = unsafePerformIO $ do
|
|||
md5FilePath :: FilePath -> MD5.Str
|
||||
md5FilePath = MD5.Str . _encodeFilePath
|
||||
|
||||
{- Decodes a ByteString into a FilePath, applying the filesystem encoding. -}
|
||||
decodeBS :: L.ByteString -> FilePath
|
||||
#ifndef mingw32_HOST_OS
|
||||
decodeBS = encodeW8 . L.unpack
|
||||
#else
|
||||
{- On Windows, we assume that the ByteString is utf-8, since Windows
|
||||
- only uses unicode for filenames. -}
|
||||
decodeBS = L8.toString
|
||||
#endif
|
||||
|
||||
{- Converts a [Word8] to a FilePath, encoding using the filesystem encoding.
|
||||
-
|
||||
- w82c produces a String, which may contain Chars that are invalid
|
||||
|
@ -84,6 +108,7 @@ decodeW8 = s2w8 . _encodeFilePath
|
|||
- cost of efficiency when running on a large FilePath.
|
||||
-}
|
||||
truncateFilePath :: Int -> FilePath -> FilePath
|
||||
#ifndef mingw32_HOST_OS
|
||||
truncateFilePath n = go . reverse
|
||||
where
|
||||
go f =
|
||||
|
@ -91,3 +116,17 @@ truncateFilePath n = go . reverse
|
|||
in if length bytes <= n
|
||||
then reverse f
|
||||
else go (drop 1 f)
|
||||
#else
|
||||
{- On Windows, count the number of bytes used by each utf8 character. -}
|
||||
truncateFilePath n = reverse . go [] n . L8.fromString
|
||||
where
|
||||
go coll cnt bs
|
||||
| cnt <= 0 = coll
|
||||
| otherwise = case L8.decode bs of
|
||||
Just (c, x) | c /= L8.replacement_char ->
|
||||
let x' = fromIntegral x
|
||||
in if cnt - x' < 0
|
||||
then coll
|
||||
else go (c:coll) (cnt - x') (L8.drop 1 bs)
|
||||
_ -> coll
|
||||
#endif
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue