git-annex/Utility/FileSystemEncoding.hs
Joey Hess 7189dfd77d git-annex (5.20131127) unstable; urgency=low
* webapp: Detect when upgrades are available, and upgrade if the user
    desires.
    (Only when git-annex is installed using the prebuilt binaries
    from git-annex upstream, not from eg Debian.)
  * assistant: Detect when the git-annex binary is modified or replaced,
    and either prompt the user to restart the program, or automatically
    restart it.
  * annex.autoupgrade configures both the above upgrade behaviors.
  * Added support for quvi 0.9. Slightly suboptimal due to limitations in its
    interface compared with the old version.
  * Bug fix: annex.version did not get set on automatic upgrade to v5 direct
    mode repo, so the upgrade was performed repeatedly, slowing commands down.
  * webapp: Fix bug that broke switching between local repositories
    that use the new guarded direct mode.
  * Android: Fix stripping of the git-annex binary.
  * Android: Make terminal app show git-annex version number.
  * Android: Re-enable XMPP support.
  * reinject: Allow to be used in direct mode.
  * Futher improvements to git repo repair. Has now been tested in tens
    of thousands of intentionally damaged repos, and successfully
    repaired them all.
  * Allow use of --unused in bare repository.

# imported from the archive
2013-11-27 18:41:44 -04:00

93 lines
3.2 KiB
Haskell

{- GHC File system encoding handling.
-
- Copyright 2012-2013 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Utility.FileSystemEncoding (
fileEncoding,
withFilePath,
md5FilePath,
decodeW8,
encodeW8,
truncateFilePath,
) where
import qualified GHC.Foreign as GHC
import qualified GHC.IO.Encoding as Encoding
import Foreign.C
import System.IO
import System.IO.Unsafe
import qualified Data.Hash.MD5 as MD5
import Data.Word
import Data.Bits.Utils
{- Sets a Handle to use the filesystem encoding. This causes data
- written or read from it to be encoded/decoded the same
- as ghc 7.4 does to filenames etc. This special encoding
- allows "arbitrary undecodable bytes to be round-tripped through it". -}
fileEncoding :: Handle -> IO ()
fileEncoding h = hSetEncoding h =<< Encoding.getFileSystemEncoding
{- Marshal a Haskell FilePath into a NUL terminated C string using temporary
- storage. The FilePath is encoded using the filesystem encoding,
- reversing the decoding that should have been done when the FilePath
- was obtained. -}
withFilePath :: FilePath -> (CString -> IO a) -> IO a
withFilePath fp f = Encoding.getFileSystemEncoding
>>= \enc -> GHC.withCString enc fp f
{- Encodes a FilePath into a String, applying the filesystem encoding.
-
- There are very few things it makes sense to do with such an encoded
- string. It's not a legal filename; it should not be displayed.
- So this function is not exported, but instead used by the few functions
- that can usefully consume it.
-
- This use of unsafePerformIO is belived to be safe; GHC's interface
- only allows doing this conversion with CStrings, and the CString buffer
- is allocated, used, and deallocated within the call, with no side
- effects.
-}
{-# NOINLINE _encodeFilePath #-}
_encodeFilePath :: FilePath -> String
_encodeFilePath fp = unsafePerformIO $ do
enc <- Encoding.getFileSystemEncoding
GHC.withCString enc fp $ GHC.peekCString Encoding.char8
{- Encodes a FilePath into a Md5.Str, applying the filesystem encoding. -}
md5FilePath :: FilePath -> MD5.Str
md5FilePath = MD5.Str . _encodeFilePath
{- Converts a [Word8] to a FilePath, encoding using the filesystem encoding.
-
- w82c produces a String, which may contain Chars that are invalid
- unicode. From there, this is really a simple matter of applying the
- file system encoding, only complicated by GHC's interface to doing so.
-}
{-# NOINLINE encodeW8 #-}
encodeW8 :: [Word8] -> FilePath
encodeW8 w8 = unsafePerformIO $ do
enc <- Encoding.getFileSystemEncoding
GHC.withCString Encoding.char8 (w82s w8) $ GHC.peekCString enc
{- Useful when you want the actual number of bytes that will be used to
- represent the FilePath on disk. -}
decodeW8 :: FilePath -> [Word8]
decodeW8 = s2w8 . _encodeFilePath
{- Truncates a FilePath to the given number of bytes (or less),
- as represented on disk.
-
- Avoids returning an invalid part of a unicode byte sequence, at the
- cost of efficiency when running on a large FilePath.
-}
truncateFilePath :: Int -> FilePath -> FilePath
truncateFilePath n = go . reverse
where
go f =
let bytes = decodeW8 f
in if length bytes <= n
then reverse f
else go (drop 1 f)