Rethink filename encoding handling for display. Since filename encoding may or may not match locale settings, any attempt to decode filenames will fail for some files. So instead, do all output in binary mode.

This commit is contained in:
Joey Hess 2011-03-12 15:30:17 -04:00
parent 9229d182d3
commit 72d2684016
12 changed files with 37 additions and 36 deletions

View file

@ -193,14 +193,14 @@ checkKeyNumCopies key file numcopies = do
missingNote :: String -> Int -> Int -> String -> String
missingNote file 0 _ [] =
"** No known copies of " ++ filePathToString file ++ " exist!"
"** No known copies of " ++ file ++ " exist!"
missingNote file 0 _ untrusted =
"Only these untrusted locations may have copies of " ++ filePathToString file ++
"Only these untrusted locations may have copies of " ++ file ++
"\n" ++ untrusted ++
"Back it up to trusted locations with git-annex copy."
missingNote file present needed [] =
"Only " ++ show present ++ " of " ++ show needed ++
" trustworthy copies of " ++ filePathToString file ++ " exist." ++
" trustworthy copies of " ++ file ++ " exist." ++
"\nBack it up with git-annex copy."
missingNote file present needed untrusted =
missingNote file present needed [] ++

View file

@ -83,5 +83,5 @@ checkKeyChecksum size key = do
then return True
else do
dest <- moveBad key
warning $ "Bad file content; moved to " ++ filePathToString dest
warning $ "Bad file content; moved to " ++ dest
return False

View file

@ -70,5 +70,5 @@ checkKeySize key = do
then return True
else do
dest <- moveBad key
warning $ "Bad file size; moved to " ++ filePathToString dest
warning $ "Bad file size; moved to " ++ dest
return False

View file

@ -11,7 +11,6 @@ module CmdLine (
shutdown
) where
import System.IO
import System.IO.Error (try)
import System.Console.GetOpt
import Control.Monad.State (liftIO)
@ -31,7 +30,7 @@ import UUID
{- Runs the passed command line. -}
dispatch :: Git.Repo -> [String] -> [Command] -> [Option] -> String -> IO ()
dispatch gitrepo args cmds options header = do
forceUtf8
setupConsole
state <- Annex.new gitrepo allBackends
(actions, state') <- Annex.run state $ parseCmd args header cmds options
tryRun state' $ [startup, upgrade] ++ actions ++ [shutdown]

View file

@ -25,5 +25,5 @@ seek = [withFilesInGit start]
start :: CommandStartString
start file = isAnnexed file $ \(key, _) -> do
exists <- inAnnex key
when exists $ liftIO $ putStrLn $ filePathToString file
when exists $ liftIO $ putStrLn file
return Nothing

View file

@ -34,7 +34,7 @@ perform pair@(file, _) = do
ok <- doCommand $ Command.Add.start pair
if ok
then return $ Just $ cleanup file
else error $ "failed to add " ++ filePathToString file ++ "; canceling commit"
else error $ "failed to add " ++ file ++ "; canceling commit"
cleanup :: FilePath -> CommandCleanup
cleanup file = do

View file

@ -68,7 +68,7 @@ checkUnused = do
dropmsg = ["(To remove unwanted data: git-annex dropunused NUMBER)"]
table l = [" NUMBER KEY"] ++ map cols l
cols (n,k) = " " ++ pad 6 (show n) ++ " " ++ (filePathToString . show) k
cols (n,k) = " " ++ pad 6 (show n) ++ " " ++ show k
pad n s = s ++ replicate (n - length s) ' '
number :: Int -> [a] -> [(Int, a)]

View file

@ -50,7 +50,7 @@ calcGitLink file key = do
cwd <- liftIO $ getCurrentDirectory
let absfile = case absNormPath cwd file of
Just f -> f
Nothing -> error $ "unable to normalize " ++ filePathToString file
Nothing -> error $ "unable to normalize " ++ file
return $ relPathDirToDir (parentDir absfile)
(Git.workTree g) </> ".git" </> annexLocation key

View file

@ -11,11 +11,9 @@ import Control.Monad.State (liftIO)
import System.IO
import Control.Monad (unless)
import Data.String.Utils
import qualified Codec.Binary.UTF8.String as UTF8
import Types
import qualified Annex
import qualified SysConfig
verbose :: Annex () -> Annex ()
verbose a = do
@ -27,7 +25,7 @@ showSideAction s = verbose $ liftIO $ putStrLn $ "(" ++ s ++ ")"
showStart :: String -> String -> Annex ()
showStart command file = verbose $ do
liftIO $ putStr $ command ++ " " ++ filePathToString file ++ " "
liftIO $ putStr $ command ++ " " ++ file ++ " "
liftIO $ hFlush stdout
showNote :: String -> Annex ()
@ -59,17 +57,15 @@ warning w = do
indent :: String -> String
indent s = join "\n" $ map (\l -> " " ++ l) $ lines s
{- Prepares a filename for display. This is needed because on many
- platforms (eg, unix), FilePaths are internally stored in
- non-decoded form. -}
filePathToString :: FilePath -> String
filePathToString = if SysConfig.unicodefilepath then id else UTF8.decodeString
{- Workaround to avoid crashes displaying filenames containing
- characters > 255 in non-utf8 locales. Force encodings to utf-8,
- even though this may mean some characters in the encoding
- are mangled. -}
forceUtf8 :: IO ()
forceUtf8 = do
hSetEncoding stdout utf8
hSetEncoding stderr utf8
{- By default, Haskell honors the user's locale in its output to stdout
- and stderr. While that's great for proper unicode support, for git-annex
- all that's really needed is the ability to display simple messages
- (currently untranslated), and importantly, to display filenames exactly
- as they are written on disk, no matter what their encoding. So, force
- both handles into binary mode.
-
- NB: Once git-annex gets localized, this will need a rethink. -}
setupConsole :: IO ()
setupConsole = do
hSetBinaryMode stdout True
hSetBinaryMode stderr True

13
debian/changelog vendored
View file

@ -1,15 +1,12 @@
git-annex (0.23) UNRELEASED; urgency=low
git-annex (0.23) unstable; urgency=low
* Support ssh remotes with a port specified.
* whereis: New subcommand to show where a file's content has gotten to.
* Haskell's IO layer crashes on characters > 255 when in a non-unicode
locale. Until Haskell gets better behavior, put in an admittedly
ugly workaround for that: git-annex forces utf8 output mode no matter
what locale is selected. So if you use a non-utf8 locale, your
filenames with characters > 127 will not be displayed as you'd expect.
But at least it won't crash.
* Rethink filename encoding handling for display. Since filename encoding
may or may not match locale settings, any attempt to decode filenames
will fail for some files. So instead, do all output in binary mode.
-- Joey Hess <joeyh@debian.org> Sat, 05 Mar 2011 15:39:13 -0400
-- Joey Hess <joeyh@debian.org> Sat, 12 Mar 2011 15:02:49 -0400
git-annex (0.22) unstable; urgency=low

View file

@ -63,6 +63,9 @@ It looks like the common latin1-to-UTF8 encoding. Functionality other than otupu
> One other possible
> issue would be that this could cause problems if git-annex were
> translated.
> > On second thought, I switched to this. Any decoding of a filename
> > is going to make someone unhappy; the previous approach broke
> > non-utf8 filenames.
----

View file

@ -18,3 +18,9 @@ of filename encodings. In particular,
git-annex's behavior is unlikely to improve much until haskell's
support for utf8 filenames improves. --[[Joey]]
> [[done]] -- I just turned off all encoding handling on stdout and stderr,
> which avoids these problems nicely. Git-annex now displays filenames
> exactly as it received them, at least on platforms where Haskell does not
> decode unicode in FilePaths. This will later be a problem when it gets
> localized, but for now works great. --[[Joey]]