Rethink filename encoding handling for display. Since filename encoding may or may not match locale settings, any attempt to decode filenames will fail for some files. So instead, do all output in binary mode.

This commit is contained in:
Joey Hess 2011-03-12 15:30:17 -04:00
parent 9229d182d3
commit 72d2684016
12 changed files with 37 additions and 36 deletions

View file

@ -193,14 +193,14 @@ checkKeyNumCopies key file numcopies = do
missingNote :: String -> Int -> Int -> String -> String missingNote :: String -> Int -> Int -> String -> String
missingNote file 0 _ [] = missingNote file 0 _ [] =
"** No known copies of " ++ filePathToString file ++ " exist!" "** No known copies of " ++ file ++ " exist!"
missingNote file 0 _ untrusted = missingNote file 0 _ untrusted =
"Only these untrusted locations may have copies of " ++ filePathToString file ++ "Only these untrusted locations may have copies of " ++ file ++
"\n" ++ untrusted ++ "\n" ++ untrusted ++
"Back it up to trusted locations with git-annex copy." "Back it up to trusted locations with git-annex copy."
missingNote file present needed [] = missingNote file present needed [] =
"Only " ++ show present ++ " of " ++ show needed ++ "Only " ++ show present ++ " of " ++ show needed ++
" trustworthy copies of " ++ filePathToString file ++ " exist." ++ " trustworthy copies of " ++ file ++ " exist." ++
"\nBack it up with git-annex copy." "\nBack it up with git-annex copy."
missingNote file present needed untrusted = missingNote file present needed untrusted =
missingNote file present needed [] ++ missingNote file present needed [] ++

View file

@ -83,5 +83,5 @@ checkKeyChecksum size key = do
then return True then return True
else do else do
dest <- moveBad key dest <- moveBad key
warning $ "Bad file content; moved to " ++ filePathToString dest warning $ "Bad file content; moved to " ++ dest
return False return False

View file

@ -70,5 +70,5 @@ checkKeySize key = do
then return True then return True
else do else do
dest <- moveBad key dest <- moveBad key
warning $ "Bad file size; moved to " ++ filePathToString dest warning $ "Bad file size; moved to " ++ dest
return False return False

View file

@ -11,7 +11,6 @@ module CmdLine (
shutdown shutdown
) where ) where
import System.IO
import System.IO.Error (try) import System.IO.Error (try)
import System.Console.GetOpt import System.Console.GetOpt
import Control.Monad.State (liftIO) import Control.Monad.State (liftIO)
@ -31,7 +30,7 @@ import UUID
{- Runs the passed command line. -} {- Runs the passed command line. -}
dispatch :: Git.Repo -> [String] -> [Command] -> [Option] -> String -> IO () dispatch :: Git.Repo -> [String] -> [Command] -> [Option] -> String -> IO ()
dispatch gitrepo args cmds options header = do dispatch gitrepo args cmds options header = do
forceUtf8 setupConsole
state <- Annex.new gitrepo allBackends state <- Annex.new gitrepo allBackends
(actions, state') <- Annex.run state $ parseCmd args header cmds options (actions, state') <- Annex.run state $ parseCmd args header cmds options
tryRun state' $ [startup, upgrade] ++ actions ++ [shutdown] tryRun state' $ [startup, upgrade] ++ actions ++ [shutdown]

View file

@ -25,5 +25,5 @@ seek = [withFilesInGit start]
start :: CommandStartString start :: CommandStartString
start file = isAnnexed file $ \(key, _) -> do start file = isAnnexed file $ \(key, _) -> do
exists <- inAnnex key exists <- inAnnex key
when exists $ liftIO $ putStrLn $ filePathToString file when exists $ liftIO $ putStrLn file
return Nothing return Nothing

View file

@ -34,7 +34,7 @@ perform pair@(file, _) = do
ok <- doCommand $ Command.Add.start pair ok <- doCommand $ Command.Add.start pair
if ok if ok
then return $ Just $ cleanup file then return $ Just $ cleanup file
else error $ "failed to add " ++ filePathToString file ++ "; canceling commit" else error $ "failed to add " ++ file ++ "; canceling commit"
cleanup :: FilePath -> CommandCleanup cleanup :: FilePath -> CommandCleanup
cleanup file = do cleanup file = do

View file

@ -68,7 +68,7 @@ checkUnused = do
dropmsg = ["(To remove unwanted data: git-annex dropunused NUMBER)"] dropmsg = ["(To remove unwanted data: git-annex dropunused NUMBER)"]
table l = [" NUMBER KEY"] ++ map cols l table l = [" NUMBER KEY"] ++ map cols l
cols (n,k) = " " ++ pad 6 (show n) ++ " " ++ (filePathToString . show) k cols (n,k) = " " ++ pad 6 (show n) ++ " " ++ show k
pad n s = s ++ replicate (n - length s) ' ' pad n s = s ++ replicate (n - length s) ' '
number :: Int -> [a] -> [(Int, a)] number :: Int -> [a] -> [(Int, a)]

View file

@ -50,7 +50,7 @@ calcGitLink file key = do
cwd <- liftIO $ getCurrentDirectory cwd <- liftIO $ getCurrentDirectory
let absfile = case absNormPath cwd file of let absfile = case absNormPath cwd file of
Just f -> f Just f -> f
Nothing -> error $ "unable to normalize " ++ filePathToString file Nothing -> error $ "unable to normalize " ++ file
return $ relPathDirToDir (parentDir absfile) return $ relPathDirToDir (parentDir absfile)
(Git.workTree g) </> ".git" </> annexLocation key (Git.workTree g) </> ".git" </> annexLocation key

View file

@ -11,11 +11,9 @@ import Control.Monad.State (liftIO)
import System.IO import System.IO
import Control.Monad (unless) import Control.Monad (unless)
import Data.String.Utils import Data.String.Utils
import qualified Codec.Binary.UTF8.String as UTF8
import Types import Types
import qualified Annex import qualified Annex
import qualified SysConfig
verbose :: Annex () -> Annex () verbose :: Annex () -> Annex ()
verbose a = do verbose a = do
@ -27,7 +25,7 @@ showSideAction s = verbose $ liftIO $ putStrLn $ "(" ++ s ++ ")"
showStart :: String -> String -> Annex () showStart :: String -> String -> Annex ()
showStart command file = verbose $ do showStart command file = verbose $ do
liftIO $ putStr $ command ++ " " ++ filePathToString file ++ " " liftIO $ putStr $ command ++ " " ++ file ++ " "
liftIO $ hFlush stdout liftIO $ hFlush stdout
showNote :: String -> Annex () showNote :: String -> Annex ()
@ -59,17 +57,15 @@ warning w = do
indent :: String -> String indent :: String -> String
indent s = join "\n" $ map (\l -> " " ++ l) $ lines s indent s = join "\n" $ map (\l -> " " ++ l) $ lines s
{- Prepares a filename for display. This is needed because on many {- By default, haskell honors the user's locale in its output to stdout
- platforms (eg, unix), FilePaths are internally stored in - and stderr. While that's great for proper unicode support, for git-annex
- non-decoded form. -} - all that's really needed is the ability to display simple messages
filePathToString :: FilePath -> String - (currently untranslated), and importantly, to display filenames exactly
filePathToString = if SysConfig.unicodefilepath then id else UTF8.decodeString - as they are written on disk, no matter what their encoding. So, force
- raw mode.
{- Workaround to avoid crashes displaying filenames containing -
- characters > 255 in non-utf8 locales. Force encodings to utf-8, - NB: Once git-annex gets localized, this will need a rethink. -}
- even though this may mean some characters in the encoding setupConsole :: IO ()
- are mangled. -} setupConsole = do
forceUtf8 :: IO () hSetBinaryMode stdout True
forceUtf8 = do hSetBinaryMode stderr True
hSetEncoding stdout utf8
hSetEncoding stderr utf8

13
debian/changelog vendored
View file

@ -1,15 +1,12 @@
git-annex (0.23) UNRELEASED; urgency=low git-annex (0.23) unstable; urgency=low
* Support ssh remotes with a port specified. * Support ssh remotes with a port specified.
* whereis: New subcommand to show where a file's content has gotten to. * whereis: New subcommand to show where a file's content has gotten to.
* Haskell's IO layer crashes on characters > 255 when in a non-unicode * Rethink filename encoding handling for display. Since filename encoding
locale. Until Haskell gets better behavior, put in an admittedly may or may not match locale settings, any attempt to decode filenames
ugly workaround for that: git-annex forces utf8 output mode no matter will fail for some files. So instead, do all output in binary mode.
what locale is selected. So if you use a non-utf8 locale, your
filenames with characters > 127 will not be displayed as you'd expect.
But at least it won't crash.
-- Joey Hess <joeyh@debian.org> Sat, 05 Mar 2011 15:39:13 -0400 -- Joey Hess <joeyh@debian.org> Sat, 12 Mar 2011 15:02:49 -0400
git-annex (0.22) unstable; urgency=low git-annex (0.22) unstable; urgency=low

View file

@ -63,6 +63,9 @@ It looks like the common latin1-to-UTF8 encoding. Functionality other than otupu
> One other possible > One other possible
> issue would be that this could cause problems if git-annex were > issue would be that this could cause problems if git-annex were
> translated. > translated.
> > On second thought, I switched to this. Any decoding of a filename
> > is going to make someone unhappy; the previous approach broke
> > non-utf8 filenames.
---- ----

View file

@ -18,3 +18,9 @@ of filename encodings. In particular,
git-annex's behavior is unlikely to improve much until haskell's git-annex's behavior is unlikely to improve much until haskell's
support for utf8 filenames improves. --[[Joey]] support for utf8 filenames improves. --[[Joey]]
> [[done]] -- I just turned off all encoding handling on stdout and stderr,
> which avoids these problems nicely. Git-annex now displays just what it
> input, at least on platforms where haskell does not decode unicode in
> FilePaths. This will later be a problem when it gets localized, but for
> now works great. --[[Joey]]