From 72d268401604fbac93ca4701ab53d32880483686 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Sat, 12 Mar 2011 15:30:17 -0400 Subject: [PATCH] Rethink filename encoding handling for display. Since filename encoding may or may not match locale settings, any attempt to decode filenames will fail for some files. So instead, do all output in binary mode. --- Backend/File.hs | 6 +++--- Backend/SHA.hs | 2 +- Backend/WORM.hs | 2 +- CmdLine.hs | 3 +-- Command/Find.hs | 2 +- Command/PreCommit.hs | 2 +- Command/Unused.hs | 2 +- Content.hs | 2 +- Messages.hs | 30 +++++++++++--------------- debian/changelog | 13 +++++------ doc/bugs/problems_with_utf8_names.mdwn | 3 +++ doc/todo/support-non-utf8-locales.mdwn | 6 ++++++ 12 files changed, 37 insertions(+), 36 deletions(-) diff --git a/Backend/File.hs b/Backend/File.hs index d5691595a8..d76cd29391 100644 --- a/Backend/File.hs +++ b/Backend/File.hs @@ -193,14 +193,14 @@ checkKeyNumCopies key file numcopies = do missingNote :: String -> Int -> Int -> String -> String missingNote file 0 _ [] = - "** No known copies of " ++ filePathToString file ++ " exist!" + "** No known copies of " ++ file ++ " exist!" missingNote file 0 _ untrusted = - "Only these untrusted locations may have copies of " ++ filePathToString file ++ + "Only these untrusted locations may have copies of " ++ file ++ "\n" ++ untrusted ++ "Back it up to trusted locations with git-annex copy." missingNote file present needed [] = "Only " ++ show present ++ " of " ++ show needed ++ - " trustworthy copies of " ++ filePathToString file ++ " exist." ++ + " trustworthy copies of " ++ file ++ " exist." ++ "\nBack it up with git-annex copy." missingNote file present needed untrusted = missingNote file present needed [] ++ diff --git a/Backend/SHA.hs b/Backend/SHA.hs index c074ab48a2..4eea890ce4 100644 --- a/Backend/SHA.hs +++ b/Backend/SHA.hs @@ -83,5 +83,5 @@ checkKeyChecksum size key = do then return True else do dest <- moveBad key - warning $ "Bad file content; moved to " ++ filePathToString dest + warning $ "Bad file content; moved to " ++ dest return False diff --git a/Backend/WORM.hs b/Backend/WORM.hs index 8a6412eb11..a0d814aa08 100644 --- a/Backend/WORM.hs +++ b/Backend/WORM.hs @@ -70,5 +70,5 @@ checkKeySize key = do then return True else do dest <- moveBad key - warning $ "Bad file size; moved to " ++ filePathToString dest + warning $ "Bad file size; moved to " ++ dest return False diff --git a/CmdLine.hs b/CmdLine.hs index 1c01aa75f6..b8fd6af7ce 100644 --- a/CmdLine.hs +++ b/CmdLine.hs @@ -11,7 +11,6 @@ module CmdLine ( shutdown ) where -import System.IO import System.IO.Error (try) import System.Console.GetOpt import Control.Monad.State (liftIO) @@ -31,7 +30,7 @@ import UUID {- Runs the passed command line. -} dispatch :: Git.Repo -> [String] -> [Command] -> [Option] -> String -> IO () dispatch gitrepo args cmds options header = do - forceUtf8 + setupConsole state <- Annex.new gitrepo allBackends (actions, state') <- Annex.run state $ parseCmd args header cmds options tryRun state' $ [startup, upgrade] ++ actions ++ [shutdown] diff --git a/Command/Find.hs b/Command/Find.hs index 3e9125b9a6..1ca6ff1e7c 100644 --- a/Command/Find.hs +++ b/Command/Find.hs @@ -25,5 +25,5 @@ seek = [withFilesInGit start] start :: CommandStartString start file = isAnnexed file $ \(key, _) -> do exists <- inAnnex key - when exists $ liftIO $ putStrLn $ filePathToString file + when exists $ liftIO $ putStrLn file return Nothing diff --git a/Command/PreCommit.hs b/Command/PreCommit.hs index d2f6964343..6f9adb79a5 100644 --- a/Command/PreCommit.hs +++ b/Command/PreCommit.hs @@ -34,7 +34,7 @@ perform pair@(file, _) = do ok <- doCommand $ Command.Add.start pair if ok then return $ Just $ cleanup file - else error $ "failed to add " ++ filePathToString file ++ "; canceling commit" + else error $ "failed to add " ++ file ++ "; canceling commit" cleanup :: FilePath -> CommandCleanup cleanup file = do diff --git a/Command/Unused.hs b/Command/Unused.hs index 9f3881d595..a614ce5d94 100644 --- a/Command/Unused.hs +++ b/Command/Unused.hs @@ -68,7 +68,7 @@ checkUnused = do dropmsg = ["(To remove unwanted data: git-annex dropunused NUMBER)"] table l = [" NUMBER KEY"] ++ map cols l - cols (n,k) = " " ++ pad 6 (show n) ++ " " ++ (filePathToString . show) k + cols (n,k) = " " ++ pad 6 (show n) ++ " " ++ show k pad n s = s ++ replicate (n - length s) ' ' number :: Int -> [a] -> [(Int, a)] diff --git a/Content.hs b/Content.hs index bcd4ac0e13..895a8812c2 100644 --- a/Content.hs +++ b/Content.hs @@ -50,7 +50,7 @@ calcGitLink file key = do cwd <- liftIO $ getCurrentDirectory let absfile = case absNormPath cwd file of Just f -> f - Nothing -> error $ "unable to normalize " ++ filePathToString file + Nothing -> error $ "unable to normalize " ++ file return $ relPathDirToDir (parentDir absfile) (Git.workTree g) ".git" annexLocation key diff --git a/Messages.hs b/Messages.hs index 83b3ecf239..733638ce12 100644 --- a/Messages.hs +++ b/Messages.hs @@ -11,11 +11,9 @@ import Control.Monad.State (liftIO) import System.IO import Control.Monad (unless) import Data.String.Utils -import qualified Codec.Binary.UTF8.String as UTF8 import Types import qualified Annex -import qualified SysConfig verbose :: Annex () -> Annex () verbose a = do @@ -27,7 +25,7 @@ showSideAction s = verbose $ liftIO $ putStrLn $ "(" ++ s ++ ")" showStart :: String -> String -> Annex () showStart command file = verbose $ do - liftIO $ putStr $ command ++ " " ++ filePathToString file ++ " " + liftIO $ putStr $ command ++ " " ++ file ++ " " liftIO $ hFlush stdout showNote :: String -> Annex () @@ -59,17 +57,15 @@ warning w = do indent :: String -> String indent s = join "\n" $ map (\l -> " " ++ l) $ lines s -{- Prepares a filename for display. This is needed because on many - - platforms (eg, unix), FilePaths are internally stored in - - non-decoded form. -} -filePathToString :: FilePath -> String -filePathToString = if SysConfig.unicodefilepath then id else UTF8.decodeString - -{- Workaround to avoid crashes displaying filenames containing - - characters > 255 in non-utf8 locales. Force encodings to utf-8, - - even though this may mean some characters in the encoding - - are mangled. -} -forceUtf8 :: IO () -forceUtf8 = do - hSetEncoding stdout utf8 - hSetEncoding stderr utf8 +{- By default, haskell honors the user's locale in its output to stdout + - and stderr. While that's great for proper unicode support, for git-annex + - all that's really needed is the ability to display simple messages + - (currently untranslated), and importantly, to display filenames exactly + - as they are written on disk, no matter what their encoding. So, force + - raw mode. + - + - NB: Once git-annex gets localized, this will need a rethink. -} +setupConsole :: IO () +setupConsole = do + hSetBinaryMode stdout True + hSetBinaryMode stderr True diff --git a/debian/changelog b/debian/changelog index a414b3befa..90b4cf6b25 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,15 +1,12 @@ -git-annex (0.23) UNRELEASED; urgency=low +git-annex (0.23) unstable; urgency=low * Support ssh remotes with a port specified. * whereis: New subcommand to show where a file's content has gotten to. - * Haskell's IO layer crashes on characters > 255 when in a non-unicode - locale. Until Haskell gets better behavior, put in an admittedly - ugly workaround for that: git-annex forces utf8 output mode no matter - what locale is selected. So if you use a non-utf8 locale, your - filenames with characters > 127 will not be displayed as you'd expect. - But at least it won't crash. + * Rethink filename encoding handling for display. Since filename encoding + may or may not match locale settings, any attempt to decode filenames + will fail for some files. So instead, do all output in binary mode. - -- Joey Hess Sat, 05 Mar 2011 15:39:13 -0400 + -- Joey Hess Sat, 12 Mar 2011 15:02:49 -0400 git-annex (0.22) unstable; urgency=low diff --git a/doc/bugs/problems_with_utf8_names.mdwn b/doc/bugs/problems_with_utf8_names.mdwn index efde1c9a3a..d6dc6ca3c3 100644 --- a/doc/bugs/problems_with_utf8_names.mdwn +++ b/doc/bugs/problems_with_utf8_names.mdwn @@ -63,6 +63,9 @@ It looks like the common latin1-to-UTF8 encoding. Functionality other than otupu > One other possible > issue would be that this could cause problems if git-annex were > translated. +> > On second thought, I switched to this. Any decoding of a filename +> > is going to make someone unhappy; the previous approach broke +> > non-utf8 filenames. ---- diff --git a/doc/todo/support-non-utf8-locales.mdwn b/doc/todo/support-non-utf8-locales.mdwn index 60f35eec81..da40118d52 100644 --- a/doc/todo/support-non-utf8-locales.mdwn +++ b/doc/todo/support-non-utf8-locales.mdwn @@ -18,3 +18,9 @@ of filename encodings. In particular, git-annex's behavior is unlikely to improve much until haskell's support for utf8 filenames improves. --[[Joey]] + +> [[done]] -- I just turned off all encoding handling on stdout and stderr, +> which avoids these problems nicely. Git-annex now displays just what it +> input, at least on platforms where haskell does not decode unicode in +> FilePaths. This will later be a problem when it gets localized, but for +> now works great. --[[Joey]]