Fix display of unicode filenames.
Internally, the filenames are stored as un-decoded Unicode. I tried decoding them, but then Haskell tries to access the wrong files. Hmm. So, I've unhappily chosen option "B", which is to decode filenames before they are displayed.
This commit is contained in:
parent
e7a3475704
commit
fe55b4644e
11 changed files with 63 additions and 21 deletions
|
@ -193,14 +193,14 @@ checkKeyNumCopies key file numcopies = do
|
|||
|
||||
missingNote :: String -> Int -> Int -> String -> String
|
||||
missingNote file 0 _ [] =
|
||||
"** No known copies of " ++ file ++ " exist!"
|
||||
"** No known copies of " ++ showFile file ++ " exist!"
|
||||
missingNote file 0 _ untrusted =
|
||||
"Only these untrusted locations may have copies of " ++ file ++
|
||||
"Only these untrusted locations may have copies of " ++ showFile file ++
|
||||
"\n" ++ untrusted ++
|
||||
"Back it up to trusted locations with git-annex copy."
|
||||
missingNote file present needed [] =
|
||||
"Only " ++ show present ++ " of " ++ show needed ++
|
||||
" trustworthy copies of " ++ file ++ " exist." ++
|
||||
" trustworthy copies of " ++ showFile file ++ " exist." ++
|
||||
"\nBack it up with git-annex copy."
|
||||
missingNote file present needed untrusted =
|
||||
missingNote file present needed [] ++
|
||||
|
|
|
@ -58,5 +58,5 @@ checkKeySHA1 key = do
|
|||
then return True
|
||||
else do
|
||||
dest <- moveBad key
|
||||
warning $ "Bad file content; moved to "++dest
|
||||
warning $ "Bad file content; moved to " ++ showFile dest
|
||||
return False
|
||||
|
|
|
@ -67,5 +67,5 @@ checkKeySize key = do
|
|||
then return True
|
||||
else do
|
||||
dest <- moveBad key
|
||||
warning $ "Bad file size; moved to "++dest
|
||||
warning $ "Bad file size; moved to " ++ showFile dest
|
||||
return False
|
||||
|
|
|
@ -12,6 +12,7 @@ import Control.Monad.State (liftIO)
|
|||
|
||||
import Command
|
||||
import Content
|
||||
import Messages
|
||||
|
||||
command :: [Command]
|
||||
command = [Command "find" (paramOptional $ paramRepeating paramPath) seek
|
||||
|
@ -24,5 +25,5 @@ seek = [withFilesInGit start]
|
|||
start :: CommandStartString
|
||||
start file = isAnnexed file $ \(key, _) -> do
|
||||
exists <- inAnnex key
|
||||
when exists $ liftIO $ putStrLn file
|
||||
when exists $ liftIO $ putStrLn $ showFile file
|
||||
return Nothing
|
||||
|
|
|
@ -32,7 +32,7 @@ perform pair@(file, _) = do
|
|||
ok <- doCommand $ Command.Add.start pair
|
||||
if ok
|
||||
then return $ Just $ cleanup file
|
||||
else error $ "failed to add " ++ file ++ "; canceling commit"
|
||||
else error $ "failed to add " ++ showFile file ++ "; canceling commit"
|
||||
|
||||
cleanup :: FilePath -> CommandCleanup
|
||||
cleanup file = do
|
||||
|
|
|
@ -68,7 +68,7 @@ checkUnused = do
|
|||
dropmsg = ["(To remove unwanted data: git-annex dropunused NUMBER)"]
|
||||
|
||||
table l = [" NUMBER KEY"] ++ map cols l
|
||||
cols (n,k) = " " ++ pad 6 (show n) ++ " " ++ show k
|
||||
cols (n,k) = " " ++ pad 6 (show n) ++ " " ++ (showFile . show) k
|
||||
pad n s = s ++ replicate (n - length s) ' '
|
||||
|
||||
number :: Int -> [a] -> [(Int, a)]
|
||||
|
|
|
@ -49,7 +49,7 @@ calcGitLink file key = do
|
|||
cwd <- liftIO $ getCurrentDirectory
|
||||
let absfile = case absNormPath cwd file of
|
||||
Just f -> f
|
||||
Nothing -> error $ "unable to normalize " ++ file
|
||||
Nothing -> error $ "unable to normalize " ++ showFile file
|
||||
return $ relPathDirToDir (parentDir absfile) (Git.workTree g) ++
|
||||
annexLocation key
|
||||
|
||||
|
|
|
@ -11,6 +11,7 @@ import Control.Monad.State (liftIO)
|
|||
import System.IO
|
||||
import Control.Monad (unless)
|
||||
import Data.String.Utils
|
||||
import Codec.Binary.UTF8.String as UTF8
|
||||
|
||||
import Types
|
||||
import qualified Annex
|
||||
|
@ -25,7 +26,7 @@ showSideAction s = verbose $ liftIO $ putStrLn $ "(" ++ s ++ ")"
|
|||
|
||||
showStart :: String -> String -> Annex ()
|
||||
showStart command file = verbose $ do
|
||||
liftIO $ putStr $ command ++ " " ++ file ++ " "
|
||||
liftIO $ putStr $ command ++ " " ++ showFile file ++ " "
|
||||
liftIO $ hFlush stdout
|
||||
|
||||
showNote :: String -> Annex ()
|
||||
|
@ -45,7 +46,6 @@ showEndOk = verbose $ liftIO $ putStrLn "ok"
|
|||
showEndFail :: Annex ()
|
||||
showEndFail = verbose $ liftIO $ putStrLn "\nfailed"
|
||||
|
||||
{- Exception pretty-printing. -}
|
||||
showErr :: (Show a) => a -> Annex ()
|
||||
showErr e = warning $ "git-annex: " ++ show e
|
||||
|
||||
|
@ -57,3 +57,8 @@ warning w = do
|
|||
|
||||
indent :: String -> String
|
||||
indent s = join "\n" $ map (\l -> " " ++ l) $ lines s
|
||||
|
||||
{- Prepares a filename for display. This is needed because strings are
|
||||
- internally represented in git-annex in non-decoded form. -}
|
||||
showFile :: String -> String
|
||||
showFile = decodeString
|
||||
|
|
1
debian/changelog
vendored
1
debian/changelog
vendored
|
@ -4,6 +4,7 @@ git-annex (0.21) UNRELEASED; urgency=low
|
|||
* unannex: Fix recently introduced bug when attempting to unannex more
|
||||
than one file at a time.
|
||||
* test: Set git user name and email in case git can't guess values.
|
||||
* Fix display of unicode filenames.
|
||||
|
||||
-- Joey Hess <joeyh@debian.org> Wed, 09 Feb 2011 00:12:11 -0400
|
||||
|
||||
|
|
|
@ -37,10 +37,22 @@ It looks like the common latin1-to-UTF8 encoding. Functionality other than otupu
|
|||
> encoded in utf-8 (an archive could have historical filenames using
|
||||
> varying encodings), and you don't want which files are accessed to
|
||||
> depend on locale settings.
|
||||
> > I tried to do this by making parts of GitRepo call
|
||||
> > Codec.Binary.UTF8.String.decodeString when reading filenames from
|
||||
> > git. This seemed to break attempts to operate on the files,
|
||||
> > weirdly encoded strings were seen in syscalls in strace.
|
||||
> 1. Keep input and internal data un-decoded, but decode it when
|
||||
> outputting a filename (assuming the filename is encoded using the
|
||||
> user's configured encoding), and allow haskell's output encoding to then
|
||||
> encode it according to the user's locale configuration.
|
||||
> > This is now [[implemented|done]]. I'm not very happy that I have to watch
|
||||
> > out for any place that a filename is output and call `showFile`
|
||||
> > on it, but there are really not too many such places in git-annex.
|
||||
> >
|
||||
> > Note that this apparently only affects filenames.
|
||||
> > (Names of files in the annex, and also some places where names
|
||||
> > of keys are displayed.) Utf-8 in the uuid.map file etc seems
|
||||
> > to be handled cleanly.
|
||||
> 1. Avoid encodings entirely. Mostly what I'm doing now; probably
|
||||
> could find a way to disable encoding of console output. Then the raw
|
||||
> filename would be displayed, which should work ok. git-annex does
|
||||
|
@ -50,13 +62,3 @@ It looks like the common latin1-to-UTF8 encoding. Functionality other than otupu
|
|||
> One other possible
|
||||
> issue would be that this could cause problems if git-annex were
|
||||
> translated.
|
||||
>
|
||||
> BTW, for more fun, try unsetting LANG, and then you can see
|
||||
> stuff like this:
|
||||
|
||||
joey@gnu:~/tmp/aa>git annex add ./Üa
|
||||
add add add add git-annex: <stdout>: commitAndReleaseBuffer: invalid
|
||||
argument (Invalid or incomplete multibyte or wide character)
|
||||
|
||||
> (Add -q to work around this; once it doesn't need to print the filename,
|
||||
> it can act on it ok!)
|
||||
|
|
33
doc/bugs/unhappy_without_UTF8_locale.mdwn
Normal file
33
doc/bugs/unhappy_without_UTF8_locale.mdwn
Normal file
|
@ -0,0 +1,33 @@
|
|||
Try unsetting LANG and passing git-annex unicode filenames.
|
||||
|
||||
joey@gnu:~/tmp/aa>git annex add ./Üa
|
||||
add add add add git-annex: <stdout>: commitAndReleaseBuffer: invalid
|
||||
argument (Invalid or incomplete multibyte or wide character)
|
||||
|
||||
The same problem can be seen with a simple haskell program:
|
||||
|
||||
import System.Environment
|
||||
import Codec.Binary.UTF8.String
|
||||
main = do
|
||||
args <- getArgs
|
||||
putStrLn $ decodeString $ args !! 0
|
||||
|
||||
joey@gnu:~/src/git-annex>LANG= runghc ~/foo.hs Ü
|
||||
foo.hs: <stdout>: hPutChar: invalid argument (Invalid or incomplete multibyte or wide character)
|
||||
|
||||
(The call to `decodeString` is necessary to make the input
|
||||
unicode string be displayed properly in a utf8 locale, but
|
||||
does not contribute to this problem.)
|
||||
|
||||
I guess that haskell is setting the IO encoding to latin1, which
|
||||
is [documented](http://haskell.org/ghc/docs/latest/html/libraries/base/System-IO.html#v:latin1)
|
||||
to error out on characters > 255.
|
||||
|
||||
So this program (which still needs the two imports shown above) doesn't have the problem -- but may output garbage
|
||||
on non-utf-8 capable terminals:
|
||||
|
||||
import System.IO
|
||||
main = do
|
||||
hSetEncoding stdout utf8
|
||||
args <- getArgs
|
||||
putStrLn $ decodeString $ args !! 0
|
Loading…
Reference in a new issue