split out two more Git modules

2011-12-13 15:22:43 -04:00 · 2011-12-13 15:22:43 -04:00 · 9db8ec210f
commit 9db8ec210f
parent 25b2cc4148
8 changed files with 139 additions and 117 deletions
--- a/Git/Filename.hs
+++ b/Git/Filename.hs
@ -0,0 +1,84 @@
+{- Some git commands output encoded filenames, in a rather annoyingly complex
+ - C-style encoding.
+ -
+ - Copyright 2010, 2011 Joey Hess <joey@kitenet.net>
+ -
+ - Licensed under the GNU GPL version 3 or higher.
+ -}
+
+module Git.Filename where
+
+import qualified Codec.Binary.UTF8.String
+import Data.Char
+import Data.Word (Word8)
+import Text.Printf
+
+decode :: String -> FilePath
+decode [] = []
+decode f@(c:s)
+	-- encoded strings will be inside double quotes
+	| c == '"' = unescape ("", middle)
+	| otherwise = f
+	where
+		e = '\\'
+		middle = init s
+		unescape (b, []) = b
+		-- look for escapes starting with '\'
+		unescape (b, v) = b ++ beginning ++ unescape (handle rest)
+			where
+				pair = span (/= e) v
+				beginning = fst pair
+				rest = snd pair
+		isescape x = x == e
+		-- \NNN is an octal encoded character
+		handle (x:n1:n2:n3:rest)
+			| isescape x && alloctal = (fromoctal, rest)
+				where
+					alloctal = isOctDigit n1 &&
+						isOctDigit n2 &&
+						isOctDigit n3
+					fromoctal = [chr $ readoctal [n1, n2, n3]]
+					readoctal o = read $ "0o" ++ o :: Int
+		-- \C is used for a few special characters
+		handle (x:nc:rest)
+			| isescape x = ([echar nc], rest)
+			where
+				echar 'a' = '\a'
+				echar 'b' = '\b'
+				echar 'f' = '\f'
+				echar 'n' = '\n'
+				echar 'r' = '\r'
+				echar 't' = '\t'
+				echar 'v' = '\v'
+				echar a = a
+		handle n = ("", n)
+
+{- Should not need to use this, except for testing decode. -}
+encode :: FilePath -> String
+encode s = foldl (++) "\"" (map echar s) ++ "\""
+	where
+		e c = '\\' : [c]
+		echar '\a' = e 'a'
+		echar '\b' = e 'b'
+		echar '\f' = e 'f'
+		echar '\n' = e 'n'
+		echar '\r' = e 'r'
+		echar '\t' = e 't'
+		echar '\v' = e 'v'
+		echar '\\' = e '\\'
+		echar '"'  = e '"'
+		echar x
+			| ord x < 0x20 = e_num x -- low ascii
+			| ord x >= 256 = e_utf x
+			| ord x > 0x7E = e_num x -- high ascii
+			| otherwise = [x]        -- printable ascii
+			where 
+				showoctal i = '\\' : printf "%03o" i
+				e_num c = showoctal $ ord c
+				-- unicode character is decomposed to
+				-- Word8s and each is shown in octal
+				e_utf c = showoctal =<< (Codec.Binary.UTF8.String.encode [c] :: [Word8])
+
+{- for quickcheck -}
+prop_idempotent_deencode :: String -> Bool
+prop_idempotent_deencode s = s == decode (encode s)