007e302637
UnquotedString does not need to be quoted, but still it's possible it contains something attacker-controlled, which could have an escape sequence or control character in it. This is a convenient place to filter out such things, since quoting alrready handles those in filenames. Sponsored-by: Luke Shumaker on Patreon
118 lines
3.3 KiB
Haskell
118 lines
3.3 KiB
Haskell
{- Some git commands output quoted filenames, in a rather annoyingly complex
|
|
- C-style encoding.
|
|
-
|
|
- Copyright 2010-2023 Joey Hess <id@joeyh.name>
|
|
-
|
|
- Licensed under the GNU AGPL version 3 or higher.
|
|
-}
|
|
|
|
{-# LANGUAGE OverloadedStrings, TypeSynonymInstances #-}
|
|
|
|
module Git.Filename (
|
|
unquote,
|
|
quote,
|
|
noquote,
|
|
QuotePath(..),
|
|
StringContainingQuotedPath(..),
|
|
quotedPaths,
|
|
prop_quote_unquote_roundtrip,
|
|
) where
|
|
|
|
import Common
|
|
import Utility.Format (decode_c, encode_c, encode_c', isUtf8Byte)
|
|
import Utility.QuickCheck
|
|
import Utility.SafeOutput
|
|
|
|
import Data.Char
|
|
import Data.Word
|
|
import Data.String
|
|
import qualified Data.ByteString as S
|
|
import qualified Data.Semigroup as Sem
|
|
import Prelude
|
|
|
|
unquote :: S.ByteString -> RawFilePath
|
|
unquote b = case S.uncons b of
|
|
Nothing -> b
|
|
Just (h, t)
|
|
| h /= q -> b
|
|
| otherwise -> case S.unsnoc t of
|
|
Nothing -> b
|
|
Just (i, l)
|
|
| l /= q -> b
|
|
| otherwise -> decode_c i
|
|
where
|
|
q :: Word8
|
|
q = fromIntegral (ord '"')
|
|
|
|
-- always encodes and double quotes, even in cases that git does not
|
|
quoteAlways :: RawFilePath -> S.ByteString
|
|
quoteAlways s = "\"" <> encode_c needencode s <> "\""
|
|
where
|
|
needencode c = isUtf8Byte c || c == fromIntegral (ord '"')
|
|
|
|
-- git config core.quotePath controls whether to quote unicode characters
|
|
newtype QuotePath = QuotePath Bool
|
|
|
|
class Quoteable t where
|
|
-- double quotes and encodes when git would
|
|
quote :: QuotePath -> t -> S.ByteString
|
|
|
|
noquote :: t -> S.ByteString
|
|
|
|
instance Quoteable RawFilePath where
|
|
quote (QuotePath qp) s = case encode_c' needencode s of
|
|
Nothing -> s
|
|
Just s' -> "\"" <> s' <> "\""
|
|
where
|
|
needencode c
|
|
| c == fromIntegral (ord '"') = True
|
|
| qp = isUtf8Byte c
|
|
| otherwise = False
|
|
|
|
noquote = id
|
|
|
|
-- Allows building up a string that contains paths, which will get quoted.
|
|
-- With OverloadedStrings, strings are passed through without quoting.
|
|
-- Eg: QuotedPath f <> ": not found"
|
|
data StringContainingQuotedPath
|
|
= UnquotedString String
|
|
| QuotedPath RawFilePath
|
|
| StringContainingQuotedPath :+: StringContainingQuotedPath
|
|
deriving (Show, Eq)
|
|
|
|
quotedPaths :: [RawFilePath] -> StringContainingQuotedPath
|
|
quotedPaths [] = mempty
|
|
quotedPaths (p:ps) = QuotedPath p <> if null ps
|
|
then mempty
|
|
else " " <> quotedPaths ps
|
|
|
|
instance Quoteable StringContainingQuotedPath where
|
|
quote _ (UnquotedString s) = safeOutput (encodeBS s)
|
|
quote qp (QuotedPath p) = quote qp p
|
|
quote qp (a :+: b) = quote qp a <> quote qp b
|
|
|
|
noquote (UnquotedString s) = encodeBS s
|
|
noquote (QuotedPath p) = p
|
|
noquote (a :+: b) = noquote a <> noquote b
|
|
|
|
instance IsString StringContainingQuotedPath where
|
|
fromString = UnquotedString
|
|
|
|
instance Sem.Semigroup StringContainingQuotedPath where
|
|
UnquotedString a <> UnquotedString b = UnquotedString (a <> b)
|
|
a <> b = a :+: b
|
|
|
|
instance Monoid StringContainingQuotedPath where
|
|
mempty = UnquotedString mempty
|
|
|
|
-- Encoding and then decoding roundtrips only when the string does not
|
|
-- contain high unicode, because eg, both "\12345" and "\227\128\185"
|
|
-- are encoded to "\343\200\271".
|
|
--
|
|
-- That is not a real-world problem, and using TestableFilePath
|
|
-- limits what's tested to ascii, so avoids running into it.
|
|
prop_quote_unquote_roundtrip :: TestableFilePath -> Bool
|
|
prop_quote_unquote_roundtrip ts =
|
|
s == fromRawFilePath (unquote (quoteAlways (toRawFilePath s)))
|
|
where
|
|
s = fromTestableFilePath ts
|