diff --git a/Backend/Utilities.hs b/Backend/Utilities.hs index 304cfaac16..244ded29e5 100644 --- a/Backend/Utilities.hs +++ b/Backend/Utilities.hs @@ -29,12 +29,12 @@ import Data.Word genKeyName :: String -> S.ShortByteString genKeyName s -- Avoid making keys longer than the length of a SHA256 checksum. - | bytelen > sha256len = S.toShort $ encodeBS $ - truncateFilePath (sha256len - md5len - 1) s' ++ "-" ++ - show (md5 bl) - | otherwise = S.toShort $ encodeBS s' + | bytelen > sha256len = S.toShort $ + truncateFilePath (sha256len - md5len - 1) s' + <> "-" <> encodeBS (show (md5 bl)) + | otherwise = S.toShort s' where - s' = preSanitizeKeyName s + s' = encodeBS $ preSanitizeKeyName s bl = encodeBL s bytelen = fromIntegral $ L.length bl diff --git a/Command/AddUrl.hs b/Command/AddUrl.hs index a749c55527..d464dbd048 100644 --- a/Command/AddUrl.hs +++ b/Command/AddUrl.hs @@ -200,12 +200,12 @@ checkUrl addunlockedmatcher r o si u = do startRemote :: AddUnlockedMatcher -> Remote -> AddUrlOptions -> SeekInput -> FilePath -> URLString -> Maybe Integer -> CommandStart startRemote addunlockedmatcher r o si file uri sz = do pathmax <- liftIO $ fileNameLengthLimit "." - let file' = joinPath $ map (truncateFilePath pathmax) $ - splitDirectories file + let file' = P.joinPath $ map (truncateFilePath pathmax) $ + P.splitDirectories (toRawFilePath file) startingAddUrl si uri o $ do showNote $ UnquotedString $ "from " ++ Remote.name r - showDestinationFile (toRawFilePath file') - performRemote addunlockedmatcher r o uri (toRawFilePath file') sz + showDestinationFile file' + performRemote addunlockedmatcher r o uri file' sz performRemote :: AddUnlockedMatcher -> Remote -> AddUrlOptions -> URLString -> RawFilePath -> Maybe Integer -> CommandPerform performRemote addunlockedmatcher r o uri file sz = lookupKey file >>= \case @@ -279,7 +279,8 @@ sanitizeOrPreserveFilePath o f return f | otherwise = do pathmax <- liftIO $ fileNameLengthLimit "." - return $ truncateFilePath pathmax $ sanitizeFilePath f + return $ fromRawFilePath $ truncateFilePath pathmax $ + toRawFilePath $ sanitizeFilePath f -- sanitizeFilePath avoids all these security problems -- (and probably others, but at least this catches the most egrarious ones). @@ -567,8 +568,8 @@ nodownloadWeb' o addunlockedmatcher url key file = checkCanAdd o file $ \canadd url2file :: URI -> Maybe Int -> Int -> FilePath url2file url pathdepth pathmax = case pathdepth of - Nothing -> truncateFilePath pathmax $ sanitizeFilePath fullurl - Just depth + Nothing -> truncatesanitize fullurl + Just depth | depth >= length urlbits -> frombits id | depth > 0 -> frombits $ drop depth | depth < 0 -> frombits $ reverse . take (negate depth) . reverse @@ -580,8 +581,12 @@ url2file url pathdepth pathmax = case pathdepth of , uriQuery url ] frombits a = intercalate "/" $ a urlbits - urlbits = map (truncateFilePath pathmax . sanitizeFilePath) $ + urlbits = map truncatesanitize $ filter (not . null) $ splitc '/' fullurl + truncatesanitize = fromRawFilePath + . truncateFilePath pathmax + . toRawFilePath + . sanitizeFilePath urlString2file :: URLString -> Maybe Int -> Int -> FilePath urlString2file s pathdepth pathmax = case Url.parseURIRelaxed s of diff --git a/Utility/FileSystemEncoding.hs b/Utility/FileSystemEncoding.hs index 10c87ca2f3..b4497f30af 100644 --- a/Utility/FileSystemEncoding.hs +++ b/Utility/FileSystemEncoding.hs @@ -33,6 +33,8 @@ import qualified Data.ByteString.Lazy.UTF8 as L8 import qualified GHC.Foreign as GHC import System.IO.Unsafe import Data.ByteString.Unsafe (unsafePackMallocCStringLen) +import Data.Char +import Data.List #endif {- Makes all subsequent Handles that are opened, as well as stdio Handles, @@ -125,26 +127,40 @@ toRawFilePath = encodeFilePath - Avoids returning an invalid part of a unicode byte sequence, at the - cost of efficiency when running on a large FilePath. -} -truncateFilePath :: Int -> FilePath -> FilePath +truncateFilePath :: Int -> RawFilePath -> RawFilePath #ifndef mingw32_HOST_OS -truncateFilePath n = go . reverse +{- On unix, do not assume a unicode locale, but does assume ascii + - characters are a single byte. -} +truncateFilePath n b = + let blen = S.length b + in if blen <= n + then b + else go blen (reverse (fromRawFilePath b)) where - go f = - let b = encodeBS f - in if S.length b <= n - then reverse f - else go (drop 1 f) + go blen f = case uncons f of + Just (c, f') + | isAscii c -> + let blen' = blen - 1 + in if blen' <= n + then toRawFilePath (reverse f') + else go blen' f' + | otherwise -> + let blen' = S.length (toRawFilePath f') + in if blen' <= n + then toRawFilePath (reverse f') + else go blen' f' + Nothing -> toRawFilePath (reverse f) #else {- On Windows, count the number of bytes used by each utf8 character. -} -truncateFilePath n = reverse . go [] n . L8.fromString +truncateFilePath n = toRawFilePath . reverse . go [] n where go coll cnt bs | cnt <= 0 = coll - | otherwise = case L8.decode bs of - Just (c, x) | c /= L8.replacement_char -> + | otherwise = case S8.decode bs of + Just (c, x) | c /= S8.replacement_char -> let x' = fromIntegral x in if cnt - x' < 0 then coll - else go (c:coll) (cnt - x') (L8.drop 1 bs) + else go (c:coll) (cnt - x') (S8.drop 1 bs) _ -> coll #endif diff --git a/Utility/Tmp.hs b/Utility/Tmp.hs index 29fce9652c..c33a9916c1 100644 --- a/Utility/Tmp.hs +++ b/Utility/Tmp.hs @@ -112,8 +112,8 @@ relatedTemplate f {- Some filesystems like FAT have issues with filenames - ending in ".", so avoid truncating a filename to end - that way. -} - toOsPath $ B.dropWhileEnd (== dot) $ toRawFilePath $ - truncateFilePath (len - templateAddedLength) (fromRawFilePath f) + toOsPath $ B.dropWhileEnd (== dot) $ + truncateFilePath (len - templateAddedLength) f | otherwise = toOsPath f where len = B.length f