optimize truncateFilePath
Often the filepath will be all ASCII, or mostly so. With this optimisation, a filepath that ends in an ASCII suffix of sufficient length is round-trip converted between String and ByteString only once, rather than once per character. Sponsored-by: Graham Spencer
This commit is contained in:
parent
6211e2af4a
commit
f17ec601c4
4 changed files with 47 additions and 26 deletions
|
@ -29,12 +29,12 @@ import Data.Word
|
|||
genKeyName :: String -> S.ShortByteString
|
||||
genKeyName s
|
||||
-- Avoid making keys longer than the length of a SHA256 checksum.
|
||||
| bytelen > sha256len = S.toShort $ encodeBS $
|
||||
truncateFilePath (sha256len - md5len - 1) s' ++ "-" ++
|
||||
show (md5 bl)
|
||||
| otherwise = S.toShort $ encodeBS s'
|
||||
| bytelen > sha256len = S.toShort $
|
||||
truncateFilePath (sha256len - md5len - 1) s'
|
||||
<> "-" <> encodeBS (show (md5 bl))
|
||||
| otherwise = S.toShort s'
|
||||
where
|
||||
s' = preSanitizeKeyName s
|
||||
s' = encodeBS $ preSanitizeKeyName s
|
||||
bl = encodeBL s
|
||||
bytelen = fromIntegral $ L.length bl
|
||||
|
||||
|
|
|
@ -200,12 +200,12 @@ checkUrl addunlockedmatcher r o si u = do
|
|||
startRemote :: AddUnlockedMatcher -> Remote -> AddUrlOptions -> SeekInput -> FilePath -> URLString -> Maybe Integer -> CommandStart
|
||||
startRemote addunlockedmatcher r o si file uri sz = do
|
||||
pathmax <- liftIO $ fileNameLengthLimit "."
|
||||
let file' = joinPath $ map (truncateFilePath pathmax) $
|
||||
splitDirectories file
|
||||
let file' = P.joinPath $ map (truncateFilePath pathmax) $
|
||||
P.splitDirectories (toRawFilePath file)
|
||||
startingAddUrl si uri o $ do
|
||||
showNote $ UnquotedString $ "from " ++ Remote.name r
|
||||
showDestinationFile (toRawFilePath file')
|
||||
performRemote addunlockedmatcher r o uri (toRawFilePath file') sz
|
||||
showDestinationFile file'
|
||||
performRemote addunlockedmatcher r o uri file' sz
|
||||
|
||||
performRemote :: AddUnlockedMatcher -> Remote -> AddUrlOptions -> URLString -> RawFilePath -> Maybe Integer -> CommandPerform
|
||||
performRemote addunlockedmatcher r o uri file sz = lookupKey file >>= \case
|
||||
|
@ -279,7 +279,8 @@ sanitizeOrPreserveFilePath o f
|
|||
return f
|
||||
| otherwise = do
|
||||
pathmax <- liftIO $ fileNameLengthLimit "."
|
||||
return $ truncateFilePath pathmax $ sanitizeFilePath f
|
||||
return $ fromRawFilePath $ truncateFilePath pathmax $
|
||||
toRawFilePath $ sanitizeFilePath f
|
||||
|
||||
-- sanitizeFilePath avoids all these security problems
|
||||
-- (and probably others, but at least this catches the most egregious ones).
|
||||
|
@ -567,7 +568,7 @@ nodownloadWeb' o addunlockedmatcher url key file = checkCanAdd o file $ \canadd
|
|||
|
||||
url2file :: URI -> Maybe Int -> Int -> FilePath
|
||||
url2file url pathdepth pathmax = case pathdepth of
|
||||
Nothing -> truncateFilePath pathmax $ sanitizeFilePath fullurl
|
||||
Nothing -> truncatesanitize fullurl
|
||||
Just depth
|
||||
| depth >= length urlbits -> frombits id
|
||||
| depth > 0 -> frombits $ drop depth
|
||||
|
@ -580,8 +581,12 @@ url2file url pathdepth pathmax = case pathdepth of
|
|||
, uriQuery url
|
||||
]
|
||||
frombits a = intercalate "/" $ a urlbits
|
||||
urlbits = map (truncateFilePath pathmax . sanitizeFilePath) $
|
||||
urlbits = map truncatesanitize $
|
||||
filter (not . null) $ splitc '/' fullurl
|
||||
truncatesanitize = fromRawFilePath
|
||||
. truncateFilePath pathmax
|
||||
. toRawFilePath
|
||||
. sanitizeFilePath
|
||||
|
||||
urlString2file :: URLString -> Maybe Int -> Int -> FilePath
|
||||
urlString2file s pathdepth pathmax = case Url.parseURIRelaxed s of
|
||||
|
|
|
@ -33,6 +33,8 @@ import qualified Data.ByteString.Lazy.UTF8 as L8
|
|||
import qualified GHC.Foreign as GHC
|
||||
import System.IO.Unsafe
|
||||
import Data.ByteString.Unsafe (unsafePackMallocCStringLen)
|
||||
import Data.Char
|
||||
import Data.List
|
||||
#endif
|
||||
|
||||
{- Makes all subsequent Handles that are opened, as well as stdio Handles,
|
||||
|
@ -125,26 +127,40 @@ toRawFilePath = encodeFilePath
|
|||
- Avoids returning an invalid part of a unicode byte sequence, at the
|
||||
- cost of efficiency when running on a large FilePath.
|
||||
-}
|
||||
truncateFilePath :: Int -> FilePath -> FilePath
|
||||
truncateFilePath :: Int -> RawFilePath -> RawFilePath
|
||||
#ifndef mingw32_HOST_OS
|
||||
truncateFilePath n = go . reverse
|
||||
{- On unix, do not assume a unicode locale, but do assume ascii
|
||||
- characters are a single byte. -}
|
||||
truncateFilePath n b =
|
||||
let blen = S.length b
|
||||
in if blen <= n
|
||||
then b
|
||||
else go blen (reverse (fromRawFilePath b))
|
||||
where
|
||||
go f =
|
||||
let b = encodeBS f
|
||||
in if S.length b <= n
|
||||
then reverse f
|
||||
else go (drop 1 f)
|
||||
go blen f = case uncons f of
|
||||
Just (c, f')
|
||||
| isAscii c ->
|
||||
let blen' = blen - 1
|
||||
in if blen' <= n
|
||||
then toRawFilePath (reverse f')
|
||||
else go blen' f'
|
||||
| otherwise ->
|
||||
let blen' = S.length (toRawFilePath f')
|
||||
in if blen' <= n
|
||||
then toRawFilePath (reverse f')
|
||||
else go blen' f'
|
||||
Nothing -> toRawFilePath (reverse f)
|
||||
#else
|
||||
{- On Windows, count the number of bytes used by each utf8 character. -}
|
||||
truncateFilePath n = reverse . go [] n . L8.fromString
|
||||
truncateFilePath n = toRawFilePath . reverse . go [] n
|
||||
where
|
||||
go coll cnt bs
|
||||
| cnt <= 0 = coll
|
||||
| otherwise = case L8.decode bs of
|
||||
Just (c, x) | c /= L8.replacement_char ->
|
||||
| otherwise = case S8.decode bs of
|
||||
Just (c, x) | c /= S8.replacement_char ->
|
||||
let x' = fromIntegral x
|
||||
in if cnt - x' < 0
|
||||
then coll
|
||||
else go (c:coll) (cnt - x') (L8.drop 1 bs)
|
||||
else go (c:coll) (cnt - x') (S8.drop 1 bs)
|
||||
_ -> coll
|
||||
#endif
|
||||
|
|
|
@ -112,8 +112,8 @@ relatedTemplate f
|
|||
{- Some filesystems like FAT have issues with filenames
|
||||
- ending in ".", so avoid truncating a filename to end
|
||||
- that way. -}
|
||||
toOsPath $ B.dropWhileEnd (== dot) $ toRawFilePath $
|
||||
truncateFilePath (len - templateAddedLength) (fromRawFilePath f)
|
||||
toOsPath $ B.dropWhileEnd (== dot) $
|
||||
truncateFilePath (len - templateAddedLength) f
|
||||
| otherwise = toOsPath f
|
||||
where
|
||||
len = B.length f
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue