optimize truncateFilePath
Often the filepath will be all ascii, or mostly so, and this optimisation makes a file that has an ascii suffix of sufficient length be roundtrip converted between String and ByteString only once, rather than once per character. Sponsored-by: Graham Spencer
This commit is contained in:
parent
6211e2af4a
commit
f17ec601c4
4 changed files with 47 additions and 26 deletions
|
@ -29,12 +29,12 @@ import Data.Word
|
||||||
genKeyName :: String -> S.ShortByteString
|
genKeyName :: String -> S.ShortByteString
|
||||||
genKeyName s
|
genKeyName s
|
||||||
-- Avoid making keys longer than the length of a SHA256 checksum.
|
-- Avoid making keys longer than the length of a SHA256 checksum.
|
||||||
| bytelen > sha256len = S.toShort $ encodeBS $
|
| bytelen > sha256len = S.toShort $
|
||||||
truncateFilePath (sha256len - md5len - 1) s' ++ "-" ++
|
truncateFilePath (sha256len - md5len - 1) s'
|
||||||
show (md5 bl)
|
<> "-" <> encodeBS (show (md5 bl))
|
||||||
| otherwise = S.toShort $ encodeBS s'
|
| otherwise = S.toShort s'
|
||||||
where
|
where
|
||||||
s' = preSanitizeKeyName s
|
s' = encodeBS $ preSanitizeKeyName s
|
||||||
bl = encodeBL s
|
bl = encodeBL s
|
||||||
bytelen = fromIntegral $ L.length bl
|
bytelen = fromIntegral $ L.length bl
|
||||||
|
|
||||||
|
|
|
@ -200,12 +200,12 @@ checkUrl addunlockedmatcher r o si u = do
|
||||||
startRemote :: AddUnlockedMatcher -> Remote -> AddUrlOptions -> SeekInput -> FilePath -> URLString -> Maybe Integer -> CommandStart
|
startRemote :: AddUnlockedMatcher -> Remote -> AddUrlOptions -> SeekInput -> FilePath -> URLString -> Maybe Integer -> CommandStart
|
||||||
startRemote addunlockedmatcher r o si file uri sz = do
|
startRemote addunlockedmatcher r o si file uri sz = do
|
||||||
pathmax <- liftIO $ fileNameLengthLimit "."
|
pathmax <- liftIO $ fileNameLengthLimit "."
|
||||||
let file' = joinPath $ map (truncateFilePath pathmax) $
|
let file' = P.joinPath $ map (truncateFilePath pathmax) $
|
||||||
splitDirectories file
|
P.splitDirectories (toRawFilePath file)
|
||||||
startingAddUrl si uri o $ do
|
startingAddUrl si uri o $ do
|
||||||
showNote $ UnquotedString $ "from " ++ Remote.name r
|
showNote $ UnquotedString $ "from " ++ Remote.name r
|
||||||
showDestinationFile (toRawFilePath file')
|
showDestinationFile file'
|
||||||
performRemote addunlockedmatcher r o uri (toRawFilePath file') sz
|
performRemote addunlockedmatcher r o uri file' sz
|
||||||
|
|
||||||
performRemote :: AddUnlockedMatcher -> Remote -> AddUrlOptions -> URLString -> RawFilePath -> Maybe Integer -> CommandPerform
|
performRemote :: AddUnlockedMatcher -> Remote -> AddUrlOptions -> URLString -> RawFilePath -> Maybe Integer -> CommandPerform
|
||||||
performRemote addunlockedmatcher r o uri file sz = lookupKey file >>= \case
|
performRemote addunlockedmatcher r o uri file sz = lookupKey file >>= \case
|
||||||
|
@ -279,7 +279,8 @@ sanitizeOrPreserveFilePath o f
|
||||||
return f
|
return f
|
||||||
| otherwise = do
|
| otherwise = do
|
||||||
pathmax <- liftIO $ fileNameLengthLimit "."
|
pathmax <- liftIO $ fileNameLengthLimit "."
|
||||||
return $ truncateFilePath pathmax $ sanitizeFilePath f
|
return $ fromRawFilePath $ truncateFilePath pathmax $
|
||||||
|
toRawFilePath $ sanitizeFilePath f
|
||||||
|
|
||||||
-- sanitizeFilePath avoids all these security problems
|
-- sanitizeFilePath avoids all these security problems
|
||||||
-- (and probably others, but at least this catches the most egrarious ones).
|
-- (and probably others, but at least this catches the most egrarious ones).
|
||||||
|
@ -567,8 +568,8 @@ nodownloadWeb' o addunlockedmatcher url key file = checkCanAdd o file $ \canadd
|
||||||
|
|
||||||
url2file :: URI -> Maybe Int -> Int -> FilePath
|
url2file :: URI -> Maybe Int -> Int -> FilePath
|
||||||
url2file url pathdepth pathmax = case pathdepth of
|
url2file url pathdepth pathmax = case pathdepth of
|
||||||
Nothing -> truncateFilePath pathmax $ sanitizeFilePath fullurl
|
Nothing -> truncatesanitize fullurl
|
||||||
Just depth
|
Just depth
|
||||||
| depth >= length urlbits -> frombits id
|
| depth >= length urlbits -> frombits id
|
||||||
| depth > 0 -> frombits $ drop depth
|
| depth > 0 -> frombits $ drop depth
|
||||||
| depth < 0 -> frombits $ reverse . take (negate depth) . reverse
|
| depth < 0 -> frombits $ reverse . take (negate depth) . reverse
|
||||||
|
@ -580,8 +581,12 @@ url2file url pathdepth pathmax = case pathdepth of
|
||||||
, uriQuery url
|
, uriQuery url
|
||||||
]
|
]
|
||||||
frombits a = intercalate "/" $ a urlbits
|
frombits a = intercalate "/" $ a urlbits
|
||||||
urlbits = map (truncateFilePath pathmax . sanitizeFilePath) $
|
urlbits = map truncatesanitize $
|
||||||
filter (not . null) $ splitc '/' fullurl
|
filter (not . null) $ splitc '/' fullurl
|
||||||
|
truncatesanitize = fromRawFilePath
|
||||||
|
. truncateFilePath pathmax
|
||||||
|
. toRawFilePath
|
||||||
|
. sanitizeFilePath
|
||||||
|
|
||||||
urlString2file :: URLString -> Maybe Int -> Int -> FilePath
|
urlString2file :: URLString -> Maybe Int -> Int -> FilePath
|
||||||
urlString2file s pathdepth pathmax = case Url.parseURIRelaxed s of
|
urlString2file s pathdepth pathmax = case Url.parseURIRelaxed s of
|
||||||
|
|
|
@ -33,6 +33,8 @@ import qualified Data.ByteString.Lazy.UTF8 as L8
|
||||||
import qualified GHC.Foreign as GHC
|
import qualified GHC.Foreign as GHC
|
||||||
import System.IO.Unsafe
|
import System.IO.Unsafe
|
||||||
import Data.ByteString.Unsafe (unsafePackMallocCStringLen)
|
import Data.ByteString.Unsafe (unsafePackMallocCStringLen)
|
||||||
|
import Data.Char
|
||||||
|
import Data.List
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
{- Makes all subsequent Handles that are opened, as well as stdio Handles,
|
{- Makes all subsequent Handles that are opened, as well as stdio Handles,
|
||||||
|
@ -125,26 +127,40 @@ toRawFilePath = encodeFilePath
|
||||||
- Avoids returning an invalid part of a unicode byte sequence, at the
|
- Avoids returning an invalid part of a unicode byte sequence, at the
|
||||||
- cost of efficiency when running on a large FilePath.
|
- cost of efficiency when running on a large FilePath.
|
||||||
-}
|
-}
|
||||||
truncateFilePath :: Int -> FilePath -> FilePath
|
truncateFilePath :: Int -> RawFilePath -> RawFilePath
|
||||||
#ifndef mingw32_HOST_OS
|
#ifndef mingw32_HOST_OS
|
||||||
truncateFilePath n = go . reverse
|
{- On unix, do not assume a unicode locale, but does assume ascii
|
||||||
|
- characters are a single byte. -}
|
||||||
|
truncateFilePath n b =
|
||||||
|
let blen = S.length b
|
||||||
|
in if blen <= n
|
||||||
|
then b
|
||||||
|
else go blen (reverse (fromRawFilePath b))
|
||||||
where
|
where
|
||||||
go f =
|
go blen f = case uncons f of
|
||||||
let b = encodeBS f
|
Just (c, f')
|
||||||
in if S.length b <= n
|
| isAscii c ->
|
||||||
then reverse f
|
let blen' = blen - 1
|
||||||
else go (drop 1 f)
|
in if blen' <= n
|
||||||
|
then toRawFilePath (reverse f')
|
||||||
|
else go blen' f'
|
||||||
|
| otherwise ->
|
||||||
|
let blen' = S.length (toRawFilePath f')
|
||||||
|
in if blen' <= n
|
||||||
|
then toRawFilePath (reverse f')
|
||||||
|
else go blen' f'
|
||||||
|
Nothing -> toRawFilePath (reverse f)
|
||||||
#else
|
#else
|
||||||
{- On Windows, count the number of bytes used by each utf8 character. -}
|
{- On Windows, count the number of bytes used by each utf8 character. -}
|
||||||
truncateFilePath n = reverse . go [] n . L8.fromString
|
truncateFilePath n = toRawFilePath . reverse . go [] n
|
||||||
where
|
where
|
||||||
go coll cnt bs
|
go coll cnt bs
|
||||||
| cnt <= 0 = coll
|
| cnt <= 0 = coll
|
||||||
| otherwise = case L8.decode bs of
|
| otherwise = case S8.decode bs of
|
||||||
Just (c, x) | c /= L8.replacement_char ->
|
Just (c, x) | c /= S8.replacement_char ->
|
||||||
let x' = fromIntegral x
|
let x' = fromIntegral x
|
||||||
in if cnt - x' < 0
|
in if cnt - x' < 0
|
||||||
then coll
|
then coll
|
||||||
else go (c:coll) (cnt - x') (L8.drop 1 bs)
|
else go (c:coll) (cnt - x') (S8.drop 1 bs)
|
||||||
_ -> coll
|
_ -> coll
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -112,8 +112,8 @@ relatedTemplate f
|
||||||
{- Some filesystems like FAT have issues with filenames
|
{- Some filesystems like FAT have issues with filenames
|
||||||
- ending in ".", so avoid truncating a filename to end
|
- ending in ".", so avoid truncating a filename to end
|
||||||
- that way. -}
|
- that way. -}
|
||||||
toOsPath $ B.dropWhileEnd (== dot) $ toRawFilePath $
|
toOsPath $ B.dropWhileEnd (== dot) $
|
||||||
truncateFilePath (len - templateAddedLength) (fromRawFilePath f)
|
truncateFilePath (len - templateAddedLength) f
|
||||||
| otherwise = toOsPath f
|
| otherwise = toOsPath f
|
||||||
where
|
where
|
||||||
len = B.length f
|
len = B.length f
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue