strict bytestring encoders and decoders

Only had lazy ones before.

Already sped up a few parts of the code.
This commit is contained in:
Joey Hess 2019-01-01 14:54:06 -04:00
parent 9cc6d5549b
commit b3c69eaaf8
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
14 changed files with 41 additions and 25 deletions

View file

@ -243,7 +243,7 @@ getHistorical date file =
) )
getRef :: Ref -> FilePath -> Annex String getRef :: Ref -> FilePath -> Annex String
getRef ref file = withIndex $ decodeBS <$> catFile ref file getRef ref file = withIndex $ decodeBL <$> catFile ref file
{- Applies a function to modify the content of a file. {- Applies a function to modify the content of a file.
- -
@ -320,7 +320,7 @@ commitIndex' jl branchref message basemessage retrynum parents = do
where where
-- look for "parent ref" lines and return the refs -- look for "parent ref" lines and return the refs
commitparents = map (Git.Ref . snd) . filter isparent . commitparents = map (Git.Ref . snd) . filter isparent .
map (toassoc . decodeBS) . L.split newline map (toassoc . decodeBL) . L.split newline
newline = fromIntegral (ord '\n') newline = fromIntegral (ord '\n')
toassoc = separate (== ' ') toassoc = separate (== ' ')
isparent (k,_) = k == "parent" isparent (k,_) = k == "parent"
@ -522,7 +522,7 @@ handleTransitions jl localts refs = do
return True return True
where where
getreftransition ref = do getreftransition ref = do
ts <- parseTransitionsStrictly "remote" . decodeBS ts <- parseTransitionsStrictly "remote" . decodeBL
<$> catFile ref transitionsLog <$> catFile ref transitionsLog
return (ref, ts) return (ref, ts)
@ -595,7 +595,7 @@ performTransitionsLocked jl ts neednewlocalbranch transitionedrefs = do
checkBranchDifferences :: Git.Ref -> Annex () checkBranchDifferences :: Git.Ref -> Annex ()
checkBranchDifferences ref = do checkBranchDifferences ref = do
theirdiffs <- allDifferences . parseDifferencesLog . decodeBS theirdiffs <- allDifferences . parseDifferencesLog . decodeBL
<$> catFile ref differenceLog <$> catFile ref differenceLog
mydiffs <- annexDifferences <$> Annex.getGitConfig mydiffs <- annexDifferences <$> Annex.getGitConfig
when (theirdiffs /= mydiffs) $ when (theirdiffs /= mydiffs) $

View file

@ -111,7 +111,7 @@ catKey ref = go =<< catObjectMetaData ref
{- Gets a symlink target. -} {- Gets a symlink target. -}
catSymLinkTarget :: Sha -> Annex String catSymLinkTarget :: Sha -> Annex String
catSymLinkTarget sha = fromInternalGitPath . decodeBS <$> get catSymLinkTarget sha = fromInternalGitPath . decodeBL <$> get
where where
-- Avoid buffering the whole file content, which might be large. -- Avoid buffering the whole file content, which might be large.
-- 8192 is enough if it really is a symlink. -- 8192 is enough if it really is a symlink.

View file

@ -67,14 +67,14 @@ hashDirs _ sz s = addTrailingPathSeparator $ take sz s </> drop sz s
hashDirLower :: HashLevels -> Hasher hashDirLower :: HashLevels -> Hasher
hashDirLower n k = hashDirs n 3 $ take 6 $ show $ md5 $ hashDirLower n k = hashDirs n 3 $ take 6 $ show $ md5 $
encodeBS $ key2file $ nonChunkKey k encodeBL $ key2file $ nonChunkKey k
{- This was originally using Data.Hash.MD5 from MissingH. This new version {- This was originally using Data.Hash.MD5 from MissingH. This new version
- is faster, but ugly as it has to replicate the 4 Word32's that produced. -} - is faster, but ugly as it has to replicate the 4 Word32's that produced. -}
hashDirMixed :: HashLevels -> Hasher hashDirMixed :: HashLevels -> Hasher
hashDirMixed n k = hashDirs n 2 $ take 4 $ concatMap display_32bits_as_dir $ hashDirMixed n k = hashDirs n 2 $ take 4 $ concatMap display_32bits_as_dir $
encodeWord32 $ map fromIntegral $ Data.ByteArray.unpack $ encodeWord32 $ map fromIntegral $ Data.ByteArray.unpack $
Utility.Hash.md5 $ encodeBS $ key2file $ nonChunkKey k Utility.Hash.md5 $ encodeBL $ key2file $ nonChunkKey k
where where
encodeWord32 (b1:b2:b3:b4:rest) = encodeWord32 (b1:b2:b3:b4:rest) =
(shiftL b4 24 .|. shiftL b3 16 .|. shiftL b2 8 .|. b1) (shiftL b4 24 .|. shiftL b3 16 .|. shiftL b2 8 .|. b1)

View file

@ -228,7 +228,7 @@ unableToRestage mf = unwords
- lines. -} - lines. -}
parseLinkOrPointer :: L.ByteString -> Maybe Key parseLinkOrPointer :: L.ByteString -> Maybe Key
parseLinkOrPointer = parseLinkOrPointer' parseLinkOrPointer = parseLinkOrPointer'
. decodeBS . L.take (fromIntegral maxPointerSz) . decodeBL . L.take (fromIntegral maxPointerSz)
where where
{- Want to avoid buffering really big files in git into {- Want to avoid buffering really big files in git into

View file

@ -300,7 +300,7 @@ hostport2socket host (Just port) = hostport2socket' $
fromSshHost host ++ "!" ++ show port fromSshHost host ++ "!" ++ show port
hostport2socket' :: String -> FilePath hostport2socket' :: String -> FilePath
hostport2socket' s hostport2socket' s
| length s > lengthofmd5s = show $ md5 $ encodeBS s | length s > lengthofmd5s = show $ md5 $ encodeBL s
| otherwise = s | otherwise = s
where where
lengthofmd5s = 32 lengthofmd5s = 32

View file

@ -41,4 +41,4 @@ variantFile file key
doubleconflict = variantMarker `isInfixOf` file doubleconflict = variantMarker `isInfixOf` file
shortHash :: String -> String shortHash :: String -> String
shortHash = take 4 . show . md5 . encodeBS shortHash = take 4 . show . md5 . encodeBL

View file

@ -20,7 +20,7 @@ genKeyName s
-- Avoid making keys longer than the length of a SHA256 checksum. -- Avoid making keys longer than the length of a SHA256 checksum.
| bytelen > sha256len = | bytelen > sha256len =
truncateFilePath (sha256len - md5len - 1) s' ++ "-" ++ truncateFilePath (sha256len - md5len - 1) s' ++ "-" ++
show (md5 (encodeBS s)) show (md5 (encodeBL s))
| otherwise = s' | otherwise = s'
where where
s' = preSanitizeKeyName s s' = preSanitizeKeyName s

View file

@ -207,7 +207,7 @@ catTree h treeref = go <$> catObjectDetails h treeref
dropsha = L.drop 21 dropsha = L.drop 21
parsemodefile b = parsemodefile b =
let (modestr, file) = separate (== ' ') (decodeBS b) let (modestr, file) = separate (== ' ') (decodeBL b)
in (file, readmode modestr) in (file, readmode modestr)
readmode = fromMaybe 0 . fmap fst . headMaybe . readOct readmode = fromMaybe 0 . fmap fst . headMaybe . readOct

View file

@ -350,7 +350,7 @@ btshowmetainfo torrent field =
torrentFileSizes :: FilePath -> IO [(FilePath, Integer)] torrentFileSizes :: FilePath -> IO [(FilePath, Integer)]
torrentFileSizes torrent = do torrentFileSizes torrent = do
#ifdef WITH_TORRENTPARSER #ifdef WITH_TORRENTPARSER
let mkfile = joinPath . map (scrub . decodeBS) let mkfile = joinPath . map (scrub . decodeBL)
b <- B.readFile torrent b <- B.readFile torrent
return $ case readTorrent b of return $ case readTorrent b of
Left e -> giveup $ "failed to parse torrent: " ++ e Left e -> giveup $ "failed to parse torrent: " ++ e

View file

@ -1704,7 +1704,7 @@ test_add_subdirs = intmpclonerepo $ do
- calculated correctly for files in subdirs. -} - calculated correctly for files in subdirs. -}
unlessM (unlockedFiles <$> getTestMode) $ do unlessM (unlockedFiles <$> getTestMode) $ do
git_annex "sync" [] @? "sync failed" git_annex "sync" [] @? "sync failed"
l <- annexeval $ Utility.FileSystemEncoding.decodeBS l <- annexeval $ Utility.FileSystemEncoding.decodeBL
<$> Annex.CatFile.catObject (Git.Types.Ref "HEAD:dir/foo") <$> Annex.CatFile.catObject (Git.Types.Ref "HEAD:dir/foo")
"../.git/annex/" `isPrefixOf` l @? ("symlink from subdir to .git/annex is wrong: " ++ l) "../.git/annex/" `isPrefixOf` l @? ("symlink from subdir to .git/annex is wrong: " ++ l)

View file

@ -22,7 +22,6 @@ import qualified Data.Aeson
import qualified Data.Text as T import qualified Data.Text as T
import qualified Data.Text.Encoding as T import qualified Data.Text.Encoding as T
import qualified Data.ByteString.Lazy as L import qualified Data.ByteString.Lazy as L
import qualified Data.ByteString as S
import qualified Data.Set import qualified Data.Set
import qualified Data.Vector import qualified Data.Vector
import Prelude import Prelude
@ -53,7 +52,7 @@ instance ToJSON' String where
-- Note that if the string contains invalid UTF8 characters not using -- Note that if the string contains invalid UTF8 characters not using
-- the FileSystemEncoding, this is the same as Data.Text.pack. -- the FileSystemEncoding, this is the same as Data.Text.pack.
packString :: String -> T.Text packString :: String -> T.Text
packString s = case T.decodeUtf8' (S.concat $ L.toChunks $ encodeBS s) of packString s = case T.decodeUtf8' (encodeBS s) of
Right t -> t Right t -> t
Left _ -> T.pack s Left _ -> T.pack s

View file

@ -19,10 +19,10 @@ import Data.ByteString.UTF8 (fromString, toString)
import Data.Char import Data.Char
toB64 :: String -> String toB64 :: String -> String
toB64 = toString . B64.encode . L.toStrict . encodeBS toB64 = toString . B64.encode . encodeBS
fromB64Maybe :: String -> Maybe String fromB64Maybe :: String -> Maybe String
fromB64Maybe s = either (const Nothing) (Just . decodeBS . L.fromStrict) fromB64Maybe s = either (const Nothing) (Just . decodeBL . L.fromStrict)
(B64.decode $ fromString s) (B64.decode $ fromString s)
fromB64 :: String -> String fromB64 :: String -> String

View file

@ -15,7 +15,9 @@ module Utility.FileSystemEncoding (
RawFilePath, RawFilePath,
fromRawFilePath, fromRawFilePath,
toRawFilePath, toRawFilePath,
decodeBL,
decodeBS, decodeBS,
encodeBL,
encodeBS, encodeBS,
decodeW8, decodeW8,
encodeW8, encodeW8,
@ -38,6 +40,7 @@ import Data.List
import qualified Data.ByteString as S import qualified Data.ByteString as S
import qualified Data.ByteString.Lazy as L import qualified Data.ByteString.Lazy as L
#ifdef mingw32_HOST_OS #ifdef mingw32_HOST_OS
import qualified Data.ByteString.UTF8 as S8
import qualified Data.ByteString.Lazy.UTF8 as L8 import qualified Data.ByteString.Lazy.UTF8 as L8
#endif #endif
@ -107,21 +110,35 @@ _encodeFilePath fp = unsafePerformIO $ do
`catchNonAsync` (\_ -> return fp) `catchNonAsync` (\_ -> return fp)
{- Decodes a ByteString into a FilePath, applying the filesystem encoding. -} {- Decodes a ByteString into a FilePath, applying the filesystem encoding. -}
decodeBS :: L.ByteString -> FilePath decodeBL :: L.ByteString -> FilePath
#ifndef mingw32_HOST_OS #ifndef mingw32_HOST_OS
decodeBS = encodeW8NUL . L.unpack decodeBL = encodeW8NUL . L.unpack
#else #else
{- On Windows, we assume that the ByteString is utf-8, since Windows {- On Windows, we assume that the ByteString is utf-8, since Windows
- only uses unicode for filenames. -} - only uses unicode for filenames. -}
decodeBS = L8.toString decodeBL = L8.toString
#endif
decodeBS :: S.ByteString -> FilePath
#ifndef mingw32_HOST_OS
decodeBS = encodeW8NUL . S.unpack
#else
decodeBS = S8.toString
#endif #endif
{- Encodes a FilePath into a ByteString, applying the filesystem encoding. -} {- Encodes a FilePath into a ByteString, applying the filesystem encoding. -}
encodeBS :: FilePath -> L.ByteString encodeBL :: FilePath -> L.ByteString
#ifndef mingw32_HOST_OS #ifndef mingw32_HOST_OS
encodeBS = L.pack . decodeW8NUL encodeBL = L.pack . decodeW8NUL
#else #else
encodeBS = L8.fromString encodeBL = L8.fromString
#endif
encodeBS :: FilePath -> S.ByteString
#ifndef mingw32_HOST_OS
encodeBS = S.pack . decodeW8NUL
#else
encodeBS = S8.fromString
#endif #endif
{- Recent versions of the unix package have this alias; defined here {- Recent versions of the unix package have this alias; defined here

View file

@ -102,7 +102,7 @@ sideLockFile lockfile = do
let shortbase = reverse $ take 32 $ reverse base let shortbase = reverse $ take 32 $ reverse base
let md5sum = if base == shortbase let md5sum = if base == shortbase
then "" then ""
else show (md5 (encodeBS base)) else show (md5 (encodeBL base))
dir <- ifM (doesDirectoryExist "/dev/shm") dir <- ifM (doesDirectoryExist "/dev/shm")
( return "/dev/shm" ( return "/dev/shm"
, return "/tmp" , return "/tmp"