From e0c4ac99b5f3f3185db06f5108487ee546177665 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Mon, 14 Jan 2019 16:59:27 -0400 Subject: [PATCH] convert serializeKey' to strict ByteString The builder produces a lazy ByteString, and L.toStrict has to copy it, but needing to use the builder is no longer the common case; the serialization will normally be cached already as a strict ByteString, and this avoids keyFile' needing to use L.toStrict . serializeKey' --- Annex/DirHashes.hs | 4 ++-- Annex/Locations.hs | 2 +- Annex/VariantFile.hs | 6 +++--- Key.hs | 21 ++++++++++++--------- Utility/Hash.hs | 5 +++++ 5 files changed, 23 insertions(+), 15 deletions(-) diff --git a/Annex/DirHashes.hs b/Annex/DirHashes.hs index 194b4932c5..1e1991364a 100644 --- a/Annex/DirHashes.hs +++ b/Annex/DirHashes.hs @@ -65,14 +65,14 @@ hashDirs (HashLevels 1) sz s = addTrailingPathSeparator $ take sz s hashDirs _ sz s = addTrailingPathSeparator $ take sz s </> drop sz s hashDirLower :: HashLevels -> Hasher -hashDirLower n k = hashDirs n 3 $ take 6 $ show $ md5 $ serializeKey' $ nonChunkKey k +hashDirLower n k = hashDirs n 3 $ take 6 $ show $ md5s $ serializeKey' $ nonChunkKey k {- This was originally using Data.Hash.MD5 from MissingH. This new version - is faster, but ugly as it has to replicate the 4 Word32's that produced. -} hashDirMixed :: HashLevels -> Hasher hashDirMixed n k = hashDirs n 2 $ take 4 $ concatMap display_32bits_as_dir $ encodeWord32 $ map fromIntegral $ Data.ByteArray.unpack $ - Utility.Hash.md5 $ serializeKey' $ nonChunkKey k + Utility.Hash.md5s $ serializeKey' $ nonChunkKey k where encodeWord32 (b1:b2:b3:b4:rest) = (shiftL b4 24 .|. shiftL b3 16 .|. shiftL b2 8 .|. b1) diff --git a/Annex/Locations.hs b/Annex/Locations.hs index a45fa02c21..32113d94a1 100644 --- a/Annex/Locations.hs +++ b/Annex/Locations.hs @@ -515,7 +515,7 @@ keyFile :: Key -> FilePath keyFile = fromRawFilePath . keyFile' keyFile' :: Key -> RawFilePath -keyFile' = S8.concatMap esc . L.toStrict . 
serializeKey' +keyFile' = S8.concatMap esc . serializeKey' where esc '&' = "&a" esc '%' = "&s" diff --git a/Annex/VariantFile.hs b/Annex/VariantFile.hs index acab1e8281..9808293be9 100644 --- a/Annex/VariantFile.hs +++ b/Annex/VariantFile.hs @@ -10,7 +10,7 @@ module Annex.VariantFile where import Annex.Common import Utility.Hash -import qualified Data.ByteString.Lazy as L +import qualified Data.ByteString as S variantMarker :: String variantMarker = ".variant-" @@ -41,5 +41,5 @@ variantFile file key where doubleconflict = variantMarker `isInfixOf` file -shortHash :: L.ByteString -> String -shortHash = take 4 . show . md5 +shortHash :: S.ByteString -> String +shortHash = take 4 . show . md5s diff --git a/Key.hs b/Key.hs index eaa179d9f4..16f56e4185 100644 --- a/Key.hs +++ b/Key.hs @@ -56,11 +56,13 @@ stubKey = Key -- Gets the parent of a chunk key. nonChunkKey :: Key -> Key -nonChunkKey k = k - { keyChunkSize = Nothing - , keyChunkNum = Nothing - , keySerialization = Nothing - } +nonChunkKey k + | keyChunkSize k == Nothing && keyChunkNum k == Nothing = k + | otherwise = k + { keyChunkSize = Nothing + , keyChunkNum = Nothing + , keySerialization = Nothing + } -- Where a chunk key is offset within its parent. chunkKeyOffset :: Key -> Maybe Integer @@ -96,12 +98,13 @@ buildKey k = byteString (formatKeyVariety (keyVariety k)) _ ?: Nothing = mempty serializeKey :: Key -> String -serializeKey = decodeBL' . serializeKey' +serializeKey = decodeBS' . serializeKey' -serializeKey' :: Key -> L.ByteString +serializeKey' :: Key -> S.ByteString serializeKey' k = case keySerialization k of - Nothing -> toLazyByteStringWith (safeStrategy 128 smallChunkSize) L.empty (buildKey k) - Just b -> L.fromStrict b + Nothing -> L.toStrict $ + toLazyByteStringWith (safeStrategy 128 smallChunkSize) L.empty (buildKey k) + Just b -> b {- This is a strict parser for security reasons; a key - can contain only 4 fields, which all consist only of numbers. 
diff --git a/Utility/Hash.hs b/Utility/Hash.hs index 3bbb0627b7..d422c0bd53 100644 --- a/Utility/Hash.hs +++ b/Utility/Hash.hs @@ -27,12 +27,14 @@ module Utility.Hash ( blake2b_512, #endif md5, + md5s, prop_hashes_stable, Mac(..), calcMac, prop_mac_stable, ) where +import qualified Data.ByteString as S import qualified Data.ByteString.Lazy as L import qualified Data.Text as T import qualified Data.Text.Encoding as T @@ -112,6 +114,9 @@ blake2b_512 = hashlazy md5 :: L.ByteString -> Digest MD5 md5 = hashlazy +md5s :: S.ByteString -> Digest MD5 +md5s = hash + {- Check that all the hashes continue to hash the same. -} prop_hashes_stable :: Bool prop_hashes_stable = all (\(hasher, result) -> hasher foo == result)