Revert "cache the serialization of a Key"

This reverts commit 4536c93bb2.

That broke Read/Show of a Key, and unfortunately Key is read in at least
one place; the GitAnnexDistribution data type.

It would be worth bringing this optimisation back, but it would need
either a custom Read/Show instance that preserves back-compat, or
wrapping Key in a data type that contains the serialization, or changing
how GitAnnexDistribution is serialized.

Also, the Eq instance would need to compare keys with and without a
cached seralization the same.
This commit is contained in:
Joey Hess 2019-01-16 16:09:53 -04:00
parent 863ed51ae0
commit 96aba8eff7
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
12 changed files with 19 additions and 52 deletions

View file

@ -65,14 +65,14 @@ hashDirs (HashLevels 1) sz s = addTrailingPathSeparator $ take sz s
hashDirs _ sz s = addTrailingPathSeparator $ take sz s </> drop sz s hashDirs _ sz s = addTrailingPathSeparator $ take sz s </> drop sz s
hashDirLower :: HashLevels -> Hasher hashDirLower :: HashLevels -> Hasher
hashDirLower n k = hashDirs n 3 $ take 6 $ show $ md5s $ serializeKey' $ nonChunkKey k hashDirLower n k = hashDirs n 3 $ take 6 $ show $ md5 $ serializeKey' $ nonChunkKey k
{- This was originally using Data.Hash.MD5 from MissingH. This new version {- This was originally using Data.Hash.MD5 from MissingH. This new version
- is faster, but ugly as it has to replicate the 4 Word32's that produced. -} - is faster, but ugly as it has to replicate the 4 Word32's that produced. -}
hashDirMixed :: HashLevels -> Hasher hashDirMixed :: HashLevels -> Hasher
hashDirMixed n k = hashDirs n 2 $ take 4 $ concatMap display_32bits_as_dir $ hashDirMixed n k = hashDirs n 2 $ take 4 $ concatMap display_32bits_as_dir $
encodeWord32 $ map fromIntegral $ Data.ByteArray.unpack $ encodeWord32 $ map fromIntegral $ Data.ByteArray.unpack $
Utility.Hash.md5s $ serializeKey' $ nonChunkKey k Utility.Hash.md5 $ serializeKey' $ nonChunkKey k
where where
encodeWord32 (b1:b2:b3:b4:rest) = encodeWord32 (b1:b2:b3:b4:rest) =
(shiftL b4 24 .|. shiftL b3 16 .|. shiftL b2 8 .|. b1) (shiftL b4 24 .|. shiftL b3 16 .|. shiftL b2 8 .|. b1)

View file

@ -42,7 +42,6 @@ exportKey sha = mk <$> catKey sha
, keyMtime = Nothing , keyMtime = Nothing
, keyChunkSize = Nothing , keyChunkSize = Nothing
, keyChunkNum = Nothing , keyChunkNum = Nothing
, keySerialization = Nothing
} }
exportTree :: Remote.RemoteConfig -> Bool exportTree :: Remote.RemoteConfig -> Bool

View file

@ -85,6 +85,7 @@ module Annex.Locations (
import Data.Char import Data.Char
import Data.Default import Data.Default
import qualified Data.ByteString.Char8 as S8 import qualified Data.ByteString.Char8 as S8
import qualified Data.ByteString.Lazy as L
import Common import Common
import Key import Key
@ -515,7 +516,7 @@ keyFile = fromRawFilePath . keyFile'
keyFile' :: Key -> RawFilePath keyFile' :: Key -> RawFilePath
keyFile' k = keyFile' k =
let b = serializeKey' k let b = L.toStrict (serializeKey' k)
in if any (`S8.elem` b) ['&', '%', ':', '/'] in if any (`S8.elem` b) ['&', '%', ':', '/']
then S8.concatMap esc b then S8.concatMap esc b
else b else b

View file

@ -10,7 +10,7 @@ module Annex.VariantFile where
import Annex.Common import Annex.Common
import Utility.Hash import Utility.Hash
import qualified Data.ByteString as S import qualified Data.ByteString.Lazy as L
variantMarker :: String variantMarker :: String
variantMarker = ".variant-" variantMarker = ".variant-"
@ -41,5 +41,5 @@ variantFile file key
where where
doubleconflict = variantMarker `isInfixOf` file doubleconflict = variantMarker `isInfixOf` file
shortHash :: S.ByteString -> String shortHash :: L.ByteString -> String
shortHash = take 4 . show . md5s shortHash = take 4 . show . md5

View file

@ -58,10 +58,7 @@ genKey source preferredbackend = do
Just k -> Just (makesane k, b) Just k -> Just (makesane k, b)
where where
-- keyNames should not contain newline characters. -- keyNames should not contain newline characters.
makesane k = k makesane k = k { keyName = S8.map fixbadchar (keyName k) }
{ keyName = S8.map fixbadchar (keyName k)
, keySerialization = Nothing
}
fixbadchar c fixbadchar c
| c == '\n' = '_' | c == '\n' = '_'
| otherwise = c | otherwise = c

View file

@ -181,7 +181,6 @@ trivialMigrate' oldkey newbackend afile maxextlen
| migratable && hasExt oldvariety = Just $ oldkey | migratable && hasExt oldvariety = Just $ oldkey
{ keyName = keyHash oldkey { keyName = keyHash oldkey
, keyVariety = newvariety , keyVariety = newvariety
, keySerialization = Nothing
} }
{- Fast migration from hash to hashE backend. -} {- Fast migration from hash to hashE backend. -}
| migratable && hasExt newvariety = case afile of | migratable && hasExt newvariety = case afile of
@ -190,7 +189,6 @@ trivialMigrate' oldkey newbackend afile maxextlen
{ keyName = keyHash oldkey { keyName = keyHash oldkey
<> encodeBS (selectExtension maxextlen file) <> encodeBS (selectExtension maxextlen file)
, keyVariety = newvariety , keyVariety = newvariety
, keySerialization = Nothing
} }
{- Upgrade to fix bad previous migration that created a {- Upgrade to fix bad previous migration that created a
- non-extension preserving key, with an extension - non-extension preserving key, with an extension
@ -198,7 +196,6 @@ trivialMigrate' oldkey newbackend afile maxextlen
| newvariety == oldvariety && not (hasExt oldvariety) && | newvariety == oldvariety && not (hasExt oldvariety) &&
keyHash oldkey /= keyName oldkey = Just $ oldkey keyHash oldkey /= keyName oldkey = Just $ oldkey
{ keyName = keyHash oldkey { keyName = keyHash oldkey
, keySerialization = Nothing
} }
| otherwise = Nothing | otherwise = Nothing
where where
@ -291,8 +288,5 @@ testKeyBackend =
let b = genBackendE (SHA2Hash (HashSize 256)) let b = genBackendE (SHA2Hash (HashSize 256))
in b { getKey = (fmap addE) <$$> getKey b } in b { getKey = (fmap addE) <$$> getKey b }
where where
addE k = k addE k = k { keyName = keyName k <> longext }
{ keyName = keyName k <> longext
, keySerialization = Nothing
}
longext = ".this-is-a-test-key" longext = ".this-is-a-test-key"

View file

@ -377,10 +377,7 @@ finishDownloadWith tmp u url file = do
{- Adds the url size to the Key. -} {- Adds the url size to the Key. -}
addSizeUrlKey :: Url.UrlInfo -> Key -> Key addSizeUrlKey :: Url.UrlInfo -> Key -> Key
addSizeUrlKey urlinfo key = key addSizeUrlKey urlinfo key = key { keySize = Url.urlSize urlinfo }
{ keySize = Url.urlSize urlinfo
, keySerialization = Nothing
}
{- Adds worktree file to the repository. -} {- Adds worktree file to the repository. -}
addWorkTree :: UUID -> URLString -> FilePath -> Key -> Maybe FilePath -> Annex () addWorkTree :: UUID -> URLString -> FilePath -> Key -> Maybe FilePath -> Annex ()

28
Key.hs
View file

@ -51,18 +51,14 @@ stubKey = Key
, keyMtime = Nothing , keyMtime = Nothing
, keyChunkSize = Nothing , keyChunkSize = Nothing
, keyChunkNum = Nothing , keyChunkNum = Nothing
, keySerialization = Nothing
} }
-- Gets the parent of a chunk key. -- Gets the parent of a chunk key.
nonChunkKey :: Key -> Key nonChunkKey :: Key -> Key
nonChunkKey k nonChunkKey k = k
| keyChunkSize k == Nothing && keyChunkNum k == Nothing = k { keyChunkSize = Nothing
| otherwise = k , keyChunkNum = Nothing
{ keyChunkSize = Nothing }
, keyChunkNum = Nothing
, keySerialization = Nothing
}
-- Where a chunk key is offset within its parent. -- Where a chunk key is offset within its parent.
chunkKeyOffset :: Key -> Maybe Integer chunkKeyOffset :: Key -> Maybe Integer
@ -98,13 +94,10 @@ buildKey k = byteString (formatKeyVariety (keyVariety k))
_ ?: Nothing = mempty _ ?: Nothing = mempty
serializeKey :: Key -> String serializeKey :: Key -> String
serializeKey = decodeBS' . serializeKey' serializeKey = decodeBL' . serializeKey'
serializeKey' :: Key -> S.ByteString serializeKey' :: Key -> L.ByteString
serializeKey' k = case keySerialization k of serializeKey' = toLazyByteStringWith (safeStrategy 128 smallChunkSize) L.empty . buildKey
Nothing -> L.toStrict $
toLazyByteStringWith (safeStrategy 128 smallChunkSize) L.empty (buildKey k)
Just b -> b
{- This is a strict parser for security reasons; a key {- This is a strict parser for security reasons; a key
- can contain only 4 fields, which all consist only of numbers. - can contain only 4 fields, which all consist only of numbers.
@ -134,7 +127,6 @@ keyParser = do
, keyMtime = m , keyMtime = m
, keyChunkSize = cs , keyChunkSize = cs
, keyChunkNum = cn , keyChunkNum = cn
, keySerialization = Nothing
} }
else fail "invalid keyName" else fail "invalid keyName"
where where
@ -148,10 +140,7 @@ deserializeKey :: String -> Maybe Key
deserializeKey = deserializeKey' . encodeBS' deserializeKey = deserializeKey' . encodeBS'
deserializeKey' :: S.ByteString -> Maybe Key deserializeKey' :: S.ByteString -> Maybe Key
deserializeKey' b = either deserializeKey' b = eitherToMaybe $ A.parseOnly keyParser b
(const Nothing)
(\k -> Just $ k { keySerialization = Just b })
(A.parseOnly keyParser b)
{- This splits any extension out of the keyName, returning the {- This splits any extension out of the keyName, returning the
- keyName minus extension, and the extension (including leading dot). - keyName minus extension, and the extension (including leading dot).
@ -189,7 +178,6 @@ instance Arbitrary Key where
<*> ((abs . fromInteger <$>) <$> arbitrary) -- mtime cannot be negative <*> ((abs . fromInteger <$>) <$> arbitrary) -- mtime cannot be negative
<*> ((abs <$>) <$> arbitrary) -- chunksize cannot be negative <*> ((abs <$>) <$> arbitrary) -- chunksize cannot be negative
<*> ((succ . abs <$>) <$> arbitrary) -- chunknum cannot be 0 or negative <*> ((succ . abs <$>) <$> arbitrary) -- chunknum cannot be 0 or negative
<*> pure Nothing
instance Hashable Key where instance Hashable Key where
hashIO32 = hashIO32 . serializeKey' hashIO32 = hashIO32 . serializeKey'

View file

@ -68,10 +68,7 @@ chunkKeyStream :: Key -> ChunkSize -> ChunkKeyStream
chunkKeyStream basek chunksize = ChunkKeyStream $ map mk [1..] chunkKeyStream basek chunksize = ChunkKeyStream $ map mk [1..]
where where
mk chunknum = sizedk { keyChunkNum = Just chunknum } mk chunknum = sizedk { keyChunkNum = Just chunknum }
sizedk = basek sizedk = basek { keyChunkSize = Just (toInteger chunksize) }
{ keyChunkSize = Just (toInteger chunksize)
, keySerialization = Nothing
}
nextChunkKeyStream :: ChunkKeyStream -> (Key, ChunkKeyStream) nextChunkKeyStream :: ChunkKeyStream -> (Key, ChunkKeyStream)
nextChunkKeyStream (ChunkKeyStream (k:l)) = (k, ChunkKeyStream l) nextChunkKeyStream (ChunkKeyStream (k:l)) = (k, ChunkKeyStream l)

View file

@ -23,7 +23,6 @@ data Key = Key
, keyMtime :: Maybe EpochTime , keyMtime :: Maybe EpochTime
, keyChunkSize :: Maybe Integer , keyChunkSize :: Maybe Integer
, keyChunkNum :: Maybe Integer , keyChunkNum :: Maybe Integer
, keySerialization :: Maybe S.ByteString -- ^ cached serialization
} deriving (Eq, Ord, Read, Show) } deriving (Eq, Ord, Read, Show)
{- A filename may be associated with a Key. -} {- A filename may be associated with a Key. -}

View file

@ -150,7 +150,6 @@ readKey1 v
, keyVariety = parseKeyVariety (encodeBS b) , keyVariety = parseKeyVariety (encodeBS b)
, keySize = s , keySize = s
, keyMtime = t , keyMtime = t
, keySerialization = Nothing
} }
where where
bits = splitc ':' v bits = splitc ':' v

View file

@ -27,7 +27,6 @@ module Utility.Hash (
blake2b_512, blake2b_512,
#endif #endif
md5, md5,
md5s,
prop_hashes_stable, prop_hashes_stable,
Mac(..), Mac(..),
calcMac, calcMac,
@ -113,9 +112,6 @@ blake2b_512 = hashlazy
md5 :: L.ByteString -> Digest MD5 md5 :: L.ByteString -> Digest MD5
md5 = hashlazy md5 = hashlazy
md5s :: S.ByteString -> Digest MD5
md5s = hash
{- Check that all the hashes continue to hash the same. -} {- Check that all the hashes continue to hash the same. -}
prop_hashes_stable :: Bool prop_hashes_stable :: Bool
prop_hashes_stable = all (\(hasher, result) -> hasher foo == result) prop_hashes_stable = all (\(hasher, result) -> hasher foo == result)