From b552551b332b540f4afac7d744cb8a1ddd790e35 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Fri, 11 Jan 2019 13:55:00 -0400 Subject: [PATCH] use ByteString in Key for speed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is an easy win for parseKeyVariety: benchmarking old/parseKeyVariety time 1.515 μs (1.512 μs .. 1.517 μs) 1.000 R² (1.000 R² .. 1.000 R²) mean 1.515 μs (1.513 μs .. 1.517 μs) std dev 6.417 ns (4.992 ns .. 8.113 ns) benchmarking new/parseKeyVariety time 54.97 ns (54.70 ns .. 55.40 ns) 0.999 R² (0.999 R² .. 1.000 R²) mean 55.42 ns (55.05 ns .. 56.03 ns) std dev 1.562 ns (969.5 ps .. 2.442 ns) variance introduced by outliers: 44% (moderately inflated) For formatKeyVariety, using a Builder is marginally worse than building a String... (This is with criterion evaluating fully to nf not whnf) benchmarking old/formatKeyVariety time 434.3 ns (428.0 ns .. 440.4 ns) 0.999 R² (0.999 R² .. 1.000 R²) mean 430.6 ns (428.2 ns .. 433.9 ns) std dev 9.166 ns (6.932 ns .. 11.94 ns) variance introduced by outliers: 27% (moderately inflated) benchmarking Builder/formatKeyVariety time 526.5 ns (524.7 ns .. 528.8 ns) 1.000 R² (1.000 R² .. 1.000 R²) mean 526.1 ns (524.9 ns .. 528.5 ns) std dev 5.687 ns (3.762 ns .. 8.000 ns) Manually building the ByteString was better, but still slightly slower than String, due to inefficient need to S.pack . show the HashSize: benchmarking formatKeyVariety time 459.5 ns (455.8 ns .. 463.2 ns) 1.000 R² (0.999 R² .. 1.000 R²) mean 459.9 ns (457.4 ns .. 466.6 ns) std dev 11.65 ns (6.860 ns .. 21.41 ns) variance introduced by outliers: 35% (moderately inflated) So I cheated and made parseKeyVariety cache the original ByteString, for formatKeyVariety to use instead of re-building it. Final benchmark: benchmarking new/formatKeyVariety time 50.64 ns (50.57 ns .. 50.73 ns) 1.000 R² (0.999 R² .. 1.000 R²) mean 51.05 ns (50.60 ns .. 52.71 ns) std dev 2.790 ns (259.6 ps .. 
5.916 ns) variance introduced by outliers: 75% (severely inflated) benchmarking new/parseKeyVariety time 71.88 ns (71.54 ns .. 72.24 ns) 1.000 R² (1.000 R² .. 1.000 R²) mean 71.97 ns (71.69 ns .. 72.47 ns) std dev 1.249 ns (910.7 ps .. 1.791 ns) variance introduced by outliers: 22% (moderately inflated) --- Types/Key.hs | 220 +++++++++++++++++++++++++++------------------------ 1 file changed, 115 insertions(+), 105 deletions(-) diff --git a/Types/Key.hs b/Types/Key.hs index 4b81850d80..89880a2cd9 100644 --- a/Types/Key.hs +++ b/Types/Key.hs @@ -1,22 +1,25 @@ {- git-annex Key data type - - - Copyright 2011-2018 Joey Hess + - Copyright 2011-2019 Joey Hess - - Licensed under the GNU GPL version 3 or higher. -} {-# LANGUAGE CPP #-} +{-# LANGUAGE OverloadedStrings #-} module Types.Key where -import Utility.PartialPrelude - +import qualified Data.ByteString as S +import qualified Data.ByteString.Char8 as S8 +import qualified Data.ByteString.Lazy as L +import Data.ByteString.Builder import System.Posix.Types {- A Key has a unique name, which is derived from a particular backend, - and may contain other optional metadata. -} data Key = Key - { keyName :: String + { keyName :: S.ByteString , keyVariety :: KeyVariety , keySize :: Maybe Integer , keyMtime :: Maybe EpochTime @@ -28,21 +31,25 @@ data Key = Key newtype AssociatedFile = AssociatedFile (Maybe FilePath) deriving (Show, Eq, Ord) -{- There are several different varieties of keys. -} +{- There are several different varieties of keys. + - + - The trailing ByteString can either be empty, or contain a cached + - formatting of the KeyVariety, in the form generated by formatKeyVariety. 
+ -} data KeyVariety - = SHA2Key HashSize HasExt - | SHA3Key HashSize HasExt - | SKEINKey HashSize HasExt - | Blake2bKey HashSize HasExt - | Blake2sKey HashSize HasExt - | Blake2spKey HashSize HasExt - | SHA1Key HasExt - | MD5Key HasExt + = SHA2Key HashSize HasExt S.ByteString + | SHA3Key HashSize HasExt S.ByteString + | SKEINKey HashSize HasExt S.ByteString + | Blake2bKey HashSize HasExt S.ByteString + | Blake2sKey HashSize HasExt S.ByteString + | Blake2spKey HashSize HasExt S.ByteString + | SHA1Key HasExt S.ByteString + | MD5Key HasExt S.ByteString | WORMKey | URLKey -- Some repositories may contain keys of other varieties, -- which can still be processed to some extent. - | OtherKey String + | OtherKey S.ByteString deriving (Eq, Ord, Read, Show) {- Some varieties of keys may contain an extension at the end of the @@ -54,38 +61,38 @@ newtype HashSize = HashSize Int deriving (Eq, Ord, Read, Show) hasExt :: KeyVariety -> Bool -hasExt (SHA2Key _ (HasExt b)) = b -hasExt (SHA3Key _ (HasExt b)) = b -hasExt (SKEINKey _ (HasExt b)) = b -hasExt (Blake2bKey _ (HasExt b)) = b -hasExt (Blake2sKey _ (HasExt b)) = b -hasExt (Blake2spKey _ (HasExt b)) = b -hasExt (SHA1Key (HasExt b)) = b -hasExt (MD5Key (HasExt b)) = b +hasExt (SHA2Key _ (HasExt b) _) = b +hasExt (SHA3Key _ (HasExt b) _) = b +hasExt (SKEINKey _ (HasExt b) _) = b +hasExt (Blake2bKey _ (HasExt b) _) = b +hasExt (Blake2sKey _ (HasExt b) _) = b +hasExt (Blake2spKey _ (HasExt b) _) = b +hasExt (SHA1Key (HasExt b) _) = b +hasExt (MD5Key (HasExt b) _) = b hasExt WORMKey = False hasExt URLKey = False -hasExt (OtherKey s) = end s == "E" +hasExt (OtherKey s) = (snd <$> S8.unsnoc s) == Just 'E' sameExceptExt :: KeyVariety -> KeyVariety -> Bool -sameExceptExt (SHA2Key sz1 _) (SHA2Key sz2 _) = sz1 == sz2 -sameExceptExt (SHA3Key sz1 _) (SHA3Key sz2 _) = sz1 == sz2 -sameExceptExt (SKEINKey sz1 _) (SKEINKey sz2 _) = sz1 == sz2 -sameExceptExt (Blake2bKey sz1 _) (Blake2bKey sz2 _) = sz1 == sz2 -sameExceptExt (Blake2sKey sz1 _) 
(Blake2sKey sz2 _) = sz1 == sz2 -sameExceptExt (Blake2spKey sz1 _) (Blake2spKey sz2 _) = sz1 == sz2 -sameExceptExt (SHA1Key _) (SHA1Key _) = True -sameExceptExt (MD5Key _) (MD5Key _) = True +sameExceptExt (SHA2Key sz1 _ _) (SHA2Key sz2 _ _) = sz1 == sz2 +sameExceptExt (SHA3Key sz1 _ _) (SHA3Key sz2 _ _) = sz1 == sz2 +sameExceptExt (SKEINKey sz1 _ _) (SKEINKey sz2 _ _) = sz1 == sz2 +sameExceptExt (Blake2bKey sz1 _ _) (Blake2bKey sz2 _ _) = sz1 == sz2 +sameExceptExt (Blake2sKey sz1 _ _) (Blake2sKey sz2 _ _) = sz1 == sz2 +sameExceptExt (Blake2spKey sz1 _ _) (Blake2spKey sz2 _ _) = sz1 == sz2 +sameExceptExt (SHA1Key _ _) (SHA1Key _ _) = True +sameExceptExt (MD5Key _ _) (MD5Key _ _) = True sameExceptExt _ _ = False {- Is the Key variety cryptographically secure, such that no two differing - file contents can be mapped to the same Key? -} cryptographicallySecure :: KeyVariety -> Bool -cryptographicallySecure (SHA2Key _ _) = True -cryptographicallySecure (SHA3Key _ _) = True -cryptographicallySecure (SKEINKey _ _) = True -cryptographicallySecure (Blake2bKey _ _) = True -cryptographicallySecure (Blake2sKey _ _) = True -cryptographicallySecure (Blake2spKey _ _) = True +cryptographicallySecure (SHA2Key _ _ _) = True +cryptographicallySecure (SHA3Key _ _ _) = True +cryptographicallySecure (SKEINKey _ _ _) = True +cryptographicallySecure (Blake2bKey _ _ _) = True +cryptographicallySecure (Blake2sKey _ _ _) = True +cryptographicallySecure (Blake2spKey _ _ _) = True cryptographicallySecure _ = False {- Is the Key variety backed by a hash, which allows verifying content? @@ -93,83 +100,86 @@ cryptographicallySecure _ = False - attacks. 
-} isVerifiable :: KeyVariety -> Bool -isVerifiable (SHA2Key _ _) = True -isVerifiable (SHA3Key _ _) = True -isVerifiable (SKEINKey _ _) = True -isVerifiable (Blake2bKey _ _) = True -isVerifiable (Blake2sKey _ _) = True -isVerifiable (Blake2spKey _ _) = True -isVerifiable (SHA1Key _) = True -isVerifiable (MD5Key _) = True +isVerifiable (SHA2Key _ _ _) = True +isVerifiable (SHA3Key _ _ _) = True +isVerifiable (SKEINKey _ _ _) = True +isVerifiable (Blake2bKey _ _ _) = True +isVerifiable (Blake2sKey _ _ _) = True +isVerifiable (Blake2spKey _ _ _) = True +isVerifiable (SHA1Key _ _) = True +isVerifiable (MD5Key _ _) = True isVerifiable WORMKey = False isVerifiable URLKey = False -isVerifiable (OtherKey _) = False +isVerifiable (OtherKey _) = False -formatKeyVariety :: KeyVariety -> String +formatKeyVariety :: KeyVariety -> S.ByteString formatKeyVariety v = case v of - SHA2Key sz e -> adde e (addsz sz "SHA") - SHA3Key sz e -> adde e (addsz sz "SHA3_") - SKEINKey sz e -> adde e (addsz sz "SKEIN") - Blake2bKey sz e -> adde e (addsz sz "BLAKE2B") - Blake2sKey sz e -> adde e (addsz sz "BLAKE2S") - Blake2spKey sz e -> adde e (addsz sz "BLAKE2SP") - SHA1Key e -> adde e "SHA1" - MD5Key e -> adde e "MD5" + SHA2Key sz e f -> f ! adde e (addsz sz "SHA") + SHA3Key sz e f -> f ! adde e (addsz sz "SHA3_") + SKEINKey sz e f -> f ! adde e (addsz sz "SKEIN") + Blake2bKey sz e f -> f ! adde e (addsz sz "BLAKE2B") + Blake2sKey sz e f -> f ! adde e (addsz sz "BLAKE2S") + Blake2spKey sz e f -> f ! adde e (addsz sz "BLAKE2SP") + SHA1Key e f -> f ! adde e "SHA1" + MD5Key e f -> f ! 
adde e "MD5" WORMKey -> "WORM" URLKey -> "URL" OtherKey s -> s where adde (HasExt False) s = s - adde (HasExt True) s = s ++ "E" - addsz (HashSize n) s = s ++ show n + adde (HasExt True) s = s <> "E" + addsz (HashSize n) s = s <> S8.pack (show n) -parseKeyVariety :: String -> KeyVariety -parseKeyVariety "SHA256" = SHA2Key (HashSize 256) (HasExt False) -parseKeyVariety "SHA256E" = SHA2Key (HashSize 256) (HasExt True) -parseKeyVariety "SHA512" = SHA2Key (HashSize 512) (HasExt False) -parseKeyVariety "SHA512E" = SHA2Key (HashSize 512) (HasExt True) -parseKeyVariety "SHA224" = SHA2Key (HashSize 224) (HasExt False) -parseKeyVariety "SHA224E" = SHA2Key (HashSize 224) (HasExt True) -parseKeyVariety "SHA384" = SHA2Key (HashSize 384) (HasExt False) -parseKeyVariety "SHA384E" = SHA2Key (HashSize 384) (HasExt True) -parseKeyVariety "SHA3_512" = SHA3Key (HashSize 512) (HasExt False) -parseKeyVariety "SHA3_512E" = SHA3Key (HashSize 512) (HasExt True) -parseKeyVariety "SHA3_384" = SHA3Key (HashSize 384) (HasExt False) -parseKeyVariety "SHA3_384E" = SHA3Key (HashSize 384) (HasExt True) -parseKeyVariety "SHA3_256" = SHA3Key (HashSize 256) (HasExt False) -parseKeyVariety "SHA3_256E" = SHA3Key (HashSize 256) (HasExt True) -parseKeyVariety "SHA3_224" = SHA3Key (HashSize 224) (HasExt False) -parseKeyVariety "SHA3_224E" = SHA3Key (HashSize 224) (HasExt True) -parseKeyVariety "SKEIN512" = SKEINKey (HashSize 512) (HasExt False) -parseKeyVariety "SKEIN512E" = SKEINKey (HashSize 512) (HasExt True) -parseKeyVariety "SKEIN256" = SKEINKey (HashSize 256) (HasExt False) -parseKeyVariety "SKEIN256E" = SKEINKey (HashSize 256) (HasExt True) + f ! 
s = if S.null f then s else f + +parseKeyVariety :: S.ByteString -> KeyVariety +parseKeyVariety b + | b == "SHA256" = SHA2Key (HashSize 256) (HasExt False) b + | b == "SHA256E" = SHA2Key (HashSize 256) (HasExt True) b + | b == "SHA512" = SHA2Key (HashSize 512) (HasExt False) b + | b == "SHA512E" = SHA2Key (HashSize 512) (HasExt True) b + | b == "SHA224" = SHA2Key (HashSize 224) (HasExt False) b + | b == "SHA224E" = SHA2Key (HashSize 224) (HasExt True) b + | b == "SHA384" = SHA2Key (HashSize 384) (HasExt False) b + | b == "SHA384E" = SHA2Key (HashSize 384) (HasExt True) b + | b == "SHA3_512" = SHA3Key (HashSize 512) (HasExt False) b + | b == "SHA3_512E" = SHA3Key (HashSize 512) (HasExt True) b + | b == "SHA3_384" = SHA3Key (HashSize 384) (HasExt False) b + | b == "SHA3_384E" = SHA3Key (HashSize 384) (HasExt True) b + | b == "SHA3_256" = SHA3Key (HashSize 256) (HasExt False) b + | b == "SHA3_256E" = SHA3Key (HashSize 256) (HasExt True) b + | b == "SHA3_224" = SHA3Key (HashSize 224) (HasExt False) b + | b == "SHA3_224E" = SHA3Key (HashSize 224) (HasExt True) b + | b == "SKEIN512" = SKEINKey (HashSize 512) (HasExt False) b + | b == "SKEIN512E" = SKEINKey (HashSize 512) (HasExt True) b + | b == "SKEIN256" = SKEINKey (HashSize 256) (HasExt False) b + | b == "SKEIN256E" = SKEINKey (HashSize 256) (HasExt True) b #if MIN_VERSION_cryptonite(0,23,0) -parseKeyVariety "BLAKE2B160" = Blake2bKey (HashSize 160) (HasExt False) -parseKeyVariety "BLAKE2B160E" = Blake2bKey (HashSize 160) (HasExt True) -parseKeyVariety "BLAKE2B224" = Blake2bKey (HashSize 224) (HasExt False) -parseKeyVariety "BLAKE2B224E" = Blake2bKey (HashSize 224) (HasExt True) -parseKeyVariety "BLAKE2B256" = Blake2bKey (HashSize 256) (HasExt False) -parseKeyVariety "BLAKE2B256E" = Blake2bKey (HashSize 256) (HasExt True) -parseKeyVariety "BLAKE2B384" = Blake2bKey (HashSize 384) (HasExt False) -parseKeyVariety "BLAKE2B384E" = Blake2bKey (HashSize 384) (HasExt True) -parseKeyVariety "BLAKE2B512" = Blake2bKey (HashSize 
512) (HasExt False) -parseKeyVariety "BLAKE2B512E" = Blake2bKey (HashSize 512) (HasExt True) -parseKeyVariety "BLAKE2S160" = Blake2sKey (HashSize 160) (HasExt False) -parseKeyVariety "BLAKE2S160E" = Blake2sKey (HashSize 160) (HasExt True) -parseKeyVariety "BLAKE2S224" = Blake2sKey (HashSize 224) (HasExt False) -parseKeyVariety "BLAKE2S224E" = Blake2sKey (HashSize 224) (HasExt True) -parseKeyVariety "BLAKE2S256" = Blake2sKey (HashSize 256) (HasExt False) -parseKeyVariety "BLAKE2S256E" = Blake2sKey (HashSize 256) (HasExt True) -parseKeyVariety "BLAKE2SP224" = Blake2spKey (HashSize 224) (HasExt False) -parseKeyVariety "BLAKE2SP224E" = Blake2spKey (HashSize 224) (HasExt True) -parseKeyVariety "BLAKE2SP256" = Blake2spKey (HashSize 256) (HasExt False) -parseKeyVariety "BLAKE2SP256E" = Blake2spKey (HashSize 256) (HasExt True) + | b == "BLAKE2B160" = Blake2bKey (HashSize 160) (HasExt False) b + | b == "BLAKE2B160E" = Blake2bKey (HashSize 160) (HasExt True) b + | b == "BLAKE2B224" = Blake2bKey (HashSize 224) (HasExt False) b + | b == "BLAKE2B224E" = Blake2bKey (HashSize 224) (HasExt True) b + | b == "BLAKE2B256" = Blake2bKey (HashSize 256) (HasExt False) b + | b == "BLAKE2B256E" = Blake2bKey (HashSize 256) (HasExt True) b + | b == "BLAKE2B384" = Blake2bKey (HashSize 384) (HasExt False) b + | b == "BLAKE2B384E" = Blake2bKey (HashSize 384) (HasExt True) b + | b == "BLAKE2B512" = Blake2bKey (HashSize 512) (HasExt False) b + | b == "BLAKE2B512E" = Blake2bKey (HashSize 512) (HasExt True) b + | b == "BLAKE2S160" = Blake2sKey (HashSize 160) (HasExt False) b + | b == "BLAKE2S160E" = Blake2sKey (HashSize 160) (HasExt True) b + | b == "BLAKE2S224" = Blake2sKey (HashSize 224) (HasExt False) b + | b == "BLAKE2S224E" = Blake2sKey (HashSize 224) (HasExt True) b + | b == "BLAKE2S256" = Blake2sKey (HashSize 256) (HasExt False) b + | b == "BLAKE2S256E" = Blake2sKey (HashSize 256) (HasExt True) b + | b == "BLAKE2SP224" = Blake2spKey (HashSize 224) (HasExt False) b + | b == "BLAKE2SP224E" = 
Blake2spKey (HashSize 224) (HasExt True) b + | b == "BLAKE2SP256" = Blake2spKey (HashSize 256) (HasExt False) b + | b == "BLAKE2SP256E" = Blake2spKey (HashSize 256) (HasExt True) b #endif -parseKeyVariety "SHA1" = SHA1Key (HasExt False) -parseKeyVariety "SHA1E" = SHA1Key (HasExt True) -parseKeyVariety "MD5" = MD5Key (HasExt False) -parseKeyVariety "MD5E" = MD5Key (HasExt True) -parseKeyVariety "WORM" = WORMKey -parseKeyVariety "URL" = URLKey -parseKeyVariety s = OtherKey s + | b == "SHA1" = SHA1Key (HasExt False) b + | b == "SHA1E" = SHA1Key (HasExt True) b + | b == "MD5" = MD5Key (HasExt False) b + | b == "MD5E" = MD5Key (HasExt True) b + | b == "WORM" = WORMKey + | b == "URL" = URLKey + | otherwise = OtherKey b