git-annex/Types/Key.hs
Joey Hess 151562b537
convert key2file and file2key to use builder and attoparsec
The new parser is significantly stricter than the old one:

The old file2key allowed the fields to come in any order,
but the new one requires the fixed order that git-annex has always used.
Hopefully this will not cause any breakage.

And the old file2key allowed, e.g., SHA1-m1-m2-m3-m4-m5-m6--xxxx,
while the new one does not allow duplication of fields. This could
improve security, because allowing lots of extra junk like that in a key
could potentially be used in a SHA1 collision attack, although the current
attacks need binary data and not this kind of structured numeric data.

Speed improved of course, and fairly substantially, in microbenchmarks:

benchmarking old/key2file
time                 2.264 μs   (2.257 μs .. 2.273 μs)
                     1.000 R²   (1.000 R² .. 1.000 R²)
mean                 2.265 μs   (2.260 μs .. 2.275 μs)
std dev              21.17 ns   (13.06 ns .. 39.26 ns)

benchmarking new/key2file'
time                 1.744 μs   (1.741 μs .. 1.747 μs)
                     1.000 R²   (1.000 R² .. 1.000 R²)
mean                 1.745 μs   (1.742 μs .. 1.751 μs)
std dev              13.55 ns   (9.099 ns .. 21.89 ns)

benchmarking old/file2key
time                 6.114 μs   (6.102 μs .. 6.129 μs)
                     1.000 R²   (1.000 R² .. 1.000 R²)
mean                 6.118 μs   (6.106 μs .. 6.143 μs)
std dev              55.00 ns   (30.08 ns .. 100.2 ns)

benchmarking new/file2key'
time                 1.791 μs   (1.782 μs .. 1.801 μs)
                     1.000 R²   (0.999 R² .. 1.000 R²)
mean                 1.792 μs   (1.785 μs .. 1.804 μs)
std dev              32.46 ns   (20.59 ns .. 50.82 ns)
variance introduced by outliers: 19% (moderately inflated)
2019-01-11 16:33:42 -04:00

183 lines
7.3 KiB
Haskell

{- git-annex Key data type
-
- Copyright 2011-2019 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE OverloadedStrings #-}
module Types.Key where
import qualified Data.ByteString as S
import qualified Data.ByteString.Char8 as S8
import System.Posix.Types
{- A Key has a unique name, which is derived from a particular backend,
 - and may contain other optional metadata.
 -
 - Only keyName and keyVariety are always present; each of the remaining
 - fields is a Maybe.
 -}
data Key = Key
    { keyName :: S.ByteString
    , keyVariety :: KeyVariety
    -- NOTE(review): presumably the size of the content in bytes; confirm.
    , keySize :: Maybe Integer
    -- A POSIX EpochTime (from System.Posix.Types).
    , keyMtime :: Maybe EpochTime
    -- NOTE(review): keyChunkSize and keyChunkNum look like they describe
    -- one chunk of a larger object; confirm against their users.
    , keyChunkSize :: Maybe Integer
    , keyChunkNum :: Maybe Integer
    } deriving (Eq, Ord, Read, Show)
{- A filename may be associated with a Key.
 -
 - The wrapped Maybe leaves room for there being no such filename.
 -}
newtype AssociatedFile = AssociatedFile (Maybe FilePath)
    deriving (Show, Eq, Ord)
{- There are several different varieties of keys.
 -
 - The trailing ByteString can either be empty, or contain a cached
 - formatting of the KeyVariety, in the form generated by formatKeyVariety.
 -
 - NOTE(review): the derived Eq and Ord instances compare that trailing
 - ByteString as well, so two values that differ only in whether the
 - cache is filled in do not compare equal.
 -}
data KeyVariety
    = SHA2Key HashSize HasExt S.ByteString
    | SHA3Key HashSize HasExt S.ByteString
    | SKEINKey HashSize HasExt S.ByteString
    | Blake2bKey HashSize HasExt S.ByteString
    | Blake2sKey HashSize HasExt S.ByteString
    | Blake2spKey HashSize HasExt S.ByteString
    | SHA1Key HasExt S.ByteString
    | MD5Key HasExt S.ByteString
    | WORMKey
    | URLKey
    -- Some repositories may contain keys of other varieties,
    -- which can still be processed to some extent.
    -- The ByteString is the variety's name, kept exactly as it was parsed.
    | OtherKey S.ByteString
    deriving (Eq, Ord, Read, Show)
{- Some varieties of keys may contain an extension at the end of the
 - keyName.
 -
 - The wrapped Bool is what hasExt reports for the hash-based varieties,
 - and HasExt True is what makes formatKeyVariety append an "E".
 -}
newtype HasExt = HasExt Bool
    deriving (Eq, Ord, Read, Show)
{- The number that formatKeyVariety appends to a hash's name,
 - eg the 256 in "SHA256".
 -
 - NOTE(review): presumably the digest size in bits; confirm.
 -}
newtype HashSize = HashSize Int
    deriving (Eq, Ord, Read, Show)
{- Does this variety of key have its HasExt flag set?
 -
 - The hash-based varieties carry the answer in their HasExt field.
 - WORMKey and URLKey always yield False. For an OtherKey, the answer is
 - whether its name ends with the character 'E'.
 -}
hasExt :: KeyVariety -> Bool
hasExt variety = case variety of
    SHA2Key _ ext _ -> flag ext
    SHA3Key _ ext _ -> flag ext
    SKEINKey _ ext _ -> flag ext
    Blake2bKey _ ext _ -> flag ext
    Blake2sKey _ ext _ -> flag ext
    Blake2spKey _ ext _ -> flag ext
    SHA1Key ext _ -> flag ext
    MD5Key ext _ -> flag ext
    WORMKey -> False
    URLKey -> False
    OtherKey name -> case S8.unsnoc name of
        Just (_, 'E') -> True
        -- An empty name, or any other final character.
        _ -> False
  where
    flag (HasExt b) = b
{- Are two varieties the same kind of hash with the same HashSize,
 - disregarding their HasExt flags and cached formattings?
 -
 - Only the hash-based varieties can match. Any pairing that involves
 - WORMKey, URLKey or OtherKey yields False, even when both sides are
 - the same constructor.
 -}
sameExceptExt :: KeyVariety -> KeyVariety -> Bool
sameExceptExt left right = case (left, right) of
    (SHA2Key a _ _, SHA2Key b _ _) -> a == b
    (SHA3Key a _ _, SHA3Key b _ _) -> a == b
    (SKEINKey a _ _, SKEINKey b _ _) -> a == b
    (Blake2bKey a _ _, Blake2bKey b _ _) -> a == b
    (Blake2sKey a _ _, Blake2sKey b _ _) -> a == b
    (Blake2spKey a _ _, Blake2spKey b _ _) -> a == b
    -- These two have no HashSize, so the constructor alone decides.
    (SHA1Key{}, SHA1Key{}) -> True
    (MD5Key{}, MD5Key{}) -> True
    _ -> False
{- Is the Key variety cryptographically secure, such that no two differing
 - file contents can be mapped to the same Key?
 -
 - Only the SHA2, SHA3, SKEIN and Blake2 varieties qualify; every other
 - variety, including SHA1Key, MD5Key and OtherKey, yields False.
 -}
cryptographicallySecure :: KeyVariety -> Bool
cryptographicallySecure variety = case variety of
    SHA2Key{} -> True
    SHA3Key{} -> True
    SKEINKey{} -> True
    Blake2bKey{} -> True
    Blake2sKey{} -> True
    Blake2spKey{} -> True
    _ -> False
{- Is the Key variety backed by a hash, which allows verifying content?
 - The hash does not have to be cryptographically secure against
 - eg birthday attacks, so SHA1Key and MD5Key count too.
 -
 - Every constructor is listed explicitly rather than falling through to
 - a wildcard.
 -}
isVerifiable :: KeyVariety -> Bool
isVerifiable variety = case variety of
    SHA2Key{} -> True
    SHA3Key{} -> True
    SKEINKey{} -> True
    Blake2bKey{} -> True
    Blake2sKey{} -> True
    Blake2spKey{} -> True
    SHA1Key{} -> True
    MD5Key{} -> True
    WORMKey -> False
    URLKey -> False
    OtherKey{} -> False
{- Formats a KeyVariety as its name, eg "SHA256E".
 -
 - For the hash-based varieties, a non-empty cached formatting is returned
 - as-is; otherwise the name is built from the hash's base name, its
 - HashSize (when it has one), and a trailing "E" when HasExt is True.
 - An OtherKey is formatted as the name it holds.
 -}
formatKeyVariety :: KeyVariety -> S.ByteString
formatKeyVariety variety = case variety of
    SHA2Key size ext cached -> cached `orElse` sized "SHA" size ext
    SHA3Key size ext cached -> cached `orElse` sized "SHA3_" size ext
    SKEINKey size ext cached -> cached `orElse` sized "SKEIN" size ext
    Blake2bKey size ext cached -> cached `orElse` sized "BLAKE2B" size ext
    Blake2sKey size ext cached -> cached `orElse` sized "BLAKE2S" size ext
    Blake2spKey size ext cached -> cached `orElse` sized "BLAKE2SP" size ext
    SHA1Key ext cached -> cached `orElse` unsized "SHA1" ext
    MD5Key ext cached -> cached `orElse` unsized "MD5" ext
    WORMKey -> "WORM"
    URLKey -> "URL"
    OtherKey name -> name
  where
    -- Prefer the cached formatting; fall back to the computed one only
    -- when the cache is empty.
    orElse cached computed
        | S.null cached = computed
        | otherwise = cached
    -- Base name followed by the hash size, then the extension marker.
    sized base (HashSize bits) = unsized (base <> S8.pack (show bits))
    -- Base name, with "E" appended when the variety has an extension.
    unsized base (HasExt withExt)
        | withExt = base <> "E"
        | otherwise = base
{- Parses the name of a key variety, eg "SHA256E".
 -
 - Recognised hash names yield the matching constructor, with the input
 - stored as its cached formatting. "WORM" and "URL" yield WORMKey and
 - URLKey. Anything else is kept verbatim in an OtherKey.
 -
 - The BLAKE2 names are only recognised when built with cryptonite 0.23.0
 - or newer; without it they fall through to OtherKey.
 -}
parseKeyVariety :: S.ByteString -> KeyVariety
parseKeyVariety name = case name of
    "SHA256" -> sized SHA2Key 256 False
    "SHA256E" -> sized SHA2Key 256 True
    "SHA512" -> sized SHA2Key 512 False
    "SHA512E" -> sized SHA2Key 512 True
    "SHA224" -> sized SHA2Key 224 False
    "SHA224E" -> sized SHA2Key 224 True
    "SHA384" -> sized SHA2Key 384 False
    "SHA384E" -> sized SHA2Key 384 True
    "SHA3_512" -> sized SHA3Key 512 False
    "SHA3_512E" -> sized SHA3Key 512 True
    "SHA3_384" -> sized SHA3Key 384 False
    "SHA3_384E" -> sized SHA3Key 384 True
    "SHA3_256" -> sized SHA3Key 256 False
    "SHA3_256E" -> sized SHA3Key 256 True
    "SHA3_224" -> sized SHA3Key 224 False
    "SHA3_224E" -> sized SHA3Key 224 True
    "SKEIN512" -> sized SKEINKey 512 False
    "SKEIN512E" -> sized SKEINKey 512 True
    "SKEIN256" -> sized SKEINKey 256 False
    "SKEIN256E" -> sized SKEINKey 256 True
#if MIN_VERSION_cryptonite(0,23,0)
    "BLAKE2B160" -> sized Blake2bKey 160 False
    "BLAKE2B160E" -> sized Blake2bKey 160 True
    "BLAKE2B224" -> sized Blake2bKey 224 False
    "BLAKE2B224E" -> sized Blake2bKey 224 True
    "BLAKE2B256" -> sized Blake2bKey 256 False
    "BLAKE2B256E" -> sized Blake2bKey 256 True
    "BLAKE2B384" -> sized Blake2bKey 384 False
    "BLAKE2B384E" -> sized Blake2bKey 384 True
    "BLAKE2B512" -> sized Blake2bKey 512 False
    "BLAKE2B512E" -> sized Blake2bKey 512 True
    "BLAKE2S160" -> sized Blake2sKey 160 False
    "BLAKE2S160E" -> sized Blake2sKey 160 True
    "BLAKE2S224" -> sized Blake2sKey 224 False
    "BLAKE2S224E" -> sized Blake2sKey 224 True
    "BLAKE2S256" -> sized Blake2sKey 256 False
    "BLAKE2S256E" -> sized Blake2sKey 256 True
    "BLAKE2SP224" -> sized Blake2spKey 224 False
    "BLAKE2SP224E" -> sized Blake2spKey 224 True
    "BLAKE2SP256" -> sized Blake2spKey 256 False
    "BLAKE2SP256E" -> sized Blake2spKey 256 True
#endif
    "SHA1" -> unsized SHA1Key False
    "SHA1E" -> unsized SHA1Key True
    "MD5" -> unsized MD5Key False
    "MD5E" -> unsized MD5Key True
    "WORM" -> WORMKey
    "URL" -> URLKey
    _ -> OtherKey name
  where
    -- Build a variety that has a hash size, caching the input as its
    -- formatting.
    sized con bits ext = con (HashSize bits) (HasExt ext) name
    -- Likewise, for the varieties that have no hash size.
    unsized con ext = con (HasExt ext) name