add KeyVariety type

Where before the "name" of a key and a backend was a string, this makes
it a concrete data type.

This is groundwork for allowing some varieties of keys to be disabled
in file2key, so git-annex won't use them at all.

Benchmarks run in my big repo:

old git-annex info:

real	0m3.338s
user	0m3.124s
sys	0m0.244s

new git-annex info:

real	0m3.216s
user	0m3.024s
sys	0m0.220s

new git-annex find:

real	0m7.138s
user	0m6.924s
sys	0m0.252s

old git-annex find:

real	0m7.433s
user	0m7.240s
sys	0m0.232s

Surprising result; I'd have expected it to be slower, since it now parses
all the key varieties. But the parser is very simple, and perhaps
sharing KeyVariety values uses less memory or something like that.

This commit was supported by the NSF-funded DataLad project.
This commit is contained in:
Joey Hess 2017-02-24 15:16:56 -04:00
parent ca0daa8bb8
commit 9c4650358c
No known key found for this signature in database
GPG key ID: C910D9222512E3C7
22 changed files with 202 additions and 99 deletions

30
Key.hs
View file

@ -35,7 +35,7 @@ import qualified Utility.SimpleProtocol as Proto
stubKey :: Key
stubKey = Key
{ keyName = ""
, keyBackendName = ""
, keyVariety = OtherKey ""
, keySize = Nothing
, keyMtime = Nothing
, keyChunkSize = Nothing
@ -69,8 +69,8 @@ fieldSep = '-'
- The name field is always shown last, separated by doubled fieldSeps,
- and is the only field allowed to contain the fieldSep. -}
key2file :: Key -> FilePath
key2file Key { keyBackendName = b, keySize = s, keyMtime = m, keyChunkSize = cs, keyChunkNum = cn, keyName = n } =
b +++ ('s' ?: s) +++ ('m' ?: m) +++ ('S' ?: cs) +++ ('C' ?: cn) +++ (fieldSep : n)
key2file Key { keyVariety = kv, keySize = s, keyMtime = m, keyChunkSize = cs, keyChunkNum = cn, keyName = n } =
formatKeyVariety kv +++ ('s' ?: s) +++ ('m' ?: m) +++ ('S' ?: cs) +++ ('C' ?: cn) +++ (fieldSep : n)
where
"" +++ y = y
x +++ "" = x
@ -80,12 +80,12 @@ key2file Key { keyBackendName = b, keySize = s, keyMtime = m, keyChunkSize = cs,
file2key :: FilePath -> Maybe Key
file2key s
| key == Just stubKey || (keyName <$> key) == Just "" || (keyBackendName <$> key) == Just "" = Nothing
| key == Just stubKey || (keyName <$> key) == Just "" || (keyVariety <$> key) == Just (OtherKey "") = Nothing
| otherwise = key
where
key = startbackend stubKey s
startbackend k v = sepfield k v addbackend
startbackend k v = sepfield k v addvariety
sepfield k v a = case span (/= fieldSep) v of
(v', _:r) -> findfields r $ a k v'
@ -96,7 +96,7 @@ file2key s
| otherwise = sepfield k v $ addfield c
findfields _ v = v
addbackend k v = Just k { keyBackendName = v }
addvariety k v = Just k { keyVariety = parseKeyVariety v }
-- This is a strict parser for security reasons; a key
-- can contain only 4 fields, which all consist only of numbers.
@ -126,31 +126,27 @@ file2key s
| validKeyName k v = Just $ k { keyName = v }
| otherwise = Nothing
{- A key with a backend ending in "E" is an extension preserving key,
- using some hash.
{- When a key HasExt, the length of the extension is limited in order to
- mitigate against SHA1 collision attacks (specifically, chosen-prefix
- attacks).
-
- The length of the extension is limited in order to mitigate against
- SHA1 collision attacks (specifically, chosen-prefix attacks).
- In such an attack, the extension of the key could be made to contain
- the collision generation data, with the result that a signed git commit
- including such keys would not be secure.
-
- The maximum extension length ever generated for such a key was 8
- characters; 20 is used here to give a little future wiggle-room.
- The SHA1 common-prefix attack used 128 bytes of data.
-
- This code is here, and not in Backend.Hash (where it really belongs)
- so that file2key can check it whenever a Key is constructed.
- The SHA1 common-prefix attack needs 128 bytes of data.
-}
validKeyName :: Key -> String -> Bool
validKeyName k v
| end (keyBackendName k) == "E" = length (takeExtensions v) <= 20
validKeyName k name
| hasExt (keyVariety k) = length (takeExtensions name) <= 20
| otherwise = True
instance Arbitrary Key where
arbitrary = Key
<$> (listOf1 $ elements $ ['A'..'Z'] ++ ['a'..'z'] ++ ['0'..'9'] ++ "-_\r\n \t")
<*> (listOf1 $ elements ['A'..'Z']) -- BACKEND
<*> (parseKeyVariety <$> (listOf1 $ elements ['A'..'Z'])) -- BACKEND
<*> ((abs <$>) <$> arbitrary) -- size cannot be negative
<*> arbitrary
<*> ((abs <$>) <$> arbitrary) -- chunksize cannot be negative