convert key2file and file2key to use builder and attoparsec
The new parser is significantly stricter than the old one: The old file2key allowed the fields to come in any order, but the new one requires the fixed order that git-annex has always used. Hopefully this will not cause any breakage. And the old file2key allowed eg SHA1-m1-m2-m3-m4-m5-m6--xxxx while the new does not allow duplication of fields. This could potentially improve security, because allowing lots of extra junk like that in a key could potentially be used in a SHA1 collision attack, although the current attacks need binary data and not this kind of structured numeric data. Speed improved of course, and fairly substantially, in microbenchmarks: benchmarking old/key2file time 2.264 μs (2.257 μs .. 2.273 μs) 1.000 R² (1.000 R² .. 1.000 R²) mean 2.265 μs (2.260 μs .. 2.275 μs) std dev 21.17 ns (13.06 ns .. 39.26 ns) benchmarking new/key2file' time 1.744 μs (1.741 μs .. 1.747 μs) 1.000 R² (1.000 R² .. 1.000 R²) mean 1.745 μs (1.742 μs .. 1.751 μs) std dev 13.55 ns (9.099 ns .. 21.89 ns) benchmarking old/file2key time 6.114 μs (6.102 μs .. 6.129 μs) 1.000 R² (1.000 R² .. 1.000 R²) mean 6.118 μs (6.106 μs .. 6.143 μs) std dev 55.00 ns (30.08 ns .. 100.2 ns) benchmarking new/file2key' time 1.791 μs (1.782 μs .. 1.801 μs) 1.000 R² (0.999 R² .. 1.000 R²) mean 1.792 μs (1.785 μs .. 1.804 μs) std dev 32.46 ns (20.59 ns .. 50.82 ns) variance introduced by outliers: 19% (moderately inflated)
This commit is contained in:
parent
b552551b33
commit
151562b537
2 changed files with 93 additions and 69 deletions
160
Key.hs
160
Key.hs
|
@ -1,6 +1,6 @@
|
||||||
{- git-annex Keys
|
{- git-annex Keys
|
||||||
-
|
-
|
||||||
- Copyright 2011-2017 Joey Hess <id@joeyh.name>
|
- Copyright 2011-2019 Joey Hess <id@joeyh.name>
|
||||||
-
|
-
|
||||||
- Licensed under the GNU GPL version 3 or higher.
|
- Licensed under the GNU GPL version 3 or higher.
|
||||||
-}
|
-}
|
||||||
|
@ -11,19 +11,31 @@ module Key (
|
||||||
Key(..),
|
Key(..),
|
||||||
AssociatedFile(..),
|
AssociatedFile(..),
|
||||||
stubKey,
|
stubKey,
|
||||||
key2file,
|
buildKeyFile,
|
||||||
|
keyFileParser,
|
||||||
file2key,
|
file2key,
|
||||||
|
key2file,
|
||||||
|
file2key',
|
||||||
|
key2file',
|
||||||
nonChunkKey,
|
nonChunkKey,
|
||||||
chunkKeyOffset,
|
chunkKeyOffset,
|
||||||
isChunkKey,
|
isChunkKey,
|
||||||
isKeyPrefix,
|
isKeyPrefix,
|
||||||
|
splitKeyNameExtension,
|
||||||
|
|
||||||
prop_isomorphic_key_encode,
|
prop_isomorphic_key_encode,
|
||||||
prop_isomorphic_key_decode
|
prop_isomorphic_key_decode
|
||||||
) where
|
) where
|
||||||
|
|
||||||
import Data.Char
|
|
||||||
import qualified Data.Text as T
|
import qualified Data.Text as T
|
||||||
|
import qualified Data.ByteString as S
|
||||||
|
import qualified Data.ByteString.Char8 as S8
|
||||||
|
import qualified Data.ByteString.Lazy as L
|
||||||
|
import Data.ByteString.Builder
|
||||||
|
import Data.ByteString.Builder.Extra
|
||||||
|
import qualified Data.Attoparsec.ByteString as A
|
||||||
|
import qualified Data.Attoparsec.ByteString.Char8 as A8
|
||||||
|
import Foreign.C.Types
|
||||||
|
|
||||||
import Common
|
import Common
|
||||||
import Types.Key
|
import Types.Key
|
||||||
|
@ -34,8 +46,8 @@ import qualified Utility.SimpleProtocol as Proto
|
||||||
|
|
||||||
stubKey :: Key
|
stubKey :: Key
|
||||||
stubKey = Key
|
stubKey = Key
|
||||||
{ keyName = ""
|
{ keyName = mempty
|
||||||
, keyVariety = OtherKey ""
|
, keyVariety = OtherKey mempty
|
||||||
, keySize = Nothing
|
, keySize = Nothing
|
||||||
, keyMtime = Nothing
|
, keyMtime = Nothing
|
||||||
, keyChunkSize = Nothing
|
, keyChunkSize = Nothing
|
||||||
|
@ -65,69 +77,81 @@ isKeyPrefix s = [fieldSep, fieldSep] `isInfixOf` s
|
||||||
fieldSep :: Char
|
fieldSep :: Char
|
||||||
fieldSep = '-'
|
fieldSep = '-'
|
||||||
|
|
||||||
{- Converts a key to a string that is suitable for use as a filename.
|
{- Builds a ByteString that is suitable for use as a filename representing
|
||||||
- The name field is always shown last, separated by doubled fieldSeps,
|
- a Key. The name field is always shown last, separated by doubled fieldSeps,
|
||||||
- and is the only field allowed to contain the fieldSep. -}
|
- and is the only field allowed to contain the fieldSep. -}
|
||||||
key2file :: Key -> FilePath
|
buildKeyFile :: Key -> Builder
|
||||||
key2file Key { keyVariety = kv, keySize = s, keyMtime = m, keyChunkSize = cs, keyChunkNum = cn, keyName = n } =
|
buildKeyFile k = byteString (formatKeyVariety (keyVariety k))
|
||||||
formatKeyVariety kv +++ ('s' ?: s) +++ ('m' ?: m) +++ ('S' ?: cs) +++ ('C' ?: cn) +++ (fieldSep : n)
|
<> 's' ?: (integerDec <$> keySize k)
|
||||||
|
<> 'm' ?: (integerDec . (\(CTime t) -> fromIntegral t) <$> keyMtime k)
|
||||||
|
<> 'S' ?: (integerDec <$> keyChunkSize k)
|
||||||
|
<> 'C' ?: (integerDec <$> keyChunkNum k)
|
||||||
|
<> sepbefore (sepbefore (byteString (keyName k)))
|
||||||
where
|
where
|
||||||
"" +++ y = y
|
sepbefore s = char7 fieldSep <> s
|
||||||
x +++ "" = x
|
c ?: (Just b) = sepbefore (char7 c <> b)
|
||||||
x +++ y = x ++ fieldSep:y
|
_ ?: Nothing = mempty
|
||||||
f ?: (Just v) = f : show v
|
|
||||||
_ ?: _ = ""
|
key2file :: Key -> FilePath
|
||||||
|
key2file = decodeBL . key2file'
|
||||||
|
|
||||||
|
key2file' :: Key -> L.ByteString
|
||||||
|
key2file' = toLazyByteStringWith (safeStrategy 128 smallChunkSize) L.empty . buildKeyFile
|
||||||
|
|
||||||
|
{- This is a strict parser for security reasons; a key
|
||||||
|
- can contain only 4 fields, which all consist only of numbers.
|
||||||
|
- Any key containing other fields, or non-numeric data will fail
|
||||||
|
- to parse.
|
||||||
|
-
|
||||||
|
- If a key contained non-numeric fields, they could be used to
|
||||||
|
- embed data used in a SHA1 collision attack, which would be a
|
||||||
|
- problem since the keys are committed to git.
|
||||||
|
-}
|
||||||
|
keyFileParser :: A.Parser Key
|
||||||
|
keyFileParser = do
|
||||||
|
-- key variety cannot be empty
|
||||||
|
v <- (parseKeyVariety <$> A8.takeWhile1 (/= fieldSep))
|
||||||
|
s <- parsesize
|
||||||
|
m <- parsemtime
|
||||||
|
cs <- parsechunksize
|
||||||
|
cn <- parsechunknum
|
||||||
|
_ <- A8.char fieldSep
|
||||||
|
_ <- A8.char fieldSep
|
||||||
|
n <- A.takeByteString
|
||||||
|
if validKeyName v n
|
||||||
|
then return $ Key
|
||||||
|
{ keyName = n
|
||||||
|
, keyVariety = v
|
||||||
|
, keySize = s
|
||||||
|
, keyMtime = m
|
||||||
|
, keyChunkSize = cs
|
||||||
|
, keyChunkNum = cn
|
||||||
|
}
|
||||||
|
else fail "invalid keyName"
|
||||||
|
where
|
||||||
|
parseopt p = (Just <$> (A8.char fieldSep *> p)) <|> pure Nothing
|
||||||
|
parsesize = parseopt $ A8.char 's' *> A8.decimal
|
||||||
|
parsemtime = parseopt $ CTime <$> (A8.char 'm' *> A8.decimal)
|
||||||
|
parsechunksize = parseopt $ A8.char 'S' *> A8.decimal
|
||||||
|
parsechunknum = parseopt $ A8.char 'C' *> A8.decimal
|
||||||
|
|
||||||
file2key :: FilePath -> Maybe Key
|
file2key :: FilePath -> Maybe Key
|
||||||
file2key s
|
file2key = file2key' . encodeBS
|
||||||
| key == Just stubKey || (keyName <$> key) == Just "" || (keyVariety <$> key) == Just (OtherKey "") = Nothing
|
|
||||||
| otherwise = key
|
|
||||||
where
|
|
||||||
key = startbackend stubKey s
|
|
||||||
|
|
||||||
startbackend k v = sepfield k v addvariety
|
file2key' :: S.ByteString -> Maybe Key
|
||||||
|
file2key' b = eitherToMaybe $ A.parseOnly keyFileParser b
|
||||||
sepfield k v a = case span (/= fieldSep) v of
|
|
||||||
(v', _:r) -> findfields r $ a k v'
|
|
||||||
_ -> Nothing
|
|
||||||
|
|
||||||
findfields (c:v) (Just k)
|
{- This splits any extension out of the keyName, returning the
|
||||||
| c == fieldSep = addkeyname k v
|
- keyName minus extension, and the extension (including leading dot).
|
||||||
| otherwise = sepfield k v $ addfield c
|
-}
|
||||||
findfields _ v = v
|
splitKeyNameExtension :: Key -> (S.ByteString, S.ByteString)
|
||||||
|
splitKeyNameExtension = splitKeyNameExtension' . keyName
|
||||||
|
|
||||||
addvariety k v = Just k { keyVariety = parseKeyVariety v }
|
splitKeyNameExtension' :: S.ByteString -> (S.ByteString, S.ByteString)
|
||||||
|
splitKeyNameExtension' keyname = S8.span (/= '.') keyname
|
||||||
|
|
||||||
-- This is a strict parser for security reasons; a key
|
{- Limits the length of the extension in the keyName to mitigate against
|
||||||
-- can contain only 4 fields, which all consist only of numbers.
|
- SHA1 collision attacks.
|
||||||
-- Any key containing other fields, or non-numeric data is
|
|
||||||
-- rejected with Nothing.
|
|
||||||
--
|
|
||||||
-- If a key contained non-numeric fields, they could be used to
|
|
||||||
-- embed data used in a SHA1 collision attack, which would be a
|
|
||||||
-- problem since the keys are committed to git.
|
|
||||||
addfield _ _ v | not (all isDigit v) = Nothing
|
|
||||||
addfield 's' k v = do
|
|
||||||
sz <- readish v
|
|
||||||
return $ k { keySize = Just sz }
|
|
||||||
addfield 'm' k v = do
|
|
||||||
mtime <- readish v
|
|
||||||
return $ k { keyMtime = Just mtime }
|
|
||||||
addfield 'S' k v = do
|
|
||||||
chunksize <- readish v
|
|
||||||
return $ k { keyChunkSize = Just chunksize }
|
|
||||||
addfield 'C' k v = case readish v of
|
|
||||||
Just chunknum | chunknum > 0 ->
|
|
||||||
return $ k { keyChunkNum = Just chunknum }
|
|
||||||
_ -> Nothing
|
|
||||||
addfield _ _ _ = Nothing
|
|
||||||
|
|
||||||
addkeyname k v
|
|
||||||
| validKeyName k v = Just $ k { keyName = v }
|
|
||||||
| otherwise = Nothing
|
|
||||||
|
|
||||||
{- When a key HasExt, the length of the extension is limited in order to
|
|
||||||
- mitigate against SHA1 collision attacks.
|
|
||||||
-
|
-
|
||||||
- In such an attack, the extension of the key could be made to contain
|
- In such an attack, the extension of the key could be made to contain
|
||||||
- the collision generation data, with the result that a signed git commit
|
- the collision generation data, with the result that a signed git commit
|
||||||
|
@ -137,23 +161,25 @@ file2key s
|
||||||
- characters; 20 is used here to give a little future wiggle-room.
|
- characters; 20 is used here to give a little future wiggle-room.
|
||||||
- The SHA1 common-prefix attack needs 128 bytes of data.
|
- The SHA1 common-prefix attack needs 128 bytes of data.
|
||||||
-}
|
-}
|
||||||
validKeyName :: Key -> String -> Bool
|
validKeyName :: KeyVariety -> S.ByteString -> Bool
|
||||||
validKeyName k name
|
validKeyName kv name
|
||||||
| hasExt (keyVariety k) = length (takeExtensions name) <= 20
|
| hasExt kv =
|
||||||
|
let ext = snd $ splitKeyNameExtension' name
|
||||||
|
in S.length ext <= 20
|
||||||
| otherwise = True
|
| otherwise = True
|
||||||
|
|
||||||
instance Arbitrary Key where
|
instance Arbitrary Key where
|
||||||
arbitrary = Key
|
arbitrary = Key
|
||||||
<$> (listOf1 $ elements $ ['A'..'Z'] ++ ['a'..'z'] ++ ['0'..'9'] ++ "-_\r\n \t")
|
<$> (encodeBS <$> (listOf1 $ elements $ ['A'..'Z'] ++ ['a'..'z'] ++ ['0'..'9'] ++ "-_\r\n \t"))
|
||||||
<*> (parseKeyVariety <$> (listOf1 $ elements ['A'..'Z'])) -- BACKEND
|
<*> (parseKeyVariety . encodeBS <$> (listOf1 $ elements ['A'..'Z'])) -- BACKEND
|
||||||
<*> ((abs <$>) <$> arbitrary) -- size cannot be negative
|
<*> ((abs <$>) <$> arbitrary) -- size cannot be negative
|
||||||
<*> ((abs . fromInteger <$>) <$> arbitrary) -- mtime cannot be negative
|
<*> ((abs . fromInteger <$>) <$> arbitrary) -- mtime cannot be negative
|
||||||
<*> ((abs <$>) <$> arbitrary) -- chunksize cannot be negative
|
<*> ((abs <$>) <$> arbitrary) -- chunksize cannot be negative
|
||||||
<*> ((succ . abs <$>) <$> arbitrary) -- chunknum cannot be 0 or negative
|
<*> ((succ . abs <$>) <$> arbitrary) -- chunknum cannot be 0 or negative
|
||||||
|
|
||||||
instance Hashable Key where
|
instance Hashable Key where
|
||||||
hashIO32 = hashIO32 . key2file
|
hashIO32 = hashIO32 . key2file'
|
||||||
hashIO64 = hashIO64 . key2file
|
hashIO64 = hashIO64 . key2file'
|
||||||
|
|
||||||
instance ToJSON' Key where
|
instance ToJSON' Key where
|
||||||
toJSON' = toJSON' . key2file
|
toJSON' = toJSON' . key2file
|
||||||
|
|
|
@ -12,8 +12,6 @@ module Types.Key where
|
||||||
|
|
||||||
import qualified Data.ByteString as S
|
import qualified Data.ByteString as S
|
||||||
import qualified Data.ByteString.Char8 as S8
|
import qualified Data.ByteString.Char8 as S8
|
||||||
import qualified Data.ByteString.Lazy as L
|
|
||||||
import Data.ByteString.Builder
|
|
||||||
import System.Posix.Types
|
import System.Posix.Types
|
||||||
|
|
||||||
{- A Key has a unique name, which is derived from a particular backend,
|
{- A Key has a unique name, which is derived from a particular backend,
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue