Added support for SHA3 hashed keys (in 8 varieties), when git-annex is built using the cryptonite library.

While cryptohash has SHA3 support, it has not been updated for the final
version of the spec. Note that cryptonite has not been ported to all arches
that cryptohash builds on yet.
This commit is contained in:
Joey Hess 2015-08-06 15:02:25 -04:00
parent 6ded6fd19a
commit 0ec9bc2200
8 changed files with 133 additions and 57 deletions

View file

@ -1,10 +1,12 @@
{- git-tnnex hashing backends
{- git-annex hashing backends
-
- Copyright 2011-2013 Joey Hess <id@joeyh.name>
- Copyright 2011-2015 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
{-# LANGUAGE CPP #-}
module Backend.Hash (
backends,
testKeyBackend,
@ -22,21 +24,30 @@ import qualified Build.SysConfig as SysConfig
import qualified Data.ByteString.Lazy as L
import Data.Char
data Hash = SHAHash HashSize | SkeinHash HashSize | MD5Hash
data Hash
= MD5Hash
| SHA1Hash
| SHA2Hash HashSize
| SHA3Hash HashSize
| SkeinHash HashSize
type HashSize = Int
{- Order is slightly significant; want SHA256 first, and more general
- sizes earlier. -}
hashes :: [Hash]
hashes = concat
[ map SHAHash [256, 1, 512, 224, 384]
[ map SHA2Hash [256, 512, 224, 384]
#ifdef WITH_CRYPTONITE
, map SHA3Hash [256, 512, 224, 384]
#endif
, map SkeinHash [256, 512]
, [SHA1Hash]
, [MD5Hash]
]
{- The SHA256E backend is the default, so genBackendE comes first. -}
backends :: [Backend]
backends = map genBackendE hashes ++ map genBackend hashes
backends = concatMap (\h -> [genBackendE h, genBackend h]) hashes
genBackend :: Hash -> Backend
genBackend hash = Backend
@ -55,9 +66,11 @@ genBackendE hash = (genBackend hash)
}
hashName :: Hash -> String
hashName (SHAHash size) = "SHA" ++ show size
hashName (SkeinHash size) = "SKEIN" ++ show size
hashName MD5Hash = "MD5"
hashName SHA1Hash = "SHA1"
hashName (SHA2Hash size) = "SHA" ++ show size
hashName (SHA3Hash size) = "SHA3_" ++ show size
hashName (SkeinHash size) = "SKEIN" ++ show size
hashNameE :: Hash -> String
hashNameE hash = hashName hash ++ "E"
@ -153,7 +166,15 @@ trivialMigrate oldkey newbackend afile
hashFile :: Hash -> FilePath -> Integer -> Annex String
hashFile hash file filesize = go hash
where
go (SHAHash hashsize) = case shaHasher hashsize filesize of
go MD5Hash = use md5Hasher
go SHA1Hash = usehasher 1
go (SHA2Hash hashsize) = usehasher hashsize
go (SHA3Hash hashsize) = use (sha3Hasher hashsize)
go (SkeinHash hashsize) = use (skeinHasher hashsize)
use hasher = liftIO $ hasher <$> L.readFile file
usehasher hashsize = case shaHasher hashsize filesize of
Left sha -> use sha
Right (external, internal) -> do
v <- liftIO $ externalSHA external hashsize file
@ -164,19 +185,15 @@ hashFile hash file filesize = go hash
-- fall back to internal since
-- external command failed
use internal
go (SkeinHash hashsize) = use (skeinHasher hashsize)
go MD5Hash = use md5Hasher
use hasher = liftIO $ hasher <$> L.readFile file
shaHasher :: HashSize -> Integer -> Either (L.ByteString -> String) (String, L.ByteString -> String)
shaHasher hashsize filesize
| hashsize == 1 = use SysConfig.sha1 sha1
| hashsize == 256 = use SysConfig.sha256 sha256
| hashsize == 224 = use SysConfig.sha224 sha224
| hashsize == 384 = use SysConfig.sha384 sha384
| hashsize == 512 = use SysConfig.sha512 sha512
| otherwise = error $ "unsupported sha size " ++ show hashsize
| hashsize == 256 = use SysConfig.sha256 sha2_256
| hashsize == 224 = use SysConfig.sha224 sha2_224
| hashsize == 384 = use SysConfig.sha384 sha2_384
| hashsize == 512 = use SysConfig.sha512 sha2_512
| otherwise = error $ "unsupported SHA size " ++ show hashsize
where
use Nothing hasher = Left $ usehasher hasher
use (Just c) hasher
@ -189,11 +206,21 @@ shaHasher hashsize filesize
| otherwise = Right (c, usehasher hasher)
usehasher hasher = show . hasher
sha3Hasher :: HashSize -> (L.ByteString -> String)
sha3Hasher hashsize
#ifdef WITH_CRYPTONITE
| hashsize == 256 = show . sha3_256
| hashsize == 224 = show . sha3_224
| hashsize == 384 = show . sha3_384
| hashsize == 512 = show . sha3_512
#endif
| otherwise = error $ "unsupported SHA3 size " ++ show hashsize
skeinHasher :: HashSize -> (L.ByteString -> String)
skeinHasher hashsize
| hashsize == 256 = show . skein256
| hashsize == 512 = show . skein512
| otherwise = error $ "unsupported skein size " ++ show hashsize
| otherwise = error $ "unsupported SKEIN size " ++ show hashsize
md5Hasher :: L.ByteString -> String
md5Hasher = show . md5
@ -207,7 +234,7 @@ md5Hasher = show . md5
-}
testKeyBackend :: Backend
testKeyBackend =
let b = genBackendE (SHAHash 256)
let b = genBackendE (SHA2Hash 256)
in b { getKey = (fmap addE) <$$> getKey b }
where
addE k = k { keyName = keyName k ++ longext }

View file

@ -270,7 +270,7 @@ bup2GitRemote r
bupRef :: Key -> String
bupRef k
| Git.Ref.legal True shown = shown
| otherwise = "git-annex-" ++ show (sha256 (fromString shown))
| otherwise = "git-annex-" ++ show (sha2_256 (fromString shown))
where
shown = key2file k

View file

@ -1,11 +1,23 @@
{- Convenience wrapper around cryptohash. -}
{- Convenience wrapper around cryptohash/cryptonite.
-
- SHA3 hashes are currently only enabled when using cryptonite,
- because of https://github.com/vincenthz/hs-cryptohash/issues/36
-}
{-# LANGUAGE CPP #-}
module Utility.Hash (
sha1,
sha224,
sha256,
sha384,
sha512,
sha2_224,
sha2_256,
sha2_384,
sha2_512,
#ifdef WITH_CRYPTONITE
sha3_224,
sha3_256,
sha3_384,
sha3_512,
#endif
skein256,
skein512,
md5,
@ -20,25 +32,38 @@ import qualified Data.Text as T
import qualified Data.Text.Encoding as T
import qualified Data.ByteString as S
import Crypto.Hash
#ifdef WITH_CRYPTONITE
import Crypto.MAC.HMAC
#endif
sha1 :: L.ByteString -> Digest SHA1
sha1 = hashlazy
sha224 :: L.ByteString -> Digest SHA224
sha224 = hashlazy
sha2_224 :: L.ByteString -> Digest SHA224
sha2_224 = hashlazy
sha256 :: L.ByteString -> Digest SHA256
sha256 = hashlazy
sha2_256 :: L.ByteString -> Digest SHA256
sha2_256 = hashlazy
sha384 :: L.ByteString -> Digest SHA384
sha384 = hashlazy
sha2_384 :: L.ByteString -> Digest SHA384
sha2_384 = hashlazy
sha512 :: L.ByteString -> Digest SHA512
sha512 = hashlazy
sha2_512 :: L.ByteString -> Digest SHA512
sha2_512 = hashlazy
-- sha3 is not yet fully standardized
--sha3 :: L.ByteString -> Digest SHA3
--sha3 = hashlazy
#ifdef WITH_CRYPTONITE
sha3_224 :: L.ByteString -> Digest SHA3_224
sha3_224 = hashlazy
sha3_256 :: L.ByteString -> Digest SHA3_256
sha3_256 = hashlazy
sha3_384 :: L.ByteString -> Digest SHA3_384
sha3_384 = hashlazy
sha3_512 :: L.ByteString -> Digest SHA3_512
sha3_512 = hashlazy
#endif
skein256 :: L.ByteString -> Digest Skein256_256
skein256 = hashlazy
@ -53,12 +78,18 @@ md5 = hashlazy
prop_hashes_stable :: Bool
prop_hashes_stable = all (\(hasher, result) -> hasher foo == result)
[ (show . sha1, "0beec7b5ea3f0fdbc95d0dd47f3c5bc275da8a33")
, (show . sha224, "0808f64e60d58979fcb676c96ec938270dea42445aeefcd3a4e6f8db")
, (show . sha256, "2c26b46b68ffc68ff99b453c1d30413413422d706483bfa0f98a5e886266e7ae")
, (show . sha384, "98c11ffdfdd540676b1a137cb1a22b2a70350c9a44171d6b1180c6be5cbb2ee3f79d532c8a1dd9ef2e8e08e752a3babb")
, (show . sha512, "f7fbba6e0636f890e56fbbf3283e524c6fa3204ae298382d624741d0dc6638326e282c41be5e4254d8820772c5518a2c5a8c0c7f7eda19594a7eb539453e1ed7")
, (show . sha2_224, "0808f64e60d58979fcb676c96ec938270dea42445aeefcd3a4e6f8db")
, (show . sha2_256, "2c26b46b68ffc68ff99b453c1d30413413422d706483bfa0f98a5e886266e7ae")
, (show . sha2_384, "98c11ffdfdd540676b1a137cb1a22b2a70350c9a44171d6b1180c6be5cbb2ee3f79d532c8a1dd9ef2e8e08e752a3babb")
, (show . sha2_512, "f7fbba6e0636f890e56fbbf3283e524c6fa3204ae298382d624741d0dc6638326e282c41be5e4254d8820772c5518a2c5a8c0c7f7eda19594a7eb539453e1ed7")
, (show . skein256, "a04efd9a0aeed6ede40fe5ce0d9361ae7b7d88b524aa19917b9315f1ecf00d33")
, (show . skein512, "fd8956898113510180aa4658e6c0ac85bd74fb47f4a4ba264a6b705d7a8e8526756e75aecda12cff4f1aca1a4c2830fbf57f458012a66b2b15a3dd7d251690a7")
#ifdef WITH_CRYPTONITE
, (show . sha3_224, "f4f6779e153c391bbd29c95e72b0708e39d9166c7cea51d1f10ef58a")
, (show . sha3_256, "76d3bc41c9f588f7fcd0d5bf4718f8f84b1c41b20882703100b9eb9413807c01")
, (show . sha3_384, "665551928d13b7d84ee02734502b018d896a0fb87eed5adb4c87ba91bbd6489410e11b0fbcc06ed7d0ebad559e5d3bb5")
, (show . sha3_512, "4bca2b137edc580fe50a88983ef860ebaca36c857b1f492839d6d7392452a63c82cbebc68e3b70a2a1480b4bb5d437a7cba6ecf9d89f9ff3ccd14cd6146ea7e7")
#endif
, (show . md5, "acbd18db4cc2f85cedef654fccc4a4d8")
]
where
@ -83,7 +114,10 @@ calcMac mac = case mac of
HmacSha384 -> use SHA384
HmacSha512 -> use SHA512
where
use alg k m = show (hmacGetDigest (hmacAlg alg k m))
use alg k m = show (hmacGetDigest (hmacWitnessAlg alg k m))
hmacWitnessAlg :: HashAlgorithm a => a -> S.ByteString -> S.ByteString -> HMAC a
hmacWitnessAlg _ = hmac
-- Check that all the MACs continue to produce the same.
prop_mac_stable :: Bool

View file

@ -185,7 +185,7 @@ fromAuthToken = TE.decodeLatin1 . toBytes
fromAuthToken = id
#endif
{- Generates a random sha512 string, encapsulated in a SecureMem,
{- Generates a random sha2_512 string, encapsulated in a SecureMem,
- suitable to be used for an authentication secret. -}
genAuthToken :: IO AuthToken
genAuthToken = do
@ -193,7 +193,7 @@ genAuthToken = do
return $
case genBytes 512 g of
Left e -> error $ "failed to generate auth token: " ++ show e
Right (s, _) -> toAuthToken $ T.pack $ show $ sha512 $ L.fromChunks [s]
Right (s, _) -> toAuthToken $ T.pack $ show $ sha2_512 $ L.fromChunks [s]
{- A Yesod isAuthorized method, which checks the auth cgi parameter
- against a token extracted from the Yesod application.

2
debian/changelog vendored
View file

@ -30,6 +30,8 @@ git-annex (5.20150732) UNRELEASED; urgency=medium
version, the manual push is still needed.
* git-annex-shell: Don't let configlist auto-init repository when in
readonly mode.
* Added support for SHA3 hashed keys (in 8 varieties), when git-annex is
built using the cryptonite library.
-- Joey Hess <id@joeyh.name> Fri, 31 Jul 2015 12:31:39 -0400

2
debian/control vendored
View file

@ -10,7 +10,7 @@ Build-Depends:
libghc-data-default-dev,
libghc-hslogger-dev,
libghc-pcre-light-dev,
libghc-cryptohash-dev (>= 0.11.0),
libghc-cryptonite-dev | libghc-cryptohash-dev (>= 0.11.0),
libghc-sandi-dev,
libghc-utf8-string-dev,
libghc-aws-dev (>= 0.9.2-2~),

View file

@ -10,24 +10,29 @@ can use different ones for different files.
verifying that the file content is right, and can avoid duplicates of
files with the same content. Its need to generate checksums
can make it slower for large files.
* `SHA256` -- Does not include the file extension in the key, which can
lead to better deduplication but can confuse some programs.
* `WORM` ("Write Once, Read Many") This assumes that any file with
the same filename, size, and modification time has the same content.
This is the least expensive backend, recommended for really large
files or slow systems.
* `SHA256` -- SHA-2 hash that does not include the file extension in the
key, which can lead to better deduplication but can confuse some programs.
* `SHA512`, `SHA512E` -- Best SHA-2 hash, for the very paranoid.
* `SHA1`, `SHA1E`, `MD5`, `MD5E` -- Smaller hashes than `SHA256`
for those who want a checksum but are not concerned about security.
* `SHA384`, `SHA384E`, `SHA224`, `SHA224E` -- Hashes for people who like
unusual sizes.
* `SHA384`, `SHA384E`, `SHA224`, `SHA224E` -- SHA-2 hashes for
people who like unusual sizes.
* `SHA3_512`, `SHA_512E`, `SHA3_384`, `SHA3_384E`, `SHA3_256`, `SHA3_256E`, `SHA3_224`, `SHA3_224E`
-- SHA-3 hashes, for bleeding edge fun.
* `SKEIN512`, `SKEIN512E`, `SKEIN256`, `SKEIN256E`
-- [Skein hash](http://en.wikipedia.org/wiki/Skein_hash),
a well-regarded SHA3 hash competition finalist.
* `SHA1`, `SHA1E`, `MD5`, `MD5E` -- Smaller hashes than `SHA256`
for those who want a checksum but are not concerned about security.
* `WORM` ("Write Once, Read Many") -- This assumes that any file with
the same filename, size, and modification time has the same content.
This is the least expensive backend, recommended for really large
files or slow systems.
* `URL` -- This is a key that is generated from the url to a file.
It's generated when using eg, `git annex addurl --fast`, when the file
content is not available for hashing.
Note that the SHA512, SKEIN512 and SHA384 generate long paths,
Note that the various 512 and 384 length hashes result in long paths,
which are known to not work on Windows. If interoperability on Windows is a
concern, avoid those backends.
concern, avoid those.
The `annex.backends` git-config setting can be used to list the backends
git-annex should use. The first one listed will be used by default when

View file

@ -109,12 +109,14 @@ Flag Database
Description: Enable building with persistent for database use (disable to build on platforms not supporting TH)
Default: True
Flag Cryptonite
Description: Use the cryptonite library, instead of the older cryptohash
Executable git-annex
Main-Is: git-annex.hs
Build-Depends:
base (>= 4.5 && < 4.9),
optparse-applicative (>= 0.11.0),
cryptohash (>= 0.11.0),
containers (>= 0.5.0.0),
exceptions (>= 0.6),
QuickCheck (>= 2.1),
@ -144,6 +146,12 @@ Executable git-annex
else
Build-Depends: network (< 2.6), network (>= 2.0)
if flag(Cryptonite)
Build-Depends: cryptonite
CPP-Options: -DWITH_CRYPTONITE
else
Build-Depends: cryptohash (>= 0.11.0)
if flag(Production)
GHC-Options: -O2