Added SKEIN256 and SKEIN512 backends

SHA3 is still waiting for final standardization.
Although this is looking less likely given
https://www.cdt.org/blogs/joseph-lorenzo-hall/2409-nist-sha-3

In the meantime, cryptohash implements skein, and it's used by some of the
haskell ecosystem (for yesod sessions, IIRC), so this implementation is
likely to continue working. Also, I've talked with the cryprohash author
and he's a reasonable guy.

It makes sense to have an alternate high security hash, in case some
horrible attack is found against SHA2 tomorrow, or in case SHA3 comes out
and worst fears are realized.

I'd also like to support using skein for HMAC. But no hurry there and
a new version of cryptohash has much nicer HMAC code, so I will probably
wait until I can use that version.
This commit is contained in:
Joey Hess 2013-10-01 20:34:06 -04:00
parent d9355d8064
commit a05b763b01
5 changed files with 89 additions and 68 deletions

View file

@ -27,12 +27,12 @@ import qualified Types.Backend as B
import Config
-- When adding a new backend, import it here and add it to the list.
import qualified Backend.SHA
import qualified Backend.Hash
import qualified Backend.WORM
import qualified Backend.URL
list :: [Backend]
list = Backend.SHA.backends ++ Backend.WORM.backends ++ Backend.URL.backends
list = Backend.Hash.backends ++ Backend.WORM.backends ++ Backend.URL.backends
{- List of backends in the order to try them when storing a new key. -}
orderedList :: Annex [Backend]

View file

@ -1,11 +1,11 @@
{- git-annex SHA backends
{- git-annex hashing backends
-
- Copyright 2011-2013 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Backend.SHA (backends) where
module Backend.Hash (backends) where
import Common.Annex
import qualified Annex
@ -19,87 +19,64 @@ import qualified Build.SysConfig as SysConfig
import qualified Data.ByteString.Lazy as L
import Data.Char
type SHASize = Int
data Hash = SHAHash HashSize | SkeinHash HashSize
type HashSize = Int
{- Order is slightly significant; want SHA256 first, and more general
- sizes earlier. -}
sizes :: [Int]
sizes = [256, 1, 512, 224, 384]
hashes :: [Hash]
hashes = concat
[ map SHAHash [256, 1, 512, 224, 384]
, map SkeinHash [256, 512]
]
{- The SHA256E backend is the default. -}
{- The SHA256E backend is the default, so genBackendE comes first. -}
backends :: [Backend]
backends = catMaybes $ map genBackendE sizes ++ map genBackend sizes
backends = catMaybes $ map genBackendE hashes ++ map genBackend hashes
genBackend :: SHASize -> Maybe Backend
genBackend size = Just Backend
{ name = shaName size
, getKey = keyValue size
, fsckKey = Just $ checkKeyChecksum size
genBackend :: Hash -> Maybe Backend
genBackend hash = Just Backend
{ name = hashName hash
, getKey = keyValue hash
, fsckKey = Just $ checkKeyChecksum hash
, canUpgradeKey = Just needsUpgrade
}
genBackendE :: SHASize -> Maybe Backend
genBackendE size = do
b <- genBackend size
genBackendE :: Hash -> Maybe Backend
genBackendE hash = do
b <- genBackend hash
return $ b
{ name = shaNameE size
, getKey = keyValueE size
{ name = hashNameE hash
, getKey = keyValueE hash
}
shaName :: SHASize -> String
shaName size = "SHA" ++ show size
hashName :: Hash -> String
hashName (SHAHash size) = "SHA" ++ show size
hashName (SkeinHash size) = "SKEIN" ++ show size
shaNameE :: SHASize -> String
shaNameE size = shaName size ++ "E"
hashNameE :: Hash -> String
hashNameE hash = hashName hash ++ "E"
shaN :: SHASize -> FilePath -> Integer -> Annex String
shaN shasize file filesize = do
showAction "checksum"
liftIO $ case shaCommand shasize filesize of
Left sha -> sha <$> L.readFile file
Right command ->
either error return
=<< externalSHA command shasize file
shaCommand :: SHASize -> Integer -> Either (L.ByteString -> String) String
shaCommand shasize filesize
| shasize == 1 = use SysConfig.sha1 sha1
| shasize == 256 = use SysConfig.sha256 sha256
| shasize == 224 = use SysConfig.sha224 sha224
| shasize == 384 = use SysConfig.sha384 sha384
| shasize == 512 = use SysConfig.sha512 sha512
| otherwise = error $ "bad sha size " ++ show shasize
where
use Nothing hasher = Left $ show . hasher
use (Just c) hasher
{- Use builtin, but slightly slower hashing for
- smallish files. Cryptohash benchmarks 90 to 101%
- faster than external hashers, depending on the hash
- and system. So there is no point forking an external
- process unless the file is large. -}
| filesize < 1048576 = use Nothing hasher
| otherwise = Right c
{- A key is a checksum of its contents. -}
keyValue :: SHASize -> KeySource -> Annex (Maybe Key)
keyValue shasize source = do
{- A key is a hash of its contents. -}
keyValue :: Hash -> KeySource -> Annex (Maybe Key)
keyValue hash source = do
let file = contentLocation source
stat <- liftIO $ getFileStatus file
let filesize = fromIntegral $ fileSize stat
s <- shaN shasize file filesize
s <- hashFile hash file filesize
return $ Just $ stubKey
{ keyName = s
, keyBackendName = shaName shasize
, keyBackendName = hashName hash
, keySize = Just filesize
}
{- Extension preserving keys. -}
keyValueE :: SHASize -> KeySource -> Annex (Maybe Key)
keyValueE size source = keyValue size source >>= maybe (return Nothing) addE
keyValueE :: Hash -> KeySource -> Annex (Maybe Key)
keyValueE hash source = keyValue hash source >>= maybe (return Nothing) addE
where
addE k = return $ Just $ k
{ keyName = keyName k ++ selectExtension (keyFilename source)
, keyBackendName = shaNameE size
, keyBackendName = hashNameE hash
}
selectExtension :: FilePath -> String
@ -113,27 +90,27 @@ selectExtension f
shortenough e = length e <= 4 -- long enough for "jpeg"
{- A key's checksum is checked during fsck. -}
checkKeyChecksum :: SHASize -> Key -> FilePath -> Annex Bool
checkKeyChecksum size key file = do
checkKeyChecksum :: Hash -> Key -> FilePath -> Annex Bool
checkKeyChecksum hash key file = do
fast <- Annex.getState Annex.fast
mstat <- liftIO $ catchMaybeIO $ getFileStatus file
case (mstat, fast) of
(Just stat, False) -> do
let filesize = fromIntegral $ fileSize stat
check <$> shaN size file filesize
check <$> hashFile hash file filesize
_ -> return True
where
sha = keySha key
expected = keyHash key
check s
| s == sha = True
| s == expected = True
{- A bug caused checksums to be prefixed with \ in some
- cases; still accept these as legal now that the bug has been
- fixed. -}
| '\\' : s == sha = True
| '\\' : s == expected = True
| otherwise = False
keySha :: Key -> String
keySha key = dropExtensions (keyName key)
keyHash :: Key -> String
keyHash key = dropExtensions (keyName key)
validExtension :: Char -> Bool
validExtension c
@ -144,5 +121,42 @@ validExtension c
{- Upgrade keys that have the \ prefix on their sha due to a bug, or
- that contain non-alphanumeric characters in their extension. -}
needsUpgrade :: Key -> Bool
needsUpgrade key = "\\" `isPrefixOf` keySha key ||
needsUpgrade key = "\\" `isPrefixOf` keyHash key ||
any (not . validExtension) (takeExtensions $ keyName key)
hashFile :: Hash -> FilePath -> Integer -> Annex String
hashFile hash file filesize = do
showAction "checksum"
liftIO $ go hash
where
go (SHAHash hashsize) = case shaCommand hashsize filesize of
Left sha -> sha <$> L.readFile file
Right command ->
either error return
=<< externalSHA command hashsize file
go (SkeinHash hashsize) = skeinHasher hashsize <$> L.readFile file
skeinHasher :: HashSize -> (L.ByteString -> String)
skeinHasher hashsize
| hashsize == 256 = show . skein256
| hashsize == 512 = show . skein512
| otherwise = error $ "bad skein size " ++ show hashsize
shaCommand :: HashSize -> Integer -> Either (L.ByteString -> String) String
shaCommand hashsize filesize
| hashsize == 1 = use SysConfig.sha1 sha1
| hashsize == 256 = use SysConfig.sha256 sha256
| hashsize == 224 = use SysConfig.sha224 sha224
| hashsize == 384 = use SysConfig.sha384 sha384
| hashsize == 512 = use SysConfig.sha512 sha512
| otherwise = error $ "bad sha size " ++ show hashsize
where
use Nothing hasher = Left $ show . hasher
use (Just c) hasher
{- Use builtin, but slightly slower hashing for
- smallish files. Cryptohash benchmarks 90 to 101%
- faster than external hashers, depending on the hash
- and system. So there is no point forking an external
- process unless the file is large. -}
| filesize < 1048576 = use Nothing hasher
| otherwise = Right c

View file

@ -26,4 +26,8 @@ sha512 = hashlazy
--sha3 :: L.ByteString -> Digest SHA3
--sha3 = hashlazy
skein256 :: L.ByteString -> Digest Skein256_256
skein256 = hashlazy
skein512 :: L.ByteString -> Digest Skein512_512
skein512 = hashlazy

1
debian/changelog vendored
View file

@ -12,6 +12,7 @@ git-annex (4.20130921) UNRELEASED; urgency=low
* Use cryptohash rather than SHA for hashing when no external hash program
is available. This is a significant speedup for SHA256 on OSX, for
example.
* Added SKEIN256 and SKEIN512 backends.
* Android build redone from scratch, many dependencies updated,
and entire build can now be done using provided scripts.
* assistant: Clear the list of failed transfers when doing a full transfer

View file

@ -21,6 +21,8 @@ can use different ones for different files.
but are not concerned about security.
* `SHA384`, `SHA384E`, `SHA224`, `SHA224E` -- Hashes for people who like
unusual sizes.
* `SKEIN512`, `SKEIN256` -- [Skein hash](http://en.wikipedia.org/wiki/Skein_hash),
a well-regarded SHA3 hash competition finalist.
The `annex.backends` git-config setting can be used to list the backends
git-annex should use. The first one listed will be used by default when