2015-08-06 19:02:25 +00:00
|
|
|
{- git-annex hashing backends
|
2011-03-01 20:50:53 +00:00
|
|
|
-
|
2018-03-13 20:15:35 +00:00
|
|
|
- Copyright 2011-2018 Joey Hess <id@joeyh.name>
|
2011-03-01 20:50:53 +00:00
|
|
|
-
|
|
|
|
- Licensed under the GNU GPL version 3 or higher.
|
|
|
|
-}
|
|
|
|
|
2018-03-15 15:16:00 +00:00
|
|
|
{-# LANGUAGE CPP #-}
|
|
|
|
|
2014-08-01 19:09:49 +00:00
|
|
|
module Backend.Hash (
|
|
|
|
backends,
|
|
|
|
testKeyBackend,
|
|
|
|
) where
|
2011-03-01 20:50:53 +00:00
|
|
|
|
2016-01-20 20:36:33 +00:00
|
|
|
import Annex.Common
|
2011-03-01 20:50:53 +00:00
|
|
|
import qualified Annex
|
2017-02-24 19:16:56 +00:00
|
|
|
import Types.Key
|
2011-06-02 01:56:04 +00:00
|
|
|
import Types.Backend
|
2012-06-20 20:07:14 +00:00
|
|
|
import Types.KeySource
|
Use cryptohash rather than SHA for hashing.
This is a massive win on OSX, which doesn't have a sha256sum normally.
Only use external hash commands when the file is > 1 mb,
since cryptohash is quite close to them in speed.
SHA is still used to calculate HMACs. I don't quite understand
cryptohash's API for those.
Used the following benchmark to arrive at the 1 mb number.
1 mb file:
benchmarking sha256/internal
mean: 13.86696 ms, lb 13.83010 ms, ub 13.93453 ms, ci 0.950
std dev: 249.3235 us, lb 162.0448 us, ub 458.1744 us, ci 0.950
found 5 outliers among 100 samples (5.0%)
4 (4.0%) high mild
1 (1.0%) high severe
variance introduced by outliers: 10.415%
variance is moderately inflated by outliers
benchmarking sha256/external
mean: 14.20670 ms, lb 14.17237 ms, ub 14.27004 ms, ci 0.950
std dev: 230.5448 us, lb 150.7310 us, ub 427.6068 us, ci 0.950
found 3 outliers among 100 samples (3.0%)
2 (2.0%) high mild
1 (1.0%) high severe
2 mb file:
benchmarking sha256/internal
mean: 26.44270 ms, lb 26.23701 ms, ub 26.63414 ms, ci 0.950
std dev: 1.012303 ms, lb 925.8921 us, ub 1.122267 ms, ci 0.950
variance introduced by outliers: 35.540%
variance is moderately inflated by outliers
benchmarking sha256/external
mean: 26.84521 ms, lb 26.77644 ms, ub 26.91433 ms, ci 0.950
std dev: 347.7867 us, lb 210.6283 us, ub 571.3351 us, ci 0.950
found 6 outliers among 100 samples (6.0%)
import Crypto.Hash
import Data.ByteString.Lazy as L
import Criterion.Main
import Common
testfile :: FilePath
testfile = "/run/shm/data" -- on ram disk
main = defaultMain
[ bgroup "sha256"
[ bench "internal" $ whnfIO internal
, bench "external" $ whnfIO external
]
]
sha256 :: L.ByteString -> Digest SHA256
sha256 = hashlazy
internal :: IO String
internal = show . sha256 <$> L.readFile testfile
external :: IO String
external = do
s <- readProcess "sha256sum" [testfile]
return $ fst $ separate (== ' ') s
2013-09-22 23:45:08 +00:00
|
|
|
import Utility.Hash
|
2012-07-04 13:08:20 +00:00
|
|
|
|
|
|
|
import qualified Data.ByteString.Lazy as L
|
2012-12-20 21:16:55 +00:00
|
|
|
import Data.Char
|
2011-03-01 20:50:53 +00:00
|
|
|
|
2015-08-06 19:02:25 +00:00
|
|
|
data Hash
|
|
|
|
= MD5Hash
|
|
|
|
| SHA1Hash
|
|
|
|
| SHA2Hash HashSize
|
|
|
|
| SHA3Hash HashSize
|
|
|
|
| SkeinHash HashSize
|
2018-03-15 15:16:00 +00:00
|
|
|
#if MIN_VERSION_cryptonite(0,23,0)
|
2018-03-13 20:15:35 +00:00
|
|
|
| Blake2bHash HashSize
|
|
|
|
| Blake2sHash HashSize
|
|
|
|
| Blake2spHash HashSize
|
2018-03-15 15:16:00 +00:00
|
|
|
#endif
|
2011-05-16 15:46:34 +00:00
|
|
|
|
2012-09-12 17:22:16 +00:00
|
|
|
{- Order is slightly significant; want SHA256 first, and more general
|
|
|
|
- sizes earlier. -}
|
2013-10-02 00:34:06 +00:00
|
|
|
hashes :: [Hash]
|
|
|
|
hashes = concat
|
2017-02-24 19:16:56 +00:00
|
|
|
[ map (SHA2Hash . HashSize) [256, 512, 224, 384]
|
|
|
|
, map (SHA3Hash . HashSize) [256, 512, 224, 384]
|
|
|
|
, map (SkeinHash . HashSize) [256, 512]
|
2018-03-15 15:16:00 +00:00
|
|
|
#if MIN_VERSION_cryptonite(0,23,0)
|
2018-03-13 20:15:35 +00:00
|
|
|
, map (Blake2bHash . HashSize) [256, 512, 160, 224, 384]
|
|
|
|
, map (Blake2sHash . HashSize) [256, 160, 224]
|
|
|
|
, map (Blake2spHash . HashSize) [256, 224]
|
2018-03-15 15:16:00 +00:00
|
|
|
#endif
|
2015-08-06 19:02:25 +00:00
|
|
|
, [SHA1Hash]
|
2015-02-04 17:47:54 +00:00
|
|
|
, [MD5Hash]
|
2013-10-02 00:34:06 +00:00
|
|
|
]
|
2011-05-16 15:46:34 +00:00
|
|
|
|
2013-10-02 00:34:06 +00:00
|
|
|
{- The SHA256E backend is the default, so genBackendE comes first. -}
|
2011-12-31 08:11:39 +00:00
|
|
|
backends :: [Backend]
|
2015-08-06 19:02:25 +00:00
|
|
|
backends = concatMap (\h -> [genBackendE h, genBackend h]) hashes
|
2011-03-02 17:47:45 +00:00
|
|
|
|
2014-08-01 19:09:49 +00:00
|
|
|
genBackend :: Hash -> Backend
|
|
|
|
genBackend hash = Backend
|
2017-02-24 19:16:56 +00:00
|
|
|
{ backendVariety = hashKeyVariety hash (HasExt False)
|
2013-10-02 00:34:06 +00:00
|
|
|
, getKey = keyValue hash
|
2015-10-01 17:28:49 +00:00
|
|
|
, verifyKeyContent = Just $ checkKeyChecksum hash
|
2013-09-25 07:09:06 +00:00
|
|
|
, canUpgradeKey = Just needsUpgrade
|
2014-07-10 21:06:04 +00:00
|
|
|
, fastMigrate = Just trivialMigrate
|
2014-07-27 16:33:46 +00:00
|
|
|
, isStableKey = const True
|
2012-07-04 13:08:20 +00:00
|
|
|
}
|
2011-04-08 00:08:11 +00:00
|
|
|
|
2014-08-01 19:09:49 +00:00
|
|
|
genBackendE :: Hash -> Backend
|
|
|
|
genBackendE hash = (genBackend hash)
|
2017-02-24 19:16:56 +00:00
|
|
|
{ backendVariety = hashKeyVariety hash (HasExt True)
|
2014-08-01 19:09:49 +00:00
|
|
|
, getKey = keyValueE hash
|
|
|
|
}
|
2011-05-16 15:46:34 +00:00
|
|
|
|
2017-02-24 19:16:56 +00:00
|
|
|
hashKeyVariety :: Hash -> HasExt -> KeyVariety
|
|
|
|
hashKeyVariety MD5Hash = MD5Key
|
|
|
|
hashKeyVariety SHA1Hash = SHA1Key
|
|
|
|
hashKeyVariety (SHA2Hash size) = SHA2Key size
|
|
|
|
hashKeyVariety (SHA3Hash size) = SHA3Key size
|
|
|
|
hashKeyVariety (SkeinHash size) = SKEINKey size
|
2018-03-15 15:16:00 +00:00
|
|
|
#if MIN_VERSION_cryptonite(0,23,0)
|
2018-03-13 20:15:35 +00:00
|
|
|
hashKeyVariety (Blake2bHash size) = Blake2bKey size
|
|
|
|
hashKeyVariety (Blake2sHash size) = Blake2sKey size
|
|
|
|
hashKeyVariety (Blake2spHash size) = Blake2spKey size
|
2018-03-15 15:16:00 +00:00
|
|
|
#endif
|
2011-05-16 15:46:34 +00:00
|
|
|
|
2013-10-02 00:34:06 +00:00
|
|
|
{- A key is a hash of its contents. -}
|
|
|
|
keyValue :: Hash -> KeySource -> Annex (Maybe Key)
|
|
|
|
keyValue hash source = do
|
2012-06-05 23:51:03 +00:00
|
|
|
let file = contentLocation source
|
2015-01-20 20:58:48 +00:00
|
|
|
filesize <- liftIO $ getFileSize file
|
2018-08-28 22:01:13 +00:00
|
|
|
s <- hashFile hash file
|
2011-05-16 15:46:34 +00:00
|
|
|
return $ Just $ stubKey
|
|
|
|
{ keyName = s
|
2017-02-24 19:16:56 +00:00
|
|
|
, keyVariety = hashKeyVariety hash (HasExt False)
|
2012-07-04 17:04:01 +00:00
|
|
|
, keySize = Just filesize
|
2011-05-16 15:46:34 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
{- Extension preserving keys. -}
|
2013-10-02 00:34:06 +00:00
|
|
|
keyValueE :: Hash -> KeySource -> Annex (Maybe Key)
|
|
|
|
keyValueE hash source = keyValue hash source >>= maybe (return Nothing) addE
|
2012-11-11 04:51:07 +00:00
|
|
|
where
|
|
|
|
addE k = return $ Just $ k
|
|
|
|
{ keyName = keyName k ++ selectExtension (keyFilename source)
|
2017-02-24 19:16:56 +00:00
|
|
|
, keyVariety = hashKeyVariety hash (HasExt True)
|
2012-11-11 04:51:07 +00:00
|
|
|
}
|
2012-07-05 22:24:02 +00:00
|
|
|
|
|
|
|
selectExtension :: FilePath -> String
|
2012-07-06 23:22:56 +00:00
|
|
|
selectExtension f
|
|
|
|
| null es = ""
|
2013-04-23 00:24:53 +00:00
|
|
|
| otherwise = intercalate "." ("":es)
|
2012-11-11 04:51:07 +00:00
|
|
|
where
|
|
|
|
es = filter (not . null) $ reverse $
|
2018-03-05 15:25:01 +00:00
|
|
|
take 2 $ filter (all validInExtension) $
|
2016-05-27 17:14:51 +00:00
|
|
|
takeWhile shortenough $
|
2017-01-31 22:40:42 +00:00
|
|
|
reverse $ splitc '.' $ takeExtensions f
|
2012-12-20 21:16:55 +00:00
|
|
|
shortenough e = length e <= 4 -- long enough for "jpeg"
|
2011-03-01 20:50:53 +00:00
|
|
|
|
2018-08-28 22:01:13 +00:00
|
|
|
{- A key's checksum is checked during fsck when it's content is present
|
|
|
|
- except for in fast mode. -}
|
2013-10-02 00:34:06 +00:00
|
|
|
checkKeyChecksum :: Hash -> Key -> FilePath -> Annex Bool
|
2015-12-06 20:26:38 +00:00
|
|
|
checkKeyChecksum hash key file = catchIOErrorType HardwareFault hwfault $ do
|
|
|
|
fast <- Annex.getState Annex.fast
|
2018-08-28 22:01:13 +00:00
|
|
|
exists <- liftIO $ doesFileExist file
|
|
|
|
case (exists, fast) of
|
|
|
|
(True, False) -> do
|
2015-12-06 20:26:38 +00:00
|
|
|
showAction "checksum"
|
2018-08-28 22:01:13 +00:00
|
|
|
check <$> hashFile hash file
|
2015-12-06 20:26:38 +00:00
|
|
|
_ -> return True
|
2012-11-11 04:51:07 +00:00
|
|
|
where
|
2013-10-02 00:34:06 +00:00
|
|
|
expected = keyHash key
|
2012-11-11 04:51:07 +00:00
|
|
|
check s
|
2013-10-02 00:34:06 +00:00
|
|
|
| s == expected = True
|
2012-12-20 19:43:14 +00:00
|
|
|
{- A bug caused checksums to be prefixed with \ in some
|
|
|
|
- cases; still accept these as legal now that the bug has been
|
|
|
|
- fixed. -}
|
2013-10-02 00:34:06 +00:00
|
|
|
| '\\' : s == expected = True
|
2012-11-11 04:51:07 +00:00
|
|
|
| otherwise = False
|
2012-12-20 19:43:14 +00:00
|
|
|
|
2015-05-27 20:40:03 +00:00
|
|
|
hwfault e = do
|
|
|
|
warning $ "hardware fault: " ++ show e
|
|
|
|
return False
|
|
|
|
|
2013-10-02 00:34:06 +00:00
|
|
|
keyHash :: Key -> String
|
|
|
|
keyHash key = dropExtensions (keyName key)
|
2012-12-20 19:43:14 +00:00
|
|
|
|
2016-05-27 17:10:23 +00:00
|
|
|
validInExtension :: Char -> Bool
|
|
|
|
validInExtension c
|
2012-12-20 21:16:55 +00:00
|
|
|
| isAlphaNum c = True
|
|
|
|
| c == '.' = True
|
|
|
|
| otherwise = False
|
|
|
|
|
2018-05-23 18:07:51 +00:00
|
|
|
{- Upgrade keys that have the \ prefix on their hash due to a bug, or
|
|
|
|
- that contain non-alphanumeric characters in their extension.
|
|
|
|
-
|
|
|
|
- Also, for a while migrate from eg SHA256E to SHA256 resulted in a SHA256
|
|
|
|
- key that contained an extension inside its keyName. Upgrade those
|
|
|
|
- keys, removing the extension.
|
|
|
|
-}
|
2012-12-20 19:43:14 +00:00
|
|
|
needsUpgrade :: Key -> Bool
|
2018-05-23 18:07:51 +00:00
|
|
|
needsUpgrade key = or
|
|
|
|
[ "\\" `isPrefixOf` keyHash key
|
|
|
|
, any (not . validInExtension) (takeExtensions $ keyName key)
|
|
|
|
, not (hasExt (keyVariety key)) && keyHash key /= keyName key
|
|
|
|
]
|
2013-10-02 00:34:06 +00:00
|
|
|
|
2015-01-04 16:33:10 +00:00
|
|
|
trivialMigrate :: Key -> Backend -> AssociatedFile -> Maybe Key
|
|
|
|
trivialMigrate oldkey newbackend afile
|
|
|
|
{- Fast migration from hashE to hash backend. -}
|
2018-05-23 18:07:51 +00:00
|
|
|
| migratable && hasExt oldvariety = Just $ oldkey
|
2014-07-10 21:06:04 +00:00
|
|
|
{ keyName = keyHash oldkey
|
2017-02-24 19:16:56 +00:00
|
|
|
, keyVariety = newvariety
|
2014-07-10 21:06:04 +00:00
|
|
|
}
|
2015-01-04 16:33:10 +00:00
|
|
|
{- Fast migration from hash to hashE backend. -}
|
2018-05-23 18:07:51 +00:00
|
|
|
| migratable && hasExt newvariety = case afile of
|
2017-03-10 17:12:24 +00:00
|
|
|
AssociatedFile Nothing -> Nothing
|
|
|
|
AssociatedFile (Just file) -> Just $ oldkey
|
2015-01-04 16:33:10 +00:00
|
|
|
{ keyName = keyHash oldkey ++ selectExtension file
|
2017-02-24 19:16:56 +00:00
|
|
|
, keyVariety = newvariety
|
2015-01-04 16:33:10 +00:00
|
|
|
}
|
2018-05-23 18:07:51 +00:00
|
|
|
{- Upgrade to fix bad previous migration that created a
|
|
|
|
- non-extension preserving key, with an extension
|
|
|
|
- in its keyName. -}
|
|
|
|
| newvariety == oldvariety && not (hasExt oldvariety) &&
|
|
|
|
keyHash oldkey /= keyName oldkey = Just $ oldkey
|
|
|
|
{ keyName = keyHash oldkey
|
|
|
|
}
|
2014-07-10 21:06:04 +00:00
|
|
|
| otherwise = Nothing
|
2017-02-24 19:16:56 +00:00
|
|
|
where
|
|
|
|
migratable = oldvariety /= newvariety
|
|
|
|
&& sameExceptExt oldvariety newvariety
|
|
|
|
oldvariety = keyVariety oldkey
|
|
|
|
newvariety = backendVariety newbackend
|
2014-07-10 21:06:04 +00:00
|
|
|
|
2018-08-28 22:01:13 +00:00
|
|
|
hashFile :: Hash -> FilePath -> Annex String
|
|
|
|
hashFile hash file = liftIO $ do
|
|
|
|
h <- hasher <$> L.readFile file
|
|
|
|
-- Force full evaluation so file is read and closed.
|
|
|
|
return (length h `seq` h)
|
2013-10-02 00:34:06 +00:00
|
|
|
where
|
2018-08-28 22:01:13 +00:00
|
|
|
hasher = case hash of
|
|
|
|
MD5Hash -> md5Hasher
|
|
|
|
SHA1Hash -> sha1Hasher
|
|
|
|
SHA2Hash hashsize -> sha2Hasher hashsize
|
|
|
|
SHA3Hash hashsize -> sha3Hasher hashsize
|
|
|
|
SkeinHash hashsize -> skeinHasher hashsize
|
2018-03-15 15:16:00 +00:00
|
|
|
#if MIN_VERSION_cryptonite(0,23,0)
|
2018-08-28 22:01:13 +00:00
|
|
|
Blake2bHash hashsize -> blake2bHasher hashsize
|
|
|
|
Blake2sHash hashsize -> blake2sHasher hashsize
|
|
|
|
Blake2spHash hashsize -> blake2spHasher hashsize
|
2018-03-15 15:16:00 +00:00
|
|
|
#endif
|
if external hash command fails for any reason, fall back to internal hashing
This way, if a system's sha1sum etc is broken, it will be tried if
git-annex was built to use it, but at least it will fall back to using
internal hashing when it fails.
A side benefit of this is that hashFile consistently throws an IOError if
the file is unable to be read. In particular, if the disk is failing with
IO errors, and external hash command is used, it used to throw a user error
with the error message from externalSHA. Now, the external hash command
will fail, that message will be printed as a warning, and it'll fall back
to the internal hash command. If the disk IO error is not intermittent, it
will re-occur, and so an IOError will be thrown.
Of course, this can mean it reads a file twice, but only in edge cases.
2015-05-27 19:58:32 +00:00
|
|
|
|
2018-08-28 22:01:13 +00:00
|
|
|
sha2Hasher :: HashSize -> (L.ByteString -> String)
|
|
|
|
sha2Hasher (HashSize hashsize)
|
|
|
|
| hashsize == 256 = use sha2_256
|
|
|
|
| hashsize == 224 = use sha2_224
|
|
|
|
| hashsize == 384 = use sha2_384
|
|
|
|
| hashsize == 512 = use sha2_512
|
2015-08-06 19:02:25 +00:00
|
|
|
| otherwise = error $ "unsupported SHA size " ++ show hashsize
|
2013-10-02 00:34:06 +00:00
|
|
|
where
|
2018-08-28 22:01:13 +00:00
|
|
|
use hasher = show . hasher
|
2013-10-02 01:10:56 +00:00
|
|
|
|
2015-08-06 19:02:25 +00:00
|
|
|
sha3Hasher :: HashSize -> (L.ByteString -> String)
|
2017-02-24 19:16:56 +00:00
|
|
|
sha3Hasher (HashSize hashsize)
|
2015-08-06 19:02:25 +00:00
|
|
|
| hashsize == 256 = show . sha3_256
|
|
|
|
| hashsize == 224 = show . sha3_224
|
|
|
|
| hashsize == 384 = show . sha3_384
|
|
|
|
| hashsize == 512 = show . sha3_512
|
|
|
|
| otherwise = error $ "unsupported SHA3 size " ++ show hashsize
|
|
|
|
|
2013-10-02 01:10:56 +00:00
|
|
|
skeinHasher :: HashSize -> (L.ByteString -> String)
|
2017-02-24 19:16:56 +00:00
|
|
|
skeinHasher (HashSize hashsize)
|
2013-10-02 01:10:56 +00:00
|
|
|
| hashsize == 256 = show . skein256
|
|
|
|
| hashsize == 512 = show . skein512
|
2015-08-06 19:02:25 +00:00
|
|
|
| otherwise = error $ "unsupported SKEIN size " ++ show hashsize
|
2014-08-01 19:09:49 +00:00
|
|
|
|
2018-03-15 15:16:00 +00:00
|
|
|
#if MIN_VERSION_cryptonite(0,23,0)
|
2018-03-13 20:15:35 +00:00
|
|
|
blake2bHasher :: HashSize -> (L.ByteString -> String)
|
|
|
|
blake2bHasher (HashSize hashsize)
|
|
|
|
| hashsize == 256 = show . blake2b_256
|
|
|
|
| hashsize == 512 = show . blake2b_512
|
|
|
|
| hashsize == 160 = show . blake2b_160
|
|
|
|
| hashsize == 224 = show . blake2b_224
|
|
|
|
| hashsize == 384 = show . blake2b_384
|
|
|
|
| otherwise = error $ "unsupported BLAKE2B size " ++ show hashsize
|
|
|
|
|
|
|
|
blake2sHasher :: HashSize -> (L.ByteString -> String)
|
|
|
|
blake2sHasher (HashSize hashsize)
|
|
|
|
| hashsize == 256 = show . blake2s_256
|
|
|
|
| hashsize == 160 = show . blake2s_160
|
|
|
|
| hashsize == 224 = show . blake2s_224
|
|
|
|
| otherwise = error $ "unsupported BLAKE2S size " ++ show hashsize
|
|
|
|
|
|
|
|
blake2spHasher :: HashSize -> (L.ByteString -> String)
|
|
|
|
blake2spHasher (HashSize hashsize)
|
|
|
|
| hashsize == 256 = show . blake2sp_256
|
|
|
|
| hashsize == 224 = show . blake2sp_224
|
|
|
|
| otherwise = error $ "unsupported BLAKE2SP size " ++ show hashsize
|
2018-03-15 15:16:00 +00:00
|
|
|
#endif
|
2018-03-13 20:15:35 +00:00
|
|
|
|
2018-08-28 22:01:13 +00:00
|
|
|
sha1Hasher :: L.ByteString -> String
|
|
|
|
sha1Hasher = show . sha1
|
|
|
|
|
2015-02-04 17:47:54 +00:00
|
|
|
md5Hasher :: L.ByteString -> String
|
|
|
|
md5Hasher = show . md5
|
|
|
|
|
2014-08-01 19:09:49 +00:00
|
|
|
{- A varient of the SHA256E backend, for testing that needs special keys
|
|
|
|
- that cannot collide with legitimate keys in the repository.
|
|
|
|
-
|
|
|
|
- This is accomplished by appending a special extension to the key,
|
|
|
|
- that is not one that selectExtension would select (due to being too
|
|
|
|
- long).
|
|
|
|
-}
|
|
|
|
testKeyBackend :: Backend
|
|
|
|
testKeyBackend =
|
2017-02-24 19:16:56 +00:00
|
|
|
let b = genBackendE (SHA2Hash (HashSize 256))
|
2014-08-01 19:09:49 +00:00
|
|
|
in b { getKey = (fmap addE) <$$> getKey b }
|
|
|
|
where
|
|
|
|
addE k = k { keyName = keyName k ++ longext }
|
|
|
|
longext = ".this-is-a-test-key"
|