diff --git a/Backend/Hash.hs b/Backend/Hash.hs index e4845b7725..f9dddaaa11 100644 --- a/Backend/Hash.hs +++ b/Backend/Hash.hs @@ -18,9 +18,7 @@ import Types.Key import Types.Backend import Types.KeySource import Utility.Hash -import Utility.ExternalSHA -import qualified BuildInfo import qualified Data.ByteString.Lazy as L import Data.Char @@ -89,7 +87,7 @@ keyValue :: Hash -> KeySource -> Annex (Maybe Key) keyValue hash source = do let file = contentLocation source filesize <- liftIO $ getFileSize file - s <- hashFile hash file filesize + s <- hashFile hash file return $ Just $ stubKey { keyName = s , keyVariety = hashKeyVariety hash (HasExt False) @@ -116,16 +114,16 @@ selectExtension f reverse $ splitc '.' $ takeExtensions f shortenough e = length e <= 4 -- long enough for "jpeg" -{- A key's checksum is checked during fsck. -} +{- A key's checksum is checked during fsck when it's content is present + - except for in fast mode. -} checkKeyChecksum :: Hash -> Key -> FilePath -> Annex Bool checkKeyChecksum hash key file = catchIOErrorType HardwareFault hwfault $ do fast <- Annex.getState Annex.fast - mstat <- liftIO $ catchMaybeIO $ getFileStatus file - case (mstat, fast) of - (Just stat, False) -> do - filesize <- liftIO $ getFileSize' file stat + exists <- liftIO $ doesFileExist file + case (exists, fast) of + (True, False) -> do showAction "checksum" - check <$> hashFile hash file filesize + check <$> hashFile hash file _ -> return True where expected = keyHash key @@ -192,55 +190,33 @@ trivialMigrate oldkey newbackend afile oldvariety = keyVariety oldkey newvariety = backendVariety newbackend -hashFile :: Hash -> FilePath -> Integer -> Annex String -hashFile hash file filesize = go hash +hashFile :: Hash -> FilePath -> Annex String +hashFile hash file = liftIO $ do + h <- hasher <$> L.readFile file + -- Force full evaluation so file is read and closed. + return (length h `seq` h) where - go MD5Hash = use md5Hasher - go SHA1Hash = usehasher (HashSize 1) - go (SHA2Hash hashsize) = usehasher hashsize - go (SHA3Hash hashsize) = use (sha3Hasher hashsize) - go (SkeinHash hashsize) = use (skeinHasher hashsize) + hasher = case hash of + MD5Hash -> md5Hasher + SHA1Hash -> sha1Hasher + SHA2Hash hashsize -> sha2Hasher hashsize + SHA3Hash hashsize -> sha3Hasher hashsize + SkeinHash hashsize -> skeinHasher hashsize #if MIN_VERSION_cryptonite(0,23,0) - go (Blake2bHash hashsize) = use (blake2bHasher hashsize) - go (Blake2sHash hashsize) = use (blake2sHasher hashsize) - go (Blake2spHash hashsize) = use (blake2spHasher hashsize) + Blake2bHash hashsize -> blake2bHasher hashsize + Blake2sHash hashsize -> blake2sHasher hashsize + Blake2spHash hashsize -> blake2spHasher hashsize #endif - - use hasher = liftIO $ do - h <- hasher <$> L.readFile file - -- Force full evaluation so file is read and closed. - return (length h `seq` h) - - usehasher hashsize@(HashSize sz) = case shaHasher hashsize filesize of - Left sha -> use sha - Right (external, internal) -> - liftIO (externalSHA external sz file) >>= \case - Right r -> return r - Left e -> do - warning e - -- fall back to internal since - -- external command failed - use internal -shaHasher :: HashSize -> Integer -> Either (L.ByteString -> String) (String, L.ByteString -> String) -shaHasher (HashSize hashsize) filesize - | hashsize == 1 = use BuildInfo.sha1 sha1 - | hashsize == 256 = use BuildInfo.sha256 sha2_256 - | hashsize == 224 = use BuildInfo.sha224 sha2_224 - | hashsize == 384 = use BuildInfo.sha384 sha2_384 - | hashsize == 512 = use BuildInfo.sha512 sha2_512 +sha2Hasher :: HashSize -> (L.ByteString -> String) +sha2Hasher (HashSize hashsize) + | hashsize == 256 = use sha2_256 + | hashsize == 224 = use sha2_224 + | hashsize == 384 = use sha2_384 + | hashsize == 512 = use sha2_512 | otherwise = error $ "unsupported SHA size " ++ show hashsize where - use Nothing hasher = Left $ usehasher hasher - use (Just c) hasher - {- Use builtin, but slightly slower hashing for - - smallish files. Cryptohash benchmarks 90 to 101% - - faster than external hashers, depending on the hash - - and system. So there is no point forking an external - - process unless the file is large. -} - | filesize < 1048576 = Left $ usehasher hasher - | otherwise = Right (c, usehasher hasher) - usehasher hasher = show . hasher + use hasher = show . hasher sha3Hasher :: HashSize -> (L.ByteString -> String) sha3Hasher (HashSize hashsize) @@ -280,6 +256,9 @@ blake2spHasher (HashSize hashsize) | otherwise = error $ "unsupported BLAKE2SP size " ++ show hashsize #endif +sha1Hasher :: L.ByteString -> String +sha1Hasher = show . sha1 + md5Hasher :: L.ByteString -> String md5Hasher = show . md5 diff --git a/Build/BundledPrograms.hs b/Build/BundledPrograms.hs index cf16da0f2b..f593c98103 100644 --- a/Build/BundledPrograms.hs +++ b/Build/BundledPrograms.hs @@ -75,13 +75,8 @@ preferredBundledPrograms = catMaybes , BuildInfo.lsof , BuildInfo.gcrypt #ifndef mingw32_HOST_OS - -- All these utilities are included in git for Windows + -- These utilities are included in git for Windows , ifset BuildInfo.curl "curl" - , BuildInfo.sha1 - , BuildInfo.sha256 - , BuildInfo.sha512 - , BuildInfo.sha224 - , BuildInfo.sha384 , Just "cp" #endif #ifdef linux_HOST_OS diff --git a/Build/Configure.hs b/Build/Configure.hs index c490148f50..6157921b58 100644 --- a/Build/Configure.hs +++ b/Build/Configure.hs @@ -7,7 +7,6 @@ module Build.Configure where import Build.TestConfig import Build.Version import Utility.SafeCommand -import Utility.ExternalSHA import Utility.Env.Basic import qualified Git.Version import Utility.Directory @@ -38,38 +37,8 @@ tests = , TestCase "lsof" $ findCmdPath "lsof" "lsof" , TestCase "git-remote-gcrypt" $ findCmdPath "gcrypt" "git-remote-gcrypt" , TestCase "ssh connection caching" getSshConnectionCaching - ] ++ shaTestCases - [ (1, "da39a3ee5e6b4b0d3255bfef95601890afd80709") - , (256, "e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855") - , (512, "cf83e1357eefb8bdf1542850d66d8007d620e4050b5715dc83f4a921d36ce9ce47d0d13c5d85f2b0ff8318d2877eec2f63b931bd47417a81a538327af927da3e") - , (224, "d14a028c2a3a2bc9476102bb288234c415a2b01f828ea62ac5b3e42f") - , (384, "38b060a751ac96384cd9327eb1b1e36a21fdb71114be07434c0cc7bf63f6e1da274edebfe76f65fbd51ad2f14898b95b") ] -{- shaNsum are the program names used by coreutils. Some systems - - install these with 'g' prefixes. - - - - On some systems, shaN is used instead, but on other - - systems, it might be "hashalot", which does not produce - - usable checksums. Only accept programs that produce - - known-good hashes when run on files. -} -shaTestCases :: [(Int, String)] -> [TestCase] -shaTestCases l = map make l - where - make (n, knowngood) = TestCase key $ - Config key . MaybeStringConfig <$> search (shacmds n) - where - key = "sha" ++ show n - search [] = return Nothing - search (c:cmds) = do - sha <- externalSHA c n "/dev/null" - if sha == Right knowngood - then return $ Just c - else search cmds - - shacmds n = concatMap (\x -> [x, 'g':x]) $ - map (\x -> "sha" ++ show n ++ x) ["sum", ""] - tmpDir :: String tmpDir = "tmp" @@ -128,8 +97,6 @@ androidConfig c = overrides ++ filter (not . overridden) c overrides = [ Config "cp_reflink_auto" $ BoolConfig False , Config "curl" $ BoolConfig False - , Config "sha224" $ MaybeStringConfig Nothing - , Config "sha384" $ MaybeStringConfig Nothing ] overridden (Config k _) = k `elem` overridekeys overridekeys = map (\(Config k _) -> k) overrides diff --git a/CHANGELOG b/CHANGELOG index 7963c2ee6c..502eb25c1c 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -23,6 +23,7 @@ git-annex (6.20180808) UNRELEASED; urgency=medium * linux standalone: When LOCPATH is already set, use it instead of the bundled locales. It can be set to an empty string to use the system locales too. + * Stop using external hash programs, since cryptonite is faster. -- Joey Hess Wed, 08 Aug 2018 11:24:08 -0400 diff --git a/Utility/ExternalSHA.hs b/Utility/ExternalSHA.hs deleted file mode 100644 index 7b08820040..0000000000 --- a/Utility/ExternalSHA.hs +++ /dev/null @@ -1,67 +0,0 @@ -{- Calculating a SHA checksum with an external command. - - - - This is typically a bit faster than using Haskell libraries, - - by around 1% to 10%. Worth it for really big files. - - - - Copyright 2011-2013 Joey Hess - - - - License: BSD-2-clause - -} - -{-# OPTIONS_GHC -fno-warn-tabs #-} - -module Utility.ExternalSHA (externalSHA) where - -import Utility.SafeCommand -import Utility.Process -import Utility.Misc -import Utility.Exception - -import Data.List -import Data.Char -import System.IO - -externalSHA :: String -> Int -> FilePath -> IO (Either String String) -externalSHA command shasize file = do - v <- tryNonAsync $ readsha $ toCommand [File file] - return $ case v of - Right s -> sanitycheck =<< parse (lines s) - Left _ -> Left (command ++ " failed") - where - readsha args = withHandle StdoutHandle createProcessSuccess p $ \h -> do - output <- hGetContentsStrict h - hClose h - return output - where - p = (proc command args) { std_out = CreatePipe } - - {- The first word of the output is taken to be the sha. -} - parse [] = bad - parse (l:_) - | null sha = bad - -- sha is prefixed with \ when filename contains certian chars - | "\\" `isPrefixOf` sha = Right $ drop 1 sha - | otherwise = Right sha - where - sha = fst $ separate (== ' ') l - bad = Left $ command ++ " parse error" - - {- Check that we've correctly parsing the output of the command, - - by making sure the sha we read is of the expected length - - and contains only the right characters. -} - sanitycheck sha - | length sha /= expectedSHALength shasize = - Left $ "Failed to parse the output of " ++ command - | any (`notElem` "0123456789abcdef") sha' = - Left $ "Unexpected character in output of " ++ command ++ "\"" ++ sha ++ "\"" - | otherwise = Right sha' - where - sha' = map toLower sha - -expectedSHALength :: Int -> Int -expectedSHALength 1 = 40 -expectedSHALength 256 = 64 -expectedSHALength 512 = 128 -expectedSHALength 224 = 56 -expectedSHALength 384 = 96 -expectedSHALength _ = 0 diff --git a/git-annex.cabal b/git-annex.cabal index 7e1af56a89..16bd3195b2 100644 --- a/git-annex.cabal +++ b/git-annex.cabal @@ -1021,7 +1021,6 @@ Executable git-annex Utility.Env.Basic Utility.Env.Set Utility.Exception - Utility.ExternalSHA Utility.FileMode Utility.FileSize Utility.FileSystemEncoding