stop using MissingH for MD5

Cryptonite is faster and allocates less, and I want to get rid of
MissingH use.

Note that the new dependency on memory is free; it's a dependency of
cryptonite.

This commit was supported by the NSF-funded DataLad project.
This commit is contained in:
Joey Hess 2017-05-15 18:10:13 -04:00
parent 44baa7b306
commit 6dd806f1ad
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
11 changed files with 98 additions and 30 deletions

View file

@ -1,6 +1,6 @@
{- git-annex file locations
-
- Copyright 2010-2015 Joey Hess <id@joeyh.name>
- Copyright 2010-2017 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
@ -19,14 +19,15 @@ module Annex.DirHashes (
import Data.Bits
import Data.Word
import Data.Hash.MD5
import Data.Default
import qualified Data.ByteArray
import Common
import Key
import Types.GitConfig
import Types.Difference
import Utility.FileSystemEncoding
import Utility.Hash
type Hasher = Key -> FilePath
@ -62,15 +63,24 @@ hashDirs :: HashLevels -> Int -> String -> FilePath
hashDirs (HashLevels 1) sz s = addTrailingPathSeparator $ take sz s
hashDirs _ sz s = addTrailingPathSeparator $ take sz s </> drop sz s
hashDirMixed :: HashLevels -> Hasher
hashDirMixed n k = hashDirs n 2 $ take 4 $ display_32bits_as_dir =<< [a,b,c,d]
where
ABCD (a,b,c,d) = md5 $ md5FilePath $ key2file $ nonChunkKey k
hashDirLower :: HashLevels -> Hasher
hashDirLower n k = hashDirs n 3 $ take 6 $ md5s $ md5FilePath $ key2file $ nonChunkKey k
hashDirLower n k = hashDirs n 3 $ take 6 $ show $ md5 $
encodeBS $ key2file $ nonChunkKey k
{- This was originally using Data.Hash.MD5 from MissingH. This new version
- is faster, but ugly as it has to replicate the 4 Word32's that produced. -}
hashDirMixed :: HashLevels -> Hasher
hashDirMixed n k = hashDirs n 2 $ take 4 $ concatMap display_32bits_as_dir $
encodeWord32 $ map fromIntegral $ Data.ByteArray.unpack $
Utility.Hash.md5 $ encodeBS $ key2file $ nonChunkKey k
where
encodeWord32 (b1:b2:b3:b4:rest) =
(shiftL b4 24 .|. shiftL b3 16 .|. shiftL b2 8 .|. b1)
: encodeWord32 rest
encodeWord32 _ = []
{- modified version of display_32bits_as_hex from Data.Hash.MD5
- in MissingH
- Copyright (C) 2001 Ian Lynagh
- License: Either BSD or GPL
-}

View file

@ -33,6 +33,7 @@ import Config
import Annex.Path
import Utility.Env
import Utility.FileSystemEncoding
import Utility.Hash
import Types.CleanupActions
import Types.Concurrency
import Git.Env
@ -42,7 +43,6 @@ import Annex.Perms
import Annex.LockPool
#endif
import Data.Hash.MD5
import Control.Concurrent.STM
{- Some ssh commands are fed stdin on a pipe and so should be allowed to
@ -287,7 +287,7 @@ hostport2socket host Nothing = hostport2socket' host
hostport2socket host (Just port) = hostport2socket' $ host ++ "!" ++ show port
hostport2socket' :: String -> FilePath
hostport2socket' s
| length s > lengthofmd5s = md5s (Str s)
| length s > lengthofmd5s = show $ md5 $ encodeBS s
| otherwise = s
where
lengthofmd5s = 32

View file

@ -9,8 +9,7 @@ module Annex.VariantFile where
import Annex.Common
import Utility.FileSystemEncoding
import Data.Hash.MD5
import Utility.Hash
variantMarker :: String
variantMarker = ".variant-"
@ -42,4 +41,4 @@ variantFile file key
doubleconflict = variantMarker `isInfixOf` file
shortHash :: String -> String
shortHash = take 4 . md5s . md5FilePath
shortHash = take 4 . show . md5 . encodeBS

View file

@ -7,10 +7,9 @@
module Backend.Utilities where
import Data.Hash.MD5
import Annex.Common
import Utility.FileSystemEncoding
import Utility.Hash
{- Generates a keyName from an input string. Takes care of sanitizing it.
- If it's not too long, the full string is used as the keyName.
@ -20,7 +19,8 @@ genKeyName :: String -> String
genKeyName s
-- Avoid making keys longer than the length of a SHA256 checksum.
| bytelen > sha256len =
truncateFilePath (sha256len - md5len - 1) s' ++ "-" ++ md5s (Str s)
truncateFilePath (sha256len - md5len - 1) s' ++ "-" ++
show (md5 (encodeBS s))
| otherwise = s'
where
s' = preSanitizeKeyName s

View file

@ -12,7 +12,6 @@ module Utility.FileSystemEncoding (
useFileSystemEncoding,
fileEncoding,
withFilePath,
md5FilePath,
decodeBS,
encodeBS,
decodeW8,
@ -27,7 +26,6 @@ import qualified GHC.IO.Encoding as Encoding
import Foreign.C
import System.IO
import System.IO.Unsafe
import qualified Data.Hash.MD5 as MD5
import Data.Word
import Data.Bits.Utils
import Data.List
@ -101,10 +99,6 @@ _encodeFilePath fp = unsafePerformIO $ do
GHC.withCString enc fp (GHC.peekCString Encoding.char8)
`catchNonAsync` (\_ -> return fp)
{- Encodes a FilePath into a Md5.Str, applying the filesystem encoding. -}
md5FilePath :: FilePath -> MD5.Str
md5FilePath = MD5.Str . _encodeFilePath
{- Decodes a ByteString into a FilePath, applying the filesystem encoding. -}
decodeBS :: L.ByteString -> FilePath
#ifndef mingw32_HOST_OS

View file

@ -1,8 +1,4 @@
{- Convenience wrapper around cryptohash/cryptonite.
-
- SHA3 hashes are currently only enabled when using cryptonite,
- because of https://github.com/vincenthz/hs-cryptohash/issues/36
-}
{- Convenience wrapper around cryptonite's hashing. -}
module Utility.Hash (
sha1,

View file

@ -25,6 +25,8 @@ import Utility.Path
import Utility.FileMode
import Utility.LockFile.LockStatus
import Utility.ThreadScheduler
import Utility.Hash
import Utility.FileSystemEncoding
import qualified Utility.LockFile.Posix as Posix
import System.IO
@ -33,7 +35,6 @@ import Data.Maybe
import Data.List
import Network.BSD
import System.FilePath
import Data.Hash.MD5
import Control.Applicative
import Prelude
@ -99,7 +100,9 @@ sideLockFile lockfile = do
f <- absPath lockfile
let base = intercalate "_" (splitDirectories (makeRelative "/" f))
let shortbase = reverse $ take 32 $ reverse base
let md5sum = if base == shortbase then "" else md5s (Str base)
let md5sum = if base == shortbase
then ""
else show (md5 (encodeBS base))
dir <- ifM (doesDirectoryExist "/dev/shm")
( return "/dev/shm"
, return "/tmp"

15
Utility/Tuple.hs Normal file
View file

@ -0,0 +1,15 @@
{- tuple utility functions
-
- Copyright 2017 Joey Hess <id@joeyh.name>
-
- License: BSD-2-clause
-}
fst3 :: (a,b,c) -> a
fst3 (a,b,c) = a
snd3 :: (a,b,c) -> b
snd3 (a,b,c) = b
thd3 :: (a,b,c) -> c
thd3 (a,b,c) = c

1
debian/control vendored
View file

@ -11,6 +11,7 @@ Build-Depends:
libghc-hslogger-dev,
libghc-pcre-light-dev,
libghc-cryptonite-dev,
libghc-memory-dev,
libghc-sandi-dev,
libghc-utf8-string-dev,
libghc-aws-dev (>= 0.9.2-2~),

View file

@ -0,0 +1,49 @@
[[!comment format=mdwn
username="joey"
subject="""comment 11"""
date="2017-05-15T21:56:52Z"
content="""
Switched from MissingH to cryptonite for md5. It did move md5 out of the top CPU spot but
the overall runtime didn't change much. Memory allocations did go down by a
good amount.
Updated profiles:
git-annex +RTS -p -RTS find
total time = 1.63 secs (1629 ticks @ 1000 us, 1 processor)
total alloc = 1,496,336,496 bytes (excludes profiling overheads)
COST CENTRE MODULE SRC %time %alloc
catchIO Utility.Exception Utility/Exception.hs:79:1-17 14.1 15.1
inAnnex'.checkindirect Annex.Content Annex/Content.hs:(108,9)-(119,39) 10.6 4.8
catches Control.Monad.Catch src/Control/Monad/Catch.hs:(432,1)-(436,76) 8.6 6.9
spanList Data.List.Utils src/Data/List/Utils.hs:(150,1)-(155,36) 6.7 11.1
isAnnexLink Annex.Link Annex/Link.hs:35:1-85 5.0 10.2
keyFile Annex.Locations Annex/Locations.hs:(456,1)-(462,19) 5.0 7.0
readish Utility.PartialPrelude Utility/PartialPrelude.hs:(48,1)-(50,20) 3.8 2.0
startswith Data.List.Utils src/Data/List/Utils.hs:103:1-23 3.6 2.3
splitc Utility.Misc Utility/Misc.hs:(52,1)-(54,25) 3.4 6.5
s2w8 Data.Bits.Utils src/Data/Bits/Utils.hs:65:1-15 2.6 6.4
keyPath Annex.Locations Annex/Locations.hs:(492,1)-(494,23) 2.5 4.4
fileKey.unesc Annex.Locations Annex/Locations.hs:(469,9)-(474,39) 2.0 3.5
copyAndFreeze Data.ByteArray.Methods Data/ByteArray/Methods.hs:(224,1)-(227,21) 1.8 0.5
git-annex +RTS -p -RTS find --not --in web
total time = 5.33 secs (5327 ticks @ 1000 us, 1 processor)
total alloc = 2,908,489,000 bytes (excludes profiling overheads)
COST CENTRE MODULE SRC %time %alloc
catObjectDetails.\ Git.CatFile Git/CatFile.hs:(80,72)-(88,97) 7.8 2.8
catchIO Utility.Exception Utility/Exception.hs:79:1-17 7.6 8.3
spanList Data.List.Utils src/Data/List/Utils.hs:(150,1)-(155,36) 5.8 9.1
readish Utility.PartialPrelude Utility/PartialPrelude.hs:(48,1)-(50,20) 4.5 4.0
parseResp Git.CatFile Git/CatFile.hs:(113,1)-(124,28) 4.4 2.9
readFileStrict Utility.Misc Utility/Misc.hs:33:1-59 3.7 1.6
catches Control.Monad.Catch src/Control/Monad/Catch.hs:(432,1)-(436,76) 3.1 3.6
encodeW8 Utility.FileSystemEncoding Utility/FileSystemEncoding.hs:(131,1)-(133,70) 3.1 2.3
"""]]

View file

@ -361,7 +361,8 @@ Executable git-annex
stm-chans,
securemem,
crypto-api,
cryptonite
cryptonite,
memory
CC-Options: -Wall
GHC-Options: -Wall -fno-warn-tabs
Extensions: PackageImports