stop using MissingH for MD5
Cryptonite is faster and allocates less, and I want to get rid of MissingH use. Note that the new dependency on memory is free; it's a dependency of cryptonite. This commit was supported by the NSF-funded DataLad project.
This commit is contained in:
parent
44baa7b306
commit
6dd806f1ad
11 changed files with 98 additions and 30 deletions
|
@ -1,6 +1,6 @@
|
|||
{- git-annex file locations
|
||||
-
|
||||
- Copyright 2010-2015 Joey Hess <id@joeyh.name>
|
||||
- Copyright 2010-2017 Joey Hess <id@joeyh.name>
|
||||
-
|
||||
- Licensed under the GNU GPL version 3 or higher.
|
||||
-}
|
||||
|
@ -19,14 +19,15 @@ module Annex.DirHashes (
|
|||
|
||||
import Data.Bits
|
||||
import Data.Word
|
||||
import Data.Hash.MD5
|
||||
import Data.Default
|
||||
import qualified Data.ByteArray
|
||||
|
||||
import Common
|
||||
import Key
|
||||
import Types.GitConfig
|
||||
import Types.Difference
|
||||
import Utility.FileSystemEncoding
|
||||
import Utility.Hash
|
||||
|
||||
type Hasher = Key -> FilePath
|
||||
|
||||
|
@ -62,15 +63,24 @@ hashDirs :: HashLevels -> Int -> String -> FilePath
|
|||
hashDirs (HashLevels 1) sz s = addTrailingPathSeparator $ take sz s
|
||||
hashDirs _ sz s = addTrailingPathSeparator $ take sz s </> drop sz s
|
||||
|
||||
hashDirMixed :: HashLevels -> Hasher
|
||||
hashDirMixed n k = hashDirs n 2 $ take 4 $ display_32bits_as_dir =<< [a,b,c,d]
|
||||
where
|
||||
ABCD (a,b,c,d) = md5 $ md5FilePath $ key2file $ nonChunkKey k
|
||||
|
||||
hashDirLower :: HashLevels -> Hasher
|
||||
hashDirLower n k = hashDirs n 3 $ take 6 $ md5s $ md5FilePath $ key2file $ nonChunkKey k
|
||||
hashDirLower n k = hashDirs n 3 $ take 6 $ show $ md5 $
|
||||
encodeBS $ key2file $ nonChunkKey k
|
||||
|
||||
{- This was originally using Data.Hash.MD5 from MissingH. This new version
|
||||
- is faster, but ugly as it has to replicate the 4 Word32's that produced. -}
|
||||
hashDirMixed :: HashLevels -> Hasher
|
||||
hashDirMixed n k = hashDirs n 2 $ take 4 $ concatMap display_32bits_as_dir $
|
||||
encodeWord32 $ map fromIntegral $ Data.ByteArray.unpack $
|
||||
Utility.Hash.md5 $ encodeBS $ key2file $ nonChunkKey k
|
||||
where
|
||||
encodeWord32 (b1:b2:b3:b4:rest) =
|
||||
(shiftL b4 24 .|. shiftL b3 16 .|. shiftL b2 8 .|. b1)
|
||||
: encodeWord32 rest
|
||||
encodeWord32 _ = []
|
||||
|
||||
{- modified version of display_32bits_as_hex from Data.Hash.MD5
|
||||
- in MissingH
|
||||
- Copyright (C) 2001 Ian Lynagh
|
||||
- License: Either BSD or GPL
|
||||
-}
|
||||
|
|
|
@ -33,6 +33,7 @@ import Config
|
|||
import Annex.Path
|
||||
import Utility.Env
|
||||
import Utility.FileSystemEncoding
|
||||
import Utility.Hash
|
||||
import Types.CleanupActions
|
||||
import Types.Concurrency
|
||||
import Git.Env
|
||||
|
@ -42,7 +43,6 @@ import Annex.Perms
|
|||
import Annex.LockPool
|
||||
#endif
|
||||
|
||||
import Data.Hash.MD5
|
||||
import Control.Concurrent.STM
|
||||
|
||||
{- Some ssh commands are fed stdin on a pipe and so should be allowed to
|
||||
|
@ -287,7 +287,7 @@ hostport2socket host Nothing = hostport2socket' host
|
|||
hostport2socket host (Just port) = hostport2socket' $ host ++ "!" ++ show port
|
||||
hostport2socket' :: String -> FilePath
|
||||
hostport2socket' s
|
||||
| length s > lengthofmd5s = md5s (Str s)
|
||||
| length s > lengthofmd5s = show $ md5 $ encodeBS s
|
||||
| otherwise = s
|
||||
where
|
||||
lengthofmd5s = 32
|
||||
|
|
|
@ -9,8 +9,7 @@ module Annex.VariantFile where
|
|||
|
||||
import Annex.Common
|
||||
import Utility.FileSystemEncoding
|
||||
|
||||
import Data.Hash.MD5
|
||||
import Utility.Hash
|
||||
|
||||
variantMarker :: String
|
||||
variantMarker = ".variant-"
|
||||
|
@ -42,4 +41,4 @@ variantFile file key
|
|||
doubleconflict = variantMarker `isInfixOf` file
|
||||
|
||||
shortHash :: String -> String
|
||||
shortHash = take 4 . md5s . md5FilePath
|
||||
shortHash = take 4 . show . md5 . encodeBS
|
||||
|
|
|
@ -7,10 +7,9 @@
|
|||
|
||||
module Backend.Utilities where
|
||||
|
||||
import Data.Hash.MD5
|
||||
|
||||
import Annex.Common
|
||||
import Utility.FileSystemEncoding
|
||||
import Utility.Hash
|
||||
|
||||
{- Generates a keyName from an input string. Takes care of sanitizing it.
|
||||
- If it's not too long, the full string is used as the keyName.
|
||||
|
@ -20,7 +19,8 @@ genKeyName :: String -> String
|
|||
genKeyName s
|
||||
-- Avoid making keys longer than the length of a SHA256 checksum.
|
||||
| bytelen > sha256len =
|
||||
truncateFilePath (sha256len - md5len - 1) s' ++ "-" ++ md5s (Str s)
|
||||
truncateFilePath (sha256len - md5len - 1) s' ++ "-" ++
|
||||
show (md5 (encodeBS s))
|
||||
| otherwise = s'
|
||||
where
|
||||
s' = preSanitizeKeyName s
|
||||
|
|
|
@ -12,7 +12,6 @@ module Utility.FileSystemEncoding (
|
|||
useFileSystemEncoding,
|
||||
fileEncoding,
|
||||
withFilePath,
|
||||
md5FilePath,
|
||||
decodeBS,
|
||||
encodeBS,
|
||||
decodeW8,
|
||||
|
@ -27,7 +26,6 @@ import qualified GHC.IO.Encoding as Encoding
|
|||
import Foreign.C
|
||||
import System.IO
|
||||
import System.IO.Unsafe
|
||||
import qualified Data.Hash.MD5 as MD5
|
||||
import Data.Word
|
||||
import Data.Bits.Utils
|
||||
import Data.List
|
||||
|
@ -101,10 +99,6 @@ _encodeFilePath fp = unsafePerformIO $ do
|
|||
GHC.withCString enc fp (GHC.peekCString Encoding.char8)
|
||||
`catchNonAsync` (\_ -> return fp)
|
||||
|
||||
{- Encodes a FilePath into a Md5.Str, applying the filesystem encoding. -}
|
||||
md5FilePath :: FilePath -> MD5.Str
|
||||
md5FilePath = MD5.Str . _encodeFilePath
|
||||
|
||||
{- Decodes a ByteString into a FilePath, applying the filesystem encoding. -}
|
||||
decodeBS :: L.ByteString -> FilePath
|
||||
#ifndef mingw32_HOST_OS
|
||||
|
|
|
@ -1,8 +1,4 @@
|
|||
{- Convenience wrapper around cryptohash/cryptonite.
|
||||
-
|
||||
- SHA3 hashes are currently only enabled when using cryptonite,
|
||||
- because of https://github.com/vincenthz/hs-cryptohash/issues/36
|
||||
-}
|
||||
{- Convenience wrapper around cryptonite's hashing. -}
|
||||
|
||||
module Utility.Hash (
|
||||
sha1,
|
||||
|
|
|
@ -25,6 +25,8 @@ import Utility.Path
|
|||
import Utility.FileMode
|
||||
import Utility.LockFile.LockStatus
|
||||
import Utility.ThreadScheduler
|
||||
import Utility.Hash
|
||||
import Utility.FileSystemEncoding
|
||||
import qualified Utility.LockFile.Posix as Posix
|
||||
|
||||
import System.IO
|
||||
|
@ -33,7 +35,6 @@ import Data.Maybe
|
|||
import Data.List
|
||||
import Network.BSD
|
||||
import System.FilePath
|
||||
import Data.Hash.MD5
|
||||
import Control.Applicative
|
||||
import Prelude
|
||||
|
||||
|
@ -99,7 +100,9 @@ sideLockFile lockfile = do
|
|||
f <- absPath lockfile
|
||||
let base = intercalate "_" (splitDirectories (makeRelative "/" f))
|
||||
let shortbase = reverse $ take 32 $ reverse base
|
||||
let md5sum = if base == shortbase then "" else md5s (Str base)
|
||||
let md5sum = if base == shortbase
|
||||
then ""
|
||||
else show (md5 (encodeBS base))
|
||||
dir <- ifM (doesDirectoryExist "/dev/shm")
|
||||
( return "/dev/shm"
|
||||
, return "/tmp"
|
||||
|
|
15
Utility/Tuple.hs
Normal file
15
Utility/Tuple.hs
Normal file
|
@ -0,0 +1,15 @@
|
|||
{- tuple utility functions
|
||||
-
|
||||
- Copyright 2017 Joey Hess <id@joeyh.name>
|
||||
-
|
||||
- License: BSD-2-clause
|
||||
-}
|
||||
|
||||
fst3 :: (a,b,c) -> a
|
||||
fst3 (a,b,c) = a
|
||||
|
||||
snd3 :: (a,b,c) -> b
|
||||
snd3 (a,b,c) = b
|
||||
|
||||
thd3 :: (a,b,c) -> c
|
||||
thd3 (a,b,c) = c
|
1
debian/control
vendored
1
debian/control
vendored
|
@ -11,6 +11,7 @@ Build-Depends:
|
|||
libghc-hslogger-dev,
|
||||
libghc-pcre-light-dev,
|
||||
libghc-cryptonite-dev,
|
||||
libghc-memory-dev,
|
||||
libghc-sandi-dev,
|
||||
libghc-utf8-string-dev,
|
||||
libghc-aws-dev (>= 0.9.2-2~),
|
||||
|
|
|
@ -0,0 +1,49 @@
|
|||
[[!comment format=mdwn
|
||||
username="joey"
|
||||
subject="""comment 11"""
|
||||
date="2017-05-15T21:56:52Z"
|
||||
content="""
|
||||
Switched from MissingH to cryptonite for md5. It did move md5 out of the top CPU spot but
|
||||
the overall runtime didn't change much. Memory allocations did go down by a
|
||||
good amount.
|
||||
|
||||
Updated profiles:
|
||||
|
||||
git-annex +RTS -p -RTS find
|
||||
|
||||
total time = 1.63 secs (1629 ticks @ 1000 us, 1 processor)
|
||||
total alloc = 1,496,336,496 bytes (excludes profiling overheads)
|
||||
|
||||
COST CENTRE MODULE SRC %time %alloc
|
||||
|
||||
catchIO Utility.Exception Utility/Exception.hs:79:1-17 14.1 15.1
|
||||
inAnnex'.checkindirect Annex.Content Annex/Content.hs:(108,9)-(119,39) 10.6 4.8
|
||||
catches Control.Monad.Catch src/Control/Monad/Catch.hs:(432,1)-(436,76) 8.6 6.9
|
||||
spanList Data.List.Utils src/Data/List/Utils.hs:(150,1)-(155,36) 6.7 11.1
|
||||
isAnnexLink Annex.Link Annex/Link.hs:35:1-85 5.0 10.2
|
||||
keyFile Annex.Locations Annex/Locations.hs:(456,1)-(462,19) 5.0 7.0
|
||||
readish Utility.PartialPrelude Utility/PartialPrelude.hs:(48,1)-(50,20) 3.8 2.0
|
||||
startswith Data.List.Utils src/Data/List/Utils.hs:103:1-23 3.6 2.3
|
||||
splitc Utility.Misc Utility/Misc.hs:(52,1)-(54,25) 3.4 6.5
|
||||
s2w8 Data.Bits.Utils src/Data/Bits/Utils.hs:65:1-15 2.6 6.4
|
||||
keyPath Annex.Locations Annex/Locations.hs:(492,1)-(494,23) 2.5 4.4
|
||||
fileKey.unesc Annex.Locations Annex/Locations.hs:(469,9)-(474,39) 2.0 3.5
|
||||
copyAndFreeze Data.ByteArray.Methods Data/ByteArray/Methods.hs:(224,1)-(227,21) 1.8 0.5
|
||||
|
||||
git-annex +RTS -p -RTS find --not --in web
|
||||
|
||||
total time = 5.33 secs (5327 ticks @ 1000 us, 1 processor)
|
||||
total alloc = 2,908,489,000 bytes (excludes profiling overheads)
|
||||
|
||||
COST CENTRE MODULE SRC %time %alloc
|
||||
|
||||
catObjectDetails.\ Git.CatFile Git/CatFile.hs:(80,72)-(88,97) 7.8 2.8
|
||||
catchIO Utility.Exception Utility/Exception.hs:79:1-17 7.6 8.3
|
||||
spanList Data.List.Utils src/Data/List/Utils.hs:(150,1)-(155,36) 5.8 9.1
|
||||
readish Utility.PartialPrelude Utility/PartialPrelude.hs:(48,1)-(50,20) 4.5 4.0
|
||||
parseResp Git.CatFile Git/CatFile.hs:(113,1)-(124,28) 4.4 2.9
|
||||
readFileStrict Utility.Misc Utility/Misc.hs:33:1-59 3.7 1.6
|
||||
catches Control.Monad.Catch src/Control/Monad/Catch.hs:(432,1)-(436,76) 3.1 3.6
|
||||
encodeW8 Utility.FileSystemEncoding Utility/FileSystemEncoding.hs:(131,1)-(133,70) 3.1 2.3
|
||||
|
||||
"""]]
|
|
@ -361,7 +361,8 @@ Executable git-annex
|
|||
stm-chans,
|
||||
securemem,
|
||||
crypto-api,
|
||||
cryptonite
|
||||
cryptonite,
|
||||
memory
|
||||
CC-Options: -Wall
|
||||
GHC-Options: -Wall -fno-warn-tabs
|
||||
Extensions: PackageImports
|
||||
|
|
Loading…
Reference in a new issue