81d402216d
This will speed up the common case where a Key is deserialized from disk, but is then serialized to build eg, the path to the annex object. Previously attempted in4536c93bb2
and reverted in96aba8eff7
. The problems mentioned in the latter commit are addressed now: Read/Show of KeyData is backwards-compatible with Read/Show of Key from before this change, so Types.Distribution will keep working. The Eq instance is fixed. Also, Key has smart constructors, avoiding needing to remember to update the cached serialization. Used git-annex benchmark: find is 7% faster whereis is 3% faster get when all files are already present is 5% faster Generally, the benchmarks are running 0.1 seconds faster per 2000 files, on a ram disk in my laptop.
80 lines
2.4 KiB
Haskell
80 lines
2.4 KiB
Haskell
{- git-annex file locations
|
|
-
|
|
- Copyright 2010-2017 Joey Hess <id@joeyh.name>
|
|
-
|
|
- Licensed under the GNU AGPL version 3 or higher.
|
|
-}
|
|
|
|
module Annex.DirHashes (
|
|
Hasher,
|
|
HashLevels(..),
|
|
objectHashLevels,
|
|
branchHashLevels,
|
|
branchHashDir,
|
|
dirHashes,
|
|
hashDirMixed,
|
|
hashDirLower,
|
|
display_32bits_as_dir
|
|
) where
|
|
|
|
import Data.Default
|
|
import Data.Bits
|
|
import qualified Data.ByteArray
|
|
|
|
import Common
|
|
import Key
|
|
import Types.GitConfig
|
|
import Types.Difference
|
|
import Utility.Hash
|
|
import Utility.MD5
|
|
|
|
type Hasher = Key -> FilePath
|
|
|
|
-- Number of hash levels to use. 2 is the default.
|
|
newtype HashLevels = HashLevels Int
|
|
|
|
instance Default HashLevels where
|
|
def = HashLevels 2
|
|
|
|
objectHashLevels :: GitConfig -> HashLevels
|
|
objectHashLevels = configHashLevels OneLevelObjectHash
|
|
|
|
branchHashLevels :: GitConfig -> HashLevels
|
|
branchHashLevels = configHashLevels OneLevelBranchHash
|
|
|
|
configHashLevels :: Difference -> GitConfig -> HashLevels
|
|
configHashLevels d config
|
|
| hasDifference d (annexDifferences config) = HashLevels 1
|
|
| otherwise = def
|
|
|
|
branchHashDir :: GitConfig -> Key -> String
|
|
branchHashDir = hashDirLower . branchHashLevels
|
|
|
|
{- Two different directory hashes may be used. The mixed case hash
|
|
- came first, and is fine, except for the problem of case-strict
|
|
- filesystems such as Linux VFAT (mounted with shortname=mixed),
|
|
- which do not allow using a directory "XX" when "xx" already exists.
|
|
- To support that, some git-annex repositories use the lower case-hash.
|
|
- All special remotes use the lower-case hash for new data, but old data
|
|
- may still used the mixed case hash. -}
|
|
dirHashes :: [HashLevels -> Hasher]
|
|
dirHashes = [hashDirLower, hashDirMixed]
|
|
|
|
hashDirs :: HashLevels -> Int -> String -> FilePath
|
|
hashDirs (HashLevels 1) sz s = addTrailingPathSeparator $ take sz s
|
|
hashDirs _ sz s = addTrailingPathSeparator $ take sz s </> drop sz s
|
|
|
|
hashDirLower :: HashLevels -> Hasher
|
|
hashDirLower n k = hashDirs n 3 $ take 6 $ show $ md5s $ serializeKey' $ nonChunkKey k
|
|
|
|
{- This was originally using Data.Hash.MD5 from MissingH. This new version
|
|
- is faster, but ugly as it has to replicate the 4 Word32's that produced. -}
|
|
hashDirMixed :: HashLevels -> Hasher
|
|
hashDirMixed n k = hashDirs n 2 $ take 4 $ concatMap display_32bits_as_dir $
|
|
encodeWord32 $ map fromIntegral $ Data.ByteArray.unpack $
|
|
Utility.Hash.md5s $ serializeKey' $ nonChunkKey k
|
|
where
|
|
encodeWord32 (b1:b2:b3:b4:rest) =
|
|
(shiftL b4 24 .|. shiftL b3 16 .|. shiftL b2 8 .|. b1)
|
|
: encodeWord32 rest
|
|
encodeWord32 _ = []
|