implement annex.tune.objecthashlower

Split out Annex.DirHashes which never really belonged in Locations.
This commit is contained in:
Joey Hess 2015-01-28 16:51:40 -04:00
parent e8c376e0ad
commit 009bd050c1
6 changed files with 109 additions and 68 deletions

76
Annex/DirHashes.hs Normal file
View file

@ -0,0 +1,76 @@
{- git-annex file locations
-
- Copyright 2010-2015 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Annex.DirHashes (
Hasher,
HashLevels(..),
objectHashLevels,
dirHashes,
hashDirMixed,
hashDirLower,
) where
import Data.Bits
import Data.Word
import Data.Hash.MD5
import Data.Default
import Common
import Types.Key
import Types.GitConfig
import Types.Difference
type Hasher = Key -> FilePath
-- Number of hash levels to use. 2 is the default.
newtype HashLevels = HashLevels Int
instance Default HashLevels where
def = HashLevels 2
objectHashLevels :: GitConfig -> HashLevels
objectHashLevels config
| hasDifference (== OneLevelObjectHash) (annexDifferences config) =
HashLevels 1
| otherwise = def
{- Two different directory hashes may be used. The mixed case hash
- came first, and is fine, except for the problem of case-strict
- filesystems such as Linux VFAT (mounted with shortname=mixed),
- which do not allow using a directory "XX" when "xx" already exists.
- To support that, most repositories use the lower case hash for new data. -}
dirHashes :: [HashLevels -> Hasher]
dirHashes = [hashDirLower, hashDirMixed]
hashDirs :: HashLevels -> Int -> String -> FilePath
hashDirs (HashLevels 1) sz s = addTrailingPathSeparator $ take sz s
hashDirs _ sz s = addTrailingPathSeparator $ take sz s </> drop sz s
hashDirMixed :: HashLevels -> Hasher
hashDirMixed n k = hashDirs n 2 $ take 4 $ display_32bits_as_dir =<< [a,b,c,d]
where
ABCD (a,b,c,d) = md5 $ md5FilePath $ key2file $ nonChunkKey k
hashDirLower :: HashLevels -> Hasher
hashDirLower n k = hashDirs n 3 $ take 6 $ md5s $ md5FilePath $ key2file $ nonChunkKey k
{- modified version of display_32bits_as_hex from Data.Hash.MD5
- Copyright (C) 2001 Ian Lynagh
- License: Either BSD or GPL
-}
display_32bits_as_dir :: Word32 -> String
display_32bits_as_dir w = trim $ swap_pairs cs
where
-- Need 32 characters to use. To avoid inaverdently making
-- a real word, use letters that appear less frequently.
chars = ['0'..'9'] ++ "zqjxkmvwgpfZQJXKMVWGPF"
cs = map (\x -> getc $ (shiftR w (6*x)) .&. 31) [0..7]
getc n = chars !! fromIntegral n
swap_pairs (x1:x2:xs) = x2:x1:swap_pairs xs
swap_pairs _ = []
-- Last 2 will always be 00, so omit.
trim = take 6

View file

@ -1,6 +1,6 @@
{- git-annex file locations {- git-annex file locations
- -
- Copyright 2010-2013 Joey Hess <id@joeyh.name> - Copyright 2010-2015 Joey Hess <id@joeyh.name>
- -
- Licensed under the GNU GPL version 3 or higher. - Licensed under the GNU GPL version 3 or higher.
-} -}
@ -20,7 +20,6 @@ module Locations (
gitAnnexInodeSentinal, gitAnnexInodeSentinal,
gitAnnexInodeSentinalCache, gitAnnexInodeSentinalCache,
annexLocations, annexLocations,
annexLocation,
gitAnnexDir, gitAnnexDir,
gitAnnexObjectDir, gitAnnexObjectDir,
gitAnnexTmpMiscDir, gitAnnexTmpMiscDir,
@ -59,7 +58,6 @@ module Locations (
gitAnnexRemotesDir, gitAnnexRemotesDir,
gitAnnexAssistantDefaultDir, gitAnnexAssistantDefaultDir,
isLinkToAnnex, isLinkToAnnex,
annexHashes,
HashLevels(..), HashLevels(..),
hashDirMixed, hashDirMixed,
hashDirLower, hashDirLower,
@ -68,18 +66,16 @@ module Locations (
prop_idempotent_fileKey prop_idempotent_fileKey
) where ) where
import Data.Bits
import Data.Word
import Data.Hash.MD5
import Data.Char import Data.Char
import Data.Default import Data.Default
import Common import Common
import Types import Types.GitConfig
import Types.Key import Types.Key
import Types.UUID import Types.UUID
import Types.Difference import Types.Difference
import qualified Git import qualified Git
import Annex.DirHashes
{- Conventions: {- Conventions:
- -
@ -105,11 +101,15 @@ objectDir :: FilePath
objectDir = addTrailingPathSeparator $ annexDir </> "objects" objectDir = addTrailingPathSeparator $ annexDir </> "objects"
{- Annexed file's possible locations relative to the .git directory. {- Annexed file's possible locations relative to the .git directory.
- There are two different possibilities, using different hashes. -} - There are two different possibilities, using different hashes.
annexLocations :: Key -> [FilePath] -
annexLocations key = map (annexLocation key) (annexHashes def) - Also, some repositories have a Difference in hash directory depth.
annexLocation :: Key -> Hasher -> FilePath -}
annexLocation key hasher = objectDir </> keyPath key hasher annexLocations :: GitConfig -> Key -> [FilePath]
annexLocations config key = map (annexLocation config key) dirHashes
annexLocation :: GitConfig -> Key -> (HashLevels -> Hasher) -> FilePath
annexLocation config key hasher = objectDir </> keyPath key (hasher $ objectHashLevels config)
{- Annexed object's location in a repository. {- Annexed object's location in a repository.
- -
@ -138,11 +138,11 @@ gitAnnexLocation' key r config crippled
| Git.repoIsLocalBare r | Git.repoIsLocalBare r
|| crippled || crippled
|| hasDifference (== ObjectHashLower) (annexDifferences config) = || hasDifference (== ObjectHashLower) (annexDifferences config) =
check $ map inrepo $ annexLocations key check $ map inrepo $ annexLocations config key
{- Non-bare repositories only use hashDirMixed, so {- Non-bare repositories only use hashDirMixed, so
- don't need to do any work to check if the file is - don't need to do any work to check if the file is
- present. -} - present. -}
| otherwise = return $ inrepo $ annexLocation key (hashDirMixed def) | otherwise = return $ inrepo $ annexLocation config key hashDirMixed
where where
inrepo d = Git.localGitDir r </> d inrepo d = Git.localGitDir r </> d
check locs@(l:_) = fromMaybe l <$> firstM doesFileExist locs check locs@(l:_) = fromMaybe l <$> firstM doesFileExist locs
@ -407,9 +407,9 @@ prop_idempotent_fileKey s
where where
k = stubKey { keyName = s, keyBackendName = "test" } k = stubKey { keyName = s, keyBackendName = "test" }
{- A location to store a key on the filesystem. A directory hash is used, {- A location to store a key on a special remote that uses a filesystem.
- to protect against filesystems that dislike having many items in a - A directory hash is used, to protect against filesystems that dislike
- single directory. - having many items in a single directory.
- -
- The file is put in a directory with the same name, this allows - The file is put in a directory with the same name, this allows
- write-protecting the directory to avoid accidental deletion of the file. - write-protecting the directory to avoid accidental deletion of the file.
@ -419,51 +419,11 @@ keyPath key hasher = hasher key </> f </> f
where where
f = keyFile key f = keyFile key
{- All possibile locations to store a key using different directory hashes. -} {- All possibile locations to store a key in a special remote
keyPaths :: Key -> [FilePath] - using different directory hashes.
keyPaths key = map (keyPath key) (annexHashes def) -
- This is compatible with the annexLocations, for interoperability between
{- Two different directory hashes may be used. The mixed case hash - special remotes and git-annex repos.
- came first, and is fine, except for the problem of case-strict
- filesystems such as Linux VFAT (mounted with shortname=mixed),
- which do not allow using a directory "XX" when "xx" already exists.
- To support that, most repositories use the lower case hash for new data. -}
annexHashes :: HashLevels -> [Hasher]
annexHashes n = [hashDirLower n, hashDirMixed n]
type Hasher = Key -> FilePath
-- Number of hash levels to use. 2 is the default.
newtype HashLevels = HashLevels Int
instance Default HashLevels where
def = HashLevels 2
hashDirs :: HashLevels -> Int -> String -> FilePath
hashDirs (HashLevels 1) sz s = addTrailingPathSeparator $ take sz s
hashDirs _ sz s = addTrailingPathSeparator $ take sz s </> drop sz s
hashDirMixed :: HashLevels -> Hasher
hashDirMixed n k = hashDirs n 2 $ take 4 $ display_32bits_as_dir =<< [a,b,c,d]
where
ABCD (a,b,c,d) = md5 $ md5FilePath $ key2file $ nonChunkKey k
hashDirLower :: HashLevels -> Hasher
hashDirLower n k = hashDirs n 3 $ take 6 $ md5s $ md5FilePath $ key2file $ nonChunkKey k
{- modified version of display_32bits_as_hex from Data.Hash.MD5
- Copyright (C) 2001 Ian Lynagh
- License: Either BSD or GPL
-} -}
display_32bits_as_dir :: Word32 -> String keyPaths :: Key -> [FilePath]
display_32bits_as_dir w = trim $ swap_pairs cs keyPaths key = map (keyPath key . def) dirHashes
where
-- Need 32 characters to use. To avoid inaverdently making
-- a real word, use letters that appear less frequently.
chars = ['0'..'9'] ++ "zqjxkmvwgpfZQJXKMVWGPF"
cs = map (\x -> getc $ (shiftR w (6*x)) .&. 31) [0..7]
getc n = chars !! fromIntegral n
swap_pairs (x1:x2:xs) = x2:x1:swap_pairs xs
swap_pairs _ = []
-- Last 2 will always be 00, so omit.
trim = take 6

View file

@ -327,13 +327,15 @@ keyUrls r key = map tourl locs'
-- If the remote is known to not be bare, try the hash locations -- If the remote is known to not be bare, try the hash locations
-- used for non-bare repos first, as an optimisation. -- used for non-bare repos first, as an optimisation.
locs locs
| remoteAnnexBare (gitconfig r) == Just False = reverse (annexLocations key) | remoteAnnexBare remoteconfig == Just False = reverse (annexLocations cfg key)
| otherwise = annexLocations key | otherwise = annexLocations cfg key
#ifndef mingw32_HOST_OS #ifndef mingw32_HOST_OS
locs' = locs locs' = locs
#else #else
locs' = map (replace "\\" "/") locs locs' = map (replace "\\" "/") locs
#endif #endif
remoteconfig = gitconfig r
cfg = fromJust $ remoteGitConfig remoteconfig
dropKey :: Remote -> Key -> Annex Bool dropKey :: Remote -> Key -> Annex Bool
dropKey r key dropKey r key

View file

@ -37,6 +37,7 @@ import Annex.Perms
import Logs.Transfer import Logs.Transfer
import Types.Creds import Types.Creds
import Types.Key (isChunkKey) import Types.Key (isChunkKey)
import Annex.DirHashes
import qualified Data.Map as M import qualified Data.Map as M
@ -212,7 +213,7 @@ remove o k = do
- content could be. Note that the parent directories have - content could be. Note that the parent directories have
- to also be explicitly included, due to how rsync - to also be explicitly included, due to how rsync
- traverses directories. -} - traverses directories. -}
includes = concatMap use (annexHashes def) includes = concatMap (use .def) dirHashes
use h = let dir = h k in use h = let dir = h k in
[ parentDir dir [ parentDir dir
, dir , dir

View file

@ -19,6 +19,7 @@ import System.FilePath.Posix
#ifdef mingw32_HOST_OS #ifdef mingw32_HOST_OS
import Data.String.Utils import Data.String.Utils
#endif #endif
import Annex.DirHashes
type RsyncUrl = String type RsyncUrl = String
@ -36,7 +37,7 @@ rsyncEscape o u
| otherwise = u | otherwise = u
rsyncUrls :: RsyncOpts -> Key -> [RsyncUrl] rsyncUrls :: RsyncOpts -> Key -> [RsyncUrl]
rsyncUrls o k = map use (annexHashes def) rsyncUrls o k = map (use . def) dirHashes
where where
use h = rsyncUrl o </> hash h </> rsyncEscape o (f </> f) use h = rsyncUrl o </> hash h </> rsyncEscape o (f </> f)
f = keyFile k f = keyFile k

1
debian/changelog vendored
View file

@ -26,6 +26,7 @@ git-annex (5.20150114) UNRELEASED; urgency=medium
http://git-annex.branchable.com/tuning/ http://git-annex.branchable.com/tuning/
* merge: Refuse to merge changes from a git-annex branch of a repo * merge: Refuse to merge changes from a git-annex branch of a repo
that has been tuned in incompatable ways. that has been tuned in incompatable ways.
* Support annex.tune.objecthash1 and annex.tune.objecthashlower.
-- Joey Hess <id@joeyh.name> Tue, 13 Jan 2015 17:03:39 -0400 -- Joey Hess <id@joeyh.name> Tue, 13 Jan 2015 17:03:39 -0400