implement annex.tune.objecthashlower
Split out Annex.DirHashes which never really belonged in Locations.
This commit is contained in:
parent
e8c376e0ad
commit
009bd050c1
6 changed files with 109 additions and 68 deletions
76
Annex/DirHashes.hs
Normal file
76
Annex/DirHashes.hs
Normal file
|
@ -0,0 +1,76 @@
|
|||
{- git-annex file locations
|
||||
-
|
||||
- Copyright 2010-2015 Joey Hess <id@joeyh.name>
|
||||
-
|
||||
- Licensed under the GNU GPL version 3 or higher.
|
||||
-}
|
||||
|
||||
module Annex.DirHashes (
|
||||
Hasher,
|
||||
HashLevels(..),
|
||||
objectHashLevels,
|
||||
dirHashes,
|
||||
hashDirMixed,
|
||||
hashDirLower,
|
||||
) where
|
||||
|
||||
import Data.Bits
|
||||
import Data.Word
|
||||
import Data.Hash.MD5
|
||||
import Data.Default
|
||||
|
||||
import Common
|
||||
import Types.Key
|
||||
import Types.GitConfig
|
||||
import Types.Difference
|
||||
|
||||
type Hasher = Key -> FilePath
|
||||
|
||||
-- Number of hash levels to use. 2 is the default.
|
||||
newtype HashLevels = HashLevels Int
|
||||
|
||||
instance Default HashLevels where
|
||||
def = HashLevels 2
|
||||
|
||||
objectHashLevels :: GitConfig -> HashLevels
|
||||
objectHashLevels config
|
||||
| hasDifference (== OneLevelObjectHash) (annexDifferences config) =
|
||||
HashLevels 1
|
||||
| otherwise = def
|
||||
|
||||
{- Two different directory hashes may be used. The mixed case hash
|
||||
- came first, and is fine, except for the problem of case-strict
|
||||
- filesystems such as Linux VFAT (mounted with shortname=mixed),
|
||||
- which do not allow using a directory "XX" when "xx" already exists.
|
||||
- To support that, most repositories use the lower case hash for new data. -}
|
||||
dirHashes :: [HashLevels -> Hasher]
|
||||
dirHashes = [hashDirLower, hashDirMixed]
|
||||
|
||||
hashDirs :: HashLevels -> Int -> String -> FilePath
|
||||
hashDirs (HashLevels 1) sz s = addTrailingPathSeparator $ take sz s
|
||||
hashDirs _ sz s = addTrailingPathSeparator $ take sz s </> drop sz s
|
||||
|
||||
hashDirMixed :: HashLevels -> Hasher
|
||||
hashDirMixed n k = hashDirs n 2 $ take 4 $ display_32bits_as_dir =<< [a,b,c,d]
|
||||
where
|
||||
ABCD (a,b,c,d) = md5 $ md5FilePath $ key2file $ nonChunkKey k
|
||||
|
||||
hashDirLower :: HashLevels -> Hasher
|
||||
hashDirLower n k = hashDirs n 3 $ take 6 $ md5s $ md5FilePath $ key2file $ nonChunkKey k
|
||||
|
||||
{- modified version of display_32bits_as_hex from Data.Hash.MD5
|
||||
- Copyright (C) 2001 Ian Lynagh
|
||||
- License: Either BSD or GPL
|
||||
-}
|
||||
display_32bits_as_dir :: Word32 -> String
|
||||
display_32bits_as_dir w = trim $ swap_pairs cs
|
||||
where
|
||||
-- Need 32 characters to use. To avoid inaverdently making
|
||||
-- a real word, use letters that appear less frequently.
|
||||
chars = ['0'..'9'] ++ "zqjxkmvwgpfZQJXKMVWGPF"
|
||||
cs = map (\x -> getc $ (shiftR w (6*x)) .&. 31) [0..7]
|
||||
getc n = chars !! fromIntegral n
|
||||
swap_pairs (x1:x2:xs) = x2:x1:swap_pairs xs
|
||||
swap_pairs _ = []
|
||||
-- Last 2 will always be 00, so omit.
|
||||
trim = take 6
|
88
Locations.hs
88
Locations.hs
|
@ -1,6 +1,6 @@
|
|||
{- git-annex file locations
|
||||
-
|
||||
- Copyright 2010-2013 Joey Hess <id@joeyh.name>
|
||||
- Copyright 2010-2015 Joey Hess <id@joeyh.name>
|
||||
-
|
||||
- Licensed under the GNU GPL version 3 or higher.
|
||||
-}
|
||||
|
@ -20,7 +20,6 @@ module Locations (
|
|||
gitAnnexInodeSentinal,
|
||||
gitAnnexInodeSentinalCache,
|
||||
annexLocations,
|
||||
annexLocation,
|
||||
gitAnnexDir,
|
||||
gitAnnexObjectDir,
|
||||
gitAnnexTmpMiscDir,
|
||||
|
@ -59,7 +58,6 @@ module Locations (
|
|||
gitAnnexRemotesDir,
|
||||
gitAnnexAssistantDefaultDir,
|
||||
isLinkToAnnex,
|
||||
annexHashes,
|
||||
HashLevels(..),
|
||||
hashDirMixed,
|
||||
hashDirLower,
|
||||
|
@ -68,18 +66,16 @@ module Locations (
|
|||
prop_idempotent_fileKey
|
||||
) where
|
||||
|
||||
import Data.Bits
|
||||
import Data.Word
|
||||
import Data.Hash.MD5
|
||||
import Data.Char
|
||||
import Data.Default
|
||||
|
||||
import Common
|
||||
import Types
|
||||
import Types.GitConfig
|
||||
import Types.Key
|
||||
import Types.UUID
|
||||
import Types.Difference
|
||||
import qualified Git
|
||||
import Annex.DirHashes
|
||||
|
||||
{- Conventions:
|
||||
-
|
||||
|
@ -105,11 +101,15 @@ objectDir :: FilePath
|
|||
objectDir = addTrailingPathSeparator $ annexDir </> "objects"
|
||||
|
||||
{- Annexed file's possible locations relative to the .git directory.
|
||||
- There are two different possibilities, using different hashes. -}
|
||||
annexLocations :: Key -> [FilePath]
|
||||
annexLocations key = map (annexLocation key) (annexHashes def)
|
||||
annexLocation :: Key -> Hasher -> FilePath
|
||||
annexLocation key hasher = objectDir </> keyPath key hasher
|
||||
- There are two different possibilities, using different hashes.
|
||||
-
|
||||
- Also, some repositories have a Difference in hash directory depth.
|
||||
-}
|
||||
annexLocations :: GitConfig -> Key -> [FilePath]
|
||||
annexLocations config key = map (annexLocation config key) dirHashes
|
||||
|
||||
annexLocation :: GitConfig -> Key -> (HashLevels -> Hasher) -> FilePath
|
||||
annexLocation config key hasher = objectDir </> keyPath key (hasher $ objectHashLevels config)
|
||||
|
||||
{- Annexed object's location in a repository.
|
||||
-
|
||||
|
@ -138,11 +138,11 @@ gitAnnexLocation' key r config crippled
|
|||
| Git.repoIsLocalBare r
|
||||
|| crippled
|
||||
|| hasDifference (== ObjectHashLower) (annexDifferences config) =
|
||||
check $ map inrepo $ annexLocations key
|
||||
check $ map inrepo $ annexLocations config key
|
||||
{- Non-bare repositories only use hashDirMixed, so
|
||||
- don't need to do any work to check if the file is
|
||||
- present. -}
|
||||
| otherwise = return $ inrepo $ annexLocation key (hashDirMixed def)
|
||||
| otherwise = return $ inrepo $ annexLocation config key hashDirMixed
|
||||
where
|
||||
inrepo d = Git.localGitDir r </> d
|
||||
check locs@(l:_) = fromMaybe l <$> firstM doesFileExist locs
|
||||
|
@ -407,9 +407,9 @@ prop_idempotent_fileKey s
|
|||
where
|
||||
k = stubKey { keyName = s, keyBackendName = "test" }
|
||||
|
||||
{- A location to store a key on the filesystem. A directory hash is used,
|
||||
- to protect against filesystems that dislike having many items in a
|
||||
- single directory.
|
||||
{- A location to store a key on a special remote that uses a filesystem.
|
||||
- A directory hash is used, to protect against filesystems that dislike
|
||||
- having many items in a single directory.
|
||||
-
|
||||
- The file is put in a directory with the same name, this allows
|
||||
- write-protecting the directory to avoid accidental deletion of the file.
|
||||
|
@ -419,51 +419,11 @@ keyPath key hasher = hasher key </> f </> f
|
|||
where
|
||||
f = keyFile key
|
||||
|
||||
{- All possibile locations to store a key using different directory hashes. -}
|
||||
keyPaths :: Key -> [FilePath]
|
||||
keyPaths key = map (keyPath key) (annexHashes def)
|
||||
|
||||
{- Two different directory hashes may be used. The mixed case hash
|
||||
- came first, and is fine, except for the problem of case-strict
|
||||
- filesystems such as Linux VFAT (mounted with shortname=mixed),
|
||||
- which do not allow using a directory "XX" when "xx" already exists.
|
||||
- To support that, most repositories use the lower case hash for new data. -}
|
||||
annexHashes :: HashLevels -> [Hasher]
|
||||
annexHashes n = [hashDirLower n, hashDirMixed n]
|
||||
|
||||
type Hasher = Key -> FilePath
|
||||
|
||||
-- Number of hash levels to use. 2 is the default.
|
||||
newtype HashLevels = HashLevels Int
|
||||
|
||||
instance Default HashLevels where
|
||||
def = HashLevels 2
|
||||
|
||||
hashDirs :: HashLevels -> Int -> String -> FilePath
|
||||
hashDirs (HashLevels 1) sz s = addTrailingPathSeparator $ take sz s
|
||||
hashDirs _ sz s = addTrailingPathSeparator $ take sz s </> drop sz s
|
||||
|
||||
hashDirMixed :: HashLevels -> Hasher
|
||||
hashDirMixed n k = hashDirs n 2 $ take 4 $ display_32bits_as_dir =<< [a,b,c,d]
|
||||
where
|
||||
ABCD (a,b,c,d) = md5 $ md5FilePath $ key2file $ nonChunkKey k
|
||||
|
||||
hashDirLower :: HashLevels -> Hasher
|
||||
hashDirLower n k = hashDirs n 3 $ take 6 $ md5s $ md5FilePath $ key2file $ nonChunkKey k
|
||||
|
||||
{- modified version of display_32bits_as_hex from Data.Hash.MD5
|
||||
- Copyright (C) 2001 Ian Lynagh
|
||||
- License: Either BSD or GPL
|
||||
{- All possibile locations to store a key in a special remote
|
||||
- using different directory hashes.
|
||||
-
|
||||
- This is compatible with the annexLocations, for interoperability between
|
||||
- special remotes and git-annex repos.
|
||||
-}
|
||||
display_32bits_as_dir :: Word32 -> String
|
||||
display_32bits_as_dir w = trim $ swap_pairs cs
|
||||
where
|
||||
-- Need 32 characters to use. To avoid inaverdently making
|
||||
-- a real word, use letters that appear less frequently.
|
||||
chars = ['0'..'9'] ++ "zqjxkmvwgpfZQJXKMVWGPF"
|
||||
cs = map (\x -> getc $ (shiftR w (6*x)) .&. 31) [0..7]
|
||||
getc n = chars !! fromIntegral n
|
||||
swap_pairs (x1:x2:xs) = x2:x1:swap_pairs xs
|
||||
swap_pairs _ = []
|
||||
-- Last 2 will always be 00, so omit.
|
||||
trim = take 6
|
||||
keyPaths :: Key -> [FilePath]
|
||||
keyPaths key = map (keyPath key . def) dirHashes
|
||||
|
|
|
@ -327,13 +327,15 @@ keyUrls r key = map tourl locs'
|
|||
-- If the remote is known to not be bare, try the hash locations
|
||||
-- used for non-bare repos first, as an optimisation.
|
||||
locs
|
||||
| remoteAnnexBare (gitconfig r) == Just False = reverse (annexLocations key)
|
||||
| otherwise = annexLocations key
|
||||
| remoteAnnexBare remoteconfig == Just False = reverse (annexLocations cfg key)
|
||||
| otherwise = annexLocations cfg key
|
||||
#ifndef mingw32_HOST_OS
|
||||
locs' = locs
|
||||
#else
|
||||
locs' = map (replace "\\" "/") locs
|
||||
#endif
|
||||
remoteconfig = gitconfig r
|
||||
cfg = fromJust $ remoteGitConfig remoteconfig
|
||||
|
||||
dropKey :: Remote -> Key -> Annex Bool
|
||||
dropKey r key
|
||||
|
|
|
@ -37,6 +37,7 @@ import Annex.Perms
|
|||
import Logs.Transfer
|
||||
import Types.Creds
|
||||
import Types.Key (isChunkKey)
|
||||
import Annex.DirHashes
|
||||
|
||||
import qualified Data.Map as M
|
||||
|
||||
|
@ -212,7 +213,7 @@ remove o k = do
|
|||
- content could be. Note that the parent directories have
|
||||
- to also be explicitly included, due to how rsync
|
||||
- traverses directories. -}
|
||||
includes = concatMap use (annexHashes def)
|
||||
includes = concatMap (use .def) dirHashes
|
||||
use h = let dir = h k in
|
||||
[ parentDir dir
|
||||
, dir
|
||||
|
|
|
@ -19,6 +19,7 @@ import System.FilePath.Posix
|
|||
#ifdef mingw32_HOST_OS
|
||||
import Data.String.Utils
|
||||
#endif
|
||||
import Annex.DirHashes
|
||||
|
||||
type RsyncUrl = String
|
||||
|
||||
|
@ -36,7 +37,7 @@ rsyncEscape o u
|
|||
| otherwise = u
|
||||
|
||||
rsyncUrls :: RsyncOpts -> Key -> [RsyncUrl]
|
||||
rsyncUrls o k = map use (annexHashes def)
|
||||
rsyncUrls o k = map (use . def) dirHashes
|
||||
where
|
||||
use h = rsyncUrl o </> hash h </> rsyncEscape o (f </> f)
|
||||
f = keyFile k
|
||||
|
|
1
debian/changelog
vendored
1
debian/changelog
vendored
|
@ -26,6 +26,7 @@ git-annex (5.20150114) UNRELEASED; urgency=medium
|
|||
http://git-annex.branchable.com/tuning/
|
||||
* merge: Refuse to merge changes from a git-annex branch of a repo
|
||||
that has been tuned in incompatable ways.
|
||||
* Support annex.tune.objecthash1 and annex.tune.objecthashlower.
|
||||
|
||||
-- Joey Hess <id@joeyh.name> Tue, 13 Jan 2015 17:03:39 -0400
|
||||
|
||||
|
|
Loading…
Add table
Reference in a new issue