implement annex.tune.objecthashlower
Split out Annex.DirHashes which never really belonged in Locations.
This commit is contained in:
parent
e8c376e0ad
commit
009bd050c1
6 changed files with 109 additions and 68 deletions
76
Annex/DirHashes.hs
Normal file
76
Annex/DirHashes.hs
Normal file
|
@ -0,0 +1,76 @@
|
||||||
|
{- git-annex file locations
|
||||||
|
-
|
||||||
|
- Copyright 2010-2015 Joey Hess <id@joeyh.name>
|
||||||
|
-
|
||||||
|
- Licensed under the GNU GPL version 3 or higher.
|
||||||
|
-}
|
||||||
|
|
||||||
|
module Annex.DirHashes (
|
||||||
|
Hasher,
|
||||||
|
HashLevels(..),
|
||||||
|
objectHashLevels,
|
||||||
|
dirHashes,
|
||||||
|
hashDirMixed,
|
||||||
|
hashDirLower,
|
||||||
|
) where
|
||||||
|
|
||||||
|
import Data.Bits
|
||||||
|
import Data.Word
|
||||||
|
import Data.Hash.MD5
|
||||||
|
import Data.Default
|
||||||
|
|
||||||
|
import Common
|
||||||
|
import Types.Key
|
||||||
|
import Types.GitConfig
|
||||||
|
import Types.Difference
|
||||||
|
|
||||||
|
type Hasher = Key -> FilePath
|
||||||
|
|
||||||
|
-- Number of hash levels to use. 2 is the default.
|
||||||
|
newtype HashLevels = HashLevels Int
|
||||||
|
|
||||||
|
instance Default HashLevels where
|
||||||
|
def = HashLevels 2
|
||||||
|
|
||||||
|
objectHashLevels :: GitConfig -> HashLevels
|
||||||
|
objectHashLevels config
|
||||||
|
| hasDifference (== OneLevelObjectHash) (annexDifferences config) =
|
||||||
|
HashLevels 1
|
||||||
|
| otherwise = def
|
||||||
|
|
||||||
|
{- Two different directory hashes may be used. The mixed case hash
|
||||||
|
- came first, and is fine, except for the problem of case-strict
|
||||||
|
- filesystems such as Linux VFAT (mounted with shortname=mixed),
|
||||||
|
- which do not allow using a directory "XX" when "xx" already exists.
|
||||||
|
- To support that, most repositories use the lower case hash for new data. -}
|
||||||
|
dirHashes :: [HashLevels -> Hasher]
|
||||||
|
dirHashes = [hashDirLower, hashDirMixed]
|
||||||
|
|
||||||
|
hashDirs :: HashLevels -> Int -> String -> FilePath
|
||||||
|
hashDirs (HashLevels 1) sz s = addTrailingPathSeparator $ take sz s
|
||||||
|
hashDirs _ sz s = addTrailingPathSeparator $ take sz s </> drop sz s
|
||||||
|
|
||||||
|
hashDirMixed :: HashLevels -> Hasher
|
||||||
|
hashDirMixed n k = hashDirs n 2 $ take 4 $ display_32bits_as_dir =<< [a,b,c,d]
|
||||||
|
where
|
||||||
|
ABCD (a,b,c,d) = md5 $ md5FilePath $ key2file $ nonChunkKey k
|
||||||
|
|
||||||
|
hashDirLower :: HashLevels -> Hasher
|
||||||
|
hashDirLower n k = hashDirs n 3 $ take 6 $ md5s $ md5FilePath $ key2file $ nonChunkKey k
|
||||||
|
|
||||||
|
{- modified version of display_32bits_as_hex from Data.Hash.MD5
|
||||||
|
- Copyright (C) 2001 Ian Lynagh
|
||||||
|
- License: Either BSD or GPL
|
||||||
|
-}
|
||||||
|
display_32bits_as_dir :: Word32 -> String
|
||||||
|
display_32bits_as_dir w = trim $ swap_pairs cs
|
||||||
|
where
|
||||||
|
-- Need 32 characters to use. To avoid inaverdently making
|
||||||
|
-- a real word, use letters that appear less frequently.
|
||||||
|
chars = ['0'..'9'] ++ "zqjxkmvwgpfZQJXKMVWGPF"
|
||||||
|
cs = map (\x -> getc $ (shiftR w (6*x)) .&. 31) [0..7]
|
||||||
|
getc n = chars !! fromIntegral n
|
||||||
|
swap_pairs (x1:x2:xs) = x2:x1:swap_pairs xs
|
||||||
|
swap_pairs _ = []
|
||||||
|
-- Last 2 will always be 00, so omit.
|
||||||
|
trim = take 6
|
88
Locations.hs
88
Locations.hs
|
@ -1,6 +1,6 @@
|
||||||
{- git-annex file locations
|
{- git-annex file locations
|
||||||
-
|
-
|
||||||
- Copyright 2010-2013 Joey Hess <id@joeyh.name>
|
- Copyright 2010-2015 Joey Hess <id@joeyh.name>
|
||||||
-
|
-
|
||||||
- Licensed under the GNU GPL version 3 or higher.
|
- Licensed under the GNU GPL version 3 or higher.
|
||||||
-}
|
-}
|
||||||
|
@ -20,7 +20,6 @@ module Locations (
|
||||||
gitAnnexInodeSentinal,
|
gitAnnexInodeSentinal,
|
||||||
gitAnnexInodeSentinalCache,
|
gitAnnexInodeSentinalCache,
|
||||||
annexLocations,
|
annexLocations,
|
||||||
annexLocation,
|
|
||||||
gitAnnexDir,
|
gitAnnexDir,
|
||||||
gitAnnexObjectDir,
|
gitAnnexObjectDir,
|
||||||
gitAnnexTmpMiscDir,
|
gitAnnexTmpMiscDir,
|
||||||
|
@ -59,7 +58,6 @@ module Locations (
|
||||||
gitAnnexRemotesDir,
|
gitAnnexRemotesDir,
|
||||||
gitAnnexAssistantDefaultDir,
|
gitAnnexAssistantDefaultDir,
|
||||||
isLinkToAnnex,
|
isLinkToAnnex,
|
||||||
annexHashes,
|
|
||||||
HashLevels(..),
|
HashLevels(..),
|
||||||
hashDirMixed,
|
hashDirMixed,
|
||||||
hashDirLower,
|
hashDirLower,
|
||||||
|
@ -68,18 +66,16 @@ module Locations (
|
||||||
prop_idempotent_fileKey
|
prop_idempotent_fileKey
|
||||||
) where
|
) where
|
||||||
|
|
||||||
import Data.Bits
|
|
||||||
import Data.Word
|
|
||||||
import Data.Hash.MD5
|
|
||||||
import Data.Char
|
import Data.Char
|
||||||
import Data.Default
|
import Data.Default
|
||||||
|
|
||||||
import Common
|
import Common
|
||||||
import Types
|
import Types.GitConfig
|
||||||
import Types.Key
|
import Types.Key
|
||||||
import Types.UUID
|
import Types.UUID
|
||||||
import Types.Difference
|
import Types.Difference
|
||||||
import qualified Git
|
import qualified Git
|
||||||
|
import Annex.DirHashes
|
||||||
|
|
||||||
{- Conventions:
|
{- Conventions:
|
||||||
-
|
-
|
||||||
|
@ -105,11 +101,15 @@ objectDir :: FilePath
|
||||||
objectDir = addTrailingPathSeparator $ annexDir </> "objects"
|
objectDir = addTrailingPathSeparator $ annexDir </> "objects"
|
||||||
|
|
||||||
{- Annexed file's possible locations relative to the .git directory.
|
{- Annexed file's possible locations relative to the .git directory.
|
||||||
- There are two different possibilities, using different hashes. -}
|
- There are two different possibilities, using different hashes.
|
||||||
annexLocations :: Key -> [FilePath]
|
-
|
||||||
annexLocations key = map (annexLocation key) (annexHashes def)
|
- Also, some repositories have a Difference in hash directory depth.
|
||||||
annexLocation :: Key -> Hasher -> FilePath
|
-}
|
||||||
annexLocation key hasher = objectDir </> keyPath key hasher
|
annexLocations :: GitConfig -> Key -> [FilePath]
|
||||||
|
annexLocations config key = map (annexLocation config key) dirHashes
|
||||||
|
|
||||||
|
annexLocation :: GitConfig -> Key -> (HashLevels -> Hasher) -> FilePath
|
||||||
|
annexLocation config key hasher = objectDir </> keyPath key (hasher $ objectHashLevels config)
|
||||||
|
|
||||||
{- Annexed object's location in a repository.
|
{- Annexed object's location in a repository.
|
||||||
-
|
-
|
||||||
|
@ -138,11 +138,11 @@ gitAnnexLocation' key r config crippled
|
||||||
| Git.repoIsLocalBare r
|
| Git.repoIsLocalBare r
|
||||||
|| crippled
|
|| crippled
|
||||||
|| hasDifference (== ObjectHashLower) (annexDifferences config) =
|
|| hasDifference (== ObjectHashLower) (annexDifferences config) =
|
||||||
check $ map inrepo $ annexLocations key
|
check $ map inrepo $ annexLocations config key
|
||||||
{- Non-bare repositories only use hashDirMixed, so
|
{- Non-bare repositories only use hashDirMixed, so
|
||||||
- don't need to do any work to check if the file is
|
- don't need to do any work to check if the file is
|
||||||
- present. -}
|
- present. -}
|
||||||
| otherwise = return $ inrepo $ annexLocation key (hashDirMixed def)
|
| otherwise = return $ inrepo $ annexLocation config key hashDirMixed
|
||||||
where
|
where
|
||||||
inrepo d = Git.localGitDir r </> d
|
inrepo d = Git.localGitDir r </> d
|
||||||
check locs@(l:_) = fromMaybe l <$> firstM doesFileExist locs
|
check locs@(l:_) = fromMaybe l <$> firstM doesFileExist locs
|
||||||
|
@ -407,9 +407,9 @@ prop_idempotent_fileKey s
|
||||||
where
|
where
|
||||||
k = stubKey { keyName = s, keyBackendName = "test" }
|
k = stubKey { keyName = s, keyBackendName = "test" }
|
||||||
|
|
||||||
{- A location to store a key on the filesystem. A directory hash is used,
|
{- A location to store a key on a special remote that uses a filesystem.
|
||||||
- to protect against filesystems that dislike having many items in a
|
- A directory hash is used, to protect against filesystems that dislike
|
||||||
- single directory.
|
- having many items in a single directory.
|
||||||
-
|
-
|
||||||
- The file is put in a directory with the same name, this allows
|
- The file is put in a directory with the same name, this allows
|
||||||
- write-protecting the directory to avoid accidental deletion of the file.
|
- write-protecting the directory to avoid accidental deletion of the file.
|
||||||
|
@ -419,51 +419,11 @@ keyPath key hasher = hasher key </> f </> f
|
||||||
where
|
where
|
||||||
f = keyFile key
|
f = keyFile key
|
||||||
|
|
||||||
{- All possibile locations to store a key using different directory hashes. -}
|
{- All possibile locations to store a key in a special remote
|
||||||
keyPaths :: Key -> [FilePath]
|
- using different directory hashes.
|
||||||
keyPaths key = map (keyPath key) (annexHashes def)
|
-
|
||||||
|
- This is compatible with the annexLocations, for interoperability between
|
||||||
{- Two different directory hashes may be used. The mixed case hash
|
- special remotes and git-annex repos.
|
||||||
- came first, and is fine, except for the problem of case-strict
|
|
||||||
- filesystems such as Linux VFAT (mounted with shortname=mixed),
|
|
||||||
- which do not allow using a directory "XX" when "xx" already exists.
|
|
||||||
- To support that, most repositories use the lower case hash for new data. -}
|
|
||||||
annexHashes :: HashLevels -> [Hasher]
|
|
||||||
annexHashes n = [hashDirLower n, hashDirMixed n]
|
|
||||||
|
|
||||||
type Hasher = Key -> FilePath
|
|
||||||
|
|
||||||
-- Number of hash levels to use. 2 is the default.
|
|
||||||
newtype HashLevels = HashLevels Int
|
|
||||||
|
|
||||||
instance Default HashLevels where
|
|
||||||
def = HashLevels 2
|
|
||||||
|
|
||||||
hashDirs :: HashLevels -> Int -> String -> FilePath
|
|
||||||
hashDirs (HashLevels 1) sz s = addTrailingPathSeparator $ take sz s
|
|
||||||
hashDirs _ sz s = addTrailingPathSeparator $ take sz s </> drop sz s
|
|
||||||
|
|
||||||
hashDirMixed :: HashLevels -> Hasher
|
|
||||||
hashDirMixed n k = hashDirs n 2 $ take 4 $ display_32bits_as_dir =<< [a,b,c,d]
|
|
||||||
where
|
|
||||||
ABCD (a,b,c,d) = md5 $ md5FilePath $ key2file $ nonChunkKey k
|
|
||||||
|
|
||||||
hashDirLower :: HashLevels -> Hasher
|
|
||||||
hashDirLower n k = hashDirs n 3 $ take 6 $ md5s $ md5FilePath $ key2file $ nonChunkKey k
|
|
||||||
|
|
||||||
{- modified version of display_32bits_as_hex from Data.Hash.MD5
|
|
||||||
- Copyright (C) 2001 Ian Lynagh
|
|
||||||
- License: Either BSD or GPL
|
|
||||||
-}
|
-}
|
||||||
display_32bits_as_dir :: Word32 -> String
|
keyPaths :: Key -> [FilePath]
|
||||||
display_32bits_as_dir w = trim $ swap_pairs cs
|
keyPaths key = map (keyPath key . def) dirHashes
|
||||||
where
|
|
||||||
-- Need 32 characters to use. To avoid inaverdently making
|
|
||||||
-- a real word, use letters that appear less frequently.
|
|
||||||
chars = ['0'..'9'] ++ "zqjxkmvwgpfZQJXKMVWGPF"
|
|
||||||
cs = map (\x -> getc $ (shiftR w (6*x)) .&. 31) [0..7]
|
|
||||||
getc n = chars !! fromIntegral n
|
|
||||||
swap_pairs (x1:x2:xs) = x2:x1:swap_pairs xs
|
|
||||||
swap_pairs _ = []
|
|
||||||
-- Last 2 will always be 00, so omit.
|
|
||||||
trim = take 6
|
|
||||||
|
|
|
@ -327,13 +327,15 @@ keyUrls r key = map tourl locs'
|
||||||
-- If the remote is known to not be bare, try the hash locations
|
-- If the remote is known to not be bare, try the hash locations
|
||||||
-- used for non-bare repos first, as an optimisation.
|
-- used for non-bare repos first, as an optimisation.
|
||||||
locs
|
locs
|
||||||
| remoteAnnexBare (gitconfig r) == Just False = reverse (annexLocations key)
|
| remoteAnnexBare remoteconfig == Just False = reverse (annexLocations cfg key)
|
||||||
| otherwise = annexLocations key
|
| otherwise = annexLocations cfg key
|
||||||
#ifndef mingw32_HOST_OS
|
#ifndef mingw32_HOST_OS
|
||||||
locs' = locs
|
locs' = locs
|
||||||
#else
|
#else
|
||||||
locs' = map (replace "\\" "/") locs
|
locs' = map (replace "\\" "/") locs
|
||||||
#endif
|
#endif
|
||||||
|
remoteconfig = gitconfig r
|
||||||
|
cfg = fromJust $ remoteGitConfig remoteconfig
|
||||||
|
|
||||||
dropKey :: Remote -> Key -> Annex Bool
|
dropKey :: Remote -> Key -> Annex Bool
|
||||||
dropKey r key
|
dropKey r key
|
||||||
|
|
|
@ -37,6 +37,7 @@ import Annex.Perms
|
||||||
import Logs.Transfer
|
import Logs.Transfer
|
||||||
import Types.Creds
|
import Types.Creds
|
||||||
import Types.Key (isChunkKey)
|
import Types.Key (isChunkKey)
|
||||||
|
import Annex.DirHashes
|
||||||
|
|
||||||
import qualified Data.Map as M
|
import qualified Data.Map as M
|
||||||
|
|
||||||
|
@ -212,7 +213,7 @@ remove o k = do
|
||||||
- content could be. Note that the parent directories have
|
- content could be. Note that the parent directories have
|
||||||
- to also be explicitly included, due to how rsync
|
- to also be explicitly included, due to how rsync
|
||||||
- traverses directories. -}
|
- traverses directories. -}
|
||||||
includes = concatMap use (annexHashes def)
|
includes = concatMap (use .def) dirHashes
|
||||||
use h = let dir = h k in
|
use h = let dir = h k in
|
||||||
[ parentDir dir
|
[ parentDir dir
|
||||||
, dir
|
, dir
|
||||||
|
|
|
@ -19,6 +19,7 @@ import System.FilePath.Posix
|
||||||
#ifdef mingw32_HOST_OS
|
#ifdef mingw32_HOST_OS
|
||||||
import Data.String.Utils
|
import Data.String.Utils
|
||||||
#endif
|
#endif
|
||||||
|
import Annex.DirHashes
|
||||||
|
|
||||||
type RsyncUrl = String
|
type RsyncUrl = String
|
||||||
|
|
||||||
|
@ -36,7 +37,7 @@ rsyncEscape o u
|
||||||
| otherwise = u
|
| otherwise = u
|
||||||
|
|
||||||
rsyncUrls :: RsyncOpts -> Key -> [RsyncUrl]
|
rsyncUrls :: RsyncOpts -> Key -> [RsyncUrl]
|
||||||
rsyncUrls o k = map use (annexHashes def)
|
rsyncUrls o k = map (use . def) dirHashes
|
||||||
where
|
where
|
||||||
use h = rsyncUrl o </> hash h </> rsyncEscape o (f </> f)
|
use h = rsyncUrl o </> hash h </> rsyncEscape o (f </> f)
|
||||||
f = keyFile k
|
f = keyFile k
|
||||||
|
|
1
debian/changelog
vendored
1
debian/changelog
vendored
|
@ -26,6 +26,7 @@ git-annex (5.20150114) UNRELEASED; urgency=medium
|
||||||
http://git-annex.branchable.com/tuning/
|
http://git-annex.branchable.com/tuning/
|
||||||
* merge: Refuse to merge changes from a git-annex branch of a repo
|
* merge: Refuse to merge changes from a git-annex branch of a repo
|
||||||
that has been tuned in incompatable ways.
|
that has been tuned in incompatable ways.
|
||||||
|
* Support annex.tune.objecthash1 and annex.tune.objecthashlower.
|
||||||
|
|
||||||
-- Joey Hess <id@joeyh.name> Tue, 13 Jan 2015 17:03:39 -0400
|
-- Joey Hess <id@joeyh.name> Tue, 13 Jan 2015 17:03:39 -0400
|
||||||
|
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue