implement annex.tune.branchhash1

I hope this doesn't impact speed much -- it does have to pull out a value
from Annex state every time it accesses the branch now.

The test case I dropped has never caught any problems that I can remember,
and would have been rather difficult to convert.
This commit is contained in:
Joey Hess 2015-01-28 17:17:26 -04:00
parent 009bd050c1
commit b0575c621f
11 changed files with 78 additions and 64 deletions

View file

@ -9,6 +9,8 @@ module Annex.DirHashes (
Hasher, Hasher,
HashLevels(..), HashLevels(..),
objectHashLevels, objectHashLevels,
branchHashLevels,
branchHashDir,
dirHashes, dirHashes,
hashDirMixed, hashDirMixed,
hashDirLower, hashDirLower,
@ -33,11 +35,19 @@ instance Default HashLevels where
def = HashLevels 2 def = HashLevels 2
objectHashLevels :: GitConfig -> HashLevels objectHashLevels :: GitConfig -> HashLevels
objectHashLevels config objectHashLevels = configHashLevels OneLevelObjectHash
| hasDifference (== OneLevelObjectHash) (annexDifferences config) =
HashLevels 1 branchHashLevels :: GitConfig -> HashLevels
branchHashLevels = configHashLevels OneLevelBranchHash
configHashLevels :: Difference -> GitConfig -> HashLevels
configHashLevels d config
| hasDifference (== d) (annexDifferences config) = HashLevels 1
| otherwise = def | otherwise = def
branchHashDir :: GitConfig -> Key -> String
branchHashDir config key = hashDirLower (branchHashLevels config) key
{- Two different directory hashes may be used. The mixed case hash {- Two different directory hashes may be used. The mixed case hash
- came first, and is fine, except for the problem of case-strict - came first, and is fine, except for the problem of case-strict
- filesystems such as Linux VFAT (mounted with shortname=mixed), - filesystems such as Linux VFAT (mounted with shortname=mixed),

View file

@ -141,7 +141,8 @@ getLog :: Key -> [CommandParam] -> Annex [String]
getLog key os = do getLog key os = do
top <- fromRepo Git.repoPath top <- fromRepo Git.repoPath
p <- liftIO $ relPathCwdToFile top p <- liftIO $ relPathCwdToFile top
let logfile = p </> locationLogFile key config <- Annex.getGitConfig
let logfile = p </> locationLogFile config key
inRepo $ pipeNullSplitZombie $ inRepo $ pipeNullSplitZombie $
[ Params "log -z --pretty=format:%ct --raw --abbrev=40" [ Params "log -z --pretty=format:%ct --raw --abbrev=40"
, Param "--remove-empty" , Param "--remove-empty"

56
Logs.hs
View file

@ -9,6 +9,7 @@ module Logs where
import Common.Annex import Common.Annex
import Types.Key import Types.Key
import Annex.DirHashes
{- There are several varieties of log file formats. -} {- There are several varieties of log file formats. -}
data LogVariety data LogVariety
@ -87,8 +88,8 @@ differenceLog :: FilePath
differenceLog = "difference.log" differenceLog = "difference.log"
{- The pathname of the location log file for a given key. -} {- The pathname of the location log file for a given key. -}
locationLogFile :: Key -> String locationLogFile :: GitConfig -> Key -> String
locationLogFile key = hashDirLower def key ++ keyFile key ++ ".log" locationLogFile config key = branchHashDir config key </> keyFile key ++ ".log"
{- Converts a pathname into a key if it's a location log. -} {- Converts a pathname into a key if it's a location log. -}
locationLogFileKey :: FilePath -> Maybe Key locationLogFileKey :: FilePath -> Maybe Key
@ -101,15 +102,17 @@ locationLogFileKey path
(base, ext) = splitAt (length file - 4) file (base, ext) = splitAt (length file - 4) file
{- The filename of the url log for a given key. -} {- The filename of the url log for a given key. -}
urlLogFile :: Key -> FilePath urlLogFile :: GitConfig -> Key -> FilePath
urlLogFile key = hashDirLower def key </> keyFile key ++ urlLogExt urlLogFile config key = branchHashDir config key </> keyFile key ++ urlLogExt
{- Old versions stored the urls elsewhere. -} {- Old versions stored the urls elsewhere. -}
oldurlLogs :: Key -> [FilePath] oldurlLogs :: GitConfig -> Key -> [FilePath]
oldurlLogs key = oldurlLogs config key =
[ "remote/web" </> hashDirLower def key </> key2file key ++ ".log" [ "remote/web" </> hdir </> key2file key ++ ".log"
, "remote/web" </> hashDirLower def key </> keyFile key ++ ".log" , "remote/web" </> hdir </> keyFile key ++ ".log"
] ]
where
hdir = branchHashDir config key
urlLogExt :: String urlLogExt :: String
urlLogExt = ".log.web" urlLogExt = ".log.web"
@ -130,8 +133,9 @@ isUrlLog :: FilePath -> Bool
isUrlLog file = urlLogExt `isSuffixOf` file isUrlLog file = urlLogExt `isSuffixOf` file
{- The filename of the remote state log for a given key. -} {- The filename of the remote state log for a given key. -}
remoteStateLogFile :: Key -> FilePath remoteStateLogFile :: GitConfig -> Key -> FilePath
remoteStateLogFile key = hashDirLower def key </> keyFile key ++ remoteStateLogExt remoteStateLogFile config key = branchHashDir config key
</> keyFile key ++ remoteStateLogExt
remoteStateLogExt :: String remoteStateLogExt :: String
remoteStateLogExt = ".log.rmt" remoteStateLogExt = ".log.rmt"
@ -140,8 +144,8 @@ isRemoteStateLog :: FilePath -> Bool
isRemoteStateLog path = remoteStateLogExt `isSuffixOf` path isRemoteStateLog path = remoteStateLogExt `isSuffixOf` path
{- The filename of the chunk log for a given key. -} {- The filename of the chunk log for a given key. -}
chunkLogFile :: Key -> FilePath chunkLogFile :: GitConfig -> Key -> FilePath
chunkLogFile key = hashDirLower def key </> keyFile key ++ chunkLogExt chunkLogFile config key = branchHashDir config key </> keyFile key ++ chunkLogExt
chunkLogFileKey :: FilePath -> Maybe Key chunkLogFileKey :: FilePath -> Maybe Key
chunkLogFileKey path chunkLogFileKey path
@ -159,35 +163,11 @@ isChunkLog :: FilePath -> Bool
isChunkLog path = chunkLogExt `isSuffixOf` path isChunkLog path = chunkLogExt `isSuffixOf` path
{- The filename of the metadata log for a given key. -} {- The filename of the metadata log for a given key. -}
metaDataLogFile :: Key -> FilePath metaDataLogFile :: GitConfig -> Key -> FilePath
metaDataLogFile key = hashDirLower def key </> keyFile key ++ metaDataLogExt metaDataLogFile config key = branchHashDir config key </> keyFile key ++ metaDataLogExt
metaDataLogExt :: String metaDataLogExt :: String
metaDataLogExt = ".log.met" metaDataLogExt = ".log.met"
isMetaDataLog :: FilePath -> Bool isMetaDataLog :: FilePath -> Bool
isMetaDataLog path = metaDataLogExt `isSuffixOf` path isMetaDataLog path = metaDataLogExt `isSuffixOf` path
prop_logs_sane :: Key -> Bool
prop_logs_sane dummykey = and
[ isNothing (getLogVariety "unknown")
, expect gotUUIDBasedLog (getLogVariety uuidLog)
, expect gotPresenceLog (getLogVariety $ locationLogFile dummykey)
, expect gotPresenceLog (getLogVariety $ urlLogFile dummykey)
, expect gotNewUUIDBasedLog (getLogVariety $ remoteStateLogFile dummykey)
, expect gotChunkLog (getLogVariety $ chunkLogFile dummykey)
, expect gotOtherLog (getLogVariety $ metaDataLogFile dummykey)
, expect gotOtherLog (getLogVariety numcopiesLog)
]
where
expect = maybe False
gotUUIDBasedLog UUIDBasedLog = True
gotUUIDBasedLog _ = False
gotNewUUIDBasedLog NewUUIDBasedLog = True
gotNewUUIDBasedLog _ = False
gotChunkLog (ChunkLog k) = k == dummykey
gotChunkLog _ = False
gotPresenceLog (PresenceLog k) = k == dummykey
gotPresenceLog _ = False
gotOtherLog OtherLog = True
gotOtherLog _ = False

View file

@ -29,6 +29,7 @@ import Logs
import Logs.MapLog import Logs.MapLog
import qualified Annex.Branch import qualified Annex.Branch
import Logs.Chunk.Pure import Logs.Chunk.Pure
import qualified Annex
import qualified Data.Map as M import qualified Data.Map as M
import Data.Time.Clock.POSIX import Data.Time.Clock.POSIX
@ -36,14 +37,17 @@ import Data.Time.Clock.POSIX
chunksStored :: UUID -> Key -> ChunkMethod -> ChunkCount -> Annex () chunksStored :: UUID -> Key -> ChunkMethod -> ChunkCount -> Annex ()
chunksStored u k chunkmethod chunkcount = do chunksStored u k chunkmethod chunkcount = do
ts <- liftIO getPOSIXTime ts <- liftIO getPOSIXTime
Annex.Branch.change (chunkLogFile k) $ config <- Annex.getGitConfig
Annex.Branch.change (chunkLogFile config k) $
showLog . changeMapLog ts (u, chunkmethod) chunkcount . parseLog showLog . changeMapLog ts (u, chunkmethod) chunkcount . parseLog
chunksRemoved :: UUID -> Key -> ChunkMethod -> Annex () chunksRemoved :: UUID -> Key -> ChunkMethod -> Annex ()
chunksRemoved u k chunkmethod = chunksStored u k chunkmethod 0 chunksRemoved u k chunkmethod = chunksStored u k chunkmethod 0
getCurrentChunks :: UUID -> Key -> Annex [(ChunkMethod, ChunkCount)] getCurrentChunks :: UUID -> Key -> Annex [(ChunkMethod, ChunkCount)]
getCurrentChunks u k = select . parseLog <$> Annex.Branch.get (chunkLogFile k) getCurrentChunks u k = do
config <- Annex.getGitConfig
select . parseLog <$> Annex.Branch.get (chunkLogFile config k)
where where
select = filter (\(_m, ct) -> ct > 0) select = filter (\(_m, ct) -> ct > 0)
. map (\((_ku, m), l) -> (m, value l)) . map (\((_ku, m), l) -> (m, value l))

View file

@ -29,6 +29,7 @@ import Logs
import Logs.Presence import Logs.Presence
import Annex.UUID import Annex.UUID
import Git.Types (RefDate) import Git.Types (RefDate)
import qualified Annex
{- Log a change in the presence of a key's value in current repository. -} {- Log a change in the presence of a key's value in current repository. -}
logStatus :: Key -> LogStatus -> Annex () logStatus :: Key -> LogStatus -> Annex ()
@ -38,7 +39,9 @@ logStatus key s = do
{- Log a change in the presence of a key's value in a repository. -} {- Log a change in the presence of a key's value in a repository. -}
logChange :: Key -> UUID -> LogStatus -> Annex () logChange :: Key -> UUID -> LogStatus -> Annex ()
logChange key (UUID u) s = addLog (locationLogFile key) =<< logNow s u logChange key (UUID u) s = do
config <- Annex.getGitConfig
addLog (locationLogFile config key) =<< logNow s u
logChange _ NoUUID _ = noop logChange _ NoUUID _ = noop
{- Returns a list of repository UUIDs that, according to the log, have {- Returns a list of repository UUIDs that, according to the log, have
@ -51,7 +54,9 @@ loggedLocationsHistorical :: RefDate -> Key -> Annex [UUID]
loggedLocationsHistorical = getLoggedLocations . historicalLog loggedLocationsHistorical = getLoggedLocations . historicalLog
getLoggedLocations :: (FilePath -> Annex [String]) -> Key -> Annex [UUID] getLoggedLocations :: (FilePath -> Annex [String]) -> Key -> Annex [UUID]
getLoggedLocations getter key = map toUUID <$> (getter . locationLogFile) key getLoggedLocations getter key = do
config <- Annex.getGitConfig
map toUUID <$> (getter . locationLogFile config) key
{- Finds all keys that have location log information. {- Finds all keys that have location log information.
- (There may be duplicate keys in the list.) -} - (There may be duplicate keys in the list.) -}

View file

@ -38,6 +38,7 @@ import Common.Annex
import Types.MetaData import Types.MetaData
import Annex.MetaData.StandardFields import Annex.MetaData.StandardFields
import qualified Annex.Branch import qualified Annex.Branch
import qualified Annex
import Logs import Logs
import Logs.SingleValue import Logs.SingleValue
@ -52,7 +53,9 @@ instance SingleValueSerializable MetaData where
deserialize = Types.MetaData.deserialize deserialize = Types.MetaData.deserialize
getMetaDataLog :: Key -> Annex (Log MetaData) getMetaDataLog :: Key -> Annex (Log MetaData)
getMetaDataLog = readLog . metaDataLogFile getMetaDataLog key = do
config <- Annex.getGitConfig
readLog $ metaDataLogFile config key
{- Go through the log from oldest to newest, and combine it all {- Go through the log from oldest to newest, and combine it all
- into a single MetaData representing the current state. - into a single MetaData representing the current state.
@ -97,10 +100,12 @@ addMetaData k metadata = addMetaData' k metadata =<< liftIO getPOSIXTime
addMetaData' :: Key -> MetaData -> POSIXTime -> Annex () addMetaData' :: Key -> MetaData -> POSIXTime -> Annex ()
addMetaData' k d@(MetaData m) now addMetaData' k d@(MetaData m) now
| d == emptyMetaData = noop | d == emptyMetaData = noop
| otherwise = Annex.Branch.change (metaDataLogFile k) $ | otherwise = do
showLog . simplifyLog config <- Annex.getGitConfig
. S.insert (LogEntry now metadata) Annex.Branch.change (metaDataLogFile config k) $
. parseLog showLog . simplifyLog
. S.insert (LogEntry now metadata)
. parseLog
where where
metadata = MetaData $ M.filterWithKey (\f _ -> not (isLastChangedField f)) m metadata = MetaData $ M.filterWithKey (\f _ -> not (isLastChangedField f)) m
@ -181,6 +186,7 @@ copyMetaData oldkey newkey
| oldkey == newkey = noop | oldkey == newkey = noop
| otherwise = do | otherwise = do
l <- getMetaDataLog oldkey l <- getMetaDataLog oldkey
unless (S.null l) $ unless (S.null l) $ do
Annex.Branch.change (metaDataLogFile newkey) $ config <- Annex.getGitConfig
Annex.Branch.change (metaDataLogFile config newkey) $
const $ showLog l const $ showLog l

View file

@ -14,6 +14,7 @@ import Common.Annex
import Logs import Logs
import Logs.UUIDBased import Logs.UUIDBased
import qualified Annex.Branch import qualified Annex.Branch
import qualified Annex
import qualified Data.Map as M import qualified Data.Map as M
import Data.Time.Clock.POSIX import Data.Time.Clock.POSIX
@ -23,11 +24,14 @@ type RemoteState = String
setRemoteState :: UUID -> Key -> RemoteState -> Annex () setRemoteState :: UUID -> Key -> RemoteState -> Annex ()
setRemoteState u k s = do setRemoteState u k s = do
ts <- liftIO getPOSIXTime ts <- liftIO getPOSIXTime
Annex.Branch.change (remoteStateLogFile k) $ config <- Annex.getGitConfig
Annex.Branch.change (remoteStateLogFile config k) $
showLogNew id . changeLog ts u s . parseLogNew Just showLogNew id . changeLog ts u s . parseLogNew Just
getRemoteState :: UUID -> Key -> Annex (Maybe RemoteState) getRemoteState :: UUID -> Key -> Annex (Maybe RemoteState)
getRemoteState u k = extract . parseLogNew Just getRemoteState u k = do
<$> Annex.Branch.get (remoteStateLogFile k) config <- Annex.getGitConfig
extract . parseLogNew Just
<$> Annex.Branch.get (remoteStateLogFile config k)
where where
extract m = value <$> M.lookup u m extract m = value <$> M.lookup u m

View file

@ -37,7 +37,8 @@ import Utility.Url
{- Gets all urls that a key might be available from. -} {- Gets all urls that a key might be available from. -}
getUrls :: Key -> Annex [URLString] getUrls :: Key -> Annex [URLString]
getUrls key = do getUrls key = do
l <- go $ urlLogFile key : oldurlLogs key config <- Annex.getGitConfig
l <- go $ urlLogFile config key : oldurlLogs config key
tmpl <- Annex.getState (maybeToList . M.lookup key . Annex.tempurls) tmpl <- Annex.getState (maybeToList . M.lookup key . Annex.tempurls)
return (tmpl ++ l) return (tmpl ++ l)
where where
@ -54,13 +55,15 @@ getUrlsWithPrefix key prefix = filter (prefix `isPrefixOf`) <$> getUrls key
setUrlPresent :: UUID -> Key -> URLString -> Annex () setUrlPresent :: UUID -> Key -> URLString -> Annex ()
setUrlPresent uuid key url = do setUrlPresent uuid key url = do
us <- getUrls key us <- getUrls key
unless (url `elem` us) $ unless (url `elem` us) $ do
addLog (urlLogFile key) =<< logNow InfoPresent url config <- Annex.getGitConfig
addLog (urlLogFile config key) =<< logNow InfoPresent url
logChange key uuid InfoPresent logChange key uuid InfoPresent
setUrlMissing :: UUID -> Key -> URLString -> Annex () setUrlMissing :: UUID -> Key -> URLString -> Annex ()
setUrlMissing uuid key url = do setUrlMissing uuid key url = do
addLog (urlLogFile key) =<< logNow InfoMissing url config <- Annex.getGitConfig
addLog (urlLogFile config key) =<< logNow InfoMissing url
whenM (null <$> getUrls key) $ whenM (null <$> getUrls key) $
logChange key uuid InfoMissing logChange key uuid InfoMissing

View file

@ -38,7 +38,6 @@ import qualified Types.KeySource
import qualified Types.Backend import qualified Types.Backend
import qualified Types.TrustLevel import qualified Types.TrustLevel
import qualified Types import qualified Types
import qualified Logs
import qualified Logs.MapLog import qualified Logs.MapLog
import qualified Logs.Trust import qualified Logs.Trust
import qualified Logs.Remote import qualified Logs.Remote
@ -138,7 +137,6 @@ properties = localOption (QuickCheckTests 1000) $ testGroup "QuickCheck"
, testProperty "prop_idempotent_key_decode" Types.Key.prop_idempotent_key_decode , testProperty "prop_idempotent_key_decode" Types.Key.prop_idempotent_key_decode
, testProperty "prop_idempotent_shellEscape" Utility.SafeCommand.prop_idempotent_shellEscape , testProperty "prop_idempotent_shellEscape" Utility.SafeCommand.prop_idempotent_shellEscape
, testProperty "prop_idempotent_shellEscape_multiword" Utility.SafeCommand.prop_idempotent_shellEscape_multiword , testProperty "prop_idempotent_shellEscape_multiword" Utility.SafeCommand.prop_idempotent_shellEscape_multiword
, testProperty "prop_logs_sane" Logs.prop_logs_sane
, testProperty "prop_idempotent_configEscape" Logs.Remote.prop_idempotent_configEscape , testProperty "prop_idempotent_configEscape" Logs.Remote.prop_idempotent_configEscape
, testProperty "prop_parse_show_Config" Logs.Remote.prop_parse_show_Config , testProperty "prop_parse_show_Config" Logs.Remote.prop_parse_show_Config
, testProperty "prop_upFrom_basics" Utility.Path.prop_upFrom_basics , testProperty "prop_upFrom_basics" Utility.Path.prop_upFrom_basics

View file

@ -12,6 +12,7 @@ import qualified Git
import qualified Git.Command import qualified Git.Command
import qualified Git.Ref import qualified Git.Ref
import qualified Annex.Branch import qualified Annex.Branch
import qualified Annex
import Annex.Content import Annex.Content
import Utility.Tmp import Utility.Tmp
import Logs import Logs
@ -47,7 +48,8 @@ upgrade = do
e <- liftIO $ doesDirectoryExist old e <- liftIO $ doesDirectoryExist old
when e $ do when e $ do
mapM_ (\(k, f) -> inject f $ locationLogFile k) =<< locationLogs config <- Annex.getGitConfig
mapM_ (\(k, f) -> inject f $ locationLogFile config k) =<< locationLogs
mapM_ (\f -> inject f f) =<< logFiles old mapM_ (\f -> inject f f) =<< logFiles old
saveState False saveState False

3
debian/changelog vendored
View file

@ -26,7 +26,8 @@ git-annex (5.20150114) UNRELEASED; urgency=medium
http://git-annex.branchable.com/tuning/ http://git-annex.branchable.com/tuning/
* merge: Refuse to merge changes from a git-annex branch of a repo * merge: Refuse to merge changes from a git-annex branch of a repo
that has been tuned in incompatable ways. that has been tuned in incompatable ways.
* Support annex.tune.objecthash1 and annex.tune.objecthashlower. * Support annex.tune.objecthash1, annex.tune.objecthashlower, and
annex.tune.branchhash1.
-- Joey Hess <id@joeyh.name> Tue, 13 Jan 2015 17:03:39 -0400 -- Joey Hess <id@joeyh.name> Tue, 13 Jan 2015 17:03:39 -0400