Repository tuning parameters can now be passed when initializing a repository for the first time.

* init: Repository tuning parameters can now be passed when initializing a
  repository for the first time. For details, see
  http://git-annex.branchable.com/tuning/
* merge: Refuse to merge changes from a git-annex branch of a repo
  that has been tuned in incompatable ways.
This commit is contained in:
Joey Hess 2015-01-27 17:38:06 -04:00
parent b11a7b0ace
commit 70736d2b41
25 changed files with 376 additions and 25 deletions

View file

@ -134,7 +134,7 @@ resolveMerge' (Just us) them u = do
makelink key = do
let dest = variantFile file key
l <- inRepo $ gitAnnexLink dest key
l <- calcRepo $ gitAnnexLink dest key
replacewithlink dest l
stageSymlink dest =<< hashSymlink l

View file

@ -49,9 +49,11 @@ import Annex.Perms
import Logs
import Logs.Transitions
import Logs.Trust.Pure
import Logs.Difference.Pure
import Annex.ReplaceFile
import qualified Annex.Queue
import Annex.Branch.Transitions
import qualified Annex
{- Name of the branch that is used to store git-annex's information. -}
name :: Git.Ref
@ -160,6 +162,7 @@ updateTo pairs = do
<$> getLocal transitionsLog
unless (null branches) $ do
showSideAction merge_desc
mapM_ checkBranchDifferences refs
mergeIndex jl refs
let commitrefs = nub $ fullname:refs
unlessM (handleTransitions jl localtransitions commitrefs) $ do
@ -537,3 +540,11 @@ performTransitionsLocked jl ts neednewlocalbranch transitionedrefs = do
apply rest hasher file content' trustmap
PreserveFile ->
apply rest hasher file content trustmap
checkBranchDifferences :: Git.Ref -> Annex ()
checkBranchDifferences ref = do
theirdiffs <- allDifferences . parseDifferencesLog . decodeBS
<$> catFile ref differenceLog
mydiffs <- annexDifferences <$> Annex.getGitConfig
when (theirdiffs /= mydiffs) $
error "Remote repository is tuned in incompatable way; cannot be merged with local repository."

View file

@ -446,7 +446,7 @@ removeAnnex (ContentLock key) = withObjectLoc key remove removedirect
removeInodeCache key
mapM_ (resetfile cache) fs
resetfile cache f = whenM (sameInodeCache f cache) $ do
l <- inRepo $ gitAnnexLink f key
l <- calcRepo $ gitAnnexLink f key
secureErase f
replaceFile f $ makeAnnexLink l

60
Annex/Difference.hs Normal file
View file

@ -0,0 +1,60 @@
{- git-annex repository differences
-
- Copyright 2015 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Annex.Difference (
module Types.Difference,
setDifferences,
) where
import Common.Annex
import Types.Difference
import Logs.Difference
import Config
import Annex.UUID
import Logs.UUID
import Annex.Version
import qualified Annex
import qualified Data.Map as M
-- Differences are only allowed to be tweaked when initializing a
-- repository for the first time, and then only if there is not another
-- known uuid. If the repository was cloned from elsewhere, it inherits
-- the existing settings.
--
-- Must be called before setVersion, so it can check if this is the first
-- time the repository is being initialized.
setDifferences :: Annex ()
setDifferences = do
u <- getUUID
otherds <- either error return
=<< sanityCheckDifferences . allDifferences
<$> recordedDifferences
ds <- mappend otherds . annexDifferences <$> Annex.getGitConfig
when (ds /= mempty) $ do
ds'@(Differences l) <- ifM (isJust <$> getVersion)
( do
oldds <- recordedDifferencesFor u
when (ds /= oldds) $
warning $ "Cannot change tunable parameters in already initialized repository."
return oldds
, if otherds == mempty
then ifM (not . null . filter (/= u) . M.keys <$> uuidMap)
( do
warning "Cannot change tunable parameters in a clone of an existing repository."
return mempty
, return ds
)
else if otherds /= ds
then do
warning "The specified tunable parameters differ from values being used in other clones of this repository."
return otherds
else return ds
)
forM_ l $ \d ->
setConfig (ConfigKey $ differenceConfigKey d) (differenceConfigVal d)
recordDifferences ds' u

View file

@ -86,7 +86,7 @@ stageDirect = do
deletegit file
stageannexlink file key = do
l <- inRepo $ gitAnnexLink file key
l <- calcRepo $ gitAnnexLink file key
stageSymlink file =<< hashSymlink l
void $ addAssociatedFile key file
@ -131,7 +131,7 @@ addDirect file cache = do
return False
got (Just (key, _)) = ifM (sameInodeCache file [cache])
( do
l <- inRepo $ gitAnnexLink file key
l <- calcRepo $ gitAnnexLink file key
stageSymlink file =<< hashSymlink l
addInodeCache key cache
void $ addAssociatedFile key file
@ -282,7 +282,7 @@ updateWorkTree d oldref = do
- with the content. -}
movein item makeabs k f = unlessM (goodContent k f) $ do
preserveUnannexed item makeabs f oldref
l <- inRepo $ gitAnnexLink f k
l <- calcRepo $ gitAnnexLink f k
replaceFile f $ makeAnnexLink l
toDirect k f

View file

@ -27,6 +27,7 @@ import Logs.UUID
import Logs.Trust.Basic
import Types.TrustLevel
import Annex.Version
import Annex.Difference
import Annex.UUID
import Config
import Annex.Direct
@ -73,6 +74,7 @@ initialize' = do
checkCrippledFileSystem
unlessM isBare $
hookWrite preCommitHook
setDifferences
setVersion supportedVersion
ifM (crippledFileSystem <&&> not <$> isBare)
( do

View file

@ -354,7 +354,7 @@ applyView' mkviewedfile getfilemetadata view = do
let metadata' = getfilemetadata f `unionMetaData` metadata
forM_ (genviewedfiles f metadata') $ \fv -> do
f' <- fromRepo $ fromTopFilePath $ asTopFilePath fv
stagesymlink uh hasher f' =<< inRepo (gitAnnexLink f' k)
stagesymlink uh hasher f' =<< calcRepo (gitAnnexLink f' k)
go uh hasher f Nothing
| "." `isPrefixOf` f = do
s <- liftIO $ getSymbolicLinkStatus f

View file

@ -352,7 +352,7 @@ handleAdds havelsof delayadd cs = returnWhen (null incomplete) $ do
done change mcache file key = liftAnnex $ do
logStatus key InfoPresent
link <- ifM isDirect
( inRepo $ gitAnnexLink file key
( calcRepo $ gitAnnexLink file key
, Command.Add.link file key mcache
)
whenM (pure DirWatcher.eventsCoalesce <||> isDirect) $

View file

@ -232,7 +232,7 @@ onAddDirect symlinkssupported matcher file fs = do
- so it symlink is restaged to make sure. -}
( ifM (shouldRestage <$> getDaemonStatus)
( do
link <- liftAnnex $ inRepo $ gitAnnexLink file key
link <- liftAnnex $ calcRepo $ gitAnnexLink file key
addLink file link (Just key)
, noChange
)
@ -279,7 +279,7 @@ onAddSymlink' linktarget mk isdirect file filestatus = go mk
go (Just key) = do
when isdirect $
liftAnnex $ void $ addAssociatedFile key file
link <- liftAnnex $ inRepo $ gitAnnexLink file key
link <- liftAnnex $ calcRepo $ gitAnnexLink file key
if linktarget == Just link
then ensurestaged (Just link) =<< getDaemonStatus
else do

View file

@ -230,7 +230,7 @@ undo file key e = do
{- Creates the symlink to the annexed content, returns the link target. -}
link :: FilePath -> Key -> Maybe InodeCache -> Annex String
link file key mcache = flip catchNonAsync (undo file key) $ do
l <- inRepo $ gitAnnexLink file key
l <- calcRepo $ gitAnnexLink file key
replaceFile file $ makeAnnexLink l
-- touch symlink to have same time as the original file,
@ -272,7 +272,7 @@ cleanup :: FilePath -> Key -> Maybe InodeCache -> Bool -> CommandCleanup
cleanup file key mcache hascontent = do
ifM (isDirect <&&> pure hascontent)
( do
l <- inRepo $ gitAnnexLink file key
l <- calcRepo $ gitAnnexLink file key
stageSymlink file =<< hashSymlink l
, addLink file key mcache
)

View file

@ -28,7 +28,7 @@ seek = withFilesInGit $ whenAnnexed start
{- Fixes the symlink to an annexed file. -}
start :: FilePath -> Key -> CommandStart
start file key = do
link <- inRepo $ gitAnnexLink file key
link <- calcRepo $ gitAnnexLink file key
stopUnless ((/=) (Just link) <$> liftIO (catchMaybeIO $ readSymbolicLink file)) $ do
showStart "fix" file
next $ perform file link

View file

@ -33,7 +33,7 @@ start _ = error "specify a key and a dest file"
perform :: Key -> FilePath -> CommandPerform
perform key file = do
link <- inRepo $ gitAnnexLink file key
link <- calcRepo $ gitAnnexLink file key
liftIO $ createDirectoryIfMissing True (parentDir file)
liftIO $ createSymbolicLink link file
next $ cleanup file

View file

@ -192,7 +192,7 @@ check cs = and <$> sequence cs
-}
fixLink :: Key -> FilePath -> Annex Bool
fixLink key file = do
want <- inRepo $ gitAnnexLink file key
want <- calcRepo $ gitAnnexLink file key
have <- getAnnexLinkTarget file
maybe noop (go want) have
return True

View file

@ -88,7 +88,7 @@ perform = do
v <- tryNonAsync (moveAnnex k f)
case v of
Right _ -> do
l <- inRepo $ gitAnnexLink f k
l <- calcRepo $ gitAnnexLink f k
liftIO $ createSymbolicLink l f
Left e -> catchNonAsync (Command.Add.undo f k e)
warnlocked

View file

@ -76,6 +76,7 @@ import Common
import Types
import Types.Key
import Types.UUID
import Types.Difference
import qualified Git
{- Conventions:
@ -120,17 +121,22 @@ annexLocation key hasher = objectDir </> keyPath key hasher
- the actual location of the file's content.
-}
gitAnnexLocation :: Key -> Git.Repo -> GitConfig -> IO FilePath
gitAnnexLocation key r config = gitAnnexLocation' key r (annexCrippledFileSystem config)
gitAnnexLocation' :: Key -> Git.Repo -> Bool -> IO FilePath
gitAnnexLocation' key r crippled
gitAnnexLocation key r config = gitAnnexLocation' key r config (annexCrippledFileSystem config)
gitAnnexLocation' :: Key -> Git.Repo -> GitConfig -> Bool -> IO FilePath
gitAnnexLocation' key r config crippled
{- Bare repositories default to hashDirLower for new
- content, as it's more portable.
-
- Repositories on filesystems that are crippled also use
- hashDirLower, since they do not use symlinks and it's
- more portable. -}
| Git.repoIsLocalBare r || crippled =
check $ map inrepo $ annexLocations key
- more portable.
-
- ObjectHashLower can also be set to force it.
-}
| Git.repoIsLocalBare r
|| crippled
|| hasDifference (== ObjectHashLower True) (annexDifferences config) =
check $ map inrepo $ annexLocations key
{- Non-bare repositories only use hashDirMixed, so
- don't need to do any work to check if the file is
- present. -}
@ -141,11 +147,11 @@ gitAnnexLocation' key r crippled
check [] = error "internal"
{- Calculates a symlink to link a file to an annexed object. -}
gitAnnexLink :: FilePath -> Key -> Git.Repo -> IO FilePath
gitAnnexLink file key r = do
gitAnnexLink :: FilePath -> Key -> Git.Repo -> GitConfig -> IO FilePath
gitAnnexLink file key r config = do
currdir <- getCurrentDirectory
let absfile = fromMaybe whoops $ absNormPathUnix currdir file
loc <- gitAnnexLocation' key r False
loc <- gitAnnexLocation' key r config False
relPathDirToFile (parentDir absfile) loc
where
whoops = error $ "unable to normalize " ++ file

View file

@ -39,6 +39,7 @@ topLevelUUIDBasedLogs =
, preferredContentLog
, requiredContentLog
, scheduleLog
, differenceLog
]
{- All the ways to get a key from a presence log file -}
@ -82,6 +83,9 @@ groupPreferredContentLog = "group-preferred-content.log"
scheduleLog :: FilePath
scheduleLog = "schedule.log"
differenceLog :: FilePath
differenceLog = "difference.log"
{- The pathname of the location log file for a given key. -}
locationLogFile :: Key -> String
locationLogFile key = hashDirLower key ++ keyFile key ++ ".log"

40
Logs/Difference.hs Normal file
View file

@ -0,0 +1,40 @@
{- git-annex difference log
-
- Copyright 2015 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Logs.Difference (
recordDifferences,
recordedDifferences,
recordedDifferencesFor,
module Logs.Difference.Pure
) where
import Data.Monoid
import Data.Time.Clock.POSIX
import qualified Data.Map as M
import Common.Annex
import Types.Difference
import qualified Annex.Branch
import Logs
import Logs.UUIDBased
import Logs.Difference.Pure
recordDifferences :: Differences -> UUID -> Annex ()
recordDifferences differences uuid = do
ts <- liftIO getPOSIXTime
Annex.Branch.change differenceLog $
showLog id . changeLog ts uuid (show differences) . parseLog Just
-- Map of UUIDs that have Differences recorded.
-- If a new version of git-annex has added a Difference this version
-- doesn't know about, it will contain UnknownDifferences.
recordedDifferences :: Annex (M.Map UUID Differences)
recordedDifferences = parseDifferencesLog <$> Annex.Branch.get differenceLog
recordedDifferencesFor :: UUID -> Annex Differences
recordedDifferencesFor u = fromMaybe mempty . M.lookup u
<$> recordedDifferences

26
Logs/Difference/Pure.hs Normal file
View file

@ -0,0 +1,26 @@
{- git-annex difference log, pure functions
-
- Copyright 2015 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Logs.Difference.Pure (
allDifferences,
parseDifferencesLog,
) where
import Data.Monoid
import qualified Data.Map as M
import Common.Annex
import Types.Difference
import Logs.UUIDBased
parseDifferencesLog :: String -> (M.Map UUID Differences)
parseDifferencesLog = simpleMap
. parseLog (Just . fromMaybe UnknownDifferences . readish)
-- The sum of all recorded differences, across all UUIDs.
allDifferences :: M.Map UUID Differences -> Differences
allDifferences = mconcat . M.elems

135
Types/Difference.hs Normal file
View file

@ -0,0 +1,135 @@
{- git-annex repository differences
-
- Copyright 2015 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Types.Difference (
Difference(..),
Differences(..),
getDifferences,
sanityCheckDifferences,
differenceConfigKey,
differenceConfigVal,
hasDifference,
) where
import Utility.PartialPrelude
import qualified Git
import qualified Git.Config
import Data.List
import Data.Maybe
import Data.Monoid
import Control.Applicative
-- Describes differences from the v5 repository format.
--
-- The serilization is stored in difference.log, so avoid changes that
-- would break compatability.
--
-- Not breaking comparability is why a list of Differences is used, rather
-- than a sum type. With a sum type, adding a new field for some future
-- difference would serialize to a value that an older version could not
-- parse, even if that new field was not used. With the Differences list,
-- old versions can still parse it, unless the new Difference constructor
-- is used.
data Difference
= Version Int
| ObjectHashLower Bool
| ObjectHashDirectories Int
| BranchHashDirectories Int
deriving (Show, Read, Ord)
instance Eq Difference where
Version a == Version b = a == b
ObjectHashLower a == ObjectHashLower b = a == b
ObjectHashDirectories a == ObjectHashDirectories b = a == b
BranchHashDirectories a == BranchHashDirectories b = a == b
_ == _ = False
data Differences
= Differences [Difference]
| UnknownDifferences
deriving (Show, Read, Ord)
instance Eq Differences where
Differences a == Differences b = simplify (defver:a) == simplify (defver:b)
_ == _ = False
instance Monoid Differences where
mempty = Differences []
mappend (Differences l1) (Differences l2) = Differences (simplify (l1 ++ l2))
mappend _ _ = UnknownDifferences
-- This is the default repository version that is assumed when no other one
-- is given. Note that [] == [Version 5]
defver :: Difference
defver = Version 5
-- Larger values of the same Difference constructor dominate
-- over smaller values, so given [Version 6, Version 5], returns [Version 6]
simplify :: [Difference] -> [Difference]
simplify = go . sort
where
go [] = []
go (d:[]) = [d]
go (d1:d2:ds)
| like d1 d2 = go (d2:ds)
| otherwise = d1 : go (d2:ds)
like (Version _) (Version _) = True
like (ObjectHashLower _) (ObjectHashLower _) = True
like (ObjectHashDirectories _) (ObjectHashDirectories _) = True
like (BranchHashDirectories _) (BranchHashDirectories _) = True
like _ _ = False
getDifferences :: Git.Repo -> Differences
getDifferences r = checksane $ Differences $ catMaybes
[ ObjectHashLower
<$> getmaybebool (differenceConfigKey (ObjectHashLower undefined))
, ObjectHashDirectories
<$> getmayberead (differenceConfigKey (ObjectHashDirectories undefined))
, BranchHashDirectories
<$> getmayberead (differenceConfigKey (BranchHashDirectories undefined))
]
where
getmaybe k = Git.Config.getMaybe k r
getmayberead k = readish =<< getmaybe k
getmaybebool k = Git.Config.isTrue =<< getmaybe k
checksane = either error id . sanityCheckDifferences
differenceConfigKey :: Difference -> String
differenceConfigKey (Version _) = "annex.version"
differenceConfigKey (ObjectHashLower _) = tunable "objecthashlower"
differenceConfigKey (ObjectHashDirectories _) = tunable "objecthashdirectories"
differenceConfigKey (BranchHashDirectories _) = tunable "branchhashdirectories"
differenceConfigVal :: Difference -> String
differenceConfigVal (Version v) = show v
differenceConfigVal (ObjectHashLower b) = Git.Config.boolConfig b
differenceConfigVal (ObjectHashDirectories n) = show n
differenceConfigVal (BranchHashDirectories n) = show n
tunable :: String -> String
tunable k = "annex.tune." ++ k
sanityCheckDifferences :: Differences -> Either String Differences
sanityCheckDifferences d@(Differences l)
| null problems = Right d
| otherwise = Left (intercalate "; " problems)
where
problems = catMaybes (map check l)
check (ObjectHashDirectories n)
| n == 1 || n == 2 = Nothing
| otherwise = Just $ "Bad value for objecthashdirectories -- should be 1 or 2, not " ++ show n
check (BranchHashDirectories n)
| n == 1 || n == 2 = Nothing
| otherwise = Just $ "Bad value for branhhashdirectories -- should be 1 or 2, not " ++ show n
check _ = Nothing
sanityCheckDifferences UnknownDifferences = Left "unknown differences detected; update git-annex"
hasDifference :: (Difference -> Bool) -> Differences -> Bool
hasDifference f (Differences l) = any f l
hasDifference _ UnknownDifferences = False

View file

@ -20,6 +20,7 @@ import Config.Cost
import Types.Distribution
import Types.Availability
import Types.NumCopies
import Types.Difference
import Utility.HumanTime
{- Main git-annex settings. Each setting corresponds to a git-config key
@ -56,6 +57,7 @@ data GitConfig = GitConfig
, annexHardLink :: Bool
, coreSymlinks :: Bool
, gcryptId :: Maybe String
, annexDifferences :: Differences
}
extractGitConfig :: Git.Repo -> GitConfig
@ -93,6 +95,7 @@ extractGitConfig r = GitConfig
, annexHardLink = getbool (annex "hardlink") False
, coreSymlinks = getbool "core.symlinks" True
, gcryptId = getmaybe "core.gcrypt-id"
, annexDifferences = getDifferences r
}
where
getbool k def = fromMaybe def $ getmaybebool k

View file

@ -92,7 +92,7 @@ updateSymlinks = do
case r of
Nothing -> noop
Just (k, _) -> do
link <- inRepo $ gitAnnexLink f k
link <- calcRepo $ gitAnnexLink f k
liftIO $ removeFile f
liftIO $ createSymbolicLink link f
Annex.Queue.addCommand "add" [Param "--"] [f]

5
debian/changelog vendored
View file

@ -21,6 +21,11 @@ git-annex (5.20150114) UNRELEASED; urgency=medium
with an existing file. (--file overrides this)
* Fix default repository description created by git annex init,
which got broken by the relative path changes in the last release.
* init: Repository tuning parameters can now be passed when initializing a
repository for the first time. For details, see
http://git-annex.branchable.com/tuning/
* merge: Refuse to merge changes from a git-annex branch of a repo
that has been tuned in incompatable ways.
-- Joey Hess <id@joeyh.name> Tue, 13 Jan 2015 17:03:39 -0400

View file

@ -1839,6 +1839,12 @@ Here are all the supported configuration settings.
Used by hook special remotes and external special remotes to record
the type of the remote.
* `annex.tune.objecthashdirectories`, `annex.tune.objecthashlower`, `annex.tune.branchhashdirectories`
These can be passed to `git annex init` to tune the repository.
They cannot be safely changed in a running repository.
For details, see <http://git-annex.branchable.com/tuning/>.
# CONFIGURATION VIA .gitattributes
The key-value backend used when adding a new file to the annex can be

View file

@ -257,3 +257,12 @@ Example:
ForgetGitHistory 1387325539.685136s
ForgetDeadRemotes 1387325539.685136s
## `difference.log`
Used when a repository has fundamental differences from other repositories,
that should prevent merging.
Example:
e605dca6-446a-11e0-8b2a-002170d25c55 [Version 5] timestamp=1422387398.30395s

44
doc/tuning.mdwn Normal file
View file

@ -0,0 +1,44 @@
git-annex now has experimental support for tuning a repository for
different work loads.
For example, a repository with a very large number of files in it may work
better if git-annex uses some nonstandard hash format, for either the
`.git/annex/objects/` directory, or for the log files in the git-annex
branch.
A repository can currently only be tuned when it is first created; this is
done by passing `-c name=value` parameters to `git annex init`.
For example, this will make git-annex use only 1 level for hash directories
in `.git/annex/objects`:
git -c annex.tune.objecthashdirectories=1 annex init
It's very important to keep in mind that this makes a nonstandard format
git-annex repository. In general, this cannot safely be used with
git-annex older than version 5.20150128. Older version of git-annex will
not understand and will get confused and perhaps do bad things.
Also, it's not safe to merge two separate git repositories that have been
tuned differently (or one tuned and the other one not). git-annex will
prevent merging their git-annex branches together, but it cannot prevent
`git merge remote/master` merging two branches, and the result will be ugly
at best (`git annex fix` can fix up the mess somewhat).
Again, tuned repositories are an experimental feature; use with caution!
The following tuning parameters are available:
* `annex.tune.objecthashdirectories` (default: 2)
Sets the number of hash directories to use in `.git/annex/objects/`
* `annex.tune.objecthashlower` (default: false)
Set to true to make the hash directories in `.git/annex/objects/` use
all lower-case.
* `annex.tune.branchhashdirectories` (default: 2)
Sets the number of hash directories to use in the git-annex branch.
Note that git-annex will automatically propigate these setting to
`.git/config` for tuned repsitories. You should never directly change
these settings in `.git/config`