rework Differences data type

Eliminated complexity and future proofed. The most important change is that
all functions over Difference are now total; any Difference that can be
expressed should be handled. Avoids needs for sanity checking of inputs,
and version skew with the future.

Also, the difference.log now serializes a [Difference], not a Differences.
This saves space and keeps it simpler.

Note that [Difference] might contain conflicting differences (eg,
[Version5, Version6]. In this case, one of them needs to consistently win
over the others, probably based on Ord.
This commit is contained in:
Joey Hess 2015-01-28 13:47:41 -04:00
parent 354de19cbe
commit ba3825441c
8 changed files with 42 additions and 90 deletions

View file

@ -31,9 +31,7 @@ import qualified Data.Map as M
setDifferences :: Annex ()
setDifferences = do
u <- getUUID
otherds <- either error return
=<< sanityCheckDifferences . allDifferences
<$> recordedDifferences
otherds <- allDifferences <$> recordedDifferences
ds <- mappend otherds . annexDifferences <$> Annex.getGitConfig
when (ds /= mempty) $ do
ds'@(Differences l) <- ifM (isJust <$> getVersion)

View file

@ -135,7 +135,7 @@ gitAnnexLocation' key r config crippled
-}
| Git.repoIsLocalBare r
|| crippled
|| hasDifference (== ObjectHashLower True) (annexDifferences config) =
|| hasDifference (== ObjectHashLower) (annexDifferences config) =
check $ map inrepo $ annexLocations key
{- Non-bare repositories only use hashDirMixed, so
- don't need to do any work to check if the file is

View file

@ -24,10 +24,11 @@ import Logs.UUIDBased
import Logs.Difference.Pure
recordDifferences :: Differences -> UUID -> Annex ()
recordDifferences differences uuid = do
recordDifferences (Differences differences) uuid = do
ts <- liftIO getPOSIXTime
Annex.Branch.change differenceLog $
showLog id . changeLog ts uuid (show differences) . parseLog Just
recordDifferences UnknownDifferences _ = return ()
-- Map of UUIDs that have Differences recorded.
-- If a new version of git-annex has added a Difference this version

View file

@ -19,7 +19,7 @@ import Logs.UUIDBased
parseDifferencesLog :: String -> (M.Map UUID Differences)
parseDifferencesLog = simpleMap
. parseLog (Just . fromMaybe UnknownDifferences . readish)
. parseLog (Just . maybe UnknownDifferences Differences . readish)
-- The sum of all recorded differences, across all UUIDs.
allDifferences :: M.Map UUID Differences -> Differences

View file

@ -9,123 +9,74 @@ module Types.Difference (
Difference(..),
Differences(..),
getDifferences,
sanityCheckDifferences,
differenceConfigKey,
differenceConfigVal,
hasDifference,
) where
import Utility.PartialPrelude
import qualified Git
import qualified Git.Config
import Data.List
import Data.Maybe
import Data.Monoid
import Control.Applicative
-- Describes differences from the v5 repository format.
--
-- The serilization is stored in difference.log, so avoid changes that
-- The serialization is stored in difference.log, so avoid changes that
-- would break compatability.
--
-- Not breaking comparability is why a list of Differences is used, rather
-- Not breaking compatability is why a list of Differences is used, rather
-- than a sum type. With a sum type, adding a new field for some future
-- difference would serialize to a value that an older version could not
-- parse, even if that new field was not used. With the Differences list,
-- old versions can still parse it, unless the new Difference constructor
-- is used.
--
-- The constructors intentionally do not have parameters; this is to
-- ensure that any Difference that can be expressed is supported.
-- So, a new repository version would be Version6, rather than Version Int.
data Difference
= Version Int
| ObjectHashLower Bool
| ObjectHashDirectories Int
| BranchHashDirectories Int
deriving (Show, Read, Ord)
instance Eq Difference where
Version a == Version b = a == b
ObjectHashLower a == ObjectHashLower b = a == b
ObjectHashDirectories a == ObjectHashDirectories b = a == b
BranchHashDirectories a == BranchHashDirectories b = a == b
_ == _ = False
= ObjectHashLower
| OneLevelObjectHash
| OneLevelBranchHash
deriving (Show, Read, Ord, Eq, Enum, Bounded)
data Differences
= Differences [Difference]
| UnknownDifferences
deriving (Show, Read, Ord)
instance Eq Differences where
Differences a == Differences b = simplify (defver:a) == simplify (defver:b)
_ == _ = False
Differences a == Differences b = canon a == canon b
_ == _ = False -- UnknownDifferences cannot be equal
instance Monoid Differences where
mempty = Differences []
mappend (Differences l1) (Differences l2) = Differences (simplify (l1 ++ l2))
mappend (Differences l1) (Differences l2) = Differences (canon (l1 ++ l2))
mappend _ _ = UnknownDifferences
-- This is the default repository version that is assumed when no other one
-- is given. Note that [] == [Version 5]
defver :: Difference
defver = Version 5
-- Given [Version 6, Version 5], returns [Version 6]
simplify :: [Difference] -> [Difference]
simplify = go . sort
where
go [] = []
go (d:[]) = [d]
go (d1:d2:ds)
| like d1 d2 = go (d2:ds)
| otherwise = d1 : go (d2:ds)
like (Version _) (Version _) = True
like _ _ = False
canon :: [Difference] -> [Difference]
canon = nub . sort
getDifferences :: Git.Repo -> Differences
getDifferences r = checksane $ Differences $ catMaybes
[ ObjectHashLower
<$> getmaybebool (differenceConfigKey (ObjectHashLower undefined))
, ObjectHashDirectories
<$> getmayberead (differenceConfigKey (ObjectHashDirectories undefined))
, BranchHashDirectories
<$> getmayberead (differenceConfigKey (BranchHashDirectories undefined))
]
getDifferences r = Differences $ catMaybes $
map getmaybe [minBound .. maxBound]
where
getmaybe k = Git.Config.getMaybe k r
getmayberead k = readish =<< getmaybe k
getmaybebool k = Git.Config.isTrue =<< getmaybe k
checksane = either error id . sanityCheckDifferences
getmaybe d = case Git.Config.isTrue =<< Git.Config.getMaybe (differenceConfigKey d) r of
Just True -> Just d
_ -> Nothing
differenceConfigKey :: Difference -> String
differenceConfigKey (Version _) = "annex.version"
differenceConfigKey (ObjectHashLower _) = tunable "objecthashlower"
differenceConfigKey (ObjectHashDirectories _) = tunable "objecthashdirectories"
differenceConfigKey (BranchHashDirectories _) = tunable "branchhashdirectories"
differenceConfigKey ObjectHashLower = tunable "objecthashlower"
differenceConfigKey OneLevelObjectHash = tunable "objecthash1"
differenceConfigKey OneLevelBranchHash = tunable "branchhash1"
differenceConfigVal :: Difference -> String
differenceConfigVal (Version v) = show v
differenceConfigVal (ObjectHashLower b) = Git.Config.boolConfig b
differenceConfigVal (ObjectHashDirectories n) = show n
differenceConfigVal (BranchHashDirectories n) = show n
differenceConfigVal _ = Git.Config.boolConfig True
tunable :: String -> String
tunable k = "annex.tune." ++ k
sanityCheckDifferences :: Differences -> Either String Differences
sanityCheckDifferences d@(Differences l)
| null problems = Right d
| otherwise = Left (intercalate "; " problems)
where
problems = catMaybes (map check l)
check (ObjectHashDirectories n)
| n == 1 || n == 2 = Nothing
| otherwise = Just $ "Bad value for objecthashdirectories -- should be 1 or 2, not " ++ show n
check (BranchHashDirectories n)
| n == 1 || n == 2 = Nothing
| otherwise = Just $ "Bad value for branhhashdirectories -- should be 1 or 2, not " ++ show n
check _ = Nothing
sanityCheckDifferences UnknownDifferences = Left "unknown differences detected; update git-annex"
hasDifference :: (Difference -> Bool) -> Differences -> Bool
hasDifference f (Differences l) = any f l
hasDifference _ UnknownDifferences = False

View file

@ -1839,7 +1839,7 @@ Here are all the supported configuration settings.
Used by hook special remotes and external special remotes to record
the type of the remote.
* `annex.tune.objecthashdirectories`, `annex.tune.objecthashlower`, `annex.tune.branchhashdirectories`
* `annex.tune.objecthash1`, `annex.tune.objecthashlower`, `annex.tune.branchhash1`
These can be passed to `git annex init` to tune the repository.
They cannot be safely changed in a running repository.

View file

@ -265,4 +265,4 @@ that should prevent merging.
Example:
e605dca6-446a-11e0-8b2a-002170d25c55 [Version 5] timestamp=1422387398.30395s
e605dca6-446a-11e0-8b2a-002170d25c55 [ObjectHashLower] timestamp=1422387398.30395s

View file

@ -12,7 +12,7 @@ done by passing `-c name=value` parameters to `git annex init`.
For example, this will make git-annex use only 1 level for hash directories
in `.git/annex/objects`:
git -c annex.tune.objecthashdirectories=1 annex init
git -c annex.tune.objecthash1=true annex init
It's very important to keep in mind that this makes a nonstandard format
git-annex repository. In general, this cannot safely be used with
@ -29,16 +29,18 @@ Again, tuned repositories are an experimental feature; use with caution!
The following tuning parameters are available:
* `annex.tune.objecthashdirectories` (default: 2)
Sets the number of hash directories to use in `.git/annex/objects/`
* `annex.tune.objecthash1=true`
Use just one level of hash directories in `.git/annex/objects/`,
instead of the default two levels.
* `annex.tune.objecthashlower` (default: false)
Set to true to make the hash directories in `.git/annex/objects/` use
all lower-case.
* `annex.tune.objecthashlower=true`
Make the hash directories in `.git/annex/objects/` use
all lower-case, instead of the default mixed-case.
* `annex.tune.branchhashdirectories` (default: 2)
Sets the number of hash directories to use in the git-annex branch.
* `annex.tune.branchhash1=true`
Use just one level of hash directories in the git-annex branch,
instead of the default two levels.
Note that git-annex will automatically propigate these setting to
`.git/config` for tuned repsitories. You should never directly change
`.git/config` for tuned repositories. You should never directly change
these settings in `.git/config`