rework Differences data type

Eliminated complexity and future proofed. The most important change is that
all functions over Difference are now total; any Difference that can be
expressed should be handled. Avoids needs for sanity checking of inputs,
and version skew with the future.

Also, the difference.log now serializes a [Difference], not a Differences.
This saves space and keeps it simpler.

Note that [Difference] might contain conflicting differences (eg,
[Version5, Version6]. In this case, one of them needs to consistently win
over the others, probably based on Ord.
This commit is contained in:
Joey Hess 2015-01-28 13:47:41 -04:00
parent 354de19cbe
commit ba3825441c
8 changed files with 42 additions and 90 deletions

View file

@ -31,9 +31,7 @@ import qualified Data.Map as M
setDifferences :: Annex () setDifferences :: Annex ()
setDifferences = do setDifferences = do
u <- getUUID u <- getUUID
otherds <- either error return otherds <- allDifferences <$> recordedDifferences
=<< sanityCheckDifferences . allDifferences
<$> recordedDifferences
ds <- mappend otherds . annexDifferences <$> Annex.getGitConfig ds <- mappend otherds . annexDifferences <$> Annex.getGitConfig
when (ds /= mempty) $ do when (ds /= mempty) $ do
ds'@(Differences l) <- ifM (isJust <$> getVersion) ds'@(Differences l) <- ifM (isJust <$> getVersion)

View file

@ -135,7 +135,7 @@ gitAnnexLocation' key r config crippled
-} -}
| Git.repoIsLocalBare r | Git.repoIsLocalBare r
|| crippled || crippled
|| hasDifference (== ObjectHashLower True) (annexDifferences config) = || hasDifference (== ObjectHashLower) (annexDifferences config) =
check $ map inrepo $ annexLocations key check $ map inrepo $ annexLocations key
{- Non-bare repositories only use hashDirMixed, so {- Non-bare repositories only use hashDirMixed, so
- don't need to do any work to check if the file is - don't need to do any work to check if the file is

View file

@ -24,10 +24,11 @@ import Logs.UUIDBased
import Logs.Difference.Pure import Logs.Difference.Pure
recordDifferences :: Differences -> UUID -> Annex () recordDifferences :: Differences -> UUID -> Annex ()
recordDifferences differences uuid = do recordDifferences (Differences differences) uuid = do
ts <- liftIO getPOSIXTime ts <- liftIO getPOSIXTime
Annex.Branch.change differenceLog $ Annex.Branch.change differenceLog $
showLog id . changeLog ts uuid (show differences) . parseLog Just showLog id . changeLog ts uuid (show differences) . parseLog Just
recordDifferences UnknownDifferences _ = return ()
-- Map of UUIDs that have Differences recorded. -- Map of UUIDs that have Differences recorded.
-- If a new version of git-annex has added a Difference this version -- If a new version of git-annex has added a Difference this version

View file

@ -19,7 +19,7 @@ import Logs.UUIDBased
parseDifferencesLog :: String -> (M.Map UUID Differences) parseDifferencesLog :: String -> (M.Map UUID Differences)
parseDifferencesLog = simpleMap parseDifferencesLog = simpleMap
. parseLog (Just . fromMaybe UnknownDifferences . readish) . parseLog (Just . maybe UnknownDifferences Differences . readish)
-- The sum of all recorded differences, across all UUIDs. -- The sum of all recorded differences, across all UUIDs.
allDifferences :: M.Map UUID Differences -> Differences allDifferences :: M.Map UUID Differences -> Differences

View file

@ -9,123 +9,74 @@ module Types.Difference (
Difference(..), Difference(..),
Differences(..), Differences(..),
getDifferences, getDifferences,
sanityCheckDifferences,
differenceConfigKey, differenceConfigKey,
differenceConfigVal, differenceConfigVal,
hasDifference, hasDifference,
) where ) where
import Utility.PartialPrelude
import qualified Git import qualified Git
import qualified Git.Config import qualified Git.Config
import Data.List import Data.List
import Data.Maybe import Data.Maybe
import Data.Monoid import Data.Monoid
import Control.Applicative
-- Describes differences from the v5 repository format. -- Describes differences from the v5 repository format.
-- --
-- The serilization is stored in difference.log, so avoid changes that -- The serialization is stored in difference.log, so avoid changes that
-- would break compatability. -- would break compatability.
-- --
-- Not breaking comparability is why a list of Differences is used, rather -- Not breaking compatability is why a list of Differences is used, rather
-- than a sum type. With a sum type, adding a new field for some future -- than a sum type. With a sum type, adding a new field for some future
-- difference would serialize to a value that an older version could not -- difference would serialize to a value that an older version could not
-- parse, even if that new field was not used. With the Differences list, -- parse, even if that new field was not used. With the Differences list,
-- old versions can still parse it, unless the new Difference constructor -- old versions can still parse it, unless the new Difference constructor
-- is used. -- is used.
--
-- The constructors intentionally do not have parameters; this is to
-- ensure that any Difference that can be expressed is supported.
-- So, a new repository version would be Version6, rather than Version Int.
data Difference data Difference
= Version Int = ObjectHashLower
| ObjectHashLower Bool | OneLevelObjectHash
| ObjectHashDirectories Int | OneLevelBranchHash
| BranchHashDirectories Int deriving (Show, Read, Ord, Eq, Enum, Bounded)
deriving (Show, Read, Ord)
instance Eq Difference where
Version a == Version b = a == b
ObjectHashLower a == ObjectHashLower b = a == b
ObjectHashDirectories a == ObjectHashDirectories b = a == b
BranchHashDirectories a == BranchHashDirectories b = a == b
_ == _ = False
data Differences data Differences
= Differences [Difference] = Differences [Difference]
| UnknownDifferences | UnknownDifferences
deriving (Show, Read, Ord)
instance Eq Differences where instance Eq Differences where
Differences a == Differences b = simplify (defver:a) == simplify (defver:b) Differences a == Differences b = canon a == canon b
_ == _ = False _ == _ = False -- UnknownDifferences cannot be equal
instance Monoid Differences where instance Monoid Differences where
mempty = Differences [] mempty = Differences []
mappend (Differences l1) (Differences l2) = Differences (simplify (l1 ++ l2)) mappend (Differences l1) (Differences l2) = Differences (canon (l1 ++ l2))
mappend _ _ = UnknownDifferences mappend _ _ = UnknownDifferences
-- This is the default repository version that is assumed when no other one canon :: [Difference] -> [Difference]
-- is given. Note that [] == [Version 5] canon = nub . sort
defver :: Difference
defver = Version 5
-- Given [Version 6, Version 5], returns [Version 6]
simplify :: [Difference] -> [Difference]
simplify = go . sort
where
go [] = []
go (d:[]) = [d]
go (d1:d2:ds)
| like d1 d2 = go (d2:ds)
| otherwise = d1 : go (d2:ds)
like (Version _) (Version _) = True
like _ _ = False
getDifferences :: Git.Repo -> Differences getDifferences :: Git.Repo -> Differences
getDifferences r = checksane $ Differences $ catMaybes getDifferences r = Differences $ catMaybes $
[ ObjectHashLower map getmaybe [minBound .. maxBound]
<$> getmaybebool (differenceConfigKey (ObjectHashLower undefined))
, ObjectHashDirectories
<$> getmayberead (differenceConfigKey (ObjectHashDirectories undefined))
, BranchHashDirectories
<$> getmayberead (differenceConfigKey (BranchHashDirectories undefined))
]
where where
getmaybe k = Git.Config.getMaybe k r getmaybe d = case Git.Config.isTrue =<< Git.Config.getMaybe (differenceConfigKey d) r of
getmayberead k = readish =<< getmaybe k Just True -> Just d
getmaybebool k = Git.Config.isTrue =<< getmaybe k _ -> Nothing
checksane = either error id . sanityCheckDifferences
differenceConfigKey :: Difference -> String differenceConfigKey :: Difference -> String
differenceConfigKey (Version _) = "annex.version" differenceConfigKey ObjectHashLower = tunable "objecthashlower"
differenceConfigKey (ObjectHashLower _) = tunable "objecthashlower" differenceConfigKey OneLevelObjectHash = tunable "objecthash1"
differenceConfigKey (ObjectHashDirectories _) = tunable "objecthashdirectories" differenceConfigKey OneLevelBranchHash = tunable "branchhash1"
differenceConfigKey (BranchHashDirectories _) = tunable "branchhashdirectories"
differenceConfigVal :: Difference -> String differenceConfigVal :: Difference -> String
differenceConfigVal (Version v) = show v differenceConfigVal _ = Git.Config.boolConfig True
differenceConfigVal (ObjectHashLower b) = Git.Config.boolConfig b
differenceConfigVal (ObjectHashDirectories n) = show n
differenceConfigVal (BranchHashDirectories n) = show n
tunable :: String -> String tunable :: String -> String
tunable k = "annex.tune." ++ k tunable k = "annex.tune." ++ k
sanityCheckDifferences :: Differences -> Either String Differences
sanityCheckDifferences d@(Differences l)
| null problems = Right d
| otherwise = Left (intercalate "; " problems)
where
problems = catMaybes (map check l)
check (ObjectHashDirectories n)
| n == 1 || n == 2 = Nothing
| otherwise = Just $ "Bad value for objecthashdirectories -- should be 1 or 2, not " ++ show n
check (BranchHashDirectories n)
| n == 1 || n == 2 = Nothing
| otherwise = Just $ "Bad value for branhhashdirectories -- should be 1 or 2, not " ++ show n
check _ = Nothing
sanityCheckDifferences UnknownDifferences = Left "unknown differences detected; update git-annex"
hasDifference :: (Difference -> Bool) -> Differences -> Bool hasDifference :: (Difference -> Bool) -> Differences -> Bool
hasDifference f (Differences l) = any f l hasDifference f (Differences l) = any f l
hasDifference _ UnknownDifferences = False hasDifference _ UnknownDifferences = False

View file

@ -1839,7 +1839,7 @@ Here are all the supported configuration settings.
Used by hook special remotes and external special remotes to record Used by hook special remotes and external special remotes to record
the type of the remote. the type of the remote.
* `annex.tune.objecthashdirectories`, `annex.tune.objecthashlower`, `annex.tune.branchhashdirectories` * `annex.tune.objecthash1`, `annex.tune.objecthashlower`, `annex.tune.branchhash1`
These can be passed to `git annex init` to tune the repository. These can be passed to `git annex init` to tune the repository.
They cannot be safely changed in a running repository. They cannot be safely changed in a running repository.

View file

@ -265,4 +265,4 @@ that should prevent merging.
Example: Example:
e605dca6-446a-11e0-8b2a-002170d25c55 [Version 5] timestamp=1422387398.30395s e605dca6-446a-11e0-8b2a-002170d25c55 [ObjectHashLower] timestamp=1422387398.30395s

View file

@ -12,7 +12,7 @@ done by passing `-c name=value` parameters to `git annex init`.
For example, this will make git-annex use only 1 level for hash directories For example, this will make git-annex use only 1 level for hash directories
in `.git/annex/objects`: in `.git/annex/objects`:
git -c annex.tune.objecthashdirectories=1 annex init git -c annex.tune.objecthash1=true annex init
It's very important to keep in mind that this makes a nonstandard format It's very important to keep in mind that this makes a nonstandard format
git-annex repository. In general, this cannot safely be used with git-annex repository. In general, this cannot safely be used with
@ -29,16 +29,18 @@ Again, tuned repositories are an experimental feature; use with caution!
The following tuning parameters are available: The following tuning parameters are available:
* `annex.tune.objecthashdirectories` (default: 2) * `annex.tune.objecthash1=true`
Sets the number of hash directories to use in `.git/annex/objects/` Use just one level of hash directories in `.git/annex/objects/`,
instead of the default two levels.
* `annex.tune.objecthashlower` (default: false) * `annex.tune.objecthashlower=true`
Set to true to make the hash directories in `.git/annex/objects/` use Make the hash directories in `.git/annex/objects/` use
all lower-case. all lower-case, instead of the default mixed-case.
* `annex.tune.branchhashdirectories` (default: 2) * `annex.tune.branchhash1=true`
Sets the number of hash directories to use in the git-annex branch. Use just one level of hash directories in the git-annex branch,
instead of the default two levels.
Note that git-annex will automatically propigate these setting to Note that git-annex will automatically propigate these setting to
`.git/config` for tuned repsitories. You should never directly change `.git/config` for tuned repositories. You should never directly change
these settings in `.git/config` these settings in `.git/config`