add metadata command to get/set metadata

Adds metadata log, and command.

Note that unsetting field values seems to currently be broken.
And in general this has had all of 2 minutes worth of testing.

This commit was sponsored by Julien Lefrique.
This commit is contained in:
Joey Hess 2014-02-12 21:12:22 -04:00
parent 1b79d18a40
commit 9f7e76130e
9 changed files with 312 additions and 32 deletions

View file

@ -41,7 +41,7 @@ dropDead f content trustmap = case getLogVariety f of
in if null newlog
then RemoveFile
else ChangeFile $ Presence.showLog newlog
Just SingleValueLog -> PreserveFile
Just OtherLog -> PreserveFile
Nothing -> PreserveFile
dropDeadFromUUIDBasedLog :: TrustMap -> UUIDBased.Log String -> UUIDBased.Log String

View file

@ -26,6 +26,7 @@ import qualified Command.DropKey
import qualified Command.TransferKey
import qualified Command.TransferKeys
import qualified Command.ReKey
import qualified Command.MetaData
import qualified Command.Reinject
import qualified Command.Fix
import qualified Command.Init
@ -134,6 +135,7 @@ cmds = concat
, Command.TransferKey.def
, Command.TransferKeys.def
, Command.ReKey.def
, Command.MetaData.def
, Command.Fix.def
, Command.Fsck.def
, Command.Repair.def

73
Command/MetaData.hs Normal file
View file

@ -0,0 +1,73 @@
{- git-annex command
-
- Copyright 2014 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Command.MetaData where
import Common.Annex
import Command
import Logs.MetaData
import Types.MetaData
import qualified Data.Set as S
def :: [Command]
def = [command "metadata" (paramPair paramFile (paramRepeating "FIELD[+-]=VALUE")) seek
SectionUtility "sets metadata of a file"]
seek :: CommandSeek
seek = withWords start
start :: [String] -> CommandStart
start (file:settings) = ifAnnexed file
go
(error $ "not an annexed file, so cannot add metadata: " ++ file)
where
go (k, _b) = do
showStart "metadata" file
next $ perform k (map parse settings)
start _ = error "specify a file and the metadata to set"
perform :: Key -> [Action] -> CommandPerform
perform k actions = do
m <- getCurrentMetaData k
if null actions
then next $ cleanup m
else do
let m' = foldr apply m actions
addMetaData k m'
next $ cleanup m'
cleanup :: MetaData -> CommandCleanup
cleanup m = do
showLongNote $ unlines $ concatMap showmeta $ fromMetaData $ currentMetaData m
return True
where
showmeta (f, vs) = map (\v -> fromMetaField f ++ "=" ++ fromMetaValue v) $ S.toList vs
data Action
= AddMeta MetaField MetaValue
| DelMeta MetaField MetaValue
| SetMeta MetaField MetaValue
parse :: String -> Action
parse p = case lastMaybe f of
Just '+' -> AddMeta (mkf f') v
Just '-' -> DelMeta (mkf f') v
_ -> SetMeta (mkf f) v
where
(f, sv) = separate (== '=') p
f' = beginning f
v = toMetaValue sv
mkf fld = fromMaybe (badfield fld) (toMetaField fld)
badfield fld = error $ "Illegal metadata field name, \"" ++ fld ++ "\""
apply :: Action -> MetaData -> MetaData
apply (AddMeta f v) m = updateMetaData f v m
apply (DelMeta f oldv) m = updateMetaData f (unsetMetaValue oldv) m
apply (SetMeta f v) m = updateMetaData f v $
foldr (updateMetaData f) m $
map unsetMetaValue $ S.toList $ currentMetaDataValues f m

23
Logs.hs
View file

@ -1,6 +1,6 @@
{- git-annex log file names
-
- Copyright 2013 Joey Hess <joey@kitenet.net>
- Copyright 2013-2014 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
@ -15,7 +15,7 @@ data LogVariety
= UUIDBasedLog
| NewUUIDBasedLog
| PresenceLog Key
| SingleValueLog
| OtherLog
deriving (Show)
{- Converts a path from the git-annex branch into one of the varieties
@ -24,7 +24,7 @@ getLogVariety :: FilePath -> Maybe LogVariety
getLogVariety f
| f `elem` topLevelUUIDBasedLogs = Just UUIDBasedLog
| isRemoteStateLog f = Just NewUUIDBasedLog
| f == numcopiesLog = Just SingleValueLog
| isMetaDataLog f || f == numcopiesLog = Just OtherLog
| otherwise = PresenceLog <$> firstJust (presenceLogs f)
{- All the uuid-based logs stored in the top of the git-annex branch. -}
@ -119,6 +119,16 @@ remoteStateLogExt = ".log.rmt"
isRemoteStateLog :: FilePath -> Bool
isRemoteStateLog path = remoteStateLogExt `isSuffixOf` path
{- The filename of the metadata log for a given key. -}
metaDataLogFile :: Key -> FilePath
metaDataLogFile key = hashDirLower key </> keyFile key ++ metaDataLogExt
metaDataLogExt :: String
metaDataLogExt = ".log.met"
isMetaDataLog :: FilePath -> Bool
isMetaDataLog path = metaDataLogExt `isSuffixOf` path
prop_logs_sane :: Key -> Bool
prop_logs_sane dummykey = and
[ isNothing (getLogVariety "unknown")
@ -126,7 +136,8 @@ prop_logs_sane dummykey = and
, expect isPresenceLog (getLogVariety $ locationLogFile dummykey)
, expect isPresenceLog (getLogVariety $ urlLogFile dummykey)
, expect isNewUUIDBasedLog (getLogVariety $ remoteStateLogFile dummykey)
, expect isSingleValueLog (getLogVariety $ numcopiesLog)
, expect isOtherLog (getLogVariety $ metaDataLogFile dummykey)
, expect isOtherLog (getLogVariety $ numcopiesLog)
]
where
expect = maybe False
@ -136,5 +147,5 @@ prop_logs_sane dummykey = and
isNewUUIDBasedLog _ = False
isPresenceLog (PresenceLog k) = k == dummykey
isPresenceLog _ = False
isSingleValueLog SingleValueLog = True
isSingleValueLog _ = False
isOtherLog OtherLog = True
isOtherLog _ = False

135
Logs/MetaData.hs Normal file
View file

@ -0,0 +1,135 @@
{- git-annex general metadata storage log
-
- A line of the log will look like "timestamp field [+-]value [...]"
-
- Note that unset values are preserved. Consider this case:
-
- We have:
-
- 100 foo +x
- 200 foo -x
-
- An unmerged remote has:
-
- 150 foo +x
-
- After union merge, because the foo -x was preserved, we know that
- after the other remote redundantly set foo +x, it was unset,
- and so foo currently has no value.
-
-
- Copyright 2014 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
module Logs.MetaData (
getCurrentMetaData,
getMetaData,
setMetaData,
unsetMetaData,
addMetaData,
currentMetaData,
) where
import Common.Annex
import Types.MetaData
import qualified Annex.Branch
import Logs
import Logs.SingleValue
import qualified Data.Set as S
import Data.Time.Clock.POSIX
instance SingleValueSerializable MetaData where
serialize = Types.MetaData.serialize
deserialize = Types.MetaData.deserialize
getMetaData :: Key -> Annex (Log MetaData)
getMetaData = readLog . metaDataLogFile
{- Go through the log from oldest to newest, and combine it all
- into a single MetaData representing the current state. -}
getCurrentMetaData :: Key -> Annex MetaData
getCurrentMetaData = currentMetaData . collect <$$> getMetaData
where
collect = foldl' unionMetaData newMetaData . map value . S.toAscList
setMetaData :: Key -> MetaField -> String -> Annex ()
setMetaData = setMetaData' True
unsetMetaData :: Key -> MetaField -> String -> Annex ()
unsetMetaData = setMetaData' False
setMetaData' :: Bool -> Key -> MetaField -> String -> Annex ()
setMetaData' isset k field s = addMetaData k $
updateMetaData field (mkMetaValue (CurrentlySet isset) s) newMetaData
{- Adds in some metadata, which can override existing values, or unset
- them, but otherwise leaves any existing metadata as-is. -}
addMetaData :: Key -> MetaData -> Annex ()
addMetaData k metadata = do
now <- liftIO getPOSIXTime
Annex.Branch.change (metaDataLogFile k) $
showLog . simplifyLog
. S.insert (LogEntry now metadata)
. parseLog
{- Simplify a log, removing historical values that are no longer
- needed.
-
- This is not as simple as just making a single log line with the newest
- state of all metadata. Consider this case:
-
- We have:
-
- 100 foo +x bar +y
- 200 foo -x
-
- An unmerged remote has:
-
- 150 bar +z baz +w
-
- If what we have were simplified to "200 foo -x bar +y" then when the line
- from the remote became available, it would be older than the simplified
- line, and its change to bar would not take effect. That is wrong.
-
- Instead, simplify it to: (this simpliciation is optional)
-
- 100 bar +y (100 foo +x bar +y)
- 200 foo -x
-
- Now merging with the remote yields:
-
- 100 bar +y (100 foo +x bar +y)
- 150 bar +z baz +w
- 200 foo -x
-
- Simplifying again:
-
- 150 bar +z baz +w
- 200 foo -x
-
- In practice, there is little benefit to making simplications to lines
- that only remove some values, while leaving others on the line.
- Since lines are kept in git, that likely increases the size of the
- git repo (depending on compression), rather than saving any space.
-
- So, the only simplication that is actually done is to throw out an
- old line when all the values in it have been overridden by lines that
- came before
-}
simplifyLog :: Log MetaData -> Log MetaData
simplifyLog s = case S.toDescList s of
(newest:rest) -> S.fromList $ go [newest] (value newest) rest
_ -> s
where
go c _ [] = c
go c newer (l:ls)
| older `hasUniqueMetaData` newer =
go (l:c) (unionMetaData older newer) ls
| otherwise = go c newer ls
where
older = value l

View file

@ -16,11 +16,16 @@ module Types.MetaData (
toMetaField,
fromMetaField,
toMetaValue,
toMetaValue',
mkMetaValue,
unsetMetaValue,
fromMetaValue,
fromMetaData,
newMetaData,
updateMetaData,
getCurrentMetaData,
unionMetaData,
hasUniqueMetaData,
currentMetaData,
currentMetaDataValues,
getAllMetaData,
serialize,
deserialize,
@ -37,7 +42,7 @@ import qualified Data.Map as M
import Data.Char
newtype MetaData = MetaData (M.Map MetaField (S.Set MetaValue))
deriving (Show, Eq)
deriving (Show, Eq, Ord)
{- A metadata value can be currently be set (True), or may have been
- set before and we're remembering it no longer is (False). -}
@ -118,8 +123,11 @@ legalField f
toMetaValue :: String -> MetaValue
toMetaValue = MetaValue (CurrentlySet True)
toMetaValue' :: CurrentlySet -> String -> MetaValue
toMetaValue' = MetaValue
mkMetaValue :: CurrentlySet -> String -> MetaValue
mkMetaValue = MetaValue
unsetMetaValue :: MetaValue -> MetaValue
unsetMetaValue (MetaValue _ s) = MetaValue (CurrentlySet False) s
fromMetaField :: MetaField -> String
fromMetaField (MetaField f) = f
@ -127,6 +135,9 @@ fromMetaField (MetaField f) = f
fromMetaValue :: MetaValue -> String
fromMetaValue (MetaValue _ f) = f
fromMetaData :: MetaData -> [(MetaField, S.Set MetaValue)]
fromMetaData (MetaData m) = M.toList m
newMetaData :: MetaData
newMetaData = MetaData M.empty
@ -136,13 +147,38 @@ updateMetaData :: MetaField -> MetaValue -> MetaData -> MetaData
updateMetaData f v (MetaData m) = MetaData $
M.insertWith' S.union f (S.singleton v) m
{- Gets only currently set values -}
getCurrentMetaData :: MetaField -> MetaData -> S.Set MetaValue
getCurrentMetaData f m = S.filter isSet (getAllMetaData f m)
{- New metadata overrides old._-}
unionMetaData :: MetaData -> MetaData -> MetaData
unionMetaData (MetaData old) (MetaData new) = MetaData $
M.unionWith S.union new old
{- Checks if m contains any fields with values that are not
- the same in comparewith. Note that unset and set values are
- considered to be the same, so if m sets a value and comparewith
- unsets it, m is not unique. However, if m changes the value,
- or adds a new value, it is unique. -}
hasUniqueMetaData :: MetaData -> MetaData -> Bool
hasUniqueMetaData (MetaData comparewith) (MetaData m) =
any uniquefield (M.toList m)
where
uniquefield :: (MetaField, S.Set MetaValue) -> Bool
uniquefield (f, v) = maybe True (uniquevalue v) (M.lookup f comparewith)
uniquevalue v1 v2 = not $ S.null $ S.difference v1 v2
isSet :: MetaValue -> Bool
isSet (MetaValue (CurrentlySet isset) _) = isset
{- Gets only currently set values -}
currentMetaDataValues :: MetaField -> MetaData -> S.Set MetaValue
currentMetaDataValues f m = S.filter isSet (getAllMetaData f m)
currentMetaData :: MetaData -> MetaData
currentMetaData (MetaData m) = removeEmptyFields $ MetaData $
M.map (S.filter isSet) m
removeEmptyFields :: MetaData -> MetaData
removeEmptyFields (MetaData m) = MetaData $ M.filter (not . S.null) m
{- Gets currently set values, but also values that have been unset. -}
getAllMetaData :: MetaField -> MetaData -> S.Set MetaValue
getAllMetaData f (MetaData m) = fromMaybe S.empty (M.lookup f m)
@ -164,7 +200,7 @@ instance Arbitrary MetaField where
prop_updateMetaData_sane :: MetaData -> MetaField -> MetaValue -> Bool
prop_updateMetaData_sane m f v = and
[ S.member v $ getAllMetaData f m'
, not (isSet v) || S.member v (getCurrentMetaData f m')
, not (isSet v) || S.member v (currentMetaDataValues f m')
]
where
m' = updateMetaData f v m
@ -176,5 +212,4 @@ prop_metadata_serialize f v m = and
, deserialize (serialize m') == Just m'
]
where
m' = removeemptyfields m
removeemptyfields (MetaData x) = MetaData $ M.filter (not . S.null) x
m' = removeEmptyFields m

View file

@ -145,20 +145,6 @@ a tag was removed:
1287290991.152124s tag +baz
1291237510.141453s tag -bar
The end result is that tags foo and baz are set. This can be simplified:
1291237510.141453s tag +foo +baz -bar
Note the reuse of the most recent timestamp in the simplified version,
rather than putting in the timestamp when the simplification was done.
This ensures that is some other repo is making changes, they won't get
trampled over. For example:
1291237510.141453s tag +foo +baz -bar
1291239999.000000s tag +bar -foo
Now tags bar and baz are set.
# efficient metadata lookup
Looking up metadata for filtering so far requires traversing all keys in

View file

@ -695,6 +695,23 @@ subdirectories).
# UTILITY COMMANDS
* `metadata file [field=value field+=value field-=value ...]`
Each file can have any number of metadata fields attached to it,
which each in turn have any number of values. This sets metadata
for a file, or if run without any values, shows its current metadata.
To set a field's value, removing any old value(s), use field=value.
To add an additional value, use field+=value.
To remove a value, use field-=value.
For example, to set some tags on a file:
git annex metadata annexscreencast.ogv tag+=video tag+=screencast
* `migrate [path ...]`
Changes the specified annexed files to use the default key-value backend

View file

@ -146,6 +146,27 @@ Example:
1287290776.765152s e605dca6-446a-11e0-8b2a-002170d25c55 blah blah
1287290767.478634s 26339d22-446b-11e0-9101-002170d25c55 foo=bar
## `aaa/bbb/*.log.met`
These log files are used to store arbitrary [[design/metadata]] about keys.
Each key can have any number of metadata fields. Each field has a set of
values.
Lines are timestamped, and record when values are added (`field +value`),
but also when values are removed (`field -value`). Removed values
are retained in the log so that when merging an old line that sets a value
that was later unset, the value is not accidentially added back.
For example:
1287290776.765152s tag +foo +bar author +joey
1291237510.141453s tag -bar +baz
The value can be completely arbitrary data, although it's typically
reasonably short. If the value contains any whitespace
(including \r or \r), it will be base64 encoded. Base64 encoded values
are indicated by prefixing them with "!"
## `schedule.log`
Used to record scheduled events, such as periodic fscks.