add metadata command to get/set metadata
Adds metadata log, and command. Note that unsetting field values seems to currently be broken. And in general this has had all of 2 minutes worth of testing. This commit was sponsored by Julien Lefrique.
This commit is contained in:
parent
1b79d18a40
commit
9f7e76130e
9 changed files with 312 additions and 32 deletions
|
@ -41,7 +41,7 @@ dropDead f content trustmap = case getLogVariety f of
|
|||
in if null newlog
|
||||
then RemoveFile
|
||||
else ChangeFile $ Presence.showLog newlog
|
||||
Just SingleValueLog -> PreserveFile
|
||||
Just OtherLog -> PreserveFile
|
||||
Nothing -> PreserveFile
|
||||
|
||||
dropDeadFromUUIDBasedLog :: TrustMap -> UUIDBased.Log String -> UUIDBased.Log String
|
||||
|
|
|
@ -26,6 +26,7 @@ import qualified Command.DropKey
|
|||
import qualified Command.TransferKey
|
||||
import qualified Command.TransferKeys
|
||||
import qualified Command.ReKey
|
||||
import qualified Command.MetaData
|
||||
import qualified Command.Reinject
|
||||
import qualified Command.Fix
|
||||
import qualified Command.Init
|
||||
|
@ -134,6 +135,7 @@ cmds = concat
|
|||
, Command.TransferKey.def
|
||||
, Command.TransferKeys.def
|
||||
, Command.ReKey.def
|
||||
, Command.MetaData.def
|
||||
, Command.Fix.def
|
||||
, Command.Fsck.def
|
||||
, Command.Repair.def
|
||||
|
|
73
Command/MetaData.hs
Normal file
73
Command/MetaData.hs
Normal file
|
@ -0,0 +1,73 @@
|
|||
{- git-annex command
|
||||
-
|
||||
- Copyright 2014 Joey Hess <joey@kitenet.net>
|
||||
-
|
||||
- Licensed under the GNU GPL version 3 or higher.
|
||||
-}
|
||||
|
||||
module Command.MetaData where
|
||||
|
||||
import Common.Annex
|
||||
import Command
|
||||
import Logs.MetaData
|
||||
import Types.MetaData
|
||||
|
||||
import qualified Data.Set as S
|
||||
|
||||
def :: [Command]
|
||||
def = [command "metadata" (paramPair paramFile (paramRepeating "FIELD[+-]=VALUE")) seek
|
||||
SectionUtility "sets metadata of a file"]
|
||||
|
||||
seek :: CommandSeek
|
||||
seek = withWords start
|
||||
|
||||
start :: [String] -> CommandStart
|
||||
start (file:settings) = ifAnnexed file
|
||||
go
|
||||
(error $ "not an annexed file, so cannot add metadata: " ++ file)
|
||||
where
|
||||
go (k, _b) = do
|
||||
showStart "metadata" file
|
||||
next $ perform k (map parse settings)
|
||||
start _ = error "specify a file and the metadata to set"
|
||||
|
||||
perform :: Key -> [Action] -> CommandPerform
|
||||
perform k actions = do
|
||||
m <- getCurrentMetaData k
|
||||
if null actions
|
||||
then next $ cleanup m
|
||||
else do
|
||||
let m' = foldr apply m actions
|
||||
addMetaData k m'
|
||||
next $ cleanup m'
|
||||
|
||||
cleanup :: MetaData -> CommandCleanup
|
||||
cleanup m = do
|
||||
showLongNote $ unlines $ concatMap showmeta $ fromMetaData $ currentMetaData m
|
||||
return True
|
||||
where
|
||||
showmeta (f, vs) = map (\v -> fromMetaField f ++ "=" ++ fromMetaValue v) $ S.toList vs
|
||||
|
||||
data Action
|
||||
= AddMeta MetaField MetaValue
|
||||
| DelMeta MetaField MetaValue
|
||||
| SetMeta MetaField MetaValue
|
||||
|
||||
parse :: String -> Action
|
||||
parse p = case lastMaybe f of
|
||||
Just '+' -> AddMeta (mkf f') v
|
||||
Just '-' -> DelMeta (mkf f') v
|
||||
_ -> SetMeta (mkf f) v
|
||||
where
|
||||
(f, sv) = separate (== '=') p
|
||||
f' = beginning f
|
||||
v = toMetaValue sv
|
||||
mkf fld = fromMaybe (badfield fld) (toMetaField fld)
|
||||
badfield fld = error $ "Illegal metadata field name, \"" ++ fld ++ "\""
|
||||
|
||||
apply :: Action -> MetaData -> MetaData
|
||||
apply (AddMeta f v) m = updateMetaData f v m
|
||||
apply (DelMeta f oldv) m = updateMetaData f (unsetMetaValue oldv) m
|
||||
apply (SetMeta f v) m = updateMetaData f v $
|
||||
foldr (updateMetaData f) m $
|
||||
map unsetMetaValue $ S.toList $ currentMetaDataValues f m
|
23
Logs.hs
23
Logs.hs
|
@ -1,6 +1,6 @@
|
|||
{- git-annex log file names
|
||||
-
|
||||
- Copyright 2013 Joey Hess <joey@kitenet.net>
|
||||
- Copyright 2013-2014 Joey Hess <joey@kitenet.net>
|
||||
-
|
||||
- Licensed under the GNU GPL version 3 or higher.
|
||||
-}
|
||||
|
@ -15,7 +15,7 @@ data LogVariety
|
|||
= UUIDBasedLog
|
||||
| NewUUIDBasedLog
|
||||
| PresenceLog Key
|
||||
| SingleValueLog
|
||||
| OtherLog
|
||||
deriving (Show)
|
||||
|
||||
{- Converts a path from the git-annex branch into one of the varieties
|
||||
|
@ -24,7 +24,7 @@ getLogVariety :: FilePath -> Maybe LogVariety
|
|||
getLogVariety f
|
||||
| f `elem` topLevelUUIDBasedLogs = Just UUIDBasedLog
|
||||
| isRemoteStateLog f = Just NewUUIDBasedLog
|
||||
| f == numcopiesLog = Just SingleValueLog
|
||||
| isMetaDataLog f || f == numcopiesLog = Just OtherLog
|
||||
| otherwise = PresenceLog <$> firstJust (presenceLogs f)
|
||||
|
||||
{- All the uuid-based logs stored in the top of the git-annex branch. -}
|
||||
|
@ -119,6 +119,16 @@ remoteStateLogExt = ".log.rmt"
|
|||
isRemoteStateLog :: FilePath -> Bool
|
||||
isRemoteStateLog path = remoteStateLogExt `isSuffixOf` path
|
||||
|
||||
{- The filename of the metadata log for a given key. -}
|
||||
metaDataLogFile :: Key -> FilePath
|
||||
metaDataLogFile key = hashDirLower key </> keyFile key ++ metaDataLogExt
|
||||
|
||||
metaDataLogExt :: String
|
||||
metaDataLogExt = ".log.met"
|
||||
|
||||
isMetaDataLog :: FilePath -> Bool
|
||||
isMetaDataLog path = metaDataLogExt `isSuffixOf` path
|
||||
|
||||
prop_logs_sane :: Key -> Bool
|
||||
prop_logs_sane dummykey = and
|
||||
[ isNothing (getLogVariety "unknown")
|
||||
|
@ -126,7 +136,8 @@ prop_logs_sane dummykey = and
|
|||
, expect isPresenceLog (getLogVariety $ locationLogFile dummykey)
|
||||
, expect isPresenceLog (getLogVariety $ urlLogFile dummykey)
|
||||
, expect isNewUUIDBasedLog (getLogVariety $ remoteStateLogFile dummykey)
|
||||
, expect isSingleValueLog (getLogVariety $ numcopiesLog)
|
||||
, expect isOtherLog (getLogVariety $ metaDataLogFile dummykey)
|
||||
, expect isOtherLog (getLogVariety $ numcopiesLog)
|
||||
]
|
||||
where
|
||||
expect = maybe False
|
||||
|
@ -136,5 +147,5 @@ prop_logs_sane dummykey = and
|
|||
isNewUUIDBasedLog _ = False
|
||||
isPresenceLog (PresenceLog k) = k == dummykey
|
||||
isPresenceLog _ = False
|
||||
isSingleValueLog SingleValueLog = True
|
||||
isSingleValueLog _ = False
|
||||
isOtherLog OtherLog = True
|
||||
isOtherLog _ = False
|
||||
|
|
135
Logs/MetaData.hs
Normal file
135
Logs/MetaData.hs
Normal file
|
@ -0,0 +1,135 @@
|
|||
{- git-annex general metadata storage log
|
||||
-
|
||||
- A line of the log will look like "timestamp field [+-]value [...]"
|
||||
-
|
||||
- Note that unset values are preserved. Consider this case:
|
||||
-
|
||||
- We have:
|
||||
-
|
||||
- 100 foo +x
|
||||
- 200 foo -x
|
||||
-
|
||||
- An unmerged remote has:
|
||||
-
|
||||
- 150 foo +x
|
||||
-
|
||||
- After union merge, because the foo -x was preserved, we know that
|
||||
- after the other remote redundantly set foo +x, it was unset,
|
||||
- and so foo currently has no value.
|
||||
-
|
||||
-
|
||||
- Copyright 2014 Joey Hess <joey@kitenet.net>
|
||||
-
|
||||
- Licensed under the GNU GPL version 3 or higher.
|
||||
-}
|
||||
|
||||
{-# OPTIONS_GHC -fno-warn-orphans #-}
|
||||
|
||||
module Logs.MetaData (
|
||||
getCurrentMetaData,
|
||||
getMetaData,
|
||||
setMetaData,
|
||||
unsetMetaData,
|
||||
addMetaData,
|
||||
currentMetaData,
|
||||
) where
|
||||
|
||||
import Common.Annex
|
||||
import Types.MetaData
|
||||
import qualified Annex.Branch
|
||||
import Logs
|
||||
import Logs.SingleValue
|
||||
|
||||
import qualified Data.Set as S
|
||||
import Data.Time.Clock.POSIX
|
||||
|
||||
instance SingleValueSerializable MetaData where
|
||||
serialize = Types.MetaData.serialize
|
||||
deserialize = Types.MetaData.deserialize
|
||||
|
||||
getMetaData :: Key -> Annex (Log MetaData)
|
||||
getMetaData = readLog . metaDataLogFile
|
||||
|
||||
{- Go through the log from oldest to newest, and combine it all
|
||||
- into a single MetaData representing the current state. -}
|
||||
getCurrentMetaData :: Key -> Annex MetaData
|
||||
getCurrentMetaData = currentMetaData . collect <$$> getMetaData
|
||||
where
|
||||
collect = foldl' unionMetaData newMetaData . map value . S.toAscList
|
||||
|
||||
setMetaData :: Key -> MetaField -> String -> Annex ()
|
||||
setMetaData = setMetaData' True
|
||||
|
||||
unsetMetaData :: Key -> MetaField -> String -> Annex ()
|
||||
unsetMetaData = setMetaData' False
|
||||
|
||||
setMetaData' :: Bool -> Key -> MetaField -> String -> Annex ()
|
||||
setMetaData' isset k field s = addMetaData k $
|
||||
updateMetaData field (mkMetaValue (CurrentlySet isset) s) newMetaData
|
||||
|
||||
{- Adds in some metadata, which can override existing values, or unset
|
||||
- them, but otherwise leaves any existing metadata as-is. -}
|
||||
addMetaData :: Key -> MetaData -> Annex ()
|
||||
addMetaData k metadata = do
|
||||
now <- liftIO getPOSIXTime
|
||||
Annex.Branch.change (metaDataLogFile k) $
|
||||
showLog . simplifyLog
|
||||
. S.insert (LogEntry now metadata)
|
||||
. parseLog
|
||||
|
||||
{- Simplify a log, removing historical values that are no longer
|
||||
- needed.
|
||||
-
|
||||
- This is not as simple as just making a single log line with the newest
|
||||
- state of all metadata. Consider this case:
|
||||
-
|
||||
- We have:
|
||||
-
|
||||
- 100 foo +x bar +y
|
||||
- 200 foo -x
|
||||
-
|
||||
- An unmerged remote has:
|
||||
-
|
||||
- 150 bar +z baz +w
|
||||
-
|
||||
- If what we have were simplified to "200 foo -x bar +y" then when the line
|
||||
- from the remote became available, it would be older than the simplified
|
||||
- line, and its change to bar would not take effect. That is wrong.
|
||||
-
|
||||
- Instead, simplify it to: (this simpliciation is optional)
|
||||
-
|
||||
- 100 bar +y (100 foo +x bar +y)
|
||||
- 200 foo -x
|
||||
-
|
||||
- Now merging with the remote yields:
|
||||
-
|
||||
- 100 bar +y (100 foo +x bar +y)
|
||||
- 150 bar +z baz +w
|
||||
- 200 foo -x
|
||||
-
|
||||
- Simplifying again:
|
||||
-
|
||||
- 150 bar +z baz +w
|
||||
- 200 foo -x
|
||||
-
|
||||
- In practice, there is little benefit to making simplications to lines
|
||||
- that only remove some values, while leaving others on the line.
|
||||
- Since lines are kept in git, that likely increases the size of the
|
||||
- git repo (depending on compression), rather than saving any space.
|
||||
-
|
||||
- So, the only simplication that is actually done is to throw out an
|
||||
- old line when all the values in it have been overridden by lines that
|
||||
- came before
|
||||
-}
|
||||
simplifyLog :: Log MetaData -> Log MetaData
|
||||
simplifyLog s = case S.toDescList s of
|
||||
(newest:rest) -> S.fromList $ go [newest] (value newest) rest
|
||||
_ -> s
|
||||
where
|
||||
go c _ [] = c
|
||||
go c newer (l:ls)
|
||||
| older `hasUniqueMetaData` newer =
|
||||
go (l:c) (unionMetaData older newer) ls
|
||||
| otherwise = go c newer ls
|
||||
where
|
||||
older = value l
|
|
@ -16,11 +16,16 @@ module Types.MetaData (
|
|||
toMetaField,
|
||||
fromMetaField,
|
||||
toMetaValue,
|
||||
toMetaValue',
|
||||
mkMetaValue,
|
||||
unsetMetaValue,
|
||||
fromMetaValue,
|
||||
fromMetaData,
|
||||
newMetaData,
|
||||
updateMetaData,
|
||||
getCurrentMetaData,
|
||||
unionMetaData,
|
||||
hasUniqueMetaData,
|
||||
currentMetaData,
|
||||
currentMetaDataValues,
|
||||
getAllMetaData,
|
||||
serialize,
|
||||
deserialize,
|
||||
|
@ -37,7 +42,7 @@ import qualified Data.Map as M
|
|||
import Data.Char
|
||||
|
||||
newtype MetaData = MetaData (M.Map MetaField (S.Set MetaValue))
|
||||
deriving (Show, Eq)
|
||||
deriving (Show, Eq, Ord)
|
||||
|
||||
{- A metadata value can be currently be set (True), or may have been
|
||||
- set before and we're remembering it no longer is (False). -}
|
||||
|
@ -118,8 +123,11 @@ legalField f
|
|||
toMetaValue :: String -> MetaValue
|
||||
toMetaValue = MetaValue (CurrentlySet True)
|
||||
|
||||
toMetaValue' :: CurrentlySet -> String -> MetaValue
|
||||
toMetaValue' = MetaValue
|
||||
mkMetaValue :: CurrentlySet -> String -> MetaValue
|
||||
mkMetaValue = MetaValue
|
||||
|
||||
unsetMetaValue :: MetaValue -> MetaValue
|
||||
unsetMetaValue (MetaValue _ s) = MetaValue (CurrentlySet False) s
|
||||
|
||||
fromMetaField :: MetaField -> String
|
||||
fromMetaField (MetaField f) = f
|
||||
|
@ -127,6 +135,9 @@ fromMetaField (MetaField f) = f
|
|||
fromMetaValue :: MetaValue -> String
|
||||
fromMetaValue (MetaValue _ f) = f
|
||||
|
||||
fromMetaData :: MetaData -> [(MetaField, S.Set MetaValue)]
|
||||
fromMetaData (MetaData m) = M.toList m
|
||||
|
||||
newMetaData :: MetaData
|
||||
newMetaData = MetaData M.empty
|
||||
|
||||
|
@ -136,13 +147,38 @@ updateMetaData :: MetaField -> MetaValue -> MetaData -> MetaData
|
|||
updateMetaData f v (MetaData m) = MetaData $
|
||||
M.insertWith' S.union f (S.singleton v) m
|
||||
|
||||
{- Gets only currently set values -}
|
||||
getCurrentMetaData :: MetaField -> MetaData -> S.Set MetaValue
|
||||
getCurrentMetaData f m = S.filter isSet (getAllMetaData f m)
|
||||
{- New metadata overrides old._-}
|
||||
unionMetaData :: MetaData -> MetaData -> MetaData
|
||||
unionMetaData (MetaData old) (MetaData new) = MetaData $
|
||||
M.unionWith S.union new old
|
||||
|
||||
{- Checks if m contains any fields with values that are not
|
||||
- the same in comparewith. Note that unset and set values are
|
||||
- considered to be the same, so if m sets a value and comparewith
|
||||
- unsets it, m is not unique. However, if m changes the value,
|
||||
- or adds a new value, it is unique. -}
|
||||
hasUniqueMetaData :: MetaData -> MetaData -> Bool
|
||||
hasUniqueMetaData (MetaData comparewith) (MetaData m) =
|
||||
any uniquefield (M.toList m)
|
||||
where
|
||||
uniquefield :: (MetaField, S.Set MetaValue) -> Bool
|
||||
uniquefield (f, v) = maybe True (uniquevalue v) (M.lookup f comparewith)
|
||||
uniquevalue v1 v2 = not $ S.null $ S.difference v1 v2
|
||||
|
||||
isSet :: MetaValue -> Bool
|
||||
isSet (MetaValue (CurrentlySet isset) _) = isset
|
||||
|
||||
{- Gets only currently set values -}
|
||||
currentMetaDataValues :: MetaField -> MetaData -> S.Set MetaValue
|
||||
currentMetaDataValues f m = S.filter isSet (getAllMetaData f m)
|
||||
|
||||
currentMetaData :: MetaData -> MetaData
|
||||
currentMetaData (MetaData m) = removeEmptyFields $ MetaData $
|
||||
M.map (S.filter isSet) m
|
||||
|
||||
removeEmptyFields :: MetaData -> MetaData
|
||||
removeEmptyFields (MetaData m) = MetaData $ M.filter (not . S.null) m
|
||||
|
||||
{- Gets currently set values, but also values that have been unset. -}
|
||||
getAllMetaData :: MetaField -> MetaData -> S.Set MetaValue
|
||||
getAllMetaData f (MetaData m) = fromMaybe S.empty (M.lookup f m)
|
||||
|
@ -164,7 +200,7 @@ instance Arbitrary MetaField where
|
|||
prop_updateMetaData_sane :: MetaData -> MetaField -> MetaValue -> Bool
|
||||
prop_updateMetaData_sane m f v = and
|
||||
[ S.member v $ getAllMetaData f m'
|
||||
, not (isSet v) || S.member v (getCurrentMetaData f m')
|
||||
, not (isSet v) || S.member v (currentMetaDataValues f m')
|
||||
]
|
||||
where
|
||||
m' = updateMetaData f v m
|
||||
|
@ -176,5 +212,4 @@ prop_metadata_serialize f v m = and
|
|||
, deserialize (serialize m') == Just m'
|
||||
]
|
||||
where
|
||||
m' = removeemptyfields m
|
||||
removeemptyfields (MetaData x) = MetaData $ M.filter (not . S.null) x
|
||||
m' = removeEmptyFields m
|
||||
|
|
|
@ -145,20 +145,6 @@ a tag was removed:
|
|||
1287290991.152124s tag +baz
|
||||
1291237510.141453s tag -bar
|
||||
|
||||
The end result is that tags foo and baz are set. This can be simplified:
|
||||
|
||||
1291237510.141453s tag +foo +baz -bar
|
||||
|
||||
Note the reuse of the most recent timestamp in the simplified version,
|
||||
rather than putting in the timestamp when the simplification was done.
|
||||
This ensures that is some other repo is making changes, they won't get
|
||||
trampled over. For example:
|
||||
|
||||
1291237510.141453s tag +foo +baz -bar
|
||||
1291239999.000000s tag +bar -foo
|
||||
|
||||
Now tags bar and baz are set.
|
||||
|
||||
# efficient metadata lookup
|
||||
|
||||
Looking up metadata for filtering so far requires traversing all keys in
|
||||
|
|
|
@ -695,6 +695,23 @@ subdirectories).
|
|||
|
||||
# UTILITY COMMANDS
|
||||
|
||||
* `metadata file [field=value field+=value field-=value ...]`
|
||||
|
||||
Each file can have any number of metadata fields attached to it,
|
||||
which each in turn have any number of values. This sets metadata
|
||||
for a file, or if run without any values, shows its current metadata.
|
||||
|
||||
To set a field's value, removing any old value(s), use field=value.
|
||||
|
||||
To add an additional value, use field+=value.
|
||||
|
||||
To remove a value, use field-=value.
|
||||
|
||||
For example, to set some tags on a file:
|
||||
|
||||
git annex metadata annexscreencast.ogv tag+=video tag+=screencast
|
||||
|
||||
|
||||
* `migrate [path ...]`
|
||||
|
||||
Changes the specified annexed files to use the default key-value backend
|
||||
|
|
|
@ -146,6 +146,27 @@ Example:
|
|||
1287290776.765152s e605dca6-446a-11e0-8b2a-002170d25c55 blah blah
|
||||
1287290767.478634s 26339d22-446b-11e0-9101-002170d25c55 foo=bar
|
||||
|
||||
## `aaa/bbb/*.log.met`
|
||||
|
||||
These log files are used to store arbitrary [[design/metadata]] about keys.
|
||||
Each key can have any number of metadata fields. Each field has a set of
|
||||
values.
|
||||
|
||||
Lines are timestamped, and record when values are added (`field +value`),
|
||||
but also when values are removed (`field -value`). Removed values
|
||||
are retained in the log so that when merging an old line that sets a value
|
||||
that was later unset, the value is not accidentially added back.
|
||||
|
||||
For example:
|
||||
|
||||
1287290776.765152s tag +foo +bar author +joey
|
||||
1291237510.141453s tag -bar +baz
|
||||
|
||||
The value can be completely arbitrary data, although it's typically
|
||||
reasonably short. If the value contains any whitespace
|
||||
(including \r or \r), it will be base64 encoded. Base64 encoded values
|
||||
are indicated by prefixing them with "!"
|
||||
|
||||
## `schedule.log`
|
||||
|
||||
Used to record scheduled events, such as periodic fscks.
|
||||
|
|
Loading…
Reference in a new issue