annex.genmetadata can be set to make git-annex automatically set metadata (year and month) when adding files

This commit is contained in:
Joey Hess 2014-02-23 00:08:29 -04:00
parent fa6f553083
commit 7498c5dd96
13 changed files with 135 additions and 43 deletions

51
Annex/MetaData.hs Normal file
View file

@ -0,0 +1,51 @@
{- git-annex metadata
-
- Copyright 2014 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Annex.MetaData where
import Common.Annex
import qualified Annex
import Types.MetaData
import Logs.MetaData
import qualified Data.Set as S
import qualified Data.Map as M
import Data.Time.Calendar
import Data.Time.Clock
import Data.Time.Clock.POSIX
tagMetaField :: MetaField
tagMetaField = MetaField "tag"
yearMetaField :: MetaField
yearMetaField = MetaField "year"
monthMetaField :: MetaField
monthMetaField = MetaField "month"
{- Generates metadata for a file that has just been ingested into the
- annex. Passed the FileStatus of the content file.
-
- Does not overwrite any existing metadata values for the key.
-}
genMetaData :: Key -> FileStatus -> Annex ()
genMetaData key status = whenM (annexGenMetaData <$> Annex.getGitConfig) $ do
metadata <- getCurrentMetaData key
let metadata' = genMetaData' status metadata
unless (metadata' == emptyMetaData) $
addMetaData key metadata'
genMetaData' :: FileStatus -> MetaData -> MetaData
genMetaData' status old = MetaData $ M.fromList $ filter isnew
[ (yearMetaField, S.singleton $ toMetaValue $ show y)
, (monthMetaField, S.singleton $ toMetaValue $ show m)
]
where
isnew (f, _) = S.null (currentMetaDataValues f old)
(y, m, _d) = toGregorian $ utctDay $
posixSecondsToUTCTime $ realToFrac $
modificationTime status

View file

@ -300,8 +300,7 @@ applyView' mkviewedfile getfilemetadata view = do
genviewedfiles = viewedFiles view mkviewedfile -- enables memoization
go uh hasher f (Just (k, _)) = do
metadata <- getCurrentMetaData k
let dirmetadata = getfilemetadata f
let metadata' = unionMetaData dirmetadata metadata
let metadata' = getfilemetadata f `unionMetaData` metadata
forM_ (genviewedfiles f metadata') $ \fv -> do
stagesymlink uh hasher fv =<< inRepo (gitAnnexLink fv k)
go uh hasher f Nothing

View file

@ -19,6 +19,7 @@ import Annex.Content
import Annex.Content.Direct
import Annex.Perms
import Annex.Link
import Annex.MetaData
import qualified Annex
import qualified Annex.Queue
#ifdef WITH_CLIBS
@ -145,26 +146,32 @@ ingest Nothing = return (Nothing, Nothing)
ingest (Just source) = do
backend <- chooseBackend $ keyFilename source
k <- genKey source backend
cache <- liftIO $ genInodeCache $ contentLocation source
case (cache, inodeCache source) of
(_, Nothing) -> go k cache
(Just newc, Just c) | compareStrong c newc -> go k cache
ms <- liftIO $ catchMaybeIO $ getFileStatus $ contentLocation source
let mcache = toInodeCache =<< ms
case (mcache, inodeCache source) of
(_, Nothing) -> go k mcache ms
(Just newc, Just c) | compareStrong c newc -> go k mcache ms
_ -> failure "changed while it was being added"
where
go k cache = ifM isDirect ( godirect k cache , goindirect k cache )
go k mcache ms = ifM isDirect
( godirect k mcache ms
, goindirect k mcache ms
)
goindirect (Just (key, _)) mcache = do
goindirect (Just (key, _)) mcache ms = do
catchAnnex (moveAnnex key $ contentLocation source)
(undo (keyFilename source) key)
maybe noop (genMetaData key) ms
liftIO $ nukeFile $ keyFilename source
return $ (Just key, mcache)
goindirect Nothing _ = failure "failed to generate a key"
goindirect _ _ _ = failure "failed to generate a key"
godirect (Just (key, _)) (Just cache) = do
godirect (Just (key, _)) (Just cache) ms = do
addInodeCache key cache
maybe noop (genMetaData key) ms
finishIngestDirect key source
return $ (Just key, Just cache)
godirect _ _ = failure "failed to generate a key"
godirect _ _ _ = failure "failed to generate a key"
failure msg = do
warning $ keyFilename source ++ " " ++ msg

View file

@ -10,6 +10,7 @@ module Command.MetaData where
import Common.Annex
import qualified Annex
import Command
import Annex.MetaData
import Logs.MetaData
import Types.MetaData
@ -55,7 +56,7 @@ perform :: POSIXTime -> [ModMeta] -> Key -> CommandPerform
perform _ [] k = next $ cleanup k
perform now ms k = do
oldm <- getCurrentMetaData k
let m = foldl' unionMetaData newMetaData $ map (modMeta oldm) ms
let m = foldl' unionMetaData emptyMetaData $ map (modMeta oldm) ms
addMetaData' k m now
next $ cleanup k

View file

@ -14,6 +14,7 @@ import qualified Git.Command
import qualified Git.Ref
import qualified Git.Branch
import Types.MetaData
import Annex.MetaData
import Types.View
import Annex.View
import Logs.View

View file

@ -55,7 +55,7 @@ getMetaData = readLog . metaDataLogFile
getCurrentMetaData :: Key -> Annex MetaData
getCurrentMetaData = currentMetaData . collect <$$> getMetaData
where
collect = foldl' unionMetaData newMetaData . map value . S.toAscList
collect = foldl' unionMetaData emptyMetaData . map value . S.toAscList
{- Adds in some metadata, which can override existing values, or unset
- them, but otherwise leaves any existing metadata as-is. -}
@ -129,7 +129,7 @@ simplifyLog s = case sl of
go c _ [] = c
go c newer (l:ls)
| unique == newMetaData = go c newer ls
| unique == emptyMetaData = go c newer ls
| otherwise = go (l { value = unique } : c)
(unionMetaData unique newer) ls
where

View file

@ -49,6 +49,7 @@ data GitConfig = GitConfig
, annexAutoUpgrade :: AutoUpgrade
, annexExpireUnused :: Maybe (Maybe Duration)
, annexSecureEraseCommand :: Maybe String
, annexGenMetaData :: Bool
, coreSymlinks :: Bool
, gcryptId :: Maybe String
}
@ -81,6 +82,7 @@ extractGitConfig r = GitConfig
, annexExpireUnused = maybe Nothing Just . parseDuration
<$> getmaybe (annex "expireunused")
, annexSecureEraseCommand = getmaybe (annex "secure-erase-command")
, annexGenMetaData = getbool (annex "genmetadata") False
, coreSymlinks = getbool "core.symlinks" True
, gcryptId = getmaybe "core.gcrypt-id"
}

View file

@ -17,7 +17,6 @@ module Types.MetaData (
MetaSerializable,
toMetaField,
mkMetaField,
tagMetaField,
fromMetaField,
toMetaValue,
mkMetaValue,
@ -25,7 +24,7 @@ module Types.MetaData (
unsetMetaData,
fromMetaValue,
fromMetaData,
newMetaData,
emptyMetaData,
updateMetaData,
unionMetaData,
differenceMetaData,
@ -81,7 +80,7 @@ instance MetaSerializable MetaData where
serialize (MetaData m) = unwords $ concatMap go $ M.toList m
where
go (f, vs) = serialize f : map serialize (S.toList vs)
deserialize = Just . getfield newMetaData . words
deserialize = Just . getfield emptyMetaData . words
where
getfield m [] = m
getfield m (w:ws) = maybe m (getvalues m ws) (deserialize w)
@ -152,8 +151,8 @@ fromMetaValue (MetaValue _ f) = f
fromMetaData :: MetaData -> [(MetaField, S.Set MetaValue)]
fromMetaData (MetaData m) = M.toList m
newMetaData :: MetaData
newMetaData = MetaData M.empty
emptyMetaData :: MetaData
emptyMetaData = MetaData M.empty
{- Can be used to set a value, or to unset it, depending on whether
- the MetaValue has CurrentlySet or not. -}
@ -202,10 +201,10 @@ data ModMeta
- Note that the new MetaData does not include all the
- values set in the input metadata. It only contains changed values. -}
modMeta :: MetaData -> ModMeta -> MetaData
modMeta _ (AddMeta f v) = updateMetaData f v newMetaData
modMeta _ (DelMeta f oldv) = updateMetaData f (unsetMetaValue oldv) newMetaData
modMeta _ (AddMeta f v) = updateMetaData f v emptyMetaData
modMeta _ (DelMeta f oldv) = updateMetaData f (unsetMetaValue oldv) emptyMetaData
modMeta m (SetMeta f v) = updateMetaData f v $
foldr (updateMetaData f) newMetaData $
foldr (updateMetaData f) emptyMetaData $
map unsetMetaValue $ S.toList $ currentMetaDataValues f m
{- Parses field=value, field+=value, field-=value -}
@ -233,9 +232,6 @@ mkMetaField f = maybe (Left $ badField f) Right (toMetaField f)
badField :: String -> String
badField f = "Illegal metadata field name, \"" ++ f ++ "\""
tagMetaField :: MetaField
tagMetaField = MetaField "tag"
{- Avoid putting too many fields in the map; extremely large maps make
- the seriaization test slow due to the sheer amount of data.
- It's unlikely that more than 100 fields of metadata will be used. -}
@ -254,7 +250,7 @@ prop_metadata_sane :: MetaData -> MetaField -> MetaValue -> Bool
prop_metadata_sane m f v = and
[ S.member v $ metaDataValues f m'
, not (isSet v) || S.member v (currentMetaDataValues f m')
, differenceMetaData m' newMetaData == m'
, differenceMetaData m' emptyMetaData == m'
]
where
m' = updateMetaData f v m

2
debian/changelog vendored
View file

@ -9,6 +9,8 @@ git-annex (5.20140222) UNRELEASED; urgency=medium
directory hierarchy in a view.
For example `git annex view tag=* podcasts/=*` makes a view in the form
tag/showname.
* annex.genmetadata can be set to make git-annex automatically set
metadata (year and month) when adding files.
-- Joey Hess <joeyh@debian.org> Fri, 21 Feb 2014 13:03:04 -0400

View file

@ -29,7 +29,7 @@ directories nest.
relevant metadata from the files.
TODO: It's not clear that
removing a file should nuke all the metadata used to filter it into the
branch (especially if it's derived metadata like the year).
branch
Currently, only metadata used for visible subdirs is added and removed
this way.
Also, this is not usable in direct mode because deleting the
@ -56,19 +56,7 @@ For example, by examining MP3 metadata.
Also auto add metadata when adding files to view branches. See below.
## derived metadata
This is probably not stored anywhere. It's computed on demand by a pure
function from the other metadata.
(Should be a general mechanism for this. (It probably generalizes to
sql queries if we want to go that far.))
### data metadata
TODO From the ctime, some additional
metadata is derived, at least year=yyyy and probably also month, etc.
### directory hierarchy metadata
## directory hierarchy metadata
From the original filename used in the master branch, when
constructing a view, generate fields. For example foo/bar/baz.mp3
@ -87,8 +75,6 @@ those filenames to derive the same metadata, unless there is persistent
storage. Luckily, the filenames used in the views currently include the
subdirs.
**done**!
# other uses for metadata
Uses are not limited to view branches.

View file

@ -1279,6 +1279,12 @@ Here are all the supported configuration settings.
Note that setting numcopies to 0 is very unsafe.
* `annex.genmetadata`
Set this to `true` to make git-annex automatically generate some metadata
when adding files to the repository. In particular, it stores
year and month metadata, from the file's modification date.
* `annex.queuesize`
git-annex builds a queue of git commands, in order to combine similar

41
doc/metadata.mdwn Normal file
View file

@ -0,0 +1,41 @@
git-annex allows you to store arbitrary metadata about files stored in the
git-annex repository. The metadata is stored in the `git-annex` branch, and
so is automatically kept in sync with the rest of git-annex's state, such
as [[location_tracking]] information.
Some of the things you can do with metadata include:
* Using `git annex metadata file` to show all
the metadata associated with a file.
* [[tips/metadata_driven_views]]
* Limiting the files git-annex commands act on to those with
or without particular metadata.
For example `git annex find --metadata tag=foo --or --metadata tag=bar`
* Using it in [[preferred_content]] expressions.
For example "tag=important or not author=me"
Each file (actually the underlying key) can have any number of metadata
fields, which each can have any number of values. For example, to tag
files, the `tag` field is typically used, with values set to each tag that
applies to the file.
The field names are freeform (but cannot include spaces). The metadata
values can contain absolutely anything you like -- but you're recommended
to keep it simple and reasonably short.
Here are some recommended metadata fields to use:
* `tag` - With each tag being a different value.
* `year`, `month` - When this particular version of the file came into
being.
To make git-annex automatically set the year and month when adding files,
run `git config annex.genmetadata true`
git-annex's metadata can be updated in a distributed fashion. For example,
two users, each with their own clone of a repository, can set and unset
metadata at the same time, even for the same field of the same file.
When they push their changes, `git annex merge` will combine their
metadata changes in a consistent and (probably) intuitive way.
See [[the metadata design page|design/metadata]] for more details.

View file

@ -1,5 +1,5 @@
git-annex now has support for storing
[[arbitrary metadata|design/metadata]] about annexed files. For example, this can be
[[arbitrary metadata|metadata]] about annexed files. For example, this can be
used to tag files, to record the author of a file, etc. The metadata is
synced around between repositories with the other information git-annex
keeps track of.