annex.genmetadata can be set to make git-annex automatically set metadata (year and month) when adding files
This commit is contained in:
parent
fa6f553083
commit
7498c5dd96
13 changed files with 135 additions and 43 deletions
51
Annex/MetaData.hs
Normal file
51
Annex/MetaData.hs
Normal file
|
@ -0,0 +1,51 @@
|
|||
{- git-annex metadata
|
||||
-
|
||||
- Copyright 2014 Joey Hess <joey@kitenet.net>
|
||||
-
|
||||
- Licensed under the GNU GPL version 3 or higher.
|
||||
-}
|
||||
|
||||
module Annex.MetaData where
|
||||
|
||||
import Common.Annex
|
||||
import qualified Annex
|
||||
import Types.MetaData
|
||||
import Logs.MetaData
|
||||
|
||||
import qualified Data.Set as S
|
||||
import qualified Data.Map as M
|
||||
import Data.Time.Calendar
|
||||
import Data.Time.Clock
|
||||
import Data.Time.Clock.POSIX
|
||||
|
||||
tagMetaField :: MetaField
|
||||
tagMetaField = MetaField "tag"
|
||||
|
||||
yearMetaField :: MetaField
|
||||
yearMetaField = MetaField "year"
|
||||
|
||||
monthMetaField :: MetaField
|
||||
monthMetaField = MetaField "month"
|
||||
|
||||
{- Generates metadata for a file that has just been ingested into the
|
||||
- annex. Passed the FileStatus of the content file.
|
||||
-
|
||||
- Does not overwrite any existing metadata values for the key.
|
||||
-}
|
||||
genMetaData :: Key -> FileStatus -> Annex ()
|
||||
genMetaData key status = whenM (annexGenMetaData <$> Annex.getGitConfig) $ do
|
||||
metadata <- getCurrentMetaData key
|
||||
let metadata' = genMetaData' status metadata
|
||||
unless (metadata' == emptyMetaData) $
|
||||
addMetaData key metadata'
|
||||
|
||||
genMetaData' :: FileStatus -> MetaData -> MetaData
|
||||
genMetaData' status old = MetaData $ M.fromList $ filter isnew
|
||||
[ (yearMetaField, S.singleton $ toMetaValue $ show y)
|
||||
, (monthMetaField, S.singleton $ toMetaValue $ show m)
|
||||
]
|
||||
where
|
||||
isnew (f, _) = S.null (currentMetaDataValues f old)
|
||||
(y, m, _d) = toGregorian $ utctDay $
|
||||
posixSecondsToUTCTime $ realToFrac $
|
||||
modificationTime status
|
|
@ -300,8 +300,7 @@ applyView' mkviewedfile getfilemetadata view = do
|
|||
genviewedfiles = viewedFiles view mkviewedfile -- enables memoization
|
||||
go uh hasher f (Just (k, _)) = do
|
||||
metadata <- getCurrentMetaData k
|
||||
let dirmetadata = getfilemetadata f
|
||||
let metadata' = unionMetaData dirmetadata metadata
|
||||
let metadata' = getfilemetadata f `unionMetaData` metadata
|
||||
forM_ (genviewedfiles f metadata') $ \fv -> do
|
||||
stagesymlink uh hasher fv =<< inRepo (gitAnnexLink fv k)
|
||||
go uh hasher f Nothing
|
||||
|
|
|
@ -19,6 +19,7 @@ import Annex.Content
|
|||
import Annex.Content.Direct
|
||||
import Annex.Perms
|
||||
import Annex.Link
|
||||
import Annex.MetaData
|
||||
import qualified Annex
|
||||
import qualified Annex.Queue
|
||||
#ifdef WITH_CLIBS
|
||||
|
@ -145,26 +146,32 @@ ingest Nothing = return (Nothing, Nothing)
|
|||
ingest (Just source) = do
|
||||
backend <- chooseBackend $ keyFilename source
|
||||
k <- genKey source backend
|
||||
cache <- liftIO $ genInodeCache $ contentLocation source
|
||||
case (cache, inodeCache source) of
|
||||
(_, Nothing) -> go k cache
|
||||
(Just newc, Just c) | compareStrong c newc -> go k cache
|
||||
ms <- liftIO $ catchMaybeIO $ getFileStatus $ contentLocation source
|
||||
let mcache = toInodeCache =<< ms
|
||||
case (mcache, inodeCache source) of
|
||||
(_, Nothing) -> go k mcache ms
|
||||
(Just newc, Just c) | compareStrong c newc -> go k mcache ms
|
||||
_ -> failure "changed while it was being added"
|
||||
where
|
||||
go k cache = ifM isDirect ( godirect k cache , goindirect k cache )
|
||||
go k mcache ms = ifM isDirect
|
||||
( godirect k mcache ms
|
||||
, goindirect k mcache ms
|
||||
)
|
||||
|
||||
goindirect (Just (key, _)) mcache = do
|
||||
goindirect (Just (key, _)) mcache ms = do
|
||||
catchAnnex (moveAnnex key $ contentLocation source)
|
||||
(undo (keyFilename source) key)
|
||||
maybe noop (genMetaData key) ms
|
||||
liftIO $ nukeFile $ keyFilename source
|
||||
return $ (Just key, mcache)
|
||||
goindirect Nothing _ = failure "failed to generate a key"
|
||||
goindirect _ _ _ = failure "failed to generate a key"
|
||||
|
||||
godirect (Just (key, _)) (Just cache) = do
|
||||
godirect (Just (key, _)) (Just cache) ms = do
|
||||
addInodeCache key cache
|
||||
maybe noop (genMetaData key) ms
|
||||
finishIngestDirect key source
|
||||
return $ (Just key, Just cache)
|
||||
godirect _ _ = failure "failed to generate a key"
|
||||
godirect _ _ _ = failure "failed to generate a key"
|
||||
|
||||
failure msg = do
|
||||
warning $ keyFilename source ++ " " ++ msg
|
||||
|
|
|
@ -10,6 +10,7 @@ module Command.MetaData where
|
|||
import Common.Annex
|
||||
import qualified Annex
|
||||
import Command
|
||||
import Annex.MetaData
|
||||
import Logs.MetaData
|
||||
import Types.MetaData
|
||||
|
||||
|
@ -55,7 +56,7 @@ perform :: POSIXTime -> [ModMeta] -> Key -> CommandPerform
|
|||
perform _ [] k = next $ cleanup k
|
||||
perform now ms k = do
|
||||
oldm <- getCurrentMetaData k
|
||||
let m = foldl' unionMetaData newMetaData $ map (modMeta oldm) ms
|
||||
let m = foldl' unionMetaData emptyMetaData $ map (modMeta oldm) ms
|
||||
addMetaData' k m now
|
||||
next $ cleanup k
|
||||
|
||||
|
|
|
@ -14,6 +14,7 @@ import qualified Git.Command
|
|||
import qualified Git.Ref
|
||||
import qualified Git.Branch
|
||||
import Types.MetaData
|
||||
import Annex.MetaData
|
||||
import Types.View
|
||||
import Annex.View
|
||||
import Logs.View
|
||||
|
|
|
@ -55,7 +55,7 @@ getMetaData = readLog . metaDataLogFile
|
|||
getCurrentMetaData :: Key -> Annex MetaData
|
||||
getCurrentMetaData = currentMetaData . collect <$$> getMetaData
|
||||
where
|
||||
collect = foldl' unionMetaData newMetaData . map value . S.toAscList
|
||||
collect = foldl' unionMetaData emptyMetaData . map value . S.toAscList
|
||||
|
||||
{- Adds in some metadata, which can override existing values, or unset
|
||||
- them, but otherwise leaves any existing metadata as-is. -}
|
||||
|
@ -129,7 +129,7 @@ simplifyLog s = case sl of
|
|||
|
||||
go c _ [] = c
|
||||
go c newer (l:ls)
|
||||
| unique == newMetaData = go c newer ls
|
||||
| unique == emptyMetaData = go c newer ls
|
||||
| otherwise = go (l { value = unique } : c)
|
||||
(unionMetaData unique newer) ls
|
||||
where
|
||||
|
|
|
@ -49,6 +49,7 @@ data GitConfig = GitConfig
|
|||
, annexAutoUpgrade :: AutoUpgrade
|
||||
, annexExpireUnused :: Maybe (Maybe Duration)
|
||||
, annexSecureEraseCommand :: Maybe String
|
||||
, annexGenMetaData :: Bool
|
||||
, coreSymlinks :: Bool
|
||||
, gcryptId :: Maybe String
|
||||
}
|
||||
|
@ -81,6 +82,7 @@ extractGitConfig r = GitConfig
|
|||
, annexExpireUnused = maybe Nothing Just . parseDuration
|
||||
<$> getmaybe (annex "expireunused")
|
||||
, annexSecureEraseCommand = getmaybe (annex "secure-erase-command")
|
||||
, annexGenMetaData = getbool (annex "genmetadata") False
|
||||
, coreSymlinks = getbool "core.symlinks" True
|
||||
, gcryptId = getmaybe "core.gcrypt-id"
|
||||
}
|
||||
|
|
|
@ -17,7 +17,6 @@ module Types.MetaData (
|
|||
MetaSerializable,
|
||||
toMetaField,
|
||||
mkMetaField,
|
||||
tagMetaField,
|
||||
fromMetaField,
|
||||
toMetaValue,
|
||||
mkMetaValue,
|
||||
|
@ -25,7 +24,7 @@ module Types.MetaData (
|
|||
unsetMetaData,
|
||||
fromMetaValue,
|
||||
fromMetaData,
|
||||
newMetaData,
|
||||
emptyMetaData,
|
||||
updateMetaData,
|
||||
unionMetaData,
|
||||
differenceMetaData,
|
||||
|
@ -81,7 +80,7 @@ instance MetaSerializable MetaData where
|
|||
serialize (MetaData m) = unwords $ concatMap go $ M.toList m
|
||||
where
|
||||
go (f, vs) = serialize f : map serialize (S.toList vs)
|
||||
deserialize = Just . getfield newMetaData . words
|
||||
deserialize = Just . getfield emptyMetaData . words
|
||||
where
|
||||
getfield m [] = m
|
||||
getfield m (w:ws) = maybe m (getvalues m ws) (deserialize w)
|
||||
|
@ -152,8 +151,8 @@ fromMetaValue (MetaValue _ f) = f
|
|||
fromMetaData :: MetaData -> [(MetaField, S.Set MetaValue)]
|
||||
fromMetaData (MetaData m) = M.toList m
|
||||
|
||||
newMetaData :: MetaData
|
||||
newMetaData = MetaData M.empty
|
||||
emptyMetaData :: MetaData
|
||||
emptyMetaData = MetaData M.empty
|
||||
|
||||
{- Can be used to set a value, or to unset it, depending on whether
|
||||
- the MetaValue has CurrentlySet or not. -}
|
||||
|
@ -202,10 +201,10 @@ data ModMeta
|
|||
- Note that the new MetaData does not include all the
|
||||
- values set in the input metadata. It only contains changed values. -}
|
||||
modMeta :: MetaData -> ModMeta -> MetaData
|
||||
modMeta _ (AddMeta f v) = updateMetaData f v newMetaData
|
||||
modMeta _ (DelMeta f oldv) = updateMetaData f (unsetMetaValue oldv) newMetaData
|
||||
modMeta _ (AddMeta f v) = updateMetaData f v emptyMetaData
|
||||
modMeta _ (DelMeta f oldv) = updateMetaData f (unsetMetaValue oldv) emptyMetaData
|
||||
modMeta m (SetMeta f v) = updateMetaData f v $
|
||||
foldr (updateMetaData f) newMetaData $
|
||||
foldr (updateMetaData f) emptyMetaData $
|
||||
map unsetMetaValue $ S.toList $ currentMetaDataValues f m
|
||||
|
||||
{- Parses field=value, field+=value, field-=value -}
|
||||
|
@ -233,9 +232,6 @@ mkMetaField f = maybe (Left $ badField f) Right (toMetaField f)
|
|||
badField :: String -> String
|
||||
badField f = "Illegal metadata field name, \"" ++ f ++ "\""
|
||||
|
||||
tagMetaField :: MetaField
|
||||
tagMetaField = MetaField "tag"
|
||||
|
||||
{- Avoid putting too many fields in the map; extremely large maps make
|
||||
- the seriaization test slow due to the sheer amount of data.
|
||||
- It's unlikely that more than 100 fields of metadata will be used. -}
|
||||
|
@ -254,7 +250,7 @@ prop_metadata_sane :: MetaData -> MetaField -> MetaValue -> Bool
|
|||
prop_metadata_sane m f v = and
|
||||
[ S.member v $ metaDataValues f m'
|
||||
, not (isSet v) || S.member v (currentMetaDataValues f m')
|
||||
, differenceMetaData m' newMetaData == m'
|
||||
, differenceMetaData m' emptyMetaData == m'
|
||||
]
|
||||
where
|
||||
m' = updateMetaData f v m
|
||||
|
|
2
debian/changelog
vendored
2
debian/changelog
vendored
|
@ -9,6 +9,8 @@ git-annex (5.20140222) UNRELEASED; urgency=medium
|
|||
directory hierarchy in a view.
|
||||
For example `git annex view tag=* podcasts/=*` makes a view in the form
|
||||
tag/showname.
|
||||
* annex.genmetadata can be set to make git-annex automatically set
|
||||
metadata (year and month) when adding files.
|
||||
|
||||
-- Joey Hess <joeyh@debian.org> Fri, 21 Feb 2014 13:03:04 -0400
|
||||
|
||||
|
|
|
@ -29,7 +29,7 @@ directories nest.
|
|||
relevant metadata from the files.
|
||||
TODO: It's not clear that
|
||||
removing a file should nuke all the metadata used to filter it into the
|
||||
branch (especially if it's derived metadata like the year).
|
||||
branch
|
||||
Currently, only metadata used for visible subdirs is added and removed
|
||||
this way.
|
||||
Also, this is not usable in direct mode because deleting the
|
||||
|
@ -56,19 +56,7 @@ For example, by examining MP3 metadata.
|
|||
|
||||
Also auto add metadata when adding files to view branches. See below.
|
||||
|
||||
## derived metadata
|
||||
|
||||
This is probably not stored anywhere. It's computed on demand by a pure
|
||||
function from the other metadata.
|
||||
(Should be a general mechanism for this. (It probably generalizes to
|
||||
sql queries if we want to go that far.))
|
||||
|
||||
### data metadata
|
||||
|
||||
TODO From the ctime, some additional
|
||||
metadata is derived, at least year=yyyy and probably also month, etc.
|
||||
|
||||
### directory hierarchy metadata
|
||||
## directory hierarchy metadata
|
||||
|
||||
From the original filename used in the master branch, when
|
||||
constructing a view, generate fields. For example foo/bar/baz.mp3
|
||||
|
@ -87,8 +75,6 @@ those filenames to derive the same metadata, unless there is persistent
|
|||
storage. Luckily, the filenames used in the views currently include the
|
||||
subdirs.
|
||||
|
||||
**done**!
|
||||
|
||||
# other uses for metadata
|
||||
|
||||
Uses are not limited to view branches.
|
||||
|
|
|
@ -1279,6 +1279,12 @@ Here are all the supported configuration settings.
|
|||
|
||||
Note that setting numcopies to 0 is very unsafe.
|
||||
|
||||
* `annex.genmetadata`
|
||||
|
||||
Set this to `true` to make git-annex automatically generate some metadata
|
||||
when adding files to the repository. In particular, it stores
|
||||
year and month metadata, from the file's modification date.
|
||||
|
||||
* `annex.queuesize`
|
||||
|
||||
git-annex builds a queue of git commands, in order to combine similar
|
||||
|
|
41
doc/metadata.mdwn
Normal file
41
doc/metadata.mdwn
Normal file
|
@ -0,0 +1,41 @@
|
|||
git-annex allows you to store arbitrary metadata about files stored in the
|
||||
git-annex repository. The metadata is stored in the `git-annex` branch, and
|
||||
so is automatically kept in sync with the rest of git-annex's state, such
|
||||
as [[location_tracking]] information.
|
||||
|
||||
Some of the things you can do with metadata include:
|
||||
|
||||
* Using `git annex metadata file` to show all
|
||||
the metadata associated with a file.
|
||||
* [[tips/metadata_driven_views]]
|
||||
* Limiting the files git-annex commands act on to those with
|
||||
or without particular metadata.
|
||||
For example `git annex find --metadata tag=foo --or --metadata tag=bar`
|
||||
* Using it in [[preferred_content]] expressions.
|
||||
For example "tag=important or not author=me"
|
||||
|
||||
Each file (actually the underlying key) can have any number of metadata
|
||||
fields, which each can have any number of values. For example, to tag
|
||||
files, the `tag` field is typically used, with values set to each tag that
|
||||
applies to the file.
|
||||
|
||||
The field names are freeform (but cannot include spaces). The metadata
|
||||
values can contain absolutely anything you like -- but you're recommended
|
||||
to keep it simple and reasonably short.
|
||||
|
||||
Here are some recommended metadata fields to use:
|
||||
|
||||
* `tag` - With each tag being a different value.
|
||||
* `year`, `month` - When this particular version of the file came into
|
||||
being.
|
||||
|
||||
To make git-annex automatically set the year and month when adding files,
|
||||
run `git config annex.genmetadata true`
|
||||
|
||||
git-annex's metadata can be updated in a distributed fashion. For example,
|
||||
two users, each with their own clone of a repository, can set and unset
|
||||
metadata at the same time, even for the same field of the same file.
|
||||
When they push their changes, `git annex merge` will combine their
|
||||
metadata changes in a consistent and (probably) intuitive way.
|
||||
|
||||
See [[the metadata design page|design/metadata]] for more details.
|
|
@ -1,5 +1,5 @@
|
|||
git-annex now has support for storing
|
||||
[[arbitrary metadata|design/metadata]] about annexed files. For example, this can be
|
||||
[[arbitrary metadata|metadata]] about annexed files. For example, this can be
|
||||
used to tag files, to record the author of a file, etc. The metadata is
|
||||
synced around between repositories with the other information git-annex
|
||||
keeps track of.
|
||||
|
|
Loading…
Reference in a new issue