annex.genmetadata can be set to make git-annex automatically set metadata (year and month) when adding files
This commit is contained in:
parent
fa6f553083
commit
7498c5dd96
13 changed files with 135 additions and 43 deletions
51
Annex/MetaData.hs
Normal file
51
Annex/MetaData.hs
Normal file
|
@ -0,0 +1,51 @@
|
||||||
|
{- git-annex metadata
|
||||||
|
-
|
||||||
|
- Copyright 2014 Joey Hess <joey@kitenet.net>
|
||||||
|
-
|
||||||
|
- Licensed under the GNU GPL version 3 or higher.
|
||||||
|
-}
|
||||||
|
|
||||||
|
module Annex.MetaData where
|
||||||
|
|
||||||
|
import Common.Annex
|
||||||
|
import qualified Annex
|
||||||
|
import Types.MetaData
|
||||||
|
import Logs.MetaData
|
||||||
|
|
||||||
|
import qualified Data.Set as S
|
||||||
|
import qualified Data.Map as M
|
||||||
|
import Data.Time.Calendar
|
||||||
|
import Data.Time.Clock
|
||||||
|
import Data.Time.Clock.POSIX
|
||||||
|
|
||||||
|
tagMetaField :: MetaField
|
||||||
|
tagMetaField = MetaField "tag"
|
||||||
|
|
||||||
|
yearMetaField :: MetaField
|
||||||
|
yearMetaField = MetaField "year"
|
||||||
|
|
||||||
|
monthMetaField :: MetaField
|
||||||
|
monthMetaField = MetaField "month"
|
||||||
|
|
||||||
|
{- Generates metadata for a file that has just been ingested into the
|
||||||
|
- annex. Passed the FileStatus of the content file.
|
||||||
|
-
|
||||||
|
- Does not overwrite any existing metadata values for the key.
|
||||||
|
-}
|
||||||
|
genMetaData :: Key -> FileStatus -> Annex ()
|
||||||
|
genMetaData key status = whenM (annexGenMetaData <$> Annex.getGitConfig) $ do
|
||||||
|
metadata <- getCurrentMetaData key
|
||||||
|
let metadata' = genMetaData' status metadata
|
||||||
|
unless (metadata' == emptyMetaData) $
|
||||||
|
addMetaData key metadata'
|
||||||
|
|
||||||
|
genMetaData' :: FileStatus -> MetaData -> MetaData
|
||||||
|
genMetaData' status old = MetaData $ M.fromList $ filter isnew
|
||||||
|
[ (yearMetaField, S.singleton $ toMetaValue $ show y)
|
||||||
|
, (monthMetaField, S.singleton $ toMetaValue $ show m)
|
||||||
|
]
|
||||||
|
where
|
||||||
|
isnew (f, _) = S.null (currentMetaDataValues f old)
|
||||||
|
(y, m, _d) = toGregorian $ utctDay $
|
||||||
|
posixSecondsToUTCTime $ realToFrac $
|
||||||
|
modificationTime status
|
|
@ -300,8 +300,7 @@ applyView' mkviewedfile getfilemetadata view = do
|
||||||
genviewedfiles = viewedFiles view mkviewedfile -- enables memoization
|
genviewedfiles = viewedFiles view mkviewedfile -- enables memoization
|
||||||
go uh hasher f (Just (k, _)) = do
|
go uh hasher f (Just (k, _)) = do
|
||||||
metadata <- getCurrentMetaData k
|
metadata <- getCurrentMetaData k
|
||||||
let dirmetadata = getfilemetadata f
|
let metadata' = getfilemetadata f `unionMetaData` metadata
|
||||||
let metadata' = unionMetaData dirmetadata metadata
|
|
||||||
forM_ (genviewedfiles f metadata') $ \fv -> do
|
forM_ (genviewedfiles f metadata') $ \fv -> do
|
||||||
stagesymlink uh hasher fv =<< inRepo (gitAnnexLink fv k)
|
stagesymlink uh hasher fv =<< inRepo (gitAnnexLink fv k)
|
||||||
go uh hasher f Nothing
|
go uh hasher f Nothing
|
||||||
|
|
|
@ -19,6 +19,7 @@ import Annex.Content
|
||||||
import Annex.Content.Direct
|
import Annex.Content.Direct
|
||||||
import Annex.Perms
|
import Annex.Perms
|
||||||
import Annex.Link
|
import Annex.Link
|
||||||
|
import Annex.MetaData
|
||||||
import qualified Annex
|
import qualified Annex
|
||||||
import qualified Annex.Queue
|
import qualified Annex.Queue
|
||||||
#ifdef WITH_CLIBS
|
#ifdef WITH_CLIBS
|
||||||
|
@ -145,26 +146,32 @@ ingest Nothing = return (Nothing, Nothing)
|
||||||
ingest (Just source) = do
|
ingest (Just source) = do
|
||||||
backend <- chooseBackend $ keyFilename source
|
backend <- chooseBackend $ keyFilename source
|
||||||
k <- genKey source backend
|
k <- genKey source backend
|
||||||
cache <- liftIO $ genInodeCache $ contentLocation source
|
ms <- liftIO $ catchMaybeIO $ getFileStatus $ contentLocation source
|
||||||
case (cache, inodeCache source) of
|
let mcache = toInodeCache =<< ms
|
||||||
(_, Nothing) -> go k cache
|
case (mcache, inodeCache source) of
|
||||||
(Just newc, Just c) | compareStrong c newc -> go k cache
|
(_, Nothing) -> go k mcache ms
|
||||||
|
(Just newc, Just c) | compareStrong c newc -> go k mcache ms
|
||||||
_ -> failure "changed while it was being added"
|
_ -> failure "changed while it was being added"
|
||||||
where
|
where
|
||||||
go k cache = ifM isDirect ( godirect k cache , goindirect k cache )
|
go k mcache ms = ifM isDirect
|
||||||
|
( godirect k mcache ms
|
||||||
|
, goindirect k mcache ms
|
||||||
|
)
|
||||||
|
|
||||||
goindirect (Just (key, _)) mcache = do
|
goindirect (Just (key, _)) mcache ms = do
|
||||||
catchAnnex (moveAnnex key $ contentLocation source)
|
catchAnnex (moveAnnex key $ contentLocation source)
|
||||||
(undo (keyFilename source) key)
|
(undo (keyFilename source) key)
|
||||||
|
maybe noop (genMetaData key) ms
|
||||||
liftIO $ nukeFile $ keyFilename source
|
liftIO $ nukeFile $ keyFilename source
|
||||||
return $ (Just key, mcache)
|
return $ (Just key, mcache)
|
||||||
goindirect Nothing _ = failure "failed to generate a key"
|
goindirect _ _ _ = failure "failed to generate a key"
|
||||||
|
|
||||||
godirect (Just (key, _)) (Just cache) = do
|
godirect (Just (key, _)) (Just cache) ms = do
|
||||||
addInodeCache key cache
|
addInodeCache key cache
|
||||||
|
maybe noop (genMetaData key) ms
|
||||||
finishIngestDirect key source
|
finishIngestDirect key source
|
||||||
return $ (Just key, Just cache)
|
return $ (Just key, Just cache)
|
||||||
godirect _ _ = failure "failed to generate a key"
|
godirect _ _ _ = failure "failed to generate a key"
|
||||||
|
|
||||||
failure msg = do
|
failure msg = do
|
||||||
warning $ keyFilename source ++ " " ++ msg
|
warning $ keyFilename source ++ " " ++ msg
|
||||||
|
|
|
@ -10,6 +10,7 @@ module Command.MetaData where
|
||||||
import Common.Annex
|
import Common.Annex
|
||||||
import qualified Annex
|
import qualified Annex
|
||||||
import Command
|
import Command
|
||||||
|
import Annex.MetaData
|
||||||
import Logs.MetaData
|
import Logs.MetaData
|
||||||
import Types.MetaData
|
import Types.MetaData
|
||||||
|
|
||||||
|
@ -55,7 +56,7 @@ perform :: POSIXTime -> [ModMeta] -> Key -> CommandPerform
|
||||||
perform _ [] k = next $ cleanup k
|
perform _ [] k = next $ cleanup k
|
||||||
perform now ms k = do
|
perform now ms k = do
|
||||||
oldm <- getCurrentMetaData k
|
oldm <- getCurrentMetaData k
|
||||||
let m = foldl' unionMetaData newMetaData $ map (modMeta oldm) ms
|
let m = foldl' unionMetaData emptyMetaData $ map (modMeta oldm) ms
|
||||||
addMetaData' k m now
|
addMetaData' k m now
|
||||||
next $ cleanup k
|
next $ cleanup k
|
||||||
|
|
||||||
|
|
|
@ -14,6 +14,7 @@ import qualified Git.Command
|
||||||
import qualified Git.Ref
|
import qualified Git.Ref
|
||||||
import qualified Git.Branch
|
import qualified Git.Branch
|
||||||
import Types.MetaData
|
import Types.MetaData
|
||||||
|
import Annex.MetaData
|
||||||
import Types.View
|
import Types.View
|
||||||
import Annex.View
|
import Annex.View
|
||||||
import Logs.View
|
import Logs.View
|
||||||
|
|
|
@ -55,7 +55,7 @@ getMetaData = readLog . metaDataLogFile
|
||||||
getCurrentMetaData :: Key -> Annex MetaData
|
getCurrentMetaData :: Key -> Annex MetaData
|
||||||
getCurrentMetaData = currentMetaData . collect <$$> getMetaData
|
getCurrentMetaData = currentMetaData . collect <$$> getMetaData
|
||||||
where
|
where
|
||||||
collect = foldl' unionMetaData newMetaData . map value . S.toAscList
|
collect = foldl' unionMetaData emptyMetaData . map value . S.toAscList
|
||||||
|
|
||||||
{- Adds in some metadata, which can override existing values, or unset
|
{- Adds in some metadata, which can override existing values, or unset
|
||||||
- them, but otherwise leaves any existing metadata as-is. -}
|
- them, but otherwise leaves any existing metadata as-is. -}
|
||||||
|
@ -129,7 +129,7 @@ simplifyLog s = case sl of
|
||||||
|
|
||||||
go c _ [] = c
|
go c _ [] = c
|
||||||
go c newer (l:ls)
|
go c newer (l:ls)
|
||||||
| unique == newMetaData = go c newer ls
|
| unique == emptyMetaData = go c newer ls
|
||||||
| otherwise = go (l { value = unique } : c)
|
| otherwise = go (l { value = unique } : c)
|
||||||
(unionMetaData unique newer) ls
|
(unionMetaData unique newer) ls
|
||||||
where
|
where
|
||||||
|
|
|
@ -49,6 +49,7 @@ data GitConfig = GitConfig
|
||||||
, annexAutoUpgrade :: AutoUpgrade
|
, annexAutoUpgrade :: AutoUpgrade
|
||||||
, annexExpireUnused :: Maybe (Maybe Duration)
|
, annexExpireUnused :: Maybe (Maybe Duration)
|
||||||
, annexSecureEraseCommand :: Maybe String
|
, annexSecureEraseCommand :: Maybe String
|
||||||
|
, annexGenMetaData :: Bool
|
||||||
, coreSymlinks :: Bool
|
, coreSymlinks :: Bool
|
||||||
, gcryptId :: Maybe String
|
, gcryptId :: Maybe String
|
||||||
}
|
}
|
||||||
|
@ -81,6 +82,7 @@ extractGitConfig r = GitConfig
|
||||||
, annexExpireUnused = maybe Nothing Just . parseDuration
|
, annexExpireUnused = maybe Nothing Just . parseDuration
|
||||||
<$> getmaybe (annex "expireunused")
|
<$> getmaybe (annex "expireunused")
|
||||||
, annexSecureEraseCommand = getmaybe (annex "secure-erase-command")
|
, annexSecureEraseCommand = getmaybe (annex "secure-erase-command")
|
||||||
|
, annexGenMetaData = getbool (annex "genmetadata") False
|
||||||
, coreSymlinks = getbool "core.symlinks" True
|
, coreSymlinks = getbool "core.symlinks" True
|
||||||
, gcryptId = getmaybe "core.gcrypt-id"
|
, gcryptId = getmaybe "core.gcrypt-id"
|
||||||
}
|
}
|
||||||
|
|
|
@ -17,7 +17,6 @@ module Types.MetaData (
|
||||||
MetaSerializable,
|
MetaSerializable,
|
||||||
toMetaField,
|
toMetaField,
|
||||||
mkMetaField,
|
mkMetaField,
|
||||||
tagMetaField,
|
|
||||||
fromMetaField,
|
fromMetaField,
|
||||||
toMetaValue,
|
toMetaValue,
|
||||||
mkMetaValue,
|
mkMetaValue,
|
||||||
|
@ -25,7 +24,7 @@ module Types.MetaData (
|
||||||
unsetMetaData,
|
unsetMetaData,
|
||||||
fromMetaValue,
|
fromMetaValue,
|
||||||
fromMetaData,
|
fromMetaData,
|
||||||
newMetaData,
|
emptyMetaData,
|
||||||
updateMetaData,
|
updateMetaData,
|
||||||
unionMetaData,
|
unionMetaData,
|
||||||
differenceMetaData,
|
differenceMetaData,
|
||||||
|
@ -81,7 +80,7 @@ instance MetaSerializable MetaData where
|
||||||
serialize (MetaData m) = unwords $ concatMap go $ M.toList m
|
serialize (MetaData m) = unwords $ concatMap go $ M.toList m
|
||||||
where
|
where
|
||||||
go (f, vs) = serialize f : map serialize (S.toList vs)
|
go (f, vs) = serialize f : map serialize (S.toList vs)
|
||||||
deserialize = Just . getfield newMetaData . words
|
deserialize = Just . getfield emptyMetaData . words
|
||||||
where
|
where
|
||||||
getfield m [] = m
|
getfield m [] = m
|
||||||
getfield m (w:ws) = maybe m (getvalues m ws) (deserialize w)
|
getfield m (w:ws) = maybe m (getvalues m ws) (deserialize w)
|
||||||
|
@ -152,8 +151,8 @@ fromMetaValue (MetaValue _ f) = f
|
||||||
fromMetaData :: MetaData -> [(MetaField, S.Set MetaValue)]
|
fromMetaData :: MetaData -> [(MetaField, S.Set MetaValue)]
|
||||||
fromMetaData (MetaData m) = M.toList m
|
fromMetaData (MetaData m) = M.toList m
|
||||||
|
|
||||||
newMetaData :: MetaData
|
emptyMetaData :: MetaData
|
||||||
newMetaData = MetaData M.empty
|
emptyMetaData = MetaData M.empty
|
||||||
|
|
||||||
{- Can be used to set a value, or to unset it, depending on whether
|
{- Can be used to set a value, or to unset it, depending on whether
|
||||||
- the MetaValue has CurrentlySet or not. -}
|
- the MetaValue has CurrentlySet or not. -}
|
||||||
|
@ -202,10 +201,10 @@ data ModMeta
|
||||||
- Note that the new MetaData does not include all the
|
- Note that the new MetaData does not include all the
|
||||||
- values set in the input metadata. It only contains changed values. -}
|
- values set in the input metadata. It only contains changed values. -}
|
||||||
modMeta :: MetaData -> ModMeta -> MetaData
|
modMeta :: MetaData -> ModMeta -> MetaData
|
||||||
modMeta _ (AddMeta f v) = updateMetaData f v newMetaData
|
modMeta _ (AddMeta f v) = updateMetaData f v emptyMetaData
|
||||||
modMeta _ (DelMeta f oldv) = updateMetaData f (unsetMetaValue oldv) newMetaData
|
modMeta _ (DelMeta f oldv) = updateMetaData f (unsetMetaValue oldv) emptyMetaData
|
||||||
modMeta m (SetMeta f v) = updateMetaData f v $
|
modMeta m (SetMeta f v) = updateMetaData f v $
|
||||||
foldr (updateMetaData f) newMetaData $
|
foldr (updateMetaData f) emptyMetaData $
|
||||||
map unsetMetaValue $ S.toList $ currentMetaDataValues f m
|
map unsetMetaValue $ S.toList $ currentMetaDataValues f m
|
||||||
|
|
||||||
{- Parses field=value, field+=value, field-=value -}
|
{- Parses field=value, field+=value, field-=value -}
|
||||||
|
@ -233,9 +232,6 @@ mkMetaField f = maybe (Left $ badField f) Right (toMetaField f)
|
||||||
badField :: String -> String
|
badField :: String -> String
|
||||||
badField f = "Illegal metadata field name, \"" ++ f ++ "\""
|
badField f = "Illegal metadata field name, \"" ++ f ++ "\""
|
||||||
|
|
||||||
tagMetaField :: MetaField
|
|
||||||
tagMetaField = MetaField "tag"
|
|
||||||
|
|
||||||
{- Avoid putting too many fields in the map; extremely large maps make
|
{- Avoid putting too many fields in the map; extremely large maps make
|
||||||
- the seriaization test slow due to the sheer amount of data.
|
- the seriaization test slow due to the sheer amount of data.
|
||||||
- It's unlikely that more than 100 fields of metadata will be used. -}
|
- It's unlikely that more than 100 fields of metadata will be used. -}
|
||||||
|
@ -254,7 +250,7 @@ prop_metadata_sane :: MetaData -> MetaField -> MetaValue -> Bool
|
||||||
prop_metadata_sane m f v = and
|
prop_metadata_sane m f v = and
|
||||||
[ S.member v $ metaDataValues f m'
|
[ S.member v $ metaDataValues f m'
|
||||||
, not (isSet v) || S.member v (currentMetaDataValues f m')
|
, not (isSet v) || S.member v (currentMetaDataValues f m')
|
||||||
, differenceMetaData m' newMetaData == m'
|
, differenceMetaData m' emptyMetaData == m'
|
||||||
]
|
]
|
||||||
where
|
where
|
||||||
m' = updateMetaData f v m
|
m' = updateMetaData f v m
|
||||||
|
|
2
debian/changelog
vendored
2
debian/changelog
vendored
|
@ -9,6 +9,8 @@ git-annex (5.20140222) UNRELEASED; urgency=medium
|
||||||
directory hierarchy in a view.
|
directory hierarchy in a view.
|
||||||
For example `git annex view tag=* podcasts/=*` makes a view in the form
|
For example `git annex view tag=* podcasts/=*` makes a view in the form
|
||||||
tag/showname.
|
tag/showname.
|
||||||
|
* annex.genmetadata can be set to make git-annex automatically set
|
||||||
|
metadata (year and month) when adding files.
|
||||||
|
|
||||||
-- Joey Hess <joeyh@debian.org> Fri, 21 Feb 2014 13:03:04 -0400
|
-- Joey Hess <joeyh@debian.org> Fri, 21 Feb 2014 13:03:04 -0400
|
||||||
|
|
||||||
|
|
|
@ -29,7 +29,7 @@ directories nest.
|
||||||
relevant metadata from the files.
|
relevant metadata from the files.
|
||||||
TODO: It's not clear that
|
TODO: It's not clear that
|
||||||
removing a file should nuke all the metadata used to filter it into the
|
removing a file should nuke all the metadata used to filter it into the
|
||||||
branch (especially if it's derived metadata like the year).
|
branch
|
||||||
Currently, only metadata used for visible subdirs is added and removed
|
Currently, only metadata used for visible subdirs is added and removed
|
||||||
this way.
|
this way.
|
||||||
Also, this is not usable in direct mode because deleting the
|
Also, this is not usable in direct mode because deleting the
|
||||||
|
@ -56,19 +56,7 @@ For example, by examining MP3 metadata.
|
||||||
|
|
||||||
Also auto add metadata when adding files to view branches. See below.
|
Also auto add metadata when adding files to view branches. See below.
|
||||||
|
|
||||||
## derived metadata
|
## directory hierarchy metadata
|
||||||
|
|
||||||
This is probably not stored anywhere. It's computed on demand by a pure
|
|
||||||
function from the other metadata.
|
|
||||||
(Should be a general mechanism for this. (It probably generalizes to
|
|
||||||
sql queries if we want to go that far.))
|
|
||||||
|
|
||||||
### data metadata
|
|
||||||
|
|
||||||
TODO From the ctime, some additional
|
|
||||||
metadata is derived, at least year=yyyy and probably also month, etc.
|
|
||||||
|
|
||||||
### directory hierarchy metadata
|
|
||||||
|
|
||||||
From the original filename used in the master branch, when
|
From the original filename used in the master branch, when
|
||||||
constructing a view, generate fields. For example foo/bar/baz.mp3
|
constructing a view, generate fields. For example foo/bar/baz.mp3
|
||||||
|
@ -87,8 +75,6 @@ those filenames to derive the same metadata, unless there is persistent
|
||||||
storage. Luckily, the filenames used in the views currently include the
|
storage. Luckily, the filenames used in the views currently include the
|
||||||
subdirs.
|
subdirs.
|
||||||
|
|
||||||
**done**!
|
|
||||||
|
|
||||||
# other uses for metadata
|
# other uses for metadata
|
||||||
|
|
||||||
Uses are not limited to view branches.
|
Uses are not limited to view branches.
|
||||||
|
|
|
@ -1279,6 +1279,12 @@ Here are all the supported configuration settings.
|
||||||
|
|
||||||
Note that setting numcopies to 0 is very unsafe.
|
Note that setting numcopies to 0 is very unsafe.
|
||||||
|
|
||||||
|
* `annex.genmetadata`
|
||||||
|
|
||||||
|
Set this to `true` to make git-annex automatically generate some metadata
|
||||||
|
when adding files to the repository. In particular, it stores
|
||||||
|
year and month metadata, from the file's modification date.
|
||||||
|
|
||||||
* `annex.queuesize`
|
* `annex.queuesize`
|
||||||
|
|
||||||
git-annex builds a queue of git commands, in order to combine similar
|
git-annex builds a queue of git commands, in order to combine similar
|
||||||
|
|
41
doc/metadata.mdwn
Normal file
41
doc/metadata.mdwn
Normal file
|
@ -0,0 +1,41 @@
|
||||||
|
git-annex allows you to store arbitrary metadata about files stored in the
|
||||||
|
git-annex repository. The metadata is stored in the `git-annex` branch, and
|
||||||
|
so is automatically kept in sync with the rest of git-annex's state, such
|
||||||
|
as [[location_tracking]] information.
|
||||||
|
|
||||||
|
Some of the things you can do with metadata include:
|
||||||
|
|
||||||
|
* Using `git annex metadata file` to show all
|
||||||
|
the metadata associated with a file.
|
||||||
|
* [[tips/metadata_driven_views]]
|
||||||
|
* Limiting the files git-annex commands act on to those with
|
||||||
|
or without particular metadata.
|
||||||
|
For example `git annex find --metadata tag=foo --or --metadata tag=bar`
|
||||||
|
* Using it in [[preferred_content]] expressions.
|
||||||
|
For example "tag=important or not author=me"
|
||||||
|
|
||||||
|
Each file (actually the underlying key) can have any number of metadata
|
||||||
|
fields, which each can have any number of values. For example, to tag
|
||||||
|
files, the `tag` field is typically used, with values set to each tag that
|
||||||
|
applies to the file.
|
||||||
|
|
||||||
|
The field names are freeform (but cannot include spaces). The metadata
|
||||||
|
values can contain absolutely anything you like -- but you're recommended
|
||||||
|
to keep it simple and reasonably short.
|
||||||
|
|
||||||
|
Here are some recommended metadata fields to use:
|
||||||
|
|
||||||
|
* `tag` - With each tag being a different value.
|
||||||
|
* `year`, `month` - When this particular version of the file came into
|
||||||
|
being.
|
||||||
|
|
||||||
|
To make git-annex automatically set the year and month when adding files,
|
||||||
|
run `git config annex.genmetadata true`
|
||||||
|
|
||||||
|
git-annex's metadata can be updated in a distributed fashion. For example,
|
||||||
|
two users, each with their own clone of a repository, can set and unset
|
||||||
|
metadata at the same time, even for the same field of the same file.
|
||||||
|
When they push their changes, `git annex merge` will combine their
|
||||||
|
metadata changes in a consistent and (probably) intuitive way.
|
||||||
|
|
||||||
|
See [[the metadata design page|design/metadata]] for more details.
|
|
@ -1,5 +1,5 @@
|
||||||
git-annex now has support for storing
|
git-annex now has support for storing
|
||||||
[[arbitrary metadata|design/metadata]] about annexed files. For example, this can be
|
[[arbitrary metadata|metadata]] about annexed files. For example, this can be
|
||||||
used to tag files, to record the author of a file, etc. The metadata is
|
used to tag files, to record the author of a file, etc. The metadata is
|
||||||
synced around between repositories with the other information git-annex
|
synced around between repositories with the other information git-annex
|
||||||
keeps track of.
|
keeps track of.
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue