pre-commit-annex hook script to automatically extract metadata from lots of types of files

Using the extract(1) program to do the heavy lifting.

Decided to make git-annex run pre-commit-annex when committing. Since
git-annex pre-commit also runs it, it'll be run when git commit is run too,
via the pre-commit hook. This basically gives back the pre-commit hook
that git-annex took away. The implementation avoids repeatedly looking
for the hook script when the assistant is running and committing
repeatedly; only checks if the hook is available once.

To make the script simpler, made git-annex metadata -s field?=value
only set a field when it's not already got a value.

This commit was sponsored by bak.
This commit is contained in:
Joey Hess 2014-03-02 18:01:07 -04:00
parent 34059c09e2
commit d0fce426c4
12 changed files with 152 additions and 16 deletions

View file

@ -44,6 +44,7 @@ import Git.CatFile
import Git.CheckAttr import Git.CheckAttr
import Git.CheckIgnore import Git.CheckIgnore
import Git.SharedRepository import Git.SharedRepository
import qualified Git.Hook
import qualified Git.Queue import qualified Git.Queue
import Types.Key import Types.Key
import Types.Backend import Types.Backend
@ -118,6 +119,7 @@ data AnnexState = AnnexState
, errcounter :: Integer , errcounter :: Integer
, unusedkeys :: Maybe (S.Set Key) , unusedkeys :: Maybe (S.Set Key)
, quviversion :: Maybe QuviVersion , quviversion :: Maybe QuviVersion
, existinghooks :: M.Map Git.Hook.Hook Bool
} }
newState :: GitConfig -> Git.Repo -> AnnexState newState :: GitConfig -> Git.Repo -> AnnexState
@ -157,6 +159,7 @@ newState c r = AnnexState
, errcounter = 0 , errcounter = 0
, unusedkeys = Nothing , unusedkeys = Nothing
, quviversion = Nothing , quviversion = Nothing
, existinghooks = M.empty
} }
{- Makes an Annex state object for the specified git repo. {- Makes an Annex state object for the specified git repo.

View file

@ -1,9 +1,10 @@
{- git-annex git hooks {- git-annex git hooks
- -
- Note that it's important that the scripts not change, otherwise - Note that it's important that the scripts installed by git-annex
- removing old hooks using an old version of the script would fail. - not change, otherwise removing old hooks using an old version of
- the script would fail.
- -
- Copyright 2013 Joey Hess <joey@kitenet.net> - Copyright 2013-2014 Joey Hess <joey@kitenet.net>
- -
- Licensed under the GNU GPL version 3 or higher. - Licensed under the GNU GPL version 3 or higher.
-} -}
@ -12,12 +13,19 @@ module Annex.Hook where
import Common.Annex import Common.Annex
import qualified Git.Hook as Git import qualified Git.Hook as Git
import Utility.Shell
import Config import Config
import qualified Annex
import Utility.Shell
import Utility.FileMode
import qualified Data.Map as M
preCommitHook :: Git.Hook preCommitHook :: Git.Hook
preCommitHook = Git.Hook "pre-commit" (mkHookScript "git annex pre-commit .") preCommitHook = Git.Hook "pre-commit" (mkHookScript "git annex pre-commit .")
{- The pre-commit-annex hook. Its script field is left empty here. -}
preCommitAnnexHook :: Git.Hook
preCommitAnnexHook = Git.Hook
    { Git.hookName = "pre-commit-annex"
    , Git.hookScript = ""
    }
mkHookScript :: String -> String mkHookScript :: String -> String
mkHookScript s = unlines mkHookScript s = unlines
[ shebang_local [ shebang_local
@ -40,3 +48,23 @@ hookWarning :: Git.Hook -> String -> Annex ()
hookWarning h msg = do hookWarning h msg = do
r <- gitRepo r <- gitRepo
warning $ Git.hookName h ++ " hook (" ++ Git.hookFile h r ++ ") " ++ msg warning $ Git.hookName h ++ " hook (" ++ Git.hookFile h r ++ ") " ++ msg
{- Runs a hook, if its file is present and executable.
 -
 - To avoid checking if the hook exists every time, the result of the
 - check is cached in the Annex state (Annex.existinghooks), so each
 - hook is only looked for once.
 -
 - A hook whose file is missing or not executable is silently skipped.
 - A hook that runs but fails only results in a warning. -}
runAnnexHook :: Git.Hook -> Annex ()
runAnnexHook hook = do
    cmd <- fromRepo $ Git.hookFile hook
    m <- Annex.getState Annex.existinghooks
    case M.lookup hook m of
        Just True -> run cmd
        Just False -> noop
        Nothing -> do
            exists <- hookexists cmd
            -- Remember the answer, whether positive or negative.
            Annex.changeState $ \s -> s
                { Annex.existinghooks = M.insert hook exists m }
            when exists $
                run cmd
  where
    -- getFileStatus throws an IO error when the file does not exist,
    -- which is the usual case for an optional hook; treat that the
    -- same as a file that is not executable.
    hookexists f = liftIO $ catchBoolIO $
        isExecutable . fileMode <$> getFileStatus f
    run cmd = unlessM (liftIO $ boolSystem cmd []) $
        warning $ cmd ++ " failed"

View file

@ -13,6 +13,7 @@ import Config
import qualified Command.Add import qualified Command.Add
import qualified Command.Fix import qualified Command.Fix
import Annex.Direct import Annex.Direct
import Annex.Hook
import Annex.View import Annex.View
import Annex.View.ViewedFile import Annex.View.ViewedFile
import Logs.View import Logs.View
@ -28,13 +29,16 @@ def = [command "pre-commit" paramPaths seek SectionPlumbing
seek :: CommandSeek seek :: CommandSeek
seek ps = ifM isDirect seek ps = ifM isDirect
-- update direct mode mappings for committed files ( do
( withWords startDirect ps -- update direct mode mappings for committed files
withWords startDirect ps
runAnnexHook preCommitAnnexHook
, do , do
-- fix symlinks to files being committed -- fix symlinks to files being committed
withFilesToBeCommitted (whenAnnexed Command.Fix.start) ps withFilesToBeCommitted (whenAnnexed Command.Fix.start) ps
-- inject unlocked files into the annex -- inject unlocked files into the annex
withFilesUnlockedToBeCommitted startIndirect ps withFilesUnlockedToBeCommitted startIndirect ps
runAnnexHook preCommitAnnexHook
-- committing changes to a view updates metadata -- committing changes to a view updates metadata
mv <- currentView mv <- currentView
case mv of case mv of
@ -44,6 +48,7 @@ seek ps = ifM isDirect
(removeViewMetaData v) (removeViewMetaData v)
) )
startIndirect :: FilePath -> CommandStart startIndirect :: FilePath -> CommandStart
startIndirect f = next $ do startIndirect f = next $ do
unlessM (callCommandAction $ Command.Add.start f) $ unlessM (callCommandAction $ Command.Add.start f) $

View file

@ -18,6 +18,7 @@ import qualified Types.Remote as Remote
import Annex.Direct import Annex.Direct
import Annex.CatFile import Annex.CatFile
import Annex.Link import Annex.Link
import Annex.Hook
import qualified Git.Command import qualified Git.Command
import qualified Git.LsFiles as LsFiles import qualified Git.LsFiles as LsFiles
import qualified Git.Merge import qualified Git.Merge
@ -156,6 +157,7 @@ commitStaged commitmessage = go =<< inRepo Git.Branch.currentUnsafe
where where
go Nothing = return False go Nothing = return False
go (Just branch) = do go (Just branch) = do
runAnnexHook preCommitAnnexHook
parent <- inRepo $ Git.Ref.sha branch parent <- inRepo $ Git.Ref.sha branch
void $ inRepo $ Git.Branch.commit False commitmessage branch void $ inRepo $ Git.Branch.commit False commitmessage branch
(maybeToList parent) (maybeToList parent)

View file

@ -15,6 +15,10 @@ data Hook = Hook
{ hookName :: FilePath { hookName :: FilePath
, hookScript :: String , hookScript :: String
} }
deriving (Ord)
{- Two hooks are equal when they have the same name; the script is not
 - compared. -}
instance Eq Hook where
    Hook { hookName = x } == Hook { hookName = y } = x == y
hookFile :: Hook -> Repo -> FilePath hookFile :: Hook -> Repo -> FilePath
hookFile h r = localGitDir r </> "hooks" </> hookName h hookFile h r = localGitDir r </> "hooks" </> hookName h

View file

@ -219,6 +219,7 @@ data ModMeta
= AddMeta MetaField MetaValue = AddMeta MetaField MetaValue
| DelMeta MetaField MetaValue | DelMeta MetaField MetaValue
| SetMeta MetaField MetaValue -- removes any existing values | SetMeta MetaField MetaValue -- removes any existing values
| MaybeSetMeta MetaField MetaValue -- when field has no existing value
{- Applies a ModMeta, generating the new MetaData. {- Applies a ModMeta, generating the new MetaData.
- Note that the new MetaData does not include all the - Note that the new MetaData does not include all the
@ -229,12 +230,16 @@ modMeta _ (DelMeta f oldv) = updateMetaData f (unsetMetaValue oldv) emptyMetaDat
modMeta m (SetMeta f v) = updateMetaData f v $ modMeta m (SetMeta f v) = updateMetaData f v $
foldr (updateMetaData f) emptyMetaData $ foldr (updateMetaData f) emptyMetaData $
map unsetMetaValue $ S.toList $ currentMetaDataValues f m map unsetMetaValue $ S.toList $ currentMetaDataValues f m
modMeta m (MaybeSetMeta f v)
| S.null (currentMetaDataValues f m) = updateMetaData f v emptyMetaData
| otherwise = emptyMetaData
{- Parses field=value, field+=value, field-=value -} {- Parses field=value, field+=value, field-=value, field?=value -}
parseModMeta :: String -> Either String ModMeta parseModMeta :: String -> Either String ModMeta
parseModMeta p = case lastMaybe f of parseModMeta p = case lastMaybe f of
Just '+' -> AddMeta <$> mkMetaField f' <*> v Just '+' -> AddMeta <$> mkMetaField f' <*> v
Just '-' -> DelMeta <$> mkMetaField f' <*> v Just '-' -> DelMeta <$> mkMetaField f' <*> v
Just '?' -> MaybeSetMeta <$> mkMetaField f' <*> v
_ -> SetMeta <$> mkMetaField f <*> v _ -> SetMeta <$> mkMetaField f <*> v
where where
(f, sv) = separate (== '=') p (f, sv) = separate (== '=') p

3
debian/changelog vendored
View file

@ -16,6 +16,9 @@ git-annex (5.20140228) UNRELEASED; urgency=medium
using !tag and field!=value. using !tag and field!=value.
* vadd: Allow listing multiple desired values for a field. * vadd: Allow listing multiple desired values for a field.
* view: Refuse to enter a view when no branch is currently checked out. * view: Refuse to enter a view when no branch is currently checked out.
* metadata: To only set a field when it's not already got a value, use
-s field?=value
* Run .git/hooks/pre-commit-annex whenever a commit is made.
-- Joey Hess <joeyh@debian.org> Fri, 28 Feb 2014 14:52:15 -0400 -- Joey Hess <joeyh@debian.org> Fri, 28 Feb 2014 14:52:15 -0400

View file

@ -56,14 +56,7 @@ once, and can be left alone when refining a view.
When annex.genmetadata is set, git annex add automatically attaches When annex.genmetadata is set, git annex add automatically attaches
some metadata to a file. Currently year and month fields, from its mtime. some metadata to a file. Currently year and month fields, from its mtime.
TODO A git hook could be run by git annex add to gather more metadata. There's also a pre-commit-annex hook script.
For example, by examining file permisions or MP3 metadata.
Alternatively, this could be a
regular post-commit hook, that examines the files committed, and runs git
annex metadata to add metadata. No extra git-annex support is needed
to do that!
However, in direct mode, or when using the assistant, git-annex does its
own committing, not using git commit, so bypassing the commit hooks.
## directory hierarchy metadata ## directory hierarchy metadata

View file

@ -717,6 +717,9 @@ subdirectories).
To remove a value, use -s field-=value. To remove a value, use -s field-=value.
To set a value, only if the field does not already have a value,
use -s field?=value
To set a tag, use -t tag, and use -u tag to remove a tag. To set a tag, use -t tag, and use -u tag to remove a tag.
For example, to set some tags on a file and also its author: For example, to set some tags on a file and also its author:
@ -1678,6 +1681,10 @@ used by git-annex.
`~/.config/git-annex/autostart` is a list of git repositories `~/.config/git-annex/autostart` is a list of git repositories
to start the git-annex assistant in. to start the git-annex assistant in.
`.git/hooks/pre-commit-annex` in your git repository will be run whenever
a commit is made, either by git commit, git-annex sync, or the git-annex
assistant.
# SEE ALSO # SEE ALSO
Most of git-annex's documentation is available on its web site, Most of git-annex's documentation is available on its web site,

View file

@ -30,7 +30,8 @@ Here are some recommended metadata fields to use:
being. being.
To make git-annex automatically set the year and month when adding files, To make git-annex automatically set the year and month when adding files,
run `git config annex.genmetadata true` run `git config annex.genmetadata true`. Also, see
[[tips/automatically_adding_metadata]].
git-annex's metadata can be updated in a distributed fashion. For example, git-annex's metadata can be updated in a distributed fashion. For example,
two users, each with their own clone of a repository, can set and unset two users, each with their own clone of a repository, can set and unset

View file

@ -0,0 +1,24 @@
git-annex's [[metadata]] works best when files have a lot of useful
metadata attached to them.
To make git-annex automatically set the year and month when adding files,
run `git config annex.genmetadata true`.
A git commit hook can be set up to extract lots of metadata from files
like photos, mp3s, etc.
* Install the `extract` utility, from <http://www.gnu.org/software/libextractor/>
`apt-get install extract`
* Download [[pre-commit-annex]] and install it in your git-annex repository
as `.git/hooks/pre-commit-annex`.
Remember to make the script executable!
* Run: `git config metadata.extract "artist album title camera_make video_dimensions"`
Now any fields you list in metadata.extract will be extracted and
stored when files are committed.
To get a list of all possible fields, run: `extract -L | sed 's/ /_/g'`
By default, if git-annex already has a metadata field for a file,
its value will not be overwritten with metadata taken from files.
To allow overwriting, run: `git config metadata.overwrite true`

View file

@ -0,0 +1,61 @@
#!/bin/sh
# This script can be used to add git-annex metadata to files when they're
# committed.
#
# Configuration, read with git config:
#   metadata.extract    whitespace-separated list of fields to extract;
#                       spaces inside a field name are written as "_"
#   metadata.overwrite  when true, metadata taken from files replaces
#                       metadata git-annex has already stored
#
# Copyright 2014 Joey Hess <id@joeyh.name>
# License: GPL-3+

extract="$(git config metadata.extract || true)"
# Turn the configured field list into an extended regexp that matches the
# "field - value" lines of the wanted fields, eg "^artist - |^album - ".
want="$(perl -e 'print (join("|", map {s/_/ /g; "^$_ - "} (split " ", shift())))' "$extract")"

# Nothing to do when no fields are configured.
if [ -z "$want" ]; then
	exit 0
fi

# By default, any existing metadata git-annex has stored is not overwritten
# with metadata taken from files. Setting metadata.overwrite to true
# enables overwriting.
case "$(git config --bool metadata.overwrite || true)" in
	true)
		overwrite=1
	;;
	*)
		overwrite=""
	;;
esac
# addmeta file field value
#
# Stores one metadata value for a file using git annex metadata.
# Spaces in the field name are converted to underscores. When $overwrite
# is empty, the field?=value form is used, so a field that already has a
# value is left alone; otherwise field=value is used.
addmeta () {
	file="$1"
	field="$2"
	value="$3"
	afield="$(echo "$field" | tr ' ' _)"
	case "$overwrite" in
		"")
			p="$afield?=$value"
		;;
		*)
			p="$afield=$value"
		;;
	esac
	git -c annex.alwayscommit=false annex metadata "$file" -s "$p" --quiet
}
# Pick what the index is compared against to find the files being committed.
if git rev-parse --verify HEAD >/dev/null 2>&1; then
	against=HEAD
else
	# Initial commit: diff against an empty tree object
	against=4b825dc642cb6eb9a060e54bf8d69288fbee4904
fi

# Split command output on newlines only, so file names and metadata values
# that contain spaces stay in one piece.
IFS="
"
# Disable pathname expansion: the command substitutions below are left
# unquoted so they can be split into lines, and without this any "*", "?"
# or "[" in a file name or metadata value would be expanded as a glob.
set -f

for f in $(git diff-index --name-only --cached "$against"); do
	# Skip files that do not exist in the working tree.
	if [ -e "$f" ]; then
		# Each wanted line has the form "field - value".
		for l in $(extract "$f" | grep -E "$want"); do
			field="${l%% - *}"
			value="${l#* - }"
			addmeta "$f" "$field" "$value"
		done
	fi
done