pre-commit-annex hook script to automatically extract metadata from lots of types of files
Using the extract(1) program to do the heavy lifting. Decided to make git-annex run pre-commit-annex when committing. Since git-annex pre-commit also runs it, it'll be run when git commit is run too, via the pre-commit hook. This basically gives back the pre-commit hook that git-annex took away. The implementation avoids repeatedly looking for the hook script when the assistant is running and committing repeatedly; only checks if the hook is available once. To make the script simpler, made git-annex metadata -s field?=value only set a field when it's not already got a value. This commit was sponsored by bak.
This commit is contained in:
parent
34059c09e2
commit
d0fce426c4
12 changed files with 152 additions and 16 deletions
3
Annex.hs
3
Annex.hs
|
@ -44,6 +44,7 @@ import Git.CatFile
|
||||||
import Git.CheckAttr
|
import Git.CheckAttr
|
||||||
import Git.CheckIgnore
|
import Git.CheckIgnore
|
||||||
import Git.SharedRepository
|
import Git.SharedRepository
|
||||||
|
import qualified Git.Hook
|
||||||
import qualified Git.Queue
|
import qualified Git.Queue
|
||||||
import Types.Key
|
import Types.Key
|
||||||
import Types.Backend
|
import Types.Backend
|
||||||
|
@ -118,6 +119,7 @@ data AnnexState = AnnexState
|
||||||
, errcounter :: Integer
|
, errcounter :: Integer
|
||||||
, unusedkeys :: Maybe (S.Set Key)
|
, unusedkeys :: Maybe (S.Set Key)
|
||||||
, quviversion :: Maybe QuviVersion
|
, quviversion :: Maybe QuviVersion
|
||||||
|
, existinghooks :: M.Map Git.Hook.Hook Bool
|
||||||
}
|
}
|
||||||
|
|
||||||
newState :: GitConfig -> Git.Repo -> AnnexState
|
newState :: GitConfig -> Git.Repo -> AnnexState
|
||||||
|
@ -157,6 +159,7 @@ newState c r = AnnexState
|
||||||
, errcounter = 0
|
, errcounter = 0
|
||||||
, unusedkeys = Nothing
|
, unusedkeys = Nothing
|
||||||
, quviversion = Nothing
|
, quviversion = Nothing
|
||||||
|
, existinghooks = M.empty
|
||||||
}
|
}
|
||||||
|
|
||||||
{- Makes an Annex state object for the specified git repo.
|
{- Makes an Annex state object for the specified git repo.
|
||||||
|
|
|
@ -1,9 +1,10 @@
|
||||||
{- git-annex git hooks
|
{- git-annex git hooks
|
||||||
-
|
-
|
||||||
- Note that it's important that the scripts not change, otherwise
|
- Note that it's important that the scripts installed by git-annex
|
||||||
- removing old hooks using an old version of the script would fail.
|
- not change, otherwise removing old hooks using an old version of
|
||||||
|
- the script would fail.
|
||||||
-
|
-
|
||||||
- Copyright 2013 Joey Hess <joey@kitenet.net>
|
- Copyright 2013-2014 Joey Hess <joey@kitenet.net>
|
||||||
-
|
-
|
||||||
- Licensed under the GNU GPL version 3 or higher.
|
- Licensed under the GNU GPL version 3 or higher.
|
||||||
-}
|
-}
|
||||||
|
@ -12,12 +13,19 @@ module Annex.Hook where
|
||||||
|
|
||||||
import Common.Annex
|
import Common.Annex
|
||||||
import qualified Git.Hook as Git
|
import qualified Git.Hook as Git
|
||||||
import Utility.Shell
|
|
||||||
import Config
|
import Config
|
||||||
|
import qualified Annex
|
||||||
|
import Utility.Shell
|
||||||
|
import Utility.FileMode
|
||||||
|
|
||||||
|
import qualified Data.Map as M
|
||||||
|
|
||||||
preCommitHook :: Git.Hook
|
preCommitHook :: Git.Hook
|
||||||
preCommitHook = Git.Hook "pre-commit" (mkHookScript "git annex pre-commit .")
|
preCommitHook = Git.Hook "pre-commit" (mkHookScript "git annex pre-commit .")
|
||||||
|
|
||||||
|
{- The pre-commit-annex hook. The hookScript is left empty here,
 - presumably because the hook file is installed by the user rather than
 - generated from a script text in this module (TODO confirm). -}
preCommitAnnexHook :: Git.Hook
|
||||||
|
preCommitAnnexHook = Git.Hook "pre-commit-annex" ""
|
||||||
|
|
||||||
mkHookScript :: String -> String
|
mkHookScript :: String -> String
|
||||||
mkHookScript s = unlines
|
mkHookScript s = unlines
|
||||||
[ shebang_local
|
[ shebang_local
|
||||||
|
@ -40,3 +48,23 @@ hookWarning :: Git.Hook -> String -> Annex ()
|
||||||
hookWarning h msg = do
|
hookWarning h msg = do
|
||||||
r <- gitRepo
|
r <- gitRepo
|
||||||
warning $ Git.hookName h ++ " hook (" ++ Git.hookFile h r ++ ") " ++ msg
|
warning $ Git.hookName h ++ " hook (" ++ Git.hookFile h r ++ ") " ++ msg
|
||||||
|
|
||||||
|
{- Runs a hook, when the hook file exists and is executable.
 -
 - To avoid checking if the hook exists every time, the result of the
 - check is cached in the Annex state (existinghooks), keyed by the hook.
 -
 - A hook that is run and fails only results in a warning.
 -}
runAnnexHook :: Git.Hook -> Annex ()
runAnnexHook hook = do
    cmd <- fromRepo $ Git.hookFile hook
    cache <- Annex.getState Annex.existinghooks
    case M.lookup hook cache of
        Just True -> run cmd
        Just False -> noop
        Nothing -> do
            exists <- hookexists cmd
            -- Insert into the map held by the current state, rather
            -- than the copy read above, so no other cached entry can
            -- be lost.
            Annex.changeState $ \s -> s
                { Annex.existinghooks =
                    M.insert hook exists (Annex.existinghooks s)
                }
            when exists $
                run cmd
  where
    -- getFileStatus throws an exception when the hook file does not
    -- exist, which is the usual case; treat that as "no hook".
    -- NOTE(review): catchBoolIO comes from Utility.Exception; confirm
    -- it is re-exported by Common.Annex, or import it explicitly.
    hookexists f = liftIO $ catchBoolIO $
        isExecutable . fileMode <$> getFileStatus f
    run cmd = unlessM (liftIO $ boolSystem cmd []) $
        warning $ cmd ++ " failed"
|
||||||
|
|
|
@ -13,6 +13,7 @@ import Config
|
||||||
import qualified Command.Add
|
import qualified Command.Add
|
||||||
import qualified Command.Fix
|
import qualified Command.Fix
|
||||||
import Annex.Direct
|
import Annex.Direct
|
||||||
|
import Annex.Hook
|
||||||
import Annex.View
|
import Annex.View
|
||||||
import Annex.View.ViewedFile
|
import Annex.View.ViewedFile
|
||||||
import Logs.View
|
import Logs.View
|
||||||
|
@ -28,13 +29,16 @@ def = [command "pre-commit" paramPaths seek SectionPlumbing
|
||||||
|
|
||||||
seek :: CommandSeek
|
seek :: CommandSeek
|
||||||
seek ps = ifM isDirect
|
seek ps = ifM isDirect
|
||||||
|
( do
|
||||||
-- update direct mode mappings for committed files
|
-- update direct mode mappings for committed files
|
||||||
( withWords startDirect ps
|
withWords startDirect ps
|
||||||
|
runAnnexHook preCommitAnnexHook
|
||||||
, do
|
, do
|
||||||
-- fix symlinks to files being committed
|
-- fix symlinks to files being committed
|
||||||
withFilesToBeCommitted (whenAnnexed Command.Fix.start) ps
|
withFilesToBeCommitted (whenAnnexed Command.Fix.start) ps
|
||||||
-- inject unlocked files into the annex
|
-- inject unlocked files into the annex
|
||||||
withFilesUnlockedToBeCommitted startIndirect ps
|
withFilesUnlockedToBeCommitted startIndirect ps
|
||||||
|
runAnnexHook preCommitAnnexHook
|
||||||
-- committing changes to a view updates metadata
|
-- committing changes to a view updates metadata
|
||||||
mv <- currentView
|
mv <- currentView
|
||||||
case mv of
|
case mv of
|
||||||
|
@ -44,6 +48,7 @@ seek ps = ifM isDirect
|
||||||
(removeViewMetaData v)
|
(removeViewMetaData v)
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
startIndirect :: FilePath -> CommandStart
|
startIndirect :: FilePath -> CommandStart
|
||||||
startIndirect f = next $ do
|
startIndirect f = next $ do
|
||||||
unlessM (callCommandAction $ Command.Add.start f) $
|
unlessM (callCommandAction $ Command.Add.start f) $
|
||||||
|
|
|
@ -18,6 +18,7 @@ import qualified Types.Remote as Remote
|
||||||
import Annex.Direct
|
import Annex.Direct
|
||||||
import Annex.CatFile
|
import Annex.CatFile
|
||||||
import Annex.Link
|
import Annex.Link
|
||||||
|
import Annex.Hook
|
||||||
import qualified Git.Command
|
import qualified Git.Command
|
||||||
import qualified Git.LsFiles as LsFiles
|
import qualified Git.LsFiles as LsFiles
|
||||||
import qualified Git.Merge
|
import qualified Git.Merge
|
||||||
|
@ -156,6 +157,7 @@ commitStaged commitmessage = go =<< inRepo Git.Branch.currentUnsafe
|
||||||
where
|
where
|
||||||
go Nothing = return False
|
go Nothing = return False
|
||||||
go (Just branch) = do
|
go (Just branch) = do
|
||||||
|
runAnnexHook preCommitAnnexHook
|
||||||
parent <- inRepo $ Git.Ref.sha branch
|
parent <- inRepo $ Git.Ref.sha branch
|
||||||
void $ inRepo $ Git.Branch.commit False commitmessage branch
|
void $ inRepo $ Git.Branch.commit False commitmessage branch
|
||||||
(maybeToList parent)
|
(maybeToList parent)
|
||||||
|
|
|
@ -15,6 +15,10 @@ data Hook = Hook
|
||||||
{ hookName :: FilePath
|
{ hookName :: FilePath
|
||||||
, hookScript :: String
|
, hookScript :: String
|
||||||
}
|
}
|
||||||
|
deriving (Ord)
|
||||||
|
|
||||||
|
instance Eq Hook where
|
||||||
|
a == b = hookName a == hookName b
|
||||||
|
|
||||||
hookFile :: Hook -> Repo -> FilePath
|
hookFile :: Hook -> Repo -> FilePath
|
||||||
hookFile h r = localGitDir r </> "hooks" </> hookName h
|
hookFile h r = localGitDir r </> "hooks" </> hookName h
|
||||||
|
|
|
@ -219,6 +219,7 @@ data ModMeta
|
||||||
= AddMeta MetaField MetaValue
|
= AddMeta MetaField MetaValue
|
||||||
| DelMeta MetaField MetaValue
|
| DelMeta MetaField MetaValue
|
||||||
| SetMeta MetaField MetaValue -- removes any existing values
|
| SetMeta MetaField MetaValue -- removes any existing values
|
||||||
|
| MaybeSetMeta MetaField MetaValue -- when field has no existing value
|
||||||
|
|
||||||
{- Applies a ModMeta, generating the new MetaData.
|
{- Applies a ModMeta, generating the new MetaData.
|
||||||
- Note that the new MetaData does not include all the
|
- Note that the new MetaData does not include all the
|
||||||
|
@ -229,12 +230,16 @@ modMeta _ (DelMeta f oldv) = updateMetaData f (unsetMetaValue oldv) emptyMetaDat
|
||||||
modMeta m (SetMeta f v) = updateMetaData f v $
|
modMeta m (SetMeta f v) = updateMetaData f v $
|
||||||
foldr (updateMetaData f) emptyMetaData $
|
foldr (updateMetaData f) emptyMetaData $
|
||||||
map unsetMetaValue $ S.toList $ currentMetaDataValues f m
|
map unsetMetaValue $ S.toList $ currentMetaDataValues f m
|
||||||
|
modMeta m (MaybeSetMeta f v)
|
||||||
|
| S.null (currentMetaDataValues f m) = updateMetaData f v emptyMetaData
|
||||||
|
| otherwise = emptyMetaData
|
||||||
|
|
||||||
{- Parses field=value, field+=value, field-=value -}
|
{- Parses field=value, field+=value, field-=value, field?=value -}
|
||||||
parseModMeta :: String -> Either String ModMeta
|
parseModMeta :: String -> Either String ModMeta
|
||||||
parseModMeta p = case lastMaybe f of
|
parseModMeta p = case lastMaybe f of
|
||||||
Just '+' -> AddMeta <$> mkMetaField f' <*> v
|
Just '+' -> AddMeta <$> mkMetaField f' <*> v
|
||||||
Just '-' -> DelMeta <$> mkMetaField f' <*> v
|
Just '-' -> DelMeta <$> mkMetaField f' <*> v
|
||||||
|
Just '?' -> MaybeSetMeta <$> mkMetaField f' <*> v
|
||||||
_ -> SetMeta <$> mkMetaField f <*> v
|
_ -> SetMeta <$> mkMetaField f <*> v
|
||||||
where
|
where
|
||||||
(f, sv) = separate (== '=') p
|
(f, sv) = separate (== '=') p
|
||||||
|
|
3
debian/changelog
vendored
3
debian/changelog
vendored
|
@ -16,6 +16,9 @@ git-annex (5.20140228) UNRELEASED; urgency=medium
|
||||||
using !tag and field!=value.
|
using !tag and field!=value.
|
||||||
* vadd: Allow listing multiple desired values for a field.
|
* vadd: Allow listing multiple desired values for a field.
|
||||||
* view: Refuse to enter a view when no branch is currently checked out.
|
* view: Refuse to enter a view when no branch is currently checked out.
|
||||||
|
* metadata: To only set a field when it's not already got a value, use
|
||||||
|
-s field?=value
|
||||||
|
* Run .git/hooks/pre-commit-annex whenever a commit is made.
|
||||||
|
|
||||||
-- Joey Hess <joeyh@debian.org> Fri, 28 Feb 2014 14:52:15 -0400
|
-- Joey Hess <joeyh@debian.org> Fri, 28 Feb 2014 14:52:15 -0400
|
||||||
|
|
||||||
|
|
|
@ -56,14 +56,7 @@ once, and can be left alone when refining a view.
|
||||||
When annex.genmetadata is set, git annex add automatically attaches
|
When annex.genmetadata is set, git annex add automatically attaches
|
||||||
some metadata to a file. Currently year and month fields, from its mtime.
|
some metadata to a file. Currently year and month fields, from its mtime.
|
||||||
|
|
||||||
TODO A git hook could be run by git annex add to gather more metadata.
|
There's also a pre-commit-annex hook script.
|
||||||
For example, by examining file permisions or MP3 metadata.
|
|
||||||
Alternatively, this could be a
|
|
||||||
regular post-commit hook, that examines the files committed, and runs git
|
|
||||||
annex metadata to add metadata. No extra git-annex support is needed
|
|
||||||
to do that!
|
|
||||||
However, in direct mode, or when using the assistant, git-annex does its
|
|
||||||
own committing, not using git commit, so bypassing the commit hooks.
|
|
||||||
|
|
||||||
## directory hierarchy metadata
|
## directory hierarchy metadata
|
||||||
|
|
||||||
|
|
|
@ -717,6 +717,9 @@ subdirectories).
|
||||||
|
|
||||||
To remove a value, use -s field-=value.
|
To remove a value, use -s field-=value.
|
||||||
|
|
||||||
|
To set a value, only if the field does not already have a value,
|
||||||
|
use -s field?=value
|
||||||
|
|
||||||
To set a tag, use -t tag, and use -u tag to remove a tag.
|
To set a tag, use -t tag, and use -u tag to remove a tag.
|
||||||
|
|
||||||
For example, to set some tags on a file and also its author:
|
For example, to set some tags on a file and also its author:
|
||||||
|
@ -1678,6 +1681,10 @@ used by git-annex.
|
||||||
`~/.config/git-annex/autostart` is a list of git repositories
|
`~/.config/git-annex/autostart` is a list of git repositories
|
||||||
to start the git-annex assistant in.
|
to start the git-annex assistant in.
|
||||||
|
|
||||||
|
`.git/hooks/pre-commit-annex` in your git repository will be run whenever
|
||||||
|
a commit is made, either by git commit, git-annex sync, or the git-annex
|
||||||
|
assistant.
|
||||||
|
|
||||||
# SEE ALSO
|
# SEE ALSO
|
||||||
|
|
||||||
Most of git-annex's documentation is available on its web site,
|
Most of git-annex's documentation is available on its web site,
|
||||||
|
|
|
@ -30,7 +30,8 @@ Here are some recommended metadata fields to use:
|
||||||
being.
|
being.
|
||||||
|
|
||||||
To make git-annex automatically set the year and month when adding files,
|
To make git-annex automatically set the year and month when adding files,
|
||||||
run `git config annex.genmetadata true`
|
run `git config annex.genmetadata true`. Also, see
|
||||||
|
[[tips/automatically_adding_metadata]].
|
||||||
|
|
||||||
git-annex's metadata can be updated in a distributed fashion. For example,
|
git-annex's metadata can be updated in a distributed fashion. For example,
|
||||||
two users, each with their own clone of a repository, can set and unset
|
two users, each with their own clone of a repository, can set and unset
|
||||||
|
|
24
doc/tips/automatically_adding_metadata.mdwn
Normal file
24
doc/tips/automatically_adding_metadata.mdwn
Normal file
|
@ -0,0 +1,24 @@
|
||||||
|
git-annex's [[metadata]] works best when files have a lot of useful
|
||||||
|
metadata attached to them.
|
||||||
|
|
||||||
|
To make git-annex automatically set the year and month when adding files,
|
||||||
|
run `git config annex.genmetadata true`.
|
||||||
|
|
||||||
|
A git commit hook can be set up to extract lots of metadata from files
|
||||||
|
like photos, mp3s, etc.
|
||||||
|
|
||||||
|
* Install the `extract` utility, from <http://www.gnu.org/software/libextractor/>
|
||||||
|
`apt-get install extract`
|
||||||
|
* Download [[pre-commit-annex]] and install it in your git-annex repository
|
||||||
|
as `.git/hooks/pre-commit-annex`.
|
||||||
|
Remember to make the script executable!
|
||||||
|
* Run: `git config metadata.extract "artist album title camera_make video_dimensions"`
|
||||||
|
|
||||||
|
Now any fields you list in metadata.extract will be extracted and
|
||||||
|
stored when files are committed.
|
||||||
|
|
||||||
|
To get a list of all possible fields, run: `extract -L | sed 's/ /_/g'`
|
||||||
|
|
||||||
|
By default, if git-annex already has a metadata field for a file,
|
||||||
|
its value will not be overwritten with metadata taken from files.
|
||||||
|
To allow overwriting, run: `git config metadata.overwrite true`
|
61
doc/tips/automatically_adding_metadata/pre-commit-annex
Executable file
61
doc/tips/automatically_adding_metadata/pre-commit-annex
Executable file
|
@ -0,0 +1,61 @@
|
||||||
|
#!/bin/sh
# This script can be used to add git-annex metadata to files when they're
# committed. Install it as .git/hooks/pre-commit-annex and make it
# executable.
#
# Configuration, via git config:
#   metadata.extract    space-separated list of extract(1) field names to
#                       store; spaces inside a field name are written as
#                       underscores (eg "artist camera_make")
#   metadata.overwrite  when true, values git-annex already has for a field
#                       are replaced; otherwise existing values are kept
#
# Copyright 2014 Joey Hess <id@joeyh.name>
# License: GPL-3+

extract="$(git config metadata.extract || true)"
# Turn the field list into a regexp matching the "field - value" lines
# of extract's output, eg "^artist - |^camera make - "
want="$(perl -e 'print (join("|", map {s/_/ /g; "^$_ - "} (split " ", shift())))' "$extract")"

# No fields are configured to be extracted, so there is nothing to do.
if [ -z "$want" ]; then
	exit 0
fi

case "$(git config --bool metadata.overwrite || true)" in
	true)
		overwrite=1
	;;
	*)
		overwrite=""
	;;
esac

# addmeta file field value
#
# Sets one metadata field of a file. Spaces in the field name are
# converted to underscores. Unless overwriting is enabled, field?=value
# is used, which only sets a field that does not already have a value.
addmeta () {
	file="$1"
	field="$2"
	value="$3"
	afield="$(printf '%s\n' "$field" | tr ' ' _)"
	if [ "$overwrite" ]; then
		p="$afield=$value"
	else
		p="$afield?=$value"
	fi
	# NOTE(review): annex.alwayscommit=false is presumably here so that
	# each metadata change is not committed separately -- confirm.
	git -c annex.alwayscommit=false annex metadata "$file" -s "$p" --quiet
}

if git rev-parse --verify HEAD >/dev/null 2>&1; then
	against=HEAD
else
	# Initial commit: diff against an empty tree object
	against=4b825dc642cb6eb9a060e54bf8d69288fbee4904
fi

# Split the command output below on newlines only, so file names and
# metadata values containing spaces stay in one piece.
IFS="
"
# Disable pathname expansion, so that a file name or metadata value
# containing *, ? or [ is not expanded when the unquoted command
# substitutions below are split.
set -f

for f in $(git diff-index --name-only --cached $against); do
	if [ -e "$f" ]; then
		for l in $(extract "$f" | grep -E "$want"); do
			# Each line has the form "field - value"
			field="${l%% - *}"
			value="${l#* - }"
			addmeta "$f" "$field" "$value"
		done
	fi
done
|
Loading…
Reference in a new issue