Preserve metadata when staging a new version of an annexed file.
Performance impact: When adding a large tree of new files, this needs to do some git cat-file queries to check if any of the files already existed and might need a metadata copy. I tried a benchmark in a copy of my sound repository (so there was already a significant git tree to check against). Adding 10000 small files, with a cold cache: before: 1m48.539s after: 1m52.791s. So, impact is 0.0004 seconds per file added, which seems acceptable, so I did not add some kind of configuration to enable/disable this. This commit was sponsored by Lisa Feilen.
This commit is contained in:
parent
e7252cf192
commit
8d5158fa31
5 changed files with 41 additions and 14 deletions
|
@ -87,8 +87,7 @@ catKey' modeguaranteed ref mode
|
||||||
| modeguaranteed = catObject ref
|
| modeguaranteed = catObject ref
|
||||||
| otherwise = L.take 8192 <$> catObject ref
|
| otherwise = L.take 8192 <$> catObject ref
|
||||||
|
|
||||||
{- Looks up the file mode corresponding to the Ref using the running
|
{- Looks up the key corresponding to the Ref using the running cat-file.
|
||||||
- cat-file.
|
|
||||||
-
|
-
|
||||||
- Currently this always has to look in HEAD, because cat-file --batch
|
- Currently this always has to look in HEAD, because cat-file --batch
|
||||||
- does not offer a way to specify that we want to look up a tree object
|
- does not offer a way to specify that we want to look up a tree object
|
||||||
|
|
|
@ -11,6 +11,7 @@ import Common.Annex
|
||||||
import qualified Annex
|
import qualified Annex
|
||||||
import Types.MetaData
|
import Types.MetaData
|
||||||
import Logs.MetaData
|
import Logs.MetaData
|
||||||
|
import Annex.CatFile
|
||||||
|
|
||||||
import qualified Data.Set as S
|
import qualified Data.Set as S
|
||||||
import qualified Data.Map as M
|
import qualified Data.Map as M
|
||||||
|
@ -27,18 +28,27 @@ yearMetaField = MetaField "year"
|
||||||
monthMetaField :: MetaField
|
monthMetaField :: MetaField
|
||||||
monthMetaField = MetaField "month"
|
monthMetaField = MetaField "month"
|
||||||
|
|
||||||
{- Generates metadata for a file that has just been ingested into the
|
{- Adds metadata for a file that has just been ingested into the
|
||||||
- annex. Passed the FileStatus of the content file.
|
- annex, but has not yet been committed to git.
|
||||||
-
|
-
|
||||||
- Does not overwrite any existing metadata values for the key.
|
- When the file has been modified, the metadata is copied over
|
||||||
|
- from the old key to the new key. Note that it looks at the old key as
|
||||||
|
- committed to HEAD -- the new key may or may not have already been staged
|
||||||
|
- in the annex.
|
||||||
|
-
|
||||||
|
- Also, can generate new metadata, if configured to do so.
|
||||||
-}
|
-}
|
||||||
genMetaData :: Key -> FileStatus -> Annex ()
|
genMetaData :: Key -> FilePath -> FileStatus -> Annex ()
|
||||||
genMetaData key status = whenM (annexGenMetaData <$> Annex.getGitConfig) $ do
|
genMetaData key file status = do
|
||||||
metadata <- getCurrentMetaData key
|
maybe noop (flip copyMetaData key) =<< catKeyFileHEAD file
|
||||||
let metadata' = genMetaData' status metadata
|
whenM (annexGenMetaData <$> Annex.getGitConfig) $ do
|
||||||
unless (metadata' == emptyMetaData) $
|
metadata <- getCurrentMetaData key
|
||||||
addMetaData key metadata'
|
let metadata' = genMetaData' status metadata
|
||||||
|
unless (metadata' == emptyMetaData) $
|
||||||
|
addMetaData key metadata'
|
||||||
|
|
||||||
|
{- Generates metadata from the FileStatus.
|
||||||
|
- Does not overwrite any existing metadata values. -}
|
||||||
genMetaData' :: FileStatus -> MetaData -> MetaData
|
genMetaData' :: FileStatus -> MetaData -> MetaData
|
||||||
genMetaData' status old = MetaData $ M.fromList $ filter isnew
|
genMetaData' status old = MetaData $ M.fromList $ filter isnew
|
||||||
[ (yearMetaField, S.singleton $ toMetaValue $ show y)
|
[ (yearMetaField, S.singleton $ toMetaValue $ show y)
|
||||||
|
|
|
@ -161,14 +161,14 @@ ingest (Just source) = do
|
||||||
goindirect (Just (key, _)) mcache ms = do
|
goindirect (Just (key, _)) mcache ms = do
|
||||||
catchAnnex (moveAnnex key $ contentLocation source)
|
catchAnnex (moveAnnex key $ contentLocation source)
|
||||||
(undo (keyFilename source) key)
|
(undo (keyFilename source) key)
|
||||||
maybe noop (genMetaData key) ms
|
maybe noop (genMetaData key (keyFilename source)) ms
|
||||||
liftIO $ nukeFile $ keyFilename source
|
liftIO $ nukeFile $ keyFilename source
|
||||||
return $ (Just key, mcache)
|
return $ (Just key, mcache)
|
||||||
goindirect _ _ _ = failure "failed to generate a key"
|
goindirect _ _ _ = failure "failed to generate a key"
|
||||||
|
|
||||||
godirect (Just (key, _)) (Just cache) ms = do
|
godirect (Just (key, _)) (Just cache) ms = do
|
||||||
addInodeCache key cache
|
addInodeCache key cache
|
||||||
maybe noop (genMetaData key) ms
|
maybe noop (genMetaData key (keyFilename source)) ms
|
||||||
finishIngestDirect key source
|
finishIngestDirect key source
|
||||||
return $ (Just key, Just cache)
|
return $ (Just key, Just cache)
|
||||||
godirect _ _ _ = failure "failed to generate a key"
|
godirect _ _ _ = failure "failed to generate a key"
|
||||||
|
|
|
@ -28,10 +28,10 @@
|
||||||
|
|
||||||
module Logs.MetaData (
|
module Logs.MetaData (
|
||||||
getCurrentMetaData,
|
getCurrentMetaData,
|
||||||
getMetaData,
|
|
||||||
addMetaData,
|
addMetaData,
|
||||||
addMetaData',
|
addMetaData',
|
||||||
currentMetaData,
|
currentMetaData,
|
||||||
|
copyMetaData,
|
||||||
) where
|
) where
|
||||||
|
|
||||||
import Common.Annex
|
import Common.Annex
|
||||||
|
@ -135,3 +135,20 @@ simplifyLog s = case sl of
|
||||||
where
|
where
|
||||||
older = value l
|
older = value l
|
||||||
unique = older `differenceMetaData` newer
|
unique = older `differenceMetaData` newer
|
||||||
|
|
||||||
|
{- Copies the metadata from the old key to the new key.
|
||||||
|
-
|
||||||
|
- The exact content of the metadata file is copied, so that the timestamps
|
||||||
|
- remain the same, and because this is more space-efficient in the git
|
||||||
|
- repository.
|
||||||
|
-
|
||||||
|
- Any metadata already attached to the new key is not preserved.
|
||||||
|
-}
|
||||||
|
copyMetaData :: Key -> Key -> Annex ()
|
||||||
|
copyMetaData oldkey newkey
|
||||||
|
| oldkey == newkey = noop
|
||||||
|
| otherwise = do
|
||||||
|
l <- getMetaData oldkey
|
||||||
|
unless (S.null l) $
|
||||||
|
Annex.Branch.change (metaDataLogFile newkey) $
|
||||||
|
const $ showLog l
|
||||||
|
|
1
debian/changelog
vendored
1
debian/changelog
vendored
|
@ -11,6 +11,7 @@ git-annex (5.20140222) UNRELEASED; urgency=medium
|
||||||
tag/showname.
|
tag/showname.
|
||||||
* annex.genmetadata can be set to make git-annex automatically set
|
* annex.genmetadata can be set to make git-annex automatically set
|
||||||
metadata (year and month) when adding files.
|
metadata (year and month) when adding files.
|
||||||
|
* Preserve metadata when staging a new version of an annexed file.
|
||||||
* metadata: Field names limited to alphanumerics and a few whitelisted
|
* metadata: Field names limited to alphanumerics and a few whitelisted
|
||||||
punctuation characters to avoid issues with views, etc.
|
punctuation characters to avoid issues with views, etc.
|
||||||
* metadata: Support --json
|
* metadata: Support --json
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue