per-remote metadata storage
Actually a very straightforward reuse of the metadata log file code. Although I had to add a todo item, as git-annex forget won't clean up dead remotes' metadata yet. This would be worth adding to the external special remote interface sometime. Have not opened a todo for that though; guess I'll wait until something needs it. This commit was supported by the NSF-funded DataLad project.
This commit is contained in:
parent
9d78a4387f
commit
5c99f6247e
6 changed files with 100 additions and 18 deletions
|
@ -112,7 +112,7 @@ perform c o k = case getSet o of
|
|||
Set ms -> do
|
||||
oldm <- getCurrentMetaData k
|
||||
let m = combineMetaData $ map (modMeta oldm) ms
|
||||
addMetaData' k m c
|
||||
addMetaDataClocked k m c
|
||||
next $ cleanup k
|
||||
_ -> next $ cleanup k
|
||||
|
||||
|
|
14
Logs.hs
14
Logs.hs
|
@ -1,6 +1,6 @@
|
|||
{- git-annex log file names
|
||||
-
|
||||
- Copyright 2013-2015 Joey Hess <id@joeyh.name>
|
||||
- Copyright 2013-2018 Joey Hess <id@joeyh.name>
|
||||
-
|
||||
- Licensed under the GNU GPL version 3 or higher.
|
||||
-}
|
||||
|
@ -26,7 +26,7 @@ getLogVariety f
|
|||
| f `elem` topLevelUUIDBasedLogs = Just UUIDBasedLog
|
||||
| isRemoteStateLog f = Just NewUUIDBasedLog
|
||||
| isChunkLog f = ChunkLog <$> chunkLogFileKey f
|
||||
| isMetaDataLog f || f `elem` otherLogs = Just OtherLog
|
||||
| isMetaDataLog f || isRemoteMetaDataLog f || f `elem` otherLogs = Just OtherLog
|
||||
| otherwise = PresenceLog <$> firstJust (presenceLogs f)
|
||||
|
||||
{- All the uuid-based logs stored in the top of the git-annex branch. -}
|
||||
|
@ -185,3 +185,13 @@ metaDataLogExt = ".log.met"
|
|||
|
||||
isMetaDataLog :: FilePath -> Bool
|
||||
isMetaDataLog path = metaDataLogExt `isSuffixOf` path
|
||||
|
||||
{- The filename of the remote metadata log for a given key. -}
|
||||
remoteMetaDataLogFile :: GitConfig -> Key -> FilePath
|
||||
remoteMetaDataLogFile config key = branchHashDir config key </> keyFile key ++ remoteMetaDataLogExt
|
||||
|
||||
remoteMetaDataLogExt :: String
|
||||
remoteMetaDataLogExt = ".log.rmet"
|
||||
|
||||
isRemoteMetaDataLog :: FilePath -> Bool
|
||||
isRemoteMetaDataLog path = remoteMetaDataLogExt `isSuffixOf` path
|
||||
|
|
|
@ -1,7 +1,9 @@
|
|||
{- git-annex general metadata storage log
|
||||
{- git-annex general metadata storage log and per-remote metadata storage log.
|
||||
-
|
||||
- A line of the log will look like "timestamp field [+-]value [...]"
|
||||
-
|
||||
- (In the per-remote log, each field is prefixed with "uuid:")
|
||||
-
|
||||
- Note that unset values are preserved. Consider this case:
|
||||
-
|
||||
- We have:
|
||||
|
@ -18,7 +20,7 @@
|
|||
- and so foo currently has no value.
|
||||
-
|
||||
-
|
||||
- Copyright 2014 Joey Hess <id@joeyh.name>
|
||||
- Copyright 2014-2018 Joey Hess <id@joeyh.name>
|
||||
-
|
||||
- Licensed under the GNU GPL version 3 or higher.
|
||||
-}
|
||||
|
@ -27,8 +29,10 @@
|
|||
|
||||
module Logs.MetaData (
|
||||
getCurrentMetaData,
|
||||
getCurrentRemoteMetaData,
|
||||
addMetaData,
|
||||
addMetaData',
|
||||
addRemoteMetaData,
|
||||
addMetaDataClocked,
|
||||
currentMetaData,
|
||||
copyMetaData,
|
||||
) where
|
||||
|
@ -50,11 +54,6 @@ instance SingleValueSerializable MetaData where
|
|||
serialize = Types.MetaData.serialize
|
||||
deserialize = Types.MetaData.deserialize
|
||||
|
||||
getMetaDataLog :: Key -> Annex (Log MetaData)
|
||||
getMetaDataLog key = do
|
||||
config <- Annex.getGitConfig
|
||||
readLog $ metaDataLogFile config key
|
||||
|
||||
logToCurrentMetaData :: [LogEntry MetaData] -> MetaData
|
||||
logToCurrentMetaData = currentMetaData . combineMetaData . map value
|
||||
|
||||
|
@ -65,8 +64,12 @@ logToCurrentMetaData = currentMetaData . combineMetaData . map value
|
|||
- currently set, based on timestamps in the log.
|
||||
-}
|
||||
getCurrentMetaData :: Key -> Annex MetaData
|
||||
getCurrentMetaData k = do
|
||||
ls <- S.toAscList <$> getMetaDataLog k
|
||||
getCurrentMetaData = getCurrentMetaData' metaDataLogFile
|
||||
|
||||
getCurrentMetaData' :: (GitConfig -> Key -> FilePath) -> Key -> Annex MetaData
|
||||
getCurrentMetaData' getlogfile k = do
|
||||
config <- Annex.getGitConfig
|
||||
ls <- S.toAscList <$> readLog (getlogfile config k)
|
||||
let loggedmeta = logToCurrentMetaData ls
|
||||
return $ currentMetaData $ unionMetaData loggedmeta
|
||||
(lastchanged ls loggedmeta)
|
||||
|
@ -92,27 +95,42 @@ getCurrentMetaData k = do
|
|||
Unknown -> 0
|
||||
showts = formatPOSIXTime "%F@%H-%M-%S"
|
||||
|
||||
getCurrentRemoteMetaData :: UUID -> Key -> Annex RemoteMetaData
|
||||
getCurrentRemoteMetaData u k = mkRemoteMetaData u <$>
|
||||
getCurrentMetaData' remoteMetaDataLogFile k
|
||||
|
||||
{- Adds in some metadata, which can override existing values, or unset
|
||||
- them, but otherwise leaves any existing metadata as-is. -}
|
||||
addMetaData :: Key -> MetaData -> Annex ()
|
||||
addMetaData k metadata = addMetaData' k metadata =<< liftIO currentVectorClock
|
||||
addMetaData = addMetaData' metaDataLogFile
|
||||
|
||||
addMetaData' :: (GitConfig -> Key -> FilePath) -> Key -> MetaData -> Annex ()
|
||||
addMetaData' getlogfile k metadata =
|
||||
addMetaDataClocked' getlogfile k metadata =<< liftIO currentVectorClock
|
||||
|
||||
{- Reusing the same VectorClock when making changes to the metadata
|
||||
- of multiple keys is a nice optimisation. The same metadata lines
|
||||
- will tend to be generated across the different log files, and so
|
||||
- git will be able to pack the data more efficiently. -}
|
||||
addMetaData' :: Key -> MetaData -> VectorClock -> Annex ()
|
||||
addMetaData' k d@(MetaData m) c
|
||||
addMetaDataClocked :: Key -> MetaData -> VectorClock -> Annex ()
|
||||
addMetaDataClocked = addMetaDataClocked' metaDataLogFile
|
||||
|
||||
addMetaDataClocked' :: (GitConfig -> Key -> FilePath) -> Key -> MetaData -> VectorClock -> Annex ()
|
||||
addMetaDataClocked' getlogfile k d@(MetaData m) c
|
||||
| d == emptyMetaData = noop
|
||||
| otherwise = do
|
||||
config <- Annex.getGitConfig
|
||||
Annex.Branch.change (metaDataLogFile config k) $
|
||||
Annex.Branch.change (getlogfile config k) $
|
||||
showLog . simplifyLog
|
||||
. S.insert (LogEntry c metadata)
|
||||
. parseLog
|
||||
where
|
||||
metadata = MetaData $ M.filterWithKey (\f _ -> not (isLastChangedField f)) m
|
||||
|
||||
addRemoteMetaData :: Key -> RemoteMetaData -> Annex ()
|
||||
addRemoteMetaData k m = do
|
||||
addMetaData' remoteMetaDataLogFile k (fromRemoteMetaData m)
|
||||
|
||||
{- Simplify a log, removing historical values that are no longer
|
||||
- needed.
|
||||
-
|
||||
|
@ -173,6 +191,11 @@ simplifyLog s = case sl of
|
|||
older = value l
|
||||
unique = older `differenceMetaData` newer
|
||||
|
||||
getMetaDataLog :: Key -> Annex (Log MetaData)
|
||||
getMetaDataLog key = do
|
||||
config <- Annex.getGitConfig
|
||||
readLog $ metaDataLogFile config key
|
||||
|
||||
{- Copies the metadata from the old key to the new key.
|
||||
-
|
||||
- The exact content of the metadata file is copied, so that the timestamps
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
{- git-annex general metadata
|
||||
-
|
||||
- Copyright 2014 Joey Hess <id@joeyh.name>
|
||||
- Copyright 2014-2018 Joey Hess <id@joeyh.name>
|
||||
-
|
||||
- Licensed under the GNU GPL version 3 or higher.
|
||||
-}
|
||||
|
@ -36,6 +36,9 @@ module Types.MetaData (
|
|||
metaDataValues,
|
||||
ModMeta(..),
|
||||
modMeta,
|
||||
RemoteMetaData(..),
|
||||
mkRemoteMetaData,
|
||||
fromRemoteMetaData,
|
||||
prop_metadata_sane,
|
||||
prop_metadata_serialize
|
||||
) where
|
||||
|
@ -44,6 +47,7 @@ import Common
|
|||
import Utility.Base64
|
||||
import Utility.QuickCheck
|
||||
import Utility.Aeson
|
||||
import Types.UUID
|
||||
|
||||
import qualified Data.Text as T
|
||||
import qualified Data.Set as S
|
||||
|
@ -282,6 +286,31 @@ modMeta m (MaybeSetMeta f v)
|
|||
| otherwise = emptyMetaData
|
||||
modMeta m (ComposeModMeta a b) = unionMetaData (modMeta m a) (modMeta m b)
|
||||
|
||||
data RemoteMetaData = RemoteMetaData UUID MetaData
|
||||
deriving (Show, Eq, Ord)
|
||||
|
||||
{- Extracts only the fields prefixed with "uuid:", which belong to that
|
||||
- remote. -}
|
||||
mkRemoteMetaData :: UUID -> MetaData -> RemoteMetaData
|
||||
mkRemoteMetaData u (MetaData m) = RemoteMetaData u $ MetaData $
|
||||
M.mapKeys removeprefix $ M.filterWithKey belongsremote m
|
||||
where
|
||||
belongsremote (MetaField f) _v = prefix `isPrefixOf` CI.original f
|
||||
removeprefix (MetaField f) = MetaField $
|
||||
CI.mk $ drop prefixlen $ CI.original f
|
||||
prefix = remoteMetaDataPrefix u
|
||||
prefixlen = length prefix
|
||||
|
||||
remoteMetaDataPrefix :: UUID -> String
|
||||
remoteMetaDataPrefix u = fromUUID u ++ ":"
|
||||
|
||||
fromRemoteMetaData :: RemoteMetaData -> MetaData
|
||||
fromRemoteMetaData (RemoteMetaData u (MetaData m)) = MetaData $
|
||||
M.mapKeys addprefix m
|
||||
where
|
||||
-- Prepend (not append) the remote's "uuid:" prefix to the field name, so that mkRemoteMetaData's isPrefixOf check can find it and strip it again.
addprefix (MetaField f) = MetaField $ CI.mk $ (prefix ++) $ CI.original f
|
||||
prefix = remoteMetaDataPrefix u
|
||||
|
||||
{- Avoid putting too many fields in the map; extremely large maps make
|
||||
- the serialization test slow due to the sheer amount of data.
|
||||
- It's unlikely that more than 100 fields of metadata will be used. -}
|
||||
|
|
|
@ -236,6 +236,10 @@ These log files are used by remotes that need to record their own state
|
|||
about keys. Each remote can store one line of data about a key, in
|
||||
its own format.
|
||||
|
||||
Note that only the most recently set state about a key is seen
|
||||
by remotes using this. The `log.rmet` documented below does not have this
|
||||
limitation.
|
||||
|
||||
Example:
|
||||
|
||||
1287290776.765152s e605dca6-446a-11e0-8b2a-002170d25c55 blah blah
|
||||
|
@ -262,6 +266,20 @@ reasonably short. If the value contains any whitespace
|
|||
(including \r or \n), it will be base64 encoded. Base64 encoded values
|
||||
are indicated by prefixing them with "!".
|
||||
|
||||
## `aaa/bbb/*.log.rmet`
|
||||
|
||||
These log files store per-remote metadata about keys. This metadata
|
||||
is only used by the remote.
|
||||
|
||||
Format is the same as the metadata log files above, but each metadata field
|
||||
is prefixed with "uuid:" to indicate the remote it belongs to.
|
||||
|
||||
For example:
|
||||
|
||||
1287290776.765152s e605dca6-446a-11e0-8b2a-002170d25c55:foo +bar
|
||||
1287290776.765152s 26339d22-446b-11e0-9101-002170d25c55:x +1
|
||||
1291237510.141453s 26339d22-446b-11e0-9101-002170d25c55:x -1 26339d22-446b-11e0-9101-002170d25c55:x +2
|
||||
|
||||
## `aaa/bbb/*.log.cnk`
|
||||
|
||||
These log files are used when objects are stored in chunked form on
|
||||
|
|
|
@ -0,0 +1,2 @@
|
|||
The newly added per-remote metadata log files need to be scrubbed clean of
|
||||
dead remotes during a transition. --[[Joey]]
|
Loading…
Add table
Add a link
Reference in a new issue