per-remote metadata storage
Actually very straightforward reuse of the metadata log file code. Although I had to add a todo item, as git-annex forget won't clean up dead remotes' metadata yet. This would be worth adding to the external special remote interface sometime. Have not opened a todo for that though; guess I'll wait until something needs it. This commit was supported by the NSF-funded DataLad project.
This commit is contained in:
parent
9d78a4387f
commit
5c99f6247e
6 changed files with 100 additions and 18 deletions
|
@ -112,7 +112,7 @@ perform c o k = case getSet o of
|
||||||
Set ms -> do
|
Set ms -> do
|
||||||
oldm <- getCurrentMetaData k
|
oldm <- getCurrentMetaData k
|
||||||
let m = combineMetaData $ map (modMeta oldm) ms
|
let m = combineMetaData $ map (modMeta oldm) ms
|
||||||
addMetaData' k m c
|
addMetaDataClocked k m c
|
||||||
next $ cleanup k
|
next $ cleanup k
|
||||||
_ -> next $ cleanup k
|
_ -> next $ cleanup k
|
||||||
|
|
||||||
|
|
14
Logs.hs
14
Logs.hs
|
@ -1,6 +1,6 @@
|
||||||
{- git-annex log file names
|
{- git-annex log file names
|
||||||
-
|
-
|
||||||
- Copyright 2013-2015 Joey Hess <id@joeyh.name>
|
- Copyright 2013-2018 Joey Hess <id@joeyh.name>
|
||||||
-
|
-
|
||||||
- Licensed under the GNU GPL version 3 or higher.
|
- Licensed under the GNU GPL version 3 or higher.
|
||||||
-}
|
-}
|
||||||
|
@ -26,7 +26,7 @@ getLogVariety f
|
||||||
| f `elem` topLevelUUIDBasedLogs = Just UUIDBasedLog
|
| f `elem` topLevelUUIDBasedLogs = Just UUIDBasedLog
|
||||||
| isRemoteStateLog f = Just NewUUIDBasedLog
|
| isRemoteStateLog f = Just NewUUIDBasedLog
|
||||||
| isChunkLog f = ChunkLog <$> chunkLogFileKey f
|
| isChunkLog f = ChunkLog <$> chunkLogFileKey f
|
||||||
| isMetaDataLog f || f `elem` otherLogs = Just OtherLog
|
| isMetaDataLog f || isRemoteMetaDataLog f || f `elem` otherLogs = Just OtherLog
|
||||||
| otherwise = PresenceLog <$> firstJust (presenceLogs f)
|
| otherwise = PresenceLog <$> firstJust (presenceLogs f)
|
||||||
|
|
||||||
{- All the uuid-based logs stored in the top of the git-annex branch. -}
|
{- All the uuid-based logs stored in the top of the git-annex branch. -}
|
||||||
|
@ -185,3 +185,13 @@ metaDataLogExt = ".log.met"
|
||||||
|
|
||||||
isMetaDataLog :: FilePath -> Bool
|
isMetaDataLog :: FilePath -> Bool
|
||||||
isMetaDataLog path = metaDataLogExt `isSuffixOf` path
|
isMetaDataLog path = metaDataLogExt `isSuffixOf` path
|
||||||
|
|
||||||
|
{- The filename of the remote metadata log for a given key. -}
|
||||||
|
remoteMetaDataLogFile :: GitConfig -> Key -> FilePath
|
||||||
|
remoteMetaDataLogFile config key = branchHashDir config key </> keyFile key ++ remoteMetaDataLogExt
|
||||||
|
|
||||||
|
remoteMetaDataLogExt :: String
|
||||||
|
remoteMetaDataLogExt = ".log.rmet"
|
||||||
|
|
||||||
|
isRemoteMetaDataLog :: FilePath -> Bool
|
||||||
|
isRemoteMetaDataLog path = remoteMetaDataLogExt `isSuffixOf` path
|
||||||
|
|
|
@ -1,7 +1,9 @@
|
||||||
{- git-annex general metadata storage log
|
{- git-annex general metadata storage log and per-remote metadata storage log.
|
||||||
-
|
-
|
||||||
- A line of the log will look like "timestamp field [+-]value [...]"
|
- A line of the log will look like "timestamp field [+-]value [...]"
|
||||||
-
|
-
|
||||||
|
- (In the per-remote log, each field is prefixed with "uuid:")
|
||||||
|
-
|
||||||
- Note that unset values are preserved. Consider this case:
|
- Note that unset values are preserved. Consider this case:
|
||||||
-
|
-
|
||||||
- We have:
|
- We have:
|
||||||
|
@ -18,7 +20,7 @@
|
||||||
- and so foo currently has no value.
|
- and so foo currently has no value.
|
||||||
-
|
-
|
||||||
-
|
-
|
||||||
- Copyright 2014 Joey Hess <id@joeyh.name>
|
- Copyright 2014-2018 Joey Hess <id@joeyh.name>
|
||||||
-
|
-
|
||||||
- Licensed under the GNU GPL version 3 or higher.
|
- Licensed under the GNU GPL version 3 or higher.
|
||||||
-}
|
-}
|
||||||
|
@ -27,8 +29,10 @@
|
||||||
|
|
||||||
module Logs.MetaData (
|
module Logs.MetaData (
|
||||||
getCurrentMetaData,
|
getCurrentMetaData,
|
||||||
|
getCurrentRemoteMetaData,
|
||||||
addMetaData,
|
addMetaData,
|
||||||
addMetaData',
|
addRemoteMetaData,
|
||||||
|
addMetaDataClocked,
|
||||||
currentMetaData,
|
currentMetaData,
|
||||||
copyMetaData,
|
copyMetaData,
|
||||||
) where
|
) where
|
||||||
|
@ -50,11 +54,6 @@ instance SingleValueSerializable MetaData where
|
||||||
serialize = Types.MetaData.serialize
|
serialize = Types.MetaData.serialize
|
||||||
deserialize = Types.MetaData.deserialize
|
deserialize = Types.MetaData.deserialize
|
||||||
|
|
||||||
getMetaDataLog :: Key -> Annex (Log MetaData)
|
|
||||||
getMetaDataLog key = do
|
|
||||||
config <- Annex.getGitConfig
|
|
||||||
readLog $ metaDataLogFile config key
|
|
||||||
|
|
||||||
logToCurrentMetaData :: [LogEntry MetaData] -> MetaData
|
logToCurrentMetaData :: [LogEntry MetaData] -> MetaData
|
||||||
logToCurrentMetaData = currentMetaData . combineMetaData . map value
|
logToCurrentMetaData = currentMetaData . combineMetaData . map value
|
||||||
|
|
||||||
|
@ -65,8 +64,12 @@ logToCurrentMetaData = currentMetaData . combineMetaData . map value
|
||||||
- currently set, based on timestamps in the log.
|
- currently set, based on timestamps in the log.
|
||||||
-}
|
-}
|
||||||
getCurrentMetaData :: Key -> Annex MetaData
|
getCurrentMetaData :: Key -> Annex MetaData
|
||||||
getCurrentMetaData k = do
|
getCurrentMetaData = getCurrentMetaData' metaDataLogFile
|
||||||
ls <- S.toAscList <$> getMetaDataLog k
|
|
||||||
|
getCurrentMetaData' :: (GitConfig -> Key -> FilePath) -> Key -> Annex MetaData
|
||||||
|
getCurrentMetaData' getlogfile k = do
|
||||||
|
config <- Annex.getGitConfig
|
||||||
|
ls <- S.toAscList <$> readLog (getlogfile config k)
|
||||||
let loggedmeta = logToCurrentMetaData ls
|
let loggedmeta = logToCurrentMetaData ls
|
||||||
return $ currentMetaData $ unionMetaData loggedmeta
|
return $ currentMetaData $ unionMetaData loggedmeta
|
||||||
(lastchanged ls loggedmeta)
|
(lastchanged ls loggedmeta)
|
||||||
|
@ -92,27 +95,42 @@ getCurrentMetaData k = do
|
||||||
Unknown -> 0
|
Unknown -> 0
|
||||||
showts = formatPOSIXTime "%F@%H-%M-%S"
|
showts = formatPOSIXTime "%F@%H-%M-%S"
|
||||||
|
|
||||||
|
getCurrentRemoteMetaData :: UUID -> Key -> Annex RemoteMetaData
|
||||||
|
getCurrentRemoteMetaData u k = mkRemoteMetaData u <$>
|
||||||
|
getCurrentMetaData' remoteMetaDataLogFile k
|
||||||
|
|
||||||
{- Adds in some metadata, which can override existing values, or unset
|
{- Adds in some metadata, which can override existing values, or unset
|
||||||
- them, but otherwise leaves any existing metadata as-is. -}
|
- them, but otherwise leaves any existing metadata as-is. -}
|
||||||
addMetaData :: Key -> MetaData -> Annex ()
|
addMetaData :: Key -> MetaData -> Annex ()
|
||||||
addMetaData k metadata = addMetaData' k metadata =<< liftIO currentVectorClock
|
addMetaData = addMetaData' metaDataLogFile
|
||||||
|
|
||||||
|
addMetaData' :: (GitConfig -> Key -> FilePath) -> Key -> MetaData -> Annex ()
|
||||||
|
addMetaData' getlogfile k metadata =
|
||||||
|
addMetaDataClocked' getlogfile k metadata =<< liftIO currentVectorClock
|
||||||
|
|
||||||
{- Reusing the same VectorClock when making changes to the metadata
|
{- Reusing the same VectorClock when making changes to the metadata
|
||||||
- of multiple keys is a nice optimisation. The same metadata lines
|
- of multiple keys is a nice optimisation. The same metadata lines
|
||||||
- will tend to be generated across the different log files, and so
|
- will tend to be generated across the different log files, and so
|
||||||
- git will be able to pack the data more efficiently. -}
|
- git will be able to pack the data more efficiently. -}
|
||||||
addMetaData' :: Key -> MetaData -> VectorClock -> Annex ()
|
addMetaDataClocked :: Key -> MetaData -> VectorClock -> Annex ()
|
||||||
addMetaData' k d@(MetaData m) c
|
addMetaDataClocked = addMetaDataClocked' metaDataLogFile
|
||||||
|
|
||||||
|
addMetaDataClocked' :: (GitConfig -> Key -> FilePath) -> Key -> MetaData -> VectorClock -> Annex ()
|
||||||
|
addMetaDataClocked' getlogfile k d@(MetaData m) c
|
||||||
| d == emptyMetaData = noop
|
| d == emptyMetaData = noop
|
||||||
| otherwise = do
|
| otherwise = do
|
||||||
config <- Annex.getGitConfig
|
config <- Annex.getGitConfig
|
||||||
Annex.Branch.change (metaDataLogFile config k) $
|
Annex.Branch.change (getlogfile config k) $
|
||||||
showLog . simplifyLog
|
showLog . simplifyLog
|
||||||
. S.insert (LogEntry c metadata)
|
. S.insert (LogEntry c metadata)
|
||||||
. parseLog
|
. parseLog
|
||||||
where
|
where
|
||||||
metadata = MetaData $ M.filterWithKey (\f _ -> not (isLastChangedField f)) m
|
metadata = MetaData $ M.filterWithKey (\f _ -> not (isLastChangedField f)) m
|
||||||
|
|
||||||
|
addRemoteMetaData :: Key -> RemoteMetaData -> Annex ()
|
||||||
|
addRemoteMetaData k m = do
|
||||||
|
addMetaData' remoteMetaDataLogFile k (fromRemoteMetaData m)
|
||||||
|
|
||||||
{- Simplify a log, removing historical values that are no longer
|
{- Simplify a log, removing historical values that are no longer
|
||||||
- needed.
|
- needed.
|
||||||
-
|
-
|
||||||
|
@ -173,6 +191,11 @@ simplifyLog s = case sl of
|
||||||
older = value l
|
older = value l
|
||||||
unique = older `differenceMetaData` newer
|
unique = older `differenceMetaData` newer
|
||||||
|
|
||||||
|
getMetaDataLog :: Key -> Annex (Log MetaData)
|
||||||
|
getMetaDataLog key = do
|
||||||
|
config <- Annex.getGitConfig
|
||||||
|
readLog $ metaDataLogFile config key
|
||||||
|
|
||||||
{- Copies the metadata from the old key to the new key.
|
{- Copies the metadata from the old key to the new key.
|
||||||
-
|
-
|
||||||
- The exact content of the metadata file is copied, so that the timestamps
|
- The exact content of the metadata file is copied, so that the timestamps
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
{- git-annex general metadata
|
{- git-annex general metadata
|
||||||
-
|
-
|
||||||
- Copyright 2014 Joey Hess <id@joeyh.name>
|
- Copyright 2014-2018 Joey Hess <id@joeyh.name>
|
||||||
-
|
-
|
||||||
- Licensed under the GNU GPL version 3 or higher.
|
- Licensed under the GNU GPL version 3 or higher.
|
||||||
-}
|
-}
|
||||||
|
@ -36,6 +36,9 @@ module Types.MetaData (
|
||||||
metaDataValues,
|
metaDataValues,
|
||||||
ModMeta(..),
|
ModMeta(..),
|
||||||
modMeta,
|
modMeta,
|
||||||
|
RemoteMetaData(..),
|
||||||
|
mkRemoteMetaData,
|
||||||
|
fromRemoteMetaData,
|
||||||
prop_metadata_sane,
|
prop_metadata_sane,
|
||||||
prop_metadata_serialize
|
prop_metadata_serialize
|
||||||
) where
|
) where
|
||||||
|
@ -44,6 +47,7 @@ import Common
|
||||||
import Utility.Base64
|
import Utility.Base64
|
||||||
import Utility.QuickCheck
|
import Utility.QuickCheck
|
||||||
import Utility.Aeson
|
import Utility.Aeson
|
||||||
|
import Types.UUID
|
||||||
|
|
||||||
import qualified Data.Text as T
|
import qualified Data.Text as T
|
||||||
import qualified Data.Set as S
|
import qualified Data.Set as S
|
||||||
|
@ -282,6 +286,31 @@ modMeta m (MaybeSetMeta f v)
|
||||||
| otherwise = emptyMetaData
|
| otherwise = emptyMetaData
|
||||||
modMeta m (ComposeModMeta a b) = unionMetaData (modMeta m a) (modMeta m b)
|
modMeta m (ComposeModMeta a b) = unionMetaData (modMeta m a) (modMeta m b)
|
||||||
|
|
||||||
|
data RemoteMetaData = RemoteMetaData UUID MetaData
|
||||||
|
deriving (Show, Eq, Ord)
|
||||||
|
|
||||||
|
{- Extracts only the fields prefixed with "uuid:", which belong to that
|
||||||
|
- remote. -}
|
||||||
|
mkRemoteMetaData :: UUID -> MetaData -> RemoteMetaData
|
||||||
|
mkRemoteMetaData u (MetaData m) = RemoteMetaData u $ MetaData $
|
||||||
|
M.mapKeys removeprefix $ M.filterWithKey belongsremote m
|
||||||
|
where
|
||||||
|
belongsremote (MetaField f) _v = prefix `isPrefixOf` CI.original f
|
||||||
|
removeprefix (MetaField f) = MetaField $
|
||||||
|
CI.mk $ drop prefixlen $ CI.original f
|
||||||
|
prefix = remoteMetaDataPrefix u
|
||||||
|
prefixlen = length prefix
|
||||||
|
|
||||||
|
remoteMetaDataPrefix :: UUID -> String
|
||||||
|
remoteMetaDataPrefix u = fromUUID u ++ ":"
|
||||||
|
|
||||||
|
fromRemoteMetaData :: RemoteMetaData -> MetaData
|
||||||
|
fromRemoteMetaData (RemoteMetaData u (MetaData m)) = MetaData $
|
||||||
|
M.mapKeys addprefix m
|
||||||
|
where
|
||||||
|
-- Prepend (not append) the "uuid:" prefix: mkRemoteMetaData selects fields with isPrefixOf and drops the prefix from the front.
addprefix (MetaField f) = MetaField $ CI.mk $ (prefix ++) $ CI.original f
|
||||||
|
prefix = remoteMetaDataPrefix u
|
||||||
|
|
||||||
{- Avoid putting too many fields in the map; extremely large maps make
|
{- Avoid putting too many fields in the map; extremely large maps make
|
||||||
- the serialization test slow due to the sheer amount of data.
|
- the serialization test slow due to the sheer amount of data.
|
||||||
- It's unlikely that more than 100 fields of metadata will be used. -}
|
- It's unlikely that more than 100 fields of metadata will be used. -}
|
||||||
|
|
|
@ -236,6 +236,10 @@ These log files are used by remotes that need to record their own state
|
||||||
about keys. Each remote can store one line of data about a key, in
|
about keys. Each remote can store one line of data about a key, in
|
||||||
its own format.
|
its own format.
|
||||||
|
|
||||||
|
Note that only the most recently set state about a key is seen
|
||||||
|
by remotes using this. The `*.log.rmet` files documented below do not have this
|
||||||
|
limitation.
|
||||||
|
|
||||||
Example:
|
Example:
|
||||||
|
|
||||||
1287290776.765152s e605dca6-446a-11e0-8b2a-002170d25c55 blah blah
|
1287290776.765152s e605dca6-446a-11e0-8b2a-002170d25c55 blah blah
|
||||||
|
@ -262,6 +266,20 @@ reasonably short. If the value contains any whitespace
|
||||||
(including \r or \n), it will be base64 encoded. Base64 encoded values
|
(including \r or \n), it will be base64 encoded. Base64 encoded values
|
||||||
are indicated by prefixing them with "!".
|
are indicated by prefixing them with "!".
|
||||||
|
|
||||||
|
## `aaa/bbb/*.log.rmet`
|
||||||
|
|
||||||
|
These log files store per-remote metadata about keys. This metadata
|
||||||
|
is only used by the remote.
|
||||||
|
|
||||||
|
Format is the same as the metadata log files above, but each metadata field
|
||||||
|
is prefixed with "uuid:" to indicate the remote it belongs to.
|
||||||
|
|
||||||
|
For example:
|
||||||
|
|
||||||
|
1287290776.765152s e605dca6-446a-11e0-8b2a-002170d25c55:foo +bar
|
||||||
|
1287290776.765152s 26339d22-446b-11e0-9101-002170d25c55:x +1
|
||||||
|
1291237510.141453s 26339d22-446b-11e0-9101-002170d25c55:x -1 26339d22-446b-11e0-9101-002170d25c55:x +2
|
||||||
|
|
||||||
## `aaa/bbb/*.log.cnk`
|
## `aaa/bbb/*.log.cnk`
|
||||||
|
|
||||||
These log files are used when objects are stored in chunked form on
|
These log files are used when objects are stored in chunked form on
|
||||||
|
|
|
@ -0,0 +1,2 @@
|
||||||
|
The newly added per-remote metadata log files need to be scrubbed clean of
|
||||||
|
dead remotes during a transition. --[[Joey]]
|
Loading…
Add table
Add a link
Reference in a new issue