From 3e68c1c2fd0a1d689cc08d3c20cdb2ed510cd1db Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Fri, 3 Jan 2014 16:35:57 -0400 Subject: [PATCH] add remote state logs This allows a remote to store a piece of arbitrary state associated with a key. This is needed to support Tahoe, where the file-cap is calculated from the data stored in it, and used to retrieve a key later. Glacier also would be much improved by using this. GETSTATE and SETSTATE are added to the external special remote protocol. Note that the state is left as-is even when a key is removed from a remote. It's up to the remote to decide when it wants to clear the state. The remote state log, $KEY.log.rmt, is a UUID-based log. However, rather than using the old UUID-based log format, I created a new variant of that format. The new varient is more space efficient (since it lacks the "timestamp=" hack, and easier to parse (and the parser doesn't mess with whitespace in the value), and avoids compatability cruft in the old one. This seemed worth cleaning up for these new files, since there could be a lot of them, while before UUID-based logs were only used for a few log files at the top of the git-annex branch. The transition code has also been updated to handle these new UUID-based logs. This commit was sponsored by Daniel Hofer. --- Annex/Branch/Transitions.hs | 2 ++ Logs.hs | 25 +++++++++++--- Logs/RemoteState.hs | 33 +++++++++++++++++++ Logs/UUIDBased.hs | 25 +++++++++++++- Remote/External.hs | 7 ++++ Remote/External/Types.hs | 4 +++ debian/changelog | 3 +- .../external_special_remote_protocol.mdwn | 11 +++++++ doc/internals.mdwn | 20 ++++++++--- 9 files changed, 119 insertions(+), 11 deletions(-) create mode 100644 Logs/RemoteState.hs diff --git a/Annex/Branch/Transitions.hs b/Annex/Branch/Transitions.hs index 90002de624..84cd1bbd94 100644 --- a/Annex/Branch/Transitions.hs +++ b/Annex/Branch/Transitions.hs @@ -34,6 +34,8 @@ dropDead :: FilePath -> String -> TrustMap -> FileTransition dropDead f content trustmap = case getLogVariety f of Just UUIDBasedLog -> ChangeFile $ UUIDBased.showLog id $ dropDeadFromUUIDBasedLog trustmap $ UUIDBased.parseLog Just content + Just NewUUIDBasedLog -> ChangeFile $ + UUIDBased.showLogNew id $ dropDeadFromUUIDBasedLog trustmap $ UUIDBased.parseLogNew Just content Just (PresenceLog _) -> let newlog = Presence.compactLog $ dropDeadFromPresenceLog trustmap $ Presence.parseLog content in if null newlog diff --git a/Logs.hs b/Logs.hs index 4386b7fd7c..2952d6920c 100644 --- a/Logs.hs +++ b/Logs.hs @@ -10,19 +10,21 @@ module Logs where import Common.Annex import Types.Key -data LogVariety = UUIDBasedLog | PresenceLog Key +{- There are several varieties of log file formats. -} +data LogVariety = UUIDBasedLog | NewUUIDBasedLog | PresenceLog Key deriving (Show) {- Converts a path from the git-annex branch into one of the varieties - of logs used by git-annex, if it's a known path. -} getLogVariety :: FilePath -> Maybe LogVariety getLogVariety f - | f `elem` uuidBasedLogs = Just UUIDBasedLog + | f `elem` topLevelUUIDBasedLogs = Just UUIDBasedLog + | isRemoteStateLog f = Just NewUUIDBasedLog | otherwise = PresenceLog <$> firstJust (presenceLogs f) -{- All the uuid-based logs stored in the git-annex branch. -} -uuidBasedLogs :: [FilePath] -uuidBasedLogs = +{- All the uuid-based logs stored in the top of the git-annex branch. -} +topLevelUUIDBasedLogs :: [FilePath] +topLevelUUIDBasedLogs = [ uuidLog , remoteLog , trustLog @@ -99,16 +101,29 @@ urlLogFileKey path isUrlLog :: FilePath -> Bool isUrlLog file = urlLogExt `isSuffixOf` file +{- The filename of the remote state log for a given key. -} +remoteStateLogFile :: Key -> FilePath +remoteStateLogFile key = hashDirLower key keyFile key ++ remoteStateLogExt + +remoteStateLogExt :: String +remoteStateLogExt = ".log.rmt" + +isRemoteStateLog :: FilePath -> Bool +isRemoteStateLog path = remoteStateLogExt `isSuffixOf` path + prop_logs_sane :: Key -> Bool prop_logs_sane dummykey = all id [ isNothing (getLogVariety "unknown") , expect isUUIDBasedLog (getLogVariety uuidLog) , expect isPresenceLog (getLogVariety $ locationLogFile dummykey) , expect isPresenceLog (getLogVariety $ urlLogFile dummykey) + , expect isNewUUIDBasedLog (getLogVariety $ remoteStateLogFile dummykey) ] where expect = maybe False isUUIDBasedLog UUIDBasedLog = True isUUIDBasedLog _ = False + isNewUUIDBasedLog NewUUIDBasedLog = True + isNewUUIDBasedLog _ = False isPresenceLog (PresenceLog k) = k == dummykey isPresenceLog _ = False diff --git a/Logs/RemoteState.hs b/Logs/RemoteState.hs new file mode 100644 index 0000000000..95e51832ee --- /dev/null +++ b/Logs/RemoteState.hs @@ -0,0 +1,33 @@ +{- Remote state logs. + - + - Copyright 2013 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Logs.RemoteState ( + getRemoteState, + setRemoteState, +) where + +import Common.Annex +import Logs +import Logs.UUIDBased +import qualified Annex.Branch + +import qualified Data.Map as M +import Data.Time.Clock.POSIX + +type RemoteState = String + +setRemoteState :: UUID -> Key -> RemoteState -> Annex () +setRemoteState u k s = do + ts <- liftIO getPOSIXTime + Annex.Branch.change (remoteStateLogFile k) $ + showLogNew id . changeLog ts u s . parseLogNew Just + +getRemoteState :: UUID -> Key -> Annex (Maybe RemoteState) +getRemoteState u k = extract . parseLogNew Just + <$> Annex.Branch.get (remoteStateLogFile k) + where + extract m = value <$> M.lookup u m diff --git a/Logs/UUIDBased.hs b/Logs/UUIDBased.hs index 10b3bf55d1..430c92d553 100644 --- a/Logs/UUIDBased.hs +++ b/Logs/UUIDBased.hs @@ -6,8 +6,10 @@ - A line of the log will look like: "UUID[ INFO[ timestamp=foo]]" - The timestamp is last for backwards compatability reasons, - and may not be present on old log lines. + - + - New uuid based logs instead use the form: "timestamp UUID INFO" - - - Copyright 2011 Joey Hess + - Copyright 2011-2013 Joey Hess - - Licensed under the GNU GPL version 3 or higher. -} @@ -17,8 +19,10 @@ module Logs.UUIDBased ( LogEntry(..), TimeStamp(..), parseLog, + parseLogNew, parseLogWithUUID, showLog, + showLogNew, changeLog, addLog, simpleMap, @@ -56,6 +60,14 @@ showLog shower = unlines . map showpair . M.toList showpair (k, LogEntry Unknown v) = unwords [fromUUID k, shower v] +showLogNew :: (a -> String) -> Log a -> String +showLogNew shower = unlines . map showpair . M.toList + where + showpair (k, LogEntry (Date p) v) = + unwords [show p, fromUUID k, shower v] + showpair (k, LogEntry Unknown v) = + unwords ["0", fromUUID k, shower v] + parseLog :: (String -> Maybe a) -> String -> Log a parseLog = parseLogWithUUID . const @@ -86,6 +98,17 @@ parseLogWithUUID parser = M.fromListWith best . mapMaybe parse . lines Nothing -> Unknown Just d -> Date $ utcTimeToPOSIXSeconds d +parseLogNew :: (String -> Maybe a) -> String -> Log a +parseLogNew parser = M.fromListWith best . mapMaybe parse . lines + where + parse line = do + let (ts, rest) = splitword line + (u, v) = splitword rest + date <- Date . utcTimeToPOSIXSeconds <$> parseTime defaultTimeLocale "%s%Qs" ts + val <- parser v + Just (toUUID u, LogEntry date val) + splitword = separate (== ' ') + changeLog :: POSIXTime -> UUID -> a -> Log a -> Log a changeLog t u v = M.insert u $ LogEntry (Date t) v diff --git a/Remote/External.hs b/Remote/External.hs index f682d242d9..a0c3ef2d6e 100644 --- a/Remote/External.hs +++ b/Remote/External.hs @@ -19,6 +19,7 @@ import Crypto import Utility.Metered import Logs.Transfer import Logs.PreferredContent.Raw +import Logs.RemoteState import Config.Cost import Annex.Content import Annex.UUID @@ -235,6 +236,12 @@ handleRequest' lck external req mp responsehandler expr <- fromMaybe "" . M.lookup (externalUUID external) <$> preferredContentMapRaw send $ VALUE expr + handleRemoteRequest (SETSTATE key state) = + setRemoteState (externalUUID external) key state + handleRemoteRequest (GETSTATE key) = do + state <- fromMaybe "" + <$> getRemoteState (externalUUID external) key + send $ VALUE state handleRemoteRequest (VERSION _) = sendMessage lck external $ ERROR "too late to send VERSION" diff --git a/Remote/External/Types.hs b/Remote/External/Types.hs index e925f0e91e..88c2126d75 100644 --- a/Remote/External/Types.hs +++ b/Remote/External/Types.hs @@ -170,6 +170,8 @@ data RemoteRequest | GETUUID | SETWANTED PreferredContentExpression | GETWANTED + | SETSTATE Key String + | GETSTATE Key deriving (Show) instance Receivable RemoteRequest where @@ -183,6 +185,8 @@ instance Receivable RemoteRequest where parseCommand "GETUUID" = parse0 GETUUID parseCommand "SETWANTED" = parse1 SETWANTED parseCommand "GETWANTED" = parse0 GETWANTED + parseCommand "SETSTATE" = parse2 SETSTATE + parseCommand "GETSTATE" = parse1 GETSTATE parseCommand _ = parseFail -- Responses to RemoteRequest. diff --git a/debian/changelog b/debian/changelog index 6f9acd2341..3727d4c5d8 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,7 +1,8 @@ git-annex (5.20131231) UNRELEASED; urgency=medium * mirror: Support --all (and --unused). - * external special remote protocol: Added GETUUID, GETWANTED, SETWANTED. + * external special remote protocol: Added GETUUID, GETWANTED, SETWANTED, + SETSTATE, GETSTATE. * Windows: Fix bug in direct mode merge code that could cause files in subdirectories to go missing. * Windows: Avoid eating stdin when running ssh to add a authorized key, diff --git a/doc/design/external_special_remote_protocol.mdwn b/doc/design/external_special_remote_protocol.mdwn index 138d9dd182..cac5489d27 100644 --- a/doc/design/external_special_remote_protocol.mdwn +++ b/doc/design/external_special_remote_protocol.mdwn @@ -222,6 +222,17 @@ in control. Gets the current preferred content setting of the repository. (git-annex replies with VALUE followed by the preferred content expression.) +* `SETSTATE Key Value` + Can be used to store some form of state for a Key. The state stored + can be anything this remote needs to store, in any format. + It is stored in the git-annex branch. Note that this means that if + multiple repositories are using the same special remote, and store + different state, whichever one stored the state last will win. Also, + it's best to avoid storing much state, since this will bloat the + git-annex branch. Most remotes will not need to store any state. +* `GETSTATE Key` + Gets any state that has been stored for the key. + (git-annex replies with VALUE followed by the state.) ## general messages diff --git a/doc/internals.mdwn b/doc/internals.mdwn index 4cc6d3c938..d95ab3f5ef 100644 --- a/doc/internals.mdwn +++ b/doc/internals.mdwn @@ -39,6 +39,10 @@ are added to git. This branch operates on objects exclusively. No file names will ever be stored in this branch. +The files stored in this branch are all designed to be auto-merged +using git's [[union merge driver|git-union-merge]]. So each line +has a timestamp, to allow the most recent information to be identified. + ### `uuid.log` Records the UUIDs of known repositories, and associates them with a @@ -110,7 +114,7 @@ somewhere else. ## `aaa/bbb/*.log` These log files record [[location_tracking]] information -for file contents. Again these are placed in two levels of subdirectories +for file contents. These are placed in two levels of subdirectories for hashing. See [[hashing]] for details. The name of the key is the filename, and the content @@ -122,15 +126,23 @@ Example: 1287290776.765152s 1 e605dca6-446a-11e0-8b2a-002170d25c55 1287290767.478634s 0 26339d22-446b-11e0-9101-002170d25c55 -These files are designed to be auto-merged using git's [[union merge driver|git-union-merge]]. -The timestamps allow the most recent information to be identified. - ## `aaa/bbb/*.log.web` These log files record urls used by the [[web_special_remote|special_remotes/web]]. Their format is similar to the location tracking files, but with urls rather than UUIDs. +## `aaa/bbb/*.log.rmt` + +These log files are used by remotes that need to record their own state +about keys. Each remote can store one line of data about a key, in +its own format. + +Example: + + 1287290776.765152s e605dca6-446a-11e0-8b2a-002170d25c55 blah blah + 1287290767.478634s 26339d22-446b-11e0-9101-002170d25c55 foo=bar + ## `schedule.log` Used to record scheduled events, such as periodic fscks.