From 18a6935e4247be8c0c4651e9f29e41c74e9cefa8 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Fri, 11 Jan 2013 15:43:09 -0400 Subject: [PATCH] safe recv-key in direct mode Checks the key's size and checksum. This is sorta expensive, but it avoids needing to add another round-trip to the protocol. --- Command/RecvKey.hs | 31 ++++++++++++++- Fields.hs | 3 ++ GitAnnexShell.hs | 1 + Remote/Git.hs | 2 + debian/changelog | 6 ++- doc/design/assistant/desymlink.mdwn | 59 ++++++++++++++--------------- doc/git-annex-shell.mdwn | 3 +- 7 files changed, 71 insertions(+), 34 deletions(-) diff --git a/Command/RecvKey.hs b/Command/RecvKey.hs index 62ab9a7eef..11a5fd5caa 100644 --- a/Command/RecvKey.hs +++ b/Command/RecvKey.hs @@ -14,6 +14,10 @@ import Annex.Content import Utility.Rsync import Logs.Transfer import Command.SendKey (fieldTransfer) +import qualified Fields +import qualified Types.Key +import qualified Types.Backend +import qualified Backend def :: [Command] def = [noCommit $ command "recvkey" paramKey seek @@ -26,7 +30,7 @@ start :: Key -> CommandStart start key = ifM (inAnnex key) ( error "key is already present in annex" , fieldTransfer Download key $ \_p -> do - ifM (getViaTmp key $ liftIO . rsyncServerReceive) + ifM (getViaTmp key go) ( do -- forcibly quit after receiving one key, -- and shutdown cleanly @@ -35,3 +39,28 @@ start key = ifM (inAnnex key) , return False ) ) + where + go tmp = ifM (liftIO $ rsyncServerReceive tmp) + ( ifM (isJust <$> Fields.getField Fields.direct) + ( directcheck tmp + , return True + ) + , return False + ) + {- If the sending repository uses direct mode, the file + - it sends could be modified as it's sending it. So check + - that the right size file was received, and that the key/value + - Backend is happy with it. -} + directcheck tmp = do + oksize <- case Types.Key.keySize key of + Nothing -> return True + Just size -> do + size' <- fromIntegral . fileSize + <$> liftIO (getFileStatus tmp) + return $ size == size' + if oksize + then case Backend.maybeLookupBackendName (Types.Key.keyBackendName key) of + Nothing -> return False + Just backend -> maybe (return True) (\a -> a key tmp) + (Types.Backend.fsckKey backend) + else return False diff --git a/Fields.hs b/Fields.hs index 145a8adca1..ffd273be67 100644 --- a/Fields.hs +++ b/Fields.hs @@ -30,3 +30,6 @@ associatedFile :: Field associatedFile = Field "associatedfile" $ \f -> -- is the file a safe relative filename? not (isAbsolute f) && not ("../" `isPrefixOf` f) + +direct :: Field +direct = Field "direct" $ \f -> f == "1" diff --git a/GitAnnexShell.hs b/GitAnnexShell.hs index f77347a1c9..fca36cfc5d 100644 --- a/GitAnnexShell.hs +++ b/GitAnnexShell.hs @@ -122,6 +122,7 @@ checkField :: (String, String) -> Bool checkField (field, value) | field == fieldName remoteUUID = fieldCheck remoteUUID value | field == fieldName associatedFile = fieldCheck associatedFile value + | field == fieldName direct = fieldCheck direct value | otherwise = False failure :: IO () diff --git a/Remote/Git.hs b/Remote/Git.hs index e8e1a1ba27..8c8d1274c4 100644 --- a/Remote/Git.hs +++ b/Remote/Git.hs @@ -398,7 +398,9 @@ rsyncOrCopyFile rsyncparams src dest p = rsyncParamsRemote :: Remote -> Direction -> Key -> FilePath -> AssociatedFile -> Annex [CommandParam] rsyncParamsRemote r direction key file afile = do u <- getUUID + direct <- isDirect let fields = (Fields.remoteUUID, fromUUID u) + : (Fields.direct, if direct then "1" else "") : maybe [] (\f -> [(Fields.associatedFile, f)]) afile Just (shellcmd, shellparams) <- git_annex_shell (repo r) (if direction == Download then "sendkey" else "recvkey") diff --git a/debian/changelog b/debian/changelog index 4230d12c43..4a4498880a 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,8 +1,10 @@ git-annex (3.20130108) UNRELEASED; urgency=low + * Now handles the case where a file that's being transferred to a remote + is modified in place, which direct mode allows to happen. When this + happens, the transfer now fails, rather than allow possibly corrupt + data into the remote. * fsck: Better checking of file content in direct mode. - * Special remotes now all rollback storage of keys that get modified - during the transfer, which can happen in direct mode. * drop: Suggest using git annex move when numcopies prevents dropping a file. * webapp: Repo switcher filters out repos that do not exist any more (or are on a drive that's not mounted). diff --git a/doc/design/assistant/desymlink.mdwn b/doc/design/assistant/desymlink.mdwn index 7931e9e6ba..64c56f466d 100644 --- a/doc/design/assistant/desymlink.mdwn +++ b/doc/design/assistant/desymlink.mdwn @@ -84,6 +84,32 @@ is converted to a real file when it becomes present. ## TODO +* kqueue does not deliver an event when an existing file is modified. + This doesn't affect OSX, which uses FSEvents now, but it makes direct + mode assistant not 100% on other BSD's. + +## done + +* `git annex sync` updates the key to files mappings for files changed, + but needs much other work to handle direct mode: + * Generate git commit, without running `git commit`, because it will + want to stage the full files. **done** + * Update location logs for any files deleted by a commit. **done** + * Generate a git merge, without running `git merge` (or possibly running + it in a scratch repo?), because it will stumble over the direct files. + **done** + * Drop contents of files deleted by a merge (including updating the + location log), or if we cannot drop, + move their contents to `.git/annex/objects/`. **no** .. instead, + avoid ever losing file contents in a direct mode merge. If the file is + deleted, its content is moved back to .git/annex/objects, if necessary. + * When a merge adds a symlink pointing at a key that is present in the + repo, replace the symlink with the direct file (either moving out + of `.git/annex/objects/` or hard-linking if the same key is present + elsewhere in the tree. **done** + * handle merge conflicts on direct mode files **done** +* support direct mode in the assistant (many little fixes) + * Deal with files changing as they're being transferred from a direct mode repository to another git repository. The remote repo currently will accept the bad data and update the location log to say it has the key. @@ -113,34 +139,7 @@ is converted to a real file when it becomes present. the temp file, which is probably corrupt. (Could in future use it as a basis for transferring the new key..) **done** - For git remotes, add a flag to `git-annex-shell recvkey` (using a field + For git remotes, added a flag to `git-annex-shell recvkey` (using a field after the "--" to remain back-compat). With this flag, after receiving - the data, the remote should wait for a signal that the data is good - before it updates the location log. The signal could just be a "1" - sent over the ssh channel. Or another `git-annex-shell` command. **TODO** - -* kqueue does not deliver an event when an existing file is modified. - This doesn't affect OSX, which uses FSEvents now, but it makes direct - mode assistant not 100% on other BSD's. - -## done - -* `git annex sync` updates the key to files mappings for files changed, - but needs much other work to handle direct mode: - * Generate git commit, without running `git commit`, because it will - want to stage the full files. **done** - * Update location logs for any files deleted by a commit. **done** - * Generate a git merge, without running `git merge` (or possibly running - it in a scratch repo?), because it will stumble over the direct files. - **done** - * Drop contents of files deleted by a merge (including updating the - location log), or if we cannot drop, - move their contents to `.git/annex/objects/`. **no** .. instead, - avoid ever losing file contents in a direct mode merge. If the file is - deleted, its content is moved back to .git/annex/objects, if necessary. - * When a merge adds a symlink pointing at a key that is present in the - repo, replace the symlink with the direct file (either moving out - of `.git/annex/objects/` or hard-linking if the same key is present - elsewhere in the tree. **done** - * handle merge conflicts on direct mode files **done** -* support direct mode in the assistant (many little fixes) + the data, the remote fscks the data. This is not optimal, but avoids + needing another round-trip, or a protocol change. diff --git a/doc/git-annex-shell.mdwn b/doc/git-annex-shell.mdwn index 5fbc6de534..38659d0e28 100644 --- a/doc/git-annex-shell.mdwn +++ b/doc/git-annex-shell.mdwn @@ -76,7 +76,8 @@ to git-annex-shell are: past versions of git-annex-shell (that ignore these, but would choke on new dashed options). - Currently used fields include remoteuuid= and associatedfile= + Currently used fields include remoteuuid=, associatedfile=, + and direct= # HOOK