safe recv-key in direct mode
Checks the key's size and checksum. This is somewhat expensive, but it avoids needing to add another round-trip to the protocol.
This commit is contained in:
		
					parent
					
						
							
								043c9562f3
							
						
					
				
			
			
				commit
				
					
						18a6935e42
					
				
			
		
					 7 changed files with 71 additions and 34 deletions
				
			
		| 
						 | 
				
			
			@ -14,6 +14,10 @@ import Annex.Content
 | 
			
		|||
import Utility.Rsync
 | 
			
		||||
import Logs.Transfer
 | 
			
		||||
import Command.SendKey (fieldTransfer)
 | 
			
		||||
import qualified Fields
 | 
			
		||||
import qualified Types.Key
 | 
			
		||||
import qualified Types.Backend
 | 
			
		||||
import qualified Backend
 | 
			
		||||
 | 
			
		||||
def :: [Command]
 | 
			
		||||
def = [noCommit $ command "recvkey" paramKey seek
 | 
			
		||||
| 
						 | 
				
			
			@ -26,7 +30,7 @@ start :: Key -> CommandStart
 | 
			
		|||
start key = ifM (inAnnex key)
 | 
			
		||||
	( error "key is already present in annex"
 | 
			
		||||
	, fieldTransfer Download key $ \_p -> do
 | 
			
		||||
		ifM (getViaTmp key $ liftIO . rsyncServerReceive)
 | 
			
		||||
		ifM (getViaTmp key go)
 | 
			
		||||
			( do
 | 
			
		||||
				-- forcibly quit after receiving one key,
 | 
			
		||||
				-- and shutdown cleanly
 | 
			
		||||
| 
						 | 
				
			
			@ -35,3 +39,28 @@ start key = ifM (inAnnex key)
 | 
			
		|||
			, return False
 | 
			
		||||
			)
 | 
			
		||||
	)
 | 
			
		||||
  where
 | 
			
		||||
	{- Runs rsyncServerReceive on the temp file. When that fails, the
	 - result is False. When it succeeds and the direct field was
	 - provided, the result is whatever directcheck says about the temp
	 - file; without the field, the received file is accepted as-is. -}
	go tmp = do
		received <- liftIO $ rsyncServerReceive tmp
		if not received
			then return False
			else do
				senderdirect <- isJust <$> Fields.getField Fields.direct
				if senderdirect
					then directcheck tmp
					else return True
 | 
			
		||||
	{- A sending repository that uses direct mode may have its file
	 - modified while the file is being sent. So the received temp file
	 - is only accepted when:
	 -
	 - 1. its size equals the size recorded in the key (keys that record
	 -    no size pass this step), and
	 - 2. the key's backend can be looked up by name, and its fsckKey
	 -    check (when the backend has one) accepts the temp file.
	 -
	 - The backend is only consulted once the size check has passed. -}
	directcheck tmp = ifM sizeok
		( maybe (return False) verify $
			Backend.maybeLookupBackendName (Types.Key.keyBackendName key)
		, return False
		)
	  where
		-- Compare the on-disk size with the size the key claims.
		sizeok = case Types.Key.keySize key of
			Nothing -> return True
			Just expected -> do
				st <- liftIO $ getFileStatus tmp
				return $ expected == fromIntegral (fileSize st)
		-- A backend without a fsckKey check accepts any content.
		verify backend = case Types.Backend.fsckKey backend of
			Nothing -> return True
			Just check -> check key tmp
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -30,3 +30,6 @@ associatedFile :: Field
 | 
			
		|||
associatedFile = Field "associatedfile" $ \f ->
 | 
			
		||||
	-- is the file a safe relative filename?
 | 
			
		||||
	not (isAbsolute f) && not ("../" `isPrefixOf` f)
 | 
			
		||||
 | 
			
		||||
{- The "direct" field. The only value that passes its check is "1". -}
direct :: Field
direct = Field "direct" (== "1")
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -122,6 +122,7 @@ checkField :: (String, String) -> Bool
 | 
			
		|||
checkField (field, value)
 | 
			
		||||
	| field == fieldName remoteUUID = fieldCheck remoteUUID value
 | 
			
		||||
	| field == fieldName associatedFile = fieldCheck associatedFile value
 | 
			
		||||
	| field == fieldName direct = fieldCheck direct value
 | 
			
		||||
	| otherwise = False
 | 
			
		||||
 | 
			
		||||
failure :: IO ()
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -398,7 +398,9 @@ rsyncOrCopyFile rsyncparams src dest p =
 | 
			
		|||
rsyncParamsRemote :: Remote -> Direction -> Key -> FilePath -> AssociatedFile -> Annex [CommandParam]
 | 
			
		||||
rsyncParamsRemote r direction key file afile = do
 | 
			
		||||
	u <- getUUID
 | 
			
		||||
	direct <- isDirect
 | 
			
		||||
	let fields = (Fields.remoteUUID, fromUUID u)
 | 
			
		||||
		: (Fields.direct, if direct then "1" else "")
 | 
			
		||||
		: maybe [] (\f -> [(Fields.associatedFile, f)]) afile
 | 
			
		||||
	Just (shellcmd, shellparams) <- git_annex_shell (repo r)
 | 
			
		||||
		(if direction == Download then "sendkey" else "recvkey")
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
							
								
								
									
										6
									
								
								debian/changelog
									
										
									
									
										vendored
									
									
								
							
							
						
						
									
										6
									
								
								debian/changelog
									
										
									
									
										vendored
									
									
								
							| 
						 | 
				
			
			@ -1,8 +1,10 @@
 | 
			
		|||
git-annex (3.20130108) UNRELEASED; urgency=low
 | 
			
		||||
 | 
			
		||||
  * Now handles the case where a file that's being transferred to a remote
 | 
			
		||||
    is modified in place, which direct mode allows to happen. When this
 | 
			
		||||
    happens, the transfer now fails, rather than allow possibly corrupt
 | 
			
		||||
    data into the remote.
 | 
			
		||||
  * fsck: Better checking of file content in direct mode.
 | 
			
		||||
  * Special remotes now all rollback storage of keys that get modified
 | 
			
		||||
    during the transfer, which can happen in direct mode.
 | 
			
		||||
  * drop: Suggest using git annex move when numcopies prevents dropping a file.
 | 
			
		||||
  * webapp: Repo switcher filters out repos that do not exist any more
 | 
			
		||||
    (or are on a drive that's not mounted).
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -84,6 +84,32 @@ is converted to a real file when it becomes present.
 | 
			
		|||
 | 
			
		||||
## TODO
 | 
			
		||||
 | 
			
		||||
* kqueue does not deliver an event when an existing file is modified.
 | 
			
		||||
  This doesn't affect OSX, which uses FSEvents now, but it makes direct
 | 
			
		||||
  mode assistant not 100% reliable on other BSDs.
 | 
			
		||||
 | 
			
		||||
## done
 | 
			
		||||
 | 
			
		||||
* `git annex sync` updates the key to files mappings for files changed,
 | 
			
		||||
  but needs much other work to handle direct mode:
 | 
			
		||||
  * Generate git commit, without running `git commit`, because it will
 | 
			
		||||
    want to stage the full files. **done**
 | 
			
		||||
  * Update location logs for any files deleted by a commit. **done**
 | 
			
		||||
  * Generate a git merge, without running `git merge` (or possibly running
 | 
			
		||||
    it in a scratch repo?), because it will stumble over the direct files.
 | 
			
		||||
    **done**
 | 
			
		||||
  * Drop contents of files deleted by a merge (including updating the
 | 
			
		||||
    location log), or if we cannot drop,
 | 
			
		||||
    move their contents to `.git/annex/objects/`.  **no**  .. instead, 
 | 
			
		||||
    avoid ever losing file contents in a direct mode merge. If the file is
 | 
			
		||||
    deleted, its content is moved back to .git/annex/objects, if necessary.
 | 
			
		||||
  * When a merge adds a symlink pointing at a key that is present in the
 | 
			
		||||
    repo, replace the symlink with the direct file (either moving out
 | 
			
		||||
    of `.git/annex/objects/` or hard-linking if the same key is present
 | 
			
		||||
    elsewhere in the tree). **done**
 | 
			
		||||
  * handle merge conflicts on direct mode files **done**
 | 
			
		||||
* support direct mode in the assistant (many little fixes)
 | 
			
		||||
 | 
			
		||||
* Deal with files changing as they're being transferred from a direct mode
 | 
			
		||||
  repository to another git repository. The remote repo currently will 
 | 
			
		||||
  accept the bad data and update the location log to say it has the key.
 | 
			
		||||
| 
						 | 
				
			
			@ -113,34 +139,7 @@ is converted to a real file when it becomes present.
 | 
			
		|||
  the temp file, which is probably corrupt. (Could in future use it as a
 | 
			
		||||
  basis for transferring the new key..) **done**
 | 
			
		||||
 | 
			
		||||
  For git remotes, add a flag to `git-annex-shell recvkey` (using a field
 | 
			
		||||
  For git remotes, added a flag to `git-annex-shell recvkey` (using a field
 | 
			
		||||
  after the "--" to remain back-compat). With this flag, after receiving
 | 
			
		||||
  the data, the remote should wait for a signal that the data is good
 | 
			
		||||
  before it updates the location log. The signal could just be a "1"
 | 
			
		||||
  sent over the ssh channel. Or another `git-annex-shell` command. **TODO**
 | 
			
		||||
 | 
			
		||||
* kqueue does not deliver an event when an existing file is modified.
 | 
			
		||||
  This doesn't affect OSX, which uses FSEvents now, but it makes direct
 | 
			
		||||
  mode assistant not 100% reliable on other BSDs.
 | 
			
		||||
 | 
			
		||||
## done
 | 
			
		||||
 | 
			
		||||
* `git annex sync` updates the key to files mappings for files changed,
 | 
			
		||||
  but needs much other work to handle direct mode:
 | 
			
		||||
  * Generate git commit, without running `git commit`, because it will
 | 
			
		||||
    want to stage the full files. **done**
 | 
			
		||||
  * Update location logs for any files deleted by a commit. **done**
 | 
			
		||||
  * Generate a git merge, without running `git merge` (or possibly running
 | 
			
		||||
    it in a scratch repo?), because it will stumble over the direct files.
 | 
			
		||||
    **done**
 | 
			
		||||
  * Drop contents of files deleted by a merge (including updating the
 | 
			
		||||
    location log), or if we cannot drop,
 | 
			
		||||
    move their contents to `.git/annex/objects/`.  **no**  .. instead, 
 | 
			
		||||
    avoid ever losing file contents in a direct mode merge. If the file is
 | 
			
		||||
    deleted, its content is moved back to .git/annex/objects, if necessary.
 | 
			
		||||
  * When a merge adds a symlink pointing at a key that is present in the
 | 
			
		||||
    repo, replace the symlink with the direct file (either moving out
 | 
			
		||||
    of `.git/annex/objects/` or hard-linking if the same key is present
 | 
			
		||||
    elsewhere in the tree). **done**
 | 
			
		||||
  * handle merge conflicts on direct mode files **done**
 | 
			
		||||
* support direct mode in the assistant (many little fixes)
 | 
			
		||||
  the data, the remote fscks the data. This is not optimal, but avoids
 | 
			
		||||
  needing another round-trip, or a protocol change.
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -76,7 +76,8 @@ to git-annex-shell are:
 | 
			
		|||
  past versions of git-annex-shell (that ignore these, but would choke
 | 
			
		||||
  on new dashed options).
 | 
			
		||||
 | 
			
		||||
  Currently used fields include remoteuuid= and associatedfile=
 | 
			
		||||
  Currently used fields include remoteuuid=, associatedfile=,
 | 
			
		||||
  and direct=
 | 
			
		||||
 | 
			
		||||
# HOOK
 | 
			
		||||
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue