git-annex/Assistant/TransferrerPool.hs

{- A pool of "git-annex transferkeys" processes
 -
 - Copyright 2013 Joey Hess <id@joeyh.name>
 -
 - Licensed under the GNU GPL version 3 or higher.
 -}

module Assistant.TransferrerPool where

import Assistant.Common
import Assistant.Types.TransferrerPool
import Types.Transfer
import Utility.Batch

import qualified Command.TransferKeys as T

import Control.Concurrent.STM hiding (check)
import Control.Exception (throw)
import Control.Concurrent

{- Borrows a Transferrer from the pool and runs an action with it.
 -
 - popTransferrerPool yields an optional pool item together with a count.
 - When no item was available, a new Transferrer is started and wrapped
 - with mkTransferrerPoolItem; otherwise the popped item is passed through
 - checkTransferrerPoolItem, which may replace its Transferrer.
 -
 - After the action finishes: when the count is 0, the item is pushed back
 - still running. Otherwise the Transferrer is stopped in a forked thread
 - and only its check action is pushed back, so that only one Transferrer
 - is left running in the pool at a time.
 -
 - An exception caught by tryNonAsync is rethrown only after the pool has
 - been updated.
 -}
withTransferrer :: FilePath -> BatchCommandMaker -> TransferrerPool -> (Transferrer -> IO a) -> IO a
withTransferrer program batchmaker pool a = do
	(popped, remaining) <- atomically (popTransferrerPool pool)
	item@(TransferrerPoolItem (Just transferrer) check) <- maybe
		(mkTransferrer program batchmaker >>= mkTransferrerPoolItem pool)
		(checkTransferrerPoolItem program batchmaker)
		popped
	outcome <- tryNonAsync (a transferrer)
	if remaining /= 0
		then do
			{- Stopping waits for the process, so do it in the
			 - background. -}
			void (forkIO (stopTransferrer transferrer))
			atomically (pushTransferrerPool pool (TransferrerPoolItem Nothing check))
		else atomically (pushTransferrerPool pool item)
	case outcome of
		Left e -> throw e
		Right r -> return r

{- Makes sure a pool item holds a usable Transferrer.
 -
 - An item with a Transferrer is returned unchanged when its check action
 - yields True; otherwise that Transferrer is stopped and a new one is
 - started in its place. An item without a Transferrer just gets a new
 - one. The item's check action is carried over in every case. -}
checkTransferrerPoolItem :: FilePath -> BatchCommandMaker -> TransferrerPoolItem -> IO TransferrerPoolItem
checkTransferrerPoolItem program batchmaker item@(TransferrerPoolItem current check) =
	case current of
		Nothing -> replacement
		Just t -> do
			ok <- check
			if ok
				then return item
				else stopTransferrer t >> replacement
  where
	{- A pool item with a newly started Transferrer and the same check. -}
	replacement = mkTransferrer program batchmaker >>= \fresh ->
		return (TransferrerPoolItem (Just fresh) check)

{- Asks a Transferrer to carry out a Transfer and waits for its answer.
 -
 - The request is sent on the Transferrer's write handle and the response
 - is read from its read handle. Both steps run under catchBoolIO
 - (presumably yielding False when an IO error occurs; see
 - Utility.Exception). -}
performTransfer :: Transferrer -> Transfer -> TransferInfo -> IO Bool
performTransfer transferrer t info = catchBoolIO (request >> response)
  where
	request = T.sendRequest t info (transferrerWrite transferrer)
	response = T.readResponse (transferrerRead transferrer)

{- Launches a new git-annex transferkeys process and returns a
 - Transferrer holding the handles used to communicate with it.
 -
 - The process's stdin becomes the Transferrer's write handle, and its
 - stdout becomes the read handle. -}
mkTransferrer :: FilePath -> BatchCommandMaker -> IO Transferrer
mkTransferrer program batchmaker = do
	(Just writeh, Just readh, _, pid) <- createProcess spec
	mapM_ fileEncoding [readh, writeh]
	return $ Transferrer
		{ transferrerHandle = pid
		, transferrerRead = readh
		, transferrerWrite = writeh
		}
  where
	{- The batchmaker wraps the command so it runs as a batch job. -}
	(cmd, params) = batchmaker (program, [Param "transferkeys"])
	{- The process gets its own group so that the whole group can be
	 - killed to stop a transfer. Both pipes are requested, so the
	 - pattern match on createProcess's result above succeeds. -}
	spec = (proc cmd (toCommand params))
		{ std_in = CreatePipe
		, std_out = CreatePipe
		, create_group = True
		}

{- Checks if a Transferrer is still running. If not, makes a new one.
 -
 - When the process has exited, the old Transferrer's pipe handles are
 - closed before the replacement is started, rather than being left open
 - until they are garbage collected. That cleanup is best-effort: the
 - process is already gone, so a failure while closing its handles is
 - ignored and does not prevent starting the replacement. -}
checkTransferrer :: FilePath -> BatchCommandMaker -> Transferrer -> IO Transferrer
checkTransferrer program batchmaker t =
	getProcessExitCode (transferrerHandle t) >>= \status -> case status of
		{- No exit code yet, so it's still running. -}
		Nothing -> return t
		Just _ -> do
			void $ tryNonAsync $ stopTransferrer t
			mkTransferrer program batchmaker

{- Stops a Transferrer.
 -
 - Closing the fds will stop the transferrer: the read handle is closed
 - first, then the write handle, and then this blocks until the process
 - has exited. The exit code is discarded. -}
stopTransferrer :: Transferrer -> IO ()
stopTransferrer t = do
	mapM_ hClose [transferrerRead t, transferrerWrite t]
	_ <- waitForProcess (transferrerHandle t)
	return ()
maintain pools of running transferkeys processes (untested) 2013-03-19 22:46:29 +00:00			`{- A pool of "git-annex transferkeys" processes`
			`-`
update my email address and homepage url 2015-01-21 16:50:09 +00:00			`- Copyright 2013 Joey Hess <id@joeyh.name>`
maintain pools of running transferkeys processes (untested) 2013-03-19 22:46:29 +00:00			`-`
			`- Licensed under the GNU GPL version 3 or higher.`
			`-}`

			`module Assistant.TransferrerPool where`

			`import Assistant.Common`
			`import Assistant.Types.TransferrerPool`
get, move, copy, mirror: Added --failed switch which retries failed copies/moves Note that get --from foo --failed will get things that a previous get --from bar tried and failed to get, etc. I considered making --failed only retry transfers from the same remote, but it was easier, and seems more useful, to not have the same remote requirement. Noisy due to some refactoring into Types/ 2016-08-03 16:37:12 +00:00			`import Types.Transfer`
assistant: Run transferkeys as batch jobs. 2013-12-01 18:56:37 +00:00			`import Utility.Batch`
build assistant and watcher on windows (doesn't work yet) 2013-11-12 18:54:02 +00:00
maintain pools of running transferkeys processes (untested) 2013-03-19 22:46:29 +00:00			`import qualified Command.TransferKeys as T`

assistant: Start a new git-annex transferkeys process after a network connection change So that remotes that use a persistent network connection are restarted. A remote might keep open a long duration network connection, and could fail to deal well with losing the connection. This is particularly a concern now that we have external special remotes. An external special remote that is implemented naively might open the connection only when PREPARE is sent, and if it loses connection, throw errors on each request that is made. (Note that the ssh connection caching should not have this problem; if the long-duration ssh process loses connection, the named pipe is disconnected and the next ssh attempt will reconnect. Also, XMPP already deals with disconnection robustly in its own way.) There's no way for git-annex to know if a lost network connection actually affects a given remote, which might have a transfer in process. It does not make sense to force kill the transferkeys process every time the NetWatcher detects a change. (Especially because the NetWatcher sometimes polls 1 change per hour.) In any case, the NetWatcher only detects connection to a network, not disconnection. So if a transfer is in progress over the network, and the network goes down, that will need to time out on its own. An alternate approach that was considered is to use a separate transferkeys process for each remote, and detect when a request fails, and assume that means that process is in a failing state and restart it. The problem with that approach is that if a resource is not available and a remote fails every time, it degrades to starting a new transferkeys process for every file transfer, which is too expensive. Instead, this commit only handles the network reconnection case, and restarts transferkeys only once the network has reconnected and another transfer needs to be made. So, a transferkeys process will be reused for 1 hour, or until the next network connection. 
---- The NotificationBroadcaster was rewritten to use TMVars rather than MSampleVars, to allow checking without blocking if a notification has been received. ---- This commit was sponsored by Tobias Brunner. 2014-01-06 20:03:39 +00:00			`import Control.Concurrent.STM hiding (check)`
maintain pools of running transferkeys processes (untested) 2013-03-19 22:46:29 +00:00			`import Control.Exception (throw)`
			`import Control.Concurrent`

assistant: Start a new git-annex transferkeys process after a network connection change So that remotes that use a persistent network connection are restarted. A remote might keep open a long duration network connection, and could fail to deal well with losing the connection. This is particularly a concern now that we have external special remotes. An external special remote that is implemented naively might open the connection only when PREPARE is sent, and if it loses connection, throw errors on each request that is made. (Note that the ssh connection caching should not have this problem; if the long-duration ssh process loses connection, the named pipe is disconnected and the next ssh attempt will reconnect. Also, XMPP already deals with disconnection robustly in its own way.) There's no way for git-annex to know if a lost network connection actually affects a given remote, which might have a transfer in process. It does not make sense to force kill the transferkeys process every time the NetWatcher detects a change. (Especially because the NetWatcher sometimes polls 1 change per hour.) In any case, the NetWatcher only detects connection to a network, not disconnection. So if a transfer is in progress over the network, and the network goes down, that will need to time out on its own. An alternate approach that was considered is to use a separate transferkeys process for each remote, and detect when a request fails, and assume that means that process is in a failing state and restart it. The problem with that approach is that if a resource is not available and a remote fails every time, it degrades to starting a new transferkeys process for every file transfer, which is too expensive. Instead, this commit only handles the network reconnection case, and restarts transferkeys only once the network has reconnected and another transfer needs to be made. So, a transferkeys process will be reused for 1 hour, or until the next network connection. 
---- The NotificationBroadcaster was rewritten to use TMVars rather than MSampleVars, to allow checking without blocking if a notification has been received. ---- This commit was sponsored by Tobias Brunner. 2014-01-06 20:03:39 +00:00			`{- Runs an action with a Transferrer from the pool.`
			`-`
			`- Only one Transferrer is left running in the pool at a time.`
			`- So if this needed to start a new Transferrer, it's stopped when done.`
			`-}`
avoid repeatedly searching path to make batch command when running transferkeys 2013-12-01 19:37:51 +00:00			`withTransferrer :: FilePath -> BatchCommandMaker -> TransferrerPool -> (Transferrer -> IO a) -> IO a`
			`withTransferrer program batchmaker pool a = do`
tested transferkeys restarting; fix some bugs 2014-01-06 21:07:08 +00:00			`(mi, leftinpool) <- atomically (popTransferrerPool pool)`
			`i@(TransferrerPoolItem (Just t) check) <- case mi of`
			`Nothing -> mkTransferrerPoolItem pool =<< mkTransferrer program batchmaker`
			`Just i -> checkTransferrerPoolItem program batchmaker i`
maintain pools of running transferkeys processes (untested) 2013-03-19 22:46:29 +00:00			`v <- tryNonAsync $ a t`
tested transferkeys restarting; fix some bugs 2014-01-06 21:07:08 +00:00			`if leftinpool == 0`
			`then atomically $ pushTransferrerPool pool i`
			`else do`
			`void $ forkIO $ stopTransferrer t`
			`atomically $ pushTransferrerPool pool $ TransferrerPoolItem Nothing check`
maintain pools of running transferkeys processes (untested) 2013-03-19 22:46:29 +00:00			`either throw return v`
assistant: Start a new git-annex transferkeys process after a network connection change So that remotes that use a persistent network connection are restarted. A remote might keep open a long duration network connection, and could fail to deal well with losing the connection. This is particularly a concern now that we have external special remotes. An external special remote that is implemented naively might open the connection only when PREPARE is sent, and if it loses connection, throw errors on each request that is made. (Note that the ssh connection caching should not have this problem; if the long-duration ssh process loses connection, the named pipe is disconnected and the next ssh attempt will reconnect. Also, XMPP already deals with disconnection robustly in its own way.) There's no way for git-annex to know if a lost network connection actually affects a given remote, which might have a transfer in process. It does not make sense to force kill the transferkeys process every time the NetWatcher detects a change. (Especially because the NetWatcher sometimes polls 1 change per hour.) In any case, the NetWatcher only detects connection to a network, not disconnection. So if a transfer is in progress over the network, and the network goes down, that will need to time out on its own. An alternate approach that was considered is to use a separate transferkeys process for each remote, and detect when a request fails, and assume that means that process is in a failing state and restart it. The problem with that approach is that if a resource is not available and a remote fails every time, it degrades to starting a new transferkeys process for every file transfer, which is too expensive. Instead, this commit only handles the network reconnection case, and restarts transferkeys only once the network has reconnected and another transfer needs to be made. So, a transferkeys process will be reused for 1 hour, or until the next network connection. 
---- The NotificationBroadcaster was rewritten to use TMVars rather than MSampleVars, to allow checking without blocking if a notification has been received. ---- This commit was sponsored by Tobias Brunner. 2014-01-06 20:03:39 +00:00
			`{- Check if a Transferrer from the pool is still ok to be used.`
			`- If not, stop it and start a new one. -}`
			`checkTransferrerPoolItem :: FilePath -> BatchCommandMaker -> TransferrerPoolItem -> IO TransferrerPoolItem`
			`checkTransferrerPoolItem program batchmaker i = case i of`
			`TransferrerPoolItem (Just t) check -> ifM check`
			`( return i`
			`, do`
			`stopTransferrer t`
			`new check`
maintain pools of running transferkeys processes (untested) 2013-03-19 22:46:29 +00:00			`)`
assistant: Start a new git-annex transferkeys process after a network connection change So that remotes that use a persistent network connection are restarted. A remote might keep open a long duration network connection, and could fail to deal well with losing the connection. This is particularly a concern now that we have external special remotes. An external special remote that is implemented naively might open the connection only when PREPARE is sent, and if it loses connection, throw errors on each request that is made. (Note that the ssh connection caching should not have this problem; if the long-duration ssh process loses connection, the named pipe is disconnected and the next ssh attempt will reconnect. Also, XMPP already deals with disconnection robustly in its own way.) There's no way for git-annex to know if a lost network connection actually affects a given remote, which might have a transfer in process. It does not make sense to force kill the transferkeys process every time the NetWatcher detects a change. (Especially because the NetWatcher sometimes polls 1 change per hour.) In any case, the NetWatcher only detects connection to a network, not disconnection. So if a transfer is in progress over the network, and the network goes down, that will need to time out on its own. An alternate approach that was considered is to use a separate transferkeys process for each remote, and detect when a request fails, and assume that means that process is in a failing state and restart it. The problem with that approach is that if a resource is not available and a remote fails every time, it degrades to starting a new transferkeys process for every file transfer, which is too expensive. Instead, this commit only handles the network reconnection case, and restarts transferkeys only once the network has reconnected and another transfer needs to be made. So, a transferkeys process will be reused for 1 hour, or until the next network connection. 
---- The NotificationBroadcaster was rewritten to use TMVars rather than MSampleVars, to allow checking without blocking if a notification has been received. ---- This commit was sponsored by Tobias Brunner. 2014-01-06 20:03:39 +00:00			`TransferrerPoolItem Nothing check -> new check`
			`where`
			`new check = do`
			`t <- mkTransferrer program batchmaker`
			`return $ TransferrerPoolItem (Just t) check`
maintain pools of running transferkeys processes (untested) 2013-03-19 22:46:29 +00:00
			`{- Requests that a Transferrer perform a Transfer, and waits for it to`
			`- finish. -}`
assistant: When there are multiple remotes giving different ways to access the same repository, honor remote cost settings and use the cheapest available. Note that TransferInfo does not always contain the Remote, although any transfer added to the TransferQueue does have a Remote in its TransferInfo. The transferkeys command still accepts a UUID, which is useful to handle upgrades, where an old assistant version runs the new transferkeys. This commit was sponsored by Kalle Svensson. 2014-05-19 20:19:33 +00:00			`performTransfer :: Transferrer -> Transfer -> TransferInfo -> IO Bool`
			`performTransfer transferrer t info = catchBoolIO $ do`
			`T.sendRequest t info (transferrerWrite transferrer)`
maintain pools of running transferkeys processes (untested) 2013-03-19 22:46:29 +00:00			`T.readResponse (transferrerRead transferrer)`

port transferkeys to windows; make stopping in progress transfers work too (probably) transferkeys had used special FDs for communication, but that would be quite annoying to do in Windows. Instead, use stdin and stdout. But, to avoid commands like rsync stomping on them and messing up the communications channel, they're duplicated to a different handle; stdin is replaced with a null handle, and stdout is replaced with a copy of stderr. This should all work in windows too. Stopping in progress transfers may work on windows.. if the types unify anyway. ;) May need some more porting. 2013-12-11 03:19:18 +00:00			`{- Starts a new git-annex transferkeys process, setting up handles`
maintain pools of running transferkeys processes (untested) 2013-03-19 22:46:29 +00:00			`- that will be used to communicate with it. -}`
avoid repeatedly searching path to make batch command when running transferkeys 2013-12-01 19:37:51 +00:00			`mkTransferrer :: FilePath -> BatchCommandMaker -> IO Transferrer`
			`mkTransferrer program batchmaker = do`
assistant: Run transferkeys as batch jobs. 2013-12-01 18:56:37 +00:00			`{- It runs as a batch job. -}`
port transferkeys to windows; make stopping in progress transfers work too (probably) transferkeys had used special FDs for communication, but that would be quite annoying to do in Windows. Instead, use stdin and stdout. But, to avoid commands like rsync stomping on them and messing up the communications channel, they're duplicated to a different handle; stdin is replaced with a null handle, and stdout is replaced with a copy of stderr. This should all work in windows too. Stopping in progress transfers may work on windows.. if the types unify anyway. ;) May need some more porting. 2013-12-11 03:19:18 +00:00			`let (program', params') = batchmaker (program, [Param "transferkeys"])`
maintain pools of running transferkeys processes (untested) 2013-03-19 22:46:29 +00:00			`{- It's put into its own group so that the whole group can be`
			`- killed to stop a transfer. -}`
port transferkeys to windows; make stopping in progress transfers work too (probably) transferkeys had used special FDs for communication, but that would be quite annoying to do in Windows. Instead, use stdin and stdout. But, to avoid commands like rsync stomping on them and messing up the communications channel, they're duplicated to a different handle; stdin is replaced with a null handle, and stdout is replaced with a copy of stderr. This should all work in windows too. Stopping in progress transfers may work on windows.. if the types unify anyway. ;) May need some more porting. 2013-12-11 03:19:18 +00:00			`(Just writeh, Just readh, _, pid) <- createProcess`
			`(proc program' $ toCommand params')`
			`{ create_group = True`
			`, std_in = CreatePipe`
			`, std_out = CreatePipe`
			`}`
			`fileEncoding readh`
			`fileEncoding writeh`
maintain pools of running transferkeys processes (untested) 2013-03-19 22:46:29 +00:00			`return $ Transferrer`
port transferkeys to windows; make stopping in progress transfers work too (probably) transferkeys had used special FDs for communication, but that would be quite annoying to do in Windows. Instead, use stdin and stdout. But, to avoid commands like rsync stomping on them and messing up the communications channel, they're duplicated to a different handle; stdin is replaced with a null handle, and stdout is replaced with a copy of stderr. This should all work in windows too. Stopping in progress transfers may work on windows.. if the types unify anyway. ;) May need some more porting. 2013-12-11 03:19:18 +00:00			`{ transferrerRead = readh`
			`, transferrerWrite = writeh`
maintain pools of running transferkeys processes (untested) 2013-03-19 22:46:29 +00:00			`, transferrerHandle = pid`
			`}`

			`{- Checks if a Transferrer is still running. If not, makes a new one. -}`
avoid repeatedly searching path to make batch command when running transferkeys 2013-12-01 19:37:51 +00:00			`checkTransferrer :: FilePath -> BatchCommandMaker -> Transferrer -> IO Transferrer`
			`checkTransferrer program batchmaker t =`
			`maybe (return t) (const $ mkTransferrer program batchmaker)`
			`=<< getProcessExitCode (transferrerHandle t)`
maintain pools of running transferkeys processes (untested) 2013-03-19 22:46:29 +00:00
			`{- Closing the fds will stop the transferrer. -}`
			`stopTransferrer :: Transferrer -> IO ()`
			`stopTransferrer t = do`
			`hClose $ transferrerRead t`
			`hClose $ transferrerWrite t`
			`void $ waitForProcess $ transferrerHandle t`