From 3a0cffcfed4e6824b0771ce69f70095a4e3b9917 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Tue, 18 Sep 2012 14:10:33 -0400 Subject: [PATCH] when a Download finishes, queue Uploads This ensures file propagation takes place in situations such as: USB drive A is connected to B. A's master branch is already in sync with B, but it is being used to sneakernet some files around, so B downloads those. There is no master branch change, so C does not request these files. B needs to upload the files it just downloaded on to C, etc. My first try at this, I saw loops happen. B uploaded to C, which then tried to upload back to B (because it had not received the updated git-annex branch from B yet). B already had the file, but it still created a transfer info file from the incoming transfer, and its watcher saw that be removed, and tried to upload back to C. These loops should have been fixed by my previous commit. (They never affected ssh remotes, only local ones, it seemed.) While C might still try to upload to B, or to some other remote that already has the file, the extra work dies out there. 
--- Assistant.hs | 2 +- Assistant/Threads/TransferWatcher.hs | 34 ++++++++----- Assistant/Threads/Transferrer.hs | 4 ++ doc/design/assistant/syncing.mdwn | 72 +++++++++++++++------------- 4 files changed, 66 insertions(+), 46 deletions(-) diff --git a/Assistant.hs b/Assistant.hs index b0a00ebe85..d1ef129754 100644 --- a/Assistant.hs +++ b/Assistant.hs @@ -186,7 +186,7 @@ startAssistant assistant daemonize webappwaiter = withThreadState $ \st -> do , assist $ pushThread st dstatus commitchan pushmap , assist $ pushRetryThread st dstatus pushmap , assist $ mergeThread st dstatus transferqueue - , assist $ transferWatcherThread st dstatus + , assist $ transferWatcherThread st dstatus transferqueue , assist $ transferPollerThread st dstatus , assist $ transfererThread st dstatus transferqueue transferslots , assist $ daemonStatusThread st dstatus diff --git a/Assistant/Threads/TransferWatcher.hs b/Assistant/Threads/TransferWatcher.hs index 9ae4eb365c..e62e3db3af 100644 --- a/Assistant/Threads/TransferWatcher.hs +++ b/Assistant/Threads/TransferWatcher.hs @@ -10,6 +10,7 @@ module Assistant.Threads.TransferWatcher where import Assistant.Common import Assistant.ThreadedMonad import Assistant.DaemonStatus +import Assistant.TransferQueue import Logs.Transfer import Utility.DirWatcher import Utility.Types.DirWatcher @@ -20,12 +21,12 @@ thisThread = "TransferWatcher" {- This thread watches for changes to the gitAnnexTransferDir, - and updates the DaemonStatus's map of ongoing transfers. 
-} -transferWatcherThread :: ThreadState -> DaemonStatusHandle -> NamedThread -transferWatcherThread st dstatus = thread $ do +transferWatcherThread :: ThreadState -> DaemonStatusHandle -> TransferQueue -> NamedThread +transferWatcherThread st dstatus transferqueue = thread $ do g <- runThreadState st $ fromRepo id let dir = gitAnnexTransferDir g createDirectoryIfMissing True dir - let hook a = Just $ runHandler st dstatus a + let hook a = Just $ runHandler st dstatus transferqueue a let hooks = mkWatchHooks { addHook = hook onAdd , delHook = hook onDel @@ -36,25 +37,25 @@ transferWatcherThread st dstatus = thread $ do where thread = NamedThread thisThread -type Handler = ThreadState -> DaemonStatusHandle -> FilePath -> Maybe FileStatus -> IO () +type Handler = ThreadState -> DaemonStatusHandle -> TransferQueue -> FilePath -> Maybe FileStatus -> IO () {- Runs an action handler. - - Exceptions are ignored, otherwise a whole thread could be crashed. -} -runHandler :: ThreadState -> DaemonStatusHandle -> Handler -> FilePath -> Maybe FileStatus -> IO () -runHandler st dstatus handler file filestatus = void $ +runHandler :: ThreadState -> DaemonStatusHandle -> TransferQueue -> Handler -> FilePath -> Maybe FileStatus -> IO () +runHandler st dstatus transferqueue handler file filestatus = void $ either print (const noop) =<< tryIO go where - go = handler st dstatus file filestatus + go = handler st dstatus transferqueue file filestatus {- Called when there's an error with inotify. -} onErr :: Handler -onErr _ _ msg _ = error msg +onErr _ _ _ msg _ = error msg {- Called when a new transfer information file is written. -} onAdd :: Handler -onAdd st dstatus file _ = case parseTransferFile file of +onAdd st dstatus _ file _ = case parseTransferFile file of Nothing -> noop Just t -> go t =<< runThreadState st (checkTransfer t) where @@ -72,11 +73,22 @@ onAdd st dstatus file _ = case parseTransferFile file of {- Called when a transfer information file is removed. 
-} onDel :: Handler -onDel _ dstatus file _ = case parseTransferFile file of +onDel st dstatus transferqueue file _ = case parseTransferFile file of Nothing -> noop Just t -> do debug thisThread [ "transfer finishing:" , show t ] - void $ removeTransfer dstatus t + minfo <- removeTransfer dstatus t + + {- Queue uploads of files we successfully downloaded, + - spreading them out to other reachable remotes. -} + case (minfo, transferDirection t) of + (Just info, Download) -> runThreadState st $ + queueTransfers Later transferqueue dstatus + (transferKey t) + (associatedFile info) + Upload + _ -> noop + diff --git a/Assistant/Threads/Transferrer.hs b/Assistant/Threads/Transferrer.hs index bd73d06d62..8e2b672438 100644 --- a/Assistant/Threads/Transferrer.hs +++ b/Assistant/Threads/Transferrer.hs @@ -63,6 +63,10 @@ startTransfer st dstatus program t info = case (transferRemote info, associatedF (_, _, _, pid) <- createProcess (proc program $ toCommand params) { create_group = True } + {- Alerts are only shown for successful transfers. + - Transfers can temporarily fail for many reasons, + - so there's no point in bothering the user about + - those. The assistant should recover. -} whenM ((==) ExitSuccess <$> waitForProcess pid) $ void $ addAlert dstatus $ makeAlertFiller True $ diff --git a/doc/design/assistant/syncing.mdwn b/doc/design/assistant/syncing.mdwn index 7a445b3538..a7592d84b7 100644 --- a/doc/design/assistant/syncing.mdwn +++ b/doc/design/assistant/syncing.mdwn @@ -10,40 +10,6 @@ all the other git clones, at both the git level and the key/value level. which has prevented me from debugging it. This could possibly have been caused by the bug fixed in 750c4ac6c282d14d19f79e0711f858367da145e4. -* The transfer code doesn't always manage to transfer file contents. - - Besides reconnection events, there are two places where transfers get queued: - - 1. When the committer commits a file, it queues uploads. - 2. 
When the watcher sees a broken symlink be created, it queues downloads. - - Consider a doubly-linked chain of three repositories, A B and C. - (C and A do not directly communicate.) - - * File is added to A. - * A uploads its content to B. - * At the same time, A git syncs to B. - * Once B gets the git sync, it git syncs to C. - * When C's watcher sees the file appear, it tries to download it. But if - B had not finished receiving the file from A, C doesn't know B has it, - and cannot download it from anywhere. - - Possible solution: After B receives content, it could queue uploads of it - to all remotes that it doesn't know have it yet, which would include C. - - In practice, this has the problem that when C receives the content, - it will queue uploads of it, which can send back to B (or to some other repo - that already has the content) and loop, until the git-annex branches catch - up and break the cycle. - - Possible solution: C could record a deferred download. (Similar to a failed - download, but with an unknown source.) When C next receives a git-annex - branch push, it could try to queue deferred downloads. **done** - - Note that this solution won't cover use cases the other does. For example, - connect a USB drive A; B syncs files from it, and then should pass them to C. - If the files are not new, C won't immediatly request them from B. - ## TODO * Test MountWatcher on LXDE. @@ -250,3 +216,41 @@ redone to check it. if we assume that's synced between existing git remotes, it should be possible for them to do file transfers to/from special remotes. **done** + +* The transfer code doesn't always manage to transfer file contents. + + Besides reconnection events, there are two places where transfers get queued: + + 1. When the committer commits a file, it queues uploads. + 2. When the watcher sees a broken symlink be created, it queues downloads. + + Consider a doubly-linked chain of three repositories, A B and C. + (C and A do not directly communicate.) 
+ + * File is added to A. + * A uploads its content to B. + * At the same time, A git syncs to B. + * Once B gets the git sync, it git syncs to C. + * When C's watcher sees the file appear, it tries to download it. But if + B had not finished receiving the file from A, C doesn't know B has it, + and cannot download it from anywhere. + + Possible solution: After B receives content, it could queue uploads of it + to all remotes that it doesn't know have it yet, which would include C. + **done** + + In practice, this had the problem that when C receives the content, + it will queue uploads of it, which can send back to B (or to some other repo + that already has the content) and loop, until the git-annex branches catch + up and break the cycle. + + To avoid that problem, incoming uploads should not result in a transfer + info file being written when the key is already present. **done** + + Possible solution: C could record a deferred download. (Similar to a failed + download, but with an unknown source.) When C next receives a git-annex + branch push, it could try to queue deferred downloads. **done** + + Note that this solution won't cover use cases the other does. For example, + connect a USB drive A; B syncs files from it, and then should pass them to C. + If the files are not new, C won't immediately request them from B.