add TransferScanner thread

Efficiently finding transfers that need to be done to get two repos back
in sync seems like an interesting problem.
This commit is contained in:
Joey Hess 2012-07-22 23:16:56 -04:00
parent 26e4e65307
commit 522f568450
6 changed files with 138 additions and 43 deletions

View file

@ -36,8 +36,7 @@
- inotify threads associated with it, too.) - inotify threads associated with it, too.)
- Thread 9: transfer watcher - Thread 9: transfer watcher
- Watches for transfer information files being created and removed, - Watches for transfer information files being created and removed,
- and maintains the DaemonStatus currentTransfers map and the - and maintains the DaemonStatus currentTransfers map.
- TransferSlots QSemN.
- (This uses inotify on .git/annex/transfer/, so there are - (This uses inotify on .git/annex/transfer/, so there are
- additional inotify threads associated with it, too.) - additional inotify threads associated with it, too.)
- Thread 10: transferrer - Thread 10: transferrer
@ -49,8 +48,14 @@
- Thread 13: mount watcher - Thread 13: mount watcher
- Either uses dbus to watch for drive mount events, or, when - Either uses dbus to watch for drive mount events, or, when
- there's no dbus, polls to find newly mounted filesystems. - there's no dbus, polls to find newly mounted filesystems.
- Once a filesystem that contains a remote is mounted, syncs - Once a filesystem that contains a remote is mounted, updates
- with it. - state about that remote, pulls from it, and queues a push to it,
- as well as an update, and queues it onto the
- ConnectedRemoteChan
- Thread 14: transfer scanner
- Does potentially expensive checks to find data that needs to be
- transferred from or to remotes, and queues Transfers.
- Uses the ScanRemotes map.
- -
- ThreadState: (MVar) - ThreadState: (MVar)
- The Annex state is stored here, which allows resuscitating the - The Annex state is stored here, which allows resuscitating the
@ -78,6 +83,9 @@
- to block until a slot is available. - to block until a slot is available.
- This MVar should only be manipulated from inside the Annex monad, - This MVar should only be manipulated from inside the Annex monad,
- which ensures it's accessed only after the ThreadState MVar. - which ensures it's accessed only after the ThreadState MVar.
- ScanRemotes (STM TMVar)
- Remotes that have been disconnected, and should be scanned
- are indicated by writing to this TMVar.
-} -}
module Assistant where module Assistant where
@ -88,6 +96,7 @@ import Assistant.DaemonStatus
import Assistant.Changes import Assistant.Changes
import Assistant.Commits import Assistant.Commits
import Assistant.Pushes import Assistant.Pushes
import Assistant.ScanRemotes
import Assistant.TransferQueue import Assistant.TransferQueue
import Assistant.TransferSlots import Assistant.TransferSlots
import Assistant.Threads.Watcher import Assistant.Threads.Watcher
@ -98,6 +107,7 @@ import Assistant.Threads.TransferWatcher
import Assistant.Threads.Transferrer import Assistant.Threads.Transferrer
import Assistant.Threads.SanityChecker import Assistant.Threads.SanityChecker
import Assistant.Threads.MountWatcher import Assistant.Threads.MountWatcher
import Assistant.Threads.TransferScanner
import qualified Utility.Daemon import qualified Utility.Daemon
import Utility.LogFile import Utility.LogFile
import Utility.ThreadScheduler import Utility.ThreadScheduler
@ -124,6 +134,7 @@ startDaemon assistant foreground
pushmap <- newFailedPushMap pushmap <- newFailedPushMap
transferqueue <- newTransferQueue transferqueue <- newTransferQueue
transferslots <- newTransferSlots transferslots <- newTransferSlots
scanremotes <- newScanRemoteMap
mapM_ forkIO mapM_ forkIO
[ commitThread st changechan commitchan transferqueue dstatus [ commitThread st changechan commitchan transferqueue dstatus
, pushThread st dstatus commitchan pushmap , pushThread st dstatus commitchan pushmap
@ -133,7 +144,8 @@ startDaemon assistant foreground
, transfererThread st dstatus transferqueue transferslots , transfererThread st dstatus transferqueue transferslots
, daemonStatusThread st dstatus , daemonStatusThread st dstatus
, sanityCheckerThread st dstatus transferqueue changechan , sanityCheckerThread st dstatus transferqueue changechan
, mountWatcherThread st dstatus , mountWatcherThread st dstatus scanremotes
, transferScannerThread st scanremotes transferqueue
, watchThread st dstatus transferqueue changechan , watchThread st dstatus transferqueue changechan
] ]
debug "assistant" debug "assistant"

41
Assistant/ScanRemotes.hs Normal file
View file

@ -0,0 +1,41 @@
{- git-annex assistant remotes needing scanning
-
- Copyright 2012 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Assistant.ScanRemotes where
import Common.Annex
import Data.Function
import Control.Concurrent.STM
import Data.Time.Clock
import qualified Data.Map as M
type ScanRemoteMap = TMVar (M.Map Remote UTCTime)
{- The TMVar starts empty, and is left empty when there are no remotes
- to scan. -}
newScanRemoteMap :: IO ScanRemoteMap
newScanRemoteMap = atomically newEmptyTMVar
{- Blocks until there is a remote that needs to be scanned.
- Processes remotes added most recently first. -}
getScanRemote :: ScanRemoteMap -> IO Remote
getScanRemote v = atomically $ do
m <- takeTMVar v
let newest = Prelude.head $ reverse $
map fst $ sortBy (compare `on` snd) $ M.toList m
putTMVar v $ M.delete newest m
return newest
{- Adds new remotes that need scanning to the map. -}
addScanRemotes :: ScanRemoteMap -> [Remote] -> IO ()
addScanRemotes _ [] = return ()
addScanRemotes v rs = do
now <- getCurrentTime
atomically $ do
m <- fromMaybe M.empty <$> tryTakeTMVar v
putTMVar v $ foldr (`M.insert` now) m rs

View file

@ -13,6 +13,8 @@ module Assistant.Threads.MountWatcher where
import Assistant.Common import Assistant.Common
import Assistant.ThreadedMonad import Assistant.ThreadedMonad
import Assistant.DaemonStatus import Assistant.DaemonStatus
import Assistant.ScanRemotes
import Assistant.Threads.Pusher (pushToRemotes)
import qualified Annex import qualified Annex
import qualified Git import qualified Git
import Utility.ThreadScheduler import Utility.ThreadScheduler
@ -27,6 +29,7 @@ import Logs.Remote
import Control.Concurrent import Control.Concurrent
import qualified Control.Exception as E import qualified Control.Exception as E
import qualified Data.Set as S import qualified Data.Set as S
import Data.Time.Clock
#if WITH_DBUS #if WITH_DBUS
import DBus.Client import DBus.Client
@ -39,18 +42,18 @@ import Data.Word (Word32)
thisThread :: ThreadName thisThread :: ThreadName
thisThread = "MountWatcher" thisThread = "MountWatcher"
mountWatcherThread :: ThreadState -> DaemonStatusHandle -> IO () mountWatcherThread :: ThreadState -> DaemonStatusHandle -> ScanRemoteMap -> IO ()
mountWatcherThread st handle = mountWatcherThread st handle scanremotes =
#if WITH_DBUS #if WITH_DBUS
dbusThread st handle dbusThread st handle scanremotes
#else #else
pollingThread st handle pollingThread st handle scanremotes
#endif #endif
#if WITH_DBUS #if WITH_DBUS
dbusThread :: ThreadState -> DaemonStatusHandle -> IO () dbusThread :: ThreadState -> DaemonStatusHandle -> ScanRemoteMap -> IO ()
dbusThread st dstatus = E.catch (go =<< connectSession) onerr dbusThread st dstatus scanremotes = E.catch (go =<< connectSession) onerr
where where
go client = ifM (checkMountMonitor client) go client = ifM (checkMountMonitor client)
( do ( do
@ -63,7 +66,7 @@ dbusThread st dstatus = E.catch (go =<< connectSession) onerr
listen client matcher $ \_event -> do listen client matcher $ \_event -> do
nowmounted <- currentMountPoints nowmounted <- currentMountPoints
wasmounted <- swapMVar mvar nowmounted wasmounted <- swapMVar mvar nowmounted
handleMounts st dstatus wasmounted nowmounted handleMounts st dstatus scanremotes wasmounted nowmounted
, do , do
runThreadState st $ runThreadState st $
warning "No known volume monitor available through dbus; falling back to mtab polling" warning "No known volume monitor available through dbus; falling back to mtab polling"
@ -74,7 +77,7 @@ dbusThread st dstatus = E.catch (go =<< connectSession) onerr
runThreadState st $ runThreadState st $
warning $ "Failed to use dbus; falling back to mtab polling (" ++ show e ++ ")" warning $ "Failed to use dbus; falling back to mtab polling (" ++ show e ++ ")"
pollinstead pollinstead
pollinstead = pollingThread st dstatus pollinstead = pollingThread st dstatus scanremotes
type ServiceName = String type ServiceName = String
@ -140,30 +143,32 @@ mountAdded = [gvfs, kde]
#endif #endif
pollingThread :: ThreadState -> DaemonStatusHandle -> IO () pollingThread :: ThreadState -> DaemonStatusHandle -> ScanRemoteMap -> IO ()
pollingThread st dstatus = go =<< currentMountPoints pollingThread st dstatus scanremotes = go =<< currentMountPoints
where where
go wasmounted = do go wasmounted = do
threadDelaySeconds (Seconds 10) threadDelaySeconds (Seconds 10)
nowmounted <- currentMountPoints nowmounted <- currentMountPoints
handleMounts st dstatus wasmounted nowmounted handleMounts st dstatus scanremotes wasmounted nowmounted
go nowmounted go nowmounted
handleMounts :: ThreadState -> DaemonStatusHandle -> MountPoints -> MountPoints -> IO () handleMounts :: ThreadState -> DaemonStatusHandle -> ScanRemoteMap -> MountPoints -> MountPoints -> IO ()
handleMounts st dstatus wasmounted nowmounted = mapM_ (handleMount st dstatus) $ handleMounts st dstatus scanremotes wasmounted nowmounted = mapM_ (handleMount st dstatus scanremotes) $
S.toList $ newMountPoints wasmounted nowmounted S.toList $ newMountPoints wasmounted nowmounted
handleMount :: ThreadState -> DaemonStatusHandle -> Mntent -> IO () handleMount :: ThreadState -> DaemonStatusHandle -> ScanRemoteMap -> Mntent -> IO ()
handleMount st dstatus mntent = do handleMount st dstatus scanremotes mntent = do
debug thisThread ["detected mount of", mnt_dir mntent] debug thisThread ["detected mount of", mnt_dir mntent]
rs <- remotesUnder st dstatus mntent rs <- remotesUnder st dstatus mntent
unless (null rs) $ do unless (null rs) $ do
branch <- runThreadState st $ Command.Sync.currentBranch branch <- runThreadState st $ Command.Sync.currentBranch
let pullrs = filter Git.repoIsLocal rs let nonspecial = filter (Git.repoIsLocal . Remote.repo) rs
debug thisThread ["pulling from", show pullrs] unless (null nonspecial) $ do
runThreadState st $ manualPull branch pullrs debug thisThread ["pulling from", show nonspecial]
-- TODO queue transfers for new files in both directions runThreadState st $ manualPull branch nonspecial
where now <- getCurrentTime
pushToRemotes thisThread now st Nothing nonspecial
addScanRemotes scanremotes rs
{- Finds remotes located underneath the mount point. {- Finds remotes located underneath the mount point.
- -

View file

@ -1,4 +1,4 @@
{- git-annex assistant git pushing threads {- git-annex assistant git pushing thread
- -
- Copyright 2012 Joey Hess <joey@kitenet.net> - Copyright 2012 Joey Hess <joey@kitenet.net>
- -
@ -36,7 +36,7 @@ pushRetryThread st pushmap = runEvery (Seconds halfhour) $ do
, "failed pushes" , "failed pushes"
] ]
now <- getCurrentTime now <- getCurrentTime
pushToRemotes now st pushmap topush pushToRemotes thisThread now st (Just pushmap) topush
where where
halfhour = 1800 halfhour = 1800
@ -53,7 +53,7 @@ pushThread st daemonstatus commitchan pushmap = do
then do then do
remotes <- runThreadState st $ remotes <- runThreadState st $
knownRemotes <$> getDaemonStatus daemonstatus knownRemotes <$> getDaemonStatus daemonstatus
pushToRemotes now st pushmap remotes pushToRemotes thisThread now st (Just pushmap) remotes
else do else do
debug thisThread debug thisThread
[ "delaying push of" [ "delaying push of"
@ -78,24 +78,27 @@ shouldPush _now commits
- -
- Avoids running possibly long-duration commands in the Annex monad, so - Avoids running possibly long-duration commands in the Annex monad, so
- as not to block other threads. -} - as not to block other threads. -}
pushToRemotes :: UTCTime -> ThreadState -> FailedPushMap -> [Remote] -> IO () pushToRemotes :: ThreadName -> UTCTime -> ThreadState -> (Maybe FailedPushMap) -> [Remote] -> IO ()
pushToRemotes now st pushmap remotes = do pushToRemotes threadname now st mpushmap remotes = do
(g, branch) <- runThreadState st $ (g, branch) <- runThreadState st $
(,) <$> fromRepo id <*> Command.Sync.currentBranch (,) <$> fromRepo id <*> Command.Sync.currentBranch
go True branch g remotes go True branch g remotes
where where
go shouldretry branch g rs = do go shouldretry branch g rs = do
debug thisThread debug threadname
[ "pushing to" [ "pushing to"
, show rs , show rs
] ]
Command.Sync.updateBranch (Command.Sync.syncBranch branch) g Command.Sync.updateBranch (Command.Sync.syncBranch branch) g
(succeeded, failed) <- inParallel (push g branch) rs (succeeded, failed) <- inParallel (push g branch) rs
case mpushmap of
Nothing -> noop
Just pushmap ->
changeFailedPushMap pushmap $ \m -> changeFailedPushMap pushmap $ \m ->
M.union (makemap failed) $ M.union (makemap failed) $
M.difference m (makemap succeeded) M.difference m (makemap succeeded)
unless (null failed) $ unless (null failed) $
debug thisThread debug threadname
[ "failed to push to" [ "failed to push to"
, show failed , show failed
] ]
@ -109,6 +112,6 @@ pushToRemotes now st pushmap remotes = do
( exitSuccess, exitFailure) ( exitSuccess, exitFailure)
retry branch g rs = do retry branch g rs = do
debug thisThread [ "trying manual pull to resolve failed pushes" ] debug threadname [ "trying manual pull to resolve failed pushes" ]
runThreadState st $ manualPull branch rs runThreadState st $ manualPull branch rs
go False branch g rs go False branch g rs

View file

@ -0,0 +1,34 @@
{- git-annex assistant thread to scan remotes to find needed transfers
-
- Copyright 2012 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Assistant.Threads.TransferScanner where
import Assistant.Common
import Assistant.ScanRemotes
import Assistant.TransferQueue
import Assistant.ThreadedMonad
import Logs.Transfer
import Types.Remote
import Utility.ThreadScheduler
thisThread :: ThreadName
thisThread = "TransferScanner"
{- This thread scans remotes, to find transfers that need to be made to
- keep their data in sync. The transfers are queued with lot priority. -}
transferScannerThread :: ThreadState -> ScanRemoteMap -> TransferQueue -> IO ()
transferScannerThread st scanremotes transferqueue = do
runEvery (Seconds 2) $ do
r <- getScanRemote scanremotes
needtransfer <- scan st r
forM_ needtransfer $ \(f, t) ->
queueLaterTransfer transferqueue f t
scan :: ThreadState -> Remote -> IO [(AssociatedFile, Transfer)]
scan st r = do
debug thisThread ["scanning", show r]
return [] -- TODO

View file

@ -38,19 +38,19 @@ queueTransfers q daemonstatus k f direction = do
mapM_ (\r -> queue r $ gentransfer r) mapM_ (\r -> queue r $ gentransfer r)
=<< sufficientremotes rs =<< sufficientremotes rs
where where
sufficientremotes l sufficientremotes rs
-- Queue downloads from all remotes that -- Queue downloads from all remotes that
-- have the key, with the cheapest ones first. -- have the key, with the cheapest ones first.
-- More expensive ones will only be tried if -- More expensive ones will only be tried if
-- downloading from a cheap one fails. -- downloading from a cheap one fails.
| direction == Download = do | direction == Download = do
uuids <- Remote.keyLocations k uuids <- Remote.keyLocations k
return $ filter (\r -> uuid r `elem` uuids) l return $ filter (\r -> uuid r `elem` uuids) rs
-- TODO: Determine a smaller set of remotes that -- TODO: Determine a smaller set of remotes that
-- can be uploaded to, in order to ensure all -- can be uploaded to, in order to ensure all
-- remotes can access the content. Currently, -- remotes can access the content. Currently,
-- send to every remote we can. -- send to every remote we can.
| otherwise = return l | otherwise = return rs
gentransfer r = Transfer gentransfer r = Transfer
{ transferDirection = direction { transferDirection = direction
, transferKey = k , transferKey = k
@ -60,12 +60,12 @@ queueTransfers q daemonstatus k f direction = do
let info = (stubInfo f) { transferRemote = Just r } let info = (stubInfo f) { transferRemote = Just r }
writeTChan q (t, info) writeTChan q (t, info)
{- Adds a pending transfer to the end of the queue. -} {- Adds a transfer to the end of the queue, to be processed later. -}
queueTransfer :: TransferQueue -> AssociatedFile -> Transfer -> IO () queueLaterTransfer :: TransferQueue -> AssociatedFile -> Transfer -> IO ()
queueTransfer q f t = void $ atomically $ queueLaterTransfer q f t = void $ atomically $
writeTChan q (t, stubInfo f) writeTChan q (t, stubInfo f)
{- Adds a pending transfer to the start of the queue, to be processed next. -} {- Adds a transfer to the start of the queue, to be processed next. -}
queueNextTransfer :: TransferQueue -> AssociatedFile -> Transfer -> IO () queueNextTransfer :: TransferQueue -> AssociatedFile -> Transfer -> IO ()
queueNextTransfer q f t = void $ atomically $ queueNextTransfer q f t = void $ atomically $
unGetTChan q (t, stubInfo f) unGetTChan q (t, stubInfo f)