scan multiple remotes in one pass

The expensive transfer scan now scans a whole set of remotes in one pass.
So at startup, or when network comes up, it will run only once.

Note that this can result in transfers from/to higher cost remotes being
queued before other transfers of other content from/to lower cost remotes.
Before, low cost remotes were scanned first and all their transfers came
first. When multiple transfers are queued for a key, the lower cost ones
are still queued first. However, this could result in transfers from slow
remotes running for a long time while transfers of other data from faster
remotes waits.

I expect to make the transfer queue smarter about ordering
and/or make it allow multiple transfers at a time, which should eliminate
this annoyance. (Also, it was already possible to get into that situation,
for example if the network was up, lots of transfers from slow remotes
might be queued, and then a disk is mounted and its faster transfers have
to wait.)

Also note that this means I don't need to improve the code in
Assistant.Sync that currently checks if any of the reconnected remotes
have diverged, and if so, queues scans of all of them. That had been very
innefficient, but now doesn't matter.
This commit is contained in:
Joey Hess 2012-08-26 14:01:43 -04:00
parent c9b3b8829d
commit ae52efc673
4 changed files with 41 additions and 47 deletions

View file

@ -214,10 +214,10 @@ syncAlert rs = baseActivityAlert
, alertPriority = Low
}
scanAlert :: Remote -> Alert
scanAlert r = baseActivityAlert
scanAlert :: [Remote] -> Alert
scanAlert rs = baseActivityAlert
{ alertHeader = Just $ tenseWords
[Tensed "Scanning" "Scanned", showRemotes [r]]
[Tensed "Scanning" "Scanned", showRemotes rs]
, alertBlockDisplay = True
, alertPriority = Low
}

View file

@ -26,21 +26,15 @@ type ScanRemoteMap = TMVar (M.Map Remote ScanInfo)
newScanRemoteMap :: IO ScanRemoteMap
newScanRemoteMap = atomically newEmptyTMVar
{- Blocks until there is a remote that needs to be scanned.
- Processes higher priority remotes first. -}
getScanRemote :: ScanRemoteMap -> IO (Remote, ScanInfo)
getScanRemote v = atomically $ do
m <- takeTMVar v
let l = reverse $ sortBy (compare `on` scanPriority . snd) $ M.toList m
case l of
[] -> retry -- should never happen
(ret@(r, _):_) -> do
let m' = M.delete r m
unless (M.null m') $
putTMVar v m'
return ret
{- Blocks until there is a remote or remotes that need to be scanned.
-
- The list has higher priority remotes listed first. -}
getScanRemote :: ScanRemoteMap -> IO [(Remote, ScanInfo)]
getScanRemote v = atomically $
reverse . sortBy (compare `on` scanPriority . snd) . M.toList
<$> takeTMVar v
{- Adds new remotes that need scanning to the map. -}
{- Adds new remotes that need scanning. -}
addScanRemotes :: ScanRemoteMap -> Bool -> [Remote] -> IO ()
addScanRemotes _ _ [] = noop
addScanRemotes v full rs = atomically $ do

View file

@ -28,8 +28,8 @@ import qualified Data.Map as M
-
- First gets git in sync, and then prepares any necessary file transfers.
-
- An expensive full scan is queued when the git-annex branches of the
- remotes have diverged from the local git-annex branch. Otherwise,
- An expensive full scan is queued when the git-annex branches of some of
- the remotes have diverged from the local git-annex branch. Otherwise,
- it's sufficient to requeue failed transfers.
-}
reconnectRemotes :: ThreadName -> ThreadState -> DaemonStatusHandle -> ScanRemoteMap -> [Remote] -> IO ()

View file

@ -36,13 +36,13 @@ transferScannerThread st dstatus scanremotes transferqueue = do
where
go scanned = do
threadDelaySeconds (Seconds 2)
(r, info) <- getScanRemote scanremotes
if fullScan info || not (S.member r scanned)
(rs, infos) <- unzip <$> getScanRemote scanremotes
if any fullScan infos || any (`S.notMember` scanned) rs
then do
expensiveScan st dstatus transferqueue r
go (S.insert r scanned)
expensiveScan st dstatus transferqueue rs
go (S.union scanned (S.fromList rs))
else do
failedTransferScan st dstatus transferqueue r
mapM_ (failedTransferScan st dstatus transferqueue) rs
go scanned
{- All available remotes are scanned in full on startup,
- for multiple reasons, including:
@ -92,39 +92,39 @@ failedTransferScan st dstatus transferqueue r = do
dequeue t = void $ runThreadState st $ inRepo $
liftIO . tryIO . removeFile . failedTransferFile t
{- This is a expensive scan through the full git work tree.
{- This is a expensive scan through the full git work tree, finding
- files to download from or upload to any of the remotes.
-
- The scan is blocked when the transfer queue gets too large. -}
expensiveScan :: ThreadState -> DaemonStatusHandle -> TransferQueue -> Remote -> IO ()
expensiveScan st dstatus transferqueue r = do
liftIO $ debug thisThread ["starting scan of", show r]
void $ alertWhile dstatus (scanAlert r) $ do
expensiveScan :: ThreadState -> DaemonStatusHandle -> TransferQueue -> [Remote] -> IO ()
expensiveScan st dstatus transferqueue rs = do
liftIO $ debug thisThread ["starting scan of", show rs]
void $ alertWhile dstatus (scanAlert rs) $ do
g <- runThreadState st $ fromRepo id
files <- LsFiles.inRepo [] g
go files
return True
liftIO $ debug thisThread ["finished scan of", show r]
liftIO $ debug thisThread ["finished scan of", show rs]
where
go [] = noop
go (f:fs) = do
v <- runThreadState st $ whenAnnexed check f
case v of
Nothing -> noop
Just t -> do
debug thisThread ["queuing", show t]
enqueue f t
mapM_ (enqueue f) =<< catMaybes <$> runThreadState st
(ifAnnexed f findtransfers $ return [])
go fs
where
check _ (key, _) = ifM (inAnnex key)
( helper key Upload False =<< remoteHas r key
, helper key Download True =<< remoteHas r key
)
helper key direction x y
| x == y = return $ Just $
Transfer direction (Remote.uuid r) key
| otherwise = return Nothing
enqueue f t = queueTransferWhenSmall transferqueue dstatus (Just f) t r
enqueue f (r, t) = do
debug thisThread ["queuing", show t]
queueTransferWhenSmall transferqueue dstatus (Just f) t r
findtransfers (key, _) = do
locs <- loggedLocations key
let use a = return $ map (a key locs) rs
ifM (inAnnex key)
( use $ check Upload False
, use $ check Download True
)
check direction want key locs r
| (Remote.uuid r `elem` locs) == want = Just $
(r, Transfer direction (Remote.uuid r) key)
| otherwise = Nothing
remoteHas :: Remote -> Key -> Annex Bool
remoteHas r key = elem