{- git-annex assistant thread to scan remotes to find needed transfers
 -
 - Copyright 2012 Joey Hess <joey@kitenet.net>
 -
 - Licensed under the GNU GPL version 3 or higher.
 -}

module Assistant.Threads.TransferScanner where
module Assistant.Threads.TransferScanner where
|
|
|
|
|
|
|
|
import Assistant.Common
import Assistant.ScanRemotes
import Assistant.TransferQueue
import Assistant.ThreadedMonad
import Assistant.DaemonStatus
import Assistant.Alert
import Logs.Transfer
import Logs.Location
import qualified Remote
import Utility.ThreadScheduler
import qualified Git.LsFiles as LsFiles
import Command
import Annex.Content

import qualified Data.Set as S
|
2012-07-23 03:16:56 +00:00
|
|
|
{- Name of this thread, used in debug output and alerts. -}
thisThread :: ThreadName
thisThread = "TransferScanner"
|
|
|
|
|
{- This thread waits until a remote needs to be scanned, to find transfers
 - that need to be made, to keep data in sync.
 -}
transferScannerThread :: ThreadState -> DaemonStatusHandle -> ScanRemoteMap -> TransferQueue -> IO ()
transferScannerThread st dstatus scanremotes transferqueue = do
	startupScan
	go S.empty
	where
		go scanned = do
			threadDelaySeconds (Seconds 2)
			{- A set of remotes may be pulled off the scan map at
			 - once; they are all scanned in a single pass. -}
			(rs, infos) <- unzip <$> getScanRemote scanremotes
			{- A full scan is run for a remote that requested one,
			 - or for any remote not yet scanned this session;
			 - otherwise only the cheap failed-transfer scan runs. -}
			if any fullScan infos || any (`S.notMember` scanned) rs
				then do
					expensiveScan st dstatus transferqueue rs
					go (S.union scanned (S.fromList rs))
				else do
					mapM_ (failedTransferScan st dstatus transferqueue) rs
					go scanned
		{- All available remotes are scanned in full on startup,
		 - for multiple reasons, including:
		 -
		 - * This may be the first run, and there may be remotes
		 -   already in place, that need to be synced.
		 - * We may have run before, and scanned a remote, but
		 -   only been in a subdirectory of the git remote, and so
		 -   not synced it all.
		 - * We may have run before, and had transfers queued,
		 -   and then the system (or us) crashed, and that info was
		 -   lost.
		 -}
		startupScan = addScanRemotes scanremotes True
			=<< knownRemotes <$> getDaemonStatus dstatus
|
2012-07-23 03:16:56 +00:00
|
|
|
|
{- This is a cheap scan for failed transfers involving a remote. -}
failedTransferScan :: ThreadState -> DaemonStatusHandle -> TransferQueue -> Remote -> IO ()
failedTransferScan st dstatus transferqueue r = do
	ts <- runThreadState st $
		getFailedTransfers $ Remote.uuid r
	go ts
	where
		go [] = noop
		go ((t, info):ts)
			| transferDirection t == Download = do
				{- Check if the remote still has the key.
				 - If not, relies on the expensiveScan to
				 - get it queued from some other remote. -}
				ifM (runThreadState st $ remoteHas r $ transferKey t)
					( requeue t info
					, dequeue t
					)
				go ts
			| otherwise = do
				{- The Transferrer checks when uploading
				 - that the remote doesn't already have the
				 - key, so it's not redundantly checked
				 - here. -}
				requeue t info
				go ts
		{- Puts the transfer back on the queue, and removes its
		 - failure log, so it won't be requeued again by a later
		 - scan unless it fails again. -}
		requeue t info = do
			queueTransferWhenSmall
				transferqueue dstatus (associatedFile info) t r
			dequeue t
		-- Best-effort removal; tryIO ignores a missing log file.
		dequeue t = void $ runThreadState st $ inRepo $
			liftIO . tryIO . removeFile . failedTransferFile t
|
2012-08-23 19:22:23 +00:00
|
|
|
|
{- This is an expensive scan through the full git work tree, finding
 - files to download from or upload to any of the remotes.
 -
 - The scan is blocked when the transfer queue gets too large. -}
expensiveScan :: ThreadState -> DaemonStatusHandle -> TransferQueue -> [Remote] -> IO ()
expensiveScan st dstatus transferqueue rs = do
	liftIO $ debug thisThread ["starting scan of", show rs]
	void $ alertWhile dstatus (scanAlert rs) $ do
		g <- runThreadState st $ fromRepo id
		files <- LsFiles.inRepo [] g
		go files
		return True
	liftIO $ debug thisThread ["finished scan of", show rs]
	where
		go [] = noop
		go (f:fs) = do
			-- Only annexed files produce transfers; others yield [].
			mapM_ (enqueue f) =<< catMaybes <$> runThreadState st
				(ifAnnexed f findtransfers $ return [])
			go fs
		enqueue f (r, t) = do
			debug thisThread ["queuing", show t]
			queueTransferWhenSmall transferqueue dstatus (Just f) t r
		{- For a key present locally, queue uploads to remotes not
		 - in its location log; for a key not present, queue
		 - downloads from remotes that have it. -}
		findtransfers (key, _) = do
			locs <- loggedLocations key
			let use a = return $ map (a key locs) rs
			ifM (inAnnex key)
				( use $ check Upload False
				, use $ check Download True
				)
		check direction want key locs r
			| (Remote.uuid r `elem` locs) == want = Just $
				(r, Transfer direction (Remote.uuid r) key)
			| otherwise = Nothing
|
2012-07-25 18:15:09 +00:00
|
|
|
|
{- Checks if the location log lists the remote as having a key. -}
remoteHas :: Remote -> Key -> Annex Bool
remoteHas r key = elem
	<$> pure (Remote.uuid r)
	<*> loggedLocations key
|