2012-07-23 03:16:56 +00:00
|
|
|
{- git-annex assistant thread to scan remotes to find needed transfers
|
|
|
|
-
|
2015-01-21 16:50:09 +00:00
|
|
|
- Copyright 2012 Joey Hess <id@joeyh.name>
|
2012-07-23 03:16:56 +00:00
|
|
|
-
|
2019-03-13 19:48:14 +00:00
|
|
|
- Licensed under the GNU AGPL version 3 or higher.
|
2012-07-23 03:16:56 +00:00
|
|
|
-}
|
|
|
|
|
|
|
|
module Assistant.Threads.TransferScanner where
|
|
|
|
|
|
|
|
import Assistant.Common
|
2012-10-29 23:14:30 +00:00
|
|
|
import Assistant.Types.ScanRemotes
|
2012-07-23 03:16:56 +00:00
|
|
|
import Assistant.ScanRemotes
|
|
|
|
import Assistant.TransferQueue
|
2012-07-28 22:47:24 +00:00
|
|
|
import Assistant.DaemonStatus
|
2012-10-18 19:22:28 +00:00
|
|
|
import Assistant.Drop
|
2013-03-08 17:44:59 +00:00
|
|
|
import Assistant.Sync
|
2013-04-03 21:01:40 +00:00
|
|
|
import Assistant.DeleteRemote
|
2013-04-03 21:44:34 +00:00
|
|
|
import Assistant.Types.UrlRenderer
|
2016-08-03 16:37:12 +00:00
|
|
|
import Types.Transfer
|
2012-07-23 03:16:56 +00:00
|
|
|
import Logs.Transfer
|
2012-07-25 18:15:09 +00:00
|
|
|
import Logs.Location
|
2013-04-03 21:01:40 +00:00
|
|
|
import Logs.Group
|
2012-07-25 18:15:09 +00:00
|
|
|
import qualified Remote
|
2012-08-26 19:39:02 +00:00
|
|
|
import qualified Types.Remote as Remote
|
2012-07-23 03:16:56 +00:00
|
|
|
import Utility.ThreadScheduler
|
2013-03-19 16:51:22 +00:00
|
|
|
import Utility.NotificationBroadcaster
|
2013-06-21 01:10:16 +00:00
|
|
|
import Utility.Batch
|
2012-07-25 18:15:09 +00:00
|
|
|
import qualified Git.LsFiles as LsFiles
|
2015-12-15 19:34:28 +00:00
|
|
|
import Annex.WorkTree
|
2012-07-25 18:15:09 +00:00
|
|
|
import Annex.Content
|
2012-10-09 16:18:41 +00:00
|
|
|
import Annex.Wanted
|
2014-01-29 17:44:53 +00:00
|
|
|
import CmdLine.Action
|
2012-07-23 03:16:56 +00:00
|
|
|
|
2012-08-24 19:52:23 +00:00
|
|
|
import qualified Data.Set as S
|
2017-03-06 17:32:47 +00:00
|
|
|
import Control.Concurrent
|
2012-08-24 19:52:23 +00:00
|
|
|
|
2012-07-25 17:12:34 +00:00
|
|
|
{- The TransferScanner thread. Once the startup scan has been queued,
 - it loops without end: it waits until remotes need to be scanned, and
 - scans them to find transfers that need to be made, to keep data in
 - sync.
 -}
transferScannerThread :: UrlRenderer -> NamedThread
transferScannerThread urlrenderer = namedThread "TransferScanner" $ do
    startupScan
    loop S.empty
  where
    {- The set accumulates the remotes that have been through an
     - expensive scan. A remote that is not in the set yet, or any
     - request flagged fullScan, causes the expensive scan to be run on
     - the whole batch; otherwise only the cheap failed transfer scan
     - is run on each remote. -}
    loop seen = do
        setscanrunning False
        liftIO $ threadDelaySeconds (Seconds 2)
        (remotes, infos) <- unzip <$> getScanRemote
        setscanrunning True
        let needfull = any fullScan infos || not (all (`S.member` seen) remotes)
        seen' <- if needfull
            then do
                expensiveScan urlrenderer remotes
                return $ seen `S.union` S.fromList remotes
            else do
                mapM_ failedTransferScan remotes
                return seen
        loop seen'

    {- Records in the daemon status whether a scan is running, then
     - sends a notification using the transferNotifier of the status
     - value that modifyDaemonStatus hands back. -}
    setscanrunning running =
        liftIO . sendNotification . transferNotifier
            =<< modifyDaemonStatus
                (\old -> (old { transferScanRunning = running }, old))

    {- All git remotes are synced, all exports are updated,
     - and all available remotes are scanned in full on startup,
     - for multiple reasons, including:
     -
     - * This may be the first run, and there may be remotes
     -   already in place, that need to be synced.
     - * Changes may have been made last time we ran, but remotes were
     -   not available to be synced with.
     - * Changes may have been made to remotes while we were down.
     - * We may have run before, and scanned a remote, but
     -   only been in a subdirectory of the git remote, and so
     -   not synced it all.
     - * We may have run before, and had transfers queued,
     -   and then the system (or us) crashed, and that info was
     -   lost.
     - * A remote may be in the unwanted group, and this is a chance
     -   to determine if the remote has been emptied.
     -}
    startupScan = do
        gitremotes <- syncGitRemotes <$> getDaemonStatus
        reconnectRemotes gitremotes
        scannableRemotes >>= addScanRemotes True
|
2012-07-23 03:16:56 +00:00
|
|
|
|
2012-08-23 19:22:23 +00:00
|
|
|
{- A cheap scan, which only looks at the failed transfers that involve
 - a remote. The remote's failed transfers are cleared, and each one is
 - considered for being queued again. -}
failedTransferScan :: Remote -> Assistant ()
failedTransferScan r =
    mapM_ retry =<< liftAnnex (clearFailedTransfers (Remote.uuid r))
  where
    retry (t, info) = case transferDirection t of
        {- Check if the remote still has the key.
         - If not, relies on the expensiveScan to
         - get it queued from some other remote. -}
        Download ->
            whenM (liftAnnex $ remoteHas r $ transferKey t) $
                requeue t info
        {- The Transferrer checks when uploading
         - that the remote doesn't already have the
         - key, so it's not redundantly checked here. -}
        _ -> requeue t info

    requeue t info = queueTransferWhenSmall
        "retrying failed transfer"
        (associatedFile info)
        t
        r
|
2013-09-25 15:46:17 +00:00
|
|
|
|
scan multiple remotes in one pass
The expensive transfer scan now scans a whole set of remotes in one pass.
So at startup, or when network comes up, it will run only once.
Note that this can result in transfers from/to higher cost remotes being
queued before other transfers of other content from/to lower cost remotes.
Before, low cost remotes were scanned first and all their transfers came
first. When multiple transfers are queued for a key, the lower cost ones
are still queued first. However, this could result in transfers from slow
remotes running for a long time while transfers of other data from faster
remotes waits.
I expect to make the transfer queue smarter about ordering
and/or make it allow multiple transfers at a time, which should eliminate
this annoyance. (Also, it was already possible to get into that situation,
for example if the network was up, lots of transfers from slow remotes
might be queued, and then a disk is mounted and its faster transfers have
to wait.)
Also note that this means I don't need to improve the code in
Assistant.Sync that currently checks if any of the reconnected remotes
have diverged, and if so, queues scans of all of them. That had been very
inefficient, but now doesn't matter.
2012-08-26 18:01:43 +00:00
|
|
|
{- This is an expensive scan through the full git work tree, finding
 - files to transfer. The scan is blocked when the transfer queue gets
 - too large.
 -
 - This also finds files that are present either here or on a remote
 - but that are not preferred content, and drops them. Searching for files
 - to drop is done concurrently with the scan for transfers.
 -
 - TODO: It would be better to first drop as much as we can, before
 - transferring much, to minimise disk use.
 -
 - During the scan, we'll also check if any unwanted repositories are empty,
 - and can be removed. While unrelated, this is a cheap place to do it,
 - since we need to look at the locations of all keys anyway.
 -}
expensiveScan :: UrlRenderer -> [Remote] -> Assistant ()
expensiveScan urlrenderer rs = batch <~> do
    debug ["starting scan of", show visiblers]

    let uuids = map Remote.uuid rs

    mapM_ (liftAnnex . clearFailedTransfers) uuids

    {- Start out assuming every remote in the unwanted group is empty;
     - the scan removes each one that turns out to hold some key. -}
    unwantedrs <- S.fromList <$> liftAnnex (filterM inUnwantedGroup uuids)

    repo <- liftAnnex gitRepo
    (files, cleanup) <- liftIO $ LsFiles.inRepo [] [] repo
    removablers <- scanfiles unwantedrs files
    void $ liftIO cleanup

    debug ["finished scan of", show visiblers]

    remove <- asIO1 $ removableRemote urlrenderer
    liftIO $ mapM_ (void . tryNonAsync . remove) (S.toList removablers)
  where
    {- The remotes named in the debug output: those that are not
     - readonly, unless that leaves none, in which case all of them. -}
    visiblers = case filter (not . Remote.readonly) rs of
        [] -> rs
        writable -> writable

    {- Walks the list of files, queueing the transfers found for each,
     - and returns what is left of the set of unwanted remotes. -}
    scanfiles stillunwanted [] = return stillunwanted
    scanfiles stillunwanted (file:rest) = do
        mkey <- liftAnnex (lookupFile file)
        (stillunwanted', transfers) <- case mkey of
            Nothing -> return (stillunwanted, [])
            Just key -> findtransfers file stillunwanted key
        mapM_ (enqueue file) transfers

        {- Delay for a short time to avoid using too much CPU. -}
        liftIO $ threadDelay $ fromIntegral $ oneSecond `div` 200

        scanfiles stillunwanted' rest

    enqueue file (remote, transfer) = queueTransferWhenSmall
        "expensive scan found missing object"
        (AssociatedFile (Just file))
        transfer
        remote

    {- Handles drops for a key, and works out which transfers of it
     - should be queued. Also returns the set of unwanted remotes with
     - the key's logged locations taken out. -}
    findtransfers file stillunwanted key = do
        let af = AssociatedFile (Just file)
        locs <- liftAnnex $ loggedLocations key
        present <- liftAnnex $ inAnnex key
        let slocs = S.fromList locs

        {- The remotes may have changed since this scan began. -}
        syncrs <- syncDataRemotes <$> getDaemonStatus
        let use l a = mapMaybe (a key slocs) . l <$> getDaemonStatus

        liftAnnex $ handleDropsFrom locs syncrs
            "expensive scan found too many copies of object"
            present key af [] callCommandAction

        transfers <- if present
            then do
                candidates <- use syncDataRemotes (genTransfer Upload False)
                liftAnnex $ filterM
                    (wantSend True (Just key) af . Remote.uuid . fst)
                    candidates
            else do
                wanted <- liftAnnex $ wantGet True (Just key) af
                if wanted
                    then use downloadRemotes (genTransfer Download True)
                    else return []
        return (stillunwanted `S.difference` slocs, transfers)
|
|
|
|
|
|
|
|
-- The remotes that can be scanned: both syncDataRemotes and
-- exportRemotes. The downloadRemotes list of the daemon status
-- contains both, so that list is what gets returned.
scannableRemotes :: Assistant [Remote]
scannableRemotes = fmap downloadRemotes getDaemonStatus
|
2012-10-18 17:42:17 +00:00
|
|
|
|
|
|
|
{- Generates a transfer of a key in the given direction, paired with
 - the remote it involves.
 -
 - The Bool says whether the remote's UUID has to be a member of the
 - set of locations of the key for a transfer to be generated: the
 - expensive scan passes False for uploads and True for downloads.
 -
 - Nothing is returned when that does not hold, and also when asked to
 - upload to a readonly remote.
 -}
genTransfer :: Direction -> Bool -> Key -> S.Set UUID -> Remote -> Maybe (Remote, Transfer)
genTransfer direction want key slocs r
    | uploadtoreadonly = Nothing
    | haskey /= want = Nothing
    | otherwise = Just (r, Transfer direction u (fromKey id key))
  where
    u = Remote.uuid r
    haskey = u `S.member` slocs
    uploadtoreadonly = direction == Upload && Remote.readonly r
|
2012-07-25 18:15:09 +00:00
|
|
|
|
2012-08-24 17:04:28 +00:00
|
|
|
{- True when the remote's UUID is among the loggedLocations of the key. -}
remoteHas :: Remote -> Key -> Annex Bool
remoteHas r key = elem (Remote.uuid r) <$> loggedLocations key
|