2012-08-22 18:32:17 +00:00
|
|
|
{- git-annex assistant repo syncing
|
|
|
|
-
|
|
|
|
- Copyright 2012 Joey Hess <joey@kitenet.net>
|
|
|
|
-
|
|
|
|
- Licensed under the GNU GPL version 3 or higher.
|
|
|
|
-}
|
|
|
|
|
|
|
|
module Assistant.Sync where
|
|
|
|
|
|
|
|
import Assistant.Common
|
|
|
|
import Assistant.Pushes
|
|
|
|
import Assistant.Alert
|
|
|
|
import Assistant.ThreadedMonad
|
|
|
|
import Assistant.DaemonStatus
|
|
|
|
import Assistant.ScanRemotes
|
|
|
|
import qualified Command.Sync
|
|
|
|
import Utility.Parallel
|
|
|
|
import qualified Git
|
|
|
|
import qualified Git.Branch
|
2012-09-16 23:48:12 +00:00
|
|
|
import qualified Git.Ref
|
2012-08-22 18:32:17 +00:00
|
|
|
import qualified Git.Command
|
|
|
|
import qualified Remote
|
2012-09-04 19:54:30 +00:00
|
|
|
import qualified Types.Remote as Remote
|
2012-08-22 18:32:17 +00:00
|
|
|
import qualified Annex.Branch
|
2012-09-16 23:48:12 +00:00
|
|
|
import Annex.UUID
|
2012-08-22 18:32:17 +00:00
|
|
|
|
|
|
|
import Data.Time.Clock
|
|
|
|
import qualified Data.Map as M
|
2012-09-11 01:55:59 +00:00
|
|
|
import Control.Concurrent
|
2012-08-22 18:32:17 +00:00
|
|
|
|
|
|
|
{- Syncs with remotes that may have been disconnected for a while.
|
|
|
|
-
|
2012-08-23 19:22:23 +00:00
|
|
|
- First gets git in sync, and then prepares any necessary file transfers.
|
|
|
|
-
|
scan multiple remotes in one pass
The expensive transfer scan now scans a whole set of remotes in one pass.
So at startup, or when network comes up, it will run only once.
Note that this can result in transfers from/to higher cost remotes being
queued before other transfers of other content from/to lower cost remotes.
Before, low cost remotes were scanned first and all their transfers came
first. When multiple transfers are queued for a key, the lower cost ones
are still queued first. However, this could result in transfers from slow
remotes running for a long time while transfers of other data from faster
remotes waits.
I expect to make the transfer queue smarter about ordering
and/or make it allow multiple transfers at a time, which should eliminate
this annoyance. (Also, it was already possible to get into that situation,
for example if the network was up, lots of transfers from slow remotes
might be queued, and then a disk is mounted and its faster transfers have
to wait.)
Also note that this means I don't need to improve the code in
Assistant.Sync that currently checks if any of the reconnected remotes
have diverged, and if so, queues scans of all of them. That had been very
inefficient, but now doesn't matter.
2012-08-26 18:01:43 +00:00
|
|
|
- An expensive full scan is queued when the git-annex branches of some of
|
|
|
|
- the remotes have diverged from the local git-annex branch. Otherwise,
|
2012-08-23 19:22:23 +00:00
|
|
|
- it's sufficient to requeue failed transfers.
|
2012-08-22 18:32:17 +00:00
|
|
|
-}
|
2012-08-22 19:37:26 +00:00
|
|
|
reconnectRemotes :: ThreadName -> ThreadState -> DaemonStatusHandle -> ScanRemoteMap -> [Remote] -> IO ()
reconnectRemotes _ _ _ _ [] = noop
reconnectRemotes threadname st dstatus scanremotes rs =
    void $ alertWhile dstatus (syncAlert rs) syncall
  where
    {- Only remotes whose repo satisfies Git.repoIsUrl take part in the
     - git pull and push below; the others are left out of those steps
     - (but are still passed to addScanRemotes). -}
    gitremotes = filter (Git.repoIsUrl . Remote.repo) rs

    {- Looks up the current branch, syncs against it, then hands the
     - divergence flag and the full remote list to addScanRemotes.
     - The push status is the result reported to alertWhile. -}
    syncall = do
        current <- runThreadState st (inRepo Git.Branch.current)
        (ok, diverged) <- syncwith current
        addScanRemotes scanremotes diverged rs
        return ok

    {- Always pulls. Pushing only happens when a local branch exists;
     - with no local branch yet, the push step is skipped and counted
     - as a success. -}
    syncwith current = do
        diverged <- manualPull st current gitremotes
        ok <- case current of
            Nothing -> return True
            Just _ -> do
                now <- getCurrentTime
                pushToRemotes threadname now st Nothing gitremotes
        return (ok, diverged)
|
2012-08-22 18:32:17 +00:00
|
|
|
|
|
|
|
{- Updates the local sync branch, then pushes it to all remotes, in
|
2012-09-16 23:48:12 +00:00
|
|
|
- parallel, along with the git-annex branch. This is the same
|
|
|
|
- as "git annex sync", except in parallel, and will co-exist with use of
|
|
|
|
- "git annex sync".
|
2012-08-22 18:32:17 +00:00
|
|
|
-
|
|
|
|
- Avoids running possibly long-duration commands in the Annex monad, so
|
2012-09-16 23:48:12 +00:00
|
|
|
- as not to block other threads.
|
|
|
|
-
|
|
|
|
- This can fail, when the remote's sync branch (or git-annex branch) has
|
|
|
|
- been updated by some other remote pushing into it, or by the remote
|
|
|
|
- itself. To handle failure, a manual pull and merge is done, and the push
|
|
|
|
- is retried.
|
|
|
|
-
|
|
|
|
- When there's a lot of activity, we may fail more than once.
|
|
|
|
- On the other hand, we may fail because the remote is not available.
|
|
|
|
- Rather than retrying indefinitely, after the first retry we enter a
|
|
|
|
- fallback mode, where our push is guaranteed to succeed if the remote is
|
|
|
|
- reachable. If the fallback fails, the push is queued to be retried
|
|
|
|
- later.
|
|
|
|
-
|
|
|
|
- The fallback mode pushes to branches on the remote that have our uuid in
|
|
|
|
- them. While ugly, those branches are reserved for pushing by us, and
|
|
|
|
- so our pushes will succeed.
|
|
|
|
-}
|
2012-09-13 04:57:52 +00:00
|
|
|
pushToRemotes :: ThreadName -> UTCTime -> ThreadState -> Maybe FailedPushMap -> [Remote] -> IO Bool
pushToRemotes threadname now st mpushmap remotes = do
    (repo, mbranch, uuid) <- runThreadState st $ do
        r <- fromRepo id
        b <- inRepo Git.Branch.current
        i <- getUUID
        return (r, b, i)
    case mbranch of
        Nothing -> return True -- no branch, so nothing to do
        Just branch -> attempt True repo uuid branch remotes
  where
    {- One ordinary push round to the given remotes. If any fail and a
     - retry is still allowed, pull manually and try once more against
     - only the failed remotes; if the retry also fails, switch to the
     - fallback push. -}
    attempt canretry repo uuid branch targets = do
        debug threadname
            [ "pushing to"
            , show targets
            ]
        Command.Sync.updateBranch (Command.Sync.syncBranch branch) repo
        (succeeded, failed) <- inParallel (pushnormal repo branch) targets
        -- Failures are not recorded yet; they may still be resolved
        -- by the retry or the fallback.
        record succeeded []
        case (failed, canretry) of
            ([], _) -> return True
            (_, True) -> do
                debug threadname [ "trying manual pull to resolve failed pushes" ]
                void $ manualPull st (Just branch) failed
                attempt False repo uuid branch failed
            (_, False) -> lastresort repo uuid branch failed

    {- Pushes to the branches on the remote that carry our uuid, and
     - records whatever still fails in the failed push map. -}
    lastresort repo uuid branch targets = do
        debug threadname
            [ "fallback pushing to"
            , show targets
            ]
        (succeeded, failed) <- inParallel (pushreserved repo uuid branch) targets
        record succeeded failed
        return (null failed)

    {- When a failed push map was supplied, drops the remotes that
     - succeeded from it and (re)stamps the ones that failed with the
     - time passed to pushToRemotes. Does nothing without a map. -}
    record succeeded failed = maybe noop update mpushmap
      where
        update pushmap = changeFailedPushMap pushmap $ \m ->
            stamped failed `M.union` (m `M.difference` stamped succeeded)
        stamped l = M.fromList [ (r, now) | r <- l ]

    pushnormal repo branch remote = Command.Sync.pushBranch remote branch repo

    pushreserved repo uuid branch remote =
        Git.Command.runBool "push" params repo
      where
        params = map Param
            [ Remote.name remote
            , reserved Annex.Branch.name
            , reserved branch
            ]
        {- Refspec targeting refs/synced/uuid/branch on the remote;
         - this avoids cluttering up the branch display. -}
        reserved ref = base ++ ":" ++ "refs/synced/" ++ fromUUID uuid ++ "/" ++ base
          where
            base = show (Git.Ref.base ref)
|
2012-08-22 18:32:17 +00:00
|
|
|
|
|
|
|
{- Manually pulls from the given remotes: fetches from each one, force
 - updates the git-annex branch, and then merges each remote's branches
 - into the current branch (if any).
 -
 - The result of each individual fetch and merge is ignored. The value
 - returned is the one produced by Annex.Branch.forceUpdate, which callers
 - use as the "has diverged" flag. -}
manualPull :: ThreadState -> Maybe Git.Ref -> [Remote] -> IO Bool
manualPull st currentbranch remotes = do
    repo <- runThreadState st (fromRepo id)
    mapM_ (fetchfrom repo) remotes
    diverged <- runThreadState st Annex.Branch.forceUpdate
    mapM_ mergefrom remotes
    return diverged
  where
    -- Runs outside the Annex monad, directly against the repo.
    fetchfrom repo r = Git.Command.runBool "fetch" [Param (Remote.name r)] repo
    mergefrom r = runThreadState st (Command.Sync.mergeRemote r currentbranch)
|
2012-09-11 01:55:59 +00:00
|
|
|
|
|
|
|
{- Starts syncing a newly added remote. The list of known remotes is
 - refreshed first, in the calling thread; the sync itself then runs in
 - a background thread, so this returns without waiting for it. -}
syncNewRemote :: ThreadState -> DaemonStatusHandle -> ScanRemoteMap -> Remote -> IO ()
syncNewRemote st dstatus scanremotes remote = do
    runThreadState st (updateKnownRemotes dstatus)
    void (forkIO resync)
  where
    resync = reconnectRemotes "SyncRemote" st dstatus scanremotes [remote]
|