From 29e73f76ef7dda081b7ff5be11bd74279d569a86 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 5 Apr 2017 13:04:02 -0400 Subject: [PATCH] Added remote.<name>.annex-push and remote.<name>.annex-pull The former can be useful to make remotes that don't get fully synced with local changes, which comes up in a lot of situations. The latter was mostly added for symmetry, but could be useful (though less likely to be). Implementing `remote.<name>.annex-pull` was a bit tricky, as there's no one place where git-annex pulls/fetches from remotes. I audited all instances of "fetch" and "pull". A few cases were left not checking this config: * Git.Repair can try to pull missing refs from a remote, and if the local repo is corrupted, that seems a reasonable thing to do even though the config would normally prevent it. * Assistant.WebApp.Gpg and Remote.Gcrypt and Remote.Git do fetches as part of the setup process of a remote. The config would probably not be set then, and having the setup fail seems worse than not honoring the config if it is already set. I have not prevented all the code that does a "merge" from merging branches from remotes with remote.<name>.annex-pull=false. That could perhaps be done, but it would need a way to map from branch name to remote name, and the way refspecs work makes that hard to get really correct. So if the user fetches manually, the git-annex branch will get merged, for example. Another way of looking at/justifying this is that the setting is called "annex-pull", not "annex-merge". This commit was supported by the NSF-funded DataLad project. 
--- Assistant/Sync.hs | 24 ++++++++++++----- CHANGELOG | 3 +++ Command/Sync.hs | 9 ++++--- RemoteDaemon/Common.hs | 9 ++++++- RemoteDaemon/Transport/Ssh.hs | 4 +-- RemoteDaemon/Transport/Tor.hs | 4 +-- Types/GitConfig.hs | 4 +++ doc/git-annex-remotedaemon.mdwn | 3 +++ doc/git-annex-sync.mdwn | 27 ++++++++++++++----- doc/git-annex.mdwn | 13 ++++++++- ..._d0459d72b7e0441fe833a5c8e1588a4f._comment | 19 +++++++++++++ 11 files changed, 96 insertions(+), 23 deletions(-) create mode 100644 doc/tips/semi-synchronized_remotes/comment_1_d0459d72b7e0441fe833a5c8e1588a4f._comment diff --git a/Assistant/Sync.hs b/Assistant/Sync.hs index 702f1e98f7..8f30aa4f73 100644 --- a/Assistant/Sync.hs +++ b/Assistant/Sync.hs @@ -110,8 +110,14 @@ reconnectRemotes rs = void $ do pushToRemotes :: [Remote] -> Assistant [Remote] pushToRemotes remotes = do now <- liftIO getCurrentTime - let remotes' = filter (not . remoteAnnexReadOnly . Remote.gitconfig) remotes + let remotes' = filter (wantpush . Remote.gitconfig) remotes syncAction remotes' (pushToRemotes' now) + where + wantpush gc + | remoteAnnexReadOnly gc = False + | not (remoteAnnexPush gc) = False + | otherwise = True + pushToRemotes' :: UTCTime -> [Remote] -> Assistant [Remote] pushToRemotes' now remotes = do (g, branch, u) <- liftAnnex $ do @@ -195,16 +201,20 @@ manualPull :: Command.Sync.CurrBranch -> [Remote] -> Assistant ([Remote], Bool) manualPull currentbranch remotes = do g <- liftAnnex gitRepo let (_xmppremotes, normalremotes) = partition Remote.isXMPPRemote remotes - failed <- forM normalremotes $ \r -> do - g' <- liftAnnex $ sshOptionsTo (Remote.repo r) (Remote.gitconfig r) g - ifM (liftIO $ Git.Command.runBool [Param "fetch", Param $ Remote.name r] g') - ( return Nothing - , return $ Just r - ) + failed <- forM normalremotes $ \r -> if wantpull $ Remote.gitconfig r + then do + g' <- liftAnnex $ sshOptionsTo (Remote.repo r) (Remote.gitconfig r) g + ifM (liftIO $ Git.Command.runBool [Param "fetch", Param $ Remote.name r] g') + 
( return Nothing + , return $ Just r + ) + else return Nothing haddiverged <- liftAnnex Annex.Branch.forceUpdate forM_ normalremotes $ \r -> liftAnnex $ Command.Sync.mergeRemote r currentbranch Command.Sync.mergeConfig return (catMaybes failed, haddiverged) + where + wantpull gc = remoteAnnexPull gc {- Start syncing a remote, using a background thread. -} syncRemote :: Remote -> Assistant () diff --git a/CHANGELOG b/CHANGELOG index f69aadedd9..0a9e56f1b6 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -4,6 +4,9 @@ git-annex (6.20170322) UNRELEASED; urgency=medium about it once, not every time git-annex is run. * multicast: New command, uses uftp to multicast annexed files, for eg a classroom setting. + * Added remote..annex-push and remote..annex-pull + which can be useful to make remotes that don't get fully synced with + local changes. -- Joey Hess Wed, 29 Mar 2017 12:41:46 -0400 diff --git a/Command/Sync.hs b/Command/Sync.hs index f2c1945a78..85bb8c1058 100644 --- a/Command/Sync.hs +++ b/Command/Sync.hs @@ -360,7 +360,7 @@ updateBranch syncbranch updateto g = ] g pullRemote :: SyncOptions -> [Git.Merge.MergeConfig] -> Remote -> CurrBranch -> CommandStart -pullRemote o mergeconfig remote branch = stopUnless (pure $ pullOption o) $ do +pullRemote o mergeconfig remote branch = stopUnless (pure $ pullOption o && wantpull) $ do showStart "pull" (Remote.name remote) next $ do showOutput @@ -370,6 +370,7 @@ pullRemote o mergeconfig remote branch = stopUnless (pure $ pullOption o) $ do fetch = inRepoWithSshOptionsTo (Remote.repo remote) (Remote.gitconfig remote) $ Git.Command.runBool [Param "fetch", Param $ Remote.name remote] + wantpull = remoteAnnexPull (Remote.gitconfig remote) {- The remote probably has both a master and a synced/master branch. - Which to merge from? 
Well, the master has whatever latest changes @@ -400,7 +401,7 @@ pushRemote o remote (Just branch, _) = stopUnless (pure (pushOption o) <&&> need showStart "push" (Remote.name remote) next $ next $ do showOutput - ok <- inRepoWithSshOptionsTo (Remote.repo remote) (Remote.gitconfig remote) $ + ok <- inRepoWithSshOptionsTo (Remote.repo remote) gc $ pushBranch remote branch if ok then postpushupdate @@ -410,7 +411,8 @@ pushRemote o remote (Just branch, _) = stopUnless (pure (pushOption o) <&&> need return ok where needpush - | remoteAnnexReadOnly (Remote.gitconfig remote) = return False + | remoteAnnexReadOnly gc = return False + | not (remoteAnnexPush gc) = return False | otherwise = anyM (newer remote) [syncBranch branch, Annex.Branch.name] -- Do updateInstead emulation for remotes on eg removable drives -- formatted FAT, where the post-update hook won't run. @@ -426,6 +428,7 @@ pushRemote o remote (Just branch, _) = stopUnless (pure (pushOption o) <&&> need , return True ) | otherwise = return True + gc = Remote.gitconfig remote {- Pushes a regular branch like master to a remote. Also pushes the git-annex - branch. diff --git a/RemoteDaemon/Common.hs b/RemoteDaemon/Common.hs index 711771f974..366f6aacae 100644 --- a/RemoteDaemon/Common.hs +++ b/RemoteDaemon/Common.hs @@ -8,7 +8,7 @@ module RemoteDaemon.Common ( liftAnnex , inLocalRepo - , checkNewShas + , checkShouldFetch , ConnectionStatus(..) , robustConnection ) where @@ -35,6 +35,13 @@ liftAnnex (TransportHandle _ annexstate) a = do inLocalRepo :: TransportHandle -> (Git.Repo -> IO a) -> IO a inLocalRepo (TransportHandle (LocalRepo g) _) a = a g +-- Check if some shas should be fetched from the remote, +-- and presumably later merged. 
+checkShouldFetch :: RemoteGitConfig -> TransportHandle -> [Git.Sha] -> IO Bool +checkShouldFetch gc transporthandle shas + | remoteAnnexPull gc = checkNewShas transporthandle shas + | otherwise = return False + -- Check if any of the shas are actally new in the local git repo, -- to avoid unnecessary fetching. checkNewShas :: TransportHandle -> [Git.Sha] -> IO Bool diff --git a/RemoteDaemon/Transport/Ssh.hs b/RemoteDaemon/Transport/Ssh.hs index fdb75e8715..772ae97715 100644 --- a/RemoteDaemon/Transport/Ssh.hs +++ b/RemoteDaemon/Transport/Ssh.hs @@ -36,7 +36,7 @@ transportUsingCmd cmd params rr@(RemoteRepo r gc) url h@(TransportHandle (LocalR transportUsingCmd' cmd params rr url transporthandle ichan ochan transportUsingCmd' :: FilePath -> [CommandParam] -> Transport -transportUsingCmd' cmd params (RemoteRepo r _) url transporthandle ichan ochan = +transportUsingCmd' cmd params (RemoteRepo r gc) url transporthandle ichan ochan = robustConnection 1 $ do (Just toh, Just fromh, Just errh, pid) <- createProcess (proc cmd (toCommand params)) @@ -74,7 +74,7 @@ transportUsingCmd' cmd params (RemoteRepo r _) url transporthandle ichan ochan = send (CONNECTED url) handlestdout fromh Just (SshRemote.CHANGED (ChangedRefs shas)) -> do - whenM (checkNewShas transporthandle shas) $ + whenM (checkShouldFetch gc transporthandle shas) $ fetch handlestdout fromh -- avoid reconnect on protocol error diff --git a/RemoteDaemon/Transport/Tor.hs b/RemoteDaemon/Transport/Tor.hs index afa249b330..b0fa3c189e 100644 --- a/RemoteDaemon/Transport/Tor.hs +++ b/RemoteDaemon/Transport/Tor.hs @@ -129,7 +129,7 @@ serveClient th u r q = bracket setup cleanup start -- Connect to peer's tor hidden service. 
transport :: Transport -transport (RemoteRepo r _) url@(RemoteURI uri) th ichan ochan = +transport (RemoteRepo r gc) url@(RemoteURI uri) th ichan ochan = case unformatP2PAddress (show uri) of Nothing -> return () Just addr -> robustConnection 1 $ do @@ -168,7 +168,7 @@ transport (RemoteRepo r _) url@(RemoteURI uri) th ichan ochan = v <- runNetProto conn P2P.notifyChange case v of Right (Just (ChangedRefs shas)) -> do - whenM (checkNewShas th shas) $ + whenM (checkShouldFetch gc th shas) $ fetch handlepeer conn _ -> return ConnectionClosed diff --git a/Types/GitConfig.hs b/Types/GitConfig.hs index af699a7b9b..da548d478c 100644 --- a/Types/GitConfig.hs +++ b/Types/GitConfig.hs @@ -181,6 +181,8 @@ data RemoteGitConfig = RemoteGitConfig , remoteAnnexCostCommand :: Maybe String , remoteAnnexIgnore :: Bool , remoteAnnexSync :: Bool + , remoteAnnexPull :: Bool + , remoteAnnexPush :: Bool , remoteAnnexReadOnly :: Bool , remoteAnnexVerify :: Bool , remoteAnnexTrustLevel :: Maybe String @@ -218,6 +220,8 @@ extractRemoteGitConfig r remotename = RemoteGitConfig , remoteAnnexCostCommand = notempty $ getmaybe "cost-command" , remoteAnnexIgnore = getbool "ignore" False , remoteAnnexSync = getbool "sync" True + , remoteAnnexPull = getbool "pull" True + , remoteAnnexPush = getbool "push" True , remoteAnnexReadOnly = getbool "readonly" False , remoteAnnexVerify = getbool "verify" True , remoteAnnexTrustLevel = notempty $ getmaybe "trustlevel" diff --git a/doc/git-annex-remotedaemon.mdwn b/doc/git-annex-remotedaemon.mdwn index b01002dc95..609698e973 100644 --- a/doc/git-annex-remotedaemon.mdwn +++ b/doc/git-annex-remotedaemon.mdwn @@ -29,6 +29,9 @@ accepting connections from other nodes and serving up the contents of the repository. This is only done if you first run `git annex enable-tor`. Use `git annex p2p` to configure access to tor-annex remotes. +Note that when `remote..annex-pull` is set to false, the remotedaemon +will avoid fetching changes from that remote. 
+ # OPTIONS * `--foreground` diff --git a/doc/git-annex-sync.mdwn b/doc/git-annex-sync.mdwn index 97c63d3403..cabe5fed96 100644 --- a/doc/git-annex-sync.mdwn +++ b/doc/git-annex-sync.mdwn @@ -8,10 +8,7 @@ git annex sync `[remote ...]` # DESCRIPTION -Use this command when you want to synchronize the local repository with -one or more of its remotes. You can specify the remotes (or remote -groups) to sync with by name; the default if none are specified is to -sync with all remotes. +This command synchronizes the local repository with its remotes. The sync process involves first committing any local changes to files that have previously been added to the repository, @@ -36,6 +33,12 @@ by running "git annex sync" on the remote. # OPTIONS +* `[remote]` + + By default, all remotes are synced, except for remotes that have + `remote.<name>.annex-sync` set to false. By specifying the names + of remotes (or remote groups), you can control which ones to sync with. + * `--fast` Only sync with the remotes with the lowest annex-cost value configured. @@ -52,11 +55,21 @@ by running "git annex sync" on the remote. * `--pull`, `--no-pull` - By default, git pulls from remotes. Use --no-pull to disable. + By default, git pulls from remotes. Use --no-pull to disable all pulling. + + When `remote.<name>.annex-pull` or `remote.<name>.annex-sync` + are set to false, pulling is disabled for those remotes, and using + `--pull` will not enable it. * `--push`, `--no-push` - By default, git pushes to remotes. Use --no-push to disable. + By default, git pushes changes to remotes. + Use --no-push to disable all pushing. + + When `remote.<name>.annex-push` or `remote.<name>.annex-sync` are + set to false, or `remote.<name>.annex-readonly` is set to true, + pushing is disabled for those remotes, and using `--push` will not enable + it. * `--content`, `--no-content` @@ -64,7 +77,7 @@ by running "git annex sync" on the remote. 
The --content option causes the content of files in the work tree to also be uploaded and downloaded as necessary. - The annex.synccontent configuration can be set to true to make content + The `annex.synccontent` configuration can be set to true to make content be synced by default. Normally this tries to get each annexed file in the work tree diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn index 07b8b19e19..2bebd9f49f 100644 --- a/doc/git-annex.mdwn +++ b/doc/git-annex.mdwn @@ -1156,7 +1156,18 @@ Here are all the supported configuration settings. * `remote.<name>.annex-sync` If set to `false`, prevents git-annex sync (and the git-annex assistant) - from syncing with this remote. + from syncing with this remote by default. However, `git annex sync <name>` + can still be used to sync with the remote. + +* `remote.<name>.annex-pull` + + If set to `false`, prevents git-annex sync (and the git-annex assistant + etc) from ever pulling (or fetching) from the remote. + +* `remote.<name>.annex-push` + + If set to `false`, prevents git-annex sync (and the git-annex assistant + etc) from ever pushing to the remote. * `remote.<name>.annex-readonly` diff --git a/doc/tips/semi-synchronized_remotes/comment_1_d0459d72b7e0441fe833a5c8e1588a4f._comment b/doc/tips/semi-synchronized_remotes/comment_1_d0459d72b7e0441fe833a5c8e1588a4f._comment new file mode 100644 index 0000000000..0a6aeb9d68 --- /dev/null +++ b/doc/tips/semi-synchronized_remotes/comment_1_d0459d72b7e0441fe833a5c8e1588a4f._comment @@ -0,0 +1,19 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 1""" + date="2017-04-05T16:11:57Z" + content=""" +Setting `remote.<name>.annex-readonly=true` prevents git-annex sync +from pushing changes to the remote. It also prevents any git-annex command +from copying annexed file contents to the remote, or deleting annexed file +contents. So I think it's ideal for this kind of situation. 
+ +There does seem to be room for configs to prevent sync from pulling/pushing +without making the remote fully readonly. For example, the remote might be +a source of content, that only knows about the files it added and not other +files in the local repository, so dropping files from it should be allowed +but not pushing to it. + +So, I've added `remote.<name>.annex-push` and +`remote.<name>.annex-pull`. +"""]]