From 1d7fa63149647885b23d475f3c87a2aa4aab6ffb Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Tue, 9 Mar 2021 15:58:09 -0400 Subject: [PATCH] Added support for git-remote-gcrypt's rsync URIs Which access a remote using rsync over ssh, and which git pushes to much more efficiently than ssh urls. There was some old partial support for rsync URIs from 2013, but it seemed incomplete, and did not use rsync over ssh. Weird. I'm not sure if there's any remaining benefit to using the non-rsync url forms with gcrypt, now that this is implemented? Updated docs to encourage using the rsync urls. This commit was sponsored by Svenne Krap on Patreon. --- CHANGELOG | 3 + Remote/GCrypt.hs | 135 ++++++++++-------- ...58___every_sync_uploads_huge_manifest.mdwn | 2 + ..._83ea673f86450a856889b8fbb894bb4b._comment | 24 ++++ doc/special_remotes/gcrypt.mdwn | 17 ++- ...ncrypted_git_repositories_with_gcrypt.mdwn | 14 +- 6 files changed, 127 insertions(+), 68 deletions(-) create mode 100644 doc/bugs/gcrypt_remote__58___every_sync_uploads_huge_manifest/comment_10_83ea673f86450a856889b8fbb894bb4b._comment diff --git a/CHANGELOG b/CHANGELOG index 90bed09f9a..b2cfbee2e2 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -15,6 +15,9 @@ git-annex (8.20210224) UNRELEASED; urgency=medium (Such a conversion will still happen when importing from a remote an old git-annex exported such a tree to before; export the tree with the new git-annex before importing to avoid that.) + * Added support for git-remote-gcrypt's rsync URIs, which access a remote + using rsync over ssh, and which git pushes to much more efficiently + than ssh urls. * Fix support for local gcrypt repositories with a space in their URI. 
-- Joey Hess Wed, 24 Feb 2021 13:18:38 -0400 diff --git a/Remote/GCrypt.hs b/Remote/GCrypt.hs index 522f382c1a..e4f923b0ef 100644 --- a/Remote/GCrypt.hs +++ b/Remote/GCrypt.hs @@ -128,7 +128,7 @@ gen' :: Git.Repo -> UUID -> ParsedRemoteConfig -> RemoteGitConfig -> RemoteState gen' r u c gc rs = do cst <- remoteCost gc $ if repoCheap r then nearlyCheapRemoteCost else expensiveRemoteCost - (rsynctransport, rsyncurl) <- rsyncTransportToObjects r gc + let (rsynctransport, rsyncurl, accessmethod) = rsyncTransportToObjects r gc let rsyncopts = Remote.Rsync.genRsyncOpts c gc rsynctransport rsyncurl let this = Remote { uuid = u @@ -163,10 +163,10 @@ gen' r u c gc rs = do , remoteStateHandle = rs } return $ Just $ specialRemote' specialcfg c - (store this rsyncopts) - (retrieve this rsyncopts) - (remove this rsyncopts) - (checkKey this rsyncopts) + (store this rsyncopts accessmethod) + (retrieve this rsyncopts accessmethod) + (remove this rsyncopts accessmethod) + (checkKey this rsyncopts accessmethod) this where specialcfg @@ -175,35 +175,47 @@ gen' r u c gc rs = do { displayProgress = False } | otherwise = specialRemoteCfg c -rsyncTransportToObjects :: Git.Repo -> RemoteGitConfig -> Annex (Annex [CommandParam], String) -rsyncTransportToObjects r gc = do - (rsynctransport, rsyncurl, _) <- rsyncTransport r gc - return (rsynctransport, rsyncurl ++ "/annex/objects") +rsyncTransportToObjects :: Git.Repo -> RemoteGitConfig -> (Annex [CommandParam], String, AccessMethod) +rsyncTransportToObjects r gc = + let (rsynctransport, rsyncurl, m) = rsyncTransport r gc + in (rsynctransport, rsyncurl ++ "/annex/objects", m) -rsyncTransport :: Git.Repo -> RemoteGitConfig -> Annex (Annex [CommandParam], String, AccessMethod) +rsyncTransport :: Git.Repo -> RemoteGitConfig -> (Annex [CommandParam], String, AccessMethod) rsyncTransport r gc | sshprefix `isPrefixOf` loc = sshtransport $ break (== '/') $ drop (length sshprefix) loc - | "//:" `isInfixOf` loc = othertransport + | "rsync://" 
`isPrefixOf` loc = rsyncoversshtransport | ":" `isInfixOf` loc = sshtransport $ separate (== ':') loc - | otherwise = othertransport + | otherwise = rsyncoversshtransport where sshprefix = "ssh://" :: String loc = Git.repoLocation r - sshtransport (host, path) = do + sshtransport (host, path) = let rsyncpath = if "/~/" `isPrefixOf` path then drop 3 path else path - let sshhost = either error id (mkSshHost host) - let mkopts = rsyncShell . (Param "ssh" :) + sshhost = either error id (mkSshHost host) + mkopts = rsyncShell . (Param "ssh" :) <$> sshOptions ConsumeStdin (sshhost, Nothing) gc [] - return (mkopts, fromSshHost sshhost ++ ":" ++ rsyncpath, AccessShell) - othertransport = return (pure [], loc, AccessDirect) + in (mkopts, fromSshHost sshhost ++ ":" ++ rsyncpath, AccessGitAnnexShell) + rsyncoversshtransport = + -- git-remote-gcrypt uses a rsync:// url to mean + -- rsync over ssh. But to rsync, that's rsync protocol, + -- so it must be converted to a form that rsync will treat + -- as rsync over ssh. 
+ -- There are two url forms that git-remote-gcrypt + -- supports: rsync://userhost/path and rsync://userhost:path + -- change to: userhost:/path userhost:path + let loc' = replace "rsync://" "" loc + loc'' = if ':' `elem` loc' + then loc' + else let (a, b) = break (== '/') loc' in a ++ ":" ++ b + in (pure [], loc'', AccessRsyncOverSsh) noCrypto :: Annex a noCrypto = giveup "cannot use gcrypt remote without encryption enabled" unsupportedUrl :: a -unsupportedUrl = giveup "using non-ssh remote repo url with gcrypt is not supported" +unsupportedUrl = giveup "unsupported repo url for gcrypt" gCryptSetup :: SetupStage -> Maybe UUID -> Maybe CredPair -> RemoteConfig -> RemoteGitConfig -> Annex (RemoteConfig, UUID) gCryptSetup _ mu _ c gc = go $ fromProposedAccepted <$> M.lookup gitRepoField c @@ -256,8 +268,9 @@ gCryptSetup _ mu _ c gc = go $ fromProposedAccepted <$> M.lookup gitRepoField c else giveup $ "uuid mismatch; expected " ++ show mu ++ " but remote gitrepo has " ++ show u ++ " (" ++ show gcryptid ++ ")" {- Sets up the gcrypt repository. The repository is either a local - - repo, or it is accessed via rsync directly, or it is accessed over ssh - - and git-annex-shell is available to manage it. + - repo, or it is accessed via rsync over ssh (without using + - git-annex-shell), or it is accessed over ssh and git-annex-shell + - is available to manage it. - - The GCryptID is recorded in the repository's git config for later use. 
- Also, if the git config has receive.denyNonFastForwards set, disable @@ -267,11 +280,11 @@ setupRepo :: Git.GCrypt.GCryptId -> Git.Repo -> Annex AccessMethod setupRepo gcryptid r | Git.repoIsUrl r = do dummycfg <- liftIO dummyRemoteGitConfig - (_, _, accessmethod) <- rsyncTransport r dummycfg + let (_, _, accessmethod) = rsyncTransport r dummycfg case accessmethod of - AccessDirect -> rsyncsetup - AccessShell -> ifM gitannexshellsetup - ( return AccessShell + AccessRsyncOverSsh -> rsyncsetup + AccessGitAnnexShell -> ifM gitannexshellsetup + ( return AccessGitAnnexShell , rsyncsetup ) | Git.repoIsLocalUnknown r = localsetup =<< liftIO (Git.Config.read r) @@ -281,16 +294,16 @@ setupRepo gcryptid r let setconfig k v = liftIO $ Git.Command.run [Param "config", Param (fromConfigKey k), Param v] r' setconfig coreGCryptId gcryptid setconfig denyNonFastForwards (Git.Config.boolConfig False) - return AccessDirect + return AccessRsyncOverSsh {- As well as modifying the remote's git config, - create the objectDir on the remote, - - which is needed for direct rsync of objects to work. + - which is needed for rsync of objects to it to work. -} rsyncsetup = Remote.Rsync.withRsyncScratchDir $ \tmp -> do createAnnexDirectory (toRawFilePath (tmp objectDir)) dummycfg <- liftIO dummyRemoteGitConfig - (rsynctransport, rsyncurl, _) <- rsyncTransport r dummycfg + let (rsynctransport, rsyncurl, _) = rsyncTransport r dummycfg let tmpconfig = tmp "config" opts <- rsynctransport void $ liftIO $ rsync $ opts ++ @@ -307,7 +320,7 @@ setupRepo gcryptid r ] unless ok $ giveup "Failed to connect to remote to set it up." - return AccessDirect + return AccessRsyncOverSsh {- Ask git-annex-shell to configure the repository as a gcrypt - repository. May fail if it is too old. -} @@ -322,7 +335,7 @@ accessShell = accessShellConfig . 
gitconfig accessShellConfig :: RemoteGitConfig -> Bool accessShellConfig c = case method of - AccessShell -> True + AccessGitAnnexShell -> True _ -> False where method = toAccessMethod $ fromMaybe "" $ remoteAnnexGCrypt c @@ -363,13 +376,13 @@ setGcryptEncryption c remotename = do where remoteconfig n = n remotename -store :: Remote -> Remote.Rsync.RsyncOpts -> Storer -store r rsyncopts k s p = do +store :: Remote -> Remote.Rsync.RsyncOpts -> AccessMethod -> Storer +store r rsyncopts accessmethod k s p = do repo <- getRepo r - store' repo r rsyncopts k s p + store' repo r rsyncopts accessmethod k s p -store' :: Git.Repo -> Remote -> Remote.Rsync.RsyncOpts -> Storer -store' repo r rsyncopts +store' :: Git.Repo -> Remote -> Remote.Rsync.RsyncOpts -> AccessMethod -> Storer +store' repo r rsyncopts accessmethod | not $ Git.repoIsUrl repo = byteStorer $ \k b p -> guardUsable repo (giveup "cannot access remote") $ liftIO $ do let tmpdir = Git.repoPath repo P. "tmp" P. keyFile k @@ -386,16 +399,19 @@ store' repo r rsyncopts (AssociatedFile Nothing) unless ok $ giveup "rsync failed" - else fileStorer $ Remote.Rsync.store rsyncopts + else storersync + | accessmethod == AccessRsyncOverSsh = storersync | otherwise = unsupportedUrl + where + storersync = fileStorer $ Remote.Rsync.store rsyncopts -retrieve :: Remote -> Remote.Rsync.RsyncOpts -> Retriever -retrieve r rsyncopts k p sink = do +retrieve :: Remote -> Remote.Rsync.RsyncOpts -> AccessMethod -> Retriever +retrieve r rsyncopts accessmethod k p sink = do repo <- getRepo r - retrieve' repo r rsyncopts k p sink + retrieve' repo r rsyncopts accessmethod k p sink -retrieve' :: Git.Repo -> Remote -> Remote.Rsync.RsyncOpts -> Retriever -retrieve' repo r rsyncopts +retrieve' :: Git.Repo -> Remote -> Remote.Rsync.RsyncOpts -> AccessMethod -> Retriever +retrieve' repo r rsyncopts accessmethod | not $ Git.repoIsUrl repo = byteRetriever $ \k sink -> guardUsable repo (giveup "cannot access remote") $ sink =<< liftIO (L.readFile $ 
gCryptLocation repo k) @@ -406,38 +422,42 @@ retrieve' repo r rsyncopts oh <- mkOutputHandler unlessM (Ssh.rsyncHelper oh (Just p) ps) $ giveup "rsync failed" - else fileRetriever $ Remote.Rsync.retrieve rsyncopts + else retrieversync + | accessmethod == AccessRsyncOverSsh = retrieversync | otherwise = unsupportedUrl where + retrieversync = fileRetriever $ Remote.Rsync.retrieve rsyncopts -remove :: Remote -> Remote.Rsync.RsyncOpts -> Remover -remove r rsyncopts k = do +remove :: Remote -> Remote.Rsync.RsyncOpts -> AccessMethod -> Remover +remove r rsyncopts accessmethod k = do repo <- getRepo r - remove' repo r rsyncopts k + remove' repo r rsyncopts accessmethod k -remove' :: Git.Repo -> Remote -> Remote.Rsync.RsyncOpts -> Remover -remove' repo r rsyncopts k +remove' :: Git.Repo -> Remote -> Remote.Rsync.RsyncOpts -> AccessMethod -> Remover +remove' repo r rsyncopts accessmethod k | not $ Git.repoIsUrl repo = guardUsable repo (giveup "cannot access remote") $ liftIO $ Remote.Directory.removeDirGeneric (fromRawFilePath (Git.repoPath repo)) (fromRawFilePath (parentDir (toRawFilePath (gCryptLocation repo k)))) | Git.repoIsSsh repo = shellOrRsync r removeshell removersync + | accessmethod == AccessRsyncOverSsh = removersync | otherwise = unsupportedUrl where removersync = Remote.Rsync.remove rsyncopts k removeshell = Ssh.dropKey repo k -checkKey :: Remote -> Remote.Rsync.RsyncOpts -> CheckPresent -checkKey r rsyncopts k = do +checkKey :: Remote -> Remote.Rsync.RsyncOpts -> AccessMethod -> CheckPresent +checkKey r rsyncopts accessmethod k = do repo <- getRepo r - checkKey' repo r rsyncopts k + checkKey' repo r rsyncopts accessmethod k -checkKey' :: Git.Repo -> Remote -> Remote.Rsync.RsyncOpts -> CheckPresent -checkKey' repo r rsyncopts k +checkKey' :: Git.Repo -> Remote -> Remote.Rsync.RsyncOpts -> AccessMethod -> CheckPresent +checkKey' repo r rsyncopts accessmethod k | not $ Git.repoIsUrl repo = guardUsable repo (cantCheck repo) $ liftIO $ doesFileExist 
(gCryptLocation repo k) | Git.repoIsSsh repo = shellOrRsync r checkshell checkrsync + | accessmethod == AccessRsyncOverSsh = checkrsync | otherwise = unsupportedUrl where checkrsync = Remote.Rsync.checkKey repo rsyncopts k @@ -449,15 +469,16 @@ gCryptLocation :: Git.Repo -> Key -> FilePath gCryptLocation repo key = Git.repoLocation repo objectDir fromRawFilePath (keyPath key (hashDirLower def)) -data AccessMethod = AccessDirect | AccessShell +data AccessMethod = AccessRsyncOverSsh | AccessGitAnnexShell + deriving (Eq) fromAccessMethod :: AccessMethod -> String -fromAccessMethod AccessShell = "shell" -fromAccessMethod AccessDirect = "true" +fromAccessMethod AccessGitAnnexShell = "shell" +fromAccessMethod AccessRsyncOverSsh = "true" toAccessMethod :: String -> AccessMethod -toAccessMethod "shell" = AccessShell -toAccessMethod _ = AccessDirect +toAccessMethod "shell" = AccessGitAnnexShell +toAccessMethod _ = AccessRsyncOverSsh getGCryptUUID :: Bool -> Git.Repo -> Annex (Maybe UUID) getGCryptUUID fast r = do @@ -491,7 +512,7 @@ getGCryptId fast r gc getConfigViaRsync :: Git.Repo -> RemoteGitConfig -> Annex (Either SomeException (Git.Repo, S.ByteString, String)) getConfigViaRsync r gc = do - (rsynctransport, rsyncurl, _) <- rsyncTransport r gc + let (rsynctransport, rsyncurl, _) = rsyncTransport r gc opts <- rsynctransport liftIO $ do withTmpFile "tmpconfig" $ \tmpconfig _ -> do diff --git a/doc/bugs/gcrypt_remote__58___every_sync_uploads_huge_manifest.mdwn b/doc/bugs/gcrypt_remote__58___every_sync_uploads_huge_manifest.mdwn index 4b84a4bba3..ccbc2dabf6 100644 --- a/doc/bugs/gcrypt_remote__58___every_sync_uploads_huge_manifest.mdwn +++ b/doc/bugs/gcrypt_remote__58___every_sync_uploads_huge_manifest.mdwn @@ -28,3 +28,5 @@ is running. 
The upload of the actual changeset starts after this, the processes git-annex is great and revolutionized my file organization and backup structure (if they were even existing before) [[!meta title="gcrypt special remotes should support rsync:// and perhaps also sftp://"]] + +> [[done]] --[[Joey]] diff --git a/doc/bugs/gcrypt_remote__58___every_sync_uploads_huge_manifest/comment_10_83ea673f86450a856889b8fbb894bb4b._comment b/doc/bugs/gcrypt_remote__58___every_sync_uploads_huge_manifest/comment_10_83ea673f86450a856889b8fbb894bb4b._comment new file mode 100644 index 0000000000..f439e13d75 --- /dev/null +++ b/doc/bugs/gcrypt_remote__58___every_sync_uploads_huge_manifest/comment_10_83ea673f86450a856889b8fbb894bb4b._comment @@ -0,0 +1,24 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 10""" + date="2021-03-09T18:10:38Z" + content=""" +Actually, it supports rsync urls already. But git-annex treats the url as +literally the rsync protocol, so it will run rsync with an url like +rsync://localhost/foo. Which will mostly fail unless there's actually a +rsync server running. +git-remote-gcrypt treats the same url as rsync over ssh. + +See [[!commit 3e079cdcd1ac4f52ceb1c3ad483917d7c6133d0b]] for when that was +added in 2013. Although I'm doubtful if it actually worked; it seems like +initremote supported it, but it was never implemented the rest of +the way. + +If anyone has been using that, it seems git-annex and git-remote-gcrypt +would be accessing the server in different ways, so the same server must +support rsync over ssh too, or git pushes to the remote would fail. So, it +seems all that's needed is for git-annex to treat that url as rsync over +ssh, and finish the incomplete implementation of rsync urls. + +Ok, done! 
+"""]] diff --git a/doc/special_remotes/gcrypt.mdwn b/doc/special_remotes/gcrypt.mdwn index 787b3e1ca1..91e0ac60e8 100644 --- a/doc/special_remotes/gcrypt.mdwn +++ b/doc/special_remotes/gcrypt.mdwn @@ -26,10 +26,21 @@ gcrypt: repository itself. May be repeated when multiple participants should have access to the repository. -* `gitrepo` - Required. The path or url to the git repository +* `gitrepo` - Required. The location of the git repository for gcrypt to use. This repository should be either an unpopulated bare git repo, or an existing gcrypt repository. + To use a local git repository, use: `gitrepo=/path/to/repo` + + For a git repository accessed using rsync over ssh, use: + `gitrepo=rsync://user@host/path/to/repo` + + For a git repository accessed over ssh, and using git-annex-shell + to transfer data, use: + `gitrepo=ssh://user@host/path/to/repo` or `host:path` + Note that each `git push` has to re-send the whole content of the git + repository when using this option. + * `chunk` - Enables [[chunking]] when storing large files. * `shellescape` - See [[rsync]] for the details of this option. @@ -37,9 +48,7 @@ gcrypt: ## notes For git-annex to store files in a repository on a remote server, you need -shell access, and `rsync` must be installed. Those are the minimum -requirements, but it's also recommended to install git-annex on the remote -server, so that [[git-annex-shell]] can be used. +shell access, and the server needs to be able to run `rsync` or `git-annex-shell`. If you can't run `rsync` or `git-annex-shell` on the remote server, you can't use this special remote.
Other options are the [[git-lfs]] diff --git a/doc/tips/fully_encrypted_git_repositories_with_gcrypt.mdwn b/doc/tips/fully_encrypted_git_repositories_with_gcrypt.mdwn index 1847a6fb8c..a91cd27f48 100644 --- a/doc/tips/fully_encrypted_git_repositories_with_gcrypt.mdwn +++ b/doc/tips/fully_encrypted_git_repositories_with_gcrypt.mdwn @@ -66,9 +66,9 @@ the gpg key used to encrypt it, and then: ## encrypted git-annex repository on a ssh server -If you have a ssh server that has git-annex or rsync installed on it, you -can set up an encrypted repository there. Works just like the encrypted -drive except without the cable. +If you have a server that has ssh and rsync installed on it, you can set up an +encrypted repository there. Works just like the encrypted drive except +without the cable. First, on the server, run: @@ -76,14 +76,14 @@ First, on the server, run: Now, in your existing git-annex repository, set up the encrypted remote: - git annex initremote encryptedrepo type=gcrypt gitrepo=ssh://my.server/home/me/encryptedrepo keyid=$mykey + git annex initremote encryptedrepo type=gcrypt gitrepo=rsync://my.server/home/me/encryptedrepo keyid=$mykey git annex sync encryptedrepo (Remember to replace "$mykey" with the keyid of your gpg key.) This uses the [[gcrypt special remote|special_remotes/gcrypt]] to encrypt pushes to the git remote, and git-annex will also encrypt the files it -stores there. +stores there. Data is transferred using rsync over ssh. If you're going to be sharing this repository with others, be sure to also include their keyids, by specifying keyid= repeatedly. @@ -98,9 +98,9 @@ the git repository changes to it as well. 
Anyone who has access to the repo it and has one of the keys used to encrypt it can check it out: - git clone gcrypt::ssh://my.server/home/me/encryptedrepo myrepo + git clone gcrypt::rsync://my.server/home/me/encryptedrepo myrepo cd myrepo - git annex enableremote encryptedrepo gitrepo=ssh://my.server/home/me/encryptedrepo + git annex enableremote encryptedrepo gitrepo=rsync://my.server/home/me/encryptedrepo git annex get --from encryptedrepo ## private encrypted git remote on a git-lfs hosting site