diff --git a/CHANGELOG b/CHANGELOG index 90bed09f9a..b2cfbee2e2 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -15,6 +15,9 @@ git-annex (8.20210224) UNRELEASED; urgency=medium (Such a conversion will still happen when importing from a remote an old git-annex exported such a tree to before; export the tree with the new git-annex before importing to avoid that.) + * Added support for git-remote-gcrypt's rsync URIs, which access a remote + using rsync over ssh, and which git pushes to much more efficiently + than ssh urls. * Fix support for local gcrypt repositories with a space in their URI. -- Joey Hess Wed, 24 Feb 2021 13:18:38 -0400 diff --git a/Remote/GCrypt.hs b/Remote/GCrypt.hs index 522f382c1a..e4f923b0ef 100644 --- a/Remote/GCrypt.hs +++ b/Remote/GCrypt.hs @@ -128,7 +128,7 @@ gen' :: Git.Repo -> UUID -> ParsedRemoteConfig -> RemoteGitConfig -> RemoteState gen' r u c gc rs = do cst <- remoteCost gc $ if repoCheap r then nearlyCheapRemoteCost else expensiveRemoteCost - (rsynctransport, rsyncurl) <- rsyncTransportToObjects r gc + let (rsynctransport, rsyncurl, accessmethod) = rsyncTransportToObjects r gc let rsyncopts = Remote.Rsync.genRsyncOpts c gc rsynctransport rsyncurl let this = Remote { uuid = u @@ -163,10 +163,10 @@ gen' r u c gc rs = do , remoteStateHandle = rs } return $ Just $ specialRemote' specialcfg c - (store this rsyncopts) - (retrieve this rsyncopts) - (remove this rsyncopts) - (checkKey this rsyncopts) + (store this rsyncopts accessmethod) + (retrieve this rsyncopts accessmethod) + (remove this rsyncopts accessmethod) + (checkKey this rsyncopts accessmethod) this where specialcfg @@ -175,35 +175,47 @@ gen' r u c gc rs = do { displayProgress = False } | otherwise = specialRemoteCfg c -rsyncTransportToObjects :: Git.Repo -> RemoteGitConfig -> Annex (Annex [CommandParam], String) -rsyncTransportToObjects r gc = do - (rsynctransport, rsyncurl, _) <- rsyncTransport r gc - return (rsynctransport, rsyncurl ++ "/annex/objects") +rsyncTransportToObjects :: 
Git.Repo -> RemoteGitConfig -> (Annex [CommandParam], String, AccessMethod) +rsyncTransportToObjects r gc = + let (rsynctransport, rsyncurl, m) = rsyncTransport r gc + in (rsynctransport, rsyncurl ++ "/annex/objects", m) -rsyncTransport :: Git.Repo -> RemoteGitConfig -> Annex (Annex [CommandParam], String, AccessMethod) +rsyncTransport :: Git.Repo -> RemoteGitConfig -> (Annex [CommandParam], String, AccessMethod) rsyncTransport r gc | sshprefix `isPrefixOf` loc = sshtransport $ break (== '/') $ drop (length sshprefix) loc - | "//:" `isInfixOf` loc = othertransport + | "rsync://" `isPrefixOf` loc = rsyncoversshtransport | ":" `isInfixOf` loc = sshtransport $ separate (== ':') loc - | otherwise = othertransport + | otherwise = rsyncoversshtransport where sshprefix = "ssh://" :: String loc = Git.repoLocation r - sshtransport (host, path) = do + sshtransport (host, path) = let rsyncpath = if "/~/" `isPrefixOf` path then drop 3 path else path - let sshhost = either error id (mkSshHost host) - let mkopts = rsyncShell . (Param "ssh" :) + sshhost = either error id (mkSshHost host) + mkopts = rsyncShell . (Param "ssh" :) <$> sshOptions ConsumeStdin (sshhost, Nothing) gc [] - return (mkopts, fromSshHost sshhost ++ ":" ++ rsyncpath, AccessShell) - othertransport = return (pure [], loc, AccessDirect) + in (mkopts, fromSshHost sshhost ++ ":" ++ rsyncpath, AccessGitAnnexShell) + rsyncoversshtransport = + -- git-remote-gcrypt uses a rsync:// url to mean + -- rsync over ssh. But to rsync, that's rsync protocol, + -- so it must be converted to a form that rsync will treat + -- as rsync over ssh. 
+ -- There are two url forms that git-remote-gcrypt + -- supports: rsync://userhost/path and rsync://userhost:path + -- change to: userhost:/path userhost:path + let loc' = replace "rsync://" "" loc + loc'' = if ':' `elem` loc' + then loc' + else let (a, b) = break (== '/') loc' in a ++ ":" ++ b + in (pure [], loc'', AccessRsyncOverSsh) noCrypto :: Annex a noCrypto = giveup "cannot use gcrypt remote without encryption enabled" unsupportedUrl :: a -unsupportedUrl = giveup "using non-ssh remote repo url with gcrypt is not supported" +unsupportedUrl = giveup "unsupported repo url for gcrypt" gCryptSetup :: SetupStage -> Maybe UUID -> Maybe CredPair -> RemoteConfig -> RemoteGitConfig -> Annex (RemoteConfig, UUID) gCryptSetup _ mu _ c gc = go $ fromProposedAccepted <$> M.lookup gitRepoField c @@ -256,8 +268,9 @@ gCryptSetup _ mu _ c gc = go $ fromProposedAccepted <$> M.lookup gitRepoField c else giveup $ "uuid mismatch; expected " ++ show mu ++ " but remote gitrepo has " ++ show u ++ " (" ++ show gcryptid ++ ")" {- Sets up the gcrypt repository. The repository is either a local - - repo, or it is accessed via rsync directly, or it is accessed over ssh - - and git-annex-shell is available to manage it. + - repo, or it is accessed via rsync over ssh (without using + - git-annex-shell), or it is accessed over ssh and git-annex-shell + - is available to manage it. - - The GCryptID is recorded in the repository's git config for later use. 
- Also, if the git config has receive.denyNonFastForwards set, disable @@ -267,11 +280,11 @@ setupRepo :: Git.GCrypt.GCryptId -> Git.Repo -> Annex AccessMethod setupRepo gcryptid r | Git.repoIsUrl r = do dummycfg <- liftIO dummyRemoteGitConfig - (_, _, accessmethod) <- rsyncTransport r dummycfg + let (_, _, accessmethod) = rsyncTransport r dummycfg case accessmethod of - AccessDirect -> rsyncsetup - AccessShell -> ifM gitannexshellsetup - ( return AccessShell + AccessRsyncOverSsh -> rsyncsetup + AccessGitAnnexShell -> ifM gitannexshellsetup + ( return AccessGitAnnexShell , rsyncsetup ) | Git.repoIsLocalUnknown r = localsetup =<< liftIO (Git.Config.read r) @@ -281,16 +294,16 @@ setupRepo gcryptid r let setconfig k v = liftIO $ Git.Command.run [Param "config", Param (fromConfigKey k), Param v] r' setconfig coreGCryptId gcryptid setconfig denyNonFastForwards (Git.Config.boolConfig False) - return AccessDirect + return AccessRsyncOverSsh {- As well as modifying the remote's git config, - create the objectDir on the remote, - - which is needed for direct rsync of objects to work. + - which is needed for rsync of objects to it to work. -} rsyncsetup = Remote.Rsync.withRsyncScratchDir $ \tmp -> do createAnnexDirectory (toRawFilePath (tmp objectDir)) dummycfg <- liftIO dummyRemoteGitConfig - (rsynctransport, rsyncurl, _) <- rsyncTransport r dummycfg + let (rsynctransport, rsyncurl, _) = rsyncTransport r dummycfg let tmpconfig = tmp "config" opts <- rsynctransport void $ liftIO $ rsync $ opts ++ @@ -307,7 +320,7 @@ setupRepo gcryptid r ] unless ok $ giveup "Failed to connect to remote to set it up." - return AccessDirect + return AccessRsyncOverSsh {- Ask git-annex-shell to configure the repository as a gcrypt - repository. May fail if it is too old. -} @@ -322,7 +335,7 @@ accessShell = accessShellConfig . 
gitconfig accessShellConfig :: RemoteGitConfig -> Bool accessShellConfig c = case method of - AccessShell -> True + AccessGitAnnexShell -> True _ -> False where method = toAccessMethod $ fromMaybe "" $ remoteAnnexGCrypt c @@ -363,13 +376,13 @@ setGcryptEncryption c remotename = do where remoteconfig n = n remotename -store :: Remote -> Remote.Rsync.RsyncOpts -> Storer -store r rsyncopts k s p = do +store :: Remote -> Remote.Rsync.RsyncOpts -> AccessMethod -> Storer +store r rsyncopts accessmethod k s p = do repo <- getRepo r - store' repo r rsyncopts k s p + store' repo r rsyncopts accessmethod k s p -store' :: Git.Repo -> Remote -> Remote.Rsync.RsyncOpts -> Storer -store' repo r rsyncopts +store' :: Git.Repo -> Remote -> Remote.Rsync.RsyncOpts -> AccessMethod -> Storer +store' repo r rsyncopts accessmethod | not $ Git.repoIsUrl repo = byteStorer $ \k b p -> guardUsable repo (giveup "cannot access remote") $ liftIO $ do let tmpdir = Git.repoPath repo P. "tmp" P. keyFile k @@ -386,16 +399,19 @@ store' repo r rsyncopts (AssociatedFile Nothing) unless ok $ giveup "rsync failed" - else fileStorer $ Remote.Rsync.store rsyncopts + else storersync + | accessmethod == AccessRsyncOverSsh = storersync | otherwise = unsupportedUrl + where + storersync = fileStorer $ Remote.Rsync.store rsyncopts -retrieve :: Remote -> Remote.Rsync.RsyncOpts -> Retriever -retrieve r rsyncopts k p sink = do +retrieve :: Remote -> Remote.Rsync.RsyncOpts -> AccessMethod -> Retriever +retrieve r rsyncopts accessmethod k p sink = do repo <- getRepo r - retrieve' repo r rsyncopts k p sink + retrieve' repo r rsyncopts accessmethod k p sink -retrieve' :: Git.Repo -> Remote -> Remote.Rsync.RsyncOpts -> Retriever -retrieve' repo r rsyncopts +retrieve' :: Git.Repo -> Remote -> Remote.Rsync.RsyncOpts -> AccessMethod -> Retriever +retrieve' repo r rsyncopts accessmethod | not $ Git.repoIsUrl repo = byteRetriever $ \k sink -> guardUsable repo (giveup "cannot access remote") $ sink =<< liftIO (L.readFile $ 
gCryptLocation repo k) @@ -406,38 +422,42 @@ retrieve' repo r rsyncopts oh <- mkOutputHandler unlessM (Ssh.rsyncHelper oh (Just p) ps) $ giveup "rsync failed" - else fileRetriever $ Remote.Rsync.retrieve rsyncopts + else retrieversync + | accessmethod == AccessRsyncOverSsh = retrieversync | otherwise = unsupportedUrl where + retrieversync = fileRetriever $ Remote.Rsync.retrieve rsyncopts -remove :: Remote -> Remote.Rsync.RsyncOpts -> Remover -remove r rsyncopts k = do +remove :: Remote -> Remote.Rsync.RsyncOpts -> AccessMethod -> Remover +remove r rsyncopts accessmethod k = do repo <- getRepo r - remove' repo r rsyncopts k + remove' repo r rsyncopts accessmethod k -remove' :: Git.Repo -> Remote -> Remote.Rsync.RsyncOpts -> Remover -remove' repo r rsyncopts k +remove' :: Git.Repo -> Remote -> Remote.Rsync.RsyncOpts -> AccessMethod -> Remover +remove' repo r rsyncopts accessmethod k | not $ Git.repoIsUrl repo = guardUsable repo (giveup "cannot access remote") $ liftIO $ Remote.Directory.removeDirGeneric (fromRawFilePath (Git.repoPath repo)) (fromRawFilePath (parentDir (toRawFilePath (gCryptLocation repo k)))) | Git.repoIsSsh repo = shellOrRsync r removeshell removersync + | accessmethod == AccessRsyncOverSsh = removersync | otherwise = unsupportedUrl where removersync = Remote.Rsync.remove rsyncopts k removeshell = Ssh.dropKey repo k -checkKey :: Remote -> Remote.Rsync.RsyncOpts -> CheckPresent -checkKey r rsyncopts k = do +checkKey :: Remote -> Remote.Rsync.RsyncOpts -> AccessMethod -> CheckPresent +checkKey r rsyncopts accessmethod k = do repo <- getRepo r - checkKey' repo r rsyncopts k + checkKey' repo r rsyncopts accessmethod k -checkKey' :: Git.Repo -> Remote -> Remote.Rsync.RsyncOpts -> CheckPresent -checkKey' repo r rsyncopts k +checkKey' :: Git.Repo -> Remote -> Remote.Rsync.RsyncOpts -> AccessMethod -> CheckPresent +checkKey' repo r rsyncopts accessmethod k | not $ Git.repoIsUrl repo = guardUsable repo (cantCheck repo) $ liftIO $ doesFileExist 
(gCryptLocation repo k) | Git.repoIsSsh repo = shellOrRsync r checkshell checkrsync + | accessmethod == AccessRsyncOverSsh = checkrsync | otherwise = unsupportedUrl where checkrsync = Remote.Rsync.checkKey repo rsyncopts k @@ -449,15 +469,16 @@ gCryptLocation :: Git.Repo -> Key -> FilePath gCryptLocation repo key = Git.repoLocation repo objectDir fromRawFilePath (keyPath key (hashDirLower def)) -data AccessMethod = AccessDirect | AccessShell +data AccessMethod = AccessRsyncOverSsh | AccessGitAnnexShell + deriving (Eq) fromAccessMethod :: AccessMethod -> String -fromAccessMethod AccessShell = "shell" -fromAccessMethod AccessDirect = "true" +fromAccessMethod AccessGitAnnexShell = "shell" +fromAccessMethod AccessRsyncOverSsh = "true" toAccessMethod :: String -> AccessMethod -toAccessMethod "shell" = AccessShell -toAccessMethod _ = AccessDirect +toAccessMethod "shell" = AccessGitAnnexShell +toAccessMethod _ = AccessRsyncOverSsh getGCryptUUID :: Bool -> Git.Repo -> Annex (Maybe UUID) getGCryptUUID fast r = do @@ -491,7 +512,7 @@ getGCryptId fast r gc getConfigViaRsync :: Git.Repo -> RemoteGitConfig -> Annex (Either SomeException (Git.Repo, S.ByteString, String)) getConfigViaRsync r gc = do - (rsynctransport, rsyncurl, _) <- rsyncTransport r gc + let (rsynctransport, rsyncurl, _) = rsyncTransport r gc opts <- rsynctransport liftIO $ do withTmpFile "tmpconfig" $ \tmpconfig _ -> do diff --git a/doc/bugs/gcrypt_remote__58___every_sync_uploads_huge_manifest.mdwn b/doc/bugs/gcrypt_remote__58___every_sync_uploads_huge_manifest.mdwn index 4b84a4bba3..ccbc2dabf6 100644 --- a/doc/bugs/gcrypt_remote__58___every_sync_uploads_huge_manifest.mdwn +++ b/doc/bugs/gcrypt_remote__58___every_sync_uploads_huge_manifest.mdwn @@ -28,3 +28,5 @@ is running. 
The upload of the actual changeset starts after this, the processes git-annex is great and revolutionized my file organization and backup structure (if they were even existing before) [[!meta title="gcrypt special remotes should support rsync:// and perhaps also sftp://"]] + +> [[done]] --[[Joey]] diff --git a/doc/bugs/gcrypt_remote__58___every_sync_uploads_huge_manifest/comment_10_83ea673f86450a856889b8fbb894bb4b._comment b/doc/bugs/gcrypt_remote__58___every_sync_uploads_huge_manifest/comment_10_83ea673f86450a856889b8fbb894bb4b._comment new file mode 100644 index 0000000000..f439e13d75 --- /dev/null +++ b/doc/bugs/gcrypt_remote__58___every_sync_uploads_huge_manifest/comment_10_83ea673f86450a856889b8fbb894bb4b._comment @@ -0,0 +1,24 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 10""" + date="2021-03-09T18:10:38Z" + content=""" +Actually, it supports rsync urls already. But git-annex treats the url as +literally the rsync protocol, so it will run rsync with a url like +rsync://localhost/foo, which will mostly fail unless there's actually an +rsync server running. +git-remote-gcrypt treats the same url as rsync over ssh. + +See [[!commit 3e079cdcd1ac4f52ceb1c3ad483917d7c6133d0b]] for when that was +added in 2013. Although I'm doubtful whether it actually worked; it seems like +initremote supported it, but it was never implemented the rest of +the way. + +If anyone has been using that, it seems git-annex and git-remote-gcrypt +would be accessing the server in different ways, so the same server must +support rsync over ssh too, or git pushes to the remote would fail. So, it +seems all that's needed is for git-annex to treat that url as rsync over +ssh, and finish the incomplete implementation of rsync urls. + +Ok, done! 
+"""]] diff --git a/doc/special_remotes/gcrypt.mdwn b/doc/special_remotes/gcrypt.mdwn index 787b3e1ca1..91e0ac60e8 100644 --- a/doc/special_remotes/gcrypt.mdwn +++ b/doc/special_remotes/gcrypt.mdwn @@ -26,10 +26,21 @@ gcrypt: repository itself. May be repeated when multiple participants should have access to the repository. -* `gitrepo` - Required. The path or url to the git repository +* `gitrepo` - Required. The location of the git repository for gcrypt to use. This repository should be either an unpopulated bare git repo, or an existing gcrypt repository. + To use a local git repository, use: `gitrepo=/path/to/repo` + + For a git repository accessed using rsync over ssh, use: + `gitrepo=rsync://user@host/path/to/repo` + + For a git repository accessed over ssh, and using git-annex-shell + to transfer data, use: + `gitrepo=ssh://user@host/path/to/repo` or `host:path` + Note that each `git push` has to re-send the whole content of the git + repository when using this option. + * `chunk` - Enables [[chunking]] when storing large files. * `shellescape` - See [[rsync]] for the details of this option. @@ -37,9 +48,7 @@ gcrypt: ## notes For git-annex to store files in a repository on a remote server, you need -shell access, and `rsync` must be installed. Those are the minimum -requirements, but it's also recommended to install git-annex on the remote -server, so that [[git-annex-shell]] can be used. +shell access, and it needs to be able to run `rsync` or `git-annex-shell`. If you can't run `rsync` or `git-annex-shell` on the remote server, you can't use this special remote. 
Other options are the [[git-lfs]] diff --git a/doc/tips/fully_encrypted_git_repositories_with_gcrypt.mdwn b/doc/tips/fully_encrypted_git_repositories_with_gcrypt.mdwn index 1847a6fb8c..a91cd27f48 100644 --- a/doc/tips/fully_encrypted_git_repositories_with_gcrypt.mdwn +++ b/doc/tips/fully_encrypted_git_repositories_with_gcrypt.mdwn @@ -66,9 +66,9 @@ the gpg key used to encrypt it, and then: ## encrypted git-annex repository on a ssh server -If you have a ssh server that has git-annex or rsync installed on it, you -can set up an encrypted repository there. Works just like the encrypted -drive except without the cable. +If you have a server that has ssh and rsync installed on it, you can set up an +encrypted repository there. Works just like the encrypted drive except +without the cable. First, on the server, run: @@ -76,14 +76,14 @@ First, on the server, run: Now, in your existing git-annex repository, set up the encrypted remote: - git annex initremote encryptedrepo type=gcrypt gitrepo=ssh://my.server/home/me/encryptedrepo keyid=$mykey + git annex initremote encryptedrepo type=gcrypt gitrepo=rsync://my.server/home/me/encryptedrepo keyid=$mykey git annex sync encryptedrepo (Remember to replace "$mykey" with the keyid of your gpg key.) This uses the [[gcrypt special remote|special_remotes/gcrypt]] to encrypt pushes to the git remote, and git-annex will also encrypt the files it -stores there. +stores there. Data is transferred using rsync over ssh. If you're going to be sharing this repository with others, be sure to also include their keyids, by specifying keyid= repeatedly. @@ -98,9 +98,9 @@ the git repository changes to it as well. 
Anyone who has access to the repo it and has one of the keys used to encrypt it can check it out: - git clone gcrypt::ssh://my.server/home/me/encryptedrepo myrepo + git clone gcrypt::rsync://my.server/home/me/encryptedrepo myrepo cd myrepo - git annex enableremote encryptedrepo gitrepo=ssh://my.server/home/me/encryptedrepo + git annex enableremote encryptedrepo gitrepo=rsync://my.server/home/me/encryptedrepo git annex get --from encryptedrepo ## private encrypted git remote on a git-lfs hosting site