support using gcrypt with git-lfs special remote

This commit is contained in:
Joey Hess 2019-08-05 13:24:21 -04:00
parent 8401b09e32
commit fb7d92457f
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
8 changed files with 156 additions and 70 deletions

View file

@ -1,5 +1,10 @@
git-annex (7.20190731) UNRELEASED; urgency=medium git-annex (7.20190731) UNRELEASED; urgency=medium
* New git-lfs special remote, which can be used to store data on any git-lfs
server, including github, gitlab, and gogs.
* Support fully encrypting all data sent to a git-lfs special remote,
using a combination of gcrypt to encrypt the git data, and git-annex's
encryption of its data.
* Use the same optimisation for --in=here as has always been * Use the same optimisation for --in=here as has always been
used for --in=. rather than the slow code path that unnecessarily used for --in=. rather than the slow code path that unnecessarily
queries the git-annex branch. queries the git-annex branch.

View file

@ -12,6 +12,7 @@ module Remote.GCrypt (
coreGCryptId, coreGCryptId,
setupRepo, setupRepo,
accessShellConfig, accessShellConfig,
setGcryptEncryption,
) where ) where
import qualified Data.Map as M import qualified Data.Map as M

View file

@ -144,8 +144,10 @@ configRead autoinit r = do
gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote) gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote)
gen r u c gc gen r u c gc
| Git.GCrypt.isEncrypted r = Remote.GCrypt.chainGen r u c gc -- Remote.GitLFS may be used with a repo that is also encrypted
-- with gcrypt so is checked first.
| remoteAnnexGitLFS gc = Remote.GitLFS.gen r u c gc | remoteAnnexGitLFS gc = Remote.GitLFS.gen r u c gc
| Git.GCrypt.isEncrypted r = Remote.GCrypt.chainGen r u c gc
| otherwise = case repoP2PAddress r of | otherwise = case repoP2PAddress r of
Nothing -> do Nothing -> do
st <- mkState r u gc st <- mkState r u gc

View file

@ -16,12 +16,14 @@ import qualified Annex
import qualified Git import qualified Git
import qualified Git.Types as Git import qualified Git.Types as Git
import qualified Git.Url import qualified Git.Url
import qualified Git.GCrypt
import Config import Config
import Config.Cost import Config.Cost
import Remote.Helper.Special import Remote.Helper.Special
import Remote.Helper.ExportImport import Remote.Helper.ExportImport
import Remote.Helper.Git import Remote.Helper.Git
import Remote.Helper.Http import Remote.Helper.Http
import qualified Remote.GCrypt
import Annex.Ssh import Annex.Ssh
import Annex.UUID import Annex.UUID
import Crypto import Crypto
@ -55,7 +57,14 @@ remote = RemoteType
gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote) gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote)
gen r u c gc = do gen r u c gc = do
h <- liftIO $ newTVarIO $ LFSHandle Nothing Nothing r gc -- If the repo uses gcrypt, get the underlying repo without the
-- gcrypt url, to do LFS endpoint discovery on.
r' <- if Git.GCrypt.isEncrypted r
then do
g <- Annex.gitRepo
liftIO $ Git.GCrypt.encryptedRemote g r
else pure r
h <- liftIO $ newTVarIO $ LFSHandle Nothing Nothing r' gc
cst <- remoteCost gc expensiveRemoteCost cst <- remoteCost gc expensiveRemoteCost
return $ Just $ specialRemote' specialcfg c return $ Just $ specialRemote' specialcfg c
(simplyPrepare $ store u h) (simplyPrepare $ store u h)
@ -107,36 +116,45 @@ mySetup :: SetupStage -> Maybe UUID -> Maybe CredPair -> RemoteConfig -> RemoteG
mySetup _ mu _ c gc = do mySetup _ mu _ c gc = do
u <- maybe (liftIO genUUID) return mu u <- maybe (liftIO genUUID) return mu
let repo = fromMaybe (giveup "Specify url=") $ (c', _encsetup) <- encryptionSetup c gc
M.lookup "url" c case (isEncrypted c', Git.GCrypt.urlPrefix `isPrefixOf` url) of
(False, False) -> noop
when (isEncrypted c) $ (True, True) -> Remote.GCrypt.setGcryptEncryption c' remotename
unlessM (Annex.getState Annex.force) $ (True, False) -> unlessM (Annex.getState Annex.force) $
giveup $ unwords $ giveup $ unwords $
[ "You asked that encryption be enabled for" [ "Encryption is enabled for this remote,"
, "this remote, but only the files that" , "but only the files that git-annex stores on"
, "git-annex stores on it would be encrypted;" , "it would be encrypted; "
, "anything that git push sends to it would" , "anything that git push sends to it would"
, "not be encrypted. Even encryption=shared" , "not be encrypted. Recommend prefixing the"
, "encryption keys will be stored on the" , "url with \"gcrypt::\" to also encrypt"
, "remote for anyone who can access it to" , "git pushes."
, "see." , "(Use --force if you want to use this"
, "likely insecure configuration.)"
]
(False, True) -> unlessM (Annex.getState Annex.force) $
giveup $ unwords $
[ "You used a \"gcrypt::\" url for this remote,"
, "but encryption=none prevents git-annex"
, "from encrypting files it stores there."
, "(Use --force if you want to use this" , "(Use --force if you want to use this"
, "likely insecure configuration.)" , "likely insecure configuration.)"
] ]
(c', _encsetup) <- encryptionSetup c gc
-- The repo is not stored in the remote log, because the same -- The url is not stored in the remote log, because the same
-- git-lfs repo can be accessed using different urls by different -- git-lfs repo can be accessed using different urls by different
-- people (eg over ssh or http). -- people (eg over ssh or http).
-- --
-- Instead, set up remote.name.url to point to the repo, -- Instead, set up remote.name.url to point to the repo,
-- (so it's also usable by git as a non-special remote), -- (so it's also usable by git as a non-special remote),
-- and set remote.name.git-lfs = true -- and set remote.name.git-lfs = true
let c'' = M.delete "repo" c' let c'' = M.delete "url" c'
gitConfigSpecialRemote u c'' [("git-lfs", "true")] gitConfigSpecialRemote u c'' [("git-lfs", "true")]
setConfig (ConfigKey ("remote." ++ getRemoteName c ++ ".url")) repo setConfig (ConfigKey ("remote." ++ getRemoteName c ++ ".url")) url
return (c'', u) return (c'', u)
where
url = fromMaybe (giveup "Specify url=") (M.lookup "url" c)
remotename = fromJust (M.lookup "name" c)
data LFSHandle = LFSHandle data LFSHandle = LFSHandle
{ downloadEndpoint :: Maybe LFS.Endpoint { downloadEndpoint :: Maybe LFS.Endpoint

View file

@ -4,6 +4,12 @@ remote allows git-annex to also store its files in such repositories.
Naturally, git-annex encrypts the files it stores too, so everything Naturally, git-annex encrypts the files it stores too, so everything
stored on the remote is encrypted. stored on the remote is encrypted.
This special remote needs the server hosting the remote repository
to either have git-annex-shell or rsync accessible via ssh. git-annex
uses those to store its content in the remote. If the remote repository
is instead hosted on a server using git-lfs, you can use the [[git-lfs]]
special remote instead of this one; it also supports using gcrypt.
See [[tips/fully_encrypted_git_repositories_with_gcrypt]] for some examples See [[tips/fully_encrypted_git_repositories_with_gcrypt]] for some examples
of using gcrypt. of using gcrypt.
@ -35,11 +41,12 @@ shell access, and `rsync` must be installed. Those are the minimum
requirements, but it's also recommended to install git-annex on the remote requirements, but it's also recommended to install git-annex on the remote
server, so that [[git-annex-shell]] can be used. server, so that [[git-annex-shell]] can be used.
While you can use git-remote-gcrypt with servers like github, git-annex If you can't run `rsync` or `git-annex-shell` on the remote server,
can't store files on them. In such a case, you can just use you can't use this special remote. Other options are the [[git-lfs]]
git-remote-gcrypt directly. special remote, which can also be combined with gcrypt, or
using git-remote-gcrypt to encrypt a remote that git-annex cannot use.
If you use encryption=hybrid, you can add more gpg keys that can access If you use encryption=hybrid, you can later add more gpg keys that can access
the files git-annex stored in the gcrypt repository. However, due to the the files git-annex stored in the gcrypt repository. However, due to the
way git-remote-gcrypt encrypts the git repository, you will need to somehow way git-remote-gcrypt encrypts the git repository, you will need to somehow
force it to re-push everything again, so that the encrypted repository can force it to re-push everything again, so that the encrypted repository can

View file

@ -23,8 +23,8 @@ the git-lfs special remote:
* `keyid` - Specifies the gpg key to use for encryption of both the files * `keyid` - Specifies the gpg key to use for encryption of both the files
git-annex stores in the repository, as well as to encrypt the git git-annex stores in the repository, as well as to encrypt the git
repository itself. May be repeated when multiple participants repository itself when using gcrypt. May be repeated when
should have access to the repository. multiple participants should have access to the repository.
## efficiency note ## efficiency note
@ -41,15 +41,43 @@ store its SHA256 checksum in the git-annex branch.
## encryption notes ## encryption notes
The encryption= parameter only makes git-annex encrypt data it stores To encrypt a git-lfs repository, there are two separate things that
on the remote. `git push` can also be used with the remote have to be encrypted: the data git-annex stores there, and the content
(it is a git repository after all), and data pushed to it with of the git repository itself. After all, a git-lfs remote is a git remote
git will *not* be encrypted. and git push doesn't encrypt data by default.
Using encryption=shared with a git-lfs special remote is especially To encrypt your git pushes, you can use
unlikely to be secure, because the encryption key is committed to the git [git-remote-gcrypt](https://spwhitton.name/tech/code/git-remote-gcrypt/)
repository. It would only make sense if you never pushed it to the and prefix the repository url with "gcrypt::"
remote, or trusted the remote's host to keep it secure.
To make git-annex encrypt the data it stores, you can use the encryption=
configuration.
An example of combining the two:
git annex initremote lfstest type=git-lfs url=gcrypt::git@github.com:username/somerepo.git encryption=shared
In that example, the git-annex shared encryption key is stored in
git, but that's ok because git push will encrypt it, along with all
the other git data, using your gpg key. You could instead use
"encryption=shared keyid=" to make git-annex and gcrypt both encrypt
to a specified gpg key.
git-annex will detect if one part of the repository is encrypted,
but you forgot to encrypt the other part, and will refuse to set up
such an insecure half-encrypted repository.
If you use encryption=hybrid, you can later add more gpg keys that can access
the files git-annex stored in the git-lfs repository. However, due to the
way git-remote-gcrypt encrypts the git repository, you will need to somehow
force it to re-push everything again, so that the encrypted repository can
be decrypted by the added keys. Probably this can be done by setting
`GCRYPT_FULL_REPACK` and doing a forced push of branches.
git-annex will set `remote.<name>.gcrypt-publish-participants` when setting
up a repository that uses gcrypt. This is done to avoid unnecessary gpg
passphrase prompts, but it does publish the gpg keyids that can decrypt the
repository. Unset it if you need to obscure that.
## limitations ## limitations

View file

@ -1,8 +1,7 @@
[git-remote-gcrypt](https://spwhitton.name/tech/code/git-remote-gcrypt/) [git-remote-gcrypt](https://spwhitton.name/tech/code/git-remote-gcrypt/)
adds support for encrypted remotes to git. The git-annex adds support for encrypted remotes to git. Combine this with git-annex
[[gcrypt special remote|special_remotes/gcrypt]] allows git-annex to encrypting the files it stores in a remote, and you can fully encrypt
also store its files in such repositories. Naturally, git-annex encrypts all the data stored on a remote.
the files it stores too, so everything stored on the remote is encrypted.
Here are some ways you can use this awesome stuff.. Here are some ways you can use this awesome stuff..
@ -15,7 +14,12 @@ repositories.
## prerequisites ## prerequisites
* Install [git-remote-gcrypt](https://spwhitton.name/tech/code/git-remote-gcrypt/) * Install [git-remote-gcrypt](https://spwhitton.name/tech/code/git-remote-gcrypt/)
* Install git-annex version 4.20130909 or newer.
* Set up a gpg key. You might consider generating a special purpose key
just for this use case, since you may end up wanting to put the key
on multiple machines that you would not trust with your main gpg key.
The examples below use "$mykey" where you should put your gpg keyid.
## encrypted backup drive ## encrypted backup drive
@ -24,18 +28,18 @@ both the full contents of your git repository, and all the files you
instruct git-annex to store on it, and everything will be encrypted so that instruct git-annex to store on it, and everything will be encrypted so that
only you can see it. only you can see it.
First, you need to set up a gpg key. You might consider generating a Here's how to set up the encrypted repository:
special purpose key just for this use case, since you may end up wanting to
put the key on multiple machines that you would not trust with your
main gpg key.
You need to tell git-annex the keyid of the key when setting up the
encrypted repository:
git init --bare /mnt/encryptedbackup git init --bare /mnt/encryptedbackup
git annex initremote encryptedbackup type=gcrypt gitrepo=/mnt/encryptedbackup keyid=$mykey git annex initremote encryptedbackup type=gcrypt gitrepo=/mnt/encryptedbackup keyid=$mykey
git annex sync encryptedbackup git annex sync encryptedbackup
(Remember to replace "$mykey" with the keyid of your gpg key.)
This uses the [[gcrypt special remote|special_remotes/gcrypt]] to encrypt
pushes to the git remote, and git-annex will also encrypt the files it
stores there.
Now you can copy (or even move) files to the repository. After Now you can copy (or even move) files to the repository. After
sending files to it, you'll probably want to do a sync, which pushes sending files to it, you'll probably want to do a sync, which pushes
the git repository changes to it as well. the git repository changes to it as well.
@ -62,23 +66,25 @@ the gpg key used to encrypt it, and then:
## encrypted git-annex repository on a ssh server ## encrypted git-annex repository on a ssh server
If you have a ssh server that has rsync installed, you can set up an If you have a ssh server that has git-annex or rsync installed on it, you
encrypted repository there. Works just like the encrypted drive except can set up an encrypted repository there. Works just like the encrypted
without the cable. drive except without the cable.
First, on the server, run: First, on the server, run:
git init --bare encryptedrepo git init --bare encryptedrepo
(Also, install git-annex on the server if it's possible & easy to do so.
While this will work without git-annex being installed on the server, it
is recommended to have it installed.)
Now, in your existing git-annex repository, set up the encrypted remote: Now, in your existing git-annex repository, set up the encrypted remote:
git annex initremote encryptedrepo type=gcrypt gitrepo=ssh://my.server/home/me/encryptedrepo keyid=$mykey git annex initremote encryptedrepo type=gcrypt gitrepo=ssh://my.server/home/me/encryptedrepo keyid=$mykey
git annex sync encryptedrepo git annex sync encryptedrepo
(Remember to replace "$mykey" with the keyid of your gpg key.)
This uses the [[gcrypt special remote|special_remotes/gcrypt]] to encrypt
pushes to the git remote, and git-annex will also encrypt the files it
stores there.
If you're going to be sharing this repository with others, be sure to also If you're going to be sharing this repository with others, be sure to also
include their keyids, by specifying keyid= repeatedly. include their keyids, by specifying keyid= repeatedly.
@ -97,11 +103,31 @@ used to encrypt it can check it out:
git annex enableremote encryptedrepo gitrepo=ssh://my.server/home/me/encryptedrepo git annex enableremote encryptedrepo gitrepo=ssh://my.server/home/me/encryptedrepo
git annex get --from encryptedrepo git annex get --from encryptedrepo
## private encrypted git remote on hosting site ## private encrypted git remote on a git-lfs hosting site
Some git repository hosting sites do not support git-annex, but do support
the similar git-lfs for storing large files alongside a git repository.
git-annex can use the git-lfs protocol to store files in such repositories,
and with gcrypt, everything stored in the remote can be encrypted.
First, make a new, empty git repository on the hosting site.
Get the ssh clone url for the repository, which might look
like "git@github.com:username/somerepo.git"
Then, in your git-annex repository, set up the encrypted remote:
git annex initremote lfstest type=git-lfs url=gcrypt::git@github.com:username/somerepo.git keyid=$mykey
(Remember to replace "$mykey" with the keyid of your gpg key.)
This uses the [[git-lfs special remote|special_remotes/git-lfs]], and the
`gcrypt::` prefix on the url makes pushes be encrypted with gcrypt.
## private encrypted git remote on a git hosting site
You can use gcrypt to store your git repository in encrypted form on any You can use gcrypt to store your git repository in encrypted form on any
hosting site that supports git. Only you can decrypt its contents. hosting site that supports git. Only you can decrypt its contents. Using it
Using it this way, git-annex does not store large files on the hosting site; it's this way, git-annex does not store large files on the hosting site; it's
only used to store your git repository itself. only used to store your git repository itself.
git remote add encrypted gcrypt::ssh://hostingsite/myrepo.git git remote add encrypted gcrypt::ssh://hostingsite/myrepo.git
@ -115,7 +141,7 @@ url you used when setting it up:
git clone gcrypt::ssh://hostingsite/myrepo.git git clone gcrypt::ssh://hostingsite/myrepo.git
## multiuser encrypted git remote on hosting site ## multiuser encrypted git remote on a git hosting site
Suppose two users want to share an encrypted git remote. Both of you Suppose two users want to share an encrypted git remote. Both of you
need to set up the remote, and configure gcrypt to encrypt it so that both need to set up the remote, and configure gcrypt to encrypt it so that both

View file

@ -8,22 +8,12 @@ Here's how to initialize a git-lfs special remote on Github.
git annex initremote lfs type=git-lfs encryption=none url=git@github.com:yourname/yourrepo.git git annex initremote lfs type=git-lfs encryption=none url=git@github.com:yourname/yourrepo.git
If you want git-annex to encrypt the objects it stores in the remote, In this example, the remote will not be encrypted, so anyone who can access
change the encryption= parameter. But be sure to read the it can see its contents. It is possible to encrypt everything stored in a
[[git-lfs special remote|special_remotes/git-lfs]] page's git-lfs remote, see [[fully_encrypted_git_repositories_with_gcrypt]].
**encryption notes** first!
To enable the same remote in another clone of the repository, Once the git-lfs remote is set up, git-annex can store and retrieve
you'll need to provide an url to it again. It's ok to provide a different content in the usual ways:
url as long as it points to the same git-lfs repository.
git annex enableremote lfs url=https://github.com/yourname/yourrepo.git
Note that http urls currently only allow read access to the git-lfs
repository.
Once the remote is set up, you git-annex can store and retrieve content in
the usual ways:
git annex copy * --to lfs git annex copy * --to lfs
git annex get --from lfs git annex get --from lfs
@ -33,3 +23,12 @@ because the protocol does not support deletion.
A git-lfs special remote also functions as a regular git remote. You can A git-lfs special remote also functions as a regular git remote. You can
use things like `git push` and `git pull` with it. use things like `git push` and `git pull` with it.
To enable an existing git-lfs remote in another clone of the repository,
you'll need to provide an url to it again. It's ok to provide a different
url as long as it points to the same git-lfs repository.
git annex enableremote lfs url=https://github.com/yourname/yourrepo.git
Note that http urls currently only allow read access to the git-lfs
repository.