diff --git a/CHANGELOG b/CHANGELOG index 2781a67ba8..2aa6630d00 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,5 +1,10 @@ git-annex (7.20190731) UNRELEASED; urgency=medium + * New git-lfs special remote, which can be used to store data on any git-lfs + server, including github, gitlab, and gogs. + * Support fully encrypting all data sent to a git-lfs special remote, + using a combination of gcrypt to encrypt the git data, and git-annex's + encryption of its data. * Use the same optimisation for --in=here as has always been used for --in=. rather than the slow code path that unncessarily queries the git-annex branch. diff --git a/Remote/GCrypt.hs b/Remote/GCrypt.hs index 83ab5c3b8a..931a1491f3 100644 --- a/Remote/GCrypt.hs +++ b/Remote/GCrypt.hs @@ -12,6 +12,7 @@ module Remote.GCrypt ( coreGCryptId, setupRepo, accessShellConfig, + setGcryptEncryption, ) where import qualified Data.Map as M diff --git a/Remote/Git.hs b/Remote/Git.hs index 6e9af5dd5f..e7ed224047 100644 --- a/Remote/Git.hs +++ b/Remote/Git.hs @@ -144,8 +144,10 @@ configRead autoinit r = do gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote) gen r u c gc - | Git.GCrypt.isEncrypted r = Remote.GCrypt.chainGen r u c gc + -- Remote.GitLFS may be used with a repo that is also encrypted + -- with gcrypt so is checked first. 
| remoteAnnexGitLFS gc = Remote.GitLFS.gen r u c gc + | Git.GCrypt.isEncrypted r = Remote.GCrypt.chainGen r u c gc | otherwise = case repoP2PAddress r of Nothing -> do st <- mkState r u gc diff --git a/Remote/GitLFS.hs b/Remote/GitLFS.hs index 60b3567616..4765d2fddb 100644 --- a/Remote/GitLFS.hs +++ b/Remote/GitLFS.hs @@ -16,12 +16,14 @@ import qualified Annex import qualified Git import qualified Git.Types as Git import qualified Git.Url +import qualified Git.GCrypt import Config import Config.Cost import Remote.Helper.Special import Remote.Helper.ExportImport import Remote.Helper.Git import Remote.Helper.Http +import qualified Remote.GCrypt import Annex.Ssh import Annex.UUID import Crypto @@ -55,7 +57,14 @@ remote = RemoteType gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote) gen r u c gc = do - h <- liftIO $ newTVarIO $ LFSHandle Nothing Nothing r gc + -- If the repo uses gcrypt, get the underlying repo without the + -- gcrypt url, to do LFS endpoint discovery on. 
+ r' <- if Git.GCrypt.isEncrypted r + then do + g <- Annex.gitRepo + liftIO $ Git.GCrypt.encryptedRemote g r + else pure r + h <- liftIO $ newTVarIO $ LFSHandle Nothing Nothing r' gc cst <- remoteCost gc expensiveRemoteCost return $ Just $ specialRemote' specialcfg c (simplyPrepare $ store u h) @@ -107,36 +116,45 @@ mySetup :: SetupStage -> Maybe UUID -> Maybe CredPair -> RemoteConfig -> RemoteG mySetup _ mu _ c gc = do u <- maybe (liftIO genUUID) return mu - let repo = fromMaybe (giveup "Specify url=") $ - M.lookup "url" c - - when (isEncrypted c) $ - unlessM (Annex.getState Annex.force) $ + (c', _encsetup) <- encryptionSetup c gc + case (isEncrypted c', Git.GCrypt.urlPrefix `isPrefixOf` url) of + (False, False) -> noop + (True, True) -> Remote.GCrypt.setGcryptEncryption c' remotename + (True, False) -> unlessM (Annex.getState Annex.force) $ giveup $ unwords $ - [ "You asked that encryption be enabled for" - , "this remote, but only the files that" - , "git-annex stores on it would be encrypted;" + [ "Encryption is enabled for this remote," + , "but only the files that git-annex stores on" + , "it would be encrypted; " , "anything that git push sends to it would" - , "not be encrypted. Even encryption=shared" - , "encryption keys will be stored on the" - , "remote for anyone who can access it to" - , "see." + , "not be encrypted. Recommend prefixing the" + , "url with \"gcrypt::\" to also encrypt" + , "git pushes." + , "(Use --force if you want to use this" + , "likely insecure configuration.)" + ] + (False, True) -> unlessM (Annex.getState Annex.force) $ + giveup $ unwords $ + [ "You used a \"gcrypt::\" url for this remote," + , "but encryption=none prevents git-annex" + , "from encrypting files it stores there." 
, "(Use --force if you want to use this" , "likely insecure configuration.)" ] - (c', _encsetup) <- encryptionSetup c gc - -- The repo is not stored in the remote log, because the same + -- The url is not stored in the remote log, because the same -- git-lfs repo can be accessed using different urls by different -- people (eg over ssh or http). -- -- Instead, set up remote.name.url to point to the repo, -- (so it's also usable by git as a non-special remote), -- and set remote.name.git-lfs = true - let c'' = M.delete "repo" c' + let c'' = M.delete "url" c' gitConfigSpecialRemote u c'' [("git-lfs", "true")] - setConfig (ConfigKey ("remote." ++ getRemoteName c ++ ".url")) repo + setConfig (ConfigKey ("remote." ++ getRemoteName c ++ ".url")) url return (c'', u) + where + url = fromMaybe (giveup "Specify url=") (M.lookup "url" c) + remotename = fromJust (M.lookup "name" c) data LFSHandle = LFSHandle { downloadEndpoint :: Maybe LFS.Endpoint diff --git a/doc/special_remotes/gcrypt.mdwn b/doc/special_remotes/gcrypt.mdwn index 5807c9e5f2..2842e43303 100644 --- a/doc/special_remotes/gcrypt.mdwn +++ b/doc/special_remotes/gcrypt.mdwn @@ -4,6 +4,12 @@ remote allows git-annex to also store its files in such repositories. Naturally, git-annex encrypts the files it stores too, so everything stored on the remote is encrypted. +This special remote needs the server hosting the remote repository +to either have git-annex-shell or rsync accessible via ssh. git-annex +uses those to store its content in the remote. If the remote repository +is instead hosted on a server using git-lfs, you can use the [[git-lfs]] +special remote instead of this one; it also supports using gcrypt. + See [[tips/fully_encrypted_git_repositories_with_gcrypt]] for some examples of using gcrypt. @@ -35,11 +41,12 @@ shell access, and `rsync` must be installed. Those are the minimum requirements, but it's also recommended to install git-annex on the remote server, so that [[git-annex-shell]] can be used. 
-While you can use git-remote-gcrypt with servers like github, git-annex -can't store files on them. In such a case, you can just use -git-remote-gcrypt directly. +If you can't run `rsync` or `git-annex-shell` on the remote server, +you can't use this special remote. Other options are the [[git-lfs]] +special remote, which can also be combined with gcrypt, or +using git-remote-gcrypt to encrypt a remote that git-annex cannot use. -If you use encryption=hybrid, you can add more gpg keys that can access +If you use encryption=hybrid, you can later add more gpg keys that can access the files git-annex stored in the gcrypt repository. However, due to the way git-remote-gcrypt encrypts the git repository, you will need to somehow force it to re-push everything again, so that the encrypted repository can diff --git a/doc/special_remotes/git-lfs.mdwn b/doc/special_remotes/git-lfs.mdwn index ffb0e7411a..e48a76cf4f 100644 --- a/doc/special_remotes/git-lfs.mdwn +++ b/doc/special_remotes/git-lfs.mdwn @@ -23,8 +23,8 @@ the git-lfs special remote: * `keyid` - Specifies the gpg key to use for encryption of both the files git-annex stores in the repository, as well as to encrypt the git - repository itself. May be repeated when multiple participants - should have access to the repository. + repository itself when using gcrypt. May be repeated when + multiple participants should have access to the repository. ## efficiency note @@ -41,15 +41,43 @@ store its SHA256 checksum in the git-annex branch. ## encryption notes -The encryption= parameter only makes git-annex encrypt data it stores -on the remote. `git push` can also be used with the remote -(it is a git repository after all), and data pushed to it with -git will *not* be encrypted. +To encrypt a git-lfs repository, there are two separate things that +have to be encrypted: the data git-annex stores there, and the content +of the git repository itself. 
After all, a git-lfs remote is a git remote +and git push doesn't encrypt data by default. -Using encryption=shared with a git-lfs special remote is especially -unlikely to be secure, because the encryption key is committed to the git -repository. It would only make sense if you never pushed it to the -remote, or trusted the remote's host to keep it secure. +To encrypt your git pushes, you can use +[git-remote-gcrypt](https://spwhitton.name/tech/code/git-remote-gcrypt/) +and prefix the repository url with "gcrypt::". + +To make git-annex encrypt the data it stores, you can use the encryption= +configuration. + +An example of combining the two: + + git annex initremote lfstest type=git-lfs url=gcrypt::git@github.com:username/somerepo.git encryption=shared + +In that example, the git-annex shared encryption key is stored in +git, but that's ok because git push will encrypt it, along with all +the other git data, using your gpg key. You could instead use +"encryption=shared keyid=" to make git-annex and gcrypt both encrypt +to a specified gpg key. + +git-annex will detect if one part of the repository is encrypted, +but you forgot to encrypt the other part, and will refuse to set up +such an insecure half-encrypted repository. + +If you use encryption=hybrid, you can later add more gpg keys that can access +the files git-annex stored in the git-lfs repository. However, due to the +way git-remote-gcrypt encrypts the git repository, you will need to somehow +force it to re-push everything again, so that the encrypted repository can +be decrypted by the added keys. Probably this can be done by setting +`GCRYPT_FULL_REPACK` and doing a forced push of branches. + +git-annex will set `remote.<name>.gcrypt-publish-participants` when setting +up a repository that uses gcrypt. This is done to avoid unnecessary gpg +passphrase prompts, but it does publish the gpg keyids that can decrypt the +repository. Unset it if you need to obscure that. 
## limitations diff --git a/doc/tips/fully_encrypted_git_repositories_with_gcrypt.mdwn b/doc/tips/fully_encrypted_git_repositories_with_gcrypt.mdwn index 2df15f193f..1847a6fb8c 100644 --- a/doc/tips/fully_encrypted_git_repositories_with_gcrypt.mdwn +++ b/doc/tips/fully_encrypted_git_repositories_with_gcrypt.mdwn @@ -1,8 +1,7 @@ [git-remote-gcrypt](https://spwhitton.name/tech/code/git-remote-gcrypt/) -adds support for encrypted remotes to git. The git-annex -[[gcrypt special remote|special_remotes/gcrypt]] allows git-annex to -also store its files in such repositories. Naturally, git-annex encrypts -the files it stores too, so everything stored on the remote is encrypted. +adds support for encrypted remotes to git. Combine this with git-annex +encrypting the files it stores in a remote, and you can fully encrypt +all the data stored on a remote. Here are some ways you can use this awesome stuff.. @@ -15,7 +14,12 @@ repositories. ## prerequisites * Install [git-remote-gcrypt](https://spwhitton.name/tech/code/git-remote-gcrypt/) -* Install git-annex version 4.20130909 or newer. + +* Set up a gpg key. You might consider generating a special purpose key + just for this use case, since you may end up wanting to put the key + on multiple machines that you would not trust with your main gpg key. + + The examples below use "$mykey" where you should put your gpg keyid. ## encrypted backup drive @@ -24,18 +28,18 @@ both the full contents of your git repository, and all the files you instruct git-annex to store on it, and everything will be encrypted so that only you can see it. -First, you need to set up a gpg key. You might consider generating a -special purpose key just for this use case, since you may end up wanting to -put the key on multiple machines that you would not trust with your -main gpg key. 
- -You need to tell git-annex the keyid of the key when setting up the -encrypted repository: +Here's how to set up the encrypted repository: git init --bare /mnt/encryptedbackup git annex initremote encryptedbackup type=gcrypt gitrepo=/mnt/encryptedbackup keyid=$mykey git annex sync encryptedbackup +(Remember to replace "$mykey" with the keyid of your gpg key.) + +This uses the [[gcrypt special remote|special_remotes/gcrypt]] to encrypt +pushes to the git remote, and git-annex will also encrypt the files it +stores there. + Now you can copy (or even move) files to the repository. After sending files to it, you'll probably want to do a sync, which pushes the git repository changes to it as well. @@ -62,23 +66,25 @@ the gpg key used to encrypt it, and then: ## encrypted git-annex repository on a ssh server -If you have a ssh server that has rsync installed, you can set up an -encrypted repository there. Works just like the encrypted drive except -without the cable. +If you have a ssh server that has git-annex or rsync installed on it, you +can set up an encrypted repository there. Works just like the encrypted +drive except without the cable. First, on the server, run: git init --bare encryptedrepo -(Also, install git-annex on the server if it's possible & easy to do so. -While this will work without git-annex being installed on the server, it -is recommended to have it installed.) - Now, in your existing git-annex repository, set up the encrypted remote: git annex initremote encryptedrepo type=gcrypt gitrepo=ssh://my.server/home/me/encryptedrepo keyid=$mykey git annex sync encryptedrepo +(Remember to replace "$mykey" with the keyid of your gpg key.) + +This uses the [[gcrypt special remote|special_remotes/gcrypt]] to encrypt +pushes to the git remote, and git-annex will also encrypt the files it +stores there. + If you're going to be sharing this repository with others, be sure to also include their keyids, by specifying keyid= repeatedly. 
@@ -97,11 +103,31 @@ used to encrypt it can check it out: git annex enableremote encryptedrepo gitrepo=ssh://my.server/home/me/encryptedrepo git annex get --from encryptedrepo -## private encrypted git remote on hosting site +## private encrypted git remote on a git-lfs hosting site + +Some git repository hosting sites do not support git-annex, but do support +the similar git-lfs for storing large files alongside a git repository. +git-annex can use the git-lfs protocol to store files in such repositories, +and with gcrypt, everything stored in the remote can be encrypted. + +First, make a new, empty git repository on the hosting site. +Get the ssh clone url for the repository, which might look +like "git@github.com:username/somerepo.git" + +Then, in your git-annex repository, set up the encrypted remote: + + git annex initremote lfstest type=git-lfs url=gcrypt::git@github.com:username/somerepo.git keyid=$mykey + +(Remember to replace "$mykey" with the keyid of your gpg key.) + +This uses the [[git-lfs special remote|special_remotes/git-lfs]], and the +`gcrypt::` prefix on the url makes pushes be encrypted with gcrypt. + +## private encrypted git remote on a git hosting site You can use gcrypt to store your git repository in encrypted form on any -hosting site that supports git. Only you can decrypt its contents. -Using it this way, git-annex does not store large files on the hosting site; it's +hosting site that supports git. Only you can decrypt its contents. Using it +this way, git-annex does not store large files on the hosting site; it's only used to store your git repository itself. git remote add encrypted gcrypt::ssh://hostingsite/myrepo.git @@ -115,7 +141,7 @@ url you used when setting it up: git clone gcrypt::ssh://hostingsite/myrepo.git -## multiuser encrypted git remote on hosting site +## multiuser encrypted git remote on a git hosting site Suppose two users want to share an encrypted git remote. 
Both of you need to set up the remote, and configure gcrypt to encrypt it so that both diff --git a/doc/tips/storing_data_in_git-lfs.mdwn b/doc/tips/storing_data_in_git-lfs.mdwn index 83945059b3..38779cfc64 100644 --- a/doc/tips/storing_data_in_git-lfs.mdwn +++ b/doc/tips/storing_data_in_git-lfs.mdwn @@ -8,22 +8,12 @@ Here's how to initialize a git-lfs special remote on Github. git annex initremote lfs type=git-lfs encryption=none url=git@github.com:yourname/yourrepo.git -If you want git-annex to encrypt the objects it stores in the remote, -change the encryption= parameter. But be sure to read the -[[git-lfs special remote|special_remotes/git-lfs]] page's -**encryption notes** first! +In this example, the remote will not be encrypted, so anyone who can access +it can see its contents. It is possible to encrypt everything stored in a +git-lfs remote; see [[fully_encrypted_git_repositories_with_gcrypt]]. -To enable the same remote in another clone of the repository, -you'll need to provide an url to it again. It's ok to provide a different -url as long as it points to the same git-lfs repository. - - git annex enableremote lfs url=https://github.com/yourname/yourrepo.git - -Note that http urls currently only allow read access to the git-lfs -repository. - -Once the remote is set up, you git-annex can store and retrieve content in -the usual ways: +Once the git-lfs remote is set up, git-annex can store and retrieve +content in the usual ways: git annex copy * --to lfs git annex get --from lfs @@ -33,3 +23,12 @@ because the protocol does not support deletion. A git-lfs special remote also functions as a regular git remote. You can use things like `git push` and `git pull` with it. + +To enable an existing git-lfs remote in another clone of the repository, +you'll need to provide an url to it again. It's ok to provide a different +url as long as it points to the same git-lfs repository. 
+ + git annex enableremote lfs url=https://github.com/yourname/yourrepo.git + +Note that http urls currently only allow read access to the git-lfs +repository.