Merge branch 'git-lfs'

This commit is contained in:
Joey Hess 2019-08-05 13:44:04 -04:00
commit 3e0770e800
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
19 changed files with 1217 additions and 108 deletions

View file

@ -10,6 +10,7 @@
module Backend.Hash (
backends,
testKeyBackend,
keyHash,
) where
import Annex.Common

View file

@ -1,8 +1,14 @@
git-annex (7.20190731) UNRELEASED; urgency=medium
* New git-lfs special remote, which can be used to store data on any git-lfs
server, including github, gitlab, and gogs.
* Support fully encrypting all data sent to a git-lfs special remote,
using a combination of gcrypt to encrypt the git data, and git-annex's
encryption of its data.
* Use the same optimisation for --in=here as has always been
used for --in=. rather than the slow code path that unncessarily
queries the git-annex branch.
* Allow setting up a gcrypt special remote with encryption=shared.
-- Joey Hess <id@joeyh.name> Thu, 01 Aug 2019 00:11:56 -0400

View file

@ -25,6 +25,10 @@ Copyright: 2018 Joey Hess <id@joeyh.name>
2013 Michael Snoyman
License: Expat
Files: Utility/GitLFS.hs
Copyright: © 2019 Joey Hess <id@joeyh.name>
License: AGPL-3+
Files: Utility/*
Copyright: 2012-2019 Joey Hess <id@joeyh.name>
License: BSD-2-clause

View file

@ -11,9 +11,10 @@ module Git.Url (
port,
hostuser,
authority,
path,
) where
import Network.URI hiding (scheme, authority)
import Network.URI hiding (scheme, authority, path)
import Common
import Git.Types
@ -66,6 +67,11 @@ authpart :: (URIAuth -> a) -> Repo -> Maybe a
authpart a Repo { location = Url u } = a <$> uriAuthority u
authpart _ repo = notUrl repo
{- Path part of an URL repo. -}
path :: Repo -> FilePath
path Repo { location = Url u } = uriPath u
path repo = notUrl repo
notUrl :: Repo -> a
notUrl repo = error $
"acting on local git repo " ++ repoDescribe repo ++ " not supported"

View file

@ -12,6 +12,7 @@ module Remote.GCrypt (
coreGCryptId,
setupRepo,
accessShellConfig,
setGcryptEncryption,
) where
import qualified Data.Map as M
@ -318,16 +319,18 @@ shellOrRsync r ashell arsync
setGcryptEncryption :: RemoteConfig -> String -> Annex ()
setGcryptEncryption c remotename = do
let participants = remoteconfig Git.GCrypt.remoteParticipantConfigKey
case cipherKeyIds =<< extractCipher c of
case extractCipher c of
Nothing -> noCrypto
Just (KeyIds { keyIds = ks}) -> do
setConfig participants (unwords ks)
let signingkey = ConfigKey $ Git.GCrypt.remoteSigningKey remotename
cmd <- gpgCmd <$> Annex.getGitConfig
skeys <- M.keys <$> liftIO (secretKeys cmd)
case filter (`elem` ks) skeys of
[] -> noop
(k:_) -> setConfig signingkey k
Just cip -> case cipherKeyIds cip of
Nothing -> noop
Just (KeyIds { keyIds = ks}) -> do
setConfig participants (unwords ks)
let signingkey = ConfigKey $ Git.GCrypt.remoteSigningKey remotename
cmd <- gpgCmd <$> Annex.getGitConfig
skeys <- M.keys <$> liftIO (secretKeys cmd)
case filter (`elem` ks) skeys of
[] -> noop
(k:_) -> setConfig signingkey k
setConfig (remoteconfig Git.GCrypt.remotePublishParticipantConfigKey)
(Git.Config.boolConfig True)
where

View file

@ -51,6 +51,7 @@ import Remote.Helper.Messages
import Remote.Helper.ExportImport
import qualified Remote.Helper.Ssh as Ssh
import qualified Remote.GCrypt
import qualified Remote.GitLFS
import qualified Remote.P2P
import qualified Remote.Helper.P2P as P2PHelper
import P2P.Address
@ -143,6 +144,9 @@ configRead autoinit r = do
gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote)
gen r u c gc
-- Remote.GitLFS may be used with a repo that is also encrypted
-- with gcrypt so is checked first.
| remoteAnnexGitLFS gc = Remote.GitLFS.gen r u c gc
| Git.GCrypt.isEncrypted r = Remote.GCrypt.chainGen r u c gc
| otherwise = case repoP2PAddress r of
Nothing -> do

449
Remote/GitLFS.hs Normal file
View file

@ -0,0 +1,449 @@
{- Using git-lfs as a remote.
-
- Copyright 2019 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
module Remote.GitLFS (remote, gen) where
import Annex.Common
import Types.Remote
import Annex.Url
import Types.Key
import Types.Creds
import qualified Annex
import qualified Git
import qualified Git.Types as Git
import qualified Git.Url
import qualified Git.GCrypt
import Config
import Config.Cost
import Remote.Helper.Special
import Remote.Helper.ExportImport
import Remote.Helper.Git
import Remote.Helper.Http
import qualified Remote.GCrypt
import Annex.Ssh
import Annex.UUID
import Crypto
import Backend.Hash
import Utility.Hash
import Utility.SshHost
import Logs.RemoteState
import qualified Utility.GitLFS as LFS
import Control.Concurrent.STM
import Data.String
import Network.HTTP.Types
import Network.HTTP.Client hiding (port)
import System.Log.Logger
import qualified Data.Map as M
import qualified Data.ByteString.Lazy as L
import qualified Data.Text as T
import qualified Data.Text.Encoding as E
remote :: RemoteType
remote = RemoteType
{ typename = "git-lfs"
-- Remote.Git takes care of enumerating git-lfs remotes too,
-- and will call our gen on them.
, enumerate = const (return [])
, generate = gen
, setup = mySetup
, exportSupported = exportUnsupported
, importSupported = importUnsupported
}
gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote)
gen r u c gc = do
-- If the repo uses gcrypt, get the underlaying repo without the
-- gcrypt url, to do LFS endpoint discovery on.
r' <- if Git.GCrypt.isEncrypted r
then do
g <- Annex.gitRepo
liftIO $ Git.GCrypt.encryptedRemote g r
else pure r
h <- liftIO $ newTVarIO $ LFSHandle Nothing Nothing r' gc
cst <- remoteCost gc expensiveRemoteCost
return $ Just $ specialRemote' specialcfg c
(simplyPrepare $ store u h)
(simplyPrepare $ retrieve u h)
(simplyPrepare $ remove h)
(simplyPrepare $ checkKey u h)
(this cst)
where
this cst = Remote
{ uuid = u
, cost = cst
, name = Git.repoDescribe r
, storeKey = storeKeyDummy
, retrieveKeyFile = retreiveKeyFileDummy
, retrieveKeyFileCheap = retrieveCheap
-- content stored on git-lfs is hashed with SHA256
-- no matter what git-annex key it's for, and the hash
-- is checked on download
, retrievalSecurityPolicy = RetrievalAllKeysSecure
, removeKey = removeKeyDummy
, lockContent = Nothing
, checkPresent = checkPresentDummy
, checkPresentCheap = False
, exportActions = exportUnsupported
, importActions = importUnsupported
, whereisKey = Nothing
, remoteFsck = Nothing
, repairRepo = Nothing
, config = c
, getRepo = return r
, gitconfig = gc
, localpath = Nothing
, remotetype = remote
, availability = GloballyAvailable
, readonly = False
-- content cannot be removed from a git-lfs repo
, appendonly = True
, mkUnavailable = return Nothing
, getInfo = gitRepoInfo (this cst)
, claimUrl = Nothing
, checkUrl = Nothing
}
specialcfg = (specialRemoteCfg c)
-- chunking would not improve git-lfs
{ chunkConfig = NoChunks
}
mySetup :: SetupStage -> Maybe UUID -> Maybe CredPair -> RemoteConfig -> RemoteGitConfig -> Annex (RemoteConfig, UUID)
mySetup _ mu _ c gc = do
u <- maybe (liftIO genUUID) return mu
(c', _encsetup) <- encryptionSetup c gc
case (isEncrypted c', Git.GCrypt.urlPrefix `isPrefixOf` url) of
(False, False) -> noop
(True, True) -> Remote.GCrypt.setGcryptEncryption c' remotename
(True, False) -> unlessM (Annex.getState Annex.force) $
giveup $ unwords $
[ "Encryption is enabled for this remote,"
, "but only the files that git-annex stores on"
, "it would be encrypted; "
, "anything that git push sends to it would"
, "not be encrypted. Recommend prefixing the"
, "url with \"gcrypt::\" to also encrypt"
, "git pushes."
, "(Use --force if you want to use this"
, "likely insecure configuration.)"
]
(False, True) -> unlessM (Annex.getState Annex.force) $
giveup $ unwords $
[ "You used a \"gcrypt::\" url for this remote,"
, "but encryption=none prevents git-annex"
, "from encrypting files it stores there."
, "(Use --force if you want to use this"
, "likely insecure configuration.)"
]
-- The url is not stored in the remote log, because the same
-- git-lfs repo can be accessed using different urls by different
-- people (eg over ssh or http).
--
-- Instead, set up remote.name.url to point to the repo,
-- (so it's also usable by git as a non-special remote),
-- and set remote.name.git-lfs = true
let c'' = M.delete "url" c'
gitConfigSpecialRemote u c'' [("git-lfs", "true")]
setConfig (ConfigKey ("remote." ++ getRemoteName c ++ ".url")) url
return (c'', u)
where
url = fromMaybe (giveup "Specify url=") (M.lookup "url" c)
remotename = fromJust (M.lookup "name" c)
data LFSHandle = LFSHandle
{ downloadEndpoint :: Maybe LFS.Endpoint
, uploadEndpoint :: Maybe LFS.Endpoint
, remoteRepo :: Git.Repo
, remoteGitConfig :: RemoteGitConfig
}
discoverLFSEndpoint :: LFS.TransferRequestOperation -> LFSHandle -> Annex (Maybe LFS.Endpoint)
discoverLFSEndpoint tro h
| Git.repoIsSsh r = gossh
| Git.repoIsHttp r = gohttp
| otherwise = do
warning "git-lfs endpoint has unsupported URI scheme"
return Nothing
where
r = remoteRepo h
lfsrepouri = case Git.location r of
Git.Url u -> u
_ -> giveup $ "unsupported git-lfs remote location " ++ Git.repoLocation r
gohttp = case tro of
LFS.RequestDownload -> return $ LFS.guessEndpoint lfsrepouri
LFS.RequestUpload -> do
-- git-lfs does support storing over http,
-- but it would need prompting for http basic
-- authentication each time git-annex discovered
-- the endpoint.
warning "Storing content in git-lfs currently needs a ssh repository url, not http."
return Nothing
gossh = case mkSshHost <$> Git.Url.hostuser r of
Nothing -> do
warning "Unable to parse ssh url for git-lfs remote."
return Nothing
Just (Left err) -> do
warning err
return Nothing
Just (Right hostuser) -> do
let port = Git.Url.port r
-- Remove leading /~/ from path. That is added when
-- converting a scp-style repository location with
-- a relative path into an url, and is legal
-- according to git-clone(1), but github does not
-- support it.
let remotepath = if "/~/" `isPrefixOf` Git.Url.path r
then drop 3 (Git.Url.path r)
else Git.Url.path r
let ps = LFS.sshDiscoverEndpointCommand remotepath tro
-- Note that no shellEscape is done here, because
-- at least github's git-lfs implementation does
-- not allow for shell quoting.
let remotecmd = unwords ps
(sshcommand, sshparams) <- sshCommand NoConsumeStdin (hostuser, port) (remoteGitConfig h) remotecmd
liftIO (tryIO (readProcess sshcommand (toCommand sshparams))) >>= \case
Left err -> do
warning $ "ssh connection to git-lfs remote failed: " ++ show err
return Nothing
Right resp -> case LFS.parseSshDiscoverEndpointResponse (fromString resp) of
Nothing -> do
warning $ "unexpected response from git-lfs remote when doing ssh endpoint discovery"
return Nothing
Just endpoint -> return (Just endpoint)
-- The endpoint is cached for later use.
getLFSEndpoint :: LFS.TransferRequestOperation -> TVar LFSHandle -> Annex (Maybe LFS.Endpoint)
getLFSEndpoint tro hv = do
h <- liftIO $ atomically $ readTVar hv
case f h of
Just endpoint -> return (Just endpoint)
Nothing -> discoverLFSEndpoint tro h >>= \case
Just endpoint -> do
liftIO $ atomically $ writeTVar hv $
case tro of
LFS.RequestDownload ->
h { downloadEndpoint = Just endpoint }
LFS.RequestUpload ->
h { uploadEndpoint = Just endpoint }
return (Just endpoint)
Nothing -> return Nothing
where
f = case tro of
LFS.RequestDownload -> downloadEndpoint
LFS.RequestUpload -> uploadEndpoint
-- Make an API request that is expected to have a small response body.
-- Not for use in downloading an object.
makeSmallAPIRequest :: Request -> Annex (Response L.ByteString)
makeSmallAPIRequest req = do
uo <- getUrlOptions
let req' = applyRequest uo req
liftIO $ debugM "git-lfs" (show req')
resp <- liftIO $ httpLbs req' (httpManager uo)
-- Only debug the http status code, not the json
-- which may include an authentication token.
liftIO $ debugM "git-lfs" (show $ responseStatus resp)
return resp
sendTransferRequest
:: LFS.IsTransferResponseOperation op
=> LFS.TransferRequest
-> LFS.Endpoint
-> Annex (Either String (LFS.TransferResponse op))
sendTransferRequest req endpoint =
case LFS.startTransferRequest endpoint req of
Just httpreq -> do
httpresp <- makeSmallAPIRequest $ setRequestCheckStatus httpreq
return $ case LFS.parseTransferResponse (responseBody httpresp) of
LFS.ParsedTransferResponse resp -> Right resp
LFS.ParsedTransferResponseError tro -> Left $
T.unpack $ LFS.resperr_message tro
LFS.ParseFailed err -> Left err
Nothing -> return $ Left "unable to parse git-lfs endpoint url"
extractKeySha256 :: Key -> Maybe LFS.SHA256
extractKeySha256 k = case keyVariety k of
SHA2Key (HashSize 256) (HasExt hasext)
| hasext -> eitherToMaybe $ E.decodeUtf8' (keyHash k)
| otherwise -> eitherToMaybe $ E.decodeUtf8' (keyName k)
_ -> Nothing
-- The size of an encrypted key is the size of the input data, but we need
-- the actual object size.
extractKeySize :: Key -> Maybe Integer
extractKeySize k
| isEncKey k = Nothing
| otherwise = keySize k
mkUploadRequest :: UUID -> Key -> FilePath -> Annex (LFS.TransferRequest, LFS.SHA256, Integer)
mkUploadRequest u k content = case (extractKeySha256 k, extractKeySize k) of
(Just sha256, Just size) ->
ret sha256 size
(_, Just size) -> do
sha256 <- calcsha256
remembersha256 sha256
ret sha256 size
_ -> do
sha256 <- calcsha256
size <- liftIO $ getFileSize content
rememberboth sha256 size
ret sha256 size
where
calcsha256 = liftIO $ T.pack . show . sha2_256 <$> L.readFile content
ret sha256 size = do
let obj = LFS.TransferRequestObject
{ LFS.req_oid = sha256
, LFS.req_size = size
}
let req = LFS.TransferRequest
{ LFS.req_operation = LFS.RequestUpload
, LFS.req_transfers = [LFS.Basic]
, LFS.req_ref = Nothing
, LFS.req_objects = [obj]
}
return (req, sha256, size)
remembersha256 sha256 = setRemoteState u k (T.unpack sha256)
rememberboth sha256 size = setRemoteState u k $
show size ++ " " ++ T.unpack sha256
mkDownloadRequest :: UUID -> Key -> Annex (Maybe (LFS.TransferRequest, LFS.SHA256, Integer))
mkDownloadRequest u k = case (extractKeySha256 k, extractKeySize k) of
(Just sha256, Just size) -> ret sha256 size
(_, Just size) ->
remembersha256 >>= \case
Just sha256 -> ret sha256 size
Nothing -> return Nothing
_ -> do
rememberboth >>= \case
Just (sha256, size) -> ret sha256 size
Nothing -> return Nothing
where
ret sha256 size = do
let obj = LFS.TransferRequestObject
{ LFS.req_oid = sha256
, LFS.req_size = size
}
let req = LFS.TransferRequest
{ LFS.req_operation = LFS.RequestDownload
, LFS.req_transfers = [LFS.Basic]
, LFS.req_ref = Nothing
, LFS.req_objects = [obj]
}
return $ Just (req, sha256, size)
remembersha256 = fmap T.pack <$> getRemoteState u k
rememberboth = maybe Nothing parse <$> getRemoteState u k
where
parse s = case words s of
[ssize, ssha256] -> do
size <- readish ssize
return (T.pack ssha256, size)
_ -> Nothing
store :: UUID -> TVar LFSHandle -> Storer
store u h = fileStorer $ \k src p -> getLFSEndpoint LFS.RequestUpload h >>= \case
Nothing -> return False
Just endpoint -> flip catchNonAsync failederr $ do
(req, sha256, size) <- mkUploadRequest u k src
sendTransferRequest req endpoint >>= \case
Left err -> do
warning err
return False
Right resp -> do
body <- liftIO $ httpBodyStorer src p
forM_ (LFS.objects resp) $
send body sha256 size
return True
where
send body sha256 size tro
| LFS.resp_oid tro /= sha256 || LFS.resp_size tro /= size =
giveup "git-lfs server requested other object than the one we asked to send"
| otherwise = case LFS.resp_error tro of
Just err -> giveup $
T.unpack $ LFS.respobjerr_message err
Nothing -> case LFS.resp_actions tro of
Nothing -> noop
Just op -> case LFS.uploadOperationRequests op body sha256 size of
Nothing -> giveup "unable to parse git-lfs server upload url"
Just [] -> noop -- server already has it
Just reqs -> forM_ reqs $
makeSmallAPIRequest . setRequestCheckStatus
failederr e = do
warning (show e)
return False
retrieve :: UUID -> TVar LFSHandle -> Retriever
retrieve u h = fileRetriever $ \dest k p -> getLFSEndpoint LFS.RequestDownload h >>= \case
Nothing -> giveup "unable to connect to git-lfs endpoint"
Just endpoint -> mkDownloadRequest u k >>= \case
Nothing -> giveup "unable to download this object from git-lfs"
Just (req, sha256, size) -> sendTransferRequest req endpoint >>= \case
Left err -> giveup (show err)
Right resp -> case LFS.objects resp of
[] -> giveup "git-lfs server did not provide a way to download this object"
(tro:_)
| LFS.resp_oid tro /= sha256 || LFS.resp_size tro /= size ->
giveup "git-lfs server replied with other object than the one we requested"
| otherwise -> go dest p tro
where
go dest p tro = case LFS.resp_error tro of
Just err -> giveup $ T.unpack $ LFS.respobjerr_message err
Nothing -> case LFS.resp_actions tro of
Nothing -> giveup "git-lfs server did not provide a way to download this object"
Just op -> case LFS.downloadOperationRequest op of
Nothing -> giveup "unable to parse git-lfs server download url"
Just req -> do
uo <- getUrlOptions
liftIO $ downloadConduit p req dest uo
checkKey :: UUID -> TVar LFSHandle -> CheckPresent
checkKey u h key = getLFSEndpoint LFS.RequestDownload h >>= \case
Nothing -> giveup "unable to connect to git-lfs endpoint"
Just endpoint -> mkDownloadRequest u key >>= \case
-- Unable to find enough information to request the key
-- from git-lfs, so it's not present there.
Nothing -> return False
Just (req, sha256, size) -> case LFS.startTransferRequest endpoint req of
Nothing -> giveup "unable to parse git-lfs endpoint url"
Just httpreq -> go sha256 size =<< makeSmallAPIRequest httpreq
where
go sha256 size httpresp
| responseStatus httpresp == status200 = go' sha256 size $
LFS.parseTransferResponse (responseBody httpresp)
| otherwise = giveup $
"git-lfs server refused request: " ++ show (responseStatus httpresp)
go' :: LFS.SHA256 -> Integer -> LFS.ParsedTransferResponse LFS.DownloadOperation -> Annex Bool
go' _ _ (LFS.ParseFailed err) =
giveup $ "unable to parse response from git-lfs server: " ++ err
-- If the server responds with a json error message,
-- the content is presumably not present.
go' _ _ (LFS.ParsedTransferResponseError _) = return False
-- If the server responds with at least one download operation,
-- we will assume the content is present. We could also try to HEAD
-- that download, but there's no guarantee HEAD is supported, and
-- at most that would detect breakage where the server is confused
-- about what objects it has.
go' sha256 size (LFS.ParsedTransferResponse resp) =
case LFS.objects resp of
[] -> return False
(tro:_)
| isNothing (LFS.resp_actions tro) -> return False
| isJust (LFS.resp_error tro) -> return False
| LFS.resp_oid tro /= sha256 || LFS.resp_size tro /= size ->
giveup "git-lfs server replied with other object than the one we requested"
| otherwise -> return True
retrieveCheap :: Key -> AssociatedFile -> FilePath -> Annex Bool
retrieveCheap _ _ _ = return False
remove :: TVar LFSHandle -> Remover
remove _h _key = do
warning "git-lfs does not support removing content"
return False

View file

@ -40,6 +40,7 @@ import qualified Remote.Adb
import qualified Remote.Tahoe
import qualified Remote.Glacier
import qualified Remote.Ddar
import qualified Remote.GitLFS
import qualified Remote.Hook
import qualified Remote.External
@ -63,6 +64,7 @@ remoteTypes = map adjustExportImportRemoteType
, Remote.Tahoe.remote
, Remote.Glacier.remote
, Remote.Ddar.remote
, Remote.GitLFS.remote
, Remote.Hook.remote
, Remote.External.remote
]

View file

@ -263,6 +263,7 @@ data RemoteGitConfig = RemoteGitConfig
, remoteAnnexAndroidDirectory :: Maybe FilePath
, remoteAnnexAndroidSerial :: Maybe String
, remoteAnnexGCrypt :: Maybe String
, remoteAnnexGitLFS :: Bool
, remoteAnnexDdarRepo :: Maybe String
, remoteAnnexHookType :: Maybe String
, remoteAnnexExternalType :: Maybe String
@ -321,6 +322,7 @@ extractRemoteGitConfig r remotename = do
, remoteAnnexAndroidDirectory = notempty $ getmaybe "androiddirectory"
, remoteAnnexAndroidSerial = notempty $ getmaybe "androidserial"
, remoteAnnexGCrypt = notempty $ getmaybe "gcrypt"
, remoteAnnexGitLFS = getbool "git-lfs" False
, remoteAnnexDdarRepo = getmaybe "ddarrepo"
, remoteAnnexHookType = notempty $ getmaybe "hooktype"
, remoteAnnexExternalType = notempty $ getmaybe "externaltype"

438
Utility/GitLFS.hs Normal file
View file

@ -0,0 +1,438 @@
{- git-lfs API
-
- https://github.com/git-lfs/git-lfs/blob/master/docs/api
-
- Copyright 2019 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE DeriveGeneric, FlexibleInstances, FlexibleContexts #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE LambdaCase #-}
module Utility.GitLFS (
-- * transfer requests
TransferRequest(..),
TransferRequestOperation(..),
TransferAdapter(..),
TransferRequestObject(..),
startTransferRequest,
-- * responses to transfer requests
TransferResponse(..),
TransferResponseOperation(..),
IsTransferResponseOperation,
DownloadOperation,
UploadOperation,
ParsedTransferResponse(..),
parseTransferResponse,
-- * making transfers
downloadOperationRequest,
uploadOperationRequests,
-- * endpoint discovery
Endpoint,
guessEndpoint,
HostUser,
sshDiscoverEndpointCommand,
parseSshDiscoverEndpointResponse,
-- * errors
TransferResponseError(..),
TransferResponseObjectError(..),
-- * additional data types
Url,
SHA256,
GitRef(..),
NumSeconds,
HTTPHeader,
HTTPHeaderValue,
) where
-- | This implementation of the git-lfs API uses http Request and Response,
-- but leaves actually connecting up the http client to the user.
--
-- You'll want to use a Manager that supports https, since the protocol
-- uses http basic auth.
--
-- Some LFS servers, notably Github's, may require a User-Agent header
-- in some of the requests, in order to allow eg, uploads. No such header
-- is added by dedault, so be sure to add your own.
import Data.Aeson
import Data.Aeson.Types
import GHC.Generics
import Network.HTTP.Client
import Data.List
import qualified Data.Map as M
import qualified Data.Text as T
import qualified Data.Text.Encoding as E
import qualified Data.ByteString.Lazy as L
import qualified Data.CaseInsensitive as CI
import qualified Network.URI as URI
data TransferRequest = TransferRequest
{ req_operation :: TransferRequestOperation
, req_transfers :: [TransferAdapter]
, req_ref :: Maybe GitRef
, req_objects :: [TransferRequestObject]
}
deriving (Generic, Show)
instance ToJSON TransferRequest where
toJSON = genericToJSON transferRequestOptions
toEncoding = genericToEncoding transferRequestOptions
instance FromJSON TransferRequest where
parseJSON = genericParseJSON transferRequestOptions
transferRequestOptions :: Options
transferRequestOptions = stripFieldPrefix nonNullOptions
data TransferRequestObject = TransferRequestObject
{ req_oid :: SHA256
, req_size :: Integer
}
deriving (Generic, Show)
instance ToJSON TransferRequestObject where
toJSON = genericToJSON transferRequestObjectOptions
toEncoding = genericToEncoding transferRequestObjectOptions
instance FromJSON TransferRequestObject where
parseJSON = genericParseJSON transferRequestObjectOptions
transferRequestObjectOptions :: Options
transferRequestObjectOptions = stripFieldPrefix defaultOptions
data TransferRequestOperation = RequestDownload | RequestUpload
deriving (Show)
instance ToJSON TransferRequestOperation where
toJSON RequestDownload = "download"
toJSON RequestUpload = "upload"
instance FromJSON TransferRequestOperation where
parseJSON (String "download") = pure RequestDownload
parseJSON (String "upload") = pure RequestUpload
parseJSON invalid = typeMismatch "TransferRequestOperation" invalid
data TransferResponse op = TransferResponse
{ transfer :: Maybe TransferAdapter
, objects :: [TransferResponseOperation op]
}
deriving (Generic, Show)
instance IsTransferResponseOperation op => ToJSON (TransferResponse op) where
toJSON = genericToJSON nonNullOptions
toEncoding = genericToEncoding nonNullOptions
instance IsTransferResponseOperation op => FromJSON (TransferResponse op)
-- | This is an error with a TransferRequest as a whole. It's also possible
-- for a TransferRequest to overall succeed, but fail for some
-- objects; such failures use TransferResponseObjectError.
data TransferResponseError = TransferResponseError
{ resperr_message :: T.Text
, resperr_request_id :: Maybe T.Text
, resperr_documentation_url :: Maybe Url
}
deriving (Generic, Show)
instance ToJSON TransferResponseError where
toJSON = genericToJSON transferResponseErrorOptions
toEncoding = genericToEncoding transferResponseErrorOptions
instance FromJSON TransferResponseError where
parseJSON = genericParseJSON transferResponseErrorOptions
transferResponseErrorOptions :: Options
transferResponseErrorOptions = stripFieldPrefix nonNullOptions
-- | An error with a single object within a TransferRequest.
data TransferResponseObjectError = TransferResponseObjectError
{ respobjerr_code :: Int
, respobjerr_message :: T.Text
}
deriving (Generic, Show)
instance ToJSON TransferResponseObjectError where
toJSON = genericToJSON transferResponseObjectErrorOptions
toEncoding = genericToEncoding transferResponseObjectErrorOptions
instance FromJSON TransferResponseObjectError where
parseJSON = genericParseJSON transferResponseObjectErrorOptions
transferResponseObjectErrorOptions :: Options
transferResponseObjectErrorOptions = stripFieldPrefix nonNullOptions
data TransferAdapter = Basic
deriving (Show)
instance ToJSON TransferAdapter where
toJSON Basic = "basic"
instance FromJSON TransferAdapter where
parseJSON (String "basic") = pure Basic
parseJSON invalid = typeMismatch "basic" invalid
data TransferResponseOperation op = TransferResponseOperation
{ resp_oid :: SHA256
, resp_size :: Integer
, resp_authenticated :: Maybe Bool
, resp_actions :: Maybe op
, resp_error :: Maybe TransferResponseObjectError
}
deriving (Generic, Show)
instance ToJSON op => ToJSON (TransferResponseOperation op) where
toJSON = genericToJSON transferResponseOperationOptions
toEncoding = genericToEncoding transferResponseOperationOptions
instance FromJSON op => FromJSON (TransferResponseOperation op) where
parseJSON = genericParseJSON transferResponseOperationOptions
transferResponseOperationOptions :: Options
transferResponseOperationOptions = stripFieldPrefix nonNullOptions
-- | Class of types that can be responses to a transfer request,
-- that contain an operation to use to make the transfer.
class (FromJSON op, ToJSON op) => IsTransferResponseOperation op
data DownloadOperation = DownloadOperation
{ download :: OperationParams }
deriving (Generic, Show)
instance IsTransferResponseOperation DownloadOperation
instance ToJSON DownloadOperation
instance FromJSON DownloadOperation
data UploadOperation = UploadOperation
{ upload :: OperationParams
, verify :: Maybe OperationParams
}
deriving (Generic, Show)
instance IsTransferResponseOperation UploadOperation
instance ToJSON UploadOperation where
toJSON = genericToJSON nonNullOptions
toEncoding = genericToEncoding nonNullOptions
instance FromJSON UploadOperation
data OperationParams = OperationParams
{ href :: Url
, header :: Maybe (M.Map HTTPHeader HTTPHeaderValue)
, expires_in :: Maybe NumSeconds
, expires_at :: Maybe T.Text
}
deriving (Generic, Show)
instance ToJSON OperationParams where
toJSON = genericToJSON nonNullOptions
toEncoding = genericToEncoding nonNullOptions
instance FromJSON OperationParams
data Verification = Verification
{ verification_oid :: SHA256
, verification_size :: Integer
}
deriving (Generic, Show)
instance ToJSON Verification where
toJSON = genericToJSON verificationOptions
toEncoding = genericToEncoding verificationOptions
instance FromJSON Verification where
parseJSON = genericParseJSON verificationOptions
verificationOptions :: Options
verificationOptions = stripFieldPrefix defaultOptions
-- | Sent over ssh connection when using that to find the endpoint.
data SshDiscoveryResponse = SshDiscoveryResponse
{ endpoint_href :: Url
, endpoint_header :: Maybe (M.Map HTTPHeader HTTPHeaderValue)
, endpoint_expires_in :: Maybe NumSeconds
, endpoint_expires_at :: Maybe T.Text
} deriving (Generic, Show)
instance ToJSON SshDiscoveryResponse where
toJSON = genericToJSON sshDiscoveryResponseOptions
toEncoding = genericToEncoding sshDiscoveryResponseOptions
instance FromJSON SshDiscoveryResponse where
parseJSON = genericParseJSON sshDiscoveryResponseOptions
sshDiscoveryResponseOptions :: Options
sshDiscoveryResponseOptions = stripFieldPrefix nonNullOptions
data GitRef = GitRef
{ name :: T.Text }
deriving (Generic, Show)
instance FromJSON GitRef
instance ToJSON GitRef
type SHA256 = T.Text
-- | The endpoint of a git-lfs server.
data Endpoint
= EndpointURI URI.URI
| EndpointDiscovered SshDiscoveryResponse
deriving (Show)
-- | Command to run via ssh with to discover an endpoint. The FilePath is
-- the location of the git repository on the ssh server.
--
-- Note that, when sshing to the server, you should take care that the
-- hostname you pass to ssh is really a hostname and not something that ssh
-- will parse an an option, such as -oProxyCommand=".
sshDiscoverEndpointCommand :: FilePath -> TransferRequestOperation -> [String]
sshDiscoverEndpointCommand remotepath tro =
[ "git-lfs-authenticate"
, remotepath
, case tro of
RequestDownload -> "download"
RequestUpload -> "upload"
]
-- | Parse the json output when doing ssh endpoint discovery.
parseSshDiscoverEndpointResponse :: L.ByteString -> Maybe Endpoint
parseSshDiscoverEndpointResponse resp = EndpointDiscovered <$> decode resp
-- | Guesses the LFS endpoint from the http url of a git remote.
--
-- https://github.com/git-lfs/git-lfs/blob/master/docs/api/server-discovery.md
guessEndpoint :: URI.URI -> Maybe Endpoint
guessEndpoint uri = case URI.uriScheme uri of
"https:" -> Just endpoint
"http:" -> Just endpoint
_ -> Nothing
where
endpoint = EndpointURI $ uri
-- force https because the git-lfs protocol uses http
-- basic auth tokens, which should not be exposed
{ URI.uriScheme = "https:"
, URI.uriPath = guessedpath
}
guessedpath
| ".git" `isSuffixOf` URI.uriPath uri =
URI.uriPath uri ++ "/info/lfs"
| ".git/" `isSuffixOf` URI.uriPath uri =
URI.uriPath uri ++ "info/lfs"
| otherwise = (droptrailing '/' (URI.uriPath uri)) ++ ".git/info/lfs"
droptrailing c = reverse . dropWhile (== c) . reverse
-- | Makes a Request that will start the process of making a transfer to or
-- from the LFS endpoint.
startTransferRequest :: Endpoint -> TransferRequest -> Maybe Request
startTransferRequest (EndpointURI uri) tr = do
r <- requestFromURI uri
return $ addLfsJsonHeaders $ r
-- Since this uses the LFS batch API, it adds /objects/batch
-- to the endpoint url.
{ path = path r <> "/objects/batch"
, method = "POST"
, requestBody = RequestBodyLBS (encode tr)
}
startTransferRequest (EndpointDiscovered sr) tr = do
uri <- URI.parseURI (T.unpack (endpoint_href sr))
req <- startTransferRequest (EndpointURI uri) tr
let headers = map convheader $ maybe [] M.toList $ endpoint_header sr
return $ req { requestHeaders = requestHeaders req ++ headers }
where
convheader (k, v) = (CI.mk (E.encodeUtf8 k), E.encodeUtf8 v)
-- | "user@host" or just the hostname.
type HostUser = String
addLfsJsonHeaders :: Request -> Request
addLfsJsonHeaders r = r
{ requestHeaders = requestHeaders r ++
[ ("Accept", lfsjson)
, ("Content-Type", lfsjson)
]
}
where
lfsjson = "application/vnd.git-lfs+json"
data ParsedTransferResponse op
= ParsedTransferResponse (TransferResponse op)
| ParsedTransferResponseError TransferResponseError
| ParseFailed String
-- | Parse the body of a response to a transfer request.
parseTransferResponse
:: IsTransferResponseOperation op
=> L.ByteString
-> ParsedTransferResponse op
parseTransferResponse resp = case eitherDecode resp of
Right tr -> ParsedTransferResponse tr
-- If unable to decode as a TransferResponse, try to decode
-- as a TransferResponseError instead, in case the LFS server
-- sent an error message.
Left err ->
either (const $ ParseFailed err) ParsedTransferResponseError $
eitherDecode resp
-- | Builds a http request to perform a download.
downloadOperationRequest :: DownloadOperation -> Maybe Request
downloadOperationRequest = operationParamsRequest . download
-- | Builds http request to perform an upload. The content to upload is
-- provided in the RequestBody, along with its SHA256 and size.
--
-- When the LFS server requested verification, there will be a second
-- Request that does that; it should be run only after the upload has
-- succeeded.
--
-- When the LFS server already contains the object, an empty list may be
-- returned.
uploadOperationRequests :: UploadOperation -> RequestBody -> SHA256 -> Integer -> Maybe [Request]
uploadOperationRequests op content oid size =
case (mkdlreq, mkverifyreq) of
(Nothing, _) -> Nothing
(Just dlreq, Nothing) -> Just [dlreq]
(Just dlreq, Just verifyreq) -> Just [dlreq, verifyreq]
where
mkdlreq = mkdlreq'
<$> operationParamsRequest (upload op)
mkdlreq' r = r
{ method = "PUT"
, requestBody = content
}
mkverifyreq = mkverifyreq'
<$> (operationParamsRequest =<< verify op)
mkverifyreq' r = addLfsJsonHeaders $ r
{ method = "POST"
, requestBody = RequestBodyLBS $ encode $
Verification oid size
}
operationParamsRequest :: OperationParams -> Maybe Request
operationParamsRequest ps = do
r <- parseRequest (T.unpack (href ps))
let headers = map convheader $ maybe [] M.toList (header ps)
return $ r { requestHeaders = headers }
where
convheader (k, v) = (CI.mk (E.encodeUtf8 k), E.encodeUtf8 v)
type Url = T.Text
type NumSeconds = Integer
type HTTPHeader = T.Text
type HTTPHeaderValue = T.Text
-- Prevent Nothing from serializing to null.
nonNullOptions :: Options
nonNullOptions = defaultOptions { omitNothingFields = True }
-- Remove prefix from field names.
stripFieldPrefix :: Options -> Options
stripFieldPrefix o =
o { fieldLabelModifier = drop 1 . dropWhile (/= '_') }

View file

@ -29,6 +29,7 @@ module Utility.Url (
assumeUrlExists,
download,
downloadQuiet,
downloadConduit,
sinkResponseFile,
downloadPartial,
parseURIRelaxed,
@ -335,8 +336,9 @@ download' noerror meterupdate url file uo =
case (urlDownloader uo, parseUrlRequest (show u)) of
(DownloadWithConduit (DownloadWithCurlRestricted r), Just req) -> catchJust
(matchStatusCodeException (== found302))
(downloadconduit req)
(downloadConduit meterupdate req file uo >> return True)
(followredir r)
`catchNonAsync` (dlfailed . show)
(DownloadWithConduit (DownloadWithCurlRestricted r), Nothing)
| isfileurl u -> downloadfile u
| isftpurl u -> downloadcurlrestricted r u url ftpport
@ -354,58 +356,6 @@ download' noerror meterupdate url file uo =
ftpport = 21
downloadconduit req = catchMaybeIO (getFileSize file) >>= \case
Just sz | sz > 0 -> resumeconduit req' sz
_ -> runResourceT $ do
liftIO $ debugM "url" (show req')
resp <- http req' (httpManager uo)
if responseStatus resp == ok200
then store zeroBytesProcessed WriteMode resp
else showrespfailure resp
where
req' = applyRequest uo $ req
-- Override http-client's default decompression of gzip
-- compressed files. We want the unmodified file content.
{ requestHeaders = (hAcceptEncoding, "identity") :
filter ((/= hAcceptEncoding) . fst)
(requestHeaders req)
, decompress = const False
}
alreadydownloaded sz s h = s == requestedRangeNotSatisfiable416
&& case lookup hContentRange h of
-- This could be improved by fixing
-- https://github.com/aristidb/http-types/issues/87
Just crh -> crh == B8.fromString ("bytes */" ++ show sz)
-- Some http servers send no Content-Range header when
-- the range extends beyond the end of the file.
-- There is no way to distinguish between the file
-- being the same size on the http server, vs
-- it being shorter than the file we already have.
-- So assume we have the whole content of the file
-- already, the same as wget and curl do.
Nothing -> True
-- Resume download from where a previous download was interrupted,
-- when supported by the http server. The server may also opt to
-- send the whole file rather than resuming.
resumeconduit req sz = catchJust
(matchStatusCodeHeadersException (alreadydownloaded sz))
dl
(const $ return True)
where
dl = runResourceT $ do
let req' = req { requestHeaders = resumeFromHeader sz : requestHeaders req }
liftIO $ debugM "url" (show req')
resp <- http req' (httpManager uo)
if responseStatus resp == partialContent206
then store (BytesProcessed sz) AppendMode resp
else if responseStatus resp == ok200
then store zeroBytesProcessed WriteMode resp
else showrespfailure resp
showrespfailure = liftIO . dlfailed . B8.toString
. statusMessage . responseStatus
showhttpexception he = do
let msg = case he of
HttpExceptionRequest _ (StatusCodeException r _) ->
@ -417,6 +367,7 @@ download' noerror meterupdate url file uo =
HttpExceptionRequest _ other -> show other
_ -> show he
dlfailed msg
dlfailed msg
| noerror = return False
| otherwise = do
@ -424,10 +375,6 @@ download' noerror meterupdate url file uo =
hFlush stderr
return False
store initialp mode resp = do
sinkResponseFile meterupdate initialp file mode resp
return True
basecurlparams = curlParams uo
[ if noerror
then Param "-S"
@ -453,6 +400,8 @@ download' noerror meterupdate url file uo =
L.writeFile file
return True
-- Conduit does not support ftp, so will throw an exception on a
-- redirect to a ftp url; fall back to curl.
followredir r ex@(HttpExceptionRequest _ (StatusCodeException resp _)) =
case headMaybe $ map decodeBS $ getResponseHeader hLocation resp of
Just url' -> case parseURIRelaxed url' of
@ -463,6 +412,68 @@ download' noerror meterupdate url file uo =
Nothing -> throwIO ex
followredir _ ex = throwIO ex
{- Download a perhaps large file using conduit, with auto-resume
- of incomplete downloads.
-
- Does not catch exceptions.
-}
downloadConduit :: MeterUpdate -> Request -> FilePath -> UrlOptions -> IO ()
downloadConduit meterupdate req file uo =
catchMaybeIO (getFileSize file) >>= \case
Just sz | sz > 0 -> resumedownload sz
_ -> runResourceT $ do
liftIO $ debugM "url" (show req')
resp <- http req' (httpManager uo)
if responseStatus resp == ok200
then store zeroBytesProcessed WriteMode resp
else respfailure resp
where
req' = applyRequest uo $ req
-- Override http-client's default decompression of gzip
-- compressed files. We want the unmodified file content.
{ requestHeaders = (hAcceptEncoding, "identity") :
filter ((/= hAcceptEncoding) . fst)
(requestHeaders req)
, decompress = const False
}
-- Resume download from where a previous download was interrupted,
-- when supported by the http server. The server may also opt to
-- send the whole file rather than resuming.
resumedownload sz = catchJust
(matchStatusCodeHeadersException (alreadydownloaded sz))
dl
(const noop)
where
dl = runResourceT $ do
let req'' = req' { requestHeaders = resumeFromHeader sz : requestHeaders req }
liftIO $ debugM "url" (show req'')
resp <- http req'' (httpManager uo)
if responseStatus resp == partialContent206
then store (BytesProcessed sz) AppendMode resp
else if responseStatus resp == ok200
then store zeroBytesProcessed WriteMode resp
else respfailure resp
alreadydownloaded sz s h = s == requestedRangeNotSatisfiable416
&& case lookup hContentRange h of
-- This could be improved by fixing
-- https://github.com/aristidb/http-types/issues/87
Just crh -> crh == B8.fromString ("bytes */" ++ show sz)
-- Some http servers send no Content-Range header when
-- the range extends beyond the end of the file.
-- There is no way to distinguish between the file
-- being the same size on the http server, vs
-- it being shorter than the file we already have.
-- So assume we have the whole content of the file
-- already, the same as wget and curl do.
Nothing -> True
store initialp mode resp =
sinkResponseFile meterupdate initialp file mode resp
respfailure = giveup . B8.toString . statusMessage . responseStatus
{- Sinks a Response's body to a file. The file can either be opened in
- WriteMode or AppendMode. Updates the meter as data is received.
-

View file

@ -1546,71 +1546,71 @@ Here are all the supported configuration settings.
For example, to use the wipe command, set it to `wipe -f %file`.
* `remote.<name>.rsyncurl`
* `remote.<name>.annex-rsyncurl`
Used by rsync special remotes, this configures
the location of the rsync repository to use. Normally this is automatically
set up by `git annex initremote`, but you can change it if needed.
* `remote.<name>.buprepo`
* `remote.<name>.annex-buprepo`
Used by bup special remotes, this configures
the location of the bup repository to use. Normally this is automatically
set up by `git annex initremote`, but you can change it if needed.
* `remote.<name>.ddarrepo`
* `remote.<name>.annex-ddarrepo`
Used by ddar special remotes, this configures
the location of the ddar repository to use. Normally this is automatically
set up by `git annex initremote`, but you can change it if needed.
* `remote.<name>.directory`
* `remote.<name>.annex-directory`
Used by directory special remotes, this configures
the location of the directory where annexed files are stored for this
remote. Normally this is automatically set up by `git annex initremote`,
but you can change it if needed.
* `remote.<name>.adb`
* `remote.<name>.annex-adb`
Used to identify remotes on Android devices accessed via adb.
Normally this is automatically set up by `git annex initremote`.
* `remote.<name>.androiddirectory`
* `remote.<name>.annex-androiddirectory`
Used by adb special remotes, this is the directory on the Android
device where files are stored for this remote. Normally this is
automatically set up by `git annex initremote`, but you can change
it if needed.
* `remote.<name>.androidserial`
* `remote.<name>.annex-androidserial`
Used by adb special remotes, this is the serial number of the Android
device used by the remote. Normally this is automatically set up by
`git annex initremote`, but you can change it if needed, eg when
upgrading to a new Android device.
* `remote.<name>.s3`
* `remote.<name>.annex-s3`
Used to identify Amazon S3 special remotes.
Normally this is automatically set up by `git annex initremote`.
* `remote.<name>.glacier`
* `remote.<name>.annex-glacier`
Used to identify Amazon Glacier special remotes.
Normally this is automatically set up by `git annex initremote`.
* `remote.<name>.webdav`
* `remote.<name>.annex-webdav`
Used to identify webdav special remotes.
Normally this is automatically set up by `git annex initremote`.
* `remote.<name>.tahoe`
* `remote.<name>.annex-tahoe`
Used to identify tahoe special remotes.
Points to the configuration directory for tahoe.
* `remote.<name>.gcrypt`
* `remote.<name>.annex-gcrypt`
Used to identify gcrypt special remotes.
Normally this is automatically set up by `git annex initremote`.
@ -1619,7 +1619,14 @@ Here are all the supported configuration settings.
If the gcrypt remote is accessible over ssh and has git-annex-shell
available to manage it, it's set to "shell".
* `remote.<name>.hooktype`, `remote.<name>.externaltype`
* `remote.<name>.annex-git-lfs`
Used to identify git-lfs special remotes.
Normally this is automatically set up by `git annex initremote`.
It is set to "true" if this is a git-lfs remote.
* `remote.<name>.annex-hooktype`, `remote.<name>.annex-externaltype`
Used by hook special remotes and external special remotes to record
the type of the remote.

View file

@ -15,6 +15,7 @@ the git history is not stored in them.
* [[ddar]]
* [[directory]]
* [[gcrypt]] (encrypted git repositories!)
* [[git-lfs]]
* [[hook]]
* [[rclone]]
* [[rsync]]

View file

@ -4,6 +4,12 @@ remote allows git-annex to also store its files in such repositories.
Naturally, git-annex encrypts the files it stores too, so everything
stored on the remote is encrypted.
This special remote needs the server hosting the remote repository
to either have git-annex-shell or rsync accessible via ssh. git-annex
uses those to store its content in the remote. If the remote repository
is instead hosted on a server using git-lfs, you can use the [[git-lfs]]
special remote instead of this one; it also supports using gcrypt.
See [[tips/fully_encrypted_git_repositories_with_gcrypt]] for some examples
of using gcrypt.
@ -35,11 +41,12 @@ shell access, and `rsync` must be installed. Those are the minimum
requirements, but it's also recommended to install git-annex on the remote
server, so that [[git-annex-shell]] can be used.
While you can use git-remote-gcrypt with servers like github, git-annex
can't store files on them. In such a case, you can just use
git-remote-gcrypt directly.
If you can't run `rsync` or `git-annex-shell` on the remote server,
you can't use this special remote. Other options are the [[git-lfs]]
special remote, which can also be combined with gcrypt, or
using git-remote-gcrypt to encrypt a remote that git-annex cannot use.
If you use encryption=hybrid, you can add more gpg keys that can access
If you use encryption=hybrid, you can later add more gpg keys that can access
the files git-annex stored in the gcrypt repository. However, due to the
way git-remote-gcrypt encrypts the git repository, you will need to somehow
force it to re-push everything again, so that the encrypted repository can

View file

@ -0,0 +1,101 @@
git-annex has a special remote that lets it store content in git-lfs
repositories.
See [[tips/storing_data_in_git-lfs]] for some examples of how to use this.
## configuration
These parameters can be passed to `git annex initremote` to configure
the git-lfs special remote:
* `url` - Required. The url to the git-lfs repository to use.
Can be either a ssh url (scp-style is also accepted) or a http url.
But currently, a http url accesses the git-lfs repository without
authentication. To authenticate, you will need to use a ssh url.
This parameter needs to be specified in the initial `git annex
initremote` but also each time you `git annex enableremote`
an existing git-lfs special remote. It's fine to use different urls
at different times as long as they point to the same git-lfs repository.
* `encryption` - One of "none", "hybrid", "shared", or "pubkey".
Required. See [[encryption]]. Also see the encryption notes below.
* `keyid` - Specifies the gpg key to use for encryption of both the files
git-annex stores in the repository, as well as to encrypt the git
repository itself when using gcrypt. May be repeated when
multiple participants should have access to the repository.
## efficiency note
Since git-lfs uses SHA256 checksums, git-annex needs to keep track of the
SHA256 of content stored in it, in order to be able to retrieve that
content. When a git-annex key uses a [[backend|backends]]
of SHA256 or SHA256E, that's easy. But, if a git-annex key uses some
other backend, git-annex has to additionally store the SHA256 checksum
into the git-annex branch when storing content in git-lfs. That adds a
small bit of size overhead to using this remote.
When encrypting data sent to the git-lfs remote, git-annex always has to
store its SHA256 checksum in the git-annex branch.
## encryption notes
To encrypt a git-lfs repository, there are two separate things that
have to be encrypted: the data git-annex stores there, and the content
of the git repository itself. After all, a git-lfs remote is a git remote
and git push doesn't encrypt data by default.
To encrypt your git pushes, you can use
[git-remote-gcrypt](https://spwhitton.name/tech/code/git-remote-gcrypt/)
and prefix the repository url with "gcrypt::"
To make git-annex encrypt the data it stores, you can use the encrption=
configuration.
An example of combining the two:
git annex initremote lfstest type=git-lfs url=gcrypt::git@github.com:username/somerepo.git encryption=shared
In that example, the git-annex shared encryption key is stored in
git, but that's ok because git push will encrypt it, along with all
the other git data, using your gpg key. You could instead use
"encryption=shared keyid=" to make git-annex and gcrypt both encrypt
to a specified gpg key.
git-annex will detect if one part of the repository is encrypted,
but you forgot to encrypt the other part, and will refuse to set up
such an insecure half-encrypted repository.
If you use encryption=hybrid, you can later add more gpg keys that can access
the files git-annex stored in the git-lfs repository. However, due to the
way git-remote-gcrypt encrypts the git repository, you will need to somehow
force it to re-push everything again, so that the encrypted repository can
be decrypted by the added keys. Probably this can be done by setting
`GCRYPT_FULL_REPACK` and doing a forced push of branches.
git-annex will set `remote.<name>`gcrypt-publish-participants` when setting
up a repository that uses gcrypt. This is done to avoid unncessary gpg
passphrase prompts, but it does publish the gpg keyids that can decrypt the
repository. Unset it if you need to obscure that.
## limitations
The git-lfs protocol does not support deleting content, so git-annex
**cannot delete anything** from a git-lfs special remote.
The git-lfs protocol does not support resuming uploads, and so an
interrupted upload will have to restart from the beginning. Interrupted
downloads will resume.
git-lfs has a concept of git ref based access control, so a user may only
be able to send content associated with a particular git ref. git-annex
does not currently provide any git ref, so won't work with a git-lfs server
that uses that.
git-annex only supports the "basic" git-lfs transfer adapter, but that's
the one used by most git-lfs servers.
The git-lfs protocol is designed around batching of transfers, but
git-annex doesn't do batching. This may cause it to fall afoul of
rate limiting of git-lfs servers when transferring a lot of files.

View file

@ -59,4 +59,9 @@ Walltime,
Caleb Allen,
TD,
Pedro Araújo,
Ryan Newton,
David W,
L N D,
EVAN HENSHAWPLATH,
James Read,
Luke Shumaker,

View file

@ -1,8 +1,7 @@
[git-remote-gcrypt](https://spwhitton.name/tech/code/git-remote-gcrypt/)
adds support for encrypted remotes to git. The git-annex
[[gcrypt special remote|special_remotes/gcrypt]] allows git-annex to
also store its files in such repositories. Naturally, git-annex encrypts
the files it stores too, so everything stored on the remote is encrypted.
adds support for encrypted remotes to git. Combine this with git-annex
encrypting the files it stores in a remote, and you can fully encrypt
all the data stored on a remote.
Here are some ways you can use this awesome stuff..
@ -15,7 +14,12 @@ repositories.
## prerequisites
* Install [git-remote-gcrypt](https://spwhitton.name/tech/code/git-remote-gcrypt/)
* Install git-annex version 4.20130909 or newer.
* Set up a gpg key. You might consider generating a special purpose key
just for this use case, since you may end up wanting to put the key
on multiple machines that you would not trust with your main gpg key.
The examples below use "$mykey" where you should put your gpg keyid.
## encrypted backup drive
@ -24,18 +28,18 @@ both the full contents of your git repository, and all the files you
instruct git-annex to store on it, and everything will be encrypted so that
only you can see it.
First, you need to set up a gpg key. You might consider generating a
special purpose key just for this use case, since you may end up wanting to
put the key on multiple machines that you would not trust with your
main gpg key.
You need to tell git-annex the keyid of the key when setting up the
encrypted repository:
Here's how to set up the encrypted repository:
git init --bare /mnt/encryptedbackup
git annex initremote encryptedbackup type=gcrypt gitrepo=/mnt/encryptedbackup keyid=$mykey
git annex sync encryptedbackup
(Remember to replace "$mykey" with the keyid of your gpg key.)
This uses the [[gcrypt special remote|special_remotes/gcrypt]] to encrypt
pushes to the git remote, and git-annex will also encrypt the files it
stores there.
Now you can copy (or even move) files to the repository. After
sending files to it, you'll probably want to do a sync, which pushes
the git repository changes to it as well.
@ -62,23 +66,25 @@ the gpg key used to encrypt it, and then:
## encrypted git-annex repository on a ssh server
If you have a ssh server that has rsync installed, you can set up an
encrypted repository there. Works just like the encrypted drive except
without the cable.
If you have a ssh server that has git-annex or rsync installed on it, you
can set up an encrypted repository there. Works just like the encrypted
drive except without the cable.
First, on the server, run:
git init --bare encryptedrepo
(Also, install git-annex on the server if it's possible & easy to do so.
While this will work without git-annex being installed on the server, it
is recommended to have it installed.)
Now, in your existing git-annex repository, set up the encrypted remote:
git annex initremote encryptedrepo type=gcrypt gitrepo=ssh://my.server/home/me/encryptedrepo keyid=$mykey
git annex sync encryptedrepo
(Remember to replace "$mykey" with the keyid of your gpg key.)
This uses the [[gcrypt special remote|special_remotes/gcrypt]] to encrypt
pushes to the git remote, and git-annex will also encrypt the files it
stores there.
If you're going to be sharing this repository with others, be sure to also
include their keyids, by specifying keyid= repeatedly.
@ -97,11 +103,31 @@ used to encrypt it can check it out:
git annex enableremote encryptedrepo gitrepo=ssh://my.server/home/me/encryptedrepo
git annex get --from encryptedrepo
## private encrypted git remote on hosting site
## private encrypted git remote on a git-lfs hosting site
Some git repository hosting sites do not support git-annex, but do support
the similar git-lfs for storing large files alongside a git repository.
git-annex can use the git-lfs protocol to store files in such repositories,
and with gcrypt, everything stored in the remote can be encrypted.
First, make a new, empty git repository on the hosting site.
Get the ssh clone url for the repository, which might look
like "git@github.com:username/somerepo.git"
Then, in your git-annex repository, set up the encrypted remote:
git annex initremote lfstest type=git-lfs url=gcrypt::git@github.com:username/somerepo.git keyid=$mykey
(Remember to replace "$mykey" with the keyid of your gpg key.)
This uses the [[git-lfs special remote|special_remotes/git-lfs]], and the
`gcrypt::` prefix on the url makes pushes be encrypted with gcrypt.
## private encrypted git remote on a git hosting site
You can use gcrypt to store your git repository in encrypted form on any
hosting site that supports git. Only you can decrypt its contents.
Using it this way, git-annex does not store large files on the hosting site; it's
hosting site that supports git. Only you can decrypt its contents. Using it
this way, git-annex does not store large files on the hosting site; it's
only used to store your git repository itself.
git remote add encrypted gcrypt::ssh://hostingsite/myrepo.git
@ -115,7 +141,7 @@ url you used when setting it up:
git clone gcrypt::ssh://hostingsite/myrepo.git
## multiuser encrypted git remote on hosting site
## multiuser encrypted git remote on a git hosting site
Suppose two users want to share an encrypted git remote. Both of you
need to set up the remote, and configure gcrypt to encrypt it so that both

View file

@ -0,0 +1,34 @@
git-annex can store data in [git-lfs](https://git-lfs.github.com/)
repositories, using the [[git-lfs special remote|special_remotes/git-lfs]].
You do not need the git-lfs program installed to use it, just a recent
enough version of git-annex.
Here's how to initialize a git-lfs special remote on Github.
git annex initremote lfs type=git-lfs encryption=none url=git@github.com:yourname/yourrepo.git
In this example, the remote will not be encrypted, so anyone who can access
it can see its contents. It is possible to encrypt everything stored in a
git-lfs remote, see [[fully_encrypted_git_repositories_with_gcrypt]].
Once the git-lfs remote is set up, git-annex can store and retrieve
content in the usual ways:
git annex copy * --to lfs
git annex get --from lfs
But, git-annex **cannot delete anything** from a git-lfs special remote,
because the protocol does not support deletion.
A git-lfs special remote also functions as a regular git remote. You can
use things like `git push` and `git pull` with it.
To enable an existing git-lgs remote in another clone of the repository,
you'll need to provide an url to it again. It's ok to provide a different
url as long as it points to the same git-lfs repository.
git annex enableremote lfs url=https://github.com/yourname/yourrepo.git
Note that http urls currently only allow read access to the git-lfs
repository.

View file

@ -931,6 +931,7 @@ Executable git-annex
Remote.External.Types
Remote.GCrypt
Remote.Git
Remote.GitLFS
Remote.Glacier
Remote.Helper.AWS
Remote.Helper.Chunked
@ -1039,6 +1040,7 @@ Executable git-annex
Utility.FileSystemEncoding
Utility.Format
Utility.FreeDesktop
Utility.GitLFS
Utility.Glob
Utility.Gpg
Utility.Hash