From 6eb3a56daa7665419551b2b7680c2c1f0c1c893f Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Mon, 29 Jul 2019 15:47:17 -0400 Subject: [PATCH 01/37] git-lfs protocol json instances Have only tested that the ToJSON instances look like what's documented for the git-lfs API. Have not tested the FromJSON instances at all. --- Lfs.hs | 153 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 153 insertions(+) create mode 100644 Lfs.hs diff --git a/Lfs.hs b/Lfs.hs new file mode 100644 index 0000000000..7a35213e19 --- /dev/null +++ b/Lfs.hs @@ -0,0 +1,153 @@ +{-# LANGUAGE DeriveGeneric, FlexibleInstances, OverloadedStrings #-} + +import Data.Aeson +import Data.Aeson.Types +import GHC.Generics +import qualified Data.Map as M +import qualified Data.Text as T + +data TransferRequest = TransferRequest + { req_operation :: TransferRequestOperation + , req_transfers :: [TransferAdapter] + , req_ref :: Maybe GitRef + , req_objects :: [TransferRequestObject] + } + deriving (Generic, Show) + +instance ToJSON TransferRequest where + toJSON = genericToJSON transferRequestOptions + toEncoding = genericToEncoding transferRequestOptions + +instance FromJSON TransferRequest where + parseJSON = genericParseJSON transferRequestOptions + +transferRequestOptions :: Options +transferRequestOptions = nonNullOptions + -- remove "req_" + { fieldLabelModifier = drop 4 } + +data TransferRequestObject = TransferRequestObject + { oid :: SHA256 + , size :: Integer + } + deriving (Generic, Show) + +instance ToJSON TransferRequestObject +instance FromJSON TransferRequestObject + +data TransferRequestOperation = RequestDownload | RequestUpload + deriving (Show) + +instance ToJSON TransferRequestOperation where + toJSON RequestDownload = "download" + toJSON RequestUpload = "upload" + +instance FromJSON TransferRequestOperation where + parseJSON (String "download") = pure RequestDownload + parseJSON (String "upload") = pure RequestUpload + parseJSON invalid = typeMismatch "TransferRequestOperation" invalid + +data TransferResponse op = TransferResponse + { transfer :: TransferAdapter + , objects :: [TransferResponseOperation op] + } + deriving (Generic, Show) + +instance ToJSON (TransferResponse DownloadOperation) +instance FromJSON (TransferResponse DownloadOperation) +instance ToJSON (TransferResponse UploadOperation) +instance FromJSON (TransferResponse UploadOperation) + +data TransferResponseError = TransferResponseError + { message :: T.Text + , request_id :: Maybe T.Text + , documentation_url :: Maybe Url + } + deriving (Generic, Show) + +instance ToJSON TransferResponseError where + toJSON = genericToJSON nonNullOptions + toEncoding = genericToEncoding nonNullOptions + +instance FromJSON TransferResponseError + +data TransferAdapter = Basic + deriving (Show) + +instance ToJSON TransferAdapter where + toJSON Basic = "basic" + +instance FromJSON TransferAdapter where + parseJSON (String "basic") = pure Basic + parseJSON invalid = typeMismatch "basic" invalid + +data TransferResponseOperation op = TransferResponseOperation + { resp_oid :: SHA256 + , resp_size :: Integer + , resp_authenticated :: Bool + , resp_actions :: op + } + deriving (Generic, Show) + +instance ToJSON op => ToJSON (TransferResponseOperation op) where + toJSON = genericToJSON transferResponseOperationOptions + toEncoding = genericToEncoding transferResponseOperationOptions + +instance FromJSON op => FromJSON (TransferResponseOperation op) where + parseJSON = genericParseJSON transferResponseOperationOptions + +transferResponseOperationOptions :: Options +transferResponseOperationOptions = defaultOptions + -- remove "resp_" + { fieldLabelModifier = drop 5 } + +data DownloadOperation = DownloadOperation + { download :: OperationParams } + deriving (Generic, Show) + +instance ToJSON DownloadOperation +instance FromJSON DownloadOperation + +data UploadOperation = UploadOperation + { upload :: OperationParams } + deriving (Generic, Show) + +instance FromJSON UploadOperation +instance ToJSON UploadOperation + +data OperationParams = OperationParams + { href :: Url + , header :: M.Map HTTPHeader HTTPHeaderValue + , expires_in :: Maybe NumSeconds + , expires_at :: Maybe T.Text + } + deriving (Generic, Show) + +instance ToJSON OperationParams where + toJSON = genericToJSON nonNullOptions + toEncoding = genericToEncoding nonNullOptions + +instance FromJSON OperationParams + +data GitRef = GitRef + { name :: T.Text } + deriving (Generic, Show) + +instance FromJSON GitRef +instance ToJSON GitRef + +type SHA256 = T.Text + +type Url = T.Text + +type NumSeconds = Integer + +type HTTPHeader = T.Text + +type HTTPHeaderValue = T.Text + +-- Prevent Nothing from serializing to null. +nonNullOptions :: Options +nonNullOptions = defaultOptions { omitNothingFields = True } + + From 909952d8e51f302a3b4750b0f124815d03f887bb Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 31 Jul 2019 12:06:56 -0400 Subject: [PATCH 02/37] got transfer response part of protocol working Testing against github, I was able to request an unauthenticated download of an oid and parse the response that contains the url of the object. --- Lfs.hs | 64 +++++++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 55 insertions(+), 9 deletions(-) diff --git a/Lfs.hs b/Lfs.hs index 7a35213e19..193e924bca 100644 --- a/Lfs.hs +++ b/Lfs.hs @@ -1,10 +1,49 @@ -{-# LANGUAGE DeriveGeneric, FlexibleInstances, OverloadedStrings #-} +{-# LANGUAGE DeriveGeneric, FlexibleInstances, FlexibleContexts #-} +{-# LANGUAGE OverloadedStrings #-} import Data.Aeson import Data.Aeson.Types import GHC.Generics import qualified Data.Map as M import qualified Data.Text as T +import qualified Data.ByteString.Lazy as L +import Network.HTTP.Client + +-- | Adds necessary headers to a Request and makes it post the +-- specified TransferRequest. +-- +-- The input Request's url should be the discovered LFS endpoint. +-- Since this uses the LFS batch API, it adds /objects/batch to the end of +-- that url. +transferRequest :: Request -> TransferRequest -> Request +transferRequest r tr = r + { path = path r <> "/objects/batch" + , requestHeaders = + [ ("Accept", lfsjson) + , ("Content-Type", lfsjson) + ] + , method = "POST" + , requestBody = RequestBodyLBS (encode tr) + } + where + lfsjson = "application/vnd.git-lfs+json" + +type ParsedTransferResponse op = + Either (Either String TransferResponseError) (TransferResponse op) + +-- | Parse the body of a response to a transfer request. +parseResponseBody + :: IsTransferResponseOperation op + => L.ByteString + -> ParsedTransferResponse op +parseResponseBody resp = case eitherDecode resp of + -- If unable to decode as a TransferResponse, try to decode + -- as a TransferResponseError instead, in case the LFS server + -- sent an error message. + Left err -> case eitherDecode resp of + Right responseerror -> Left (Right responseerror) + Left _ -> Left $ Left err + Right resp -> Right resp data TransferRequest = TransferRequest { req_operation :: TransferRequestOperation @@ -48,15 +87,16 @@ instance FromJSON TransferRequestOperation where parseJSON invalid = typeMismatch "TransferRequestOperation" invalid data TransferResponse op = TransferResponse - { transfer :: TransferAdapter + { transfer :: Maybe TransferAdapter , objects :: [TransferResponseOperation op] } deriving (Generic, Show) -instance ToJSON (TransferResponse DownloadOperation) -instance FromJSON (TransferResponse DownloadOperation) -instance ToJSON (TransferResponse UploadOperation) -instance FromJSON (TransferResponse UploadOperation) +instance IsTransferResponseOperation op => ToJSON (TransferResponse op) where + toJSON = genericToJSON nonNullOptions + toEncoding = genericToEncoding nonNullOptions + +instance IsTransferResponseOperation op => FromJSON (TransferResponse op) data TransferResponseError = TransferResponseError { message :: T.Text @@ -84,7 +124,7 @@ instance FromJSON TransferAdapter where data TransferResponseOperation op = TransferResponseOperation { resp_oid :: SHA256 , resp_size :: Integer - , resp_authenticated :: Bool + , resp_authenticated :: Maybe Bool , resp_actions :: op } deriving (Generic, Show) @@ -97,14 +137,19 @@ instance FromJSON op => FromJSON (TransferResponseOperation op) where parseJSON = genericParseJSON transferResponseOperationOptions transferResponseOperationOptions :: Options -transferResponseOperationOptions = defaultOptions +transferResponseOperationOptions = nonNullOptions -- remove "resp_" { fieldLabelModifier = drop 5 } +-- | Class of types that can be responses to a transfer request, +-- that contain an operation to use to make the transfer. +class (FromJSON op, ToJSON op) => IsTransferResponseOperation op + data DownloadOperation = DownloadOperation { download :: OperationParams } deriving (Generic, Show) +instance IsTransferResponseOperation DownloadOperation instance ToJSON DownloadOperation instance FromJSON DownloadOperation @@ -112,12 +157,13 @@ data UploadOperation = UploadOperation { upload :: OperationParams } deriving (Generic, Show) +instance IsTransferResponseOperation UploadOperation instance FromJSON UploadOperation instance ToJSON UploadOperation data OperationParams = OperationParams { href :: Url - , header :: M.Map HTTPHeader HTTPHeaderValue + , header :: Maybe (M.Map HTTPHeader HTTPHeaderValue) , expires_in :: Maybe NumSeconds , expires_at :: Maybe T.Text } From b4d2fc6219a530bc534ff783b369091adb11a26e Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 31 Jul 2019 12:27:27 -0400 Subject: [PATCH 03/37] add downloadRequests --- Lfs.hs | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) diff --git a/Lfs.hs b/Lfs.hs index 193e924bca..4e4e60ee51 100644 --- a/Lfs.hs +++ b/Lfs.hs @@ -6,7 +6,11 @@ import Data.Aeson.Types import GHC.Generics import qualified Data.Map as M import qualified Data.Text as T +import qualified Data.Text.Encoding as E import qualified Data.ByteString.Lazy as L +import qualified Data.CaseInsensitive as CI +import Data.Maybe +import Network.HTTP.Types import Network.HTTP.Client -- | Adds necessary headers to a Request and makes it post the @@ -45,6 +49,18 @@ parseResponseBody resp = case eitherDecode resp of Left _ -> Left $ Left err Right resp -> Right resp +-- | Builds http requests that can be used to download the objects that +-- were requested using a TransferRequest. +downloadRequests :: TransferResponse DownloadOperation -> ([(TransferResponseOperation DownloadOperation, Maybe Request)]) +downloadRequests = map mkreq . objects + where + mkreq op = (op, mkreq' (download (resp_actions op))) + mkreq' ps = do + r <- parseRequest (T.unpack (href ps)) + let headers = map convheader $ maybe [] M.toList (header ps) + return $ r { requestHeaders = headers } + convheader (k, v) = (CI.mk (E.encodeUtf8 k), E.encodeUtf8 v) + data TransferRequest = TransferRequest { req_operation :: TransferRequestOperation , req_transfers :: [TransferAdapter] @@ -195,5 +211,3 @@ type HTTPHeaderValue = T.Text -- Prevent Nothing from serializing to null. nonNullOptions :: Options nonNullOptions = defaultOptions { omitNothingFields = True } - - From 9040fea09ee5efb7015ecc51a6987748f378f616 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 31 Jul 2019 13:22:33 -0400 Subject: [PATCH 04/37] add uploadRequests --- Lfs.hs | 110 +++++++++++++++++++++++++++++++-------------------------- 1 file changed, 60 insertions(+), 50 deletions(-) diff --git a/Lfs.hs b/Lfs.hs index 4e4e60ee51..fb10ea5455 100644 --- a/Lfs.hs +++ b/Lfs.hs @@ -9,58 +9,8 @@ import qualified Data.Text as T import qualified Data.Text.Encoding as E import qualified Data.ByteString.Lazy as L import qualified Data.CaseInsensitive as CI -import Data.Maybe -import Network.HTTP.Types import Network.HTTP.Client --- | Adds necessary headers to a Request and makes it post the --- specified TransferRequest. --- --- The input Request's url should be the discovered LFS endpoint. --- Since this uses the LFS batch API, it adds /objects/batch to the end of --- that url. -transferRequest :: Request -> TransferRequest -> Request -transferRequest r tr = r - { path = path r <> "/objects/batch" - , requestHeaders = - [ ("Accept", lfsjson) - , ("Content-Type", lfsjson) - ] - , method = "POST" - , requestBody = RequestBodyLBS (encode tr) - } - where - lfsjson = "application/vnd.git-lfs+json" - -type ParsedTransferResponse op = - Either (Either String TransferResponseError) (TransferResponse op) - --- | Parse the body of a response to a transfer request. -parseResponseBody - :: IsTransferResponseOperation op - => L.ByteString - -> ParsedTransferResponse op -parseResponseBody resp = case eitherDecode resp of - -- If unable to decode as a TransferResponse, try to decode - -- as a TransferResponseError instead, in case the LFS server - -- sent an error message. - Left err -> case eitherDecode resp of - Right responseerror -> Left (Right responseerror) - Left _ -> Left $ Left err - Right resp -> Right resp - --- | Builds http requests that can be used to download the objects that --- were requested using a TransferRequest. -downloadRequests :: TransferResponse DownloadOperation -> ([(TransferResponseOperation DownloadOperation, Maybe Request)]) -downloadRequests = map mkreq . objects - where - mkreq op = (op, mkreq' (download (resp_actions op))) - mkreq' ps = do - r <- parseRequest (T.unpack (href ps)) - let headers = map convheader $ maybe [] M.toList (header ps) - return $ r { requestHeaders = headers } - convheader (k, v) = (CI.mk (E.encodeUtf8 k), E.encodeUtf8 v) - data TransferRequest = TransferRequest { req_operation :: TransferRequestOperation , req_transfers :: [TransferAdapter] @@ -200,6 +150,66 @@ instance ToJSON GitRef type SHA256 = T.Text +-- | Adds necessary headers to a Request and makes it post the +-- specified TransferRequest. +-- +-- The input Request's url should be the discovered LFS endpoint. +-- Since this uses the LFS batch API, it adds /objects/batch to the end of +-- that url. +transferRequest :: Request -> TransferRequest -> Request +transferRequest r tr = r + { path = path r <> "/objects/batch" + , requestHeaders = + [ ("Accept", lfsjson) + , ("Content-Type", lfsjson) + ] + , method = "POST" + , requestBody = RequestBodyLBS (encode tr) + } + where + lfsjson = "application/vnd.git-lfs+json" + +type ParsedTransferResponse op = + Either (Either String TransferResponseError) (TransferResponse op) + +-- | Parse the body of a response to a transfer request. +parseResponseBody + :: IsTransferResponseOperation op + => L.ByteString + -> ParsedTransferResponse op +parseResponseBody resp = case eitherDecode resp of + -- If unable to decode as a TransferResponse, try to decode + -- as a TransferResponseError instead, in case the LFS server + -- sent an error message. + Left err -> case eitherDecode resp of + Right responseerror -> Left (Right responseerror) + Left _ -> Left $ Left err + Right tr -> Right tr + +-- | Builds http requests that can be used to download the objects that +-- were requested using a TransferRequest. +downloadRequests :: TransferResponse DownloadOperation -> ([(TransferResponseOperation DownloadOperation, Maybe Request)]) +downloadRequests = transferRequests download + +-- | Builds http requests that can be used to upload objects. +-- +-- When the server already has an object, no request will be generated. +-- +-- The requestBody is not set here. When making a request, +-- the content of the object needs to be provided as the body. +uploadRequests :: TransferResponse UploadOperation -> ([(TransferResponseOperation UploadOperation, Maybe Request)]) +uploadRequests = transferRequests upload + +transferRequests :: (op -> OperationParams) -> TransferResponse op -> ([(TransferResponseOperation op, Maybe Request)]) +transferRequests getps = map mkreq . objects + where + mkreq op = (op, mkreq' (getps (resp_actions op))) + mkreq' ps = do + r <- parseRequest (T.unpack (href ps)) + let headers = map convheader $ maybe [] M.toList (header ps) + return $ r { requestHeaders = headers } + convheader (k, v) = (CI.mk (E.encodeUtf8 k), E.encodeUtf8 v) + type Url = T.Text type NumSeconds = Integer From f4e8ab969ec9ffcd05e5fc2609a6737510b27d39 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 31 Jul 2019 14:55:15 -0400 Subject: [PATCH 05/37] improve protocol support support verification after upload support for errors embedded in json --- Lfs.hs | 160 +++++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 117 insertions(+), 43 deletions(-) diff --git a/Lfs.hs b/Lfs.hs index fb10ea5455..56ff1723f0 100644 --- a/Lfs.hs +++ b/Lfs.hs @@ -27,18 +27,23 @@ instance FromJSON TransferRequest where parseJSON = genericParseJSON transferRequestOptions transferRequestOptions :: Options -transferRequestOptions = nonNullOptions - -- remove "req_" - { fieldLabelModifier = drop 4 } +transferRequestOptions = stripFieldPrefix nonNullOptions data TransferRequestObject = TransferRequestObject - { oid :: SHA256 - , size :: Integer + { req_oid :: SHA256 + , req_size :: Integer } deriving (Generic, Show) -instance ToJSON TransferRequestObject -instance FromJSON TransferRequestObject +instance ToJSON TransferRequestObject where + toJSON = genericToJSON transferRequestObjectOptions + toEncoding = genericToEncoding transferRequestObjectOptions + +instance FromJSON TransferRequestObject where + parseJSON = genericParseJSON transferRequestObjectOptions + +transferRequestObjectOptions :: Options +transferRequestObjectOptions = stripFieldPrefix defaultOptions data TransferRequestOperation = RequestDownload | RequestUpload deriving (Show) @@ -64,18 +69,42 @@ instance IsTransferResponseOperation op => ToJSON (TransferResponse op) where instance IsTransferResponseOperation op => FromJSON (TransferResponse op) +-- | This is an error with a TransferRequest as a whole. It's also possible +-- for a TransferRequest to overall succeed, but fail for some +-- objects; such failures use TransferResponseObjectError. data TransferResponseError = TransferResponseError - { message :: T.Text - , request_id :: Maybe T.Text - , documentation_url :: Maybe Url + { resperr_message :: T.Text + , resperr_request_id :: Maybe T.Text + , resperr_documentation_url :: Maybe Url } deriving (Generic, Show) instance ToJSON TransferResponseError where - toJSON = genericToJSON nonNullOptions - toEncoding = genericToEncoding nonNullOptions + toJSON = genericToJSON transferResponseErrorOptions + toEncoding = genericToEncoding transferResponseErrorOptions -instance FromJSON TransferResponseError +instance FromJSON TransferResponseError where + parseJSON = genericParseJSON transferResponseErrorOptions + +transferResponseErrorOptions :: Options +transferResponseErrorOptions = stripFieldPrefix nonNullOptions + +-- | An error with a single object within a TransferRequest. +data TransferResponseObjectError = TransferResponseObjectError + { respobjerr_code :: Int + , respobjerr_message :: T.Text + } + deriving (Generic, Show) + +instance ToJSON TransferResponseObjectError where + toJSON = genericToJSON transferResponseObjectErrorOptions + toEncoding = genericToEncoding transferResponseObjectErrorOptions + +instance FromJSON TransferResponseObjectError where + parseJSON = genericParseJSON transferResponseObjectErrorOptions + +transferResponseObjectErrorOptions :: Options +transferResponseObjectErrorOptions = stripFieldPrefix nonNullOptions data TransferAdapter = Basic deriving (Show) @@ -91,7 +120,8 @@ data TransferResponseOperation op = TransferResponseOperation { resp_oid :: SHA256 , resp_size :: Integer , resp_authenticated :: Maybe Bool - , resp_actions :: op + , resp_actions :: Maybe op + , resp_error :: Maybe TransferResponseObjectError } deriving (Generic, Show) @@ -103,9 +133,7 @@ instance FromJSON op => FromJSON (TransferResponseOperation op) where parseJSON = genericParseJSON transferResponseOperationOptions transferResponseOperationOptions :: Options -transferResponseOperationOptions = nonNullOptions - -- remove "resp_" - { fieldLabelModifier = drop 5 } +transferResponseOperationOptions = stripFieldPrefix nonNullOptions -- | Class of types that can be responses to a transfer request, -- that contain an operation to use to make the transfer. @@ -120,12 +148,18 @@ instance ToJSON DownloadOperation instance FromJSON DownloadOperation data UploadOperation = UploadOperation - { upload :: OperationParams } + { upload :: OperationParams + , verify :: Maybe OperationParams + } deriving (Generic, Show) instance IsTransferResponseOperation UploadOperation + +instance ToJSON UploadOperation where + toJSON = genericToJSON nonNullOptions + toEncoding = genericToEncoding nonNullOptions + instance FromJSON UploadOperation -instance ToJSON UploadOperation data OperationParams = OperationParams { href :: Url @@ -156,15 +190,19 @@ type SHA256 = T.Text -- The input Request's url should be the discovered LFS endpoint. -- Since this uses the LFS batch API, it adds /objects/batch to the end of -- that url. -transferRequest :: Request -> TransferRequest -> Request -transferRequest r tr = r +startTransferRequest :: Request -> TransferRequest -> Request +startTransferRequest r tr = addLfsJsonHeaders $ r { path = path r <> "/objects/batch" - , requestHeaders = + , method = "POST" + , requestBody = RequestBodyLBS (encode tr) + } + +addLfsJsonHeaders :: Request -> Request +addLfsJsonHeaders r = r + { requestHeaders = [ ("Accept", lfsjson) , ("Content-Type", lfsjson) ] - , method = "POST" - , requestBody = RequestBodyLBS (encode tr) } where lfsjson = "application/vnd.git-lfs+json" @@ -186,28 +224,59 @@ parseResponseBody resp = case eitherDecode resp of Left _ -> Left $ Left err Right tr -> Right tr --- | Builds http requests that can be used to download the objects that --- were requested using a TransferRequest. -downloadRequests :: TransferResponse DownloadOperation -> ([(TransferResponseOperation DownloadOperation, Maybe Request)]) -downloadRequests = transferRequests download +-- | Builds a http request to perform a download. +downloadOperationRequest :: DownloadOperation -> Maybe Request +downloadOperationRequest = operationParamsRequest . download --- | Builds http requests that can be used to upload objects. +-- | Builds http request to perform an upload. The content to upload is +-- provided in the RequestBody, along with its SHA256 and size. -- --- When the server already has an object, no request will be generated. --- --- The requestBody is not set here. When making a request, --- the content of the object needs to be provided as the body. -uploadRequests :: TransferResponse UploadOperation -> ([(TransferResponseOperation UploadOperation, Maybe Request)]) -uploadRequests = transferRequests upload - -transferRequests :: (op -> OperationParams) -> TransferResponse op -> ([(TransferResponseOperation op, Maybe Request)]) -transferRequests getps = map mkreq . objects +-- If the LFS server requested verification, there will be a second +-- Request that does that; it should be run only after the upload has +-- succeeded. +uploadOperation :: UploadOperation -> RequestBody -> SHA256 -> Integer -> Maybe [Request] +uploadOperation op content oid size = + case (mkdlreq, mkverifyreq) of + (Nothing, _) -> Nothing + (Just dlreq, Nothing) -> Just [dlreq] + (Just dlreq, Just verifyreq) -> Just [dlreq, verifyreq] + where + mkdlreq = mkdlreq' + <$> operationParamsRequest (upload op) + mkdlreq' r = r + { method = "PUT" + , requestBody = content + } + mkverifyreq = mkverifyreq' + <$> (operationParamsRequest =<< verify op) + mkverifyreq' r = addLfsJsonHeaders $ r + { method = "POST" + , requestBody = RequestBodyLBS $ encode $ + VerifyBody oid size + } + +data VerifyBody = VerifyBody + { verifybody_oid :: SHA256 + , verifybody_size :: Integer + } + deriving (Generic, Show) + +instance ToJSON VerifyBody where + toJSON = genericToJSON verifyBodyOptions + toEncoding = genericToEncoding verifyBodyOptions + +instance FromJSON VerifyBody where + parseJSON = genericParseJSON verifyBodyOptions + +verifyBodyOptions :: Options +verifyBodyOptions = stripFieldPrefix defaultOptions + +operationParamsRequest :: OperationParams -> Maybe Request +operationParamsRequest ps = do + r <- parseRequest (T.unpack (href ps)) + let headers = map convheader $ maybe [] M.toList (header ps) + return $ r { requestHeaders = headers } where - mkreq op = (op, mkreq' (getps (resp_actions op))) - mkreq' ps = do - r <- parseRequest (T.unpack (href ps)) - let headers = map convheader $ maybe [] M.toList (header ps) - return $ r { requestHeaders = headers } convheader (k, v) = (CI.mk (E.encodeUtf8 k), E.encodeUtf8 v) type Url = T.Text @@ -221,3 +290,8 @@ type HTTPHeaderValue = T.Text -- Prevent Nothing from serializing to null. nonNullOptions :: Options nonNullOptions = defaultOptions { omitNothingFields = True } + +-- Remove prefix from field names. +stripFieldPrefix :: Options -> Options +stripFieldPrefix o = + o { fieldLabelModifier = drop 1 . dropWhile (/= '_') } From 78983d1e338e968b6928a0aa2631b1f08380ec9b Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 31 Jul 2019 15:04:37 -0400 Subject: [PATCH 06/37] rename reorg --- Lfs.hs | 34 +++++++++++++++++----------------- 1 file changed, 17 insertions(+), 17 deletions(-) diff --git a/Lfs.hs b/Lfs.hs index 56ff1723f0..80e2c040c9 100644 --- a/Lfs.hs +++ b/Lfs.hs @@ -175,6 +175,22 @@ instance ToJSON OperationParams where instance FromJSON OperationParams +data Verification = Verification + { verification_oid :: SHA256 + , verification_size :: Integer + } + deriving (Generic, Show) + +instance ToJSON Verification where + toJSON = genericToJSON verifyBodyOptions + toEncoding = genericToEncoding verifyBodyOptions + +instance FromJSON Verification where + parseJSON = genericParseJSON verifyBodyOptions + +verifyBodyOptions :: Options +verifyBodyOptions = stripFieldPrefix defaultOptions + data GitRef = GitRef { name :: T.Text } deriving (Generic, Show) @@ -252,25 +268,9 @@ uploadOperation op content oid size = mkverifyreq' r = addLfsJsonHeaders $ r { method = "POST" , requestBody = RequestBodyLBS $ encode $ - VerifyBody oid size + Verification oid size } -data VerifyBody = VerifyBody - { verifybody_oid :: SHA256 - , verifybody_size :: Integer - } - deriving (Generic, Show) - -instance ToJSON VerifyBody where - toJSON = genericToJSON verifyBodyOptions - toEncoding = genericToEncoding verifyBodyOptions - -instance FromJSON VerifyBody where - parseJSON = genericParseJSON verifyBodyOptions - -verifyBodyOptions :: Options -verifyBodyOptions = stripFieldPrefix defaultOptions - operationParamsRequest :: OperationParams -> Maybe Request operationParamsRequest ps = do r <- parseRequest (T.unpack (href ps)) From 426a74265d869d5b5ef1b3aef465cf0b79537106 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 31 Jul 2019 15:39:01 -0400 Subject: [PATCH 07/37] ssh discovery of LFS endpoint At this point, I'm able to discover an endpoint, and requesting an upload also worked, though I didn't try actually uploading content. --- Lfs.hs | 75 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 74 insertions(+), 1 deletion(-) diff --git a/Lfs.hs b/Lfs.hs index 80e2c040c9..316b7d20cd 100644 --- a/Lfs.hs +++ b/Lfs.hs @@ -1,15 +1,38 @@ +{- git-lfs API + - + - https://github.com/git-lfs/git-lfs/blob/master/docs/api + - + - Copyright 2010-2018 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +-- | This implementation of the git-lfs API uses http Request and Response, +-- but leaves actually connecting up the http client to the user. +-- +-- You'll want to use a Manager that supports https, since the protocol +-- uses http basic auth. +-- +-- Some LFS servers, notably Github's, may require a User-Agent header +-- in some of the requests, in order to allow eg, uploads. No such header +-- is added by dedault, so be sure to add your own. + {-# LANGUAGE DeriveGeneric, FlexibleInstances, FlexibleContexts #-} {-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE LambdaCase #-} import Data.Aeson import Data.Aeson.Types import GHC.Generics +import Network.HTTP.Client +import System.Process +import Control.Exception import qualified Data.Map as M import qualified Data.Text as T import qualified Data.Text.Encoding as E import qualified Data.ByteString.Lazy as L import qualified Data.CaseInsensitive as CI -import Network.HTTP.Client +import Data.String data TransferRequest = TransferRequest { req_operation :: TransferRequestOperation @@ -191,6 +214,24 @@ instance FromJSON Verification where verifyBodyOptions :: Options verifyBodyOptions = stripFieldPrefix defaultOptions +-- | Sent over ssh connection when using that to find the endpoint. +data SshDiscoveryResponse = SshDiscoveryResponse + { endpoint_href :: Url + , endpoint_header :: Maybe (M.Map HTTPHeader HTTPHeaderValue) + , endpoint_expires_in :: Maybe NumSeconds + , endpoint_expires_at :: Maybe T.Text + } deriving (Generic, Show) + +instance ToJSON SshDiscoveryResponse where + toJSON = genericToJSON sshDiscoveryResponseOptions + toEncoding = genericToEncoding sshDiscoveryResponseOptions + +instance FromJSON SshDiscoveryResponse where + parseJSON = genericParseJSON sshDiscoveryResponseOptions + +sshDiscoveryResponseOptions :: Options +sshDiscoveryResponseOptions = stripFieldPrefix nonNullOptions + data GitRef = GitRef { name :: T.Text } deriving (Generic, Show) @@ -213,6 +254,38 @@ startTransferRequest r tr = addLfsJsonHeaders $ r , requestBody = RequestBodyLBS (encode tr) } +-- | Makes a Request using an endpoint discovered via ssh. +startTransferRequestSsh :: SshDiscoveryResponse -> TransferRequest -> Maybe Request +startTransferRequestSsh sr tr = do + basereq <- parseRequest $ T.unpack $ endpoint_href sr + let req = startTransferRequest basereq tr + let headers = map convheader $ maybe [] M.toList $ endpoint_header sr + return $ req { requestHeaders = requestHeaders req ++ headers } + where + convheader (k, v) = (CI.mk (E.encodeUtf8 k), E.encodeUtf8 v) + +-- | Discovers an LFS endpoint for a git remote using ssh. +-- +-- May generate console output, including error messages from ssh or the +-- remote server, and ssh password prompting. +sshDiscovery :: HostUser -> FilePath -> TransferRequestOperation -> IO (Maybe SshDiscoveryResponse) +sshDiscovery hostuser remotepath tro = + (try (readProcess "ssh" ps "") :: IO (Either IOError String)) >>= \case + Left _err -> return Nothing + Right resp -> return $ decode $ fromString resp + where + ps = + [ hostuser + , "git-lfs-authenticate" + , remotepath + , case tro of + RequestDownload -> "download" + RequestUpload -> "upload" + ] + +-- | "user@host" or just the hostname. +type HostUser = String + addLfsJsonHeaders :: Request -> Request addLfsJsonHeaders r = r { requestHeaders = From b4a416b996e54da2e1cd7dbff366624a6a5c86f7 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 31 Jul 2019 16:06:59 -0400 Subject: [PATCH 08/37] cleaner endpoint type --- Lfs.hs | 52 +++++++++++++++++++++++++++------------------------- 1 file changed, 27 insertions(+), 25 deletions(-) diff --git a/Lfs.hs b/Lfs.hs index 316b7d20cd..c1ef5caee8 100644 --- a/Lfs.hs +++ b/Lfs.hs @@ -241,38 +241,21 @@ instance ToJSON GitRef type SHA256 = T.Text --- | Adds necessary headers to a Request and makes it post the --- specified TransferRequest. --- --- The input Request's url should be the discovered LFS endpoint. --- Since this uses the LFS batch API, it adds /objects/batch to the end of --- that url. -startTransferRequest :: Request -> TransferRequest -> Request -startTransferRequest r tr = addLfsJsonHeaders $ r - { path = path r <> "/objects/batch" - , method = "POST" - , requestBody = RequestBodyLBS (encode tr) - } - --- | Makes a Request using an endpoint discovered via ssh. -startTransferRequestSsh :: SshDiscoveryResponse -> TransferRequest -> Maybe Request -startTransferRequestSsh sr tr = do - basereq <- parseRequest $ T.unpack $ endpoint_href sr - let req = startTransferRequest basereq tr - let headers = map convheader $ maybe [] M.toList $ endpoint_header sr - return $ req { requestHeaders = requestHeaders req ++ headers } - where - convheader (k, v) = (CI.mk (E.encodeUtf8 k), E.encodeUtf8 v) +-- | The endpoint of a git-lfs server. +data Endpoint + = EndpointRequest Url + | EndpointDiscovered SshDiscoveryResponse -- | Discovers an LFS endpoint for a git remote using ssh. -- -- May generate console output, including error messages from ssh or the -- remote server, and ssh password prompting. -sshDiscovery :: HostUser -> FilePath -> TransferRequestOperation -> IO (Maybe SshDiscoveryResponse) -sshDiscovery hostuser remotepath tro = +sshDiscoverEndpoint :: HostUser -> FilePath -> TransferRequestOperation -> IO (Maybe Endpoint) +sshDiscoverEndpoint hostuser remotepath tro = (try (readProcess "ssh" ps "") :: IO (Either IOError String)) >>= \case Left _err -> return Nothing - Right resp -> return $ decode $ fromString resp + Right resp -> return $ + EndpointDiscovered <$> decode (fromString resp) where ps = [ hostuser @@ -283,6 +266,25 @@ sshDiscovery hostuser remotepath tro = RequestUpload -> "upload" ] +-- | Makes a Request that will start the process of making a transfer to or +-- from the LFS endpoint. +startTransferRequest :: Endpoint -> TransferRequest -> Maybe Request +startTransferRequest (EndpointRequest url) tr = do + r <- parseRequest $ T.unpack url + return $ addLfsJsonHeaders $ r + -- Since this uses the LFS batch API, it adds /objects/batch + -- to the endpoint url. + { path = path r <> "/objects/batch" + , method = "POST" + , requestBody = RequestBodyLBS (encode tr) + } +startTransferRequest (EndpointDiscovered sr) tr = do + req <- startTransferRequest (EndpointRequest (endpoint_href sr)) tr + let headers = map convheader $ maybe [] M.toList $ endpoint_header sr + return $ req { requestHeaders = requestHeaders req ++ headers } + where + convheader (k, v) = (CI.mk (E.encodeUtf8 k), E.encodeUtf8 v) + -- | "user@host" or just the hostname. type HostUser = String From 435287db15355a1ae6be669a1d5c472dc885edce Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 31 Jul 2019 16:25:13 -0400 Subject: [PATCH 09/37] LFS endpoint guessing from remote url --- Lfs.hs | 37 ++++++++++++++++++++++++++++++++----- 1 file changed, 32 insertions(+), 5 deletions(-) diff --git a/Lfs.hs b/Lfs.hs index c1ef5caee8..f04b13f049 100644 --- a/Lfs.hs +++ b/Lfs.hs @@ -27,12 +27,14 @@ import GHC.Generics import Network.HTTP.Client import System.Process import Control.Exception +import Data.String +import Data.List import qualified Data.Map as M import qualified Data.Text as T import qualified Data.Text.Encoding as E import qualified Data.ByteString.Lazy as L import qualified Data.CaseInsensitive as CI -import Data.String +import qualified Network.URI as URI data TransferRequest = TransferRequest { req_operation :: TransferRequestOperation @@ -243,8 +245,9 @@ type SHA256 = T.Text -- | The endpoint of a git-lfs server. data Endpoint - = EndpointRequest Url + = EndpointURI URI.URI | EndpointDiscovered SshDiscoveryResponse + deriving (Show) -- | Discovers an LFS endpoint for a git remote using ssh. -- @@ -266,11 +269,34 @@ sshDiscoverEndpoint hostuser remotepath tro = RequestUpload -> "upload" ] +-- | Guesses the LFS endpoint from the http url of a git remote. +-- +-- https://github.com/git-lfs/git-lfs/blob/master/docs/api/server-discovery.md +guessEndpoint :: Url -> Maybe Endpoint +guessEndpoint remoteurl = do + uri <- URI.parseURI (T.unpack remoteurl) + let guessedpath + | ".git" `isSuffixOf` URI.uriPath uri = + URI.uriPath uri ++ "/info/lfs" + | ".git/" `isSuffixOf` URI.uriPath uri = + URI.uriPath uri ++ "info/lfs" + | otherwise = (droptrailing '/' (URI.uriPath uri)) ++ ".git/info/lfs" + let endpoint = EndpointURI $ uri + { URI.uriScheme = "https" + , URI.uriPath = guessedpath + } + case URI.uriScheme uri of + "https:" -> Just endpoint + "http:" -> Just endpoint + _ -> Nothing + where + droptrailing c = reverse . dropWhile (== c) . reverse + -- | Makes a Request that will start the process of making a transfer to or -- from the LFS endpoint. startTransferRequest :: Endpoint -> TransferRequest -> Maybe Request -startTransferRequest (EndpointRequest url) tr = do - r <- parseRequest $ T.unpack url +startTransferRequest (EndpointURI uri) tr = do + r <- requestFromURI uri return $ addLfsJsonHeaders $ r -- Since this uses the LFS batch API, it adds /objects/batch -- to the endpoint url. @@ -279,7 +305,8 @@ startTransferRequest (EndpointRequest url) tr = do , requestBody = RequestBodyLBS (encode tr) } startTransferRequest (EndpointDiscovered sr) tr = do - req <- startTransferRequest (EndpointRequest (endpoint_href sr)) tr + uri <- URI.parseURI (T.unpack (endpoint_href sr)) + req <- startTransferRequest (EndpointURI uri) tr let headers = map convheader $ maybe [] M.toList $ endpoint_header sr return $ req { requestHeaders = requestHeaders req ++ headers } where From bd6c5083348c16e720f85ee095dabbc66d96b45c Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Thu, 1 Aug 2019 13:29:43 -0400 Subject: [PATCH 10/37] finalizing lfs module It may eventually move to its own package. --- COPYRIGHT | 4 +++ Lfs.hs => Utility/GitLFS.hs | 62 ++++++++++++++++++++++++++++--------- 2 files changed, 52 insertions(+), 14 deletions(-) rename Lfs.hs => Utility/GitLFS.hs (91%) diff --git a/COPYRIGHT b/COPYRIGHT index fd94655fbf..7fe4c9d946 100644 --- a/COPYRIGHT +++ b/COPYRIGHT @@ -25,6 +25,10 @@ Copyright: 2018 Joey Hess 2013 Michael Snoyman License: Expat +Files: Utility/GitLFS.hs +Copyright: © 2019 Joey Hess +License: AGPL-3+ + Files: Utility/* Copyright: 2012-2019 Joey Hess License: BSD-2-clause diff --git a/Lfs.hs b/Utility/GitLFS.hs similarity index 91% rename from Lfs.hs rename to Utility/GitLFS.hs index f04b13f049..9f745b1224 100644 --- a/Lfs.hs +++ b/Utility/GitLFS.hs @@ -2,11 +2,49 @@ - - https://github.com/git-lfs/git-lfs/blob/master/docs/api - - - Copyright 2010-2018 Joey Hess + - Copyright 2019 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} +{-# LANGUAGE DeriveGeneric, FlexibleInstances, FlexibleContexts #-} +{-# LANGUAGE OverloadedStrings #-} +{-# LANGUAGE LambdaCase #-} + +module Utility.GitLFS ( + -- * transfer requests + TransferRequest(..), + TransferAdapter(..), + TransferRequestObject(..), + startTransferRequest, + -- * responses to transfer requests + TransferResponse(..), + TransferResponseOperation(..), + IsTransferResponseOperation, + DownloadOperation, + UploadOperation, + ParsedTransferResponse, + parseTransferResponse, + -- * making transfers + downloadOperationRequest, + uploadOperationRequests, + -- * endpoint discovery + Endpoint, + guessEndpoint, + HostUser, + sshDiscoverEndpoint, + -- * errors + TransferResponseError(..), + TransferResponseObjectError(..), + -- * additional data types + Url, + SHA256, + GitRef(..), + NumSeconds, + HTTPHeader, + HTTPHeaderValue, +) where + -- | This implementation of the git-lfs API uses http Request and Response, -- but leaves actually connecting up the http client to the user. -- @@ -17,10 +55,6 @@ -- in some of the requests, in order to allow eg, uploads. No such header -- is added by dedault, so be sure to add your own. -{-# LANGUAGE DeriveGeneric, FlexibleInstances, FlexibleContexts #-} -{-# LANGUAGE OverloadedStrings #-} -{-# LANGUAGE LambdaCase #-} - import Data.Aeson import Data.Aeson.Types import GHC.Generics @@ -207,14 +241,14 @@ data Verification = Verification deriving (Generic, Show) instance ToJSON Verification where - toJSON = genericToJSON verifyBodyOptions - toEncoding = genericToEncoding verifyBodyOptions + toJSON = genericToJSON verificationOptions + toEncoding = genericToEncoding verificationOptions instance FromJSON Verification where - parseJSON = genericParseJSON verifyBodyOptions + parseJSON = genericParseJSON verificationOptions -verifyBodyOptions :: Options -verifyBodyOptions = stripFieldPrefix defaultOptions +verificationOptions :: Options +verificationOptions = stripFieldPrefix defaultOptions -- | Sent over ssh connection when using that to find the endpoint. data SshDiscoveryResponse = SshDiscoveryResponse @@ -329,11 +363,11 @@ type ParsedTransferResponse op = Either (Either String TransferResponseError) (TransferResponse op) -- | Parse the body of a response to a transfer request. -parseResponseBody +parseTransferResponse :: IsTransferResponseOperation op => L.ByteString -> ParsedTransferResponse op -parseResponseBody resp = case eitherDecode resp of +parseTransferResponse resp = case eitherDecode resp of -- If unable to decode as a TransferResponse, try to decode -- as a TransferResponseError instead, in case the LFS server -- sent an error message. @@ -352,8 +386,8 @@ downloadOperationRequest = operationParamsRequest . download -- If the LFS server requested verification, there will be a second -- Request that does that; it should be run only after the upload has -- succeeded. -uploadOperation :: UploadOperation -> RequestBody -> SHA256 -> Integer -> Maybe [Request] -uploadOperation op content oid size = +uploadOperationRequests :: UploadOperation -> RequestBody -> SHA256 -> Integer -> Maybe [Request] +uploadOperationRequests op content oid size = case (mkdlreq, mkverifyreq) of (Nothing, _) -> Nothing (Just dlreq, Nothing) -> Just [dlreq] From 9c20a8792d790db43eac7e601324355f6b35433c Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Thu, 1 Aug 2019 14:24:59 -0400 Subject: [PATCH 11/37] fix names of per-remote git config keys These are all prefixed by annex- and always have been, the docs were just wrong. --- doc/git-annex.mdwn | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn index 212c78985b..7af5f1265c 100644 --- a/doc/git-annex.mdwn +++ b/doc/git-annex.mdwn @@ -1546,71 +1546,71 @@ Here are all the supported configuration settings. For example, to use the wipe command, set it to `wipe -f %file`. -* `remote..rsyncurl` +* `remote..annex-rsyncurl` Used by rsync special remotes, this configures the location of the rsync repository to use. Normally this is automatically set up by `git annex initremote`, but you can change it if needed. -* `remote..buprepo` +* `remote..annex-buprepo` Used by bup special remotes, this configures the location of the bup repository to use. Normally this is automatically set up by `git annex initremote`, but you can change it if needed. -* `remote..ddarrepo` +* `remote..annex-ddarrepo` Used by ddar special remotes, this configures the location of the ddar repository to use. Normally this is automatically set up by `git annex initremote`, but you can change it if needed. -* `remote..directory` +* `remote..annex-directory` Used by directory special remotes, this configures the location of the directory where annexed files are stored for this remote. Normally this is automatically set up by `git annex initremote`, but you can change it if needed. -* `remote..adb` +* `remote..annex-adb` Used to identify remotes on Android devices accessed via adb. Normally this is automatically set up by `git annex initremote`. -* `remote..androiddirectory` +* `remote..annex-androiddirectory` Used by adb special remotes, this is the directory on the Android device where files are stored for this remote. Normally this is automatically set up by `git annex initremote`, but you can change it if needed. -* `remote..androidserial` +* `remote..annex-androidserial` Used by adb special remotes, this is the serial number of the Android device used by the remote. Normally this is automatically set up by `git annex initremote`, but you can change it if needed, eg when upgrading to a new Android device. -* `remote..s3` +* `remote..annex-s3` Used to identify Amazon S3 special remotes. Normally this is automatically set up by `git annex initremote`. -* `remote..glacier` +* `remote..annex-glacier` Used to identify Amazon Glacier special remotes. Normally this is automatically set up by `git annex initremote`. -* `remote..webdav` +* `remote..annex-webdav` Used to identify webdav special remotes. Normally this is automatically set up by `git annex initremote`. -* `remote..tahoe` +* `remote..annex-tahoe` Used to identify tahoe special remotes. Points to the configuration directory for tahoe. -* `remote..gcrypt` +* `remote..annex-gcrypt` Used to identify gcrypt special remotes. Normally this is automatically set up by `git annex initremote`. @@ -1619,7 +1619,7 @@ Here are all the supported configuration settings. If the gcrypt remote is accessible over ssh and has git-annex-shell available to manage it, it's set to "shell". -* `remote..hooktype`, `remote..externaltype` +* `remote..annex-hooktype`, `remote..annex-externaltype` Used by hook special remotes and external special remotes to record the type of the remote. From 1cef791cf3abed4be16d3210ecce80758f182fa4 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Thu, 1 Aug 2019 15:11:45 -0400 Subject: [PATCH 12/37] skeleton git-lfs special remote This is a special remote and a git remote at the same time; git can pull and push to it and git-annex can use it as a special remote. Remote.Git has to check if it's configured as a git-lfs special remote and sets it up as one if so. Object methods not implemented yet. --- Remote/Git.hs | 2 + Remote/GitLFS.hs | 130 +++++++++++++++++++++++++++++++++++++++++++++ Remote/List.hs | 2 + Types/GitConfig.hs | 2 + doc/git-annex.mdwn | 7 +++ git-annex.cabal | 2 + 6 files changed, 145 insertions(+) create mode 100644 Remote/GitLFS.hs diff --git a/Remote/Git.hs b/Remote/Git.hs index 61b58a4890..6e9af5dd5f 100644 --- a/Remote/Git.hs +++ b/Remote/Git.hs @@ -51,6 +51,7 @@ import Remote.Helper.Messages import Remote.Helper.ExportImport import qualified Remote.Helper.Ssh as Ssh import qualified Remote.GCrypt +import qualified Remote.GitLFS import qualified Remote.P2P import qualified Remote.Helper.P2P as P2PHelper import P2P.Address @@ -144,6 +145,7 @@ configRead autoinit r = do gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote) gen r u c gc | Git.GCrypt.isEncrypted r = Remote.GCrypt.chainGen r u c gc + | remoteAnnexGitLFS gc = Remote.GitLFS.gen r u c gc | otherwise = case repoP2PAddress r of Nothing -> do st <- mkState r u gc diff --git a/Remote/GitLFS.hs b/Remote/GitLFS.hs new file mode 100644 index 0000000000..d46e971603 --- /dev/null +++ b/Remote/GitLFS.hs @@ -0,0 +1,130 @@ +{- Using git-lfs as a remote. + - + - Copyright 2019 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +module Remote.GitLFS (remote, gen) where + +import Annex.Common +import Types.Remote +import Annex.Url +import Types.Creds +import qualified Git +import Config +import Config.Cost +import Remote.Helper.Special +import Remote.Helper.ExportImport +import Remote.Helper.Git +import Annex.Ssh +import Annex.UUID +import Utility.SshHost +import qualified Utility.GitLFS as LFS + +import Control.Concurrent.STM +import qualified Data.Map as M + +remote :: RemoteType +remote = RemoteType + { typename = "git-lfs" + -- Remote.Git takes care of enumerating git-lfs remotes too, + -- and will call our gen on them. + , enumerate = const (return []) + , generate = gen + , setup = mySetup + , exportSupported = exportUnsupported + , importSupported = importUnsupported + } + +type LFSHandle = TVar (String, Maybe LFS.Endpoint) + +gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote) +gen r u c gc = do + handle <- liftIO $ newTVarIO (lfsrepo, Nothing) + cst <- remoteCost gc expensiveRemoteCost + return $ Just $ specialRemote' specialcfg c + (simplyPrepare $ store handle) + (simplyPrepare $ retrieve handle) + (simplyPrepare $ remove handle) + (simplyPrepare $ checkKey handle) + (this cst) + where + this cst = Remote + { uuid = u + , cost = cst + , name = Git.repoDescribe r + , storeKey = storeKeyDummy + , retrieveKeyFile = retreiveKeyFileDummy + , retrieveKeyFileCheap = retrieveCheap + -- content stored on git-lfs is hashed with SHA256 + -- no matter what git-annex key it's for, and the hash + -- is checked on download + , retrievalSecurityPolicy = RetrievalAllKeysSecure + , removeKey = removeKeyDummy + , lockContent = Nothing + , checkPresent = checkPresentDummy + , checkPresentCheap = False + , exportActions = exportUnsupported + , importActions = importUnsupported + , whereisKey = Nothing + , remoteFsck = Nothing + , repairRepo = Nothing + , config = c + , getRepo = return r + , gitconfig = gc + , localpath = Nothing + , remotetype = remote + , availability = GloballyAvailable + , readonly = False + -- content cannot be removed from a git-lfs repo + , appendonly = True + , mkUnavailable = return Nothing + , getInfo = gitRepoInfo (this cst) + , claimUrl = Nothing + , checkUrl = Nothing + } + lfsrepo = fromMaybe + (giveup "remote url is not configured") + (M.lookup "url" $ Git.config r) + specialcfg = (specialRemoteCfg c) + -- chunking would not improve git-lfs + { chunkConfig = NoChunks + } + +mySetup :: SetupStage -> Maybe UUID -> Maybe CredPair -> RemoteConfig -> RemoteGitConfig -> Annex (RemoteConfig, UUID) +mySetup _ mu _ c gc = do + u <- maybe (liftIO genUUID) return mu + + let repo = fromMaybe (giveup "Specify url=") $ + M.lookup "url" c + (c', _encsetup) <- encryptionSetup c gc + + -- The repo is not stored in the remote log, because the same + -- git-lfs repo can be accessed using different urls by different + -- people (eg over ssh or http). + -- + -- Instead, set up remote.name.url to point to the repo, + -- (so it's also usable by git as a non-special remote), + -- and set remote.name.git-lfs = true + let c'' = M.delete "repo" c' + gitConfigSpecialRemote u c'' [("git-lfs", "true")] + setConfig (ConfigKey ("remote." ++ getRemoteName c ++ ".url")) repo + return (c'', u) + +store :: LFSHandle -> Storer +store h = fileStorer $ \k src p -> undefined + +retrieve :: LFSHandle -> Retriever +retrieve h = byteRetriever $ \k sink -> undefined + +retrieveCheap :: Key -> AssociatedFile -> FilePath -> Annex Bool +retrieveCheap _ _ _ = return False + +checkKey :: LFSHandle -> CheckPresent +checkKey h key = undefined + +remove :: LFSHandle -> Remover +remove h key = do + warning "git-lfs does not support removing content" + return False diff --git a/Remote/List.hs b/Remote/List.hs index b1cd8ff6ac..d4ed4dfe28 100644 --- a/Remote/List.hs +++ b/Remote/List.hs @@ -40,6 +40,7 @@ import qualified Remote.Adb import qualified Remote.Tahoe import qualified Remote.Glacier import qualified Remote.Ddar +import qualified Remote.GitLFS import qualified Remote.Hook import qualified Remote.External @@ -63,6 +64,7 @@ remoteTypes = map adjustExportImportRemoteType , Remote.Tahoe.remote , Remote.Glacier.remote , Remote.Ddar.remote + , Remote.GitLFS.remote , Remote.Hook.remote , Remote.External.remote ] diff --git a/Types/GitConfig.hs b/Types/GitConfig.hs index 0bc72d4023..7976f08e9f 100644 --- a/Types/GitConfig.hs +++ b/Types/GitConfig.hs @@ -263,6 +263,7 @@ data RemoteGitConfig = RemoteGitConfig , remoteAnnexAndroidDirectory :: Maybe FilePath , remoteAnnexAndroidSerial :: Maybe String , remoteAnnexGCrypt :: Maybe String + , remoteAnnexGitLFS :: Bool , remoteAnnexDdarRepo :: Maybe String , remoteAnnexHookType :: Maybe String , remoteAnnexExternalType :: Maybe String @@ -321,6 +322,7 @@ extractRemoteGitConfig r remotename = do , remoteAnnexAndroidDirectory = notempty $ getmaybe "androiddirectory" , remoteAnnexAndroidSerial = notempty $ getmaybe "androidserial" , remoteAnnexGCrypt = notempty $ getmaybe "gcrypt" + , remoteAnnexGitLFS = getbool "git-lfs" False , remoteAnnexDdarRepo = getmaybe "ddarrepo" , remoteAnnexHookType = notempty $ getmaybe "hooktype" , remoteAnnexExternalType = notempty $ getmaybe "externaltype" diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn index 7af5f1265c..dc9868e91d 100644 --- a/doc/git-annex.mdwn +++ b/doc/git-annex.mdwn @@ -1619,6 +1619,13 @@ Here are all the supported configuration settings. If the gcrypt remote is accessible over ssh and has git-annex-shell available to manage it, it's set to "shell". +* `remote..annex-git-lfs` + + Used to identify git-lfs special remotes. + Normally this is automatically set up by `git annex initremote`. + + It is set to "true" if this is a git-lfs remote. + * `remote..annex-hooktype`, `remote..annex-externaltype` Used by hook special remotes and external special remotes to record diff --git a/git-annex.cabal b/git-annex.cabal index 4571255992..82aaed5bb4 100644 --- a/git-annex.cabal +++ b/git-annex.cabal @@ -931,6 +931,7 @@ Executable git-annex Remote.External.Types Remote.GCrypt Remote.Git + Remote.GitLFS Remote.Glacier Remote.Helper.AWS Remote.Helper.Chunked @@ -1039,6 +1040,7 @@ Executable git-annex Utility.FileSystemEncoding Utility.Format Utility.FreeDesktop + Utility.GitLFS Utility.Glob Utility.Gpg Utility.Hash From 2533acc7a27313b6bb4f9f4a0d6c20b93e7e1f0d Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Fri, 2 Aug 2019 10:40:55 -0400 Subject: [PATCH 13/37] note about ssh hostname sanitization --- Utility/GitLFS.hs | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/Utility/GitLFS.hs b/Utility/GitLFS.hs index 9f745b1224..277126e138 100644 --- a/Utility/GitLFS.hs +++ b/Utility/GitLFS.hs @@ -287,6 +287,10 @@ data Endpoint -- -- May generate console output, including error messages from ssh or the -- remote server, and ssh password prompting. +-- +-- Note that this does not sanitize the hostname. It is the responsibility +-- of the caller to avoid calling this with a value that ssh will +-- interpert as an option, such as "-oProxyCommand=" sshDiscoverEndpoint :: HostUser -> FilePath -> TransferRequestOperation -> IO (Maybe Endpoint) sshDiscoverEndpoint hostuser remotepath tro = (try (readProcess "ssh" ps "") :: IO (Either IOError String)) >>= \case From 03a765909c6e156fbcf3cf360b3192af1b234465 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Fri, 2 Aug 2019 10:57:40 -0400 Subject: [PATCH 14/37] move IO code out Let's keep this entirely pure. git-annex has its own facilities for running a ssh command, that make it respect various config settings, and cache connections, etc. So better not to have the library run ssh itself. --- Utility/GitLFS.hs | 45 +++++++++++++++++++-------------------------- 1 file changed, 19 insertions(+), 26 deletions(-) diff --git a/Utility/GitLFS.hs b/Utility/GitLFS.hs index 277126e138..4d6c977fc7 100644 --- a/Utility/GitLFS.hs +++ b/Utility/GitLFS.hs @@ -32,7 +32,8 @@ module Utility.GitLFS ( Endpoint, guessEndpoint, HostUser, - sshDiscoverEndpoint, + sshDiscoverEndpointCommand, + parseSshDiscoverEndpointResponse, -- * errors TransferResponseError(..), TransferResponseObjectError(..), @@ -59,9 +60,6 @@ import Data.Aeson import Data.Aeson.Types import GHC.Generics import Network.HTTP.Client -import System.Process -import Control.Exception -import Data.String import Data.List import qualified Data.Map as M import qualified Data.Text as T @@ -283,29 +281,24 @@ data Endpoint | EndpointDiscovered SshDiscoveryResponse deriving (Show) --- | Discovers an LFS endpoint for a git remote using ssh. +-- | Command to run via ssh with to discover an endpoint. The FilePath is +-- the location of the git repository on the ssh server. -- --- May generate console output, including error messages from ssh or the --- remote server, and ssh password prompting. --- --- Note that this does not sanitize the hostname. It is the responsibility --- of the caller to avoid calling this with a value that ssh will --- interpert as an option, such as "-oProxyCommand=" -sshDiscoverEndpoint :: HostUser -> FilePath -> TransferRequestOperation -> IO (Maybe Endpoint) -sshDiscoverEndpoint hostuser remotepath tro = - (try (readProcess "ssh" ps "") :: IO (Either IOError String)) >>= \case - Left _err -> return Nothing - Right resp -> return $ - EndpointDiscovered <$> decode (fromString resp) - where - ps = - [ hostuser - , "git-lfs-authenticate" - , remotepath - , case tro of - RequestDownload -> "download" - RequestUpload -> "upload" - ] +-- Note that, when sshing to the server, you should take care that the +-- hostname you pass to ssh is really a hostname and not something that ssh +-- will parse an an option, such as -oProxyCommand=". +sshDiscoverEndpointCommand :: FilePath -> TransferRequestOperation -> [String] +sshDiscoverEndpointCommand remotepath tro = + [ "git-lfs-authenticate" + , remotepath + , case tro of + RequestDownload -> "download" + RequestUpload -> "upload" + ] + +-- | Parse the json output when doing ssh endpoint discovery. +parseSshDiscoverEndpointResponse :: L.ByteString -> Maybe Endpoint +parseSshDiscoverEndpointResponse resp = EndpointDiscovered <$> decode resp -- | Guesses the LFS endpoint from the http url of a git remote. -- From 6c1130a3bbb4d2aae538d543c5a397c65b21761b Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Fri, 2 Aug 2019 12:38:14 -0400 Subject: [PATCH 15/37] lfs endpoint discovery and caching in git-lfs special remote --- Git/Url.hs | 8 ++- Remote/GitLFS.hs | 122 ++++++++++++++++++++++++++++++++++++++++------ Utility/GitLFS.hs | 27 +++++----- 3 files changed, 129 insertions(+), 28 deletions(-) diff --git a/Git/Url.hs b/Git/Url.hs index f9cc575a63..8430655758 100644 --- a/Git/Url.hs +++ b/Git/Url.hs @@ -11,9 +11,10 @@ module Git.Url ( port, hostuser, authority, + path, ) where -import Network.URI hiding (scheme, authority) +import Network.URI hiding (scheme, authority, path) import Common import Git.Types @@ -66,6 +67,11 @@ authpart :: (URIAuth -> a) -> Repo -> Maybe a authpart a Repo { location = Url u } = a <$> uriAuthority u authpart _ repo = notUrl repo +{- Path part of an URL repo. -} +path :: Repo -> FilePath +path Repo { location = Url u } = uriPath u +path repo = notUrl repo + notUrl :: Repo -> a notUrl repo = error $ "acting on local git repo " ++ repoDescribe repo ++ " not supported" diff --git a/Remote/GitLFS.hs b/Remote/GitLFS.hs index d46e971603..7c054b8d1e 100644 --- a/Remote/GitLFS.hs +++ b/Remote/GitLFS.hs @@ -12,18 +12,23 @@ import Types.Remote import Annex.Url import Types.Creds import qualified Git +import qualified Git.Types as Git +import qualified Git.Url import Config import Config.Cost import Remote.Helper.Special import Remote.Helper.ExportImport import Remote.Helper.Git +import qualified Remote.Helper.Ssh as Ssh import Annex.Ssh import Annex.UUID import Utility.SshHost import qualified Utility.GitLFS as LFS import Control.Concurrent.STM +import Data.String import qualified Data.Map as M +import qualified Network.URI as URI remote :: RemoteType remote = RemoteType @@ -37,11 +42,9 @@ remote = RemoteType , importSupported = importUnsupported } -type LFSHandle = TVar (String, Maybe LFS.Endpoint) - gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote) gen r u c gc = do - handle <- liftIO $ newTVarIO (lfsrepo, Nothing) + handle <- liftIO $ newTVarIO $ LFSHandle Nothing Nothing r gc cst <- remoteCost gc expensiveRemoteCost return $ Just $ specialRemote' specialcfg c (simplyPrepare $ store handle) @@ -84,9 +87,6 @@ gen r u c gc = do , claimUrl = Nothing , checkUrl = Nothing } - lfsrepo = fromMaybe - (giveup "remote url is not configured") - (M.lookup "url" $ Git.config r) specialcfg = (specialRemoteCfg c) -- chunking would not improve git-lfs { chunkConfig = NoChunks @@ -112,19 +112,113 @@ mySetup _ mu _ c gc = do setConfig (ConfigKey ("remote." ++ getRemoteName c ++ ".url")) repo return (c'', u) -store :: LFSHandle -> Storer -store h = fileStorer $ \k src p -> undefined +data LFSHandle = LFSHandle + { downloadEndpoint :: Maybe LFS.Endpoint + , uploadEndpoint :: Maybe LFS.Endpoint + , remoteRepo :: Git.Repo + , remoteGitConfig :: RemoteGitConfig + } -retrieve :: LFSHandle -> Retriever -retrieve h = byteRetriever $ \k sink -> undefined +discoverLFSEndpoint :: LFS.TransferRequestOperation -> LFSHandle -> Annex (Maybe LFS.Endpoint) +discoverLFSEndpoint tro h + | Git.repoIsSsh r = gossh + | Git.repoIsHttp r = gohttp + | otherwise = do + warning "git-lfs endpoint has unsupported URI scheme" + return Nothing + where + r = remoteRepo h + lfsrepouri = case Git.location r of + Git.Url u -> u + _ -> giveup $ "unsupported git-lfs remote location " ++ Git.repoLocation r + gohttp = case tro of + LFS.RequestDownload -> return $ LFS.guessEndpoint lfsrepouri + LFS.RequestUpload -> do + -- git-lfs does support storing over http, + -- but it would need prompting for http basic + -- authentication each time git-annex discovered + -- the endpoint. + warning "Storing content in git-lfs currently needs a ssh repository url, not http." + return Nothing + gossh = case mkSshHost <$> Git.Url.hostuser r of + Nothing -> do + warning "Unable to parse ssh url for git-lfs remote." + return Nothing + Just (Left err) -> do + warning err + return Nothing + Just (Right hostuser) -> do + let port = Git.Url.port r + -- Remove leading /~/ from path. That is added when + -- converting a scp-style repository location with + -- a relative path into an url, and is legal + -- according to git-clone(1), but github does not + -- support it. + let remotepath = if "/~/" `isPrefixOf` Git.Url.path r + then drop 3 (Git.Url.path r) + else Git.Url.path r + let ps = LFS.sshDiscoverEndpointCommand remotepath tro + -- Note that no shellEscape is done here, because + -- at least github's git-lfs implementation does + -- not allow for shell quoting. + let remotecmd = unwords ps + (sshcommand, sshparams) <- sshCommand NoConsumeStdin (hostuser, port) (remoteGitConfig h) remotecmd + liftIO (tryIO (readProcess sshcommand (toCommand sshparams))) >>= \case + Left err -> do + warning $ "ssh connection to git-lfs remote failed: " ++ show err + return Nothing + Right resp -> case LFS.parseSshDiscoverEndpointResponse (fromString resp) of + Nothing -> do + warning $ "unexpected response from git-lfs remote when doing ssh endpoint discovery" + return Nothing + Just endpoint -> return (Just endpoint) + +-- The endpoint is cached for later use. +getLFSEndpoint :: LFS.TransferRequestOperation -> TVar LFSHandle -> Annex (Maybe LFS.Endpoint) +getLFSEndpoint tro hv = do + h <- liftIO $ atomically $ readTVar hv + case f h of + Just endpoint -> return (Just endpoint) + Nothing -> discoverLFSEndpoint tro h >>= \case + Just endpoint -> do + liftIO $ atomically $ writeTVar hv $ + case tro of + LFS.RequestDownload -> + h { downloadEndpoint = Just endpoint } + LFS.RequestUpload -> + h { uploadEndpoint = Just endpoint } + return (Just endpoint) + Nothing -> return Nothing + where + f = case tro of + LFS.RequestDownload -> downloadEndpoint + LFS.RequestUpload -> uploadEndpoint + +store :: TVar LFSHandle -> Storer +store h = fileStorer $ \k src p -> getLFSEndpoint LFS.RequestUpload h >>= \case + Nothing -> return False + Just endpoint -> do + liftIO $ print ("endpoint", endpoint) + return False + +retrieve :: TVar LFSHandle -> Retriever +retrieve h = byteRetriever $ \k sink -> getLFSEndpoint LFS.RequestDownload h >>= \case + Nothing -> return False + Just endpoint -> do + liftIO $ print ("endpoint", endpoint) + return False + +checkKey :: TVar LFSHandle -> CheckPresent +checkKey h key = getLFSEndpoint LFS.RequestDownload h >>= \case + Nothing -> giveup "unable to connect to git-lfs endpoint" + Just endpoint -> do + liftIO $ print ("endpoint", endpoint) + return False retrieveCheap :: Key -> AssociatedFile -> FilePath -> Annex Bool retrieveCheap _ _ _ = return False -checkKey :: LFSHandle -> CheckPresent -checkKey h key = undefined - -remove :: LFSHandle -> Remover +remove :: TVar LFSHandle -> Remover remove h key = do warning "git-lfs does not support removing content" return False diff --git a/Utility/GitLFS.hs b/Utility/GitLFS.hs index 4d6c977fc7..b85b71e1d4 100644 --- a/Utility/GitLFS.hs +++ b/Utility/GitLFS.hs @@ -14,6 +14,7 @@ module Utility.GitLFS ( -- * transfer requests TransferRequest(..), + TransferRequestOperation(..), TransferAdapter(..), TransferRequestObject(..), startTransferRequest, @@ -303,24 +304,24 @@ parseSshDiscoverEndpointResponse resp = EndpointDiscovered <$> decode resp -- | Guesses the LFS endpoint from the http url of a git remote. -- -- https://github.com/git-lfs/git-lfs/blob/master/docs/api/server-discovery.md -guessEndpoint :: Url -> Maybe Endpoint -guessEndpoint remoteurl = do - uri <- URI.parseURI (T.unpack remoteurl) - let guessedpath +guessEndpoint :: URI.URI -> Maybe Endpoint +guessEndpoint uri = case URI.uriScheme uri of + "https:" -> Just endpoint + "http:" -> Just endpoint + _ -> Nothing + where + endpoint = EndpointURI $ uri + { URI.uriScheme = "https" + , URI.uriPath = guessedpath + } + + guessedpath | ".git" `isSuffixOf` URI.uriPath uri = URI.uriPath uri ++ "/info/lfs" | ".git/" `isSuffixOf` URI.uriPath uri = URI.uriPath uri ++ "info/lfs" | otherwise = (droptrailing '/' (URI.uriPath uri)) ++ ".git/info/lfs" - let endpoint = EndpointURI $ uri - { URI.uriScheme = "https" - , URI.uriPath = guessedpath - } - case URI.uriScheme uri of - "https:" -> Just endpoint - "http:" -> Just endpoint - _ -> Nothing - where + droptrailing c = reverse . dropWhile (== c) . reverse -- | Makes a Request that will start the process of making a transfer to or From fc09a41ed1a8c503cdf2e78e881cd6c65938cc5e Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Fri, 2 Aug 2019 13:56:55 -0400 Subject: [PATCH 16/37] storing objects in git-lfs is working Still need to record the sha256 and size when they cannot be determined by inspecting the key. --- Backend/Hash.hs | 1 + Remote/GitLFS.hs | 102 ++++++++++++++++++++++++++++++++++++++++++++-- Utility/GitLFS.hs | 5 ++- 3 files changed, 104 insertions(+), 4 deletions(-) diff --git a/Backend/Hash.hs b/Backend/Hash.hs index 08bc6b1a3d..6cac6e3718 100644 --- a/Backend/Hash.hs +++ b/Backend/Hash.hs @@ -10,6 +10,7 @@ module Backend.Hash ( backends, testKeyBackend, + keyHash, ) where import Annex.Common diff --git a/Remote/GitLFS.hs b/Remote/GitLFS.hs index 7c054b8d1e..a94ad58478 100644 --- a/Remote/GitLFS.hs +++ b/Remote/GitLFS.hs @@ -10,6 +10,7 @@ module Remote.GitLFS (remote, gen) where import Annex.Common import Types.Remote import Annex.Url +import Types.Key import Types.Creds import qualified Git import qualified Git.Types as Git @@ -19,16 +20,26 @@ import Config.Cost import Remote.Helper.Special import Remote.Helper.ExportImport import Remote.Helper.Git +import Remote.Helper.Http import qualified Remote.Helper.Ssh as Ssh import Annex.Ssh import Annex.UUID import Utility.SshHost import qualified Utility.GitLFS as LFS +import Backend.Hash +import Utility.Hash +import Utility.FileSize +import Crypto import Control.Concurrent.STM import Data.String +import Network.HTTP.Client +import System.Log.Logger import qualified Data.Map as M import qualified Network.URI as URI +import qualified Data.ByteString.Lazy as L +import qualified Data.Text as T +import qualified Data.Text.Encoding as E remote :: RemoteType remote = RemoteType @@ -194,12 +205,97 @@ getLFSEndpoint tro hv = do LFS.RequestDownload -> downloadEndpoint LFS.RequestUpload -> uploadEndpoint +sendTransferRequest + :: LFS.IsTransferResponseOperation op + => LFS.TransferRequest + -> LFS.Endpoint + -> Annex (Either String (LFS.TransferResponse op)) +sendTransferRequest req endpoint = do + uo <- getUrlOptions + case applyRequest uo <$> LFS.startTransferRequest endpoint req of + Just httpreq -> do + liftIO $ debugM "git-lfs" (show httpreq) + httpresp <- liftIO $ httpLbs + (setRequestCheckStatus httpreq) + (httpManager uo) + return $ case LFS.parseTransferResponse (responseBody httpresp) of + Left (Right tro) -> Left $ + T.unpack $ LFS.resperr_message tro + Left (Left err) -> Left err + Right resp -> Right resp + Nothing -> return (Left "unable to parse git-lfs endpoint url") + +extractKeySha256 :: Key -> Maybe LFS.SHA256 +extractKeySha256 k = case keyVariety k of + SHA2Key (HashSize 256) (HasExt hasext) + | hasext -> eitherToMaybe $ E.decodeUtf8' (keyHash k) + | otherwise -> eitherToMaybe $ E.decodeUtf8' (keyName k) + _ -> Nothing + +-- The size of an encrypted key is the size of the input data, but we need +-- the actual object size. +extractKeySize :: Key -> Maybe Integer +extractKeySize k + | isEncKey k = Nothing + | otherwise = keySize k + store :: TVar LFSHandle -> Storer store h = fileStorer $ \k src p -> getLFSEndpoint LFS.RequestUpload h >>= \case Nothing -> return False - Just endpoint -> do - liftIO $ print ("endpoint", endpoint) - return False + Just endpoint -> flip catchNonAsync (const $ return False) $ do + sha256 <- case extractKeySha256 k of + Just sha -> pure sha + Nothing -> do + sha <- liftIO $ + show . sha2_256 <$> L.readFile src + -- TODO: rmemeber the sha256 for this key, + -- to use when retrieving it. + return (T.pack sha) + size <- case extractKeySize k of + Just size -> pure size + Nothing -> do + -- TODO: remember the size of this key, + -- to use when retrieving it. + liftIO $ getFileSize src + let obj = LFS.TransferRequestObject + { LFS.req_oid = sha256 + , LFS.req_size = size + } + let req = LFS.TransferRequest + { LFS.req_operation = LFS.RequestUpload + , LFS.req_transfers = [LFS.Basic] + , LFS.req_ref = Nothing + , LFS.req_objects = [obj] + } + sendTransferRequest req endpoint >>= \case + Left err -> do + warning err + return False + Right resp -> do + body <- liftIO $ httpBodyStorer src p + forM_ (LFS.objects resp) $ + send body sha256 size + return True + where + send body sha256 size tro + | LFS.resp_oid tro /= sha256 = + giveup "git-lfs server requested other sha256 than the one we asked to send" + | LFS.resp_size tro /= size = + giveup "git-lfs server requested other object size than we asked to send" + | otherwise = case LFS.resp_error tro of + Just err -> giveup $ + T.unpack $ LFS.respobjerr_message err + Nothing -> case LFS.resp_actions tro of + Nothing -> noop + Just op -> case LFS.uploadOperationRequests op body sha256 size of + Nothing -> giveup "unable to parse git-lfs server upload url" + Just [] -> noop -- server already has it + Just reqs -> do + uo <- getUrlOptions + let reqs' = map (setRequestCheckStatus . applyRequest uo) reqs + liftIO $ forM_ reqs $ \r -> do + debugM "git-lfs" (show r) + httpLbs r (httpManager uo) retrieve :: TVar LFSHandle -> Retriever retrieve h = byteRetriever $ \k sink -> getLFSEndpoint LFS.RequestDownload h >>= \case diff --git a/Utility/GitLFS.hs b/Utility/GitLFS.hs index b85b71e1d4..e3edb7a216 100644 --- a/Utility/GitLFS.hs +++ b/Utility/GitLFS.hs @@ -381,9 +381,12 @@ downloadOperationRequest = operationParamsRequest . download -- | Builds http request to perform an upload. The content to upload is -- provided in the RequestBody, along with its SHA256 and size. -- --- If the LFS server requested verification, there will be a second +-- When the LFS server requested verification, there will be a second -- Request that does that; it should be run only after the upload has -- succeeded. +-- +-- When the LFS server already contains the object, an empty list will be +-- returned. uploadOperationRequests :: UploadOperation -> RequestBody -> SHA256 -> Integer -> Maybe [Request] uploadOperationRequests op content oid size = case (mkdlreq, mkverifyreq) of From 74e9e3ccf0022779da4ff96a0c749239d36ad03f Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Sat, 3 Aug 2019 11:15:08 -0400 Subject: [PATCH 17/37] add to request headers, don't overwrite --- Utility/GitLFS.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Utility/GitLFS.hs b/Utility/GitLFS.hs index e3edb7a216..40f594448e 100644 --- a/Utility/GitLFS.hs +++ b/Utility/GitLFS.hs @@ -349,7 +349,7 @@ type HostUser = String addLfsJsonHeaders :: Request -> Request addLfsJsonHeaders r = r - { requestHeaders = + { requestHeaders = requestHeaders r ++ [ ("Accept", lfsjson) , ("Content-Type", lfsjson) ] From a16e83eec8fe6cd2ca5bae5590f9d5b262ccb5ee Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Sat, 3 Aug 2019 11:30:06 -0400 Subject: [PATCH 18/37] also debug http response status code --- Remote/GitLFS.hs | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) diff --git a/Remote/GitLFS.hs b/Remote/GitLFS.hs index a94ad58478..44a58d60f5 100644 --- a/Remote/GitLFS.hs +++ b/Remote/GitLFS.hs @@ -205,19 +205,26 @@ getLFSEndpoint tro hv = do LFS.RequestDownload -> downloadEndpoint LFS.RequestUpload -> uploadEndpoint +-- makeAPIRequest :: Request -> Annex (Response t) +makeAPIRequest req = do + uo <- getUrlOptions + let req' = applyRequest uo req + liftIO $ debugM "git-lfs" (show req') + resp <- liftIO $ httpLbs req' (httpManager uo) + -- Only debug the http status code, not the json + -- which may include an authentication token. + liftIO $ debugM "git-lfs" (show $ responseStatus resp) + return resp + sendTransferRequest :: LFS.IsTransferResponseOperation op => LFS.TransferRequest -> LFS.Endpoint -> Annex (Either String (LFS.TransferResponse op)) -sendTransferRequest req endpoint = do - uo <- getUrlOptions - case applyRequest uo <$> LFS.startTransferRequest endpoint req of +sendTransferRequest req endpoint = + case LFS.startTransferRequest endpoint req of Just httpreq -> do - liftIO $ debugM "git-lfs" (show httpreq) - httpresp <- liftIO $ httpLbs - (setRequestCheckStatus httpreq) - (httpManager uo) + httpresp <- makeAPIRequest $ setRequestCheckStatus httpreq return $ case LFS.parseTransferResponse (responseBody httpresp) of Left (Right tro) -> Left $ T.unpack $ LFS.resperr_message tro @@ -290,12 +297,8 @@ store h = fileStorer $ \k src p -> getLFSEndpoint LFS.RequestUpload h >>= \case Just op -> case LFS.uploadOperationRequests op body sha256 size of Nothing -> giveup "unable to parse git-lfs server upload url" Just [] -> noop -- server already has it - Just reqs -> do - uo <- getUrlOptions - let reqs' = map (setRequestCheckStatus . applyRequest uo) reqs - liftIO $ forM_ reqs $ \r -> do - debugM "git-lfs" (show r) - httpLbs r (httpManager uo) + Just reqs -> forM_ reqs $ + makeAPIRequest . setRequestCheckStatus retrieve :: TVar LFSHandle -> Retriever retrieve h = byteRetriever $ \k sink -> getLFSEndpoint LFS.RequestDownload h >>= \case From f536a0b264fd233040fd747e34907cbf48468470 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Sat, 3 Aug 2019 11:31:02 -0400 Subject: [PATCH 19/37] weaken comment I'm seeing the github lfs server request an upload of an object that has already been uploaded to it before. Probably because they offload storage to S3 and so skipped the overhead of checking for an unncessary upload. --- Utility/GitLFS.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Utility/GitLFS.hs b/Utility/GitLFS.hs index 40f594448e..cc3e0f37a5 100644 --- a/Utility/GitLFS.hs +++ b/Utility/GitLFS.hs @@ -385,7 +385,7 @@ downloadOperationRequest = operationParamsRequest . download -- Request that does that; it should be run only after the upload has -- succeeded. -- --- When the LFS server already contains the object, an empty list will be +-- When the LFS server already contains the object, an empty list may be -- returned. uploadOperationRequests :: UploadOperation -> RequestBody -> SHA256 -> Integer -> Maybe [Request] uploadOperationRequests op content oid size = From 5be0a35dae638fb8d90f5a5cdffbf5967648b685 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Sat, 3 Aug 2019 12:21:28 -0400 Subject: [PATCH 20/37] implemented checkPresent for git-lfs --- Remote/GitLFS.hs | 65 ++++++++++++++++++++++++++++++++++++++++------- Utility/GitLFS.hs | 20 +++++++++------ 2 files changed, 68 insertions(+), 17 deletions(-) diff --git a/Remote/GitLFS.hs b/Remote/GitLFS.hs index 44a58d60f5..78049ce3fd 100644 --- a/Remote/GitLFS.hs +++ b/Remote/GitLFS.hs @@ -34,6 +34,7 @@ import Crypto import Control.Concurrent.STM import Data.String import Network.HTTP.Client +import Network.HTTP.Types import System.Log.Logger import qualified Data.Map as M import qualified Network.URI as URI @@ -205,7 +206,7 @@ getLFSEndpoint tro hv = do LFS.RequestDownload -> downloadEndpoint LFS.RequestUpload -> uploadEndpoint --- makeAPIRequest :: Request -> Annex (Response t) +makeAPIRequest :: Request -> Annex (Response L.ByteString) makeAPIRequest req = do uo <- getUrlOptions let req' = applyRequest uo req @@ -226,11 +227,11 @@ sendTransferRequest req endpoint = Just httpreq -> do httpresp <- makeAPIRequest $ setRequestCheckStatus httpreq return $ case LFS.parseTransferResponse (responseBody httpresp) of - Left (Right tro) -> Left $ + LFS.ParsedTransferResponse resp -> Right resp + LFS.ParsedTransferResponseError tro -> Left $ T.unpack $ LFS.resperr_message tro - Left (Left err) -> Left err - Right resp -> Right resp - Nothing -> return (Left "unable to parse git-lfs endpoint url") + LFS.ParseFailed err -> Left err + Nothing -> return $ Left "unable to parse git-lfs endpoint url" extractKeySha256 :: Key -> Maybe LFS.SHA256 extractKeySha256 k = case keyVariety k of @@ -246,10 +247,28 @@ extractKeySize k | isEncKey k = Nothing | otherwise = keySize k +mkDownloadRequest :: Key -> Annex (Maybe LFS.TransferRequest) +mkDownloadRequest k = case (extractKeySha256 k, extractKeySize k) of + (Just sha256, Just sz) -> go sha256 sz + -- TODO get from git-annex branch + _ -> return Nothing + where + go sha256 sz = do + let obj = LFS.TransferRequestObject + { LFS.req_oid = sha256 + , LFS.req_size = sz + } + return $ Just $ LFS.TransferRequest + { LFS.req_operation = LFS.RequestDownload + , LFS.req_transfers = [LFS.Basic] + , LFS.req_ref = Nothing + , LFS.req_objects = [obj] + } + store :: TVar LFSHandle -> Storer store h = fileStorer $ \k src p -> getLFSEndpoint LFS.RequestUpload h >>= \case Nothing -> return False - Just endpoint -> flip catchNonAsync (const $ return False) $ do + Just endpoint -> flip catchNonAsync failederr $ do sha256 <- case extractKeySha256 k of Just sha -> pure sha Nothing -> do @@ -299,6 +318,9 @@ store h = fileStorer $ \k src p -> getLFSEndpoint LFS.RequestUpload h >>= \case Just [] -> noop -- server already has it Just reqs -> forM_ reqs $ makeAPIRequest . setRequestCheckStatus + failederr e = do + warning (show e) + return False retrieve :: TVar LFSHandle -> Retriever retrieve h = byteRetriever $ \k sink -> getLFSEndpoint LFS.RequestDownload h >>= \case @@ -310,9 +332,34 @@ retrieve h = byteRetriever $ \k sink -> getLFSEndpoint LFS.RequestDownload h >>= checkKey :: TVar LFSHandle -> CheckPresent checkKey h key = getLFSEndpoint LFS.RequestDownload h >>= \case Nothing -> giveup "unable to connect to git-lfs endpoint" - Just endpoint -> do - liftIO $ print ("endpoint", endpoint) - return False + Just endpoint -> mkDownloadRequest key >>= \case + -- Unable to find enough information to request the key + -- from git-lfs, so it's not present there. + Nothing -> return False + Just req -> case LFS.startTransferRequest endpoint req of + Nothing -> giveup "unable to parse git-lfs endpoint url" + Just httpreq -> go =<< makeAPIRequest httpreq + where + go httpresp + | responseStatus httpresp == status200 = + go' $ LFS.parseTransferResponse (responseBody httpresp) + | otherwise = + giveup $ "git-lfs server refused request: " ++ show (responseStatus httpresp) + + go' :: LFS.ParsedTransferResponse LFS.DownloadOperation -> Annex Bool + go' (LFS.ParseFailed err) = + giveup $ "unable to parse response from git-lfs server: " ++ err + -- If the server responds with a json error message, + -- the content is presumably not present. + go' (LFS.ParsedTransferResponseError _) = return False + -- If the server responds with at least one download operation, + -- we will assume the content is present. We could also try to HEAD + -- that download, but there's no guarantee HEAD is supported, and + -- at most that would detect breakage where the server is confused + -- about what objects it has. + go' (LFS.ParsedTransferResponse resp) = + return $ not $ null $ + mapMaybe LFS.resp_actions $ LFS.objects resp retrieveCheap :: Key -> AssociatedFile -> FilePath -> Annex Bool retrieveCheap _ _ _ = return False diff --git a/Utility/GitLFS.hs b/Utility/GitLFS.hs index cc3e0f37a5..00fac8b1ad 100644 --- a/Utility/GitLFS.hs +++ b/Utility/GitLFS.hs @@ -24,7 +24,7 @@ module Utility.GitLFS ( IsTransferResponseOperation, DownloadOperation, UploadOperation, - ParsedTransferResponse, + ParsedTransferResponse(..), parseTransferResponse, -- * making transfers downloadOperationRequest, @@ -311,7 +311,9 @@ guessEndpoint uri = case URI.uriScheme uri of _ -> Nothing where endpoint = EndpointURI $ uri - { URI.uriScheme = "https" + -- force https because the git-lfs protocol uses http + -- basic auth tokens, which should not be exposed + { URI.uriScheme = "https:" , URI.uriPath = guessedpath } @@ -357,8 +359,10 @@ addLfsJsonHeaders r = r where lfsjson = "application/vnd.git-lfs+json" -type ParsedTransferResponse op = - Either (Either String TransferResponseError) (TransferResponse op) +data ParsedTransferResponse op + = ParsedTransferResponse (TransferResponse op) + | ParsedTransferResponseError TransferResponseError + | ParseFailed String -- | Parse the body of a response to a transfer request. parseTransferResponse @@ -366,13 +370,13 @@ parseTransferResponse => L.ByteString -> ParsedTransferResponse op parseTransferResponse resp = case eitherDecode resp of + Right tr -> ParsedTransferResponse tr -- If unable to decode as a TransferResponse, try to decode -- as a TransferResponseError instead, in case the LFS server -- sent an error message. - Left err -> case eitherDecode resp of - Right responseerror -> Left (Right responseerror) - Left _ -> Left $ Left err - Right tr -> Right tr + Left err -> + either (const $ ParseFailed err) ParsedTransferResponseError $ + eitherDecode resp -- | Builds a http request to perform a download. downloadOperationRequest :: DownloadOperation -> Maybe Request From 28c0395d619eb6b491fbc34b3b58ef1cabc0b7a3 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Sat, 3 Aug 2019 12:51:16 -0400 Subject: [PATCH 21/37] start at retieval from LFS Doesn't yet download the content, which will need to support resuming. --- Remote/GitLFS.hs | 55 ++++++++++++++++++++++++++++++------------------ 1 file changed, 35 insertions(+), 20 deletions(-) diff --git a/Remote/GitLFS.hs b/Remote/GitLFS.hs index 78049ce3fd..02dee87e25 100644 --- a/Remote/GitLFS.hs +++ b/Remote/GitLFS.hs @@ -21,23 +21,20 @@ import Remote.Helper.Special import Remote.Helper.ExportImport import Remote.Helper.Git import Remote.Helper.Http -import qualified Remote.Helper.Ssh as Ssh import Annex.Ssh import Annex.UUID import Utility.SshHost import qualified Utility.GitLFS as LFS import Backend.Hash import Utility.Hash -import Utility.FileSize import Crypto import Control.Concurrent.STM import Data.String -import Network.HTTP.Client +import Network.HTTP.Client hiding (port) import Network.HTTP.Types import System.Log.Logger import qualified Data.Map as M -import qualified Network.URI as URI import qualified Data.ByteString.Lazy as L import qualified Data.Text as T import qualified Data.Text.Encoding as E @@ -56,13 +53,13 @@ remote = RemoteType gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote) gen r u c gc = do - handle <- liftIO $ newTVarIO $ LFSHandle Nothing Nothing r gc + h <- liftIO $ newTVarIO $ LFSHandle Nothing Nothing r gc cst <- remoteCost gc expensiveRemoteCost return $ Just $ specialRemote' specialcfg c - (simplyPrepare $ store handle) - (simplyPrepare $ retrieve handle) - (simplyPrepare $ remove handle) - (simplyPrepare $ checkKey handle) + (simplyPrepare $ store h) + (simplyPrepare $ retrieve h) + (simplyPrepare $ remove h) + (simplyPrepare $ checkKey h) (this cst) where this cst = Remote @@ -206,8 +203,10 @@ getLFSEndpoint tro hv = do LFS.RequestDownload -> downloadEndpoint LFS.RequestUpload -> uploadEndpoint -makeAPIRequest :: Request -> Annex (Response L.ByteString) -makeAPIRequest req = do +-- Make an API request that is expected to have a small response body. +-- Not for use in downloading an object. +makeSmallAPIRequest :: Request -> Annex (Response L.ByteString) +makeSmallAPIRequest req = do uo <- getUrlOptions let req' = applyRequest uo req liftIO $ debugM "git-lfs" (show req') @@ -225,7 +224,7 @@ sendTransferRequest sendTransferRequest req endpoint = case LFS.startTransferRequest endpoint req of Just httpreq -> do - httpresp <- makeAPIRequest $ setRequestCheckStatus httpreq + httpresp <- makeSmallAPIRequest $ setRequestCheckStatus httpreq return $ case LFS.parseTransferResponse (responseBody httpresp) of LFS.ParsedTransferResponse resp -> Right resp LFS.ParsedTransferResponseError tro -> Left $ @@ -317,17 +316,33 @@ store h = fileStorer $ \k src p -> getLFSEndpoint LFS.RequestUpload h >>= \case Nothing -> giveup "unable to parse git-lfs server upload url" Just [] -> noop -- server already has it Just reqs -> forM_ reqs $ - makeAPIRequest . setRequestCheckStatus + makeSmallAPIRequest . setRequestCheckStatus failederr e = do warning (show e) return False retrieve :: TVar LFSHandle -> Retriever -retrieve h = byteRetriever $ \k sink -> getLFSEndpoint LFS.RequestDownload h >>= \case - Nothing -> return False - Just endpoint -> do - liftIO $ print ("endpoint", endpoint) - return False +retrieve h = fileRetriever $ \dest k p -> getLFSEndpoint LFS.RequestDownload h >>= \case + Nothing -> giveup "unable to connect to git-lfs endpoint" + Just endpoint -> mkDownloadRequest k >>= \case + Nothing -> giveup "unable to download this object from git-lfs" + Just req -> sendTransferRequest req endpoint >>= \case + Left err -> giveup (show err) + Right resp -> case LFS.objects resp of + [] -> giveup "git-lfs server did not provide a way to download this object" + (tro:_) -> receive dest p tro + + where + receive dest p tro = case LFS.resp_error tro of + Just err -> giveup $ T.unpack $ LFS.respobjerr_message err + Nothing -> case LFS.resp_actions tro of + Nothing -> giveup "git-lfs server did not provide a way to download this object" + Just op -> case LFS.downloadOperationRequest op of + Nothing -> giveup "unable to parse git-lfs server download url" + Just req -> + -- TODO stream to file + -- TODO resume and append if the file already exists + giveup "TODO" checkKey :: TVar LFSHandle -> CheckPresent checkKey h key = getLFSEndpoint LFS.RequestDownload h >>= \case @@ -338,7 +353,7 @@ checkKey h key = getLFSEndpoint LFS.RequestDownload h >>= \case Nothing -> return False Just req -> case LFS.startTransferRequest endpoint req of Nothing -> giveup "unable to parse git-lfs endpoint url" - Just httpreq -> go =<< makeAPIRequest httpreq + Just httpreq -> go =<< makeSmallAPIRequest httpreq where go httpresp | responseStatus httpresp == status200 = @@ -365,6 +380,6 @@ retrieveCheap :: Key -> AssociatedFile -> FilePath -> Annex Bool retrieveCheap _ _ _ = return False remove :: TVar LFSHandle -> Remover -remove h key = do +remove _h _key = do warning "git-lfs does not support removing content" return False From b82ecf7076355bc38c63f4e0d2182c4896cdc693 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Sat, 3 Aug 2019 16:23:47 -0400 Subject: [PATCH 22/37] verify that LFS server responds with requested object The protocol design allows the server to respond with some other object; if a server for some reason a server did that, it would not be right for git-annex to download its content. I don't think it would be a security hole, since git-annex is downloading a specific key and will verify the key's content. Seems like a good idea to belt-and-suspenders test for such a misuse of the protocol. --- Remote/GitLFS.hs | 49 ++++++++++++++++++++++++++---------------------- 1 file changed, 27 insertions(+), 22 deletions(-) diff --git a/Remote/GitLFS.hs b/Remote/GitLFS.hs index 02dee87e25..dcbdc439f9 100644 --- a/Remote/GitLFS.hs +++ b/Remote/GitLFS.hs @@ -246,7 +246,7 @@ extractKeySize k | isEncKey k = Nothing | otherwise = keySize k -mkDownloadRequest :: Key -> Annex (Maybe LFS.TransferRequest) +mkDownloadRequest :: Key -> Annex (Maybe (LFS.TransferRequest, LFS.SHA256, Integer)) mkDownloadRequest k = case (extractKeySha256 k, extractKeySize k) of (Just sha256, Just sz) -> go sha256 sz -- TODO get from git-annex branch @@ -257,12 +257,13 @@ mkDownloadRequest k = case (extractKeySha256 k, extractKeySize k) of { LFS.req_oid = sha256 , LFS.req_size = sz } - return $ Just $ LFS.TransferRequest + let req = LFS.TransferRequest { LFS.req_operation = LFS.RequestDownload , LFS.req_transfers = [LFS.Basic] , LFS.req_ref = Nothing , LFS.req_objects = [obj] } + return $ Just (req, sha256, sz) store :: TVar LFSHandle -> Storer store h = fileStorer $ \k src p -> getLFSEndpoint LFS.RequestUpload h >>= \case @@ -303,10 +304,8 @@ store h = fileStorer $ \k src p -> getLFSEndpoint LFS.RequestUpload h >>= \case return True where send body sha256 size tro - | LFS.resp_oid tro /= sha256 = - giveup "git-lfs server requested other sha256 than the one we asked to send" - | LFS.resp_size tro /= size = - giveup "git-lfs server requested other object size than we asked to send" + | LFS.resp_oid tro /= sha256 || LFS.resp_size tro /= size = + giveup "git-lfs server requested other object than the one we asked to send" | otherwise = case LFS.resp_error tro of Just err -> giveup $ T.unpack $ LFS.respobjerr_message err @@ -326,12 +325,14 @@ retrieve h = fileRetriever $ \dest k p -> getLFSEndpoint LFS.RequestDownload h > Nothing -> giveup "unable to connect to git-lfs endpoint" Just endpoint -> mkDownloadRequest k >>= \case Nothing -> giveup "unable to download this object from git-lfs" - Just req -> sendTransferRequest req endpoint >>= \case + Just (req, sha256, size) -> sendTransferRequest req endpoint >>= \case Left err -> giveup (show err) Right resp -> case LFS.objects resp of [] -> giveup "git-lfs server did not provide a way to download this object" - (tro:_) -> receive dest p tro - + (tro:_) + | LFS.resp_oid tro /= sha256 || LFS.resp_size tro /= size -> + giveup "git-lfs server replied with other object than the one we requested" + | otherwise -> receive dest p tro where receive dest p tro = case LFS.resp_error tro of Just err -> giveup $ T.unpack $ LFS.respobjerr_message err @@ -351,30 +352,34 @@ checkKey h key = getLFSEndpoint LFS.RequestDownload h >>= \case -- Unable to find enough information to request the key -- from git-lfs, so it's not present there. Nothing -> return False - Just req -> case LFS.startTransferRequest endpoint req of + Just (req, sha256, size) -> case LFS.startTransferRequest endpoint req of Nothing -> giveup "unable to parse git-lfs endpoint url" - Just httpreq -> go =<< makeSmallAPIRequest httpreq + Just httpreq -> go sha256 size =<< makeSmallAPIRequest httpreq where - go httpresp - | responseStatus httpresp == status200 = - go' $ LFS.parseTransferResponse (responseBody httpresp) - | otherwise = - giveup $ "git-lfs server refused request: " ++ show (responseStatus httpresp) + go sha256 size httpresp + | responseStatus httpresp == status200 = go' sha256 size $ + LFS.parseTransferResponse (responseBody httpresp) + | otherwise = giveup $ + "git-lfs server refused request: " ++ show (responseStatus httpresp) - go' :: LFS.ParsedTransferResponse LFS.DownloadOperation -> Annex Bool - go' (LFS.ParseFailed err) = + go' :: LFS.SHA256 -> Integer -> LFS.ParsedTransferResponse LFS.DownloadOperation -> Annex Bool + go' _ _ (LFS.ParseFailed err) = giveup $ "unable to parse response from git-lfs server: " ++ err -- If the server responds with a json error message, -- the content is presumably not present. - go' (LFS.ParsedTransferResponseError _) = return False + go' _ _ (LFS.ParsedTransferResponseError _) = return False -- If the server responds with at least one download operation, -- we will assume the content is present. We could also try to HEAD -- that download, but there's no guarantee HEAD is supported, and -- at most that would detect breakage where the server is confused -- about what objects it has. - go' (LFS.ParsedTransferResponse resp) = - return $ not $ null $ - mapMaybe LFS.resp_actions $ LFS.objects resp + go' sha256 size (LFS.ParsedTransferResponse resp) = + case LFS.objects resp of + [] -> return False + (tro:_) + | LFS.resp_oid tro /= sha256 || LFS.resp_size tro /= size -> + giveup "git-lfs server replied with other object than the one we requested" + | otherwise -> return True retrieveCheap :: Key -> AssociatedFile -> FilePath -> Annex Bool retrieveCheap _ _ _ = return False From 4af55c42bfb3e8adcd103a8deb82310753a3b047 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Sun, 4 Aug 2019 12:31:54 -0400 Subject: [PATCH 23/37] factored out downloadConduit from download useful when an API provides a Request to download --- Utility/Url.hs | 125 +++++++++++++++++++++++++++---------------------- 1 file changed, 68 insertions(+), 57 deletions(-) diff --git a/Utility/Url.hs b/Utility/Url.hs index 15bc0239f3..a038bb68a7 100644 --- a/Utility/Url.hs +++ b/Utility/Url.hs @@ -29,6 +29,7 @@ module Utility.Url ( assumeUrlExists, download, downloadQuiet, + downloadConduit, sinkResponseFile, downloadPartial, parseURIRelaxed, @@ -335,7 +336,8 @@ download' noerror meterupdate url file uo = case (urlDownloader uo, parseUrlRequest (show u)) of (DownloadWithConduit (DownloadWithCurlRestricted r), Just req) -> catchJust (matchStatusCodeException (== found302)) - (downloadconduit req) + ((downloadConduit meterupdate req file uo >> return True) + `catchNonAsync` (dlfailed . show)) (followredir r) (DownloadWithConduit (DownloadWithCurlRestricted r), Nothing) | isfileurl u -> downloadfile u @@ -354,58 +356,6 @@ download' noerror meterupdate url file uo = ftpport = 21 - downloadconduit req = catchMaybeIO (getFileSize file) >>= \case - Just sz | sz > 0 -> resumeconduit req' sz - _ -> runResourceT $ do - liftIO $ debugM "url" (show req') - resp <- http req' (httpManager uo) - if responseStatus resp == ok200 - then store zeroBytesProcessed WriteMode resp - else showrespfailure resp - where - req' = applyRequest uo $ req - -- Override http-client's default decompression of gzip - -- compressed files. We want the unmodified file content. - { requestHeaders = (hAcceptEncoding, "identity") : - filter ((/= hAcceptEncoding) . fst) - (requestHeaders req) - , decompress = const False - } - - alreadydownloaded sz s h = s == requestedRangeNotSatisfiable416 - && case lookup hContentRange h of - -- This could be improved by fixing - -- https://github.com/aristidb/http-types/issues/87 - Just crh -> crh == B8.fromString ("bytes */" ++ show sz) - -- Some http servers send no Content-Range header when - -- the range extends beyond the end of the file. - -- There is no way to distinguish between the file - -- being the same size on the http server, vs - -- it being shorter than the file we already have. - -- So assume we have the whole content of the file - -- already, the same as wget and curl do. - Nothing -> True - - -- Resume download from where a previous download was interrupted, - -- when supported by the http server. The server may also opt to - -- send the whole file rather than resuming. - resumeconduit req sz = catchJust - (matchStatusCodeHeadersException (alreadydownloaded sz)) - dl - (const $ return True) - where - dl = runResourceT $ do - let req' = req { requestHeaders = resumeFromHeader sz : requestHeaders req } - liftIO $ debugM "url" (show req') - resp <- http req' (httpManager uo) - if responseStatus resp == partialContent206 - then store (BytesProcessed sz) AppendMode resp - else if responseStatus resp == ok200 - then store zeroBytesProcessed WriteMode resp - else showrespfailure resp - - showrespfailure = liftIO . dlfailed . B8.toString - . statusMessage . responseStatus showhttpexception he = do let msg = case he of HttpExceptionRequest _ (StatusCodeException r _) -> @@ -417,6 +367,7 @@ download' noerror meterupdate url file uo = HttpExceptionRequest _ other -> show other _ -> show he dlfailed msg + dlfailed msg | noerror = return False | otherwise = do @@ -424,10 +375,6 @@ download' noerror meterupdate url file uo = hFlush stderr return False - store initialp mode resp = do - sinkResponseFile meterupdate initialp file mode resp - return True - basecurlparams = curlParams uo [ if noerror then Param "-S" @@ -453,6 +400,8 @@ download' noerror meterupdate url file uo = L.writeFile file return True + -- Conduit does not support ftp, so will throw an exception on a + -- redirect to a ftp url; fall back to curl. followredir r ex@(HttpExceptionRequest _ (StatusCodeException resp _)) = case headMaybe $ map decodeBS $ getResponseHeader hLocation resp of Just url' -> case parseURIRelaxed url' of @@ -463,6 +412,68 @@ download' noerror meterupdate url file uo = Nothing -> throwIO ex followredir _ ex = throwIO ex +{- Download a perhaps large file using conduit, with auto-resume + - of incomplete downloads. + - + - Does not catch exceptions. + -} +downloadConduit :: MeterUpdate -> Request -> FilePath -> UrlOptions -> IO () +downloadConduit meterupdate req file uo = + catchMaybeIO (getFileSize file) >>= \case + Just sz | sz > 0 -> resumedownload sz + _ -> runResourceT $ do + liftIO $ debugM "url" (show req') + resp <- http req' (httpManager uo) + if responseStatus resp == ok200 + then store zeroBytesProcessed WriteMode resp + else respfailure resp + where + req' = applyRequest uo $ req + -- Override http-client's default decompression of gzip + -- compressed files. We want the unmodified file content. + { requestHeaders = (hAcceptEncoding, "identity") : + filter ((/= hAcceptEncoding) . fst) + (requestHeaders req) + , decompress = const False + } + + -- Resume download from where a previous download was interrupted, + -- when supported by the http server. The server may also opt to + -- send the whole file rather than resuming. + resumedownload sz = catchJust + (matchStatusCodeHeadersException (alreadydownloaded sz)) + dl + (const noop) + where + dl = runResourceT $ do + let req'' = req' { requestHeaders = resumeFromHeader sz : requestHeaders req } + liftIO $ debugM "url" (show req'') + resp <- http req'' (httpManager uo) + if responseStatus resp == partialContent206 + then store (BytesProcessed sz) AppendMode resp + else if responseStatus resp == ok200 + then store zeroBytesProcessed WriteMode resp + else respfailure resp + + alreadydownloaded sz s h = s == requestedRangeNotSatisfiable416 + && case lookup hContentRange h of + -- This could be improved by fixing + -- https://github.com/aristidb/http-types/issues/87 + Just crh -> crh == B8.fromString ("bytes */" ++ show sz) + -- Some http servers send no Content-Range header when + -- the range extends beyond the end of the file. + -- There is no way to distinguish between the file + -- being the same size on the http server, vs + -- it being shorter than the file we already have. + -- So assume we have the whole content of the file + -- already, the same as wget and curl do. + Nothing -> True + + store initialp mode resp = + sinkResponseFile meterupdate initialp file mode resp + + respfailure = giveup . B8.toString . statusMessage . responseStatus + {- Sinks a Response's body to a file. The file can either be opened in - WriteMode or AppendMode. Updates the meter as data is received. - From 7269851550f0b9a08f84328e9d14ddec7a51f9c1 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Sun, 4 Aug 2019 12:32:36 -0400 Subject: [PATCH 24/37] download from LFS working including resuming --- Remote/GitLFS.hs | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/Remote/GitLFS.hs b/Remote/GitLFS.hs index dcbdc439f9..28d92678aa 100644 --- a/Remote/GitLFS.hs +++ b/Remote/GitLFS.hs @@ -23,16 +23,19 @@ import Remote.Helper.Git import Remote.Helper.Http import Annex.Ssh import Annex.UUID -import Utility.SshHost -import qualified Utility.GitLFS as LFS +import Crypto import Backend.Hash import Utility.Hash -import Crypto +import Utility.Metered +import Utility.SshHost +import qualified Utility.GitLFS as LFS import Control.Concurrent.STM import Data.String -import Network.HTTP.Client hiding (port) import Network.HTTP.Types +import Network.HTTP.Client hiding (port) +import Network.HTTP.Conduit (http) +import Control.Monad.Trans.Resource (runResourceT) import System.Log.Logger import qualified Data.Map as M import qualified Data.ByteString.Lazy as L @@ -332,18 +335,17 @@ retrieve h = fileRetriever $ \dest k p -> getLFSEndpoint LFS.RequestDownload h > (tro:_) | LFS.resp_oid tro /= sha256 || LFS.resp_size tro /= size -> giveup "git-lfs server replied with other object than the one we requested" - | otherwise -> receive dest p tro + | otherwise -> go dest p tro where - receive dest p tro = case LFS.resp_error tro of + go dest p tro = case LFS.resp_error tro of Just err -> giveup $ T.unpack $ LFS.respobjerr_message err Nothing -> case LFS.resp_actions tro of Nothing -> giveup "git-lfs server did not provide a way to download this object" Just op -> case LFS.downloadOperationRequest op of Nothing -> giveup "unable to parse git-lfs server download url" - Just req -> - -- TODO stream to file - -- TODO resume and append if the file already exists - giveup "TODO" + Just req -> do + uo <- getUrlOptions + liftIO $ downloadConduit p req dest uo checkKey :: TVar LFSHandle -> CheckPresent checkKey h key = getLFSEndpoint LFS.RequestDownload h >>= \case From 9aab851a5596b56f637b8cd5786fe307f6fe18f5 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Sun, 4 Aug 2019 12:43:16 -0400 Subject: [PATCH 25/37] fix reversion lost check of resp_actions in b82ecf7076355bc38c63f4e0d2182c4896cdc693 --- Remote/GitLFS.hs | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Remote/GitLFS.hs b/Remote/GitLFS.hs index 28d92678aa..7c5605fbfc 100644 --- a/Remote/GitLFS.hs +++ b/Remote/GitLFS.hs @@ -379,6 +379,8 @@ checkKey h key = getLFSEndpoint LFS.RequestDownload h >>= \case case LFS.objects resp of [] -> return False (tro:_) + | isNothing (LFS.resp_actions tro) -> return False + | isJust (LFS.resp_error tro) -> return False | LFS.resp_oid tro /= sha256 || LFS.resp_size tro /= size -> giveup "git-lfs server replied with other object than the one we requested" | otherwise -> return True From 408cb0af3916e7d9efc8fa6323acea053b119c0f Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Sun, 4 Aug 2019 12:43:53 -0400 Subject: [PATCH 26/37] remove unused imports --- Remote/GitLFS.hs | 3 --- 1 file changed, 3 deletions(-) diff --git a/Remote/GitLFS.hs b/Remote/GitLFS.hs index 7c5605fbfc..02d83a4b67 100644 --- a/Remote/GitLFS.hs +++ b/Remote/GitLFS.hs @@ -26,7 +26,6 @@ import Annex.UUID import Crypto import Backend.Hash import Utility.Hash -import Utility.Metered import Utility.SshHost import qualified Utility.GitLFS as LFS @@ -34,8 +33,6 @@ import Control.Concurrent.STM import Data.String import Network.HTTP.Types import Network.HTTP.Client hiding (port) -import Network.HTTP.Conduit (http) -import Control.Monad.Trans.Resource (runResourceT) import System.Log.Logger import qualified Data.Map as M import qualified Data.ByteString.Lazy as L From c5ed11bc3fa44bb4d420e872351f79613f29b713 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Sun, 4 Aug 2019 13:15:33 -0400 Subject: [PATCH 27/37] documentation for git-lfs special remote --- doc/special_remotes.mdwn | 1 + doc/special_remotes/git-lfs.mdwn | 64 +++++++++++++++++++++++++++ doc/tips/storing_data_in_git-lfs.mdwn | 23 ++++++++++ 3 files changed, 88 insertions(+) create mode 100644 doc/special_remotes/git-lfs.mdwn create mode 100644 doc/tips/storing_data_in_git-lfs.mdwn diff --git a/doc/special_remotes.mdwn b/doc/special_remotes.mdwn index 0ae4346ed0..0203f5778f 100644 --- a/doc/special_remotes.mdwn +++ b/doc/special_remotes.mdwn @@ -15,6 +15,7 @@ the git history is not stored in them. * [[ddar]] * [[directory]] * [[gcrypt]] (encrypted git repositories!) +* [[git-lfs]] * [[hook]] * [[rclone]] * [[rsync]] diff --git a/doc/special_remotes/git-lfs.mdwn b/doc/special_remotes/git-lfs.mdwn new file mode 100644 index 0000000000..21e07fa223 --- /dev/null +++ b/doc/special_remotes/git-lfs.mdwn @@ -0,0 +1,64 @@ +git-annex has a special remote that lets it store content in git-lfs +repositories. + +See [[tips/storing_data_in_git-lfs]] for some examples of how to use this. + +## configuration + +These parameters can be passed to `git annex initremote` to configure +the git-lfs special remote: + +* `url` - Required. The url to the git-lfs repository to use. + Can be either a ssh url (scp-style is also accepted) or a http url. + But currently, a http url accesses the git-lfs repository without + authentication. To authenticate, you will need to use a ssh url. + + This parameter needs to be specified in the initial `git annex + initremote` but also each time you `git annex enableremote` + an existing git-lfs special remote. It's fine to use different urls + at different times as long as they point to the same git-lfs repository. + +* `encryption` - One of "none", "hybrid", "shared", or "pubkey". + Required. See [[encryption]]. + +* `keyid` - Specifies the gpg key to use for encryption of both the files + git-annex stores in the repository, as well as to encrypt the git + repository itself. May be repeated when multiple participants + should have access to the repository. + +## efficiency note + +Since git-lfs uses SHA256 checksums, git-annex needs to keep track of the +SHA256 of content stored in it, in order to be able to retrieve that +content. When a git-annex key uses a [[backend|backends]] +of SHA256 or SHA256E, that's easy. But, if a git-annex key uses some +other backend, git-annex has to additionally store the SHA256 checksum +into the git-annex branch when storing content in git-lfs. That adds a +small bit of size overhead to using this remote. + +## encryption notes + +The encryption= parameter only makes git-annex encrypt data stored on the +remote, `git push` can also be used with the remote (it is a git repository +after all), and data pushed to it with git will *not* be encrypted. + +When encrypting data sent to the git-lfs remote, git-annex always has to +store its SHA256 checksum in the git-annex branch. + +## limitations + +The git-lfs protocol does not support resuming uploads, and so an +interrupted upload will have to restart from the beginning. Interrupted +downloads will resume. + +git-lfs has a concept of git ref based access control, so a user may only +be able to send content associated with a particular git ref. git-annex +does not currently provide any git ref, so won't work with a git-lfs server +that uses that. + +git-annex only supports the "basic" git-lfs transfer adapter, but that's +the one used by most git-lfs servers. + +The git-lfs protocol is designed around batching of transfers, but +git-annex doesn't do batching. This may cause it to fall afoul of +rate limiting of git-lfs servers when transferring a lot of files. diff --git a/doc/tips/storing_data_in_git-lfs.mdwn b/doc/tips/storing_data_in_git-lfs.mdwn new file mode 100644 index 0000000000..d6ab8827ef --- /dev/null +++ b/doc/tips/storing_data_in_git-lfs.mdwn @@ -0,0 +1,23 @@ +git-annex can store data in [git-lfs](https://git-lfs.github.com/) +repositories, using the [[git-lfs special remote|special_remotes/git-lfs]]. + +Here's how to initialize a git-lfs special remote on Github. + + git annex initremote lfs type=git-lfs encryption=none url=git@github.com:yourname/yourrepo.git + +If you want git-annex to encrypt the objects it stores in the remote, +change the encryption= parameter. But be sure to read the +[[git-lfs special remote|special_remotes/git-lfs]] page's +**encryption notes** first! + +To enable the same remote in another clone of the repository, +you'll need to provide an url to it again. It's ok to provide a different +url as long as it points to the same git-lfs repository. + + git annex enableremote lfs url=https://github.com/yourname/yourrepo.git + +Note that http urls currently only allow read access to the git-lfs +repository. + +A git-lfs special remote also functions as a regular git remote. You can +use things like `git push` and `git pull` with it. From 8460bbcea9b9b361f30237c62577a96637c01911 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Sun, 4 Aug 2019 13:16:59 -0400 Subject: [PATCH 28/37] note on git-lfs program --- doc/tips/storing_data_in_git-lfs.mdwn | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doc/tips/storing_data_in_git-lfs.mdwn b/doc/tips/storing_data_in_git-lfs.mdwn index d6ab8827ef..8600941da7 100644 --- a/doc/tips/storing_data_in_git-lfs.mdwn +++ b/doc/tips/storing_data_in_git-lfs.mdwn @@ -1,6 +1,9 @@ git-annex can store data in [git-lfs](https://git-lfs.github.com/) repositories, using the [[git-lfs special remote|special_remotes/git-lfs]]. +You do not need the git-lfs program installed to use it, just a recent +enough version of git-annex. + Here's how to initialize a git-lfs special remote on Github. git annex initremote lfs type=git-lfs encryption=none url=git@github.com:yourname/yourrepo.git From f5eb28682ad0e80141bebd8c6731487b05c3760f Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Sun, 4 Aug 2019 13:59:24 -0400 Subject: [PATCH 29/37] expand --- Remote/GitLFS.hs | 5 +++++ doc/special_remotes/git-lfs.mdwn | 3 +++ doc/tips/storing_data_in_git-lfs.mdwn | 9 +++++++++ 3 files changed, 17 insertions(+) diff --git a/Remote/GitLFS.hs b/Remote/GitLFS.hs index 02d83a4b67..d9617058eb 100644 --- a/Remote/GitLFS.hs +++ b/Remote/GitLFS.hs @@ -107,6 +107,11 @@ mySetup _ mu _ c gc = do let repo = fromMaybe (giveup "Specify url=") $ M.lookup "url" c + -- TODO: don't allow using encryption w/o the user indicating they + -- know it will only encrypt git-annex objects, not git pushes + -- TODO: don't allow using encryption=shared w/o the user + -- indicating that pushing to the git-lfs remote will expose the + -- encrypted data. (c', _encsetup) <- encryptionSetup c gc -- The repo is not stored in the remote log, because the same diff --git a/doc/special_remotes/git-lfs.mdwn b/doc/special_remotes/git-lfs.mdwn index 21e07fa223..515720c715 100644 --- a/doc/special_remotes/git-lfs.mdwn +++ b/doc/special_remotes/git-lfs.mdwn @@ -47,6 +47,9 @@ store its SHA256 checksum in the git-annex branch. ## limitations +The git-lfs protocol does not support deleting content, so git-annex +**cannot delete anything** from a git-lfs special remote. + The git-lfs protocol does not support resuming uploads, and so an interrupted upload will have to restart from the beginning. Interrupted downloads will resume. diff --git a/doc/tips/storing_data_in_git-lfs.mdwn b/doc/tips/storing_data_in_git-lfs.mdwn index 8600941da7..83945059b3 100644 --- a/doc/tips/storing_data_in_git-lfs.mdwn +++ b/doc/tips/storing_data_in_git-lfs.mdwn @@ -22,5 +22,14 @@ url as long as it points to the same git-lfs repository. Note that http urls currently only allow read access to the git-lfs repository. +Once the remote is set up, you git-annex can store and retrieve content in +the usual ways: + + git annex copy * --to lfs + git annex get --from lfs + +But, git-annex **cannot delete anything** from a git-lfs special remote, +because the protocol does not support deletion. + A git-lfs special remote also functions as a regular git remote. You can use things like `git push` and `git pull` with it. From 19defc793292073ff35644b1a84d77703986448b Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Sun, 4 Aug 2019 14:32:06 -0400 Subject: [PATCH 30/37] fix reversion 4af55c42bfb3e8adcd103a8deb82310753a3b047 reordered the exception catching, preventing following ftp redirect --- Utility/Url.hs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Utility/Url.hs b/Utility/Url.hs index a038bb68a7..4ab79e5a76 100644 --- a/Utility/Url.hs +++ b/Utility/Url.hs @@ -336,9 +336,9 @@ download' noerror meterupdate url file uo = case (urlDownloader uo, parseUrlRequest (show u)) of (DownloadWithConduit (DownloadWithCurlRestricted r), Just req) -> catchJust (matchStatusCodeException (== found302)) - ((downloadConduit meterupdate req file uo >> return True) - `catchNonAsync` (dlfailed . show)) + (downloadConduit meterupdate req file uo >> return True) (followredir r) + `catchNonAsync` (dlfailed . show) (DownloadWithConduit (DownloadWithCurlRestricted r), Nothing) | isfileurl u -> downloadfile u | isftpurl u -> downloadcurlrestricted r u url ftpport From bf7da09cf6694c54a974102ed25ce5d28c64256d Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Mon, 5 Aug 2019 10:36:46 -0400 Subject: [PATCH 31/37] update --- doc/thanks/list | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/doc/thanks/list b/doc/thanks/list index 6b58c7273b..05eb8d3a65 100644 --- a/doc/thanks/list +++ b/doc/thanks/list @@ -60,3 +60,9 @@ Caleb Allen, TD, Pedro Araújo, +Ryan Newton, +David W, +L N D, +EVAN HENSHAWPLATH, +James Read, +Luke Shumaker, From 922434bccccbe8fc36058516372bdab3e399f59b Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Mon, 5 Aug 2019 10:37:19 -0400 Subject: [PATCH 32/37] remove extra blank line --- doc/thanks/list | 1 - 1 file changed, 1 deletion(-) diff --git a/doc/thanks/list b/doc/thanks/list index 05eb8d3a65..f7ce6507eb 100644 --- a/doc/thanks/list +++ b/doc/thanks/list @@ -59,7 +59,6 @@ Walltime, Caleb Allen, TD, Pedro Araújo, - Ryan Newton, David W, L N D, From 87e9ed38b8715e71e04eb13e7d7bd1066059911a Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Mon, 5 Aug 2019 10:43:51 -0400 Subject: [PATCH 33/37] expand encryption warning --- doc/special_remotes/git-lfs.mdwn | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/doc/special_remotes/git-lfs.mdwn b/doc/special_remotes/git-lfs.mdwn index 515720c715..c476185f9a 100644 --- a/doc/special_remotes/git-lfs.mdwn +++ b/doc/special_remotes/git-lfs.mdwn @@ -19,7 +19,7 @@ the git-lfs special remote: at different times as long as they point to the same git-lfs repository. * `encryption` - One of "none", "hybrid", "shared", or "pubkey". - Required. See [[encryption]]. + Required. See [[encryption]]. Also see the encryption notes below. * `keyid` - Specifies the gpg key to use for encryption of both the files git-annex stores in the repository, as well as to encrypt the git @@ -36,14 +36,19 @@ other backend, git-annex has to additionally store the SHA256 checksum into the git-annex branch when storing content in git-lfs. That adds a small bit of size overhead to using this remote. +When encrypting data sent to the git-lfs remote, git-annex always has to +store its SHA256 checksum in the git-annex branch. + ## encryption notes The encryption= parameter only makes git-annex encrypt data stored on the -remote, `git push` can also be used with the remote (it is a git repository +remote. `git push` can also be used with the remote (it is a git repository after all), and data pushed to it with git will *not* be encrypted. -When encrypting data sent to the git-lfs remote, git-annex always has to -store its SHA256 checksum in the git-annex branch. +This makes using encryption=shared with a git-lfs special remote very +unlikely to be secure, because the encryption key is committed to the git +repository. It would only make sense if you never pushed it to the +remote, or trusted the remote's host to keep it secure. ## limitations From ecf7f34c23107cfafa32600bade2d14fd9f398e9 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Mon, 5 Aug 2019 11:05:59 -0400 Subject: [PATCH 34/37] remember sha256 and size when necessary Using Logs.RemoteState for this means that if the same key gets uploaded twice to a git-lfs remote, but somehow has different content the two times (eg it's an URL key with non-stable content), the sha256/size of the newer content uploaded will overwrite what was remembered before. That seems ok; it just means that git-annex will request the newer version of the content when downloading from git-lfs. It will remember the sha256 and size if both are not known, or if only the sha256 is not known but the size is known, it only remembers the sha256, to avoid wasting space on the size. I did not add special case for when the sha256 is known and the size is not, because it's been a long time since git-annex created SHA256 keys without a size. (See doc/upgrades/SHA_size.mdwn) --- Remote/GitLFS.hs | 114 ++++++++++++++++++++++++++++------------------- 1 file changed, 69 insertions(+), 45 deletions(-) diff --git a/Remote/GitLFS.hs b/Remote/GitLFS.hs index d9617058eb..bdacf59489 100644 --- a/Remote/GitLFS.hs +++ b/Remote/GitLFS.hs @@ -27,6 +27,7 @@ import Crypto import Backend.Hash import Utility.Hash import Utility.SshHost +import Logs.RemoteState import qualified Utility.GitLFS as LFS import Control.Concurrent.STM @@ -56,10 +57,10 @@ gen r u c gc = do h <- liftIO $ newTVarIO $ LFSHandle Nothing Nothing r gc cst <- remoteCost gc expensiveRemoteCost return $ Just $ specialRemote' specialcfg c - (simplyPrepare $ store h) - (simplyPrepare $ retrieve h) + (simplyPrepare $ store u h) + (simplyPrepare $ retrieve u h) (simplyPrepare $ remove h) - (simplyPrepare $ checkKey h) + (simplyPrepare $ checkKey u h) (this cst) where this cst = Remote @@ -251,43 +252,22 @@ extractKeySize k | isEncKey k = Nothing | otherwise = keySize k -mkDownloadRequest :: Key -> Annex (Maybe (LFS.TransferRequest, LFS.SHA256, Integer)) -mkDownloadRequest k = case (extractKeySha256 k, extractKeySize k) of - (Just sha256, Just sz) -> go sha256 sz - -- TODO get from git-annex branch - _ -> return Nothing +mkUploadRequest :: UUID -> Key -> FilePath -> Annex (LFS.TransferRequest, LFS.SHA256, Integer) +mkUploadRequest u k content = case (extractKeySha256 k, extractKeySize k) of + (Just sha256, Just size) -> + ret sha256 size + (_, Just size) -> do + sha256 <- calcsha256 + remembersha256 sha256 + ret sha256 size + _ -> do + sha256 <- calcsha256 + size <- liftIO $ getFileSize content + rememberboth sha256 size + ret sha256 size where - go sha256 sz = do - let obj = LFS.TransferRequestObject - { LFS.req_oid = sha256 - , LFS.req_size = sz - } - let req = LFS.TransferRequest - { LFS.req_operation = LFS.RequestDownload - , LFS.req_transfers = [LFS.Basic] - , LFS.req_ref = Nothing - , LFS.req_objects = [obj] - } - return $ Just (req, sha256, sz) - -store :: TVar LFSHandle -> Storer -store h = fileStorer $ \k src p -> getLFSEndpoint LFS.RequestUpload h >>= \case - Nothing -> return False - Just endpoint -> flip catchNonAsync failederr $ do - sha256 <- case extractKeySha256 k of - Just sha -> pure sha - Nothing -> do - sha <- liftIO $ - show . sha2_256 <$> L.readFile src - -- TODO: rmemeber the sha256 for this key, - -- to use when retrieving it. - return (T.pack sha) - size <- case extractKeySize k of - Just size -> pure size - Nothing -> do - -- TODO: remember the size of this key, - -- to use when retrieving it. - liftIO $ getFileSize src + calcsha256 = liftIO $ T.pack . show . sha2_256 <$> L.readFile content + ret sha256 size = do let obj = LFS.TransferRequestObject { LFS.req_oid = sha256 , LFS.req_size = size @@ -298,6 +278,50 @@ store h = fileStorer $ \k src p -> getLFSEndpoint LFS.RequestUpload h >>= \case , LFS.req_ref = Nothing , LFS.req_objects = [obj] } + return (req, sha256, size) + + remembersha256 sha256 = setRemoteState u k (T.unpack sha256) + rememberboth sha256 size = setRemoteState u k $ + show size ++ " " ++ T.unpack sha256 + +mkDownloadRequest :: UUID -> Key -> Annex (Maybe (LFS.TransferRequest, LFS.SHA256, Integer)) +mkDownloadRequest u k = case (extractKeySha256 k, extractKeySize k) of + (Just sha256, Just size) -> ret sha256 size + (_, Just size) -> + remembersha256 >>= \case + Just sha256 -> ret sha256 size + Nothing -> return Nothing + _ -> do + rememberboth >>= \case + Just (sha256, size) -> ret sha256 size + Nothing -> return Nothing + where + ret sha256 size = do + let obj = LFS.TransferRequestObject + { LFS.req_oid = sha256 + , LFS.req_size = size + } + let req = LFS.TransferRequest + { LFS.req_operation = LFS.RequestDownload + , LFS.req_transfers = [LFS.Basic] + , LFS.req_ref = Nothing + , LFS.req_objects = [obj] + } + return $ Just (req, sha256, size) + remembersha256 = fmap T.pack <$> getRemoteState u k + rememberboth = maybe Nothing parse <$> getRemoteState u k + where + parse s = case words s of + [ssize, ssha256] -> do + size <- readish ssize + return (T.pack ssha256, size) + _ -> Nothing + +store :: UUID -> TVar LFSHandle -> Storer +store u h = fileStorer $ \k src p -> getLFSEndpoint LFS.RequestUpload h >>= \case + Nothing -> return False + Just endpoint -> flip catchNonAsync failederr $ do + (req, sha256, size) <- mkUploadRequest u k src sendTransferRequest req endpoint >>= \case Left err -> do warning err @@ -325,10 +349,10 @@ store h = fileStorer $ \k src p -> getLFSEndpoint LFS.RequestUpload h >>= \case warning (show e) return False -retrieve :: TVar LFSHandle -> Retriever -retrieve h = fileRetriever $ \dest k p -> getLFSEndpoint LFS.RequestDownload h >>= \case +retrieve :: UUID -> TVar LFSHandle -> Retriever +retrieve u h = fileRetriever $ \dest k p -> getLFSEndpoint LFS.RequestDownload h >>= \case Nothing -> giveup "unable to connect to git-lfs endpoint" - Just endpoint -> mkDownloadRequest k >>= \case + Just endpoint -> mkDownloadRequest u k >>= \case Nothing -> giveup "unable to download this object from git-lfs" Just (req, sha256, size) -> sendTransferRequest req endpoint >>= \case Left err -> giveup (show err) @@ -349,10 +373,10 @@ retrieve h = fileRetriever $ \dest k p -> getLFSEndpoint LFS.RequestDownload h > uo <- getUrlOptions liftIO $ downloadConduit p req dest uo -checkKey :: TVar LFSHandle -> CheckPresent -checkKey h key = getLFSEndpoint LFS.RequestDownload h >>= \case +checkKey :: UUID -> TVar LFSHandle -> CheckPresent +checkKey u h key = getLFSEndpoint LFS.RequestDownload h >>= \case Nothing -> giveup "unable to connect to git-lfs endpoint" - Just endpoint -> mkDownloadRequest key >>= \case + Just endpoint -> mkDownloadRequest u key >>= \case -- Unable to find enough information to request the key -- from git-lfs, so it's not present there. Nothing -> return False From 3f450f0f4a063d77c19e6ac95223073bc86cd849 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Mon, 5 Aug 2019 11:29:32 -0400 Subject: [PATCH 35/37] add encryption warning --- Remote/GitLFS.hs | 21 ++++++++++++++++----- doc/special_remotes/git-lfs.mdwn | 9 +++++---- 2 files changed, 21 insertions(+), 9 deletions(-) diff --git a/Remote/GitLFS.hs b/Remote/GitLFS.hs index bdacf59489..60b3567616 100644 --- a/Remote/GitLFS.hs +++ b/Remote/GitLFS.hs @@ -12,6 +12,7 @@ import Types.Remote import Annex.Url import Types.Key import Types.Creds +import qualified Annex import qualified Git import qualified Git.Types as Git import qualified Git.Url @@ -108,11 +109,21 @@ mySetup _ mu _ c gc = do let repo = fromMaybe (giveup "Specify url=") $ M.lookup "url" c - -- TODO: don't allow using encryption w/o the user indicating they - -- know it will only encrypt git-annex objects, not git pushes - -- TODO: don't allow using encryption=shared w/o the user - -- indicating that pushing to the git-lfs remote will expose the - -- encrypted data. + + when (isEncrypted c) $ + unlessM (Annex.getState Annex.force) $ + giveup $ unwords $ + [ "You asked that encryption be enabled for" + , "this remote, but only the files that" + , "git-annex stores on it would be encrypted;" + , "anything that git push sends to it would" + , "not be encrypted. Even encryption=shared" + , "encryption keys will be stored on the" + , "remote for anyone who can access it to" + , "see." + , "(Use --force if you want to use this" + , "likely insecure configuration.)" + ] (c', _encsetup) <- encryptionSetup c gc -- The repo is not stored in the remote log, because the same diff --git a/doc/special_remotes/git-lfs.mdwn b/doc/special_remotes/git-lfs.mdwn index c476185f9a..ffb0e7411a 100644 --- a/doc/special_remotes/git-lfs.mdwn +++ b/doc/special_remotes/git-lfs.mdwn @@ -41,11 +41,12 @@ store its SHA256 checksum in the git-annex branch. ## encryption notes -The encryption= parameter only makes git-annex encrypt data stored on the -remote. `git push` can also be used with the remote (it is a git repository -after all), and data pushed to it with git will *not* be encrypted. +The encryption= parameter only makes git-annex encrypt data it stores +on the remote. `git push` can also be used with the remote +(it is a git repository after all), and data pushed to it with +git will *not* be encrypted. -This makes using encryption=shared with a git-lfs special remote very +Using encryption=shared with a git-lfs special remote is especially unlikely to be secure, because the encryption key is committed to the git repository. It would only make sense if you never pushed it to the remote, or trusted the remote's host to keep it secure. From 8401b09e32b9378fa4c02515255a743d8f0ad4b1 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Mon, 5 Aug 2019 12:41:05 -0400 Subject: [PATCH 36/37] Allow setting up a gcrypt special remote with encryption=shared It was documented to work, but seems it has been broken for a while/forever. --- CHANGELOG | 1 + Remote/GCrypt.hs | 20 +++++++++++--------- 2 files changed, 12 insertions(+), 9 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index ae4c2e96ad..2781a67ba8 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -3,6 +3,7 @@ git-annex (7.20190731) UNRELEASED; urgency=medium * Use the same optimisation for --in=here as has always been used for --in=. rather than the slow code path that unncessarily queries the git-annex branch. + * Allow setting up a gcrypt special remote with encryption=shared. -- Joey Hess Thu, 01 Aug 2019 00:11:56 -0400 diff --git a/Remote/GCrypt.hs b/Remote/GCrypt.hs index 22a88cd6b9..83ab5c3b8a 100644 --- a/Remote/GCrypt.hs +++ b/Remote/GCrypt.hs @@ -318,16 +318,18 @@ shellOrRsync r ashell arsync setGcryptEncryption :: RemoteConfig -> String -> Annex () setGcryptEncryption c remotename = do let participants = remoteconfig Git.GCrypt.remoteParticipantConfigKey - case cipherKeyIds =<< extractCipher c of + case extractCipher c of Nothing -> noCrypto - Just (KeyIds { keyIds = ks}) -> do - setConfig participants (unwords ks) - let signingkey = ConfigKey $ Git.GCrypt.remoteSigningKey remotename - cmd <- gpgCmd <$> Annex.getGitConfig - skeys <- M.keys <$> liftIO (secretKeys cmd) - case filter (`elem` ks) skeys of - [] -> noop - (k:_) -> setConfig signingkey k + Just cip -> case cipherKeyIds cip of + Nothing -> noop + Just (KeyIds { keyIds = ks}) -> do + setConfig participants (unwords ks) + let signingkey = ConfigKey $ Git.GCrypt.remoteSigningKey remotename + cmd <- gpgCmd <$> Annex.getGitConfig + skeys <- M.keys <$> liftIO (secretKeys cmd) + case filter (`elem` ks) skeys of + [] -> noop + (k:_) -> setConfig signingkey k setConfig (remoteconfig Git.GCrypt.remotePublishParticipantConfigKey) (Git.Config.boolConfig True) where From fb7d92457fa20d9a1608fc45161654e6b3d6bd9a Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Mon, 5 Aug 2019 13:24:21 -0400 Subject: [PATCH 37/37] support using gcrypt with git-lfs special remote --- CHANGELOG | 5 ++ Remote/GCrypt.hs | 1 + Remote/Git.hs | 4 +- Remote/GitLFS.hs | 52 +++++++++----- doc/special_remotes/gcrypt.mdwn | 15 ++-- doc/special_remotes/git-lfs.mdwn | 48 ++++++++++--- ...ncrypted_git_repositories_with_gcrypt.mdwn | 72 +++++++++++++------ doc/tips/storing_data_in_git-lfs.mdwn | 29 ++++---- 8 files changed, 156 insertions(+), 70 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 2781a67ba8..2aa6630d00 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,5 +1,10 @@ git-annex (7.20190731) UNRELEASED; urgency=medium + * New git-lfs special remote, which can be used to store data on any git-lfs + server, including github, gitlab, and gogs. + * Support fully encrypting all data sent to a git-lfs special remote, + using a combination of gcrypt to encrypt the git data, and git-annex's + encryption of its data. * Use the same optimisation for --in=here as has always been used for --in=. rather than the slow code path that unncessarily queries the git-annex branch. diff --git a/Remote/GCrypt.hs b/Remote/GCrypt.hs index 83ab5c3b8a..931a1491f3 100644 --- a/Remote/GCrypt.hs +++ b/Remote/GCrypt.hs @@ -12,6 +12,7 @@ module Remote.GCrypt ( coreGCryptId, setupRepo, accessShellConfig, + setGcryptEncryption, ) where import qualified Data.Map as M diff --git a/Remote/Git.hs b/Remote/Git.hs index 6e9af5dd5f..e7ed224047 100644 --- a/Remote/Git.hs +++ b/Remote/Git.hs @@ -144,8 +144,10 @@ configRead autoinit r = do gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote) gen r u c gc - | Git.GCrypt.isEncrypted r = Remote.GCrypt.chainGen r u c gc + -- Remote.GitLFS may be used with a repo that is also encrypted + -- with gcrypt so is checked first. | remoteAnnexGitLFS gc = Remote.GitLFS.gen r u c gc + | Git.GCrypt.isEncrypted r = Remote.GCrypt.chainGen r u c gc | otherwise = case repoP2PAddress r of Nothing -> do st <- mkState r u gc diff --git a/Remote/GitLFS.hs b/Remote/GitLFS.hs index 60b3567616..4765d2fddb 100644 --- a/Remote/GitLFS.hs +++ b/Remote/GitLFS.hs @@ -16,12 +16,14 @@ import qualified Annex import qualified Git import qualified Git.Types as Git import qualified Git.Url +import qualified Git.GCrypt import Config import Config.Cost import Remote.Helper.Special import Remote.Helper.ExportImport import Remote.Helper.Git import Remote.Helper.Http +import qualified Remote.GCrypt import Annex.Ssh import Annex.UUID import Crypto @@ -55,7 +57,14 @@ remote = RemoteType gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote) gen r u c gc = do - h <- liftIO $ newTVarIO $ LFSHandle Nothing Nothing r gc + -- If the repo uses gcrypt, get the underlaying repo without the + -- gcrypt url, to do LFS endpoint discovery on. + r' <- if Git.GCrypt.isEncrypted r + then do + g <- Annex.gitRepo + liftIO $ Git.GCrypt.encryptedRemote g r + else pure r + h <- liftIO $ newTVarIO $ LFSHandle Nothing Nothing r' gc cst <- remoteCost gc expensiveRemoteCost return $ Just $ specialRemote' specialcfg c (simplyPrepare $ store u h) @@ -107,36 +116,45 @@ mySetup :: SetupStage -> Maybe UUID -> Maybe CredPair -> RemoteConfig -> RemoteG mySetup _ mu _ c gc = do u <- maybe (liftIO genUUID) return mu - let repo = fromMaybe (giveup "Specify url=") $ - M.lookup "url" c - - when (isEncrypted c) $ - unlessM (Annex.getState Annex.force) $ + (c', _encsetup) <- encryptionSetup c gc + case (isEncrypted c', Git.GCrypt.urlPrefix `isPrefixOf` url) of + (False, False) -> noop + (True, True) -> Remote.GCrypt.setGcryptEncryption c' remotename + (True, False) -> unlessM (Annex.getState Annex.force) $ giveup $ unwords $ - [ "You asked that encryption be enabled for" - , "this remote, but only the files that" - , "git-annex stores on it would be encrypted;" + [ "Encryption is enabled for this remote," + , "but only the files that git-annex stores on" + , "it would be encrypted; " , "anything that git push sends to it would" - , "not be encrypted. Even encryption=shared" - , "encryption keys will be stored on the" - , "remote for anyone who can access it to" - , "see." + , "not be encrypted. Recommend prefixing the" + , "url with \"gcrypt::\" to also encrypt" + , "git pushes." + , "(Use --force if you want to use this" + , "likely insecure configuration.)" + ] + (False, True) -> unlessM (Annex.getState Annex.force) $ + giveup $ unwords $ + [ "You used a \"gcrypt::\" url for this remote," + , "but encryption=none prevents git-annex" + , "from encrypting files it stores there." , "(Use --force if you want to use this" , "likely insecure configuration.)" ] - (c', _encsetup) <- encryptionSetup c gc - -- The repo is not stored in the remote log, because the same + -- The url is not stored in the remote log, because the same -- git-lfs repo can be accessed using different urls by different -- people (eg over ssh or http). -- -- Instead, set up remote.name.url to point to the repo, -- (so it's also usable by git as a non-special remote), -- and set remote.name.git-lfs = true - let c'' = M.delete "repo" c' + let c'' = M.delete "url" c' gitConfigSpecialRemote u c'' [("git-lfs", "true")] - setConfig (ConfigKey ("remote." ++ getRemoteName c ++ ".url")) repo + setConfig (ConfigKey ("remote." ++ getRemoteName c ++ ".url")) url return (c'', u) + where + url = fromMaybe (giveup "Specify url=") (M.lookup "url" c) + remotename = fromJust (M.lookup "name" c) data LFSHandle = LFSHandle { downloadEndpoint :: Maybe LFS.Endpoint diff --git a/doc/special_remotes/gcrypt.mdwn b/doc/special_remotes/gcrypt.mdwn index 5807c9e5f2..2842e43303 100644 --- a/doc/special_remotes/gcrypt.mdwn +++ b/doc/special_remotes/gcrypt.mdwn @@ -4,6 +4,12 @@ remote allows git-annex to also store its files in such repositories. Naturally, git-annex encrypts the files it stores too, so everything stored on the remote is encrypted. +This special remote needs the server hosting the remote repository +to either have git-annex-shell or rsync accessible via ssh. git-annex +uses those to store its content in the remote. If the remote repository +is instead hosted on a server using git-lfs, you can use the [[git-lfs]] +special remote instead of this one; it also supports using gcrypt. + See [[tips/fully_encrypted_git_repositories_with_gcrypt]] for some examples of using gcrypt. @@ -35,11 +41,12 @@ shell access, and `rsync` must be installed. Those are the minimum requirements, but it's also recommended to install git-annex on the remote server, so that [[git-annex-shell]] can be used. -While you can use git-remote-gcrypt with servers like github, git-annex -can't store files on them. In such a case, you can just use -git-remote-gcrypt directly. +If you can't run `rsync` or `git-annex-shell` on the remote server, +you can't use this special remote. Other options are the [[git-lfs]] +special remote, which can also be combined with gcrypt, or +using git-remote-gcrypt to encrypt a remote that git-annex cannot use. -If you use encryption=hybrid, you can add more gpg keys that can access +If you use encryption=hybrid, you can later add more gpg keys that can access the files git-annex stored in the gcrypt repository. However, due to the way git-remote-gcrypt encrypts the git repository, you will need to somehow force it to re-push everything again, so that the encrypted repository can diff --git a/doc/special_remotes/git-lfs.mdwn b/doc/special_remotes/git-lfs.mdwn index ffb0e7411a..e48a76cf4f 100644 --- a/doc/special_remotes/git-lfs.mdwn +++ b/doc/special_remotes/git-lfs.mdwn @@ -23,8 +23,8 @@ the git-lfs special remote: * `keyid` - Specifies the gpg key to use for encryption of both the files git-annex stores in the repository, as well as to encrypt the git - repository itself. May be repeated when multiple participants - should have access to the repository. + repository itself when using gcrypt. May be repeated when + multiple participants should have access to the repository. ## efficiency note @@ -41,15 +41,43 @@ store its SHA256 checksum in the git-annex branch. ## encryption notes -The encryption= parameter only makes git-annex encrypt data it stores -on the remote. `git push` can also be used with the remote -(it is a git repository after all), and data pushed to it with -git will *not* be encrypted. +To encrypt a git-lfs repository, there are two separate things that +have to be encrypted: the data git-annex stores there, and the content +of the git repository itself. After all, a git-lfs remote is a git remote +and git push doesn't encrypt data by default. -Using encryption=shared with a git-lfs special remote is especially -unlikely to be secure, because the encryption key is committed to the git -repository. It would only make sense if you never pushed it to the -remote, or trusted the remote's host to keep it secure. +To encrypt your git pushes, you can use +[git-remote-gcrypt](https://spwhitton.name/tech/code/git-remote-gcrypt/) +and prefix the repository url with "gcrypt::" + +To make git-annex encrypt the data it stores, you can use the encrption= +configuration. + +An example of combining the two: + + git annex initremote lfstest type=git-lfs url=gcrypt::git@github.com:username/somerepo.git encryption=shared + +In that example, the git-annex shared encryption key is stored in +git, but that's ok because git push will encrypt it, along with all +the other git data, using your gpg key. You could instead use +"encryption=shared keyid=" to make git-annex and gcrypt both encrypt +to a specified gpg key. + +git-annex will detect if one part of the repository is encrypted, +but you forgot to encrypt the other part, and will refuse to set up +such an insecure half-encrypted repository. + +If you use encryption=hybrid, you can later add more gpg keys that can access +the files git-annex stored in the git-lfs repository. However, due to the +way git-remote-gcrypt encrypts the git repository, you will need to somehow +force it to re-push everything again, so that the encrypted repository can +be decrypted by the added keys. Probably this can be done by setting +`GCRYPT_FULL_REPACK` and doing a forced push of branches. + +git-annex will set `remote.`gcrypt-publish-participants` when setting +up a repository that uses gcrypt. This is done to avoid unncessary gpg +passphrase prompts, but it does publish the gpg keyids that can decrypt the +repository. Unset it if you need to obscure that. ## limitations diff --git a/doc/tips/fully_encrypted_git_repositories_with_gcrypt.mdwn b/doc/tips/fully_encrypted_git_repositories_with_gcrypt.mdwn index 2df15f193f..1847a6fb8c 100644 --- a/doc/tips/fully_encrypted_git_repositories_with_gcrypt.mdwn +++ b/doc/tips/fully_encrypted_git_repositories_with_gcrypt.mdwn @@ -1,8 +1,7 @@ [git-remote-gcrypt](https://spwhitton.name/tech/code/git-remote-gcrypt/) -adds support for encrypted remotes to git. The git-annex -[[gcrypt special remote|special_remotes/gcrypt]] allows git-annex to -also store its files in such repositories. Naturally, git-annex encrypts -the files it stores too, so everything stored on the remote is encrypted. +adds support for encrypted remotes to git. Combine this with git-annex +encrypting the files it stores in a remote, and you can fully encrypt +all the data stored on a remote. Here are some ways you can use this awesome stuff.. @@ -15,7 +14,12 @@ repositories. ## prerequisites * Install [git-remote-gcrypt](https://spwhitton.name/tech/code/git-remote-gcrypt/) -* Install git-annex version 4.20130909 or newer. + +* Set up a gpg key. You might consider generating a special purpose key + just for this use case, since you may end up wanting to put the key + on multiple machines that you would not trust with your main gpg key. + + The examples below use "$mykey" where you should put your gpg keyid. ## encrypted backup drive @@ -24,18 +28,18 @@ both the full contents of your git repository, and all the files you instruct git-annex to store on it, and everything will be encrypted so that only you can see it. -First, you need to set up a gpg key. You might consider generating a -special purpose key just for this use case, since you may end up wanting to -put the key on multiple machines that you would not trust with your -main gpg key. - -You need to tell git-annex the keyid of the key when setting up the -encrypted repository: +Here's how to set up the encrypted repository: git init --bare /mnt/encryptedbackup git annex initremote encryptedbackup type=gcrypt gitrepo=/mnt/encryptedbackup keyid=$mykey git annex sync encryptedbackup +(Remember to replace "$mykey" with the keyid of your gpg key.) + +This uses the [[gcrypt special remote|special_remotes/gcrypt]] to encrypt +pushes to the git remote, and git-annex will also encrypt the files it +stores there. + Now you can copy (or even move) files to the repository. After sending files to it, you'll probably want to do a sync, which pushes the git repository changes to it as well. @@ -62,23 +66,25 @@ the gpg key used to encrypt it, and then: ## encrypted git-annex repository on a ssh server -If you have a ssh server that has rsync installed, you can set up an -encrypted repository there. Works just like the encrypted drive except -without the cable. +If you have a ssh server that has git-annex or rsync installed on it, you +can set up an encrypted repository there. Works just like the encrypted +drive except without the cable. First, on the server, run: git init --bare encryptedrepo -(Also, install git-annex on the server if it's possible & easy to do so. -While this will work without git-annex being installed on the server, it -is recommended to have it installed.) - Now, in your existing git-annex repository, set up the encrypted remote: git annex initremote encryptedrepo type=gcrypt gitrepo=ssh://my.server/home/me/encryptedrepo keyid=$mykey git annex sync encryptedrepo +(Remember to replace "$mykey" with the keyid of your gpg key.) + +This uses the [[gcrypt special remote|special_remotes/gcrypt]] to encrypt +pushes to the git remote, and git-annex will also encrypt the files it +stores there. + If you're going to be sharing this repository with others, be sure to also include their keyids, by specifying keyid= repeatedly. @@ -97,11 +103,31 @@ used to encrypt it can check it out: git annex enableremote encryptedrepo gitrepo=ssh://my.server/home/me/encryptedrepo git annex get --from encryptedrepo -## private encrypted git remote on hosting site +## private encrypted git remote on a git-lfs hosting site + +Some git repository hosting sites do not support git-annex, but do support +the similar git-lfs for storing large files alongside a git repository. +git-annex can use the git-lfs protocol to store files in such repositories, +and with gcrypt, everything stored in the remote can be encrypted. + +First, make a new, empty git repository on the hosting site. +Get the ssh clone url for the repository, which might look +like "git@github.com:username/somerepo.git" + +Then, in your git-annex repository, set up the encrypted remote: + + git annex initremote lfstest type=git-lfs url=gcrypt::git@github.com:username/somerepo.git keyid=$mykey + +(Remember to replace "$mykey" with the keyid of your gpg key.) + +This uses the [[git-lfs special remote|special_remotes/git-lfs]], and the +`gcrypt::` prefix on the url makes pushes be encrypted with gcrypt. + +## private encrypted git remote on a git hosting site You can use gcrypt to store your git repository in encrypted form on any -hosting site that supports git. Only you can decrypt its contents. -Using it this way, git-annex does not store large files on the hosting site; it's +hosting site that supports git. Only you can decrypt its contents. Using it +this way, git-annex does not store large files on the hosting site; it's only used to store your git repository itself. git remote add encrypted gcrypt::ssh://hostingsite/myrepo.git @@ -115,7 +141,7 @@ url you used when setting it up: git clone gcrypt::ssh://hostingsite/myrepo.git -## multiuser encrypted git remote on hosting site +## multiuser encrypted git remote on a git hosting site Suppose two users want to share an encrypted git remote. Both of you need to set up the remote, and configure gcrypt to encrypt it so that both diff --git a/doc/tips/storing_data_in_git-lfs.mdwn b/doc/tips/storing_data_in_git-lfs.mdwn index 83945059b3..38779cfc64 100644 --- a/doc/tips/storing_data_in_git-lfs.mdwn +++ b/doc/tips/storing_data_in_git-lfs.mdwn @@ -8,22 +8,12 @@ Here's how to initialize a git-lfs special remote on Github. git annex initremote lfs type=git-lfs encryption=none url=git@github.com:yourname/yourrepo.git -If you want git-annex to encrypt the objects it stores in the remote, -change the encryption= parameter. But be sure to read the -[[git-lfs special remote|special_remotes/git-lfs]] page's -**encryption notes** first! +In this example, the remote will not be encrypted, so anyone who can access +it can see its contents. It is possible to encrypt everything stored in a +git-lfs remote, see [[fully_encrypted_git_repositories_with_gcrypt]]. -To enable the same remote in another clone of the repository, -you'll need to provide an url to it again. It's ok to provide a different -url as long as it points to the same git-lfs repository. - - git annex enableremote lfs url=https://github.com/yourname/yourrepo.git - -Note that http urls currently only allow read access to the git-lfs -repository. - -Once the remote is set up, you git-annex can store and retrieve content in -the usual ways: +Once the git-lfs remote is set up, git-annex can store and retrieve +content in the usual ways: git annex copy * --to lfs git annex get --from lfs @@ -33,3 +23,12 @@ because the protocol does not support deletion. A git-lfs special remote also functions as a regular git remote. You can use things like `git push` and `git pull` with it. + +To enable an existing git-lgs remote in another clone of the repository, +you'll need to provide an url to it again. It's ok to provide a different +url as long as it points to the same git-lfs repository. + + git annex enableremote lfs url=https://github.com/yourname/yourrepo.git + +Note that http urls currently only allow read access to the git-lfs +repository.