git-lfs gitlab interoperability fix
git-lfs: Fix interoperability with gitlab's implementation of the git-lfs protocol, which requests Content-Encoding chunked. Sponsored-by: Dartmouth College's Datalad project
This commit is contained in:
parent
dee462f536
commit
f3326b8b5a
8 changed files with 105 additions and 11 deletions
|
@ -11,6 +11,8 @@ git-annex (8.20211029) UNRELEASED; urgency=medium
|
||||||
* uninit: Avoid error message when no commits have been made to the
|
* uninit: Avoid error message when no commits have been made to the
|
||||||
repository yet.
|
repository yet.
|
||||||
* uninit: Avoid error message when there is no git-annex branch.
|
* uninit: Avoid error message when there is no git-annex branch.
|
||||||
|
* git-lfs: Fix interoperability with gitlab's implementation of the
|
||||||
|
git-lfs protocol, which requests Content-Encoding chunked.
|
||||||
|
|
||||||
-- Joey Hess <id@joeyh.name> Mon, 01 Nov 2021 13:19:46 -0400
|
-- Joey Hess <id@joeyh.name> Mon, 01 Nov 2021 13:19:46 -0400
|
||||||
|
|
||||||
|
|
|
@ -457,7 +457,11 @@ store rs h = fileStorer $ \k src p -> getLFSEndpoint LFS.RequestUpload h >>= \ca
|
||||||
(req, sha256, size) <- mkUploadRequest rs k src
|
(req, sha256, size) <- mkUploadRequest rs k src
|
||||||
sendTransferRequest req endpoint >>= \case
|
sendTransferRequest req endpoint >>= \case
|
||||||
Right resp -> do
|
Right resp -> do
|
||||||
body <- liftIO $ httpBodyStorer src p
|
let body (LFS.ServerSupportsChunks ssc) =
|
||||||
|
if ssc
|
||||||
|
then httpBodyStorerChunked src p
|
||||||
|
else RequestBodyIO $
|
||||||
|
httpBodyStorer src p
|
||||||
forM_ (LFS.objects resp) $
|
forM_ (LFS.objects resp) $
|
||||||
send body sha256 size
|
send body sha256 size
|
||||||
Left err -> giveup err
|
Left err -> giveup err
|
||||||
|
|
|
@ -37,6 +37,12 @@ httpBodyStorer src m = do
|
||||||
let streamer sink = withMeteredFile src m $ \b -> byteStringPopper b sink
|
let streamer sink = withMeteredFile src m $ \b -> byteStringPopper b sink
|
||||||
return $ RequestBodyStream (fromInteger size) streamer
|
return $ RequestBodyStream (fromInteger size) streamer
|
||||||
|
|
||||||
|
-- Like httpBodyStorer, but generates a chunked request body.
|
||||||
|
httpBodyStorerChunked :: FilePath -> MeterUpdate -> RequestBody
|
||||||
|
httpBodyStorerChunked src m =
|
||||||
|
let streamer sink = withMeteredFile src m $ \b -> byteStringPopper b sink
|
||||||
|
in RequestBodyStreamChunked streamer
|
||||||
|
|
||||||
byteStringPopper :: L.ByteString -> NeedsPopper () -> IO ()
|
byteStringPopper :: L.ByteString -> NeedsPopper () -> IO ()
|
||||||
byteStringPopper b sink = do
|
byteStringPopper b sink = do
|
||||||
mvar <- newMVar $ L.toChunks b
|
mvar <- newMVar $ L.toChunks b
|
||||||
|
|
|
@ -45,6 +45,7 @@ module Utility.GitLFS (
|
||||||
-- * Making transfers
|
-- * Making transfers
|
||||||
downloadOperationRequest,
|
downloadOperationRequest,
|
||||||
uploadOperationRequests,
|
uploadOperationRequests,
|
||||||
|
ServerSupportsChunks(..),
|
||||||
|
|
||||||
-- * Endpoint discovery
|
-- * Endpoint discovery
|
||||||
Endpoint,
|
Endpoint,
|
||||||
|
@ -402,10 +403,10 @@ parseTransferResponse resp = case eitherDecode resp of
|
||||||
|
|
||||||
-- | Builds a http request to perform a download.
|
-- | Builds a http request to perform a download.
|
||||||
downloadOperationRequest :: DownloadOperation -> Maybe Request
|
downloadOperationRequest :: DownloadOperation -> Maybe Request
|
||||||
downloadOperationRequest = operationParamsRequest . download
|
downloadOperationRequest = fmap fst . operationParamsRequest . download
|
||||||
|
|
||||||
-- | Builds http request to perform an upload. The content to upload is
|
-- | Builds http request to perform an upload. The content to upload is
|
||||||
-- provided in the RequestBody, along with its SHA256 and size.
|
-- provided, along with its SHA256 and size.
|
||||||
--
|
--
|
||||||
-- When the LFS server requested verification, there will be a second
|
-- When the LFS server requested verification, there will be a second
|
||||||
-- Request that does that; it should be run only after the upload has
|
-- Request that does that; it should be run only after the upload has
|
||||||
|
@ -413,8 +414,8 @@ downloadOperationRequest = operationParamsRequest . download
|
||||||
--
|
--
|
||||||
-- When the LFS server already contains the object, an empty list may be
|
-- When the LFS server already contains the object, an empty list may be
|
||||||
-- returned.
|
-- returned.
|
||||||
uploadOperationRequests :: UploadOperation -> RequestBody -> SHA256 -> Integer -> Maybe [Request]
|
uploadOperationRequests :: UploadOperation -> (ServerSupportsChunks -> RequestBody) -> SHA256 -> Integer -> Maybe [Request]
|
||||||
uploadOperationRequests op content oid size =
|
uploadOperationRequests op mkcontent oid size =
|
||||||
case (mkdlreq, mkverifyreq) of
|
case (mkdlreq, mkverifyreq) of
|
||||||
(Nothing, _) -> Nothing
|
(Nothing, _) -> Nothing
|
||||||
(Just dlreq, Nothing) -> Just [dlreq]
|
(Just dlreq, Nothing) -> Just [dlreq]
|
||||||
|
@ -422,25 +423,40 @@ uploadOperationRequests op content oid size =
|
||||||
where
|
where
|
||||||
mkdlreq = mkdlreq'
|
mkdlreq = mkdlreq'
|
||||||
<$> operationParamsRequest (upload op)
|
<$> operationParamsRequest (upload op)
|
||||||
mkdlreq' r = r
|
mkdlreq' (r, ssc) = r
|
||||||
{ method = "PUT"
|
{ method = "PUT"
|
||||||
, requestBody = content
|
, requestBody = mkcontent ssc
|
||||||
}
|
}
|
||||||
mkverifyreq = mkverifyreq'
|
mkverifyreq = mkverifyreq'
|
||||||
<$> (operationParamsRequest =<< verify op)
|
<$> (operationParamsRequest =<< verify op)
|
||||||
mkverifyreq' r = addLfsJsonHeaders $ r
|
mkverifyreq' (r, _ssc) = addLfsJsonHeaders $ r
|
||||||
{ method = "POST"
|
{ method = "POST"
|
||||||
, requestBody = RequestBodyLBS $ encode $
|
, requestBody = RequestBodyLBS $ encode $
|
||||||
Verification oid size
|
Verification oid size
|
||||||
}
|
}
|
||||||
|
|
||||||
operationParamsRequest :: OperationParams -> Maybe Request
|
-- | When the LFS server indicates that it supports Transfer-Encoding chunked,
|
||||||
|
-- this will contain a true value, and the RequestBody provided to
|
||||||
|
-- uploadOperationRequests may be created using RequestBodyStreamChunked.
|
||||||
|
-- Otherwise, that should be avoided as the server may not support the
|
||||||
|
-- chunked encoding.
|
||||||
|
newtype ServerSupportsChunks = ServerSupportsChunks Bool
|
||||||
|
|
||||||
|
operationParamsRequest :: OperationParams -> Maybe (Request, ServerSupportsChunks)
|
||||||
operationParamsRequest ps = do
|
operationParamsRequest ps = do
|
||||||
r <- parseRequest (T.unpack (href ps))
|
r <- parseRequest (T.unpack (href ps))
|
||||||
let headers = map convheader $ maybe [] M.toList (header ps)
|
let headers = map convheader $ maybe [] M.toList (header ps)
|
||||||
return $ r { requestHeaders = headers }
|
let headers' = filter allowedheader headers
|
||||||
|
let ssc = ServerSupportsChunks $
|
||||||
|
any (== ("Transfer-Encoding", "chunked")) headers
|
||||||
|
return (r { requestHeaders = headers' }, ssc)
|
||||||
where
|
where
|
||||||
convheader (k, v) = (CI.mk (E.encodeUtf8 k), E.encodeUtf8 v)
|
convheader (k, v) = (CI.mk (E.encodeUtf8 k), E.encodeUtf8 v)
|
||||||
|
-- requestHeaders is not allowed to set Transfer-Encoding or
|
||||||
|
-- Content-Length; copying those over blindly could result in a
|
||||||
|
-- malformed request.
|
||||||
|
allowedheader (k, _) = k /= "Transfer-Encoding"
|
||||||
|
&& k /= "Content-Length"
|
||||||
|
|
||||||
type Url = T.Text
|
type Url = T.Text
|
||||||
|
|
||||||
|
|
|
@ -115,3 +115,5 @@ copy: 1 failed
|
||||||
Yes, I'm using DataLad for some of my projects and I'm really impressed how it makes use of git-annex to solve many of the tasks that I struggled with pure git before.
|
Yes, I'm using DataLad for some of my projects and I'm really impressed how it makes use of git-annex to solve many of the tasks that I struggled with pure git before.
|
||||||
|
|
||||||
[[!tag projects/datalad]]
|
[[!tag projects/datalad]]
|
||||||
|
|
||||||
|
> [[fixed|done]] --[[Joey]]
|
||||||
|
|
|
@ -0,0 +1,56 @@
|
||||||
|
[[!comment format=mdwn
|
||||||
|
username="joey"
|
||||||
|
subject="""comment 1"""
|
||||||
|
date="2021-11-09T20:09:01Z"
|
||||||
|
content="""
|
||||||
|
Let's see.. git-lfs endpoint discovery over ssh works.
|
||||||
|
|
||||||
|
The request to start a transfer works:
|
||||||
|
|
||||||
|
host = "gitlab.com"
|
||||||
|
path = "/joeyh/test.git/info/lfs/objects/batch"
|
||||||
|
...
|
||||||
|
[2021-11-10 12:06:35.409182815] (Remote.GitLFS) Status {statusCode = 200, statusMessage = "OK"}
|
||||||
|
|
||||||
|
So it's the actual PUT that fails:
|
||||||
|
|
||||||
|
requestHeaders = [("Authorization","<REDACTED>"),("Content-Type","application/octet-stream"),("Transfer-Encoding","chunked"),("User-Agent","git-annex/8.20211029-ga5a7d8433")]
|
||||||
|
path = "/joeyh/test.git/gitlab-lfs/objects/922d58c647a679e17ee7c30f7de0111b56b90e84129fa3663568b81822a2628a/30"
|
||||||
|
|
||||||
|
Seems that the Transfer-Encoding chunked header is the problem.
|
||||||
|
That header is provided by the git-lfs endpoint as one to include in
|
||||||
|
the PUT (along with the Authorization header), and git-annex dutifully does
|
||||||
|
include it. But it seems that does not make the PUT use that
|
||||||
|
transfer encoding. And then in the server error, we see
|
||||||
|
"invalid chunked body".
|
||||||
|
|
||||||
|
I tried filtering out the Transfer-Encoding header, and that does
|
||||||
|
fix the problem. But I dunno if that's the best fix. Should git-annex support
|
||||||
|
Transfer-Encoding chunked?
|
||||||
|
|
||||||
|
git-lfs has itself supported Transfer-Encoding chunked since 2015,
|
||||||
|
see <https://github.com/git-lfs/git-lfs/issues/385>. That says
|
||||||
|
"client may send data via chunked Transfer-Encoding when the server
|
||||||
|
explicitly advertises that it's supported". Which is an interesting
|
||||||
|
wording -- "may", "supported" -- implying it's not required to use it.
|
||||||
|
|
||||||
|
The API docs <https://github.com/git-lfs/git-lfs/blob/main/docs/api/batch.md>
|
||||||
|
says the http headers are "Optional hash of String HTTP header key/value
|
||||||
|
pairs to apply to the request". I think it means optional as in the
|
||||||
|
server may optionally not send any, not necessarily
|
||||||
|
that applying them to the request is optional, but that's not really clear.
|
||||||
|
(Surely a header like Authorization is not optional to include.)
|
||||||
|
|
||||||
|
If the headers are not optional to include then the API would let the server
|
||||||
|
specify any http headers at all, and the client has to send a PUT that includes
|
||||||
|
those headers and that complies with them. So Transfer-Encoding deflate would
|
||||||
|
need to use that compression method, etc.
|
||||||
|
|
||||||
|
But looking at the git-lfs implementation, it only actually handles
|
||||||
|
Transfer-Encoding chunked and not other values. I think it may also
|
||||||
|
not include other headers than Authorization in the PUT?
|
||||||
|
|
||||||
|
It seems possible there are other headers that might cause problems if they are
|
||||||
|
blindly copied into the PUT. Content-Encoding is the only other obvious one,
|
||||||
|
but who knows what may lurk in some odd corner of a HTTP spec.
|
||||||
|
"""]]
|
|
@ -0,0 +1,8 @@
|
||||||
|
[[!comment format=mdwn
|
||||||
|
username="joey"
|
||||||
|
subject="""comment 2"""
|
||||||
|
date="2021-11-10T17:21:01Z"
|
||||||
|
content="""
|
||||||
|
Fixed by not passing through those 2 problem headers. And also made it
|
||||||
|
actually use chunked encoding when the server indicates it's supported.
|
||||||
|
"""]]
|
|
@ -419,7 +419,7 @@ Executable git-annex
|
||||||
Build-Depends: network (< 3.0.0.0), network (>= 2.6.3.0)
|
Build-Depends: network (< 3.0.0.0), network (>= 2.6.3.0)
|
||||||
|
|
||||||
if flag(GitLfs)
|
if flag(GitLfs)
|
||||||
Build-Depends: git-lfs (>= 1.1.0)
|
Build-Depends: git-lfs (>= 1.2.0)
|
||||||
CPP-Options: -DWITH_GIT_LFS
|
CPP-Options: -DWITH_GIT_LFS
|
||||||
else
|
else
|
||||||
Other-Modules: Utility.GitLFS
|
Other-Modules: Utility.GitLFS
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue