git-lfs gitlab interoperability fix
git-lfs: Fix interoperability with gitlab's implementation of the git-lfs protocol, which requests Content-Encoding chunked. Sponsored-by: Dartmouth College's Datalad project
This commit is contained in:
parent
dee462f536
commit
f3326b8b5a
8 changed files with 105 additions and 11 deletions
|
@ -11,6 +11,8 @@ git-annex (8.20211029) UNRELEASED; urgency=medium
|
|||
* uninit: Avoid error message when no commits have been made to the
|
||||
repository yet.
|
||||
* uninit: Avoid error message when there is no git-annex branch.
|
||||
* git-lfs: Fix interoperability with gitlab's implementation of the
|
||||
git-lfs protocol, which requests Content-Encoding chunked.
|
||||
|
||||
-- Joey Hess <id@joeyh.name> Mon, 01 Nov 2021 13:19:46 -0400
|
||||
|
||||
|
|
|
@ -457,7 +457,11 @@ store rs h = fileStorer $ \k src p -> getLFSEndpoint LFS.RequestUpload h >>= \ca
|
|||
(req, sha256, size) <- mkUploadRequest rs k src
|
||||
sendTransferRequest req endpoint >>= \case
|
||||
Right resp -> do
|
||||
body <- liftIO $ httpBodyStorer src p
|
||||
let body (LFS.ServerSupportsChunks ssc) =
|
||||
if ssc
|
||||
then httpBodyStorerChunked src p
|
||||
else RequestBodyIO $
|
||||
httpBodyStorer src p
|
||||
forM_ (LFS.objects resp) $
|
||||
send body sha256 size
|
||||
Left err -> giveup err
|
||||
|
|
|
@ -37,6 +37,12 @@ httpBodyStorer src m = do
|
|||
let streamer sink = withMeteredFile src m $ \b -> byteStringPopper b sink
|
||||
return $ RequestBodyStream (fromInteger size) streamer
|
||||
|
||||
-- Like httpBodyStorer, but generates a chunked request body.
|
||||
httpBodyStorerChunked :: FilePath -> MeterUpdate -> RequestBody
|
||||
httpBodyStorerChunked src m =
|
||||
let streamer sink = withMeteredFile src m $ \b -> byteStringPopper b sink
|
||||
in RequestBodyStreamChunked streamer
|
||||
|
||||
byteStringPopper :: L.ByteString -> NeedsPopper () -> IO ()
|
||||
byteStringPopper b sink = do
|
||||
mvar <- newMVar $ L.toChunks b
|
||||
|
|
|
@ -45,6 +45,7 @@ module Utility.GitLFS (
|
|||
-- * Making transfers
|
||||
downloadOperationRequest,
|
||||
uploadOperationRequests,
|
||||
ServerSupportsChunks(..),
|
||||
|
||||
-- * Endpoint discovery
|
||||
Endpoint,
|
||||
|
@ -402,10 +403,10 @@ parseTransferResponse resp = case eitherDecode resp of
|
|||
|
||||
-- | Builds a http request to perform a download.
|
||||
downloadOperationRequest :: DownloadOperation -> Maybe Request
|
||||
downloadOperationRequest = operationParamsRequest . download
|
||||
downloadOperationRequest = fmap fst . operationParamsRequest . download
|
||||
|
||||
-- | Builds http request to perform an upload. The content to upload is
|
||||
-- provided in the RequestBody, along with its SHA256 and size.
|
||||
-- provided, along with its SHA256 and size.
|
||||
--
|
||||
-- When the LFS server requested verification, there will be a second
|
||||
-- Request that does that; it should be run only after the upload has
|
||||
|
@ -413,8 +414,8 @@ downloadOperationRequest = operationParamsRequest . download
|
|||
--
|
||||
-- When the LFS server already contains the object, an empty list may be
|
||||
-- returned.
|
||||
uploadOperationRequests :: UploadOperation -> RequestBody -> SHA256 -> Integer -> Maybe [Request]
|
||||
uploadOperationRequests op content oid size =
|
||||
uploadOperationRequests :: UploadOperation -> (ServerSupportsChunks -> RequestBody) -> SHA256 -> Integer -> Maybe [Request]
|
||||
uploadOperationRequests op mkcontent oid size =
|
||||
case (mkdlreq, mkverifyreq) of
|
||||
(Nothing, _) -> Nothing
|
||||
(Just dlreq, Nothing) -> Just [dlreq]
|
||||
|
@ -422,25 +423,40 @@ uploadOperationRequests op content oid size =
|
|||
where
|
||||
mkdlreq = mkdlreq'
|
||||
<$> operationParamsRequest (upload op)
|
||||
mkdlreq' r = r
|
||||
mkdlreq' (r, ssc) = r
|
||||
{ method = "PUT"
|
||||
, requestBody = content
|
||||
, requestBody = mkcontent ssc
|
||||
}
|
||||
mkverifyreq = mkverifyreq'
|
||||
<$> (operationParamsRequest =<< verify op)
|
||||
mkverifyreq' r = addLfsJsonHeaders $ r
|
||||
mkverifyreq' (r, _ssc) = addLfsJsonHeaders $ r
|
||||
{ method = "POST"
|
||||
, requestBody = RequestBodyLBS $ encode $
|
||||
Verification oid size
|
||||
}
|
||||
|
||||
operationParamsRequest :: OperationParams -> Maybe Request
|
||||
-- | When the LFS server indicates that it supports Transfer-Encoding chunked,
|
||||
-- this will contain a true value, and the RequestBody provided to
|
||||
-- uploadOperationRequests may be created using RequestBodyStreamChunked.
|
||||
-- Otherwise, that should be avoided as the server may not support the
|
||||
-- chunked encoding.
|
||||
newtype ServerSupportsChunks = ServerSupportsChunks Bool
|
||||
|
||||
operationParamsRequest :: OperationParams -> Maybe (Request, ServerSupportsChunks)
|
||||
operationParamsRequest ps = do
|
||||
r <- parseRequest (T.unpack (href ps))
|
||||
let headers = map convheader $ maybe [] M.toList (header ps)
|
||||
return $ r { requestHeaders = headers }
|
||||
let headers' = filter allowedheader headers
|
||||
let ssc = ServerSupportsChunks $
|
||||
any (== ("Transfer-Encoding", "chunked")) headers
|
||||
return (r { requestHeaders = headers' }, ssc)
|
||||
where
|
||||
convheader (k, v) = (CI.mk (E.encodeUtf8 k), E.encodeUtf8 v)
|
||||
-- requestHeaders is not allowed to set Transfer-Encoding or
|
||||
-- Content-Length; copying those over blindly could request in a
|
||||
-- malformed request.
|
||||
allowedheader (k, _) = k /= "Transfer-Encoding"
|
||||
&& k /= "Content-Length"
|
||||
|
||||
type Url = T.Text
|
||||
|
||||
|
|
|
@ -115,3 +115,5 @@ copy: 1 failed
|
|||
Yes, I'm using DataLad for some of my projects and I'm really impressed how it makes use of git-annex to solve many of the tasks that I struggled with pure git before.
|
||||
|
||||
[[!tag projects/datalad]]
|
||||
|
||||
> [[fixed|done]] --[[Joey]]
|
||||
|
|
|
@ -0,0 +1,56 @@
|
|||
[[!comment format=mdwn
|
||||
username="joey"
|
||||
subject="""comment 1"""
|
||||
date="2021-11-09T20:09:01Z"
|
||||
content="""
|
||||
Let's see.. git-lfs endpoint discovery over ssh works.
|
||||
|
||||
The request to start a transfer works:
|
||||
|
||||
host = "gitlab.com"
|
||||
path = "/joeyh/test.git/info/lfs/objects/batch"
|
||||
...
|
||||
[2021-11-10 12:06:35.409182815] (Remote.GitLFS) Status {statusCode = 200, statusMessage = "OK"}
|
||||
|
||||
So it's the actual PUT that fails:
|
||||
|
||||
requestHeaders = [("Authorization","<REDACTED>"),("Content-Type","application/octet-stream"),("Transfer-Encoding","chunked"),("User-Agent","git-annex/8.20211029-ga5a7d8433")]
|
||||
path = "/joeyh/test.git/gitlab-lfs/objects/922d58c647a679e17ee7c30f7de0111b56b90e84129fa3663568b81822a2628a/30"
|
||||
|
||||
Seems that the Transfer-Encoding chunked header is the problem.
|
||||
That header is provided by the git-lfs endpoint as one to include in
|
||||
the PUT (along with the Authorization header), and git-annex dutifully does
|
||||
include it. But it seems that does not make the PUT use that
|
||||
transfer encoding. And then in the server error, we see
|
||||
"invalid chunked body".
|
||||
|
||||
I tried filtering out the Transfer-Encoding header, and that does
|
||||
fix the problem. But I dunno if that's the best fix. Should git-annex support
|
||||
Transfer-Encoding chunked?
|
||||
|
||||
git-lfs has itself supported Transfer-Encoding chunked since 2015,
|
||||
see <https://github.com/git-lfs/git-lfs/issues/385>. That says
|
||||
"client may send data via chunked Transfer-Encoding when the server
|
||||
explicitly advertises that it's supported". Which is an interesting
|
||||
wording -- "may", "supported" -- implying it's not required to use it.
|
||||
|
||||
The API docs <https://github.com/git-lfs/git-lfs/blob/main/docs/api/batch.md>
|
||||
says the http headers are "Optional hash of String HTTP header key/value
|
||||
pairs to apply to the request". I think it means optional as in the
|
||||
server may optionally not send any, not necessarily
|
||||
that applying them to the request is optional, but that's not really clear.
|
||||
(Surely a header like Authorization is not optional to include.)
|
||||
|
||||
If the headers are not optional to include then the API would let the server
|
||||
specify any http headers at all, and the client has to send a PUT that includes
|
||||
those headers and that complies with them. So Transfer-Encoding deflate would
|
||||
need to use that compression method, etc.
|
||||
|
||||
But looking at the git-lfs implementation, it only actually handles
|
||||
Transfer-Encoding chunked and not other values. I think it may also
|
||||
not include other headers than Authorization in the PUT?
|
||||
|
||||
It seems possible there are other headers that might cause problems if they are
|
||||
blindly copied into the PUT. Content-Encoding is the only other obvious one,
|
||||
but who knows what may lurk in some odd corner of a HTTP spec.
|
||||
"""]]
|
|
@ -0,0 +1,8 @@
|
|||
[[!comment format=mdwn
|
||||
username="joey"
|
||||
subject="""comment 2"""
|
||||
date="2021-11-10T17:21:01Z"
|
||||
content="""
|
||||
Fixed by not passing through those 2 problem headers. And also made it
|
||||
actually used chunked encoding when the server indicates it's supported.
|
||||
"""]]
|
|
@ -419,7 +419,7 @@ Executable git-annex
|
|||
Build-Depends: network (< 3.0.0.0), network (>= 2.6.3.0)
|
||||
|
||||
if flag(GitLfs)
|
||||
Build-Depends: git-lfs (>= 1.1.0)
|
||||
Build-Depends: git-lfs (>= 1.2.0)
|
||||
CPP-Options: -DWITH_GIT_LFS
|
||||
else
|
||||
Other-Modules: Utility.GitLFS
|
||||
|
|
Loading…
Add table
Reference in a new issue