deal with Amazon S3 breaking change for public=yes
* S3: Amazon S3 buckets created after April 2023 do not support ACLs, so public=yes cannot be used with them. Existing buckets configured with public=yes will keep working. * S3: Allow setting publicurl=yes without public=yes, to support buckets that are configured with a Bucket Policy that allows public access. Sponsored-by: Joshua Antonishen on Patreon
This commit is contained in:
parent
ddc7f36d53
commit
33ba537728
7 changed files with 105 additions and 23 deletions
|
@ -16,6 +16,12 @@ git-annex (10.20230627) UNRELEASED; urgency=medium
|
||||||
* Improve resuming interrupted download when using yt-dlp.
|
* Improve resuming interrupted download when using yt-dlp.
|
||||||
* importfeed bug fix: When -J was used with multiple feeds, some feeds
|
* importfeed bug fix: When -J was used with multiple feeds, some feeds
|
||||||
did not get their items downloaded.
|
did not get their items downloaded.
|
||||||
|
* S3: Amazon S3 buckets created after April 2023 do not support ACLs,
|
||||||
|
so public=yes cannot be used with them. Existing buckets configured
|
||||||
|
with public=yes will keep working.
|
||||||
|
* S3: Allow setting publicurl=yes without public=yes, to support
|
||||||
|
buckets that are configured with a Bucket Policy that allows public
|
||||||
|
access.
|
||||||
|
|
||||||
-- Joey Hess <id@joeyh.name> Mon, 26 Jun 2023 13:10:40 -0400
|
-- Joey Hess <id@joeyh.name> Mon, 26 Jun 2023 13:10:40 -0400
|
||||||
|
|
||||||
|
|
27
Remote/S3.hs
27
Remote/S3.hs
|
@ -94,7 +94,7 @@ remote = specialRemoteType $ RemoteType
|
||||||
, yesNoParser versioningField (Just False)
|
, yesNoParser versioningField (Just False)
|
||||||
(FieldDesc "enable versioning of bucket content")
|
(FieldDesc "enable versioning of bucket content")
|
||||||
, yesNoParser publicField (Just False)
|
, yesNoParser publicField (Just False)
|
||||||
(FieldDesc "allow public read access to the bucket")
|
(FieldDesc "allow public read access to the bucket via ACLs (only supported for old Amazon S3 buckets)")
|
||||||
, optionalStringParser publicurlField
|
, optionalStringParser publicurlField
|
||||||
(FieldDesc "url that can be used by public to download files")
|
(FieldDesc "url that can be used by public to download files")
|
||||||
, optionalStringParser protocolField
|
, optionalStringParser protocolField
|
||||||
|
@ -238,7 +238,7 @@ gen r u rc gc rs = do
|
||||||
, removeExportDirectoryWhenEmpty = Nothing
|
, removeExportDirectoryWhenEmpty = Nothing
|
||||||
, checkPresentExportWithContentIdentifier = checkPresentExportWithContentIdentifierS3 hdl this info
|
, checkPresentExportWithContentIdentifier = checkPresentExportWithContentIdentifierS3 hdl this info
|
||||||
}
|
}
|
||||||
, whereisKey = Just (getPublicWebUrls u rs info c)
|
, whereisKey = Just (getPublicWebUrls rs info c)
|
||||||
, remoteFsck = Nothing
|
, remoteFsck = Nothing
|
||||||
, repairRepo = Nothing
|
, repairRepo = Nothing
|
||||||
, config = c
|
, config = c
|
||||||
|
@ -427,7 +427,7 @@ retrieve hv r rs c info = fileRetriever' $ \f k p iv -> withS3Handle hv $ \case
|
||||||
giveup "cannot download content"
|
giveup "cannot download content"
|
||||||
Right loc -> retrieveHelper info h loc (fromRawFilePath f) p iv
|
Right loc -> retrieveHelper info h loc (fromRawFilePath f) p iv
|
||||||
Left S3HandleNeedCreds ->
|
Left S3HandleNeedCreds ->
|
||||||
getPublicWebUrls' (uuid r) rs info c k >>= \case
|
getPublicWebUrls' rs info c k >>= \case
|
||||||
Left failreason -> do
|
Left failreason -> do
|
||||||
warning (UnquotedString failreason)
|
warning (UnquotedString failreason)
|
||||||
giveup "cannot download content"
|
giveup "cannot download content"
|
||||||
|
@ -474,7 +474,7 @@ checkKey hv r rs c info k = withS3Handle hv $ \case
|
||||||
giveup "cannot check content"
|
giveup "cannot check content"
|
||||||
Right loc -> checkKeyHelper info h loc
|
Right loc -> checkKeyHelper info h loc
|
||||||
Left S3HandleNeedCreds ->
|
Left S3HandleNeedCreds ->
|
||||||
getPublicWebUrls' (uuid r) rs info c k >>= \case
|
getPublicWebUrls' rs info c k >>= \case
|
||||||
Left failreason -> do
|
Left failreason -> do
|
||||||
warning (UnquotedString failreason)
|
warning (UnquotedString failreason)
|
||||||
giveup "cannot check content"
|
giveup "cannot check content"
|
||||||
|
@ -974,7 +974,7 @@ data S3Info = S3Info
|
||||||
, partSize :: Maybe Integer
|
, partSize :: Maybe Integer
|
||||||
, isIA :: Bool
|
, isIA :: Bool
|
||||||
, versioning :: Bool
|
, versioning :: Bool
|
||||||
, public :: Bool
|
, publicACL :: Bool
|
||||||
, publicurl :: Maybe URLString
|
, publicurl :: Maybe URLString
|
||||||
, host :: Maybe String
|
, host :: Maybe String
|
||||||
, region :: Maybe String
|
, region :: Maybe String
|
||||||
|
@ -997,7 +997,7 @@ extractS3Info c = do
|
||||||
, isIA = configIA c
|
, isIA = configIA c
|
||||||
, versioning = fromMaybe False $
|
, versioning = fromMaybe False $
|
||||||
getRemoteConfigValue versioningField c
|
getRemoteConfigValue versioningField c
|
||||||
, public = fromMaybe False $
|
, publicACL = fromMaybe False $
|
||||||
getRemoteConfigValue publicField c
|
getRemoteConfigValue publicField c
|
||||||
, publicurl = getRemoteConfigValue publicurlField c
|
, publicurl = getRemoteConfigValue publicurlField c
|
||||||
, host = getRemoteConfigValue hostField c
|
, host = getRemoteConfigValue hostField c
|
||||||
|
@ -1014,7 +1014,7 @@ putObject info file rbody = (S3.putObject (bucket info) file rbody)
|
||||||
|
|
||||||
acl :: S3Info -> Maybe S3.CannedAcl
|
acl :: S3Info -> Maybe S3.CannedAcl
|
||||||
acl info
|
acl info
|
||||||
| public info = Just S3.AclPublicRead
|
| publicACL info = Just S3.AclPublicRead
|
||||||
| otherwise = Nothing
|
| otherwise = Nothing
|
||||||
|
|
||||||
getBucketName :: ParsedRemoteConfig -> Maybe BucketName
|
getBucketName :: ParsedRemoteConfig -> Maybe BucketName
|
||||||
|
@ -1154,7 +1154,8 @@ s3Info c info = catMaybes
|
||||||
then Just ("internet archive item", iaItemUrl $ fromMaybe "unknown" $ getBucketName c)
|
then Just ("internet archive item", iaItemUrl $ fromMaybe "unknown" $ getBucketName c)
|
||||||
else Nothing
|
else Nothing
|
||||||
, Just ("partsize", maybe "unlimited" (roughSize storageUnits False) (getPartSize c))
|
, Just ("partsize", maybe "unlimited" (roughSize storageUnits False) (getPartSize c))
|
||||||
, Just ("public", if public info then "yes" else "no")
|
, Just ("publicurl", fromMaybe "" (publicurl info))
|
||||||
|
, Just ("public", if publicACL info then "yes" else "no")
|
||||||
, Just ("versioning", if versioning info then "yes" else "no")
|
, Just ("versioning", if versioning info then "yes" else "no")
|
||||||
]
|
]
|
||||||
where
|
where
|
||||||
|
@ -1162,13 +1163,11 @@ s3Info c info = catMaybes
|
||||||
showstorageclass (S3.OtherStorageClass t) = T.unpack t
|
showstorageclass (S3.OtherStorageClass t) = T.unpack t
|
||||||
showstorageclass sc = show sc
|
showstorageclass sc = show sc
|
||||||
|
|
||||||
getPublicWebUrls :: UUID -> RemoteStateHandle -> S3Info -> ParsedRemoteConfig -> Key -> Annex [URLString]
|
getPublicWebUrls :: RemoteStateHandle -> S3Info -> ParsedRemoteConfig -> Key -> Annex [URLString]
|
||||||
getPublicWebUrls u rs info c k = either (const []) id <$> getPublicWebUrls' u rs info c k
|
getPublicWebUrls rs info c k = either (const []) id <$> getPublicWebUrls' rs info c k
|
||||||
|
|
||||||
getPublicWebUrls' :: UUID -> RemoteStateHandle -> S3Info -> ParsedRemoteConfig -> Key -> Annex (Either String [URLString])
|
getPublicWebUrls' :: RemoteStateHandle -> S3Info -> ParsedRemoteConfig -> Key -> Annex (Either String [URLString])
|
||||||
getPublicWebUrls' u rs info c k
|
getPublicWebUrls' rs info c k
|
||||||
| not (public info) = return $ Left $
|
|
||||||
"S3 bucket does not allow public access; " ++ needS3Creds u
|
|
||||||
| exportTree c = if versioning info
|
| exportTree c = if versioning info
|
||||||
then case publicurl info of
|
then case publicurl info of
|
||||||
Just url -> getversionid (const $ genericPublicUrl url)
|
Just url -> getversionid (const $ genericPublicUrl url)
|
||||||
|
|
|
@ -155,4 +155,4 @@ git-annex: get: 1 failed
|
||||||
|
|
||||||
We use git-annex to share large datasets with the scientific community at https://github.com/spine-generic/data-multi-subject !
|
We use git-annex to share large datasets with the scientific community at https://github.com/spine-generic/data-multi-subject !
|
||||||
|
|
||||||
|
> [[fixed|done]] --[[Joey]]
|
||||||
|
|
|
@ -0,0 +1,29 @@
|
||||||
|
[[!comment format=mdwn
|
||||||
|
username="joey"
|
||||||
|
subject="""comment 2"""
|
||||||
|
date="2023-07-21T17:53:25Z"
|
||||||
|
content="""
|
||||||
|
This only affects new S3 buckets. Existing S3 buckets that were
|
||||||
|
created before April 2023 and were set up to allow public access should
|
||||||
|
keep working, including ACL settings when storing new files in them.
|
||||||
|
Per [Amazon's announcement](https://aws.amazon.com/about-aws/whats-new/2022/12/amazon-s3-automatically-enable-block-public-access-disable-access-control-lists-buckets-april-2023/),
|
||||||
|
"There is no change for existing buckets."
|
||||||
|
|
||||||
|
I've made `publicurl` orthogonal to `public`.
|
||||||
|
|
||||||
|
As for the idea of `HTTP HEAD` before trying to set the ACL,
|
||||||
|
the ACL is currently sent as part of the PutObject request. And
|
||||||
|
either there is not a way to change the ACL later, or the aws haskell library
|
||||||
|
is missing support for the API to do that.
|
||||||
|
|
||||||
|
While git-annex could HEAD without creds when publicurl=yes to verify that the
|
||||||
|
user has configured the bucket correctly, and at least warn about a
|
||||||
|
misconfiguration, that would add some overhead, and I guess if the user has not
|
||||||
|
configured the bucket correctly, they will notice in some other way eventually
|
||||||
|
and can fix their bucket policy after the fact. So I'm inclined not to do
|
||||||
|
that.
|
||||||
|
|
||||||
|
Instead I've simply deprecated `public`, noting that it should not be set
|
||||||
|
on new buckets. The user will have to deal with setting up the Bucket
|
||||||
|
Policy themselves.
|
||||||
|
"""]]
|
|
@ -37,3 +37,5 @@ upgrade supported from repository versions: 0 1 2 3 4 5 6 7 8 9 10
|
||||||
### Have you had any luck using git-annex before? (Sometimes we get tired of reading bug reports all day and a lil' positive end note does wonders)
|
### Have you had any luck using git-annex before? (Sometimes we get tired of reading bug reports all day and a lil' positive end note does wonders)
|
||||||
|
|
||||||
I work daily with git-annex and I never fail to be amazed by it. Thank you for your work!
|
I work daily with git-annex and I never fail to be amazed by it. Thank you for your work!
|
||||||
|
|
||||||
|
> [[fixed|done]] --[[Joey]]
|
||||||
|
|
|
@ -0,0 +1,43 @@
|
||||||
|
[[!comment format=mdwn
|
||||||
|
username="joey"
|
||||||
|
subject="""comment 1"""
|
||||||
|
date="2023-07-21T17:04:19Z"
|
||||||
|
content="""
|
||||||
|
This only affects new S3 buckets. Existing S3 buckets that were
|
||||||
|
created before April 2023 and were set up to allow public access should
|
||||||
|
keep working, including ACL settings when storing new files in them.
|
||||||
|
Per Amazon's announcement, "There is no change for existing buckets."
|
||||||
|
|
||||||
|
So users who create new buckets will need to set `public=no`
|
||||||
|
(the default) and set a bucket policy instead. See
|
||||||
|
[this comment](https://git-annex.branchable.com/special_remotes/S3/#comment-fcfba0021592de4c1425d3bf3c9563d3)
|
||||||
|
for an example policy.
|
||||||
|
|
||||||
|
That comment also suggests:
|
||||||
|
|
||||||
|
* If public=yes, instead of trying to set an ACL, first try HEAD on the
|
||||||
|
newly uploaded object without using the AWS_ACCESS_KEY. Only if that
|
||||||
|
fails, fall over to trying to set an ACL. And if you get
|
||||||
|
AccessControlListNotSupported (i.e. the error due to
|
||||||
|
BucketOwnerEnforced), then give a warning that the bucket policy is not
|
||||||
|
configured for public access.
|
||||||
|
|
||||||
|
However, the ACL is currently sent as part of the PutObject request. And
|
||||||
|
either there is not a way to change the ACL later, or the aws haskell library
|
||||||
|
is missing support for the API to do that.
|
||||||
|
|
||||||
|
I think what needs to be done is discourage initializing new S3 remotes
|
||||||
|
with public=yes, since it won't work. (Assuming some other S3
|
||||||
|
implementation than Amazon doesn't keep on supporting ACLs.)
|
||||||
|
|
||||||
|
And allow setting publicurl=yes without public=yes, so users who create
|
||||||
|
new buckets and configure a bucket policy to allow public access can tell
|
||||||
|
git-annex it's set up that way, so it will download from the bucket w/o S3
|
||||||
|
credentials.
|
||||||
|
|
||||||
|
While git-annex could HEAD without creds when publicurl=yes to verify that
|
||||||
|
the user has configured the bucket correctly, that would add some overhead,
|
||||||
|
and I guess if the user has not configured the bucket correctly, they will
|
||||||
|
notice in some other way eventually and can fix their bucket policy after the
|
||||||
|
fact. So I'm inclined not to do that.
|
||||||
|
"""]]
|
|
@ -125,15 +125,18 @@ the S3 remote.
|
||||||
When versioning is not enabled, this risks data loss, and so git-annex
|
When versioning is not enabled, this risks data loss, and so git-annex
|
||||||
will not let you enable a remote with that configuration unless forced.
|
will not let you enable a remote with that configuration unless forced.
|
||||||
|
|
||||||
* `public` - Set to "yes" to allow public read access to files sent
|
|
||||||
to the S3 remote. This is accomplished by setting an ACL when each
|
|
||||||
file is uploaded to the remote. So, changes to this setting will
|
|
||||||
only affect subsequent uploads.
|
|
||||||
|
|
||||||
* `publicurl` - Configure the URL that is used to download files
|
* `publicurl` - Configure the URL that is used to download files
|
||||||
from the bucket. Using this in combination with public=yes allows
|
from the bucket. Using this with a S3 bucket that has been configured
|
||||||
git-annex to download files from the S3 remote without needing to
|
to allow anyone to download its content allows git-annex to download
|
||||||
know the S3 credentials.
|
files from the S3 remote without needing to know the S3 credentials.
|
||||||
|
|
||||||
|
To configure the S3 bucket to allow anyone to download its content,
|
||||||
|
refer to S3 documentation to set a Bucket Policy.
|
||||||
|
|
||||||
|
* `public` - Deprecated. This enables public read access to files sent to
|
||||||
|
the S3 remote using ACLs. Note that Amazon S3 buckets created after April
|
||||||
|
2023 do not support using ACLs in this way and a Bucket Policy must instead
|
||||||
|
be used. This should only be set for older buckets.
|
||||||
|
|
||||||
* `partsize` - Amazon S3 only accepts uploads up to a certain file size,
|
* `partsize` - Amazon S3 only accepts uploads up to a certain file size,
|
||||||
and storing larger files requires a multipart upload process.
|
and storing larger files requires a multipart upload process.
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue