deal with Amazon S3 breaking change for public=yes
* S3: Amazon S3 buckets created after April 2023 do not support ACLs, so public=yes cannot be used with them. Existing buckets configured with public=yes will keep working. * S3: Allow setting publicurl=yes without public=yes, to support buckets that are configured with a Bucket Policy that allows public access. Sponsored-by: Joshua Antonishen on Patreon
This commit is contained in:
parent
ddc7f36d53
commit
33ba537728
7 changed files with 105 additions and 23 deletions
CHANGELOG
Remote
doc
bugs
S3_ACL_deprecation.mdwn
S3_ACL_deprecation
s3_remote__58___public__61__yes_sets_disabled_ACLs.mdwn → s3_remote__58___public__61__yes_sets_disabled_ACLs
special_remotes
|
@ -16,6 +16,12 @@ git-annex (10.20230627) UNRELEASED; urgency=medium
|
|||
* Improve resuming interrupted download when using yt-dlp.
|
||||
* importfeed bug fix: When -J was used with multiple feeds, some feeds
|
||||
did not get their items downloaded.
|
||||
* S3: Amazon S3 buckets created after April 2023 do not support ACLs,
|
||||
so public=yes cannot be used with them. Existing buckets configured
|
||||
with public=yes will keep working.
|
||||
* S3: Allow setting publicurl=yes without public=yes, to support
|
||||
buckets that are configured with a Bucket Policy that allows public
|
||||
access.
|
||||
|
||||
-- Joey Hess <id@joeyh.name> Mon, 26 Jun 2023 13:10:40 -0400
|
||||
|
||||
|
|
27
Remote/S3.hs
27
Remote/S3.hs
|
@ -94,7 +94,7 @@ remote = specialRemoteType $ RemoteType
|
|||
, yesNoParser versioningField (Just False)
|
||||
(FieldDesc "enable versioning of bucket content")
|
||||
, yesNoParser publicField (Just False)
|
||||
(FieldDesc "allow public read access to the bucket")
|
||||
(FieldDesc "allow public read access to the bucket via ACLs (only supported for old Amazon S3 buckets)")
|
||||
, optionalStringParser publicurlField
|
||||
(FieldDesc "url that can be used by public to download files")
|
||||
, optionalStringParser protocolField
|
||||
|
@ -238,7 +238,7 @@ gen r u rc gc rs = do
|
|||
, removeExportDirectoryWhenEmpty = Nothing
|
||||
, checkPresentExportWithContentIdentifier = checkPresentExportWithContentIdentifierS3 hdl this info
|
||||
}
|
||||
, whereisKey = Just (getPublicWebUrls u rs info c)
|
||||
, whereisKey = Just (getPublicWebUrls rs info c)
|
||||
, remoteFsck = Nothing
|
||||
, repairRepo = Nothing
|
||||
, config = c
|
||||
|
@ -427,7 +427,7 @@ retrieve hv r rs c info = fileRetriever' $ \f k p iv -> withS3Handle hv $ \case
|
|||
giveup "cannot download content"
|
||||
Right loc -> retrieveHelper info h loc (fromRawFilePath f) p iv
|
||||
Left S3HandleNeedCreds ->
|
||||
getPublicWebUrls' (uuid r) rs info c k >>= \case
|
||||
getPublicWebUrls' rs info c k >>= \case
|
||||
Left failreason -> do
|
||||
warning (UnquotedString failreason)
|
||||
giveup "cannot download content"
|
||||
|
@ -474,7 +474,7 @@ checkKey hv r rs c info k = withS3Handle hv $ \case
|
|||
giveup "cannot check content"
|
||||
Right loc -> checkKeyHelper info h loc
|
||||
Left S3HandleNeedCreds ->
|
||||
getPublicWebUrls' (uuid r) rs info c k >>= \case
|
||||
getPublicWebUrls' rs info c k >>= \case
|
||||
Left failreason -> do
|
||||
warning (UnquotedString failreason)
|
||||
giveup "cannot check content"
|
||||
|
@ -974,7 +974,7 @@ data S3Info = S3Info
|
|||
, partSize :: Maybe Integer
|
||||
, isIA :: Bool
|
||||
, versioning :: Bool
|
||||
, public :: Bool
|
||||
, publicACL :: Bool
|
||||
, publicurl :: Maybe URLString
|
||||
, host :: Maybe String
|
||||
, region :: Maybe String
|
||||
|
@ -997,7 +997,7 @@ extractS3Info c = do
|
|||
, isIA = configIA c
|
||||
, versioning = fromMaybe False $
|
||||
getRemoteConfigValue versioningField c
|
||||
, public = fromMaybe False $
|
||||
, publicACL = fromMaybe False $
|
||||
getRemoteConfigValue publicField c
|
||||
, publicurl = getRemoteConfigValue publicurlField c
|
||||
, host = getRemoteConfigValue hostField c
|
||||
|
@ -1014,7 +1014,7 @@ putObject info file rbody = (S3.putObject (bucket info) file rbody)
|
|||
|
||||
acl :: S3Info -> Maybe S3.CannedAcl
|
||||
acl info
|
||||
| public info = Just S3.AclPublicRead
|
||||
| publicACL info = Just S3.AclPublicRead
|
||||
| otherwise = Nothing
|
||||
|
||||
getBucketName :: ParsedRemoteConfig -> Maybe BucketName
|
||||
|
@ -1154,7 +1154,8 @@ s3Info c info = catMaybes
|
|||
then Just ("internet archive item", iaItemUrl $ fromMaybe "unknown" $ getBucketName c)
|
||||
else Nothing
|
||||
, Just ("partsize", maybe "unlimited" (roughSize storageUnits False) (getPartSize c))
|
||||
, Just ("public", if public info then "yes" else "no")
|
||||
, Just ("publicurl", fromMaybe "" (publicurl info))
|
||||
, Just ("public", if publicACL info then "yes" else "no")
|
||||
, Just ("versioning", if versioning info then "yes" else "no")
|
||||
]
|
||||
where
|
||||
|
@ -1162,13 +1163,11 @@ s3Info c info = catMaybes
|
|||
showstorageclass (S3.OtherStorageClass t) = T.unpack t
|
||||
showstorageclass sc = show sc
|
||||
|
||||
getPublicWebUrls :: UUID -> RemoteStateHandle -> S3Info -> ParsedRemoteConfig -> Key -> Annex [URLString]
|
||||
getPublicWebUrls u rs info c k = either (const []) id <$> getPublicWebUrls' u rs info c k
|
||||
getPublicWebUrls :: RemoteStateHandle -> S3Info -> ParsedRemoteConfig -> Key -> Annex [URLString]
|
||||
getPublicWebUrls rs info c k = either (const []) id <$> getPublicWebUrls' rs info c k
|
||||
|
||||
getPublicWebUrls' :: UUID -> RemoteStateHandle -> S3Info -> ParsedRemoteConfig -> Key -> Annex (Either String [URLString])
|
||||
getPublicWebUrls' u rs info c k
|
||||
| not (public info) = return $ Left $
|
||||
"S3 bucket does not allow public access; " ++ needS3Creds u
|
||||
getPublicWebUrls' :: RemoteStateHandle -> S3Info -> ParsedRemoteConfig -> Key -> Annex (Either String [URLString])
|
||||
getPublicWebUrls' rs info c k
|
||||
| exportTree c = if versioning info
|
||||
then case publicurl info of
|
||||
Just url -> getversionid (const $ genericPublicUrl url)
|
||||
|
|
|
@ -155,4 +155,4 @@ git-annex: get: 1 failed
|
|||
|
||||
We use git-annex to share large datasets with the scientific community at https://github.com/spine-generic/data-multi-subject !
|
||||
|
||||
|
||||
> [[fixed|done]] --[[Joey]]
|
||||
|
|
|
@ -0,0 +1,29 @@
|
|||
[[!comment format=mdwn
|
||||
username="joey"
|
||||
subject="""comment 2"""
|
||||
date="2023-07-21T17:53:25Z"
|
||||
content="""
|
||||
This only affects new S3 buckets. Existing S3 buckets that were
|
||||
created before April 2023 and were set up to allow public access should
|
||||
keep working, including ACL settings when storing new files in them.
|
||||
Per [Amazon's announcement](https://aws.amazon.com/about-aws/whats-new/2022/12/amazon-s3-automatically-enable-block-public-access-disable-access-control-lists-buckets-april-2023/),
|
||||
"There is no change for existing buckets."
|
||||
|
||||
I've made `publicurl` orthogonal to `public`.
|
||||
|
||||
As for the idea of `HTTP HEAD` before trying to set the ACL,
|
||||
the ACL is currently sent as part of the PutObject request. And
|
||||
either there is not a way to change the ACL later, or the aws haskell library
|
||||
is missing support for the API to do that.
|
||||
|
||||
While git-annex could HEAD without creds when publicurl=yes to verify that the
|
||||
user has configured the bucket correctly, and at least warn about a
|
||||
misconfiguration, that would add some overhead, and I guess if the user has not
|
||||
configured the bucket correctly, they will notice in some other way eventually
|
||||
and can fix its bucket policy after the fact. So I'm inclined not to do
|
||||
that.
|
||||
|
||||
Instead I've simply deprecated `public`, noting that it should not be set
|
||||
on new buckets. The user will have to deal with setting up the Bucket
|
||||
Policy themselves.
|
||||
"""]]
|
|
@ -37,3 +37,5 @@ upgrade supported from repository versions: 0 1 2 3 4 5 6 7 8 9 10
|
|||
### Have you had any luck using git-annex before? (Sometimes we get tired of reading bug reports all day and a lil' positive end note does wonders)
|
||||
|
||||
I work daily with git-annex and I never fail to be amazed by it. Thank you for your work!
|
||||
|
||||
> [[fixed|done]] --[[Joey]]
|
||||
|
|
|
@ -0,0 +1,43 @@
|
|||
[[!comment format=mdwn
|
||||
username="joey"
|
||||
subject="""comment 1"""
|
||||
date="2023-07-21T17:04:19Z"
|
||||
content="""
|
||||
This only affects new S3 buckets. Existing S3 buckets that were
|
||||
created before April 2023 and were set up to allow public access should
|
||||
keep working, including ACL settings when storing new files in them.
|
||||
Per Amazon's announcement, "There is no change for existing buckets."
|
||||
|
||||
So users who create new buckets will need to set `public=no`
|
||||
(the default) and set a bucket policy instead. See
|
||||
[this comment](https://git-annex.branchable.com/special_remotes/S3/#comment-fcfba0021592de4c1425d3bf3c9563d3)
|
||||
for an example policy.
|
||||
|
||||
That comment also suggests:
|
||||
|
||||
* If public=yes, instead of trying to set an ACL, first try HEAD on the
|
||||
newly uploaded object without using the AWS_ACCESS_KEY. Only if that
|
||||
fails, fall over to trying to set an ACL. And if you get
|
||||
AccessControlListNotSupported (i.e. the error due to
|
||||
BucketOwnerEnforced), then give a warning that the bucket policy is not
|
||||
configured for public access.
|
||||
|
||||
However, the ACL is currently sent as part of the PutObject request. And
|
||||
either there is not a way to change the ACL later, or the aws haskell library
|
||||
is missing support for the API to do that.
|
||||
|
||||
I think what needs to be done is discourage initializing new S3 remotes
|
||||
with public=yes, since it won't work. (Assuming some other S3
|
||||
implementation than Amazon doesn't keep on supporting ACLs.)
|
||||
|
||||
And allow setting publicurl=yes without public=yes, so users who create
|
||||
new buckets and configure a bucket policy to allow public access can tell
|
||||
git-annex it's set up that way, so it will download from the bucket w/o S3
|
||||
credentials.
|
||||
|
||||
While git-annex could HEAD without creds when publicurl=yes to verify that
|
||||
the user has configured the bucket correctly, that would add some overhead,
|
||||
and I guess if the user has not configured the bucket correctly, they will
|
||||
notice in some other way eventually and can fix its bucket policy after the
|
||||
fact. So I'm inclined not to do that.
|
||||
"""]]
|
|
@ -125,15 +125,18 @@ the S3 remote.
|
|||
When versioning is not enabled, this risks data loss, and so git-annex
|
||||
will not let you enable a remote with that configuration unless forced.
|
||||
|
||||
* `public` - Set to "yes" to allow public read access to files sent
|
||||
to the S3 remote. This is accomplished by setting an ACL when each
|
||||
file is uploaded to the remote. So, changes to this setting will
|
||||
only affect subsequent uploads.
|
||||
|
||||
* `publicurl` - Configure the URL that is used to download files
|
||||
from the bucket. Using this in combination with public=yes allows
|
||||
git-annex to download files from the S3 remote without needing to
|
||||
know the S3 credentials.
|
||||
from the bucket. Using this with a S3 bucket that has been configured
|
||||
to allow anyone to download its content allows git-annex to download
|
||||
files from the S3 remote without needing to know the S3 credentials.
|
||||
|
||||
To configure the S3 bucket to allow anyone to download its content,
|
||||
refer to S3 documentation to set a Bucket Policy.
|
||||
|
||||
* `public` - Deprecated. This enables public read access to files sent to
|
||||
the S3 remote using ACLs. Note that Amazon S3 buckets created after April
|
||||
2023 do not support using ACLs in this way and a Bucket Policy must instead
|
||||
be used. This should only be set for older buckets.
|
||||
|
||||
* `partsize` - Amazon S3 only accepts uploads up to a certain file size,
|
||||
and storing larger files requires a multipart upload process.
|
||||
|
|
Loading…
Reference in a new issue