diff --git a/Remote/S3.hs b/Remote/S3.hs index e9c62eb25a..348b240d48 100644 --- a/Remote/S3.hs +++ b/Remote/S3.hs @@ -102,23 +102,24 @@ s3Setup' u c = if isIA c then archiveorg else defaulthost archiveorg = do showNote "Internet Archive mode" - maybe (error "specify bucket=") (const noop) $ - getBucket archiveconfig - writeUUIDFile archiveconfig u - use archiveconfig - where - archiveconfig = + -- Ensure user enters a valid bucket name, since + -- this determines the name of the archive.org item. + let bucket = replace " " "-" $ map toLower $ + fromMaybe (error "specify bucket=") $ + getBucket c + let archiveconfig = -- hS3 does not pass through x-archive-* headers M.mapKeys (replace "x-archive-" "x-amz-") $ -- encryption does not make sense here M.insert "encryption" "none" $ + M.insert "bucket" bucket $ M.union c $ -- special constraints on key names M.insert "mungekeys" "ia" $ -- bucket created only when files are uploaded - M.insert "x-amz-auto-make-bucket" "1" $ - -- no default bucket name; should be human-readable - M.delete "bucket" defaults + M.insert "x-amz-auto-make-bucket" "1" defaults + writeUUIDFile archiveconfig u + use archiveconfig store :: Remote -> Key -> AssociatedFile -> MeterUpdate -> Annex Bool store r k _f p = s3Action r False $ \(conn, bucket) -> diff --git a/debian/changelog b/debian/changelog index 67aae5e2c5..1a7b5f1ab7 100644 --- a/debian/changelog +++ b/debian/changelog @@ -25,6 +25,7 @@ git-annex (4.20131003) UNRELEASED; urgency=low on OSX. * sync: Fix automatic resolution of merge conflicts where one side is an annexed file, and the other side is a non-annexed file, or a directory. + * S3: Try to ensure bucket name is valid for archive.org. 
-- Joey Hess Thu, 03 Oct 2013 15:41:24 -0400 diff --git a/doc/bugs/S3_buckets_with_capital_letters_breaks_authentication.mdwn b/doc/bugs/S3_buckets_with_capital_letters_breaks_authentication.mdwn index 886f0491ea..9a67db0c62 100644 --- a/doc/bugs/S3_buckets_with_capital_letters_breaks_authentication.mdwn +++ b/doc/bugs/S3_buckets_with_capital_letters_breaks_authentication.mdwn @@ -28,3 +28,5 @@ initremote archive-moglenrepublica (Internet Archive mode) git-annex: The reques """]] Just thought it would be better to have a separate thread for this bug. :) + +> [[fixed|done]] --[[Joey]] diff --git a/doc/tips/Internet_Archive_via_S3.mdwn b/doc/tips/Internet_Archive_via_S3.mdwn index 39da2c60fd..eba28961d1 100644 --- a/doc/tips/Internet_Archive_via_S3.mdwn +++ b/doc/tips/Internet_Archive_via_S3.mdwn @@ -30,12 +30,6 @@ rather than having git-annex pick a random one; and you can optionally specify `x-archive-meta*` headers to add metadata as explained in their [documentation](http://www.archive.org/help/abouts3.txt). -[[!template id=note text=""" -/!\ There seems to be a [[bug|bugs/S3 buckets with capital letters breaks authentication]] in either hS3 or the archive that breaks -authentication when the bucket name contains spaces or upper-case letters.. -use all lowercase and no spaces when making the bucket with `initremote`. -"""]] - # git annex initremote archive-panama type=S3 \ host=s3.us.archive.org bucket=panama-canal-lock-blueprints \ x-archive-meta-mediatype=texts x-archive-meta-language=eng \ @@ -51,8 +45,14 @@ Then you can annex files and copy them to the remote as usual: # git annex copy photo1.jpeg --fast --to archive-panama copy (to archive-panama...) ok -Note the use of the SHA1E [[backend|backends]]. It makes most sense -to use the WORM or SHA1E backend for files that will be stored in -the Internet Archive, since the key name will be exposed as the filename -there, and since the Archive does special processing of files based on -their extension. 
+Once a file has been stored on archive.org, it cannot be (easily) removed +from it. Also, git-annex whereis will tell you a public url for the file +on archive.org. (It may take a while for archive.org to make the file +publicly visible.) + +Note the use of the SHA1E [[backend|backends]] when adding files. That is +the default backend used by git-annex, but even if you don't normally use +it, it makes most sense to use the WORM or SHA1E backend for files that +will be stored in the Internet Archive, since the key name will be exposed +as the filename there, and since the Archive does special processing of +files based on their extension.