Automatically register public urls for files uploaded to the Internet Archive.

This commit is contained in:
Joey Hess 2013-04-25 17:28:25 -04:00
parent e3ea36174b
commit 3c7f4d2bd1
3 changed files with 38 additions and 13 deletions

View file

@ -1,6 +1,6 @@
{- Amazon S3 remotes. {- S3 remotes
- -
- Copyright 2011 Joey Hess <joey@kitenet.net> - Copyright 2011-2013 Joey Hess <joey@kitenet.net>
- -
- Licensed under the GNU GPL version 3 or higher. - Licensed under the GNU GPL version 3 or higher.
-} -}
@ -30,6 +30,9 @@ import Crypto
import Creds import Creds
import Utility.Metered import Utility.Metered
import Annex.Content import Annex.Content
import Logs.Web
type Bucket = String
remote :: RemoteType remote :: RemoteType
remote = RemoteType { remote = RemoteType {
@ -112,8 +115,13 @@ s3Setup u c = if isIA c then archiveorg else defaulthost
store :: Remote -> Key -> AssociatedFile -> MeterUpdate -> Annex Bool store :: Remote -> Key -> AssociatedFile -> MeterUpdate -> Annex Bool
store r k _f p = s3Action r False $ \(conn, bucket) -> store r k _f p = s3Action r False $ \(conn, bucket) ->
sendAnnex k (void $ remove' r k) $ \src -> do sendAnnex k (void $ remove' r k) $ \src -> do
res <- storeHelper (conn, bucket) r k p src ok <- s3Bool =<< storeHelper (conn, bucket) r k p src
s3Bool res
-- Store public URL to item in Internet Archive.
when (ok && isIA (config r)) $
setUrlPresent k (iaKeyUrl r k)
return ok
storeEncrypted :: Remote -> (Cipher, Key) -> Key -> MeterUpdate -> Annex Bool storeEncrypted :: Remote -> (Cipher, Key) -> Key -> MeterUpdate -> Annex Bool
storeEncrypted r (cipher, enck) k p = s3Action r False $ \(conn, bucket) -> storeEncrypted r (cipher, enck) k p = s3Action r False $ \(conn, bucket) ->
@ -122,10 +130,9 @@ storeEncrypted r (cipher, enck) k p = s3Action r False $ \(conn, bucket) ->
withTmp enck $ \tmp -> sendAnnex k (void $ remove' r enck) $ \src -> do withTmp enck $ \tmp -> sendAnnex k (void $ remove' r enck) $ \src -> do
liftIO $ encrypt (getGpgOpts r) cipher (feedFile src) $ liftIO $ encrypt (getGpgOpts r) cipher (feedFile src) $
readBytes $ L.writeFile tmp readBytes $ L.writeFile tmp
res <- storeHelper (conn, bucket) r enck p tmp s3Bool =<< storeHelper (conn, bucket) r enck p tmp
s3Bool res
storeHelper :: (AWSConnection, String) -> Remote -> Key -> MeterUpdate -> FilePath -> Annex (AWSResult ()) storeHelper :: (AWSConnection, Bucket) -> Remote -> Key -> MeterUpdate -> FilePath -> Annex (AWSResult ())
storeHelper (conn, bucket) r k p file = do storeHelper (conn, bucket) r k p file = do
size <- maybe getsize (return . fromIntegral) $ keySize k size <- maybe getsize (return . fromIntegral) $ keySize k
meteredBytes (Just p) size $ \meterupdate -> meteredBytes (Just p) size $ \meterupdate ->
@ -173,6 +180,9 @@ retrieveEncrypted r (cipher, enck) k d p = s3Action r False $ \(conn, bucket) ->
return True return True
Left e -> s3Warning e Left e -> s3Warning e
{- Internet Archive doesn't easily allow removing content.
- While it may remove the file, there are generally other files
- derived from it that it does not remove. -}
remove :: Remote -> RemoteConfig -> Key -> Annex Bool remove :: Remote -> RemoteConfig -> Key -> Annex Bool
remove r c k remove r c k
| isIA c = do | isIA c = do
@ -181,9 +191,8 @@ remove r c k
| otherwise = remove' r k | otherwise = remove' r k
remove' :: Remote -> Key -> Annex Bool remove' :: Remote -> Key -> Annex Bool
remove' r k = s3Action r False $ \(conn, bucket) -> do remove' r k = s3Action r False $ \(conn, bucket) ->
res <- liftIO $ deleteObject conn $ bucketKey r bucket k s3Bool =<< liftIO (deleteObject conn $ bucketKey r bucket k)
s3Bool res
checkPresent :: Remote -> Key -> Annex (Either String Bool) checkPresent :: Remote -> Key -> Annex (Either String Bool)
checkPresent r k = s3Action r noconn $ \(conn, bucket) -> do checkPresent r k = s3Action r noconn $ \(conn, bucket) -> do
@ -208,7 +217,7 @@ s3Bool :: AWSResult () -> Annex Bool
s3Bool (Right _) = return True s3Bool (Right _) = return True
s3Bool (Left e) = s3Warning e s3Bool (Left e) = s3Warning e
s3Action :: Remote -> a -> ((AWSConnection, String) -> Annex a) -> Annex a s3Action :: Remote -> a -> ((AWSConnection, Bucket) -> Annex a) -> Annex a
s3Action r noconn action = do s3Action r noconn action = do
let bucket = M.lookup "bucket" $ config r let bucket = M.lookup "bucket" $ config r
conn <- s3Connection (config r) (uuid r) conn <- s3Connection (config r) (uuid r)
@ -225,7 +234,7 @@ bucketFile r = munge . key2file
fileprefix = M.findWithDefault "" "fileprefix" c fileprefix = M.findWithDefault "" "fileprefix" c
c = config r c = config r
bucketKey :: Remote -> String -> Key -> S3Object bucketKey :: Remote -> Bucket -> Key -> S3Object
bucketKey r bucket k = S3Object bucket (bucketFile r k) "" [] L.empty bucketKey r bucket k = S3Object bucket (bucketFile r k) "" [] L.empty
{- Internet Archive limits filenames to a subset of ascii, {- Internet Archive limits filenames to a subset of ascii,
@ -284,5 +293,10 @@ isIA c = maybe False isIAHost (M.lookup "host" c)
isIAHost :: HostName -> Bool isIAHost :: HostName -> Bool
isIAHost h = ".archive.org" `isSuffixOf` map toLower h isIAHost h = ".archive.org" `isSuffixOf` map toLower h
iaItemUrl :: String -> String iaItemUrl :: Bucket -> URLString
iaItemUrl bucket = "http://archive.org/details/" ++ bucket iaItemUrl bucket = "http://archive.org/details/" ++ bucket
iaKeyUrl :: Remote -> Key -> URLString
iaKeyUrl r k = "http://archive.org/download/" ++ bucket ++ "/" ++ bucketFile r k
where
bucket = fromJust $ M.lookup "bucket" $ config r

2
debian/changelog vendored
View file

@ -37,6 +37,8 @@ git-annex (4.20130418) UNRELEASED; urgency=low
their API indicates it does. Always refuse to drop from there. their API indicates it does. Always refuse to drop from there.
* webapp: Display some additional information about a repository on its edit * webapp: Display some additional information about a repository on its edit
page. page.
* Automatically register public urls for files uploaded to the
Internet Archive.
-- Joey Hess <joeyh@debian.org> Thu, 18 Apr 2013 16:22:48 -0400 -- Joey Hess <joeyh@debian.org> Thu, 18 Apr 2013 16:22:48 -0400

View file

@ -72,6 +72,15 @@ Note that `not present` is a very bad thing to put in a preferred content
expression. It'll make it prefer to get content that's not present, and expression. It'll make it prefer to get content that's not present, and
drop content that is present! Don't go there.. drop content that is present! Don't go there..
### difference: "inmydir"
There's a special "inmydir" keyword you can use in a preferred content
expression of a special remote. This means that the content is preferred
if it's in a directory (located anywhere in the tree) with a special name.
The name of the directory can be configured using
`git annex initremote $remote mydir=$dirname`
## standard expressions ## standard expressions
git-annex comes with some standard preferred content expressions, that can git-annex comes with some standard preferred content expressions, that can