better memoization

Joey Hess 2014-08-09 22:13:03 -04:00
parent 5ee72b1bae
commit 445f04472c
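
The "memoization" here gathers the values that S3Handle previously cached one field at a time (hBucket, hStorageClass, hBucketObject, hMetaHeaders, hIsIA) into a single S3Info record, extracted once per remote by extractS3Info and carried in the handle's new hinfo field, so the S3 operations reuse the derived values instead of re-deriving them from the RemoteConfig. Below is a minimal, self-contained sketch of that pattern; the simplified types (a plain Map for the config, String for the bucket, Either in place of Annex) are illustrative stand-ins, not the real git-annex types.

-- Sketch only: simplified stand-ins for the real RemoteConfig/Annex types.
import qualified Data.Map as M

type RemoteConfig = M.Map String String

-- Values derived from the remote's config, computed once up front.
data S3Info = S3Info
	{ bucket :: String
	, isIA :: Bool
	}

-- Derive everything a single time; fail early if the bucket is unset.
extractS3Info :: RemoteConfig -> Either String S3Info
extractS3Info c = do
	b <- maybe (Left "S3 bucket not configured") Right (M.lookup "bucket" c)
	Right S3Info
		{ bucket = b
		, isIA = maybe False (== "s3.us.archive.org") (M.lookup "host" c)
		}

-- The handle carries the memoized S3Info, so every operation that is given
-- the handle also gets the precomputed values.
data S3Handle = S3Handle
	{ hinfo :: S3Info
	}

main :: IO ()
main = case extractS3Info (M.fromList [("bucket", "my-bucket")]) of
	Left err -> putStrLn err
	Right info -> putStrLn (bucket (hinfo (S3Handle info)))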

@@ -51,13 +51,16 @@ remote = RemoteType {
 }
 
 gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote)
-gen r u c gc = new <$> remoteCost gc expensiveRemoteCost
+gen r u c gc = do
+	cst <- remoteCost gc expensiveRemoteCost
+	info <- extractS3Info c
+	return $ new cst info
   where
-	new cst = Just $ specialRemote c
-		(prepareS3 this $ store this)
-		(prepareS3 this retrieve)
-		(prepareS3 this remove)
-		(prepareS3 this $ checkKey this)
+	new cst info = Just $ specialRemote c
+		(prepareS3 this info $ store this)
+		(prepareS3 this info retrieve)
+		(prepareS3 this info remove)
+		(prepareS3 this info $ checkKey this)
 		this
 	  where
 		this = Remote {
@@ -88,7 +91,7 @@ s3Setup mu mcreds c = do
 	c' <- setRemoteCredPair c (AWS.creds u) mcreds
 	s3Setup' u c'
 s3Setup' :: UUID -> RemoteConfig -> Annex (RemoteConfig, UUID)
-s3Setup' u c = if isIA c then archiveorg else defaulthost
+s3Setup' u c = if configIA c then archiveorg else defaulthost
   where
 	remotename = fromJust (M.lookup "name" c)
 	defbucket = remotename ++ "-" ++ fromUUID u
@@ -114,7 +117,7 @@ s3Setup' u c = if isIA c then archiveorg else defaulthost
 		showNote "Internet Archive mode"
 		-- Ensure user enters a valid bucket name, since
 		-- this determines the name of the archive.org item.
-		let bucket = replace " " "-" $ map toLower $
+		let validbucket = replace " " "-" $ map toLower $
 			fromMaybe (error "specify bucket=") $
 			getBucketName c
 		let archiveconfig =
@@ -122,28 +125,30 @@ s3Setup' u c = if isIA c then archiveorg else defaulthost
 			M.mapKeys (replace "x-archive-" "x-amz-") $
 			-- encryption does not make sense here
 			M.insert "encryption" "none" $
-			M.insert "bucket" bucket $
+			M.insert "bucket" validbucket $
 			M.union c $
 			-- special constraints on key names
 			M.insert "mungekeys" "ia" $
 			-- bucket created only when files are uploaded
 			M.insert "x-amz-auto-make-bucket" "1" defaults
-		withS3Handle archiveconfig u $
+		info <- extractS3Info archiveconfig
+		withS3Handle archiveconfig u info $
 			writeUUIDFile archiveconfig u
 		use archiveconfig
 
 -- Sets up a http connection manager for S3 encdpoint, which allows
 -- http connections to be reused across calls to the helper.
-prepareS3 :: Remote -> (S3Handle -> helper) -> Preparer helper
-prepareS3 r = resourcePrepare $ const $ withS3Handle (config r) (uuid r)
+prepareS3 :: Remote -> S3Info -> (S3Handle -> helper) -> Preparer helper
+prepareS3 r info = resourcePrepare $ const $
+	withS3Handle (config r) (uuid r) info
 
 store :: Remote -> S3Handle -> Storer
 store r h = fileStorer $ \k f p -> do
 	rbody <- liftIO $ httpBodyStorer f p
-	void $ sendS3Handle h $ putObject h (hBucketObject h k) rbody
+	void $ sendS3Handle h $ putObject h (bucketObject (hinfo h) k) rbody
 
 	-- Store public URL to item in Internet Archive.
-	when (hIsIA h && not (isChunkKey k)) $
+	when (isIA (hinfo h) && not (isChunkKey k)) $
 		setUrlPresent k (iaKeyUrl r k)
 
 	return True
@@ -154,11 +159,12 @@ store r h = fileStorer $ \k f p -> do
 retrieve :: S3Handle -> Retriever
 retrieve h = fileRetriever $ \f k p -> liftIO $ runResourceT $ do
 	(fr, fh) <- allocate (openFile f WriteMode) hClose
-	let req = S3.getObject (hBucket h) (hBucketObject h k)
+	let req = S3.getObject (bucket info) (bucketObject info k)
 	S3.GetObjectResponse { S3.gorResponse = rsp } <- sendS3Handle' h req
 	responseBody rsp $$+- sinkprogressfile fh p zeroBytesProcessed
 	release fr
   where
+	info = hinfo h
 	sinkprogressfile fh meterupdate sofar = do
 		mbs <- await
 		case mbs of
@@ -178,20 +184,22 @@ retrieveCheap _ _ = return False
  - derived from it that it does not remove. -}
 remove :: S3Handle -> Remover
 remove h k
-	| hIsIA h = do
+	| isIA info = do
 		warning "Cannot remove content from the Internet Archive"
 		return False
 	| otherwise = do
 		res <- tryNonAsync $ sendS3Handle h $
-			S3.DeleteObject (hBucketObject h k) (hBucket h)
+			S3.DeleteObject (bucketObject info k) (bucket info)
 		return $ either (const False) (const True) res
+  where
+	info = hinfo h
 
 checkKey :: Remote -> S3Handle -> CheckPresent
 checkKey r h k = do
 	showAction $ "checking " ++ name r
 	catchMissingException $ do
 		void $ sendS3Handle h $
-			S3.headObject (hBucket h) (hBucketObject h k)
+			S3.headObject (bucket (hinfo h)) (bucketObject (hinfo h) k)
 		return True
 
 {- Catch exception headObject returns when an object is not present
@@ -217,18 +225,19 @@ catchMissingException a = catchJust missing a (const $ return False)
 genBucket :: RemoteConfig -> UUID -> Annex ()
 genBucket c u = do
 	showAction "checking bucket"
-	withS3Handle c u $ \h ->
+	info <- extractS3Info c
+	withS3Handle c u info $ \h ->
 		go h =<< checkUUIDFile c u h
   where
 	go _ (Right True) = noop
 	go h _ = do
-		v <- tryS3 $ sendS3Handle h (S3.getBucket $ hBucket h)
+		v <- tryS3 $ sendS3Handle h (S3.getBucket $ bucket $ hinfo h)
 		case v of
			Right _ -> noop
 			Left _ -> do
 				showAction $ "creating bucket in " ++ datacenter
 				void $ sendS3Handle h $
-					S3.PutBucket (hBucket h) Nothing $
+					S3.PutBucket (bucket $ hinfo h) Nothing $
 						AWS.mkLocationConstraint $
 							T.pack datacenter
 				writeUUIDFile c u h
@@ -263,7 +272,7 @@ checkUUIDFile c u h = tryNonAsync $ check <$> get
 	get = liftIO
 		. runResourceT
 		. either (pure . Left) (Right <$$> AWS.loadToMemory)
-		=<< tryS3 (sendS3Handle h (S3.getObject (hBucket h) file))
+		=<< tryS3 (sendS3Handle h (S3.getObject (bucket (hinfo h)) file))
 	check (Right (S3.GetObjectMemoryResponse _meta rsp)) =
 		responseStatus rsp == ok200 && responseBody rsp == uuidb
 	check (Left _S3Error) = False
@@ -276,22 +285,16 @@ uuidFile c = getFilePrefix c ++ "annex-uuid"
 
 -- TODO: auto-create bucket when hIsIA.
 putObject :: S3Handle -> T.Text -> RequestBody -> S3.PutObject
-putObject h file rbody = (S3.putObject (hBucket h) file rbody)
-	{ S3.poStorageClass = Just (hStorageClass h)
-	, S3.poMetadata = hMetaHeaders h
+putObject h file rbody = (S3.putObject (bucket (hinfo h)) file rbody)
+	{ S3.poStorageClass = Just (storageClass (hinfo h))
+	, S3.poMetadata = metaHeaders (hinfo h)
 	}
 
 data S3Handle = S3Handle
 	{ hmanager :: Manager
 	, hawscfg :: AWS.Configuration
 	, hs3cfg :: S3.S3Configuration AWS.NormalQuery
-
-	-- Cached values.
-	, hBucket :: S3.Bucket
-	, hStorageClass :: S3.StorageClass
-	, hBucketObject :: Key -> T.Text
-	, hMetaHeaders :: [(T.Text, T.Text)]
-	, hIsIA :: Bool
+	, hinfo :: S3Info
 	}
 
 {- Sends a request to S3 and gets back the response.
@@ -314,23 +317,18 @@ sendS3Handle'
 	-> ResourceT IO a
 sendS3Handle' h = AWS.pureAws (hawscfg h) (hs3cfg h) (hmanager h)
 
-withS3Handle :: RemoteConfig -> UUID -> (S3Handle -> Annex a) -> Annex a
-withS3Handle c u a = do
+withS3Handle :: RemoteConfig -> UUID -> S3Info -> (S3Handle -> Annex a) -> Annex a
+withS3Handle c u info a = do
 	creds <- getRemoteCredPairFor "S3" c (AWS.creds u)
 	awscreds <- liftIO $ AWS.genCredentials $ fromMaybe nocreds creds
-	bucket <- maybe nobucket (return . T.pack) (getBucketName c)
 	let awscfg = AWS.Configuration AWS.Timestamp awscreds (AWS.defaultLog AWS.Error)
 	bracketIO (newManager httpcfg) closeManager $ \mgr ->
-		a $ S3Handle mgr awscfg s3cfg bucket sc bo mh (isIA c)
+		a $ S3Handle mgr awscfg s3cfg info
   where
 	s3cfg = s3Configuration c
 	httpcfg = defaultManagerSettings
 		{ managerResponseTimeout = Nothing }
-	sc = getStorageClass c
-	bo = T.pack . bucketObject c
-	mh = getMetaHeaders c
 	nocreds = error "Cannot use S3 without credentials configured"
-	nobucket = error "S3 bucket not configured"
 
 s3Configuration :: RemoteConfig -> S3.S3Configuration AWS.NormalQuery
 s3Configuration c = (S3.s3 proto endpoint False) { S3.s3Port = port }
@@ -354,6 +352,28 @@ s3Configuration c = (S3.s3 proto endpoint False) { S3.s3Port = port }
 
 tryS3 :: Annex a -> Annex (Either S3.S3Error a)
 tryS3 a = (Right <$> a) `catch` (pure . Left)
 
+data S3Info = S3Info
+	{ bucket :: S3.Bucket
+	, storageClass :: S3.StorageClass
+	, bucketObject :: Key -> T.Text
+	, metaHeaders :: [(T.Text, T.Text)]
+	, isIA :: Bool
+	}
+
+extractS3Info :: RemoteConfig -> Annex S3Info
+extractS3Info c = do
+	b <- maybe
+		(error "S3 bucket not configured")
+		(return . T.pack)
+		(getBucketName c)
+	return $ S3Info
+		{ bucket = b
+		, storageClass = getStorageClass c
+		, bucketObject = T.pack . getBucketObject c
+		, metaHeaders = getMetaHeaders c
+		, isIA = configIA c
+		}
+
 getBucketName :: RemoteConfig -> Maybe BucketName
 getBucketName = M.lookup "bucket"
@@ -373,8 +393,8 @@ getMetaHeaders = map munge . filter ismetaheader . M.assocs
 getFilePrefix :: RemoteConfig -> String
 getFilePrefix = M.findWithDefault "" "fileprefix"
 
-bucketObject :: RemoteConfig -> Key -> FilePath
-bucketObject c = munge . key2file
+getBucketObject :: RemoteConfig -> Key -> FilePath
+getBucketObject c = munge . key2file
   where
 	munge s = case M.lookup "mungekeys" c of
 		Just "ia" -> iaMunge $ getFilePrefix c ++ s
@@ -392,20 +412,20 @@ iaMunge = (>>= munge)
 		| isSpace c = []
 		| otherwise = "&" ++ show (ord c) ++ ";"
 
+configIA :: RemoteConfig -> Bool
+configIA = maybe False isIAHost . M.lookup "host"
+
 {- Hostname to use for archive.org S3. -}
 iaHost :: HostName
 iaHost = "s3.us.archive.org"
 
-isIA :: RemoteConfig -> Bool
-isIA c = maybe False isIAHost (M.lookup "host" c)
-
 isIAHost :: HostName -> Bool
 isIAHost h = ".archive.org" `isSuffixOf` map toLower h
 
 iaItemUrl :: BucketName -> URLString
-iaItemUrl bucket = "http://archive.org/details/" ++ bucket
+iaItemUrl b = "http://archive.org/details/" ++ b
 
 iaKeyUrl :: Remote -> Key -> URLString
-iaKeyUrl r k = "http://archive.org/download/" ++ bucket ++ "/" ++ bucketObject (config r) k
+iaKeyUrl r k = "http://archive.org/download/" ++ b ++ "/" ++ getBucketObject (config r) k
   where
-	bucket = fromMaybe "" $ getBucketName $ config r
+	b = fromMaybe "" $ getBucketName $ config r