From d05b7b9182d84886487903f3c0040a4ebfef60b1 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Sun, 3 Aug 2014 01:12:24 -0400 Subject: [PATCH] better byteRetriever Make the byteRetriever be passed the callback that consumes the bytestring. This way, there's no worries about the lazy bytestring not all being read when the resource that's creating it is closed. Which in turn lets bup, ddar, and S3 each switch from using an unncessary fileRetriver to a byteRetriever. So, more efficient on chunks and encrypted files. The only remaining fileRetrievers are hook and external, which really do retrieve to files. --- Remote/Bup.hs | 13 ++++++------ Remote/Ddar.hs | 12 +++++------ Remote/Directory.hs | 4 ++-- Remote/Directory/LegacyChunked.hs | 10 ++++----- Remote/Glacier.hs | 33 ++++++++++++++--------------- Remote/Helper/ChunkedEncryptable.hs | 8 ++++--- Remote/S3.hs | 4 ++-- Types/StoreRetrieve.hs | 2 +- 8 files changed, 44 insertions(+), 42 deletions(-) diff --git a/Remote/Bup.hs b/Remote/Bup.hs index 06679c4b8a..44ea8c7d83 100644 --- a/Remote/Bup.hs +++ b/Remote/Bup.hs @@ -8,6 +8,7 @@ module Remote.Bup (remote) where import qualified Data.Map as M +import qualified Data.ByteString.Lazy as L import Data.ByteString.Lazy.UTF8 (fromString) import Common.Annex @@ -127,12 +128,12 @@ store r buprepo = byteStorer $ \k b p -> do return True retrieve :: BupRepo -> Retriever -retrieve buprepo = fileRetriever $ \d k _p -> - liftIO $ withFile d WriteMode $ \h -> do - let params = bupParams "join" buprepo [Param $ bupRef k] - let p = proc "bup" (toCommand params) - (_, _, _, pid) <- createProcess $ p { std_out = UseHandle h } - forceSuccessProcess p pid +retrieve buprepo = byteRetriever $ \k sink -> do + let params = bupParams "join" buprepo [Param $ bupRef k] + let p = proc "bup" (toCommand params) + (_, Just h, _, pid) <- liftIO $ createProcess $ p { std_out = CreatePipe } + liftIO (hClose h >> forceSuccessProcess p pid) + `after` (sink =<< liftIO (L.hGetContents h)) retrieveCheap :: BupRepo -> Key -> FilePath -> Annex Bool retrieveCheap _ _ _ = return False diff --git a/Remote/Ddar.hs b/Remote/Ddar.hs index 365506a22c..bc4755a81c 100644 --- a/Remote/Ddar.hs +++ b/Remote/Ddar.hs @@ -10,8 +10,8 @@ module Remote.Ddar (remote) where import Control.Exception import qualified Data.Map as M +import qualified Data.ByteString.Lazy as L import System.IO.Error -import System.Process import Data.String.Utils import Common.Annex @@ -127,12 +127,12 @@ ddarExtractRemoteCall ddarrepo k = ddarRemoteCall ddarrepo 'x' [Param "--force-stdout", Param $ key2file k] retrieve :: DdarRepo -> Retriever -retrieve ddarrepo = fileRetriever $ \d k _p -> do +retrieve ddarrepo = byteRetriever $ \k sink -> do (cmd, params) <- ddarExtractRemoteCall ddarrepo k - liftIO $ withFile d WriteMode $ \h -> do - let p = (proc cmd $ toCommand params){ std_out = UseHandle h } - (_, _, _, pid) <- Common.Annex.createProcess p - forceSuccessProcess p pid + let p = (proc cmd $ toCommand params) { std_out = CreatePipe } + (_, Just h, _, pid) <- liftIO $ createProcess p + liftIO (hClose h >> forceSuccessProcess p pid) + `after` (sink =<< liftIO (L.hGetContents h)) retrieveCheap :: Key -> FilePath -> Annex Bool retrieveCheap _ _ = return False diff --git a/Remote/Directory.hs b/Remote/Directory.hs index a879875298..78d30b1a16 100644 --- a/Remote/Directory.hs +++ b/Remote/Directory.hs @@ -136,8 +136,8 @@ store d chunkconfig k b p = liftIO $ do retrieve :: FilePath -> ChunkConfig -> Preparer Retriever retrieve d (LegacyChunks _) = Legacy.retrieve locations d -retrieve d _ = simplyPrepare $ byteRetriever $ \k -> - liftIO $ L.readFile =<< getLocation d k +retrieve d _ = simplyPrepare $ byteRetriever $ \k sink -> + sink =<< liftIO (L.readFile =<< getLocation d k) retrieveCheap :: FilePath -> ChunkConfig -> Key -> FilePath -> Annex Bool -- no cheap retrieval possible for chunks diff --git a/Remote/Directory/LegacyChunked.hs b/Remote/Directory/LegacyChunked.hs index 5c200570c8..a198688024 100644 --- a/Remote/Directory/LegacyChunked.hs +++ b/Remote/Directory/LegacyChunked.hs @@ -94,14 +94,14 @@ retrieve locations d basek a = do tmpdir <- fromRepo $ gitAnnexTmpMiscDir createAnnexDirectory tmpdir let tmp = tmpdir keyFile basek ++ ".directorylegacy.tmp" - a $ Just $ byteRetriever $ \k -> liftIO $ do - void $ withStoredFiles d locations k $ \fs -> do + a $ Just $ byteRetriever $ \k sink -> do + liftIO $ void $ withStoredFiles d locations k $ \fs -> do forM_ fs $ S.appendFile tmp <=< S.readFile return True - b <- L.readFile tmp - nukeFile tmp - return b + b <- liftIO $ L.readFile tmp + liftIO $ nukeFile tmp + sink b checkPresent :: FilePath -> (FilePath -> Key -> [FilePath]) -> Key -> Annex (Either String Bool) checkPresent d locations k = liftIO $ catchMsgIO $ diff --git a/Remote/Glacier.hs b/Remote/Glacier.hs index 9b428bd80a..592a7db1f0 100644 --- a/Remote/Glacier.hs +++ b/Remote/Glacier.hs @@ -20,7 +20,6 @@ import Config.Cost import Remote.Helper.Special import Remote.Helper.ChunkedEncryptable import qualified Remote.Helper.AWS as AWS -import Crypto import Creds import Utility.Metered import qualified Annex @@ -120,11 +119,10 @@ store r k b p = go =<< glacierEnv c u return True prepareRetrieve :: Remote -> Preparer Retriever -prepareRetrieve r = simplyPrepare $ fileRetriever $ \d k p -> - retrieve r k (readBytes (meteredWriteFile p d)) +prepareRetrieve = simplyPrepare . byteRetriever . retrieve -retrieve :: Remote -> Key -> (Handle -> IO ()) -> Annex () -retrieve r k reader = go =<< glacierEnv c u +retrieve :: Remote -> Key -> (L.ByteString -> Annex Bool) -> Annex Bool +retrieve r k sink = go =<< glacierEnv c u where c = config r u = uuid r @@ -138,17 +136,21 @@ retrieve r k reader = go =<< glacierEnv c u go Nothing = error "cannot retrieve from glacier" go (Just e) = do let cmd = (proc "glacier" (toCommand params)) { env = Just e } - ok <- liftIO $ catchBoolIO $ - withHandle StdoutHandle createProcessSuccess cmd $ \h -> - ifM (hIsEOF h) - ( return False - , do - reader h - return True - ) + (_, Just h, _, pid) <- liftIO $ createProcess cmd + -- Glacier cannot store empty files, so if the output is + -- empty, the content is not available yet. + ok <- ifM (liftIO $ hIsEOF h) + ( return False + , sink =<< liftIO (L.hGetContents h) + ) + liftIO $ hClose h + liftIO $ forceSuccessProcess cmd pid unless ok $ do showLongNote "Recommend you wait up to 4 hours, and then run this command again." - error "not yet available" + return ok + +retrieveCheap :: Remote -> Key -> FilePath -> Annex Bool +retrieveCheap _ _ _ = return False remove :: Remote -> Key -> Annex Bool remove r k = glacierAction r @@ -159,9 +161,6 @@ remove r k = glacierAction r , Param $ archive r k ] -retrieveCheap :: Remote -> Key -> FilePath -> Annex Bool -retrieveCheap _ _ _ = return False - checkPresent :: Remote -> Key -> Annex (Either String Bool) checkPresent r k = do showAction $ "checking " ++ name r diff --git a/Remote/Helper/ChunkedEncryptable.hs b/Remote/Helper/ChunkedEncryptable.hs index e607715514..9c6ba98a2b 100644 --- a/Remote/Helper/ChunkedEncryptable.hs +++ b/Remote/Helper/ChunkedEncryptable.hs @@ -77,9 +77,11 @@ fileRetriever a k m callback = do a f k m callback (FileContent f) --- A Retriever that generates a L.ByteString containing the Key's content. -byteRetriever :: (Key -> Annex L.ByteString) -> Retriever -byteRetriever a k _m callback = callback =<< (ByteContent <$> a k) +-- A Retriever that generates a lazy ByteString containing the Key's +-- content, and passes it to a callback action which will fully consume it +-- before returning. +byteRetriever :: (Key -> (L.ByteString -> Annex Bool) -> Annex Bool) -> Retriever +byteRetriever a k _m callback = a k (callback . ByteContent) {- The base Remote that is provided to chunkedEncryptableRemote - needs to have storeKey and retreiveKeyFile methods, but they are diff --git a/Remote/S3.hs b/Remote/S3.hs index ed9122cab1..68d8ee4bfb 100644 --- a/Remote/S3.hs +++ b/Remote/S3.hs @@ -147,9 +147,9 @@ store (conn, bucket) r k p file = do prepareRetrieve :: Remote -> Preparer Retriever prepareRetrieve r = resourcePrepare (const $ s3Action r False) $ \(conn, bucket) -> - byteRetriever $ \k -> + byteRetriever $ \k sink -> liftIO (getObject conn $ bucketKey r bucket k) - >>= either s3Error (return . obj_data) + >>= either s3Error (sink . obj_data) retrieveCheap :: Remote -> Key -> FilePath -> Annex Bool retrieveCheap _ _ _ = return False diff --git a/Types/StoreRetrieve.hs b/Types/StoreRetrieve.hs index bde7489604..9fc0634c4e 100644 --- a/Types/StoreRetrieve.hs +++ b/Types/StoreRetrieve.hs @@ -30,6 +30,6 @@ isByteContent (FileContent _) = False type Storer = Key -> ContentSource -> MeterUpdate -> Annex Bool -- Action that retrieves a Key's content from a remote, passing it to a --- callback. +-- callback, which will fully consume the content before returning. -- Throws exception if key is not present, or remote is not accessible. type Retriever = Key -> MeterUpdate -> (ContentSource -> Annex Bool) -> Annex Bool