From 835283b86240ef4b68c44f4332ac1b644e08e49f Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Tue, 15 Oct 2024 14:29:06 -0400 Subject: [PATCH] stream through proxy when using fileRetriever The problem was that when the proxy requests a key be retrieved to its own temp file, fileRetriever was retriving it to the key's temp location, and then moving it at the end, which broke streaming. So, plumb through the path where the key is being retrieved to. --- Remote/Directory.hs | 2 +- Remote/Directory/LegacyChunked.hs | 4 ++-- Remote/GCrypt.hs | 4 ++-- Remote/Glacier.hs | 2 +- Remote/Helper/Chunked.hs | 14 +++++++++----- Remote/Helper/Special.hs | 17 +++++++++-------- Types/StoreRetrieve.hs | 7 +++++-- doc/todo/git-annex_proxies.mdwn | 5 ----- 8 files changed, 29 insertions(+), 26 deletions(-) diff --git a/Remote/Directory.hs b/Remote/Directory.hs index cebd5e61fe..ae8b2cc803 100644 --- a/Remote/Directory.hs +++ b/Remote/Directory.hs @@ -173,7 +173,7 @@ locations d k = NE.map (d P.) (keyPaths k) locations' :: RawFilePath -> Key -> [RawFilePath] locations' d k = NE.toList (locations d k) -{- Returns the location off a Key in the directory. If the key is +{- Returns the location of a Key in the directory. If the key is - present, returns the location that is actually used, otherwise - returns the first, default location. -} getLocation :: RawFilePath -> Key -> IO RawFilePath diff --git a/Remote/Directory/LegacyChunked.hs b/Remote/Directory/LegacyChunked.hs index fe1065d76c..2268dc998a 100644 --- a/Remote/Directory/LegacyChunked.hs +++ b/Remote/Directory/LegacyChunked.hs @@ -98,7 +98,7 @@ store repotop chunksize finalizer k b p = storeHelper repotop finalizer k $ \des - :/ This is legacy code.. -} retrieve :: (RawFilePath -> Key -> [RawFilePath]) -> RawFilePath -> Retriever -retrieve locations d basek p miv c = withOtherTmp $ \tmpdir -> do +retrieve locations d basek p _dest miv c = withOtherTmp $ \tmpdir -> do showLongNote "This remote uses the deprecated chunksize setting. So this will be quite slow." let tmp = tmpdir P. keyFile basek <> ".directorylegacy.tmp" let tmp' = fromRawFilePath tmp @@ -110,7 +110,7 @@ retrieve locations d basek p miv c = withOtherTmp $ \tmpdir -> do b <- liftIO $ L.readFile tmp' liftIO $ removeWhenExistsWith R.removeLink tmp sink b - byteRetriever go basek p miv c + byteRetriever go basek p tmp miv c checkKey :: RawFilePath -> (RawFilePath -> Key -> [RawFilePath]) -> Key -> Annex Bool checkKey d locations k = liftIO $ diff --git a/Remote/GCrypt.hs b/Remote/GCrypt.hs index 9ac6138639..e740d14535 100644 --- a/Remote/GCrypt.hs +++ b/Remote/GCrypt.hs @@ -409,9 +409,9 @@ store' repo r rsyncopts accessmethod storersync = fileStorer $ Remote.Rsync.store rsyncopts retrieve :: Remote -> Remote.Rsync.RsyncOpts -> AccessMethod -> Retriever -retrieve r rsyncopts accessmethod k p miv sink = do +retrieve r rsyncopts accessmethod k p dest miv sink = do repo <- getRepo r - retrieve' repo r rsyncopts accessmethod k p miv sink + retrieve' repo r rsyncopts accessmethod k p dest miv sink retrieve' :: Git.Repo -> Remote -> Remote.Rsync.RsyncOpts -> AccessMethod -> Retriever retrieve' repo r rsyncopts accessmethod diff --git a/Remote/Glacier.hs b/Remote/Glacier.hs index 7c0d94383f..e686b85c57 100644 --- a/Remote/Glacier.hs +++ b/Remote/Glacier.hs @@ -177,7 +177,7 @@ store' r k b p = go =<< glacierEnv c gc u forceSuccessProcess cmd pid go' _ _ _ _ _ = error "internal" -retrieve :: forall a. Remote -> Key -> MeterUpdate -> Maybe IncrementalVerifier -> (ContentSource -> Annex a) -> Annex a +retrieve :: forall a. Remote -> Key -> MeterUpdate -> RawFilePath -> Maybe IncrementalVerifier -> (ContentSource -> Annex a) -> Annex a retrieve = byteRetriever . retrieve' retrieve' :: forall a. Remote -> Key -> (L.ByteString -> Annex a) -> Annex a diff --git a/Remote/Helper/Chunked.hs b/Remote/Helper/Chunked.hs index 7a3b33066d..7ebdfdccef 100644 --- a/Remote/Helper/Chunked.hs +++ b/Remote/Helper/Chunked.hs @@ -294,8 +294,10 @@ retrieveChunks retriever u vc chunkconfig encryptor basek dest basep enc encc let p = maybe basep (offsetMeterUpdate basep . toBytesProcessed) offset - v <- tryNonAsync $ - retriever (encryptor k) p Nothing $ \content -> + v <- tryNonAsync $ do + let enck = encryptor k + objloc <- fromRepo $ gitAnnexTmpObjectLocation enck + retriever enck p objloc Nothing $ \content -> bracket (maybe opennew openresume offset) (liftIO . hClose . fst) $ \(h, iv) -> do retrieved iv (Just h) p content let sz = toBytesProcessed $ @@ -316,7 +318,9 @@ retrieveChunks retriever u vc chunkconfig encryptor basek dest basep enc encc getrest p h iv sz bytesprocessed (k:ks) = do let p' = offsetMeterUpdate p bytesprocessed liftIO $ p' zeroBytesProcessed - retriever (encryptor k) p' Nothing $ + let enck = encryptor k + objloc <- fromRepo $ gitAnnexTmpObjectLocation enck + retriever enck p' objloc Nothing $ retrieved iv (Just h) p' getrest p h iv sz (addBytesProcessed bytesprocessed sz) ks @@ -324,7 +328,7 @@ retrieveChunks retriever u vc chunkconfig encryptor basek dest basep enc encc iv <- startVerifyKeyContentIncrementally vc basek case enc of Just _ -> do - retriever (encryptor basek) basep Nothing $ + retriever (encryptor basek) basep (toRawFilePath dest) Nothing $ retrieved iv Nothing basep return (Right iv) -- Not chunked and not encrypted, so ask the @@ -333,7 +337,7 @@ retrieveChunks retriever u vc chunkconfig encryptor basek dest basep enc encc -- passing the whole file content to the -- incremental verifier though. Nothing -> do - retriever (encryptor basek) basep iv $ + retriever (encryptor basek) basep (toRawFilePath dest) iv $ retrieved iv Nothing basep return $ case iv of Nothing -> Right iv diff --git a/Remote/Helper/Special.hs b/Remote/Helper/Special.hs index 19d5abd310..1a3c88ab1d 100644 --- a/Remote/Helper/Special.hs +++ b/Remote/Helper/Special.hs @@ -42,6 +42,7 @@ import Types.StoreRetrieve import Types.Remote import Annex.Verify import Annex.UUID +import Annex.Perms import Config import Config.Cost import Utility.Metered @@ -106,10 +107,10 @@ byteStorer a k c m = withBytes c $ \b -> a k b m -- A Retriever that generates a lazy ByteString containing the Key's -- content, and passes it to a callback action which will fully consume it -- before returning. -byteRetriever :: (Key -> (L.ByteString -> Annex a) -> Annex a) -> Key -> MeterUpdate -> Maybe IncrementalVerifier -> (ContentSource -> Annex a) -> Annex a -byteRetriever a k _m _miv callback = a k (callback . ByteContent) +byteRetriever :: (Key -> (L.ByteString -> Annex a) -> Annex a) -> Key -> MeterUpdate -> RawFilePath -> Maybe IncrementalVerifier -> (ContentSource -> Annex a) -> Annex a +byteRetriever a k _m _dest _miv callback = a k (callback . ByteContent) --- A Retriever that writes the content of a Key to a provided file. +-- A Retriever that writes the content of a Key to a file. -- The action is responsible for updating the progress meter as it -- retrieves data. The incremental verifier is updated in the background as -- the action writes to the file, but may not be updated with the entire @@ -119,15 +120,15 @@ fileRetriever a = fileRetriever' $ \f k m miv -> let retrieve = a f k m in tailVerify miv f retrieve -{- A Retriever that writes the content of a Key to a provided file. +{- A Retriever that writes the content of a Key to a file. - The action is responsible for updating the progress meter and the - incremental verifier as it retrieves data. -} fileRetriever' :: (RawFilePath -> Key -> MeterUpdate -> Maybe IncrementalVerifier -> Annex ()) -> Retriever -fileRetriever' a k m miv callback = do - f <- prepTmp k - a f k m miv - pruneTmpWorkDirBefore f (callback . FileContent . fromRawFilePath) +fileRetriever' a k m dest miv callback = do + createAnnexDirectory (parentDir dest) + a dest k m miv + pruneTmpWorkDirBefore dest (callback . FileContent . fromRawFilePath) {- The base Remote that is provided to specialRemote needs to have - storeKey, retrieveKeyFile, removeKey, and checkPresent methods, diff --git a/Types/StoreRetrieve.hs b/Types/StoreRetrieve.hs index 985d5adbec..ce7d228d74 100644 --- a/Types/StoreRetrieve.hs +++ b/Types/StoreRetrieve.hs @@ -35,12 +35,15 @@ type Storer = Key -> ContentSource -> MeterUpdate -> Annex () -- Throws exception if key is not present, or remote is not accessible. -- -- When it retrieves FileContent, it is responsible for updating the --- MeterUpdate. And when the IncrementalVerifier is passed to it, +-- MeterUpdate, and the provided FilePath can be used to store the file +-- it retrieves. +-- +-- When the IncrementalVerifier is passed to it, -- and it retrieves FileContent, it can feed some or all of the file's -- content to the verifier before running the callback. -- This should not be done when it retrieves ByteContent. type Retriever = forall a. - Key -> MeterUpdate -> Maybe IncrementalVerifier + Key -> MeterUpdate -> RawFilePath -> Maybe IncrementalVerifier -> (ContentSource -> Annex a) -> Annex a -- Action that removes a Key's content from a remote. diff --git a/doc/todo/git-annex_proxies.mdwn b/doc/todo/git-annex_proxies.mdwn index 668ea642ce..b7112b96b8 100644 --- a/doc/todo/git-annex_proxies.mdwn +++ b/doc/todo/git-annex_proxies.mdwn @@ -30,11 +30,6 @@ Planned schedule of work: * Currently working on streaming download via proxy from special remote. -* Remotes using fileRetriever retrieve to the temp object file, - before it is renamed to the requested file. In the case of a proxy, - that is a different file, and so it won't see the file until it's all - been transferred and renamed. - ## completed items for September's work on proving behavior of preferred content * Static analysis to detect "not present", "not balanced", and similar