stream through proxy when using fileRetriever

The problem was that when the proxy requests a key be retrieved to its
own temp file, fileRetriever was retriving it to the key's temp
location, and then moving it at the end, which broke streaming.

So, plumb through the path where the key is being retrieved to.
This commit is contained in:
Joey Hess 2024-10-15 14:29:06 -04:00
parent 54fcc2ec51
commit 835283b862
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
8 changed files with 29 additions and 26 deletions

View file

@ -173,7 +173,7 @@ locations d k = NE.map (d P.</>) (keyPaths k)
locations' :: RawFilePath -> Key -> [RawFilePath]
locations' d k = NE.toList (locations d k)
{- Returns the location off a Key in the directory. If the key is
{- Returns the location of a Key in the directory. If the key is
- present, returns the location that is actually used, otherwise
- returns the first, default location. -}
getLocation :: RawFilePath -> Key -> IO RawFilePath

View file

@ -98,7 +98,7 @@ store repotop chunksize finalizer k b p = storeHelper repotop finalizer k $ \des
- :/ This is legacy code..
-}
retrieve :: (RawFilePath -> Key -> [RawFilePath]) -> RawFilePath -> Retriever
retrieve locations d basek p miv c = withOtherTmp $ \tmpdir -> do
retrieve locations d basek p _dest miv c = withOtherTmp $ \tmpdir -> do
showLongNote "This remote uses the deprecated chunksize setting. So this will be quite slow."
let tmp = tmpdir P.</> keyFile basek <> ".directorylegacy.tmp"
let tmp' = fromRawFilePath tmp
@ -110,7 +110,7 @@ retrieve locations d basek p miv c = withOtherTmp $ \tmpdir -> do
b <- liftIO $ L.readFile tmp'
liftIO $ removeWhenExistsWith R.removeLink tmp
sink b
byteRetriever go basek p miv c
byteRetriever go basek p tmp miv c
checkKey :: RawFilePath -> (RawFilePath -> Key -> [RawFilePath]) -> Key -> Annex Bool
checkKey d locations k = liftIO $

View file

@ -409,9 +409,9 @@ store' repo r rsyncopts accessmethod
storersync = fileStorer $ Remote.Rsync.store rsyncopts
retrieve :: Remote -> Remote.Rsync.RsyncOpts -> AccessMethod -> Retriever
retrieve r rsyncopts accessmethod k p miv sink = do
retrieve r rsyncopts accessmethod k p dest miv sink = do
repo <- getRepo r
retrieve' repo r rsyncopts accessmethod k p miv sink
retrieve' repo r rsyncopts accessmethod k p dest miv sink
retrieve' :: Git.Repo -> Remote -> Remote.Rsync.RsyncOpts -> AccessMethod -> Retriever
retrieve' repo r rsyncopts accessmethod

View file

@ -177,7 +177,7 @@ store' r k b p = go =<< glacierEnv c gc u
forceSuccessProcess cmd pid
go' _ _ _ _ _ = error "internal"
retrieve :: forall a. Remote -> Key -> MeterUpdate -> Maybe IncrementalVerifier -> (ContentSource -> Annex a) -> Annex a
retrieve :: forall a. Remote -> Key -> MeterUpdate -> RawFilePath -> Maybe IncrementalVerifier -> (ContentSource -> Annex a) -> Annex a
retrieve = byteRetriever . retrieve'
retrieve' :: forall a. Remote -> Key -> (L.ByteString -> Annex a) -> Annex a

View file

@ -294,8 +294,10 @@ retrieveChunks retriever u vc chunkconfig encryptor basek dest basep enc encc
let p = maybe basep
(offsetMeterUpdate basep . toBytesProcessed)
offset
v <- tryNonAsync $
retriever (encryptor k) p Nothing $ \content ->
v <- tryNonAsync $ do
let enck = encryptor k
objloc <- fromRepo $ gitAnnexTmpObjectLocation enck
retriever enck p objloc Nothing $ \content ->
bracket (maybe opennew openresume offset) (liftIO . hClose . fst) $ \(h, iv) -> do
retrieved iv (Just h) p content
let sz = toBytesProcessed $
@ -316,7 +318,9 @@ retrieveChunks retriever u vc chunkconfig encryptor basek dest basep enc encc
getrest p h iv sz bytesprocessed (k:ks) = do
let p' = offsetMeterUpdate p bytesprocessed
liftIO $ p' zeroBytesProcessed
retriever (encryptor k) p' Nothing $
let enck = encryptor k
objloc <- fromRepo $ gitAnnexTmpObjectLocation enck
retriever enck p' objloc Nothing $
retrieved iv (Just h) p'
getrest p h iv sz (addBytesProcessed bytesprocessed sz) ks
@ -324,7 +328,7 @@ retrieveChunks retriever u vc chunkconfig encryptor basek dest basep enc encc
iv <- startVerifyKeyContentIncrementally vc basek
case enc of
Just _ -> do
retriever (encryptor basek) basep Nothing $
retriever (encryptor basek) basep (toRawFilePath dest) Nothing $
retrieved iv Nothing basep
return (Right iv)
-- Not chunked and not encrypted, so ask the
@ -333,7 +337,7 @@ retrieveChunks retriever u vc chunkconfig encryptor basek dest basep enc encc
-- passing the whole file content to the
-- incremental verifier though.
Nothing -> do
retriever (encryptor basek) basep iv $
retriever (encryptor basek) basep (toRawFilePath dest) iv $
retrieved iv Nothing basep
return $ case iv of
Nothing -> Right iv

View file

@ -42,6 +42,7 @@ import Types.StoreRetrieve
import Types.Remote
import Annex.Verify
import Annex.UUID
import Annex.Perms
import Config
import Config.Cost
import Utility.Metered
@ -106,10 +107,10 @@ byteStorer a k c m = withBytes c $ \b -> a k b m
-- A Retriever that generates a lazy ByteString containing the Key's
-- content, and passes it to a callback action which will fully consume it
-- before returning.
byteRetriever :: (Key -> (L.ByteString -> Annex a) -> Annex a) -> Key -> MeterUpdate -> Maybe IncrementalVerifier -> (ContentSource -> Annex a) -> Annex a
byteRetriever a k _m _miv callback = a k (callback . ByteContent)
byteRetriever :: (Key -> (L.ByteString -> Annex a) -> Annex a) -> Key -> MeterUpdate -> RawFilePath -> Maybe IncrementalVerifier -> (ContentSource -> Annex a) -> Annex a
byteRetriever a k _m _dest _miv callback = a k (callback . ByteContent)
-- A Retriever that writes the content of a Key to a provided file.
-- A Retriever that writes the content of a Key to a file.
-- The action is responsible for updating the progress meter as it
-- retrieves data. The incremental verifier is updated in the background as
-- the action writes to the file, but may not be updated with the entire
@ -119,15 +120,15 @@ fileRetriever a = fileRetriever' $ \f k m miv ->
let retrieve = a f k m
in tailVerify miv f retrieve
{- A Retriever that writes the content of a Key to a provided file.
{- A Retriever that writes the content of a Key to a file.
- The action is responsible for updating the progress meter and the
- incremental verifier as it retrieves data.
-}
fileRetriever' :: (RawFilePath -> Key -> MeterUpdate -> Maybe IncrementalVerifier -> Annex ()) -> Retriever
fileRetriever' a k m miv callback = do
f <- prepTmp k
a f k m miv
pruneTmpWorkDirBefore f (callback . FileContent . fromRawFilePath)
fileRetriever' a k m dest miv callback = do
createAnnexDirectory (parentDir dest)
a dest k m miv
pruneTmpWorkDirBefore dest (callback . FileContent . fromRawFilePath)
{- The base Remote that is provided to specialRemote needs to have
- storeKey, retrieveKeyFile, removeKey, and checkPresent methods,

View file

@ -35,12 +35,15 @@ type Storer = Key -> ContentSource -> MeterUpdate -> Annex ()
-- Throws exception if key is not present, or remote is not accessible.
--
-- When it retrieves FileContent, it is responsible for updating the
-- MeterUpdate. And when the IncrementalVerifier is passed to it,
-- MeterUpdate, and the provided FilePath can be used to store the file
-- it retrieves.
--
-- When the IncrementalVerifier is passed to it,
-- and it retrieves FileContent, it can feed some or all of the file's
-- content to the verifier before running the callback.
-- This should not be done when it retrieves ByteContent.
type Retriever = forall a.
Key -> MeterUpdate -> Maybe IncrementalVerifier
Key -> MeterUpdate -> RawFilePath -> Maybe IncrementalVerifier
-> (ContentSource -> Annex a) -> Annex a
-- Action that removes a Key's content from a remote.

View file

@ -30,11 +30,6 @@ Planned schedule of work:
* Currently working on streaming download via proxy from special remote.
* Remotes using fileRetriever retrieve to the temp object file,
before it is renamed to the requested file. In the case of a proxy,
that is a different file, and so it won't see the file until it's all
been transferred and renamed.
## completed items for September's work on proving behavior of preferred content
* Static analysis to detect "not present", "not balanced", and similar