stream through proxy when using fileRetriever

The problem was that when the proxy requests a key be retrieved to its
own temp file, fileRetriever was retrieving it to the key's temp
location, and then moving it at the end, which broke streaming.

So, plumb through the path where the key is being retrieved to.
This commit is contained in:
Joey Hess 2024-10-15 14:29:06 -04:00
parent 54fcc2ec51
commit 835283b862
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
8 changed files with 29 additions and 26 deletions

View file

@ -173,7 +173,7 @@ locations d k = NE.map (d P.</>) (keyPaths k)
locations' :: RawFilePath -> Key -> [RawFilePath] locations' :: RawFilePath -> Key -> [RawFilePath]
locations' d k = NE.toList (locations d k) locations' d k = NE.toList (locations d k)
{- Returns the location off a Key in the directory. If the key is {- Returns the location of a Key in the directory. If the key is
- present, returns the location that is actually used, otherwise - present, returns the location that is actually used, otherwise
- returns the first, default location. -} - returns the first, default location. -}
getLocation :: RawFilePath -> Key -> IO RawFilePath getLocation :: RawFilePath -> Key -> IO RawFilePath

View file

@ -98,7 +98,7 @@ store repotop chunksize finalizer k b p = storeHelper repotop finalizer k $ \des
- :/ This is legacy code.. - :/ This is legacy code..
-} -}
retrieve :: (RawFilePath -> Key -> [RawFilePath]) -> RawFilePath -> Retriever retrieve :: (RawFilePath -> Key -> [RawFilePath]) -> RawFilePath -> Retriever
retrieve locations d basek p miv c = withOtherTmp $ \tmpdir -> do retrieve locations d basek p _dest miv c = withOtherTmp $ \tmpdir -> do
showLongNote "This remote uses the deprecated chunksize setting. So this will be quite slow." showLongNote "This remote uses the deprecated chunksize setting. So this will be quite slow."
let tmp = tmpdir P.</> keyFile basek <> ".directorylegacy.tmp" let tmp = tmpdir P.</> keyFile basek <> ".directorylegacy.tmp"
let tmp' = fromRawFilePath tmp let tmp' = fromRawFilePath tmp
@ -110,7 +110,7 @@ retrieve locations d basek p miv c = withOtherTmp $ \tmpdir -> do
b <- liftIO $ L.readFile tmp' b <- liftIO $ L.readFile tmp'
liftIO $ removeWhenExistsWith R.removeLink tmp liftIO $ removeWhenExistsWith R.removeLink tmp
sink b sink b
byteRetriever go basek p miv c byteRetriever go basek p tmp miv c
checkKey :: RawFilePath -> (RawFilePath -> Key -> [RawFilePath]) -> Key -> Annex Bool checkKey :: RawFilePath -> (RawFilePath -> Key -> [RawFilePath]) -> Key -> Annex Bool
checkKey d locations k = liftIO $ checkKey d locations k = liftIO $

View file

@ -409,9 +409,9 @@ store' repo r rsyncopts accessmethod
storersync = fileStorer $ Remote.Rsync.store rsyncopts storersync = fileStorer $ Remote.Rsync.store rsyncopts
retrieve :: Remote -> Remote.Rsync.RsyncOpts -> AccessMethod -> Retriever retrieve :: Remote -> Remote.Rsync.RsyncOpts -> AccessMethod -> Retriever
retrieve r rsyncopts accessmethod k p miv sink = do retrieve r rsyncopts accessmethod k p dest miv sink = do
repo <- getRepo r repo <- getRepo r
retrieve' repo r rsyncopts accessmethod k p miv sink retrieve' repo r rsyncopts accessmethod k p dest miv sink
retrieve' :: Git.Repo -> Remote -> Remote.Rsync.RsyncOpts -> AccessMethod -> Retriever retrieve' :: Git.Repo -> Remote -> Remote.Rsync.RsyncOpts -> AccessMethod -> Retriever
retrieve' repo r rsyncopts accessmethod retrieve' repo r rsyncopts accessmethod

View file

@ -177,7 +177,7 @@ store' r k b p = go =<< glacierEnv c gc u
forceSuccessProcess cmd pid forceSuccessProcess cmd pid
go' _ _ _ _ _ = error "internal" go' _ _ _ _ _ = error "internal"
retrieve :: forall a. Remote -> Key -> MeterUpdate -> Maybe IncrementalVerifier -> (ContentSource -> Annex a) -> Annex a retrieve :: forall a. Remote -> Key -> MeterUpdate -> RawFilePath -> Maybe IncrementalVerifier -> (ContentSource -> Annex a) -> Annex a
retrieve = byteRetriever . retrieve' retrieve = byteRetriever . retrieve'
retrieve' :: forall a. Remote -> Key -> (L.ByteString -> Annex a) -> Annex a retrieve' :: forall a. Remote -> Key -> (L.ByteString -> Annex a) -> Annex a

View file

@ -294,8 +294,10 @@ retrieveChunks retriever u vc chunkconfig encryptor basek dest basep enc encc
let p = maybe basep let p = maybe basep
(offsetMeterUpdate basep . toBytesProcessed) (offsetMeterUpdate basep . toBytesProcessed)
offset offset
v <- tryNonAsync $ v <- tryNonAsync $ do
retriever (encryptor k) p Nothing $ \content -> let enck = encryptor k
objloc <- fromRepo $ gitAnnexTmpObjectLocation enck
retriever enck p objloc Nothing $ \content ->
bracket (maybe opennew openresume offset) (liftIO . hClose . fst) $ \(h, iv) -> do bracket (maybe opennew openresume offset) (liftIO . hClose . fst) $ \(h, iv) -> do
retrieved iv (Just h) p content retrieved iv (Just h) p content
let sz = toBytesProcessed $ let sz = toBytesProcessed $
@ -316,7 +318,9 @@ retrieveChunks retriever u vc chunkconfig encryptor basek dest basep enc encc
getrest p h iv sz bytesprocessed (k:ks) = do getrest p h iv sz bytesprocessed (k:ks) = do
let p' = offsetMeterUpdate p bytesprocessed let p' = offsetMeterUpdate p bytesprocessed
liftIO $ p' zeroBytesProcessed liftIO $ p' zeroBytesProcessed
retriever (encryptor k) p' Nothing $ let enck = encryptor k
objloc <- fromRepo $ gitAnnexTmpObjectLocation enck
retriever enck p' objloc Nothing $
retrieved iv (Just h) p' retrieved iv (Just h) p'
getrest p h iv sz (addBytesProcessed bytesprocessed sz) ks getrest p h iv sz (addBytesProcessed bytesprocessed sz) ks
@ -324,7 +328,7 @@ retrieveChunks retriever u vc chunkconfig encryptor basek dest basep enc encc
iv <- startVerifyKeyContentIncrementally vc basek iv <- startVerifyKeyContentIncrementally vc basek
case enc of case enc of
Just _ -> do Just _ -> do
retriever (encryptor basek) basep Nothing $ retriever (encryptor basek) basep (toRawFilePath dest) Nothing $
retrieved iv Nothing basep retrieved iv Nothing basep
return (Right iv) return (Right iv)
-- Not chunked and not encrypted, so ask the -- Not chunked and not encrypted, so ask the
@ -333,7 +337,7 @@ retrieveChunks retriever u vc chunkconfig encryptor basek dest basep enc encc
-- passing the whole file content to the -- passing the whole file content to the
-- incremental verifier though. -- incremental verifier though.
Nothing -> do Nothing -> do
retriever (encryptor basek) basep iv $ retriever (encryptor basek) basep (toRawFilePath dest) iv $
retrieved iv Nothing basep retrieved iv Nothing basep
return $ case iv of return $ case iv of
Nothing -> Right iv Nothing -> Right iv

View file

@ -42,6 +42,7 @@ import Types.StoreRetrieve
import Types.Remote import Types.Remote
import Annex.Verify import Annex.Verify
import Annex.UUID import Annex.UUID
import Annex.Perms
import Config import Config
import Config.Cost import Config.Cost
import Utility.Metered import Utility.Metered
@ -106,10 +107,10 @@ byteStorer a k c m = withBytes c $ \b -> a k b m
-- A Retriever that generates a lazy ByteString containing the Key's -- A Retriever that generates a lazy ByteString containing the Key's
-- content, and passes it to a callback action which will fully consume it -- content, and passes it to a callback action which will fully consume it
-- before returning. -- before returning.
byteRetriever :: (Key -> (L.ByteString -> Annex a) -> Annex a) -> Key -> MeterUpdate -> Maybe IncrementalVerifier -> (ContentSource -> Annex a) -> Annex a byteRetriever :: (Key -> (L.ByteString -> Annex a) -> Annex a) -> Key -> MeterUpdate -> RawFilePath -> Maybe IncrementalVerifier -> (ContentSource -> Annex a) -> Annex a
byteRetriever a k _m _miv callback = a k (callback . ByteContent) byteRetriever a k _m _dest _miv callback = a k (callback . ByteContent)
-- A Retriever that writes the content of a Key to a provided file. -- A Retriever that writes the content of a Key to a file.
-- The action is responsible for updating the progress meter as it -- The action is responsible for updating the progress meter as it
-- retrieves data. The incremental verifier is updated in the background as -- retrieves data. The incremental verifier is updated in the background as
-- the action writes to the file, but may not be updated with the entire -- the action writes to the file, but may not be updated with the entire
@ -119,15 +120,15 @@ fileRetriever a = fileRetriever' $ \f k m miv ->
let retrieve = a f k m let retrieve = a f k m
in tailVerify miv f retrieve in tailVerify miv f retrieve
{- A Retriever that writes the content of a Key to a provided file. {- A Retriever that writes the content of a Key to a file.
- The action is responsible for updating the progress meter and the - The action is responsible for updating the progress meter and the
- incremental verifier as it retrieves data. - incremental verifier as it retrieves data.
-} -}
fileRetriever' :: (RawFilePath -> Key -> MeterUpdate -> Maybe IncrementalVerifier -> Annex ()) -> Retriever fileRetriever' :: (RawFilePath -> Key -> MeterUpdate -> Maybe IncrementalVerifier -> Annex ()) -> Retriever
fileRetriever' a k m miv callback = do fileRetriever' a k m dest miv callback = do
f <- prepTmp k createAnnexDirectory (parentDir dest)
a f k m miv a dest k m miv
pruneTmpWorkDirBefore f (callback . FileContent . fromRawFilePath) pruneTmpWorkDirBefore dest (callback . FileContent . fromRawFilePath)
{- The base Remote that is provided to specialRemote needs to have {- The base Remote that is provided to specialRemote needs to have
- storeKey, retrieveKeyFile, removeKey, and checkPresent methods, - storeKey, retrieveKeyFile, removeKey, and checkPresent methods,

View file

@ -35,12 +35,15 @@ type Storer = Key -> ContentSource -> MeterUpdate -> Annex ()
-- Throws exception if key is not present, or remote is not accessible. -- Throws exception if key is not present, or remote is not accessible.
-- --
-- When it retrieves FileContent, it is responsible for updating the -- When it retrieves FileContent, it is responsible for updating the
-- MeterUpdate. And when the IncrementalVerifier is passed to it, -- MeterUpdate, and the provided FilePath can be used to store the file
-- it retrieves.
--
-- When the IncrementalVerifier is passed to it,
-- and it retrieves FileContent, it can feed some or all of the file's -- and it retrieves FileContent, it can feed some or all of the file's
-- content to the verifier before running the callback. -- content to the verifier before running the callback.
-- This should not be done when it retrieves ByteContent. -- This should not be done when it retrieves ByteContent.
type Retriever = forall a. type Retriever = forall a.
Key -> MeterUpdate -> Maybe IncrementalVerifier Key -> MeterUpdate -> RawFilePath -> Maybe IncrementalVerifier
-> (ContentSource -> Annex a) -> Annex a -> (ContentSource -> Annex a) -> Annex a
-- Action that removes a Key's content from a remote. -- Action that removes a Key's content from a remote.

View file

@ -30,11 +30,6 @@ Planned schedule of work:
* Currently working on streaming download via proxy from special remote. * Currently working on streaming download via proxy from special remote.
* Remotes using fileRetriever retrieve to the temp object file,
before it is renamed to the requested file. In the case of a proxy,
that is a different file, and so it won't see the file until it's all
been transferred and renamed.
## completed items for September's work on proving behavior of preferred content ## completed items for September's work on proving behavior of preferred content
* Static analysis to detect "not present", "not balanced", and similar * Static analysis to detect "not present", "not balanced", and similar