git-annex/doc
Joey Hess 8baa43ee12
tried a blind alley on streaming special remote download via proxy
This didn't work. In case I want to revisit, here's what I tried.

diff --git a/Annex/Proxy.hs b/Annex/Proxy.hs
index 48222872c1..e4e526d3dd 100644
--- a/Annex/Proxy.hs
+++ b/Annex/Proxy.hs
@@ -26,16 +26,21 @@ import Logs.UUID
 import Logs.Location
 import Utility.Tmp.Dir
 import Utility.Metered
+import Utility.ThreadScheduler
+import Utility.OpenFd
 import Git.Types
 import qualified Database.Export as Export

 import Control.Concurrent.STM
 import Control.Concurrent.Async
+import Control.Concurrent.MVar
 import qualified Data.ByteString as B
+import qualified Data.ByteString as BS
 import qualified Data.ByteString.Lazy as L
 import qualified System.FilePath.ByteString as P
 import qualified Data.Map as M
 import qualified Data.Set as S
+import System.IO.Unsafe

 proxyRemoteSide :: ProtocolVersion -> Bypass -> Remote -> Annex RemoteSide
 proxyRemoteSide clientmaxversion bypass r
@@ -240,21 +245,99 @@ proxySpecialRemote protoversion r ihdl ohdl owaitv oclosedv mexportdb = go
 		writeVerifyChunk iv h b
 		storetofile iv h (n - fromIntegral (B.length b)) bs

-	proxyget offset af k = withproxytmpfile k $ \tmpfile -> do
+	proxyget offset af k = withproxytmpfile k $ \tmpfile ->
+		let retrieve = tryNonAsync $ Remote.retrieveKeyFile r k af
+			(fromRawFilePath tmpfile) nullMeterUpdate vc
+		in case fromKey keySize k of
+			Just size | size > 0 -> do
+				cancelv <- liftIO newEmptyMVar
+				donev <- liftIO newEmptyMVar
+				streamer <- liftIO $ async $
+					streamdata offset tmpfile size cancelv donev
+				retrieve >>= \case
+					Right _ -> liftIO $ do
+						putMVar donev ()
+						wait streamer
+					Left err -> liftIO $ do
+						putMVar cancelv ()
+						wait streamer
+						propagateerror err
+			_ -> retrieve >>= \case
+				Right _ -> liftIO $ senddata offset tmpfile
+				Left err -> liftIO $ propagateerror err
+	  where
 		-- Don't verify the content from the remote,
 		-- because the client will do its own verification.
-		let vc = Remote.NoVerify
-		tryNonAsync (Remote.retrieveKeyFile r k af (fromRawFilePath tmpfile) nullMeterUpdate vc) >>= \case
-			Right _ -> liftIO $ senddata offset tmpfile
-			Left err -> liftIO $ propagateerror err
+		vc = Remote.NoVerify

+	streamdata (Offset offset) f size cancelv donev = do
+		sendlen offset size
+		waitforfile
+		x <- tryNonAsync $ do
+			fd <- openFdWithMode f ReadOnly Nothing defaultFileFlags
+			h <- fdToHandle fd
+			hSeek h AbsoluteSeek offset
+			senddata' h (getcontents size)
+		case x of
+			Left err -> do
+				throwM err
+			Right res -> return res
+	  where
+		-- The file doesn't exist at the start.
+		-- Wait for some data to be written to it as well,
+		-- in case an empty file is first created and then
+		-- overwritten. When there is an offset, wait for
+		-- the file to get that large. Note that this is not used
+		-- when the size is 0.
+		waitforfile = tryNonAsync (fromIntegral <$> getFileSize f) >>= \case
+			Right sz | sz > 0 && sz >= offset -> return ()
+			_ -> ifM (isEmptyMVar cancelv)
+				( do
+					threadDelaySeconds (Seconds 1)
+					waitforfile
+				, do
+					return ()
+				)
+
+		getcontents n h = unsafeInterleaveIO $ do
+			isdone <- isEmptyMVar donev <||> isEmptyMVar cancelv
+			c <- BS.hGet h defaultChunkSize
+			let n' = n - fromIntegral (BS.length c)
+			let c' = L.fromChunks [BS.take (fromIntegral n) c]
+			if BS.null c
+				then if isdone
+					then return mempty
+					else do
+						-- Wait for more data to be
+						-- written to the file.
+						threadDelaySeconds (Seconds 1)
+						getcontents n h
+				else if n' > 0
+					then do
+						-- unsafeInterleaveIO causes
+						-- this to be deferred until
+						-- data is read from the lazy
+						-- ByteString.
+						cs <- getcontents n' h
+						return $ L.append c' cs
+					else return c'
+
 	senddata (Offset offset) f = do
 		size <- fromIntegral <$> getFileSize f
-		let n = max 0 (size - offset)
-		sendmessage $ DATA (Len n)
+		sendlen offset size
 		withBinaryFile (fromRawFilePath f) ReadMode $ \h -> do
 			hSeek h AbsoluteSeek offset
-			sendbs =<< L.hGetContents h
+			senddata' h L.hGetContents
+
+	senddata' h getcontents = do
+			sendbs =<< getcontents h
 			-- Important to keep the handle open until
 			-- the client responds. The bytestring
 			-- could still be lazily streaming out to
@@ -272,6 +355,11 @@ proxySpecialRemote protoversion r ihdl ohdl owaitv oclosedv mexportdb = go
 				Just FAILURE -> return ()
 				Just _ -> giveup "protocol error"
 				Nothing -> return ()
+
+	sendlen offset size = do
+		let n = max 0 (size - offset)
+		sendmessage $ DATA (Len n)
+

 {- Check if this repository can proxy for a specified remote uuid,
  - and if so enable proxying for it. -}
2024-10-07 15:12:09 -04:00
..
Android Added a comment: git-annex for managing music 2024-07-21 19:08:45 +00:00
android
assistant
automatic_conflict_resolution
backends
bare_repositories
bugs 2024-09-30 20:54:14 +00:00
chunking
coding_style
contribute
copies
design tried a blind alley on streaming special remote download via proxy 2024-10-07 15:12:09 -04:00
devblog devblog 2024-05-16 13:23:36 -04:00
direct_mode
download
encryption
footer
forum 2024-09-25 06:41:27 +00:00
future_proofing
git-annex-add
git-annex-addurl
git-annex-adjust
git-annex-benchmark
git-annex-checkpresentkey
git-annex-config
git-annex-copy
git-annex-diffdriver
git-annex-direct
git-annex-drop
git-annex-enableremote
git-annex-export Added a comment: parameter --from not accepted 2024-08-14 14:27:54 +00:00
git-annex-find
git-annex-fsck Added a comment: PS 2024-09-04 15:48:01 +00:00
git-annex-get
git-annex-import
git-annex-importfeed
git-annex-lookupkey
git-annex-matching-options
git-annex-metadata
git-annex-move
git-annex-p2p
git-annex-preferred-content
git-annex-reinject
git-annex-repair
git-annex-rmurl
git-annex-setpresentkey
git-annex-sync
git-annex-test
git-annex-unannex
git-annex-undo
git-annex-uninit
git-annex-unlock
git-annex-unused
git-annex-webapp
git-annex-whereis
git-annex-whereused
how_it_works
install Added a comment: Remote Helper? 2024-08-17 05:33:01 +00:00
internals update 2024-05-28 13:36:27 -04:00
license
links move clusters page to tips 2024-06-27 15:41:38 -04:00
metadata
news add news item for git-annex 10.20240927 2024-09-30 19:16:06 -04:00
not
polls
preferred_content Added a comment: Re: default preferred content 2024-09-25 09:25:42 +00:00
profiling
projects
required_content
scalability
security
sims formatting 2024-09-25 14:50:17 -04:00
special_remotes removed 2024-09-27 12:18:59 +00:00
stickers
submodules
sync Added a comment: git annex sync --ff-only 2024-07-21 01:04:44 +00:00
templates
testimonials
thanks update 2024-08-12 18:57:03 -04:00
tips document using balanced preferred content in a cluster 2024-08-30 11:08:32 -04:00
todo tried a blind alley on streaming special remote download via proxy 2024-10-07 15:12:09 -04:00
trust
tuning Added a comment 2024-06-15 07:37:07 +00:00
upgrades
use_case
users Added contributions section to track my bugs and inquiries 2024-08-26 20:02:03 +00:00
videos add my distribits talk 2024-06-20 11:03:19 -04:00
walkthrough
workflow
Android.mdwn
assistant.mdwn
automatic_conflict_resolution.mdwn
backends.mdwn
bare_repositories.mdwn
bugs.mdwn
builds.mdwn update github badges 2024-07-29 13:00:00 -04:00
chunking.mdwn
coding_style.mdwn
comments.mdwn
contact.mdwn
contribute.mdwn
copies.mdwn
design.mdwn
devblog.mdwn
direct_mode.mdwn
distributed_version_control.mdwn
download.mdwn
encryption.mdwn
favicon.ico
forum.mdwn
future_proofing.mdwn
git-annex-add.mdwn
git-annex-addunused.mdwn
git-annex-addurl.mdwn
git-annex-adjust.mdwn
git-annex-assist.mdwn
git-annex-assistant.mdwn
git-annex-backends.mdwn
git-annex-benchmark.mdwn
git-annex-calckey.mdwn
git-annex-checkpresentkey.mdwn
git-annex-common-options.mdwn mention sizebalanced as well as balanced 2024-08-30 12:06:45 -04:00
git-annex-config.mdwn
git-annex-configremote.mdwn
git-annex-contentlocation.mdwn
git-annex-copy.mdwn
git-annex-dead.mdwn
git-annex-describe.mdwn
git-annex-diffdriver.mdwn
git-annex-direct.mdwn
git-annex-drop.mdwn
git-annex-dropkey.mdwn
git-annex-dropunused.mdwn
git-annex-edit.mdwn
git-annex-enable-tor.mdwn
git-annex-enableremote.mdwn initremote, enableremote: Added --with-url to enable using git-remote-annex 2024-05-24 14:29:36 -04:00
git-annex-examinekey.mdwn
git-annex-expire.mdwn
git-annex-export.mdwn export: Added --from option 2024-08-08 12:08:55 -04:00
git-annex-extendcluster.mdwn layout 2024-06-27 15:52:58 -04:00
git-annex-filter-branch.mdwn
git-annex-filter-process.mdwn
git-annex-find.mdwn
git-annex-findkeys.mdwn
git-annex-findref.mdwn
git-annex-fix.mdwn
git-annex-forget.mdwn
git-annex-fromkey.mdwn
git-annex-fsck.mdwn
git-annex-fuzztest.mdwn
git-annex-get.mdwn
git-annex-group.mdwn group: Added --list option 2024-05-29 13:37:35 -04:00
git-annex-groupwanted.mdwn
git-annex-import.mdwn
git-annex-importfeed.mdwn
git-annex-indirect.mdwn
git-annex-info.mdwn
git-annex-init.mdwn
git-annex-initcluster.mdwn layout 2024-06-27 15:52:58 -04:00
git-annex-initremote.mdwn initremote, enableremote: Added --with-url to enable using git-remote-annex 2024-05-24 14:29:36 -04:00
git-annex-inprogress.mdwn
git-annex-list.mdwn
git-annex-lock.mdwn
git-annex-log.mdwn
git-annex-lookupkey.mdwn
git-annex-map.mdwn
git-annex-matchexpression.mdwn git-annex sim command is working 2024-09-12 16:10:52 -04:00
git-annex-matching-expression.mdwn
git-annex-matching-options.mdwn
git-annex-maxsize.mdwn maxsize of 0 to disable 2024-09-09 09:32:43 -04:00
git-annex-merge.mdwn
git-annex-metadata.mdwn
git-annex-migrate.mdwn
git-annex-mincopies.mdwn
git-annex-mirror.mdwn
git-annex-move.mdwn
git-annex-multicast.mdwn
git-annex-numcopies.mdwn
git-annex-oldkeys.mdwn
git-annex-p2p.mdwn
git-annex-p2phttp.mdwn remove uuid from annex+http urls 2024-07-28 20:29:42 -04:00
git-annex-post-receive.mdwn update 2024-08-07 11:49:53 -04:00
git-annex-pre-commit.mdwn
git-annex-preferred-content.mdwn treat "not present" in preferred content as invalid 2024-09-03 13:50:06 -04:00
git-annex-proxy.mdwn
git-annex-pull.mdwn
git-annex-push.mdwn initial support for annexobjects=yes 2024-08-02 14:07:45 -04:00
git-annex-readpresentkey.mdwn
git-annex-registerurl.mdwn
git-annex-reinit.mdwn
git-annex-reinject.mdwn
git-annex-rekey.mdwn
git-annex-remotedaemon.mdwn
git-annex-renameremote.mdwn
git-annex-repair.mdwn
git-annex-required.mdwn wording 2024-06-20 10:15:24 -04:00
git-annex-reregisterurl.mdwn
git-annex-resolvemerge.mdwn
git-annex-restage.mdwn
git-annex-rmurl.mdwn
git-annex-satisfy.mdwn initial support for annexobjects=yes 2024-08-02 14:07:45 -04:00
git-annex-schedule.mdwn
git-annex-semitrust.mdwn
git-annex-setkey.mdwn
git-annex-setpresentkey.mdwn
git-annex-shell.mdwn git-annex-shell: accept uuid of remote that proxying is enabled for 2024-06-10 12:44:35 -04:00
git-annex-sim.mdwn sim: document interruption and concurrency issues 2024-09-26 12:26:47 -04:00
git-annex-smudge.mdwn
git-annex-status.mdwn
git-annex-sync.mdwn
git-annex-test.mdwn
git-annex-testremote.mdwn
git-annex-transferkey.mdwn
git-annex-transferkeys.mdwn
git-annex-transferrer.mdwn
git-annex-trust.mdwn
git-annex-unannex.mdwn
git-annex-undo.mdwn
git-annex-ungroup.mdwn
git-annex-uninit.mdwn
git-annex-unlock.mdwn
git-annex-unregisterurl.mdwn
git-annex-untrust.mdwn
git-annex-unused.mdwn
git-annex-updatecluster.mdwn layout 2024-06-27 15:52:58 -04:00
git-annex-updateproxy.mdwn updateproxy, updatecluster check annexobjects=yes 2024-08-07 12:27:24 -04:00
git-annex-upgrade.mdwn
git-annex-vadd.mdwn
git-annex-vcycle.mdwn
git-annex-version.mdwn
git-annex-vfilter.mdwn
git-annex-vicfg.mdwn
git-annex-view.mdwn
git-annex-vpop.mdwn
git-annex-wanted.mdwn wording 2024-06-20 10:15:24 -04:00
git-annex-watch.mdwn
git-annex-webapp.mdwn
git-annex-whereis.mdwn
git-annex-whereused.mdwn
git-annex.mdwn documentation for git-annex sim 2024-09-04 15:03:17 -04:00
git-remote-annex.mdwn tip 2024-05-30 13:26:34 -04:00
git-remote-tor-annex.mdwn
git-union-merge.mdwn
how_it_works.mdwn
index.mdwn
install.mdwn
internals.mdwn implement maxsize log and command 2024-08-11 15:41:26 -04:00
license.mdwn
location_tracking.mdwn
logo-old-bw.svg
logo-old.png
logo-old.svg
logo-old_small.png
logo-with-cli.png
logo-with-cli.svg
logo.mdwn
logo.svg
logo_16x16.png
logo_32x32.png
logo_small.png
meta.mdwn
metadata.mdwn
news.mdwn
not.mdwn
polls.mdwn
preferred_content.mdwn balanced preferred content and --rebalance 2024-08-09 14:16:09 -04:00
privacy.mdwn remove mention of XMPP which is no longer used 2024-06-27 15:56:30 -04:00
profiling.mdwn
projects.mdwn
publicrepos.mdwn
related_software.mdwn 2024-07-16 15:42:54 +00:00
repomap.png
required_content.mdwn
scalability.mdwn
security.mdwn
shortcuts.mdwn
sidebar.mdwn
sims.mdwn adds sims collection 2024-09-23 13:43:55 -04:00
sitemap.mdwn
special_remotes.mdwn update docs for git-remote-annex 2024-05-14 15:31:16 -04:00
stickers.mdwn
submodules.mdwn
summary.mdwn
sync.mdwn
testimonials.mdwn
thanks.mdwn
tips.mdwn
todo.mdwn
transferring_data.mdwn
trust.mdwn
tuning.mdwn fix typo 2024-09-03 14:23:14 -04:00
upgrades.mdwn
users.mdwn
videos.mdwn
walkthrough.mdwn
workflow.mdwn