CI to automate build of cargo lockfiles on different Alpine releases for git-annex aports https://gitlab.alpinelinux.org/alpine/aports/-/tree/master/community/git-annex
Find a file
Joey Hess 8baa43ee12
tried a blind alley on streaming special remote download via proxy
This didn't work. In case I want to revisit, here's what I tried.

diff --git a/Annex/Proxy.hs b/Annex/Proxy.hs
index 48222872c1..e4e526d3dd 100644
--- a/Annex/Proxy.hs
+++ b/Annex/Proxy.hs
@@ -26,16 +26,21 @@ import Logs.UUID
 import Logs.Location
 import Utility.Tmp.Dir
 import Utility.Metered
+import Utility.ThreadScheduler
+import Utility.OpenFd
 import Git.Types
 import qualified Database.Export as Export

 import Control.Concurrent.STM
 import Control.Concurrent.Async
+import Control.Concurrent.MVar
 import qualified Data.ByteString as B
+import qualified Data.ByteString as BS
 import qualified Data.ByteString.Lazy as L
 import qualified System.FilePath.ByteString as P
 import qualified Data.Map as M
 import qualified Data.Set as S
+import System.IO.Unsafe

 proxyRemoteSide :: ProtocolVersion -> Bypass -> Remote -> Annex RemoteSide
 proxyRemoteSide clientmaxversion bypass r
@@ -240,21 +245,99 @@ proxySpecialRemote protoversion r ihdl ohdl owaitv oclosedv mexportdb = go
 		writeVerifyChunk iv h b
 		storetofile iv h (n - fromIntegral (B.length b)) bs

-	proxyget offset af k = withproxytmpfile k $ \tmpfile -> do
+	proxyget offset af k = withproxytmpfile k $ \tmpfile ->
+		let retrieve = tryNonAsync $ Remote.retrieveKeyFile r k af
+			(fromRawFilePath tmpfile) nullMeterUpdate vc
+		in case fromKey keySize k of
+			Just size | size > 0 -> do
+				cancelv <- liftIO newEmptyMVar
+				donev <- liftIO newEmptyMVar
+				streamer <- liftIO $ async $
+					streamdata offset tmpfile size cancelv donev
+				retrieve >>= \case
+					Right _ -> liftIO $ do
+						putMVar donev ()
+						wait streamer
+					Left err -> liftIO $ do
+						putMVar cancelv ()
+						wait streamer
+						propagateerror err
+			_ -> retrieve >>= \case
+				Right _ -> liftIO $ senddata offset tmpfile
+				Left err -> liftIO $ propagateerror err
+	  where
 		-- Don't verify the content from the remote,
 		-- because the client will do its own verification.
-		let vc = Remote.NoVerify
-		tryNonAsync (Remote.retrieveKeyFile r k af (fromRawFilePath tmpfile) nullMeterUpdate vc) >>= \case
-			Right _ -> liftIO $ senddata offset tmpfile
-			Left err -> liftIO $ propagateerror err
+		vc = Remote.NoVerify

+	-- Stream the content of f to the client while it is still being
+	-- retrieved. donev is filled when the retrieval succeeded, cancelv
+	-- when it failed. The length is announced before any data is sent.
+	streamdata (Offset offset) f size cancelv donev = do
+		sendlen offset size
+		waitforfile
+		-- NOTE(review): nothing visible here closes this handle.
+		fd <- openFdWithMode f ReadOnly Nothing defaultFileFlags
+		h <- fdToHandle fd
+		hSeek h AbsoluteSeek offset
+		senddata' h (getcontents size)
+	  where
+		-- True once the retrieval has ended, successfully or not.
+		finished = not <$>
+			((&&) <$> isEmptyMVar donev <*> isEmptyMVar cancelv)
+
+		-- The file doesn't exist at the start.
+		-- Wait for some data to be written to it as well,
+		-- in case an empty file is first created and then
+		-- overwritten. When there is an offset, wait for
+		-- the file to get that large. Stop waiting once the
+		-- retrieval has ended. Note that this is not used
+		-- when the size is 0.
+		waitforfile = tryNonAsync (fromIntegral <$> getFileSize f) >>= \case
+			Right sz | sz > 0 && sz >= offset -> return ()
+			_ -> ifM finished
+				( return ()
+				, threadDelaySeconds (Seconds 1) >> waitforfile
+				)
+
+		getcontents n h = unsafeInterleaveIO $ do
+			-- Checked before reading, so that an empty read
+			-- seen afterwards really is the end of the data.
+			isdone <- finished
+			c <- BS.hGet h defaultChunkSize
+			let n' = n - fromIntegral (BS.length c)
+			let c' = L.fromChunks [BS.take (fromIntegral n) c]
+			if BS.null c
+				then if isdone
+					then return mempty
+					else do
+						-- Wait for more data to be written.
+						threadDelaySeconds (Seconds 1)
+						getcontents n h
+				else if n' > 0
+					then do
+						-- unsafeInterleaveIO defers this until
+						-- the lazy ByteString is read further.
+						cs <- getcontents n' h
+						return $ L.append c' cs
+					else return c'
+
 	senddata (Offset offset) f = do
 		size <- fromIntegral <$> getFileSize f
-		let n = max 0 (size - offset)
-		sendmessage $ DATA (Len n)
+		sendlen offset size
 		withBinaryFile (fromRawFilePath f) ReadMode $ \h -> do
 			hSeek h AbsoluteSeek offset
-			sendbs =<< L.hGetContents h
+			senddata' h L.hGetContents
+
+	senddata' h getcontents = do
+			sendbs =<< getcontents h
 			-- Important to keep the handle open until
 			-- the client responds. The bytestring
 			-- could still be lazily streaming out to
@@ -272,6 +355,11 @@ proxySpecialRemote protoversion r ihdl ohdl owaitv oclosedv mexportdb = go
 				Just FAILURE -> return ()
 				Just _ -> giveup "protocol error"
 				Nothing -> return ()
+
+	-- Tell the client how many bytes will follow: whatever is left of
+	-- size past offset, clamped so the length is never negative.
+	sendlen offset size = sendmessage $ DATA $ Len $ max 0 (size - offset)
+

 {- Check if this repository can proxy for a specified remote uuid,
  - and if so enable proxying for it. -}
2024-10-07 15:12:09 -04:00
Annex use NonEmpty for dirHashes 2024-09-26 18:15:00 -04:00
Assistant use NonEmpty for dirHashes 2024-09-26 18:15:00 -04:00
Backend remove read of the heads 2024-09-26 18:43:59 -04:00
Build Merge branch 'master' into git-remote-annex 2024-05-10 14:20:36 -04:00
CmdLine started Annex.Sim 2024-09-04 15:15:36 -04:00
Command fix build with old random 2024-09-30 17:36:19 -04:00
Config use status --ignore-submodules in configureSmudgeFilter 2022-12-20 16:02:42 -04:00
Database sim: Fix size tracking for balanced preferred content 2024-09-23 12:42:32 -04:00
debian Refresh standlone patch to avoid fuzz and offsets 2024-08-06 16:39:48 -04:00
doc tried a blind alley on streaming special remote download via proxy 2024-10-07 15:12:09 -04:00
Git fix build with old base 2024-09-30 11:02:08 -04:00
Limit treat "not present" in preferred content as invalid 2024-09-03 13:50:06 -04:00
Logs invalidate caches after log changes 2024-09-20 16:52:17 -04:00
Messages RawFilePath conversion 2024-01-19 14:26:21 -04:00
P2P LiveUpdate for clusters 2024-08-24 10:20:12 -04:00
Remote fix build with old base 2024-09-30 11:02:08 -04:00
RemoteDaemon support a P2PConnection that uses TMVars rather than Handles 2024-06-28 11:22:29 -04:00
standalone disable servant build flag for i386ancient 2024-07-30 10:01:56 -04:00
static Revert "remove newlines from static js and css" 2014-06-13 02:20:39 -04:00
templates assistant: When generating a gpg secret key, avoid hardcoding the key algorithm and size 2024-01-09 15:31:53 -04:00
Test remove support for directory < 1.2.7 2024-02-06 10:53:13 -04:00
Types sim: Add metadata command 2024-09-26 12:20:37 -04:00
Upgrade remove read of the heads 2024-09-26 18:43:59 -04:00
Utility remove read of the heads 2024-09-26 18:43:59 -04:00
.appveyor.yml update stack.yaml to nightly-2024-07-29 and remove stack-lts-18.13.yaml 2024-07-29 20:09:37 -04:00
.codespellrc A few more of typo fixes/skip as detected with bleeding edge codespell 2024-05-01 20:06:08 -04:00
.ghci turn of PackageImports in cabal file 2022-02-25 13:16:36 -04:00
.gitattributes update changelog location 2016-08-22 23:54:11 -04:00
.gitignore ignore git-remote-annex 2024-05-06 13:13:39 -04:00
.mailmap ENH: add one more mailmap for yarikoptic 2024-06-03 13:00:45 -04:00
Annex.hs closing in on finishing live reposizes 2024-08-27 12:54:46 -04:00
Assistant.hs webapp: Added --port option, and annex.port config 2024-01-25 14:08:36 -04:00
Author.hs improve attribution armoring 2023-11-21 11:34:21 -04:00
Backend.hs implement URL to VURL migration 2024-03-01 16:42:02 -04:00
bash-completion.bash bash completion fix 2018-11-12 13:23:05 -04:00
Benchmark.hs --size-limit exit 101 2021-06-04 16:43:47 -04:00
BuildFlags.hs update servant build flag 2024-07-23 08:53:56 -04:00
BuildInfo.hs update licenses from GPL to AGPL 2019-03-13 15:48:14 -04:00
CHANGELOG releasing package git-annex version 10.20240927 2024-09-30 19:15:57 -04:00
CmdLine.hs remove read of the heads 2024-09-26 18:43:59 -04:00
Command.hs plumb in LiveUpdate (WIP) 2024-08-23 16:35:12 -04:00
Common.hs Windows: Support long filenames in more (possibly all) of the code 2023-03-01 15:55:58 -04:00
Config.hs proxying to exporttree=yes annexobjects=yes basically working 2024-08-06 14:21:23 -04:00
COPYRIGHT Added dependency on unbounded-delays 2024-02-27 13:11:59 -04:00
Creds.hs simplify base64 to only use ByteString 2023-10-26 13:10:05 -04:00
Crypto.hs use hmac in balanced preferred content 2024-08-10 16:32:54 -04:00
git-annex.cabal update version for release 2024-09-27 10:01:44 -04:00
git-annex.hs add git-remote-annex stub and build machinery 2024-05-06 13:05:58 -04:00
git-union-merge.hs update licenses from GPL to AGPL 2019-03-13 15:48:14 -04:00
Git.hs run codespell throughout fixing typos automagically 2024-05-01 15:46:21 -04:00
Key.hs convert Key to ShortByteString 2021-10-05 20:20:08 -04:00
Limit.hs fix sizebalanced empty size bug 2024-09-23 14:30:18 -04:00
Logs.hs update RepoSize database from git-annex branch incrementally 2024-08-17 13:35:00 -04:00
Makefile use cabal list-bin 2024-05-31 10:28:56 -04:00
Messages.hs maxsize overview display and --json support 2024-08-18 12:08:13 -04:00
NEWS prep release 2023-06-26 10:41:36 -04:00
README improve description 2022-01-06 12:24:27 -04:00
Remote.hs avoid head 2024-09-26 17:52:19 -04:00
Setup.hs Setup.hs: Stop installing man pages, desktop files, and the git-annex-shell and git-remote-tor-annex symlinks 2023-08-01 15:08:56 -04:00
stack-lts-18.13.yaml Revert "remove stack-lts-18.13.yaml" 2024-09-27 15:30:51 -04:00
stack.yaml deindent 2024-07-30 10:34:18 -04:00
Test.hs use NonEmpty for dirHashes 2024-09-26 18:15:00 -04:00
Types.hs plumb VerifyConfig into retrieveKeyFile 2021-08-17 12:43:13 -04:00
Upgrade.hs don't say a supported version is unsupported 2023-10-09 14:26:24 -04:00

git-annex allows managing large files with git, without storing the file
contents in git. It can sync, backup, and archive your data, offline
and online. Checksums and encryption keep your data safe and secure. Bring
the power and distributed nature of git to bear on your large files with
git-annex.

For documentation, see doc/ or <https://git-annex.branchable.com/>