Commit graph

45663 commits

Author SHA1 Message Date
Joey Hess
8baa43ee12
tried a blind alley on streaming special remote download via proxy
This didn't work. In case I want to revisit, here's what I tried.

diff --git a/Annex/Proxy.hs b/Annex/Proxy.hs
index 48222872c1..e4e526d3dd 100644
--- a/Annex/Proxy.hs
+++ b/Annex/Proxy.hs
@@ -26,16 +26,21 @@ import Logs.UUID
 import Logs.Location
 import Utility.Tmp.Dir
 import Utility.Metered
+import Utility.ThreadScheduler
+import Utility.OpenFd
 import Git.Types
 import qualified Database.Export as Export

 import Control.Concurrent.STM
 import Control.Concurrent.Async
+import Control.Concurrent.MVar
 import qualified Data.ByteString as B
+import qualified Data.ByteString as BS
 import qualified Data.ByteString.Lazy as L
 import qualified System.FilePath.ByteString as P
 import qualified Data.Map as M
 import qualified Data.Set as S
+import System.IO.Unsafe

 proxyRemoteSide :: ProtocolVersion -> Bypass -> Remote -> Annex RemoteSide
 proxyRemoteSide clientmaxversion bypass r
@@ -240,21 +245,99 @@ proxySpecialRemote protoversion r ihdl ohdl owaitv oclosedv mexportdb = go
 		writeVerifyChunk iv h b
 		storetofile iv h (n - fromIntegral (B.length b)) bs

-	proxyget offset af k = withproxytmpfile k $ \tmpfile -> do
+	proxyget offset af k = withproxytmpfile k $ \tmpfile ->
+		let retrieve = tryNonAsync $ Remote.retrieveKeyFile r k af
+			(fromRawFilePath tmpfile) nullMeterUpdate vc
+		in case fromKey keySize k of
+			Just size | size > 0 -> do
+				cancelv <- liftIO newEmptyMVar
+				donev <- liftIO newEmptyMVar
+				streamer <- liftIO $ async $
+					streamdata offset tmpfile size cancelv donev
+				retrieve >>= \case
+					Right _ -> liftIO $ do
+						putMVar donev ()
+						wait streamer
+					Left err -> liftIO $ do
+						putMVar cancelv ()
+						wait streamer
+						propagateerror err
+			_ -> retrieve >>= \case
+				Right _ -> liftIO $ senddata offset tmpfile
+				Left err -> liftIO $ propagateerror err
+	  where
 		-- Don't verify the content from the remote,
 		-- because the client will do its own verification.
-		let vc = Remote.NoVerify
-		tryNonAsync (Remote.retrieveKeyFile r k af (fromRawFilePath tmpfile) nullMeterUpdate vc) >>= \case
-			Right _ -> liftIO $ senddata offset tmpfile
-			Left err -> liftIO $ propagateerror err
+		vc = Remote.NoVerify

+	streamdata (Offset offset) f size cancelv donev = do
+		sendlen offset size
+		waitforfile
+		x <- tryNonAsync $ do
+			fd <- openFdWithMode f ReadOnly Nothing defaultFileFlags
+			h <- fdToHandle fd
+			hSeek h AbsoluteSeek offset
+			senddata' h (getcontents size)
+		case x of
+			Left err -> do
+				throwM err
+			Right res -> return res
+	  where
+		-- The file doesn't exist at the start.
+		-- Wait for some data to be written to it as well,
+		-- in case an empty file is first created and then
+		-- overwritten. When there is an offset, wait for
+		-- the file to get that large. Note that this is not used
+		-- when the size is 0.
+		waitforfile = tryNonAsync (fromIntegral <$> getFileSize f) >>= \case
+			Right sz | sz > 0 && sz >= offset -> return ()
+			_ -> ifM (isEmptyMVar cancelv)
+				( do
+					threadDelaySeconds (Seconds 1)
+					waitforfile
+				, do
+					return ()
+				)
+
+		getcontents n h = unsafeInterleaveIO $ do
+			isdone <- isEmptyMVar donev <||> isEmptyMVar cancelv
+			c <- BS.hGet h defaultChunkSize
+			let n' = n - fromIntegral (BS.length c)
+			let c' = L.fromChunks [BS.take (fromIntegral n) c]
+			if BS.null c
+				then if isdone
+					then return mempty
+					else do
+						-- Wait for more data to be
+						-- written to the file.
+						threadDelaySeconds (Seconds 1)
+						getcontents n h
+				else if n' > 0
+					then do
+						-- unsafeInterleaveIO causes
+						-- this to be deferred until
+						-- data is read from the lazy
+						-- ByteString.
+						cs <- getcontents n' h
+						return $ L.append c' cs
+					else return c'
+
 	senddata (Offset offset) f = do
 		size <- fromIntegral <$> getFileSize f
-		let n = max 0 (size - offset)
-		sendmessage $ DATA (Len n)
+		sendlen offset size
 		withBinaryFile (fromRawFilePath f) ReadMode $ \h -> do
 			hSeek h AbsoluteSeek offset
-			sendbs =<< L.hGetContents h
+			senddata' h L.hGetContents
+
+	senddata' h getcontents = do
+			sendbs =<< getcontents h
 			-- Important to keep the handle open until
 			-- the client responds. The bytestring
 			-- could still be lazily streaming out to
@@ -272,6 +355,11 @@ proxySpecialRemote protoversion r ihdl ohdl owaitv oclosedv mexportdb = go
 				Just FAILURE -> return ()
 				Just _ -> giveup "protocol error"
 				Nothing -> return ()
+
+	sendlen offset size = do
+		let n = max 0 (size - offset)
+		sendmessage $ DATA (Len n)
+

 {- Check if this repository can proxy for a specified remote uuid,
  - and if so enable proxying for it. -}
2024-10-07 15:12:09 -04:00
Joey Hess
b501d23f9b
update 2024-10-07 10:06:12 -04:00
Joey Hess
f3403e9691
add news item for git-annex 10.20240927 2024-09-30 19:16:06 -04:00
Joey Hess
fca26db22b
releasing package git-annex version 10.20240927 2024-09-30 19:15:57 -04:00
Joey Hess
3d7f94ea39
Merge branch 'master' of ssh://git-annex.branchable.com 2024-09-30 17:36:45 -04:00
Joey Hess
743690d022
fix build with old random
getStdGen used to be an IO not a MonadIO action
2024-09-30 17:36:19 -04:00
brendan.ward@a2e11ad27f6b2fa2c556aea6811496e0d95dd0da
191e84d82a 2024-09-30 20:54:14 +00:00
Joey Hess
d2ad07f5a3
fix build with random-1.2
getStdGen worked with that version but initStdGen is newer. For our
purposes, they are equivalent.
2024-09-30 14:56:06 -04:00
Joey Hess
75b3f0eb75
fix build with old base
i386ancient has a base too old for NE.singleton
2024-09-30 11:02:08 -04:00
Joey Hess
1d8bf92724
Merge branch 'master' of ssh://git-annex.branchable.com 2024-09-27 15:31:31 -04:00
Joey Hess
5225812659
Revert "remove stack-lts-18.13.yaml"
This reverts commit b0546e8bde.

https://github.com/datalad/git-annex/issues/204 is still not fixed
2024-09-27 15:30:51 -04:00
Joey Hess
e8e4347fcc
update version for release 2024-09-27 10:01:44 -04:00
mike@2d6d71f56ce2a992244350475251df87c26fe351
7b5dda33e0 removed 2024-09-27 12:18:59 +00:00
mike@2d6d71f56ce2a992244350475251df87c26fe351
39e02528f0 Added a comment: corruption using git-annex-remote-rclone 2024-09-27 12:18:41 +00:00
mike@2d6d71f56ce2a992244350475251df87c26fe351
82538a9cd3 Added a comment: corruption using git-annex-remote-rclone 2024-09-27 07:39:06 +00:00
Joey Hess
b0546e8bde
remove stack-lts-18.13.yaml
windows autobuilder should have been fixed by now

(offline so didn't check)
2024-09-26 18:46:12 -04:00
Joey Hess
4ca3d1d584
remove read of the heads
and one tail

Removed head from Utility.PartialPrelude in order to avoid the build
warning with recent ghc versions as well.
2024-09-26 18:43:59 -04:00
Joey Hess
10216b44d2
use NonEmpty for dirHashes
This avoids 4 uses of head.
2024-09-26 18:15:00 -04:00
Joey Hess
43f31121a5
Git: use NonEmpty in fullconfig
This is a nice win. Avoids partial functions, by encoding at the type
level the fact that fullconfig is never an empty list.
2024-09-26 17:54:36 -04:00
Joey Hess
936f22273e
avoid head
While in some sense this is better, the use of NE.fromList is still
partial.
2024-09-26 17:53:00 -04:00
Joey Hess
c8fcd97626
avoid head
Recent ghc has a deprecation warning on it.

This is not an improvement though. I know these cannot fail, but I can't
prove it to ghc.
2024-09-26 17:52:19 -04:00
Joey Hess
30713ab0d3
avoid head
Seems like generate works fine to generate a single arbitrary value, I
dunno why I used sample' originally.
2024-09-26 17:49:41 -04:00
Joey Hess
5a8add5d55
remove slightly unsafe use of head
If git rev-parse somehow didn't output anything, git-annex would crash
here.
2024-09-26 17:21:22 -04:00
Joey Hess
99236376e7
sim: document interruption and concurrency issues
Does not seem worth doing a lot of locking and detection of these
problems.
2024-09-26 12:26:47 -04:00
Joey Hess
783e910d0c
sim: Add metadata command
Only really needed for completeness, preferred content expressions can
match against metadata.
2024-09-26 12:20:37 -04:00
Joey Hess
b492eb051b
heading 2024-09-25 14:54:55 -04:00
Joey Hess
253f2325fb
remove example, which didn't format right in mdwn 2024-09-25 14:54:21 -04:00
Joey Hess
df7045c2e4
formatting 2024-09-25 14:53:46 -04:00
Joey Hess
854fcf9619
formatting 2024-09-25 14:50:17 -04:00
Joey Hess
49c3e1d8f3
formatting 2024-09-25 14:49:48 -04:00
Joey Hess
6a95e4edad
sim: support "--" as comment
Using this in my sim files that are also mdwn files to avoid comments
being displayed as headers.
2024-09-25 14:47:32 -04:00
Joey Hess
dc6c0f0f1f
preparing for release later this week 2024-09-25 14:43:52 -04:00
Joey Hess
6f084524bd
Merge branch 'sim' 2024-09-25 14:42:27 -04:00
Joey Hess
76362278e9
export only the parts of aeson that are used
Rather than hiding things not wanted. This fixes a build warning with
aeson-2.2.3 which no longer has a json function.
2024-09-25 14:41:23 -04:00
Joey Hess
d026e585be
update 2024-09-25 14:29:37 -04:00
Joey Hess
431499e4ff
fix tab damage that broke examples formatting in man page
When did vim default to expandtabs for mdwn? No.
2024-09-25 14:23:04 -04:00
Joey Hess
8e94b75a61
support simulating clusters
Without actually simulating cluster implementation at all. Instead, only
the essential fact that cluster gateways know what changes they have
made to each node of a cluster. That is enough for sims like
sizebalanced_cluster.
2024-09-25 14:06:41 -04:00
Joey Hess
61c95f4d29
design for simulating clusters w/o simulating cluster gateways 2024-09-25 12:58:53 -04:00
Joey Hess
b9214d4162
Revert "sim: add commands for cluster management"
This reverts commit 344141da63.

Rethinking this
2024-09-25 12:11:03 -04:00
Joey Hess
85418d6c72
update 2024-09-25 12:10:55 -04:00
Joey Hess
344141da63
sim: add commands for cluster management
Clusters are not actually simulated yet.
2024-09-25 11:48:22 -04:00
nobodyinperson
e15b8769e0 Added a comment: Re: default preferred content 2024-09-25 09:25:42 +00:00
nadir
e22272129e 2024-09-25 06:41:27 +00:00
Joey Hess
8047128591
sim: quiesce before freezing or ending
Probably a good idea for freezing, but especially I hope this fixes a
problem with git-annex sim run that caused it to sometimes crash in
removeDirectoryRecursive with directory not empty, presumably because a
thread was writing there at the same time.
2024-09-24 16:46:09 -04:00
Joey Hess
540bd5e1ab
sim: added run subcommand
And a nice sim of random preferred content expressions.
2024-09-24 12:06:34 -04:00
Joey Hess
9571162057
sim: add stepstable 2024-09-24 11:50:24 -04:00
Joey Hess
4ed58d7894
sim: random preferred content expression generation 2024-09-24 11:23:23 -04:00
Joey Hess
ee3d6502bb
prevent action or step from simulating running on a special remote
Without any connections, the step command will not try to do any actions
on a special remote.

But even without any connections, it's still possible for a drop action
explicitly run "on" the special remote to succeed, when numcopies = 0 or
there is a trusted repo. So guard all actions against running on a
special remote too.
2024-09-24 10:15:56 -04:00
Joey Hess
7cc4312695
fix state overwrite bug
I have needed to exercise a lot of care in threading st through, and I
got it wrong here. Probably using a state monad would be a good idea.
2024-09-24 10:00:38 -04:00
adehnert
ec59cb526f Added a comment: Settable default preferred content? 2024-09-24 00:02:21 +00:00