git-annex

Author	SHA1	Message	Date
Joey Hess	8baa43ee12	tried a blind alley on streaming special remote download via proxy This didn't work. In case I want to revisit, here's what I tried. diff --git a/Annex/Proxy.hs b/Annex/Proxy.hs index 48222872c1..e4e526d3dd 100644 --- a/Annex/Proxy.hs +++ b/Annex/Proxy.hs @@ -26,16 +26,21 @@ import Logs.UUID import Logs.Location import Utility.Tmp.Dir import Utility.Metered +import Utility.ThreadScheduler +import Utility.OpenFd import Git.Types import qualified Database.Export as Export import Control.Concurrent.STM import Control.Concurrent.Async +import Control.Concurrent.MVar import qualified Data.ByteString as B +import qualified Data.ByteString as BS import qualified Data.ByteString.Lazy as L import qualified System.FilePath.ByteString as P import qualified Data.Map as M import qualified Data.Set as S +import System.IO.Unsafe proxyRemoteSide :: ProtocolVersion -> Bypass -> Remote -> Annex RemoteSide proxyRemoteSide clientmaxversion bypass r @@ -240,21 +245,99 @@ proxySpecialRemote protoversion r ihdl ohdl owaitv oclosedv mexportdb = go writeVerifyChunk iv h b storetofile iv h (n - fromIntegral (B.length b)) bs - proxyget offset af k = withproxytmpfile k $ \tmpfile -> do + proxyget offset af k = withproxytmpfile k $ \tmpfile -> + let retrieve = tryNonAsync $ Remote.retrieveKeyFile r k af + (fromRawFilePath tmpfile) nullMeterUpdate vc + in case fromKey keySize k of + Just size \| size > 0 -> do + cancelv <- liftIO newEmptyMVar + donev <- liftIO newEmptyMVar + streamer <- liftIO $ async $ + streamdata offset tmpfile size cancelv donev + retrieve >>= \case + Right _ -> liftIO $ do + putMVar donev () + wait streamer + Left err -> liftIO $ do + putMVar cancelv () + wait streamer + propagateerror err + _ -> retrieve >>= \case + Right _ -> liftIO $ senddata offset tmpfile + Left err -> liftIO $ propagateerror err + where -- Don't verify the content from the remote, -- because the client will do its own verification. 
- let vc = Remote.NoVerify - tryNonAsync (Remote.retrieveKeyFile r k af (fromRawFilePath tmpfile) nullMeterUpdate vc) >>= \case - Right _ -> liftIO $ senddata offset tmpfile - Left err -> liftIO $ propagateerror err + vc = Remote.NoVerify + streamdata (Offset offset) f size cancelv donev = do + sendlen offset size + waitforfile + x <- tryNonAsync $ do + fd <- openFdWithMode f ReadOnly Nothing defaultFileFlags + h <- fdToHandle fd + hSeek h AbsoluteSeek offset + senddata' h (getcontents size) + case x of + Left err -> do + throwM err + Right res -> return res + where + -- The file doesn't exist at the start. + -- Wait for some data to be written to it as well, + -- in case an empty file is first created and then + -- overwritten. When there is an offset, wait for + -- the file to get that large. Note that this is not used + -- when the size is 0. + waitforfile = tryNonAsync (fromIntegral <$> getFileSize f) >>= \case + Right sz \| sz > 0 && sz >= offset -> return () + _ -> ifM (isEmptyMVar cancelv) + ( do + threadDelaySeconds (Seconds 1) + waitforfile + , do + return () + ) + + getcontents n h = unsafeInterleaveIO $ do + isdone <- isEmptyMVar donev <\|\|> isEmptyMVar cancelv + c <- BS.hGet h defaultChunkSize + let n' = n - fromIntegral (BS.length c) + let c' = L.fromChunks [BS.take (fromIntegral n) c] + if BS.null c + then if isdone + then return mempty + else do + -- Wait for more data to be + -- written to the file. + threadDelaySeconds (Seconds 1) + getcontents n h + else if n' > 0 + then do + -- unsafeInterleaveIO causes + -- this to be deferred until + -- data is read from the lazy + -- ByteString. 
+ cs <- getcontents n' h + return $ L.append c' cs + else return c' + senddata (Offset offset) f = do size <- fromIntegral <$> getFileSize f - let n = max 0 (size - offset) - sendmessage $ DATA (Len n) + sendlen offset size withBinaryFile (fromRawFilePath f) ReadMode $ \h -> do hSeek h AbsoluteSeek offset - sendbs =<< L.hGetContents h + senddata' h L.hGetContents + + senddata' h getcontents = do + sendbs =<< getcontents h -- Important to keep the handle open until -- the client responds. The bytestring -- could still be lazily streaming out to @@ -272,6 +355,11 @@ proxySpecialRemote protoversion r ihdl ohdl owaitv oclosedv mexportdb = go Just FAILURE -> return () Just _ -> giveup "protocol error" Nothing -> return () + + sendlen offset size = do + let n = max 0 (size - offset) + sendmessage $ DATA (Len n) + {- Check if this repository can proxy for a specified remote uuid, - and if so enable proxying for it. -}	2024-10-07 15:12:09 -04:00
Spencer	cb196337f4	additional question of spaces in URL	2024-10-07 19:10:19 +00:00
Spencer	abd56608cf		2024-10-07 19:02:17 +00:00
matrss	f650627b23		2024-10-07 14:40:19 +00:00
matrss	b0a6301cde	Added a comment	2024-10-07 14:12:23 +00:00
Joey Hess	b501d23f9b	update	2024-10-07 10:06:12 -04:00
matrss	6b6ec39997		2024-10-07 13:59:56 +00:00
sng@353ca358075d9aa328f60a5439a3cee10f8301fe	b57677251b	Added a comment	2024-10-06 21:42:13 +00:00
matrss	19f7b0e7d4		2024-10-02 15:07:54 +00:00
matrss	470bd1f441		2024-10-02 14:51:58 +00:00
matrss	4a794ce0ba		2024-10-02 14:42:37 +00:00
yarikoptic	13580427c8	filing an issue on yt-dlp not used for some reason	2024-10-01 21:01:40 +00:00
Joey Hess	f3403e9691	add news item for git-annex 10.20240927	2024-09-30 19:16:06 -04:00
brendan.ward@a2e11ad27f6b2fa2c556aea6811496e0d95dd0da	191e84d82a		2024-09-30 20:54:14 +00:00
mike@2d6d71f56ce2a992244350475251df87c26fe351	7b5dda33e0	removed	2024-09-27 12:18:59 +00:00
mike@2d6d71f56ce2a992244350475251df87c26fe351	39e02528f0	Added a comment: corruption using git-annex-remote-rclone	2024-09-27 12:18:41 +00:00
mike@2d6d71f56ce2a992244350475251df87c26fe351	82538a9cd3	Added a comment: corruption using git-annex-remote-rclone	2024-09-27 07:39:06 +00:00
Joey Hess	99236376e7	sim: document interruption and concurrency issues Does not seem worth doing a lot of locking and detection of these problems.	2024-09-26 12:26:47 -04:00
Joey Hess	783e910d0c	sim: Add metadata command Only really needed for completeness, preferred content expressions can match against metadata.	2024-09-26 12:20:37 -04:00
Joey Hess	b492eb051b	heading	2024-09-25 14:54:55 -04:00
Joey Hess	253f2325fb	remove example, which didn't format right in mdwn	2024-09-25 14:54:21 -04:00
Joey Hess	df7045c2e4	formatting	2024-09-25 14:53:46 -04:00
Joey Hess	854fcf9619	formatting	2024-09-25 14:50:17 -04:00
Joey Hess	49c3e1d8f3	formatting	2024-09-25 14:49:48 -04:00
Joey Hess	6a95e4edad	sim: support "--" as comment Using this in my sim files that are also mdwn files to avoid comments being displayed as headers.	2024-09-25 14:47:32 -04:00
Joey Hess	6f084524bd	Merge branch 'sim'	2024-09-25 14:42:27 -04:00
Joey Hess	d026e585be	update	2024-09-25 14:29:37 -04:00
Joey Hess	431499e4ff	fix tab damage that broke examples formatting in man page When did vim default to expandtabs for mdwn? No.	2024-09-25 14:23:04 -04:00
Joey Hess	8e94b75a61	support simulating clusters Without actually simulating cluster implementation at all. Instead, only the essential fact that cluster gateways know what changes they have made to each node of a cluster. That is enough for sims like sizebalanced_cluster.	2024-09-25 14:06:41 -04:00
Joey Hess	61c95f4d29	design for simulating clusters w/o simulating cluster gateways	2024-09-25 12:58:53 -04:00
Joey Hess	b9214d4162	Revert "sim: add commands for cluster management" This reverts commit `344141da63`. Rethinking this	2024-09-25 12:11:03 -04:00
Joey Hess	85418d6c72	update	2024-09-25 12:10:55 -04:00
Joey Hess	344141da63	sim: add commands for cluster management Clusters are not actually simulated yet.	2024-09-25 11:48:22 -04:00
nobodyinperson	e15b8769e0	Added a comment: Re: default preferred content	2024-09-25 09:25:42 +00:00
nadir	e22272129e		2024-09-25 06:41:27 +00:00
Joey Hess	540bd5e1ab	sim: added run subcommand And a nice sim of random preferred content expressions.	2024-09-24 12:06:34 -04:00
Joey Hess	9571162057	sim: add stepstable	2024-09-24 11:50:24 -04:00
Joey Hess	4ed58d7894	sim: random preferred content expression generation	2024-09-24 11:23:23 -04:00
Joey Hess	7cc4312695	fix state overwrite bug I have needed to exercise a lot of care in threading st through, and I got it wrong here. Probably using a state monad would be a good idea.	2024-09-24 10:00:38 -04:00
adehnert	ec59cb526f	Added a comment: Settable default preferred content?	2024-09-24 00:02:21 +00:00
Joey Hess	76fa43e882	update test case for bug after recent changes broke the test case the other bug I cannot reproduce though	2024-09-23 16:05:11 -04:00
Joey Hess	969e6c2747	sped up sim step by about 200% Noticed that it was quite slow compared with things like action sendwanted. Guessed that the slowdown is largely due to every step doing a simulated git pull/push. So, rather than always doing a pull/push, only do those when no actions are found without doing a pull/push. This does mean that step will sometimes experience a split brain situation, but that seems like a good thing? Because step ought to explore as many possible scenarios as it reasonably can.	2024-09-23 15:45:47 -04:00
Joey Hess	6df101f8b4	added sim of sizebalanced in a splitbrain situation	2024-09-23 15:04:52 -04:00
Joey Hess	5a4bee24b8	fix sizebalanced empty size bug Fix bug that prevented anything being stored in an empty repository whose preferred content expression uses sizebalanced.	2024-09-23 14:30:18 -04:00
Joey Hess	1aacf7ece4	adds sims collection	2024-09-23 13:43:55 -04:00
Joey Hess	7bc8c2bfeb	sim visit as first-class command Allows using it in a sim file.	2024-09-23 13:09:35 -04:00
Joey Hess	6cf9a101b8	sim: Fix size tracking for balanced preferred content	2024-09-23 12:42:32 -04:00
Joey Hess	a6b8082119	update	2024-09-23 09:38:56 -04:00
AaronBrooks	edc02432ef	removed	2024-09-22 22:21:32 +00:00
AaronBrooks	8857265224	Added a comment: reinject files -- more efficiently	2024-09-22 22:21:05 +00:00
AaronBrooks	6ee1a98071	Added a comment: reinject files -- more efficiently	2024-09-22 22:19:13 +00:00
Joey Hess	2daa8a8f21	puzzling bug	2024-09-20 16:53:40 -04:00
Joey Hess	19b966f0fd	sim: better step On each step, find all the actions that could be done, and pick one of them to do. Should detect stability, but that is broken.	2024-09-20 15:23:34 -04:00
Joey Hess	24b3aed84a	update	2024-09-20 11:59:35 -04:00
Joey Hess	fd24d0d66f	update	2024-09-20 11:26:40 -04:00
Joey Hess	7c10d6846c	update	2024-09-20 11:05:57 -04:00
Joey Hess	f061ae92fb	sim: implement addtree	2024-09-20 10:34:52 -04:00
Joey Hess	5e51e7c339	comment	2024-09-18 09:08:42 -04:00
Joey Hess	29d8429779	sim: tested concurrency over actions This demonstrates concurrent behavior that looks right. And with a random seed, the results are deterministic. init foo init bar init backup connect foo <-> bar connect foo <-> backup addmulti 10 testfiles 1mb 1gb foo backup action foo gitpull backup wanted foo nothing wanted bar anything wanted backup anything action bar gitpull foo action foo dropunwanted while action bar getwanted foo	2024-09-17 14:39:53 -04:00
Joey Hess	6751f23978	sim: fix get bug When getting from a remote, have to check that the repo doing the getting thinks the remote contains the key, but also that the remote actually does. Before this bug fix, it would get from a repo that used to have the key, but that had dropped it since the last git pull.	2024-09-17 14:29:49 -04:00
Joey Hess	02f0996e25	git-annex sim log	2024-09-17 13:43:11 -04:00
Joey Hess	b85965cb3c	sim: implement dropunwantedfrom	2024-09-17 13:35:35 -04:00
Joey Hess	eb5fad4e79	fix ActionDropUnwanted Now tested working	2024-09-17 11:55:57 -04:00
Joey Hess	4c7db31c20	addmulti	2024-09-17 11:22:14 -04:00
Joey Hess	2a16796a1c	move pull/push/sync into getSimActionComponents As well as being a more pleasing implementation than I managed yesterday, this allows for those actions to be run concurrently in the sim.	2024-09-17 10:54:44 -04:00
Joey Hess	7d27a8ea1a	sim concurrency	2024-09-17 10:37:22 -04:00
Joey Hess	3b7e3cb2f4	add	2024-09-17 08:31:55 -04:00
Joey Hess	c420ec9364	sim: add action repo sync command	2024-09-16 16:48:21 -04:00
Joey Hess	52891711d2	git-annex sim command is working Had to add Read instances to Key and NumCopies and some other similar types. I only expect to use those in serializing a sim. Of course, this risks that implementation changes break reading old data. For a sim, that would not be a big problem.	2024-09-12 16:10:52 -04:00
mike@2d6d71f56ce2a992244350475251df87c26fe351	a2895c2dac	Added a comment	2024-09-12 15:40:24 +00:00
nobodyinperson	f8d1022db0	Added a comment: 👍 +1 for encrypting the annex on regular git remotes	2024-09-12 14:51:20 +00:00
Joey Hess	7e8274c6b7	implemented ActionDropUnwanted Not tested yet. This emulates the same checking that is done when dropping. Note that when dropping from a special remote it is not able to make a locked copy.	2024-09-12 10:44:31 -04:00
m.szczepanik@8dd0314f20fa09be99ee3903d1c04a80eafbd849	3a03ed42e6		2024-09-12 12:13:06 +00:00
mike@2d6d71f56ce2a992244350475251df87c26fe351	0f2754ec3c	Added a comment	2024-09-12 05:22:18 +00:00
yarikoptic	28d207bc57	initial report on that addunlocked is not respected during import	2024-09-11 20:47:45 +00:00
Joey Hess	f381b457f2	sim file parser and generator The generator doesn't emit the best possible connect commands, but it does output something valid. Eg, an input like: connect A <-> B <-> C <-> D becomes: connect A <-> B <-> C connect C <-> D Also: connect A -> B <- C becomes: connect A -> B connect C -> B Which could be improved. Also disconnect commands are not prettified at all, but probably there's no reason to.	2024-09-11 15:59:13 -04:00
Joey Hess	84bbbeae9d	started on sim file parser	2024-09-11 11:53:25 -04:00
Joey Hess	64466d8687	add action command to git-annex sim step just picks a random action, and this allows finer control over what happens in the sim	2024-09-09 16:06:45 -04:00
Joey Hess	a2c0d5e4a9	finish updateSimRepoState Converted maps to use UUID as key. Also added mincopies to the sim.	2024-09-09 09:37:59 -04:00
Joey Hess	811dd95453	maxsize of 0 to disable	2024-09-09 09:32:43 -04:00
Joey Hess	def8095e5f	rethought sim a bit	2024-09-06 12:53:20 -04:00
yarikoptic	578abf7b89	initial report on incorrect handling of empty files in adjusted branches mode	2024-09-06 14:01:34 +00:00
Joey Hess	d717e9aca0	Merge branch 'master' of ssh://git-annex.branchable.com	2024-09-05 15:25:34 -04:00
yarikoptic	f0aa5ddf3e	Added a comment	2024-09-05 14:52:51 +00:00
yarikoptic	3d0dc4a91d	Added a comment: ping on this issue : how to recover?	2024-09-05 14:49:07 +00:00
Joey Hess	ed740bc31e	comment	2024-09-05 09:20:38 -04:00
Joey Hess	84c781d924	documentation for git-annex sim command not implemented yet	2024-09-04 15:03:17 -04:00
tapesafer	6412c19127	Added a comment: PS	2024-09-04 15:48:01 +00:00
Joey Hess	00e3531169	update	2024-09-04 11:36:46 -04:00
tapesafer	2c458d7116	Added a comment: numcopies & force-trusting is ignored by fsck on readonly directory remotes?	2024-09-04 14:50:16 +00:00
Rick	3f2957d0e4	Added a comment: Similar Borg sync issue	2024-09-03 19:40:57 +00:00
Joey Hess	1b6c33a38e	update	2024-09-03 14:24:32 -04:00
Joey Hess	3398514c38	sim design	2024-09-03 14:23:48 -04:00
Joey Hess	fe71400e37	fix typo	2024-09-03 14:23:14 -04:00
Joey Hess	340bdd0dac	treat "not present" in preferred content as invalid Detect when a preferred content expression contains "not present", which would lead to repeatedly getting and then dropping files, and make it never match. This also applies to "not balanced" and "not sizebalanced". --explain will tell the user when this happens Note that getMatcher calls matchMrun' and does not check for unstable negated limits. While there is no --present anyway, if there was, it would not make sense for --not --present to complain about instability and fail to match.	2024-09-03 13:50:06 -04:00
Joey Hess	03864a2c3b	update	2024-09-03 11:52:54 -04:00
Joey Hess	b800ea6826	2 level toc	2024-09-02 16:32:28 -04:00
Joey Hess	ab0c82114b	Merge branch 'master' of ssh://git-annex.branchable.com	2024-09-02 16:31:31 -04:00
Joey Hess	1e1c13dd38	fix number of headers	2024-09-02 16:31:03 -04:00
lucas.gautheron@f2b5c93a64b028c1ec8698b9c2412ed51ff22040	850ea3a9b8		2024-09-02 15:12:02 +00:00
lucas.gautheron@f2b5c93a64b028c1ec8698b9c2412ed51ff22040	925c203c09		2024-09-02 15:08:25 +00:00
Joey Hess	9d29b99ac4	add news item for git-annex 10.20240831	2024-08-31 19:50:36 -04:00
Joey Hess	698d9252a5	mention sizebalanced as well as balanced	2024-08-30 12:06:45 -04:00
Joey Hess	53b7375cc6	update	2024-08-30 11:14:45 -04:00
Joey Hess	54b6151412	document using balanced preferred content in a cluster	2024-08-30 11:08:32 -04:00
Joey Hess	d0938d730b	Merge branch 'master' into balanced	2024-08-30 11:01:39 -04:00
Joey Hess	242c525659	lookupkey: Allow using --ref in a bare repository.	2024-08-30 10:55:48 -04:00
yarikoptic	e2b7895cbc	Added a comment	2024-08-29 18:35:47 +00:00
Joey Hess	f89a1b8216	remove stale live changes from reposize database Reorganized the reposize database directory, and split up a column. checkStaleSizeChanges needs to run before needLiveUpdate, otherwise the process won't be holding a lock on its pid file, and another process could go in and expire the live update it records. It just so happens that they do get called in the correct order, since checking balanced preferred content calls getLiveRepoSizes before needLiveUpdate. The 1 minute delay between checks is arbitrary, but will avoid excess work. The downside of it is that, if a process is dropping a file and gets interrupted, for 1 minute another process can expect a repository will soon be smaller than it is. And so a process might send data to a repository when a file is not really going to be dropped from it. But note that can already happen if a drop takes some time in eg locking and then fails. So it seems possible that live updates should only be allowed to increase, rather than decrease the size of a repository.	2024-08-28 13:57:25 -04:00
Joey Hess	278adbb726	combine 2 queries	2024-08-28 11:00:59 -04:00
Joey Hess	e006acef22	avoid reposize database locking overhead when not needed Only when the preferred content expression being matched uses balanced preferred content is this overhead needed. It might be possible to eliminate the locking entirely. Eg, check the live changes before and after the action and re-run if they are not stable. For now, this is good enough, it avoids existing preferred content getting slow. If balanced preferred content turns out to be too slow to check, that could be tried later.	2024-08-28 10:52:34 -04:00
matrss	833150fd25	Added a comment	2024-08-28 14:11:36 +00:00
mih	16f9042046	Added a comment: Needed to retrieve single file metadata from bare repo	2024-08-28 13:58:30 +00:00
matrss	3f62116d64	Added a comment	2024-08-28 08:47:33 +00:00
Joey Hess	0a119184e6	thoughts	2024-08-27 14:59:13 -04:00
Joey Hess	8555fb88ef	locking in checkLiveUpdate This makes sure that two threads don't check balanced preferred content at the same time, so each thread always sees a consistent picture of what is happening. This does add a fairly expensive file level lock to every check of preferred content, in commands that use prepareLiveUpdate. It would be good to only do that when live updates are actually needed, eg when the preferred content expression uses balanced preferred content.	2024-08-27 13:12:43 -04:00
Joey Hess	4d2f95853d	closing in on finishing live reposizes Fixed successfullyFinishedLiveSizeChange to not update the rolling total when a redundant change is in RecentChanges. Made setRepoSizes clear RecentChanges that are no longer needed. It might be possible to clear those earlier, this is only a convenient point to do it. The reason it's safe to clear RecentChanges here is that, in order for a live update to call successfullyFinishedLiveSizeChange, a change must be made to a location log. If a RecentChange gets cleared, and just after that a new live update is started, making the same change, the location log has already been changed (since the RecentChange exists), and so when the live update succeeds, it won't call successfullyFinishedLiveSizeChange. The reason it doesn't clear RecentChanges when there is a redundant live update is because I didn't want to think through whether or not all races are avoided in that case. The rolling total in SizeChanges is never cleared. Instead, calcJournalledRepoSizes gets the initial value of it, and then getLiveRepoSizes subtracts that initial value from the current value. Since the rolling total can only be updated by updateRepoSize, which is called with the journal locked, locking the journal in calcJournalledRepoSizes ensures that the database does not change while reading the journal.	2024-08-27 12:54:46 -04:00
Spencer	949be665c0	Added contributions section to track my bugs and inquiries	2024-08-26 20:02:03 +00:00
Joey Hess	21608716bd	started work on getLiveRepoSizes Doesn't quite compile	2024-08-26 14:50:09 -04:00
Joey Hess	db89e39df6	partially fix concurrency issue in updating the rollingtotal It's possible for two processes or threads to both be doing the same operation at the same time. Eg, both dropping the same key. If one finishes and updates the rollingtotal, then the other one needs to be prevented from later updating the rollingtotal as well. And they could finish at the same time, or with some time in between. Addressed this by making updateRepoSize be called with the journal locked, and only once it's been determined that there is an actual location change to record in the log. updateRepoSize waits for the database to be updated. When there is a redundant operation, updateRepoSize won't be called, and the redundant LiveUpdate will be removed from the database on garbage collection. But: There will be a window where the redundant LiveUpdate is still visible in the db, and processes can see it, combine it with the rollingtotal, and arrive at the wrong size. This is a small window, but it still ought to be addressed. Unsure if it would always be safe to remove the redundant LiveUpdate? Consider the case where two drops and a get are all running concurrently somehow, and the order they finish is [drop, get, drop]. The second drop seems redundant to the first, but it would not be safe to remove it. While this seems unlikely, it's hard to rule out that a get and drop at different stages can both be running at the same time.	2024-08-26 09:43:32 -04:00
Joey Hess	03c7f99957	todo	2024-08-25 10:48:42 -04:00
Joey Hess	2b037d36a1	update	2024-08-24 15:06:00 -04:00
Joey Hess	6660984442	update	2024-08-24 13:15:39 -04:00
Joey Hess	d60a33fd13	improve live update starting In an expression like "balanced=foo and exclude=bar", avoid it starting a live update when the overall expression doesn't match.	2024-08-24 13:07:05 -04:00
Joey Hess	16f945459c	todo	2024-08-24 11:58:17 -04:00
Joey Hess	2f20b939b7	LiveUpdate db updates working I've tested the behavior of the thread that waits for the LiveUpdate to be finished, and it does get signaled and exit cleanly when the LiveUpdate is GCed instead. Made finishedLiveUpdate wait for the thread to finish updating the database. There is a case where GC doesn't happen in time and the database is left with a live update recorded in it. This should not be a problem as such stale data can also happen when interrupted and will need to be detected when loading the database. Balanced preferred content expressions now call startLiveUpdate.	2024-08-24 11:49:58 -04:00
Joey Hess	84d1bb746b	LiveUpdate for clusters	2024-08-24 10:20:12 -04:00
Joey Hess	18cd8bf43a	punt on LiveUpdate plumbing through assistant for now	2024-08-24 09:37:24 -04:00
yarikoptic	efdee386c0	initial report on desire to do handle pathspecs	2024-08-24 01:35:31 +00:00
yarikoptic	c3877f648c	initial idea on another ability for get	2024-08-24 01:23:04 +00:00
Joey Hess	c3d40b9ec3	plumb in LiveUpdate (WIP) Each command that first checks preferred content (and/or required content) and then does something that can change the sizes of repositories needs to call prepareLiveUpdate, and plumb it through the preferred content check and the location log update. So far, only Command.Drop is done. Many other commands that don't need to do this have been updated to keep working. There may be some calls to NoLiveUpdate in places where that should be done. All will need to be double checked. Not currently in a compilable state.	2024-08-23 16:35:12 -04:00
Joey Hess	4885073377	add live size changes to RepoSize database Not yet used.	2024-08-23 12:51:00 -04:00
Joey Hess	dad1fb150f	update	2024-08-23 11:45:36 -04:00
Joey Hess	d0ab1550ec	possible design to address reposizes concurrency issues	2024-08-23 11:19:38 -04:00
gauss@055c9051f507c97fa5612f46c74ce636f5ecde10	d71ca87bc9	Added a comment: No root privileges server - annex-shell replaced by git-annex-shell	2024-08-23 01:51:49 +00:00
Joey Hess	8ade3fc5d6	improve docs	2024-08-22 08:09:10 -04:00
Joey Hess	abdd49d8c1	update	2024-08-22 07:53:56 -04:00
Joey Hess	173500872f	update	2024-08-22 07:17:04 -04:00
Joey Hess	70e2fca257	Added the annex.fullybalancedthreshhold git config.	2024-08-22 07:15:55 -04:00
Joey Hess	3fe67744b1	display new empty repos in maxsize table A new repo that has no location log info yet, but has an entry in uuid.log has 0 size, so make RepoSize aware of that. Note that a new repo that does not yet appear in uuid.log will still not be displayed. When a remote is added but not synced with yet, it has no uuid.log entry. If git-annex maxsize is used to configure that remote, it needs to appear in the maxsize table, and the change to Command.MaxSize takes care of that.	2024-08-22 07:03:22 -04:00
Spencer	acaa8e9cd5	Added a comment: Precise Workflow	2024-08-22 00:18:28 +00:00
Joey Hess	76ece2a699	make --rebalance of balanced use fullysizebalanced when useful When the specified number of copies is > 1, and some repositories are too full, it can be better to move content from them to other less full repositories, in order to make space for new content. annex.fullybalancedthreshhold is documented, but not implemented yet This is not tested very well yet, and is known to sometimes take several runs to stabilize.	2024-08-21 17:59:08 -04:00
Joey Hess	9e87061de2	Support "sizebalanced=" and "fullysizebalanced=" too Might want to make --rebalance turn balanced=group:N where N > 1 to fullysizebalanced=group:N. Have not yet determined if that will improve situations enough to be worth the extra work.	2024-08-21 15:01:54 -04:00
Joey Hess	4e1dcc0372	bug	2024-08-21 12:18:31 -04:00
Joey Hess	476d223bce	implement fullbalanced=group:N Rebalancing this when it gets into a suboptimal situation will need further work.	2024-08-20 13:51:02 -04:00
Matthew	4a9e637d36	Added a comment: Help with .nfsXXXX files	2024-08-19 21:20:59 +00:00
matrss	9cfdae4c3b	Added a comment	2024-08-19 10:25:13 +00:00
Joey Hess	68a99a8f48	size based rebalancing design	2024-08-18 16:25:12 -04:00
Joey Hess	99514f9d18	maxsize overview display and --json support	2024-08-18 12:08:13 -04:00
xentac	74b953cded	Added a comment	2024-08-18 03:17:12 +00:00
Joey Hess	f985c58d8e	consistently don't show sizes of empty repositories This used to be the case, and when matching options are used, that code path still omits them, so also omit them in the getRepoSize code path.	2024-08-17 15:09:16 -04:00
Joey Hess	b62b58b50b	git-annex info speed up using getRepoSizes	2024-08-17 14:54:31 -04:00
Joey Hess	d09a005f2b	update RepoSize database from git-annex branch incrementally The use of catObjectStream is optimally fast. Although it might be possible to combine this with git-annex branch merge to avoid some redundant work. Benchmarking, a git-annex branch that had 100000 files changed took less than 1.88 seconds to run through this.	2024-08-17 13:35:00 -04:00
Spencer	40b49e2ddd	Added a comment: Remote Helper?	2024-08-17 05:33:01 +00:00
matrss	bcf876e3a0		2024-08-16 15:52:32 +00:00
matrss	f057010086	Added a comment	2024-08-16 15:45:45 +00:00
Joey Hess	61d95627f3	fix Annex.repoSize sharing between threads	2024-08-16 10:56:51 -04:00
Joey Hess	e361b9ea3c	todo	2024-08-15 16:15:48 -04:00
Joey Hess	63ccf6ffa7	todo	2024-08-15 13:50:50 -04:00
Joey Hess	4a0c7e2b2c	update	2024-08-15 13:41:47 -04:00
Joey Hess	a2da9c526b	RepoSize concurrency fix When loading the journalled repo sizes, make sure that the current process is prevented from making changes to the journal in another thread.	2024-08-15 13:37:41 -04:00
Joey Hess	06064f897c	update Annex.reposizes when changing location logs The live update is only needed when Annex.reposizes has already been populated.	2024-08-15 13:27:14 -04:00
Joey Hess	c376b1bd7e	show message when doing possibly expensive from scratch reposize calculation	2024-08-15 12:42:36 -04:00
Joey Hess	c200523bac	implement getRepoSizes At this point the RepoSize database is getting populated, and it all seems to be working correctly. Incremental updates still need to be done to make it performant.	2024-08-15 12:31:56 -04:00
Joey Hess	eac4e9391b	finalize RepoSize database Including locking on creation, handling of permissions errors, and setting repo sizes. I'm confident that locking is not needed while using this database. Since writes happen in a single transaction. When there are two writers that are recording sizes based on different git-annex branch commits, one will overwrite what the other one recorded. Which is fine, it's only necessary that the database stays consistent with the content of a git-annex branch commit.	2024-08-15 12:29:34 -04:00
Atemu	e8997d8899	Added a comment	2024-08-15 15:40:20 +00:00
Joey Hess	3e6eb2a58d	implement journalledRepoSizes Plan is to run this when populating Annex.reposizes on demand. So Annex.reposizes will be up-to-date with the journal, including crucially journal entries for private repositories. But also anything that has been written to the journal by another process, especially if the process was ran with annex.alwayscommit=false. From there, Annex.reposizes can be kept up to date with changes made by the running process.	2024-08-14 13:53:24 -04:00
pedro-lopes-de-azevedo	c75ecc5350	Added a comment: parameter --from not accepted	2024-08-14 14:27:54 +00:00
bvaa	11eb2ae6ec	Added a comment	2024-08-14 07:18:26 +00:00
Joey Hess	90a79a6c1e	plan	2024-08-13 15:13:30 -04:00
Joey Hess	a979d8da41	update	2024-08-13 14:14:47 -04:00
Joey Hess	10d8b3cc63	fixed --rebalance stability on drop Was checking the wrong uuid, oops	2024-08-13 13:32:11 -04:00
Joey Hess	745bc5c547	take maxsize into account for balanced preferred content This is very inefficient, it will need to be optimised not to calculate the sizes of repos every time. Also, fixed a bug in balancedPicker that caused it to pick a too high index when some repos were excluded due to being full.	2024-08-13 11:00:20 -04:00
Spencer	05a62e4e5f	Added a comment: Workaround: --force-small	2024-08-13 07:05:57 +00:00
Spencer	3d252da06c	Added a comment: Exact Moment Things Go Wrong	2024-08-13 06:22:11 +00:00
Spencer	ab5f920d77	.md linting	2024-08-13 04:46:53 +00:00
Spencer	8a91a8c208		2024-08-13 04:46:10 +00:00
Spencer	c4296fbd45	Added a comment: Still a Problem (on Mac?)	2024-08-13 04:21:33 +00:00
ewen	491cf67ce2	Added a comment: Most servers upgraded to TLS v1.2 EMS / TLS v1.3	2024-08-13 00:01:05 +00:00
Joey Hess	b201792391	update	2024-08-12 18:57:03 -04:00
Joey Hess	1e799e7842	update	2024-08-12 11:56:52 -04:00
Joey Hess	71043fe9f7	update	2024-08-12 10:01:48 -04:00
Joey Hess	bcd2b9a5c4	idea	2024-08-12 09:43:14 -04:00
Joey Hess	1265d7e5df	implement maxsize log and command * maxsize: New command to tell git-annex how large the expected maximum size of a repository is. * vicfg: Include maxsize configuration.	2024-08-11 15:41:26 -04:00
Joey Hess	3019b21c40	more formal documentation of balancing	2024-08-11 13:29:06 -04:00
Joey Hess	bd5affa362	use hmac in balanced preferred content This deals with the possible security problem that someone could make an unusually low UUID and generate keys that are all constructed to hash to a number that, mod the number of repositories in the group, == 0. So balanced preferred content would always put those keys in the repository with the low UUID as long as the group contains the number of repositories that the attacker anticipated. Presumably the attacker then holds the data for ransom? Dunno. Anyway, the partial solution is to use HMAC (sha256) with all the UUIDs combined together as the "secret", and the key as the "message". Now any change in the set of UUIDs in a group will invalidate the attacker's constructed keys from hashing to anything in particular. Given that there are plenty of other things someone can do if they can write to the repository -- including modifying preferred content so only their repository wants files, and numcopies so other repositories drop them -- this seems like safeguard enough. Note that, in balancedPicker, combineduuids is memoized.	2024-08-10 16:32:54 -04:00
Joey Hess	bde58e6c71	todo	2024-08-09 16:57:10 -04:00
Joey Hess	412f6057e4	todo	2024-08-09 16:47:28 -04:00
xentac	fb186ab0a8	Added a comment	2024-08-09 19:31:12 +00:00
xentac	55a5cb7904		2024-08-09 19:22:19 +00:00
Joey Hess	f1cb5cb908	wrote git-annex maxsize man page	2024-08-09 14:57:11 -04:00
Joey Hess	5a6afff3d6	left off number option	2024-08-09 14:22:05 -04:00
Joey Hess	3ce2e95a5f	balanced preferred content and --rebalance This all works fine. But it doesn't check repository sizes yet, and without repository size checking, once a repository gets full, there will be no other repository that will want its files. Use of sha2 seems unnecessary, probably adler32 or md5 or crc would have been enough. Possibly just summing up the bytes of the key mod the number of repositories would have sufficed. But sha2 is there, and probably hardware accelerated. I doubt very much there is any security benefit to using it though. If someone wants to construct a key that will be balanced onto a given repository, sha2 is certainly not going to stop them.	2024-08-09 14:16:09 -04:00
Joey Hess	152c87140b	update	2024-08-08 16:06:02 -04:00
Joey Hess	0959bfe5d3	update for exporttree=yes	2024-08-08 15:51:36 -04:00
Joey Hess	727b6a0b6d	update	2024-08-08 15:34:36 -04:00
Joey Hess	2616056cde	Merge branch 'exportreeplus'	2024-08-08 15:31:57 -04:00
Joey Hess	3b758aaad6	add news item for git-annex 10.20240808	2024-08-08 15:27:11 -04:00
Joey Hess	3ea835c7e8	proxied exporttree=yes versionedexport=yes remotes are not untrusted This removes versionedExport, which was only used by the S3 special remote. Instead, versionedexport=yes is a common way for remotes to indicate that they are versioned.	2024-08-08 15:24:19 -04:00
Joey Hess	5c36177e58	proxied exporttree=yes remotes are untrustworthy This is not perfect because it does not handle versioned special remotes, which should not be untrustworthy, but now are when proxied. The implementation turned out to be easy, because the exporttree field is a default field, so is available in RemoteConfig even for git remotes.	2024-08-08 14:43:53 -04:00

... 2 3 4 5 6 ...

34923 commits