From b4305315b26460bc5f694e50cbab4f2d713ad90e Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Mon, 10 Oct 2022 17:37:26 -0400 Subject: [PATCH 01/31] S3: pass fileprefix into getBucket calls S3: Speed up importing from a large bucket when fileprefix= is set by only asking for files under the prefix. getBucket still returns the files with the prefix included, so the rest of the fileprefix stripping still works unchanged. Sponsored-by: Dartmouth College's DANDI project --- CHANGELOG | 2 ++ Remote/S3.hs | 13 ++++++--- ..._b0d9dbe81f01e80809381a9e5f6a883d._comment | 27 +++++++++++++++++++ 3 files changed, 38 insertions(+), 4 deletions(-) create mode 100644 doc/todo/allow_for_annonymous_AWS_S3_access/comment_8_b0d9dbe81f01e80809381a9e5f6a883d._comment diff --git a/CHANGELOG b/CHANGELOG index 8e8a0afb2b..c393dad101 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -6,6 +6,8 @@ git-annex (10.20221004) UNRELEASED; urgency=medium do not operate on a repository that has an empty name. * move: Fix openFile crash with -J (Fixes a reversion in 8.20201103) + * S3: Speed up importing from a large bucket when fileprefix= is set + by only asking for files under the prefix. 
-- Joey Hess Mon, 03 Oct 2022 13:36:42 -0400 diff --git a/Remote/S3.hs b/Remote/S3.hs index 1f0ebd3d5a..46a9bc49ce 100644 --- a/Remote/S3.hs +++ b/Remote/S3.hs @@ -216,7 +216,7 @@ gen r u rc gc rs = do , renameExport = renameExportS3 hdl this rs info } , importActions = ImportActions - { listImportableContents = listImportableContentsS3 hdl this info + { listImportableContents = listImportableContentsS3 hdl this info c , importKey = Nothing , retrieveExportWithContentIdentifier = retrieveExportWithContentIdentifierS3 hdl this rs info , storeExportWithContentIdentifier = storeExportWithContentIdentifierS3 hdl this rs info magic @@ -548,8 +548,8 @@ renameExportS3 hv r rs info k src dest = Just <$> go srcobject = T.pack $ bucketExportLocation info src dstobject = T.pack $ bucketExportLocation info dest -listImportableContentsS3 :: S3HandleVar -> Remote -> S3Info -> Annex (Maybe (ImportableContentsChunkable Annex (ContentIdentifier, ByteSize))) -listImportableContentsS3 hv r info = +listImportableContentsS3 :: S3HandleVar -> Remote -> S3Info -> ParsedRemoteConfig -> Annex (Maybe (ImportableContentsChunkable Annex (ContentIdentifier, ByteSize))) +listImportableContentsS3 hv r info c = withS3Handle hv $ \case Nothing -> giveup $ needS3Creds (uuid r) Just h -> Just <$> go h @@ -558,6 +558,8 @@ listImportableContentsS3 hv r info = ic <- liftIO $ runResourceT $ extractFromResourceT =<< startlist h return (ImportableContentsComplete ic) + fileprefix = T.pack <$> getRemoteConfigValue fileprefixField c + startlist h | versioning info = do rsp <- sendS3Handle h $ @@ -565,7 +567,8 @@ listImportableContentsS3 hv r info = continuelistversioned h [] rsp | otherwise = do rsp <- sendS3Handle h $ - S3.getBucket (bucket info) + (S3.getBucket (bucket info)) + { S3.gbPrefix = fileprefix } continuelistunversioned h [] rsp continuelistunversioned h l rsp @@ -573,6 +576,7 @@ listImportableContentsS3 hv r info = rsp' <- sendS3Handle h $ (S3.getBucket (bucket info)) { S3.gbMarker = 
S3.gbrNextMarker rsp + , S3.gbPrefix = fileprefix } continuelistunversioned h (rsp:l) rsp' | otherwise = return $ @@ -584,6 +588,7 @@ listImportableContentsS3 hv r info = (S3.getBucketObjectVersions (bucket info)) { S3.gbovKeyMarker = S3.gbovrNextKeyMarker rsp , S3.gbovVersionIdMarker = S3.gbovrNextVersionIdMarker rsp + , S3.gbovPrefix = fileprefix } continuelistversioned h (rsp:l) rsp' | otherwise = return $ diff --git a/doc/todo/allow_for_annonymous_AWS_S3_access/comment_8_b0d9dbe81f01e80809381a9e5f6a883d._comment b/doc/todo/allow_for_annonymous_AWS_S3_access/comment_8_b0d9dbe81f01e80809381a9e5f6a883d._comment new file mode 100644 index 0000000000..7aa88f7cb6 --- /dev/null +++ b/doc/todo/allow_for_annonymous_AWS_S3_access/comment_8_b0d9dbe81f01e80809381a9e5f6a883d._comment @@ -0,0 +1,27 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 8""" + date="2022-10-10T21:04:49Z" + content=""" +I've finished the work on aws, which is in + and I hope will be merged soon. + +git-annex now has a branch `anons3` that implements this, when +the S3 remote is configured with signature=anonymous. + + $ git-annex initremote s3-origin type=S3 importtree=yes encryption=none bucket=dandiarchive fileprefix=zarr-checksums/2ac71edb-738c-40ac-bd8c-8ca985adaa12/ signature=anonymous + initremote s3-origin (checking bucket...) ok + (recording state in git...) + $ git-annex import master --from s3-origin + list s3-origin ok + import s3-origin .checksum + ok + import s3-origin 0/.checksum + ok + import s3-origin 0/0/.checksum + ok + ^C + +Also, I've fixed it to only list files in the fileprefix, which +sped up the listing a *lot* in this bucket with many other files.. 
+"""]] From e22c3b3d7c4885b56fe4afbc7c2a6ef2758fcf7a Mon Sep 17 00:00:00 2001 From: "benjamin.poldrack@d09ccff6d42dd20277610b59867cf7462927b8e3" Date: Tue, 11 Oct 2022 09:12:00 +0000 Subject: [PATCH 02/31] --- ...ort_does_not_account_for_versioning_on_S3.mdwn | 15 +++++++++++++++ 1 file changed, 15 insertions(+) create mode 100644 doc/bugs/annex_import_does_not_account_for_versioning_on_S3.mdwn diff --git a/doc/bugs/annex_import_does_not_account_for_versioning_on_S3.mdwn b/doc/bugs/annex_import_does_not_account_for_versioning_on_S3.mdwn new file mode 100644 index 0000000000..d631fac177 --- /dev/null +++ b/doc/bugs/annex_import_does_not_account_for_versioning_on_S3.mdwn @@ -0,0 +1,15 @@ +### Please describe the problem. +Using the S3 special remote with version support enabled (`versioning=yes`) leads to a bit of a strange effect with multiple `annex import` calls. +First import from an existing bucket is fine. Now, if there were changes done to a file in the bucket and one runs `annex import` again, git-annex will record that the old version of said file is gone from the remote, requiring a `git annex fsck` to be called right after that import to fix it again. That seems a bit strange given that this versioning support comes with a "native" special remote. + +I suppose that's probably a more general issue with how import/export works, since there's no way for a special remote to communicate to annex whether two different versions of the same file (same remote path, but different key) would overwrite each other. Neither an importtree nor an exporttree remote has a way to tell annex whether and how the availability of a previous key associated with the same remote path was affected. + + +### What version of git-annex are you using? On what operating system? +Observed with several versions from 8-10 on linux. As laid out above, I strongly suspect this is true for all versions. + + +### Have you had any luck using git-annex before? 
(Sometimes we get tired of reading bug reports all day and a lil' positive end note does wonders) + +Lots. I love git-annex. + From c2ad84b423eb9900a7d0dd4171d6f69ddc531bf7 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Tue, 11 Oct 2022 13:04:33 -0400 Subject: [PATCH 03/31] all keys are still present on versioned remote after import of a tree When importing from versioned remotes, fix tracking of the content of deleted files. Only S3 supports versioning so far, so only it was affected. But, the draft import/export interface for external remotes also seemed to need a change, so that versionedExport could be set. --- Annex/Import.hs | 11 +++++++---- CHANGELOG | 4 +++- Command/Export.hs | 2 +- Types/Remote.hs | 7 ++++--- ..._import_does_not_account_for_versioning_on_S3.mdwn | 1 + ...omment_1_a04fbb936785456ed99512ea4c29fd53._comment | 10 ++++++++++ .../export_and_import_appendix.mdwn | 9 +++++++++ 7 files changed, 35 insertions(+), 9 deletions(-) create mode 100644 doc/bugs/annex_import_does_not_account_for_versioning_on_S3/comment_1_a04fbb936785456ed99512ea4c29fd53._comment diff --git a/Annex/Import.hs b/Annex/Import.hs index 1ab5c01aba..c16eb18213 100644 --- a/Annex/Import.hs +++ b/Annex/Import.hs @@ -184,10 +184,13 @@ recordImportTree remote importtreeconfig importable = do unlessM (stillpresent db oldkey) $ logChange oldkey (Remote.uuid remote) InfoMissing _ -> noop - db <- Export.openDb (Remote.uuid remote) - forM_ (exportedTreeishes oldexport) $ \oldtree -> - Export.runExportDiffUpdater updater db oldtree finaltree - Export.closeDb db + -- When the remote is versioned, it still contains keys + -- that are not present in the new tree. 
+ unless (Remote.versionedExport (Remote.exportActions remote)) $ do + db <- Export.openDb (Remote.uuid remote) + forM_ (exportedTreeishes oldexport) $ \oldtree -> + Export.runExportDiffUpdater updater db oldtree finaltree + Export.closeDb db buildImportCommit' :: Remote -> ImportCommitConfig -> Maybe Sha -> History Sha -> Annex (Maybe Sha) buildImportCommit' remote importcommitconfig mtrackingcommit imported@(History ti _) = diff --git a/CHANGELOG b/CHANGELOG index c393dad101..56f2c781fe 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -6,8 +6,10 @@ git-annex (10.20221004) UNRELEASED; urgency=medium do not operate on a repository that has an empty name. * move: Fix openFile crash with -J (Fixes a reversion in 8.20201103) - * S3: Speed up importing from a large bucket when fileprefix= is set + * S3: Speed up importing from a large bucket when fileprefix= is set, by only asking for files under the prefix. + * When importing from versioned remotes, fix tracking of the content + of deleted files. -- Joey Hess Mon, 03 Oct 2022 13:36:42 -0400 diff --git a/Command/Export.hs b/Command/Export.hs index 3c5b3ad214..d331bb3030 100644 --- a/Command/Export.hs +++ b/Command/Export.hs @@ -378,7 +378,7 @@ cleanupUnexport r db eks loc = do removeExportedLocation db ek loc flushDbQueue db - -- An versionedExport remote supports removeExportLocation to remove + -- A versionedExport remote supports removeExportLocation to remove -- the file from the exported tree, but still retains the content -- and allows retrieving it. unless (versionedExport (exportActions r)) $ do diff --git a/Types/Remote.hs b/Types/Remote.hs index e9e8a8c815..121d4fd5cd 100644 --- a/Types/Remote.hs +++ b/Types/Remote.hs @@ -270,9 +270,10 @@ data ExportActions a = ExportActions -- Can throw exception if unable to access remote, or if remote -- refuses to remove the content. 
, removeExport :: Key -> ExportLocation -> a () - -- Set when the content of a Key stored in the remote to an - -- ExportLocation and then removed with removeExport remains - -- accessible to retrieveKeyFile and checkPresent. + -- Set when the remote is versioned, so once a Key is stored + -- to an ExportLocation, a subsequent deletion of that + -- ExportLocation leaves the key still accessible to retrieveKeyFile + -- and checkPresent. , versionedExport :: Bool -- Removes an exported directory. Typically the directory will be -- empty, but it could possibly contain files or other directories, diff --git a/doc/bugs/annex_import_does_not_account_for_versioning_on_S3.mdwn b/doc/bugs/annex_import_does_not_account_for_versioning_on_S3.mdwn index d631fac177..4f6229e34c 100644 --- a/doc/bugs/annex_import_does_not_account_for_versioning_on_S3.mdwn +++ b/doc/bugs/annex_import_does_not_account_for_versioning_on_S3.mdwn @@ -13,3 +13,4 @@ Observed with several versions from 8-10 on linux. As laid out above, I strongly Lots. I love git-annex. +> [[fixed|done]] --[[Joey]] diff --git a/doc/bugs/annex_import_does_not_account_for_versioning_on_S3/comment_1_a04fbb936785456ed99512ea4c29fd53._comment b/doc/bugs/annex_import_does_not_account_for_versioning_on_S3/comment_1_a04fbb936785456ed99512ea4c29fd53._comment new file mode 100644 index 0000000000..c3d8aa057b --- /dev/null +++ b/doc/bugs/annex_import_does_not_account_for_versioning_on_S3/comment_1_a04fbb936785456ed99512ea4c29fd53._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 1""" + date="2022-10-11T16:28:39Z" + content=""" +This looks like a simple fix. After importing from a versioned remote, +it can just skip updating the location logs to remove the keys that are not +present in the current tree. The same as is already done when exporting +to a versioned remote. I've made that change. 
+"""]] diff --git a/doc/design/external_special_remote_protocol/export_and_import_appendix.mdwn b/doc/design/external_special_remote_protocol/export_and_import_appendix.mdwn index 24004402fe..b2f07bad0b 100644 --- a/doc/design/external_special_remote_protocol/export_and_import_appendix.mdwn +++ b/doc/design/external_special_remote_protocol/export_and_import_appendix.mdwn @@ -150,6 +150,13 @@ support a request, it can reply with `UNSUPPORTED-REQUEST`. Indicates that `IMPORTKEY` can be used. * `IMPORTKEYSUPPORTED-FAILURE` Indicates that `IMPORTKEY` cannot be used. + * `VERSIONED` + Used to check if the special remote is versioned. + Note that this request may be made before or after `PREPARE`. + * `ISVERSIONED` + Indicates that the remote is versioned. + * `NOTVERSIONED` + Indicates that the remote is not versioned. * `LISTIMPORTABLECONTENTS` Used to get a list of all the files that are stored in the special remote. A block of responses @@ -170,6 +177,8 @@ support a request, it can reply with `UNSUPPORTED-REQUEST`. be nested multiple levels deep. This should only be used when the remote supports using "TRANSFER RECEIVE Key" to retrieve historical versions of files. + And, it should only be used when the remote replies `ISVERSIONED` + to the `VERSIONED` message. * `END` Indicates the end of a block of responses. 
* `LOCATION Name` From b312b2a30bbd099c33e4490238aec18d72a6e83c Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Tue, 11 Oct 2022 15:02:40 -0400 Subject: [PATCH 04/31] update --- ..._981cc786be798a612046d207c0f85955._comment | 6 +++ ..._4766362f74f135887d5b6103db9a8a06._comment | 42 +++++++++++++++++++ 2 files changed, 48 insertions(+) create mode 100644 doc/bugs/get_is_busy_doing_nothing/comment_25_4766362f74f135887d5b6103db9a8a06._comment diff --git a/doc/bugs/get_is_busy_doing_nothing/comment_20_981cc786be798a612046d207c0f85955._comment b/doc/bugs/get_is_busy_doing_nothing/comment_20_981cc786be798a612046d207c0f85955._comment index 37f0889600..9b8e965f0d 100644 --- a/doc/bugs/get_is_busy_doing_nothing/comment_20_981cc786be798a612046d207c0f85955._comment +++ b/doc/bugs/get_is_busy_doing_nothing/comment_20_981cc786be798a612046d207c0f85955._comment @@ -19,4 +19,10 @@ an ever-growing amount of memory, and be slowed down by the write attempts. Still, it does give it something better to do while the write is failing than sleeping and retrying, eg to do the rest of the work it's been asked to do. + +(Update: Reads from a database first call flushDbQueue, and it would +not be safe for that to return without actually writing to the database, +since the read would then see possible stale information. It turns +out that `git-annex get` does do a database read per file (getAssociatedFiles). +So it seems this approach will not work.) 
"""]] diff --git a/doc/bugs/get_is_busy_doing_nothing/comment_25_4766362f74f135887d5b6103db9a8a06._comment b/doc/bugs/get_is_busy_doing_nothing/comment_25_4766362f74f135887d5b6103db9a8a06._comment new file mode 100644 index 0000000000..6fb6cd3c37 --- /dev/null +++ b/doc/bugs/get_is_busy_doing_nothing/comment_25_4766362f74f135887d5b6103db9a8a06._comment @@ -0,0 +1,42 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 25""" + date="2022-10-11T17:17:13Z" + content=""" +Revisiting this, I don't understand why the htop at the top of this page +lists so many `git-annex get` processes. They all seem to be children +of a single parent `git-annex get` process. But `git-annex get` does +not fork children like those. I wonder if perhaps those child +processes were forked() to do something else, but somehow hung before +they could exec?! + +By comment #2, annex.stalldetection is enabled, so `git-annex get` +runs 5 `git-annex transferrer` processes. Each of those can write to the +database, so concurrent sqlite writes can happen. So, my "re: comment 16" +comment was off-base in thinking there was a single git-annex process. +And so, I don't think the debug info requested in that comment is needed. + +Also, it turns out that the database queue is being flushed after every +file it gets, which is causing a sqlite write per file. So there are a +lot of sqlite writes happening, which probably makes this issue much more +likely to occur, on systems with slow enough disk IO that it does occur. +Especially if the files are relatively small. + +The reason for the queue flush is partly that Annex.run forces a queue +flush after every action. That could, I think be avoided. That was only +done to make sure the queue is flushed before the program exits, which +should be able to be handled in a different way. But also, +the queue has to be flushed before reading from the database in order +for the read to see current information. 
In the `git-annex get` case, +it queues a change to the inode cache, and then reads the associated +files. To avoid that, it would need to keep track of the two different +tables in the keys db, and flush the queue only when querying a table +that a write had been queued to. That would be worth doing +just to generally speed up `git-annex get`. A quick benchmark shows +a get of 1000 small files that takes 17s will only take 12s once that's +done. And that's on a fast SSD, probably much more on a hard drive! + +So I don't have a full solution, but speeding git-annex up significantly and +also making whatever the problem in this bug is probably much less likely +to occur is a good next step.. +"""]] From 5bd79e717f91d7422894e80a395bb7e2948d4b23 Mon Sep 17 00:00:00 2001 From: "benjamin.poldrack@d09ccff6d42dd20277610b59867cf7462927b8e3" Date: Wed, 12 Oct 2022 06:12:17 +0000 Subject: [PATCH 05/31] Added a comment --- .../comment_2_6015c5488e69307424a3c0281b50a49e._comment | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 doc/bugs/annex_import_does_not_account_for_versioning_on_S3/comment_2_6015c5488e69307424a3c0281b50a49e._comment diff --git a/doc/bugs/annex_import_does_not_account_for_versioning_on_S3/comment_2_6015c5488e69307424a3c0281b50a49e._comment b/doc/bugs/annex_import_does_not_account_for_versioning_on_S3/comment_2_6015c5488e69307424a3c0281b50a49e._comment new file mode 100644 index 0000000000..134394ae66 --- /dev/null +++ b/doc/bugs/annex_import_does_not_account_for_versioning_on_S3/comment_2_6015c5488e69307424a3c0281b50a49e._comment @@ -0,0 +1,9 @@ +[[!comment format=mdwn + username="benjamin.poldrack@d09ccff6d42dd20277610b59867cf7462927b8e3" + nickname="benjamin.poldrack" + avatar="http://cdn.libravatar.org/avatar/5c1a901caa7c2cfeeb7e17e786c5230d" + subject="comment 2" + date="2022-10-12T06:12:17Z" + content=""" +Thank you! 
+"""]] From ba7ecbc6a9c3763e8152e4f46522d14a4ee2b59d Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 12 Oct 2022 13:50:46 -0400 Subject: [PATCH 06/31] avoid flushing keys db queue after each Annex action The flush was only done Annex.run' to make sure that the queue was flushed before git-annex exits. But, doing it there means that as soon as one change gets queued, it gets flushed soon after, which contributes to excessive writes to the database, slowing git-annex down. (This does not yet speed git-annex up, but it is a stepping stone to doing so.) Database queues do not autoflush when garbage collected, so have to be flushed explicitly. I don't think it's possible to make them autoflush (except perhaps if git-annex sqitched to using ResourceT..). The comment in Database.Keys.closeDb used to be accurate, since the automatic flushing did mean that all writes reached the database even when closeDb was not called. But now, closeDb or flushDb needs to be called before stopping using an Annex state. So, removed that comment. In Remote.Git, change to using quiesce everywhere that it used to use stopCoProcesses. This means that uses on onLocal in there are just as slow as before. I considered only calling closeDb on the local git remotes when git-annex exits. But, the reason that Remote.Git calls stopCoProcesses in each onLocal is so as not to leave git processes running that have files open on the remote repo, when it's on removable media. So, it seemed to make sense to also closeDb after each one, since sqlite may also keep files open. Although that has not seemed to cause problems with removable media so far. It was also just easier to quiesce in each onLocal than once at the end. This does likely leave performance on the floor, so could be revisited. In Annex.Content.saveState, there was no reason to close the db, flushing it is enough. 
The rest of the changes are from auditing for Annex.new, and making sure that quiesce is called, after any action that might possibly need it. After that audit, I'm pretty sure that the change to Annex.run' is safe. The only concern might be that this does let more changes get queued for write to the db, and if git-annex is interrupted, those will be lost. But interrupting git-annex can obviously already prevent it from writing the most recent change to the db, so it must recover from such lost data... right? Sponsored-by: Dartmouth College's Datalad project --- Annex.hs | 4 ---- Annex/Action.hs | 26 ++++++++++++++++++++------ Annex/Content.hs | 2 +- Assistant/MakeRepo.hs | 2 +- Build/DistributionUpdate.hs | 2 ++ CmdLine.hs | 2 +- CmdLine/GitRemoteTorAnnex.hs | 5 ++++- Command/RecvKey.hs | 4 +--- Command/WebApp.hs | 5 ++++- Database/Keys.hs | 13 ++++++++----- Remote/Git.hs | 16 +++++++--------- 11 files changed, 49 insertions(+), 32 deletions(-) diff --git a/Annex.hs b/Annex.hs index 0f0464dcac..482c8455d4 100644 --- a/Annex.hs +++ b/Annex.hs @@ -287,12 +287,8 @@ run (st, rd) a = do run' :: MVar AnnexState -> AnnexRead -> Annex a -> IO (a, (AnnexState, AnnexRead)) run' mvar rd a = do r <- runReaderT (runAnnex a) (mvar, rd) - `onException` (flush rd) - flush rd st <- takeMVar mvar return (r, (st, rd)) - where - flush = Keys.flushDbQueue . keysdbhandle {- Performs an action in the Annex monad from a starting state, - and throws away the changed state. -} diff --git a/Annex/Action.hs b/Annex/Action.hs index 95b440fe8c..5c3bf6ca80 100644 --- a/Annex/Action.hs +++ b/Annex/Action.hs @@ -1,6 +1,6 @@ {- git-annex actions - - - Copyright 2010-2020 Joey Hess + - Copyright 2010-2022 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. 
-} @@ -11,7 +11,7 @@ module Annex.Action ( action, verifiedAction, startup, - shutdown, + quiesce, stopCoProcesses, ) where @@ -25,6 +25,7 @@ import Annex.CheckAttr import Annex.HashObject import Annex.CheckIgnore import Annex.TransferrerPool +import qualified Database.Keys import Control.Concurrent.STM #ifndef mingw32_HOST_OS @@ -74,12 +75,25 @@ startup = do return () #endif -{- Cleanup actions. -} -shutdown :: Bool -> Annex () -shutdown nocommit = do +{- Rn all cleanup actions, save all state, stop all long-running child + - processes. + - + - This can be run repeatedly with other Annex actions run in between, + - but usually it is run only once at the end. + - + - When passed True, avoids making any commits to the git-annex branch, + - leaving changes in the journal for later commit. + -} +quiesce :: Bool -> Annex () +quiesce nocommit = do + cas <- Annex.withState $ \st -> return + ( st { Annex.cleanupactions = mempty } + , Annex.cleanupactions st + ) + sequence_ (M.elems cas) saveState nocommit - sequence_ =<< M.elems <$> Annex.getState Annex.cleanupactions stopCoProcesses + Database.Keys.closeDb {- Stops all long-running child processes, including git query processes. 
-} stopCoProcesses :: Annex () diff --git a/Annex/Content.hs b/Annex/Content.hs index e0dc1a7841..15eab12c2f 100644 --- a/Annex/Content.hs +++ b/Annex/Content.hs @@ -718,7 +718,7 @@ listKeys' keyloc want = do saveState :: Bool -> Annex () saveState nocommit = doSideAction $ do Annex.Queue.flush - Database.Keys.closeDb + Database.Keys.flushDb unless nocommit $ whenM (annexAlwaysCommit <$> Annex.getGitConfig) $ Annex.Branch.commit =<< Annex.Branch.commitMessage diff --git a/Assistant/MakeRepo.hs b/Assistant/MakeRepo.hs index 8132dbca53..632c4abda5 100644 --- a/Assistant/MakeRepo.hs +++ b/Assistant/MakeRepo.hs @@ -49,7 +49,7 @@ inDir dir a = do state <- Annex.new =<< Git.Config.read =<< Git.Construct.fromPath (toRawFilePath dir) - Annex.eval state $ a `finally` stopCoProcesses + Annex.eval state $ a `finally` quiesce True {- Creates a new repository, and returns its UUID. -} initRepo :: Bool -> Bool -> FilePath -> Maybe String -> Maybe StandardGroup -> IO UUID diff --git a/Build/DistributionUpdate.hs b/Build/DistributionUpdate.hs index d2327d0bd3..d48be43efe 100644 --- a/Build/DistributionUpdate.hs +++ b/Build/DistributionUpdate.hs @@ -24,6 +24,7 @@ import Annex.Content import Annex.WorkTree import Git.Command import qualified Utility.RawFilePath as R +import Annex.Actions import Data.Time.Clock import Data.Char @@ -70,6 +71,7 @@ main = do ood <- Annex.eval state $ do buildrpms topdir updated makeinfos updated version + quiesce False syncToArchiveOrg unless (null ood) $ error $ "Some info files are out of date: " ++ show (map fst ood) diff --git a/CmdLine.hs b/CmdLine.hs index 0b553b9b2f..a170434107 100644 --- a/CmdLine.hs +++ b/CmdLine.hs @@ -63,7 +63,7 @@ dispatch' subcommandname args fuzzy cmds allargs allcmds fields getgitrepo progn prepRunCommand cmd annexsetter startup performCommandAction True cmd seek $ - shutdown $ cmdnocommit cmd + quiesce $ cmdnocommit cmd go (Left norepo) = do let ingitrepo = \a -> a =<< Git.Config.global -- Parse command line with full 
cmdparser first, diff --git a/CmdLine/GitRemoteTorAnnex.hs b/CmdLine/GitRemoteTorAnnex.hs index 5c67aa2a86..d937b652c7 100644 --- a/CmdLine/GitRemoteTorAnnex.hs +++ b/CmdLine/GitRemoteTorAnnex.hs @@ -17,6 +17,7 @@ import Utility.AuthToken import Annex.UUID import P2P.Address import P2P.Auth +import Annex.Action run :: [String] -> IO () run (_remotename:address:[]) = forever $ @@ -59,6 +60,8 @@ connectService address port service = do g <- Annex.gitRepo conn <- liftIO $ connectPeer g (TorAnnex address port) runst <- liftIO $ mkRunState Client - liftIO $ runNetProto runst conn $ auth myuuid authtoken noop >>= \case + r <- liftIO $ runNetProto runst conn $ auth myuuid authtoken noop >>= \case Just _theiruuid -> connect service stdin stdout Nothing -> giveup $ "authentication failed, perhaps you need to set " ++ p2pAuthTokenEnv + quiesce False + return r diff --git a/Command/RecvKey.hs b/Command/RecvKey.hs index e6832e32e2..11bd80f761 100644 --- a/Command/RecvKey.hs +++ b/Command/RecvKey.hs @@ -31,9 +31,7 @@ start (_, key) = fieldTransfer Download key $ \_p -> do ifM (getViaTmp rsp DefaultVerify key (AssociatedFile Nothing) go) ( do logStatus key InfoPresent - -- forcibly quit after receiving one key, - -- and shutdown cleanly - _ <- shutdown True + _ <- quiesce True return True , return False ) diff --git a/Command/WebApp.hs b/Command/WebApp.hs index ce0759f278..236a94dac4 100644 --- a/Command/WebApp.hs +++ b/Command/WebApp.hs @@ -30,6 +30,7 @@ import qualified Annex import Config.Files.AutoStart import Upgrade import Annex.Version +import Annex.Action import Utility.Android import Control.Concurrent @@ -126,8 +127,10 @@ startNoRepo o = go =<< liftIO (filterM doesDirectoryExist =<< readAutoStartFile) Right state -> void $ Annex.eval state $ do whenM (fromRepo Git.repoIsLocalBare) $ giveup $ d ++ " is a bare git repository, cannot run the webapp in it" - callCommandAction $ + r <- callCommandAction $ start' False o + quiesce False + return r cannotStartIn :: FilePath 
-> String -> IO () cannotStartIn d reason = warningIO $ "unable to start webapp in repository " ++ d ++ ": " ++ reason diff --git a/Database/Keys.hs b/Database/Keys.hs index f376355f23..6b4f3e4782 100644 --- a/Database/Keys.hs +++ b/Database/Keys.hs @@ -1,6 +1,6 @@ {- Sqlite database of information about Keys - - - Copyright 2015-2021 Joey Hess + - Copyright 2015-2022 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -12,6 +12,7 @@ module Database.Keys ( DbHandle, closeDb, + flushDb, addAssociatedFile, getAssociatedFiles, getAssociatedFilesIncluding, @@ -143,14 +144,16 @@ openDb forwrite _ = do {- Closes the database if it was open. Any writes will be flushed to it. - - - This does not normally need to be called; the database will auto-close - - when the handle is garbage collected. However, this can be used to - - force a re-read of the database, in case another process has written - - data to it. + - This does not prevent further use of the database; it will be re-opened + - as necessary. -} closeDb :: Annex () closeDb = liftIO . closeDbHandle =<< Annex.getRead Annex.keysdbhandle +{- Flushes any queued writes to the database. -} +flushDb :: Annex () +flushDb = liftIO . 
flushDbQueue =<< Annex.getRead Annex.keysdbhandle + addAssociatedFile :: Key -> TopFilePath -> Annex () addAssociatedFile k f = runWriterIO $ SQL.addAssociatedFile k f diff --git a/Remote/Git.hs b/Remote/Git.hs index 41ea016cf2..81d00f02ae 100644 --- a/Remote/Git.hs +++ b/Remote/Git.hs @@ -355,7 +355,8 @@ tryGitConfigRead autoinit r hasuuid ":" ++ show e Annex.getState Annex.repo s <- newLocal r - liftIO $ Annex.eval s $ check `finally` stopCoProcesses + liftIO $ Annex.eval s $ check + `finally` quiesce True failedreadlocalconfig = do unless hasuuid $ case Git.remoteName r of @@ -449,7 +450,6 @@ dropKey' repo r st@(State connpool duc _ _ _) key Annex.Content.lockContentForRemoval key cleanup $ \lock -> do Annex.Content.removeAnnex lock cleanup - Annex.Content.saveState True , giveup "remote does not have expected annex.uuid value" ) | Git.repoIsHttp repo = giveup "dropping from http remote not supported" @@ -577,11 +577,9 @@ copyToRemote' repo r st@(State connpool duc _ _ _) key file meterupdate let checksuccess = liftIO checkio >>= \case Just err -> giveup err Nothing -> return True - res <- logStatusAfter key $ Annex.Content.getViaTmp rsp verify key file $ \dest -> + logStatusAfter key $ Annex.Content.getViaTmp rsp verify key file $ \dest -> metered (Just (combineMeterUpdate meterupdate p)) key bwlimit $ \_ p' -> copier object (fromRawFilePath dest) key p' checksuccess verify - Annex.Content.saveState True - return res ) unless res $ giveup "failed to send content to remote" @@ -606,7 +604,7 @@ repairRemote r a = return $ do Annex.eval s $ do Annex.BranchState.disableUpdate ensureInitialized (pure []) - a `finally` stopCoProcesses + a `finally` quiesce True data LocalRemoteAnnex = LocalRemoteAnnex Git.Repo (MVar [(Annex.AnnexState, Annex.AnnexRead)]) @@ -618,8 +616,8 @@ mkLocalRemoteAnnex repo = LocalRemoteAnnex repo <$> liftIO (newMVar []) {- Runs an action from the perspective of a local remote. 
- - The AnnexState is cached for speed and to avoid resource leaks. - - However, coprocesses are stopped after each call to avoid git - - processes hanging around on removable media. + - However, it is quiesced after each call to avoid git processes + - hanging around on removable media. - - The remote will be automatically initialized/upgraded first, - when possible. @@ -655,7 +653,7 @@ onLocal' (LocalRemoteAnnex repo mv) a = liftIO (takeMVar mv) >>= \case go ((st, rd), a') = do curro <- Annex.getState Annex.output let act = Annex.run (st { Annex.output = curro }, rd) $ - a' `finally` stopCoProcesses + a' `finally` quiesce True (ret, (st', _rd)) <- liftIO $ act `onException` cache (st, rd) liftIO $ cache (st', rd) return ret From 6fbd337e34cd62881ed5e21616e9c00d7bf35378 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 12 Oct 2022 15:21:19 -0400 Subject: [PATCH 07/31] avoid unnecessary keys db writes; doubled speed! When running eg git-annex get, for each file it has to read from and write to the keys database. But it's reading exclusively from one table, and writing to a different table. So, it is not necessary to flush the write to the database before reading. This avoids writing the database once per file, instead it will buffer 1000 changes before writing. Benchmarking getting 1000 small files from a local origin, git-annex get now takes 13.62s, down from 22.41s! git-annex drop now takes 9.07s, down from 18.63s! Wowowowowowowow! (It would perhaps have been better if there were separate databases for the two tables. At least it would have avoided this complexity. Ah well, this is better than splitting the table in an annex.version upgrade.)
Sponsored-by: Dartmouth College's Datalad project --- Annex/WorkTree.hs | 13 +---- CHANGELOG | 2 + Database/Keys.hs | 105 ++++++++++++++++++++++++---------------- Database/Keys/Handle.hs | 13 +++-- Database/Keys/Tables.hs | 38 +++++++++++++++ Upgrade/V7.hs | 4 +- git-annex.cabal | 1 + 7 files changed, 117 insertions(+), 59 deletions(-) create mode 100644 Database/Keys/Tables.hs diff --git a/Annex/WorkTree.hs b/Annex/WorkTree.hs index d7e423c6e8..95e0d18e2b 100644 --- a/Annex/WorkTree.hs +++ b/Annex/WorkTree.hs @@ -53,15 +53,6 @@ whenAnnexed a file = ifAnnexed file (a file) (return Nothing) ifAnnexed :: RawFilePath -> (Key -> Annex a) -> Annex a -> Annex a ifAnnexed file yes no = maybe no yes =<< lookupKey file -{- Find all annexed files and update the keys database for them. - - - - Normally the keys database is updated incrementally when it's being - - opened, and changes are noticed. Calling this explicitly allows - - running the update at an earlier point. - - - - All that needs to be done is to open the database, - - that will result in Database.Keys.reconcileStaged - - running, and doing the work. - -} +{- Find all annexed files and update the keys database for them. -} scanAnnexedFiles :: Annex () -scanAnnexedFiles = Database.Keys.runWriter (const noop) +scanAnnexedFiles = Database.Keys.updateDatabase diff --git a/CHANGELOG b/CHANGELOG index 56f2c781fe..6c045d7609 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,5 +1,7 @@ git-annex (10.20221004) UNRELEASED; urgency=medium + * Doubled the speed of git-annex drop when operating on many files, + and of git-annex get when operating on many tiny files. * trust, untrust, semitrust, dead: Fix behavior when provided with multiple repositories to operate on. 
* trust, untrust, semitrust, dead: When provided with no parameters, diff --git a/Database/Keys.hs b/Database/Keys.hs index 6b4f3e4782..45f8d2f851 100644 --- a/Database/Keys.hs +++ b/Database/Keys.hs @@ -25,11 +25,13 @@ module Database.Keys ( removeInodeCache, isInodeKnown, runWriter, + updateDatabase, ) where import qualified Database.Keys.SQL as SQL import Database.Types import Database.Keys.Handle +import Database.Keys.Tables import qualified Database.Queue as H import Database.Init import Annex.Locations @@ -64,49 +66,53 @@ import Control.Concurrent.Async - If the database is already open, any writes are flushed to it, to ensure - consistency. - - - Any queued writes will be flushed before the read. + - Any queued writes to the table will be flushed before the read. -} -runReader :: Monoid v => (SQL.ReadHandle -> Annex v) -> Annex v -runReader a = do +runReader :: Monoid v => DbTable -> (SQL.ReadHandle -> Annex v) -> Annex v +runReader t a = do h <- Annex.getRead Annex.keysdbhandle withDbState h go where go DbUnavailable = return (mempty, DbUnavailable) - go st@(DbOpen qh) = do - liftIO $ H.flushDbQueue qh + go (DbOpen (qh, tableschanged)) = do + tableschanged' <- if isDbTableChanged tableschanged t + then do + liftIO $ H.flushDbQueue qh + return mempty + else return tableschanged v <- a (SQL.ReadHandle qh) - return (v, st) + return (v, DbOpen (qh, tableschanged')) go DbClosed = do - st' <- openDb False DbClosed - v <- case st' of - (DbOpen qh) -> a (SQL.ReadHandle qh) + st <- openDb False DbClosed + v <- case st of + (DbOpen (qh, _)) -> a (SQL.ReadHandle qh) _ -> return mempty - return (v, st') + return (v, st) -runReaderIO :: Monoid v => (SQL.ReadHandle -> IO v) -> Annex v -runReaderIO a = runReader (liftIO . a) +runReaderIO :: Monoid v => DbTable -> (SQL.ReadHandle -> IO v) -> Annex v +runReaderIO t a = runReader t (liftIO . a) {- Runs an action that writes to the database. Typically this is used to - queue changes, which will be flushed at a later point. 
- - The database is created if it doesn't exist yet. -} -runWriter :: (SQL.WriteHandle -> Annex ()) -> Annex () -runWriter a = do +runWriter :: DbTable -> (SQL.WriteHandle -> Annex ()) -> Annex () +runWriter t a = do h <- Annex.getRead Annex.keysdbhandle withDbState h go where - go st@(DbOpen qh) = do + go (DbOpen (qh, tableschanged)) = do v <- a (SQL.WriteHandle qh) - return (v, st) + return (v, DbOpen (qh, addDbTable tableschanged t)) go st = do st' <- openDb True st v <- case st' of - DbOpen qh -> a (SQL.WriteHandle qh) + DbOpen (qh, _) -> a (SQL.WriteHandle qh) _ -> error "internal" return (v, st') -runWriterIO :: (SQL.WriteHandle -> IO ()) -> Annex () -runWriterIO a = runWriter (liftIO . a) +runWriterIO :: DbTable -> (SQL.WriteHandle -> IO ()) -> Annex () +runWriterIO t a = runWriter t (liftIO . a) {- Opens the database, creating it if it doesn't exist yet. - @@ -139,8 +145,8 @@ openDb forwrite _ = do open db = do qh <- liftIO $ H.openDbQueue db SQL.containedTable - reconcileStaged qh - return $ DbOpen qh + tc <- reconcileStaged qh + return $ DbOpen (qh, tc) {- Closes the database if it was open. Any writes will be flushed to it. - @@ -155,12 +161,13 @@ flushDb :: Annex () flushDb = liftIO . flushDbQueue =<< Annex.getRead Annex.keysdbhandle addAssociatedFile :: Key -> TopFilePath -> Annex () -addAssociatedFile k f = runWriterIO $ SQL.addAssociatedFile k f +addAssociatedFile k f = runWriterIO AssociatedTable $ SQL.addAssociatedFile k f {- Note that the files returned were once associated with the key, but - some of them may not be any longer. -} getAssociatedFiles :: Key -> Annex [TopFilePath] -getAssociatedFiles k = emptyWhenBare $ runReaderIO $ SQL.getAssociatedFiles k +getAssociatedFiles k = emptyWhenBare $ runReaderIO AssociatedTable $ + SQL.getAssociatedFiles k {- Queries for associated files never return anything when in a bare - repository, since without a work tree there can be no associated files. 
@@ -186,10 +193,12 @@ getAssociatedFilesIncluding afile k = emptyWhenBare $ do {- Gets any keys that are on record as having a particular associated file. - (Should be one or none but the database doesn't enforce that.) -} getAssociatedKey :: TopFilePath -> Annex [Key] -getAssociatedKey f = emptyWhenBare $ runReaderIO $ SQL.getAssociatedKey f +getAssociatedKey f = emptyWhenBare $ runReaderIO AssociatedTable $ + SQL.getAssociatedKey f removeAssociatedFile :: Key -> TopFilePath -> Annex () -removeAssociatedFile k = runWriterIO . SQL.removeAssociatedFile k +removeAssociatedFile k = runWriterIO AssociatedTable . + SQL.removeAssociatedFile k {- Stats the files, and stores their InodeCaches. -} storeInodeCaches :: Key -> [RawFilePath] -> Annex () @@ -198,7 +207,7 @@ storeInodeCaches k fs = withTSDelta $ \d -> =<< liftIO (mapM (\f -> genInodeCache f d) fs) addInodeCaches :: Key -> [InodeCache] -> Annex () -addInodeCaches k is = runWriterIO $ SQL.addInodeCaches k is +addInodeCaches k is = runWriterIO ContentTable $ SQL.addInodeCaches k is {- A key may have multiple InodeCaches; one for the annex object, and one - for each pointer file that is a copy of it. @@ -210,18 +219,19 @@ addInodeCaches k is = runWriterIO $ SQL.addInodeCaches k is - for pointer files, but none recorded for the annex object. -} getInodeCaches :: Key -> Annex [InodeCache] -getInodeCaches = runReaderIO . SQL.getInodeCaches +getInodeCaches = runReaderIO ContentTable . SQL.getInodeCaches {- Remove all inodes cached for a key. -} removeInodeCaches :: Key -> Annex () -removeInodeCaches = runWriterIO . SQL.removeInodeCaches +removeInodeCaches = runWriterIO ContentTable . SQL.removeInodeCaches {- Remove cached inodes, for any key. -} removeInodeCache :: InodeCache -> Annex () -removeInodeCache = runWriterIO . SQL.removeInodeCache +removeInodeCache = runWriterIO ContentTable . 
SQL.removeInodeCache isInodeKnown :: InodeCache -> SentinalStatus -> Annex Bool -isInodeKnown i s = or <$> runReaderIO ((:[]) <$$> SQL.isInodeKnown i s) +isInodeKnown i s = or <$> runReaderIO ContentTable + ((:[]) <$$> SQL.isInodeKnown i s) {- Looks at staged changes to annexed files, and updates the keys database, - so that its information is consistent with the state of the repository. @@ -250,18 +260,21 @@ isInodeKnown i s = or <$> runReaderIO ((:[]) <$$> SQL.isInodeKnown i s) - So when using getAssociatedFiles, have to make sure the file still - is an associated file. -} -reconcileStaged :: H.DbQueue -> Annex () -reconcileStaged qh = unlessM (Git.Config.isBare <$> gitRepo) $ do - gitindex <- inRepo currentIndexFile - indexcache <- fromRawFilePath <$> calcRepo' gitAnnexKeysDbIndexCache - withTSDelta (liftIO . genInodeCache gitindex) >>= \case - Just cur -> readindexcache indexcache >>= \case - Nothing -> go cur indexcache =<< getindextree - Just prev -> ifM (compareInodeCaches prev cur) - ( noop - , go cur indexcache =<< getindextree - ) - Nothing -> noop +reconcileStaged :: H.DbQueue -> Annex DbTablesChanged +reconcileStaged qh = ifM (Git.Config.isBare <$> gitRepo) + ( return mempty + , do + gitindex <- inRepo currentIndexFile + indexcache <- fromRawFilePath <$> calcRepo' gitAnnexKeysDbIndexCache + withTSDelta (liftIO . genInodeCache gitindex) >>= \case + Just cur -> readindexcache indexcache >>= \case + Nothing -> go cur indexcache =<< getindextree + Just prev -> ifM (compareInodeCaches prev cur) + ( return mempty + , go cur indexcache =<< getindextree + ) + Nothing -> return mempty + ) where lastindexref = Ref "refs/annex/last-index" @@ -286,6 +299,7 @@ reconcileStaged qh = unlessM (Git.Config.isBare <$> gitRepo) $ do -- against next time. inRepo $ update' lastindexref newtree fastDebug "Database.Keys" "reconcileStaged end" + return (DbTablesChanged True True) -- git write-tree will fail if the index is locked or when there is -- a merge conflict. 
To get up-to-date with the current index, -- diff --staged with the old index tree. The current index tree @@ -307,6 +321,7 @@ reconcileStaged qh = unlessM (Git.Config.isBare <$> gitRepo) $ do void $ updatetodiff g Nothing "--staged" (procmergeconflictdiff mdfeeder) fastDebug "Database.Keys" "reconcileStaged end" + return (DbTablesChanged True True) updatetodiff g old new processor = do (l, cleanup) <- pipeNullSplit' (diff old new) g @@ -482,3 +497,9 @@ reconcileStaged qh = unlessM (Git.Config.isBare <$> gitRepo) $ do largediff :: Int largediff = 1000 +{- Normally the keys database is updated incrementally when opened, + - by reconcileStaged. Calling this explicitly allows running the + - update at an earlier point. + -} +updateDatabase :: Annex () +updateDatabase = runWriter ContentTable (const noop) diff --git a/Database/Keys/Handle.hs b/Database/Keys/Handle.hs index ed7cc6e6c8..1e4a85427b 100644 --- a/Database/Keys/Handle.hs +++ b/Database/Keys/Handle.hs @@ -15,6 +15,7 @@ module Database.Keys.Handle ( ) where import qualified Database.Queue as H +import Database.Keys.Tables import Utility.Exception import Utility.DebugLocks @@ -29,7 +30,7 @@ newtype DbHandle = DbHandle (MVar DbState) -- The database can be closed or open, but it also may have been -- tried to open (for read) and didn't exist yet or is not readable. 
-data DbState = DbClosed | DbOpen H.DbQueue | DbUnavailable +data DbState = DbClosed | DbOpen (H.DbQueue, DbTablesChanged) | DbUnavailable newDbHandle :: IO DbHandle newDbHandle = DbHandle <$> newMVar DbClosed @@ -52,15 +53,17 @@ withDbState (DbHandle mvar) a = do return v flushDbQueue :: DbHandle -> IO () -flushDbQueue (DbHandle mvar) = go =<< debugLocks (readMVar mvar) +flushDbQueue h = withDbState h go where - go (DbOpen qh) = H.flushDbQueue qh - go _ = return () + go (DbOpen (qh, _)) = do + H.flushDbQueue qh + return ((), DbOpen (qh, mempty)) + go st = return ((), st) closeDbHandle :: DbHandle -> IO () closeDbHandle h = withDbState h go where - go (DbOpen qh) = do + go (DbOpen (qh, _)) = do H.closeDbQueue qh return ((), DbClosed) go st = return ((), st) diff --git a/Database/Keys/Tables.hs b/Database/Keys/Tables.hs new file mode 100644 index 0000000000..ab6a4fb757 --- /dev/null +++ b/Database/Keys/Tables.hs @@ -0,0 +1,38 @@ +{- Keeping track of which tables in the keys database have changed + - + - Copyright 2022 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. 
+ -} + +module Database.Keys.Tables where + +import Data.Monoid +import qualified Data.Semigroup as Sem +import Prelude + +data DbTable = AssociatedTable | ContentTable + deriving (Eq, Show) + +data DbTablesChanged = DbTablesChanged + { associatedTable :: Bool + , contentTable :: Bool + } + deriving (Show) + +instance Sem.Semigroup DbTablesChanged where + a <> b = DbTablesChanged + { associatedTable = associatedTable a || associatedTable b + , contentTable = contentTable a || contentTable b + } + +instance Monoid DbTablesChanged where + mempty = DbTablesChanged False False + +addDbTable :: DbTablesChanged -> DbTable -> DbTablesChanged +addDbTable ts AssociatedTable = ts { associatedTable = True } +addDbTable ts ContentTable = ts { contentTable = True } + +isDbTableChanged :: DbTablesChanged -> DbTable -> Bool +isDbTableChanged ts AssociatedTable = associatedTable ts +isDbTableChanged ts ContentTable = contentTable ts diff --git a/Upgrade/V7.hs b/Upgrade/V7.hs index 28e808b309..219c11ed14 100644 --- a/Upgrade/V7.hs +++ b/Upgrade/V7.hs @@ -16,6 +16,7 @@ import Types.Upgrade import Annex.CatFile import qualified Database.Keys import qualified Database.Keys.SQL +import Database.Keys.Tables import qualified Git.LsFiles as LsFiles import qualified Git import Git.FilePath @@ -114,8 +115,9 @@ populateKeysDb = unlessM isBareRepo $ do Nothing -> noop Just k -> do topf <- inRepo $ toTopFilePath $ toRawFilePath f - Database.Keys.runWriter $ \h -> liftIO $ do + Database.Keys.runWriter AssociatedTable $ \h -> liftIO $ Database.Keys.SQL.addAssociatedFile k topf h + Database.Keys.runWriter ContentTable $ \h -> liftIO $ Database.Keys.SQL.addInodeCaches k [ic] h liftIO $ void cleanup Database.Keys.closeDb diff --git a/git-annex.cabal b/git-annex.cabal index ce0cd60ea7..c60e627682 100644 --- a/git-annex.cabal +++ b/git-annex.cabal @@ -830,6 +830,7 @@ Executable git-annex Database.Init Database.Keys Database.Keys.Handle + Database.Keys.Tables Database.Keys.SQL Database.Queue 
Database.Types From d5cd1de2803725308eff1778cb053d03b52b5abf Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 12 Oct 2022 15:53:56 -0400 Subject: [PATCH 08/31] update and open a todo about something I'm pondering --- ..._d02b310b007b304e0eda23bd5e565851._comment | 16 +++++++++++++++ .../withExclusiveLock_blocking_issue.mdwn | 20 +++++++++++++++++++ 2 files changed, 36 insertions(+) create mode 100644 doc/bugs/get_is_busy_doing_nothing/comment_26_d02b310b007b304e0eda23bd5e565851._comment create mode 100644 doc/todo/withExclusiveLock_blocking_issue.mdwn diff --git a/doc/bugs/get_is_busy_doing_nothing/comment_26_d02b310b007b304e0eda23bd5e565851._comment b/doc/bugs/get_is_busy_doing_nothing/comment_26_d02b310b007b304e0eda23bd5e565851._comment new file mode 100644 index 0000000000..3aafff13ff --- /dev/null +++ b/doc/bugs/get_is_busy_doing_nothing/comment_26_d02b310b007b304e0eda23bd5e565851._comment @@ -0,0 +1,16 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 26""" + date="2022-10-12T19:34:05Z" + content=""" +I've avoided the excessive sqlite database writes. Which doubled the speed +of git-annex in some circumstances, wow! + +@yoh see if it still happens once you upgrade to a git-annex +with [[!commit 6fbd337e34cd62881ed5e21616e9c00d7bf35378]]. + +It would be possible for git-annex to do its own locking around writes +to the sqlite database. That would surely avoid any problem that sqlite might +have that would cause ErrorBusy. I want to think some about +[[todo/withExclusiveLock_blocking_issue]] first. +"""]] diff --git a/doc/todo/withExclusiveLock_blocking_issue.mdwn b/doc/todo/withExclusiveLock_blocking_issue.mdwn new file mode 100644 index 0000000000..6915015da3 --- /dev/null +++ b/doc/todo/withExclusiveLock_blocking_issue.mdwn @@ -0,0 +1,20 @@ +Some parts of git-annex use withExclusiveLock or otherwise wait for an +exclusive lock and hold it while performing an operation. Now consider what +happens if the git-annex process is suspended. 
Another git-annex process +that is running and that blocks on the same lock will stall forever, until +the git-annex process is resumed. + +These time windows tend to be small, but may not always be. + +Would it be better for the second git-annex process, rather than hanging +indefinitely, to try to take the lock a few times over a few seconds, and +then error out? The risk with doing that is, when 2 concurrent git-annex +processes are running and taking the locks repeatedly, one might get +unlucky, fail to take the lock, and error out, when waiting a little longer +would have succeeded, because the other process is not holding the lock all +the time. + +Is there any better way git-annex could handle this? Is it a significant +problem at all? I don't think I've ever seen it happen, but I rarely ^Z +git-annex either. How do other programs handle this, if at all? +--[[Joey]] From a8a5d444c528ae2de9f130583443b9b41116ff3f Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 12 Oct 2022 16:13:30 -0400 Subject: [PATCH 09/31] test v10 --- Test.hs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/Test.hs b/Test.hs index 78eeef882e..cb0c515ee9 100644 --- a/Test.hs +++ b/Test.hs @@ -130,9 +130,9 @@ tests n crippledfilesystem adjustedbranchok opts = : concatMap mkrepotests testmodes where testmodes = catMaybes - [ canadjust ("v8 adjusted unlocked branch", (testMode opts (RepoVersion 8)) { adjustedUnlockedBranch = True }) - , unlesscrippled ("v8 unlocked", (testMode opts (RepoVersion 8)) { unlockedFiles = True }) - , unlesscrippled ("v8 locked", testMode opts (RepoVersion 8)) + [ canadjust ("v10 adjusted unlocked branch", (testMode opts (RepoVersion 10)) { adjustedUnlockedBranch = True }) + , unlesscrippled ("v10 unlocked", (testMode opts (RepoVersion 10)) { unlockedFiles = True }) + , unlesscrippled ("v10 locked", testMode opts (RepoVersion 10)) ] remotetestmode = testMode opts (RepoVersion 8) unlesscrippled v From dbca1781d9779a71a7846b77efae2280254e9393 
Mon Sep 17 00:00:00 2001 From: "asakurareiko@f3d908c71c009580228b264f63f21c7274df7476" Date: Thu, 13 Oct 2022 15:29:12 +0000 Subject: [PATCH 10/31] Update WSL1 tips --- .../Using_git-annex_on_NTFS_with_WSL1.mdwn | 126 +++++++++++++++++- 1 file changed, 119 insertions(+), 7 deletions(-) diff --git a/doc/tips/Using_git-annex_on_NTFS_with_WSL1.mdwn b/doc/tips/Using_git-annex_on_NTFS_with_WSL1.mdwn index 683361bb02..1cda5aef6d 100644 --- a/doc/tips/Using_git-annex_on_NTFS_with_WSL1.mdwn +++ b/doc/tips/Using_git-annex_on_NTFS_with_WSL1.mdwn @@ -3,7 +3,7 @@ The following steps are tested on Windows 10 21h1 with Ubuntu 20 and are designe ** Limitations ** * The repository must be created with `annex.tune.objecthashlower=true`. -* `git annex adjust --unlock` will not work. Unlocked files will work most of the time. Avoid `annex.addunlocked=true` because it is likely to not work. +* `git annex adjust --unlock` will not work. Avoid `annex.addunlocked=true` and do not add multiple unlocked files to the index. **Setup** @@ -13,7 +13,7 @@ The following steps are tested on Windows 10 21h1 with Ubuntu 20 and are designe * `git config annex.sshcaching false` * `git annex init` * git-annex should not detect the filesystem as crippled but now set `git config annex.crippledfilesystem true` -* Safety of locked files will require these settings and scripts and the patch below. +* Safety of locked files will require these settings and scripts. * `git config annex.freezecontent-command 'wsl-freezecontent %path'` * `git config annex.thawcontent-command 'wsl-thawcontent %path'` @@ -71,8 +71,123 @@ fi ** Patches ** +These patches may introduce problems when there are multiple independent processes writing to the repository. Use at your own risk. +
-This patch allows `git annex fix` on a crippled file system. +Create symlink to annexed objects in-place. The add, addunused, lock, and rekey commands will create symlinks in-place instead of in a temporary directory. + +``` +From d871289d22d2e86cb62776841343baf6c0f83484 Mon Sep 17 00:00:00 2001 +From: Reiko Asakura +Date: Wed, 12 Oct 2022 17:13:55 -0400 +Subject: [PATCH 2/3] Create symlink to annexed objects in-place + +--- + Annex/Ingest.hs | 2 +- + 1 file changed, 1 insertion(+), 1 deletion(-) + +diff --git a/Annex/Ingest.hs b/Annex/Ingest.hs +index 89dc8acea..ec35fb15d 100644 +--- a/Annex/Ingest.hs ++++ b/Annex/Ingest.hs +@@ -301,7 +301,7 @@ restoreFile file key e = do + makeLink :: RawFilePath -> Key -> Maybe InodeCache -> Annex LinkTarget + makeLink file key mcache = flip catchNonAsync (restoreFile file key) $ do + l <- calcRepo $ gitAnnexLink file key +- replaceWorkTreeFile file' $ makeAnnexLink l . toRawFilePath ++ makeAnnexLink l file + + -- touch symlink to have same time as the original file, + -- as provided in the InodeCache +-- +2.30.2 + +``` +
+ +
+Recreate symlinks after remote transfer. The copy, move, get, sync commands will recreate the symlink after transferring the file from a remote. + +``` +From 82ea0ffb02fbc5e4003a466a216c8d1030b7d70a Mon Sep 17 00:00:00 2001 +From: Reiko Asakura +Date: Wed, 12 Oct 2022 19:10:07 -0400 +Subject: [PATCH 3/3] Recreate symlinks after remote transfer + +--- + Annex/Link.hs | 7 +++++++ + Command/Get.hs | 3 ++- + Command/Move.hs | 3 ++- + 3 files changed, 11 insertions(+), 2 deletions(-) + +diff --git a/Annex/Link.hs b/Annex/Link.hs +index 1a344d07e..e0f172d1d 100644 +--- a/Annex/Link.hs ++++ b/Annex/Link.hs +@@ -96,6 +96,13 @@ getAnnexLinkTarget' file coresymlinks = if coresymlinks + then mempty + else s + ++relinkAssociatedFile :: AssociatedFile -> Bool -> Annex () ++relinkAssociatedFile (AssociatedFile (Just file)) True = ++ getAnnexLinkTarget file >>= \case ++ Just target -> makeAnnexLink target file ++ _ -> noop ++relinkAssociatedFile _ _ = noop ++ + makeAnnexLink :: LinkTarget -> RawFilePath -> Annex () + makeAnnexLink = makeGitLink + +diff --git a/Command/Get.hs b/Command/Get.hs +index a25fd8bf1..e16362f79 100644 +--- a/Command/Get.hs ++++ b/Command/Get.hs +@@ -12,6 +12,7 @@ import qualified Remote + import Annex.Transfer + import Annex.NumCopies + import Annex.Wanted ++import Annex.Link + import qualified Command.Move + + cmd :: Command +@@ -95,7 +96,7 @@ getKey' key afile = dispatch + showNote "not available" + showlocs [] + return False +- dispatch remotes = notifyTransfer Download afile $ \witness -> do ++ dispatch remotes = observe (relinkAssociatedFile afile) $ notifyTransfer Download afile $ \witness -> do + ok <- pickRemote remotes $ \r -> ifM (probablyPresent r) + ( docopy r witness + , return False +diff --git a/Command/Move.hs b/Command/Move.hs +index 55fed5c37..d733a7cbb 100644 +--- a/Command/Move.hs ++++ b/Command/Move.hs +@@ -20,6 +20,7 @@ import Logs.Presence + import Logs.Trust + import Logs.File + import Annex.NumCopies ++import Annex.Link + + 
import qualified Data.ByteString.Char8 as B8 + import qualified Data.ByteString.Lazy as L +@@ -241,7 +242,7 @@ fromPerform src removewhen key afile = do + then dispatch removewhen deststartedwithcopy True + else dispatch removewhen deststartedwithcopy =<< get + where +- get = notifyTransfer Download afile $ ++ get = observe (relinkAssociatedFile afile) $ notifyTransfer Download afile $ + download src key afile stdRetry + + dispatch _ deststartedwithcopy False = do +-- +2.30.2 + +``` +
+ +
+Allow git-annex fix on crippled filesystem ``` From 65fe6e362dfbf2f54c8da5ca17c59af26de5ff83 Mon Sep 17 00:00:00 2001 @@ -105,7 +220,7 @@ index 39853c894..2d66c1461 100644 ** Usage tips ** -* WSL1 will not create symlinks that work in Windows if created before the target file exists, such as after `git annex add` or `git annex get`. This can be fixed by recreating them with any method, such as delete them and `git checkout`. +* WSL1 will not create symlinks that work in Windows if created before the target file exists. This can be fixed by recreating them with any method, such as delete them and `git checkout`. Also see the above patches to make git-annex automatically recreate symlinks.
Sample script to recreate all symlinks under the current directory @@ -129,10 +244,7 @@ do(pathlib.Path('.')) ```
-* Sometimes there will SQLite errors using multiple jobs but retrying will work most of the time. - ** Related bugs ** * [[bugs/WSL_adjusted_braches__58___smudge_fails_with_sqlite_thread_crashed_-_locking_protocol]] * [[bugs/WSL1__58___git-annex-add_fails_in_DrvFs_filesystem]] -* [[bugs/problems_with_SSH_and_relative_paths]] From 2cad4b95f4f850c01752fb244b1de80ba5bf3d27 Mon Sep 17 00:00:00 2001 From: AlexPraga Date: Fri, 14 Oct 2022 09:28:38 +0000 Subject: [PATCH 11/31] --- doc/forum/Failed_to_push_on_git-lfs.mdwn | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) create mode 100644 doc/forum/Failed_to_push_on_git-lfs.mdwn diff --git a/doc/forum/Failed_to_push_on_git-lfs.mdwn b/doc/forum/Failed_to_push_on_git-lfs.mdwn new file mode 100644 index 0000000000..4cae1c3aa4 --- /dev/null +++ b/doc/forum/Failed_to_push_on_git-lfs.mdwn @@ -0,0 +1,22 @@ +Hi, + +I've setup a remote on Github with LFS enabled. Running `git-annex sync --content` failed to push on the main branch : +``` +pull lfstest +From github.com:myuser/myremote + * branch HEAD -> FETCH_HEAD +ok +push lfstest +Everything up-to-date +To github.com:myuser/myremote.git + ! [rejected] main -> main (non-fast-forward) +error: failed to push some refs to 'github.com:myuser/myremote.git' +hint: Updates were rejected because the tip of your current branch is behind +hint: its remote counterpart. Integrate the remote changes (e.g. +hint: 'git pull ...') before pushing again. +hint: See the 'Note about fast-forwards' in 'git push --help' for details. +ok +``` +Is this an expected behaviour ? Is it possible to correct the push failure ? 
+ +Thans in advance From 7b9e3dc5fb4f21559ed0f27af954e9014ee3e05c Mon Sep 17 00:00:00 2001 From: AlexPraga Date: Fri, 14 Oct 2022 09:32:44 +0000 Subject: [PATCH 12/31] --- doc/forum/Failed_to_push_on_git-lfs.mdwn | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/doc/forum/Failed_to_push_on_git-lfs.mdwn b/doc/forum/Failed_to_push_on_git-lfs.mdwn index 4cae1c3aa4..1d4f407abd 100644 --- a/doc/forum/Failed_to_push_on_git-lfs.mdwn +++ b/doc/forum/Failed_to_push_on_git-lfs.mdwn @@ -1,6 +1,7 @@ Hi, I've setup a remote on Github with LFS enabled. Running `git-annex sync --content` failed to push on the main branch : + ``` pull lfstest From github.com:myuser/myremote @@ -19,4 +20,4 @@ ok ``` Is this an expected behaviour ? Is it possible to correct the push failure ? -Thans in advance +Thansk in advance From f8fe393052be1fb24bb7487a4ef43f8677b10fa1 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Mon, 17 Oct 2022 12:37:57 -0400 Subject: [PATCH 13/31] comment --- ...ment_1_f83dbcfdd09de3ce908c0d4a9daef458._comment | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 doc/forum/Failed_to_push_on_git-lfs/comment_1_f83dbcfdd09de3ce908c0d4a9daef458._comment diff --git a/doc/forum/Failed_to_push_on_git-lfs/comment_1_f83dbcfdd09de3ce908c0d4a9daef458._comment b/doc/forum/Failed_to_push_on_git-lfs/comment_1_f83dbcfdd09de3ce908c0d4a9daef458._comment new file mode 100644 index 0000000000..f3c9d06728 --- /dev/null +++ b/doc/forum/Failed_to_push_on_git-lfs/comment_1_f83dbcfdd09de3ce908c0d4a9daef458._comment @@ -0,0 +1,13 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 1""" + date="2022-10-17T16:28:48Z" + content=""" +This doesn't involve LFS at all, it's a regular git branch being pushed in +the regular way. So you can certianly solve the problem with some +combination of `git pull`, `git merge`, and `git push`. + +That said, I don't know why `git-annex sync` didn't work in your situation. 
+I created some test git-lfs repos on github and never saw any difficulty +syncing with them. +"""]] From 0d762acf7ebf2406dc4f0ad167b580e3a3801f1e Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Mon, 17 Oct 2022 15:08:54 -0400 Subject: [PATCH 14/31] update comment, probably not a sqlite bug Sqlite's page documenting WAL mode changed in Oct 2016 to mention ways that queries could fail with SQLITE_BUSY. http://web.archive.org/web/20161009044054/http://www.sqlite.org:80/wal.html Probably not coincidentally, I emailed sqlite-users about such a situation in Feb 2015. https://www.mail-archive.com/sqlite-users@mailinglists.sqlite.org/msg90580.html No one ever replied to me, but at least now I understand why it does that. Since it's documented now, it's no longer a bug. --- Database/Handle.hs | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Database/Handle.hs b/Database/Handle.hs index b0c9786974..ce22d0a661 100644 --- a/Database/Handle.hs +++ b/Database/Handle.hs @@ -148,8 +148,8 @@ workerThread db tablename jobs = newconn -- Like runSqlite, but more robust. -- -- New database connections can sometimes take a while to become usable. --- This may be due to WAL mode recovering after a crash, or perhaps a bug --- like described in blob 500f777a6ab6c45ca5f9790e0a63575f8e3cb88f. +-- This may be due to WAL mode recovering after a crash, or perhaps a +-- situation like described in blob 500f777a6ab6c45ca5f9790e0a63575f8e3cb88f. -- So, loop until a select succeeds; once one succeeds the connection will -- stay usable. -- From 3149a1e2fe6bd907cb9e1852451621e8386bf2b2 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Mon, 17 Oct 2022 15:56:19 -0400 Subject: [PATCH 15/31] More robust handling of ErrorBusy when writing to sqlite databases While ErrorBusy and other exceptions were caught and the write retried for up to 10 seconds, it was still possible for git-annex to eventually give up and error out without writing to the database. 
Now it will retry as long as necessary. This does mean that, if one git-annex process is suspended just as sqlite has locked the database for writing, another git-annex that tries to write to it might get stuck retrying forever. But, that could already happen when opening the sqlite database, which retries forever on ErrorBusy. This is an area where git-annex is known to not behave well, there's a todo about the general case of it. Sponsored-by: Dartmouth College's Datalad project --- CHANGELOG | 1 + Database/Handle.hs | 16 ++-- ..._45168f110bded2f8c8f9777e1edda945._comment | 36 ++++++++ .../withExclusiveLock_blocking_issue.mdwn | 89 ++++++++++++++++--- 4 files changed, 120 insertions(+), 22 deletions(-) create mode 100644 doc/bugs/get_is_busy_doing_nothing/comment_27_45168f110bded2f8c8f9777e1edda945._comment diff --git a/CHANGELOG b/CHANGELOG index 6c045d7609..3f118af109 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -12,6 +12,7 @@ git-annex (10.20221004) UNRELEASED; urgency=medium by only asking for files under the prefix. * When importing from versioned remotes, fix tracking of the content of deleted files. + * More robust handling of ErrorBusy when writing to sqlite databases. -- Joey Hess Mon, 03 Oct 2022 13:36:42 -0400 diff --git a/Database/Handle.hs b/Database/Handle.hs index ce22d0a661..283eef4cf2 100644 --- a/Database/Handle.hs +++ b/Database/Handle.hs @@ -82,22 +82,20 @@ queryDb (DbHandle _ jobs) a = do {- Writes a change to the database. - - - Writes can fail if another write is happening concurrently. - - So write failures are caught and retried repeatedly for up to 10 - - seconds, which should avoid all but the most exceptional problems. + - Writes can fail when another write is happening concurrently. + - So write failures are caught and retried repeatedly. 
-} commitDb :: DbHandle -> SqlPersistM () -> IO () -commitDb h wa = robustly Nothing 100 (commitDb' h wa) +commitDb h wa = robustly (commitDb' h wa) where - robustly :: Maybe SomeException -> Int -> IO (Either SomeException ()) -> IO () - robustly e 0 _ = error $ "failed to commit changes to sqlite database: " ++ show e - robustly _ n a = do + robustly :: IO (Either SomeException ()) -> IO () + robustly a = do r <- a case r of Right _ -> return () - Left e -> do + Left _ -> do threadDelay 100000 -- 1/10th second - robustly (Just e) (n-1) a + robustly a commitDb' :: DbHandle -> SqlPersistM () -> IO (Either SomeException ()) commitDb' (DbHandle _ jobs) a = do diff --git a/doc/bugs/get_is_busy_doing_nothing/comment_27_45168f110bded2f8c8f9777e1edda945._comment b/doc/bugs/get_is_busy_doing_nothing/comment_27_45168f110bded2f8c8f9777e1edda945._comment new file mode 100644 index 0000000000..55307eb53d --- /dev/null +++ b/doc/bugs/get_is_busy_doing_nothing/comment_27_45168f110bded2f8c8f9777e1edda945._comment @@ -0,0 +1,36 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 27""" + date="2022-10-17T18:49:47Z" + content=""" +[[todo/withExclusiveLock_blocking_issue]] does not have to be solved for +every other lock in git-annex first. Since the sqlite database lock would +be a new lock file, it could use the mtime update method described in there +without backwards compatibility issues. + +ErrorBusy can also occur when opening a new database connection for read, +but it retries that as often as necessary. Which does mean that suspending +git-annex at just the wrong time can already cause other git-annex +processes to stall forever waiting to read from the database. + +So, in a way, it would be ok for write to also retry each time it gets +ErrorBusy, rather than the current limited number of retries. If that does +cause git-annex to block when another git-annex process is suspended, it +would not be a new behavior. 
+ +Also, the mtime file method described in +[[todo/withExclusiveLock_blocking_issue]] could be used without a lock file +in order to detect when a suspended process is causing ErrorBusy. And can +avoid that situation for both writes and reads. + +So, plan: + +1. Retry forever on ErrorBusy when writing to sqlite database. + (I've made this change now... So I think probably this bug can't + occur any longer.) +2. While running opensettle and ChangeJob, have a background thread that + periodically updates a mtime file. +3. If ErrorBusy is received repeatedly for some amount of time, + check the mtime file. If it's not being updated, give up, since + a suspended git-annex process apparently has the sqlite database locked. +"""]] diff --git a/doc/todo/withExclusiveLock_blocking_issue.mdwn b/doc/todo/withExclusiveLock_blocking_issue.mdwn index 6915015da3..32c750ed7b 100644 --- a/doc/todo/withExclusiveLock_blocking_issue.mdwn +++ b/doc/todo/withExclusiveLock_blocking_issue.mdwn @@ -1,20 +1,83 @@ -Some parts of git-annex use withExclusiveLock or otherwise wait for an -exclusive lock and hold it while performing an operation. Now consider what -happens if the git-annex process is suspended. Another git-annex process -that is running and that blocks on the same lock will stall forever, until -the git-annex process is resumed. +Some parts of git-annex wait for an exclusive lock, and once they take it, +hold it while performing an operation. Now consider what happens if the +git-annex process is suspended. Another git-annex process that is running +and that waits to take the same exclusive lock (or a shared lock of the +same file) will stall forever, until the git-annex process is resumed. These time windows tend to be small, but may not always be. -Would it be better for the second git-annex process, rather than hanging -indefinitely, to try to take the lock a few times over a few seconds, and -then error out? 
The risk with doing that is, when 2 concurrent git-annex -processes are running and taking the locks repeatedly, one might get -unlucky, fail to take the lock, and error out, when waiting a little longer -would have succeeded, because the other process is not holding the lock all -the time. - Is there any better way git-annex could handle this? Is it a significant problem at all? I don't think I've ever seen it happen, but I rarely ^Z git-annex either. How do other programs handle this, if at all? --[[Joey]] + +---- + +Would it be better for the second git-annex process, rather than hanging +indefinitely, to timeout after a few seconds? + +But how many seconds? What if the system is under heavy load? + +> What could be done is, update the lock's file's mtime after successfully +> taking the lock. Then, as long as the mtime is advancing, some other +> process is actively using it, and it's ok for our process to wait +> longer. +> +> (Updating the mtime would be a problem when locking annex object files +> in v9 and earlier. Luckily, that locking is not done with a blocking +> lock anyway.) + +> If the lock file's mtime is being checked, the process that is +> blocking with the lock held could periodically update the mtime. +> A background thread could manage that. If that's done every ten seconds, +> then an mtime more than 20 seconds old indicates that the lock is +> held by a suspended process. So git-annex would stall for up to 20-30 +> seconds before erroring out when a lock is held by a suspended process. +> That seems acceptible, it doesn't need to deal with this situation +> instantly, it just needs to not block indefinitely. And updating the +> mtime every 10 seconds should not be too much IO. +> +> When an old version of git-annex has the lock held, it won't be updating +> the mtime. So if it takes longer than 10 seconds to do the operation with +> the lock held, a new version may complain that it's suspended when it's +> really not. 
This could be avoided by checking what process holds the +> lock, and whether it's suspended. But probably 10 seconds is enough +> time for all the operations git-annex takes a blocking lock for +> currently to finish, and if so we don't need to worry about this situation? +> +> > Unfortunately not: importKeys takes an exclusive lock and holds it while +> > downloading all the content! This seems like a bug though, because it can +> > cause other git-annex processes that are eg storing content in a remote +> > to block for a long time. +> > +> > Another one is Database.Export.writeLockDbWhile, which takes an +> > exclusive lock while running eg, Command.Export.changeExport, +> > which may sometimes need to do a lot of work. +> > +> > Another one is Annex.Queue.flush, which probably mostly runs in under +> > 10 seconds, but maybe not always, and when annex.queuesize is changed, +> > could surely take longer. +> +> To avoid problems when old git-annex's are also being used, it could +> update and check the mtime of a different file than the lock file. +> +> Start by trying to take the lock for up to 10 seconds. If it takes the +> lock, create the mtime file and start a thread that updates the mtime +> every 10 seconds until the lock is closed, and delete the mtime file +> before closing the lock handle. +> +> When it times out taking the lock, if the mtime file does not exist, an +> old git-annex has the lock; if the mtime file does exist, then check +> if its timestamp has advanced; if not then a new git-annex has the lock +> and is suspended and it can error out. +> +> Oops: There's a race in the method above; a timeout may occur +> right when the other process has taken the lock, but has not updated +> the mtime file yet. Then that process would incorrectly be treated +> as an old git-annex process. +> +> So: To support old git-annex, it seems it will need to check, when the +> lock is held, what process has the lock. 
And then check if that process +> is suspended or not. Which means looking in /proc. Ugh. +> +> Or: Change to checking lock mtimes only in git-annex v11.. From cde2e6110582b8cefbfadcc74c1355c2c1bff81b Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Tue, 18 Oct 2022 15:47:20 -0400 Subject: [PATCH 16/31] improve sqlite retrying behavior Avoid hanging when a suspended git-annex process is keeping a sqlite database locked. Sponsored-by: Dartmouth College's Datalad project --- CHANGELOG | 2 + Database/Handle.hs | 228 +++++++++++------- ..._45168f110bded2f8c8f9777e1edda945._comment | 35 +-- 3 files changed, 155 insertions(+), 110 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 3f118af109..58972284e3 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -13,6 +13,8 @@ git-annex (10.20221004) UNRELEASED; urgency=medium * When importing from versioned remotes, fix tracking of the content of deleted files. * More robust handling of ErrorBusy when writing to sqlite databases. + * Avoid hanging when a suspended git-annex process is keeping a sqlite + database locked. -- Joey Hess Mon, 03 Oct 2022 13:36:42 -0400 diff --git a/Database/Handle.hs b/Database/Handle.hs index 283eef4cf2..84c7623bbf 100644 --- a/Database/Handle.hs +++ b/Database/Handle.hs @@ -1,6 +1,6 @@ {- Persistent sqlite database handles. - - - Copyright 2015-2019 Joey Hess + - Copyright 2015-2022 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -21,6 +21,7 @@ import Utility.Exception import Utility.FileSystemEncoding import Utility.Debug import Utility.DebugLocks +import Utility.InodeCache import Database.Persist.Sqlite import qualified Database.Sqlite as Sqlite @@ -38,7 +39,7 @@ import System.IO {- A DbHandle is a reference to a worker thread that communicates with - the database. It has a MVar which Jobs are submitted to. 
-} -data DbHandle = DbHandle (Async ()) (MVar Job) +data DbHandle = DbHandle RawFilePath (Async ()) (MVar Job) {- Name of a table that should exist once the database is initialized. -} type TableName = String @@ -48,17 +49,17 @@ type TableName = String openDb :: RawFilePath -> TableName -> IO DbHandle openDb db tablename = do jobs <- newEmptyMVar - worker <- async (workerThread (T.pack (fromRawFilePath db)) tablename jobs) + worker <- async (workerThread db tablename jobs) -- work around https://github.com/yesodweb/persistent/issues/474 liftIO $ fileEncoding stderr - return $ DbHandle worker jobs + return $ DbHandle db worker jobs {- This is optional; when the DbHandle gets garbage collected it will - auto-close. -} closeDb :: DbHandle -> IO () -closeDb (DbHandle worker jobs) = do +closeDb (DbHandle _db worker jobs) = do debugLocks $ putMVar jobs CloseJob wait worker @@ -73,7 +74,7 @@ closeDb (DbHandle worker jobs) = do - it is able to run. -} queryDb :: DbHandle -> SqlPersistM a -> IO a -queryDb (DbHandle _ jobs) a = do +queryDb (DbHandle _db _ jobs) a = do res <- newEmptyMVar putMVar jobs $ QueryJob $ debugLocks $ liftIO . putMVar res =<< tryNonAsync a @@ -83,22 +84,31 @@ queryDb (DbHandle _ jobs) a = do {- Writes a change to the database. - - Writes can fail when another write is happening concurrently. - - So write failures are caught and retried repeatedly. + - So write failures are caught and retried. + - + - Retries repeatedly for up to 60 seconds. Part that point, it continues + - retrying only if the database shows signs of being modified by another + - process at least once each 30 seconds. 
-} commitDb :: DbHandle -> SqlPersistM () -> IO () -commitDb h wa = robustly (commitDb' h wa) +commitDb h@(DbHandle db _ _) wa = + robustly (commitDb' h wa) maxretries emptyDatabaseInodeCache where - robustly :: IO (Either SomeException ()) -> IO () - robustly a = do + robustly a retries ic = do r <- a case r of Right _ -> return () - Left _ -> do - threadDelay 100000 -- 1/10th second - robustly a + Left err -> do + threadDelay briefdelay + retryHelper "write to" err maxretries db retries ic $ + robustly a + + briefdelay = 100000 -- 1/10th second + + maxretries = 300 :: Int -- 30 seconds of briefdelay commitDb' :: DbHandle -> SqlPersistM () -> IO (Either SomeException ()) -commitDb' (DbHandle _ jobs) a = do +commitDb' (DbHandle _ _ jobs) a = do debug "Database.Handle" "commitDb start" res <- newEmptyMVar putMVar jobs $ ChangeJob $ @@ -115,7 +125,7 @@ data Job | ChangeJob (SqlPersistM ()) | CloseJob -workerThread :: T.Text -> TableName -> MVar Job -> IO () +workerThread :: RawFilePath -> TableName -> MVar Job -> IO () workerThread db tablename jobs = newconn where newconn = do @@ -142,45 +152,47 @@ workerThread db tablename jobs = newconn getjob :: IO (Either BlockedIndefinitelyOnMVar Job) getjob = try $ takeMVar jobs - --- Like runSqlite, but more robust. --- --- New database connections can sometimes take a while to become usable. --- This may be due to WAL mode recovering after a crash, or perhaps a --- situation like described in blob 500f777a6ab6c45ca5f9790e0a63575f8e3cb88f. --- So, loop until a select succeeds; once one succeeds the connection will --- stay usable. --- --- And sqlite sometimes throws ErrorIO when there's not really an IO problem, --- but perhaps just a short read(). That's caught and retried several times. -runSqliteRobustly :: TableName -> T.Text -> (SqlPersistM a) -> IO a + +{- Like runSqlite, but more robust. 
+ - + - New database connections can sometimes take a while to become usable, + - and selects will fail with ErrorBusy in the meantime. This may be due to + - WAL mode recovering after a crash, or a concurrent writer. + - So, wait until a select succeeds; once one succeeds the connection will + - stay usable. + - + - Also sqlite sometimes throws ErrorIO when there's not really an IO + - problem, but perhaps just a short read(). So also retry on ErrorIO. + - + - Retries repeatedly for up to 60 seconds. Part that point, it continues + - retrying only if the database shows signs of being modified by another + - process at least once each 30 seconds. + -} +runSqliteRobustly :: TableName -> RawFilePath -> (SqlPersistM a) -> IO a runSqliteRobustly tablename db a = do - conn <- opensettle maxretries - go conn maxretries + conn <- opensettle maxretries emptyDatabaseInodeCache + go conn maxretries emptyDatabaseInodeCache where - maxretries = 100 :: Int - - rethrow msg e = throwIO $ userError $ show e ++ "(" ++ msg ++ ")" - - go conn retries = do + go conn retries ic = do r <- try $ runResourceT $ runNoLoggingT $ - withSqlConnRobustly (wrapConnection conn) $ + withSqlConnRobustly db (wrapConnection conn) $ runSqlConn a case r of Right v -> return v Left ex@(Sqlite.SqliteException { Sqlite.seError = e }) - | e == Sqlite.ErrorIO -> - let retries' = retries - 1 - in if retries' < 1 - then rethrow "after successful open" ex - else go conn retries' - | otherwise -> rethrow "after successful open" ex + | e == Sqlite.ErrorIO -> do + briefdelay + retryHelper "access" ex maxretries db retries ic $ + go conn + | otherwise -> rethrow $ errmsg "after successful open" ex - opensettle retries = do - conn <- Sqlite.open db - settle conn retries + opensettle retries ic = do + conn <- Sqlite.open tdb + settle conn retries ic - settle conn retries = do + tdb = T.pack (fromRawFilePath db) + + settle conn retries ic = do r <- try $ do stmt <- Sqlite.prepare conn nullselect void $ Sqlite.step 
stmt @@ -188,26 +200,26 @@ runSqliteRobustly tablename db a = do case r of Right _ -> return conn Left ex@(Sqlite.SqliteException { Sqlite.seError = e }) - | e == Sqlite.ErrorBusy -> do - -- Wait and retry any number of times; it - -- will stop being busy eventually. + | e == Sqlite.ErrorBusy || e == Sqlite.ErrorIO -> do + when (e == Sqlite.ErrorIO) $ + Sqlite.close conn briefdelay - settle conn retries - | e == Sqlite.ErrorIO -> do - -- Could be a real IO error, - -- so don't retry indefinitely. - Sqlite.close conn - briefdelay - let retries' = retries - 1 - if retries' < 1 - then rethrow "while opening database connection" ex - else opensettle retries' - | otherwise -> rethrow "while opening database connection" ex + retryHelper "open" ex maxretries db retries ic $ + if e == Sqlite.ErrorIO + then opensettle + else settle conn + | otherwise -> rethrow $ errmsg "while opening database connection" ex -- This should succeed for any table. nullselect = T.pack $ "SELECT null from " ++ tablename ++ " limit 1" briefdelay = threadDelay 1000 -- 1/1000th second + + maxretries = 30000 :: Int -- 30 seconds of briefdelays + + rethrow = throwIO . userError + + errmsg msg e = show e ++ "(" ++ msg ++ ")" -- Like withSqlConn, but more robust. withSqlConnRobustly @@ -217,45 +229,99 @@ withSqlConnRobustly , BaseBackend backend ~ SqlBackend , BackendCompatible SqlBackend backend ) - => (LogFunc -> IO backend) + => RawFilePath + -> (LogFunc -> IO backend) -> (backend -> m a) -> m a -withSqlConnRobustly open f = do +withSqlConnRobustly db open f = do logFunc <- askLoggerIO withRunInIO $ \run -> bracket (open logFunc) - closeRobustly + (closeRobustly db) (run . f) --- Sqlite can throw ErrorBusy while closing a database; this catches --- the exception and retries. +{- Sqlite can throw ErrorBusy while closing a database; this catches + - the exception and retries. + - + - Retries repeatedly for up to 60 seconds. 
Part that point, it continues + - retrying only if the database shows signs of being modified by another + - process at least once each 30 seconds. + -} closeRobustly :: (IsPersistBackend backend , BaseBackend backend ~ SqlBackend , BackendCompatible SqlBackend backend ) - => backend + => RawFilePath + -> backend -> IO () -closeRobustly conn = go maxretries briefdelay +closeRobustly db conn = go maxretries emptyDatabaseInodeCache where - briefdelay = 1000 -- 1/1000th second - - -- Try up to 14 times; with the delay doubling each time, - -- the maximum delay before giving up is 16 seconds. - maxretries = 14 :: Int - - go retries delay = do + go retries ic = do r <- try $ close' conn case r of Right () -> return () Left ex@(Sqlite.SqliteException { Sqlite.seError = e }) | e == Sqlite.ErrorBusy -> do - threadDelay delay - let delay' = delay * 2 - let retries' = retries - 1 - if retries' < 1 - then rethrow "while closing database connection" ex - else go retries' delay' - | otherwise -> rethrow "while closing database connection" ex + threadDelay briefdelay + retryHelper "close" ex maxretries db retries ic go + | otherwise -> rethrow $ errmsg "while closing database connection" ex - rethrow msg e = throwIO $ userError $ show e ++ "(" ++ msg ++ ")" + briefdelay = 1000 -- 1/1000th second + + maxretries = 30000 :: Int -- 30 seconds of briefdelays + + rethrow = throwIO . userError + + errmsg msg e = show e ++ "(" ++ msg ++ ")" + +{- Retries a sqlite action repeatedly, but not forever. Detects situations + - when another git-annex process is suspended and has the database locked, + - and eventually gives up. The retries is the current number of retries + - that are left. The maxretries is how many retries to make each time + - the database is seen to have been modified by some other process. 
+ -} +retryHelper + :: Show err + => String + -> err + -> Int + -> RawFilePath + -> Int + -> DatabaseInodeCache + -> (Int -> DatabaseInodeCache -> IO a) + -> IO a +retryHelper action err maxretries db retries ic a = do + let retries' = retries - 1 + if retries' < 1 + then do + ic' <- getDatabaseInodeCache db + if isDatabaseModified ic ic' + then a maxretries ic' + else giveup (databaseAccessStalledMsg action db err) + else a retries' ic + +databaseAccessStalledMsg :: Show err => String -> RawFilePath -> err -> String +databaseAccessStalledMsg action db err = + "Repeatedly unable to " ++ action ++ " sqlite database " ++ fromRawFilePath db + ++ ": " ++ show err ++ ". " + ++ "Perhaps another git-annex process is suspended and is " + ++ "keeping this database locked?" + +data DatabaseInodeCache = DatabaseInodeCache (Maybe InodeCache) (Maybe InodeCache) + +emptyDatabaseInodeCache :: DatabaseInodeCache +emptyDatabaseInodeCache = DatabaseInodeCache Nothing Nothing + +getDatabaseInodeCache :: RawFilePath -> IO DatabaseInodeCache +getDatabaseInodeCache db = DatabaseInodeCache + <$> genInodeCache db noTSDelta + <*> genInodeCache (db <> "-wal") noTSDelta + +isDatabaseModified :: DatabaseInodeCache -> DatabaseInodeCache -> Bool +isDatabaseModified (DatabaseInodeCache a1 b1) (DatabaseInodeCache a2 b2) = + ismodified a1 a2 || ismodified b1 b2 + where + ismodified (Just a) (Just b) = not (compareStrong a b) + ismodified Nothing Nothing = False + ismodified _ _ = True diff --git a/doc/bugs/get_is_busy_doing_nothing/comment_27_45168f110bded2f8c8f9777e1edda945._comment b/doc/bugs/get_is_busy_doing_nothing/comment_27_45168f110bded2f8c8f9777e1edda945._comment index 55307eb53d..c69a88ec49 100644 --- a/doc/bugs/get_is_busy_doing_nothing/comment_27_45168f110bded2f8c8f9777e1edda945._comment +++ b/doc/bugs/get_is_busy_doing_nothing/comment_27_45168f110bded2f8c8f9777e1edda945._comment @@ -3,34 +3,11 @@ subject="""comment 27""" date="2022-10-17T18:49:47Z" content=""" 
-[[todo/withExclusiveLock_blocking_issue]] does not have to be solved for -every other lock in git-annex first. Since the sqlite database lock would -be a new lock file, it could use the mtime update method described in there -without backwards compatibility issues. +I've made it retry as long as necessary on ErrorBusy, while also noticing +when another process is suspended and has the sqlite database locked, +and avoiding retrying forever in that situation. -ErrorBusy can also occur when opening a new database connection for read, -but it retries that as often as necessary. Which does mean that suspending -git-annex at just the wrong time can already cause other git-annex -processes to stall forever waiting to read from the database. - -So, in a way, it would be ok for write to also retry each time it gets -ErrorBusy, rather than the current limited number of retries. If that does -cause git-annex to block when another git-annex process is suspended, it -would not be a new behavior. - -Also, the mtime file method described in -[[todo/withExclusiveLock_blocking_issue]] could be used without a lock file -in order to detect when a suspended process is causing ErrorBusy. And can -avoid that situation for both writes and reads. - -So, plan: - -1. Retry forever on ErrorBusy when writing to sqlite database. - (I've made this change now... So I think probably this bug can't - occur any longer.) -2. While running opensettle and ChangeJob, have a background thread that - periodically updates a mtime file. -3. If ErrorBusy is received repeatedly for some amount of time, - check the mtime file. If it's not being updated, give up, since - a suspended git-annex process apparently has the sqlite database locked. +This seems to be as far as I can take this bug report, I don't know +100% for sure if I've fixed it, but git-annex's behavior should certainly +be improved. 
"""]] From e4355a6f33ac37da62752abafa8ee6126935d1a6 Mon Sep 17 00:00:00 2001 From: AlexPraga Date: Sat, 22 Oct 2022 15:12:26 +0000 Subject: [PATCH 17/31] Added a comment --- .../comment_2_52051b251bea7a5ed452769d0320602a._comment | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 doc/forum/Failed_to_push_on_git-lfs/comment_2_52051b251bea7a5ed452769d0320602a._comment diff --git a/doc/forum/Failed_to_push_on_git-lfs/comment_2_52051b251bea7a5ed452769d0320602a._comment b/doc/forum/Failed_to_push_on_git-lfs/comment_2_52051b251bea7a5ed452769d0320602a._comment new file mode 100644 index 0000000000..61c503889a --- /dev/null +++ b/doc/forum/Failed_to_push_on_git-lfs/comment_2_52051b251bea7a5ed452769d0320602a._comment @@ -0,0 +1,9 @@ +[[!comment format=mdwn + username="AlexPraga" + avatar="http://cdn.libravatar.org/avatar/7c4e10fd352b81279b405f9f5337cdb7" + subject="comment 2" + date="2022-10-22T15:12:26Z" + content=""" +Thanks for answering. I managed to correct it with force-pushing it : `git push lfs main -f`. +Not sure why it did not work before but it seems to be working now. +"""]] From 5e52325147c8de6898ad01212f4dbad95058042e Mon Sep 17 00:00:00 2001 From: "gyurmo.gyuri@8d622eb91a0312fcb7e63e4f47a6e191c417a0c8" Date: Sun, 23 Oct 2022 08:30:57 +0000 Subject: [PATCH 18/31] --- doc/bugs/__47__exe__47__git-annex.mdwn | 50 ++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) create mode 100644 doc/bugs/__47__exe__47__git-annex.mdwn diff --git a/doc/bugs/__47__exe__47__git-annex.mdwn b/doc/bugs/__47__exe__47__git-annex.mdwn new file mode 100644 index 0000000000..e1845a75c8 --- /dev/null +++ b/doc/bugs/__47__exe__47__git-annex.mdwn @@ -0,0 +1,50 @@ +### Please describe the problem. + + +### What steps will reproduce the problem? +I have system: +Linux RPI-4B 5.15.74-2-MANJARO-ARM-RPI #1 SMP PREEMPT Thu Oct 20 16:43:17 UTC 2022 aarch64 GNU/Linux + +Doesnt't have git-annex in aur and pacman. +So download git-annex-standalone-arm64.tar.gz from web. 
+./runshell + +```git-annex enable-tor (adjusted/master(unlocked)+2) 10:25:22 +enable-tor + You may be prompted for a password + +git-annex: Failed to run as root: /home/gyurmo/.local/git-annex.linux/bin/git-annex enable-tor 1000 +failed +enable-tor: 1 failed``` + + +sudo /home/gyurmo/.local/git-annex.linux/bin/git-annex enable-tor 1000 +[sudo] gyurmo jelszava: +/home/gyurmo/.local/git-annex.linux/bin/git-annex: sor: 4: /exe/git-annex: No such file or directory + + +### What version of git-annex are you using? On what operating system? + +git-annex version: 10.20220121-g0bcb94487 +build flags: Assistant Webapp Pairing Inotify DBus DesktopNotify TorrentParser MagicMime Feeds Testsuite S3 WebDAV +dependency versions: aws-0.22 bloomfilter-2.0.1.0 cryptonite-0.26 DAV-1.3.4 feed-1.3.0.1 ghc-8.8.4 http-client-0.6.4.1 persistent-sqlite-2.10.6.2 torrent-10000.1.1 uuid-1.3.13 yesod-1.6.1.0 +key/value backends: SHA256E SHA256 SHA512E SHA512 SHA224E SHA224 SHA384E SHA384 SHA3_256E SHA3_256 SHA3_512E SHA3_512 SHA3_224E SHA3_224 SHA3_384E SHA3_384 SKEIN256E SKEIN256 SKEIN512E SKEIN512 BLAKE2B256E BLAKE2B256 BLAKE2B512E BLAKE2B512 BLAKE2B160E BLAKE2B160 BLAKE2B224E BLAKE2B224 BLAKE2B384E BLAKE2B384 BLAKE2BP512E BLAKE2BP512 BLAKE2S256E BLAKE2S256 BLAKE2S160E BLAKE2S160 BLAKE2S224E BLAKE2S224 BLAKE2SP256E BLAKE2SP256 BLAKE2SP224E BLAKE2SP224 SHA1E SHA1 MD5E MD5 WORM URL X* +remote types: git gcrypt p2p S3 bup directory rsync web bittorrent webdav adb tahoe glacier ddar git-lfs httpalso borg hook external +operating system: linux aarch64 +supported repository versions: 8 9 10 +upgrade supported from repository versions: 0 1 2 3 4 5 6 7 8 9 10 +local repository version: 8 + +### Please provide any additional information below. + +[[!format sh """ +# If you can, paste a complete transcript of the problem occurring here. +# If the problem is with the git-annex assistant, paste in .git/annex/daemon.log + + +# End of transcript or log. 
+"""]] + +### Have you had any luck using git-annex before? (Sometimes we get tired of reading bug reports all day and a lil' positive end note does wonders) + + From 0d853cef0175975908b139bdcf3064effd54489b Mon Sep 17 00:00:00 2001 From: jwodder Date: Tue, 25 Oct 2022 16:18:09 +0000 Subject: [PATCH 19/31] --- ...metadata_on_Windows_doesn__39__t_work.mdwn | 37 +++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 doc/bugs/addurl_+_metadata_on_Windows_doesn__39__t_work.mdwn diff --git a/doc/bugs/addurl_+_metadata_on_Windows_doesn__39__t_work.mdwn b/doc/bugs/addurl_+_metadata_on_Windows_doesn__39__t_work.mdwn new file mode 100644 index 0000000000..df111bb55e --- /dev/null +++ b/doc/bugs/addurl_+_metadata_on_Windows_doesn__39__t_work.mdwn @@ -0,0 +1,37 @@ +(Sorry for the uninformative title, but I had to work within the character limit.) + +### Please describe the problem. + +`git-annex metadata` does nothing on Windows if invoked while `git-annex addurl` is in progress on other files. + +### What steps will reproduce the problem? + +On Windows (but not on Linux or macOS, where everything works fine): + +- Start `git-annex addurl` in batch mode +- Feed it two or more URLs +- After reading the completion message for a URL from addurl's stdout, but before reading the remaining output, run `git-annex metadata` in batch mode and try to set the metadata for the file that was just downloaded. +- `git-annex metadata` will output an empty line (i.e., just CR LF), and if nothing further is fed to it, it will exit successfully without printing anything else on stdout or stderr. +- Querying the file's metadata normally after `git-annex addurl` exits will show that no metadata was set for the file. + +The Python script at (Python 3.8+ required) will run the above steps and show the output from `git-annex metadata`. 
A sample run under GitHub Actions can be seen at ; note the following section of the output under "Run script": + +``` +16:04:04 [DEBUG ] __main__: Opening pipe to: git-annex metadata --batch --json --json-error-messages +16:04:04 [DEBUG ] __main__: Input to metadata: b'{"file": "programming/gameboy.pdf", "fields": {"title": ["GameBoy Programming Manual"]}}\n' +16:04:04 [DEBUG ] __main__: r.returncode=0 +16:04:04 [DEBUG ] __main__: r.stdout=b'\r\n' +16:04:04 [DEBUG ] __main__: r.stderr=b'' +``` + +This problem does not always occur, but it seems to occur most of the time. Using `git-annex registerurl` in place of `git-annex metadata` works fine. + +### What version of git-annex are you using? On what operating system? + +git-annex 10.20221003, provided by datalad/git-annex, on Microsoft Windows Server 2022 + +### Please provide any additional information below. + +This affects a hobby project of mine – "gamdam", implemented in [Python](https://github.com/jwodder/gamdam) and [Rust](https://github.com/jwodder/gamdam-rust) — that interacts with git-annex. 
+ +[[!meta author=jwodder]] From 1944549a38d9462962afc14fb20052c6431f12c4 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 26 Oct 2022 12:58:10 -0400 Subject: [PATCH 20/31] comment --- ..._831dc2185919865d418b29cd06ef42be._comment | 42 +++++++++++++++++++ 1 file changed, 42 insertions(+) create mode 100644 doc/bugs/addurl_+_metadata_on_Windows_doesn__39__t_work/comment_1_831dc2185919865d418b29cd06ef42be._comment diff --git a/doc/bugs/addurl_+_metadata_on_Windows_doesn__39__t_work/comment_1_831dc2185919865d418b29cd06ef42be._comment b/doc/bugs/addurl_+_metadata_on_Windows_doesn__39__t_work/comment_1_831dc2185919865d418b29cd06ef42be._comment new file mode 100644 index 0000000000..c1e1d254cb --- /dev/null +++ b/doc/bugs/addurl_+_metadata_on_Windows_doesn__39__t_work/comment_1_831dc2185919865d418b29cd06ef42be._comment @@ -0,0 +1,42 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 1""" + date="2022-10-26T16:44:21Z" + content=""" +Windows is not needed, this will happen in a +repository where `git annex adjust --unlock` has been run. + +A simpler example: + + joey@darkstar:~/tmp/t2#master(unlocked)>git-annex addurl --batch + http://google.com/ + addurl http://google.com/ + (to google.com_) ok + ^Z + joey@darkstar:~/tmp/t2#master(unlocked)>git-annex metadata --batch --json + {"file":"google.com_","fields":{"author":["bar"]}} + +I'm not sure if this is a bug, because it's documented to output a blank +line when batch mode is provided a file that is not an annexed file, and +the file is not an annexed file yet due to the pointer not yet having been +staged in git. Which is needed, when in an adjusted unlocked branch, for +git-annex to know that this is an annexed file. + +When the file is locked, it just stats the symlink, so the fact that the +symlink is not yet staged in git doesn't matter. + +It does not seem to make sense to have addurl update the index +after each file it adds, because that would make addurl of a lot +of files unncessarily slow. 
+ +So, I think if anything is changed, it would need to be a change to make +the behavior with unlocked files consistent with the behavior with locked +files. Eg, when the symlink is not yet staged in git, treat it as a +non-annexed file. Which is also consistent with other handling of such +files by git-annex when not in batch mode. + +The solution for your program, though, seems like it will be to end the +git-annex addurl process before trying to set metadata on just-added files. +Or, alternatively, to use addurl with --json, extract the key, and set the +metadata of the key. +"""]] From b2ee2496ee9b22df6d61b73e71ff09e17267e98c Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 26 Oct 2022 13:58:20 -0400 Subject: [PATCH 21/31] remove whenAnnexed and ifAnnexed In preparation for adding a new variation on lookupKey. Sponsored-by: Max Thoursie on Patreon --- Annex/WorkTree.hs | 10 +--------- CmdLine/Batch.hs | 5 +++-- Command.hs | 1 - Command/Add.hs | 5 ++++- Command/AddUrl.hs | 15 ++++++++++----- Command/ImportFeed.hs | 5 ++++- Command/Info.hs | 7 ++++--- Command/ReKey.hs | 5 ++++- Command/Reinject.hs | 13 +++++++------ Command/RmUrl.hs | 11 +++++++---- Command/Sync.hs | 6 +++++- Command/Uninit.hs | 9 +++++++-- 12 files changed, 56 insertions(+), 36 deletions(-) diff --git a/Annex/WorkTree.hs b/Annex/WorkTree.hs index 95e0d18e2b..e065a2185c 100644 --- a/Annex/WorkTree.hs +++ b/Annex/WorkTree.hs @@ -1,6 +1,6 @@ {- git-annex worktree files - - - Copyright 2013-2021 Joey Hess + - Copyright 2013-2022 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -45,14 +45,6 @@ lookupKey' catkeyfile file = isAnnexLink file >>= \case Just key -> return (Just key) Nothing -> catkeyfile file -{- Modifies an action to only act on files that are already annexed, - - and passes the key on to it. 
-} -whenAnnexed :: (RawFilePath -> Key -> Annex (Maybe a)) -> RawFilePath -> Annex (Maybe a) -whenAnnexed a file = ifAnnexed file (a file) (return Nothing) - -ifAnnexed :: RawFilePath -> (Key -> Annex a) -> Annex a -> Annex a -ifAnnexed file yes no = maybe no yes =<< lookupKey file - {- Find all annexed files and update the keys database for them. -} scanAnnexedFiles :: Annex () scanAnnexedFiles = Database.Keys.updateDatabase diff --git a/CmdLine/Batch.hs b/CmdLine/Batch.hs index 80c901ecca..0c2617230f 100644 --- a/CmdLine/Batch.hs +++ b/CmdLine/Batch.hs @@ -186,8 +186,9 @@ batchAnnexed fmt seeker keyaction = do matcher <- getMatcher batchFilesKeys fmt $ \(si, v) -> case v of - Right bf -> flip whenAnnexed bf $ \f k -> - checkpresent k $ + Right f -> lookupKey f >>= \case + Nothing -> return Nothing + Just k -> checkpresent k $ startAction seeker si f k Left k -> ifM (matcher (MatchingInfo (mkinfo k))) ( checkpresent k $ diff --git a/Command.hs b/Command.hs index 09e7cb55be..67d10dd125 100644 --- a/Command.hs +++ b/Command.hs @@ -11,7 +11,6 @@ module Command ( ) where import Annex.Common as ReExported -import Annex.WorkTree as ReExported (whenAnnexed, ifAnnexed) import Types.Command as ReExported import Types.DeferredParse as ReExported import CmdLine.Seek as ReExported diff --git a/Command/Add.hs b/Command/Add.hs index 5010fef0e7..e1742bed66 100644 --- a/Command/Add.hs +++ b/Command/Add.hs @@ -18,6 +18,7 @@ import Annex.FileMatcher import Annex.Link import Annex.Tmp import Annex.HashObject +import Annex.WorkTree import Messages.Progress import Git.FilePath import Git.Types @@ -202,7 +203,9 @@ start dr si file addunlockedmatcher = mk <- liftIO $ isPointerFile file maybe (go s) (fixuppointer s) mk where - go s = ifAnnexed file (addpresent s) (add s) + go s = lookupKey file >>= \case + Just k -> addpresent s k + Nothing -> add s add s = starting "add" (ActionItemTreeFile file) si $ skipWhenDryRun dr $ if isSymbolicLink s diff --git a/Command/AddUrl.hs 
b/Command/AddUrl.hs index 830cc09251..5961a18eb4 100644 --- a/Command/AddUrl.hs +++ b/Command/AddUrl.hs @@ -20,6 +20,7 @@ import Annex.Ingest import Annex.CheckIgnore import Annex.Perms import Annex.UUID +import Annex.WorkTree import Annex.YoutubeDl import Annex.UntrustedFilePath import Logs.Web @@ -183,7 +184,9 @@ startRemote addunlockedmatcher r o si file uri sz = do performRemote addunlockedmatcher r o uri (toRawFilePath file') sz performRemote :: AddUnlockedMatcher -> Remote -> AddUrlOptions -> URLString -> RawFilePath -> Maybe Integer -> CommandPerform -performRemote addunlockedmatcher r o uri file sz = ifAnnexed file adduri geturi +performRemote addunlockedmatcher r o uri file sz = lookupKey file >>= \case + Just k -> adduri k + Nothing -> geturi where loguri = setDownloader uri OtherDownloader adduri = addUrlChecked o loguri file (Remote.uuid r) checkexistssize @@ -270,7 +273,9 @@ checkPreserveFileNameSecurity f = do ] performWeb :: AddUnlockedMatcher -> AddUrlOptions -> URLString -> RawFilePath -> Url.UrlInfo -> CommandPerform -performWeb addunlockedmatcher o url file urlinfo = ifAnnexed file addurl geturl +performWeb addunlockedmatcher o url file urlinfo = lookupKey file >>= \case + Just k -> addurl k + Nothing -> geturl where geturl = next $ isJust <$> addUrlFile addunlockedmatcher (downloadOptions o) url urlinfo file addurl = addUrlChecked o url file webUUID $ \k -> @@ -335,9 +340,9 @@ downloadWeb addunlockedmatcher o url urlinfo file = tryyoutubedl tmp = youtubeDlFileNameHtmlOnly url >>= \case Right mediafile -> let f = youtubeDlDestFile o file (toRawFilePath mediafile) - in ifAnnexed f - (alreadyannexed (fromRawFilePath f)) - (dl f) + in lookupKey f >>= \case + Just k -> alreadyannexed (fromRawFilePath f) k + Nothing -> dl f Left err -> checkRaw (Just err) o Nothing (normalfinish tmp) where dl dest = withTmpWorkDir mediakey $ \workdir -> do diff --git a/Command/ImportFeed.hs b/Command/ImportFeed.hs index 816db01691..b6ee11be9e 100644 --- 
a/Command/ImportFeed.hs +++ b/Command/ImportFeed.hs @@ -40,6 +40,7 @@ import Command.AddUrl (addUrlFile, downloadRemoteFile, parseDownloadOptions, Dow import Annex.UUID import Backend.URL (fromUrl) import Annex.Content +import Annex.WorkTree import Annex.YoutubeDl import Types.MetaData import Logs.MetaData @@ -297,7 +298,9 @@ performDownload' started addunlockedmatcher opts cache todownload = case locatio - to be re-downloaded. -} makeunique url n file = ifM alreadyexists ( ifM forced - ( ifAnnexed (toRawFilePath f) checksameurl tryanother + ( lookupKey (toRawFilePath f) >>= \case + Just k -> checksameurl k + Nothing -> tryanother , tryanother ) , return $ Just f diff --git a/Command/Info.hs b/Command/Info.hs index 9e47512596..7eb7727f33 100644 --- a/Command/Info.hs +++ b/Command/Info.hs @@ -28,6 +28,7 @@ import Utility.DiskFree import Annex.Content import Annex.UUID import Annex.CatFile +import Annex.WorkTree import Logs.UUID import Logs.Trust import Logs.Location @@ -174,9 +175,9 @@ itemInfo o (si, p) = ifM (isdir p) Right u -> uuidInfo o u si Left _ -> do relp <- liftIO $ relPathCwdToFile (toRawFilePath p) - ifAnnexed relp - (fileInfo o (fromRawFilePath relp) si) - (treeishInfo o p si) + lookupKey relp >>= \case + Just k -> fileInfo o (fromRawFilePath relp) si k + Nothing -> treeishInfo o p si ) where isdir = liftIO . catchBoolIO . 
(isDirectory <$$> getFileStatus) diff --git a/Command/ReKey.hs b/Command/ReKey.hs index 165f48c078..91d0804222 100644 --- a/Command/ReKey.hs +++ b/Command/ReKey.hs @@ -16,6 +16,7 @@ import Annex.Perms import Annex.ReplaceFile import Logs.Location import Annex.InodeSentinal +import Annex.WorkTree import Utility.InodeCache import qualified Utility.RawFilePath as R @@ -61,7 +62,9 @@ seek o = case batchOption o of (toRawFilePath file, fromMaybe (giveup "bad key") (deserializeKey skey)) start :: SeekInput -> (RawFilePath, Key) -> CommandStart -start si (file, newkey) = ifAnnexed file go stop +start si (file, newkey) = lookupKey file >>= \case + Just k -> go k + Nothing -> stop where go oldkey | oldkey == newkey = stop diff --git a/Command/Reinject.hs b/Command/Reinject.hs index ad8e908a62..54492e235b 100644 --- a/Command/Reinject.hs +++ b/Command/Reinject.hs @@ -13,6 +13,7 @@ import Annex.Content import Backend import Types.KeySource import Utility.Metered +import Annex.WorkTree import qualified Git cmd :: Command @@ -45,9 +46,9 @@ startSrcDest :: [FilePath] -> CommandStart startSrcDest ps@(src:dest:[]) | src == dest = stop | otherwise = notAnnexed src' $ - ifAnnexed (toRawFilePath dest) - go - (giveup $ src ++ " is not an annexed file") + lookupKey (toRawFilePath dest) >>= \case + Just k -> go k + Nothing -> giveup $ src ++ " is not an annexed file" where src' = toRawFilePath src go key = starting "reinject" ai si $ @@ -79,9 +80,9 @@ notAnnexed :: RawFilePath -> CommandStart -> CommandStart notAnnexed src a = ifM (fromRepo Git.repoIsLocalBare) ( a - , ifAnnexed src - (giveup $ "cannot used annexed file as src: " ++ fromRawFilePath src) - a + , lookupKey src >>= \case + Just _ -> giveup $ "cannot used annexed file as src: " ++ fromRawFilePath src + Nothing -> a ) perform :: RawFilePath -> Key -> CommandPerform diff --git a/Command/RmUrl.hs b/Command/RmUrl.hs index 93443b227a..c5107bd0eb 100644 --- a/Command/RmUrl.hs +++ b/Command/RmUrl.hs @@ -9,6 +9,7 @@ module 
Command.RmUrl where import Command import Logs.Web +import Annex.WorkTree cmd :: Command cmd = notBareRepo $ @@ -46,10 +47,12 @@ batchParser s = case separate (== ' ') (reverse s) of return $ Right (f', reverse ru) start :: (SeekInput, (FilePath, URLString)) -> CommandStart -start (si, (file, url)) = flip whenAnnexed file' $ \_ key -> do - let ai = mkActionItem (key, AssociatedFile (Just file')) - starting "rmurl" ai si $ - next $ cleanup url key +start (si, (file, url)) = lookupKey file' >>= \case + Nothing -> stop + Just key -> do + let ai = mkActionItem (key, AssociatedFile (Just file')) + starting "rmurl" ai si $ + next $ cleanup url key where file' = toRawFilePath file diff --git a/Command/Sync.hs b/Command/Sync.hs index 15ccfd3763..781b4e6125 100644 --- a/Command/Sync.hs +++ b/Command/Sync.hs @@ -50,6 +50,7 @@ import Config.DynamicConfig import Annex.Path import Annex.Wanted import Annex.Content +import Annex.WorkTree import Command.Get (getKey') import qualified Command.Move import qualified Command.Export @@ -765,7 +766,10 @@ seekSyncContent o rs currbranch = do seekHelper fst3 ww LsFiles.inRepoDetails l seekincludinghidden origbranch mvar l bloomfeeder = - seekFiltered (const (pure True)) (\(si, f) -> ifAnnexed f (commandAction . 
gofile bloomfeeder mvar si f) noop) $ + let filterer = \(si, f) -> lookupKey f >>= \case + Just k -> (commandAction $ gofile bloomfeeder mvar si f k) + Nothing -> noop + in seekFiltered (const (pure True)) filterer $ seekHelper id ww (LsFiles.inRepoOrBranch origbranch) l ww = WarnUnmatchLsFiles diff --git a/Command/Uninit.hs b/Command/Uninit.hs index 38f50c8f6c..d8cba0c4df 100644 --- a/Command/Uninit.hs +++ b/Command/Uninit.hs @@ -18,6 +18,7 @@ import qualified Database.Keys import Annex.Content import Annex.Init import Annex.CheckIgnore +import Annex.WorkTree import Utility.FileMode import qualified Utility.RawFilePath as R @@ -50,13 +51,17 @@ seek ps = do l <- workTreeItems ww ps withFilesNotInGit (CheckGitIgnore False) - WarnUnmatchWorkTreeItems - (\(_, f) -> commandAction $ whenAnnexed (startCheckIncomplete . fromRawFilePath) f) + WarnUnmatchWorkTreeItems + checksymlinks l withFilesInGitAnnex ww (Command.Unannex.seeker True) l finish where ww = WarnUnmatchLsFiles + checksymlinks (_, f) = + commandAction $ lookupKey f >>= \case + Nothing -> stop + Just k -> startCheckIncomplete (fromRawFilePath f) k {- git annex symlinks that are not checked into git could be left by an - interrupted add. -} From 731e806c9638f4f5852b965a2e3430d3df83d3c1 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 26 Oct 2022 14:23:06 -0400 Subject: [PATCH 22/31] use lookupKeyStaged in --batch code paths Make --batch mode handle unstaged annexed files consistently whether the file is unlocked or not. Before this, a unstaged locked file would have the symlink on disk examined and operated on in --batch mode, while an unstaged unlocked file would be skipped. Note that, when not in batch mode, unstaged files are skipped over too. That is actually somewhat new behavior; as late as 7.20191114 a command like `git-annex whereis .` would operate on unstaged locked files and skip over unstaged unlocked files. 
That changed during optimisation of CmdLine.Seek with apparently little fanfare or notice. Turns out that rmurl still behaved that way when given an unstaged file on the command line. It was changed to use lookupKeyStaged to handle its --batch mode. That also affected its non-batch mode, but since that's just catching up to the change earlier made to most other commands, I have not mentioned that in the changelog. It may be that other uses of lookupKey should also change to lookupKeyStaged. But it may also be that would slow down some things, or lead to unwanted behavior changes, so I've kept the changes minimal for now. An example of a place where the use of lookupKey is better than lookupKeyStaged is in Command.AddUrl, where it looks to see if the file already exists, and adds the url to the file when so. It does not matter there whether the file is staged or not (when it's locked). The use of lookupKey in Command.Unused likewise seems good (and faster). Sponsored-by: Nicholas Golder-Manning on Patreon --- Annex/WorkTree.hs | 12 +++++++++++- CHANGELOG | 2 ++ CmdLine/Batch.hs | 2 +- Command/MetaData.hs | 2 +- Command/RmUrl.hs | 2 +- ...durl_+_metadata_on_Windows_doesn__39__t_work.mdwn | 2 ++ ...mment_2_88b7db5434a56c25c75772caa37bc14a._comment | 8 ++++++++ 7 files changed, 26 insertions(+), 4 deletions(-) create mode 100644 doc/bugs/addurl_+_metadata_on_Windows_doesn__39__t_work/comment_2_88b7db5434a56c25c75772caa37bc14a._comment diff --git a/Annex/WorkTree.hs b/Annex/WorkTree.hs index e065a2185c..41abc2471e 100644 --- a/Annex/WorkTree.hs +++ b/Annex/WorkTree.hs @@ -14,7 +14,7 @@ import Annex.CurrentBranch import qualified Database.Keys {- Looks up the key corresponding to an annexed file in the work tree, - - by examining what the file links to. + - by examining what the symlink points to. - - An unlocked file will not have a link on disk, so fall back to - looking for a pointer to a key in git.
@@ -31,6 +31,16 @@ lookupKey = lookupKey' catkeyfile , catKeyFileHidden file =<< getCurrentBranch ) +{- Like lookupKey, but only looks at files staged in git, not at unstaged + - changes in the work tree. This means it's slower, but it also has + - consistently the same behavior for locked files as for unlocked files. + -} +lookupKeyStaged :: RawFilePath -> Annex (Maybe Key) +lookupKeyStaged file = catKeyFile file >>= \case + Just k -> return (Just k) + Nothing -> catKeyFileHidden file =<< getCurrentBranch + +{- Like lookupKey, but does not find keys for hidden files. -} lookupKeyNotHidden :: RawFilePath -> Annex (Maybe Key) lookupKeyNotHidden = lookupKey' catkeyfile where diff --git a/CHANGELOG b/CHANGELOG index 58972284e3..9926f1ff84 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -15,6 +15,8 @@ git-annex (10.20221004) UNRELEASED; urgency=medium * More robust handling of ErrorBusy when writing to sqlite databases. * Avoid hanging when a suspended git-annex process is keeping a sqlite database locked. + * Make --batch mode handle unstaged annexed files consistently + whether the file is unlocked or not. 
-- Joey Hess Mon, 03 Oct 2022 13:36:42 -0400 diff --git a/CmdLine/Batch.hs b/CmdLine/Batch.hs index 0c2617230f..3439b3d580 100644 --- a/CmdLine/Batch.hs +++ b/CmdLine/Batch.hs @@ -186,7 +186,7 @@ batchAnnexed fmt seeker keyaction = do matcher <- getMatcher batchFilesKeys fmt $ \(si, v) -> case v of - Right f -> lookupKey f >>= \case + Right f -> lookupKeyStaged f >>= \case Nothing -> return Nothing Just k -> checkpresent k $ startAction seeker si f k diff --git a/Command/MetaData.hs b/Command/MetaData.hs index b33e632c73..4568b1f8df 100644 --- a/Command/MetaData.hs +++ b/Command/MetaData.hs @@ -155,7 +155,7 @@ parseJSONInput i = case eitherDecode (BU.fromString i) of startBatch :: (SeekInput, (Either RawFilePath Key, MetaData)) -> CommandStart startBatch (si, (i, (MetaData m))) = case i of Left f -> do - mk <- lookupKey f + mk <- lookupKeyStaged f case mk of Just k -> go k (mkActionItem (k, AssociatedFile (Just f))) Nothing -> return Nothing diff --git a/Command/RmUrl.hs b/Command/RmUrl.hs index c5107bd0eb..efd6e4059d 100644 --- a/Command/RmUrl.hs +++ b/Command/RmUrl.hs @@ -47,7 +47,7 @@ batchParser s = case separate (== ' ') (reverse s) of return $ Right (f', reverse ru) start :: (SeekInput, (FilePath, URLString)) -> CommandStart -start (si, (file, url)) = lookupKey file' >>= \case +start (si, (file, url)) = lookupKeyStaged file' >>= \case Nothing -> stop Just key -> do let ai = mkActionItem (key, AssociatedFile (Just file')) diff --git a/doc/bugs/addurl_+_metadata_on_Windows_doesn__39__t_work.mdwn b/doc/bugs/addurl_+_metadata_on_Windows_doesn__39__t_work.mdwn index df111bb55e..944d986dbf 100644 --- a/doc/bugs/addurl_+_metadata_on_Windows_doesn__39__t_work.mdwn +++ b/doc/bugs/addurl_+_metadata_on_Windows_doesn__39__t_work.mdwn @@ -35,3 +35,5 @@ git-annex 10.20221003, provided by datalad/git-annex, on Microsoft Windows Serve This affects a hobby project of mine – "gamdam", implemented in [Python](https://github.com/jwodder/gamdam) and 
[Rust](https://github.com/jwodder/gamdam-rust) — that interacts with git-annex. [[!meta author=jwodder]] + +> [[fixed|done]], see my comments --[[Joey]] diff --git a/doc/bugs/addurl_+_metadata_on_Windows_doesn__39__t_work/comment_2_88b7db5434a56c25c75772caa37bc14a._comment b/doc/bugs/addurl_+_metadata_on_Windows_doesn__39__t_work/comment_2_88b7db5434a56c25c75772caa37bc14a._comment new file mode 100644 index 0000000000..80ac4fc644 --- /dev/null +++ b/doc/bugs/addurl_+_metadata_on_Windows_doesn__39__t_work/comment_2_88b7db5434a56c25c75772caa37bc14a._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 2""" + date="2022-10-26T18:13:42Z" + content=""" +I've made --batch handling of unstaged locked files consistent with the +handling of unstaged unlocked files. +"""]] From a8ce8ac75d30afc76e3543cd7e0acbb41f5207bf Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 26 Oct 2022 14:54:38 -0400 Subject: [PATCH 23/31] comment --- .../comment_1_8a9e9d83a3dcf07ed76a22f03636f6d1._comment | 9 +++++++++ 1 file changed, 9 insertions(+) create mode 100644 doc/bugs/__47__exe__47__git-annex/comment_1_8a9e9d83a3dcf07ed76a22f03636f6d1._comment diff --git a/doc/bugs/__47__exe__47__git-annex/comment_1_8a9e9d83a3dcf07ed76a22f03636f6d1._comment b/doc/bugs/__47__exe__47__git-annex/comment_1_8a9e9d83a3dcf07ed76a22f03636f6d1._comment new file mode 100644 index 0000000000..45c0e01b52 --- /dev/null +++ b/doc/bugs/__47__exe__47__git-annex/comment_1_8a9e9d83a3dcf07ed76a22f03636f6d1._comment @@ -0,0 +1,9 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 1""" + date="2022-10-26T18:53:40Z" + content=""" +Using this command instead should work: + + sudo /home/gyurmo/.local/git-annex.linux/git-annex enable-tor 1000 +"""]] From 14f7a386f05e57d32fcd9ffbd815132c704c3705 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 26 Oct 2022 15:44:06 -0400 Subject: [PATCH 24/31] Make git-annex enable-tor work when using the linux standalone build Clean the 
standalone environment before running the su command to run "sh". Otherwise, PATH leaked through, causing it to run git-annex.linux/bin/sh, but GIT_ANNEX_DIR was not set, which caused that script to not work: [2022-10-26 15:07:02.145466106] (Utility.Process) process [938146] call: pkexec ["sh","-c","cd '/home/joey/tmp/git-annex.linux/r' && '/home/joey/tmp/git-annex.linux/git-annex' 'enable-tor' '1000'"] /home/joey/tmp/git-annex.linux/bin/sh: 4: exec: /exe/sh: not found Changed programPath to not use GIT_ANNEX_PROGRAMPATH, but instead run the scripts at the top of GIT_ANNEX_DIR. That works both when the standalone environment is set up, and when it's not. Sponsored-by: Kevin Mueller on Patreon --- Annex/Path.hs | 38 ++++++++++++++++--- Assistant/Install.hs | 22 ----------- Build/LinuxMkLibs.hs | 2 - CHANGELOG | 1 + Command/EnableTor.hs | 3 +- Command/WebApp.hs | 3 +- Utility/Su.hs | 6 +-- doc/bugs/__47__exe__47__git-annex.mdwn | 1 + ..._381ac4e8b27343cdc470584c05edec76._comment | 7 ++++ 9 files changed, 48 insertions(+), 35 deletions(-) create mode 100644 doc/bugs/__47__exe__47__git-annex/comment_2_381ac4e8b27343cdc470584c05edec76._comment diff --git a/Annex/Path.hs b/Annex/Path.hs index 11400d32a5..e058db32a8 100644 --- a/Annex/Path.hs +++ b/Annex/Path.hs @@ -1,6 +1,6 @@ {- git-annex program path - - - Copyright 2013-2021 Joey Hess + - Copyright 2013-2022 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -11,6 +11,7 @@ module Annex.Path ( gitAnnexChildProcess, gitAnnexChildProcessParams, gitAnnexDaemonizeParams, + cleanStandaloneEnvironment, ) where import Annex.Common @@ -19,7 +20,7 @@ import Utility.Env import Annex.PidLock import qualified Annex -import System.Environment (getExecutablePath, getArgs) +import System.Environment (getExecutablePath, getArgs, getProgName) {- A fully qualified path to the currently running git-annex program. 
- @@ -29,13 +30,16 @@ import System.Environment (getExecutablePath, getArgs) - or searching for the command name in PATH. - - The standalone build runs git-annex via ld.so, and defeats - - getExecutablePath. It sets GIT_ANNEX_PROGRAMPATH to the correct path - - to the wrapper script to use. + - getExecutablePath. It sets GIT_ANNEX_DIR to the location of the + - standalone build directory, and there are wrapper scripts for git-annex + - and git-annex-shell in that directory. -} programPath :: IO FilePath -programPath = go =<< getEnv "GIT_ANNEX_PROGRAMPATH" +programPath = go =<< getEnv "GIT_ANNEX_DIR" where - go (Just p) = return p + go (Just dir) = do + name <- getProgName + return (dir name) go Nothing = do exe <- getExecutablePath p <- if isAbsolute exe @@ -97,3 +101,25 @@ gitAnnexDaemonizeParams = do -- Get every parameter git-annex was run with. ps <- liftIO getArgs return (map Param ps ++ cps) + +{- Returns a cleaned up environment that lacks path and other settings + - used to make the standalone builds use their bundled libraries and programs. + - Useful when calling programs not included in the standalone builds. + - + - For a non-standalone build, returns Nothing. 
+ -} +cleanStandaloneEnvironment :: IO (Maybe [(String, String)]) +cleanStandaloneEnvironment = clean <$> getEnvironment + where + clean environ + | null vars = Nothing + | otherwise = Just $ catMaybes $ map (restoreorig environ) environ + where + vars = words $ fromMaybe "" $ + lookup "GIT_ANNEX_STANDLONE_ENV" environ + restoreorig oldenviron p@(k, _v) + | k `elem` vars = case lookup ("ORIG_" ++ k) oldenviron of + (Just v') + | not (null v') -> Just (k, v') + _ -> Nothing + | otherwise = Just p diff --git a/Assistant/Install.hs b/Assistant/Install.hs index 3569ddb4ba..6a31968d7b 100644 --- a/Assistant/Install.hs +++ b/Assistant/Install.hs @@ -171,25 +171,3 @@ installFileManagerHooks program = unlessM osAndroid $ do #else installFileManagerHooks _ = noop #endif - -{- Returns a cleaned up environment that lacks settings used to make the - - standalone builds use their bundled libraries and programs. - - Useful when calling programs not included in the standalone builds. - - - - For a non-standalone build, returns Nothing. 
- -} -cleanEnvironment :: IO (Maybe [(String, String)]) -cleanEnvironment = clean <$> getEnvironment - where - clean environ - | null vars = Nothing - | otherwise = Just $ catMaybes $ map (restoreorig environ) environ - where - vars = words $ fromMaybe "" $ - lookup "GIT_ANNEX_STANDLONE_ENV" environ - restoreorig oldenviron p@(k, _v) - | k `elem` vars = case lookup ("ORIG_" ++ k) oldenviron of - (Just v') - | not (null v') -> Just (k, v') - _ -> Nothing - | otherwise = Just p diff --git a/Build/LinuxMkLibs.hs b/Build/LinuxMkLibs.hs index 4c0824fb79..7beab60125 100644 --- a/Build/LinuxMkLibs.hs +++ b/Build/LinuxMkLibs.hs @@ -164,8 +164,6 @@ installLinkerShim top linker exe = do createSymbolicLink (fromRawFilePath link) (top exelink) writeFile exe $ unlines [ "#!/bin/sh" - , "GIT_ANNEX_PROGRAMPATH=\"$0\"" - , "export GIT_ANNEX_PROGRAMPATH" , "exec \"$GIT_ANNEX_DIR/" ++ exelink ++ "\" --library-path \"$GIT_ANNEX_LD_LIBRARY_PATH\" \"$GIT_ANNEX_DIR/shimmed/" ++ base ++ "/" ++ base ++ "\" \"$@\"" ] modifyFileMode (toRawFilePath exe) $ addModes executeModes diff --git a/CHANGELOG b/CHANGELOG index 9926f1ff84..42e68dfda7 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -17,6 +17,7 @@ git-annex (10.20221004) UNRELEASED; urgency=medium database locked. * Make --batch mode handle unstaged annexed files consistently whether the file is unlocked or not. + * Make git-annex enable-tor work when using the linux standalone build. 
-- Joey Hess Mon, 03 Oct 2022 13:36:42 -0400 diff --git a/Command/EnableTor.hs b/Command/EnableTor.hs index aeae96be92..df518c2fa0 100644 --- a/Command/EnableTor.hs +++ b/Command/EnableTor.hs @@ -60,9 +60,10 @@ start _os = do gitannex <- liftIO programPath let ps = [Param (cmdname cmd), Param (show curruserid)] sucommand <- liftIO $ mkSuCommand gitannex ps + cleanenv <- liftIO $ cleanStandaloneEnvironment maybe noop showLongNote (describePasswordPrompt' sucommand) - ifM (liftIO $ runSuCommand sucommand) + ifM (liftIO $ runSuCommand sucommand cleanenv) ( next checkHiddenService , giveup $ unwords $ [ "Failed to run as root:" , gitannex ] ++ toCommand ps diff --git a/Command/WebApp.hs b/Command/WebApp.hs index 236a94dac4..1e01e1a97f 100644 --- a/Command/WebApp.hs +++ b/Command/WebApp.hs @@ -22,6 +22,7 @@ import Utility.WebApp import Utility.Daemon (checkDaemon) import Utility.UserInfo import Annex.Init +import Annex.Path import qualified Git import Git.Types (fromConfigValue) import qualified Git.Config @@ -222,7 +223,7 @@ openBrowser' mcmd htmlshim realurl outh errh = #endif hPutStrLn (fromMaybe stdout outh) $ "Launching web browser on " ++ url hFlush stdout - environ <- cleanEnvironment + environ <- cleanStandaloneEnvironment let p' = p { env = environ , std_out = maybe Inherit UseHandle outh diff --git a/Utility/Su.hs b/Utility/Su.hs index 52f3f7f687..e956d808b4 100644 --- a/Utility/Su.hs +++ b/Utility/Su.hs @@ -57,9 +57,9 @@ describePasswordPrompt' :: Maybe SuCommand -> Maybe String describePasswordPrompt' (Just (SuCommand p _ _)) = describePasswordPrompt p describePasswordPrompt' Nothing = Nothing -runSuCommand :: (Maybe SuCommand) -> IO Bool -runSuCommand (Just (SuCommand _ cmd ps)) = boolSystem cmd ps -runSuCommand Nothing = return False +runSuCommand :: (Maybe SuCommand) -> Maybe [(String, String)] -> IO Bool +runSuCommand (Just (SuCommand _ cmd ps)) env = boolSystemEnv cmd ps env +runSuCommand Nothing _ = return False -- Generates a SuCommand that runs a 
command as root, fairly portably. -- diff --git a/doc/bugs/__47__exe__47__git-annex.mdwn b/doc/bugs/__47__exe__47__git-annex.mdwn index e1845a75c8..8d7e157f3d 100644 --- a/doc/bugs/__47__exe__47__git-annex.mdwn +++ b/doc/bugs/__47__exe__47__git-annex.mdwn @@ -48,3 +48,4 @@ local repository version: 8 ### Have you had any luck using git-annex before? (Sometimes we get tired of reading bug reports all day and a lil' positive end note does wonders) +> [[fixed|done]] --[[Joey]] diff --git a/doc/bugs/__47__exe__47__git-annex/comment_2_381ac4e8b27343cdc470584c05edec76._comment b/doc/bugs/__47__exe__47__git-annex/comment_2_381ac4e8b27343cdc470584c05edec76._comment new file mode 100644 index 0000000000..6db2b4d8bf --- /dev/null +++ b/doc/bugs/__47__exe__47__git-annex/comment_2_381ac4e8b27343cdc470584c05edec76._comment @@ -0,0 +1,7 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 2""" + date="2022-10-26T19:44:24Z" + content=""" +And I've fixed this problem now. +"""]] From 9187a37decf7f4a7c01a3c8e0aebbe0391616bb4 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Thu, 27 Oct 2022 10:21:24 -0400 Subject: [PATCH 25/31] fix build warning --- Utility/Su.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Utility/Su.hs b/Utility/Su.hs index e956d808b4..d2d970298a 100644 --- a/Utility/Su.hs +++ b/Utility/Su.hs @@ -58,7 +58,7 @@ describePasswordPrompt' (Just (SuCommand p _ _)) = describePasswordPrompt p describePasswordPrompt' Nothing = Nothing runSuCommand :: (Maybe SuCommand) -> Maybe [(String, String)] -> IO Bool -runSuCommand (Just (SuCommand _ cmd ps)) env = boolSystemEnv cmd ps env +runSuCommand (Just (SuCommand _ cmd ps)) environ = boolSystemEnv cmd ps environ runSuCommand Nothing _ = return False -- Generates a SuCommand that runs a command as root, fairly portably. 
From 813bc50cb38af8457f7906dd54fae8a16260d688 Mon Sep 17 00:00:00 2001 From: Stefan Date: Sat, 29 Oct 2022 10:28:19 +0000 Subject: [PATCH 26/31] Added a comment: This guide fails with "fatal: refusing to merge unrelated histories" --- ..._d94ae69945416e57faa6e6dd5536f66e._comment | 59 +++++++++++++++++++ 1 file changed, 59 insertions(+) create mode 100644 doc/tips/migrating_two_seperate_disconnected_directories_to_git_annex/comment_1_d94ae69945416e57faa6e6dd5536f66e._comment diff --git a/doc/tips/migrating_two_seperate_disconnected_directories_to_git_annex/comment_1_d94ae69945416e57faa6e6dd5536f66e._comment b/doc/tips/migrating_two_seperate_disconnected_directories_to_git_annex/comment_1_d94ae69945416e57faa6e6dd5536f66e._comment new file mode 100644 index 0000000000..2f739fe461 --- /dev/null +++ b/doc/tips/migrating_two_seperate_disconnected_directories_to_git_annex/comment_1_d94ae69945416e57faa6e6dd5536f66e._comment @@ -0,0 +1,59 @@ +[[!comment format=mdwn + username="Stefan" + avatar="http://cdn.libravatar.org/avatar/1474db4b030b82320e3bd5e899ef2bad" + subject="This guide fails with "fatal: refusing to merge unrelated histories"" + date="2022-10-29T10:28:18Z" + content=""" +This no longer works, here is a MWE to copy-paste (uses /tmp/{A,B}): + +``` +mkdir /tmp/A && touch /tmp/A/bigfile +mkdir /tmp/B && touch /tmp/B/bigfile +cd /tmp/A +git init +git annex init +git annex add . +git commit -m \"git annex yay\" +cd /tmp/B +git init +git remote add A /tmp/A +git fetch A +git annex info # this should display the two repos +git annex add . +git annex whereis +git annex sync +``` + +This fails with + +``` +commit +[main (root-commit) e9435bf] git-annex in stefan@notebook:/tmp/B + 1 file changed, 1 insertion(+) + create mode 120000 bigfile +ok +pull A + +fatal: refusing to merge unrelated histories +failed +push A +Enumerating objects: 19, done. +Counting objects: 100% (19/19), done. +Delta compression using up to 8 threads +Compressing objects: 100% (11/11), done. 
+Writing objects: 100% (14/14), 1.37 KiB | 1.37 MiB/s, done. +Total 14 (delta 2), reused 0 (delta 0), pack-reused 0 +To /tmp/A + * [new branch] main -> synced/main + * [new branch] git-annex -> synced/git-annex +To /tmp/A + ! [rejected] main -> main (non-fast-forward) +error: failed to push some refs to '/tmp/A' +hint: Updates were rejected because the tip of your current branch is behind +hint: its remote counterpart. Integrate the remote changes (e.g. +hint: 'git pull ...') before pushing again. +hint: See the 'Note about fast-forwards' in 'git push --help' for details. +ok +sync: 1 failed +``` +"""]] From bf27a02b0754c2d58ea9e6f5385b8a8d21043d80 Mon Sep 17 00:00:00 2001 From: xloem Date: Mon, 31 Oct 2022 13:36:51 +0000 Subject: [PATCH 27/31] Added a comment: ipfs --- .../comment_3_3a38ab16ca034025476b4df0a566b4a9._comment | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 doc/install/rpm_standalone/comment_3_3a38ab16ca034025476b4df0a566b4a9._comment diff --git a/doc/install/rpm_standalone/comment_3_3a38ab16ca034025476b4df0a566b4a9._comment b/doc/install/rpm_standalone/comment_3_3a38ab16ca034025476b4df0a566b4a9._comment new file mode 100644 index 0000000000..0e08bcb0c9 --- /dev/null +++ b/doc/install/rpm_standalone/comment_3_3a38ab16ca034025476b4df0a566b4a9._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="xloem" + avatar="http://cdn.libravatar.org/avatar/b8c087f7c5e6a9358748f0727c077f3b" + subject="ipfs" + date="2022-10-31T13:36:51Z" + content=""" +It would be nice if the rpm or repository included auxiliarity scripts such as git-annex-remote-ipfs so that these would get installed/uninstalled/upgraded alongside the main project. 
+"""]] From 9dc3acc95df4955d4a29b047cba292962969dcc4 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Mon, 31 Oct 2022 12:16:36 -0400 Subject: [PATCH 28/31] comment, update tip --- ...perate_disconnected_directories_to_git_annex.mdwn | 12 +++++------- ...mment_2_c6c11a9d5f9f136fa541404cdc49f45c._comment | 8 ++++++++ 2 files changed, 13 insertions(+), 7 deletions(-) create mode 100644 doc/tips/migrating_two_seperate_disconnected_directories_to_git_annex/comment_2_c6c11a9d5f9f136fa541404cdc49f45c._comment diff --git a/doc/tips/migrating_two_seperate_disconnected_directories_to_git_annex.mdwn b/doc/tips/migrating_two_seperate_disconnected_directories_to_git_annex.mdwn index c8faf340c2..8a14b35dc8 100644 --- a/doc/tips/migrating_two_seperate_disconnected_directories_to_git_annex.mdwn +++ b/doc/tips/migrating_two_seperate_disconnected_directories_to_git_annex.mdwn @@ -49,9 +49,11 @@ This should display something like: Once you are sure things went on okay, you can synchronise this with `marcos`: - git annex sync + git annex sync --allow-unrelated-histories -This will push the metadata information to marcos, so it knows which files are available on `angela`. From there on, you can freely get and move files between the two repos! +This will push the metadata information to marcos, so it knows which files +are available on `angela`. From there on, you can freely get and move files +between the two repos! Importing files from a third directory -------------------------------------- @@ -61,7 +63,7 @@ Say that some files on `angela` are actually spread out outside of the `~/mp3` d cd ~/mp3 git annex import ~/music/ -(!) Be careful that `~/music` is not a git-annex repository, or this will [[destroy it!|bugs/git annex import destroys a fellow git annex repository]]. +(!) Be careful that `~/music` is not a git-annex repository. Deleting deleted files ---------------------- @@ -73,7 +75,3 @@ It is quite possible some files were removed (or renamed!) 
on `marcos` but not o This will show files that are on `angela` and not on `marcos`. They could be new files that were only added on `angela`, so be careful! A manual analysis is necessary, but let's say you are certain those files are not relevant anymore, you can delete them from `angela`: git annex drop - -If the file is a renamed or modified version from the original, you may need to use `--force`, but be careful! If you delete the wrong file, it will be lost forever! - -> (!) Maybe this wouldn't happen with [[direct mode]] and an fsck? --[[anarcat]] diff --git a/doc/tips/migrating_two_seperate_disconnected_directories_to_git_annex/comment_2_c6c11a9d5f9f136fa541404cdc49f45c._comment b/doc/tips/migrating_two_seperate_disconnected_directories_to_git_annex/comment_2_c6c11a9d5f9f136fa541404cdc49f45c._comment new file mode 100644 index 0000000000..dc693fa467 --- /dev/null +++ b/doc/tips/migrating_two_seperate_disconnected_directories_to_git_annex/comment_2_c6c11a9d5f9f136fa541404cdc49f45c._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 2""" + date="2022-10-31T16:09:12Z" + content=""" +Indeed, you will need to use `git-annex sync --allow-unrelated-histories` +now in that situation. I have updated the tip. +"""]] From d22bd53310b7ed558317858cfc84b0bff693969e Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Thu, 3 Nov 2022 14:07:53 -0400 Subject: [PATCH 29/31] releasing package git-annex version 10.20221103 --- CHANGELOG | 9 ++++++--- git-annex.cabal | 2 +- 2 files changed, 7 insertions(+), 4 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index 42e68dfda7..9d7370388b 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,4 +1,4 @@ -git-annex (10.20221004) UNRELEASED; urgency=medium +git-annex (10.20221103) upstream; urgency=medium * Doubled the speed of git-annex drop when operating on many files, and of git-annex get when operating on many tiny files. 
@@ -16,10 +16,13 @@ git-annex (10.20221004) UNRELEASED; urgency=medium * Avoid hanging when a suspended git-annex process is keeping a sqlite database locked. * Make --batch mode handle unstaged annexed files consistently - whether the file is unlocked or not. + whether the file is unlocked or not. Note that this changes the + behavior of --batch when it is provided with locked files that are + in the process of being added to the repository, but have not yet been + staged in git. * Make git-annex enable-tor work when using the linux standalone build. - -- Joey Hess Mon, 03 Oct 2022 13:36:42 -0400 + -- Joey Hess Thu, 03 Nov 2022 14:07:31 -0400 git-annex (10.20221003) upstream; urgency=medium diff --git a/git-annex.cabal b/git-annex.cabal index c60e627682..3d96e49f5c 100644 --- a/git-annex.cabal +++ b/git-annex.cabal @@ -1,5 +1,5 @@ Name: git-annex -Version: 10.20221003 +Version: 10.20221103 Cabal-Version: 1.12 License: AGPL-3 Maintainer: Joey Hess From 95405c067f130f5ddf18f16b828058da1ff1c16e Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Thu, 3 Nov 2022 14:08:29 -0400 Subject: [PATCH 30/31] add news item for git-annex 10.20221103 --- doc/news/version_10.20220624.mdwn | 23 ----------------------- doc/news/version_10.20221103.mdwn | 22 ++++++++++++++++++++++ 2 files changed, 22 insertions(+), 23 deletions(-) delete mode 100644 doc/news/version_10.20220624.mdwn create mode 100644 doc/news/version_10.20221103.mdwn diff --git a/doc/news/version_10.20220624.mdwn b/doc/news/version_10.20220624.mdwn deleted file mode 100644 index f2a6ba5522..0000000000 --- a/doc/news/version_10.20220624.mdwn +++ /dev/null @@ -1,23 +0,0 @@ -git-annex 10.20220624 released with [[!toggle text="these changes"]] -[[!toggleable text=""" * init: Added --no-autoenable option. - * info: Added --autoenable option. - * initremote: Improve handling of type=git special remotes. 
- The location value no longer needs to match the url of an existing - git remote, and locations not using ssh:// will work now, including - both paths and host:/path - * Fix retrival of an empty file that is stored in a special remote with - chunking enabled. - (Fixes a reversion in 8.20201103) - * move: Improve resuming a move that succeeded in transferring the - content, but where dropping failed due to eg a network problem, - in cases where numcopies checks prevented the resumed - move from dropping the object from the source repository. - * add, fix, lock, rekey: When several files were being processed, - replacing an annex symlink of a file that was already processed - with a new large file could sometimes cause that large file to be - added to git. These races have been fixed. - * add: Also fix a similar race that could cause a large file be added - to git when a small file was modified or overwritten while it was - being added. - * add --batch: Fix handling of a file that is skipped due to being - gitignored."""]] \ No newline at end of file diff --git a/doc/news/version_10.20221103.mdwn b/doc/news/version_10.20221103.mdwn new file mode 100644 index 0000000000..e7374b7c48 --- /dev/null +++ b/doc/news/version_10.20221103.mdwn @@ -0,0 +1,22 @@ +git-annex 10.20221103 released with [[!toggle text="these changes"]] +[[!toggleable text=""" * Doubled the speed of git-annex drop when operating on many files, + and of git-annex get when operating on many tiny files. + * trust, untrust, semitrust, dead: Fix behavior when provided with + multiple repositories to operate on. + * trust, untrust, semitrust, dead: When provided with no parameters, + do not operate on a repository that has an empty name. + * move: Fix openFile crash with -J + (Fixes a reversion in 8.20201103) + * S3: Speed up importing from a large bucket when fileprefix= is set, + by only asking for files under the prefix. 
+ * When importing from versioned remotes, fix tracking of the content + of deleted files. + * More robust handling of ErrorBusy when writing to sqlite databases. + * Avoid hanging when a suspended git-annex process is keeping a sqlite + database locked. + * Make --batch mode handle unstaged annexed files consistently + whether the file is unlocked or not. Note that this changes the + behavior of --batch when it is provided with locked files that are + in the process of being added to the repository, but have not yet been + staged in git. + * Make git-annex enable-tor work when using the linux standalone build."""]] \ No newline at end of file From bc980815eea3df9aaa349bc5a9e302461ac603b9 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Thu, 3 Nov 2022 14:11:49 -0400 Subject: [PATCH 31/31] fix build of helper program broken by ba7ecbc6a9c3763e8152e4f46522d14a4ee2b59d Sponsored-by: Svenne Krap on Patreon --- Build/DistributionUpdate.hs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Build/DistributionUpdate.hs b/Build/DistributionUpdate.hs index d48be43efe..79f8a6c188 100644 --- a/Build/DistributionUpdate.hs +++ b/Build/DistributionUpdate.hs @@ -24,7 +24,7 @@ import Annex.Content import Annex.WorkTree import Git.Command import qualified Utility.RawFilePath as R -import Annex.Actions +import Annex.Action import Data.Time.Clock import Data.Char @@ -70,8 +70,9 @@ main = do state <- Annex.new =<< Git.Construct.fromPath (toRawFilePath ".") ood <- Annex.eval state $ do buildrpms topdir updated - makeinfos updated version + is <- makeinfos updated version quiesce False + return is syncToArchiveOrg unless (null ood) $ error $ "Some info files are out of date: " ++ show (map fst ood)