diff --git a/Annex/Journal.hs b/Annex/Journal.hs index de0ad77655..a02eaa3155 100644 --- a/Annex/Journal.hs +++ b/Annex/Journal.hs @@ -81,14 +81,16 @@ setJournalFile _jl ru file content = withOtherTmp $ \tmp -> do ( return gitAnnexPrivateJournalDir , return gitAnnexJournalDir ) - createAnnexDirectory jd -- journal file is written atomically let jfile = journalFile file let tmpfile = tmp P.</> jfile - liftIO $ do + let write = liftIO $ do withFile (fromRawFilePath tmpfile) WriteMode $ \h -> writeJournalHandle h content moveFile tmpfile (jd P.</> jfile) + -- avoid overhead of creating the journal directory when it already + -- exists + write `catchIO` (const (createAnnexDirectory jd >> write)) data JournalledContent = NoJournalledContent diff --git a/doc/todo/registerurl__58___do_changes_in_journal___34__in_place__34____63__.mdwn b/doc/todo/registerurl__58___do_changes_in_journal___34__in_place__34____63__.mdwn index 93ab5f35f1..f6a50bcff5 100644 --- a/doc/todo/registerurl__58___do_changes_in_journal___34__in_place__34____63__.mdwn +++ b/doc/todo/registerurl__58___do_changes_in_journal___34__in_place__34____63__.mdwn @@ -27,3 +27,5 @@ May be changes to those .web files in journal could be done "in place" by append may be there is a way to "stagger" those --batch additions somehow so all thousands of URLs are added in a single "run" thus having a single "copy/move" and locking/stat'ing syscalls? 
PS More information could be found at [dandisets/issues/225](https://github.com/dandi/dandisets/issues/225 ) + +[[!tag projects/dandi]] diff --git a/doc/todo/registerurl__58___do_changes_in_journal___34__in_place__34____63__/comment_2_a4fce84f5777ed582fa599778835455f._comment b/doc/todo/registerurl__58___do_changes_in_journal___34__in_place__34____63__/comment_2_a4fce84f5777ed582fa599778835455f._comment index 6efc0a53a4..9d09c64d4b 100644 --- a/doc/todo/registerurl__58___do_changes_in_journal___34__in_place__34____63__/comment_2_a4fce84f5777ed582fa599778835455f._comment +++ b/doc/todo/registerurl__58___do_changes_in_journal___34__in_place__34____63__/comment_2_a4fce84f5777ed582fa599778835455f._comment @@ -9,9 +9,10 @@ randomly distributed? It sounds like it's more randomly distributed, if you're walking a tree and adding each file you encounter, and some of them have the same content so -the same url and key. +the same key. -If it was not randomly distributed, a nice optimisation would be for +But your strace shows repeated writes for the same key, so maybe they bunch +up? If it was not randomly distributed, a nice optimisation would be for registerurl to buffer urls as long as the key is the same, and then do a single write for that key of all the urls. 
But it can't really buffer like that if it's randomly distributed; the buffer could use a large amount of diff --git a/doc/todo/registerurl__58___do_changes_in_journal___34__in_place__34____63__/comment_3_56c313fdcb88e95abaa10647678bc108._comment b/doc/todo/registerurl__58___do_changes_in_journal___34__in_place__34____63__/comment_3_56c313fdcb88e95abaa10647678bc108._comment new file mode 100644 index 0000000000..fdeb636ab4 --- /dev/null +++ b/doc/todo/registerurl__58___do_changes_in_journal___34__in_place__34____63__/comment_3_56c313fdcb88e95abaa10647678bc108._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 3""" + date="2022-07-14T16:16:35Z" + content=""" +I've optimised away the repeated mkdir of the journal. + +Probably not a big win in this particular edge case, but a nice general +win.. +"""]]