From 528882a6df47755659d8683630ee2f611e8a6e4a Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Mon, 5 Jun 2023 14:08:12 -0400 Subject: [PATCH] comment --- ..._c1865c88d23873eb4b32489fc387f40c._comment | 121 ++++++++++++++++++ 1 file changed, 121 insertions(+) create mode 100644 doc/bugs/importtree_spends_hours_reading_cidsdb/comment_12_c1865c88d23873eb4b32489fc387f40c._comment diff --git a/doc/bugs/importtree_spends_hours_reading_cidsdb/comment_12_c1865c88d23873eb4b32489fc387f40c._comment b/doc/bugs/importtree_spends_hours_reading_cidsdb/comment_12_c1865c88d23873eb4b32489fc387f40c._comment new file mode 100644 index 0000000000..ce05ecb617 --- /dev/null +++ b/doc/bugs/importtree_spends_hours_reading_cidsdb/comment_12_c1865c88d23873eb4b32489fc387f40c._comment @@ -0,0 +1,121 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 12""" + date="2023-06-05T16:28:05Z" + content=""" +That is a significant move of the goal posts. I don't know if that even +involves importtree being slow anymore. + +You've apparently got a lot of stuff going on in the directories used by +that script, NTFS, acrypt, some kind of removable drive, ...? + +I tried to adapt the script to a simple test case that I can run, but had +to leave all that stuff out, since I don't know anything about it. The +script I ran is below. + +I ran it with 10000 files, and the timings of the syncs were: + +1. 0m0.154s +2. 2m5.719s +3. 0m3.088s +4. 0m15.240s +5. 0m34.297s +6. 0m1.244s +7. 2m7.648s +8. 0m35.378s + +So I have not reproduced the (new) problem, #5 and #8 are not slow. + +Your 5th sync taking a long time is particularly strange because the only +difference between it and the 4th sync is that `git annex wanted` is +configured. But, `git-annex sync` does not do anything with that +configuration unless it is syncing content. That made me think you might +have annex.synccontent set in gitconfig globally. It seems to me that would +also make the 3rd sync slow, but I suppose you could have a complex git +config that sets it for some repos but not other ones or something like +that. + +Otherwise I'm out of ideas about how to reproduce your problem. + +I'll reiterate that it would be helpful to see the last output of the sync +command before it does whatever is being slow. + +----- + + #!/bin/bash + + set -euo pipefail + set -x + + NFILES=$1 + + REPO=repo + DRIVE=drive + REPOBASE=`pwd`/bench/repo + DRIVEBASE=`pwd`/bench/drive + SRBASE=`pwd`/bench/sr + + mkdir -p $REPOBASE + mkdir -p $SRBASE/$REPO + perl -e 'for(1..'$NFILES') { open(OUT,">", "'$SRBASE/$REPO'/f$_"); print OUT $_; close OUT }' + + set -x + cd $REPOBASE + mkdir $REPO + cd $REPO + git init + git config annex.thin true + git annex init 'local hub' + # use newer belowgit annex wanted . "include=mtree exclude=$REPO/*" + git annex wanted . "include=* and exclude=$REPO/*" + + # Now initialize things. + touch mtree + git annex add mtree + time git annex sync + git annex adjust --unlock-present + + git annex initremote source type=directory directory=$SRBASE/$REPO importtree=yes encryption=none + + git annex enableremote source directory=$SRBASE/$REPO + git config remote.source.annex-readonly true + git config remote.source.annex-tracking-branch master:$REPO # (says to put the files in the "$REPO" directory) + git config annex.securehashesonly true + git config annex.genmetadata true + git config annex.diskreserve 100M + + time git annex sync + + # Now set up the clone + + mkdir -p $DRIVEBASE/$DRIVE/git-annex + cd $DRIVEBASE/$DRIVE/git-annex + git clone $REPOBASE/$REPO + cd $REPO + git config annex.thin true + git annex init "alexandria:Goerzen3" + git annex adjust --unlock # NTFS doesn't like symlinks + time git annex sync + + cd $REPOBASE/$REPO + git remote add $DRIVE $DRIVEBASE/$DRIVE/git-annex/$REPO + time git annex sync $DRIVE + git annex group $DRIVE archivedrive + git annex wanted $DRIVE 'include=mtree* or standard' # 'standard and include=mtree*' + time git annex sync + + cd $DRIVEBASE/$DRIVE/git-annex/$REPO + time git annex sync + git annex enableremote source directory=$SRBASE/$REPO + git config remote.source.annex-readonly true + git config remote.source.annex-tracking-branch master:$REPO # (says to put the files in the "$REPO" directory) + git config annex.securehashesonly true + git config annex.genmetadata true + git config annex.diskreserve 100M + + time git annex sync --content + git annex get --auto + + cd $REPOBASE/$REPO + time git annex sync +"""]]