From 2c2a84caac6d8b461677cc1dca7a87a4309210e9 Mon Sep 17 00:00:00 2001 From: jgoerzen Date: Fri, 2 Jun 2023 21:44:54 +0000 Subject: [PATCH] Added a comment --- ..._a3fc6b67529ddce9e79c0e72996259ad._comment | 136 ++++++++++++++++++ 1 file changed, 136 insertions(+) create mode 100644 doc/bugs/importtree_spends_hours_reading_cidsdb/comment_11_a3fc6b67529ddce9e79c0e72996259ad._comment diff --git a/doc/bugs/importtree_spends_hours_reading_cidsdb/comment_11_a3fc6b67529ddce9e79c0e72996259ad._comment b/doc/bugs/importtree_spends_hours_reading_cidsdb/comment_11_a3fc6b67529ddce9e79c0e72996259ad._comment new file mode 100644 index 0000000000..8f771e2921 --- /dev/null +++ b/doc/bugs/importtree_spends_hours_reading_cidsdb/comment_11_a3fc6b67529ddce9e79c0e72996259ad._comment @@ -0,0 +1,136 @@ +[[!comment format=mdwn + username="jgoerzen" + avatar="http://cdn.libravatar.org/avatar/090740822c9dcdb39ffe506b890981b4" + subject="comment 11" + date="2023-06-02T21:44:53Z" + content=""" +So I ran your script on the same dataset that I was using. + +I was rather surprised that it rocketed through in 15 minutes. I then ran a subsequent git annex sync, which after a few minutes started showing the same pread64 behavior as before. + +Perhaps your improvements have been helping here? + +I went back to my original full script and thought: let's time each git-annex call. + +``` +#!/bin/bash + +set -euo pipefail + +if [ -z \"$2\" ]; then + echo \"Syntax: $0 REPO DRIVE\" + exit 5 +fi + +REPO=\"$1\" +DRIVE=\"$2\" +DRIVEBASE=\"/acrypt/no-backup/jgoerzen/git-annex-testing\" + +set -x +cd /acrypt/git-annex/repos +mkdir $REPO +cd $REPO +git init +git config annex.thin true +git annex init 'local hub' +# use newer belowgit annex wanted . \"include=mtree exclude=$REPO/*\" +git annex wanted . \"include=* and exclude=$REPO/*\" + +# Now initialize things. +touch mtree +time git annex add mtree +time git annex sync +time git annex adjust --unlock-present + +time git annex initremote source type=directory directory=/acrypt/git-annex/bind-ro/$REPO importtree=yes encryption=none + +time git annex enableremote source directory=/acrypt/git-annex/bind-ro/$REPO +git config remote.source.annex-readonly true +git config remote.source.annex-tracking-branch main:$REPO # (says to put the files in the \"$REPO\" directory) +git config annex.securehashesonly true +git config annex.genmetadata true +git config annex.diskreserve 100M + +time git annex sync + +# Now set up the clone + +cd $DRIVEBASE/$DRIVE/git-annex +time git clone /acrypt/git-annex/repos/$REPO +cd $REPO +git config annex.thin true +git annex init \"alexandria:Goerzen3\" +time git annex adjust --unlock # NTFS doesn't like symlinks +time git annex sync + +cd /acrypt/git-annex/repos/$REPO +git remote add $DRIVE $DRIVEBASE/$DRIVE/git-annex/$REPO +time git annex sync $DRIVE +git annex group $DRIVE archivedrive +git annex wanted $DRIVE 'include=mtree* or standard' # 'standard and include=mtree*' +time git annex sync + +cd $DRIVEBASE/$DRIVE/git-annex/$REPO +time git annex sync +git annex enableremote source directory=/acrypt/git-annex/bind-ro/$REPO +git config remote.source.annex-readonly true +git config remote.source.annex-tracking-branch main:$REPO # (says to put the files in the \"$REPO\" directory) +git config annex.securehashesonly true +git config annex.genmetadata true +git config annex.diskreserve 100M + +time git annex sync --content +time git annex get --auto + +cd /acrypt/git-annex/repos/$REPO +time git annex sync +``` + +In this script: + +First sync (this is before the initremote so it is doing pretty much nothing): + +real 0m0.075s +user 0m0.027s +sys 0m0.045s + +Second sync (this is after initremote/enableremote): + +real 15m1.417s +user 7m10.770s +sys 5m59.405s + +[ that correlates closely with how long it took to run your script ] + +Third sync (this is after the clone/init/adjust): + +real 0m6.606s +user 0m1.908s +sys 0m4.632s + +Fourth sync (git annex sync $DRIVE): +real 0m1.066s +user 0m0.634s +sys 0m0.647s + +Fifth sync (after the group/wanted): +real 113m32.988s +user 55m21.012s +sys 58m40.752s + +Sixth sync: +real 0m1.180s +user 0m0.819s +sys 0m0.514s + +Seventh sync (git annex sync --content): +real 112m29.594s +user 55m13.650s +sys 57m52.259s + +Eigth sync (back at the original repo): +real 110m3.099s +user 51m32.468s +sys 58m53.885s + +"""]]