From 54ce959986ca97894793b8eb6c4b1f3ea0fdf668 Mon Sep 17 00:00:00 2001 From: binx Date: Fri, 17 Feb 2017 00:01:12 +0000 Subject: [PATCH 1/4] --- doc/forum/dangling_blobs.mdwn | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) create mode 100644 doc/forum/dangling_blobs.mdwn diff --git a/doc/forum/dangling_blobs.mdwn b/doc/forum/dangling_blobs.mdwn new file mode 100644 index 0000000000..dc3cfbd78d --- /dev/null +++ b/doc/forum/dangling_blobs.mdwn @@ -0,0 +1,27 @@ +Revisiting an issue I reported a couple of months ago but never figured out. I am trying to use git annex assistant on two separate machines to automatically mirror files between them. But after I start the second assistant and add new files to the annex, I find that git fsck reports dangling blobs. Is there a conflict between the two assistants? + +On the server: + + $ mkdir ~/annex + $ cd ~/annex + $ git init + $ git annex init u --version=6 + $ echo This is test file 1. >testfile1.txt + $ git annex add testfile1.txt + $ git annex sync + $ git remote add ml2 ssh://laptop/Users/username/annex + $ git annex assistant + +After all that, I do this on the laptop: + + $ cd ~/ + $ git clone ssh://server/home/username/annex + $ cd annex + $ git annex init ml2 --version=6 + $ git annex sync + $ git annex assistant + +At this point git fsck is happy. But when I add files to the annex on either machine and run git fsck, I get messages like: + + Checking object directories: 100% (256/256), done. 
+ dangling blob 31a30177d1e37faf8eac96524302a61713d3d522 From 2f601791bf0b3546b1e44be89742e07b5dc4b3c6 Mon Sep 17 00:00:00 2001 From: yarikoptic Date: Fri, 17 Feb 2017 01:57:11 +0000 Subject: [PATCH 2/4] --- ...n_case_of_failures_into_returned_json.mdwn | 20 +++++++++++++++++++ 1 file changed, 20 insertions(+) create mode 100644 doc/todo/more_of_diagnostic_information_in_case_of_failures_into_returned_json.mdwn diff --git a/doc/todo/more_of_diagnostic_information_in_case_of_failures_into_returned_json.mdwn b/doc/todo/more_of_diagnostic_information_in_case_of_failures_into_returned_json.mdwn new file mode 100644 index 0000000000..b9283ae546 --- /dev/null +++ b/doc/todo/more_of_diagnostic_information_in_case_of_failures_into_returned_json.mdwn @@ -0,0 +1,20 @@ +ATM I am experiencing sporadic failures of the batched git annex addurl call -- seems to report failure (success: False) once in a while, but succeeds on a retry: + +[[!format sh """ +(Pdb) p url +'http://openneuro.s3.amazonaws.com/ds000001/ds000001_R1.1.0/uncompressed/sub016/BOLD/task001_run003/QA/QA_report.pdf?versionId=null' + +(Pdb) p out_json +{u'note': u'from datalad', u'command': u'addurl', u'file': u'ds000001_R1.1.0/uncompressed/sub016/BOLD/task001_run003/QA/QA_report.pdf', u'success': False} + +(Pdb) up +> /home/yoh/proj/datalad/datalad/datalad/support/gitrepo.py(210)newfunc() +-> return func(self, file_new, *args, **kwargs) + +(Pdb) func(self, file_new, *args, **kwargs) +{u'note': u'from datalad', u'file': u'ds000001_R1.1.0/uncompressed/sub016/BOLD/task001_run003/QA/QA_report.pdf', u'command': u'addurl', u'key': u'MD5E-s1191419--cb4efab8104b5117f64b58ee6d6a79ba.pdf', u'success': True} +"""]] + +besides me blindly trying to re-run it e.g. 3 times and only then declare total failure, I wondered if json output could provide more information (if any known) about the failure... e.g. 
if a custom remote crashed/errored (I guess the case here due to "from datalad") -- what was stderr/exit code for that process if crashed/ERROR msg... if wget -- what was stderr there + +[[!meta name=yoh]] From ccdb4bdfa15a84ba1e534b788ac2bc7da5be2831 Mon Sep 17 00:00:00 2001 From: lasitus Date: Fri, 17 Feb 2017 03:23:46 +0000 Subject: [PATCH 3/4] Added a comment --- ..._63b3ec113d52501237abbe6ee1ef5fa5._comment | 45 +++++++++++++++++++ 1 file changed, 45 insertions(+) create mode 100644 doc/bugs/git_annex_assistant_exits_with_failure/comment_7_63b3ec113d52501237abbe6ee1ef5fa5._comment diff --git a/doc/bugs/git_annex_assistant_exits_with_failure/comment_7_63b3ec113d52501237abbe6ee1ef5fa5._comment b/doc/bugs/git_annex_assistant_exits_with_failure/comment_7_63b3ec113d52501237abbe6ee1ef5fa5._comment new file mode 100644 index 0000000000..9206ba3139 --- /dev/null +++ b/doc/bugs/git_annex_assistant_exits_with_failure/comment_7_63b3ec113d52501237abbe6ee1ef5fa5._comment @@ -0,0 +1,45 @@ +[[!comment format=mdwn + username="lasitus" + avatar="http://cdn.libravatar.org/avatar/dfe778f28027aeb75876172022aa5de3" + subject="comment 7" + date="2017-02-17T03:23:46Z" + content=""" +Ok, I have a script that generates the error. This generates a repository and 30 GB of random binary files with many folders 2 layers deep. Just put in an empty folder and run with python. No remotes are necessary. This was run in Windows 10 in a git bash window. 
+ +``` +#!/usr/bin/env python + +import logging +import os +import shutil +import subprocess +import uuid + +logging.basicConfig(level=logging.DEBUG) + +repositoryPath = os.path.abspath(\"./bigRepoTest\") +os.makedirs(repositoryPath) + +subprocess.call(\"git init\", cwd=repositoryPath) +subprocess.call(\"git annex init pc\", cwd=repositoryPath) + +def makeRandomDirectories(level1FolderCount, level2FolderCount, fileCount): + for directoryIndex in range(0, level1FolderCount): + logging.info(\"Adding top level folder \" + str(directoryIndex + 1) + \" of \" + str(level1FolderCount)) + newDirectory = os.path.join(repositoryPath, str(uuid.uuid1())) + os.makedirs(newDirectory) + for directoryIndex in range(0, level2FolderCount): + newNestedDirectory = os.path.join(newDirectory, str(uuid.uuid1())) + os.makedirs(newNestedDirectory) + for fileIndex in range(0, fileCount): + newFile = os.path.join(newNestedDirectory, str(uuid.uuid1()) + \".bin\") + with open(newFile, 'wb') as fileOut: + fileOut.write(os.urandom(500000)) + +makeRandomDirectories(32, 1000, 1) +with open(os.path.join(repositoryPath, \"assistant.log\"), 'w') as output: + subprocess.Popen([\"git\", \"annex\", \"assistant\", \"--debug\"], cwd=repositoryPath, stdout=output, stderr=output) + makeRandomDirectories(32, 1000, 1) + subprocess.call(\"tail -f daemon.log\", cwd=os.path.join(repositoryPath, \".git\", \"annex\")) +``` +"""]] From 7fa132987796dd5a8934d86a5e75fce73b1da531 Mon Sep 17 00:00:00 2001 From: yarikoptic Date: Fri, 17 Feb 2017 15:30:32 +0000 Subject: [PATCH 4/4] initial whining --- ...annex_status_output_of_the_added_file.mdwn | 81 +++++++++++++++++++ 1 file changed, 81 insertions(+) create mode 100644 doc/bugs/mysterious_dependency_of_git_annex_status_output_of_the_added_file.mdwn diff --git a/doc/bugs/mysterious_dependency_of_git_annex_status_output_of_the_added_file.mdwn b/doc/bugs/mysterious_dependency_of_git_annex_status_output_of_the_added_file.mdwn new file mode 100644 index 
0000000000..fbdf80e5f5 --- /dev/null +++ b/doc/bugs/mysterious_dependency_of_git_annex_status_output_of_the_added_file.mdwn @@ -0,0 +1,81 @@ +### Please describe the problem. + +in v6 mode -- Result depends on having a good sleep before running 'git annex add'. + +Without sleep, git annex manages first to stage file to be committed into git, but then also modifies it to be added into annex (this is not shown above -- just inspect that repository obtained without having any sleep) + +I guess relates to http://git-annex.branchable.com/bugs/Too_difficult_if_not_impossible_to_explicitly_add__47__keep_file_under_git___40__not_annex__41___in_v6_without_employing_.gitattributes/ + +### What steps will reproduce the problem? + +Run http://www.onerussian.com/tmp/ga-3.sh twice: once giving 0 secs to sleep, and then 1 (or about 0.3 might work as well) + + +### What version of git-annex are you using? On what operating system? + +6.20170209+gitg16be7b5cc-1~ndall+1 + +### Please provide any additional information below. + +if we just proceed with the script (init, add, status) without any delays -- git annex status would report it +[[!format sh """ +$> ./ga-3.sh 0 ++ s=0 +++ mktemp -d ++ d=/home/yoh/.tmp/tmp.d6g0E7scxt ++ echo 'directory: /home/yoh/.tmp/tmp.d6g0E7scxt' +directory: /home/yoh/.tmp/tmp.d6g0E7scxt ++ cd /home/yoh/.tmp/tmp.d6g0E7scxt ++ git init +Initialized empty Git repository in /tmp/tmp.d6g0E7scxt/.git/ ++ git annex init --version=6 +init ok +(recording state in git...) 
++ sed -i -e 's,pre-commit ,pre-commit --debug ,g' .git/hooks/pre-commit ++ echo 'I: creating a file' +I: creating a file ++ echo whatever ++ sleep 0 ++ git -c annex.largefiles=nothing annex --debug add file5 +[2017-02-17 10:19:48.91932971] read: git ["--git-dir=.git","--work-tree=.","--literal-pathspecs","ls-files","--others","--exclude-standard","-z","--","file5"] +add file5 (non-large file; adding content to git repository) ok +[2017-02-17 10:19:48.923428344] read: git ["--git-dir=.git","--work-tree=.","--literal-pathspecs","ls-files","--modified","-z","--","file5"] +(recording state in git...) +[2017-02-17 10:19:48.927922289] feed: xargs ["-0","git","--git-dir=.git","--work-tree=.","--literal-pathspecs","add","--"] +[2017-02-17 10:19:48.956812867] process done ExitSuccess ++ git annex status +M file5 +"""]] + +And if we wait just a bit before running add -- we would get it reported added +[[!format sh """ +hopa:~/.tmp +$> ./ga-3.sh 1 ++ s=1 +++ mktemp -d ++ d=/home/yoh/.tmp/tmp.4I7ym6dSx2 ++ echo 'directory: /home/yoh/.tmp/tmp.4I7ym6dSx2' +directory: /home/yoh/.tmp/tmp.4I7ym6dSx2 ++ cd /home/yoh/.tmp/tmp.4I7ym6dSx2 ++ git init +Initialized empty Git repository in /tmp/tmp.4I7ym6dSx2/.git/ ++ git annex init --version=6 +init ok +(recording state in git...) ++ sed -i -e 's,pre-commit ,pre-commit --debug ,g' .git/hooks/pre-commit ++ echo 'I: creating a file' +I: creating a file ++ echo whatever ++ sleep 1 ++ git -c annex.largefiles=nothing annex --debug add file5 +[2017-02-17 10:19:52.529445464] read: git ["--git-dir=.git","--work-tree=.","--literal-pathspecs","ls-files","--others","--exclude-standard","-z","--","file5"] +add file5 (non-large file; adding content to git repository) ok +[2017-02-17 10:19:52.533532166] read: git ["--git-dir=.git","--work-tree=.","--literal-pathspecs","ls-files","--modified","-z","--","file5"] +(recording state in git...) 
+[2017-02-17 10:19:52.537789158] feed: xargs ["-0","git","--git-dir=.git","--work-tree=.","--literal-pathspecs","add","--"] +[2017-02-17 10:19:52.567222419] process done ExitSuccess ++ git annex status +A file5 +"""]] + +[[!meta author=yoh]]