From c4374e4aeeddbe343c6e81e05232baf08767bdb2 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Tue, 22 Oct 2019 14:36:31 -0400 Subject: [PATCH] followup --- ..._5310b510f0835a3339ba46b43992a300._comment | 10 +++++ ..._d09ac8233492cdc3c5c6dd21faae62b8._comment | 13 ++++++ ..._9bb17be5839d28ba8b61d380d95c15b2._comment | 16 +++++++ ..._8adbbd37b3720d66a3cffac05188590a._comment | 12 +++++ ...an_filter_for_less_surprising_git_add.mdwn | 44 +++++++++++++++++++ ..._876d01d37dbad664b064428da65bd910._comment | 11 +++++ 6 files changed, 106 insertions(+) create mode 100644 doc/forum/lets_discuss_git_add_behavior/comment_20_5310b510f0835a3339ba46b43992a300._comment create mode 100644 doc/forum/lets_discuss_git_add_behavior/comment_21_d09ac8233492cdc3c5c6dd21faae62b8._comment create mode 100644 doc/forum/lets_discuss_git_add_behavior/comment_22_9bb17be5839d28ba8b61d380d95c15b2._comment create mode 100644 doc/forum/lets_discuss_git_add_behavior/comment_23_8adbbd37b3720d66a3cffac05188590a._comment create mode 100644 doc/todo/inode_based_clean_filter_for_less_surprising_git_add.mdwn create mode 100644 doc/todo/separate_annex.largefiles.git-add_and_annex.largefiles.git-annex-add_settings/comment_2_876d01d37dbad664b064428da65bd910._comment diff --git a/doc/forum/lets_discuss_git_add_behavior/comment_20_5310b510f0835a3339ba46b43992a300._comment b/doc/forum/lets_discuss_git_add_behavior/comment_20_5310b510f0835a3339ba46b43992a300._comment new file mode 100644 index 0000000000..7605e848f4 --- /dev/null +++ b/doc/forum/lets_discuss_git_add_behavior/comment_20_5310b510f0835a3339ba46b43992a300._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 20""" + date="2019-10-22T17:14:24Z" + content=""" +Can we please not use language like "hijacked" and "man in the middle +attack" about this. + +At least, not if you want me to engage constructively with this thread. 
+"""]] diff --git a/doc/forum/lets_discuss_git_add_behavior/comment_21_d09ac8233492cdc3c5c6dd21faae62b8._comment b/doc/forum/lets_discuss_git_add_behavior/comment_21_d09ac8233492cdc3c5c6dd21faae62b8._comment new file mode 100644 index 0000000000..6ed14d6372 --- /dev/null +++ b/doc/forum/lets_discuss_git_add_behavior/comment_21_d09ac8233492cdc3c5c6dd21faae62b8._comment @@ -0,0 +1,13 @@ +[[!comment format=mdwn + username="joey" + subject="""re: can git-annex-pre-commit annex files?""" + date="2019-10-22T17:45:43Z" + content=""" +Ilya, by the time the pre-commit hook runs, `git add` would have already +written the large file into the object file, so stuff like `git gc` would +pay the price of it even if it were kept out of a commit. + +In other words, that has the same problems that v5 unlocked files had when +git add or git commit was run on them. I've seen plenty of users bitten by +that with v5. Fixing that problem was a (minor) motivation for v7. +"""]] diff --git a/doc/forum/lets_discuss_git_add_behavior/comment_22_9bb17be5839d28ba8b61d380d95c15b2._comment b/doc/forum/lets_discuss_git_add_behavior/comment_22_9bb17be5839d28ba8b61d380d95c15b2._comment new file mode 100644 index 0000000000..57b166d7ff --- /dev/null +++ b/doc/forum/lets_discuss_git_add_behavior/comment_22_9bb17be5839d28ba8b61d380d95c15b2._comment @@ -0,0 +1,16 @@ +[[!comment format=mdwn + username="joey" + subject="""re: comment 4""" + date="2019-10-22T17:49:43Z" + content=""" +> maybe, git-annex could keep track of local unlocked files by inode, not just by path name? + +That's an interesting idea. If it could be made to work well, I think it +would address my concerns from comment 2 while freeing `git add` to +otherwise behave however it might be desired to behave by the user. + +I've expanded on the idea in +[[todo/inode_based_clean_filter_for_less_surprising_git_add]] + +Thanks! 
+"""]] diff --git a/doc/forum/lets_discuss_git_add_behavior/comment_23_8adbbd37b3720d66a3cffac05188590a._comment b/doc/forum/lets_discuss_git_add_behavior/comment_23_8adbbd37b3720d66a3cffac05188590a._comment new file mode 100644 index 0000000000..a9f22d6c74 --- /dev/null +++ b/doc/forum/lets_discuss_git_add_behavior/comment_23_8adbbd37b3720d66a3cffac05188590a._comment @@ -0,0 +1,12 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 23""" + date="2019-10-22T18:16:15Z" + content=""" +Several commenters seem to be under the misapprehension that `git add` of a +modified file that is stored in git before will annex the new version. It +does not. That case is already handled, by git-annex noticing if the old +file was annexed, and if not, letting git add it to git as usual +(unless annex.largefiles is configured, in which case it uses that +configuration). +"""]] diff --git a/doc/todo/inode_based_clean_filter_for_less_surprising_git_add.mdwn b/doc/todo/inode_based_clean_filter_for_less_surprising_git_add.mdwn new file mode 100644 index 0000000000..b373cb977c --- /dev/null +++ b/doc/todo/inode_based_clean_filter_for_less_surprising_git_add.mdwn @@ -0,0 +1,44 @@ +[[forum/lets_discuss_git_add_behavior]] shows that v7 git add +behavior when annex.largefiles is not configured is surprising to many +users. + +As described in comment 2 on that thread, a major driver of `git add` +adding files to the annex by default is that it's just as surprising for +annexed files to get added to git, and that surprise is much harder to +recover from. Two main cases are: + + git annex add bigfile; git annex unlock bigfile; mv bigfile newname; git add . + + git annex add bigfile; git annex unlock bigfile; git commit; modify bigfile; git commit -a + +The modify case is already handled; git-annex checks if bigfile was annexed +before, and if so, it knows it needs to be annexed again. (Although +annex.largefiles overrides that check.) 
+ +Ilya suggested an improvement that solves the rename case: +Since git-annex has a record of the inode of bigfile, it can check if the +new file has the same inode. If so, the user renamed it, so add it to the +annex not to git. + +That frees git-annex to let `git add` behave as usual and not annex files +otherwise, unless the user has indicated they always want to annex files by +configuring annex.largefiles or whatever. + +Cases where a file gets added to git accidentally seem to then be limited +to a modify+rename: + + git annex add bigfile; git annex unlock bigfile; git commit; modify bigfile; mv bigfile newname; git add . + +Pretty uncommon case, and easy to argue that the user shot their own +foot there; there's no way for git-annex to know that the modified renamed +file has its origin in an annexed file. So seems acceptable. + +The inodes of all unlocked files are known, via the InodeCache stored in +the keys database. Unfortunately there is not an index to make queries for +inodes be fast. One would need to be added, at least eventually. +[[todo/sqlite_database_improvements]] discusses how to improve the +databases. + +Some filesystems don't have stable inodes etc, but all that is already +handled by the InodeCache machinery, so I think this could work pretty +well. 
--[[Joey]] diff --git a/doc/todo/separate_annex.largefiles.git-add_and_annex.largefiles.git-annex-add_settings/comment_2_876d01d37dbad664b064428da65bd910._comment b/doc/todo/separate_annex.largefiles.git-add_and_annex.largefiles.git-annex-add_settings/comment_2_876d01d37dbad664b064428da65bd910._comment new file mode 100644 index 0000000000..560fcb98a7 --- /dev/null +++ b/doc/todo/separate_annex.largefiles.git-add_and_annex.largefiles.git-annex-add_settings/comment_2_876d01d37dbad664b064428da65bd910._comment @@ -0,0 +1,11 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 2""" + date="2019-10-22T18:29:44Z" + content=""" +[[todo/inode_based_clean_filter_for_less_surprising_git_add]] +seems to open the door to adding such a config as this. + +Although, if that were implemented, I suspect that demand for such a config +might dry up.. +"""]]