From 195c403ae3c112b6c1792b2620e784ce2bcfcef4 Mon Sep 17 00:00:00 2001 From: "http://joeyh.name/" Date: Fri, 1 Aug 2014 22:16:29 +0000 Subject: [PATCH 01/56] removed --- ...t_21_5c11e69c28b9ed4cbe238a36c0839a47._comment | 15 --------------- 1 file changed, 15 deletions(-) delete mode 100644 doc/special_remotes/comment_21_5c11e69c28b9ed4cbe238a36c0839a47._comment diff --git a/doc/special_remotes/comment_21_5c11e69c28b9ed4cbe238a36c0839a47._comment b/doc/special_remotes/comment_21_5c11e69c28b9ed4cbe238a36c0839a47._comment deleted file mode 100644 index 1645e03e68..0000000000 --- a/doc/special_remotes/comment_21_5c11e69c28b9ed4cbe238a36c0839a47._comment +++ /dev/null @@ -1,15 +0,0 @@ -[[!comment format=mdwn - username="http://joeyh.name/" - ip="209.250.56.64" - subject="comment 21" - date="2013-11-24T15:58:30Z" - content=""" -@Bence the closest I have is some tests of particular special remotes inside Test.hs. The shell equivilant of that code is: - -[[!format sh \"\"\" -set -e -git annex copy file --to remote # tests store -git annex drop file # tests checkpresent when remote has file -git annex move file --from remote # tests retrieve and remove -\"\"\"]] -"""]] From 25ac3a4c4ad72ca5acca02dab84ad98de0578b98 Mon Sep 17 00:00:00 2001 From: "https://id.koumbit.net/anarcat" Date: Sat, 2 Aug 2014 02:46:07 +0000 Subject: [PATCH 02/56] some progress. maybe. --- ...usability:_what_are_those_arrow_things__63__.mdwn | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/doc/forum/usability:_what_are_those_arrow_things__63__.mdwn b/doc/forum/usability:_what_are_those_arrow_things__63__.mdwn index ac83430408..cde7d8d505 100644 --- a/doc/forum/usability:_what_are_those_arrow_things__63__.mdwn +++ b/doc/forum/usability:_what_are_those_arrow_things__63__.mdwn @@ -1,6 +1,8 @@ +# Introduction + i want to relate a usability story that happens fairly regularly when I show git-annex to people. the story goes like this. ----- +# The story Antoine sat down at his computer saying, "i have this great movie collection I want to share with you, my friend, because the fair use provisions allow for that, and I use this great git-annex tool that allows me to sync my movie collection between different places". His friend Charlie, a Linux user only vaguely familiar with the internals of how his operating system or legal system actually works, reads this as "yay free movies" and wholeheartedly agrees to lend himself to the experiment. @@ -10,7 +12,7 @@ Charlie logs into Antoine's computer, named `marcos`. Antoine shows Charlie wher Antoine then has no solution but to convert the git-annex repository into direct mode, something which takes a significant amount of time and is actually [[designated as "untrusted"|direct_mode]] in the documentation. In fact, so much so that he actually did [[screw up his repository magnificently|bugs/direct_command_leaves_repository_inconsistent_if_interrupted]] because he freaked out when `git-annex direct` started and interrupted it because he tought it would take too long. ----- +# Technical analysis Now I understand it is not necessarily `git-annex`'s responsability if Thunar (or Nautilus, for that matter), doesn't know how to properly deal with symlinks (hint: just dereference the damn thing already). Maybe I should file a bug about this against thunar? I also understand that symlinks are useful to ensure the security of the data hosted in `git-annex`, and that I could have used direct mode in the first place. But I like to track changes in git to those files, and direct mode makes that really difficult. @@ -19,3 +21,9 @@ I didn't file this as a bug because I want to start the conversation, but maybe (The other being "how do i actually use git annex to sync those files instead of just copying them by hand", but that's for another story!) -- [[anarcat]] + +# Followup + +Here is a bug report filed against Thunar, with a patch to fix this behavior: https://bugzilla.xfce.org/show_bug.cgi?id=11065 + +Similar bugs would need to be filed against Nautilus, at the very least, but probably other file managers, which makes this task a little daunting, to say the least. -- [[anarcat]] From 670f4d43e3e76bd42fea4d5e725997c97a945f4e Mon Sep 17 00:00:00 2001 From: zardoz Date: Sat, 2 Aug 2014 12:21:38 +0000 Subject: [PATCH 03/56] --- ...r_depending_on_working_dir_during_add.mdwn | 57 +++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 doc/bugs/WORM_keys_differ_depending_on_working_dir_during_add.mdwn diff --git a/doc/bugs/WORM_keys_differ_depending_on_working_dir_during_add.mdwn b/doc/bugs/WORM_keys_differ_depending_on_working_dir_during_add.mdwn new file mode 100644 index 0000000000..e412201141 --- /dev/null +++ b/doc/bugs/WORM_keys_differ_depending_on_working_dir_during_add.mdwn @@ -0,0 +1,57 @@ +### Please describe the problem. + +While the docs say that WORM keys are a function of a files basename, +when doing «git annex add .», the generated keys will actually contain +the relative path (with slashes escaped). Not sure whether this is by +design or a bug in its own right. I suppose that to minimize the chance +of collisions on WORM, having the path within the key is preferable. + +A problem about this, however, is that the path in the key is not +stable, but varies with the working dir when doing the «git annex +add». So, when a file is added from one working dir (say, the repo +base), later unlocked, and readded from another working dir (say, +somewhere below the repo base), this will generate a different key +even when the file has not been touched. + +Is there a rationale for this variability, or should «add» canonicalize +the encoded paths to the repo root? + + +### What steps will reproduce the problem? + + +[[!format sh """ + +# Init +$ git init /tmp/foo +$ cd /tmp/foo && git annex init + +$ mkdir baz +$ touch baz/quux + +# Add file with working dir at repo root. +$ git annex add --backend=WORM baz +$ git commit -m "first" + +# Key includes relative path. +$ readlink baz/quux +../.git/annex/objects/8x/8V/WORM-s0-m1406981486--baz%quux/WORM-s0-m1406981486--baz%quux + +# Unlock and readd with working dir at path below repo root. +$ cd baz +$ git annex unlock quux + +$ git annex add quux +$ git com -m "second" + +# Relative path is anchored to working dir instead of repo root. +$ readlink quux +../.git/annex/objects/9G/72/WORM-s0-m1406981486--quux/WORM-s0-m1406981486--quux + +# End of transcript or log. +"""]] + +### What version of git-annex are you using? On what operating system? +Linux 3.15.8 + +git-annex 5.20140716 From 5e2dd2ea865562fae4eb99e1820a8c632e5bb723 Mon Sep 17 00:00:00 2001 From: zardoz Date: Sat, 2 Aug 2014 14:29:26 +0000 Subject: [PATCH 04/56] Added a comment --- .../comment_2_975edca7ec87158216d9e106903dfb48._comment | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 doc/todo/wishlist:_annex.largefiles_support_for_mimetypes/comment_2_975edca7ec87158216d9e106903dfb48._comment diff --git a/doc/todo/wishlist:_annex.largefiles_support_for_mimetypes/comment_2_975edca7ec87158216d9e106903dfb48._comment b/doc/todo/wishlist:_annex.largefiles_support_for_mimetypes/comment_2_975edca7ec87158216d9e106903dfb48._comment new file mode 100644 index 0000000000..ba88bfb7c6 --- /dev/null +++ b/doc/todo/wishlist:_annex.largefiles_support_for_mimetypes/comment_2_975edca7ec87158216d9e106903dfb48._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="zardoz" + ip="78.48.163.229" + subject="comment 2" + date="2014-08-02T14:29:26Z" + content=""" +This could be achieved in a generic way by allowing filter binaries in expressions, which are run on the filename and return 0 or 1. +"""]] From 49db204da4f72bc2355b330d9d2da262f0569532 Mon Sep 17 00:00:00 2001 From: divB Date: Sat, 2 Aug 2014 20:10:12 +0000 Subject: [PATCH 05/56] --- ...to_repair_hangs__44___broken_symlinks.mdwn | 41 +++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 doc/forum/Pusher_crashed__44___attempt_to_repair_hangs__44___broken_symlinks.mdwn diff --git a/doc/forum/Pusher_crashed__44___attempt_to_repair_hangs__44___broken_symlinks.mdwn b/doc/forum/Pusher_crashed__44___attempt_to_repair_hangs__44___broken_symlinks.mdwn new file mode 100644 index 0000000000..8688820cf9 --- /dev/null +++ b/doc/forum/Pusher_crashed__44___attempt_to_repair_hangs__44___broken_symlinks.mdwn @@ -0,0 +1,41 @@ +Sorry that I put all this in the same thread but I don't know what happened and how it is related. + +I have just a simple setup: git-annex client with assistant (Windows 7) and on a server (Debian). + +Suddenly weird things started to happen + +1.) On Windows, when I start the assistant, it writes "Attempting to repair THINKTANK:c:\data\annex [here]" but it runs forever and never stops + +2.) On Windows, when I get "Pusher crashed: failed to read sha from git write-tree [Restart Thread]". When I click "Restart Thread" nothing happens but the message from (1) persists. + +3.) When I run "git annex fsck" on the client I get thousands of messages like + fsck Fotos/2014/DSC_0303.JPG + ** No known copies exist of Fotos/2014/DSC_0303.JPG + failed + +Here the same: + + $ git annex whereis "Fotos/2014/DSC_0303.JPG" + whereis Fotos/2014/DSC_0303.JPG (0 copies) failed + git-annex: whereis: 1 failed + +4.) On the server, files that should ALWAYS be on the server (configured as "full backup") suddenly wiped data that was also made available on the client. The symlinks are dangling symlinks and contain just binary data: + + ls -l + lrwxrwxrwx 1 4 Aug 2 08:55 DSC_0011.JPG -> ???? + lrwxrwxrwx 1 4 Aug 2 08:55 DSC_0012.JPG -> ???? + lrwxrwxrwx 1 4 Aug 2 08:55 DSC_0013.JPG -> ???? + lrwxrwxrwx 1 4 Aug 2 08:55 DSC_0014.JPG -> ???? + lrwxrwxrwx 1 4 Aug 2 08:55 DSC_0015.JPG -> ???? + lrwxrwxrwx 1 4 Aug 2 08:55 DSC_0018.JPG -> ???? + lrwxrwxrwx 1 4 Aug 2 08:55 DSC_0019.JPG -> ???? + lrwxrwxrwx 1 4 Aug 2 08:55 DSC_0020.JPG -> ???? + +5.) "git annex fsck" on the server is still successful, returning no errors! + +6.) Manually executing "git annex sync --content" on both sides does not change anything and does not output any error messages. + +Oh no, so desparate :-( Any ideas? + +(I am happy to share all log files privately but I do not want to publish them here because they contain sensitive data) + From 7d015c8eef404ccae30269ad8911dc09da9aba56 Mon Sep 17 00:00:00 2001 From: divB Date: Sat, 2 Aug 2014 20:12:42 +0000 Subject: [PATCH 06/56] --- ...4___attempt_to_repair_hangs__44___broken_symlinks.mdwn | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/doc/forum/Pusher_crashed__44___attempt_to_repair_hangs__44___broken_symlinks.mdwn b/doc/forum/Pusher_crashed__44___attempt_to_repair_hangs__44___broken_symlinks.mdwn index 8688820cf9..56666a49a9 100644 --- a/doc/forum/Pusher_crashed__44___attempt_to_repair_hangs__44___broken_symlinks.mdwn +++ b/doc/forum/Pusher_crashed__44___attempt_to_repair_hangs__44___broken_symlinks.mdwn @@ -19,7 +19,9 @@ Here the same: whereis Fotos/2014/DSC_0303.JPG (0 copies) failed git-annex: whereis: 1 failed -4.) On the server, files that should ALWAYS be on the server (configured as "full backup") suddenly wiped data that was also made available on the client. The symlinks are dangling symlinks and contain just binary data: +4.) When I do "git annex status" a whole bunch of files are displayed with "M" (modified) although they are not, they are not even checked out and should be only at the server ... + +5.) On the server, files that should ALWAYS be on the server (configured as "full backup") suddenly wiped data that was also made available on the client. The symlinks are dangling symlinks and contain just binary data: ls -l lrwxrwxrwx 1 4 Aug 2 08:55 DSC_0011.JPG -> ???? @@ -31,9 +33,9 @@ Here the same: lrwxrwxrwx 1 4 Aug 2 08:55 DSC_0019.JPG -> ???? lrwxrwxrwx 1 4 Aug 2 08:55 DSC_0020.JPG -> ???? -5.) "git annex fsck" on the server is still successful, returning no errors! +6.) "git annex fsck" on the server is still successful, returning no errors! -6.) Manually executing "git annex sync --content" on both sides does not change anything and does not output any error messages. +7.) Manually executing "git annex sync --content" on both sides does not change anything and does not output any error messages. Oh no, so desparate :-( Any ideas? From 84b5d1241c77fdf6d96d299e7daf41668719f6e3 Mon Sep 17 00:00:00 2001 From: divB Date: Sat, 2 Aug 2014 20:17:05 +0000 Subject: [PATCH 07/56] --- ...t_to_repair_hangs__44___broken_symlinks.mdwn | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/doc/forum/Pusher_crashed__44___attempt_to_repair_hangs__44___broken_symlinks.mdwn b/doc/forum/Pusher_crashed__44___attempt_to_repair_hangs__44___broken_symlinks.mdwn index 56666a49a9..f25041165d 100644 --- a/doc/forum/Pusher_crashed__44___attempt_to_repair_hangs__44___broken_symlinks.mdwn +++ b/doc/forum/Pusher_crashed__44___attempt_to_repair_hangs__44___broken_symlinks.mdwn @@ -37,6 +37,23 @@ Here the same: 7.) Manually executing "git annex sync --content" on both sides does not change anything and does not output any error messages. +8.) On the client: + + $ git annex group here + error: invalid object 100644 3b3767ae65e5c6d2e3835af3d55fbf2f9e145c8b for '000/0e6/SHA256Es193806--b6d4689fba8e15acd6497f9a7e584c93ea0c8c2199ad32eadac79d59b9f49814.JPG.log' + fatal: git-write-tree: error building trees + manual + (Recording state in git...) + git-annex: failed to read sha from git write-tree + + $ git annex wanted here + error: invalid object 100644 3b3767ae65e5c6d2e3835af3d55fbf2f9e145c8b for '000/0e6/SHA256Es193806--b6d4689fba8e15acd6497f9a7e584c93ea0c8c2199ad32eadac79d59b9f49814.JPG.log' + fatal: git-write-tree: error building trees + exclude="*" and present + git-annex: failed to read sha from git write-tree + + + Oh no, so desparate :-( Any ideas? (I am happy to share all log files privately but I do not want to publish them here because they contain sensitive data) From a8b0311d8af4301fa24c212214125d820efb92c2 Mon Sep 17 00:00:00 2001 From: divB Date: Sat, 2 Aug 2014 20:23:10 +0000 Subject: [PATCH 08/56] --- ...attempt_to_repair_hangs__44___broken_symlinks.mdwn | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/doc/forum/Pusher_crashed__44___attempt_to_repair_hangs__44___broken_symlinks.mdwn b/doc/forum/Pusher_crashed__44___attempt_to_repair_hangs__44___broken_symlinks.mdwn index f25041165d..91fbfbc7bb 100644 --- a/doc/forum/Pusher_crashed__44___attempt_to_repair_hangs__44___broken_symlinks.mdwn +++ b/doc/forum/Pusher_crashed__44___attempt_to_repair_hangs__44___broken_symlinks.mdwn @@ -52,6 +52,17 @@ Here the same: exclude="*" and present git-annex: failed to read sha from git write-tree +9.) Ok I don't know what happened I did nothing special but it seems that the repository is broken :( :( + + git annex repair + Running git fsck ... + git-annex: DeleteFile "C:\\Data\\annex\\.git\\objects\\2a\\54bb281c80c91ea7a732c0d48db0c5acc0ca2c": permission denied (Access is denied.) + failed + git-annex: repair: 1 failed + +But that's not true, I can read and delete that file manually + + Oh no, so desparate :-( Any ideas? From 9973dc85e620788a9372de89bd41d5385ac6b5f6 Mon Sep 17 00:00:00 2001 From: divB Date: Sat, 2 Aug 2014 20:24:15 +0000 Subject: [PATCH 09/56] --- ...shed__44___attempt_to_repair_hangs__44___broken_symlinks.mdwn | 1 + 1 file changed, 1 insertion(+) diff --git a/doc/forum/Pusher_crashed__44___attempt_to_repair_hangs__44___broken_symlinks.mdwn b/doc/forum/Pusher_crashed__44___attempt_to_repair_hangs__44___broken_symlinks.mdwn index 91fbfbc7bb..f83a85ff43 100644 --- a/doc/forum/Pusher_crashed__44___attempt_to_repair_hangs__44___broken_symlinks.mdwn +++ b/doc/forum/Pusher_crashed__44___attempt_to_repair_hangs__44___broken_symlinks.mdwn @@ -9,6 +9,7 @@ Suddenly weird things started to happen 2.) On Windows, when I get "Pusher crashed: failed to read sha from git write-tree [Restart Thread]". When I click "Restart Thread" nothing happens but the message from (1) persists. 3.) When I run "git annex fsck" on the client I get thousands of messages like + fsck Fotos/2014/DSC_0303.JPG ** No known copies exist of Fotos/2014/DSC_0303.JPG failed From 668e67719a1f4ada1016cd378cf123c438611828 Mon Sep 17 00:00:00 2001 From: divB Date: Sat, 2 Aug 2014 20:29:06 +0000 Subject: [PATCH 10/56] --- ...4___attempt_to_repair_hangs__44___broken_symlinks.mdwn | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/doc/forum/Pusher_crashed__44___attempt_to_repair_hangs__44___broken_symlinks.mdwn b/doc/forum/Pusher_crashed__44___attempt_to_repair_hangs__44___broken_symlinks.mdwn index f83a85ff43..10fd5d6aaf 100644 --- a/doc/forum/Pusher_crashed__44___attempt_to_repair_hangs__44___broken_symlinks.mdwn +++ b/doc/forum/Pusher_crashed__44___attempt_to_repair_hangs__44___broken_symlinks.mdwn @@ -55,13 +55,17 @@ Here the same: 9.) Ok I don't know what happened I did nothing special but it seems that the repository is broken :( :( - git annex repair + $ git annex --verbose --debug repair + [...] + [2014-08-02 13:27:38 Pacific Daylight Time] read: git ["--git-dir=C:\\Data\\annex\\.git","--work-tree=C:\\Data\\annex","-c","core.bare=false","show","ef3fe549f457783dbbd877b467b4e54b0ebc813c"] Running git fsck ... + git-annex: DeleteFile "C:\\Data\\annex\\.git\\objects\\2a\\54bb281c80c91ea7a732c0d48db0c5acc0ca2c": permission denied (Access is denied.) failed git-annex: repair: 1 failed -But that's not true, I can read and delete that file manually +But this file exists, I can read, write and delete to this file manually, there is definitely no permission denied ... + From 8fbdcd7d9f030e821b1981056947ee72e6b9c95d Mon Sep 17 00:00:00 2001 From: divB Date: Sat, 2 Aug 2014 22:57:48 +0000 Subject: [PATCH 11/56] --- ...44___attempt_to_repair_hangs__44___broken_symlinks.mdwn | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/doc/forum/Pusher_crashed__44___attempt_to_repair_hangs__44___broken_symlinks.mdwn b/doc/forum/Pusher_crashed__44___attempt_to_repair_hangs__44___broken_symlinks.mdwn index 10fd5d6aaf..4c1bcc129b 100644 --- a/doc/forum/Pusher_crashed__44___attempt_to_repair_hangs__44___broken_symlinks.mdwn +++ b/doc/forum/Pusher_crashed__44___attempt_to_repair_hangs__44___broken_symlinks.mdwn @@ -1,6 +1,6 @@ Sorry that I put all this in the same thread but I don't know what happened and how it is related. -I have just a simple setup: git-annex client with assistant (Windows 7) and on a server (Debian). +I have just a simple setup: git-annex client with assistant (Windows 7) and on a server (Debian, no assistant). Suddenly weird things started to happen @@ -68,9 +68,10 @@ But this file exists, I can read, write and delete to this file manually, there - - Oh no, so desparate :-( Any ideas? +As it seems the client repository is broken but how can it be then that also files on the server repository get deleted which shouldn't be deleted? +And how can it be that there are not only broken symlinks but symlinks that have just binary garbage as target and "fsck" returns success? + (I am happy to share all log files privately but I do not want to publish them here because they contain sensitive data) From 3f8605870e8d46c81595d6fb3fb40208fbe45caa Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Sat, 2 Aug 2014 19:05:16 -0400 Subject: [PATCH 12/56] devblog --- doc/devblog/day_209__mass_conversion.mdwn | 28 +++++++++++++++++++++++ 1 file changed, 28 insertions(+) create mode 100644 doc/devblog/day_209__mass_conversion.mdwn diff --git a/doc/devblog/day_209__mass_conversion.mdwn b/doc/devblog/day_209__mass_conversion.mdwn new file mode 100644 index 0000000000..6278b191a4 --- /dev/null +++ b/doc/devblog/day_209__mass_conversion.mdwn @@ -0,0 +1,28 @@ +Have started converting lots of special remotes to the new API. Today, S3 +and hook got chunking support. I also converted several remotes to the new +API without supporting chunking: bup, ddar, and glacier (which should +support chunking, but there were complications). + +This removed 110 lines of code while adding features! And, +I seem to be able to convert them faster than `testremote` can test them. :) + +Now that S3 supports chunks, they can be used to work around several +problems with S3 remotes, including file size limits, and a memory leak in +the underlying S3 library. + +The S3 conversion included caching of the S3 connection when +storing/retrieving chunks. But the API doesn't yet support caching +when removing or checking if chunks are present. I should probably expand +the API, but got into some type checker messes when using generic enough +data types to support everything. Should probably switch to `ResourceT`. + +Also, I tried, but failed to make `testremote` check that storing a key +is done atomically. The best I could come up with was a test that stored a +key and had another thread repeatedly check if the object was present on +the remote, logging the results and timestamps. It then becomes a +statistical problem -- somewhere toward the end of the log it's ok if the key +has become present -- but too early might indicate that it wasn't stored +atomically. Perhaps it's my poor knowledge of statistics, but I could not +find a way to analize the log that reliably detected non-atomic storage. +If someone would like to try to work on this, see the `atomic-store-test` +branch. From 2ca8ddca0f893ebe53e82c7a66d1fe328da584f2 Mon Sep 17 00:00:00 2001 From: "http://joeyh.name/" Date: Sat, 2 Aug 2014 23:08:44 +0000 Subject: [PATCH 13/56] Added a comment --- ...mment_3_5e9cecb0e2ec7602963406779b6e3c1f._comment | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 doc/bugs/S3_memory_leaks/comment_3_5e9cecb0e2ec7602963406779b6e3c1f._comment diff --git a/doc/bugs/S3_memory_leaks/comment_3_5e9cecb0e2ec7602963406779b6e3c1f._comment b/doc/bugs/S3_memory_leaks/comment_3_5e9cecb0e2ec7602963406779b6e3c1f._comment new file mode 100644 index 0000000000..a7bb0265fe --- /dev/null +++ b/doc/bugs/S3_memory_leaks/comment_3_5e9cecb0e2ec7602963406779b6e3c1f._comment @@ -0,0 +1,12 @@ +[[!comment format=mdwn + username="http://joeyh.name/" + ip="209.250.56.112" + subject="comment 3" + date="2014-08-02T23:08:44Z" + content=""" +hS3's author seems to have abandoned it and it has other problems. I should try to switch to a different S3 library. + +There is now a workaround; S3 special remotes can be configured to use [[chunking]]. A max of one chunk will then be buffered in memory at a time. + +For example, to reconfigure an existing mys3 remote: `enableremote mys3 chunk=1MiB` +"""]] From 51f774f8c8082221ed1548eba76382ce6444a841 Mon Sep 17 00:00:00 2001 From: "http://joeyh.name/" Date: Sat, 2 Aug 2014 23:13:41 +0000 Subject: [PATCH 14/56] Added a comment --- ...ent_3_d878b87a05f4fcd380e6ff309b615aab._comment | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100644 doc/bugs/S3_upload_not_using_multipart/comment_3_d878b87a05f4fcd380e6ff309b615aab._comment diff --git a/doc/bugs/S3_upload_not_using_multipart/comment_3_d878b87a05f4fcd380e6ff309b615aab._comment b/doc/bugs/S3_upload_not_using_multipart/comment_3_d878b87a05f4fcd380e6ff309b615aab._comment new file mode 100644 index 0000000000..46245e657c --- /dev/null +++ b/doc/bugs/S3_upload_not_using_multipart/comment_3_d878b87a05f4fcd380e6ff309b615aab._comment @@ -0,0 +1,14 @@ +[[!comment format=mdwn + username="http://joeyh.name/" + ip="209.250.56.112" + subject="comment 3" + date="2014-08-02T23:13:41Z" + content=""" +There is now a workaround; S3 special remotes can be configured to use [[chunking]]. + +For example, to reconfigure an existing mys3 remote: `enableremote mys3 chunk=1MiB` + +I'm leaving this bug open because chunking is not the default (although the assistant does enable it by default), and because this chunking operates at a higher, and less efficient level than S3's own multipart upload API. In particular, AWS will charge a fee for each http request made for a chunk. + +Adding proper multipart support will probably require switching to a different S3 library. +"""]] From 42655caacc1ebec030e0db36332ce0f12f95050c Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Sat, 2 Aug 2014 19:16:35 -0400 Subject: [PATCH 15/56] correction --- doc/devblog/day_209__mass_conversion.mdwn | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/doc/devblog/day_209__mass_conversion.mdwn b/doc/devblog/day_209__mass_conversion.mdwn index 6278b191a4..23de65b2ac 100644 --- a/doc/devblog/day_209__mass_conversion.mdwn +++ b/doc/devblog/day_209__mass_conversion.mdwn @@ -11,7 +11,11 @@ problems with S3 remotes, including file size limits, and a memory leak in the underlying S3 library. The S3 conversion included caching of the S3 connection when -storing/retrieving chunks. But the API doesn't yet support caching +storing/retrieving chunks. [Update: Actually, it turns out it didn't; +the hS3 library doesn't support persistent connections. Another reason I +need to switch to a better S3 library!] + +But the API doesn't yet support caching when removing or checking if chunks are present. I should probably expand the API, but got into some type checker messes when using generic enough data types to support everything. Should probably switch to `ResourceT`. From 116f70555e35df440bea6c91460ef8bfcfa9c63f Mon Sep 17 00:00:00 2001 From: "https://www.google.com/accounts/o8/id?id=AItOawmkuFJVGp6WVvJtIV5JYb8IqN8mRvSGQdI" Date: Sun, 3 Aug 2014 01:18:54 +0000 Subject: [PATCH 16/56] Added a comment: Would you accept a patch? --- ...ment_5_a4ab4173620b72ac0a24d575fa9c810c._comment | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 doc/forum/Using_git-annex_as_a_library/comment_5_a4ab4173620b72ac0a24d575fa9c810c._comment diff --git a/doc/forum/Using_git-annex_as_a_library/comment_5_a4ab4173620b72ac0a24d575fa9c810c._comment b/doc/forum/Using_git-annex_as_a_library/comment_5_a4ab4173620b72ac0a24d575fa9c810c._comment new file mode 100644 index 0000000000..70f2ad9353 --- /dev/null +++ b/doc/forum/Using_git-annex_as_a_library/comment_5_a4ab4173620b72ac0a24d575fa9c810c._comment @@ -0,0 +1,13 @@ +[[!comment format=mdwn + username="https://www.google.com/accounts/o8/id?id=AItOawmkuFJVGp6WVvJtIV5JYb8IqN8mRvSGQdI" + nickname="Emilio Jesús" + subject="Would you accept a patch?" + date="2014-08-03T01:18:54Z" + content=""" +Dear Joey, + +I am also interested in using git-annex as a Haskell library, would you accept a patch to the .cabal file then? + +Thanks, +Emilio +"""]] From ef0404f5d4a5d1ca9e678d9b8cdc16fbfea4f74b Mon Sep 17 00:00:00 2001 From: zardoz Date: Sun, 3 Aug 2014 15:25:47 +0000 Subject: [PATCH 17/56] --- ...ate_entries_in_location_tracking_logs.mdwn | 21 +++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 doc/forum/Duplicate_entries_in_location_tracking_logs.mdwn diff --git a/doc/forum/Duplicate_entries_in_location_tracking_logs.mdwn b/doc/forum/Duplicate_entries_in_location_tracking_logs.mdwn new file mode 100644 index 0000000000..ba7526ed2e --- /dev/null +++ b/doc/forum/Duplicate_entries_in_location_tracking_logs.mdwn @@ -0,0 +1,21 @@ +I’ve noticed something odd when inspecting the history of the +git-annex branch today. Apparently, the branch had some merge +conflicts during sync that involved two alternative location tracking +entries that both were for one and the same remote. Both entries only +differed in their timestamps, and the union merge kept both, so that I +now have .log files in the annex branch that contain duplicate parts +like this. + +1404838274.151066s 1 a2401cfd-1f58-4441-a2b3-d9bef06220ad +1406978406.24838s 1 a2401cfd-1f58-4441-a2b3-d9bef06220ad + +The UUID here is my local repository. + +The duplication also occurred in the uuid.log: + +4316c3dc-5b6d-46eb-b780-948c717b7be5 server timestamp=1404839228.113473s +4316c3dc-5b6d-46eb-b780-948c717b7be5 server timestamp=1404847241.863051s + +Is this something to be concerned about? The situation somehow arose +in relation to unannexing a bunch of files and rebasing the master +branch. From 699a0a3bf8051549ebb4b0962a3ea402ea5e9020 Mon Sep 17 00:00:00 2001 From: "http://joeyh.name/" Date: Sun, 3 Aug 2014 18:22:58 +0000 Subject: [PATCH 18/56] Added a comment --- .../comment_4_09a3372fd13734cbb05e79d0ba76d052._comment | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 doc/bugs/S3_upload_not_using_multipart/comment_4_09a3372fd13734cbb05e79d0ba76d052._comment diff --git a/doc/bugs/S3_upload_not_using_multipart/comment_4_09a3372fd13734cbb05e79d0ba76d052._comment b/doc/bugs/S3_upload_not_using_multipart/comment_4_09a3372fd13734cbb05e79d0ba76d052._comment new file mode 100644 index 0000000000..d366281637 --- /dev/null +++ b/doc/bugs/S3_upload_not_using_multipart/comment_4_09a3372fd13734cbb05e79d0ba76d052._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="http://joeyh.name/" + ip="209.250.56.112" + subject="comment 4" + date="2014-08-03T18:22:58Z" + content=""" +The aws library does not support multipart yet either; here's the bug report requesting it: +"""]] From ac166a898ece3ab2aabcbcf826a9dcff07d2d6e6 Mon Sep 17 00:00:00 2001 From: "http://joeyh.name/" Date: Sun, 3 Aug 2014 18:27:32 +0000 Subject: [PATCH 19/56] Added a comment --- .../comment_5_5add65b5b284f79ec09ee4d0326e7132._comment | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 doc/bugs/S3_upload_not_using_multipart/comment_5_5add65b5b284f79ec09ee4d0326e7132._comment diff --git a/doc/bugs/S3_upload_not_using_multipart/comment_5_5add65b5b284f79ec09ee4d0326e7132._comment b/doc/bugs/S3_upload_not_using_multipart/comment_5_5add65b5b284f79ec09ee4d0326e7132._comment new file mode 100644 index 0000000000..0c77423649 --- /dev/null +++ b/doc/bugs/S3_upload_not_using_multipart/comment_5_5add65b5b284f79ec09ee4d0326e7132._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="http://joeyh.name/" + ip="209.250.56.112" + subject="comment 5" + date="2014-08-03T18:27:32Z" + content=""" +However, I don't think that multipart upload actually allows exceeding the S3 limit of 5 GB per object. Configuring the remote with `chunk=100MiB` *does* allow bypassing whatever S3's maximum object size happens to be. +"""]] From 63c00daa080230aab25c34e91fed89e1d25d677a Mon Sep 17 00:00:00 2001 From: "http://joeyh.name/" Date: Sun, 3 Aug 2014 18:40:26 +0000 Subject: [PATCH 20/56] Added a comment --- .../comment_4_37e41b518813bd7c349017abf4a0ca0f._comment | 8 ++++++++ 1 file changed, 8 insertions(+) create mode 100644 doc/bugs/S3_memory_leaks/comment_4_37e41b518813bd7c349017abf4a0ca0f._comment diff --git a/doc/bugs/S3_memory_leaks/comment_4_37e41b518813bd7c349017abf4a0ca0f._comment b/doc/bugs/S3_memory_leaks/comment_4_37e41b518813bd7c349017abf4a0ca0f._comment new file mode 100644 index 0000000000..464588a793 --- /dev/null +++ b/doc/bugs/S3_memory_leaks/comment_4_37e41b518813bd7c349017abf4a0ca0f._comment @@ -0,0 +1,8 @@ +[[!comment format=mdwn + username="http://joeyh.name/" + ip="209.250.56.112" + subject="comment 4" + date="2014-08-03T18:40:26Z" + content=""" +Beginning work on a `s3-aws` branch using the aws library instead of hS3. +"""]] From c648548e1fa9dd2f972accee5db5d2353c088384 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Sun, 3 Aug 2014 14:56:40 -0400 Subject: [PATCH 21/56] formatting --- doc/forum/Duplicate_entries_in_location_tracking_logs.mdwn | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/doc/forum/Duplicate_entries_in_location_tracking_logs.mdwn b/doc/forum/Duplicate_entries_in_location_tracking_logs.mdwn index ba7526ed2e..f5f1354440 100644 --- a/doc/forum/Duplicate_entries_in_location_tracking_logs.mdwn +++ b/doc/forum/Duplicate_entries_in_location_tracking_logs.mdwn @@ -6,15 +6,19 @@ differed in their timestamps, and the union merge kept both, so that I now have .log files in the annex branch that contain duplicate parts like this. +
 1404838274.151066s 1 a2401cfd-1f58-4441-a2b3-d9bef06220ad
 1406978406.24838s 1 a2401cfd-1f58-4441-a2b3-d9bef06220ad
+
The UUID here is my local repository. The duplication also occurred in the uuid.log: +
 4316c3dc-5b6d-46eb-b780-948c717b7be5 server timestamp=1404839228.113473s
 4316c3dc-5b6d-46eb-b780-948c717b7be5 server timestamp=1404847241.863051s
+
Is this something to be concerned about? The situation somehow arose in relation to unannexing a bunch of files and rebasing the master From e1e5853c94b7c812c5b1b9d142cc14e5d6579743 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Sun, 3 Aug 2014 15:07:43 -0400 Subject: [PATCH 22/56] rsync: support chunking Chunking does not speed up rsync at all, so it's only useful for interop with the directory special remote. --- debian/changelog | 5 +++-- doc/special_remotes/rsync.mdwn | 10 +++++++--- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/debian/changelog b/debian/changelog index ca6059f43e..2e00728ad9 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,8 +1,8 @@ git-annex (5.20140718) UNRELEASED; urgency=medium * New chunk= option to chunk files stored in special remotes. - Currently supported by: directory, S3, and all external and hook - special remotes. + Currently supported by: directory, S3, rsync, and all external + and hook special remotes. * Partially transferred files are automatically resumed when using chunked remotes! * The old chunksize= option is deprecated. Do not use for new remotes. @@ -21,6 +21,7 @@ git-annex (5.20140718) UNRELEASED; urgency=medium * Dropping an object from a bup special remote now deletes the git branch for the object, although of course the object's content cannot be deleted due to the nature of bup. + * -- Joey Hess Mon, 21 Jul 2014 14:41:26 -0400 diff --git a/doc/special_remotes/rsync.mdwn b/doc/special_remotes/rsync.mdwn index b2a9d23f5d..eb218b181a 100644 --- a/doc/special_remotes/rsync.mdwn +++ b/doc/special_remotes/rsync.mdwn @@ -14,14 +14,14 @@ Or for using rsync over SSH These parameters can be passed to `git annex initremote` to configure rsync: +* `rsyncurl` - Required. This is the url or `hostname:/directory` to + pass to rsync to tell it where to store content. + * `encryption` - One of "none", "hybrid", "shared", or "pubkey". See [[encryption]]. * `keyid` - Specifies the gpg key to use for [[encryption]]. -* `rsyncurl` - Required. This is the url or `hostname:/directory` to - pass to rsync to tell it where to store content. - * `shellescape` - Optional. Set to "no" to avoid shell escaping normally done when using rsync over ssh. That escaping is needed with typical setups, but not with some hosting providers that do not expose rsynced @@ -30,6 +30,10 @@ These parameters can be passed to `git annex initremote` to configure rsync: quote (`'`) character. If that happens, you can run enableremote setting shellescape=no. +* `chunk` - Enables [[chunking]] when storing large files. + This is typically not a win for rsync, so no need to enable it. + But, it makes this interoperate with the [[directory]] special remote. + The `annex-rsync-options` git configuration setting can be used to pass parameters to rsync. From 4b16989e98a3b6fdeb418fd87fba9ac1e65e17cb Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Sun, 3 Aug 2014 15:35:23 -0400 Subject: [PATCH 23/56] roll ChunkedEncryptable into Special and improve interface Allow disabling progress displays, for eg, rsync. --- Command/TestRemote.hs | 2 +- Remote/Bup.hs | 7 +- Remote/Ddar.hs | 7 +- Remote/Directory.hs | 5 +- Remote/Directory/LegacyChunked.hs | 2 +- Remote/External.hs | 3 +- Remote/Glacier.hs | 7 +- Remote/Helper/Chunked.hs | 6 +- Remote/Helper/ChunkedEncryptable.hs | 212 -------------------------- Remote/Helper/Special.hs | 223 +++++++++++++++++++++++++++- Remote/Hook.hs | 3 +- Remote/S3.hs | 3 +- Remote/WebDAV.hs | 5 +- 13 files changed, 245 insertions(+), 240 deletions(-) delete mode 100644 Remote/Helper/ChunkedEncryptable.hs diff --git a/Command/TestRemote.hs b/Command/TestRemote.hs index 29a2e809cd..3f75214a56 100644 --- a/Command/TestRemote.hs +++ b/Command/TestRemote.hs @@ -76,7 +76,7 @@ perform rs ks = do where desc r' k = intercalate "; " $ map unwords [ [ "key size", show (keySize k) ] - , [ show (chunkConfig (Remote.config r')) ] + , [ show (getChunkConfig (Remote.config r')) ] , ["encryption", fromMaybe "none" (M.lookup "encryption" (Remote.config r'))] ] diff --git a/Remote/Bup.hs b/Remote/Bup.hs index 44ea8c7d83..6a04ad5f7f 100644 --- a/Remote/Bup.hs +++ b/Remote/Bup.hs @@ -25,7 +25,6 @@ import Config import Config.Cost import qualified Remote.Helper.Ssh as Ssh import Remote.Helper.Special -import Remote.Helper.ChunkedEncryptable import Remote.Helper.Messages import Utility.Hash import Utility.UserInfo @@ -74,12 +73,16 @@ gen r u c gc = do , availability = if bupLocal buprepo then LocallyAvailable else GloballyAvailable , readonly = False } - return $ Just $ encryptableRemote c + return $ Just $ specialRemote' specialcfg c (simplyPrepare $ store this buprepo) (simplyPrepare $ retrieve buprepo) this where buprepo = fromMaybe (error "missing buprepo") $ remoteAnnexBupRepo gc + specialcfg = (specialRemoteCfg c) + -- chunking would not improve bup + { chunkConfig = NoChunks + } bupSetup :: Maybe UUID -> Maybe CredPair -> RemoteConfig -> Annex (RemoteConfig, UUID) bupSetup mu _ c = do diff --git a/Remote/Ddar.hs b/Remote/Ddar.hs index bc4755a81c..b4c7ac1e62 100644 --- a/Remote/Ddar.hs +++ b/Remote/Ddar.hs @@ -22,7 +22,6 @@ import qualified Git import Config import Config.Cost import Remote.Helper.Special -import Remote.Helper.ChunkedEncryptable import Annex.Ssh import Annex.UUID @@ -42,7 +41,7 @@ gen r u c gc = do if ddarLocal ddarrepo then nearlyCheapRemoteCost else expensiveRemoteCost - return $ Just $ encryptableRemote c + return $ Just $ specialRemote' specialcfg c (simplyPrepare $ store ddarrepo) (simplyPrepare $ retrieve ddarrepo) (this cst) @@ -71,6 +70,10 @@ gen r u c gc = do , readonly = False } ddarrepo = fromMaybe (error "missing ddarrepo") $ remoteAnnexDdarRepo gc + specialcfg = (specialRemoteCfg c) + -- chunking would not improve ddar + { chunkConfig = NoChunks + } ddarSetup :: Maybe UUID -> Maybe CredPair -> RemoteConfig -> Annex (RemoteConfig, UUID) ddarSetup mu _ c = do diff --git a/Remote/Directory.hs b/Remote/Directory.hs index 78d30b1a16..db141e01ad 100644 --- a/Remote/Directory.hs +++ b/Remote/Directory.hs @@ -20,7 +20,6 @@ import Config.Cost import Config import Utility.FileMode import Remote.Helper.Special -import Remote.Helper.ChunkedEncryptable import qualified Remote.Directory.LegacyChunked as Legacy import Annex.Content import Annex.UUID @@ -37,8 +36,8 @@ remote = RemoteType { gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote) gen r u c gc = do cst <- remoteCost gc cheapRemoteCost - let chunkconfig = chunkConfig c - return $ Just $ chunkedEncryptableRemote c + let chunkconfig = getChunkConfig c + return $ Just $ specialRemote c (prepareStore dir chunkconfig) (retrieve dir chunkconfig) Remote { diff --git a/Remote/Directory/LegacyChunked.hs b/Remote/Directory/LegacyChunked.hs index a198688024..1be885db21 100644 --- a/Remote/Directory/LegacyChunked.hs +++ b/Remote/Directory/LegacyChunked.hs @@ -14,7 +14,7 @@ import qualified Data.ByteString as S import Common.Annex import Utility.FileMode -import Remote.Helper.ChunkedEncryptable +import Remote.Helper.Special import qualified Remote.Helper.Chunked.Legacy as Legacy import Annex.Perms import Utility.Metered diff --git a/Remote/External.hs b/Remote/External.hs index 1c22a589bf..c00093402f 100644 --- a/Remote/External.hs +++ b/Remote/External.hs @@ -15,7 +15,6 @@ import Types.CleanupActions import qualified Git import Config import Remote.Helper.Special -import Remote.Helper.ChunkedEncryptable import Utility.Metered import Logs.Transfer import Logs.PreferredContent.Raw @@ -43,7 +42,7 @@ gen r u c gc = do Annex.addCleanup (RemoteCleanup u) $ stopExternal external cst <- getCost external r gc avail <- getAvailability external r gc - return $ Just $ chunkedEncryptableRemote c + return $ Just $ specialRemote c (simplyPrepare $ store external) (simplyPrepare $ retrieve external) Remote { diff --git a/Remote/Glacier.hs b/Remote/Glacier.hs index 592a7db1f0..c5bfefa641 100644 --- a/Remote/Glacier.hs +++ b/Remote/Glacier.hs @@ -18,7 +18,6 @@ import qualified Git import Config import Config.Cost import Remote.Helper.Special -import Remote.Helper.ChunkedEncryptable import qualified Remote.Helper.AWS as AWS import Creds import Utility.Metered @@ -40,7 +39,7 @@ remote = RemoteType { gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote) gen r u c gc = new <$> remoteCost gc veryExpensiveRemoteCost where - new cst = Just $ encryptableRemote c + new cst = Just $ specialRemote' specialcfg c (prepareStore this) (prepareRetrieve this) this @@ -66,6 +65,10 @@ gen r u c gc = new <$> remoteCost gc veryExpensiveRemoteCost availability = GloballyAvailable, remotetype = remote } + specialcfg = (specialRemoteCfg c) + -- Disabled until jobList gets support for chunks. + { chunkConfig = NoChunks + } glacierSetup :: Maybe UUID -> Maybe CredPair -> RemoteConfig -> Annex (RemoteConfig, UUID) glacierSetup mu mcreds c = do diff --git a/Remote/Helper/Chunked.hs b/Remote/Helper/Chunked.hs index a7c43801aa..2e9467b2a0 100644 --- a/Remote/Helper/Chunked.hs +++ b/Remote/Helper/Chunked.hs @@ -8,7 +8,7 @@ module Remote.Helper.Chunked ( ChunkSize, ChunkConfig(..), - chunkConfig, + getChunkConfig, storeChunks, removeChunks, retrieveChunks, @@ -39,8 +39,8 @@ noChunks :: ChunkConfig -> Bool noChunks NoChunks = True noChunks _ = False -chunkConfig :: RemoteConfig -> ChunkConfig -chunkConfig m = +getChunkConfig :: RemoteConfig -> ChunkConfig +getChunkConfig m = case M.lookup "chunksize" m of Nothing -> case M.lookup "chunk" m of Nothing -> NoChunks diff --git a/Remote/Helper/ChunkedEncryptable.hs b/Remote/Helper/ChunkedEncryptable.hs deleted file mode 100644 index 9c6ba98a2b..0000000000 --- a/Remote/Helper/ChunkedEncryptable.hs +++ /dev/null @@ -1,212 +0,0 @@ -{- Remotes that support both chunking and encryption. - - - - Copyright 2014 Joey Hess - - - - Licensed under the GNU GPL version 3 or higher. - -} - -{-# LANGUAGE RankNTypes #-} - -module Remote.Helper.ChunkedEncryptable ( - Preparer, - Storer, - Retriever, - simplyPrepare, - ContentSource, - checkPrepare, - resourcePrepare, - fileStorer, - byteStorer, - fileRetriever, - byteRetriever, - storeKeyDummy, - retreiveKeyFileDummy, - chunkedEncryptableRemote, - encryptableRemote, - module X -) where - -import Common.Annex -import Types.StoreRetrieve -import Types.Remote -import Crypto -import Config.Cost -import Utility.Metered -import Remote.Helper.Chunked as X -import Remote.Helper.Encryptable as X hiding (encryptableRemote) -import Annex.Content -import Annex.Exception - -import qualified Data.ByteString.Lazy as L -import Control.Exception (bracket) - --- Use when nothing needs to be done to prepare a helper. -simplyPrepare :: helper -> Preparer helper -simplyPrepare helper _ a = a $ Just helper - --- Use to run a check when preparing a helper. -checkPrepare :: (Key -> Annex Bool) -> helper -> Preparer helper -checkPrepare checker helper k a = ifM (checker k) - ( a (Just helper) - , a Nothing - ) - --- Use to acquire a resource when preparing a helper. -resourcePrepare :: (Key -> (r -> Annex Bool) -> Annex Bool) -> (r -> helper) -> Preparer helper -resourcePrepare withr helper k a = withr k $ \r -> - a (Just (helper r)) - --- A Storer that expects to be provided with a file containing --- the content of the key to store. -fileStorer :: (Key -> FilePath -> MeterUpdate -> Annex Bool) -> Storer -fileStorer a k (FileContent f) m = a k f m -fileStorer a k (ByteContent b) m = withTmp k $ \f -> do - liftIO $ L.writeFile f b - a k f m - --- A Storer that expects to be provided with a L.ByteString of --- the content to store. -byteStorer :: (Key -> L.ByteString -> MeterUpdate -> Annex Bool) -> Storer -byteStorer a k c m = withBytes c $ \b -> a k b m - --- A Retriever that writes the content of a Key to a provided file. --- It is responsible for updating the progress meter as it retrieves data. -fileRetriever :: (FilePath -> Key -> MeterUpdate -> Annex ()) -> Retriever -fileRetriever a k m callback = do - f <- prepTmp k - a f k m - callback (FileContent f) - --- A Retriever that generates a lazy ByteString containing the Key's --- content, and passes it to a callback action which will fully consume it --- before returning. -byteRetriever :: (Key -> (L.ByteString -> Annex Bool) -> Annex Bool) -> Retriever -byteRetriever a k _m callback = a k (callback . ByteContent) - -{- The base Remote that is provided to chunkedEncryptableRemote - - needs to have storeKey and retreiveKeyFile methods, but they are - - never actually used (since chunkedEncryptableRemote replaces - - them). Here are some dummy ones. - -} -storeKeyDummy :: Key -> AssociatedFile -> MeterUpdate -> Annex Bool -storeKeyDummy _ _ _ = return False -retreiveKeyFileDummy :: Key -> AssociatedFile -> FilePath -> MeterUpdate -> Annex Bool -retreiveKeyFileDummy _ _ _ _ = return False - -type RemoteModifier = RemoteConfig -> Preparer Storer -> Preparer Retriever -> Remote -> Remote - --- Modifies a base Remote to support both chunking and encryption. -chunkedEncryptableRemote :: RemoteModifier -chunkedEncryptableRemote c = chunkedEncryptableRemote' (chunkConfig c) c - --- Modifies a base Remote to support encryption, but not chunking. -encryptableRemote :: RemoteModifier -encryptableRemote = chunkedEncryptableRemote' NoChunks - -chunkedEncryptableRemote' :: ChunkConfig -> RemoteModifier -chunkedEncryptableRemote' chunkconfig c preparestorer prepareretriever baser = encr - where - encr = baser - { storeKey = \k _f p -> cip >>= storeKeyGen k p - , retrieveKeyFile = \k _f d p -> cip >>= retrieveKeyFileGen k d p - , retrieveKeyFileCheap = \k d -> cip >>= maybe - (retrieveKeyFileCheap baser k d) - (\_ -> return False) - , removeKey = \k -> cip >>= removeKeyGen k - , hasKey = \k -> cip >>= hasKeyGen k - , cost = maybe - (cost baser) - (const $ cost baser + encryptedRemoteCostAdj) - (extractCipher c) - } - cip = cipherKey c - gpgopts = getGpgEncParams encr - - safely a = catchNonAsyncAnnex a (\e -> warning (show e) >> return False) - - -- chunk, then encrypt, then feed to the storer - storeKeyGen k p enc = - safely $ preparestorer k $ safely . go - where - go (Just storer) = sendAnnex k rollback $ \src -> - metered (Just p) k $ \p' -> - storeChunks (uuid baser) chunkconfig k src p' - (storechunk enc storer) - (hasKey baser) - go Nothing = return False - rollback = void $ removeKey encr k - - storechunk Nothing storer k content p = storer k content p - storechunk (Just (cipher, enck)) storer k content p = - withBytes content $ \b -> - encrypt gpgopts cipher (feedBytes b) $ - readBytes $ \encb -> - storer (enck k) (ByteContent encb) p - - -- call retriever to get chunks; decrypt them; stream to dest file - retrieveKeyFileGen k dest p enc = - safely $ prepareretriever k $ safely . go - where - go (Just retriever) = metered (Just p) k $ \p' -> - retrieveChunks retriever (uuid baser) chunkconfig - enck k dest p' (sink dest enc) - go Nothing = return False - enck = maybe id snd enc - - removeKeyGen k enc = removeChunks remover (uuid baser) chunkconfig enck k - where - enck = maybe id snd enc - remover = removeKey baser - - hasKeyGen k enc = hasKeyChunks checker (uuid baser) chunkconfig enck k - where - enck = maybe id snd enc - checker = hasKey baser - -{- Sink callback for retrieveChunks. Stores the file content into the - - provided Handle, decrypting it first if necessary. - - - - If the remote did not store the content using chunks, no Handle - - will be provided, and it's up to us to open the destination file. - - - - Note that when neither chunking nor encryption is used, and the remote - - provides FileContent, that file only needs to be renamed - - into place. (And it may even already be in the right place..) - -} -sink - :: FilePath - -> Maybe (Cipher, EncKey) - -> Maybe Handle - -> Maybe MeterUpdate - -> ContentSource - -> Annex Bool -sink dest enc mh mp content = do - case (enc, mh, content) of - (Nothing, Nothing, FileContent f) - | f == dest -> noop - | otherwise -> liftIO $ moveFile f dest - (Just (cipher, _), _, ByteContent b) -> - decrypt cipher (feedBytes b) $ - readBytes write - (Just (cipher, _), _, FileContent f) -> do - withBytes content $ \b -> - decrypt cipher (feedBytes b) $ - readBytes write - liftIO $ nukeFile f - (Nothing, _, FileContent f) -> do - withBytes content write - liftIO $ nukeFile f - (Nothing, _, ByteContent b) -> write b - return True - where - write b = case mh of - Just h -> liftIO $ b `streamto` h - Nothing -> liftIO $ bracket opendest hClose (b `streamto`) - streamto b h = case mp of - Just p -> meteredWrite p h b - Nothing -> L.hPut h b - opendest = openBinaryFile dest WriteMode - -withBytes :: ContentSource -> (L.ByteString -> Annex a) -> Annex a -withBytes (ByteContent b) a = a b -withBytes (FileContent f) a = a =<< liftIO (L.readFile f) diff --git a/Remote/Helper/Special.hs b/Remote/Helper/Special.hs index 7fc421f46f..2bcb7d530d 100644 --- a/Remote/Helper/Special.hs +++ b/Remote/Helper/Special.hs @@ -1,20 +1,51 @@ -{- common functions for special remotes +{- helpers for special remotes - - - Copyright 2011 Joey Hess + - Copyright 2011-2014 Joey Hess - - Licensed under the GNU GPL version 3 or higher. -} -module Remote.Helper.Special where - -import qualified Data.Map as M +module Remote.Helper.Special ( + findSpecialRemotes, + gitConfigSpecialRemote, + Preparer, + Storer, + Retriever, + simplyPrepare, + ContentSource, + checkPrepare, + resourcePrepare, + fileStorer, + byteStorer, + fileRetriever, + byteRetriever, + storeKeyDummy, + retreiveKeyFileDummy, + SpecialRemoteCfg(..), + specialRemoteCfg, + specialRemote, + specialRemote', + module X +) where import Common.Annex +import Types.StoreRetrieve import Types.Remote +import Crypto +import Config.Cost +import Utility.Metered +import Remote.Helper.Chunked as X +import Remote.Helper.Encryptable as X hiding (encryptableRemote) +import Annex.Content +import Annex.Exception import qualified Git import qualified Git.Command import qualified Git.Construct +import qualified Data.ByteString.Lazy as L +import Control.Exception (bracket) +import qualified Data.Map as M + {- Special remotes don't have a configured url, so Git.Repo does not - automatically generate remotes for them. This looks for a different - configuration key instead. @@ -38,3 +69,185 @@ gitConfigSpecialRemote u c k v = do [Param "config", Param (configsetting a), Param b] remotename = fromJust (M.lookup "name" c) configsetting s = "remote." ++ remotename ++ "." ++ s + +-- Use when nothing needs to be done to prepare a helper. +simplyPrepare :: helper -> Preparer helper +simplyPrepare helper _ a = a $ Just helper + +-- Use to run a check when preparing a helper. +checkPrepare :: (Key -> Annex Bool) -> helper -> Preparer helper +checkPrepare checker helper k a = ifM (checker k) + ( a (Just helper) + , a Nothing + ) + +-- Use to acquire a resource when preparing a helper. +resourcePrepare :: (Key -> (r -> Annex Bool) -> Annex Bool) -> (r -> helper) -> Preparer helper +resourcePrepare withr helper k a = withr k $ \r -> + a (Just (helper r)) + +-- A Storer that expects to be provided with a file containing +-- the content of the key to store. +fileStorer :: (Key -> FilePath -> MeterUpdate -> Annex Bool) -> Storer +fileStorer a k (FileContent f) m = a k f m +fileStorer a k (ByteContent b) m = withTmp k $ \f -> do + liftIO $ L.writeFile f b + a k f m + +-- A Storer that expects to be provided with a L.ByteString of +-- the content to store. +byteStorer :: (Key -> L.ByteString -> MeterUpdate -> Annex Bool) -> Storer +byteStorer a k c m = withBytes c $ \b -> a k b m + +-- A Retriever that writes the content of a Key to a provided file. +-- It is responsible for updating the progress meter as it retrieves data. +fileRetriever :: (FilePath -> Key -> MeterUpdate -> Annex ()) -> Retriever +fileRetriever a k m callback = do + f <- prepTmp k + a f k m + callback (FileContent f) + +-- A Retriever that generates a lazy ByteString containing the Key's +-- content, and passes it to a callback action which will fully consume it +-- before returning. +byteRetriever :: (Key -> (L.ByteString -> Annex Bool) -> Annex Bool) -> Retriever +byteRetriever a k _m callback = a k (callback . ByteContent) + +{- The base Remote that is provided to specialRemote needs to have + - storeKey and retreiveKeyFile methods, but they are never + - actually used (since specialRemote replaces them). + - Here are some dummy ones. + -} +storeKeyDummy :: Key -> AssociatedFile -> MeterUpdate -> Annex Bool +storeKeyDummy _ _ _ = return False +retreiveKeyFileDummy :: Key -> AssociatedFile -> FilePath -> MeterUpdate -> Annex Bool +retreiveKeyFileDummy _ _ _ _ = return False + +type RemoteModifier = RemoteConfig -> Preparer Storer -> Preparer Retriever -> Remote -> Remote + +data SpecialRemoteCfg = SpecialRemoteCfg + { chunkConfig :: ChunkConfig + , displayProgress :: Bool + } + +specialRemoteCfg :: RemoteConfig -> SpecialRemoteCfg +specialRemoteCfg c = SpecialRemoteCfg (getChunkConfig c) True + +-- Modifies a base Remote to support both chunking and encryption, +-- which special remotes typically should support. +specialRemote :: RemoteModifier +specialRemote c = specialRemote' (specialRemoteCfg c) c + +specialRemote' :: SpecialRemoteCfg -> RemoteModifier +specialRemote' cfg c preparestorer prepareretriever baser = encr + where + encr = baser + { storeKey = \k _f p -> cip >>= storeKeyGen k p + , retrieveKeyFile = \k _f d p -> cip >>= retrieveKeyFileGen k d p + , retrieveKeyFileCheap = \k d -> cip >>= maybe + (retrieveKeyFileCheap baser k d) + (\_ -> return False) + , removeKey = \k -> cip >>= removeKeyGen k + , hasKey = \k -> cip >>= hasKeyGen k + , cost = maybe + (cost baser) + (const $ cost baser + encryptedRemoteCostAdj) + (extractCipher c) + } + cip = cipherKey c + gpgopts = getGpgEncParams encr + + safely a = catchNonAsyncAnnex a (\e -> warning (show e) >> return False) + + -- chunk, then encrypt, then feed to the storer + storeKeyGen k p enc = + safely $ preparestorer k $ safely . go + where + go (Just storer) = sendAnnex k rollback $ \src -> + displayprogress p k $ \p' -> + storeChunks (uuid baser) chunkconfig k src p' + (storechunk enc storer) + (hasKey baser) + go Nothing = return False + rollback = void $ removeKey encr k + + storechunk Nothing storer k content p = storer k content p + storechunk (Just (cipher, enck)) storer k content p = + withBytes content $ \b -> + encrypt gpgopts cipher (feedBytes b) $ + readBytes $ \encb -> + storer (enck k) (ByteContent encb) p + + -- call retriever to get chunks; decrypt them; stream to dest file + retrieveKeyFileGen k dest p enc = + safely $ prepareretriever k $ safely . go + where + go (Just retriever) = displayprogress p k $ \p' -> + retrieveChunks retriever (uuid baser) chunkconfig + enck k dest p' (sink dest enc) + go Nothing = return False + enck = maybe id snd enc + + removeKeyGen k enc = removeChunks remover (uuid baser) chunkconfig enck k + where + enck = maybe id snd enc + remover = removeKey baser + + hasKeyGen k enc = hasKeyChunks checker (uuid baser) chunkconfig enck k + where + enck = maybe id snd enc + checker = hasKey baser + + chunkconfig = chunkConfig cfg + + displayprogress p k a + | displayProgress cfg = metered (Just p) k a + | otherwise = a p + +{- Sink callback for retrieveChunks. Stores the file content into the + - provided Handle, decrypting it first if necessary. + - + - If the remote did not store the content using chunks, no Handle + - will be provided, and it's up to us to open the destination file. + - + - Note that when neither chunking nor encryption is used, and the remote + - provides FileContent, that file only needs to be renamed + - into place. (And it may even already be in the right place..) + -} +sink + :: FilePath + -> Maybe (Cipher, EncKey) + -> Maybe Handle + -> Maybe MeterUpdate + -> ContentSource + -> Annex Bool +sink dest enc mh mp content = do + case (enc, mh, content) of + (Nothing, Nothing, FileContent f) + | f == dest -> noop + | otherwise -> liftIO $ moveFile f dest + (Just (cipher, _), _, ByteContent b) -> + decrypt cipher (feedBytes b) $ + readBytes write + (Just (cipher, _), _, FileContent f) -> do + withBytes content $ \b -> + decrypt cipher (feedBytes b) $ + readBytes write + liftIO $ nukeFile f + (Nothing, _, FileContent f) -> do + withBytes content write + liftIO $ nukeFile f + (Nothing, _, ByteContent b) -> write b + return True + where + write b = case mh of + Just h -> liftIO $ b `streamto` h + Nothing -> liftIO $ bracket opendest hClose (b `streamto`) + streamto b h = case mp of + Just p -> meteredWrite p h b + Nothing -> L.hPut h b + opendest = openBinaryFile dest WriteMode + +withBytes :: ContentSource -> (L.ByteString -> Annex a) -> Annex a +withBytes (ByteContent b) a = a b +withBytes (FileContent f) a = a =<< liftIO (L.readFile f) diff --git a/Remote/Hook.hs b/Remote/Hook.hs index 0668e2ca91..efbd9f8ba4 100644 --- a/Remote/Hook.hs +++ b/Remote/Hook.hs @@ -18,7 +18,6 @@ import Config import Config.Cost import Annex.UUID import Remote.Helper.Special -import Remote.Helper.ChunkedEncryptable import Utility.Env type Action = String @@ -35,7 +34,7 @@ remote = RemoteType { gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote) gen r u c gc = do cst <- remoteCost gc expensiveRemoteCost - return $ Just $ chunkedEncryptableRemote c + return $ Just $ specialRemote c (simplyPrepare $ store hooktype) (simplyPrepare $ retrieve hooktype) Remote { diff --git a/Remote/S3.hs b/Remote/S3.hs index 68d8ee4bfb..8603757eb6 100644 --- a/Remote/S3.hs +++ b/Remote/S3.hs @@ -25,7 +25,6 @@ import qualified Git import Config import Config.Cost import Remote.Helper.Special -import Remote.Helper.ChunkedEncryptable import qualified Remote.Helper.AWS as AWS import Creds import Utility.Metered @@ -45,7 +44,7 @@ remote = RemoteType { gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote) gen r u c gc = new <$> remoteCost gc expensiveRemoteCost where - new cst = Just $ chunkedEncryptableRemote c + new cst = Just $ specialRemote c (prepareStore this) (prepareRetrieve this) this diff --git a/Remote/WebDAV.hs b/Remote/WebDAV.hs index d6644cdc70..0bdd383602 100644 --- a/Remote/WebDAV.hs +++ b/Remote/WebDAV.hs @@ -28,7 +28,6 @@ import Config import Config.Cost import Remote.Helper.Special import Remote.Helper.Encryptable -import Remote.Helper.Chunked import qualified Remote.Helper.Chunked.Legacy as Legacy import Crypto import Creds @@ -122,7 +121,7 @@ storeHelper r k baseurl user pass b = catchBoolIO $ do where tmpurl = tmpLocation baseurl k keyurl = davLocation baseurl k - chunkconfig = chunkConfig $ config r + chunkconfig = getChunkConfig $ config r finalizer srcurl desturl = do void $ tryNonAsync (deleteDAV desturl user pass) mkdirRecursiveDAV (urlParent desturl) user pass @@ -220,7 +219,7 @@ withStoredFiles r k baseurl user pass onerr a = case chunkconfig of else a chunks where keyurl = davLocation baseurl k ++ keyFile k - chunkconfig = chunkConfig $ config r + chunkconfig = getChunkConfig $ config r davAction :: Remote -> a -> ((DavUrl, DavUser, DavPass) -> Annex a) -> Annex a davAction r unconfigured action = do From b3fe23b552d7ccf8f4b0a8a3dcb0f6e6a317a5f7 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Sun, 3 Aug 2014 16:18:40 -0400 Subject: [PATCH 24/56] remove redundant progress meter display code specialRemote handles all meter display, so this is redundant. --- Remote/Helper/Chunked.hs | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/Remote/Helper/Chunked.hs b/Remote/Helper/Chunked.hs index 2e9467b2a0..7ad790cb16 100644 --- a/Remote/Helper/Chunked.hs +++ b/Remote/Helper/Chunked.hs @@ -101,10 +101,8 @@ storeChunks u chunkconfig k f p storer checker = case chunkconfig of (UnpaddedChunks chunksize) | isStableKey k -> bracketIO open close (go chunksize) - _ -> showprogress $ storer k (FileContent f) + _ -> storer k (FileContent f) p where - showprogress = metered (Just p) k - open = tryIO $ openBinaryFile f ReadMode close (Right h) = hClose h @@ -113,11 +111,11 @@ storeChunks u chunkconfig k f p storer checker = go _ (Left e) = do warning (show e) return False - go chunksize (Right h) = showprogress $ \meterupdate -> do + go chunksize (Right h) = do let chunkkeys = chunkKeyStream k chunksize (chunkkeys', startpos) <- seekResume h chunkkeys checker b <- liftIO $ L.hGetContents h - gochunks meterupdate startpos chunksize b chunkkeys' + gochunks p startpos chunksize b chunkkeys' gochunks :: MeterUpdate -> BytesProcessed -> ChunkSize -> L.ByteString -> ChunkKeyStream -> Annex Bool gochunks meterupdate startpos chunksize = loop startpos . splitchunk From 6c450aad1d78f92c27b48840dd078d1b85548e23 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Sun, 3 Aug 2014 16:35:35 -0400 Subject: [PATCH 25/56] move ugly rsync zombie workaround This reaping of any processes came to cause me problems when redoing the rsync special remote -- a gpg process that was running gets waited on and the place that then checks its return code fails. I cannot reproduce any zombies when using the rsync special remote. But I still can when using a normal git remote, accessed over ssh. There is 1 zombie per file downloaded without this horrible hack enabled. So, move the hack to only be used in that case. --- Remote/Helper/Ssh.hs | 9 ++++++++- Utility/Rsync.hs | 10 ++-------- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/Remote/Helper/Ssh.hs b/Remote/Helper/Ssh.hs index 05a98865ff..e0199dca3e 100644 --- a/Remote/Helper/Ssh.hs +++ b/Remote/Helper/Ssh.hs @@ -102,13 +102,20 @@ dropKey r key = onRemote r (boolSystem, False) "dropkey" rsyncHelper :: Maybe MeterUpdate -> [CommandParam] -> Annex Bool rsyncHelper callback params = do showOutput -- make way for progress bar - ifM (liftIO $ (maybe rsync rsyncProgress callback) params) + ok <- ifM (liftIO $ (maybe rsync rsyncProgress callback) params) ( return True , do showLongNote "rsync failed -- run git annex again to resume file transfer" return False ) + {- For an unknown reason, this causes rsync to run a second + - ssh process, which it neglects to wait on. + - Reap the resulting zombie. -} + liftIO reapZombies + + return ok + {- Generates rsync parameters that ssh to the remote and asks it - to either receive or send the key's content. -} rsyncParamsRemote :: Bool -> Remote -> Direction -> Key -> FilePath -> AssociatedFile -> Annex [CommandParam] diff --git a/Utility/Rsync.hs b/Utility/Rsync.hs index 60381264e8..d0a89b2b0c 100644 --- a/Utility/Rsync.hs +++ b/Utility/Rsync.hs @@ -66,14 +66,8 @@ rsyncParamsFixup = map fixup - The params must enable rsync's --progress mode for this to work. -} rsyncProgress :: MeterUpdate -> [CommandParam] -> IO Bool -rsyncProgress meterupdate params = do - r <- catchBoolIO $ - withHandle StdoutHandle createProcessSuccess p (feedprogress 0 []) - {- For an unknown reason, piping rsync's output like this does - - causes it to run a second ssh process, which it neglects to wait - - on. Reap the resulting zombie. -} - reapZombies - return r +rsyncProgress meterupdate params = catchBoolIO $ + withHandle StdoutHandle createProcessSuccess p (feedprogress 0 []) where p = proc "rsync" (toCommand $ rsyncParamsFixup params) feedprogress prev buf h = do From f5f961215b376ff5fcb7e78f5c2f3cce59fd95ad Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Sun, 3 Aug 2014 16:54:57 -0400 Subject: [PATCH 26/56] finish making rsync support chunking This breaks gcrypt, which relies on some internals of the rsync remote. To fix next.. --- Crypto.hs | 9 ++++++- Remote/GCrypt.hs | 4 ++-- Remote/Rsync.hs | 62 +++++++++++++++++++++--------------------------- 3 files changed, 37 insertions(+), 38 deletions(-) diff --git a/Crypto.hs b/Crypto.hs index dcefc2959a..10d6e5cef4 100644 --- a/Crypto.hs +++ b/Crypto.hs @@ -22,6 +22,7 @@ module Crypto ( describeCipher, decryptCipher, encryptKey, + isEncKey, feedFile, feedBytes, readBytes, @@ -150,9 +151,15 @@ type EncKey = Key -> Key encryptKey :: Mac -> Cipher -> EncKey encryptKey mac c k = stubKey { keyName = macWithCipher mac c (key2file k) - , keyBackendName = "GPG" ++ showMac mac + , keyBackendName = encryptedBackendNamePrefix ++ showMac mac } +encryptedBackendNamePrefix :: String +encryptedBackendNamePrefix = "GPG" + +isEncKey :: Key -> Bool +isEncKey k = encryptedBackendNamePrefix `isPrefixOf` keyBackendName k + type Feeder = Handle -> IO () type Reader m a = Handle -> m a diff --git a/Remote/GCrypt.hs b/Remote/GCrypt.hs index b2dd6cdaf5..523175fdc4 100644 --- a/Remote/GCrypt.hs +++ b/Remote/GCrypt.hs @@ -299,7 +299,7 @@ store r rsyncopts (cipher, enck) k p | otherwise = unsupportedUrl where gpgopts = getGpgEncParams r - storersync = Remote.Rsync.storeEncrypted rsyncopts gpgopts (cipher, enck) k p + storersync = undefined -- Remote.Rsync.storeEncrypted rsyncopts gpgopts (cipher, enck) k p storeshell = withTmp enck $ \tmp -> ifM (spoolencrypted $ readBytes $ \b -> catchBoolIO $ L.writeFile tmp b >> return True) ( Ssh.rsyncHelper (Just p) @@ -323,7 +323,7 @@ retrieve r rsyncopts (cipher, enck) k d p a >>= \b -> decrypt cipher (feedBytes b) (readBytes $ meteredWriteFile meterupdate d) - retrieversync = Remote.Rsync.retrieveEncrypted rsyncopts (cipher, enck) k d p + retrieversync = undefined -- Remote.Rsync.retrieveEncrypted rsyncopts (cipher, enck) k d p retrieveshell = withTmp enck $ \tmp -> ifM (Ssh.rsyncHelper (Just p) =<< Ssh.rsyncParamsRemote False r Download enck tmp Nothing) ( liftIO $ catchBoolIO $ do diff --git a/Remote/Rsync.hs b/Remote/Rsync.hs index 7d051d6cd3..d0bacd5859 100644 --- a/Remote/Rsync.hs +++ b/Remote/Rsync.hs @@ -9,8 +9,6 @@ module Remote.Rsync ( remote, - storeEncrypted, - retrieveEncrypted, remove, checkPresent, withRsyncScratchDir, @@ -27,7 +25,6 @@ import Annex.Content import Annex.UUID import Annex.Ssh import Remote.Helper.Special -import Remote.Helper.Encryptable import Remote.Rsync.RsyncUrl import Crypto import Utility.Rsync @@ -37,8 +34,8 @@ import Utility.PID import Annex.Perms import Logs.Transfer import Types.Creds +import Types.Key (isChunkKey) -import qualified Data.ByteString.Lazy as L import qualified Data.Map as M remote :: RemoteType @@ -56,15 +53,15 @@ gen r u c gc = do fromMaybe (error "missing rsyncurl") $ remoteAnnexRsyncUrl gc let o = genRsyncOpts c gc transport url let islocal = rsyncUrlIsPath $ rsyncUrl o - return $ Just $ encryptableRemote c - (storeEncrypted o $ getGpgEncParams (c,gc)) - (retrieveEncrypted o) + return $ Just $ specialRemote' specialcfg c + (simplyPrepare $ store o) + (simplyPrepare $ retrieve o) Remote { uuid = u , cost = cst , name = Git.repoDescribe r - , storeKey = store o - , retrieveKeyFile = retrieve o + , storeKey = storeKeyDummy + , retrieveKeyFile = retreiveKeyFileDummy , retrieveKeyFileCheap = retrieveCheap o , removeKey = remove o , hasKey = checkPresent r o @@ -82,6 +79,10 @@ gen r u c gc = do , availability = if islocal then LocallyAvailable else GloballyAvailable , remotetype = remote } + where + specialcfg = (specialRemoteCfg c) + -- Rsync displays its own progress. + { displayProgress = False } genRsyncOpts :: RemoteConfig -> RemoteGitConfig -> [CommandParam] -> RsyncUrl -> RsyncOpts genRsyncOpts c gc transport url = RsyncOpts @@ -139,32 +140,17 @@ rsyncSetup mu _ c = do gitConfigSpecialRemote u c' "rsyncurl" url return (c', u) -store :: RsyncOpts -> Key -> AssociatedFile -> MeterUpdate -> Annex Bool -store o k _f p = sendAnnex k (void $ remove o k) $ rsyncSend o p k False +store :: RsyncOpts -> Storer +store = fileStorer . rsyncSend -storeEncrypted :: RsyncOpts -> [CommandParam] -> (Cipher, Key) -> Key -> MeterUpdate -> Annex Bool -storeEncrypted o gpgOpts (cipher, enck) k p = withTmp enck $ \tmp -> - sendAnnex k (void $ remove o enck) $ \src -> do - liftIO $ encrypt gpgOpts cipher (feedFile src) $ - readBytes $ L.writeFile tmp - rsyncSend o p enck True tmp - -retrieve :: RsyncOpts -> Key -> AssociatedFile -> FilePath -> MeterUpdate -> Annex Bool -retrieve o k _ f p = rsyncRetrieve o k f (Just p) +retrieve :: RsyncOpts -> Retriever +retrieve o = fileRetriever $ \f k p -> + unlessM (rsyncRetrieve o k f (Just p)) $ + error "rsync failed" retrieveCheap :: RsyncOpts -> Key -> FilePath -> Annex Bool retrieveCheap o k f = ifM (preseedTmp k f) ( rsyncRetrieve o k f Nothing , return False ) -retrieveEncrypted :: RsyncOpts -> (Cipher, Key) -> Key -> FilePath -> MeterUpdate -> Annex Bool -retrieveEncrypted o (cipher, enck) _ f p = withTmp enck $ \tmp -> - ifM (rsyncRetrieve o enck tmp (Just p)) - ( liftIO $ catchBoolIO $ do - decrypt cipher (feedFile tmp) $ - readBytes $ L.writeFile f - return True - , return False - ) - remove :: RsyncOpts -> Key -> Annex Bool remove o k = do ps <- sendParams @@ -238,8 +224,8 @@ withRsyncScratchDir a = do removeDirectoryRecursive d rsyncRetrieve :: RsyncOpts -> Key -> FilePath -> Maybe MeterUpdate -> Annex Bool -rsyncRetrieve o k dest callback = - showResumable $ untilTrue (rsyncUrls o k) $ \u -> rsyncRemote Download o callback +rsyncRetrieve o k dest meterupdate = + showResumable $ untilTrue (rsyncUrls o k) $ \u -> rsyncRemote Download o meterupdate -- use inplace when retrieving to support resuming [ Param "--inplace" , Param u @@ -274,8 +260,8 @@ rsyncRemote direction o callback params = do - (When we have the right hash directory structure, we can just - pass --include=X --include=X/Y --include=X/Y/file --exclude=*) -} -rsyncSend :: RsyncOpts -> MeterUpdate -> Key -> Bool -> FilePath -> Annex Bool -rsyncSend o callback k canrename src = withRsyncScratchDir $ \tmp -> do +rsyncSend :: RsyncOpts -> Key -> FilePath -> MeterUpdate -> Annex Bool +rsyncSend o k src meterupdate = withRsyncScratchDir $ \tmp -> do let dest = tmp Prelude.head (keyPaths k) liftIO $ createDirectoryIfMissing True $ parentDir dest ok <- liftIO $ if canrename @@ -285,7 +271,7 @@ rsyncSend o callback k canrename src = withRsyncScratchDir $ \tmp -> do else createLinkOrCopy src dest ps <- sendParams if ok - then showResumable $ rsyncRemote Upload o (Just callback) $ ps ++ + then showResumable $ rsyncRemote Upload o (Just meterupdate) $ ps ++ [ Param "--recursive" , partialParams -- tmp/ to send contents of tmp dir @@ -293,3 +279,9 @@ rsyncSend o callback k canrename src = withRsyncScratchDir $ \tmp -> do , Param $ rsyncUrl o ] else return False + where + {- If the key being sent is encrypted or chunked, the file + - containing its content is a temp file, and so can be + - renamed into place. Otherwise, the file is the annexed + - object file, and has to be copied or hard linked into place. -} + canrename = isEncKey k || isChunkKey k From b35f7983ff1edd28cea58885a4d1c8d75799c66b Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Sun, 3 Aug 2014 17:31:10 -0400 Subject: [PATCH 27/56] convert gcrypt to new regime, including chunking Some reorg of Remote.Rsync code to export the things gcrypt needs. --- Remote/GCrypt.hs | 91 ++++++++++++++------------------- Remote/Rsync.hs | 83 +++++++++++++++--------------- debian/changelog | 2 +- doc/special_remotes/gcrypt.mdwn | 4 +- 4 files changed, 84 insertions(+), 96 deletions(-) diff --git a/Remote/GCrypt.hs b/Remote/GCrypt.hs index 523175fdc4..faf45b1d9d 100644 --- a/Remote/GCrypt.hs +++ b/Remote/GCrypt.hs @@ -29,7 +29,6 @@ import qualified Git.GCrypt import qualified Git.Construct import qualified Git.Types as Git () import qualified Annex.Branch -import qualified Annex.Content import Config import Config.Cost import Remote.Helper.Git @@ -38,7 +37,6 @@ import Remote.Helper.Special import Remote.Helper.Messages import qualified Remote.Helper.Ssh as Ssh import Utility.Metered -import Crypto import Annex.UUID import Annex.Ssh import qualified Remote.Rsync @@ -47,7 +45,6 @@ import Utility.Tmp import Logs.Remote import Logs.Transfer import Utility.Gpg -import Annex.Content remote :: RemoteType remote = RemoteType { @@ -101,8 +98,8 @@ gen' r u c gc = do { uuid = u , cost = cst , name = Git.repoDescribe r - , storeKey = \_ _ _ -> noCrypto - , retrieveKeyFile = \_ _ _ _ -> noCrypto + , storeKey = storeKeyDummy + , retrieveKeyFile = retreiveKeyFileDummy , retrieveKeyFileCheap = \_ _ -> return False , removeKey = remove this rsyncopts , hasKey = checkPresent this rsyncopts @@ -118,10 +115,14 @@ gen' r u c gc = do , availability = availabilityCalc r , remotetype = remote } - return $ Just $ encryptableRemote c - (store this rsyncopts) - (retrieve this rsyncopts) + return $ Just $ specialRemote' specialcfg c + (simplyPrepare $ store this rsyncopts) + (simplyPrepare $ retrieve this rsyncopts) this + where + specialcfg = (specialRemoteCfg c) + -- Rsync displays its own progress. + { displayProgress = False } rsyncTransportToObjects :: Git.Repo -> Annex ([CommandParam], String) rsyncTransportToObjects r = do @@ -147,7 +148,7 @@ rsyncTransport r noCrypto :: Annex a noCrypto = error "cannot use gcrypt remote without encryption enabled" -unsupportedUrl :: Annex a +unsupportedUrl :: a unsupportedUrl = error "using non-ssh remote repo url with gcrypt is not supported" gCryptSetup :: Maybe UUID -> Maybe CredPair -> RemoteConfig -> Annex (RemoteConfig, UUID) @@ -249,14 +250,19 @@ setupRepo gcryptid r denyNonFastForwards = "receive.denyNonFastForwards" -shellOrRsync :: Remote -> Annex a -> Annex a -> Annex a -shellOrRsync r ashell arsync = case method of - AccessShell -> ashell - _ -> arsync +isShell :: Remote -> Bool +isShell r = case method of + AccessShell -> True + _ -> False where method = toAccessMethod $ fromMaybe "" $ remoteAnnexGCrypt $ gitconfig r +shellOrRsync :: Remote -> Annex a -> Annex a -> Annex a +shellOrRsync r ashell arsync + | isShell r = ashell + | otherwise = arsync + {- Configure gcrypt to use the same list of keyids that - were passed to initremote as its participants. - Also, configure it to use a signing key that is in the list of @@ -287,51 +293,32 @@ setGcryptEncryption c remotename = do where remoteconfig n = ConfigKey $ n remotename -store :: Remote -> Remote.Rsync.RsyncOpts -> (Cipher, Key) -> Key -> MeterUpdate -> Annex Bool -store r rsyncopts (cipher, enck) k p - | not $ Git.repoIsUrl (repo r) = guardUsable (repo r) False $ - metered (Just p) k $ \meterupdate -> spoolencrypted $ \h -> do - let dest = gCryptLocation r enck +store :: Remote -> Remote.Rsync.RsyncOpts -> Storer +store r rsyncopts + | not $ Git.repoIsUrl (repo r) = + byteStorer $ \k b p -> guardUsable (repo r) False $ liftIO $ do + let dest = gCryptLocation r k createDirectoryIfMissing True $ parentDir dest - readBytes (meteredWriteFile meterupdate dest) h + meteredWriteFile p dest b return True - | Git.repoIsSsh (repo r) = shellOrRsync r storeshell storersync + | Git.repoIsSsh (repo r) = if isShell r + then fileStorer $ \k f p -> Ssh.rsyncHelper (Just p) + =<< Ssh.rsyncParamsRemote False r Upload k f Nothing + else fileStorer $ Remote.Rsync.store rsyncopts | otherwise = unsupportedUrl - where - gpgopts = getGpgEncParams r - storersync = undefined -- Remote.Rsync.storeEncrypted rsyncopts gpgopts (cipher, enck) k p - storeshell = withTmp enck $ \tmp -> - ifM (spoolencrypted $ readBytes $ \b -> catchBoolIO $ L.writeFile tmp b >> return True) - ( Ssh.rsyncHelper (Just p) - =<< Ssh.rsyncParamsRemote False r Upload enck tmp Nothing - , return False - ) - spoolencrypted a = Annex.Content.sendAnnex k noop $ \src -> - liftIO $ catchBoolIO $ - encrypt gpgopts cipher (feedFile src) a -retrieve :: Remote -> Remote.Rsync.RsyncOpts -> (Cipher, Key) -> Key -> FilePath -> MeterUpdate -> Annex Bool -retrieve r rsyncopts (cipher, enck) k d p - | not $ Git.repoIsUrl (repo r) = guardUsable (repo r) False $ do - retrievewith $ L.readFile src - return True - | Git.repoIsSsh (repo r) = shellOrRsync r retrieveshell retrieversync +retrieve :: Remote -> Remote.Rsync.RsyncOpts -> Retriever +retrieve r rsyncopts + | not $ Git.repoIsUrl (repo r) = byteRetriever $ \k sink -> + guardUsable (repo r) False $ + sink =<< liftIO (L.readFile $ gCryptLocation r k) + | Git.repoIsSsh (repo r) = if isShell r + then fileRetriever $ \f k p -> + unlessM (Ssh.rsyncHelper (Just p) =<< Ssh.rsyncParamsRemote False r Download k f Nothing) $ + error "rsync failed" + else fileRetriever $ Remote.Rsync.retrieve rsyncopts | otherwise = unsupportedUrl where - src = gCryptLocation r enck - retrievewith a = metered (Just p) k $ \meterupdate -> liftIO $ - a >>= \b -> - decrypt cipher (feedBytes b) - (readBytes $ meteredWriteFile meterupdate d) - retrieversync = undefined -- Remote.Rsync.retrieveEncrypted rsyncopts (cipher, enck) k d p - retrieveshell = withTmp enck $ \tmp -> - ifM (Ssh.rsyncHelper (Just p) =<< Ssh.rsyncParamsRemote False r Download enck tmp Nothing) - ( liftIO $ catchBoolIO $ do - decrypt cipher (feedFile tmp) $ - readBytes $ L.writeFile d - return True - , return False - ) remove :: Remote -> Remote.Rsync.RsyncOpts -> Key -> Annex Bool remove r rsyncopts k diff --git a/Remote/Rsync.hs b/Remote/Rsync.hs index d0bacd5859..421c451bdd 100644 --- a/Remote/Rsync.hs +++ b/Remote/Rsync.hs @@ -9,6 +9,8 @@ module Remote.Rsync ( remote, + store, + retrieve, remove, checkPresent, withRsyncScratchDir, @@ -54,8 +56,8 @@ gen r u c gc = do let o = genRsyncOpts c gc transport url let islocal = rsyncUrlIsPath $ rsyncUrl o return $ Just $ specialRemote' specialcfg c - (simplyPrepare $ store o) - (simplyPrepare $ retrieve o) + (simplyPrepare $ fileStorer $ store o) + (simplyPrepare $ fileRetriever $ retrieve o) Remote { uuid = u , cost = cst @@ -140,11 +142,44 @@ rsyncSetup mu _ c = do gitConfigSpecialRemote u c' "rsyncurl" url return (c', u) -store :: RsyncOpts -> Storer -store = fileStorer . rsyncSend +{- To send a single key is slightly tricky; need to build up a temporary + - directory structure to pass to rsync so it can create the hash + - directories. + - + - This would not be necessary if the hash directory structure used locally + - was always the same as that used on the rsync remote. So if that's ever + - unified, this gets nicer. + - (When we have the right hash directory structure, we can just + - pass --include=X --include=X/Y --include=X/Y/file --exclude=*) + -} +store :: RsyncOpts -> Key -> FilePath -> MeterUpdate -> Annex Bool +store o k src meterupdate = withRsyncScratchDir $ \tmp -> do + let dest = tmp Prelude.head (keyPaths k) + liftIO $ createDirectoryIfMissing True $ parentDir dest + ok <- liftIO $ if canrename + then do + rename src dest + return True + else createLinkOrCopy src dest + ps <- sendParams + if ok + then showResumable $ rsyncRemote Upload o (Just meterupdate) $ ps ++ + [ Param "--recursive" + , partialParams + -- tmp/ to send contents of tmp dir + , File $ addTrailingPathSeparator tmp + , Param $ rsyncUrl o + ] + else return False + where + {- If the key being sent is encrypted or chunked, the file + - containing its content is a temp file, and so can be + - renamed into place. Otherwise, the file is the annexed + - object file, and has to be copied or hard linked into place. -} + canrename = isEncKey k || isChunkKey k -retrieve :: RsyncOpts -> Retriever -retrieve o = fileRetriever $ \f k p -> +retrieve :: RsyncOpts -> FilePath -> Key -> MeterUpdate -> Annex () +retrieve o f k p = unlessM (rsyncRetrieve o k f (Just p)) $ error "rsync failed" @@ -249,39 +284,3 @@ rsyncRemote direction o callback params = do opts | direction == Download = rsyncDownloadOptions o | otherwise = rsyncUploadOptions o - -{- To send a single key is slightly tricky; need to build up a temporary - - directory structure to pass to rsync so it can create the hash - - directories. - - - - This would not be necessary if the hash directory structure used locally - - was always the same as that used on the rsync remote. So if that's ever - - unified, this gets nicer. - - (When we have the right hash directory structure, we can just - - pass --include=X --include=X/Y --include=X/Y/file --exclude=*) - -} -rsyncSend :: RsyncOpts -> Key -> FilePath -> MeterUpdate -> Annex Bool -rsyncSend o k src meterupdate = withRsyncScratchDir $ \tmp -> do - let dest = tmp Prelude.head (keyPaths k) - liftIO $ createDirectoryIfMissing True $ parentDir dest - ok <- liftIO $ if canrename - then do - rename src dest - return True - else createLinkOrCopy src dest - ps <- sendParams - if ok - then showResumable $ rsyncRemote Upload o (Just meterupdate) $ ps ++ - [ Param "--recursive" - , partialParams - -- tmp/ to send contents of tmp dir - , File $ addTrailingPathSeparator tmp - , Param $ rsyncUrl o - ] - else return False - where - {- If the key being sent is encrypted or chunked, the file - - containing its content is a temp file, and so can be - - renamed into place. Otherwise, the file is the annexed - - object file, and has to be copied or hard linked into place. -} - canrename = isEncKey k || isChunkKey k diff --git a/debian/changelog b/debian/changelog index 2e00728ad9..a6118966e7 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,7 +1,7 @@ git-annex (5.20140718) UNRELEASED; urgency=medium * New chunk= option to chunk files stored in special remotes. - Currently supported by: directory, S3, rsync, and all external + Supported by: directory, S3, gcrypt, rsync, and all external and hook special remotes. * Partially transferred files are automatically resumed when using chunked remotes! diff --git a/doc/special_remotes/gcrypt.mdwn b/doc/special_remotes/gcrypt.mdwn index 2e07741d3f..c9a22b01aa 100644 --- a/doc/special_remotes/gcrypt.mdwn +++ b/doc/special_remotes/gcrypt.mdwn @@ -13,7 +13,7 @@ These parameters can be passed to `git annex initremote` to configure gcrypt: * `encryption` - One of "none", "hybrid", "shared", or "pubkey". - See [[encryption]]. + Required. See [[encryption]]. * `keyid` - Specifies the gpg key to use for encryption of both the files git-annex stores in the repository, as well as to encrypt the git @@ -24,6 +24,8 @@ gcrypt: for gcrypt to use. This repository should be either empty, or an existing gcrypt repositry. +* `chunk` - Enables [[chunking]] when storing large files. + * `shellescape` - See [[rsync]] for the details of this option. ## notes From 93f20541f5a0e04312530d3f0b496c9c3976b589 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Sun, 3 Aug 2014 18:08:34 -0400 Subject: [PATCH 28/56] testremote --fast --- Command/TestRemote.hs | 20 ++++++++++++++------ doc/git-annex.mdwn | 2 ++ 2 files changed, 16 insertions(+), 6 deletions(-) diff --git a/Command/TestRemote.hs b/Command/TestRemote.hs index 29a2e809cd..f68d391b11 100644 --- a/Command/TestRemote.hs +++ b/Command/TestRemote.hs @@ -59,8 +59,9 @@ start basesz ws = do showStart "testremote" name r <- either error id <$> Remote.byName' name showSideAction "generating test keys" - ks <- mapM randKey (keySizes basesz) - rs <- catMaybes <$> mapM (adjustChunkSize r) (chunkSizes basesz) + fast <- Annex.getState Annex.fast + ks <- mapM randKey (keySizes basesz fast) + rs <- catMaybes <$> mapM (adjustChunkSize r) (chunkSizes basesz fast) rs' <- concat <$> mapM encryptionVariants rs next $ perform rs' ks @@ -161,22 +162,29 @@ cleanup rs ks ok = do forM_ ks removeAnnex return ok -chunkSizes :: Int -> [Int] -chunkSizes base = +chunkSizes :: Int -> Bool -> [Int] +chunkSizes base False = [ 0 -- no chunking , base `div` 100 , base `div` 1000 , base ] +chunkSizes base True = + [ 0 + ] -keySizes :: Int -> [Int] -keySizes base = filter (>= 0) +keySizes :: Int -> Bool -> [Int] +keySizes base fast = filter want [ 0 -- empty key is a special case when chunking , base , base + 1 , base - 1 , base * 2 ] + where + want sz + | fast = sz <= base && sz > 0 + | otherwise = sz > 0 randKey :: Int -> Annex Key randKey sz = withTmpFile "randkey" $ \f h -> do diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn index ba851eef86..673e86fc96 100644 --- a/doc/git-annex.mdwn +++ b/doc/git-annex.mdwn @@ -974,6 +974,8 @@ subdirectories). It's safe to run in an existing repository (the repository contents are not altered), although it may perform expensive data transfers. + To perform a smaller set of tests, use --fast. + The --size option can be used to tune the size of the generated objects. Testing a single remote will use the remote's configuration, From 1cd2273035e71b73cc47f10a7269876c0550d4c0 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Sun, 3 Aug 2014 20:14:20 -0400 Subject: [PATCH 29/56] finally properly fixed ssh zombie leak The leak was caused by the thread that sshd'd to send transferinfo not waiting on its ssh. Doh. --- Remote/Git.hs | 9 ++++++++- Remote/Helper/Ssh.hs | 9 +-------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/Remote/Git.hs b/Remote/Git.hs index 4498ec907c..bf2228c497 100644 --- a/Remote/Git.hs +++ b/Remote/Git.hs @@ -390,6 +390,7 @@ copyFromRemote' r key file dest Just (cmd, params) <- Ssh.git_annex_shell (repo r) "transferinfo" [Param $ key2file key] fields v <- liftIO (newEmptySV :: IO (MSampleVar Integer)) + pidv <- liftIO $ newEmptyMVar tid <- liftIO $ forkIO $ void $ tryIO $ do bytes <- readSV v p <- createProcess $ @@ -397,6 +398,7 @@ copyFromRemote' r key file dest { std_in = CreatePipe , std_err = CreatePipe } + putMVar pidv (processHandle p) hClose $ stderrHandle p let h = stdinHandle p let send b = do @@ -406,7 +408,12 @@ copyFromRemote' r key file dest forever $ send =<< readSV v let feeder = writeSV v . fromBytesProcessed - bracketIO noop (const $ tryIO $ killThread tid) (const $ a feeder) + let cleanup = do + void $ tryIO $ killThread tid + tryNonAsync $ + maybe noop (void . waitForProcess) + =<< tryTakeMVar pidv + bracketIO noop (const cleanup) (const $ a feeder) copyFromRemoteCheap :: Remote -> Key -> FilePath -> Annex Bool #ifndef mingw32_HOST_OS diff --git a/Remote/Helper/Ssh.hs b/Remote/Helper/Ssh.hs index e0199dca3e..05a98865ff 100644 --- a/Remote/Helper/Ssh.hs +++ b/Remote/Helper/Ssh.hs @@ -102,20 +102,13 @@ dropKey r key = onRemote r (boolSystem, False) "dropkey" rsyncHelper :: Maybe MeterUpdate -> [CommandParam] -> Annex Bool rsyncHelper callback params = do showOutput -- make way for progress bar - ok <- ifM (liftIO $ (maybe rsync rsyncProgress callback) params) + ifM (liftIO $ (maybe rsync rsyncProgress callback) params) ( return True , do showLongNote "rsync failed -- run git annex again to resume file transfer" return False ) - {- For an unknown reason, this causes rsync to run a second - - ssh process, which it neglects to wait on. - - Reap the resulting zombie. -} - liftIO reapZombies - - return ok - {- Generates rsync parameters that ssh to the remote and asks it - to either receive or send the key's content. -} rsyncParamsRemote :: Bool -> Remote -> Direction -> Key -> FilePath -> AssociatedFile -> Annex [CommandParam] From 8601f8f57111b654ad5606aa47e42f280d2398f3 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Sun, 3 Aug 2014 20:19:04 -0400 Subject: [PATCH 30/56] when not using rsync (for local gcrypt repo), display own progress meter --- Remote/GCrypt.hs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/Remote/GCrypt.hs b/Remote/GCrypt.hs index faf45b1d9d..b2df7d56a1 100644 --- a/Remote/GCrypt.hs +++ b/Remote/GCrypt.hs @@ -120,9 +120,11 @@ gen' r u c gc = do (simplyPrepare $ retrieve this rsyncopts) this where - specialcfg = (specialRemoteCfg c) - -- Rsync displays its own progress. - { displayProgress = False } + specialcfg + | Git.repoIsUrl r = (specialRemoteCfg c) + -- Rsync displays its own progress. + { displayProgress = False } + | otherwise = specialRemoteCfg c rsyncTransportToObjects :: Git.Repo -> Annex ([CommandParam], String) rsyncTransportToObjects r = do From d12becfddeca7468a0a362a1993b523ec73e1769 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Sun, 3 Aug 2014 20:21:46 -0400 Subject: [PATCH 31/56] fix removal from local gcrypt repo that had files stored using rsync When files are stored using rsync, they have their write bit removed; so does the directory they're put in. The local repo code did not turn these bits back on, so failed to remove. --- Remote/GCrypt.hs | 8 +++++++- debian/changelog | 1 - 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/Remote/GCrypt.hs b/Remote/GCrypt.hs index b2df7d56a1..db01443634 100644 --- a/Remote/GCrypt.hs +++ b/Remote/GCrypt.hs @@ -45,6 +45,7 @@ import Utility.Tmp import Logs.Remote import Logs.Transfer import Utility.Gpg +import Utility.FileMode remote :: RemoteType remote = RemoteType { @@ -325,7 +326,12 @@ retrieve r rsyncopts remove :: Remote -> Remote.Rsync.RsyncOpts -> Key -> Annex Bool remove r rsyncopts k | not $ Git.repoIsUrl (repo r) = guardUsable (repo r) False $ do - liftIO $ removeDirectoryRecursive $ parentDir $ gCryptLocation r k + let f = gCryptLocation r k + let d = parentDir f + liftIO $ do + allowWrite d + allowWrite f + removeDirectoryRecursive d return True | Git.repoIsSsh (repo r) = shellOrRsync r removeshell removersync | otherwise = unsupportedUrl diff --git a/debian/changelog b/debian/changelog index a6118966e7..8da5b87bb3 100644 --- a/debian/changelog +++ b/debian/changelog @@ -21,7 +21,6 @@ git-annex (5.20140718) UNRELEASED; urgency=medium * Dropping an object from a bup special remote now deletes the git branch for the object, although of course the object's content cannot be deleted due to the nature of bup. - * -- Joey Hess Mon, 21 Jul 2014 14:41:26 -0400 From d3778e631b795214c65968978863530ee2188cce Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Sun, 3 Aug 2014 20:24:13 -0400 Subject: [PATCH 32/56] remove write bit when storing to local gcrypt repo Same as is done by rsync, and for regular git repos. --- Remote/GCrypt.hs | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/Remote/GCrypt.hs b/Remote/GCrypt.hs index db01443634..28fe9964b6 100644 --- a/Remote/GCrypt.hs +++ b/Remote/GCrypt.hs @@ -300,9 +300,14 @@ store :: Remote -> Remote.Rsync.RsyncOpts -> Storer store r rsyncopts | not $ Git.repoIsUrl (repo r) = byteStorer $ \k b p -> guardUsable (repo r) False $ liftIO $ do - let dest = gCryptLocation r k - createDirectoryIfMissing True $ parentDir dest - meteredWriteFile p dest b + let f = gCryptLocation r k + let d = parentDir f + createDirectoryIfMissing True d + allowWrite d + void $ liftIO $ tryIO $ allowWrite f + meteredWriteFile p f b + preventWrite f + preventWrite d return True | Git.repoIsSsh (repo r) = if isShell r then fileStorer $ \k f p -> Ssh.rsyncHelper (Just p) From db54981a4c7d690cc658b9785e6f56288f506086 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Mon, 4 Aug 2014 08:24:06 -0400 Subject: [PATCH 33/56] fix warning --- Command/TestRemote.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Command/TestRemote.hs b/Command/TestRemote.hs index 789eb75b17..463c4d3595 100644 --- a/Command/TestRemote.hs +++ b/Command/TestRemote.hs @@ -169,7 +169,7 @@ chunkSizes base False = , base `div` 1000 , base ] -chunkSizes base True = +chunkSizes _ True = [ 0 ] From 6f4592966d60834f28769311e127c9b374e2e62d Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Mon, 4 Aug 2014 08:42:04 -0400 Subject: [PATCH 34/56] make testremote work with gcrypt repos This involved making Remote.Gcrypt.gen expect a Repo with a regular, non-gcrypt path. Since tht is what's stored as the Remote's gitrepo, testremote can then modify it and feed it back into gen. --- Remote/GCrypt.hs | 23 ++++++++++++++--------- Remote/Git.hs | 2 +- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/Remote/GCrypt.hs b/Remote/GCrypt.hs index 28fe9964b6..02c31f38d0 100644 --- a/Remote/GCrypt.hs +++ b/Remote/GCrypt.hs @@ -7,7 +7,7 @@ module Remote.GCrypt ( remote, - gen, + chainGen, getGCryptUUID, coreGCryptId, setupRepo @@ -57,19 +57,24 @@ remote = RemoteType { setup = gCryptSetup } -gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote) -gen gcryptr u c gc = do +chainGen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote) +chainGen gcryptr u c gc = do g <- gitRepo -- get underlying git repo with real path, not gcrypt path r <- liftIO $ Git.GCrypt.encryptedRemote g gcryptr let r' = r { Git.remoteName = Git.remoteName gcryptr } + gen r' u c gc + +gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote) +gen baser u c gc = do -- doublecheck that cache matches underlying repo's gcrypt-id -- (which might not be set), only for local repos - (mgcryptid, r'') <- getGCryptId True r' - case (mgcryptid, Git.GCrypt.remoteRepoId g (Git.remoteName gcryptr)) of + (mgcryptid, r) <- getGCryptId True baser + g <- gitRepo + case (mgcryptid, Git.GCrypt.remoteRepoId g (Git.remoteName baser)) of (Just gcryptid, Just cachedgcryptid) - | gcryptid /= cachedgcryptid -> resetup gcryptid r'' - _ -> gen' r'' u c gc + | gcryptid /= cachedgcryptid -> resetup gcryptid r + _ -> gen' r u c gc where -- A different drive may have been mounted, making a different -- gcrypt remote available. So need to set the cached @@ -79,10 +84,10 @@ gen gcryptr u c gc = do resetup gcryptid r = do let u' = genUUIDInNameSpace gCryptNameSpace gcryptid v <- M.lookup u' <$> readRemoteLog - case (Git.remoteName gcryptr, v) of + case (Git.remoteName baser, v) of (Just remotename, Just c') -> do setGcryptEncryption c' remotename - setConfig (remoteConfig gcryptr "uuid") (fromUUID u') + setConfig (remoteConfig baser "uuid") (fromUUID u') setConfig (ConfigKey $ Git.GCrypt.remoteConfigKey "gcrypt-id" remotename) gcryptid gen' r u' c' gc _ -> do diff --git a/Remote/Git.hs b/Remote/Git.hs index bf2228c497..c35f9f32af 100644 --- a/Remote/Git.hs +++ b/Remote/Git.hs @@ -127,7 +127,7 @@ configRead r = do gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote) gen r u c gc - | Git.GCrypt.isEncrypted r = Remote.GCrypt.gen r u c gc + | Git.GCrypt.isEncrypted r = Remote.GCrypt.chainGen r u c gc | otherwise = go <$> remoteCost gc defcst where defcst = if repoCheap r then cheapRemoteCost else expensiveRemoteCost From 00c146816076d2d4dd15d51749fbdd3038452735 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Mon, 4 Aug 2014 09:00:57 -0400 Subject: [PATCH 35/56] gcrypt: fix removal of key that does not exist Generalized code from Remote.Directory and reused it. Test suite now passes for local gcrypt repos. --- Remote/Directory.hs | 24 ++++++++++++++++-------- Remote/GCrypt.hs | 11 +++-------- 2 files changed, 19 insertions(+), 16 deletions(-) diff --git a/Remote/Directory.hs b/Remote/Directory.hs index db141e01ad..3b54ad2000 100644 --- a/Remote/Directory.hs +++ b/Remote/Directory.hs @@ -7,7 +7,7 @@ {-# LANGUAGE CPP #-} -module Remote.Directory (remote) where +module Remote.Directory (remote, removeDirGeneric) where import qualified Data.ByteString.Lazy as L import qualified Data.Map as M @@ -152,7 +152,20 @@ retrieveCheap _ _ _ _ = return False #endif remove :: FilePath -> Key -> Annex Bool -remove d k = liftIO $ do +remove d k = liftIO $ removeDirGeneric d (storeDir d k) + +{- Removes the directory, which must be located under the topdir. + - + - Succeeds even on directories and contents that do not have write + - permission. + - + - If the directory does not exist, succeeds as long as the topdir does + - exist. If the topdir does not exist, fails, because in this case the + - remote is not currently accessible and probably still has the content + - we were supposed to remove from it. + -} +removeDirGeneric :: FilePath -> FilePath -> IO Bool +removeDirGeneric topdir dir = do void $ tryIO $ allowWrite dir #ifdef mingw32_HOST_OS {- Windows needs the files inside the directory to be writable @@ -162,14 +175,9 @@ remove d k = liftIO $ do ok <- catchBoolIO $ do removeDirectoryRecursive dir return True - {- Removing the subdirectory will fail if it doesn't exist. - - But, we want to succeed in that case, as long as the directory - - remote's top-level directory does exist. -} if ok then return ok - else doesDirectoryExist d <&&> (not <$> doesDirectoryExist dir) - where - dir = storeDir d k + else doesDirectoryExist topdir <&&> (not <$> doesDirectoryExist dir) checkPresent :: FilePath -> ChunkConfig -> Key -> Annex (Either String Bool) checkPresent d (LegacyChunks _) k = Legacy.checkPresent d locations k diff --git a/Remote/GCrypt.hs b/Remote/GCrypt.hs index 02c31f38d0..a0292a954d 100644 --- a/Remote/GCrypt.hs +++ b/Remote/GCrypt.hs @@ -40,6 +40,7 @@ import Utility.Metered import Annex.UUID import Annex.Ssh import qualified Remote.Rsync +import qualified Remote.Directory import Utility.Rsync import Utility.Tmp import Logs.Remote @@ -335,14 +336,8 @@ retrieve r rsyncopts remove :: Remote -> Remote.Rsync.RsyncOpts -> Key -> Annex Bool remove r rsyncopts k - | not $ Git.repoIsUrl (repo r) = guardUsable (repo r) False $ do - let f = gCryptLocation r k - let d = parentDir f - liftIO $ do - allowWrite d - allowWrite f - removeDirectoryRecursive d - return True + | not $ Git.repoIsUrl (repo r) = guardUsable (repo r) False $ + liftIO $ Remote.Directory.removeDirGeneric (Git.repoLocation (repo r)) (parentDir (gCryptLocation r k)) | Git.repoIsSsh (repo r) = shellOrRsync r removeshell removersync | otherwise = unsupportedUrl where From 6cecffea8924229c15589b25896de916e2428890 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Mon, 4 Aug 2014 09:16:47 -0400 Subject: [PATCH 36/56] fix "storeKey when already present" test for git-annex-shell transfers Now git-annex-shell recvkey, when the key is already present, allows another copy to be rsynced up, and just throws it away. This same behavior could have already happened before, when eg, two repos tried to upload the same object at the same time. So this makes the test suite pass, and should not add any bad behavior, other than slightly more work being done in a rather edge case. This relies on moveAnnex's behavior of keeping the current version of an object. --- Command/RecvKey.hs | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/Command/RecvKey.hs b/Command/RecvKey.hs index 1794596c57..d5971d6cf5 100644 --- a/Command/RecvKey.hs +++ b/Command/RecvKey.hs @@ -28,18 +28,15 @@ seek :: CommandSeek seek = withKeys start start :: Key -> CommandStart -start key = ifM (inAnnex key) - ( error "key is already present in annex" - , fieldTransfer Download key $ \_p -> - ifM (getViaTmp key go) - ( do - -- forcibly quit after receiving one key, - -- and shutdown cleanly - _ <- shutdown True - return True - , return False - ) - ) +start key = fieldTransfer Download key $ \_p -> + ifM (getViaTmp key go) + ( do + -- forcibly quit after receiving one key, + -- and shutdown cleanly + _ <- shutdown True + return True + , return False + ) where go tmp = do opts <- filterRsyncSafeOptions . maybe [] words From 22c7a7a41ac1a20425c7b11240bc5a346654a27b Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Mon, 4 Aug 2014 09:35:57 -0400 Subject: [PATCH 37/56] make local gcrypt storeKey be atomic Reuse Remote.Directory's code. --- Remote/Directory.hs | 34 ++++++++++++++++++++++------------ Remote/GCrypt.hs | 14 ++++++-------- 2 files changed, 28 insertions(+), 20 deletions(-) diff --git a/Remote/Directory.hs b/Remote/Directory.hs index 3b54ad2000..9b3c156959 100644 --- a/Remote/Directory.hs +++ b/Remote/Directory.hs @@ -7,7 +7,11 @@ {-# LANGUAGE CPP #-} -module Remote.Directory (remote, removeDirGeneric) where +module Remote.Directory ( + remote, + finalizeStoreGeneric, + removeDirGeneric, +) where import qualified Data.ByteString.Lazy as L import qualified Data.Map as M @@ -114,24 +118,30 @@ store :: FilePath -> ChunkConfig -> Key -> L.ByteString -> MeterUpdate -> Annex store d chunkconfig k b p = liftIO $ do void $ tryIO $ createDirectoryIfMissing True tmpdir case chunkconfig of - LegacyChunks chunksize -> Legacy.store chunksize finalizer k b p tmpdir destdir + LegacyChunks chunksize -> Legacy.store chunksize finalizeStoreGeneric k b p tmpdir destdir _ -> do let tmpf = tmpdir keyFile k meteredWriteFile p tmpf b - finalizer tmpdir destdir + finalizeStoreGeneric tmpdir destdir return True where tmpdir = tmpDir d k destdir = storeDir d k - finalizer tmp dest = do - void $ tryIO $ allowWrite dest -- may already exist - void $ tryIO $ removeDirectoryRecursive dest -- or not exist - createDirectoryIfMissing True (parentDir dest) - renameDirectory tmp dest - -- may fail on some filesystems - void $ tryIO $ do - mapM_ preventWrite =<< dirContents dest - preventWrite dest + +{- Passed a temp directory that contains the files that should be placed + - in the dest directory, moves it into place. Anything already existing + - in the dest directory will be deleted. File permissions will be locked + - down. -} +finalizeStoreGeneric :: FilePath -> FilePath -> IO () +finalizeStoreGeneric tmp dest = do + void $ tryIO $ allowWrite dest -- may already exist + void $ tryIO $ removeDirectoryRecursive dest -- or not exist + createDirectoryIfMissing True (parentDir dest) + renameDirectory tmp dest + -- may fail on some filesystems + void $ tryIO $ do + mapM_ preventWrite =<< dirContents dest + preventWrite dest retrieve :: FilePath -> ChunkConfig -> Preparer Retriever retrieve d (LegacyChunks _) = Legacy.retrieve locations d diff --git a/Remote/GCrypt.hs b/Remote/GCrypt.hs index a0292a954d..d969e02f8a 100644 --- a/Remote/GCrypt.hs +++ b/Remote/GCrypt.hs @@ -306,14 +306,12 @@ store :: Remote -> Remote.Rsync.RsyncOpts -> Storer store r rsyncopts | not $ Git.repoIsUrl (repo r) = byteStorer $ \k b p -> guardUsable (repo r) False $ liftIO $ do - let f = gCryptLocation r k - let d = parentDir f - createDirectoryIfMissing True d - allowWrite d - void $ liftIO $ tryIO $ allowWrite f - meteredWriteFile p f b - preventWrite f - preventWrite d + let tmpdir = Git.repoLocation (repo r) "tmp" keyFile k + void $ tryIO $ createDirectoryIfMissing True tmpdir + let tmpf = tmpdir keyFile k + meteredWriteFile p tmpf b + let destdir = parentDir $ gCryptLocation r k + Remote.Directory.finalizeStoreGeneric tmpdir destdir return True | Git.repoIsSsh (repo r) = if isShell r then fileStorer $ \k f p -> Ssh.rsyncHelper (Just p) From 781833b16fd005255e47d794955d7ed996c1c800 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 6 Aug 2014 13:45:05 -0400 Subject: [PATCH 38/56] update for change in bup remote removal --- Test.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Test.hs b/Test.hs index 3ae5e323b9..5032038ad9 100644 --- a/Test.hs +++ b/Test.hs @@ -1251,7 +1251,7 @@ test_bup_remote testenv = intmpclonerepo testenv $ when Build.SysConfig.bup $ do annexed_notpresent annexedfile git_annex testenv "copy" [annexedfile, "--from", "foo"] @? "copy --from bup remote failed" annexed_present annexedfile - not <$> git_annex testenv "move" [annexedfile, "--from", "foo"] @? "move --from bup remote failed to fail" + git_annex testenv "move" [annexedfile, "--from", "foo"] @? "move --from bup remote failed" annexed_present annexedfile -- gpg is not a build dependency, so only test when it's available From b4cf22a388915b4f44386ecd062bf6e56003fac3 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 6 Aug 2014 13:45:19 -0400 Subject: [PATCH 39/56] pushed checkPresent exception handling out of Remote implementations I tend to prefer moving toward explicit exception handling, not away from it, but in this case, I think there are good reasons to let checkPresent throw exceptions: 1. They can all be caught in one place (Remote.hasKey), and we know every possible exception is caught there now, which we didn't before. 2. It simplified the code of the Remotes. I think it makes sense for Remotes to be able to be implemented without needing to worry about catching exceptions inside them. (Mostly.) 3. Types.StoreRetrieve.Preparer can only work on things that return a Bool, which all the other relevant remote methods already did. I do not see a good way to generalize that type; my previous attempts failed miserably. --- Remote.hs | 8 ++++++ Remote/Bup.hs | 15 +++++----- Remote/Ddar.hs | 15 +++++----- Remote/Directory.hs | 21 ++++++-------- Remote/Directory/LegacyChunked.hs | 9 +++--- Remote/External.hs | 8 +++--- Remote/GCrypt.hs | 14 ++++----- Remote/Git.hs | 25 ++++++---------- Remote/Glacier.hs | 28 ++++++++---------- Remote/Helper/Chunked.hs | 47 +++++++++++++++++-------------- Remote/Helper/Encryptable.hs | 6 ++-- Remote/Helper/Hooks.hs | 2 +- Remote/Helper/Messages.hs | 4 +-- Remote/Helper/Special.hs | 8 +++--- Remote/Helper/Ssh.hs | 6 ++-- Remote/Hook.hs | 10 +++---- Remote/Rsync.hs | 14 ++++----- Remote/S3.hs | 16 +++++------ Remote/Tahoe.hs | 20 +++++++------ Remote/Web.hs | 10 +++---- Remote/WebDAV.hs | 10 +++---- Types/Remote.hs | 10 +++---- Types/StoreRetrieve.hs | 8 ++++++ doc/design/assistant/chunks.mdwn | 16 +++++------ 24 files changed, 167 insertions(+), 163 deletions(-) diff --git a/Remote.hs b/Remote.hs index 29097f77d3..5ee75823f5 100644 --- a/Remote.hs +++ b/Remote.hs @@ -56,6 +56,7 @@ import Data.Ord import Common.Annex import Types.Remote import qualified Annex +import Annex.Exception import Annex.UUID import Logs.UUID import Logs.Trust @@ -312,3 +313,10 @@ isXMPPRemote :: Remote -> Bool isXMPPRemote remote = Git.repoIsUrl r && "xmpp::" `isPrefixOf` Git.repoLocation r where r = repo remote + +hasKey :: Remote -> Key -> Annex (Either String Bool) +hasKey r k = either (Left . show) Right + <$> tryNonAsyncAnnex (checkPresent r k) + +hasKeyCheap :: Remote -> Bool +hasKeyCheap = checkPresentCheap diff --git a/Remote/Bup.hs b/Remote/Bup.hs index 6a04ad5f7f..2e68f30ef7 100644 --- a/Remote/Bup.hs +++ b/Remote/Bup.hs @@ -58,8 +58,8 @@ gen r u c gc = do , retrieveKeyFile = retreiveKeyFileDummy , retrieveKeyFileCheap = retrieveCheap buprepo , removeKey = remove buprepo - , hasKey = checkPresent r bupr' - , hasKeyCheap = bupLocal buprepo + , checkPresent = checkKey r bupr' + , checkPresentCheap = bupLocal buprepo , whereisKey = Nothing , remoteFsck = Nothing , repairRepo = Nothing @@ -163,14 +163,13 @@ remove buprepo k = do - in a bup repository. One way it to check if the git repository has - a branch matching the name (as created by bup split -n). -} -checkPresent :: Git.Repo -> Git.Repo -> Key -> Annex (Either String Bool) -checkPresent r bupr k +checkKey :: Git.Repo -> Git.Repo -> Key -> Annex Bool +checkKey r bupr k | Git.repoIsUrl bupr = do showChecking r - ok <- onBupRemote bupr boolSystem "git" params - return $ Right ok - | otherwise = liftIO $ catchMsgIO $ - boolSystem "git" $ Git.Command.gitCommandLine params bupr + onBupRemote bupr boolSystem "git" params + | otherwise = liftIO $ boolSystem "git" $ + Git.Command.gitCommandLine params bupr where params = [ Params "show-ref --quiet --verify" diff --git a/Remote/Ddar.hs b/Remote/Ddar.hs index b4c7ac1e62..1227b52755 100644 --- a/Remote/Ddar.hs +++ b/Remote/Ddar.hs @@ -54,8 +54,8 @@ gen r u c gc = do , retrieveKeyFile = retreiveKeyFileDummy , retrieveKeyFileCheap = retrieveCheap , removeKey = remove ddarrepo - , hasKey = checkPresent ddarrepo - , hasKeyCheap = ddarLocal ddarrepo + , checkPresent = checkKey ddarrepo + , checkPresentCheap = ddarLocal ddarrepo , whereisKey = Nothing , remoteFsck = Nothing , repairRepo = Nothing @@ -181,13 +181,14 @@ inDdarManifest ddarrepo k = do where k' = key2file k -checkPresent :: DdarRepo -> Key -> Annex (Either String Bool) -checkPresent ddarrepo key = do +checkKey :: DdarRepo -> Key -> Annex Bool +checkKey ddarrepo key = do directoryExists <- ddarDirectoryExists ddarrepo case directoryExists of - Left e -> return $ Left e - Right True -> inDdarManifest ddarrepo key - Right False -> return $ Right False + Left e -> error e + Right True -> either error return + =<< inDdarManifest ddarrepo key + Right False -> return False ddarLocal :: DdarRepo -> Bool ddarLocal = notElem ':' diff --git a/Remote/Directory.hs b/Remote/Directory.hs index 9b3c156959..0a2532aa5b 100644 --- a/Remote/Directory.hs +++ b/Remote/Directory.hs @@ -52,8 +52,8 @@ gen r u c gc = do retrieveKeyFile = retreiveKeyFileDummy, retrieveKeyFileCheap = retrieveCheap dir chunkconfig, removeKey = remove dir, - hasKey = checkPresent dir chunkconfig, - hasKeyCheap = True, + checkPresent = checkKey dir chunkconfig, + checkPresentCheap = True, whereisKey = Nothing, remoteFsck = Nothing, repairRepo = Nothing, @@ -189,13 +189,10 @@ removeDirGeneric topdir dir = do then return ok else doesDirectoryExist topdir <&&> (not <$> doesDirectoryExist dir) -checkPresent :: FilePath -> ChunkConfig -> Key -> Annex (Either String Bool) -checkPresent d (LegacyChunks _) k = Legacy.checkPresent d locations k -checkPresent d _ k = liftIO $ do - v <- catchMsgIO $ anyM doesFileExist (locations d k) - case v of - Right False -> ifM (doesDirectoryExist d) - ( return v - , return $ Left $ "directory " ++ d ++ " is not accessible" - ) - _ -> return v +checkKey :: FilePath -> ChunkConfig -> Key -> Annex Bool +checkKey d (LegacyChunks _) k = Legacy.checkKey d locations k +checkKey d _ k = liftIO $ + ifM (anyM doesFileExist (locations d k)) + ( return True + , error $ "directory " ++ d ++ " is not accessible" + ) diff --git a/Remote/Directory/LegacyChunked.hs b/Remote/Directory/LegacyChunked.hs index 1be885db21..b2248c5f66 100644 --- a/Remote/Directory/LegacyChunked.hs +++ b/Remote/Directory/LegacyChunked.hs @@ -103,8 +103,7 @@ retrieve locations d basek a = do liftIO $ nukeFile tmp sink b -checkPresent :: FilePath -> (FilePath -> Key -> [FilePath]) -> Key -> Annex (Either String Bool) -checkPresent d locations k = liftIO $ catchMsgIO $ - withStoredFiles d locations k $ - -- withStoredFiles checked that it exists - const $ return True +checkKey :: FilePath -> (FilePath -> Key -> [FilePath]) -> Key -> Annex Bool +checkKey d locations k = liftIO $ withStoredFiles d locations k $ + -- withStoredFiles checked that it exists + const $ return True diff --git a/Remote/External.hs b/Remote/External.hs index c00093402f..ffae94ec99 100644 --- a/Remote/External.hs +++ b/Remote/External.hs @@ -53,8 +53,8 @@ gen r u c gc = do retrieveKeyFile = retreiveKeyFileDummy, retrieveKeyFileCheap = \_ _ -> return False, removeKey = remove external, - hasKey = checkPresent external, - hasKeyCheap = False, + checkPresent = checkKey external, + checkPresentCheap = False, whereisKey = Nothing, remoteFsck = Nothing, repairRepo = Nothing, @@ -121,8 +121,8 @@ remove external k = safely $ return False _ -> Nothing -checkPresent :: External -> Key -> Annex (Either String Bool) -checkPresent external k = either (Left . show) id <$> tryAnnex go +checkKey :: External -> Key -> Annex Bool +checkKey external k = either error id <$> go where go = handleRequest external (CHECKPRESENT k) Nothing $ \resp -> case resp of diff --git a/Remote/GCrypt.hs b/Remote/GCrypt.hs index d969e02f8a..f971ff754f 100644 --- a/Remote/GCrypt.hs +++ b/Remote/GCrypt.hs @@ -46,7 +46,6 @@ import Utility.Tmp import Logs.Remote import Logs.Transfer import Utility.Gpg -import Utility.FileMode remote :: RemoteType remote = RemoteType { @@ -109,8 +108,8 @@ gen' r u c gc = do , retrieveKeyFile = retreiveKeyFileDummy , retrieveKeyFileCheap = \_ _ -> return False , removeKey = remove this rsyncopts - , hasKey = checkPresent this rsyncopts - , hasKeyCheap = repoCheap r + , checkPresent = checkKey this rsyncopts + , checkPresentCheap = repoCheap r , whereisKey = Nothing , remoteFsck = Nothing , repairRepo = Nothing @@ -342,16 +341,15 @@ remove r rsyncopts k removersync = Remote.Rsync.remove rsyncopts k removeshell = Ssh.dropKey (repo r) k -checkPresent :: Remote -> Remote.Rsync.RsyncOpts -> Key -> Annex (Either String Bool) -checkPresent r rsyncopts k +checkKey :: Remote -> Remote.Rsync.RsyncOpts -> Key -> Annex Bool +checkKey r rsyncopts k | not $ Git.repoIsUrl (repo r) = guardUsable (repo r) (cantCheck $ repo r) $ - liftIO $ catchDefaultIO (cantCheck $ repo r) $ - Right <$> doesFileExist (gCryptLocation r k) + liftIO $ doesFileExist (gCryptLocation r k) | Git.repoIsSsh (repo r) = shellOrRsync r checkshell checkrsync | otherwise = unsupportedUrl where - checkrsync = Remote.Rsync.checkPresent (repo r) rsyncopts k + checkrsync = Remote.Rsync.checkKey (repo r) rsyncopts k checkshell = Ssh.inAnnex (repo r) k {- Annexed objects are hashed using lower-case directories for max diff --git a/Remote/Git.hs b/Remote/Git.hs index c35f9f32af..da5ca4c4a5 100644 --- a/Remote/Git.hs +++ b/Remote/Git.hs @@ -141,8 +141,8 @@ gen r u c gc , retrieveKeyFile = copyFromRemote new , retrieveKeyFileCheap = copyFromRemoteCheap new , removeKey = dropKey new - , hasKey = inAnnex new - , hasKeyCheap = repoCheap r + , checkPresent = inAnnex new + , checkPresentCheap = repoCheap r , whereisKey = Nothing , remoteFsck = if Git.repoIsUrl r then Nothing @@ -284,11 +284,8 @@ tryGitConfigRead r void $ tryAnnex $ ensureInitialized Annex.getState Annex.repo -{- Checks if a given remote has the content for a key inAnnex. - - If the remote cannot be accessed, or if it cannot determine - - whether it has the content, returns a Left error message. - -} -inAnnex :: Remote -> Key -> Annex (Either String Bool) +{- Checks if a given remote has the content for a key in its annex. -} +inAnnex :: Remote -> Key -> Annex Bool inAnnex rmt key | Git.repoIsHttp r = checkhttp | Git.repoIsUrl r = checkremote @@ -298,17 +295,13 @@ inAnnex rmt key checkhttp = do showChecking r ifM (Url.withUrlOptions $ \uo -> anyM (\u -> Url.checkBoth u (keySize key) uo) (keyUrls rmt key)) - ( return $ Right True - , return $ Left "not found" + ( return True + , error "not found" ) checkremote = Ssh.inAnnex r key - checklocal = guardUsable r (cantCheck r) $ dispatch <$> check - where - check = either (Left . show) Right - <$> tryAnnex (onLocal rmt $ Annex.Content.inAnnexSafe key) - dispatch (Left e) = Left e - dispatch (Right (Just b)) = Right b - dispatch (Right Nothing) = cantCheck r + checklocal = guardUsable r (cantCheck r) $ + fromMaybe (cantCheck r) + <$> onLocal rmt (Annex.Content.inAnnexSafe key) keyUrls :: Remote -> Key -> [String] keyUrls r key = map tourl locs' diff --git a/Remote/Glacier.hs b/Remote/Glacier.hs index c5bfefa641..2ade37011e 100644 --- a/Remote/Glacier.hs +++ b/Remote/Glacier.hs @@ -52,8 +52,8 @@ gen r u c gc = new <$> remoteCost gc veryExpensiveRemoteCost retrieveKeyFile = retreiveKeyFileDummy, retrieveKeyFileCheap = retrieveCheap this, removeKey = remove this, - hasKey = checkPresent this, - hasKeyCheap = False, + checkPresent = checkKey this, + checkPresentCheap = False, whereisKey = Nothing, remoteFsck = Nothing, repairRepo = Nothing, @@ -164,25 +164,21 @@ remove r k = glacierAction r , Param $ archive r k ] -checkPresent :: Remote -> Key -> Annex (Either String Bool) -checkPresent r k = do +checkKey :: Remote -> Key -> Annex Bool +checkKey r k = do showAction $ "checking " ++ name r go =<< glacierEnv (config r) (uuid r) where - go Nothing = return $ Left "cannot check glacier" + go Nothing = error "cannot check glacier" go (Just e) = do {- glacier checkpresent outputs the archive name to stdout if - it's present. -} - v <- liftIO $ catchMsgIO $ - readProcessEnv "glacier" (toCommand params) (Just e) - case v of - Right s -> do - let probablypresent = key2file k `elem` lines s - if probablypresent - then ifM (Annex.getFlag "trustglacier") - ( return $ Right True, untrusted ) - else return $ Right False - Left err -> return $ Left err + s <- liftIO $ readProcessEnv "glacier" (toCommand params) (Just e) + let probablypresent = key2file k `elem` lines s + if probablypresent + then ifM (Annex.getFlag "trustglacier") + ( return True, error untrusted ) + else return False params = glacierParams (config r) [ Param "archive" @@ -192,7 +188,7 @@ checkPresent r k = do , Param $ archive r k ] - untrusted = return $ Left $ unlines + untrusted = unlines [ "Glacier's inventory says it has a copy." , "However, the inventory could be out of date, if it was recently removed." , "(Use --trust-glacier if you're sure it's still in Glacier.)" diff --git a/Remote/Helper/Chunked.hs b/Remote/Helper/Chunked.hs index 7ad790cb16..953c533b66 100644 --- a/Remote/Helper/Chunked.hs +++ b/Remote/Helper/Chunked.hs @@ -12,7 +12,7 @@ module Remote.Helper.Chunked ( storeChunks, removeChunks, retrieveChunks, - hasKeyChunks, + checkPresentChunks, ) where import Common.Annex @@ -94,8 +94,8 @@ storeChunks -> Key -> FilePath -> MeterUpdate - -> (Key -> ContentSource -> MeterUpdate -> Annex Bool) - -> (Key -> Annex (Either String Bool)) + -> Storer + -> CheckPresent -> Annex Bool storeChunks u chunkconfig k f p storer checker = case chunkconfig of @@ -158,7 +158,7 @@ storeChunks u chunkconfig k f p storer checker = seekResume :: Handle -> ChunkKeyStream - -> (Key -> Annex (Either String Bool)) + -> CheckPresent -> Annex (ChunkKeyStream, BytesProcessed) seekResume h chunkkeys checker = do sz <- liftIO (hFileSize h) @@ -172,7 +172,7 @@ seekResume h chunkkeys checker = do liftIO $ hSeek h AbsoluteSeek sz return (cks, toBytesProcessed sz) | otherwise = do - v <- checker k + v <- tryNonAsyncAnnex (checker k) case v of Right True -> check pos' cks' sz @@ -331,43 +331,48 @@ setupResume ls currsize = map dropunneeded ls {- Checks if a key is present in a remote. This requires any one - of the lists of options returned by chunkKeys to all check out - as being present using the checker action. + - + - Throws an exception if the remote is not accessible. -} -hasKeyChunks - :: (Key -> Annex (Either String Bool)) +checkPresentChunks + :: CheckPresent -> UUID -> ChunkConfig -> EncKey -> Key - -> Annex (Either String Bool) -hasKeyChunks checker u chunkconfig encryptor basek - | noChunks chunkconfig = + -> Annex Bool +checkPresentChunks checker u chunkconfig encryptor basek + | noChunks chunkconfig = do -- Optimisation: Try the unchunked key first, to avoid -- looking in the git-annex branch for chunk counts -- that are likely not there. - ifM ((Right True ==) <$> checker (encryptor basek)) - ( return (Right True) - , checklists Nothing =<< chunkKeysOnly u basek - ) + v <- check basek + case v of + Right True -> return True + _ -> checklists Nothing =<< chunkKeysOnly u basek | otherwise = checklists Nothing =<< chunkKeys u chunkconfig basek where - checklists Nothing [] = return (Right False) - checklists (Just deferrederror) [] = return (Left deferrederror) + checklists Nothing [] = return False + checklists (Just deferrederror) [] = error deferrederror checklists d (l:ls) | not (null l) = do v <- checkchunks l case v of Left e -> checklists (Just e) ls - Right True -> return (Right True) + Right True -> return True Right False -> checklists Nothing ls | otherwise = checklists d ls checkchunks :: [Key] -> Annex (Either String Bool) checkchunks [] = return (Right True) checkchunks (k:ks) = do - v <- checker (encryptor k) - if v == Right True - then checkchunks ks - else return v + v <- check k + case v of + Right True -> checkchunks ks + Right False -> return $ Right False + Left e -> return $ Left $ show e + + check = tryNonAsyncAnnex . checker . encryptor {- A key can be stored in a remote unchunked, or as a list of chunked keys. - This can be the case whether or not the remote is currently configured diff --git a/Remote/Helper/Encryptable.hs b/Remote/Helper/Encryptable.hs index 65a3ba284d..c364a69e76 100644 --- a/Remote/Helper/Encryptable.hs +++ b/Remote/Helper/Encryptable.hs @@ -91,9 +91,9 @@ encryptableRemote c storeKeyEncrypted retrieveKeyFileEncrypted r = r , removeKey = \k -> cip k >>= maybe (removeKey r k) (\(_, enckey) -> removeKey r enckey) - , hasKey = \k -> cip k >>= maybe - (hasKey r k) - (\(_, enckey) -> hasKey r enckey) + , checkPresent = \k -> cip k >>= maybe + (checkPresent r k) + (\(_, enckey) -> checkPresent r enckey) , cost = maybe (cost r) (const $ cost r + encryptedRemoteCostAdj) diff --git a/Remote/Helper/Hooks.hs b/Remote/Helper/Hooks.hs index c3ff970c62..907400bd1c 100644 --- a/Remote/Helper/Hooks.hs +++ b/Remote/Helper/Hooks.hs @@ -39,7 +39,7 @@ addHooks' r starthook stophook = r' , retrieveKeyFile = \k f d p -> wrapper $ retrieveKeyFile r k f d p , retrieveKeyFileCheap = \k f -> wrapper $ retrieveKeyFileCheap r k f , removeKey = wrapper . removeKey r - , hasKey = wrapper . hasKey r + , checkPresent = wrapper . checkPresent r } where wrapper = runHooks r' starthook stophook diff --git a/Remote/Helper/Messages.hs b/Remote/Helper/Messages.hs index c4b1966dc8..3088a9ab2b 100644 --- a/Remote/Helper/Messages.hs +++ b/Remote/Helper/Messages.hs @@ -13,5 +13,5 @@ import qualified Git showChecking :: Git.Repo -> Annex () showChecking r = showAction $ "checking " ++ Git.repoDescribe r -cantCheck :: Git.Repo -> Either String Bool -cantCheck r = Left $ "unable to check " ++ Git.repoDescribe r +cantCheck :: Git.Repo -> a +cantCheck r = error $ "unable to check " ++ Git.repoDescribe r diff --git a/Remote/Helper/Special.hs b/Remote/Helper/Special.hs index 2bcb7d530d..3c19f25eb9 100644 --- a/Remote/Helper/Special.hs +++ b/Remote/Helper/Special.hs @@ -148,7 +148,7 @@ specialRemote' cfg c preparestorer prepareretriever baser = encr (retrieveKeyFileCheap baser k d) (\_ -> return False) , removeKey = \k -> cip >>= removeKeyGen k - , hasKey = \k -> cip >>= hasKeyGen k + , checkPresent = \k -> cip >>= checkPresentGen k , cost = maybe (cost baser) (const $ cost baser + encryptedRemoteCostAdj) @@ -167,7 +167,7 @@ specialRemote' cfg c preparestorer prepareretriever baser = encr displayprogress p k $ \p' -> storeChunks (uuid baser) chunkconfig k src p' (storechunk enc storer) - (hasKey baser) + (checkPresent baser) go Nothing = return False rollback = void $ removeKey encr k @@ -193,10 +193,10 @@ specialRemote' cfg c preparestorer prepareretriever baser = encr enck = maybe id snd enc remover = removeKey baser - hasKeyGen k enc = hasKeyChunks checker (uuid baser) chunkconfig enck k + checkPresentGen k enc = checkPresentChunks checker (uuid baser) chunkconfig enck k where enck = maybe id snd enc - checker = hasKey baser + checker = checkPresent baser chunkconfig = chunkConfig cfg diff --git a/Remote/Helper/Ssh.hs b/Remote/Helper/Ssh.hs index 05a98865ff..42d77ea592 100644 --- a/Remote/Helper/Ssh.hs +++ b/Remote/Helper/Ssh.hs @@ -81,14 +81,14 @@ onRemote r (with, errorval) command params fields = do Nothing -> return errorval {- Checks if a remote contains a key. -} -inAnnex :: Git.Repo -> Key -> Annex (Either String Bool) +inAnnex :: Git.Repo -> Key -> Annex Bool inAnnex r k = do showChecking r onRemote r (check, cantCheck r) "inannex" [Param $ key2file k] [] where check c p = dispatch <$> safeSystem c p - dispatch ExitSuccess = Right True - dispatch (ExitFailure 1) = Right False + dispatch ExitSuccess = True + dispatch (ExitFailure 1) = False dispatch _ = cantCheck r {- Removes a key from a remote. -} diff --git a/Remote/Hook.hs b/Remote/Hook.hs index efbd9f8ba4..037f71ced4 100644 --- a/Remote/Hook.hs +++ b/Remote/Hook.hs @@ -45,8 +45,8 @@ gen r u c gc = do retrieveKeyFile = retreiveKeyFileDummy, retrieveKeyFileCheap = retrieveCheap hooktype, removeKey = remove hooktype, - hasKey = checkPresent r hooktype, - hasKeyCheap = False, + checkPresent = checkKey r hooktype, + checkPresentCheap = False, whereisKey = Nothing, remoteFsck = Nothing, repairRepo = Nothing, @@ -128,11 +128,11 @@ retrieveCheap _ _ _ = return False remove :: HookName -> Key -> Annex Bool remove h k = runHook h "remove" k Nothing $ return True -checkPresent :: Git.Repo -> HookName -> Key -> Annex (Either String Bool) -checkPresent r h k = do +checkKey :: Git.Repo -> HookName -> Key -> Annex Bool +checkKey r h k = do showAction $ "checking " ++ Git.repoDescribe r v <- lookupHook h action - liftIO $ catchMsgIO $ check v + liftIO $ check v where action = "checkpresent" findkey s = key2file k `elem` lines s diff --git a/Remote/Rsync.hs b/Remote/Rsync.hs index 421c451bdd..91070fe846 100644 --- a/Remote/Rsync.hs +++ b/Remote/Rsync.hs @@ -12,7 +12,7 @@ module Remote.Rsync ( store, retrieve, remove, - checkPresent, + checkKey, withRsyncScratchDir, genRsyncOpts, RsyncOpts @@ -66,8 +66,8 @@ gen r u c gc = do , retrieveKeyFile = retreiveKeyFileDummy , retrieveKeyFileCheap = retrieveCheap o , removeKey = remove o - , hasKey = checkPresent r o - , hasKeyCheap = False + , checkPresent = checkKey r o + , checkPresentCheap = False , whereisKey = Nothing , remoteFsck = Nothing , repairRepo = Nothing @@ -214,14 +214,12 @@ remove o k = do , dir keyFile k "***" ] -checkPresent :: Git.Repo -> RsyncOpts -> Key -> Annex (Either String Bool) -checkPresent r o k = do +checkKey :: Git.Repo -> RsyncOpts -> Key -> Annex Bool +checkKey r o k = do showAction $ "checking " ++ Git.repoDescribe r -- note: Does not currently differentiate between rsync failing -- to connect, and the file not being present. - Right <$> check - where - check = untilTrue (rsyncUrls o k) $ \u -> + untilTrue (rsyncUrls o k) $ \u -> liftIO $ catchBoolIO $ do withQuietOutput createProcessSuccess $ proc "rsync" $ toCommand $ diff --git a/Remote/S3.hs b/Remote/S3.hs index 8603757eb6..4c1f1ecfda 100644 --- a/Remote/S3.hs +++ b/Remote/S3.hs @@ -57,8 +57,8 @@ gen r u c gc = new <$> remoteCost gc expensiveRemoteCost retrieveKeyFile = retreiveKeyFileDummy, retrieveKeyFileCheap = retrieveCheap this, removeKey = remove this c, - hasKey = checkPresent this, - hasKeyCheap = False, + checkPresent = checkKey this, + checkPresentCheap = False, whereisKey = Nothing, remoteFsck = Nothing, repairRepo = Nothing, @@ -167,16 +167,16 @@ remove' :: Remote -> Key -> Annex Bool remove' r k = s3Action r False $ \(conn, bucket) -> s3Bool =<< liftIO (deleteObject conn $ bucketKey r bucket k) -checkPresent :: Remote -> Key -> Annex (Either String Bool) -checkPresent r k = s3Action r noconn $ \(conn, bucket) -> do +checkKey :: Remote -> Key -> Annex Bool +checkKey r k = s3Action r noconn $ \(conn, bucket) -> do showAction $ "checking " ++ name r res <- liftIO $ getObjectInfo conn $ bucketKey r bucket k case res of - Right _ -> return $ Right True - Left (AWSError _ _) -> return $ Right False - Left e -> return $ Left (s3Error e) + Right _ -> return True + Left (AWSError _ _) -> return False + Left e -> s3Error e where - noconn = Left $ error "S3 not configured" + noconn = error "S3 not configured" s3Warning :: ReqError -> Annex Bool s3Warning e = do diff --git a/Remote/Tahoe.hs b/Remote/Tahoe.hs index d265d7ac12..6e52c09810 100644 --- a/Remote/Tahoe.hs +++ b/Remote/Tahoe.hs @@ -72,8 +72,8 @@ gen r u c gc = do retrieveKeyFile = retrieve u hdl, retrieveKeyFileCheap = \_ _ -> return False, removeKey = remove, - hasKey = checkPresent u hdl, - hasKeyCheap = False, + checkPresent = checkKey u hdl, + checkPresentCheap = False, whereisKey = Nothing, remoteFsck = Nothing, repairRepo = Nothing, @@ -123,14 +123,16 @@ remove _k = do warning "content cannot be removed from tahoe remote" return False -checkPresent :: UUID -> TahoeHandle -> Key -> Annex (Either String Bool) -checkPresent u hdl k = go =<< getCapability u k +checkKey :: UUID -> TahoeHandle -> Key -> Annex Bool +checkKey u hdl k = go =<< getCapability u k where - go Nothing = return (Right False) - go (Just cap) = liftIO $ parseCheck <$> readTahoe hdl "check" - [ Param "--raw" - , Param cap - ] + go Nothing = return False + go (Just cap) = liftIO $ do + v <- parseCheck <$> readTahoe hdl "check" + [ Param "--raw" + , Param cap + ] + either error return v defaultTahoeConfigDir :: UUID -> IO TahoeConfigDir defaultTahoeConfigDir u = do diff --git a/Remote/Web.hs b/Remote/Web.hs index ddd1fc1ccd..7bdd8d1854 100644 --- a/Remote/Web.hs +++ b/Remote/Web.hs @@ -50,8 +50,8 @@ gen r _ c gc = retrieveKeyFile = downloadKey, retrieveKeyFileCheap = downloadKeyCheap, removeKey = dropKey, - hasKey = checkKey, - hasKeyCheap = False, + checkPresent = checkKey, + checkPresentCheap = False, whereisKey = Just getUrls, remoteFsck = Nothing, repairRepo = Nothing, @@ -98,12 +98,12 @@ dropKey k = do mapM_ (setUrlMissing k) =<< getUrls k return True -checkKey :: Key -> Annex (Either String Bool) +checkKey :: Key -> Annex Bool checkKey key = do us <- getUrls key if null us - then return $ Right False - else return =<< checkKey' key us + then return False + else either error return =<< checkKey' key us checkKey' :: Key -> [URLString] -> Annex (Either String Bool) checkKey' key us = firsthit us (Right False) $ \u -> do let (u', downloader) = getDownloader u diff --git a/Remote/WebDAV.hs b/Remote/WebDAV.hs index 0bdd383602..f0bcac10ea 100644 --- a/Remote/WebDAV.hs +++ b/Remote/WebDAV.hs @@ -63,8 +63,8 @@ gen r u c gc = new <$> remoteCost gc expensiveRemoteCost retrieveKeyFile = retrieve this, retrieveKeyFileCheap = retrieveCheap this, removeKey = remove this, - hasKey = checkPresent this, - hasKeyCheap = False, + checkPresent = checkKey this, + checkPresentCheap = False, whereisKey = Nothing, remoteFsck = Nothing, repairRepo = Nothing, @@ -170,10 +170,10 @@ remove r k = davAction r False $ \(baseurl, user, pass) -> liftIO $ do let url = davLocation baseurl k isJust . eitherToMaybe <$> tryNonAsync (deleteDAV url user pass) -checkPresent :: Remote -> Key -> Annex (Either String Bool) -checkPresent r k = davAction r noconn go +checkKey :: Remote -> Key -> Annex Bool +checkKey r k = davAction r noconn (either error id <$$> go) where - noconn = Left $ error $ name r ++ " not configured" + noconn = error $ name r ++ " not configured" go (baseurl, user, pass) = do showAction $ "checking " ++ name r diff --git a/Types/Remote.hs b/Types/Remote.hs index 805b984740..b657cfcdc5 100644 --- a/Types/Remote.hs +++ b/Types/Remote.hs @@ -68,12 +68,12 @@ data RemoteA a = Remote { retrieveKeyFileCheap :: Key -> FilePath -> a Bool, -- removes a key's contents (succeeds if the contents are not present) removeKey :: Key -> a Bool, - -- Checks if a key is present in the remote; if the remote - -- cannot be accessed returns a Left error message. - hasKey :: Key -> a (Either String Bool), - -- Some remotes can check hasKey without an expensive network + -- Checks if a key is present in the remote. + -- Throws an exception if the remote cannot be accessed. + checkPresent :: Key -> a Bool, + -- Some remotes can checkPresent without an expensive network -- operation. - hasKeyCheap :: Bool, + checkPresentCheap :: Bool, -- Some remotes can provide additional details for whereis. whereisKey :: Maybe (Key -> a [String]), -- Some remotes can run a fsck operation on the remote, diff --git a/Types/StoreRetrieve.hs b/Types/StoreRetrieve.hs index 9fc0634c4e..a21fa7866c 100644 --- a/Types/StoreRetrieve.hs +++ b/Types/StoreRetrieve.hs @@ -33,3 +33,11 @@ type Storer = Key -> ContentSource -> MeterUpdate -> Annex Bool -- callback, which will fully consume the content before returning. -- Throws exception if key is not present, or remote is not accessible. type Retriever = Key -> MeterUpdate -> (ContentSource -> Annex Bool) -> Annex Bool + +-- Action that removes a Key's content from a remote. +-- Succeeds if key is already not present; never throws exceptions. +type Remover = Key -> Annex Bool + +-- Checks if a Key's content is present on a remote. +-- Throws an exception if the remote is not accessible. +type CheckPresent = Key -> Annex Bool diff --git a/doc/design/assistant/chunks.mdwn b/doc/design/assistant/chunks.mdwn index a9709a778d..0aa389899a 100644 --- a/doc/design/assistant/chunks.mdwn +++ b/doc/design/assistant/chunks.mdwn @@ -91,7 +91,7 @@ cannot tell when we've gotten the last chunk. (Also, we cannot strip padding.) Note that `addurl` sometimes generates keys w/o size info (particularly, it does so by design when using quvi). -Problem: Also, this makes `hasKey` hard to implement: How can it know if +Problem: Also, this makes `checkPresent` hard to implement: How can it know if all the chunks are present, if the key size is not known? Problem: Also, this makes it difficult to download encrypted keys, because @@ -111,7 +111,7 @@ So, SHA256-1048576-c1--xxxxxxx for the first chunk of 1 megabyte. Before any chunks are stored, write a chunkcount file, eg SHA256-s12345-c0--xxxxxxx. Note that this key is the same as the original object's key, except with chunk number set to 0. This file contains both -the number of chunks, and also the chunk size used. `hasKey` downloads this +the number of chunks, and also the chunk size used. `checkPresent` downloads this file, and then verifies that each chunk is present, looking for keys with the expected chunk numbers and chunk size. @@ -126,7 +126,7 @@ Note: This design lets an attacker with logs tell the (appoximate) size of objects, by finding the small files that contain a chunk count, and correlating when that is written/read and when other files are written/read. That could be solved by padding the chunkcount key up to the -size of the rest of the keys, but that's very innefficient; `hasKey` is not +size of the rest of the keys, but that's very innefficient; `checkPresent` is not designed to need to download large files. # design 3 @@ -139,7 +139,7 @@ This seems difficult; attacker could probably tell where the first encrypted part stops and the next encrypted part starts by looking for gpg headers, and so tell which files are the first chunks. -Also, `hasKey` would need to download some or all of the first file. +Also, `checkPresent` would need to download some or all of the first file. If all, that's a lot more expensive. If only some is downloaded, an attacker can guess that the file that was partially downloaded is the first chunk in a series, and wait for a time when it's fully downloaded to @@ -163,7 +163,7 @@ The location log does not record locations of individual chunk keys (too space-inneficient). Instead, look at a chunk log in the git-annex branch to get the chunk count and size for a key. -`hasKey` would check if any of the logged sets of chunks is +`checkPresent` would check if any of the logged sets of chunks is present on the remote. It would also check if the non-chunked key is present, as a fallback. @@ -225,7 +225,7 @@ Reasons: Note that this means that the chunks won't exactly match the configured chunk size. gpg does compression, which might make them a -lot smaller. Or gpg overhead could make them slightly larger. So `hasKey` +lot smaller. Or gpg overhead could make them slightly larger. So `checkPresent` cannot check exact file sizes. If padding is enabled, gpg compression should be disabled, to not leak @@ -250,10 +250,10 @@ and skip forward to the next needed chunk. Easy. Uploads: Check if the 1st chunk is present. If so, check the second chunk, etc. Once the first missing chunk is found, start uploading from there. -That adds one extra hasKey call per upload. Probably a win in most cases. +That adds one extra checkPresent call per upload. Probably a win in most cases. Can be improved by making special remotes open a persistent connection that is used for transferring all chunks, as well as for -checking hasKey. +checking checkPresent. Note that this is safe to do only as long as the Key being transferred cannot possibly have 2 different contents in different repos. Notably not From 8025decc7f08a175cd36cd8f1cc9c5c1ef3cdff1 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 6 Aug 2014 14:28:36 -0400 Subject: [PATCH 40/56] run Preparer to get Remover and CheckPresent actions This will allow special remotes to eg, open a http connection and reuse it, while checking if chunks are present, or removing chunks. S3 and WebDAV both need this to support chunks with reasonable speed. Note that a special remote might want to cache a http connection across multiple requests. A simple case of this is that CheckPresent is typically called before Store or Remove. A remote using this interface can certianly use a Preparer that eg, uses a MVar to cache a http connection. However, it's up to the remote to then deal with things like stale or stalled http connections when eg, doing a series of downloads from a remote and other places. There could be long delays between calls to a remote, which could lead to eg, http connection stalls; the machine might even move to a new network, etc. It might be nice to improve this interface later to allow the simple case without needing to handle the full complex case. One way to do it would be to have a `Transaction SpecialRemote cache`, where SpecialRemote contains methods for Storer, Retriever, Remover, and CheckPresent, that all expect to be passed a `cache`. --- Remote/Bup.hs | 10 ++++++---- Remote/Ddar.hs | 10 ++++++---- Remote/Directory.hs | 10 ++++++---- Remote/External.hs | 10 ++++++---- Remote/GCrypt.hs | 10 ++++++---- Remote/Glacier.hs | 10 ++++++---- Remote/Helper/Messages.hs | 14 ++++++++++++-- Remote/Helper/Special.hs | 40 ++++++++++++++++++++++++++++----------- Remote/Hook.hs | 10 ++++++---- Remote/Rsync.hs | 10 ++++++---- Remote/S3.hs | 16 +++++++++------- 11 files changed, 98 insertions(+), 52 deletions(-) diff --git a/Remote/Bup.hs b/Remote/Bup.hs index 2e68f30ef7..80fffc056c 100644 --- a/Remote/Bup.hs +++ b/Remote/Bup.hs @@ -57,8 +57,8 @@ gen r u c gc = do , storeKey = storeKeyDummy , retrieveKeyFile = retreiveKeyFileDummy , retrieveKeyFileCheap = retrieveCheap buprepo - , removeKey = remove buprepo - , checkPresent = checkKey r bupr' + , removeKey = removeKeyDummy + , checkPresent = checkPresentDummy , checkPresentCheap = bupLocal buprepo , whereisKey = Nothing , remoteFsck = Nothing @@ -76,6 +76,8 @@ gen r u c gc = do return $ Just $ specialRemote' specialcfg c (simplyPrepare $ store this buprepo) (simplyPrepare $ retrieve buprepo) + (simplyPrepare $ remove buprepo) + (simplyPrepare $ checkKey r bupr') this where buprepo = fromMaybe (error "missing buprepo") $ remoteAnnexBupRepo gc @@ -146,7 +148,7 @@ retrieveCheap _ _ _ = return False - - We can, however, remove the git branch that bup created for the key. -} -remove :: BupRepo -> Key -> Annex Bool +remove :: BupRepo -> Remover remove buprepo k = do go =<< liftIO (bup2GitRemote buprepo) warning "content cannot be completely removed from bup remote" @@ -163,7 +165,7 @@ remove buprepo k = do - in a bup repository. One way it to check if the git repository has - a branch matching the name (as created by bup split -n). -} -checkKey :: Git.Repo -> Git.Repo -> Key -> Annex Bool +checkKey :: Git.Repo -> Git.Repo -> CheckPresent checkKey r bupr k | Git.repoIsUrl bupr = do showChecking r diff --git a/Remote/Ddar.hs b/Remote/Ddar.hs index 1227b52755..fba05312be 100644 --- a/Remote/Ddar.hs +++ b/Remote/Ddar.hs @@ -44,6 +44,8 @@ gen r u c gc = do return $ Just $ specialRemote' specialcfg c (simplyPrepare $ store ddarrepo) (simplyPrepare $ retrieve ddarrepo) + (simplyPrepare $ remove ddarrepo) + (simplyPrepare $ checkKey ddarrepo) (this cst) where this cst = Remote @@ -53,8 +55,8 @@ gen r u c gc = do , storeKey = storeKeyDummy , retrieveKeyFile = retreiveKeyFileDummy , retrieveKeyFileCheap = retrieveCheap - , removeKey = remove ddarrepo - , checkPresent = checkKey ddarrepo + , removeKey = removeKeyDummy + , checkPresent = checkPresentDummy , checkPresentCheap = ddarLocal ddarrepo , whereisKey = Nothing , remoteFsck = Nothing @@ -140,7 +142,7 @@ retrieve ddarrepo = byteRetriever $ \k sink -> do retrieveCheap :: Key -> FilePath -> Annex Bool retrieveCheap _ _ = return False -remove :: DdarRepo -> Key -> Annex Bool +remove :: DdarRepo -> Remover remove ddarrepo key = do (cmd, params) <- ddarRemoteCall ddarrepo 'd' [Param $ key2file key] liftIO $ boolSystem cmd params @@ -181,7 +183,7 @@ inDdarManifest ddarrepo k = do where k' = key2file k -checkKey :: DdarRepo -> Key -> Annex Bool +checkKey :: DdarRepo -> CheckPresent checkKey ddarrepo key = do directoryExists <- ddarDirectoryExists ddarrepo case directoryExists of diff --git a/Remote/Directory.hs b/Remote/Directory.hs index 0a2532aa5b..d9419757f0 100644 --- a/Remote/Directory.hs +++ b/Remote/Directory.hs @@ -44,6 +44,8 @@ gen r u c gc = do return $ Just $ specialRemote c (prepareStore dir chunkconfig) (retrieve dir chunkconfig) + (simplyPrepare $ remove dir) + (simplyPrepare $ checkKey dir chunkconfig) Remote { uuid = u, cost = cst, @@ -51,8 +53,8 @@ gen r u c gc = do storeKey = storeKeyDummy, retrieveKeyFile = retreiveKeyFileDummy, retrieveKeyFileCheap = retrieveCheap dir chunkconfig, - removeKey = remove dir, - checkPresent = checkKey dir chunkconfig, + removeKey = removeKeyDummy, + checkPresent = checkPresentDummy, checkPresentCheap = True, whereisKey = Nothing, remoteFsck = Nothing, @@ -161,7 +163,7 @@ retrieveCheap d NoChunks k f = liftIO $ catchBoolIO $ do retrieveCheap _ _ _ _ = return False #endif -remove :: FilePath -> Key -> Annex Bool +remove :: FilePath -> Remover remove d k = liftIO $ removeDirGeneric d (storeDir d k) {- Removes the directory, which must be located under the topdir. @@ -189,7 +191,7 @@ removeDirGeneric topdir dir = do then return ok else doesDirectoryExist topdir <&&> (not <$> doesDirectoryExist dir) -checkKey :: FilePath -> ChunkConfig -> Key -> Annex Bool +checkKey :: FilePath -> ChunkConfig -> CheckPresent checkKey d (LegacyChunks _) k = Legacy.checkKey d locations k checkKey d _ k = liftIO $ ifM (anyM doesFileExist (locations d k)) diff --git a/Remote/External.hs b/Remote/External.hs index ffae94ec99..f326f26ba5 100644 --- a/Remote/External.hs +++ b/Remote/External.hs @@ -45,6 +45,8 @@ gen r u c gc = do return $ Just $ specialRemote c (simplyPrepare $ store external) (simplyPrepare $ retrieve external) + (simplyPrepare $ remove external) + (simplyPrepare $ checkKey external) Remote { uuid = u, cost = cst, @@ -52,8 +54,8 @@ gen r u c gc = do storeKey = storeKeyDummy, retrieveKeyFile = retreiveKeyFileDummy, retrieveKeyFileCheap = \_ _ -> return False, - removeKey = remove external, - checkPresent = checkKey external, + removeKey = removeKeyDummy, + checkPresent = checkPresentDummy, checkPresentCheap = False, whereisKey = Nothing, remoteFsck = Nothing, @@ -109,7 +111,7 @@ retrieve external = fileRetriever $ \d k p -> error errmsg _ -> Nothing -remove :: External -> Key -> Annex Bool +remove :: External -> Remover remove external k = safely $ handleRequest external (REMOVE k) Nothing $ \resp -> case resp of @@ -121,7 +123,7 @@ remove external k = safely $ return False _ -> Nothing -checkKey :: External -> Key -> Annex Bool +checkKey :: External -> CheckPresent checkKey external k = either error id <$> go where go = handleRequest external (CHECKPRESENT k) Nothing $ \resp -> diff --git a/Remote/GCrypt.hs b/Remote/GCrypt.hs index f971ff754f..55a7758112 100644 --- a/Remote/GCrypt.hs +++ b/Remote/GCrypt.hs @@ -107,8 +107,8 @@ gen' r u c gc = do , storeKey = storeKeyDummy , retrieveKeyFile = retreiveKeyFileDummy , retrieveKeyFileCheap = \_ _ -> return False - , removeKey = remove this rsyncopts - , checkPresent = checkKey this rsyncopts + , removeKey = removeKeyDummy + , checkPresent = checkPresentDummy , checkPresentCheap = repoCheap r , whereisKey = Nothing , remoteFsck = Nothing @@ -124,6 +124,8 @@ gen' r u c gc = do return $ Just $ specialRemote' specialcfg c (simplyPrepare $ store this rsyncopts) (simplyPrepare $ retrieve this rsyncopts) + (simplyPrepare $ remove this rsyncopts) + (simplyPrepare $ checkKey this rsyncopts) this where specialcfg @@ -331,7 +333,7 @@ retrieve r rsyncopts | otherwise = unsupportedUrl where -remove :: Remote -> Remote.Rsync.RsyncOpts -> Key -> Annex Bool +remove :: Remote -> Remote.Rsync.RsyncOpts -> Remover remove r rsyncopts k | not $ Git.repoIsUrl (repo r) = guardUsable (repo r) False $ liftIO $ Remote.Directory.removeDirGeneric (Git.repoLocation (repo r)) (parentDir (gCryptLocation r k)) @@ -341,7 +343,7 @@ remove r rsyncopts k removersync = Remote.Rsync.remove rsyncopts k removeshell = Ssh.dropKey (repo r) k -checkKey :: Remote -> Remote.Rsync.RsyncOpts -> Key -> Annex Bool +checkKey :: Remote -> Remote.Rsync.RsyncOpts -> CheckPresent checkKey r rsyncopts k | not $ Git.repoIsUrl (repo r) = guardUsable (repo r) (cantCheck $ repo r) $ diff --git a/Remote/Glacier.hs b/Remote/Glacier.hs index 2ade37011e..dd28def63e 100644 --- a/Remote/Glacier.hs +++ b/Remote/Glacier.hs @@ -42,6 +42,8 @@ gen r u c gc = new <$> remoteCost gc veryExpensiveRemoteCost new cst = Just $ specialRemote' specialcfg c (prepareStore this) (prepareRetrieve this) + (simplyPrepare $ remove this) + (simplyPrepare $ checkKey this) this where this = Remote { @@ -51,8 +53,8 @@ gen r u c gc = new <$> remoteCost gc veryExpensiveRemoteCost storeKey = storeKeyDummy, retrieveKeyFile = retreiveKeyFileDummy, retrieveKeyFileCheap = retrieveCheap this, - removeKey = remove this, - checkPresent = checkKey this, + removeKey = removeKeyDummy, + checkPresent = checkPresentDummy, checkPresentCheap = False, whereisKey = Nothing, remoteFsck = Nothing, @@ -155,7 +157,7 @@ retrieve r k sink = go =<< glacierEnv c u retrieveCheap :: Remote -> Key -> FilePath -> Annex Bool retrieveCheap _ _ _ = return False -remove :: Remote -> Key -> Annex Bool +remove :: Remote -> Remover remove r k = glacierAction r [ Param "archive" @@ -164,7 +166,7 @@ remove r k = glacierAction r , Param $ archive r k ] -checkKey :: Remote -> Key -> Annex Bool +checkKey :: Remote -> CheckPresent checkKey r k = do showAction $ "checking " ++ name r go =<< glacierEnv (config r) (uuid r) diff --git a/Remote/Helper/Messages.hs b/Remote/Helper/Messages.hs index 3088a9ab2b..774716ca1a 100644 --- a/Remote/Helper/Messages.hs +++ b/Remote/Helper/Messages.hs @@ -9,9 +9,19 @@ module Remote.Helper.Messages where import Common.Annex import qualified Git +import qualified Types.Remote as Remote showChecking :: Git.Repo -> Annex () showChecking r = showAction $ "checking " ++ Git.repoDescribe r -cantCheck :: Git.Repo -> a -cantCheck r = error $ "unable to check " ++ Git.repoDescribe r +class Checkable a where + descCheckable :: a -> String + +instance Checkable Git.Repo where + descCheckable = Git.repoDescribe + +instance Checkable (Remote.RemoteA a) where + descCheckable = Remote.name + +cantCheck :: Checkable a => a -> e +cantCheck v = error $ "unable to check " ++ descCheckable v diff --git a/Remote/Helper/Special.hs b/Remote/Helper/Special.hs index 3c19f25eb9..f8428aff7c 100644 --- a/Remote/Helper/Special.hs +++ b/Remote/Helper/Special.hs @@ -11,6 +11,8 @@ module Remote.Helper.Special ( Preparer, Storer, Retriever, + Remover, + CheckPresent, simplyPrepare, ContentSource, checkPrepare, @@ -21,6 +23,8 @@ module Remote.Helper.Special ( byteRetriever, storeKeyDummy, retreiveKeyFileDummy, + removeKeyDummy, + checkPresentDummy, SpecialRemoteCfg(..), specialRemoteCfg, specialRemote, @@ -36,6 +40,7 @@ import Config.Cost import Utility.Metered import Remote.Helper.Chunked as X import Remote.Helper.Encryptable as X hiding (encryptableRemote) +import Remote.Helper.Messages import Annex.Content import Annex.Exception import qualified Git @@ -114,16 +119,27 @@ byteRetriever :: (Key -> (L.ByteString -> Annex Bool) -> Annex Bool) -> Retrieve byteRetriever a k _m callback = a k (callback . ByteContent) {- The base Remote that is provided to specialRemote needs to have - - storeKey and retreiveKeyFile methods, but they are never - - actually used (since specialRemote replaces them). + - storeKey, retreiveKeyFile, removeKey, and checkPresent methods, + - but they are never actually used (since specialRemote replaces them). - Here are some dummy ones. -} storeKeyDummy :: Key -> AssociatedFile -> MeterUpdate -> Annex Bool storeKeyDummy _ _ _ = return False retreiveKeyFileDummy :: Key -> AssociatedFile -> FilePath -> MeterUpdate -> Annex Bool retreiveKeyFileDummy _ _ _ _ = return False +removeKeyDummy :: Key -> Annex Bool +removeKeyDummy _ = return False +checkPresentDummy :: Key -> Annex Bool +checkPresentDummy _ = error "missing checkPresent implementation" -type RemoteModifier = RemoteConfig -> Preparer Storer -> Preparer Retriever -> Remote -> Remote +type RemoteModifier + = RemoteConfig + -> Preparer Storer + -> Preparer Retriever + -> Preparer Remover + -> Preparer CheckPresent + -> Remote + -> Remote data SpecialRemoteCfg = SpecialRemoteCfg { chunkConfig :: ChunkConfig @@ -139,13 +155,14 @@ specialRemote :: RemoteModifier specialRemote c = specialRemote' (specialRemoteCfg c) c specialRemote' :: SpecialRemoteCfg -> RemoteModifier -specialRemote' cfg c preparestorer prepareretriever baser = encr +specialRemote' cfg c preparestorer prepareretriever prepareremover preparecheckpresent baser = encr where encr = baser { storeKey = \k _f p -> cip >>= storeKeyGen k p , retrieveKeyFile = \k _f d p -> cip >>= retrieveKeyFileGen k d p , retrieveKeyFileCheap = \k d -> cip >>= maybe (retrieveKeyFileCheap baser k d) + -- retrieval of encrypted keys is never cheap (\_ -> return False) , removeKey = \k -> cip >>= removeKeyGen k , checkPresent = \k -> cip >>= checkPresentGen k @@ -160,8 +177,7 @@ specialRemote' cfg c preparestorer prepareretriever baser = encr safely a = catchNonAsyncAnnex a (\e -> warning (show e) >> return False) -- chunk, then encrypt, then feed to the storer - storeKeyGen k p enc = - safely $ preparestorer k $ safely . go + storeKeyGen k p enc = safely $ preparestorer k $ safely . go where go (Just storer) = sendAnnex k rollback $ \src -> displayprogress p k $ \p' -> @@ -178,7 +194,7 @@ specialRemote' cfg c preparestorer prepareretriever baser = encr readBytes $ \encb -> storer (enck k) (ByteContent encb) p - -- call retriever to get chunks; decrypt them; stream to dest file + -- call retrieve-r to get chunks; decrypt them; stream to dest file retrieveKeyFileGen k dest p enc = safely $ prepareretriever k $ safely . go where @@ -188,15 +204,17 @@ specialRemote' cfg c preparestorer prepareretriever baser = encr go Nothing = return False enck = maybe id snd enc - removeKeyGen k enc = removeChunks remover (uuid baser) chunkconfig enck k + removeKeyGen k enc = safely $ prepareremover k $ safely . go where + go (Just remover) = removeChunks remover (uuid baser) chunkconfig enck k + go Nothing = return False enck = maybe id snd enc - remover = removeKey baser - checkPresentGen k enc = checkPresentChunks checker (uuid baser) chunkconfig enck k + checkPresentGen k enc = preparecheckpresent k go where + go (Just checker) = checkPresentChunks checker (uuid baser) chunkconfig enck k + go Nothing = cantCheck baser enck = maybe id snd enc - checker = checkPresent baser chunkconfig = chunkConfig cfg diff --git a/Remote/Hook.hs b/Remote/Hook.hs index 037f71ced4..a2d096ecd4 100644 --- a/Remote/Hook.hs +++ b/Remote/Hook.hs @@ -37,6 +37,8 @@ gen r u c gc = do return $ Just $ specialRemote c (simplyPrepare $ store hooktype) (simplyPrepare $ retrieve hooktype) + (simplyPrepare $ remove hooktype) + (simplyPrepare $ checkKey r hooktype) Remote { uuid = u, cost = cst, @@ -44,8 +46,8 @@ gen r u c gc = do storeKey = storeKeyDummy, retrieveKeyFile = retreiveKeyFileDummy, retrieveKeyFileCheap = retrieveCheap hooktype, - removeKey = remove hooktype, - checkPresent = checkKey r hooktype, + removeKey = removeKeyDummy, + checkPresent = checkPresentDummy, checkPresentCheap = False, whereisKey = Nothing, remoteFsck = Nothing, @@ -125,10 +127,10 @@ retrieve h = fileRetriever $ \d k _p -> retrieveCheap :: HookName -> Key -> FilePath -> Annex Bool retrieveCheap _ _ _ = return False -remove :: HookName -> Key -> Annex Bool +remove :: HookName -> Remover remove h k = runHook h "remove" k Nothing $ return True -checkKey :: Git.Repo -> HookName -> Key -> Annex Bool +checkKey :: Git.Repo -> HookName -> CheckPresent checkKey r h k = do showAction $ "checking " ++ Git.repoDescribe r v <- lookupHook h action diff --git a/Remote/Rsync.hs b/Remote/Rsync.hs index 91070fe846..afd13abf03 100644 --- a/Remote/Rsync.hs +++ b/Remote/Rsync.hs @@ -58,6 +58,8 @@ gen r u c gc = do return $ Just $ specialRemote' specialcfg c (simplyPrepare $ fileStorer $ store o) (simplyPrepare $ fileRetriever $ retrieve o) + (simplyPrepare $ remove o) + (simplyPrepare $ checkKey r o) Remote { uuid = u , cost = cst @@ -65,8 +67,8 @@ gen r u c gc = do , storeKey = storeKeyDummy , retrieveKeyFile = retreiveKeyFileDummy , retrieveKeyFileCheap = retrieveCheap o - , removeKey = remove o - , checkPresent = checkKey r o + , removeKey = removeKeyDummy + , checkPresent = checkPresentDummy , checkPresentCheap = False , whereisKey = Nothing , remoteFsck = Nothing @@ -186,7 +188,7 @@ retrieve o f k p = retrieveCheap :: RsyncOpts -> Key -> FilePath -> Annex Bool retrieveCheap o k f = ifM (preseedTmp k f) ( rsyncRetrieve o k f Nothing , return False ) -remove :: RsyncOpts -> Key -> Annex Bool +remove :: RsyncOpts -> Remover remove o k = do ps <- sendParams withRsyncScratchDir $ \tmp -> liftIO $ do @@ -214,7 +216,7 @@ remove o k = do , dir keyFile k "***" ] -checkKey :: Git.Repo -> RsyncOpts -> Key -> Annex Bool +checkKey :: Git.Repo -> RsyncOpts -> CheckPresent checkKey r o k = do showAction $ "checking " ++ Git.repoDescribe r -- note: Does not currently differentiate between rsync failing diff --git a/Remote/S3.hs b/Remote/S3.hs index 4c1f1ecfda..1aba392453 100644 --- a/Remote/S3.hs +++ b/Remote/S3.hs @@ -47,6 +47,8 @@ gen r u c gc = new <$> remoteCost gc expensiveRemoteCost new cst = Just $ specialRemote c (prepareStore this) (prepareRetrieve this) + (simplyPrepare $ remove this c) + (simplyPrepare $ checkKey this) this where this = Remote { @@ -55,9 +57,9 @@ gen r u c gc = new <$> remoteCost gc expensiveRemoteCost name = Git.repoDescribe r, storeKey = storeKeyDummy, retrieveKeyFile = retreiveKeyFileDummy, - retrieveKeyFileCheap = retrieveCheap this, - removeKey = remove this c, - checkPresent = checkKey this, + retrieveKeyFileCheap = retrieveCheap, + removeKey = removeKeyDummy, + checkPresent = checkPresentDummy, checkPresentCheap = False, whereisKey = Nothing, remoteFsck = Nothing, @@ -150,13 +152,13 @@ prepareRetrieve r = resourcePrepare (const $ s3Action r False) $ \(conn, bucket) liftIO (getObject conn $ bucketKey r bucket k) >>= either s3Error (sink . obj_data) -retrieveCheap :: Remote -> Key -> FilePath -> Annex Bool -retrieveCheap _ _ _ = return False +retrieveCheap :: Key -> FilePath -> Annex Bool +retrieveCheap _ _ = return False {- Internet Archive doesn't easily allow removing content. - While it may remove the file, there are generally other files - derived from it that it does not remove. -} -remove :: Remote -> RemoteConfig -> Key -> Annex Bool +remove :: Remote -> RemoteConfig -> Remover remove r c k | isIA c = do warning "Cannot remove content from the Internet Archive" @@ -167,7 +169,7 @@ remove' :: Remote -> Key -> Annex Bool remove' r k = s3Action r False $ \(conn, bucket) -> s3Bool =<< liftIO (deleteObject conn $ bucketKey r bucket k) -checkKey :: Remote -> Key -> Annex Bool +checkKey :: Remote -> CheckPresent checkKey r k = s3Action r noconn $ \(conn, bucket) -> do showAction $ "checking " ++ name r res <- liftIO $ getObjectInfo conn $ bucketKey r bucket k From aacb0b282357b41e1f055d2435b433a08b2eec65 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 6 Aug 2014 16:55:32 -0400 Subject: [PATCH 41/56] convert WebDAV to new special remote interface, adding new-style chunking support Reusing http connection when operating on chunks is not done yet, I had to submit some patches to DAV to support that. However, this is no slower than old-style chunking was. Note that it's a fileRetriever and a fileStorer, despite DAV using bytestrings that would allow streaming. As a result, upload/download of encrypted files is made a bit more expensive, since it spools them to temp files. This was needed to get the progress meters to work. There are probably ways to avoid that.. But it turns out that the current DAV interface buffers the whole file content in memory, and I have sent in a patch to DAV to improve its interfaces. Using the new interfaces, it's certainly going to need to be a fileStorer, in order to read the file size from the file (getting the size of a bytestring would destroy laziness). It should be possible to use the new interface to make it be a byteRetriever, so I'll change that when I get to it. This commit was sponsored by Andreas Olsson. --- Remote/Helper/Encryptable.hs | 38 ---------- Remote/Helper/Special.hs | 4 +- Remote/WebDAV.hs | 122 ++++++++++++-------------------- debian/changelog | 2 +- doc/special_remotes/webdav.mdwn | 2 +- 5 files changed, 50 insertions(+), 118 deletions(-) diff --git a/Remote/Helper/Encryptable.hs b/Remote/Helper/Encryptable.hs index c364a69e76..dd032ce337 100644 --- a/Remote/Helper/Encryptable.hs +++ b/Remote/Helper/Encryptable.hs @@ -14,9 +14,7 @@ import Types.Remote import Crypto import Types.Crypto import qualified Annex -import Config.Cost import Utility.Base64 -import Utility.Metered {- Encryption setup for a remote. The user must specify whether to use - an encryption key, or not encrypt. An encrypted cipher is created, or is @@ -70,42 +68,6 @@ encryptionSetup c = maybe genCipher updateCipher $ extractCipher c -- remotes (while being backward-compatible). [ "keyid", "keyid+", "keyid-", "highRandomQuality" ] -{- Modifies a Remote to support encryption. -} --- TODO: deprecated -encryptableRemote - :: RemoteConfig - -> ((Cipher, Key) -> Key -> MeterUpdate -> Annex Bool) - -> ((Cipher, Key) -> Key -> FilePath -> MeterUpdate -> Annex Bool) - -> Remote - -> Remote -encryptableRemote c storeKeyEncrypted retrieveKeyFileEncrypted r = r - { storeKey = \k f p -> cip k >>= maybe - (storeKey r k f p) - (\v -> storeKeyEncrypted v k p) - , retrieveKeyFile = \k f d p -> cip k >>= maybe - (retrieveKeyFile r k f d p) - (\v -> retrieveKeyFileEncrypted v k d p) - , retrieveKeyFileCheap = \k d -> cip k >>= maybe - (retrieveKeyFileCheap r k d) - (\_ -> return False) - , removeKey = \k -> cip k >>= maybe - (removeKey r k) - (\(_, enckey) -> removeKey r enckey) - , checkPresent = \k -> cip k >>= maybe - (checkPresent r k) - (\(_, enckey) -> checkPresent r enckey) - , cost = maybe - (cost r) - (const $ cost r + encryptedRemoteCostAdj) - (extractCipher c) - } - where - cip k = do - v <- cipherKey c - return $ case v of - Nothing -> Nothing - Just (cipher, enck) -> Just (cipher, enck k) - {- Gets encryption Cipher. The decrypted Ciphers are cached in the Annex - state. -} remoteCipher :: RemoteConfig -> Annex (Maybe Cipher) diff --git a/Remote/Helper/Special.hs b/Remote/Helper/Special.hs index f8428aff7c..fc0e11d2f3 100644 --- a/Remote/Helper/Special.hs +++ b/Remote/Helper/Special.hs @@ -39,7 +39,7 @@ import Crypto import Config.Cost import Utility.Metered import Remote.Helper.Chunked as X -import Remote.Helper.Encryptable as X hiding (encryptableRemote) +import Remote.Helper.Encryptable as X import Remote.Helper.Messages import Annex.Content import Annex.Exception @@ -119,7 +119,7 @@ byteRetriever :: (Key -> (L.ByteString -> Annex Bool) -> Annex Bool) -> Retrieve byteRetriever a k _m callback = a k (callback . ByteContent) {- The base Remote that is provided to specialRemote needs to have - - storeKey, retreiveKeyFile, removeKey, and checkPresent methods, + - storeKey, retrieveKeyFile, removeKey, and checkPresent methods, - but they are never actually used (since specialRemote replaces them). - Here are some dummy ones. -} diff --git a/Remote/WebDAV.hs b/Remote/WebDAV.hs index f0bcac10ea..6679242e5c 100644 --- a/Remote/WebDAV.hs +++ b/Remote/WebDAV.hs @@ -27,12 +27,9 @@ import qualified Git import Config import Config.Cost import Remote.Helper.Special -import Remote.Helper.Encryptable import qualified Remote.Helper.Chunked.Legacy as Legacy -import Crypto import Creds import Utility.Metered -import Annex.Content import Annex.UUID import Remote.WebDAV.DavUrl @@ -50,20 +47,22 @@ remote = RemoteType { gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote) gen r u c gc = new <$> remoteCost gc expensiveRemoteCost where - new cst = Just $ encryptableRemote c - (storeEncrypted this) - (retrieveEncrypted this) + new cst = Just $ specialRemote c + (prepareStore this chunkconfig) + (prepareRetrieve this chunkconfig) + (prepareRemove this) + (prepareCheckPresent this chunkconfig) this where this = Remote { uuid = u, cost = cst, name = Git.repoDescribe r, - storeKey = store this, - retrieveKeyFile = retrieve this, - retrieveKeyFileCheap = retrieveCheap this, - removeKey = remove this, - checkPresent = checkKey this, + storeKey = storeKeyDummy, + retrieveKeyFile = retreiveKeyFileDummy, + retrieveKeyFileCheap = retrieveCheap, + removeKey = removeKeyDummy, + checkPresent = checkPresentDummy, checkPresentCheap = False, whereisKey = Nothing, remoteFsck = Nothing, @@ -76,6 +75,7 @@ gen r u c gc = new <$> remoteCost gc expensiveRemoteCost availability = GloballyAvailable, remotetype = remote } + chunkconfig = getChunkConfig c webdavSetup :: Maybe UUID -> Maybe CredPair -> RemoteConfig -> Annex (RemoteConfig, UUID) webdavSetup mu mcreds c = do @@ -89,95 +89,67 @@ webdavSetup mu mcreds c = do c'' <- setRemoteCredPair c' (davCreds u) creds return (c'', u) -store :: Remote -> Key -> AssociatedFile -> MeterUpdate -> Annex Bool -store r k _f p = metered (Just p) k $ \meterupdate -> - davAction r False $ \(baseurl, user, pass) -> - sendAnnex k (void $ remove r k) $ \src -> - liftIO $ withMeteredFile src meterupdate $ - storeHelper r k baseurl user pass +prepareStore :: Remote -> ChunkConfig -> Preparer Storer +prepareStore r chunkconfig = simplyPrepare $ fileStorer $ \k f p -> + davAction r False $ \(baseurl, user, pass) -> liftIO $ + withMeteredFile f p $ + storeHelper chunkconfig k baseurl user pass -storeEncrypted :: Remote -> (Cipher, Key) -> Key -> MeterUpdate -> Annex Bool -storeEncrypted r (cipher, enck) k p = metered (Just p) k $ \meterupdate -> - davAction r False $ \(baseurl, user, pass) -> - sendAnnex k (void $ remove r enck) $ \src -> - liftIO $ encrypt (getGpgEncParams r) cipher - (streamMeteredFile src meterupdate) $ - readBytes $ storeHelper r enck baseurl user pass - -storeHelper :: Remote -> Key -> DavUrl -> DavUser -> DavPass -> L.ByteString -> IO Bool -storeHelper r k baseurl user pass b = catchBoolIO $ do +storeHelper :: ChunkConfig -> Key -> DavUrl -> DavUser -> DavPass -> L.ByteString -> IO Bool +storeHelper chunkconfig k baseurl user pass b = do mkdirRecursiveDAV tmpurl user pass case chunkconfig of - NoChunks -> flip catchNonAsync (\e -> warningIO (show e) >> return False) $ do - storehttp tmpurl b - finalizer tmpurl keyurl - return True - UnpaddedChunks _ -> error "TODO: storeHelper with UnpaddedChunks" LegacyChunks chunksize -> do let storer urls = Legacy.storeChunked chunksize urls storehttp b let recorder url s = storehttp url (L8.fromString s) Legacy.storeChunks k tmpurl keyurl storer recorder finalizer - + _ -> do + storehttp tmpurl b + finalizer tmpurl keyurl + return True where tmpurl = tmpLocation baseurl k keyurl = davLocation baseurl k - chunkconfig = getChunkConfig $ config r finalizer srcurl desturl = do void $ tryNonAsync (deleteDAV desturl user pass) mkdirRecursiveDAV (urlParent desturl) user pass moveDAV srcurl desturl user pass storehttp url = putDAV url user pass -retrieveCheap :: Remote -> Key -> FilePath -> Annex Bool -retrieveCheap _ _ _ = return False +retrieveCheap :: Key -> FilePath -> Annex Bool +retrieveCheap _ _ = return False -retrieve :: Remote -> Key -> AssociatedFile -> FilePath -> MeterUpdate -> Annex Bool -retrieve r k _f d p = metered (Just p) k $ \meterupdate -> - davAction r False $ \(baseurl, user, pass) -> liftIO $ catchBoolIO $ - withStoredFiles r k baseurl user pass onerr $ \urls -> do - Legacy.meteredWriteFileChunks meterupdate d urls $ \url -> do +prepareRetrieve :: Remote -> ChunkConfig -> Preparer Retriever +prepareRetrieve r chunkconfig = simplyPrepare $ fileRetriever $ \d k p -> + davAction r onerr $ \(baseurl, user, pass) -> liftIO $ + withStoredFiles chunkconfig k baseurl user pass onerr $ \urls -> do + Legacy.meteredWriteFileChunks p d urls $ \url -> do mb <- getDAV url user pass case mb of - Nothing -> throwIO "download failed" + Nothing -> onerr Just b -> return b - return True where - onerr _ = return False + onerr = error "download failed" -retrieveEncrypted :: Remote -> (Cipher, Key) -> Key -> FilePath -> MeterUpdate -> Annex Bool -retrieveEncrypted r (cipher, enck) k d p = metered (Just p) k $ \meterupdate -> - davAction r False $ \(baseurl, user, pass) -> liftIO $ catchBoolIO $ - withStoredFiles r enck baseurl user pass onerr $ \urls -> do - decrypt cipher (feeder user pass urls) $ - readBytes $ meteredWriteFile meterupdate d - return True - where - onerr _ = return False +prepareRemove :: Remote -> Preparer Remover +prepareRemove r = simplyPrepare $ \k -> + davAction r False $ \(baseurl, user, pass) -> liftIO $ do + -- Delete the key's whole directory, including any + -- legacy chunked files, etc, in a single action. + let url = davLocation baseurl k + isJust . eitherToMaybe <$> tryNonAsync (deleteDAV url user pass) - feeder _ _ [] _ = noop - feeder user pass (url:urls) h = do - mb <- getDAV url user pass - case mb of - Nothing -> throwIO "download failed" - Just b -> do - L.hPut h b - feeder user pass urls h +prepareCheckPresent :: Remote -> ChunkConfig -> Preparer CheckPresent +prepareCheckPresent r chunkconfig = simplyPrepare $ checkKey r chunkconfig -remove :: Remote -> Key -> Annex Bool -remove r k = davAction r False $ \(baseurl, user, pass) -> liftIO $ do - -- Delete the key's whole directory, including any chunked - -- files, etc, in a single action. - let url = davLocation baseurl k - isJust . eitherToMaybe <$> tryNonAsync (deleteDAV url user pass) - -checkKey :: Remote -> Key -> Annex Bool -checkKey r k = davAction r noconn (either error id <$$> go) +checkKey :: Remote -> ChunkConfig -> Key -> Annex Bool +checkKey r chunkconfig k = davAction r noconn (either error id <$$> go) where noconn = error $ name r ++ " not configured" go (baseurl, user, pass) = do showAction $ "checking " ++ name r - liftIO $ withStoredFiles r k baseurl user pass onerr check + liftIO $ withStoredFiles chunkconfig k baseurl user pass onerr check where check [] = return $ Right True check (url:urls) = do @@ -196,7 +168,7 @@ checkKey r k = davAction r noconn (either error id <$$> go) else v withStoredFiles - :: Remote + :: ChunkConfig -> Key -> DavUrl -> DavUser @@ -204,9 +176,7 @@ withStoredFiles -> (DavUrl -> IO a) -> ([DavUrl] -> IO a) -> IO a -withStoredFiles r k baseurl user pass onerr a = case chunkconfig of - NoChunks -> a [keyurl] - UnpaddedChunks _ -> error "TODO: withStoredFiles with UnpaddedChunks" +withStoredFiles chunkconfig k baseurl user pass onerr a = case chunkconfig of LegacyChunks _ -> do let chunkcount = keyurl ++ Legacy.chunkCount v <- getDAV chunkcount user pass @@ -217,9 +187,9 @@ withStoredFiles r k baseurl user pass onerr a = case chunkconfig of if null chunks then onerr chunkcount else a chunks + _ -> a [keyurl] where keyurl = davLocation baseurl k ++ keyFile k - chunkconfig = getChunkConfig $ config r davAction :: Remote -> a -> ((DavUrl, DavUser, DavPass) -> Annex a) -> Annex a davAction r unconfigured action = do diff --git a/debian/changelog b/debian/changelog index 8da5b87bb3..6d13c96372 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,7 +1,7 @@ git-annex (5.20140718) UNRELEASED; urgency=medium * New chunk= option to chunk files stored in special remotes. - Supported by: directory, S3, gcrypt, rsync, and all external + Supported by: directory, S3, webdav, gcrypt, rsync, and all external and hook special remotes. * Partially transferred files are automatically resumed when using chunked remotes! diff --git a/doc/special_remotes/webdav.mdwn b/doc/special_remotes/webdav.mdwn index 64eed5d0b8..6b5f5b1222 100644 --- a/doc/special_remotes/webdav.mdwn +++ b/doc/special_remotes/webdav.mdwn @@ -37,4 +37,4 @@ the webdav remote. Setup example: - # WEBDAV_USERNAME=joey@kitenet.net WEBDAV_PASSWORD=xxxxxxx git annex initremote box.com type=webdav url=https://dav.box.com/dav/git-annex chunksize=75mb keyid=joey@kitenet.net + # WEBDAV_USERNAME=joey@kitenet.net WEBDAV_PASSWORD=xxxxxxx git annex initremote box.com type=webdav url=https://dav.box.com/dav/git-annex chunk=10mb keyid=joey@kitenet.net From 0b1b85d9ea5fc4665ceea2674e2bd07e794f48b8 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Thu, 7 Aug 2014 15:45:56 -0400 Subject: [PATCH 42/56] use DAV monad This speeds up the webdav special remote somewhat, since it often now groups actions together in a single http connection when eg, storing a file. Legacy chunks are still supported, but have not been sped up. This depends on a as-yet unreleased version of DAV. This commit was sponsored by Thomas Hochstein. --- Remote/WebDAV.hs | 229 ++++++++++++++++++----------------- Remote/WebDAV/DavLocation.hs | 59 +++++++++ Remote/WebDAV/DavUrl.hs | 44 ------- debian/changelog | 4 +- debian/control | 2 +- git-annex.cabal | 4 +- 6 files changed, 180 insertions(+), 162 deletions(-) create mode 100644 Remote/WebDAV/DavLocation.hs delete mode 100644 Remote/WebDAV/DavUrl.hs diff --git a/Remote/WebDAV.hs b/Remote/WebDAV.hs index 6679242e5c..a77deffc53 100644 --- a/Remote/WebDAV.hs +++ b/Remote/WebDAV.hs @@ -11,14 +11,12 @@ module Remote.WebDAV (remote, davCreds, configUrl) where import Network.Protocol.HTTP.DAV import qualified Data.Map as M +import qualified Data.ByteString.Lazy as L import qualified Data.ByteString.UTF8 as B8 import qualified Data.ByteString.Lazy.UTF8 as L8 -import qualified Data.ByteString.Lazy as L -import qualified Control.Exception as E import qualified Control.Exception.Lifted as EL import Network.HTTP.Client (HttpException(..)) import Network.HTTP.Types -import System.Log.Logger (debugM) import System.IO.Error import Common.Annex @@ -30,8 +28,9 @@ import Remote.Helper.Special import qualified Remote.Helper.Chunked.Legacy as Legacy import Creds import Utility.Metered +import Utility.Url (URLString) import Annex.UUID -import Remote.WebDAV.DavUrl +import Remote.WebDAV.DavLocation type DavUser = B8.ByteString type DavPass = B8.ByteString @@ -95,26 +94,34 @@ prepareStore r chunkconfig = simplyPrepare $ fileStorer $ \k f p -> withMeteredFile f p $ storeHelper chunkconfig k baseurl user pass -storeHelper :: ChunkConfig -> Key -> DavUrl -> DavUser -> DavPass -> L.ByteString -> IO Bool +storeHelper :: ChunkConfig -> Key -> URLString -> DavUser -> DavPass -> L.ByteString -> IO Bool storeHelper chunkconfig k baseurl user pass b = do - mkdirRecursiveDAV tmpurl user pass case chunkconfig of LegacyChunks chunksize -> do - let storer urls = Legacy.storeChunked chunksize urls storehttp b - let recorder url s = storehttp url (L8.fromString s) - Legacy.storeChunks k tmpurl keyurl storer recorder finalizer - _ -> do - storehttp tmpurl b - finalizer tmpurl keyurl + let storehttp l b' = do + void $ goDAV baseurl user pass $ do + maybe noop (void . mkColRecursive) (locationParent l) + inLocation l $ putContentM (contentType, b') + let storer locs = Legacy.storeChunked chunksize locs storehttp b + let recorder l s = storehttp l (L8.fromString s) + let finalizer tmp' dest' = goDAV baseurl user pass $ + finalizeStore baseurl tmp' (fromJust $ locationParent dest') + Legacy.storeChunks k tmp dest storer recorder finalizer + _ -> goDAV baseurl user pass $ do + void $ mkColRecursive tmpDir + inLocation tmp $ + putContentM (contentType, b) + finalizeStore baseurl tmp dest return True where - tmpurl = tmpLocation baseurl k - keyurl = davLocation baseurl k - finalizer srcurl desturl = do - void $ tryNonAsync (deleteDAV desturl user pass) - mkdirRecursiveDAV (urlParent desturl) user pass - moveDAV srcurl desturl user pass - storehttp url = putDAV url user pass + tmp = keyTmpLocation k + dest = keyLocation k ++ keyFile k + +finalizeStore :: URLString -> DavLocation -> DavLocation -> DAVT IO () +finalizeStore baseurl tmp dest = do + inLocation dest $ void $ safely $ delContentM + maybe noop (void . mkColRecursive) (locationParent dest) + moveDAV baseurl tmp dest retrieveCheap :: Key -> FilePath -> Annex Bool retrieveCheap _ _ = return False @@ -122,9 +129,11 @@ retrieveCheap _ _ = return False prepareRetrieve :: Remote -> ChunkConfig -> Preparer Retriever prepareRetrieve r chunkconfig = simplyPrepare $ fileRetriever $ \d k p -> davAction r onerr $ \(baseurl, user, pass) -> liftIO $ - withStoredFiles chunkconfig k baseurl user pass onerr $ \urls -> do - Legacy.meteredWriteFileChunks p d urls $ \url -> do - mb <- getDAV url user pass + withStoredFiles chunkconfig k baseurl user pass onerr $ \locs -> do + Legacy.meteredWriteFileChunks p d locs $ \l -> do + mb <- goDAV baseurl user pass $ safely $ + inLocation l $ + snd <$> getContentM case mb of Nothing -> onerr Just b -> return b @@ -136,8 +145,9 @@ prepareRemove r = simplyPrepare $ \k -> davAction r False $ \(baseurl, user, pass) -> liftIO $ do -- Delete the key's whole directory, including any -- legacy chunked files, etc, in a single action. - let url = davLocation baseurl k - isJust . eitherToMaybe <$> tryNonAsync (deleteDAV url user pass) + ret <- goDAV baseurl user pass $ safely $ + inLocation (keyLocation k) delContentM + return (isJust ret) prepareCheckPresent :: Remote -> ChunkConfig -> Preparer CheckPresent prepareCheckPresent r chunkconfig = simplyPrepare $ checkKey r chunkconfig @@ -152,46 +162,49 @@ checkKey r chunkconfig k = davAction r noconn (either error id <$$> go) liftIO $ withStoredFiles chunkconfig k baseurl user pass onerr check where check [] = return $ Right True - check (url:urls) = do - v <- existsDAV url user pass + check (l:ls) = do + v <- goDAV baseurl user pass $ existsDAV l if v == Right True - then check urls + then check ls else return v {- Failed to read the chunkcount file; see if it's missing, - or if there's a problem accessing it, - - or perhaps this was an intermittent error. -} - onerr url = do - v <- existsDAV url user pass + - or perhaps this was an intermittent error. -} + onerr f = do + v <- goDAV baseurl user pass $ existsDAV f return $ if v == Right True - then Left $ "failed to read " ++ url + then Left $ "failed to read " ++ f else v withStoredFiles :: ChunkConfig -> Key - -> DavUrl + -> URLString -> DavUser -> DavPass - -> (DavUrl -> IO a) - -> ([DavUrl] -> IO a) + -> (DavLocation -> IO a) + -> ([DavLocation] -> IO a) -> IO a withStoredFiles chunkconfig k baseurl user pass onerr a = case chunkconfig of LegacyChunks _ -> do - let chunkcount = keyurl ++ Legacy.chunkCount - v <- getDAV chunkcount user pass + let chunkcount = keyloc ++ Legacy.chunkCount + v <- goDAV baseurl user pass $ safely $ + inLocation chunkcount $ + snd <$> getContentM case v of - Just s -> a $ Legacy.listChunks keyurl $ L8.toString s + Just s -> a $ Legacy.listChunks keyloc $ L8.toString s Nothing -> do - chunks <- Legacy.probeChunks keyurl $ \u -> (== Right True) <$> existsDAV u user pass + chunks <- Legacy.probeChunks keyloc $ \f -> + (== Right True) <$> goDAV baseurl user pass (existsDAV f) if null chunks then onerr chunkcount else a chunks - _ -> a [keyurl] + _ -> a [keyloc] where - keyurl = davLocation baseurl k ++ keyFile k + keyloc = keyLocation k ++ keyFile k -davAction :: Remote -> a -> ((DavUrl, DavUser, DavPass) -> Annex a) -> Annex a +davAction :: Remote -> a -> ((DavLocation, DavUser, DavPass) -> Annex a) -> Annex a davAction r unconfigured action = do mcreds <- getCreds (config r) (uuid r) case (mcreds, configUrl r) of @@ -199,7 +212,7 @@ davAction r unconfigured action = do action (url, toDavUser user, toDavPass pass) _ -> return unconfigured -configUrl :: Remote -> Maybe DavUrl +configUrl :: Remote -> Maybe URLString configUrl r = fixup <$> M.lookup "url" (config r) where -- box.com DAV url changed @@ -211,47 +224,63 @@ toDavUser = B8.fromString toDavPass :: String -> DavPass toDavPass = B8.fromString -{- Creates a directory in WebDAV, if not already present; also creating - - any missing parent directories. -} -mkdirRecursiveDAV :: DavUrl -> DavUser -> DavPass -> IO () -mkdirRecursiveDAV url user pass = go url - where - make u = mkdirDAV u user pass - - go u = do - r <- E.try (make u) :: IO (Either E.SomeException Bool) - case r of - {- Parent directory is missing. Recurse to create - - it, and try once more to create the directory. -} - Right False -> do - go (urlParent u) - void $ make u - {- Directory created successfully -} - Right True -> return () - {- Directory already exists, or some other error - - occurred. In the latter case, whatever wanted - - to use this directory will fail. -} - Left _ -> return () - {- Test if a WebDAV store is usable, by writing to a test file, and then - - deleting the file. Exits with an IO error if not. -} -testDav :: String -> Maybe CredPair -> Annex () -testDav baseurl (Just (u, p)) = do + - deleting the file. + - + - Also ensures that the path of the url exists, trying to create it if not. + - + - Throws an error if store is not usable. + -} +testDav :: URLString -> Maybe CredPair -> Annex () +testDav url (Just (u, p)) = do showSideAction "testing WebDAV server" - test "make directory" $ mkdirRecursiveDAV baseurl user pass - test "write file" $ putDAV testurl user pass L.empty - test "delete file" $ deleteDAV testurl user pass + test $ liftIO $ goDAV url user pass $ do + makeParentDirs + inLocation tmpDir $ void mkCol + inLocation (tmpLocation "git-annex-test") $ do + putContentM (Nothing, L.empty) + delContentM where - test desc a = liftIO $ - either (\e -> throwIO $ "WebDAV failed to " ++ desc ++ ": " ++ show e) + test a = liftIO $ + either (\e -> throwIO $ "WebDAV test failed: " ++ show e) (const noop) =<< tryNonAsync a user = toDavUser u pass = toDavPass p - testurl = davUrl baseurl "git-annex-test" testDav _ Nothing = error "Need to configure webdav username and password." +{- Tries to make all the parent directories in the WebDAV urls's path, + - right down to the root. + - + - Ignores any failures, which can occur for reasons including the WebDAV + - server only serving up WebDAV in a subdirectory. -} +makeParentDirs :: DAVT IO () +makeParentDirs = go + where + go = do + l <- getDAVLocation + case locationParent l of + Nothing -> noop + Just p -> void $ safely $ inDAVLocation (const p) go + void $ safely mkCol + +{- Checks if the directory exists. If not, tries to create its + - parent directories, all the way down to the root, and finally creates + - it. -} +mkColRecursive :: DavLocation -> DAVT IO Bool +mkColRecursive d = go =<< existsDAV d + where + go (Right True) = return True + go _ = ifM (inLocation d mkCol) + ( return True + , do + case locationParent d of + Nothing -> makeParentDirs + Just parent -> void (mkColRecursive parent) + inLocation d mkCol + ) + getCreds :: RemoteConfig -> UUID -> Annex (Maybe CredPair) getCreds c u = getRemoteCredPairFor "webdav" c (davCreds u) @@ -269,54 +298,21 @@ contentType = Just $ B8.fromString "application/octet-stream" throwIO :: String -> IO a throwIO msg = ioError $ mkIOError userErrorType msg Nothing Nothing -debugDAV :: DavUrl -> String -> IO () -debugDAV msg url = debugM "DAV" $ msg ++ " " ++ url - -{--------------------------------------------------------------------- - - Low-level DAV operations. - ---------------------------------------------------------------------} - -putDAV :: DavUrl -> DavUser -> DavPass -> L.ByteString -> IO () -putDAV url user pass b = do - debugDAV "PUT" url - goDAV url user pass $ putContentM (contentType, b) - -getDAV :: DavUrl -> DavUser -> DavPass -> IO (Maybe L.ByteString) -getDAV url user pass = do - debugDAV "GET" url - eitherToMaybe <$> tryNonAsync go +moveDAV :: URLString -> DavLocation -> DavLocation -> DAVT IO () +moveDAV baseurl src dest = inLocation src $ moveContentM newurl where - go = goDAV url user pass $ snd <$> getContentM + newurl = B8.fromString (locationUrl baseurl dest) -deleteDAV :: DavUrl -> DavUser -> DavPass -> IO () -deleteDAV url user pass = do - debugDAV "DELETE" url - goDAV url user pass delContentM - -moveDAV :: DavUrl -> DavUrl -> DavUser -> DavPass -> IO () -moveDAV url newurl user pass = do - debugDAV ("MOVE to " ++ newurl ++ " from ") url - goDAV url user pass $ moveContentM newurl' +existsDAV :: DavLocation -> DAVT IO (Either String Bool) +existsDAV l = inLocation l check `EL.catch` (\(e :: EL.SomeException) -> return (Left $ show e)) where - newurl' = B8.fromString newurl - -mkdirDAV :: DavUrl -> DavUser -> DavPass -> IO Bool -mkdirDAV url user pass = do - debugDAV "MKDIR" url - goDAV url user pass mkCol - -existsDAV :: DavUrl -> DavUser -> DavPass -> IO (Either String Bool) -existsDAV url user pass = do - debugDAV "EXISTS" url - either (Left . show) id <$> tryNonAsync check - where - ispresent = return . Right - check = goDAV url user pass $ do + check = do setDepth Nothing EL.catchJust (matchStatusCodeException notFound404) (getPropsM >> ispresent True) (const $ ispresent False) + ispresent = return . Right matchStatusCodeException :: Status -> HttpException -> Maybe () matchStatusCodeException want (StatusCodeException s _ _) @@ -324,7 +320,12 @@ matchStatusCodeException want (StatusCodeException s _ _) | otherwise = Nothing matchStatusCodeException _ _ = Nothing -goDAV :: DavUrl -> DavUser -> DavPass -> DAVT IO a -> IO a +-- Ignores any exceptions when performing a DAV action. +safely :: DAVT IO a -> DAVT IO (Maybe a) +safely a = (Just <$> a) + `EL.catch` (\(_ :: EL.SomeException) -> return Nothing) + +goDAV :: URLString -> DavUser -> DavPass -> DAVT IO a -> IO a goDAV url user pass a = choke $ evalDAVT url $ do setResponseTimeout Nothing -- disable default (5 second!) timeout setCreds user pass diff --git a/Remote/WebDAV/DavLocation.hs b/Remote/WebDAV/DavLocation.hs new file mode 100644 index 0000000000..3b52f3a64c --- /dev/null +++ b/Remote/WebDAV/DavLocation.hs @@ -0,0 +1,59 @@ +{- WebDAV locations. + - + - Copyright 2014 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +{-# LANGUAGE CPP #-} +{-# LANGUAGE FlexibleContexts #-} + +module Remote.WebDAV.DavLocation where + +import Types +import Locations +import Utility.Url (URLString) + +import System.FilePath.Posix -- for manipulating url paths +import Network.Protocol.HTTP.DAV (inDAVLocation, DAVT) +import Control.Monad.IO.Class (MonadIO) +#ifdef mingw32_HOST_OS +import Data.String.Utils +#endif + +-- Relative to the top of the DAV url. +type DavLocation = String + +{- Runs action in subdirectory, relative to the current location. -} +inLocation :: (MonadIO m) => DavLocation -> DAVT m a -> DAVT m a +inLocation d = inDAVLocation ( d) + +{- The directory where files(s) for a key are stored. -} +keyLocation :: Key -> DavLocation +keyLocation k = addTrailingPathSeparator $ hashdir keyFile k + where +#ifndef mingw32_HOST_OS + hashdir = hashDirLower k +#else + hashdir = replace "\\" "/" (hashDirLower k) +#endif + +{- Where we store temporary data for a key as it's being uploaded. -} +keyTmpLocation :: Key -> DavLocation +keyTmpLocation = addTrailingPathSeparator . tmpLocation . keyFile + +tmpLocation :: FilePath -> DavLocation +tmpLocation f = tmpDir f + +tmpDir :: DavLocation +tmpDir = "tmp" + +locationParent :: String -> Maybe String +locationParent loc + | loc `elem` tops = Nothing + | otherwise = Just (takeDirectory loc) + where + tops = ["/", "", "."] + +locationUrl :: URLString -> DavLocation -> URLString +locationUrl baseurl loc = baseurl loc diff --git a/Remote/WebDAV/DavUrl.hs b/Remote/WebDAV/DavUrl.hs deleted file mode 100644 index 4862c4f37d..0000000000 --- a/Remote/WebDAV/DavUrl.hs +++ /dev/null @@ -1,44 +0,0 @@ -{- WebDAV urls. - - - - Copyright 2014 Joey Hess - - - - Licensed under the GNU GPL version 3 or higher. - -} - -{-# LANGUAGE CPP #-} - -module Remote.WebDAV.DavUrl where - -import Types -import Locations - -import Network.URI (normalizePathSegments) -import System.FilePath.Posix -#ifdef mingw32_HOST_OS -import Data.String.Utils -#endif - -type DavUrl = String - -{- The directory where files(s) for a key are stored. -} -davLocation :: DavUrl -> Key -> DavUrl -davLocation baseurl k = addTrailingPathSeparator $ - davUrl baseurl $ hashdir keyFile k - where -#ifndef mingw32_HOST_OS - hashdir = hashDirLower k -#else - hashdir = replace "\\" "/" (hashDirLower k) -#endif - -{- Where we store temporary data for a key as it's being uploaded. -} -tmpLocation :: DavUrl -> Key -> DavUrl -tmpLocation baseurl k = addTrailingPathSeparator $ - davUrl baseurl $ "tmp" keyFile k - -davUrl :: DavUrl -> FilePath -> DavUrl -davUrl baseurl file = baseurl file - -urlParent :: DavUrl -> DavUrl -urlParent url = dropTrailingPathSeparator $ - normalizePathSegments (dropTrailingPathSeparator url ++ "/..") diff --git a/debian/changelog b/debian/changelog index 6d13c96372..3b54458c39 100644 --- a/debian/changelog +++ b/debian/changelog @@ -16,7 +16,9 @@ git-annex (5.20140718) UNRELEASED; urgency=medium were incompletely repaired before. * Fix cost calculation for non-encrypted remotes. * Display exception message when a transfer fails due to an exception. - * WebDAV: Dropped support for DAV before 0.6.1. + * WebDAV: Sped up by avoiding making multiple http connections + when storing a file. + * WebDAV: Dropped support for DAV before 0.8. * testremote: New command to test uploads/downloads to a remote. * Dropping an object from a bup special remote now deletes the git branch for the object, although of course the object's content cannot be deleted diff --git a/debian/control b/debian/control index e37f7d05ee..66d340e3cf 100644 --- a/debian/control +++ b/debian/control @@ -14,7 +14,7 @@ Build-Depends: libghc-dataenc-dev, libghc-utf8-string-dev, libghc-hs3-dev (>= 0.5.6), - libghc-dav-dev (>= 0.6.1) [amd64 i386 kfreebsd-amd64 kfreebsd-i386 powerpc], + libghc-dav-dev (>= 0.8) [amd64 i386 kfreebsd-amd64 kfreebsd-i386 powerpc], libghc-quickcheck2-dev, libghc-monad-control-dev (>= 0.3), libghc-exceptions-dev, diff --git a/git-annex.cabal b/git-annex.cabal index 2a39489d40..c3a1c161db 100644 --- a/git-annex.cabal +++ b/git-annex.cabal @@ -142,8 +142,8 @@ Executable git-annex CPP-Options: -DWITH_S3 if flag(WebDAV) - Build-Depends: DAV (> 0.6), - http-client, http-conduit, http-types, lifted-base + Build-Depends: DAV (> 0.8), + http-client, http-conduit, http-types, lifted-base, transformers CPP-Options: -DWITH_WEBDAV if flag(Assistant) && ! os(solaris) From fc4b3cdcce66363305b806db1a93fc6824b87905 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Thu, 7 Aug 2014 18:32:07 -0400 Subject: [PATCH 43/56] webdav: reuse http connection when operating on the chunks of a file For both new and legacy chunks. Massive speed up! This commit was sponsored by Dominik Wagenknecht. --- Remote/WebDAV.hs | 187 +++++++++++++++++++++++++---------------------- git-annex.cabal | 2 +- 2 files changed, 101 insertions(+), 88 deletions(-) diff --git a/Remote/WebDAV.hs b/Remote/WebDAV.hs index a77deffc53..d81b765106 100644 --- a/Remote/WebDAV.hs +++ b/Remote/WebDAV.hs @@ -30,11 +30,9 @@ import Creds import Utility.Metered import Utility.Url (URLString) import Annex.UUID +import Annex.Exception import Remote.WebDAV.DavLocation -type DavUser = B8.ByteString -type DavPass = B8.ByteString - remote :: RemoteType remote = RemoteType { typename = "webdav", @@ -47,10 +45,10 @@ gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remot gen r u c gc = new <$> remoteCost gc expensiveRemoteCost where new cst = Just $ specialRemote c - (prepareStore this chunkconfig) - (prepareRetrieve this chunkconfig) - (prepareRemove this) - (prepareCheckPresent this chunkconfig) + (prepareDAV this $ store chunkconfig) + (prepareDAV this $ retrieve chunkconfig) + (prepareDAV this $ remove) + (prepareDAV this $ checkKey this chunkconfig) this where this = Remote { @@ -88,30 +86,34 @@ webdavSetup mu mcreds c = do c'' <- setRemoteCredPair c' (davCreds u) creds return (c'', u) -prepareStore :: Remote -> ChunkConfig -> Preparer Storer -prepareStore r chunkconfig = simplyPrepare $ fileStorer $ \k f p -> - davAction r False $ \(baseurl, user, pass) -> liftIO $ - withMeteredFile f p $ - storeHelper chunkconfig k baseurl user pass +-- Opens a http connection to the DAV server, which will be reused +-- each time the helper is called. +prepareDAV :: Remote -> (Maybe DavHandle -> helper) -> Preparer helper +prepareDAV = resourcePrepare . const . withDAVHandle -storeHelper :: ChunkConfig -> Key -> URLString -> DavUser -> DavPass -> L.ByteString -> IO Bool -storeHelper chunkconfig k baseurl user pass b = do +store :: ChunkConfig -> Maybe DavHandle -> Storer +store _ Nothing = byteStorer $ \_k _b _p -> return False +store chunkconfig (Just dav) = fileStorer $ \k f p -> liftIO $ + withMeteredFile f p $ storeHelper chunkconfig k dav + +storeHelper :: ChunkConfig -> Key -> DavHandle -> L.ByteString -> IO Bool +storeHelper chunkconfig k dav b = do case chunkconfig of LegacyChunks chunksize -> do let storehttp l b' = do - void $ goDAV baseurl user pass $ do + void $ goDAV dav $ do maybe noop (void . mkColRecursive) (locationParent l) inLocation l $ putContentM (contentType, b') let storer locs = Legacy.storeChunked chunksize locs storehttp b let recorder l s = storehttp l (L8.fromString s) - let finalizer tmp' dest' = goDAV baseurl user pass $ - finalizeStore baseurl tmp' (fromJust $ locationParent dest') + let finalizer tmp' dest' = goDAV dav $ + finalizeStore (baseURL dav) tmp' (fromJust $ locationParent dest') Legacy.storeChunks k tmp dest storer recorder finalizer - _ -> goDAV baseurl user pass $ do + _ -> goDAV dav $ do void $ mkColRecursive tmpDir inLocation tmp $ putContentM (contentType, b) - finalizeStore baseurl tmp dest + finalizeStore (baseURL dav) tmp dest return True where tmp = keyTmpLocation k @@ -126,77 +128,71 @@ finalizeStore baseurl tmp dest = do retrieveCheap :: Key -> FilePath -> Annex Bool retrieveCheap _ _ = return False -prepareRetrieve :: Remote -> ChunkConfig -> Preparer Retriever -prepareRetrieve r chunkconfig = simplyPrepare $ fileRetriever $ \d k p -> - davAction r onerr $ \(baseurl, user, pass) -> liftIO $ - withStoredFiles chunkconfig k baseurl user pass onerr $ \locs -> do - Legacy.meteredWriteFileChunks p d locs $ \l -> do - mb <- goDAV baseurl user pass $ safely $ - inLocation l $ - snd <$> getContentM - case mb of - Nothing -> onerr - Just b -> return b +retrieve :: ChunkConfig -> Maybe DavHandle -> Retriever +retrieve _ Nothing = error "unable to connect" +retrieve chunkconfig (Just dav) = fileRetriever $ \d k p -> liftIO $ + withStoredFiles chunkconfig k dav onerr $ \locs -> do + Legacy.meteredWriteFileChunks p d locs $ \l -> do + mb <- goDAV dav $ safely $ + inLocation l $ + snd <$> getContentM + case mb of + Nothing -> onerr + Just b -> return b where onerr = error "download failed" -prepareRemove :: Remote -> Preparer Remover -prepareRemove r = simplyPrepare $ \k -> - davAction r False $ \(baseurl, user, pass) -> liftIO $ do - -- Delete the key's whole directory, including any - -- legacy chunked files, etc, in a single action. - ret <- goDAV baseurl user pass $ safely $ - inLocation (keyLocation k) delContentM - return (isJust ret) +remove :: Maybe DavHandle -> Remover +remove Nothing _ = return False +remove (Just dav) k = liftIO $ do + -- Delete the key's whole directory, including any + -- legacy chunked files, etc, in a single action. + ret <- goDAV dav $ safely $ + inLocation (keyLocation k) delContentM + return (isJust ret) -prepareCheckPresent :: Remote -> ChunkConfig -> Preparer CheckPresent -prepareCheckPresent r chunkconfig = simplyPrepare $ checkKey r chunkconfig - -checkKey :: Remote -> ChunkConfig -> Key -> Annex Bool -checkKey r chunkconfig k = davAction r noconn (either error id <$$> go) +checkKey :: Remote -> ChunkConfig -> Maybe DavHandle -> CheckPresent +checkKey r _ Nothing _ = error $ name r ++ " not configured" +checkKey r chunkconfig (Just dav) k = either error id <$> go where - noconn = error $ name r ++ " not configured" - - go (baseurl, user, pass) = do + go = do showAction $ "checking " ++ name r - liftIO $ withStoredFiles chunkconfig k baseurl user pass onerr check - where - check [] = return $ Right True - check (l:ls) = do - v <- goDAV baseurl user pass $ existsDAV l - if v == Right True - then check ls - else return v + liftIO $ withStoredFiles chunkconfig k dav onerr check - {- Failed to read the chunkcount file; see if it's missing, - - or if there's a problem accessing it, - - or perhaps this was an intermittent error. -} - onerr f = do - v <- goDAV baseurl user pass $ existsDAV f - return $ if v == Right True - then Left $ "failed to read " ++ f - else v + check [] = return $ Right True + check (l:ls) = do + v <- goDAV dav $ existsDAV l + if v == Right True + then check ls + else return v + + {- Failed to read the chunkcount file; see if it's missing, + - or if there's a problem accessing it, + - or perhaps this was an intermittent error. -} + onerr f = do + v <- goDAV dav $ existsDAV f + return $ if v == Right True + then Left $ "failed to read " ++ f + else v withStoredFiles :: ChunkConfig -> Key - -> URLString - -> DavUser - -> DavPass + -> DavHandle -> (DavLocation -> IO a) -> ([DavLocation] -> IO a) -> IO a -withStoredFiles chunkconfig k baseurl user pass onerr a = case chunkconfig of +withStoredFiles chunkconfig k dav onerr a = case chunkconfig of LegacyChunks _ -> do let chunkcount = keyloc ++ Legacy.chunkCount - v <- goDAV baseurl user pass $ safely $ + v <- goDAV dav $ safely $ inLocation chunkcount $ snd <$> getContentM case v of Just s -> a $ Legacy.listChunks keyloc $ L8.toString s Nothing -> do chunks <- Legacy.probeChunks keyloc $ \f -> - (== Right True) <$> goDAV baseurl user pass (existsDAV f) + (== Right True) <$> goDAV dav (existsDAV f) if null chunks then onerr chunkcount else a chunks @@ -204,20 +200,19 @@ withStoredFiles chunkconfig k baseurl user pass onerr a = case chunkconfig of where keyloc = keyLocation k ++ keyFile k -davAction :: Remote -> a -> ((DavLocation, DavUser, DavPass) -> Annex a) -> Annex a -davAction r unconfigured action = do - mcreds <- getCreds (config r) (uuid r) - case (mcreds, configUrl r) of - (Just (user, pass), Just url) -> - action (url, toDavUser user, toDavPass pass) - _ -> return unconfigured - configUrl :: Remote -> Maybe URLString configUrl r = fixup <$> M.lookup "url" (config r) where -- box.com DAV url changed fixup = replace "https://www.box.com/dav/" "https://dav.box.com/dav/" +type DavUser = B8.ByteString +type DavPass = B8.ByteString + +baseURL :: DavHandle -> URLString +baseURL (DavHandle _ _ _ u) = u + + toDavUser :: String -> DavUser toDavUser = B8.fromString @@ -234,7 +229,8 @@ toDavPass = B8.fromString testDav :: URLString -> Maybe CredPair -> Annex () testDav url (Just (u, p)) = do showSideAction "testing WebDAV server" - test $ liftIO $ goDAV url user pass $ do + test $ liftIO $ evalDAVT url $ do + prepDAV user pass makeParentDirs inLocation tmpDir $ void mkCol inLocation (tmpLocation "git-annex-test") $ do @@ -325,15 +321,32 @@ safely :: DAVT IO a -> DAVT IO (Maybe a) safely a = (Just <$> a) `EL.catch` (\(_ :: EL.SomeException) -> return Nothing) -goDAV :: URLString -> DavUser -> DavPass -> DAVT IO a -> IO a -goDAV url user pass a = choke $ evalDAVT url $ do - setResponseTimeout Nothing -- disable default (5 second!) timeout - setCreds user pass +choke :: IO (Either String a) -> IO a +choke f = do + x <- f + case x of + Left e -> error e + Right r -> return r + +data DavHandle = DavHandle DAVContext DavUser DavPass URLString + +withDAVHandle :: Remote -> (Maybe DavHandle -> Annex a) -> Annex a +withDAVHandle r a = do + mcreds <- getCreds (config r) (uuid r) + case (mcreds, configUrl r) of + (Just (user, pass), Just baseurl) -> + bracketIO (mkDAVContext baseurl) closeDAVContext $ \ctx -> + a (Just (DavHandle ctx (toDavUser user) (toDavPass pass) baseurl)) + _ -> a Nothing + +goDAV :: DavHandle -> DAVT IO a -> IO a +goDAV (DavHandle ctx user pass _) a = choke $ run $ do + prepDAV user pass a where - choke :: IO (Either String a) -> IO a - choke f = do - x <- f - case x of - Left e -> error e - Right r -> return r + run = fst <$$> runDAVContext ctx + +prepDAV :: DavUser -> DavPass -> DAVT IO () +prepDAV user pass = do + setResponseTimeout Nothing -- disable default (5 second!) timeout + setCreds user pass diff --git a/git-annex.cabal b/git-annex.cabal index c3a1c161db..8f36bfe486 100644 --- a/git-annex.cabal +++ b/git-annex.cabal @@ -142,7 +142,7 @@ Executable git-annex CPP-Options: -DWITH_S3 if flag(WebDAV) - Build-Depends: DAV (> 0.8), + Build-Depends: DAV (>= 0.8), http-client, http-conduit, http-types, lifted-base, transformers CPP-Options: -DWITH_WEBDAV From 2dd8dab314c95233a859d7aab5aa1c8ba47ef6f1 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Thu, 7 Aug 2014 19:32:23 -0400 Subject: [PATCH 44/56] WebDAV: Avoid buffering whole file in memory when uploading. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The httpStorer will later also be used by S3. This commit was sponsored by Torbjørn Thorsen. --- Remote/Helper/Http.hs | 39 +++++++++++++++++++++++++++++++++++++++ Remote/WebDAV.hs | 43 ++++++++++++++++++++++--------------------- debian/changelog | 1 + 3 files changed, 62 insertions(+), 21 deletions(-) create mode 100644 Remote/Helper/Http.hs diff --git a/Remote/Helper/Http.hs b/Remote/Helper/Http.hs new file mode 100644 index 0000000000..945e5cd991 --- /dev/null +++ b/Remote/Helper/Http.hs @@ -0,0 +1,39 @@ +{- helpers for remotes using http + - + - Copyright 2014 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Remote.Helper.Http where + +import Common.Annex +import Types.StoreRetrieve +import Utility.Metered +import Remote.Helper.Special +import Network.HTTP.Client (RequestBody(..)) + +import qualified Data.ByteString.Lazy as L +import qualified Data.ByteString as S +import Control.Concurrent + +-- A storer that expects to be provided with a http RequestBody containing +-- the content to store. +-- +-- Implemented as a fileStorer, so that the content can be streamed +-- from the file in constant space. +httpStorer :: (Key -> RequestBody -> Annex Bool) -> Storer +httpStorer a = fileStorer $ \k f m -> do + size <- liftIO $ (fromIntegral . fileSize <$> getFileStatus f :: IO Integer) + let streamer sink = withMeteredFile f m $ \b -> do + mvar <- newMVar $ L.toChunks b + let getnextchunk = modifyMVar mvar $ pure . pop + sink getnextchunk + let body = RequestBodyStream (fromInteger size) streamer + a k body + where + pop [] = ([], S.empty) + pop (c:cs) = (cs, c) + +--httpRetriever :: (Key -> Annex Response) -> Retriever +--httpRetriever a = byteRetriever $ \k sink diff --git a/Remote/WebDAV.hs b/Remote/WebDAV.hs index d81b765106..b70001ddba 100644 --- a/Remote/WebDAV.hs +++ b/Remote/WebDAV.hs @@ -25,6 +25,7 @@ import qualified Git import Config import Config.Cost import Remote.Helper.Special +import Remote.Helper.Http import qualified Remote.Helper.Chunked.Legacy as Legacy import Creds import Utility.Metered @@ -93,29 +94,29 @@ prepareDAV = resourcePrepare . const . withDAVHandle store :: ChunkConfig -> Maybe DavHandle -> Storer store _ Nothing = byteStorer $ \_k _b _p -> return False -store chunkconfig (Just dav) = fileStorer $ \k f p -> liftIO $ - withMeteredFile f p $ storeHelper chunkconfig k dav +store (LegacyChunks chunksize) (Just dav) = fileStorer $ \k f p -> liftIO $ + withMeteredFile f p $ storeLegacyChunked chunksize k dav +store _ (Just dav) = httpStorer $ \k reqbody -> liftIO $ goDAV dav $ do + let tmp = keyTmpLocation k + let dest = keyLocation k ++ keyFile k + void $ mkColRecursive tmpDir + inLocation tmp $ + putContentM' (contentType, reqbody) + finalizeStore (baseURL dav) tmp dest + return True -storeHelper :: ChunkConfig -> Key -> DavHandle -> L.ByteString -> IO Bool -storeHelper chunkconfig k dav b = do - case chunkconfig of - LegacyChunks chunksize -> do - let storehttp l b' = do - void $ goDAV dav $ do - maybe noop (void . mkColRecursive) (locationParent l) - inLocation l $ putContentM (contentType, b') - let storer locs = Legacy.storeChunked chunksize locs storehttp b - let recorder l s = storehttp l (L8.fromString s) - let finalizer tmp' dest' = goDAV dav $ - finalizeStore (baseURL dav) tmp' (fromJust $ locationParent dest') - Legacy.storeChunks k tmp dest storer recorder finalizer - _ -> goDAV dav $ do - void $ mkColRecursive tmpDir - inLocation tmp $ - putContentM (contentType, b) - finalizeStore (baseURL dav) tmp dest - return True +storeLegacyChunked :: ChunkSize -> Key -> DavHandle -> L.ByteString -> IO Bool +storeLegacyChunked chunksize k dav b = + Legacy.storeChunks k tmp dest storer recorder finalizer where + storehttp l b' = void $ goDAV dav $ do + maybe noop (void . mkColRecursive) (locationParent l) + inLocation l $ putContentM (contentType, b') + storer locs = Legacy.storeChunked chunksize locs storehttp b + recorder l s = storehttp l (L8.fromString s) + finalizer tmp' dest' = goDAV dav $ + finalizeStore (baseURL dav) tmp' (fromJust $ locationParent dest') + tmp = keyTmpLocation k dest = keyLocation k ++ keyFile k diff --git a/debian/changelog b/debian/changelog index 3b54458c39..1b981c08d3 100644 --- a/debian/changelog +++ b/debian/changelog @@ -18,6 +18,7 @@ git-annex (5.20140718) UNRELEASED; urgency=medium * Display exception message when a transfer fails due to an exception. * WebDAV: Sped up by avoiding making multiple http connections when storing a file. + * WebDAV: Avoid buffering whole file in memory when uploading. * WebDAV: Dropped support for DAV before 0.8. * testremote: New command to test uploads/downloads to a remote. * Dropping an object from a bup special remote now deletes the git branch From 8e3d62dd5d4d753b2fe7e9b9564224744328e93b Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Thu, 7 Aug 2014 21:01:07 -0400 Subject: [PATCH 45/56] generalized using the extensions package --- Utility/Exception.hs | 49 ++++++++++++++++++++++++++------------------ 1 file changed, 29 insertions(+), 20 deletions(-) diff --git a/Utility/Exception.hs b/Utility/Exception.hs index 1fecf65d56..13c9d508a4 100644 --- a/Utility/Exception.hs +++ b/Utility/Exception.hs @@ -1,6 +1,6 @@ {- Simple IO exception handling (and some more) - - - Copyright 2011-2012 Joey Hess + - Copyright 2011-2014 Joey Hess - - License: BSD-2-clause -} @@ -9,51 +9,60 @@ module Utility.Exception where -import Control.Exception -import qualified Control.Exception as E -import Control.Applicative +import Control.Exception (IOException, AsyncException) +import Control.Monad.Catch import Control.Monad import System.IO.Error (isDoesNotExistError) import Utility.Data {- Catches IO errors and returns a Bool -} -catchBoolIO :: IO Bool -> IO Bool +catchBoolIO :: MonadCatch m => m Bool -> m Bool catchBoolIO = catchDefaultIO False {- Catches IO errors and returns a Maybe -} -catchMaybeIO :: IO a -> IO (Maybe a) -catchMaybeIO a = catchDefaultIO Nothing $ Just <$> a +catchMaybeIO :: MonadCatch m => m a -> m (Maybe a) +catchMaybeIO a = do + catchDefaultIO Nothing $ do + v <- a + return (Just v) {- Catches IO errors and returns a default value. -} -catchDefaultIO :: a -> IO a -> IO a +catchDefaultIO :: MonadCatch m => a -> m a -> m a catchDefaultIO def a = catchIO a (const $ return def) {- Catches IO errors and returns the error message. -} -catchMsgIO :: IO a -> IO (Either String a) -catchMsgIO a = either (Left . show) Right <$> tryIO a +catchMsgIO :: MonadCatch m => m a -> m (Either String a) +catchMsgIO a = do + v <- tryIO a + return $ either (Left . show) Right v {- catch specialized for IO errors only -} -catchIO :: IO a -> (IOException -> IO a) -> IO a -catchIO = E.catch +catchIO :: MonadCatch m => m a -> (IOException -> m a) -> m a +catchIO = catch {- try specialized for IO errors only -} -tryIO :: IO a -> IO (Either IOException a) +tryIO :: MonadCatch m => m a -> m (Either IOException a) tryIO = try {- Catches all exceptions except for async exceptions. - This is often better to use than catching them all, so that - ThreadKilled and UserInterrupt get through. -} -catchNonAsync :: IO a -> (SomeException -> IO a) -> IO a +catchNonAsync :: MonadCatch m => m a -> (SomeException -> m a) -> m a catchNonAsync a onerr = a `catches` - [ Handler (\ (e :: AsyncException) -> throw e) + [ Handler (\ (e :: AsyncException) -> throwM e) , Handler (\ (e :: SomeException) -> onerr e) ] -tryNonAsync :: IO a -> IO (Either SomeException a) -tryNonAsync a = (Right <$> a) `catchNonAsync` (return . Left) +tryNonAsync :: MonadCatch m => m a -> m (Either SomeException a) +tryNonAsync a = go `catchNonAsync` (return . Left) + where + go = do + v <- a + return (Right v) {- Catches only DoesNotExist exceptions, and lets all others through. -} -tryWhenExists :: IO a -> IO (Maybe a) -tryWhenExists a = eitherToMaybe <$> - tryJust (guard . isDoesNotExistError) a +tryWhenExists :: MonadCatch m => m a -> m (Maybe a) +tryWhenExists a = do + v <- tryJust (guard . isDoesNotExistError) a + return (eitherToMaybe v) From c784ef4586fc827d128ccf920398de8a52c063f4 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Thu, 7 Aug 2014 21:55:44 -0400 Subject: [PATCH 46/56] unify exception handling into Utility.Exception Removed old extensible-exceptions, only needed for very old ghc. Made webdav use Utility.Exception, to work after some changes in DAV's exception handling. Removed Annex.Exception. Mostly this was trivial, but note that tryAnnex is replaced with tryNonAsync and catchAnnex replaced with catchNonAsync. In theory that could be a behavior change, since the former caught all exceptions, and the latter don't catch async exceptions. However, in practice, nothing in the Annex monad uses async exceptions. Grepping for throwTo and killThread only find stuff in the assistant, which does not seem related. Command.Add.undo is changed to accept a SomeException, and things that use it for rollback now catch non-async exceptions, rather than only IOExceptions. --- Annex.hs | 6 ++- Annex/Content.hs | 5 +-- Annex/Direct.hs | 3 +- Annex/Drop.hs | 3 +- Annex/Environment.hs | 3 +- Annex/Exception.hs | 63 ------------------------------ Annex/Index.hs | 3 +- Annex/Journal.hs | 1 - Annex/LockFile.hs | 1 - Annex/Perms.hs | 5 +-- Annex/ReplaceFile.hs | 3 +- Annex/Transfer.hs | 3 +- Annex/View.hs | 6 +-- Assistant/Pairing/Network.hs | 1 - Assistant/Threads/Committer.hs | 3 +- Assistant/Threads/Cronner.hs | 6 +-- Assistant/Threads/SanityChecker.hs | 3 +- Assistant/Threads/Watcher.hs | 4 +- Assistant/Threads/XMPPClient.hs | 18 ++++----- Assistant/XMPP/Client.hs | 7 ++-- Assistant/XMPP/Git.hs | 18 ++++----- CmdLine/Action.hs | 9 ++--- Command/Add.hs | 17 ++++---- Command/Direct.hs | 5 +-- Command/FuzzTest.hs | 3 +- Command/Indirect.hs | 7 +--- Command/Map.hs | 3 +- Command/Move.hs | 10 ++--- Command/PreCommit.hs | 1 - Command/TestRemote.hs | 1 - Command/Vicfg.hs | 4 +- Common.hs | 1 - Crypto.hs | 1 - Git/Config.hs | 1 - Git/UpdateIndex.hs | 2 - Limit.hs | 8 ++-- Logs/Transfer.hs | 3 +- Messages.hs | 2 +- Remote.hs | 10 ++--- Remote/Ddar.hs | 1 - Remote/External.hs | 3 +- Remote/External/Types.hs | 1 - Remote/GCrypt.hs | 2 +- Remote/Git.hs | 4 +- Remote/Helper/Chunked.hs | 15 ++++--- Remote/Helper/Special.hs | 4 +- Remote/WebDAV.hs | 12 +++--- RemoteDaemon/Transport/Ssh.hs | 6 +-- Test.hs | 3 +- Utility/Directory.hs | 3 +- Utility/Exception.hs | 28 +++++++++++-- Utility/FileMode.hs | 1 - Utility/Gpg.hs | 1 - Utility/Matcher.hs | 8 ++-- Utility/Parallel.hs | 1 - Utility/Tmp.hs | 15 ++++--- Utility/Url.hs | 8 ++-- Utility/WebApp.hs | 4 -- debian/control | 1 - git-annex.cabal | 5 +-- 60 files changed, 142 insertions(+), 237 deletions(-) delete mode 100644 Annex/Exception.hs diff --git a/Annex.hs b/Annex.hs index bb271c5e83..b915e852bc 100644 --- a/Annex.hs +++ b/Annex.hs @@ -64,14 +64,16 @@ import Utility.Quvi (QuviVersion) import Utility.InodeCache import "mtl" Control.Monad.Reader -import Control.Monad.Catch import Control.Concurrent import qualified Data.Map as M import qualified Data.Set as S {- git-annex's monad is a ReaderT around an AnnexState stored in a MVar. - - This allows modifying the state in an exception-safe fashion. - The MVar is not exposed outside this module. + - + - Note that when an Annex action fails and the exception is caught, + - ny changes the action has made to the AnnexState are retained, + - due to the use of the MVar to store the state. -} newtype Annex a = Annex { runAnnex :: ReaderT (MVar AnnexState) IO a } deriving ( diff --git a/Annex/Content.hs b/Annex/Content.hs index eb84f2fe9d..b51e15827b 100644 --- a/Annex/Content.hs +++ b/Annex/Content.hs @@ -56,7 +56,6 @@ import Annex.Perms import Annex.Link import Annex.Content.Direct import Annex.ReplaceFile -import Annex.Exception #ifdef mingw32_HOST_OS import Utility.WinLock @@ -167,7 +166,7 @@ lockContent key a = do contentfile <- calcRepo $ gitAnnexLocation key lockfile <- contentLockFile key maybe noop setuplockfile lockfile - bracketAnnex (liftIO $ lock contentfile lockfile) (unlock lockfile) (const a) + bracket (liftIO $ lock contentfile lockfile) (unlock lockfile) (const a) where alreadylocked = error "content is locked" setuplockfile lockfile = modifyContent lockfile $ @@ -420,7 +419,7 @@ withObjectLoc key indirect direct = ifM isDirect cleanObjectLoc :: Key -> Annex () -> Annex () cleanObjectLoc key cleaner = do file <- calcRepo $ gitAnnexLocation key - void $ tryAnnexIO $ thawContentDir file + void $ tryIO $ thawContentDir file cleaner liftIO $ removeparents file (3 :: Int) where diff --git a/Annex/Direct.hs b/Annex/Direct.hs index e6b941e0f9..3745993698 100644 --- a/Annex/Direct.hs +++ b/Annex/Direct.hs @@ -32,7 +32,6 @@ import Utility.InodeCache import Utility.CopyFile import Annex.Perms import Annex.ReplaceFile -import Annex.Exception import Annex.VariantFile import Git.Index import Annex.Index @@ -252,7 +251,7 @@ mergeDirectCleanup d oldref = do go makeabs getsha getmode a araw (f, item) | getsha item == nullSha = noop | otherwise = void $ - tryAnnex . maybe (araw item makeabs f) (\k -> void $ a item makeabs k f) + tryNonAsync . maybe (araw item makeabs f) (\k -> void $ a item makeabs k f) =<< catKey (getsha item) (getmode item) moveout _ _ = removeDirect diff --git a/Annex/Drop.hs b/Annex/Drop.hs index 71263dc618..c5a3fbe5fb 100644 --- a/Annex/Drop.hs +++ b/Annex/Drop.hs @@ -16,7 +16,6 @@ import qualified Remote import qualified Command.Drop import Command import Annex.Wanted -import Annex.Exception import Config import Annex.Content.Direct @@ -120,5 +119,5 @@ handleDropsFrom locs rs reason fromhere key afile knownpresentremote runner = do slocs = S.fromList locs - safely a = either (const False) id <$> tryAnnex a + safely a = either (const False) id <$> tryNonAsync a diff --git a/Annex/Environment.hs b/Annex/Environment.hs index 4b8d384642..bc97c17b70 100644 --- a/Annex/Environment.hs +++ b/Annex/Environment.hs @@ -13,7 +13,6 @@ import Common.Annex import Utility.UserInfo import qualified Git.Config import Config -import Annex.Exception #ifndef mingw32_HOST_OS import Utility.Env @@ -58,7 +57,7 @@ checkEnvironmentIO = {- Runs an action that commits to the repository, and if it fails, - sets user.email and user.name to a dummy value and tries the action again. -} ensureCommit :: Annex a -> Annex a -ensureCommit a = either retry return =<< tryAnnex a +ensureCommit a = either retry return =<< tryNonAsync a where retry _ = do name <- liftIO myUserName diff --git a/Annex/Exception.hs b/Annex/Exception.hs deleted file mode 100644 index 5ecbd28a07..0000000000 --- a/Annex/Exception.hs +++ /dev/null @@ -1,63 +0,0 @@ -{- exception handling in the git-annex monad - - - - Note that when an Annex action fails and the exception is handled - - by these functions, any changes the action has made to the - - AnnexState are retained. This works because the Annex monad - - internally stores the AnnexState in a MVar. - - - - Copyright 2011-2014 Joey Hess - - - - Licensed under the GNU GPL version 3 or higher. - -} - -{-# LANGUAGE PackageImports #-} -{-# LANGUAGE ScopedTypeVariables #-} - -module Annex.Exception ( - bracketIO, - bracketAnnex, - tryAnnex, - tryAnnexIO, - throwAnnex, - catchAnnex, - catchNonAsyncAnnex, - tryNonAsyncAnnex, -) where - -import qualified Control.Monad.Catch as M -import Control.Exception - -import Common.Annex - -{- Runs an Annex action, with setup and cleanup both in the IO monad. -} -bracketIO :: IO v -> (v -> IO b) -> (v -> Annex a) -> Annex a -bracketIO setup cleanup = M.bracket (liftIO setup) (liftIO . cleanup) - -bracketAnnex :: Annex v -> (v -> Annex b) -> (v -> Annex a) -> Annex a -bracketAnnex = M.bracket - -{- try in the Annex monad -} -tryAnnex :: Annex a -> Annex (Either SomeException a) -tryAnnex = M.try - -{- try in the Annex monad, but only catching IO exceptions -} -tryAnnexIO :: Annex a -> Annex (Either IOException a) -tryAnnexIO = M.try - -{- throw in the Annex monad -} -throwAnnex :: Exception e => e -> Annex a -throwAnnex = M.throwM - -{- catch in the Annex monad -} -catchAnnex :: Exception e => Annex a -> (e -> Annex a) -> Annex a -catchAnnex = M.catch - -{- catchs all exceptions except for async exceptions -} -catchNonAsyncAnnex :: Annex a -> (SomeException -> Annex a) -> Annex a -catchNonAsyncAnnex a onerr = a `M.catches` - [ M.Handler (\ (e :: AsyncException) -> throwAnnex e) - , M.Handler (\ (e :: SomeException) -> onerr e) - ] - -tryNonAsyncAnnex :: Annex a -> Annex (Either SomeException a) -tryNonAsyncAnnex a = (Right <$> a) `catchNonAsyncAnnex` (return . Left) diff --git a/Annex/Index.hs b/Annex/Index.hs index af0cab45e4..7757a412b0 100644 --- a/Annex/Index.hs +++ b/Annex/Index.hs @@ -18,7 +18,6 @@ import Common.Annex import Git.Types import qualified Annex import Utility.Env -import Annex.Exception {- Runs an action using a different git index file. -} withIndexFile :: FilePath -> Annex a -> Annex a @@ -26,7 +25,7 @@ withIndexFile f a = do g <- gitRepo g' <- liftIO $ addGitEnv g "GIT_INDEX_FILE" f - r <- tryAnnex $ do + r <- tryNonAsync $ do Annex.changeState $ \s -> s { Annex.repo = g' } a Annex.changeState $ \s -> s { Annex.repo = (Annex.repo s) { gitEnv = gitEnv g} } diff --git a/Annex/Journal.hs b/Annex/Journal.hs index f34a7be1bf..798bcba292 100644 --- a/Annex/Journal.hs +++ b/Annex/Journal.hs @@ -14,7 +14,6 @@ module Annex.Journal where import Common.Annex -import Annex.Exception import qualified Git import Annex.Perms import Annex.LockFile diff --git a/Annex/LockFile.hs b/Annex/LockFile.hs index 8114e94f2b..dc4f82f984 100644 --- a/Annex/LockFile.hs +++ b/Annex/LockFile.hs @@ -18,7 +18,6 @@ import Common.Annex import Annex import Types.LockPool import qualified Git -import Annex.Exception import Annex.Perms import qualified Data.Map as M diff --git a/Annex/Perms.hs b/Annex/Perms.hs index e3a2fa65a2..3430554c71 100644 --- a/Annex/Perms.hs +++ b/Annex/Perms.hs @@ -21,7 +21,6 @@ import Common.Annex import Utility.FileMode import Git.SharedRepository import qualified Annex -import Annex.Exception import Config import System.Posix.Types @@ -120,6 +119,6 @@ createContentDir dest = do modifyContent :: FilePath -> Annex a -> Annex a modifyContent f a = do createContentDir f -- also thaws it - v <- tryAnnex a + v <- tryNonAsync a freezeContentDir f - either throwAnnex return v + either throwM return v diff --git a/Annex/ReplaceFile.hs b/Annex/ReplaceFile.hs index e734c4d64b..8776762e97 100644 --- a/Annex/ReplaceFile.hs +++ b/Annex/ReplaceFile.hs @@ -9,7 +9,6 @@ module Annex.ReplaceFile where import Common.Annex import Annex.Perms -import Annex.Exception {- Replaces a possibly already existing file with a new version, - atomically, by running an action. @@ -31,7 +30,7 @@ replaceFileOr :: FilePath -> (FilePath -> Annex ()) -> (FilePath -> Annex ()) -> replaceFileOr file action rollback = do tmpdir <- fromRepo gitAnnexTmpMiscDir void $ createAnnexDirectory tmpdir - bracketAnnex (liftIO $ setup tmpdir) rollback $ \tmpfile -> do + bracket (liftIO $ setup tmpdir) rollback $ \tmpfile -> do action tmpfile liftIO $ catchIO (rename tmpfile file) (fallback tmpfile) where diff --git a/Annex/Transfer.hs b/Annex/Transfer.hs index 001539adcc..ebc8e8b899 100644 --- a/Annex/Transfer.hs +++ b/Annex/Transfer.hs @@ -20,7 +20,6 @@ import Common.Annex import Logs.Transfer as X import Annex.Notification as X import Annex.Perms -import Annex.Exception import Utility.Metered #ifdef mingw32_HOST_OS import Utility.WinLock @@ -103,7 +102,7 @@ runTransfer t file shouldretry a = do void $ tryIO $ removeFile $ transferLockFile tfile #endif retry oldinfo metervar run = do - v <- tryAnnex run + v <- tryNonAsync run case v of Right b -> return b Left e -> do diff --git a/Annex/View.hs b/Annex/View.hs index b969816128..a1d873f500 100644 --- a/Annex/View.hs +++ b/Annex/View.hs @@ -410,19 +410,19 @@ withViewChanges addmeta removemeta = do where handleremovals item | DiffTree.srcsha item /= nullSha = - handle item removemeta + handlechange item removemeta =<< catKey (DiffTree.srcsha item) (DiffTree.srcmode item) | otherwise = noop handleadds makeabs item | DiffTree.dstsha item /= nullSha = - handle item addmeta + handlechange item addmeta =<< ifM isDirect ( catKey (DiffTree.dstsha item) (DiffTree.dstmode item) -- optimisation , isAnnexLink $ makeabs $ DiffTree.file item ) | otherwise = noop - handle item a = maybe noop + handlechange item a = maybe noop (void . commandAction . a (getTopFilePath $ DiffTree.file item)) {- Generates a branch for a view. This is done using a different index diff --git a/Assistant/Pairing/Network.hs b/Assistant/Pairing/Network.hs index 6c625f8814..4bb6088b11 100644 --- a/Assistant/Pairing/Network.hs +++ b/Assistant/Pairing/Network.hs @@ -20,7 +20,6 @@ import Utility.Verifiable import Network.Multicast import Network.Info import Network.Socket -import Control.Exception (bracket) import qualified Data.Map as M import Control.Concurrent diff --git a/Assistant/Threads/Committer.hs b/Assistant/Threads/Committer.hs index afe4aa1442..4a47a9e2c9 100644 --- a/Assistant/Threads/Committer.hs +++ b/Assistant/Threads/Committer.hs @@ -27,7 +27,6 @@ import qualified Utility.Lsof as Lsof import qualified Utility.DirWatcher as DirWatcher import Types.KeySource import Config -import Annex.Exception import Annex.Content import Annex.Link import Annex.CatFile @@ -217,7 +216,7 @@ commitStaged :: Annex Bool commitStaged = do {- This could fail if there's another commit being made by - something else. -} - v <- tryAnnex Annex.Queue.flush + v <- tryNonAsync Annex.Queue.flush case v of Left _ -> return False Right _ -> do diff --git a/Assistant/Threads/Cronner.hs b/Assistant/Threads/Cronner.hs index 55b3ca2f10..0fe7f58f4c 100644 --- a/Assistant/Threads/Cronner.hs +++ b/Assistant/Threads/Cronner.hs @@ -191,10 +191,10 @@ runActivity' urlrenderer (ScheduledSelfFsck _ d) = do mapM_ reget =<< liftAnnex (dirKeys gitAnnexBadDir) where reget k = queueTransfers "fsck found bad file; redownloading" Next k Nothing Download -runActivity' urlrenderer (ScheduledRemoteFsck u s d) = handle =<< liftAnnex (remoteFromUUID u) +runActivity' urlrenderer (ScheduledRemoteFsck u s d) = dispatch =<< liftAnnex (remoteFromUUID u) where - handle Nothing = debug ["skipping remote fsck of uuid without a configured remote", fromUUID u, fromSchedule s] - handle (Just rmt) = void $ case Remote.remoteFsck rmt of + dispatch Nothing = debug ["skipping remote fsck of uuid without a configured remote", fromUUID u, fromSchedule s] + dispatch (Just rmt) = void $ case Remote.remoteFsck rmt of Nothing -> go rmt $ do program <- readProgramFile void $ batchCommand program $ diff --git a/Assistant/Threads/SanityChecker.hs b/Assistant/Threads/SanityChecker.hs index b623183823..dce2c2db75 100644 --- a/Assistant/Threads/SanityChecker.hs +++ b/Assistant/Threads/SanityChecker.hs @@ -40,7 +40,6 @@ import Logs.Transfer import Config.Files import Utility.DiskFree import qualified Annex -import Annex.Exception #ifdef WITH_WEBAPP import Assistant.WebApp.Types #endif @@ -85,7 +84,7 @@ sanityCheckerStartupThread startupdelay = namedThreadUnchecked "SanityCheckerSta liftIO $ fixUpSshRemotes {- Clean up old temp files. -} - void $ liftAnnex $ tryAnnex $ do + void $ liftAnnex $ tryNonAsync $ do cleanOldTmpMisc cleanReallyOldTmp diff --git a/Assistant/Threads/Watcher.hs b/Assistant/Threads/Watcher.hs index 91e0fc6196..fe9a95471b 100644 --- a/Assistant/Threads/Watcher.hs +++ b/Assistant/Threads/Watcher.hs @@ -104,13 +104,13 @@ runWatcher = do , errHook = errhook } scanevents <- liftAnnex $ annexStartupScan <$> Annex.getGitConfig - handle <- liftIO $ watchDir "." ignored scanevents hooks startup + h <- liftIO $ watchDir "." ignored scanevents hooks startup debug [ "watching", "."] {- Let the DirWatcher thread run until signalled to pause it, - then wait for a resume signal, and restart. -} waitFor PauseWatcher $ do - liftIO $ stopWatchDir handle + liftIO $ stopWatchDir h waitFor ResumeWatcher runWatcher where hook a = Just <$> asIO2 (runHandler a) diff --git a/Assistant/Threads/XMPPClient.hs b/Assistant/Threads/XMPPClient.hs index 39b0459b7b..2f70b508f6 100644 --- a/Assistant/Threads/XMPPClient.hs +++ b/Assistant/Threads/XMPPClient.hs @@ -117,7 +117,7 @@ xmppClient urlrenderer d creds xmppuuid = void $ liftIO $ atomically . swapTMVar lasttraffic =<< getCurrentTime inAssistant $ debug ["received:", show $ map logXMPPEvent l] - mapM_ (handle selfjid) l + mapM_ (handlemsg selfjid) l sendpings selfjid lasttraffic = forever $ do putStanza pingstanza @@ -133,21 +133,21 @@ xmppClient urlrenderer d creds xmppuuid = - cause traffic, so good enough. -} pingstanza = xmppPing selfjid - handle selfjid (PresenceMessage p) = do + handlemsg selfjid (PresenceMessage p) = do void $ inAssistant $ updateBuddyList (updateBuddies p) <<~ buddyList resendImportantMessages selfjid p - handle _ (GotNetMessage QueryPresence) = putStanza gitAnnexSignature - handle _ (GotNetMessage (NotifyPush us)) = void $ inAssistant $ pull us - handle selfjid (GotNetMessage (PairingNotification stage c u)) = + handlemsg _ (GotNetMessage QueryPresence) = putStanza gitAnnexSignature + handlemsg _ (GotNetMessage (NotifyPush us)) = void $ inAssistant $ pull us + handlemsg selfjid (GotNetMessage (PairingNotification stage c u)) = maybe noop (inAssistant . pairMsgReceived urlrenderer stage u selfjid) (parseJID c) - handle _ (GotNetMessage m@(Pushing _ pushstage)) + handlemsg _ (GotNetMessage m@(Pushing _ pushstage)) | isPushNotice pushstage = inAssistant $ handlePushNotice m | isPushInitiation pushstage = inAssistant $ queuePushInitiation m | otherwise = inAssistant $ storeInbox m - handle _ (Ignorable _) = noop - handle _ (Unknown _) = noop - handle _ (ProtocolError _) = noop + handlemsg _ (Ignorable _) = noop + handlemsg _ (Unknown _) = noop + handlemsg _ (ProtocolError _) = noop resendImportantMessages selfjid (Presence { presenceFrom = Just jid }) = do let c = formatJID jid diff --git a/Assistant/XMPP/Client.hs b/Assistant/XMPP/Client.hs index 677bb2ff31..314ace64ab 100644 --- a/Assistant/XMPP/Client.hs +++ b/Assistant/XMPP/Client.hs @@ -15,7 +15,6 @@ import Network.Protocol.XMPP import Network import Control.Concurrent import qualified Data.Text as T -import Control.Exception (SomeException) {- Everything we need to know to connect to an XMPP server. -} data XMPPCreds = XMPPCreds @@ -34,18 +33,18 @@ connectXMPP c a = case parseJID (xmppJID c) of {- Do a SRV lookup, but if it fails, fall back to the cached xmppHostname. -} connectXMPP' :: JID -> XMPPCreds -> (JID -> XMPP a) -> IO [(HostPort, Either SomeException ())] -connectXMPP' jid c a = reverse <$> (handle =<< lookupSRV srvrecord) +connectXMPP' jid c a = reverse <$> (handlesrv =<< lookupSRV srvrecord) where srvrecord = mkSRVTcp "xmpp-client" $ T.unpack $ strDomain $ jidDomain jid serverjid = JID Nothing (jidDomain jid) Nothing - handle [] = do + handlesrv [] = do let h = xmppHostname c let p = PortNumber $ fromIntegral $ xmppPort c r <- run h p $ a jid return [r] - handle srvs = go [] srvs + handlesrv srvs = go [] srvs go l [] = return l go l ((h,p):rest) = do diff --git a/Assistant/XMPP/Git.hs b/Assistant/XMPP/Git.hs index 301aa71853..19050c7d01 100644 --- a/Assistant/XMPP/Git.hs +++ b/Assistant/XMPP/Git.hs @@ -150,16 +150,16 @@ xmppPush cid gitpush = do SendPackOutput seqnum' b toxmpp seqnum' inh - fromxmpp outh controlh = withPushMessagesInSequence cid SendPack handle + fromxmpp outh controlh = withPushMessagesInSequence cid SendPack handlemsg where - handle (Just (Pushing _ (ReceivePackOutput _ b))) = + handlemsg (Just (Pushing _ (ReceivePackOutput _ b))) = liftIO $ writeChunk outh b - handle (Just (Pushing _ (ReceivePackDone exitcode))) = + handlemsg (Just (Pushing _ (ReceivePackDone exitcode))) = liftIO $ do hPrint controlh exitcode hFlush controlh - handle (Just _) = noop - handle Nothing = do + handlemsg (Just _) = noop + handlemsg Nothing = do debug ["timeout waiting for git receive-pack output via XMPP"] -- Send a synthetic exit code to git-annex -- xmppgit, which will exit and cause git push @@ -264,12 +264,12 @@ xmppReceivePack cid = do let seqnum' = succ seqnum sendNetMessage $ Pushing cid $ ReceivePackOutput seqnum' b relaytoxmpp seqnum' outh - relayfromxmpp inh = withPushMessagesInSequence cid ReceivePack handle + relayfromxmpp inh = withPushMessagesInSequence cid ReceivePack handlemsg where - handle (Just (Pushing _ (SendPackOutput _ b))) = + handlemsg (Just (Pushing _ (SendPackOutput _ b))) = liftIO $ writeChunk inh b - handle (Just _) = noop - handle Nothing = do + handlemsg (Just _) = noop + handlemsg Nothing = do debug ["timeout waiting for git send-pack output via XMPP"] -- closing the handle will make git receive-pack exit liftIO $ do diff --git a/CmdLine/Action.hs b/CmdLine/Action.hs index 247c658bcd..db4f768ac3 100644 --- a/CmdLine/Action.hs +++ b/CmdLine/Action.hs @@ -13,7 +13,6 @@ import Common.Annex import qualified Annex import Types.Command import qualified Annex.Queue -import Annex.Exception type CommandActionRunner = CommandStart -> CommandCleanup @@ -37,14 +36,14 @@ performCommandAction Command { cmdseek = seek, cmdcheck = c, cmdname = name } pa - - This should only be run in the seek stage. -} commandAction :: CommandActionRunner -commandAction a = handle =<< tryAnnexIO go +commandAction a = account =<< tryIO go where go = do Annex.Queue.flushWhenFull callCommandAction a - handle (Right True) = return True - handle (Right False) = incerr - handle (Left err) = do + account (Right True) = return True + account (Right False) = incerr + account (Left err) = do showErr err showEndFail incerr diff --git a/Command/Add.hs b/Command/Add.hs index ae895464e9..5c70545430 100644 --- a/Command/Add.hs +++ b/Command/Add.hs @@ -10,7 +10,6 @@ module Command.Add where import Common.Annex -import Annex.Exception import Command import Types.KeySource import Backend @@ -33,6 +32,8 @@ import Annex.FileMatcher import Annex.ReplaceFile import Utility.Tmp +import Control.Exception (IOException) + def :: [Command] def = [notBareRepo $ withOptions [includeDotFilesOption] $ command "add" paramPaths seek SectionCommon @@ -103,7 +104,7 @@ lockDown = either (\e -> showErr e >> return Nothing) (return . Just) <=< lockDo lockDown' :: FilePath -> Annex (Either IOException KeySource) lockDown' file = ifM crippledFileSystem ( withTSDelta $ liftIO . tryIO . nohardlink - , tryAnnexIO $ do + , tryIO $ do tmp <- fromRepo gitAnnexTmpMiscDir createAnnexDirectory tmp go tmp @@ -167,7 +168,7 @@ ingest (Just source) = withTSDelta $ \delta -> do ) goindirect (Just (key, _)) mcache ms = do - catchAnnex (moveAnnex key $ contentLocation source) + catchNonAsync (moveAnnex key $ contentLocation source) (undo (keyFilename source) key) maybe noop (genMetaData key (keyFilename source)) ms liftIO $ nukeFile $ keyFilename source @@ -206,23 +207,23 @@ perform file = lockDown file >>= ingest >>= go {- On error, put the file back so it doesn't seem to have vanished. - This can be called before or after the symlink is in place. -} -undo :: FilePath -> Key -> IOException -> Annex a +undo :: FilePath -> Key -> SomeException -> Annex a undo file key e = do whenM (inAnnex key) $ do liftIO $ nukeFile file - catchAnnex (fromAnnex key file) tryharder + catchNonAsync (fromAnnex key file) tryharder logStatus key InfoMissing - throwAnnex e + throwM e where -- fromAnnex could fail if the file ownership is weird - tryharder :: IOException -> Annex () + tryharder :: SomeException -> Annex () tryharder _ = do src <- calcRepo $ gitAnnexLocation key liftIO $ moveFile src file {- Creates the symlink to the annexed content, returns the link target. -} link :: FilePath -> Key -> Maybe InodeCache -> Annex String -link file key mcache = flip catchAnnex (undo file key) $ do +link file key mcache = flip catchNonAsync (undo file key) $ do l <- inRepo $ gitAnnexLink file key replaceFile file $ makeAnnexLink l diff --git a/Command/Direct.hs b/Command/Direct.hs index a5165a4a2a..c64ef6e562 100644 --- a/Command/Direct.hs +++ b/Command/Direct.hs @@ -7,8 +7,6 @@ module Command.Direct where -import Control.Exception.Extensible - import Common.Annex import Command import qualified Git @@ -16,7 +14,6 @@ import qualified Git.LsFiles import qualified Git.Branch import Config import Annex.Direct -import Annex.Exception def :: [Command] def = [notBareRepo $ noDaemonRunning $ @@ -52,7 +49,7 @@ perform = do Nothing -> noop Just a -> do showStart "direct" f - r' <- tryAnnex a + r' <- tryNonAsync a case r' of Left e -> warnlocked e Right _ -> showEndOk diff --git a/Command/FuzzTest.hs b/Command/FuzzTest.hs index d673541fbb..7075aeddcc 100644 --- a/Command/FuzzTest.hs +++ b/Command/FuzzTest.hs @@ -13,7 +13,6 @@ import Command import qualified Git.Config import Config import Utility.ThreadScheduler -import Annex.Exception import Utility.DiskFree import Data.Time.Clock @@ -56,7 +55,7 @@ fuzz :: Handle -> Annex () fuzz logh = do action <- genFuzzAction record logh $ flip Started action - result <- tryAnnex $ runFuzzAction action + result <- tryNonAsync $ runFuzzAction action record logh $ flip Finished $ either (const False) (const True) result diff --git a/Command/Indirect.hs b/Command/Indirect.hs index 4ce4c2c383..e146f13b79 100644 --- a/Command/Indirect.hs +++ b/Command/Indirect.hs @@ -7,8 +7,6 @@ module Command.Indirect where -import Control.Exception.Extensible - import Common.Annex import Command import qualified Git @@ -21,7 +19,6 @@ import Annex.Direct import Annex.Content import Annex.Content.Direct import Annex.CatFile -import Annex.Exception import Annex.Init import qualified Command.Add @@ -88,12 +85,12 @@ perform = do removeInodeCache k removeAssociatedFiles k whenM (liftIO $ not . isSymbolicLink <$> getSymbolicLinkStatus f) $ do - v <-tryAnnexIO (moveAnnex k f) + v <- tryNonAsync (moveAnnex k f) case v of Right _ -> do l <- inRepo $ gitAnnexLink f k liftIO $ createSymbolicLink l f - Left e -> catchAnnex (Command.Add.undo f k e) + Left e -> catchNonAsync (Command.Add.undo f k e) warnlocked showEndOk diff --git a/Command/Map.hs b/Command/Map.hs index 5a32d7f524..a62c3e1ad9 100644 --- a/Command/Map.hs +++ b/Command/Map.hs @@ -7,7 +7,6 @@ module Command.Map where -import Control.Exception.Extensible import qualified Data.Map as M import Common.Annex @@ -247,7 +246,7 @@ combineSame = map snd . nubBy sameuuid . map pair safely :: IO Git.Repo -> IO (Maybe Git.Repo) safely a = do - result <- try a :: IO (Either SomeException Git.Repo) + result <- tryNonAsync a case result of Left _ -> return Nothing Right r' -> return $ Just r' diff --git a/Command/Move.hs b/Command/Move.hs index 396ea4afce..3d9646dea8 100644 --- a/Command/Move.hs +++ b/Command/Move.hs @@ -152,17 +152,17 @@ fromOk src key = go =<< Annex.getState Annex.force fromPerform :: Remote -> Bool -> Key -> AssociatedFile -> CommandPerform fromPerform src move key afile = moveLock move key $ ifM (inAnnex key) - ( handle move True - , handle move =<< go + ( dispatch move True + , dispatch move =<< go ) where go = notifyTransfer Download afile $ download (Remote.uuid src) key afile noRetry $ \p -> do showAction $ "from " ++ Remote.name src getViaTmp key $ \t -> Remote.retrieveKeyFile src key afile t p - handle _ False = stop -- failed - handle False True = next $ return True -- copy complete - handle True True = do -- finish moving + dispatch _ False = stop -- failed + dispatch False True = next $ return True -- copy complete + dispatch True True = do -- finish moving ok <- Remote.removeKey src key next $ Command.Drop.cleanupRemote key src ok diff --git a/Command/PreCommit.hs b/Command/PreCommit.hs index 412b9ae08e..09ff042aa6 100644 --- a/Command/PreCommit.hs +++ b/Command/PreCommit.hs @@ -19,7 +19,6 @@ import Annex.Hook import Annex.View import Annex.View.ViewedFile import Annex.Perms -import Annex.Exception import Logs.View import Logs.MetaData import Types.View diff --git a/Command/TestRemote.hs b/Command/TestRemote.hs index 463c4d3595..cb36b66bad 100644 --- a/Command/TestRemote.hs +++ b/Command/TestRemote.hs @@ -31,7 +31,6 @@ import Locations import Test.Tasty import Test.Tasty.Runners import Test.Tasty.HUnit -import Control.Exception import "crypto-api" Crypto.Random import qualified Data.ByteString as B import qualified Data.ByteString.Lazy as L diff --git a/Command/Vicfg.hs b/Command/Vicfg.hs index 5ec6bbf723..1f16955360 100644 --- a/Command/Vicfg.hs +++ b/Command/Vicfg.hs @@ -217,7 +217,7 @@ parseCfg curcfg = go [] curcfg . lines | null l = Right cfg | "#" `isPrefixOf` l = Right cfg | null setting || null f = Left "missing field" - | otherwise = handle cfg f setting value' + | otherwise = parsed cfg f setting value' where (setting, rest) = separate isSpace l (r, value) = separate (== '=') rest @@ -225,7 +225,7 @@ parseCfg curcfg = go [] curcfg . lines f = reverse $ trimspace $ reverse $ trimspace r trimspace = dropWhile isSpace - handle cfg f setting value + parsed cfg f setting value | setting == "trust" = case readTrustLevel value of Nothing -> badval "trust value" value Just t -> diff --git a/Common.hs b/Common.hs index 0f3dc71d04..76e8d5133c 100644 --- a/Common.hs +++ b/Common.hs @@ -6,7 +6,6 @@ import Control.Monad as X import Control.Monad.IfElse as X import Control.Applicative as X import "mtl" Control.Monad.State.Strict as X (liftIO) -import Control.Exception.Extensible as X (IOException) import Data.Maybe as X import Data.List as X hiding (head, tail, init, last) diff --git a/Crypto.hs b/Crypto.hs index 10d6e5cef4..8d4d4f04fa 100644 --- a/Crypto.hs +++ b/Crypto.hs @@ -38,7 +38,6 @@ import Data.ByteString.Lazy.UTF8 (fromString) import Control.Applicative import qualified Data.Map as M import Control.Monad.IO.Class -import Control.Monad.Catch (MonadMask) import Common.Annex import qualified Utility.Gpg as Gpg diff --git a/Git/Config.hs b/Git/Config.hs index d998fd1e29..171c3e6c65 100644 --- a/Git/Config.hs +++ b/Git/Config.hs @@ -9,7 +9,6 @@ module Git.Config where import qualified Data.Map as M import Data.Char -import Control.Exception.Extensible import Common import Git diff --git a/Git/UpdateIndex.hs b/Git/UpdateIndex.hs index 7de2f1be34..ecd154aa04 100644 --- a/Git/UpdateIndex.hs +++ b/Git/UpdateIndex.hs @@ -29,8 +29,6 @@ import Git.Command import Git.FilePath import Git.Sha -import Control.Exception (bracket) - {- Streamers are passed a callback and should feed it lines in the form - read by update-index, and generated by ls-tree. -} type Streamer = (String -> IO ()) -> IO () diff --git a/Limit.hs b/Limit.hs index 9ac849bcec..89dd9d33e1 100644 --- a/Limit.hs +++ b/Limit.hs @@ -152,8 +152,8 @@ limitCopies want = case split ":" want of go num good = case readish num of Nothing -> Left "bad number for copies" Just n -> Right $ \notpresent -> checkKey $ - handle n good notpresent - handle n good notpresent key = do + go' n good notpresent + go' n good notpresent key = do us <- filter (`S.notMember` notpresent) <$> (filterM good =<< Remote.keyLocations key) return $ length us >= n @@ -170,10 +170,10 @@ addLackingCopies approx = addLimit . limitLackingCopies approx limitLackingCopies :: Bool -> MkLimit Annex limitLackingCopies approx want = case readish want of Just needed -> Right $ \notpresent mi -> flip checkKey mi $ - handle mi needed notpresent + go mi needed notpresent Nothing -> Left "bad value for number of lacking copies" where - handle mi needed notpresent key = do + go mi needed notpresent key = do NumCopies numcopies <- if approx then approxNumCopies else case mi of diff --git a/Logs/Transfer.hs b/Logs/Transfer.hs index c96d9cd1e7..b6279ccbab 100644 --- a/Logs/Transfer.hs +++ b/Logs/Transfer.hs @@ -11,7 +11,6 @@ module Logs.Transfer where import Common.Annex import Annex.Perms -import Annex.Exception import qualified Git import Types.Key import Utility.Metered @@ -94,7 +93,7 @@ percentComplete (Transfer { transferKey = key }) info = mkProgressUpdater :: Transfer -> TransferInfo -> Annex (MeterUpdate, FilePath, MVar Integer) mkProgressUpdater t info = do tfile <- fromRepo $ transferFile t - _ <- tryAnnex $ createAnnexDirectory $ takeDirectory tfile + _ <- tryNonAsync $ createAnnexDirectory $ takeDirectory tfile mvar <- liftIO $ newMVar 0 return (liftIO . updater tfile mvar, tfile, mvar) where diff --git a/Messages.hs b/Messages.hs index 9f473110ac..f27755f3a4 100644 --- a/Messages.hs +++ b/Messages.hs @@ -47,7 +47,7 @@ import System.Log.Handler (setFormatter, LogHandler) import System.Log.Handler.Simple import qualified Data.Set as S -import Common +import Common hiding (handle) import Types import Types.Messages import qualified Messages.JSON as JSON diff --git a/Remote.hs b/Remote.hs index 5ee75823f5..8a8eb64df0 100644 --- a/Remote.hs +++ b/Remote.hs @@ -56,7 +56,6 @@ import Data.Ord import Common.Annex import Types.Remote import qualified Annex -import Annex.Exception import Annex.UUID import Logs.UUID import Logs.Trust @@ -114,10 +113,10 @@ byNameWithUUID = checkuuid <=< byName byName' :: RemoteName -> Annex (Either String Remote) byName' "" = return $ Left "no remote specified" -byName' n = handle . filter matching <$> remoteList +byName' n = go . filter matching <$> remoteList where - handle [] = Left $ "there is no available git remote named \"" ++ n ++ "\"" - handle (match:_) = Right match + go [] = Left $ "there is no available git remote named \"" ++ n ++ "\"" + go (match:_) = Right match matching r = n == name r || toUUID n == uuid r {- Only matches remote name, not UUID -} @@ -315,8 +314,7 @@ isXMPPRemote remote = Git.repoIsUrl r && "xmpp::" `isPrefixOf` Git.repoLocation r = repo remote hasKey :: Remote -> Key -> Annex (Either String Bool) -hasKey r k = either (Left . show) Right - <$> tryNonAsyncAnnex (checkPresent r k) +hasKey r k = either (Left . show) Right <$> tryNonAsync (checkPresent r k) hasKeyCheap :: Remote -> Bool hasKeyCheap = checkPresentCheap diff --git a/Remote/Ddar.hs b/Remote/Ddar.hs index fba05312be..beeb4d7cc0 100644 --- a/Remote/Ddar.hs +++ b/Remote/Ddar.hs @@ -8,7 +8,6 @@ module Remote.Ddar (remote) where -import Control.Exception import qualified Data.Map as M import qualified Data.ByteString.Lazy as L import System.IO.Error diff --git a/Remote/External.hs b/Remote/External.hs index f326f26ba5..4fb760afdc 100644 --- a/Remote/External.hs +++ b/Remote/External.hs @@ -21,7 +21,6 @@ import Logs.PreferredContent.Raw import Logs.RemoteState import Config.Cost import Annex.UUID -import Annex.Exception import Creds import Control.Concurrent.STM @@ -137,7 +136,7 @@ checkKey external k = either error id <$> go _ -> Nothing safely :: Annex Bool -> Annex Bool -safely a = go =<< tryAnnex a +safely a = go =<< tryNonAsync a where go (Right r) = return r go (Left e) = do diff --git a/Remote/External/Types.hs b/Remote/External/Types.hs index 983764f701..3a69ae9ea6 100644 --- a/Remote/External/Types.hs +++ b/Remote/External/Types.hs @@ -32,7 +32,6 @@ module Remote.External.Types ( ) where import Common.Annex -import Annex.Exception import Types.Key (file2key, key2file) import Types.StandardGroups (PreferredContentExpression) import Utility.Metered (BytesProcessed(..)) diff --git a/Remote/GCrypt.hs b/Remote/GCrypt.hs index 55a7758112..8891977f73 100644 --- a/Remote/GCrypt.hs +++ b/Remote/GCrypt.hs @@ -15,7 +15,7 @@ module Remote.GCrypt ( import qualified Data.Map as M import qualified Data.ByteString.Lazy as L -import Control.Exception.Extensible +import Control.Exception import Common.Annex import Types.Remote diff --git a/Remote/Git.hs b/Remote/Git.hs index da5ca4c4a5..34c60d98fe 100644 --- a/Remote/Git.hs +++ b/Remote/Git.hs @@ -27,7 +27,6 @@ import qualified Annex import Logs.Presence import Annex.Transfer import Annex.UUID -import Annex.Exception import qualified Annex.Content import qualified Annex.BranchState import qualified Annex.Branch @@ -56,7 +55,6 @@ import Creds import Control.Concurrent import Control.Concurrent.MSampleVar import qualified Data.Map as M -import Control.Exception.Extensible remote :: RemoteType remote = RemoteType { @@ -281,7 +279,7 @@ tryGitConfigRead r s <- Annex.new r Annex.eval s $ do Annex.BranchState.disableUpdate - void $ tryAnnex $ ensureInitialized + void $ tryNonAsync $ ensureInitialized Annex.getState Annex.repo {- Checks if a given remote has the content for a key in its annex. -} diff --git a/Remote/Helper/Chunked.hs b/Remote/Helper/Chunked.hs index 953c533b66..5e4ea111f6 100644 --- a/Remote/Helper/Chunked.hs +++ b/Remote/Helper/Chunked.hs @@ -24,7 +24,6 @@ import Logs.Chunk import Utility.Metered import Crypto (EncKey) import Backend (isStableKey) -import Annex.Exception import qualified Data.ByteString.Lazy as L import qualified Data.Map as M @@ -172,7 +171,7 @@ seekResume h chunkkeys checker = do liftIO $ hSeek h AbsoluteSeek sz return (cks, toBytesProcessed sz) | otherwise = do - v <- tryNonAsyncAnnex (checker k) + v <- tryNonAsync (checker k) case v of Right True -> check pos' cks' sz @@ -231,7 +230,7 @@ retrieveChunks retriever u chunkconfig encryptor basek dest basep sink -- Optimisation: Try the unchunked key first, to avoid -- looking in the git-annex branch for chunk counts -- that are likely not there. - getunchunked `catchNonAsyncAnnex` + getunchunked `catchNonAsync` const (go =<< chunkKeysOnly u basek) | otherwise = go =<< chunkKeys u chunkconfig basek where @@ -241,7 +240,7 @@ retrieveChunks retriever u chunkconfig encryptor basek dest basep sink let ls' = maybe ls (setupResume ls) currsize if any null ls' then return True -- dest is already complete - else firstavail currsize ls' `catchNonAsyncAnnex` giveup + else firstavail currsize ls' `catchNonAsync` giveup giveup e = do warning (show e) @@ -251,20 +250,20 @@ retrieveChunks retriever u chunkconfig encryptor basek dest basep sink firstavail currsize ([]:ls) = firstavail currsize ls firstavail currsize ((k:ks):ls) | k == basek = getunchunked - `catchNonAsyncAnnex` (const $ firstavail currsize ls) + `catchNonAsync` (const $ firstavail currsize ls) | otherwise = do let offset = resumeOffset currsize k let p = maybe basep (offsetMeterUpdate basep . toBytesProcessed) offset - v <- tryNonAsyncAnnex $ + v <- tryNonAsync $ retriever (encryptor k) p $ \content -> bracketIO (maybe opennew openresume offset) hClose $ \h -> do void $ tosink (Just h) p content let sz = toBytesProcessed $ fromMaybe 0 $ keyChunkSize k getrest p h sz sz ks - `catchNonAsyncAnnex` giveup + `catchNonAsync` giveup case v of Left e | null ls -> giveup e @@ -372,7 +371,7 @@ checkPresentChunks checker u chunkconfig encryptor basek Right False -> return $ Right False Left e -> return $ Left $ show e - check = tryNonAsyncAnnex . checker . encryptor + check = tryNonAsync . checker . encryptor {- A key can be stored in a remote unchunked, or as a list of chunked keys. - This can be the case whether or not the remote is currently configured diff --git a/Remote/Helper/Special.hs b/Remote/Helper/Special.hs index fc0e11d2f3..ba9ff4fb40 100644 --- a/Remote/Helper/Special.hs +++ b/Remote/Helper/Special.hs @@ -42,13 +42,11 @@ import Remote.Helper.Chunked as X import Remote.Helper.Encryptable as X import Remote.Helper.Messages import Annex.Content -import Annex.Exception import qualified Git import qualified Git.Command import qualified Git.Construct import qualified Data.ByteString.Lazy as L -import Control.Exception (bracket) import qualified Data.Map as M {- Special remotes don't have a configured url, so Git.Repo does not @@ -174,7 +172,7 @@ specialRemote' cfg c preparestorer prepareretriever prepareremover preparecheckp cip = cipherKey c gpgopts = getGpgEncParams encr - safely a = catchNonAsyncAnnex a (\e -> warning (show e) >> return False) + safely a = catchNonAsync a (\e -> warning (show e) >> return False) -- chunk, then encrypt, then feed to the storer storeKeyGen k p enc = safely $ preparestorer k $ safely . go diff --git a/Remote/WebDAV.hs b/Remote/WebDAV.hs index b70001ddba..4caebaf214 100644 --- a/Remote/WebDAV.hs +++ b/Remote/WebDAV.hs @@ -14,10 +14,10 @@ import qualified Data.Map as M import qualified Data.ByteString.Lazy as L import qualified Data.ByteString.UTF8 as B8 import qualified Data.ByteString.Lazy.UTF8 as L8 -import qualified Control.Exception.Lifted as EL import Network.HTTP.Client (HttpException(..)) import Network.HTTP.Types import System.IO.Error +import Control.Monad.Catch import Common.Annex import Types.Remote @@ -31,7 +31,6 @@ import Creds import Utility.Metered import Utility.Url (URLString) import Annex.UUID -import Annex.Exception import Remote.WebDAV.DavLocation remote :: RemoteType @@ -301,11 +300,11 @@ moveDAV baseurl src dest = inLocation src $ moveContentM newurl newurl = B8.fromString (locationUrl baseurl dest) existsDAV :: DavLocation -> DAVT IO (Either String Bool) -existsDAV l = inLocation l check `EL.catch` (\(e :: EL.SomeException) -> return (Left $ show e)) +existsDAV l = inLocation l check `catchNonAsync` (\e -> return (Left $ show e)) where check = do setDepth Nothing - EL.catchJust + catchJust (matchStatusCodeException notFound404) (getPropsM >> ispresent True) (const $ ispresent False) @@ -319,8 +318,7 @@ matchStatusCodeException _ _ = Nothing -- Ignores any exceptions when performing a DAV action. safely :: DAVT IO a -> DAVT IO (Maybe a) -safely a = (Just <$> a) - `EL.catch` (\(_ :: EL.SomeException) -> return Nothing) +safely = eitherToMaybe <$$> tryNonAsync choke :: IO (Either String a) -> IO a choke f = do @@ -336,7 +334,7 @@ withDAVHandle r a = do mcreds <- getCreds (config r) (uuid r) case (mcreds, configUrl r) of (Just (user, pass), Just baseurl) -> - bracketIO (mkDAVContext baseurl) closeDAVContext $ \ctx -> + withDAVContext baseurl $ \ctx -> a (Just (DavHandle ctx (toDavUser user) (toDavPass pass) baseurl)) _ -> a Nothing diff --git a/RemoteDaemon/Transport/Ssh.hs b/RemoteDaemon/Transport/Ssh.hs index 65c3138525..db6b6127cc 100644 --- a/RemoteDaemon/Transport/Ssh.hs +++ b/RemoteDaemon/Transport/Ssh.hs @@ -108,10 +108,10 @@ data Status = Stopping | ConnectionClosed {- Make connection robustly, with exponentioal backoff on failure. -} robustly :: Int -> IO Status -> IO () -robustly backoff a = handle =<< catchDefaultIO ConnectionClosed a +robustly backoff a = caught =<< catchDefaultIO ConnectionClosed a where - handle Stopping = return () - handle ConnectionClosed = do + caught Stopping = return () + caught ConnectionClosed = do threadDelaySeconds (Seconds backoff) robustly increasedbackoff a diff --git a/Test.hs b/Test.hs index 5032038ad9..9a34835cc9 100644 --- a/Test.hs +++ b/Test.hs @@ -20,7 +20,6 @@ import Options.Applicative hiding (command) #if MIN_VERSION_optparse_applicative(0,8,0) import qualified Options.Applicative.Types as Opt #endif -import Control.Exception.Extensible import qualified Data.Map as M import qualified Text.JSON @@ -1444,7 +1443,7 @@ indir testenv dir a = do (try a::IO (Either SomeException ())) case r of Right () -> return () - Left e -> throw e + Left e -> throwM e setuprepo :: TestEnv -> FilePath -> IO FilePath setuprepo testenv dir = do diff --git a/Utility/Directory.hs b/Utility/Directory.hs index ade5ef8111..a4429d5b96 100644 --- a/Utility/Directory.hs +++ b/Utility/Directory.hs @@ -11,7 +11,6 @@ module Utility.Directory where import System.IO.Error import System.Directory -import Control.Exception (throw, bracket) import Control.Monad import Control.Monad.IfElse import System.FilePath @@ -114,7 +113,7 @@ moveFile src dest = tryIO (rename src dest) >>= onrename whenM (isdir dest) rethrow viaTmp mv dest undefined where - rethrow = throw e + rethrow = throwM e mv tmp _ = do ok <- boolSystem "mv" [Param "-f", Param src, Param tmp] unless ok $ do diff --git a/Utility/Exception.hs b/Utility/Exception.hs index 13c9d508a4..802e9e24b2 100644 --- a/Utility/Exception.hs +++ b/Utility/Exception.hs @@ -7,11 +7,25 @@ {-# LANGUAGE ScopedTypeVariables #-} -module Utility.Exception where +module Utility.Exception ( + module X, + catchBoolIO, + catchMaybeIO, + catchDefaultIO, + catchMsgIO, + catchIO, + tryIO, + bracketIO, + catchNonAsync, + tryNonAsync, + tryWhenExists, +) where +import Control.Monad.Catch as X hiding (Handler) +import qualified Control.Monad.Catch as M import Control.Exception (IOException, AsyncException) -import Control.Monad.Catch import Control.Monad +import Control.Monad.IO.Class (liftIO, MonadIO) import System.IO.Error (isDoesNotExistError) import Utility.Data @@ -44,14 +58,20 @@ catchIO = catch tryIO :: MonadCatch m => m a -> m (Either IOException a) tryIO = try +{- bracket with setup and cleanup actions lifted to IO. + - + - Note that unlike catchIO and tryIO, this catches all exceptions. -} +bracketIO :: (MonadMask m, MonadIO m) => IO v -> (v -> IO b) -> (v -> m a) -> m a +bracketIO setup cleanup = bracket (liftIO setup) (liftIO . cleanup) + {- Catches all exceptions except for async exceptions. - This is often better to use than catching them all, so that - ThreadKilled and UserInterrupt get through. -} catchNonAsync :: MonadCatch m => m a -> (SomeException -> m a) -> m a catchNonAsync a onerr = a `catches` - [ Handler (\ (e :: AsyncException) -> throwM e) - , Handler (\ (e :: SomeException) -> onerr e) + [ M.Handler (\ (e :: AsyncException) -> throwM e) + , M.Handler (\ (e :: SomeException) -> onerr e) ] tryNonAsync :: MonadCatch m => m a -> m (Either SomeException a) diff --git a/Utility/FileMode.hs b/Utility/FileMode.hs index c2ef683a81..832250bde7 100644 --- a/Utility/FileMode.hs +++ b/Utility/FileMode.hs @@ -11,7 +11,6 @@ module Utility.FileMode where import System.IO import Control.Monad -import Control.Exception (bracket) import System.PosixCompat.Types import Utility.PosixFiles #ifndef mingw32_HOST_OS diff --git a/Utility/Gpg.hs b/Utility/Gpg.hs index 410259b11b..dfca827786 100644 --- a/Utility/Gpg.hs +++ b/Utility/Gpg.hs @@ -13,7 +13,6 @@ import Control.Applicative import Control.Concurrent import Control.Monad.IO.Class import qualified Data.Map as M -import Control.Monad.Catch (bracket, MonadMask) import Common import qualified Build.SysConfig as SysConfig diff --git a/Utility/Matcher.hs b/Utility/Matcher.hs index 1ee224ffc6..76f8903f5c 100644 --- a/Utility/Matcher.hs +++ b/Utility/Matcher.hs @@ -102,13 +102,13 @@ findClose l = in (Group (reverse g), rest) where go c [] = (c, []) -- not picky about extra Close - go c (t:ts) = handle t + go c (t:ts) = dispatch t where - handle Close = (c, ts) - handle Open = + dispatch Close = (c, ts) + dispatch Open = let (c', ts') = go [] ts in go (Group (reverse c') : c) ts' - handle _ = go (One t:c) ts + dispatch _ = go (One t:c) ts {- Checks if a Matcher matches, using a supplied function to check - the value of Operations. -} diff --git a/Utility/Parallel.hs b/Utility/Parallel.hs index 239c81e7b5..7966811aba 100644 --- a/Utility/Parallel.hs +++ b/Utility/Parallel.hs @@ -10,7 +10,6 @@ module Utility.Parallel where import Common import Control.Concurrent -import Control.Exception {- Runs an action in parallel with a set of values, in a set of threads. - In order for the actions to truely run in parallel, requires GHC's diff --git a/Utility/Tmp.hs b/Utility/Tmp.hs index 7da5cc2847..edd82f5ac2 100644 --- a/Utility/Tmp.hs +++ b/Utility/Tmp.hs @@ -14,7 +14,6 @@ import System.Directory import Control.Monad.IfElse import System.FilePath import Control.Monad.IO.Class -import Control.Monad.Catch (bracket, MonadMask) import Utility.Exception import Utility.FileSystemEncoding @@ -33,11 +32,11 @@ viaTmp a file content = bracket setup cleanup use setup = do createDirectoryIfMissing True dir openTempFile dir template - cleanup (tmpfile, handle) = do - _ <- tryIO $ hClose handle + cleanup (tmpfile, h) = do + _ <- tryIO $ hClose h tryIO $ removeFile tmpfile - use (tmpfile, handle) = do - hClose handle + use (tmpfile, h) = do + hClose h a tmpfile content rename tmpfile file @@ -54,10 +53,10 @@ withTmpFileIn :: (MonadIO m, MonadMask m) => FilePath -> Template -> (FilePath - withTmpFileIn tmpdir template a = bracket create remove use where create = liftIO $ openTempFile tmpdir template - remove (name, handle) = liftIO $ do - hClose handle + remove (name, h) = liftIO $ do + hClose h catchBoolIO (removeFile name >> return True) - use (name, handle) = a name handle + use (name, h) = a name h {- Runs an action with a tmp directory located within the system's tmp - directory (or within "." if there is none), then removes the tmp diff --git a/Utility/Url.hs b/Utility/Url.hs index bf2d3859c4..4137a5d8bb 100644 --- a/Utility/Url.hs +++ b/Utility/Url.hs @@ -51,11 +51,11 @@ checkBoth url expected_size uo = do v <- check url expected_size uo return (fst v && snd v) check :: URLString -> Maybe Integer -> UrlOptions -> IO (Bool, Bool) -check url expected_size = handle <$$> exists url +check url expected_size = go <$$> exists url where - handle (False, _) = (False, False) - handle (True, Nothing) = (True, True) - handle (True, s) = case expected_size of + go (False, _) = (False, False) + go (True, Nothing) = (True, True) + go (True, s) = case expected_size of Just _ -> (True, expected_size == s) Nothing -> (True, True) diff --git a/Utility/WebApp.hs b/Utility/WebApp.hs index 0f3378a15a..6bcfce9196 100644 --- a/Utility/WebApp.hs +++ b/Utility/WebApp.hs @@ -38,10 +38,6 @@ import Data.Byteable #ifdef __ANDROID__ import Data.Endian #endif -#if defined(__ANDROID__) || defined (mingw32_HOST_OS) -#else -import Control.Exception (bracketOnError) -#endif localhost :: HostName localhost = "localhost" diff --git a/debian/control b/debian/control index 66d340e3cf..821629297e 100644 --- a/debian/control +++ b/debian/control @@ -26,7 +26,6 @@ Build-Depends: libghc-ifelse-dev, libghc-bloomfilter-dev, libghc-edit-distance-dev, - libghc-extensible-exceptions-dev, libghc-hinotify-dev [linux-any], libghc-stm-dev (>= 2.3), libghc-dbus-dev (>= 0.10.3) [linux-any], diff --git a/git-annex.cabal b/git-annex.cabal index 8f36bfe486..8dd42ee2f6 100644 --- a/git-annex.cabal +++ b/git-annex.cabal @@ -96,8 +96,7 @@ Executable git-annex Main-Is: git-annex.hs Build-Depends: MissingH, hslogger, directory, filepath, containers, utf8-string, network (>= 2.0), mtl (>= 2), - bytestring, old-locale, time, HTTP, - extensible-exceptions, dataenc, SHA, process, json, + bytestring, old-locale, time, HTTP, dataenc, SHA, process, json, base (>= 4.5 && < 4.9), monad-control, exceptions (>= 0.5), IfElse, text, QuickCheck >= 2.1, bloomfilter, edit-distance, process, SafeSemaphore, uuid, random, dlist, unix-compat, async, stm (>= 2.3), @@ -143,7 +142,7 @@ Executable git-annex if flag(WebDAV) Build-Depends: DAV (>= 0.8), - http-client, http-conduit, http-types, lifted-base, transformers + http-client, http-conduit, http-types CPP-Options: -DWITH_WEBDAV if flag(Assistant) && ! os(solaris) From d3d30d2bf3838fadaeec28ab203f82fb3dc7929a Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Thu, 7 Aug 2014 22:11:28 -0400 Subject: [PATCH 47/56] need transformers for Utility.Exception --- debian/control | 1 + git-annex.cabal | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/debian/control b/debian/control index 821629297e..46adc02cf0 100644 --- a/debian/control +++ b/debian/control @@ -18,6 +18,7 @@ Build-Depends: libghc-quickcheck2-dev, libghc-monad-control-dev (>= 0.3), libghc-exceptions-dev, + libghc-transformers-dev, libghc-unix-compat-dev, libghc-dlist-dev, libghc-uuid-dev, diff --git a/git-annex.cabal b/git-annex.cabal index 8dd42ee2f6..dc5503819d 100644 --- a/git-annex.cabal +++ b/git-annex.cabal @@ -97,7 +97,7 @@ Executable git-annex Build-Depends: MissingH, hslogger, directory, filepath, containers, utf8-string, network (>= 2.0), mtl (>= 2), bytestring, old-locale, time, HTTP, dataenc, SHA, process, json, - base (>= 4.5 && < 4.9), monad-control, exceptions (>= 0.5), + base (>= 4.5 && < 4.9), monad-control, exceptions (>= 0.5), transformers, IfElse, text, QuickCheck >= 2.1, bloomfilter, edit-distance, process, SafeSemaphore, uuid, random, dlist, unix-compat, async, stm (>= 2.3), data-default, case-insensitive @@ -188,7 +188,7 @@ Executable git-annex if flag(Webapp) Build-Depends: yesod, yesod-default, yesod-static, yesod-form, yesod-core, - http-types, transformers, wai, wai-extra, warp, warp-tls, + http-types, wai, wai-extra, warp, warp-tls, blaze-builder, crypto-api, hamlet, clientsession, template-haskell, data-default, aeson, path-pieces, shakespeare From 871b6cb88692b4f39c03eea7ccfee841cf5fb6d7 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Fri, 8 Aug 2014 12:50:51 -0400 Subject: [PATCH 48/56] DAV version turns out to be 1.0. --- debian/changelog | 2 +- git-annex.cabal | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/debian/changelog b/debian/changelog index 1b981c08d3..26c30451fc 100644 --- a/debian/changelog +++ b/debian/changelog @@ -19,7 +19,7 @@ git-annex (5.20140718) UNRELEASED; urgency=medium * WebDAV: Sped up by avoiding making multiple http connections when storing a file. * WebDAV: Avoid buffering whole file in memory when uploading. - * WebDAV: Dropped support for DAV before 0.8. + * WebDAV: Dropped support for DAV before 1.0. * testremote: New command to test uploads/downloads to a remote. * Dropping an object from a bup special remote now deletes the git branch for the object, although of course the object's content cannot be deleted diff --git a/git-annex.cabal b/git-annex.cabal index dc5503819d..be1cdbb968 100644 --- a/git-annex.cabal +++ b/git-annex.cabal @@ -141,7 +141,7 @@ Executable git-annex CPP-Options: -DWITH_S3 if flag(WebDAV) - Build-Depends: DAV (>= 0.8), + Build-Depends: DAV (>= 1.0), http-client, http-conduit, http-types CPP-Options: -DWITH_WEBDAV From fc17cf852ed31154c26801a365da290dd6417e27 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Fri, 8 Aug 2014 13:17:24 -0400 Subject: [PATCH 49/56] further break out legacy chunking code --- Remote/WebDAV.hs | 161 ++++++++++++++++++++++++++--------------------- 1 file changed, 89 insertions(+), 72 deletions(-) diff --git a/Remote/WebDAV.hs b/Remote/WebDAV.hs index 4caebaf214..e7c08c8006 100644 --- a/Remote/WebDAV.hs +++ b/Remote/WebDAV.hs @@ -104,21 +104,6 @@ store _ (Just dav) = httpStorer $ \k reqbody -> liftIO $ goDAV dav $ do finalizeStore (baseURL dav) tmp dest return True -storeLegacyChunked :: ChunkSize -> Key -> DavHandle -> L.ByteString -> IO Bool -storeLegacyChunked chunksize k dav b = - Legacy.storeChunks k tmp dest storer recorder finalizer - where - storehttp l b' = void $ goDAV dav $ do - maybe noop (void . mkColRecursive) (locationParent l) - inLocation l $ putContentM (contentType, b') - storer locs = Legacy.storeChunked chunksize locs storehttp b - recorder l s = storehttp l (L8.fromString s) - finalizer tmp' dest' = goDAV dav $ - finalizeStore (baseURL dav) tmp' (fromJust $ locationParent dest') - - tmp = keyTmpLocation k - dest = keyLocation k ++ keyFile k - finalizeStore :: URLString -> DavLocation -> DavLocation -> DAVT IO () finalizeStore baseurl tmp dest = do inLocation dest $ void $ safely $ delContentM @@ -130,17 +115,18 @@ retrieveCheap _ _ = return False retrieve :: ChunkConfig -> Maybe DavHandle -> Retriever retrieve _ Nothing = error "unable to connect" -retrieve chunkconfig (Just dav) = fileRetriever $ \d k p -> liftIO $ - withStoredFiles chunkconfig k dav onerr $ \locs -> do - Legacy.meteredWriteFileChunks p d locs $ \l -> do - mb <- goDAV dav $ safely $ - inLocation l $ - snd <$> getContentM - case mb of - Nothing -> onerr - Just b -> return b - where - onerr = error "download failed" +retrieve (LegacyChunks _) (Just dav) = retrieveLegacyChunked dav +retrieve _ (Just dav) = fileRetriever $ \d k p -> liftIO $ + meteredWriteFile p d =<< getDAV dav (keyLocation k ++ keyFile k) + +getDAV :: DavHandle -> DavLocation -> IO L.ByteString +getDAV dav l = do + mb <- goDAV dav $ safely $ + inLocation l $ + snd <$> getContentM + case mb of + Nothing -> error "download failed" + Just b -> return b remove :: Maybe DavHandle -> Remover remove Nothing _ = return False @@ -153,52 +139,14 @@ remove (Just dav) k = liftIO $ do checkKey :: Remote -> ChunkConfig -> Maybe DavHandle -> CheckPresent checkKey r _ Nothing _ = error $ name r ++ " not configured" -checkKey r chunkconfig (Just dav) k = either error id <$> go - where - go = do - showAction $ "checking " ++ name r - liftIO $ withStoredFiles chunkconfig k dav onerr check - - check [] = return $ Right True - check (l:ls) = do - v <- goDAV dav $ existsDAV l - if v == Right True - then check ls - else return v - - {- Failed to read the chunkcount file; see if it's missing, - - or if there's a problem accessing it, - - or perhaps this was an intermittent error. -} - onerr f = do - v <- goDAV dav $ existsDAV f - return $ if v == Right True - then Left $ "failed to read " ++ f - else v - -withStoredFiles - :: ChunkConfig - -> Key - -> DavHandle - -> (DavLocation -> IO a) - -> ([DavLocation] -> IO a) - -> IO a -withStoredFiles chunkconfig k dav onerr a = case chunkconfig of - LegacyChunks _ -> do - let chunkcount = keyloc ++ Legacy.chunkCount - v <- goDAV dav $ safely $ - inLocation chunkcount $ - snd <$> getContentM - case v of - Just s -> a $ Legacy.listChunks keyloc $ L8.toString s - Nothing -> do - chunks <- Legacy.probeChunks keyloc $ \f -> - (== Right True) <$> goDAV dav (existsDAV f) - if null chunks - then onerr chunkcount - else a chunks - _ -> a [keyloc] - where - keyloc = keyLocation k ++ keyFile k +checkKey r chunkconfig (Just dav) k = do + showAction $ "checking " ++ name r + case chunkconfig of + LegacyChunks _ -> checkKeyLegacyChunked dav k + _ -> do + v <- liftIO $ goDAV dav $ + existsDAV (keyLocation k ++ keyFile k) + either error return v configUrl :: Remote -> Maybe URLString configUrl r = fixup <$> M.lookup "url" (config r) @@ -349,3 +297,72 @@ prepDAV :: DavUser -> DavPass -> DAVT IO () prepDAV user pass = do setResponseTimeout Nothing -- disable default (5 second!) timeout setCreds user pass + +-- +-- Legacy chunking code, to be removed eventually. +-- + +storeLegacyChunked :: ChunkSize -> Key -> DavHandle -> L.ByteString -> IO Bool +storeLegacyChunked chunksize k dav b = + Legacy.storeChunks k tmp dest storer recorder finalizer + where + storehttp l b' = void $ goDAV dav $ do + maybe noop (void . mkColRecursive) (locationParent l) + inLocation l $ putContentM (contentType, b') + storer locs = Legacy.storeChunked chunksize locs storehttp b + recorder l s = storehttp l (L8.fromString s) + finalizer tmp' dest' = goDAV dav $ + finalizeStore (baseURL dav) tmp' (fromJust $ locationParent dest') + + tmp = keyTmpLocation k + dest = keyLocation k ++ keyFile k + +retrieveLegacyChunked :: DavHandle -> Retriever +retrieveLegacyChunked dav = fileRetriever $ \d k p -> liftIO $ + withStoredFilesLegacyChunked k dav onerr $ \locs -> + Legacy.meteredWriteFileChunks p d locs $ + getDAV dav + where + onerr = error "download failed" + +checkKeyLegacyChunked :: DavHandle -> CheckPresent +checkKeyLegacyChunked dav k = liftIO $ + either error id <$> withStoredFilesLegacyChunked k dav onerr check + where + check [] = return $ Right True + check (l:ls) = do + v <- goDAV dav $ existsDAV l + if v == Right True + then check ls + else return v + + {- Failed to read the chunkcount file; see if it's missing, + - or if there's a problem accessing it, + - or perhaps this was an intermittent error. -} + onerr f = do + v <- goDAV dav $ existsDAV f + return $ if v == Right True + then Left $ "failed to read " ++ f + else v + +withStoredFilesLegacyChunked + :: Key + -> DavHandle + -> (DavLocation -> IO a) + -> ([DavLocation] -> IO a) + -> IO a +withStoredFilesLegacyChunked k dav onerr a = do + let chunkcount = keyloc ++ Legacy.chunkCount + v <- goDAV dav $ safely $ + inLocation chunkcount $ + snd <$> getContentM + case v of + Just s -> a $ Legacy.listChunks keyloc $ L8.toString s + Nothing -> do + chunks <- Legacy.probeChunks keyloc $ \f -> + (== Right True) <$> goDAV dav (existsDAV f) + if null chunks + then onerr chunkcount + else a chunks + where + keyloc = keyLocation k ++ keyFile k From c3f85124757a10791adfe2eca4a201fa2649b7a6 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Fri, 8 Aug 2014 13:40:55 -0400 Subject: [PATCH 50/56] WebDAV: Avoid buffering whole file in memory when downloading. httpBodyRetriever will later also be used by S3 This commit was sponsored by Ethan Aubin. --- Remote/Helper/Http.hs | 20 +++++++++++++++++--- Remote/WebDAV.hs | 30 +++++++++++++----------------- Remote/WebDAV/DavLocation.hs | 7 +++++-- debian/changelog | 3 ++- 4 files changed, 37 insertions(+), 23 deletions(-) diff --git a/Remote/Helper/Http.hs b/Remote/Helper/Http.hs index 945e5cd991..d4882b8c8f 100644 --- a/Remote/Helper/Http.hs +++ b/Remote/Helper/Http.hs @@ -11,7 +11,7 @@ import Common.Annex import Types.StoreRetrieve import Utility.Metered import Remote.Helper.Special -import Network.HTTP.Client (RequestBody(..)) +import Network.HTTP.Client (RequestBody(..), Response, responseBody, BodyReader) import qualified Data.ByteString.Lazy as L import qualified Data.ByteString as S @@ -35,5 +35,19 @@ httpStorer a = fileStorer $ \k f m -> do pop [] = ([], S.empty) pop (c:cs) = (cs, c) ---httpRetriever :: (Key -> Annex Response) -> Retriever ---httpRetriever a = byteRetriever $ \k sink +-- Reads the http body and stores it to the specified file, updating the +-- meter as it goes. +httpBodyRetriever :: FilePath -> MeterUpdate -> Response BodyReader -> IO () +httpBodyRetriever dest meterupdate resp = + bracket (openBinaryFile dest WriteMode) hClose (go zeroBytesProcessed) + where + reader = responseBody resp + go sofar h = do + b <- reader + if S.null b + then return () + else do + let sofar' = addBytesProcessed sofar $ S.length b + S.hPut h b + meterupdate sofar' + go sofar' h diff --git a/Remote/WebDAV.hs b/Remote/WebDAV.hs index e7c08c8006..2c621b6333 100644 --- a/Remote/WebDAV.hs +++ b/Remote/WebDAV.hs @@ -97,7 +97,7 @@ store (LegacyChunks chunksize) (Just dav) = fileStorer $ \k f p -> liftIO $ withMeteredFile f p $ storeLegacyChunked chunksize k dav store _ (Just dav) = httpStorer $ \k reqbody -> liftIO $ goDAV dav $ do let tmp = keyTmpLocation k - let dest = keyLocation k ++ keyFile k + let dest = keyLocation k void $ mkColRecursive tmpDir inLocation tmp $ putContentM' (contentType, reqbody) @@ -117,16 +117,10 @@ retrieve :: ChunkConfig -> Maybe DavHandle -> Retriever retrieve _ Nothing = error "unable to connect" retrieve (LegacyChunks _) (Just dav) = retrieveLegacyChunked dav retrieve _ (Just dav) = fileRetriever $ \d k p -> liftIO $ - meteredWriteFile p d =<< getDAV dav (keyLocation k ++ keyFile k) - -getDAV :: DavHandle -> DavLocation -> IO L.ByteString -getDAV dav l = do - mb <- goDAV dav $ safely $ - inLocation l $ - snd <$> getContentM - case mb of - Nothing -> error "download failed" - Just b -> return b + goDAV dav $ + inLocation (keyLocation k) $ + withContentM $ + httpBodyRetriever d p remove :: Maybe DavHandle -> Remover remove Nothing _ = return False @@ -134,7 +128,7 @@ remove (Just dav) k = liftIO $ do -- Delete the key's whole directory, including any -- legacy chunked files, etc, in a single action. ret <- goDAV dav $ safely $ - inLocation (keyLocation k) delContentM + inLocation (keyDir k) delContentM return (isJust ret) checkKey :: Remote -> ChunkConfig -> Maybe DavHandle -> CheckPresent @@ -145,7 +139,7 @@ checkKey r chunkconfig (Just dav) k = do LegacyChunks _ -> checkKeyLegacyChunked dav k _ -> do v <- liftIO $ goDAV dav $ - existsDAV (keyLocation k ++ keyFile k) + existsDAV (keyLocation k) either error return v configUrl :: Remote -> Maybe URLString @@ -315,13 +309,15 @@ storeLegacyChunked chunksize k dav b = finalizeStore (baseURL dav) tmp' (fromJust $ locationParent dest') tmp = keyTmpLocation k - dest = keyLocation k ++ keyFile k + dest = keyLocation k retrieveLegacyChunked :: DavHandle -> Retriever retrieveLegacyChunked dav = fileRetriever $ \d k p -> liftIO $ withStoredFilesLegacyChunked k dav onerr $ \locs -> - Legacy.meteredWriteFileChunks p d locs $ - getDAV dav + Legacy.meteredWriteFileChunks p d locs $ \l -> + goDAV dav $ + inLocation l $ + snd <$> getContentM where onerr = error "download failed" @@ -365,4 +361,4 @@ withStoredFilesLegacyChunked k dav onerr a = do then onerr chunkcount else a chunks where - keyloc = keyLocation k ++ keyFile k + keyloc = keyLocation k diff --git a/Remote/WebDAV/DavLocation.hs b/Remote/WebDAV/DavLocation.hs index 3b52f3a64c..33c3aa0790 100644 --- a/Remote/WebDAV/DavLocation.hs +++ b/Remote/WebDAV/DavLocation.hs @@ -29,8 +29,8 @@ inLocation :: (MonadIO m) => DavLocation -> DAVT m a -> DAVT m a inLocation d = inDAVLocation ( d) {- The directory where files(s) for a key are stored. -} -keyLocation :: Key -> DavLocation -keyLocation k = addTrailingPathSeparator $ hashdir keyFile k +keyDir :: Key -> DavLocation +keyDir k = addTrailingPathSeparator $ hashdir keyFile k where #ifndef mingw32_HOST_OS hashdir = hashDirLower k @@ -38,6 +38,9 @@ keyLocation k = addTrailingPathSeparator $ hashdir keyFile k hashdir = replace "\\" "/" (hashDirLower k) #endif +keyLocation :: Key -> DavLocation +keyLocation k = keyDir k ++ keyFile k + {- Where we store temporary data for a key as it's being uploaded. -} keyTmpLocation :: Key -> DavLocation keyTmpLocation = addTrailingPathSeparator . tmpLocation . keyFile diff --git a/debian/changelog b/debian/changelog index 26c30451fc..3a8ab302e1 100644 --- a/debian/changelog +++ b/debian/changelog @@ -18,7 +18,8 @@ git-annex (5.20140718) UNRELEASED; urgency=medium * Display exception message when a transfer fails due to an exception. * WebDAV: Sped up by avoiding making multiple http connections when storing a file. - * WebDAV: Avoid buffering whole file in memory when uploading. + * WebDAV: Avoid buffering whole file in memory when uploading and + downloading. * WebDAV: Dropped support for DAV before 1.0. * testremote: New command to test uploads/downloads to a remote. * Dropping an object from a bup special remote now deletes the git branch From 2d344edf60271e233a42a1444a8eaf7f79266bed Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Fri, 8 Aug 2014 13:47:44 -0400 Subject: [PATCH 51/56] increave dav build-dep --- debian/control | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/debian/control b/debian/control index 46adc02cf0..522b7c5cce 100644 --- a/debian/control +++ b/debian/control @@ -14,7 +14,7 @@ Build-Depends: libghc-dataenc-dev, libghc-utf8-string-dev, libghc-hs3-dev (>= 0.5.6), - libghc-dav-dev (>= 0.8) [amd64 i386 kfreebsd-amd64 kfreebsd-i386 powerpc], + libghc-dav-dev (>= 1.0) [amd64 i386 kfreebsd-amd64 kfreebsd-i386 powerpc], libghc-quickcheck2-dev, libghc-monad-control-dev (>= 0.3), libghc-exceptions-dev, From ec11e0b89ad96f2eda0d213e46a342a1c7e370cd Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Fri, 8 Aug 2014 13:55:09 -0400 Subject: [PATCH 52/56] fix build warning --- Assistant/Threads/NetWatcher.hs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Assistant/Threads/NetWatcher.hs b/Assistant/Threads/NetWatcher.hs index f8c456aacf..09cddd3245 100644 --- a/Assistant/Threads/NetWatcher.hs +++ b/Assistant/Threads/NetWatcher.hs @@ -117,7 +117,7 @@ listenNMConnections client setconnected = #else listen client matcher #endif - $ \event -> mapM_ handle + $ \event -> mapM_ handleevent (map dictionaryItems $ mapMaybe fromVariant $ signalBody event) where matcher = matchAny @@ -128,7 +128,7 @@ listenNMConnections client setconnected = nm_activatingconnection_key = toVariant ("ActivatingConnection" :: String) noconnections = Just $ toVariant $ toVariant ([] :: [ObjectPath]) rootconnection = Just $ toVariant $ toVariant $ objectPath_ "/" - handle m + handleevent m | lookup nm_active_connections_key m == noconnections = setconnected False | lookup nm_activatingconnection_key m == rootconnection = @@ -150,7 +150,7 @@ listenWicdConnections client setconnected = do match connmatcher $ \event -> when (any (== wicd_success) (signalBody event)) $ setconnected True - match statusmatcher $ \event -> handle (signalBody event) + match statusmatcher $ \event -> handleevent (signalBody event) where connmatcher = matchAny { matchInterface = Just "org.wicd.daemon" @@ -162,7 +162,7 @@ listenWicdConnections client setconnected = do } wicd_success = toVariant ("success" :: String) wicd_disconnected = toVariant [toVariant ("" :: String)] - handle status + handleevent status | any (== wicd_disconnected) status = setconnected False | otherwise = noop match matcher a = From 6cb9e5c32f46d0d7bcfdc96c7d05a93c08e9d20a Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Fri, 8 Aug 2014 14:19:08 -0400 Subject: [PATCH 53/56] show missing url= parameter error sooner --- Remote/WebDAV.hs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Remote/WebDAV.hs b/Remote/WebDAV.hs index 2c621b6333..af3af79020 100644 --- a/Remote/WebDAV.hs +++ b/Remote/WebDAV.hs @@ -77,8 +77,9 @@ gen r u c gc = new <$> remoteCost gc expensiveRemoteCost webdavSetup :: Maybe UUID -> Maybe CredPair -> RemoteConfig -> Annex (RemoteConfig, UUID) webdavSetup mu mcreds c = do u <- maybe (liftIO genUUID) return mu - let url = fromMaybe (error "Specify url=") $ - M.lookup "url" c + url <- case M.lookup "url" c of + Nothing -> error "Specify url=" + Just url -> return url c' <- encryptionSetup c creds <- maybe (getCreds c' u) (return . Just) mcreds testDav url creds From 0260ee43e63c98dc43a6c47603f702d2b0449b17 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Fri, 8 Aug 2014 14:57:05 -0400 Subject: [PATCH 54/56] fix removeKey when not present --- Remote/WebDAV.hs | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/Remote/WebDAV.hs b/Remote/WebDAV.hs index af3af79020..d344e0a745 100644 --- a/Remote/WebDAV.hs +++ b/Remote/WebDAV.hs @@ -128,9 +128,16 @@ remove Nothing _ = return False remove (Just dav) k = liftIO $ do -- Delete the key's whole directory, including any -- legacy chunked files, etc, in a single action. - ret <- goDAV dav $ safely $ - inLocation (keyDir k) delContentM - return (isJust ret) + let d = keyDir k + goDAV dav $ do + v <- safely $ inLocation d delContentM + case v of + Just _ -> return True + Nothing -> do + v' <- existsDAV d + case v' of + Right False -> return True + _ -> return False checkKey :: Remote -> ChunkConfig -> Maybe DavHandle -> CheckPresent checkKey r _ Nothing _ = error $ name r ++ " not configured" From 1f8c170c953cefa8eccb6c3ba50b1b05494060b1 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Fri, 8 Aug 2014 17:17:23 -0400 Subject: [PATCH 55/56] http-conduit not used for dav --- git-annex.cabal | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/git-annex.cabal b/git-annex.cabal index be1cdbb968..5154b27dd8 100644 --- a/git-annex.cabal +++ b/git-annex.cabal @@ -142,7 +142,7 @@ Executable git-annex if flag(WebDAV) Build-Depends: DAV (>= 1.0), - http-client, http-conduit, http-types + http-client, http-types CPP-Options: -DWITH_WEBDAV if flag(Assistant) && ! os(solaris) From 1dd3232e8e90b2aa49b670284eb90f7d500eae57 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Fri, 8 Aug 2014 17:17:36 -0400 Subject: [PATCH 56/56] check for 200 response --- Remote/Helper/Http.hs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/Remote/Helper/Http.hs b/Remote/Helper/Http.hs index d4882b8c8f..f1d576d1c9 100644 --- a/Remote/Helper/Http.hs +++ b/Remote/Helper/Http.hs @@ -11,7 +11,8 @@ import Common.Annex import Types.StoreRetrieve import Utility.Metered import Remote.Helper.Special -import Network.HTTP.Client (RequestBody(..), Response, responseBody, BodyReader) +import Network.HTTP.Client (RequestBody(..), Response, responseStatus, responseBody, BodyReader) +import Network.HTTP.Types import qualified Data.ByteString.Lazy as L import qualified Data.ByteString as S @@ -38,8 +39,9 @@ httpStorer a = fileStorer $ \k f m -> do -- Reads the http body and stores it to the specified file, updating the -- meter as it goes. httpBodyRetriever :: FilePath -> MeterUpdate -> Response BodyReader -> IO () -httpBodyRetriever dest meterupdate resp = - bracket (openBinaryFile dest WriteMode) hClose (go zeroBytesProcessed) +httpBodyRetriever dest meterupdate resp + | responseStatus resp /= ok200 = error $ show $ responseStatus resp + | otherwise = bracket (openBinaryFile dest WriteMode) hClose (go zeroBytesProcessed) where reader = responseBody resp go sofar h = do