From c3750901d867131a5e1abc5b583ca604d7761a20 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Sat, 2 Aug 2014 15:34:09 -0400 Subject: [PATCH 01/44] specialize Preparer a bit, so resourcePrepare can be added The forall a. in Preparer made resourcePrepare not seem to be usable, so I specialized a to Bool. Which works for both Preparer Storer and Preparer Retriever, but wouldn't let the Preparer be used for hasKey as it currently stands. --- Remote/Directory.hs | 1 - Remote/Directory/LegacyChunked.hs | 2 -- Remote/Helper/ChunkedEncryptable.hs | 6 ++++++ Types/StoreRetrieve.hs | 4 +--- 4 files changed, 7 insertions(+), 6 deletions(-) diff --git a/Remote/Directory.hs b/Remote/Directory.hs index 37942a295d..a879875298 100644 --- a/Remote/Directory.hs +++ b/Remote/Directory.hs @@ -6,7 +6,6 @@ -} {-# LANGUAGE CPP #-} -{-# LANGUAGE Rank2Types #-} module Remote.Directory (remote) where diff --git a/Remote/Directory/LegacyChunked.hs b/Remote/Directory/LegacyChunked.hs index 312119f4e6..5c200570c8 100644 --- a/Remote/Directory/LegacyChunked.hs +++ b/Remote/Directory/LegacyChunked.hs @@ -7,8 +7,6 @@ - Licensed under the GNU GPL version 3 or higher. -} -{-# LANGUAGE Rank2Types #-} - module Remote.Directory.LegacyChunked where import qualified Data.ByteString.Lazy as L diff --git a/Remote/Helper/ChunkedEncryptable.hs b/Remote/Helper/ChunkedEncryptable.hs index 2a844212b5..1267f5b59e 100644 --- a/Remote/Helper/ChunkedEncryptable.hs +++ b/Remote/Helper/ChunkedEncryptable.hs @@ -14,6 +14,7 @@ module Remote.Helper.ChunkedEncryptable ( simplyPrepare, ContentSource, checkPrepare, + resourcePrepare, fileStorer, byteStorer, fileRetriever, @@ -49,6 +50,11 @@ checkPrepare checker helper k a = ifM (checker k) , a Nothing ) +-- Use to acquire a resource when preparing a helper. +resourcePrepare :: (Key -> (r -> Annex Bool) -> Annex Bool) -> (r -> helper) -> Preparer helper +resourcePrepare withr helper k a = withr k $ \r -> + a (Just (helper r)) + -- A Storer that expects to be provided with a file containing -- the content of the key to store. fileStorer :: (Key -> FilePath -> MeterUpdate -> Annex Bool) -> Storer diff --git a/Types/StoreRetrieve.hs b/Types/StoreRetrieve.hs index 33f66efb19..bde7489604 100644 --- a/Types/StoreRetrieve.hs +++ b/Types/StoreRetrieve.hs @@ -5,8 +5,6 @@ - Licensed under the GNU GPL version 3 or higher. -} -{-# LANGUAGE Rank2Types #-} - module Types.StoreRetrieve where import Common.Annex @@ -16,7 +14,7 @@ import qualified Data.ByteString.Lazy as L -- Prepares for and then runs an action that will act on a Key's -- content, passing it a helper when the preparation is successful. -type Preparer helper = forall a. Key -> (Maybe helper -> Annex a) -> Annex a +type Preparer helper = Key -> (Maybe helper -> Annex Bool) -> Annex Bool -- A source of a Key's content. data ContentSource From 32e436837731da279c233deb6be84188a9ad1f87 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Sat, 2 Aug 2014 15:51:58 -0400 Subject: [PATCH 02/44] S3: support chunking The assistant defaults to 1MiB chunk size for new S3 special remotes. Which will work around a couple of bugs: http://git-annex.branchable.com/bugs/S3_memory_leaks/ http://git-annex.branchable.com/bugs/S3_upload_not_using_multipart/ --- Assistant/WebApp/Configurators/AWS.hs | 1 + Remote/S3.hs | 86 +++++++++------------------ Types/Key.hs | 4 ++ debian/changelog | 2 +- doc/special_remotes/S3.mdwn | 3 + doc/tips/using_Amazon_S3.mdwn | 2 +- 6 files changed, 38 insertions(+), 60 deletions(-) diff --git a/Assistant/WebApp/Configurators/AWS.hs b/Assistant/WebApp/Configurators/AWS.hs index a7250d8cce..9a6be38814 100644 --- a/Assistant/WebApp/Configurators/AWS.hs +++ b/Assistant/WebApp/Configurators/AWS.hs @@ -129,6 +129,7 @@ postAddS3R = awsConfigurator $ do , ("type", "S3") , ("datacenter", T.unpack $ datacenter input) , ("storageclass", show $ storageClass input) + , ("chunk", "1MiB") ] _ -> $(widgetFile "configurators/adds3") #else diff --git a/Remote/S3.hs b/Remote/S3.hs index c30d07b8a3..ed9122cab1 100644 --- a/Remote/S3.hs +++ b/Remote/S3.hs @@ -25,12 +25,10 @@ import qualified Git import Config import Config.Cost import Remote.Helper.Special -import Remote.Helper.Encryptable +import Remote.Helper.ChunkedEncryptable import qualified Remote.Helper.AWS as AWS -import Crypto import Creds import Utility.Metered -import Annex.Content import Annex.UUID import Logs.Web @@ -47,17 +45,17 @@ remote = RemoteType { gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote) gen r u c gc = new <$> remoteCost gc expensiveRemoteCost where - new cst = Just $ encryptableRemote c - (storeEncrypted this) - (retrieveEncrypted this) + new cst = Just $ chunkedEncryptableRemote c + (prepareStore this) + (prepareRetrieve this) this where this = Remote { uuid = u, cost = cst, name = Git.repoDescribe r, - storeKey = store this, - retrieveKeyFile = retrieve this, + storeKey = storeKeyDummy, + retrieveKeyFile = retreiveKeyFileDummy, retrieveKeyFileCheap = retrieveCheap this, removeKey = remove this c, hasKey = checkPresent this, @@ -123,67 +121,39 @@ s3Setup' u c = if isIA c then archiveorg else defaulthost writeUUIDFile archiveconfig u use archiveconfig -store :: Remote -> Key -> AssociatedFile -> MeterUpdate -> Annex Bool -store r k _f p = s3Action r False $ \(conn, bucket) -> - sendAnnex k (void $ remove' r k) $ \src -> do - ok <- s3Bool =<< storeHelper (conn, bucket) r k p src +prepareStore :: Remote -> Preparer Storer +prepareStore r = resourcePrepare (const $ s3Action r False) $ \(conn, bucket) -> + fileStorer $ \k src p -> do + ok <- s3Bool =<< liftIO (store (conn, bucket) r k p src) -- Store public URL to item in Internet Archive. - when (ok && isIA (config r)) $ + when (ok && isIA (config r) && not (isChunkKey k)) $ setUrlPresent k (iaKeyUrl r k) return ok -storeEncrypted :: Remote -> (Cipher, Key) -> Key -> MeterUpdate -> Annex Bool -storeEncrypted r (cipher, enck) k p = s3Action r False $ \(conn, bucket) -> - -- To get file size of the encrypted content, have to use a temp file. - -- (An alternative would be chunking to to a constant size.) - withTmp enck $ \tmp -> sendAnnex k (void $ remove' r enck) $ \src -> do - liftIO $ encrypt (getGpgEncParams r) cipher (feedFile src) $ - readBytes $ L.writeFile tmp - s3Bool =<< storeHelper (conn, bucket) r enck p tmp +store :: (AWSConnection, Bucket) -> Remote -> Key -> MeterUpdate -> FilePath -> IO (AWSResult ()) +store (conn, bucket) r k p file = do + size <- (fromIntegral . fileSize <$> getFileStatus file) :: IO Integer + withMeteredFile file p $ \content -> do + -- size is provided to S3 so the whole content + -- does not need to be buffered to calculate it + let object = S3Object + bucket (bucketFile r k) "" + (("Content-Length", show size) : getXheaders (config r)) + content + sendObject conn $ + setStorageClass (getStorageClass $ config r) object -storeHelper :: (AWSConnection, Bucket) -> Remote -> Key -> MeterUpdate -> FilePath -> Annex (AWSResult ()) -storeHelper (conn, bucket) r k p file = do - size <- maybe getsize (return . fromIntegral) $ keySize k - meteredBytes (Just p) size $ \meterupdate -> - liftIO $ withMeteredFile file meterupdate $ \content -> do - -- size is provided to S3 so the whole content - -- does not need to be buffered to calculate it - let object = S3Object - bucket (bucketFile r k) "" - (("Content-Length", show size) : getXheaders (config r)) - content - sendObject conn $ - setStorageClass (getStorageClass $ config r) object - where - getsize = liftIO $ fromIntegral . fileSize <$> getFileStatus file - -retrieve :: Remote -> Key -> AssociatedFile -> FilePath -> MeterUpdate -> Annex Bool -retrieve r k _f d p = s3Action r False $ \(conn, bucket) -> - metered (Just p) k $ \meterupdate -> do - res <- liftIO $ getObject conn $ bucketKey r bucket k - case res of - Right o -> do - liftIO $ meteredWriteFile meterupdate d $ - obj_data o - return True - Left e -> s3Warning e +prepareRetrieve :: Remote -> Preparer Retriever +prepareRetrieve r = resourcePrepare (const $ s3Action r False) $ \(conn, bucket) -> + byteRetriever $ \k -> + liftIO (getObject conn $ bucketKey r bucket k) + >>= either s3Error (return . obj_data) retrieveCheap :: Remote -> Key -> FilePath -> Annex Bool retrieveCheap _ _ _ = return False -retrieveEncrypted :: Remote -> (Cipher, Key) -> Key -> FilePath -> MeterUpdate -> Annex Bool -retrieveEncrypted r (cipher, enck) k d p = s3Action r False $ \(conn, bucket) -> - metered (Just p) k $ \meterupdate -> do - res <- liftIO $ getObject conn $ bucketKey r bucket enck - case res of - Right o -> liftIO $ decrypt cipher (\h -> meteredWrite meterupdate h $ obj_data o) $ - readBytes $ \content -> do - L.writeFile d content - return True - Left e -> s3Warning e - {- Internet Archive doesn't easily allow removing content. - While it may remove the file, there are generally other files - derived from it that it does not remove. -} diff --git a/Types/Key.hs b/Types/Key.hs index 154e813ffd..5bb41e15f5 100644 --- a/Types/Key.hs +++ b/Types/Key.hs @@ -15,6 +15,7 @@ module Types.Key ( file2key, nonChunkKey, chunkKeyOffset, + isChunkKey, prop_idempotent_key_encode, prop_idempotent_key_decode @@ -62,6 +63,9 @@ chunkKeyOffset k = (*) <$> keyChunkSize k <*> (pred <$> keyChunkNum k) +isChunkKey :: Key -> Bool +isChunkKey k = isJust (keyChunkSize k) && isJust (keyChunkNum k) + fieldSep :: Char fieldSep = '-' diff --git a/debian/changelog b/debian/changelog index f8b700ae73..d8c23af9b9 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,7 +1,7 @@ git-annex (5.20140718) UNRELEASED; urgency=medium * New chunk= option to chunk files stored in special remotes. - Currently supported by: directory, and all external special remotes. + Currently supported by: directory, S3, and all external special remotes. * Partially transferred files are automatically resumed when using chunked remotes! * The old chunksize= option is deprecated. Do not use for new remotes. diff --git a/doc/special_remotes/S3.mdwn b/doc/special_remotes/S3.mdwn index 5291a4eb6e..fe46948b3b 100644 --- a/doc/special_remotes/S3.mdwn +++ b/doc/special_remotes/S3.mdwn @@ -18,6 +18,9 @@ the S3 remote. * `encryption` - One of "none", "hybrid", "shared", or "pubkey". See [[encryption]]. +* `chunk` - Enables [[chunking]] when storing large files. + `chunk=1MiB` is a good starting point for chunking. + * `keyid` - Specifies the gpg key to use for [[encryption]]. * `embedcreds` - Optional. Set to "yes" embed the login credentials inside diff --git a/doc/tips/using_Amazon_S3.mdwn b/doc/tips/using_Amazon_S3.mdwn index 0c68c7387d..ede3f952f7 100644 --- a/doc/tips/using_Amazon_S3.mdwn +++ b/doc/tips/using_Amazon_S3.mdwn @@ -14,7 +14,7 @@ like "2512E3C7" Next, create the S3 remote, and describe it. - # git annex initremote cloud type=S3 keyid=2512E3C7 + # git annex initremote cloud type=S3 chunk=1MiB keyid=2512E3C7 initremote cloud (encryption setup with gpg key C910D9222512E3C7) (checking bucket) (creating bucket in US) (gpg) ok # git annex describe cloud "at Amazon's US datacenter" describe cloud ok From 0eb1f057c4572676e144fdafdb0324f3243fa6a0 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Sat, 2 Aug 2014 16:47:21 -0400 Subject: [PATCH 03/44] convert glacier to new ChunkedEncryptable API (but do not support chunking) Chunking would complicate the assistant's code that checks when a pending retrieval of a key from glacier is done. It would perhaps be nice to support it to allow resuming, but not right now. Converting to the new API still simplifies the code. --- Remote/Glacier.hs | 82 +++++++++++++---------------- Remote/Helper/ChunkedEncryptable.hs | 22 ++++---- 2 files changed, 50 insertions(+), 54 deletions(-) diff --git a/Remote/Glacier.hs b/Remote/Glacier.hs index bf8f050610..9b428bd80a 100644 --- a/Remote/Glacier.hs +++ b/Remote/Glacier.hs @@ -9,6 +9,7 @@ module Remote.Glacier (remote, jobList) where import qualified Data.Map as M import qualified Data.Text as T +import qualified Data.ByteString.Lazy as L import Common.Annex import Types.Remote @@ -17,13 +18,12 @@ import qualified Git import Config import Config.Cost import Remote.Helper.Special -import Remote.Helper.Encryptable +import Remote.Helper.ChunkedEncryptable import qualified Remote.Helper.AWS as AWS import Crypto import Creds import Utility.Metered import qualified Annex -import Annex.Content import Annex.UUID import Utility.Env @@ -42,16 +42,16 @@ gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remot gen r u c gc = new <$> remoteCost gc veryExpensiveRemoteCost where new cst = Just $ encryptableRemote c - (storeEncrypted this) - (retrieveEncrypted this) + (prepareStore this) + (prepareRetrieve this) this where this = Remote { uuid = u, cost = cst, name = Git.repoDescribe r, - storeKey = store this, - retrieveKeyFile = retrieve this, + storeKey = storeKeyDummy, + retrieveKeyFile = retreiveKeyFileDummy, retrieveKeyFileCheap = retrieveCheap this, removeKey = remove this, hasKey = checkPresent this, @@ -89,38 +89,18 @@ glacierSetup' enabling u c = do , ("vault", defvault) ] -store :: Remote -> Key -> AssociatedFile -> MeterUpdate -> Annex Bool -store r k _f p +prepareStore :: Remote -> Preparer Storer +prepareStore r = checkPrepare nonEmpty (byteStorer $ store r) + +nonEmpty :: Key -> Annex Bool +nonEmpty k | keySize k == Just 0 = do warning "Cannot store empty files in Glacier." return False - | otherwise = sendAnnex k (void $ remove r k) $ \src -> - metered (Just p) k $ \meterupdate -> - storeHelper r k $ streamMeteredFile src meterupdate + | otherwise = return True -storeEncrypted :: Remote -> (Cipher, Key) -> Key -> MeterUpdate -> Annex Bool -storeEncrypted r (cipher, enck) k p = sendAnnex k (void $ remove r enck) $ \src -> - metered (Just p) k $ \meterupdate -> - storeHelper r enck $ \h -> - encrypt (getGpgEncParams r) cipher (feedFile src) - (readBytes $ meteredWrite meterupdate h) - -retrieve :: Remote -> Key -> AssociatedFile -> FilePath -> MeterUpdate -> Annex Bool -retrieve r k _f d p = metered (Just p) k $ \meterupdate -> - retrieveHelper r k $ - readBytes $ meteredWriteFile meterupdate d - -retrieveCheap :: Remote -> Key -> FilePath -> Annex Bool -retrieveCheap _ _ _ = return False - -retrieveEncrypted :: Remote -> (Cipher, Key) -> Key -> FilePath -> MeterUpdate -> Annex Bool -retrieveEncrypted r (cipher, enck) k d p = metered (Just p) k $ \meterupdate -> - retrieveHelper r enck $ readBytes $ \b -> - decrypt cipher (feedBytes b) $ - readBytes $ meteredWriteFile meterupdate d - -storeHelper :: Remote -> Key -> (Handle -> IO ()) -> Annex Bool -storeHelper r k feeder = go =<< glacierEnv c u +store :: Remote -> Key -> L.ByteString -> MeterUpdate -> Annex Bool +store r k b p = go =<< glacierEnv c u where c = config r u = uuid r @@ -133,14 +113,18 @@ storeHelper r k feeder = go =<< glacierEnv c u ] go Nothing = return False go (Just e) = do - let p = (proc "glacier" (toCommand params)) { env = Just e } + let cmd = (proc "glacier" (toCommand params)) { env = Just e } liftIO $ catchBoolIO $ - withHandle StdinHandle createProcessSuccess p $ \h -> do - feeder h + withHandle StdinHandle createProcessSuccess cmd $ \h -> do + meteredWrite p h b return True -retrieveHelper :: Remote -> Key -> (Handle -> IO ()) -> Annex Bool -retrieveHelper r k reader = go =<< glacierEnv c u +prepareRetrieve :: Remote -> Preparer Retriever +prepareRetrieve r = simplyPrepare $ fileRetriever $ \d k p -> + retrieve r k (readBytes (meteredWriteFile p d)) + +retrieve :: Remote -> Key -> (Handle -> IO ()) -> Annex () +retrieve r k reader = go =<< glacierEnv c u where c = config r u = uuid r @@ -151,29 +135,33 @@ retrieveHelper r k reader = go =<< glacierEnv c u , Param $ getVault $ config r , Param $ archive r k ] - go Nothing = return False + go Nothing = error "cannot retrieve from glacier" go (Just e) = do - let p = (proc "glacier" (toCommand params)) { env = Just e } + let cmd = (proc "glacier" (toCommand params)) { env = Just e } ok <- liftIO $ catchBoolIO $ - withHandle StdoutHandle createProcessSuccess p $ \h -> + withHandle StdoutHandle createProcessSuccess cmd $ \h -> ifM (hIsEOF h) ( return False , do reader h return True ) - unless ok later - return ok - later = showLongNote "Recommend you wait up to 4 hours, and then run this command again." + unless ok $ do + showLongNote "Recommend you wait up to 4 hours, and then run this command again." + error "not yet available" remove :: Remote -> Key -> Annex Bool remove r k = glacierAction r [ Param "archive" + , Param "delete" , Param $ getVault $ config r , Param $ archive r k ] +retrieveCheap :: Remote -> Key -> FilePath -> Annex Bool +retrieveCheap _ _ _ = return False + checkPresent :: Remote -> Key -> Annex (Either String Bool) checkPresent r k = do showAction $ "checking " ++ name r @@ -261,6 +249,10 @@ genVault c u = unlessM (runGlacier c u params) $ - - A complication is that `glacier job list` will display the encrypted - keys when the remote is encrypted. + - + - Dealing with encrypted chunked keys would be tricky. However, there + - seems to be no benefit to using chunking with glacier, so chunking is + - not supported. -} jobList :: Remote -> [Key] -> Annex ([Key], [Key]) jobList r keys = go =<< glacierEnv (config r) (uuid r) diff --git a/Remote/Helper/ChunkedEncryptable.hs b/Remote/Helper/ChunkedEncryptable.hs index 1267f5b59e..e607715514 100644 --- a/Remote/Helper/ChunkedEncryptable.hs +++ b/Remote/Helper/ChunkedEncryptable.hs @@ -22,6 +22,7 @@ module Remote.Helper.ChunkedEncryptable ( storeKeyDummy, retreiveKeyFileDummy, chunkedEncryptableRemote, + encryptableRemote, module X ) where @@ -32,7 +33,7 @@ import Crypto import Config.Cost import Utility.Metered import Remote.Helper.Chunked as X -import Remote.Helper.Encryptable as X +import Remote.Helper.Encryptable as X hiding (encryptableRemote) import Annex.Content import Annex.Exception @@ -90,14 +91,18 @@ storeKeyDummy _ _ _ = return False retreiveKeyFileDummy :: Key -> AssociatedFile -> FilePath -> MeterUpdate -> Annex Bool retreiveKeyFileDummy _ _ _ _ = return False +type RemoteModifier = RemoteConfig -> Preparer Storer -> Preparer Retriever -> Remote -> Remote + -- Modifies a base Remote to support both chunking and encryption. -chunkedEncryptableRemote - :: RemoteConfig - -> Preparer Storer - -> Preparer Retriever - -> Remote - -> Remote -chunkedEncryptableRemote c preparestorer prepareretriever baser = encr +chunkedEncryptableRemote :: RemoteModifier +chunkedEncryptableRemote c = chunkedEncryptableRemote' (chunkConfig c) c + +-- Modifies a base Remote to support encryption, but not chunking. +encryptableRemote :: RemoteModifier +encryptableRemote = chunkedEncryptableRemote' NoChunks + +chunkedEncryptableRemote' :: ChunkConfig -> RemoteModifier +chunkedEncryptableRemote' chunkconfig c preparestorer prepareretriever baser = encr where encr = baser { storeKey = \k _f p -> cip >>= storeKeyGen k p @@ -113,7 +118,6 @@ chunkedEncryptableRemote c preparestorer prepareretriever baser = encr (extractCipher c) } cip = cipherKey c - chunkconfig = chunkConfig c gpgopts = getGpgEncParams encr safely a = catchNonAsyncAnnex a (\e -> warning (show e) >> return False) From d4dc1bce754470468e43a39e2dff8a1e443fce7a Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Sat, 2 Aug 2014 17:04:40 -0400 Subject: [PATCH 04/44] document that encryption + bup = inneficient --- doc/special_remotes/bup.mdwn | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/doc/special_remotes/bup.mdwn b/doc/special_remotes/bup.mdwn index f2d465e779..ca50569175 100644 --- a/doc/special_remotes/bup.mdwn +++ b/doc/special_remotes/bup.mdwn @@ -19,16 +19,17 @@ for example; or clone bup's git repository to further back it up. These parameters can be passed to `git annex initremote` to configure bup: -* `encryption` - One of "none", "hybrid", "shared", or "pubkey". - See [[encryption]]. - -* `keyid` - Specifies the gpg key to use for [[encryption]]. - * `buprepo` - Required. This is passed to `bup` as the `--remote` to use to store data. To create the repository,`bup init` will be run. Example: "buprepo=example.com:/big/mybup" or "buprepo=/big/mybup" (To use the default `~/.bup` repository on the local host, specify "buprepo=") +* `encryption` - One of "none", "hybrid", "shared", or "pubkey". + See [[encryption]]. Note that using encryption will prevent + de-duplication of content stored in the buprepo. + +* `keyid` - Specifies the gpg key to use for [[encryption]]. + Options to pass to `bup split` when sending content to bup can also be specified, by using `git config annex.bup-split-options`. This can be used to, for example, limit its bandwidth. From 7f5cd868d7180f46faa8d31ea42f8867531131dc Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Sat, 2 Aug 2014 17:25:16 -0400 Subject: [PATCH 05/44] hook: use ChunkedEncryptable --- Remote/Hook.hs | 40 +++++++++++------------------------ debian/changelog | 3 ++- doc/special_remotes/hook.mdwn | 2 ++ 3 files changed, 16 insertions(+), 29 deletions(-) diff --git a/Remote/Hook.hs b/Remote/Hook.hs index 74641f5aa5..0668e2ca91 100644 --- a/Remote/Hook.hs +++ b/Remote/Hook.hs @@ -7,7 +7,6 @@ module Remote.Hook (remote) where -import qualified Data.ByteString.Lazy as L import qualified Data.Map as M import Common.Annex @@ -17,12 +16,9 @@ import Types.Creds import qualified Git import Config import Config.Cost -import Annex.Content import Annex.UUID import Remote.Helper.Special -import Remote.Helper.Encryptable -import Crypto -import Utility.Metered +import Remote.Helper.ChunkedEncryptable import Utility.Env type Action = String @@ -39,15 +35,15 @@ remote = RemoteType { gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote) gen r u c gc = do cst <- remoteCost gc expensiveRemoteCost - return $ Just $ encryptableRemote c - (storeEncrypted hooktype $ getGpgEncParams (c,gc)) - (retrieveEncrypted hooktype) + return $ Just $ chunkedEncryptableRemote c + (simplyPrepare $ store hooktype) + (simplyPrepare $ retrieve hooktype) Remote { uuid = u, cost = cst, name = Git.repoDescribe r, - storeKey = store hooktype, - retrieveKeyFile = retrieve hooktype, + storeKey = storeKeyDummy, + retrieveKeyFile = retreiveKeyFileDummy, retrieveKeyFileCheap = retrieveCheap hooktype, removeKey = remove hooktype, hasKey = checkPresent r hooktype, @@ -118,30 +114,18 @@ runHook hook action k f a = maybe (return False) run =<< lookupHook hook action return False ) -store :: HookName -> Key -> AssociatedFile -> MeterUpdate -> Annex Bool -store h k _f _p = sendAnnex k (void $ remove h k) $ \src -> +store :: HookName -> Storer +store h = fileStorer $ \k src _p -> runHook h "store" k (Just src) $ return True -storeEncrypted :: HookName -> [CommandParam] -> (Cipher, Key) -> Key -> MeterUpdate -> Annex Bool -storeEncrypted h gpgOpts (cipher, enck) k _p = withTmp enck $ \tmp -> - sendAnnex k (void $ remove h enck) $ \src -> do - liftIO $ encrypt gpgOpts cipher (feedFile src) $ - readBytes $ L.writeFile tmp - runHook h "store" enck (Just tmp) $ return True - -retrieve :: HookName -> Key -> AssociatedFile -> FilePath -> MeterUpdate -> Annex Bool -retrieve h k _f d _p = runHook h "retrieve" k (Just d) $ return True +retrieve :: HookName -> Retriever +retrieve h = fileRetriever $ \d k _p -> + unlessM (runHook h "retrieve" k (Just d) $ return True) $ + error "failed to retrieve content" retrieveCheap :: HookName -> Key -> FilePath -> Annex Bool retrieveCheap _ _ _ = return False -retrieveEncrypted :: HookName -> (Cipher, Key) -> Key -> FilePath -> MeterUpdate -> Annex Bool -retrieveEncrypted h (cipher, enck) _ f _p = withTmp enck $ \tmp -> - runHook h "retrieve" enck (Just tmp) $ liftIO $ catchBoolIO $ do - decrypt cipher (feedFile tmp) $ - readBytes $ L.writeFile f - return True - remove :: HookName -> Key -> Annex Bool remove h k = runHook h "remove" k Nothing $ return True diff --git a/debian/changelog b/debian/changelog index d8c23af9b9..2343856075 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,7 +1,8 @@ git-annex (5.20140718) UNRELEASED; urgency=medium * New chunk= option to chunk files stored in special remotes. - Currently supported by: directory, S3, and all external special remotes. + Currently supported by: directory, S3, and all external and hook + special remotes. * Partially transferred files are automatically resumed when using chunked remotes! * The old chunksize= option is deprecated. Do not use for new remotes. diff --git a/doc/special_remotes/hook.mdwn b/doc/special_remotes/hook.mdwn index 8cf31ed02f..0bb76d98a0 100644 --- a/doc/special_remotes/hook.mdwn +++ b/doc/special_remotes/hook.mdwn @@ -36,6 +36,8 @@ These parameters can be passed to `git annex initremote`: * `keyid` - Specifies the gpg key to use for [[encryption]]. +* `chunk` - Enables [[chunking]] when storing large files. + ## hooks Each type of hook remote is specified by a collection of hook commands. From b261df735d01ad1057edcf3d06e0450762fc2e42 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Sat, 2 Aug 2014 18:36:26 -0400 Subject: [PATCH 06/44] convert bup to new ChunkedEncryptable API (but do not support chunking) bup already splits files and does rolling deltas, so there is no reason to use chunking here. The new API made it easier to add progress support for storeKey, so that's done. Unfortunately, bup-split still outputs its own progress with -q, so a little ugly, but not too bad. Made dropping remove the branch for an object, for two reasons: 1. The new API calls removeKey to roll back a storeKey when the content changed unexpectedly. 2. So that testremote will be happy. Also, fixed a bug that caused a crash when removing the branch for an object in rollback. --- Remote/Bup.hs | 93 +++++++++----------------- debian/changelog | 3 + doc/design/assistant/progressbars.mdwn | 4 +- 3 files changed, 38 insertions(+), 62 deletions(-) diff --git a/Remote/Bup.hs b/Remote/Bup.hs index 7788328508..06679c4b8a 100644 --- a/Remote/Bup.hs +++ b/Remote/Bup.hs @@ -1,15 +1,13 @@ {- Using bup as a remote. - - - Copyright 2011 Joey Hess + - Copyright 2011-2014 Joey Hess - - Licensed under the GNU GPL version 3 or higher. -} module Remote.Bup (remote) where -import qualified Data.ByteString.Lazy as L import qualified Data.Map as M -import System.Process import Data.ByteString.Lazy.UTF8 (fromString) import Common.Annex @@ -26,12 +24,10 @@ import Config import Config.Cost import qualified Remote.Helper.Ssh as Ssh import Remote.Helper.Special -import Remote.Helper.Encryptable +import Remote.Helper.ChunkedEncryptable import Remote.Helper.Messages -import Crypto import Utility.Hash import Utility.UserInfo -import Annex.Content import Annex.UUID import Utility.Metered @@ -54,14 +50,14 @@ gen r u c gc = do else expensiveRemoteCost (u', bupr') <- getBupUUID bupr u - let new = Remote + let this = Remote { uuid = u' , cost = cst , name = Git.repoDescribe r - , storeKey = store new buprepo - , retrieveKeyFile = retrieve buprepo + , storeKey = storeKeyDummy + , retrieveKeyFile = retreiveKeyFileDummy , retrieveKeyFileCheap = retrieveCheap buprepo - , removeKey = remove + , removeKey = remove buprepo , hasKey = checkPresent r bupr' , hasKeyCheap = bupLocal buprepo , whereisKey = Nothing @@ -78,9 +74,9 @@ gen r u c gc = do , readonly = False } return $ Just $ encryptableRemote c - (storeEncrypted new buprepo) - (retrieveEncrypted buprepo) - new + (simplyPrepare $ store this buprepo) + (simplyPrepare $ retrieve buprepo) + this where buprepo = fromMaybe (error "missing buprepo") $ remoteAnnexBupRepo gc @@ -115,72 +111,49 @@ bup command buprepo params = do showOutput -- make way for bup output liftIO $ boolSystem "bup" $ bupParams command buprepo params -pipeBup :: [CommandParam] -> Maybe Handle -> Maybe Handle -> IO Bool -pipeBup params inh outh = do - p <- runProcess "bup" (toCommand params) - Nothing Nothing inh outh Nothing - ok <- waitForProcess p - case ok of - ExitSuccess -> return True - _ -> return False - bupSplitParams :: Remote -> BupRepo -> Key -> [CommandParam] -> Annex [CommandParam] bupSplitParams r buprepo k src = do let os = map Param $ remoteAnnexBupSplitOptions $ gitconfig r showOutput -- make way for bup output return $ bupParams "split" buprepo - (os ++ [Param "-n", Param (bupRef k)] ++ src) + (os ++ [Param "-q", Param "-n", Param (bupRef k)] ++ src) -store :: Remote -> BupRepo -> Key -> AssociatedFile -> MeterUpdate -> Annex Bool -store r buprepo k _f _p = sendAnnex k (rollback k buprepo) $ \src -> do - params <- bupSplitParams r buprepo k [File src] - liftIO $ boolSystem "bup" params +store :: Remote -> BupRepo -> Storer +store r buprepo = byteStorer $ \k b p -> do + params <- bupSplitParams r buprepo k [] + let cmd = proc "bup" (toCommand params) + liftIO $ withHandle StdinHandle createProcessSuccess cmd $ \h -> do + meteredWrite p h b + return True -storeEncrypted :: Remote -> BupRepo -> (Cipher, Key) -> Key -> MeterUpdate -> Annex Bool -storeEncrypted r buprepo (cipher, enck) k _p = - sendAnnex k (rollback enck buprepo) $ \src -> do - params <- bupSplitParams r buprepo enck [] - liftIO $ catchBoolIO $ - encrypt (getGpgEncParams r) cipher (feedFile src) $ \h -> - pipeBup params (Just h) Nothing - -retrieve :: BupRepo -> Key -> AssociatedFile -> FilePath -> MeterUpdate -> Annex Bool -retrieve buprepo k _f d _p = do - let params = bupParams "join" buprepo [Param $ bupRef k] - liftIO $ catchBoolIO $ withFile d WriteMode $ - pipeBup params Nothing . Just +retrieve :: BupRepo -> Retriever +retrieve buprepo = fileRetriever $ \d k _p -> + liftIO $ withFile d WriteMode $ \h -> do + let params = bupParams "join" buprepo [Param $ bupRef k] + let p = proc "bup" (toCommand params) + (_, _, _, pid) <- createProcess $ p { std_out = UseHandle h } + forceSuccessProcess p pid retrieveCheap :: BupRepo -> Key -> FilePath -> Annex Bool retrieveCheap _ _ _ = return False -retrieveEncrypted :: BupRepo -> (Cipher, Key) -> Key -> FilePath -> MeterUpdate -> Annex Bool -retrieveEncrypted buprepo (cipher, enck) _ f _p = liftIO $ catchBoolIO $ - withHandle StdoutHandle createProcessSuccess p $ \h -> do - decrypt cipher (\toh -> L.hPut toh =<< L.hGetContents h) $ - readBytes $ L.writeFile f - return True - where - params = bupParams "join" buprepo [Param $ bupRef enck] - p = proc "bup" $ toCommand params - -remove :: Key -> Annex Bool -remove _ = do - warning "content cannot be removed from bup remote" - return False - {- Cannot revert having stored a key in bup, but at least the data for the - key will be used for deltaing data of other keys stored later. - - We can, however, remove the git branch that bup created for the key. -} -rollback :: Key -> BupRepo -> Annex () -rollback k bupr = go =<< liftIO (bup2GitRemote bupr) +remove :: BupRepo -> Key -> Annex Bool +remove buprepo k = do + go =<< liftIO (bup2GitRemote buprepo) + warning "content cannot be completely removed from bup remote" + return True where go r | Git.repoIsUrl r = void $ onBupRemote r boolSystem "git" params - | otherwise = void $ liftIO $ catchMaybeIO $ - boolSystem "git" $ Git.Command.gitCommandLine params r - params = [ Params "branch -D", Param (bupRef k) ] + | otherwise = void $ liftIO $ catchMaybeIO $ do + r' <- Git.Config.read r + boolSystem "git" $ Git.Command.gitCommandLine params r' + params = [ Params "branch -q -D", Param (bupRef k) ] {- Bup does not provide a way to tell if a given dataset is present - in a bup repository. One way it to check if the git repository has diff --git a/debian/changelog b/debian/changelog index 4c6eac663f..ca6059f43e 100644 --- a/debian/changelog +++ b/debian/changelog @@ -18,6 +18,9 @@ git-annex (5.20140718) UNRELEASED; urgency=medium * Display exception message when a transfer fails due to an exception. * WebDAV: Dropped support for DAV before 0.6.1. * testremote: New command to test uploads/downloads to a remote. + * Dropping an object from a bup special remote now deletes the git branch + for the object, although of course the object's content cannot be deleted + due to the nature of bup. -- Joey Hess Mon, 21 Jul 2014 14:41:26 -0400 diff --git a/doc/design/assistant/progressbars.mdwn b/doc/design/assistant/progressbars.mdwn index 50f4245085..7de70452d0 100644 --- a/doc/design/assistant/progressbars.mdwn +++ b/doc/design/assistant/progressbars.mdwn @@ -14,7 +14,7 @@ This is one of those potentially hidden but time consuming problems. could use inotify. **done** * When easily available, remotes call the MeterUpdate callback as downloads progress. **done** -* S3 TODO +* S3: TODO While it has a download progress bar, `getObject` probably buffers the whole download in memory before returning. Leaving the progress bar to only display progress for writing the file out of memory. Fixing this would @@ -32,7 +32,7 @@ the MeterUpdate callback as the upload progresses. * webdav: **done** * S3: **done** * glacier: **done** -* bup: TODO +* bup: **done** * hook: Would require the hook interface to somehow do this, which seems too complicated. So skipping. From 19b71cfb8fd9553c14baeda21e443169c8fb255b Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Sat, 2 Aug 2014 18:58:38 -0400 Subject: [PATCH 07/44] convert ddar to new ChunkedEncryptable API (but do not support chunking) Since ddar de-deuplicates, I assume there is no benefit from chunking. This has not been tested! --- Remote/Ddar.hs | 67 +++++++++++--------------------------------------- 1 file changed, 14 insertions(+), 53 deletions(-) diff --git a/Remote/Ddar.hs b/Remote/Ddar.hs index 7218226e8b..365506a22c 100644 --- a/Remote/Ddar.hs +++ b/Remote/Ddar.hs @@ -9,7 +9,6 @@ module Remote.Ddar (remote) where import Control.Exception -import qualified Data.ByteString.Lazy as L import qualified Data.Map as M import System.IO.Error import System.Process @@ -23,12 +22,9 @@ import qualified Git import Config import Config.Cost import Remote.Helper.Special -import Remote.Helper.Encryptable -import Crypto -import Annex.Content +import Remote.Helper.ChunkedEncryptable import Annex.Ssh import Annex.UUID -import Utility.Metered type DdarRepo = String @@ -46,13 +42,17 @@ gen r u c gc = do if ddarLocal ddarrepo then nearlyCheapRemoteCost else expensiveRemoteCost - - let new = Remote + return $ Just $ encryptableRemote c + (simplyPrepare $ store ddarrepo) + (simplyPrepare $ retrieve ddarrepo) + (this cst) + where + this cst = Remote { uuid = u , cost = cst , name = Git.repoDescribe r - , storeKey = store ddarrepo - , retrieveKeyFile = retrieve ddarrepo + , storeKey = storeKeyDummy + , retrieveKeyFile = retreiveKeyFileDummy , retrieveKeyFileCheap = retrieveCheap , removeKey = remove ddarrepo , hasKey = checkPresent ddarrepo @@ -70,11 +70,6 @@ gen r u c gc = do , availability = if ddarLocal ddarrepo then LocallyAvailable else GloballyAvailable , readonly = False } - return $ Just $ encryptableRemote c - (storeEncrypted new ddarrepo) - (retrieveEncrypted ddarrepo) - new - where ddarrepo = fromMaybe (error "missing ddarrepo") $ remoteAnnexDdarRepo gc ddarSetup :: Maybe UUID -> Maybe CredPair -> RemoteConfig -> Annex (RemoteConfig, UUID) @@ -92,17 +87,8 @@ ddarSetup mu _ c = do return (c', u) -pipeDdar :: [CommandParam] -> Maybe Handle -> Maybe Handle -> IO Bool -pipeDdar params inh outh = do - p <- runProcess "ddar" (toCommand params) - Nothing Nothing inh outh Nothing - ok <- waitForProcess p - case ok of - ExitSuccess -> return True - _ -> return False - -store :: DdarRepo -> Key -> AssociatedFile -> MeterUpdate -> Annex Bool -store ddarrepo k _f _p = sendAnnex k (void $ remove ddarrepo k) $ \src -> do +store :: DdarRepo -> Storer +store ddarrepo = fileStorer $ \k src _p -> do let params = [ Param "c" , Param "-N" @@ -112,21 +98,6 @@ store ddarrepo k _f _p = sendAnnex k (void $ remove ddarrepo k) $ \src -> do ] liftIO $ boolSystem "ddar" params -storeEncrypted :: Remote -> DdarRepo -> (Cipher, Key) -> Key -> MeterUpdate -> Annex Bool -storeEncrypted r ddarrepo (cipher, enck) k _p = - sendAnnex k (void $ remove ddarrepo k) $ \src -> - liftIO $ catchBoolIO $ - encrypt (getGpgEncParams r) cipher (feedFile src) $ \h -> - pipeDdar params (Just h) Nothing - where - params = - [ Param "c" - , Param "-N" - , Param $ key2file enck - , Param ddarrepo - , Param "-" - ] - {- Convert remote DdarRepo to host and path on remote end -} splitRemoteDdarRepo :: DdarRepo -> (String, String) splitRemoteDdarRepo ddarrepo = @@ -155,27 +126,17 @@ ddarExtractRemoteCall :: DdarRepo -> Key -> Annex (String, [CommandParam]) ddarExtractRemoteCall ddarrepo k = ddarRemoteCall ddarrepo 'x' [Param "--force-stdout", Param $ key2file k] -retrieve :: DdarRepo -> Key -> AssociatedFile -> FilePath -> MeterUpdate -> Annex Bool -retrieve ddarrepo k _f d _p = do +retrieve :: DdarRepo -> Retriever +retrieve ddarrepo = fileRetriever $ \d k _p -> do (cmd, params) <- ddarExtractRemoteCall ddarrepo k - liftIO $ catchBoolIO $ withFile d WriteMode $ \h -> do + liftIO $ withFile d WriteMode $ \h -> do let p = (proc cmd $ toCommand params){ std_out = UseHandle h } (_, _, _, pid) <- Common.Annex.createProcess p forceSuccessProcess p pid - return True retrieveCheap :: Key -> FilePath -> Annex Bool retrieveCheap _ _ = return False -retrieveEncrypted :: DdarRepo -> (Cipher, Key) -> Key -> FilePath -> MeterUpdate -> Annex Bool -retrieveEncrypted ddarrepo (cipher, enck) _ f _p = do - (cmd, params) <- ddarExtractRemoteCall ddarrepo enck - let p = proc cmd $ toCommand params - liftIO $ catchBoolIO $ withHandle StdoutHandle createProcessSuccess p $ \h -> do - decrypt cipher (\toh -> L.hPut toh =<< L.hGetContents h) $ - readBytes $ L.writeFile f - return True - remove :: DdarRepo -> Key -> Annex Bool remove ddarrepo key = do (cmd, params) <- ddarRemoteCall ddarrepo 'd' [Param $ key2file key] From d05b7b9182d84886487903f3c0040a4ebfef60b1 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Sun, 3 Aug 2014 01:12:24 -0400 Subject: [PATCH 08/44] better byteRetriever Make the byteRetriever be passed the callback that consumes the bytestring. This way, there's no worries about the lazy bytestring not all being read when the resource that's creating it is closed. Which in turn lets bup, ddar, and S3 each switch from using an unncessary fileRetriver to a byteRetriever. So, more efficient on chunks and encrypted files. The only remaining fileRetrievers are hook and external, which really do retrieve to files. --- Remote/Bup.hs | 13 ++++++------ Remote/Ddar.hs | 12 +++++------ Remote/Directory.hs | 4 ++-- Remote/Directory/LegacyChunked.hs | 10 ++++----- Remote/Glacier.hs | 33 ++++++++++++++--------------- Remote/Helper/ChunkedEncryptable.hs | 8 ++++--- Remote/S3.hs | 4 ++-- Types/StoreRetrieve.hs | 2 +- 8 files changed, 44 insertions(+), 42 deletions(-) diff --git a/Remote/Bup.hs b/Remote/Bup.hs index 06679c4b8a..44ea8c7d83 100644 --- a/Remote/Bup.hs +++ b/Remote/Bup.hs @@ -8,6 +8,7 @@ module Remote.Bup (remote) where import qualified Data.Map as M +import qualified Data.ByteString.Lazy as L import Data.ByteString.Lazy.UTF8 (fromString) import Common.Annex @@ -127,12 +128,12 @@ store r buprepo = byteStorer $ \k b p -> do return True retrieve :: BupRepo -> Retriever -retrieve buprepo = fileRetriever $ \d k _p -> - liftIO $ withFile d WriteMode $ \h -> do - let params = bupParams "join" buprepo [Param $ bupRef k] - let p = proc "bup" (toCommand params) - (_, _, _, pid) <- createProcess $ p { std_out = UseHandle h } - forceSuccessProcess p pid +retrieve buprepo = byteRetriever $ \k sink -> do + let params = bupParams "join" buprepo [Param $ bupRef k] + let p = proc "bup" (toCommand params) + (_, Just h, _, pid) <- liftIO $ createProcess $ p { std_out = CreatePipe } + liftIO (hClose h >> forceSuccessProcess p pid) + `after` (sink =<< liftIO (L.hGetContents h)) retrieveCheap :: BupRepo -> Key -> FilePath -> Annex Bool retrieveCheap _ _ _ = return False diff --git a/Remote/Ddar.hs b/Remote/Ddar.hs index 365506a22c..bc4755a81c 100644 --- a/Remote/Ddar.hs +++ b/Remote/Ddar.hs @@ -10,8 +10,8 @@ module Remote.Ddar (remote) where import Control.Exception import qualified Data.Map as M +import qualified Data.ByteString.Lazy as L import System.IO.Error -import System.Process import Data.String.Utils import Common.Annex @@ -127,12 +127,12 @@ ddarExtractRemoteCall ddarrepo k = ddarRemoteCall ddarrepo 'x' [Param "--force-stdout", Param $ key2file k] retrieve :: DdarRepo -> Retriever -retrieve ddarrepo = fileRetriever $ \d k _p -> do +retrieve ddarrepo = byteRetriever $ \k sink -> do (cmd, params) <- ddarExtractRemoteCall ddarrepo k - liftIO $ withFile d WriteMode $ \h -> do - let p = (proc cmd $ toCommand params){ std_out = UseHandle h } - (_, _, _, pid) <- Common.Annex.createProcess p - forceSuccessProcess p pid + let p = (proc cmd $ toCommand params) { std_out = CreatePipe } + (_, Just h, _, pid) <- liftIO $ createProcess p + liftIO (hClose h >> forceSuccessProcess p pid) + `after` (sink =<< liftIO (L.hGetContents h)) retrieveCheap :: Key -> FilePath -> Annex Bool retrieveCheap _ _ = return False diff --git a/Remote/Directory.hs b/Remote/Directory.hs index a879875298..78d30b1a16 100644 --- a/Remote/Directory.hs +++ b/Remote/Directory.hs @@ -136,8 +136,8 @@ store d chunkconfig k b p = liftIO $ do retrieve :: FilePath -> ChunkConfig -> Preparer Retriever retrieve d (LegacyChunks _) = Legacy.retrieve locations d -retrieve d _ = simplyPrepare $ byteRetriever $ \k -> - liftIO $ L.readFile =<< getLocation d k +retrieve d _ = simplyPrepare $ byteRetriever $ \k sink -> + sink =<< liftIO (L.readFile =<< getLocation d k) retrieveCheap :: FilePath -> ChunkConfig -> Key -> FilePath -> Annex Bool -- no cheap retrieval possible for chunks diff --git a/Remote/Directory/LegacyChunked.hs b/Remote/Directory/LegacyChunked.hs index 5c200570c8..a198688024 100644 --- a/Remote/Directory/LegacyChunked.hs +++ b/Remote/Directory/LegacyChunked.hs @@ -94,14 +94,14 @@ retrieve locations d basek a = do tmpdir <- fromRepo $ gitAnnexTmpMiscDir createAnnexDirectory tmpdir let tmp = tmpdir keyFile basek ++ ".directorylegacy.tmp" - a $ Just $ byteRetriever $ \k -> liftIO $ do - void $ withStoredFiles d locations k $ \fs -> do + a $ Just $ byteRetriever $ \k sink -> do + liftIO $ void $ withStoredFiles d locations k $ \fs -> do forM_ fs $ S.appendFile tmp <=< S.readFile return True - b <- L.readFile tmp - nukeFile tmp - return b + b <- liftIO $ L.readFile tmp + liftIO $ nukeFile tmp + sink b checkPresent :: FilePath -> (FilePath -> Key -> [FilePath]) -> Key -> Annex (Either String Bool) checkPresent d locations k = liftIO $ catchMsgIO $ diff --git a/Remote/Glacier.hs b/Remote/Glacier.hs index 9b428bd80a..592a7db1f0 100644 --- a/Remote/Glacier.hs +++ b/Remote/Glacier.hs @@ -20,7 +20,6 @@ import Config.Cost import Remote.Helper.Special import Remote.Helper.ChunkedEncryptable import qualified Remote.Helper.AWS as AWS -import Crypto import Creds import Utility.Metered import qualified Annex @@ -120,11 +119,10 @@ store r k b p = go =<< glacierEnv c u return True prepareRetrieve :: Remote -> Preparer Retriever -prepareRetrieve r = simplyPrepare $ fileRetriever $ \d k p -> - retrieve r k (readBytes (meteredWriteFile p d)) +prepareRetrieve = simplyPrepare . byteRetriever . retrieve -retrieve :: Remote -> Key -> (Handle -> IO ()) -> Annex () -retrieve r k reader = go =<< glacierEnv c u +retrieve :: Remote -> Key -> (L.ByteString -> Annex Bool) -> Annex Bool +retrieve r k sink = go =<< glacierEnv c u where c = config r u = uuid r @@ -138,17 +136,21 @@ retrieve r k reader = go =<< glacierEnv c u go Nothing = error "cannot retrieve from glacier" go (Just e) = do let cmd = (proc "glacier" (toCommand params)) { env = Just e } - ok <- liftIO $ catchBoolIO $ - withHandle StdoutHandle createProcessSuccess cmd $ \h -> - ifM (hIsEOF h) - ( return False - , do - reader h - return True - ) + (_, Just h, _, pid) <- liftIO $ createProcess cmd + -- Glacier cannot store empty files, so if the output is + -- empty, the content is not available yet. + ok <- ifM (liftIO $ hIsEOF h) + ( return False + , sink =<< liftIO (L.hGetContents h) + ) + liftIO $ hClose h + liftIO $ forceSuccessProcess cmd pid unless ok $ do showLongNote "Recommend you wait up to 4 hours, and then run this command again." - error "not yet available" + return ok + +retrieveCheap :: Remote -> Key -> FilePath -> Annex Bool +retrieveCheap _ _ _ = return False remove :: Remote -> Key -> Annex Bool remove r k = glacierAction r @@ -159,9 +161,6 @@ remove r k = glacierAction r , Param $ archive r k ] -retrieveCheap :: Remote -> Key -> FilePath -> Annex Bool -retrieveCheap _ _ _ = return False - checkPresent :: Remote -> Key -> Annex (Either String Bool) checkPresent r k = do showAction $ "checking " ++ name r diff --git a/Remote/Helper/ChunkedEncryptable.hs b/Remote/Helper/ChunkedEncryptable.hs index e607715514..9c6ba98a2b 100644 --- a/Remote/Helper/ChunkedEncryptable.hs +++ b/Remote/Helper/ChunkedEncryptable.hs @@ -77,9 +77,11 @@ fileRetriever a k m callback = do a f k m callback (FileContent f) --- A Retriever that generates a L.ByteString containing the Key's content. -byteRetriever :: (Key -> Annex L.ByteString) -> Retriever -byteRetriever a k _m callback = callback =<< (ByteContent <$> a k) +-- A Retriever that generates a lazy ByteString containing the Key's +-- content, and passes it to a callback action which will fully consume it +-- before returning. +byteRetriever :: (Key -> (L.ByteString -> Annex Bool) -> Annex Bool) -> Retriever +byteRetriever a k _m callback = a k (callback . ByteContent) {- The base Remote that is provided to chunkedEncryptableRemote - needs to have storeKey and retreiveKeyFile methods, but they are diff --git a/Remote/S3.hs b/Remote/S3.hs index ed9122cab1..68d8ee4bfb 100644 --- a/Remote/S3.hs +++ b/Remote/S3.hs @@ -147,9 +147,9 @@ store (conn, bucket) r k p file = do prepareRetrieve :: Remote -> Preparer Retriever prepareRetrieve r = resourcePrepare (const $ s3Action r False) $ \(conn, bucket) -> - byteRetriever $ \k -> + byteRetriever $ \k sink -> liftIO (getObject conn $ bucketKey r bucket k) - >>= either s3Error (return . obj_data) + >>= either s3Error (sink . obj_data) retrieveCheap :: Remote -> Key -> FilePath -> Annex Bool retrieveCheap _ _ _ = return False diff --git a/Types/StoreRetrieve.hs b/Types/StoreRetrieve.hs index bde7489604..9fc0634c4e 100644 --- a/Types/StoreRetrieve.hs +++ b/Types/StoreRetrieve.hs @@ -30,6 +30,6 @@ isByteContent (FileContent _) = False type Storer = Key -> ContentSource -> MeterUpdate -> Annex Bool -- Action that retrieves a Key's content from a remote, passing it to a --- callback. +-- callback, which will fully consume the content before returning. -- Throws exception if key is not present, or remote is not accessible. type Retriever = Key -> MeterUpdate -> (ContentSource -> Annex Bool) -> Annex Bool From 00f92a7e59fcf1ed502105293c70effc300bc91f Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Sun, 3 Aug 2014 01:21:38 -0400 Subject: [PATCH 09/44] whitespace --- Remote/Helper/Chunked.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Remote/Helper/Chunked.hs b/Remote/Helper/Chunked.hs index 0d786c98dd..a7c43801aa 100644 --- a/Remote/Helper/Chunked.hs +++ b/Remote/Helper/Chunked.hs @@ -299,7 +299,7 @@ retrieveChunks retriever u chunkconfig encryptor basek dest basep sink - - However, if the Retriever generates a lazy ByteString, - it is not responsible for updating progress (often it cannot). - - Instead, the sink is passed a meter to update as it consumes + - Instead, the sink is passed a meter to update as it consumes - the ByteString. -} tosink h p content = sink h p' content From e1e5853c94b7c812c5b1b9d142cc14e5d6579743 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Sun, 3 Aug 2014 15:07:43 -0400 Subject: [PATCH 10/44] rsync: support chunking Chunking does not speed up rsync at all, so it's only useful for interop with the directory special remote. --- debian/changelog | 5 +++-- doc/special_remotes/rsync.mdwn | 10 +++++++--- 2 files changed, 10 insertions(+), 5 deletions(-) diff --git a/debian/changelog b/debian/changelog index ca6059f43e..2e00728ad9 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,8 +1,8 @@ git-annex (5.20140718) UNRELEASED; urgency=medium * New chunk= option to chunk files stored in special remotes. - Currently supported by: directory, S3, and all external and hook - special remotes. + Currently supported by: directory, S3, rsync, and all external + and hook special remotes. * Partially transferred files are automatically resumed when using chunked remotes! * The old chunksize= option is deprecated. Do not use for new remotes. @@ -21,6 +21,7 @@ git-annex (5.20140718) UNRELEASED; urgency=medium * Dropping an object from a bup special remote now deletes the git branch for the object, although of course the object's content cannot be deleted due to the nature of bup. + * -- Joey Hess Mon, 21 Jul 2014 14:41:26 -0400 diff --git a/doc/special_remotes/rsync.mdwn b/doc/special_remotes/rsync.mdwn index b2a9d23f5d..eb218b181a 100644 --- a/doc/special_remotes/rsync.mdwn +++ b/doc/special_remotes/rsync.mdwn @@ -14,14 +14,14 @@ Or for using rsync over SSH These parameters can be passed to `git annex initremote` to configure rsync: +* `rsyncurl` - Required. This is the url or `hostname:/directory` to + pass to rsync to tell it where to store content. + * `encryption` - One of "none", "hybrid", "shared", or "pubkey". See [[encryption]]. * `keyid` - Specifies the gpg key to use for [[encryption]]. -* `rsyncurl` - Required. This is the url or `hostname:/directory` to - pass to rsync to tell it where to store content. - * `shellescape` - Optional. Set to "no" to avoid shell escaping normally done when using rsync over ssh. That escaping is needed with typical setups, but not with some hosting providers that do not expose rsynced @@ -30,6 +30,10 @@ These parameters can be passed to `git annex initremote` to configure rsync: quote (`'`) character. If that happens, you can run enableremote setting shellescape=no. +* `chunk` - Enables [[chunking]] when storing large files. + This is typically not a win for rsync, so no need to enable it. + But, it makes this interoperate with the [[directory]] special remote. + The `annex-rsync-options` git configuration setting can be used to pass parameters to rsync. From 4b16989e98a3b6fdeb418fd87fba9ac1e65e17cb Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Sun, 3 Aug 2014 15:35:23 -0400 Subject: [PATCH 11/44] roll ChunkedEncryptable into Special and improve interface Allow disabling progress displays, for eg, rsync. --- Command/TestRemote.hs | 2 +- Remote/Bup.hs | 7 +- Remote/Ddar.hs | 7 +- Remote/Directory.hs | 5 +- Remote/Directory/LegacyChunked.hs | 2 +- Remote/External.hs | 3 +- Remote/Glacier.hs | 7 +- Remote/Helper/Chunked.hs | 6 +- Remote/Helper/ChunkedEncryptable.hs | 212 -------------------------- Remote/Helper/Special.hs | 223 +++++++++++++++++++++++++++- Remote/Hook.hs | 3 +- Remote/S3.hs | 3 +- Remote/WebDAV.hs | 5 +- 13 files changed, 245 insertions(+), 240 deletions(-) delete mode 100644 Remote/Helper/ChunkedEncryptable.hs diff --git a/Command/TestRemote.hs b/Command/TestRemote.hs index 29a2e809cd..3f75214a56 100644 --- a/Command/TestRemote.hs +++ b/Command/TestRemote.hs @@ -76,7 +76,7 @@ perform rs ks = do where desc r' k = intercalate "; " $ map unwords [ [ "key size", show (keySize k) ] - , [ show (chunkConfig (Remote.config r')) ] + , [ show (getChunkConfig (Remote.config r')) ] , ["encryption", fromMaybe "none" (M.lookup "encryption" (Remote.config r'))] ] diff --git a/Remote/Bup.hs b/Remote/Bup.hs index 44ea8c7d83..6a04ad5f7f 100644 --- a/Remote/Bup.hs +++ b/Remote/Bup.hs @@ -25,7 +25,6 @@ import Config import Config.Cost import qualified Remote.Helper.Ssh as Ssh import Remote.Helper.Special -import Remote.Helper.ChunkedEncryptable import Remote.Helper.Messages import Utility.Hash import Utility.UserInfo @@ -74,12 +73,16 @@ gen r u c gc = do , availability = if bupLocal buprepo then LocallyAvailable else GloballyAvailable , readonly = False } - return $ Just $ encryptableRemote c + return $ Just $ specialRemote' specialcfg c (simplyPrepare $ store this buprepo) (simplyPrepare $ retrieve buprepo) this where buprepo = fromMaybe (error "missing buprepo") $ remoteAnnexBupRepo gc + specialcfg = (specialRemoteCfg c) + -- chunking would not improve bup + { chunkConfig = NoChunks + } bupSetup :: Maybe UUID -> Maybe CredPair -> RemoteConfig -> Annex (RemoteConfig, UUID) bupSetup mu _ c = do diff --git a/Remote/Ddar.hs b/Remote/Ddar.hs index bc4755a81c..b4c7ac1e62 100644 --- a/Remote/Ddar.hs +++ b/Remote/Ddar.hs @@ -22,7 +22,6 @@ import qualified Git import Config import Config.Cost import Remote.Helper.Special -import Remote.Helper.ChunkedEncryptable import Annex.Ssh import Annex.UUID @@ -42,7 +41,7 @@ gen r u c gc = do if ddarLocal ddarrepo then nearlyCheapRemoteCost else expensiveRemoteCost - return $ Just $ encryptableRemote c + return $ Just $ specialRemote' specialcfg c (simplyPrepare $ store ddarrepo) (simplyPrepare $ retrieve ddarrepo) (this cst) @@ -71,6 +70,10 @@ gen r u c gc = do , readonly = False } ddarrepo = fromMaybe (error "missing ddarrepo") $ remoteAnnexDdarRepo gc + specialcfg = (specialRemoteCfg c) + -- chunking would not improve ddar + { chunkConfig = NoChunks + } ddarSetup :: Maybe UUID -> Maybe CredPair -> RemoteConfig -> Annex (RemoteConfig, UUID) ddarSetup mu _ c = do diff --git a/Remote/Directory.hs b/Remote/Directory.hs index 78d30b1a16..db141e01ad 100644 --- a/Remote/Directory.hs +++ b/Remote/Directory.hs @@ -20,7 +20,6 @@ import Config.Cost import Config import Utility.FileMode import Remote.Helper.Special -import Remote.Helper.ChunkedEncryptable import qualified Remote.Directory.LegacyChunked as Legacy import Annex.Content import Annex.UUID @@ -37,8 +36,8 @@ remote = RemoteType { gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote) gen r u c gc = do cst <- remoteCost gc cheapRemoteCost - let chunkconfig = chunkConfig c - return $ Just $ chunkedEncryptableRemote c + let chunkconfig = getChunkConfig c + return $ Just $ specialRemote c (prepareStore dir chunkconfig) (retrieve dir chunkconfig) Remote { diff --git a/Remote/Directory/LegacyChunked.hs b/Remote/Directory/LegacyChunked.hs index a198688024..1be885db21 100644 --- a/Remote/Directory/LegacyChunked.hs +++ b/Remote/Directory/LegacyChunked.hs @@ -14,7 +14,7 @@ import qualified Data.ByteString as S import Common.Annex import Utility.FileMode -import Remote.Helper.ChunkedEncryptable +import Remote.Helper.Special import qualified Remote.Helper.Chunked.Legacy as Legacy import Annex.Perms import Utility.Metered diff --git a/Remote/External.hs b/Remote/External.hs index 1c22a589bf..c00093402f 100644 --- a/Remote/External.hs +++ b/Remote/External.hs @@ -15,7 +15,6 @@ import Types.CleanupActions import qualified Git import Config import Remote.Helper.Special -import Remote.Helper.ChunkedEncryptable import Utility.Metered import Logs.Transfer import Logs.PreferredContent.Raw @@ -43,7 +42,7 @@ gen r u c gc = do Annex.addCleanup (RemoteCleanup u) $ stopExternal external cst <- getCost external r gc avail <- getAvailability external r gc - return $ Just $ chunkedEncryptableRemote c + return $ Just $ specialRemote c (simplyPrepare $ store external) (simplyPrepare $ retrieve external) Remote { diff --git a/Remote/Glacier.hs b/Remote/Glacier.hs index 592a7db1f0..c5bfefa641 100644 --- a/Remote/Glacier.hs +++ b/Remote/Glacier.hs @@ -18,7 +18,6 @@ import qualified Git import Config import Config.Cost import Remote.Helper.Special -import Remote.Helper.ChunkedEncryptable import qualified Remote.Helper.AWS as AWS import Creds import Utility.Metered @@ -40,7 +39,7 @@ remote = RemoteType { gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote) gen r u c gc = new <$> remoteCost gc veryExpensiveRemoteCost where - new cst = Just $ encryptableRemote c + new cst = Just $ specialRemote' specialcfg c (prepareStore this) (prepareRetrieve this) this @@ -66,6 +65,10 @@ gen r u c gc = new <$> remoteCost gc veryExpensiveRemoteCost availability = GloballyAvailable, remotetype = remote } + specialcfg = (specialRemoteCfg c) + -- Disabled until jobList gets support for chunks. + { chunkConfig = NoChunks + } glacierSetup :: Maybe UUID -> Maybe CredPair -> RemoteConfig -> Annex (RemoteConfig, UUID) glacierSetup mu mcreds c = do diff --git a/Remote/Helper/Chunked.hs b/Remote/Helper/Chunked.hs index a7c43801aa..2e9467b2a0 100644 --- a/Remote/Helper/Chunked.hs +++ b/Remote/Helper/Chunked.hs @@ -8,7 +8,7 @@ module Remote.Helper.Chunked ( ChunkSize, ChunkConfig(..), - chunkConfig, + getChunkConfig, storeChunks, removeChunks, retrieveChunks, @@ -39,8 +39,8 @@ noChunks :: ChunkConfig -> Bool noChunks NoChunks = True noChunks _ = False -chunkConfig :: RemoteConfig -> ChunkConfig -chunkConfig m = +getChunkConfig :: RemoteConfig -> ChunkConfig +getChunkConfig m = case M.lookup "chunksize" m of Nothing -> case M.lookup "chunk" m of Nothing -> NoChunks diff --git a/Remote/Helper/ChunkedEncryptable.hs b/Remote/Helper/ChunkedEncryptable.hs deleted file mode 100644 index 9c6ba98a2b..0000000000 --- a/Remote/Helper/ChunkedEncryptable.hs +++ /dev/null @@ -1,212 +0,0 @@ -{- Remotes that support both chunking and encryption. - - - - Copyright 2014 Joey Hess - - - - Licensed under the GNU GPL version 3 or higher. - -} - -{-# LANGUAGE RankNTypes #-} - -module Remote.Helper.ChunkedEncryptable ( - Preparer, - Storer, - Retriever, - simplyPrepare, - ContentSource, - checkPrepare, - resourcePrepare, - fileStorer, - byteStorer, - fileRetriever, - byteRetriever, - storeKeyDummy, - retreiveKeyFileDummy, - chunkedEncryptableRemote, - encryptableRemote, - module X -) where - -import Common.Annex -import Types.StoreRetrieve -import Types.Remote -import Crypto -import Config.Cost -import Utility.Metered -import Remote.Helper.Chunked as X -import Remote.Helper.Encryptable as X hiding (encryptableRemote) -import Annex.Content -import Annex.Exception - -import qualified Data.ByteString.Lazy as L -import Control.Exception (bracket) - --- Use when nothing needs to be done to prepare a helper. -simplyPrepare :: helper -> Preparer helper -simplyPrepare helper _ a = a $ Just helper - --- Use to run a check when preparing a helper. -checkPrepare :: (Key -> Annex Bool) -> helper -> Preparer helper -checkPrepare checker helper k a = ifM (checker k) - ( a (Just helper) - , a Nothing - ) - --- Use to acquire a resource when preparing a helper. -resourcePrepare :: (Key -> (r -> Annex Bool) -> Annex Bool) -> (r -> helper) -> Preparer helper -resourcePrepare withr helper k a = withr k $ \r -> - a (Just (helper r)) - --- A Storer that expects to be provided with a file containing --- the content of the key to store. -fileStorer :: (Key -> FilePath -> MeterUpdate -> Annex Bool) -> Storer -fileStorer a k (FileContent f) m = a k f m -fileStorer a k (ByteContent b) m = withTmp k $ \f -> do - liftIO $ L.writeFile f b - a k f m - --- A Storer that expects to be provided with a L.ByteString of --- the content to store. -byteStorer :: (Key -> L.ByteString -> MeterUpdate -> Annex Bool) -> Storer -byteStorer a k c m = withBytes c $ \b -> a k b m - --- A Retriever that writes the content of a Key to a provided file. --- It is responsible for updating the progress meter as it retrieves data. -fileRetriever :: (FilePath -> Key -> MeterUpdate -> Annex ()) -> Retriever -fileRetriever a k m callback = do - f <- prepTmp k - a f k m - callback (FileContent f) - --- A Retriever that generates a lazy ByteString containing the Key's --- content, and passes it to a callback action which will fully consume it --- before returning. -byteRetriever :: (Key -> (L.ByteString -> Annex Bool) -> Annex Bool) -> Retriever -byteRetriever a k _m callback = a k (callback . ByteContent) - -{- The base Remote that is provided to chunkedEncryptableRemote - - needs to have storeKey and retreiveKeyFile methods, but they are - - never actually used (since chunkedEncryptableRemote replaces - - them). Here are some dummy ones. - -} -storeKeyDummy :: Key -> AssociatedFile -> MeterUpdate -> Annex Bool -storeKeyDummy _ _ _ = return False -retreiveKeyFileDummy :: Key -> AssociatedFile -> FilePath -> MeterUpdate -> Annex Bool -retreiveKeyFileDummy _ _ _ _ = return False - -type RemoteModifier = RemoteConfig -> Preparer Storer -> Preparer Retriever -> Remote -> Remote - --- Modifies a base Remote to support both chunking and encryption. -chunkedEncryptableRemote :: RemoteModifier -chunkedEncryptableRemote c = chunkedEncryptableRemote' (chunkConfig c) c - --- Modifies a base Remote to support encryption, but not chunking. -encryptableRemote :: RemoteModifier -encryptableRemote = chunkedEncryptableRemote' NoChunks - -chunkedEncryptableRemote' :: ChunkConfig -> RemoteModifier -chunkedEncryptableRemote' chunkconfig c preparestorer prepareretriever baser = encr - where - encr = baser - { storeKey = \k _f p -> cip >>= storeKeyGen k p - , retrieveKeyFile = \k _f d p -> cip >>= retrieveKeyFileGen k d p - , retrieveKeyFileCheap = \k d -> cip >>= maybe - (retrieveKeyFileCheap baser k d) - (\_ -> return False) - , removeKey = \k -> cip >>= removeKeyGen k - , hasKey = \k -> cip >>= hasKeyGen k - , cost = maybe - (cost baser) - (const $ cost baser + encryptedRemoteCostAdj) - (extractCipher c) - } - cip = cipherKey c - gpgopts = getGpgEncParams encr - - safely a = catchNonAsyncAnnex a (\e -> warning (show e) >> return False) - - -- chunk, then encrypt, then feed to the storer - storeKeyGen k p enc = - safely $ preparestorer k $ safely . go - where - go (Just storer) = sendAnnex k rollback $ \src -> - metered (Just p) k $ \p' -> - storeChunks (uuid baser) chunkconfig k src p' - (storechunk enc storer) - (hasKey baser) - go Nothing = return False - rollback = void $ removeKey encr k - - storechunk Nothing storer k content p = storer k content p - storechunk (Just (cipher, enck)) storer k content p = - withBytes content $ \b -> - encrypt gpgopts cipher (feedBytes b) $ - readBytes $ \encb -> - storer (enck k) (ByteContent encb) p - - -- call retriever to get chunks; decrypt them; stream to dest file - retrieveKeyFileGen k dest p enc = - safely $ prepareretriever k $ safely . go - where - go (Just retriever) = metered (Just p) k $ \p' -> - retrieveChunks retriever (uuid baser) chunkconfig - enck k dest p' (sink dest enc) - go Nothing = return False - enck = maybe id snd enc - - removeKeyGen k enc = removeChunks remover (uuid baser) chunkconfig enck k - where - enck = maybe id snd enc - remover = removeKey baser - - hasKeyGen k enc = hasKeyChunks checker (uuid baser) chunkconfig enck k - where - enck = maybe id snd enc - checker = hasKey baser - -{- Sink callback for retrieveChunks. Stores the file content into the - - provided Handle, decrypting it first if necessary. - - - - If the remote did not store the content using chunks, no Handle - - will be provided, and it's up to us to open the destination file. - - - - Note that when neither chunking nor encryption is used, and the remote - - provides FileContent, that file only needs to be renamed - - into place. (And it may even already be in the right place..) - -} -sink - :: FilePath - -> Maybe (Cipher, EncKey) - -> Maybe Handle - -> Maybe MeterUpdate - -> ContentSource - -> Annex Bool -sink dest enc mh mp content = do - case (enc, mh, content) of - (Nothing, Nothing, FileContent f) - | f == dest -> noop - | otherwise -> liftIO $ moveFile f dest - (Just (cipher, _), _, ByteContent b) -> - decrypt cipher (feedBytes b) $ - readBytes write - (Just (cipher, _), _, FileContent f) -> do - withBytes content $ \b -> - decrypt cipher (feedBytes b) $ - readBytes write - liftIO $ nukeFile f - (Nothing, _, FileContent f) -> do - withBytes content write - liftIO $ nukeFile f - (Nothing, _, ByteContent b) -> write b - return True - where - write b = case mh of - Just h -> liftIO $ b `streamto` h - Nothing -> liftIO $ bracket opendest hClose (b `streamto`) - streamto b h = case mp of - Just p -> meteredWrite p h b - Nothing -> L.hPut h b - opendest = openBinaryFile dest WriteMode - -withBytes :: ContentSource -> (L.ByteString -> Annex a) -> Annex a -withBytes (ByteContent b) a = a b -withBytes (FileContent f) a = a =<< liftIO (L.readFile f) diff --git a/Remote/Helper/Special.hs b/Remote/Helper/Special.hs index 7fc421f46f..2bcb7d530d 100644 --- a/Remote/Helper/Special.hs +++ b/Remote/Helper/Special.hs @@ -1,20 +1,51 @@ -{- common functions for special remotes +{- helpers for special remotes - - - Copyright 2011 Joey Hess + - Copyright 2011-2014 Joey Hess - - Licensed under the GNU GPL version 3 or higher. -} -module Remote.Helper.Special where - -import qualified Data.Map as M +module Remote.Helper.Special ( + findSpecialRemotes, + gitConfigSpecialRemote, + Preparer, + Storer, + Retriever, + simplyPrepare, + ContentSource, + checkPrepare, + resourcePrepare, + fileStorer, + byteStorer, + fileRetriever, + byteRetriever, + storeKeyDummy, + retreiveKeyFileDummy, + SpecialRemoteCfg(..), + specialRemoteCfg, + specialRemote, + specialRemote', + module X +) where import Common.Annex +import Types.StoreRetrieve import Types.Remote +import Crypto +import Config.Cost +import Utility.Metered +import Remote.Helper.Chunked as X +import Remote.Helper.Encryptable as X hiding (encryptableRemote) +import Annex.Content +import Annex.Exception import qualified Git import qualified Git.Command import qualified Git.Construct +import qualified Data.ByteString.Lazy as L +import Control.Exception (bracket) +import qualified Data.Map as M + {- Special remotes don't have a configured url, so Git.Repo does not - automatically generate remotes for them. This looks for a different - configuration key instead. @@ -38,3 +69,185 @@ gitConfigSpecialRemote u c k v = do [Param "config", Param (configsetting a), Param b] remotename = fromJust (M.lookup "name" c) configsetting s = "remote." ++ remotename ++ "." ++ s + +-- Use when nothing needs to be done to prepare a helper. +simplyPrepare :: helper -> Preparer helper +simplyPrepare helper _ a = a $ Just helper + +-- Use to run a check when preparing a helper. +checkPrepare :: (Key -> Annex Bool) -> helper -> Preparer helper +checkPrepare checker helper k a = ifM (checker k) + ( a (Just helper) + , a Nothing + ) + +-- Use to acquire a resource when preparing a helper. +resourcePrepare :: (Key -> (r -> Annex Bool) -> Annex Bool) -> (r -> helper) -> Preparer helper +resourcePrepare withr helper k a = withr k $ \r -> + a (Just (helper r)) + +-- A Storer that expects to be provided with a file containing +-- the content of the key to store. +fileStorer :: (Key -> FilePath -> MeterUpdate -> Annex Bool) -> Storer +fileStorer a k (FileContent f) m = a k f m +fileStorer a k (ByteContent b) m = withTmp k $ \f -> do + liftIO $ L.writeFile f b + a k f m + +-- A Storer that expects to be provided with a L.ByteString of +-- the content to store. +byteStorer :: (Key -> L.ByteString -> MeterUpdate -> Annex Bool) -> Storer +byteStorer a k c m = withBytes c $ \b -> a k b m + +-- A Retriever that writes the content of a Key to a provided file. +-- It is responsible for updating the progress meter as it retrieves data. +fileRetriever :: (FilePath -> Key -> MeterUpdate -> Annex ()) -> Retriever +fileRetriever a k m callback = do + f <- prepTmp k + a f k m + callback (FileContent f) + +-- A Retriever that generates a lazy ByteString containing the Key's +-- content, and passes it to a callback action which will fully consume it +-- before returning. +byteRetriever :: (Key -> (L.ByteString -> Annex Bool) -> Annex Bool) -> Retriever +byteRetriever a k _m callback = a k (callback . ByteContent) + +{- The base Remote that is provided to specialRemote needs to have + - storeKey and retreiveKeyFile methods, but they are never + - actually used (since specialRemote replaces them). + - Here are some dummy ones. + -} +storeKeyDummy :: Key -> AssociatedFile -> MeterUpdate -> Annex Bool +storeKeyDummy _ _ _ = return False +retreiveKeyFileDummy :: Key -> AssociatedFile -> FilePath -> MeterUpdate -> Annex Bool +retreiveKeyFileDummy _ _ _ _ = return False + +type RemoteModifier = RemoteConfig -> Preparer Storer -> Preparer Retriever -> Remote -> Remote + +data SpecialRemoteCfg = SpecialRemoteCfg + { chunkConfig :: ChunkConfig + , displayProgress :: Bool + } + +specialRemoteCfg :: RemoteConfig -> SpecialRemoteCfg +specialRemoteCfg c = SpecialRemoteCfg (getChunkConfig c) True + +-- Modifies a base Remote to support both chunking and encryption, +-- which special remotes typically should support. +specialRemote :: RemoteModifier +specialRemote c = specialRemote' (specialRemoteCfg c) c + +specialRemote' :: SpecialRemoteCfg -> RemoteModifier +specialRemote' cfg c preparestorer prepareretriever baser = encr + where + encr = baser + { storeKey = \k _f p -> cip >>= storeKeyGen k p + , retrieveKeyFile = \k _f d p -> cip >>= retrieveKeyFileGen k d p + , retrieveKeyFileCheap = \k d -> cip >>= maybe + (retrieveKeyFileCheap baser k d) + (\_ -> return False) + , removeKey = \k -> cip >>= removeKeyGen k + , hasKey = \k -> cip >>= hasKeyGen k + , cost = maybe + (cost baser) + (const $ cost baser + encryptedRemoteCostAdj) + (extractCipher c) + } + cip = cipherKey c + gpgopts = getGpgEncParams encr + + safely a = catchNonAsyncAnnex a (\e -> warning (show e) >> return False) + + -- chunk, then encrypt, then feed to the storer + storeKeyGen k p enc = + safely $ preparestorer k $ safely . go + where + go (Just storer) = sendAnnex k rollback $ \src -> + displayprogress p k $ \p' -> + storeChunks (uuid baser) chunkconfig k src p' + (storechunk enc storer) + (hasKey baser) + go Nothing = return False + rollback = void $ removeKey encr k + + storechunk Nothing storer k content p = storer k content p + storechunk (Just (cipher, enck)) storer k content p = + withBytes content $ \b -> + encrypt gpgopts cipher (feedBytes b) $ + readBytes $ \encb -> + storer (enck k) (ByteContent encb) p + + -- call retriever to get chunks; decrypt them; stream to dest file + retrieveKeyFileGen k dest p enc = + safely $ prepareretriever k $ safely . go + where + go (Just retriever) = displayprogress p k $ \p' -> + retrieveChunks retriever (uuid baser) chunkconfig + enck k dest p' (sink dest enc) + go Nothing = return False + enck = maybe id snd enc + + removeKeyGen k enc = removeChunks remover (uuid baser) chunkconfig enck k + where + enck = maybe id snd enc + remover = removeKey baser + + hasKeyGen k enc = hasKeyChunks checker (uuid baser) chunkconfig enck k + where + enck = maybe id snd enc + checker = hasKey baser + + chunkconfig = chunkConfig cfg + + displayprogress p k a + | displayProgress cfg = metered (Just p) k a + | otherwise = a p + +{- Sink callback for retrieveChunks. Stores the file content into the + - provided Handle, decrypting it first if necessary. + - + - If the remote did not store the content using chunks, no Handle + - will be provided, and it's up to us to open the destination file. + - + - Note that when neither chunking nor encryption is used, and the remote + - provides FileContent, that file only needs to be renamed + - into place. (And it may even already be in the right place..) + -} +sink + :: FilePath + -> Maybe (Cipher, EncKey) + -> Maybe Handle + -> Maybe MeterUpdate + -> ContentSource + -> Annex Bool +sink dest enc mh mp content = do + case (enc, mh, content) of + (Nothing, Nothing, FileContent f) + | f == dest -> noop + | otherwise -> liftIO $ moveFile f dest + (Just (cipher, _), _, ByteContent b) -> + decrypt cipher (feedBytes b) $ + readBytes write + (Just (cipher, _), _, FileContent f) -> do + withBytes content $ \b -> + decrypt cipher (feedBytes b) $ + readBytes write + liftIO $ nukeFile f + (Nothing, _, FileContent f) -> do + withBytes content write + liftIO $ nukeFile f + (Nothing, _, ByteContent b) -> write b + return True + where + write b = case mh of + Just h -> liftIO $ b `streamto` h + Nothing -> liftIO $ bracket opendest hClose (b `streamto`) + streamto b h = case mp of + Just p -> meteredWrite p h b + Nothing -> L.hPut h b + opendest = openBinaryFile dest WriteMode + +withBytes :: ContentSource -> (L.ByteString -> Annex a) -> Annex a +withBytes (ByteContent b) a = a b +withBytes (FileContent f) a = a =<< liftIO (L.readFile f) diff --git a/Remote/Hook.hs b/Remote/Hook.hs index 0668e2ca91..efbd9f8ba4 100644 --- a/Remote/Hook.hs +++ b/Remote/Hook.hs @@ -18,7 +18,6 @@ import Config import Config.Cost import Annex.UUID import Remote.Helper.Special -import Remote.Helper.ChunkedEncryptable import Utility.Env type Action = String @@ -35,7 +34,7 @@ remote = RemoteType { gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote) gen r u c gc = do cst <- remoteCost gc expensiveRemoteCost - return $ Just $ chunkedEncryptableRemote c + return $ Just $ specialRemote c (simplyPrepare $ store hooktype) (simplyPrepare $ retrieve hooktype) Remote { diff --git a/Remote/S3.hs b/Remote/S3.hs index 68d8ee4bfb..8603757eb6 100644 --- a/Remote/S3.hs +++ b/Remote/S3.hs @@ -25,7 +25,6 @@ import qualified Git import Config import Config.Cost import Remote.Helper.Special -import Remote.Helper.ChunkedEncryptable import qualified Remote.Helper.AWS as AWS import Creds import Utility.Metered @@ -45,7 +44,7 @@ remote = RemoteType { gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote) gen r u c gc = new <$> remoteCost gc expensiveRemoteCost where - new cst = Just $ chunkedEncryptableRemote c + new cst = Just $ specialRemote c (prepareStore this) (prepareRetrieve this) this diff --git a/Remote/WebDAV.hs b/Remote/WebDAV.hs index d6644cdc70..0bdd383602 100644 --- a/Remote/WebDAV.hs +++ b/Remote/WebDAV.hs @@ -28,7 +28,6 @@ import Config import Config.Cost import Remote.Helper.Special import Remote.Helper.Encryptable -import Remote.Helper.Chunked import qualified Remote.Helper.Chunked.Legacy as Legacy import Crypto import Creds @@ -122,7 +121,7 @@ storeHelper r k baseurl user pass b = catchBoolIO $ do where tmpurl = tmpLocation baseurl k keyurl = davLocation baseurl k - chunkconfig = chunkConfig $ config r + chunkconfig = getChunkConfig $ config r finalizer srcurl desturl = do void $ tryNonAsync (deleteDAV desturl user pass) mkdirRecursiveDAV (urlParent desturl) user pass @@ -220,7 +219,7 @@ withStoredFiles r k baseurl user pass onerr a = case chunkconfig of else a chunks where keyurl = davLocation baseurl k ++ keyFile k - chunkconfig = chunkConfig $ config r + chunkconfig = getChunkConfig $ config r davAction :: Remote -> a -> ((DavUrl, DavUser, DavPass) -> Annex a) -> Annex a davAction r unconfigured action = do From b3fe23b552d7ccf8f4b0a8a3dcb0f6e6a317a5f7 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Sun, 3 Aug 2014 16:18:40 -0400 Subject: [PATCH 12/44] remove redundant progress meter display code specialRemote handles all meter display, so this is redundant. --- Remote/Helper/Chunked.hs | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/Remote/Helper/Chunked.hs b/Remote/Helper/Chunked.hs index 2e9467b2a0..7ad790cb16 100644 --- a/Remote/Helper/Chunked.hs +++ b/Remote/Helper/Chunked.hs @@ -101,10 +101,8 @@ storeChunks u chunkconfig k f p storer checker = case chunkconfig of (UnpaddedChunks chunksize) | isStableKey k -> bracketIO open close (go chunksize) - _ -> showprogress $ storer k (FileContent f) + _ -> storer k (FileContent f) p where - showprogress = metered (Just p) k - open = tryIO $ openBinaryFile f ReadMode close (Right h) = hClose h @@ -113,11 +111,11 @@ storeChunks u chunkconfig k f p storer checker = go _ (Left e) = do warning (show e) return False - go chunksize (Right h) = showprogress $ \meterupdate -> do + go chunksize (Right h) = do let chunkkeys = chunkKeyStream k chunksize (chunkkeys', startpos) <- seekResume h chunkkeys checker b <- liftIO $ L.hGetContents h - gochunks meterupdate startpos chunksize b chunkkeys' + gochunks p startpos chunksize b chunkkeys' gochunks :: MeterUpdate -> BytesProcessed -> ChunkSize -> L.ByteString -> ChunkKeyStream -> Annex Bool gochunks meterupdate startpos chunksize = loop startpos . splitchunk From 6c450aad1d78f92c27b48840dd078d1b85548e23 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Sun, 3 Aug 2014 16:35:35 -0400 Subject: [PATCH 13/44] move ugly rsync zombie workaround This reaping of any processes came to cause me problems when redoing the rsync special remote -- a gpg process that was running gets waited on and the place that then checks its return code fails. I cannot reproduce any zombies when using the rsync special remote. But I still can when using a normal git remote, accessed over ssh. There is 1 zombie per file downloaded without this horrible hack enabled. So, move the hack to only be used in that case. --- Remote/Helper/Ssh.hs | 9 ++++++++- Utility/Rsync.hs | 10 ++-------- 2 files changed, 10 insertions(+), 9 deletions(-) diff --git a/Remote/Helper/Ssh.hs b/Remote/Helper/Ssh.hs index 05a98865ff..e0199dca3e 100644 --- a/Remote/Helper/Ssh.hs +++ b/Remote/Helper/Ssh.hs @@ -102,13 +102,20 @@ dropKey r key = onRemote r (boolSystem, False) "dropkey" rsyncHelper :: Maybe MeterUpdate -> [CommandParam] -> Annex Bool rsyncHelper callback params = do showOutput -- make way for progress bar - ifM (liftIO $ (maybe rsync rsyncProgress callback) params) + ok <- ifM (liftIO $ (maybe rsync rsyncProgress callback) params) ( return True , do showLongNote "rsync failed -- run git annex again to resume file transfer" return False ) + {- For an unknown reason, this causes rsync to run a second + - ssh process, which it neglects to wait on. + - Reap the resulting zombie. -} + liftIO reapZombies + + return ok + {- Generates rsync parameters that ssh to the remote and asks it - to either receive or send the key's content. -} rsyncParamsRemote :: Bool -> Remote -> Direction -> Key -> FilePath -> AssociatedFile -> Annex [CommandParam] diff --git a/Utility/Rsync.hs b/Utility/Rsync.hs index 60381264e8..d0a89b2b0c 100644 --- a/Utility/Rsync.hs +++ b/Utility/Rsync.hs @@ -66,14 +66,8 @@ rsyncParamsFixup = map fixup - The params must enable rsync's --progress mode for this to work. -} rsyncProgress :: MeterUpdate -> [CommandParam] -> IO Bool -rsyncProgress meterupdate params = do - r <- catchBoolIO $ - withHandle StdoutHandle createProcessSuccess p (feedprogress 0 []) - {- For an unknown reason, piping rsync's output like this does - - causes it to run a second ssh process, which it neglects to wait - - on. Reap the resulting zombie. -} - reapZombies - return r +rsyncProgress meterupdate params = catchBoolIO $ + withHandle StdoutHandle createProcessSuccess p (feedprogress 0 []) where p = proc "rsync" (toCommand $ rsyncParamsFixup params) feedprogress prev buf h = do From f5f961215b376ff5fcb7e78f5c2f3cce59fd95ad Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Sun, 3 Aug 2014 16:54:57 -0400 Subject: [PATCH 14/44] finish making rsync support chunking This breaks gcrypt, which relies on some internals of the rsync remote. To fix next.. --- Crypto.hs | 9 ++++++- Remote/GCrypt.hs | 4 ++-- Remote/Rsync.hs | 62 +++++++++++++++++++++--------------------------- 3 files changed, 37 insertions(+), 38 deletions(-) diff --git a/Crypto.hs b/Crypto.hs index dcefc2959a..10d6e5cef4 100644 --- a/Crypto.hs +++ b/Crypto.hs @@ -22,6 +22,7 @@ module Crypto ( describeCipher, decryptCipher, encryptKey, + isEncKey, feedFile, feedBytes, readBytes, @@ -150,9 +151,15 @@ type EncKey = Key -> Key encryptKey :: Mac -> Cipher -> EncKey encryptKey mac c k = stubKey { keyName = macWithCipher mac c (key2file k) - , keyBackendName = "GPG" ++ showMac mac + , keyBackendName = encryptedBackendNamePrefix ++ showMac mac } +encryptedBackendNamePrefix :: String +encryptedBackendNamePrefix = "GPG" + +isEncKey :: Key -> Bool +isEncKey k = encryptedBackendNamePrefix `isPrefixOf` keyBackendName k + type Feeder = Handle -> IO () type Reader m a = Handle -> m a diff --git a/Remote/GCrypt.hs b/Remote/GCrypt.hs index b2dd6cdaf5..523175fdc4 100644 --- a/Remote/GCrypt.hs +++ b/Remote/GCrypt.hs @@ -299,7 +299,7 @@ store r rsyncopts (cipher, enck) k p | otherwise = unsupportedUrl where gpgopts = getGpgEncParams r - storersync = Remote.Rsync.storeEncrypted rsyncopts gpgopts (cipher, enck) k p + storersync = undefined -- Remote.Rsync.storeEncrypted rsyncopts gpgopts (cipher, enck) k p storeshell = withTmp enck $ \tmp -> ifM (spoolencrypted $ readBytes $ \b -> catchBoolIO $ L.writeFile tmp b >> return True) ( Ssh.rsyncHelper (Just p) @@ -323,7 +323,7 @@ retrieve r rsyncopts (cipher, enck) k d p a >>= \b -> decrypt cipher (feedBytes b) (readBytes $ meteredWriteFile meterupdate d) - retrieversync = Remote.Rsync.retrieveEncrypted rsyncopts (cipher, enck) k d p + retrieversync = undefined -- Remote.Rsync.retrieveEncrypted rsyncopts (cipher, enck) k d p retrieveshell = withTmp enck $ \tmp -> ifM (Ssh.rsyncHelper (Just p) =<< Ssh.rsyncParamsRemote False r Download enck tmp Nothing) ( liftIO $ catchBoolIO $ do diff --git a/Remote/Rsync.hs b/Remote/Rsync.hs index 7d051d6cd3..d0bacd5859 100644 --- a/Remote/Rsync.hs +++ b/Remote/Rsync.hs @@ -9,8 +9,6 @@ module Remote.Rsync ( remote, - storeEncrypted, - retrieveEncrypted, remove, checkPresent, withRsyncScratchDir, @@ -27,7 +25,6 @@ import Annex.Content import Annex.UUID import Annex.Ssh import Remote.Helper.Special -import Remote.Helper.Encryptable import Remote.Rsync.RsyncUrl import Crypto import Utility.Rsync @@ -37,8 +34,8 @@ import Utility.PID import Annex.Perms import Logs.Transfer import Types.Creds +import Types.Key (isChunkKey) -import qualified Data.ByteString.Lazy as L import qualified Data.Map as M remote :: RemoteType @@ -56,15 +53,15 @@ gen r u c gc = do fromMaybe (error "missing rsyncurl") $ remoteAnnexRsyncUrl gc let o = genRsyncOpts c gc transport url let islocal = rsyncUrlIsPath $ rsyncUrl o - return $ Just $ encryptableRemote c - (storeEncrypted o $ getGpgEncParams (c,gc)) - (retrieveEncrypted o) + return $ Just $ specialRemote' specialcfg c + (simplyPrepare $ store o) + (simplyPrepare $ retrieve o) Remote { uuid = u , cost = cst , name = Git.repoDescribe r - , storeKey = store o - , retrieveKeyFile = retrieve o + , storeKey = storeKeyDummy + , retrieveKeyFile = retreiveKeyFileDummy , retrieveKeyFileCheap = retrieveCheap o , removeKey = remove o , hasKey = checkPresent r o @@ -82,6 +79,10 @@ gen r u c gc = do , availability = if islocal then LocallyAvailable else GloballyAvailable , remotetype = remote } + where + specialcfg = (specialRemoteCfg c) + -- Rsync displays its own progress. + { displayProgress = False } genRsyncOpts :: RemoteConfig -> RemoteGitConfig -> [CommandParam] -> RsyncUrl -> RsyncOpts genRsyncOpts c gc transport url = RsyncOpts @@ -139,32 +140,17 @@ rsyncSetup mu _ c = do gitConfigSpecialRemote u c' "rsyncurl" url return (c', u) -store :: RsyncOpts -> Key -> AssociatedFile -> MeterUpdate -> Annex Bool -store o k _f p = sendAnnex k (void $ remove o k) $ rsyncSend o p k False +store :: RsyncOpts -> Storer +store = fileStorer . rsyncSend -storeEncrypted :: RsyncOpts -> [CommandParam] -> (Cipher, Key) -> Key -> MeterUpdate -> Annex Bool -storeEncrypted o gpgOpts (cipher, enck) k p = withTmp enck $ \tmp -> - sendAnnex k (void $ remove o enck) $ \src -> do - liftIO $ encrypt gpgOpts cipher (feedFile src) $ - readBytes $ L.writeFile tmp - rsyncSend o p enck True tmp - -retrieve :: RsyncOpts -> Key -> AssociatedFile -> FilePath -> MeterUpdate -> Annex Bool -retrieve o k _ f p = rsyncRetrieve o k f (Just p) +retrieve :: RsyncOpts -> Retriever +retrieve o = fileRetriever $ \f k p -> + unlessM (rsyncRetrieve o k f (Just p)) $ + error "rsync failed" retrieveCheap :: RsyncOpts -> Key -> FilePath -> Annex Bool retrieveCheap o k f = ifM (preseedTmp k f) ( rsyncRetrieve o k f Nothing , return False ) -retrieveEncrypted :: RsyncOpts -> (Cipher, Key) -> Key -> FilePath -> MeterUpdate -> Annex Bool -retrieveEncrypted o (cipher, enck) _ f p = withTmp enck $ \tmp -> - ifM (rsyncRetrieve o enck tmp (Just p)) - ( liftIO $ catchBoolIO $ do - decrypt cipher (feedFile tmp) $ - readBytes $ L.writeFile f - return True - , return False - ) - remove :: RsyncOpts -> Key -> Annex Bool remove o k = do ps <- sendParams @@ -238,8 +224,8 @@ withRsyncScratchDir a = do removeDirectoryRecursive d rsyncRetrieve :: RsyncOpts -> Key -> FilePath -> Maybe MeterUpdate -> Annex Bool -rsyncRetrieve o k dest callback = - showResumable $ untilTrue (rsyncUrls o k) $ \u -> rsyncRemote Download o callback +rsyncRetrieve o k dest meterupdate = + showResumable $ untilTrue (rsyncUrls o k) $ \u -> rsyncRemote Download o meterupdate -- use inplace when retrieving to support resuming [ Param "--inplace" , Param u @@ -274,8 +260,8 @@ rsyncRemote direction o callback params = do - (When we have the right hash directory structure, we can just - pass --include=X --include=X/Y --include=X/Y/file --exclude=*) -} -rsyncSend :: RsyncOpts -> MeterUpdate -> Key -> Bool -> FilePath -> Annex Bool -rsyncSend o callback k canrename src = withRsyncScratchDir $ \tmp -> do +rsyncSend :: RsyncOpts -> Key -> FilePath -> MeterUpdate -> Annex Bool +rsyncSend o k src meterupdate = withRsyncScratchDir $ \tmp -> do let dest = tmp Prelude.head (keyPaths k) liftIO $ createDirectoryIfMissing True $ parentDir dest ok <- liftIO $ if canrename @@ -285,7 +271,7 @@ rsyncSend o callback k canrename src = withRsyncScratchDir $ \tmp -> do else createLinkOrCopy src dest ps <- sendParams if ok - then showResumable $ rsyncRemote Upload o (Just callback) $ ps ++ + then showResumable $ rsyncRemote Upload o (Just meterupdate) $ ps ++ [ Param "--recursive" , partialParams -- tmp/ to send contents of tmp dir @@ -293,3 +279,9 @@ rsyncSend o callback k canrename src = withRsyncScratchDir $ \tmp -> do , Param $ rsyncUrl o ] else return False + where + {- If the key being sent is encrypted or chunked, the file + - containing its content is a temp file, and so can be + - renamed into place. Otherwise, the file is the annexed + - object file, and has to be copied or hard linked into place. -} + canrename = isEncKey k || isChunkKey k From b35f7983ff1edd28cea58885a4d1c8d75799c66b Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Sun, 3 Aug 2014 17:31:10 -0400 Subject: [PATCH 15/44] convert gcrypt to new regime, including chunking Some reorg of Remote.Rsync code to export the things gcrypt needs. --- Remote/GCrypt.hs | 91 ++++++++++++++------------------- Remote/Rsync.hs | 83 +++++++++++++++--------------- debian/changelog | 2 +- doc/special_remotes/gcrypt.mdwn | 4 +- 4 files changed, 84 insertions(+), 96 deletions(-) diff --git a/Remote/GCrypt.hs b/Remote/GCrypt.hs index 523175fdc4..faf45b1d9d 100644 --- a/Remote/GCrypt.hs +++ b/Remote/GCrypt.hs @@ -29,7 +29,6 @@ import qualified Git.GCrypt import qualified Git.Construct import qualified Git.Types as Git () import qualified Annex.Branch -import qualified Annex.Content import Config import Config.Cost import Remote.Helper.Git @@ -38,7 +37,6 @@ import Remote.Helper.Special import Remote.Helper.Messages import qualified Remote.Helper.Ssh as Ssh import Utility.Metered -import Crypto import Annex.UUID import Annex.Ssh import qualified Remote.Rsync @@ -47,7 +45,6 @@ import Utility.Tmp import Logs.Remote import Logs.Transfer import Utility.Gpg -import Annex.Content remote :: RemoteType remote = RemoteType { @@ -101,8 +98,8 @@ gen' r u c gc = do { uuid = u , cost = cst , name = Git.repoDescribe r - , storeKey = \_ _ _ -> noCrypto - , retrieveKeyFile = \_ _ _ _ -> noCrypto + , storeKey = storeKeyDummy + , retrieveKeyFile = retreiveKeyFileDummy , retrieveKeyFileCheap = \_ _ -> return False , removeKey = remove this rsyncopts , hasKey = checkPresent this rsyncopts @@ -118,10 +115,14 @@ gen' r u c gc = do , availability = availabilityCalc r , remotetype = remote } - return $ Just $ encryptableRemote c - (store this rsyncopts) - (retrieve this rsyncopts) + return $ Just $ specialRemote' specialcfg c + (simplyPrepare $ store this rsyncopts) + (simplyPrepare $ retrieve this rsyncopts) this + where + specialcfg = (specialRemoteCfg c) + -- Rsync displays its own progress. + { displayProgress = False } rsyncTransportToObjects :: Git.Repo -> Annex ([CommandParam], String) rsyncTransportToObjects r = do @@ -147,7 +148,7 @@ rsyncTransport r noCrypto :: Annex a noCrypto = error "cannot use gcrypt remote without encryption enabled" -unsupportedUrl :: Annex a +unsupportedUrl :: a unsupportedUrl = error "using non-ssh remote repo url with gcrypt is not supported" gCryptSetup :: Maybe UUID -> Maybe CredPair -> RemoteConfig -> Annex (RemoteConfig, UUID) @@ -249,14 +250,19 @@ setupRepo gcryptid r denyNonFastForwards = "receive.denyNonFastForwards" -shellOrRsync :: Remote -> Annex a -> Annex a -> Annex a -shellOrRsync r ashell arsync = case method of - AccessShell -> ashell - _ -> arsync +isShell :: Remote -> Bool +isShell r = case method of + AccessShell -> True + _ -> False where method = toAccessMethod $ fromMaybe "" $ remoteAnnexGCrypt $ gitconfig r +shellOrRsync :: Remote -> Annex a -> Annex a -> Annex a +shellOrRsync r ashell arsync + | isShell r = ashell + | otherwise = arsync + {- Configure gcrypt to use the same list of keyids that - were passed to initremote as its participants. - Also, configure it to use a signing key that is in the list of @@ -287,51 +293,32 @@ setGcryptEncryption c remotename = do where remoteconfig n = ConfigKey $ n remotename -store :: Remote -> Remote.Rsync.RsyncOpts -> (Cipher, Key) -> Key -> MeterUpdate -> Annex Bool -store r rsyncopts (cipher, enck) k p - | not $ Git.repoIsUrl (repo r) = guardUsable (repo r) False $ - metered (Just p) k $ \meterupdate -> spoolencrypted $ \h -> do - let dest = gCryptLocation r enck +store :: Remote -> Remote.Rsync.RsyncOpts -> Storer +store r rsyncopts + | not $ Git.repoIsUrl (repo r) = + byteStorer $ \k b p -> guardUsable (repo r) False $ liftIO $ do + let dest = gCryptLocation r k createDirectoryIfMissing True $ parentDir dest - readBytes (meteredWriteFile meterupdate dest) h + meteredWriteFile p dest b return True - | Git.repoIsSsh (repo r) = shellOrRsync r storeshell storersync + | Git.repoIsSsh (repo r) = if isShell r + then fileStorer $ \k f p -> Ssh.rsyncHelper (Just p) + =<< Ssh.rsyncParamsRemote False r Upload k f Nothing + else fileStorer $ Remote.Rsync.store rsyncopts | otherwise = unsupportedUrl - where - gpgopts = getGpgEncParams r - storersync = undefined -- Remote.Rsync.storeEncrypted rsyncopts gpgopts (cipher, enck) k p - storeshell = withTmp enck $ \tmp -> - ifM (spoolencrypted $ readBytes $ \b -> catchBoolIO $ L.writeFile tmp b >> return True) - ( Ssh.rsyncHelper (Just p) - =<< Ssh.rsyncParamsRemote False r Upload enck tmp Nothing - , return False - ) - spoolencrypted a = Annex.Content.sendAnnex k noop $ \src -> - liftIO $ catchBoolIO $ - encrypt gpgopts cipher (feedFile src) a -retrieve :: Remote -> Remote.Rsync.RsyncOpts -> (Cipher, Key) -> Key -> FilePath -> MeterUpdate -> Annex Bool -retrieve r rsyncopts (cipher, enck) k d p - | not $ Git.repoIsUrl (repo r) = guardUsable (repo r) False $ do - retrievewith $ L.readFile src - return True - | Git.repoIsSsh (repo r) = shellOrRsync r retrieveshell retrieversync +retrieve :: Remote -> Remote.Rsync.RsyncOpts -> Retriever +retrieve r rsyncopts + | not $ Git.repoIsUrl (repo r) = byteRetriever $ \k sink -> + guardUsable (repo r) False $ + sink =<< liftIO (L.readFile $ gCryptLocation r k) + | Git.repoIsSsh (repo r) = if isShell r + then fileRetriever $ \f k p -> + unlessM (Ssh.rsyncHelper (Just p) =<< Ssh.rsyncParamsRemote False r Download k f Nothing) $ + error "rsync failed" + else fileRetriever $ Remote.Rsync.retrieve rsyncopts | otherwise = unsupportedUrl where - src = gCryptLocation r enck - retrievewith a = metered (Just p) k $ \meterupdate -> liftIO $ - a >>= \b -> - decrypt cipher (feedBytes b) - (readBytes $ meteredWriteFile meterupdate d) - retrieversync = undefined -- Remote.Rsync.retrieveEncrypted rsyncopts (cipher, enck) k d p - retrieveshell = withTmp enck $ \tmp -> - ifM (Ssh.rsyncHelper (Just p) =<< Ssh.rsyncParamsRemote False r Download enck tmp Nothing) - ( liftIO $ catchBoolIO $ do - decrypt cipher (feedFile tmp) $ - readBytes $ L.writeFile d - return True - , return False - ) remove :: Remote -> Remote.Rsync.RsyncOpts -> Key -> Annex Bool remove r rsyncopts k diff --git a/Remote/Rsync.hs b/Remote/Rsync.hs index d0bacd5859..421c451bdd 100644 --- a/Remote/Rsync.hs +++ b/Remote/Rsync.hs @@ -9,6 +9,8 @@ module Remote.Rsync ( remote, + store, + retrieve, remove, checkPresent, withRsyncScratchDir, @@ -54,8 +56,8 @@ gen r u c gc = do let o = genRsyncOpts c gc transport url let islocal = rsyncUrlIsPath $ rsyncUrl o return $ Just $ specialRemote' specialcfg c - (simplyPrepare $ store o) - (simplyPrepare $ retrieve o) + (simplyPrepare $ fileStorer $ store o) + (simplyPrepare $ fileRetriever $ retrieve o) Remote { uuid = u , cost = cst @@ -140,11 +142,44 @@ rsyncSetup mu _ c = do gitConfigSpecialRemote u c' "rsyncurl" url return (c', u) -store :: RsyncOpts -> Storer -store = fileStorer . rsyncSend +{- To send a single key is slightly tricky; need to build up a temporary + - directory structure to pass to rsync so it can create the hash + - directories. + - + - This would not be necessary if the hash directory structure used locally + - was always the same as that used on the rsync remote. So if that's ever + - unified, this gets nicer. + - (When we have the right hash directory structure, we can just + - pass --include=X --include=X/Y --include=X/Y/file --exclude=*) + -} +store :: RsyncOpts -> Key -> FilePath -> MeterUpdate -> Annex Bool +store o k src meterupdate = withRsyncScratchDir $ \tmp -> do + let dest = tmp Prelude.head (keyPaths k) + liftIO $ createDirectoryIfMissing True $ parentDir dest + ok <- liftIO $ if canrename + then do + rename src dest + return True + else createLinkOrCopy src dest + ps <- sendParams + if ok + then showResumable $ rsyncRemote Upload o (Just meterupdate) $ ps ++ + [ Param "--recursive" + , partialParams + -- tmp/ to send contents of tmp dir + , File $ addTrailingPathSeparator tmp + , Param $ rsyncUrl o + ] + else return False + where + {- If the key being sent is encrypted or chunked, the file + - containing its content is a temp file, and so can be + - renamed into place. Otherwise, the file is the annexed + - object file, and has to be copied or hard linked into place. -} + canrename = isEncKey k || isChunkKey k -retrieve :: RsyncOpts -> Retriever -retrieve o = fileRetriever $ \f k p -> +retrieve :: RsyncOpts -> FilePath -> Key -> MeterUpdate -> Annex () +retrieve o f k p = unlessM (rsyncRetrieve o k f (Just p)) $ error "rsync failed" @@ -249,39 +284,3 @@ rsyncRemote direction o callback params = do opts | direction == Download = rsyncDownloadOptions o | otherwise = rsyncUploadOptions o - -{- To send a single key is slightly tricky; need to build up a temporary - - directory structure to pass to rsync so it can create the hash - - directories. - - - - This would not be necessary if the hash directory structure used locally - - was always the same as that used on the rsync remote. So if that's ever - - unified, this gets nicer. - - (When we have the right hash directory structure, we can just - - pass --include=X --include=X/Y --include=X/Y/file --exclude=*) - -} -rsyncSend :: RsyncOpts -> Key -> FilePath -> MeterUpdate -> Annex Bool -rsyncSend o k src meterupdate = withRsyncScratchDir $ \tmp -> do - let dest = tmp Prelude.head (keyPaths k) - liftIO $ createDirectoryIfMissing True $ parentDir dest - ok <- liftIO $ if canrename - then do - rename src dest - return True - else createLinkOrCopy src dest - ps <- sendParams - if ok - then showResumable $ rsyncRemote Upload o (Just meterupdate) $ ps ++ - [ Param "--recursive" - , partialParams - -- tmp/ to send contents of tmp dir - , File $ addTrailingPathSeparator tmp - , Param $ rsyncUrl o - ] - else return False - where - {- If the key being sent is encrypted or chunked, the file - - containing its content is a temp file, and so can be - - renamed into place. Otherwise, the file is the annexed - - object file, and has to be copied or hard linked into place. -} - canrename = isEncKey k || isChunkKey k diff --git a/debian/changelog b/debian/changelog index 2e00728ad9..a6118966e7 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,7 +1,7 @@ git-annex (5.20140718) UNRELEASED; urgency=medium * New chunk= option to chunk files stored in special remotes. - Currently supported by: directory, S3, rsync, and all external + Supported by: directory, S3, gcrypt, rsync, and all external and hook special remotes. * Partially transferred files are automatically resumed when using chunked remotes! diff --git a/doc/special_remotes/gcrypt.mdwn b/doc/special_remotes/gcrypt.mdwn index 2e07741d3f..c9a22b01aa 100644 --- a/doc/special_remotes/gcrypt.mdwn +++ b/doc/special_remotes/gcrypt.mdwn @@ -13,7 +13,7 @@ These parameters can be passed to `git annex initremote` to configure gcrypt: * `encryption` - One of "none", "hybrid", "shared", or "pubkey". - See [[encryption]]. + Required. See [[encryption]]. * `keyid` - Specifies the gpg key to use for encryption of both the files git-annex stores in the repository, as well as to encrypt the git @@ -24,6 +24,8 @@ gcrypt: for gcrypt to use. This repository should be either empty, or an existing gcrypt repositry. +* `chunk` - Enables [[chunking]] when storing large files. + * `shellescape` - See [[rsync]] for the details of this option. ## notes From 1cd2273035e71b73cc47f10a7269876c0550d4c0 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Sun, 3 Aug 2014 20:14:20 -0400 Subject: [PATCH 16/44] finally properly fixed ssh zombie leak The leak was caused by the thread that sshd'd to send transferinfo not waiting on its ssh. Doh. --- Remote/Git.hs | 9 ++++++++- Remote/Helper/Ssh.hs | 9 +-------- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/Remote/Git.hs b/Remote/Git.hs index 4498ec907c..bf2228c497 100644 --- a/Remote/Git.hs +++ b/Remote/Git.hs @@ -390,6 +390,7 @@ copyFromRemote' r key file dest Just (cmd, params) <- Ssh.git_annex_shell (repo r) "transferinfo" [Param $ key2file key] fields v <- liftIO (newEmptySV :: IO (MSampleVar Integer)) + pidv <- liftIO $ newEmptyMVar tid <- liftIO $ forkIO $ void $ tryIO $ do bytes <- readSV v p <- createProcess $ @@ -397,6 +398,7 @@ copyFromRemote' r key file dest { std_in = CreatePipe , std_err = CreatePipe } + putMVar pidv (processHandle p) hClose $ stderrHandle p let h = stdinHandle p let send b = do @@ -406,7 +408,12 @@ copyFromRemote' r key file dest forever $ send =<< readSV v let feeder = writeSV v . fromBytesProcessed - bracketIO noop (const $ tryIO $ killThread tid) (const $ a feeder) + let cleanup = do + void $ tryIO $ killThread tid + tryNonAsync $ + maybe noop (void . waitForProcess) + =<< tryTakeMVar pidv + bracketIO noop (const cleanup) (const $ a feeder) copyFromRemoteCheap :: Remote -> Key -> FilePath -> Annex Bool #ifndef mingw32_HOST_OS diff --git a/Remote/Helper/Ssh.hs b/Remote/Helper/Ssh.hs index e0199dca3e..05a98865ff 100644 --- a/Remote/Helper/Ssh.hs +++ b/Remote/Helper/Ssh.hs @@ -102,20 +102,13 @@ dropKey r key = onRemote r (boolSystem, False) "dropkey" rsyncHelper :: Maybe MeterUpdate -> [CommandParam] -> Annex Bool rsyncHelper callback params = do showOutput -- make way for progress bar - ok <- ifM (liftIO $ (maybe rsync rsyncProgress callback) params) + ifM (liftIO $ (maybe rsync rsyncProgress callback) params) ( return True , do showLongNote "rsync failed -- run git annex again to resume file transfer" return False ) - {- For an unknown reason, this causes rsync to run a second - - ssh process, which it neglects to wait on. - - Reap the resulting zombie. -} - liftIO reapZombies - - return ok - {- Generates rsync parameters that ssh to the remote and asks it - to either receive or send the key's content. -} rsyncParamsRemote :: Bool -> Remote -> Direction -> Key -> FilePath -> AssociatedFile -> Annex [CommandParam] From 8601f8f57111b654ad5606aa47e42f280d2398f3 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Sun, 3 Aug 2014 20:19:04 -0400 Subject: [PATCH 17/44] when not using rsync (for local gcrypt repo), display own progress meter --- Remote/GCrypt.hs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/Remote/GCrypt.hs b/Remote/GCrypt.hs index faf45b1d9d..b2df7d56a1 100644 --- a/Remote/GCrypt.hs +++ b/Remote/GCrypt.hs @@ -120,9 +120,11 @@ gen' r u c gc = do (simplyPrepare $ retrieve this rsyncopts) this where - specialcfg = (specialRemoteCfg c) - -- Rsync displays its own progress. - { displayProgress = False } + specialcfg + | Git.repoIsUrl r = (specialRemoteCfg c) + -- Rsync displays its own progress. + { displayProgress = False } + | otherwise = specialRemoteCfg c rsyncTransportToObjects :: Git.Repo -> Annex ([CommandParam], String) rsyncTransportToObjects r = do From d12becfddeca7468a0a362a1993b523ec73e1769 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Sun, 3 Aug 2014 20:21:46 -0400 Subject: [PATCH 18/44] fix removal from local gcrypt repo that had files stored using rsync When files are stored using rsync, they have their write bit removed; so does the directory they're put in. The local repo code did not turn these bits back on, so failed to remove. --- Remote/GCrypt.hs | 8 +++++++- debian/changelog | 1 - 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/Remote/GCrypt.hs b/Remote/GCrypt.hs index b2df7d56a1..db01443634 100644 --- a/Remote/GCrypt.hs +++ b/Remote/GCrypt.hs @@ -45,6 +45,7 @@ import Utility.Tmp import Logs.Remote import Logs.Transfer import Utility.Gpg +import Utility.FileMode remote :: RemoteType remote = RemoteType { @@ -325,7 +326,12 @@ retrieve r rsyncopts remove :: Remote -> Remote.Rsync.RsyncOpts -> Key -> Annex Bool remove r rsyncopts k | not $ Git.repoIsUrl (repo r) = guardUsable (repo r) False $ do - liftIO $ removeDirectoryRecursive $ parentDir $ gCryptLocation r k + let f = gCryptLocation r k + let d = parentDir f + liftIO $ do + allowWrite d + allowWrite f + removeDirectoryRecursive d return True | Git.repoIsSsh (repo r) = shellOrRsync r removeshell removersync | otherwise = unsupportedUrl diff --git a/debian/changelog b/debian/changelog index a6118966e7..8da5b87bb3 100644 --- a/debian/changelog +++ b/debian/changelog @@ -21,7 +21,6 @@ git-annex (5.20140718) UNRELEASED; urgency=medium * Dropping an object from a bup special remote now deletes the git branch for the object, although of course the object's content cannot be deleted due to the nature of bup. - * -- Joey Hess Mon, 21 Jul 2014 14:41:26 -0400 From d3778e631b795214c65968978863530ee2188cce Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Sun, 3 Aug 2014 20:24:13 -0400 Subject: [PATCH 19/44] remove write bit when storing to local gcrypt repo Same as is done by rsync, and for regular git repos. --- Remote/GCrypt.hs | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/Remote/GCrypt.hs b/Remote/GCrypt.hs index db01443634..28fe9964b6 100644 --- a/Remote/GCrypt.hs +++ b/Remote/GCrypt.hs @@ -300,9 +300,14 @@ store :: Remote -> Remote.Rsync.RsyncOpts -> Storer store r rsyncopts | not $ Git.repoIsUrl (repo r) = byteStorer $ \k b p -> guardUsable (repo r) False $ liftIO $ do - let dest = gCryptLocation r k - createDirectoryIfMissing True $ parentDir dest - meteredWriteFile p dest b + let f = gCryptLocation r k + let d = parentDir f + createDirectoryIfMissing True d + allowWrite d + void $ liftIO $ tryIO $ allowWrite f + meteredWriteFile p f b + preventWrite f + preventWrite d return True | Git.repoIsSsh (repo r) = if isShell r then fileStorer $ \k f p -> Ssh.rsyncHelper (Just p) From db54981a4c7d690cc658b9785e6f56288f506086 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Mon, 4 Aug 2014 08:24:06 -0400 Subject: [PATCH 20/44] fix warning --- Command/TestRemote.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Command/TestRemote.hs b/Command/TestRemote.hs index 789eb75b17..463c4d3595 100644 --- a/Command/TestRemote.hs +++ b/Command/TestRemote.hs @@ -169,7 +169,7 @@ chunkSizes base False = , base `div` 1000 , base ] -chunkSizes base True = +chunkSizes _ True = [ 0 ] From 6f4592966d60834f28769311e127c9b374e2e62d Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Mon, 4 Aug 2014 08:42:04 -0400 Subject: [PATCH 21/44] make testremote work with gcrypt repos This involved making Remote.Gcrypt.gen expect a Repo with a regular, non-gcrypt path. Since tht is what's stored as the Remote's gitrepo, testremote can then modify it and feed it back into gen. --- Remote/GCrypt.hs | 23 ++++++++++++++--------- Remote/Git.hs | 2 +- 2 files changed, 15 insertions(+), 10 deletions(-) diff --git a/Remote/GCrypt.hs b/Remote/GCrypt.hs index 28fe9964b6..02c31f38d0 100644 --- a/Remote/GCrypt.hs +++ b/Remote/GCrypt.hs @@ -7,7 +7,7 @@ module Remote.GCrypt ( remote, - gen, + chainGen, getGCryptUUID, coreGCryptId, setupRepo @@ -57,19 +57,24 @@ remote = RemoteType { setup = gCryptSetup } -gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote) -gen gcryptr u c gc = do +chainGen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote) +chainGen gcryptr u c gc = do g <- gitRepo -- get underlying git repo with real path, not gcrypt path r <- liftIO $ Git.GCrypt.encryptedRemote g gcryptr let r' = r { Git.remoteName = Git.remoteName gcryptr } + gen r' u c gc + +gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote) +gen baser u c gc = do -- doublecheck that cache matches underlying repo's gcrypt-id -- (which might not be set), only for local repos - (mgcryptid, r'') <- getGCryptId True r' - case (mgcryptid, Git.GCrypt.remoteRepoId g (Git.remoteName gcryptr)) of + (mgcryptid, r) <- getGCryptId True baser + g <- gitRepo + case (mgcryptid, Git.GCrypt.remoteRepoId g (Git.remoteName baser)) of (Just gcryptid, Just cachedgcryptid) - | gcryptid /= cachedgcryptid -> resetup gcryptid r'' - _ -> gen' r'' u c gc + | gcryptid /= cachedgcryptid -> resetup gcryptid r + _ -> gen' r u c gc where -- A different drive may have been mounted, making a different -- gcrypt remote available. So need to set the cached @@ -79,10 +84,10 @@ gen gcryptr u c gc = do resetup gcryptid r = do let u' = genUUIDInNameSpace gCryptNameSpace gcryptid v <- M.lookup u' <$> readRemoteLog - case (Git.remoteName gcryptr, v) of + case (Git.remoteName baser, v) of (Just remotename, Just c') -> do setGcryptEncryption c' remotename - setConfig (remoteConfig gcryptr "uuid") (fromUUID u') + setConfig (remoteConfig baser "uuid") (fromUUID u') setConfig (ConfigKey $ Git.GCrypt.remoteConfigKey "gcrypt-id" remotename) gcryptid gen' r u' c' gc _ -> do diff --git a/Remote/Git.hs b/Remote/Git.hs index bf2228c497..c35f9f32af 100644 --- a/Remote/Git.hs +++ b/Remote/Git.hs @@ -127,7 +127,7 @@ configRead r = do gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote) gen r u c gc - | Git.GCrypt.isEncrypted r = Remote.GCrypt.gen r u c gc + | Git.GCrypt.isEncrypted r = Remote.GCrypt.chainGen r u c gc | otherwise = go <$> remoteCost gc defcst where defcst = if repoCheap r then cheapRemoteCost else expensiveRemoteCost From 00c146816076d2d4dd15d51749fbdd3038452735 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Mon, 4 Aug 2014 09:00:57 -0400 Subject: [PATCH 22/44] gcrypt: fix removal of key that does not exist Generalized code from Remote.Directory and reused it. Test suite now passes for local gcrypt repos. --- Remote/Directory.hs | 24 ++++++++++++++++-------- Remote/GCrypt.hs | 11 +++-------- 2 files changed, 19 insertions(+), 16 deletions(-) diff --git a/Remote/Directory.hs b/Remote/Directory.hs index db141e01ad..3b54ad2000 100644 --- a/Remote/Directory.hs +++ b/Remote/Directory.hs @@ -7,7 +7,7 @@ {-# LANGUAGE CPP #-} -module Remote.Directory (remote) where +module Remote.Directory (remote, removeDirGeneric) where import qualified Data.ByteString.Lazy as L import qualified Data.Map as M @@ -152,7 +152,20 @@ retrieveCheap _ _ _ _ = return False #endif remove :: FilePath -> Key -> Annex Bool -remove d k = liftIO $ do +remove d k = liftIO $ removeDirGeneric d (storeDir d k) + +{- Removes the directory, which must be located under the topdir. + - + - Succeeds even on directories and contents that do not have write + - permission. + - + - If the directory does not exist, succeeds as long as the topdir does + - exist. If the topdir does not exist, fails, because in this case the + - remote is not currently accessible and probably still has the content + - we were supposed to remove from it. + -} +removeDirGeneric :: FilePath -> FilePath -> IO Bool +removeDirGeneric topdir dir = do void $ tryIO $ allowWrite dir #ifdef mingw32_HOST_OS {- Windows needs the files inside the directory to be writable @@ -162,14 +175,9 @@ remove d k = liftIO $ do ok <- catchBoolIO $ do removeDirectoryRecursive dir return True - {- Removing the subdirectory will fail if it doesn't exist. - - But, we want to succeed in that case, as long as the directory - - remote's top-level directory does exist. -} if ok then return ok - else doesDirectoryExist d <&&> (not <$> doesDirectoryExist dir) - where - dir = storeDir d k + else doesDirectoryExist topdir <&&> (not <$> doesDirectoryExist dir) checkPresent :: FilePath -> ChunkConfig -> Key -> Annex (Either String Bool) checkPresent d (LegacyChunks _) k = Legacy.checkPresent d locations k diff --git a/Remote/GCrypt.hs b/Remote/GCrypt.hs index 02c31f38d0..a0292a954d 100644 --- a/Remote/GCrypt.hs +++ b/Remote/GCrypt.hs @@ -40,6 +40,7 @@ import Utility.Metered import Annex.UUID import Annex.Ssh import qualified Remote.Rsync +import qualified Remote.Directory import Utility.Rsync import Utility.Tmp import Logs.Remote @@ -335,14 +336,8 @@ retrieve r rsyncopts remove :: Remote -> Remote.Rsync.RsyncOpts -> Key -> Annex Bool remove r rsyncopts k - | not $ Git.repoIsUrl (repo r) = guardUsable (repo r) False $ do - let f = gCryptLocation r k - let d = parentDir f - liftIO $ do - allowWrite d - allowWrite f - removeDirectoryRecursive d - return True + | not $ Git.repoIsUrl (repo r) = guardUsable (repo r) False $ + liftIO $ Remote.Directory.removeDirGeneric (Git.repoLocation (repo r)) (parentDir (gCryptLocation r k)) | Git.repoIsSsh (repo r) = shellOrRsync r removeshell removersync | otherwise = unsupportedUrl where From 6cecffea8924229c15589b25896de916e2428890 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Mon, 4 Aug 2014 09:16:47 -0400 Subject: [PATCH 23/44] fix "storeKey when already present" test for git-annex-shell transfers Now git-annex-shell recvkey, when the key is already present, allows another copy to be rsynced up, and just throws it away. This same behavior could have already happened before, when eg, two repos tried to upload the same object at the same time. So this makes the test suite pass, and should not add any bad behavior, other than slightly more work being done in a rather edge case. This relies on moveAnnex's behavior of keeping the current version of an object. --- Command/RecvKey.hs | 21 +++++++++------------ 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/Command/RecvKey.hs b/Command/RecvKey.hs index 1794596c57..d5971d6cf5 100644 --- a/Command/RecvKey.hs +++ b/Command/RecvKey.hs @@ -28,18 +28,15 @@ seek :: CommandSeek seek = withKeys start start :: Key -> CommandStart -start key = ifM (inAnnex key) - ( error "key is already present in annex" - , fieldTransfer Download key $ \_p -> - ifM (getViaTmp key go) - ( do - -- forcibly quit after receiving one key, - -- and shutdown cleanly - _ <- shutdown True - return True - , return False - ) - ) +start key = fieldTransfer Download key $ \_p -> + ifM (getViaTmp key go) + ( do + -- forcibly quit after receiving one key, + -- and shutdown cleanly + _ <- shutdown True + return True + , return False + ) where go tmp = do opts <- filterRsyncSafeOptions . maybe [] words From 22c7a7a41ac1a20425c7b11240bc5a346654a27b Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Mon, 4 Aug 2014 09:35:57 -0400 Subject: [PATCH 24/44] make local gcrypt storeKey be atomic Reuse Remote.Directory's code. --- Remote/Directory.hs | 34 ++++++++++++++++++++++------------ Remote/GCrypt.hs | 14 ++++++-------- 2 files changed, 28 insertions(+), 20 deletions(-) diff --git a/Remote/Directory.hs b/Remote/Directory.hs index 3b54ad2000..9b3c156959 100644 --- a/Remote/Directory.hs +++ b/Remote/Directory.hs @@ -7,7 +7,11 @@ {-# LANGUAGE CPP #-} -module Remote.Directory (remote, removeDirGeneric) where +module Remote.Directory ( + remote, + finalizeStoreGeneric, + removeDirGeneric, +) where import qualified Data.ByteString.Lazy as L import qualified Data.Map as M @@ -114,24 +118,30 @@ store :: FilePath -> ChunkConfig -> Key -> L.ByteString -> MeterUpdate -> Annex store d chunkconfig k b p = liftIO $ do void $ tryIO $ createDirectoryIfMissing True tmpdir case chunkconfig of - LegacyChunks chunksize -> Legacy.store chunksize finalizer k b p tmpdir destdir + LegacyChunks chunksize -> Legacy.store chunksize finalizeStoreGeneric k b p tmpdir destdir _ -> do let tmpf = tmpdir keyFile k meteredWriteFile p tmpf b - finalizer tmpdir destdir + finalizeStoreGeneric tmpdir destdir return True where tmpdir = tmpDir d k destdir = storeDir d k - finalizer tmp dest = do - void $ tryIO $ allowWrite dest -- may already exist - void $ tryIO $ removeDirectoryRecursive dest -- or not exist - createDirectoryIfMissing True (parentDir dest) - renameDirectory tmp dest - -- may fail on some filesystems - void $ tryIO $ do - mapM_ preventWrite =<< dirContents dest - preventWrite dest + +{- Passed a temp directory that contains the files that should be placed + - in the dest directory, moves it into place. Anything already existing + - in the dest directory will be deleted. File permissions will be locked + - down. -} +finalizeStoreGeneric :: FilePath -> FilePath -> IO () +finalizeStoreGeneric tmp dest = do + void $ tryIO $ allowWrite dest -- may already exist + void $ tryIO $ removeDirectoryRecursive dest -- or not exist + createDirectoryIfMissing True (parentDir dest) + renameDirectory tmp dest + -- may fail on some filesystems + void $ tryIO $ do + mapM_ preventWrite =<< dirContents dest + preventWrite dest retrieve :: FilePath -> ChunkConfig -> Preparer Retriever retrieve d (LegacyChunks _) = Legacy.retrieve locations d diff --git a/Remote/GCrypt.hs b/Remote/GCrypt.hs index a0292a954d..d969e02f8a 100644 --- a/Remote/GCrypt.hs +++ b/Remote/GCrypt.hs @@ -306,14 +306,12 @@ store :: Remote -> Remote.Rsync.RsyncOpts -> Storer store r rsyncopts | not $ Git.repoIsUrl (repo r) = byteStorer $ \k b p -> guardUsable (repo r) False $ liftIO $ do - let f = gCryptLocation r k - let d = parentDir f - createDirectoryIfMissing True d - allowWrite d - void $ liftIO $ tryIO $ allowWrite f - meteredWriteFile p f b - preventWrite f - preventWrite d + let tmpdir = Git.repoLocation (repo r) "tmp" keyFile k + void $ tryIO $ createDirectoryIfMissing True tmpdir + let tmpf = tmpdir keyFile k + meteredWriteFile p tmpf b + let destdir = parentDir $ gCryptLocation r k + Remote.Directory.finalizeStoreGeneric tmpdir destdir return True | Git.repoIsSsh (repo r) = if isShell r then fileStorer $ \k f p -> Ssh.rsyncHelper (Just p) From 781833b16fd005255e47d794955d7ed996c1c800 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 6 Aug 2014 13:45:05 -0400 Subject: [PATCH 25/44] update for change in bup remote removal --- Test.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Test.hs b/Test.hs index 3ae5e323b9..5032038ad9 100644 --- a/Test.hs +++ b/Test.hs @@ -1251,7 +1251,7 @@ test_bup_remote testenv = intmpclonerepo testenv $ when Build.SysConfig.bup $ do annexed_notpresent annexedfile git_annex testenv "copy" [annexedfile, "--from", "foo"] @? "copy --from bup remote failed" annexed_present annexedfile - not <$> git_annex testenv "move" [annexedfile, "--from", "foo"] @? "move --from bup remote failed to fail" + git_annex testenv "move" [annexedfile, "--from", "foo"] @? "move --from bup remote failed" annexed_present annexedfile -- gpg is not a build dependency, so only test when it's available From b4cf22a388915b4f44386ecd062bf6e56003fac3 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 6 Aug 2014 13:45:19 -0400 Subject: [PATCH 26/44] pushed checkPresent exception handling out of Remote implementations I tend to prefer moving toward explicit exception handling, not away from it, but in this case, I think there are good reasons to let checkPresent throw exceptions: 1. They can all be caught in one place (Remote.hasKey), and we know every possible exception is caught there now, which we didn't before. 2. It simplified the code of the Remotes. I think it makes sense for Remotes to be able to be implemented without needing to worry about catching exceptions inside them. (Mostly.) 3. Types.StoreRetrieve.Preparer can only work on things that return a Bool, which all the other relevant remote methods already did. I do not see a good way to generalize that type; my previous attempts failed miserably. --- Remote.hs | 8 ++++++ Remote/Bup.hs | 15 +++++----- Remote/Ddar.hs | 15 +++++----- Remote/Directory.hs | 21 ++++++-------- Remote/Directory/LegacyChunked.hs | 9 +++--- Remote/External.hs | 8 +++--- Remote/GCrypt.hs | 14 ++++----- Remote/Git.hs | 25 ++++++---------- Remote/Glacier.hs | 28 ++++++++---------- Remote/Helper/Chunked.hs | 47 +++++++++++++++++-------------- Remote/Helper/Encryptable.hs | 6 ++-- Remote/Helper/Hooks.hs | 2 +- Remote/Helper/Messages.hs | 4 +-- Remote/Helper/Special.hs | 8 +++--- Remote/Helper/Ssh.hs | 6 ++-- Remote/Hook.hs | 10 +++---- Remote/Rsync.hs | 14 ++++----- Remote/S3.hs | 16 +++++------ Remote/Tahoe.hs | 20 +++++++------ Remote/Web.hs | 10 +++---- Remote/WebDAV.hs | 10 +++---- Types/Remote.hs | 10 +++---- Types/StoreRetrieve.hs | 8 ++++++ doc/design/assistant/chunks.mdwn | 16 +++++------ 24 files changed, 167 insertions(+), 163 deletions(-) diff --git a/Remote.hs b/Remote.hs index 29097f77d3..5ee75823f5 100644 --- a/Remote.hs +++ b/Remote.hs @@ -56,6 +56,7 @@ import Data.Ord import Common.Annex import Types.Remote import qualified Annex +import Annex.Exception import Annex.UUID import Logs.UUID import Logs.Trust @@ -312,3 +313,10 @@ isXMPPRemote :: Remote -> Bool isXMPPRemote remote = Git.repoIsUrl r && "xmpp::" `isPrefixOf` Git.repoLocation r where r = repo remote + +hasKey :: Remote -> Key -> Annex (Either String Bool) +hasKey r k = either (Left . show) Right + <$> tryNonAsyncAnnex (checkPresent r k) + +hasKeyCheap :: Remote -> Bool +hasKeyCheap = checkPresentCheap diff --git a/Remote/Bup.hs b/Remote/Bup.hs index 6a04ad5f7f..2e68f30ef7 100644 --- a/Remote/Bup.hs +++ b/Remote/Bup.hs @@ -58,8 +58,8 @@ gen r u c gc = do , retrieveKeyFile = retreiveKeyFileDummy , retrieveKeyFileCheap = retrieveCheap buprepo , removeKey = remove buprepo - , hasKey = checkPresent r bupr' - , hasKeyCheap = bupLocal buprepo + , checkPresent = checkKey r bupr' + , checkPresentCheap = bupLocal buprepo , whereisKey = Nothing , remoteFsck = Nothing , repairRepo = Nothing @@ -163,14 +163,13 @@ remove buprepo k = do - in a bup repository. One way it to check if the git repository has - a branch matching the name (as created by bup split -n). -} -checkPresent :: Git.Repo -> Git.Repo -> Key -> Annex (Either String Bool) -checkPresent r bupr k +checkKey :: Git.Repo -> Git.Repo -> Key -> Annex Bool +checkKey r bupr k | Git.repoIsUrl bupr = do showChecking r - ok <- onBupRemote bupr boolSystem "git" params - return $ Right ok - | otherwise = liftIO $ catchMsgIO $ - boolSystem "git" $ Git.Command.gitCommandLine params bupr + onBupRemote bupr boolSystem "git" params + | otherwise = liftIO $ boolSystem "git" $ + Git.Command.gitCommandLine params bupr where params = [ Params "show-ref --quiet --verify" diff --git a/Remote/Ddar.hs b/Remote/Ddar.hs index b4c7ac1e62..1227b52755 100644 --- a/Remote/Ddar.hs +++ b/Remote/Ddar.hs @@ -54,8 +54,8 @@ gen r u c gc = do , retrieveKeyFile = retreiveKeyFileDummy , retrieveKeyFileCheap = retrieveCheap , removeKey = remove ddarrepo - , hasKey = checkPresent ddarrepo - , hasKeyCheap = ddarLocal ddarrepo + , checkPresent = checkKey ddarrepo + , checkPresentCheap = ddarLocal ddarrepo , whereisKey = Nothing , remoteFsck = Nothing , repairRepo = Nothing @@ -181,13 +181,14 @@ inDdarManifest ddarrepo k = do where k' = key2file k -checkPresent :: DdarRepo -> Key -> Annex (Either String Bool) -checkPresent ddarrepo key = do +checkKey :: DdarRepo -> Key -> Annex Bool +checkKey ddarrepo key = do directoryExists <- ddarDirectoryExists ddarrepo case directoryExists of - Left e -> return $ Left e - Right True -> inDdarManifest ddarrepo key - Right False -> return $ Right False + Left e -> error e + Right True -> either error return + =<< inDdarManifest ddarrepo key + Right False -> return False ddarLocal :: DdarRepo -> Bool ddarLocal = notElem ':' diff --git a/Remote/Directory.hs b/Remote/Directory.hs index 9b3c156959..0a2532aa5b 100644 --- a/Remote/Directory.hs +++ b/Remote/Directory.hs @@ -52,8 +52,8 @@ gen r u c gc = do retrieveKeyFile = retreiveKeyFileDummy, retrieveKeyFileCheap = retrieveCheap dir chunkconfig, removeKey = remove dir, - hasKey = checkPresent dir chunkconfig, - hasKeyCheap = True, + checkPresent = checkKey dir chunkconfig, + checkPresentCheap = True, whereisKey = Nothing, remoteFsck = Nothing, repairRepo = Nothing, @@ -189,13 +189,10 @@ removeDirGeneric topdir dir = do then return ok else doesDirectoryExist topdir <&&> (not <$> doesDirectoryExist dir) -checkPresent :: FilePath -> ChunkConfig -> Key -> Annex (Either String Bool) -checkPresent d (LegacyChunks _) k = Legacy.checkPresent d locations k -checkPresent d _ k = liftIO $ do - v <- catchMsgIO $ anyM doesFileExist (locations d k) - case v of - Right False -> ifM (doesDirectoryExist d) - ( return v - , return $ Left $ "directory " ++ d ++ " is not accessible" - ) - _ -> return v +checkKey :: FilePath -> ChunkConfig -> Key -> Annex Bool +checkKey d (LegacyChunks _) k = Legacy.checkKey d locations k +checkKey d _ k = liftIO $ + ifM (anyM doesFileExist (locations d k)) + ( return True + , error $ "directory " ++ d ++ " is not accessible" + ) diff --git a/Remote/Directory/LegacyChunked.hs b/Remote/Directory/LegacyChunked.hs index 1be885db21..b2248c5f66 100644 --- a/Remote/Directory/LegacyChunked.hs +++ b/Remote/Directory/LegacyChunked.hs @@ -103,8 +103,7 @@ retrieve locations d basek a = do liftIO $ nukeFile tmp sink b -checkPresent :: FilePath -> (FilePath -> Key -> [FilePath]) -> Key -> Annex (Either String Bool) -checkPresent d locations k = liftIO $ catchMsgIO $ - withStoredFiles d locations k $ - -- withStoredFiles checked that it exists - const $ return True +checkKey :: FilePath -> (FilePath -> Key -> [FilePath]) -> Key -> Annex Bool +checkKey d locations k = liftIO $ withStoredFiles d locations k $ + -- withStoredFiles checked that it exists + const $ return True diff --git a/Remote/External.hs b/Remote/External.hs index c00093402f..ffae94ec99 100644 --- a/Remote/External.hs +++ b/Remote/External.hs @@ -53,8 +53,8 @@ gen r u c gc = do retrieveKeyFile = retreiveKeyFileDummy, retrieveKeyFileCheap = \_ _ -> return False, removeKey = remove external, - hasKey = checkPresent external, - hasKeyCheap = False, + checkPresent = checkKey external, + checkPresentCheap = False, whereisKey = Nothing, remoteFsck = Nothing, repairRepo = Nothing, @@ -121,8 +121,8 @@ remove external k = safely $ return False _ -> Nothing -checkPresent :: External -> Key -> Annex (Either String Bool) -checkPresent external k = either (Left . show) id <$> tryAnnex go +checkKey :: External -> Key -> Annex Bool +checkKey external k = either error id <$> go where go = handleRequest external (CHECKPRESENT k) Nothing $ \resp -> case resp of diff --git a/Remote/GCrypt.hs b/Remote/GCrypt.hs index d969e02f8a..f971ff754f 100644 --- a/Remote/GCrypt.hs +++ b/Remote/GCrypt.hs @@ -46,7 +46,6 @@ import Utility.Tmp import Logs.Remote import Logs.Transfer import Utility.Gpg -import Utility.FileMode remote :: RemoteType remote = RemoteType { @@ -109,8 +108,8 @@ gen' r u c gc = do , retrieveKeyFile = retreiveKeyFileDummy , retrieveKeyFileCheap = \_ _ -> return False , removeKey = remove this rsyncopts - , hasKey = checkPresent this rsyncopts - , hasKeyCheap = repoCheap r + , checkPresent = checkKey this rsyncopts + , checkPresentCheap = repoCheap r , whereisKey = Nothing , remoteFsck = Nothing , repairRepo = Nothing @@ -342,16 +341,15 @@ remove r rsyncopts k removersync = Remote.Rsync.remove rsyncopts k removeshell = Ssh.dropKey (repo r) k -checkPresent :: Remote -> Remote.Rsync.RsyncOpts -> Key -> Annex (Either String Bool) -checkPresent r rsyncopts k +checkKey :: Remote -> Remote.Rsync.RsyncOpts -> Key -> Annex Bool +checkKey r rsyncopts k | not $ Git.repoIsUrl (repo r) = guardUsable (repo r) (cantCheck $ repo r) $ - liftIO $ catchDefaultIO (cantCheck $ repo r) $ - Right <$> doesFileExist (gCryptLocation r k) + liftIO $ doesFileExist (gCryptLocation r k) | Git.repoIsSsh (repo r) = shellOrRsync r checkshell checkrsync | otherwise = unsupportedUrl where - checkrsync = Remote.Rsync.checkPresent (repo r) rsyncopts k + checkrsync = Remote.Rsync.checkKey (repo r) rsyncopts k checkshell = Ssh.inAnnex (repo r) k {- Annexed objects are hashed using lower-case directories for max diff --git a/Remote/Git.hs b/Remote/Git.hs index c35f9f32af..da5ca4c4a5 100644 --- a/Remote/Git.hs +++ b/Remote/Git.hs @@ -141,8 +141,8 @@ gen r u c gc , retrieveKeyFile = copyFromRemote new , retrieveKeyFileCheap = copyFromRemoteCheap new , removeKey = dropKey new - , hasKey = inAnnex new - , hasKeyCheap = repoCheap r + , checkPresent = inAnnex new + , checkPresentCheap = repoCheap r , whereisKey = Nothing , remoteFsck = if Git.repoIsUrl r then Nothing @@ -284,11 +284,8 @@ tryGitConfigRead r void $ tryAnnex $ ensureInitialized Annex.getState Annex.repo -{- Checks if a given remote has the content for a key inAnnex. - - If the remote cannot be accessed, or if it cannot determine - - whether it has the content, returns a Left error message. - -} -inAnnex :: Remote -> Key -> Annex (Either String Bool) +{- Checks if a given remote has the content for a key in its annex. -} +inAnnex :: Remote -> Key -> Annex Bool inAnnex rmt key | Git.repoIsHttp r = checkhttp | Git.repoIsUrl r = checkremote @@ -298,17 +295,13 @@ inAnnex rmt key checkhttp = do showChecking r ifM (Url.withUrlOptions $ \uo -> anyM (\u -> Url.checkBoth u (keySize key) uo) (keyUrls rmt key)) - ( return $ Right True - , return $ Left "not found" + ( return True + , error "not found" ) checkremote = Ssh.inAnnex r key - checklocal = guardUsable r (cantCheck r) $ dispatch <$> check - where - check = either (Left . show) Right - <$> tryAnnex (onLocal rmt $ Annex.Content.inAnnexSafe key) - dispatch (Left e) = Left e - dispatch (Right (Just b)) = Right b - dispatch (Right Nothing) = cantCheck r + checklocal = guardUsable r (cantCheck r) $ + fromMaybe (cantCheck r) + <$> onLocal rmt (Annex.Content.inAnnexSafe key) keyUrls :: Remote -> Key -> [String] keyUrls r key = map tourl locs' diff --git a/Remote/Glacier.hs b/Remote/Glacier.hs index c5bfefa641..2ade37011e 100644 --- a/Remote/Glacier.hs +++ b/Remote/Glacier.hs @@ -52,8 +52,8 @@ gen r u c gc = new <$> remoteCost gc veryExpensiveRemoteCost retrieveKeyFile = retreiveKeyFileDummy, retrieveKeyFileCheap = retrieveCheap this, removeKey = remove this, - hasKey = checkPresent this, - hasKeyCheap = False, + checkPresent = checkKey this, + checkPresentCheap = False, whereisKey = Nothing, remoteFsck = Nothing, repairRepo = Nothing, @@ -164,25 +164,21 @@ remove r k = glacierAction r , Param $ archive r k ] -checkPresent :: Remote -> Key -> Annex (Either String Bool) -checkPresent r k = do +checkKey :: Remote -> Key -> Annex Bool +checkKey r k = do showAction $ "checking " ++ name r go =<< glacierEnv (config r) (uuid r) where - go Nothing = return $ Left "cannot check glacier" + go Nothing = error "cannot check glacier" go (Just e) = do {- glacier checkpresent outputs the archive name to stdout if - it's present. -} - v <- liftIO $ catchMsgIO $ - readProcessEnv "glacier" (toCommand params) (Just e) - case v of - Right s -> do - let probablypresent = key2file k `elem` lines s - if probablypresent - then ifM (Annex.getFlag "trustglacier") - ( return $ Right True, untrusted ) - else return $ Right False - Left err -> return $ Left err + s <- liftIO $ readProcessEnv "glacier" (toCommand params) (Just e) + let probablypresent = key2file k `elem` lines s + if probablypresent + then ifM (Annex.getFlag "trustglacier") + ( return True, error untrusted ) + else return False params = glacierParams (config r) [ Param "archive" @@ -192,7 +188,7 @@ checkPresent r k = do , Param $ archive r k ] - untrusted = return $ Left $ unlines + untrusted = unlines [ "Glacier's inventory says it has a copy." , "However, the inventory could be out of date, if it was recently removed." , "(Use --trust-glacier if you're sure it's still in Glacier.)" diff --git a/Remote/Helper/Chunked.hs b/Remote/Helper/Chunked.hs index 7ad790cb16..953c533b66 100644 --- a/Remote/Helper/Chunked.hs +++ b/Remote/Helper/Chunked.hs @@ -12,7 +12,7 @@ module Remote.Helper.Chunked ( storeChunks, removeChunks, retrieveChunks, - hasKeyChunks, + checkPresentChunks, ) where import Common.Annex @@ -94,8 +94,8 @@ storeChunks -> Key -> FilePath -> MeterUpdate - -> (Key -> ContentSource -> MeterUpdate -> Annex Bool) - -> (Key -> Annex (Either String Bool)) + -> Storer + -> CheckPresent -> Annex Bool storeChunks u chunkconfig k f p storer checker = case chunkconfig of @@ -158,7 +158,7 @@ storeChunks u chunkconfig k f p storer checker = seekResume :: Handle -> ChunkKeyStream - -> (Key -> Annex (Either String Bool)) + -> CheckPresent -> Annex (ChunkKeyStream, BytesProcessed) seekResume h chunkkeys checker = do sz <- liftIO (hFileSize h) @@ -172,7 +172,7 @@ seekResume h chunkkeys checker = do liftIO $ hSeek h AbsoluteSeek sz return (cks, toBytesProcessed sz) | otherwise = do - v <- checker k + v <- tryNonAsyncAnnex (checker k) case v of Right True -> check pos' cks' sz @@ -331,43 +331,48 @@ setupResume ls currsize = map dropunneeded ls {- Checks if a key is present in a remote. This requires any one - of the lists of options returned by chunkKeys to all check out - as being present using the checker action. + - + - Throws an exception if the remote is not accessible. -} -hasKeyChunks - :: (Key -> Annex (Either String Bool)) +checkPresentChunks + :: CheckPresent -> UUID -> ChunkConfig -> EncKey -> Key - -> Annex (Either String Bool) -hasKeyChunks checker u chunkconfig encryptor basek - | noChunks chunkconfig = + -> Annex Bool +checkPresentChunks checker u chunkconfig encryptor basek + | noChunks chunkconfig = do -- Optimisation: Try the unchunked key first, to avoid -- looking in the git-annex branch for chunk counts -- that are likely not there. - ifM ((Right True ==) <$> checker (encryptor basek)) - ( return (Right True) - , checklists Nothing =<< chunkKeysOnly u basek - ) + v <- check basek + case v of + Right True -> return True + _ -> checklists Nothing =<< chunkKeysOnly u basek | otherwise = checklists Nothing =<< chunkKeys u chunkconfig basek where - checklists Nothing [] = return (Right False) - checklists (Just deferrederror) [] = return (Left deferrederror) + checklists Nothing [] = return False + checklists (Just deferrederror) [] = error deferrederror checklists d (l:ls) | not (null l) = do v <- checkchunks l case v of Left e -> checklists (Just e) ls - Right True -> return (Right True) + Right True -> return True Right False -> checklists Nothing ls | otherwise = checklists d ls checkchunks :: [Key] -> Annex (Either String Bool) checkchunks [] = return (Right True) checkchunks (k:ks) = do - v <- checker (encryptor k) - if v == Right True - then checkchunks ks - else return v + v <- check k + case v of + Right True -> checkchunks ks + Right False -> return $ Right False + Left e -> return $ Left $ show e + + check = tryNonAsyncAnnex . checker . encryptor {- A key can be stored in a remote unchunked, or as a list of chunked keys. - This can be the case whether or not the remote is currently configured diff --git a/Remote/Helper/Encryptable.hs b/Remote/Helper/Encryptable.hs index 65a3ba284d..c364a69e76 100644 --- a/Remote/Helper/Encryptable.hs +++ b/Remote/Helper/Encryptable.hs @@ -91,9 +91,9 @@ encryptableRemote c storeKeyEncrypted retrieveKeyFileEncrypted r = r , removeKey = \k -> cip k >>= maybe (removeKey r k) (\(_, enckey) -> removeKey r enckey) - , hasKey = \k -> cip k >>= maybe - (hasKey r k) - (\(_, enckey) -> hasKey r enckey) + , checkPresent = \k -> cip k >>= maybe + (checkPresent r k) + (\(_, enckey) -> checkPresent r enckey) , cost = maybe (cost r) (const $ cost r + encryptedRemoteCostAdj) diff --git a/Remote/Helper/Hooks.hs b/Remote/Helper/Hooks.hs index c3ff970c62..907400bd1c 100644 --- a/Remote/Helper/Hooks.hs +++ b/Remote/Helper/Hooks.hs @@ -39,7 +39,7 @@ addHooks' r starthook stophook = r' , retrieveKeyFile = \k f d p -> wrapper $ retrieveKeyFile r k f d p , retrieveKeyFileCheap = \k f -> wrapper $ retrieveKeyFileCheap r k f , removeKey = wrapper . removeKey r - , hasKey = wrapper . hasKey r + , checkPresent = wrapper . checkPresent r } where wrapper = runHooks r' starthook stophook diff --git a/Remote/Helper/Messages.hs b/Remote/Helper/Messages.hs index c4b1966dc8..3088a9ab2b 100644 --- a/Remote/Helper/Messages.hs +++ b/Remote/Helper/Messages.hs @@ -13,5 +13,5 @@ import qualified Git showChecking :: Git.Repo -> Annex () showChecking r = showAction $ "checking " ++ Git.repoDescribe r -cantCheck :: Git.Repo -> Either String Bool -cantCheck r = Left $ "unable to check " ++ Git.repoDescribe r +cantCheck :: Git.Repo -> a +cantCheck r = error $ "unable to check " ++ Git.repoDescribe r diff --git a/Remote/Helper/Special.hs b/Remote/Helper/Special.hs index 2bcb7d530d..3c19f25eb9 100644 --- a/Remote/Helper/Special.hs +++ b/Remote/Helper/Special.hs @@ -148,7 +148,7 @@ specialRemote' cfg c preparestorer prepareretriever baser = encr (retrieveKeyFileCheap baser k d) (\_ -> return False) , removeKey = \k -> cip >>= removeKeyGen k - , hasKey = \k -> cip >>= hasKeyGen k + , checkPresent = \k -> cip >>= checkPresentGen k , cost = maybe (cost baser) (const $ cost baser + encryptedRemoteCostAdj) @@ -167,7 +167,7 @@ specialRemote' cfg c preparestorer prepareretriever baser = encr displayprogress p k $ \p' -> storeChunks (uuid baser) chunkconfig k src p' (storechunk enc storer) - (hasKey baser) + (checkPresent baser) go Nothing = return False rollback = void $ removeKey encr k @@ -193,10 +193,10 @@ specialRemote' cfg c preparestorer prepareretriever baser = encr enck = maybe id snd enc remover = removeKey baser - hasKeyGen k enc = hasKeyChunks checker (uuid baser) chunkconfig enck k + checkPresentGen k enc = checkPresentChunks checker (uuid baser) chunkconfig enck k where enck = maybe id snd enc - checker = hasKey baser + checker = checkPresent baser chunkconfig = chunkConfig cfg diff --git a/Remote/Helper/Ssh.hs b/Remote/Helper/Ssh.hs index 05a98865ff..42d77ea592 100644 --- a/Remote/Helper/Ssh.hs +++ b/Remote/Helper/Ssh.hs @@ -81,14 +81,14 @@ onRemote r (with, errorval) command params fields = do Nothing -> return errorval {- Checks if a remote contains a key. -} -inAnnex :: Git.Repo -> Key -> Annex (Either String Bool) +inAnnex :: Git.Repo -> Key -> Annex Bool inAnnex r k = do showChecking r onRemote r (check, cantCheck r) "inannex" [Param $ key2file k] [] where check c p = dispatch <$> safeSystem c p - dispatch ExitSuccess = Right True - dispatch (ExitFailure 1) = Right False + dispatch ExitSuccess = True + dispatch (ExitFailure 1) = False dispatch _ = cantCheck r {- Removes a key from a remote. -} diff --git a/Remote/Hook.hs b/Remote/Hook.hs index efbd9f8ba4..037f71ced4 100644 --- a/Remote/Hook.hs +++ b/Remote/Hook.hs @@ -45,8 +45,8 @@ gen r u c gc = do retrieveKeyFile = retreiveKeyFileDummy, retrieveKeyFileCheap = retrieveCheap hooktype, removeKey = remove hooktype, - hasKey = checkPresent r hooktype, - hasKeyCheap = False, + checkPresent = checkKey r hooktype, + checkPresentCheap = False, whereisKey = Nothing, remoteFsck = Nothing, repairRepo = Nothing, @@ -128,11 +128,11 @@ retrieveCheap _ _ _ = return False remove :: HookName -> Key -> Annex Bool remove h k = runHook h "remove" k Nothing $ return True -checkPresent :: Git.Repo -> HookName -> Key -> Annex (Either String Bool) -checkPresent r h k = do +checkKey :: Git.Repo -> HookName -> Key -> Annex Bool +checkKey r h k = do showAction $ "checking " ++ Git.repoDescribe r v <- lookupHook h action - liftIO $ catchMsgIO $ check v + liftIO $ check v where action = "checkpresent" findkey s = key2file k `elem` lines s diff --git a/Remote/Rsync.hs b/Remote/Rsync.hs index 421c451bdd..91070fe846 100644 --- a/Remote/Rsync.hs +++ b/Remote/Rsync.hs @@ -12,7 +12,7 @@ module Remote.Rsync ( store, retrieve, remove, - checkPresent, + checkKey, withRsyncScratchDir, genRsyncOpts, RsyncOpts @@ -66,8 +66,8 @@ gen r u c gc = do , retrieveKeyFile = retreiveKeyFileDummy , retrieveKeyFileCheap = retrieveCheap o , removeKey = remove o - , hasKey = checkPresent r o - , hasKeyCheap = False + , checkPresent = checkKey r o + , checkPresentCheap = False , whereisKey = Nothing , remoteFsck = Nothing , repairRepo = Nothing @@ -214,14 +214,12 @@ remove o k = do , dir keyFile k "***" ] -checkPresent :: Git.Repo -> RsyncOpts -> Key -> Annex (Either String Bool) -checkPresent r o k = do +checkKey :: Git.Repo -> RsyncOpts -> Key -> Annex Bool +checkKey r o k = do showAction $ "checking " ++ Git.repoDescribe r -- note: Does not currently differentiate between rsync failing -- to connect, and the file not being present. - Right <$> check - where - check = untilTrue (rsyncUrls o k) $ \u -> + untilTrue (rsyncUrls o k) $ \u -> liftIO $ catchBoolIO $ do withQuietOutput createProcessSuccess $ proc "rsync" $ toCommand $ diff --git a/Remote/S3.hs b/Remote/S3.hs index 8603757eb6..4c1f1ecfda 100644 --- a/Remote/S3.hs +++ b/Remote/S3.hs @@ -57,8 +57,8 @@ gen r u c gc = new <$> remoteCost gc expensiveRemoteCost retrieveKeyFile = retreiveKeyFileDummy, retrieveKeyFileCheap = retrieveCheap this, removeKey = remove this c, - hasKey = checkPresent this, - hasKeyCheap = False, + checkPresent = checkKey this, + checkPresentCheap = False, whereisKey = Nothing, remoteFsck = Nothing, repairRepo = Nothing, @@ -167,16 +167,16 @@ remove' :: Remote -> Key -> Annex Bool remove' r k = s3Action r False $ \(conn, bucket) -> s3Bool =<< liftIO (deleteObject conn $ bucketKey r bucket k) -checkPresent :: Remote -> Key -> Annex (Either String Bool) -checkPresent r k = s3Action r noconn $ \(conn, bucket) -> do +checkKey :: Remote -> Key -> Annex Bool +checkKey r k = s3Action r noconn $ \(conn, bucket) -> do showAction $ "checking " ++ name r res <- liftIO $ getObjectInfo conn $ bucketKey r bucket k case res of - Right _ -> return $ Right True - Left (AWSError _ _) -> return $ Right False - Left e -> return $ Left (s3Error e) + Right _ -> return True + Left (AWSError _ _) -> return False + Left e -> s3Error e where - noconn = Left $ error "S3 not configured" + noconn = error "S3 not configured" s3Warning :: ReqError -> Annex Bool s3Warning e = do diff --git a/Remote/Tahoe.hs b/Remote/Tahoe.hs index d265d7ac12..6e52c09810 100644 --- a/Remote/Tahoe.hs +++ b/Remote/Tahoe.hs @@ -72,8 +72,8 @@ gen r u c gc = do retrieveKeyFile = retrieve u hdl, retrieveKeyFileCheap = \_ _ -> return False, removeKey = remove, - hasKey = checkPresent u hdl, - hasKeyCheap = False, + checkPresent = checkKey u hdl, + checkPresentCheap = False, whereisKey = Nothing, remoteFsck = Nothing, repairRepo = Nothing, @@ -123,14 +123,16 @@ remove _k = do warning "content cannot be removed from tahoe remote" return False -checkPresent :: UUID -> TahoeHandle -> Key -> Annex (Either String Bool) -checkPresent u hdl k = go =<< getCapability u k +checkKey :: UUID -> TahoeHandle -> Key -> Annex Bool +checkKey u hdl k = go =<< getCapability u k where - go Nothing = return (Right False) - go (Just cap) = liftIO $ parseCheck <$> readTahoe hdl "check" - [ Param "--raw" - , Param cap - ] + go Nothing = return False + go (Just cap) = liftIO $ do + v <- parseCheck <$> readTahoe hdl "check" + [ Param "--raw" + , Param cap + ] + either error return v defaultTahoeConfigDir :: UUID -> IO TahoeConfigDir defaultTahoeConfigDir u = do diff --git a/Remote/Web.hs b/Remote/Web.hs index ddd1fc1ccd..7bdd8d1854 100644 --- a/Remote/Web.hs +++ b/Remote/Web.hs @@ -50,8 +50,8 @@ gen r _ c gc = retrieveKeyFile = downloadKey, retrieveKeyFileCheap = downloadKeyCheap, removeKey = dropKey, - hasKey = checkKey, - hasKeyCheap = False, + checkPresent = checkKey, + checkPresentCheap = False, whereisKey = Just getUrls, remoteFsck = Nothing, repairRepo = Nothing, @@ -98,12 +98,12 @@ dropKey k = do mapM_ (setUrlMissing k) =<< getUrls k return True -checkKey :: Key -> Annex (Either String Bool) +checkKey :: Key -> Annex Bool checkKey key = do us <- getUrls key if null us - then return $ Right False - else return =<< checkKey' key us + then return False + else either error return =<< checkKey' key us checkKey' :: Key -> [URLString] -> Annex (Either String Bool) checkKey' key us = firsthit us (Right False) $ \u -> do let (u', downloader) = getDownloader u diff --git a/Remote/WebDAV.hs b/Remote/WebDAV.hs index 0bdd383602..f0bcac10ea 100644 --- a/Remote/WebDAV.hs +++ b/Remote/WebDAV.hs @@ -63,8 +63,8 @@ gen r u c gc = new <$> remoteCost gc expensiveRemoteCost retrieveKeyFile = retrieve this, retrieveKeyFileCheap = retrieveCheap this, removeKey = remove this, - hasKey = checkPresent this, - hasKeyCheap = False, + checkPresent = checkKey this, + checkPresentCheap = False, whereisKey = Nothing, remoteFsck = Nothing, repairRepo = Nothing, @@ -170,10 +170,10 @@ remove r k = davAction r False $ \(baseurl, user, pass) -> liftIO $ do let url = davLocation baseurl k isJust . eitherToMaybe <$> tryNonAsync (deleteDAV url user pass) -checkPresent :: Remote -> Key -> Annex (Either String Bool) -checkPresent r k = davAction r noconn go +checkKey :: Remote -> Key -> Annex Bool +checkKey r k = davAction r noconn (either error id <$$> go) where - noconn = Left $ error $ name r ++ " not configured" + noconn = error $ name r ++ " not configured" go (baseurl, user, pass) = do showAction $ "checking " ++ name r diff --git a/Types/Remote.hs b/Types/Remote.hs index 805b984740..b657cfcdc5 100644 --- a/Types/Remote.hs +++ b/Types/Remote.hs @@ -68,12 +68,12 @@ data RemoteA a = Remote { retrieveKeyFileCheap :: Key -> FilePath -> a Bool, -- removes a key's contents (succeeds if the contents are not present) removeKey :: Key -> a Bool, - -- Checks if a key is present in the remote; if the remote - -- cannot be accessed returns a Left error message. - hasKey :: Key -> a (Either String Bool), - -- Some remotes can check hasKey without an expensive network + -- Checks if a key is present in the remote. + -- Throws an exception if the remote cannot be accessed. + checkPresent :: Key -> a Bool, + -- Some remotes can checkPresent without an expensive network -- operation. - hasKeyCheap :: Bool, + checkPresentCheap :: Bool, -- Some remotes can provide additional details for whereis. whereisKey :: Maybe (Key -> a [String]), -- Some remotes can run a fsck operation on the remote, diff --git a/Types/StoreRetrieve.hs b/Types/StoreRetrieve.hs index 9fc0634c4e..a21fa7866c 100644 --- a/Types/StoreRetrieve.hs +++ b/Types/StoreRetrieve.hs @@ -33,3 +33,11 @@ type Storer = Key -> ContentSource -> MeterUpdate -> Annex Bool -- callback, which will fully consume the content before returning. -- Throws exception if key is not present, or remote is not accessible. type Retriever = Key -> MeterUpdate -> (ContentSource -> Annex Bool) -> Annex Bool + +-- Action that removes a Key's content from a remote. +-- Succeeds if key is already not present; never throws exceptions. +type Remover = Key -> Annex Bool + +-- Checks if a Key's content is present on a remote. +-- Throws an exception if the remote is not accessible. +type CheckPresent = Key -> Annex Bool diff --git a/doc/design/assistant/chunks.mdwn b/doc/design/assistant/chunks.mdwn index a9709a778d..0aa389899a 100644 --- a/doc/design/assistant/chunks.mdwn +++ b/doc/design/assistant/chunks.mdwn @@ -91,7 +91,7 @@ cannot tell when we've gotten the last chunk. (Also, we cannot strip padding.) Note that `addurl` sometimes generates keys w/o size info (particularly, it does so by design when using quvi). -Problem: Also, this makes `hasKey` hard to implement: How can it know if +Problem: Also, this makes `checkPresent` hard to implement: How can it know if all the chunks are present, if the key size is not known? Problem: Also, this makes it difficult to download encrypted keys, because @@ -111,7 +111,7 @@ So, SHA256-1048576-c1--xxxxxxx for the first chunk of 1 megabyte. Before any chunks are stored, write a chunkcount file, eg SHA256-s12345-c0--xxxxxxx. Note that this key is the same as the original object's key, except with chunk number set to 0. This file contains both -the number of chunks, and also the chunk size used. `hasKey` downloads this +the number of chunks, and also the chunk size used. `checkPresent` downloads this file, and then verifies that each chunk is present, looking for keys with the expected chunk numbers and chunk size. @@ -126,7 +126,7 @@ Note: This design lets an attacker with logs tell the (appoximate) size of objects, by finding the small files that contain a chunk count, and correlating when that is written/read and when other files are written/read. That could be solved by padding the chunkcount key up to the -size of the rest of the keys, but that's very innefficient; `hasKey` is not +size of the rest of the keys, but that's very innefficient; `checkPresent` is not designed to need to download large files. # design 3 @@ -139,7 +139,7 @@ This seems difficult; attacker could probably tell where the first encrypted part stops and the next encrypted part starts by looking for gpg headers, and so tell which files are the first chunks. -Also, `hasKey` would need to download some or all of the first file. +Also, `checkPresent` would need to download some or all of the first file. If all, that's a lot more expensive. If only some is downloaded, an attacker can guess that the file that was partially downloaded is the first chunk in a series, and wait for a time when it's fully downloaded to @@ -163,7 +163,7 @@ The location log does not record locations of individual chunk keys (too space-inneficient). Instead, look at a chunk log in the git-annex branch to get the chunk count and size for a key. -`hasKey` would check if any of the logged sets of chunks is +`checkPresent` would check if any of the logged sets of chunks is present on the remote. It would also check if the non-chunked key is present, as a fallback. @@ -225,7 +225,7 @@ Reasons: Note that this means that the chunks won't exactly match the configured chunk size. gpg does compression, which might make them a -lot smaller. Or gpg overhead could make them slightly larger. So `hasKey` +lot smaller. Or gpg overhead could make them slightly larger. So `checkPresent` cannot check exact file sizes. If padding is enabled, gpg compression should be disabled, to not leak @@ -250,10 +250,10 @@ and skip forward to the next needed chunk. Easy. Uploads: Check if the 1st chunk is present. If so, check the second chunk, etc. Once the first missing chunk is found, start uploading from there. -That adds one extra hasKey call per upload. Probably a win in most cases. +That adds one extra checkPresent call per upload. Probably a win in most cases. Can be improved by making special remotes open a persistent connection that is used for transferring all chunks, as well as for -checking hasKey. +checking checkPresent. Note that this is safe to do only as long as the Key being transferred cannot possibly have 2 different contents in different repos. Notably not From 8025decc7f08a175cd36cd8f1cc9c5c1ef3cdff1 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 6 Aug 2014 14:28:36 -0400 Subject: [PATCH 27/44] run Preparer to get Remover and CheckPresent actions This will allow special remotes to eg, open a http connection and reuse it, while checking if chunks are present, or removing chunks. S3 and WebDAV both need this to support chunks with reasonable speed. Note that a special remote might want to cache a http connection across multiple requests. A simple case of this is that CheckPresent is typically called before Store or Remove. A remote using this interface can certianly use a Preparer that eg, uses a MVar to cache a http connection. However, it's up to the remote to then deal with things like stale or stalled http connections when eg, doing a series of downloads from a remote and other places. There could be long delays between calls to a remote, which could lead to eg, http connection stalls; the machine might even move to a new network, etc. It might be nice to improve this interface later to allow the simple case without needing to handle the full complex case. One way to do it would be to have a `Transaction SpecialRemote cache`, where SpecialRemote contains methods for Storer, Retriever, Remover, and CheckPresent, that all expect to be passed a `cache`. --- Remote/Bup.hs | 10 ++++++---- Remote/Ddar.hs | 10 ++++++---- Remote/Directory.hs | 10 ++++++---- Remote/External.hs | 10 ++++++---- Remote/GCrypt.hs | 10 ++++++---- Remote/Glacier.hs | 10 ++++++---- Remote/Helper/Messages.hs | 14 ++++++++++++-- Remote/Helper/Special.hs | 40 ++++++++++++++++++++++++++++----------- Remote/Hook.hs | 10 ++++++---- Remote/Rsync.hs | 10 ++++++---- Remote/S3.hs | 16 +++++++++------- 11 files changed, 98 insertions(+), 52 deletions(-) diff --git a/Remote/Bup.hs b/Remote/Bup.hs index 2e68f30ef7..80fffc056c 100644 --- a/Remote/Bup.hs +++ b/Remote/Bup.hs @@ -57,8 +57,8 @@ gen r u c gc = do , storeKey = storeKeyDummy , retrieveKeyFile = retreiveKeyFileDummy , retrieveKeyFileCheap = retrieveCheap buprepo - , removeKey = remove buprepo - , checkPresent = checkKey r bupr' + , removeKey = removeKeyDummy + , checkPresent = checkPresentDummy , checkPresentCheap = bupLocal buprepo , whereisKey = Nothing , remoteFsck = Nothing @@ -76,6 +76,8 @@ gen r u c gc = do return $ Just $ specialRemote' specialcfg c (simplyPrepare $ store this buprepo) (simplyPrepare $ retrieve buprepo) + (simplyPrepare $ remove buprepo) + (simplyPrepare $ checkKey r bupr') this where buprepo = fromMaybe (error "missing buprepo") $ remoteAnnexBupRepo gc @@ -146,7 +148,7 @@ retrieveCheap _ _ _ = return False - - We can, however, remove the git branch that bup created for the key. -} -remove :: BupRepo -> Key -> Annex Bool +remove :: BupRepo -> Remover remove buprepo k = do go =<< liftIO (bup2GitRemote buprepo) warning "content cannot be completely removed from bup remote" @@ -163,7 +165,7 @@ remove buprepo k = do - in a bup repository. One way it to check if the git repository has - a branch matching the name (as created by bup split -n). -} -checkKey :: Git.Repo -> Git.Repo -> Key -> Annex Bool +checkKey :: Git.Repo -> Git.Repo -> CheckPresent checkKey r bupr k | Git.repoIsUrl bupr = do showChecking r diff --git a/Remote/Ddar.hs b/Remote/Ddar.hs index 1227b52755..fba05312be 100644 --- a/Remote/Ddar.hs +++ b/Remote/Ddar.hs @@ -44,6 +44,8 @@ gen r u c gc = do return $ Just $ specialRemote' specialcfg c (simplyPrepare $ store ddarrepo) (simplyPrepare $ retrieve ddarrepo) + (simplyPrepare $ remove ddarrepo) + (simplyPrepare $ checkKey ddarrepo) (this cst) where this cst = Remote @@ -53,8 +55,8 @@ gen r u c gc = do , storeKey = storeKeyDummy , retrieveKeyFile = retreiveKeyFileDummy , retrieveKeyFileCheap = retrieveCheap - , removeKey = remove ddarrepo - , checkPresent = checkKey ddarrepo + , removeKey = removeKeyDummy + , checkPresent = checkPresentDummy , checkPresentCheap = ddarLocal ddarrepo , whereisKey = Nothing , remoteFsck = Nothing @@ -140,7 +142,7 @@ retrieve ddarrepo = byteRetriever $ \k sink -> do retrieveCheap :: Key -> FilePath -> Annex Bool retrieveCheap _ _ = return False -remove :: DdarRepo -> Key -> Annex Bool +remove :: DdarRepo -> Remover remove ddarrepo key = do (cmd, params) <- ddarRemoteCall ddarrepo 'd' [Param $ key2file key] liftIO $ boolSystem cmd params @@ -181,7 +183,7 @@ inDdarManifest ddarrepo k = do where k' = key2file k -checkKey :: DdarRepo -> Key -> Annex Bool +checkKey :: DdarRepo -> CheckPresent checkKey ddarrepo key = do directoryExists <- ddarDirectoryExists ddarrepo case directoryExists of diff --git a/Remote/Directory.hs b/Remote/Directory.hs index 0a2532aa5b..d9419757f0 100644 --- a/Remote/Directory.hs +++ b/Remote/Directory.hs @@ -44,6 +44,8 @@ gen r u c gc = do return $ Just $ specialRemote c (prepareStore dir chunkconfig) (retrieve dir chunkconfig) + (simplyPrepare $ remove dir) + (simplyPrepare $ checkKey dir chunkconfig) Remote { uuid = u, cost = cst, @@ -51,8 +53,8 @@ gen r u c gc = do storeKey = storeKeyDummy, retrieveKeyFile = retreiveKeyFileDummy, retrieveKeyFileCheap = retrieveCheap dir chunkconfig, - removeKey = remove dir, - checkPresent = checkKey dir chunkconfig, + removeKey = removeKeyDummy, + checkPresent = checkPresentDummy, checkPresentCheap = True, whereisKey = Nothing, remoteFsck = Nothing, @@ -161,7 +163,7 @@ retrieveCheap d NoChunks k f = liftIO $ catchBoolIO $ do retrieveCheap _ _ _ _ = return False #endif -remove :: FilePath -> Key -> Annex Bool +remove :: FilePath -> Remover remove d k = liftIO $ removeDirGeneric d (storeDir d k) {- Removes the directory, which must be located under the topdir. @@ -189,7 +191,7 @@ removeDirGeneric topdir dir = do then return ok else doesDirectoryExist topdir <&&> (not <$> doesDirectoryExist dir) -checkKey :: FilePath -> ChunkConfig -> Key -> Annex Bool +checkKey :: FilePath -> ChunkConfig -> CheckPresent checkKey d (LegacyChunks _) k = Legacy.checkKey d locations k checkKey d _ k = liftIO $ ifM (anyM doesFileExist (locations d k)) diff --git a/Remote/External.hs b/Remote/External.hs index ffae94ec99..f326f26ba5 100644 --- a/Remote/External.hs +++ b/Remote/External.hs @@ -45,6 +45,8 @@ gen r u c gc = do return $ Just $ specialRemote c (simplyPrepare $ store external) (simplyPrepare $ retrieve external) + (simplyPrepare $ remove external) + (simplyPrepare $ checkKey external) Remote { uuid = u, cost = cst, @@ -52,8 +54,8 @@ gen r u c gc = do storeKey = storeKeyDummy, retrieveKeyFile = retreiveKeyFileDummy, retrieveKeyFileCheap = \_ _ -> return False, - removeKey = remove external, - checkPresent = checkKey external, + removeKey = removeKeyDummy, + checkPresent = checkPresentDummy, checkPresentCheap = False, whereisKey = Nothing, remoteFsck = Nothing, @@ -109,7 +111,7 @@ retrieve external = fileRetriever $ \d k p -> error errmsg _ -> Nothing -remove :: External -> Key -> Annex Bool +remove :: External -> Remover remove external k = safely $ handleRequest external (REMOVE k) Nothing $ \resp -> case resp of @@ -121,7 +123,7 @@ remove external k = safely $ return False _ -> Nothing -checkKey :: External -> Key -> Annex Bool +checkKey :: External -> CheckPresent checkKey external k = either error id <$> go where go = handleRequest external (CHECKPRESENT k) Nothing $ \resp -> diff --git a/Remote/GCrypt.hs b/Remote/GCrypt.hs index f971ff754f..55a7758112 100644 --- a/Remote/GCrypt.hs +++ b/Remote/GCrypt.hs @@ -107,8 +107,8 @@ gen' r u c gc = do , storeKey = storeKeyDummy , retrieveKeyFile = retreiveKeyFileDummy , retrieveKeyFileCheap = \_ _ -> return False - , removeKey = remove this rsyncopts - , checkPresent = checkKey this rsyncopts + , removeKey = removeKeyDummy + , checkPresent = checkPresentDummy , checkPresentCheap = repoCheap r , whereisKey = Nothing , remoteFsck = Nothing @@ -124,6 +124,8 @@ gen' r u c gc = do return $ Just $ specialRemote' specialcfg c (simplyPrepare $ store this rsyncopts) (simplyPrepare $ retrieve this rsyncopts) + (simplyPrepare $ remove this rsyncopts) + (simplyPrepare $ checkKey this rsyncopts) this where specialcfg @@ -331,7 +333,7 @@ retrieve r rsyncopts | otherwise = unsupportedUrl where -remove :: Remote -> Remote.Rsync.RsyncOpts -> Key -> Annex Bool +remove :: Remote -> Remote.Rsync.RsyncOpts -> Remover remove r rsyncopts k | not $ Git.repoIsUrl (repo r) = guardUsable (repo r) False $ liftIO $ Remote.Directory.removeDirGeneric (Git.repoLocation (repo r)) (parentDir (gCryptLocation r k)) @@ -341,7 +343,7 @@ remove r rsyncopts k removersync = Remote.Rsync.remove rsyncopts k removeshell = Ssh.dropKey (repo r) k -checkKey :: Remote -> Remote.Rsync.RsyncOpts -> Key -> Annex Bool +checkKey :: Remote -> Remote.Rsync.RsyncOpts -> CheckPresent checkKey r rsyncopts k | not $ Git.repoIsUrl (repo r) = guardUsable (repo r) (cantCheck $ repo r) $ diff --git a/Remote/Glacier.hs b/Remote/Glacier.hs index 2ade37011e..dd28def63e 100644 --- a/Remote/Glacier.hs +++ b/Remote/Glacier.hs @@ -42,6 +42,8 @@ gen r u c gc = new <$> remoteCost gc veryExpensiveRemoteCost new cst = Just $ specialRemote' specialcfg c (prepareStore this) (prepareRetrieve this) + (simplyPrepare $ remove this) + (simplyPrepare $ checkKey this) this where this = Remote { @@ -51,8 +53,8 @@ gen r u c gc = new <$> remoteCost gc veryExpensiveRemoteCost storeKey = storeKeyDummy, retrieveKeyFile = retreiveKeyFileDummy, retrieveKeyFileCheap = retrieveCheap this, - removeKey = remove this, - checkPresent = checkKey this, + removeKey = removeKeyDummy, + checkPresent = checkPresentDummy, checkPresentCheap = False, whereisKey = Nothing, remoteFsck = Nothing, @@ -155,7 +157,7 @@ retrieve r k sink = go =<< glacierEnv c u retrieveCheap :: Remote -> Key -> FilePath -> Annex Bool retrieveCheap _ _ _ = return False -remove :: Remote -> Key -> Annex Bool +remove :: Remote -> Remover remove r k = glacierAction r [ Param "archive" @@ -164,7 +166,7 @@ remove r k = glacierAction r , Param $ archive r k ] -checkKey :: Remote -> Key -> Annex Bool +checkKey :: Remote -> CheckPresent checkKey r k = do showAction $ "checking " ++ name r go =<< glacierEnv (config r) (uuid r) diff --git a/Remote/Helper/Messages.hs b/Remote/Helper/Messages.hs index 3088a9ab2b..774716ca1a 100644 --- a/Remote/Helper/Messages.hs +++ b/Remote/Helper/Messages.hs @@ -9,9 +9,19 @@ module Remote.Helper.Messages where import Common.Annex import qualified Git +import qualified Types.Remote as Remote showChecking :: Git.Repo -> Annex () showChecking r = showAction $ "checking " ++ Git.repoDescribe r -cantCheck :: Git.Repo -> a -cantCheck r = error $ "unable to check " ++ Git.repoDescribe r +class Checkable a where + descCheckable :: a -> String + +instance Checkable Git.Repo where + descCheckable = Git.repoDescribe + +instance Checkable (Remote.RemoteA a) where + descCheckable = Remote.name + +cantCheck :: Checkable a => a -> e +cantCheck v = error $ "unable to check " ++ descCheckable v diff --git a/Remote/Helper/Special.hs b/Remote/Helper/Special.hs index 3c19f25eb9..f8428aff7c 100644 --- a/Remote/Helper/Special.hs +++ b/Remote/Helper/Special.hs @@ -11,6 +11,8 @@ module Remote.Helper.Special ( Preparer, Storer, Retriever, + Remover, + CheckPresent, simplyPrepare, ContentSource, checkPrepare, @@ -21,6 +23,8 @@ module Remote.Helper.Special ( byteRetriever, storeKeyDummy, retreiveKeyFileDummy, + removeKeyDummy, + checkPresentDummy, SpecialRemoteCfg(..), specialRemoteCfg, specialRemote, @@ -36,6 +40,7 @@ import Config.Cost import Utility.Metered import Remote.Helper.Chunked as X import Remote.Helper.Encryptable as X hiding (encryptableRemote) +import Remote.Helper.Messages import Annex.Content import Annex.Exception import qualified Git @@ -114,16 +119,27 @@ byteRetriever :: (Key -> (L.ByteString -> Annex Bool) -> Annex Bool) -> Retrieve byteRetriever a k _m callback = a k (callback . ByteContent) {- The base Remote that is provided to specialRemote needs to have - - storeKey and retreiveKeyFile methods, but they are never - - actually used (since specialRemote replaces them). + - storeKey, retreiveKeyFile, removeKey, and checkPresent methods, + - but they are never actually used (since specialRemote replaces them). - Here are some dummy ones. -} storeKeyDummy :: Key -> AssociatedFile -> MeterUpdate -> Annex Bool storeKeyDummy _ _ _ = return False retreiveKeyFileDummy :: Key -> AssociatedFile -> FilePath -> MeterUpdate -> Annex Bool retreiveKeyFileDummy _ _ _ _ = return False +removeKeyDummy :: Key -> Annex Bool +removeKeyDummy _ = return False +checkPresentDummy :: Key -> Annex Bool +checkPresentDummy _ = error "missing checkPresent implementation" -type RemoteModifier = RemoteConfig -> Preparer Storer -> Preparer Retriever -> Remote -> Remote +type RemoteModifier + = RemoteConfig + -> Preparer Storer + -> Preparer Retriever + -> Preparer Remover + -> Preparer CheckPresent + -> Remote + -> Remote data SpecialRemoteCfg = SpecialRemoteCfg { chunkConfig :: ChunkConfig @@ -139,13 +155,14 @@ specialRemote :: RemoteModifier specialRemote c = specialRemote' (specialRemoteCfg c) c specialRemote' :: SpecialRemoteCfg -> RemoteModifier -specialRemote' cfg c preparestorer prepareretriever baser = encr +specialRemote' cfg c preparestorer prepareretriever prepareremover preparecheckpresent baser = encr where encr = baser { storeKey = \k _f p -> cip >>= storeKeyGen k p , retrieveKeyFile = \k _f d p -> cip >>= retrieveKeyFileGen k d p , retrieveKeyFileCheap = \k d -> cip >>= maybe (retrieveKeyFileCheap baser k d) + -- retrieval of encrypted keys is never cheap (\_ -> return False) , removeKey = \k -> cip >>= removeKeyGen k , checkPresent = \k -> cip >>= checkPresentGen k @@ -160,8 +177,7 @@ specialRemote' cfg c preparestorer prepareretriever baser = encr safely a = catchNonAsyncAnnex a (\e -> warning (show e) >> return False) -- chunk, then encrypt, then feed to the storer - storeKeyGen k p enc = - safely $ preparestorer k $ safely . go + storeKeyGen k p enc = safely $ preparestorer k $ safely . go where go (Just storer) = sendAnnex k rollback $ \src -> displayprogress p k $ \p' -> @@ -178,7 +194,7 @@ specialRemote' cfg c preparestorer prepareretriever baser = encr readBytes $ \encb -> storer (enck k) (ByteContent encb) p - -- call retriever to get chunks; decrypt them; stream to dest file + -- call retrieve-r to get chunks; decrypt them; stream to dest file retrieveKeyFileGen k dest p enc = safely $ prepareretriever k $ safely . go where @@ -188,15 +204,17 @@ specialRemote' cfg c preparestorer prepareretriever baser = encr go Nothing = return False enck = maybe id snd enc - removeKeyGen k enc = removeChunks remover (uuid baser) chunkconfig enck k + removeKeyGen k enc = safely $ prepareremover k $ safely . go where + go (Just remover) = removeChunks remover (uuid baser) chunkconfig enck k + go Nothing = return False enck = maybe id snd enc - remover = removeKey baser - checkPresentGen k enc = checkPresentChunks checker (uuid baser) chunkconfig enck k + checkPresentGen k enc = preparecheckpresent k go where + go (Just checker) = checkPresentChunks checker (uuid baser) chunkconfig enck k + go Nothing = cantCheck baser enck = maybe id snd enc - checker = checkPresent baser chunkconfig = chunkConfig cfg diff --git a/Remote/Hook.hs b/Remote/Hook.hs index 037f71ced4..a2d096ecd4 100644 --- a/Remote/Hook.hs +++ b/Remote/Hook.hs @@ -37,6 +37,8 @@ gen r u c gc = do return $ Just $ specialRemote c (simplyPrepare $ store hooktype) (simplyPrepare $ retrieve hooktype) + (simplyPrepare $ remove hooktype) + (simplyPrepare $ checkKey r hooktype) Remote { uuid = u, cost = cst, @@ -44,8 +46,8 @@ gen r u c gc = do storeKey = storeKeyDummy, retrieveKeyFile = retreiveKeyFileDummy, retrieveKeyFileCheap = retrieveCheap hooktype, - removeKey = remove hooktype, - checkPresent = checkKey r hooktype, + removeKey = removeKeyDummy, + checkPresent = checkPresentDummy, checkPresentCheap = False, whereisKey = Nothing, remoteFsck = Nothing, @@ -125,10 +127,10 @@ retrieve h = fileRetriever $ \d k _p -> retrieveCheap :: HookName -> Key -> FilePath -> Annex Bool retrieveCheap _ _ _ = return False -remove :: HookName -> Key -> Annex Bool +remove :: HookName -> Remover remove h k = runHook h "remove" k Nothing $ return True -checkKey :: Git.Repo -> HookName -> Key -> Annex Bool +checkKey :: Git.Repo -> HookName -> CheckPresent checkKey r h k = do showAction $ "checking " ++ Git.repoDescribe r v <- lookupHook h action diff --git a/Remote/Rsync.hs b/Remote/Rsync.hs index 91070fe846..afd13abf03 100644 --- a/Remote/Rsync.hs +++ b/Remote/Rsync.hs @@ -58,6 +58,8 @@ gen r u c gc = do return $ Just $ specialRemote' specialcfg c (simplyPrepare $ fileStorer $ store o) (simplyPrepare $ fileRetriever $ retrieve o) + (simplyPrepare $ remove o) + (simplyPrepare $ checkKey r o) Remote { uuid = u , cost = cst @@ -65,8 +67,8 @@ gen r u c gc = do , storeKey = storeKeyDummy , retrieveKeyFile = retreiveKeyFileDummy , retrieveKeyFileCheap = retrieveCheap o - , removeKey = remove o - , checkPresent = checkKey r o + , removeKey = removeKeyDummy + , checkPresent = checkPresentDummy , checkPresentCheap = False , whereisKey = Nothing , remoteFsck = Nothing @@ -186,7 +188,7 @@ retrieve o f k p = retrieveCheap :: RsyncOpts -> Key -> FilePath -> Annex Bool retrieveCheap o k f = ifM (preseedTmp k f) ( rsyncRetrieve o k f Nothing , return False ) -remove :: RsyncOpts -> Key -> Annex Bool +remove :: RsyncOpts -> Remover remove o k = do ps <- sendParams withRsyncScratchDir $ \tmp -> liftIO $ do @@ -214,7 +216,7 @@ remove o k = do , dir keyFile k "***" ] -checkKey :: Git.Repo -> RsyncOpts -> Key -> Annex Bool +checkKey :: Git.Repo -> RsyncOpts -> CheckPresent checkKey r o k = do showAction $ "checking " ++ Git.repoDescribe r -- note: Does not currently differentiate between rsync failing diff --git a/Remote/S3.hs b/Remote/S3.hs index 4c1f1ecfda..1aba392453 100644 --- a/Remote/S3.hs +++ b/Remote/S3.hs @@ -47,6 +47,8 @@ gen r u c gc = new <$> remoteCost gc expensiveRemoteCost new cst = Just $ specialRemote c (prepareStore this) (prepareRetrieve this) + (simplyPrepare $ remove this c) + (simplyPrepare $ checkKey this) this where this = Remote { @@ -55,9 +57,9 @@ gen r u c gc = new <$> remoteCost gc expensiveRemoteCost name = Git.repoDescribe r, storeKey = storeKeyDummy, retrieveKeyFile = retreiveKeyFileDummy, - retrieveKeyFileCheap = retrieveCheap this, - removeKey = remove this c, - checkPresent = checkKey this, + retrieveKeyFileCheap = retrieveCheap, + removeKey = removeKeyDummy, + checkPresent = checkPresentDummy, checkPresentCheap = False, whereisKey = Nothing, remoteFsck = Nothing, @@ -150,13 +152,13 @@ prepareRetrieve r = resourcePrepare (const $ s3Action r False) $ \(conn, bucket) liftIO (getObject conn $ bucketKey r bucket k) >>= either s3Error (sink . obj_data) -retrieveCheap :: Remote -> Key -> FilePath -> Annex Bool -retrieveCheap _ _ _ = return False +retrieveCheap :: Key -> FilePath -> Annex Bool +retrieveCheap _ _ = return False {- Internet Archive doesn't easily allow removing content. - While it may remove the file, there are generally other files - derived from it that it does not remove. -} -remove :: Remote -> RemoteConfig -> Key -> Annex Bool +remove :: Remote -> RemoteConfig -> Remover remove r c k | isIA c = do warning "Cannot remove content from the Internet Archive" @@ -167,7 +169,7 @@ remove' :: Remote -> Key -> Annex Bool remove' r k = s3Action r False $ \(conn, bucket) -> s3Bool =<< liftIO (deleteObject conn $ bucketKey r bucket k) -checkKey :: Remote -> Key -> Annex Bool +checkKey :: Remote -> CheckPresent checkKey r k = s3Action r noconn $ \(conn, bucket) -> do showAction $ "checking " ++ name r res <- liftIO $ getObjectInfo conn $ bucketKey r bucket k From aacb0b282357b41e1f055d2435b433a08b2eec65 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 6 Aug 2014 16:55:32 -0400 Subject: [PATCH 28/44] convert WebDAV to new special remote interface, adding new-style chunking support Reusing http connection when operating on chunks is not done yet, I had to submit some patches to DAV to support that. However, this is no slower than old-style chunking was. Note that it's a fileRetriever and a fileStorer, despite DAV using bytestrings that would allow streaming. As a result, upload/download of encrypted files is made a bit more expensive, since it spools them to temp files. This was needed to get the progress meters to work. There are probably ways to avoid that.. But it turns out that the current DAV interface buffers the whole file content in memory, and I have sent in a patch to DAV to improve its interfaces. Using the new interfaces, it's certainly going to need to be a fileStorer, in order to read the file size from the file (getting the size of a bytestring would destroy laziness). It should be possible to use the new interface to make it be a byteRetriever, so I'll change that when I get to it. This commit was sponsored by Andreas Olsson. --- Remote/Helper/Encryptable.hs | 38 ---------- Remote/Helper/Special.hs | 4 +- Remote/WebDAV.hs | 122 ++++++++++++-------------------- debian/changelog | 2 +- doc/special_remotes/webdav.mdwn | 2 +- 5 files changed, 50 insertions(+), 118 deletions(-) diff --git a/Remote/Helper/Encryptable.hs b/Remote/Helper/Encryptable.hs index c364a69e76..dd032ce337 100644 --- a/Remote/Helper/Encryptable.hs +++ b/Remote/Helper/Encryptable.hs @@ -14,9 +14,7 @@ import Types.Remote import Crypto import Types.Crypto import qualified Annex -import Config.Cost import Utility.Base64 -import Utility.Metered {- Encryption setup for a remote. The user must specify whether to use - an encryption key, or not encrypt. An encrypted cipher is created, or is @@ -70,42 +68,6 @@ encryptionSetup c = maybe genCipher updateCipher $ extractCipher c -- remotes (while being backward-compatible). [ "keyid", "keyid+", "keyid-", "highRandomQuality" ] -{- Modifies a Remote to support encryption. -} --- TODO: deprecated -encryptableRemote - :: RemoteConfig - -> ((Cipher, Key) -> Key -> MeterUpdate -> Annex Bool) - -> ((Cipher, Key) -> Key -> FilePath -> MeterUpdate -> Annex Bool) - -> Remote - -> Remote -encryptableRemote c storeKeyEncrypted retrieveKeyFileEncrypted r = r - { storeKey = \k f p -> cip k >>= maybe - (storeKey r k f p) - (\v -> storeKeyEncrypted v k p) - , retrieveKeyFile = \k f d p -> cip k >>= maybe - (retrieveKeyFile r k f d p) - (\v -> retrieveKeyFileEncrypted v k d p) - , retrieveKeyFileCheap = \k d -> cip k >>= maybe - (retrieveKeyFileCheap r k d) - (\_ -> return False) - , removeKey = \k -> cip k >>= maybe - (removeKey r k) - (\(_, enckey) -> removeKey r enckey) - , checkPresent = \k -> cip k >>= maybe - (checkPresent r k) - (\(_, enckey) -> checkPresent r enckey) - , cost = maybe - (cost r) - (const $ cost r + encryptedRemoteCostAdj) - (extractCipher c) - } - where - cip k = do - v <- cipherKey c - return $ case v of - Nothing -> Nothing - Just (cipher, enck) -> Just (cipher, enck k) - {- Gets encryption Cipher. The decrypted Ciphers are cached in the Annex - state. -} remoteCipher :: RemoteConfig -> Annex (Maybe Cipher) diff --git a/Remote/Helper/Special.hs b/Remote/Helper/Special.hs index f8428aff7c..fc0e11d2f3 100644 --- a/Remote/Helper/Special.hs +++ b/Remote/Helper/Special.hs @@ -39,7 +39,7 @@ import Crypto import Config.Cost import Utility.Metered import Remote.Helper.Chunked as X -import Remote.Helper.Encryptable as X hiding (encryptableRemote) +import Remote.Helper.Encryptable as X import Remote.Helper.Messages import Annex.Content import Annex.Exception @@ -119,7 +119,7 @@ byteRetriever :: (Key -> (L.ByteString -> Annex Bool) -> Annex Bool) -> Retrieve byteRetriever a k _m callback = a k (callback . ByteContent) {- The base Remote that is provided to specialRemote needs to have - - storeKey, retreiveKeyFile, removeKey, and checkPresent methods, + - storeKey, retrieveKeyFile, removeKey, and checkPresent methods, - but they are never actually used (since specialRemote replaces them). - Here are some dummy ones. -} diff --git a/Remote/WebDAV.hs b/Remote/WebDAV.hs index f0bcac10ea..6679242e5c 100644 --- a/Remote/WebDAV.hs +++ b/Remote/WebDAV.hs @@ -27,12 +27,9 @@ import qualified Git import Config import Config.Cost import Remote.Helper.Special -import Remote.Helper.Encryptable import qualified Remote.Helper.Chunked.Legacy as Legacy -import Crypto import Creds import Utility.Metered -import Annex.Content import Annex.UUID import Remote.WebDAV.DavUrl @@ -50,20 +47,22 @@ remote = RemoteType { gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remote) gen r u c gc = new <$> remoteCost gc expensiveRemoteCost where - new cst = Just $ encryptableRemote c - (storeEncrypted this) - (retrieveEncrypted this) + new cst = Just $ specialRemote c + (prepareStore this chunkconfig) + (prepareRetrieve this chunkconfig) + (prepareRemove this) + (prepareCheckPresent this chunkconfig) this where this = Remote { uuid = u, cost = cst, name = Git.repoDescribe r, - storeKey = store this, - retrieveKeyFile = retrieve this, - retrieveKeyFileCheap = retrieveCheap this, - removeKey = remove this, - checkPresent = checkKey this, + storeKey = storeKeyDummy, + retrieveKeyFile = retreiveKeyFileDummy, + retrieveKeyFileCheap = retrieveCheap, + removeKey = removeKeyDummy, + checkPresent = checkPresentDummy, checkPresentCheap = False, whereisKey = Nothing, remoteFsck = Nothing, @@ -76,6 +75,7 @@ gen r u c gc = new <$> remoteCost gc expensiveRemoteCost availability = GloballyAvailable, remotetype = remote } + chunkconfig = getChunkConfig c webdavSetup :: Maybe UUID -> Maybe CredPair -> RemoteConfig -> Annex (RemoteConfig, UUID) webdavSetup mu mcreds c = do @@ -89,95 +89,67 @@ webdavSetup mu mcreds c = do c'' <- setRemoteCredPair c' (davCreds u) creds return (c'', u) -store :: Remote -> Key -> AssociatedFile -> MeterUpdate -> Annex Bool -store r k _f p = metered (Just p) k $ \meterupdate -> - davAction r False $ \(baseurl, user, pass) -> - sendAnnex k (void $ remove r k) $ \src -> - liftIO $ withMeteredFile src meterupdate $ - storeHelper r k baseurl user pass +prepareStore :: Remote -> ChunkConfig -> Preparer Storer +prepareStore r chunkconfig = simplyPrepare $ fileStorer $ \k f p -> + davAction r False $ \(baseurl, user, pass) -> liftIO $ + withMeteredFile f p $ + storeHelper chunkconfig k baseurl user pass -storeEncrypted :: Remote -> (Cipher, Key) -> Key -> MeterUpdate -> Annex Bool -storeEncrypted r (cipher, enck) k p = metered (Just p) k $ \meterupdate -> - davAction r False $ \(baseurl, user, pass) -> - sendAnnex k (void $ remove r enck) $ \src -> - liftIO $ encrypt (getGpgEncParams r) cipher - (streamMeteredFile src meterupdate) $ - readBytes $ storeHelper r enck baseurl user pass - -storeHelper :: Remote -> Key -> DavUrl -> DavUser -> DavPass -> L.ByteString -> IO Bool -storeHelper r k baseurl user pass b = catchBoolIO $ do +storeHelper :: ChunkConfig -> Key -> DavUrl -> DavUser -> DavPass -> L.ByteString -> IO Bool +storeHelper chunkconfig k baseurl user pass b = do mkdirRecursiveDAV tmpurl user pass case chunkconfig of - NoChunks -> flip catchNonAsync (\e -> warningIO (show e) >> return False) $ do - storehttp tmpurl b - finalizer tmpurl keyurl - return True - UnpaddedChunks _ -> error "TODO: storeHelper with UnpaddedChunks" LegacyChunks chunksize -> do let storer urls = Legacy.storeChunked chunksize urls storehttp b let recorder url s = storehttp url (L8.fromString s) Legacy.storeChunks k tmpurl keyurl storer recorder finalizer - + _ -> do + storehttp tmpurl b + finalizer tmpurl keyurl + return True where tmpurl = tmpLocation baseurl k keyurl = davLocation baseurl k - chunkconfig = getChunkConfig $ config r finalizer srcurl desturl = do void $ tryNonAsync (deleteDAV desturl user pass) mkdirRecursiveDAV (urlParent desturl) user pass moveDAV srcurl desturl user pass storehttp url = putDAV url user pass -retrieveCheap :: Remote -> Key -> FilePath -> Annex Bool -retrieveCheap _ _ _ = return False +retrieveCheap :: Key -> FilePath -> Annex Bool +retrieveCheap _ _ = return False -retrieve :: Remote -> Key -> AssociatedFile -> FilePath -> MeterUpdate -> Annex Bool -retrieve r k _f d p = metered (Just p) k $ \meterupdate -> - davAction r False $ \(baseurl, user, pass) -> liftIO $ catchBoolIO $ - withStoredFiles r k baseurl user pass onerr $ \urls -> do - Legacy.meteredWriteFileChunks meterupdate d urls $ \url -> do +prepareRetrieve :: Remote -> ChunkConfig -> Preparer Retriever +prepareRetrieve r chunkconfig = simplyPrepare $ fileRetriever $ \d k p -> + davAction r onerr $ \(baseurl, user, pass) -> liftIO $ + withStoredFiles chunkconfig k baseurl user pass onerr $ \urls -> do + Legacy.meteredWriteFileChunks p d urls $ \url -> do mb <- getDAV url user pass case mb of - Nothing -> throwIO "download failed" + Nothing -> onerr Just b -> return b - return True where - onerr _ = return False + onerr = error "download failed" -retrieveEncrypted :: Remote -> (Cipher, Key) -> Key -> FilePath -> MeterUpdate -> Annex Bool -retrieveEncrypted r (cipher, enck) k d p = metered (Just p) k $ \meterupdate -> - davAction r False $ \(baseurl, user, pass) -> liftIO $ catchBoolIO $ - withStoredFiles r enck baseurl user pass onerr $ \urls -> do - decrypt cipher (feeder user pass urls) $ - readBytes $ meteredWriteFile meterupdate d - return True - where - onerr _ = return False +prepareRemove :: Remote -> Preparer Remover +prepareRemove r = simplyPrepare $ \k -> + davAction r False $ \(baseurl, user, pass) -> liftIO $ do + -- Delete the key's whole directory, including any + -- legacy chunked files, etc, in a single action. + let url = davLocation baseurl k + isJust . eitherToMaybe <$> tryNonAsync (deleteDAV url user pass) - feeder _ _ [] _ = noop - feeder user pass (url:urls) h = do - mb <- getDAV url user pass - case mb of - Nothing -> throwIO "download failed" - Just b -> do - L.hPut h b - feeder user pass urls h +prepareCheckPresent :: Remote -> ChunkConfig -> Preparer CheckPresent +prepareCheckPresent r chunkconfig = simplyPrepare $ checkKey r chunkconfig -remove :: Remote -> Key -> Annex Bool -remove r k = davAction r False $ \(baseurl, user, pass) -> liftIO $ do - -- Delete the key's whole directory, including any chunked - -- files, etc, in a single action. - let url = davLocation baseurl k - isJust . eitherToMaybe <$> tryNonAsync (deleteDAV url user pass) - -checkKey :: Remote -> Key -> Annex Bool -checkKey r k = davAction r noconn (either error id <$$> go) +checkKey :: Remote -> ChunkConfig -> Key -> Annex Bool +checkKey r chunkconfig k = davAction r noconn (either error id <$$> go) where noconn = error $ name r ++ " not configured" go (baseurl, user, pass) = do showAction $ "checking " ++ name r - liftIO $ withStoredFiles r k baseurl user pass onerr check + liftIO $ withStoredFiles chunkconfig k baseurl user pass onerr check where check [] = return $ Right True check (url:urls) = do @@ -196,7 +168,7 @@ checkKey r k = davAction r noconn (either error id <$$> go) else v withStoredFiles - :: Remote + :: ChunkConfig -> Key -> DavUrl -> DavUser @@ -204,9 +176,7 @@ withStoredFiles -> (DavUrl -> IO a) -> ([DavUrl] -> IO a) -> IO a -withStoredFiles r k baseurl user pass onerr a = case chunkconfig of - NoChunks -> a [keyurl] - UnpaddedChunks _ -> error "TODO: withStoredFiles with UnpaddedChunks" +withStoredFiles chunkconfig k baseurl user pass onerr a = case chunkconfig of LegacyChunks _ -> do let chunkcount = keyurl ++ Legacy.chunkCount v <- getDAV chunkcount user pass @@ -217,9 +187,9 @@ withStoredFiles r k baseurl user pass onerr a = case chunkconfig of if null chunks then onerr chunkcount else a chunks + _ -> a [keyurl] where keyurl = davLocation baseurl k ++ keyFile k - chunkconfig = getChunkConfig $ config r davAction :: Remote -> a -> ((DavUrl, DavUser, DavPass) -> Annex a) -> Annex a davAction r unconfigured action = do diff --git a/debian/changelog b/debian/changelog index 8da5b87bb3..6d13c96372 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,7 +1,7 @@ git-annex (5.20140718) UNRELEASED; urgency=medium * New chunk= option to chunk files stored in special remotes. - Supported by: directory, S3, gcrypt, rsync, and all external + Supported by: directory, S3, webdav, gcrypt, rsync, and all external and hook special remotes. * Partially transferred files are automatically resumed when using chunked remotes! diff --git a/doc/special_remotes/webdav.mdwn b/doc/special_remotes/webdav.mdwn index 64eed5d0b8..6b5f5b1222 100644 --- a/doc/special_remotes/webdav.mdwn +++ b/doc/special_remotes/webdav.mdwn @@ -37,4 +37,4 @@ the webdav remote. Setup example: - # WEBDAV_USERNAME=joey@kitenet.net WEBDAV_PASSWORD=xxxxxxx git annex initremote box.com type=webdav url=https://dav.box.com/dav/git-annex chunksize=75mb keyid=joey@kitenet.net + # WEBDAV_USERNAME=joey@kitenet.net WEBDAV_PASSWORD=xxxxxxx git annex initremote box.com type=webdav url=https://dav.box.com/dav/git-annex chunk=10mb keyid=joey@kitenet.net From 0b1b85d9ea5fc4665ceea2674e2bd07e794f48b8 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Thu, 7 Aug 2014 15:45:56 -0400 Subject: [PATCH 29/44] use DAV monad This speeds up the webdav special remote somewhat, since it often now groups actions together in a single http connection when eg, storing a file. Legacy chunks are still supported, but have not been sped up. This depends on a as-yet unreleased version of DAV. This commit was sponsored by Thomas Hochstein. --- Remote/WebDAV.hs | 229 ++++++++++++++++++----------------- Remote/WebDAV/DavLocation.hs | 59 +++++++++ Remote/WebDAV/DavUrl.hs | 44 ------- debian/changelog | 4 +- debian/control | 2 +- git-annex.cabal | 4 +- 6 files changed, 180 insertions(+), 162 deletions(-) create mode 100644 Remote/WebDAV/DavLocation.hs delete mode 100644 Remote/WebDAV/DavUrl.hs diff --git a/Remote/WebDAV.hs b/Remote/WebDAV.hs index 6679242e5c..a77deffc53 100644 --- a/Remote/WebDAV.hs +++ b/Remote/WebDAV.hs @@ -11,14 +11,12 @@ module Remote.WebDAV (remote, davCreds, configUrl) where import Network.Protocol.HTTP.DAV import qualified Data.Map as M +import qualified Data.ByteString.Lazy as L import qualified Data.ByteString.UTF8 as B8 import qualified Data.ByteString.Lazy.UTF8 as L8 -import qualified Data.ByteString.Lazy as L -import qualified Control.Exception as E import qualified Control.Exception.Lifted as EL import Network.HTTP.Client (HttpException(..)) import Network.HTTP.Types -import System.Log.Logger (debugM) import System.IO.Error import Common.Annex @@ -30,8 +28,9 @@ import Remote.Helper.Special import qualified Remote.Helper.Chunked.Legacy as Legacy import Creds import Utility.Metered +import Utility.Url (URLString) import Annex.UUID -import Remote.WebDAV.DavUrl +import Remote.WebDAV.DavLocation type DavUser = B8.ByteString type DavPass = B8.ByteString @@ -95,26 +94,34 @@ prepareStore r chunkconfig = simplyPrepare $ fileStorer $ \k f p -> withMeteredFile f p $ storeHelper chunkconfig k baseurl user pass -storeHelper :: ChunkConfig -> Key -> DavUrl -> DavUser -> DavPass -> L.ByteString -> IO Bool +storeHelper :: ChunkConfig -> Key -> URLString -> DavUser -> DavPass -> L.ByteString -> IO Bool storeHelper chunkconfig k baseurl user pass b = do - mkdirRecursiveDAV tmpurl user pass case chunkconfig of LegacyChunks chunksize -> do - let storer urls = Legacy.storeChunked chunksize urls storehttp b - let recorder url s = storehttp url (L8.fromString s) - Legacy.storeChunks k tmpurl keyurl storer recorder finalizer - _ -> do - storehttp tmpurl b - finalizer tmpurl keyurl + let storehttp l b' = do + void $ goDAV baseurl user pass $ do + maybe noop (void . mkColRecursive) (locationParent l) + inLocation l $ putContentM (contentType, b') + let storer locs = Legacy.storeChunked chunksize locs storehttp b + let recorder l s = storehttp l (L8.fromString s) + let finalizer tmp' dest' = goDAV baseurl user pass $ + finalizeStore baseurl tmp' (fromJust $ locationParent dest') + Legacy.storeChunks k tmp dest storer recorder finalizer + _ -> goDAV baseurl user pass $ do + void $ mkColRecursive tmpDir + inLocation tmp $ + putContentM (contentType, b) + finalizeStore baseurl tmp dest return True where - tmpurl = tmpLocation baseurl k - keyurl = davLocation baseurl k - finalizer srcurl desturl = do - void $ tryNonAsync (deleteDAV desturl user pass) - mkdirRecursiveDAV (urlParent desturl) user pass - moveDAV srcurl desturl user pass - storehttp url = putDAV url user pass + tmp = keyTmpLocation k + dest = keyLocation k ++ keyFile k + +finalizeStore :: URLString -> DavLocation -> DavLocation -> DAVT IO () +finalizeStore baseurl tmp dest = do + inLocation dest $ void $ safely $ delContentM + maybe noop (void . mkColRecursive) (locationParent dest) + moveDAV baseurl tmp dest retrieveCheap :: Key -> FilePath -> Annex Bool retrieveCheap _ _ = return False @@ -122,9 +129,11 @@ retrieveCheap _ _ = return False prepareRetrieve :: Remote -> ChunkConfig -> Preparer Retriever prepareRetrieve r chunkconfig = simplyPrepare $ fileRetriever $ \d k p -> davAction r onerr $ \(baseurl, user, pass) -> liftIO $ - withStoredFiles chunkconfig k baseurl user pass onerr $ \urls -> do - Legacy.meteredWriteFileChunks p d urls $ \url -> do - mb <- getDAV url user pass + withStoredFiles chunkconfig k baseurl user pass onerr $ \locs -> do + Legacy.meteredWriteFileChunks p d locs $ \l -> do + mb <- goDAV baseurl user pass $ safely $ + inLocation l $ + snd <$> getContentM case mb of Nothing -> onerr Just b -> return b @@ -136,8 +145,9 @@ prepareRemove r = simplyPrepare $ \k -> davAction r False $ \(baseurl, user, pass) -> liftIO $ do -- Delete the key's whole directory, including any -- legacy chunked files, etc, in a single action. - let url = davLocation baseurl k - isJust . eitherToMaybe <$> tryNonAsync (deleteDAV url user pass) + ret <- goDAV baseurl user pass $ safely $ + inLocation (keyLocation k) delContentM + return (isJust ret) prepareCheckPresent :: Remote -> ChunkConfig -> Preparer CheckPresent prepareCheckPresent r chunkconfig = simplyPrepare $ checkKey r chunkconfig @@ -152,46 +162,49 @@ checkKey r chunkconfig k = davAction r noconn (either error id <$$> go) liftIO $ withStoredFiles chunkconfig k baseurl user pass onerr check where check [] = return $ Right True - check (url:urls) = do - v <- existsDAV url user pass + check (l:ls) = do + v <- goDAV baseurl user pass $ existsDAV l if v == Right True - then check urls + then check ls else return v {- Failed to read the chunkcount file; see if it's missing, - or if there's a problem accessing it, - - or perhaps this was an intermittent error. -} - onerr url = do - v <- existsDAV url user pass + - or perhaps this was an intermittent error. -} + onerr f = do + v <- goDAV baseurl user pass $ existsDAV f return $ if v == Right True - then Left $ "failed to read " ++ url + then Left $ "failed to read " ++ f else v withStoredFiles :: ChunkConfig -> Key - -> DavUrl + -> URLString -> DavUser -> DavPass - -> (DavUrl -> IO a) - -> ([DavUrl] -> IO a) + -> (DavLocation -> IO a) + -> ([DavLocation] -> IO a) -> IO a withStoredFiles chunkconfig k baseurl user pass onerr a = case chunkconfig of LegacyChunks _ -> do - let chunkcount = keyurl ++ Legacy.chunkCount - v <- getDAV chunkcount user pass + let chunkcount = keyloc ++ Legacy.chunkCount + v <- goDAV baseurl user pass $ safely $ + inLocation chunkcount $ + snd <$> getContentM case v of - Just s -> a $ Legacy.listChunks keyurl $ L8.toString s + Just s -> a $ Legacy.listChunks keyloc $ L8.toString s Nothing -> do - chunks <- Legacy.probeChunks keyurl $ \u -> (== Right True) <$> existsDAV u user pass + chunks <- Legacy.probeChunks keyloc $ \f -> + (== Right True) <$> goDAV baseurl user pass (existsDAV f) if null chunks then onerr chunkcount else a chunks - _ -> a [keyurl] + _ -> a [keyloc] where - keyurl = davLocation baseurl k ++ keyFile k + keyloc = keyLocation k ++ keyFile k -davAction :: Remote -> a -> ((DavUrl, DavUser, DavPass) -> Annex a) -> Annex a +davAction :: Remote -> a -> ((DavLocation, DavUser, DavPass) -> Annex a) -> Annex a davAction r unconfigured action = do mcreds <- getCreds (config r) (uuid r) case (mcreds, configUrl r) of @@ -199,7 +212,7 @@ davAction r unconfigured action = do action (url, toDavUser user, toDavPass pass) _ -> return unconfigured -configUrl :: Remote -> Maybe DavUrl +configUrl :: Remote -> Maybe URLString configUrl r = fixup <$> M.lookup "url" (config r) where -- box.com DAV url changed @@ -211,47 +224,63 @@ toDavUser = B8.fromString toDavPass :: String -> DavPass toDavPass = B8.fromString -{- Creates a directory in WebDAV, if not already present; also creating - - any missing parent directories. -} -mkdirRecursiveDAV :: DavUrl -> DavUser -> DavPass -> IO () -mkdirRecursiveDAV url user pass = go url - where - make u = mkdirDAV u user pass - - go u = do - r <- E.try (make u) :: IO (Either E.SomeException Bool) - case r of - {- Parent directory is missing. Recurse to create - - it, and try once more to create the directory. -} - Right False -> do - go (urlParent u) - void $ make u - {- Directory created successfully -} - Right True -> return () - {- Directory already exists, or some other error - - occurred. In the latter case, whatever wanted - - to use this directory will fail. -} - Left _ -> return () - {- Test if a WebDAV store is usable, by writing to a test file, and then - - deleting the file. Exits with an IO error if not. -} -testDav :: String -> Maybe CredPair -> Annex () -testDav baseurl (Just (u, p)) = do + - deleting the file. + - + - Also ensures that the path of the url exists, trying to create it if not. + - + - Throws an error if store is not usable. + -} +testDav :: URLString -> Maybe CredPair -> Annex () +testDav url (Just (u, p)) = do showSideAction "testing WebDAV server" - test "make directory" $ mkdirRecursiveDAV baseurl user pass - test "write file" $ putDAV testurl user pass L.empty - test "delete file" $ deleteDAV testurl user pass + test $ liftIO $ goDAV url user pass $ do + makeParentDirs + inLocation tmpDir $ void mkCol + inLocation (tmpLocation "git-annex-test") $ do + putContentM (Nothing, L.empty) + delContentM where - test desc a = liftIO $ - either (\e -> throwIO $ "WebDAV failed to " ++ desc ++ ": " ++ show e) + test a = liftIO $ + either (\e -> throwIO $ "WebDAV test failed: " ++ show e) (const noop) =<< tryNonAsync a user = toDavUser u pass = toDavPass p - testurl = davUrl baseurl "git-annex-test" testDav _ Nothing = error "Need to configure webdav username and password." +{- Tries to make all the parent directories in the WebDAV urls's path, + - right down to the root. + - + - Ignores any failures, which can occur for reasons including the WebDAV + - server only serving up WebDAV in a subdirectory. -} +makeParentDirs :: DAVT IO () +makeParentDirs = go + where + go = do + l <- getDAVLocation + case locationParent l of + Nothing -> noop + Just p -> void $ safely $ inDAVLocation (const p) go + void $ safely mkCol + +{- Checks if the directory exists. If not, tries to create its + - parent directories, all the way down to the root, and finally creates + - it. -} +mkColRecursive :: DavLocation -> DAVT IO Bool +mkColRecursive d = go =<< existsDAV d + where + go (Right True) = return True + go _ = ifM (inLocation d mkCol) + ( return True + , do + case locationParent d of + Nothing -> makeParentDirs + Just parent -> void (mkColRecursive parent) + inLocation d mkCol + ) + getCreds :: RemoteConfig -> UUID -> Annex (Maybe CredPair) getCreds c u = getRemoteCredPairFor "webdav" c (davCreds u) @@ -269,54 +298,21 @@ contentType = Just $ B8.fromString "application/octet-stream" throwIO :: String -> IO a throwIO msg = ioError $ mkIOError userErrorType msg Nothing Nothing -debugDAV :: DavUrl -> String -> IO () -debugDAV msg url = debugM "DAV" $ msg ++ " " ++ url - -{--------------------------------------------------------------------- - - Low-level DAV operations. - ---------------------------------------------------------------------} - -putDAV :: DavUrl -> DavUser -> DavPass -> L.ByteString -> IO () -putDAV url user pass b = do - debugDAV "PUT" url - goDAV url user pass $ putContentM (contentType, b) - -getDAV :: DavUrl -> DavUser -> DavPass -> IO (Maybe L.ByteString) -getDAV url user pass = do - debugDAV "GET" url - eitherToMaybe <$> tryNonAsync go +moveDAV :: URLString -> DavLocation -> DavLocation -> DAVT IO () +moveDAV baseurl src dest = inLocation src $ moveContentM newurl where - go = goDAV url user pass $ snd <$> getContentM + newurl = B8.fromString (locationUrl baseurl dest) -deleteDAV :: DavUrl -> DavUser -> DavPass -> IO () -deleteDAV url user pass = do - debugDAV "DELETE" url - goDAV url user pass delContentM - -moveDAV :: DavUrl -> DavUrl -> DavUser -> DavPass -> IO () -moveDAV url newurl user pass = do - debugDAV ("MOVE to " ++ newurl ++ " from ") url - goDAV url user pass $ moveContentM newurl' +existsDAV :: DavLocation -> DAVT IO (Either String Bool) +existsDAV l = inLocation l check `EL.catch` (\(e :: EL.SomeException) -> return (Left $ show e)) where - newurl' = B8.fromString newurl - -mkdirDAV :: DavUrl -> DavUser -> DavPass -> IO Bool -mkdirDAV url user pass = do - debugDAV "MKDIR" url - goDAV url user pass mkCol - -existsDAV :: DavUrl -> DavUser -> DavPass -> IO (Either String Bool) -existsDAV url user pass = do - debugDAV "EXISTS" url - either (Left . show) id <$> tryNonAsync check - where - ispresent = return . Right - check = goDAV url user pass $ do + check = do setDepth Nothing EL.catchJust (matchStatusCodeException notFound404) (getPropsM >> ispresent True) (const $ ispresent False) + ispresent = return . Right matchStatusCodeException :: Status -> HttpException -> Maybe () matchStatusCodeException want (StatusCodeException s _ _) @@ -324,7 +320,12 @@ matchStatusCodeException want (StatusCodeException s _ _) | otherwise = Nothing matchStatusCodeException _ _ = Nothing -goDAV :: DavUrl -> DavUser -> DavPass -> DAVT IO a -> IO a +-- Ignores any exceptions when performing a DAV action. +safely :: DAVT IO a -> DAVT IO (Maybe a) +safely a = (Just <$> a) + `EL.catch` (\(_ :: EL.SomeException) -> return Nothing) + +goDAV :: URLString -> DavUser -> DavPass -> DAVT IO a -> IO a goDAV url user pass a = choke $ evalDAVT url $ do setResponseTimeout Nothing -- disable default (5 second!) timeout setCreds user pass diff --git a/Remote/WebDAV/DavLocation.hs b/Remote/WebDAV/DavLocation.hs new file mode 100644 index 0000000000..3b52f3a64c --- /dev/null +++ b/Remote/WebDAV/DavLocation.hs @@ -0,0 +1,59 @@ +{- WebDAV locations. + - + - Copyright 2014 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +{-# LANGUAGE CPP #-} +{-# LANGUAGE FlexibleContexts #-} + +module Remote.WebDAV.DavLocation where + +import Types +import Locations +import Utility.Url (URLString) + +import System.FilePath.Posix -- for manipulating url paths +import Network.Protocol.HTTP.DAV (inDAVLocation, DAVT) +import Control.Monad.IO.Class (MonadIO) +#ifdef mingw32_HOST_OS +import Data.String.Utils +#endif + +-- Relative to the top of the DAV url. +type DavLocation = String + +{- Runs action in subdirectory, relative to the current location. -} +inLocation :: (MonadIO m) => DavLocation -> DAVT m a -> DAVT m a +inLocation d = inDAVLocation ( d) + +{- The directory where files(s) for a key are stored. -} +keyLocation :: Key -> DavLocation +keyLocation k = addTrailingPathSeparator $ hashdir keyFile k + where +#ifndef mingw32_HOST_OS + hashdir = hashDirLower k +#else + hashdir = replace "\\" "/" (hashDirLower k) +#endif + +{- Where we store temporary data for a key as it's being uploaded. -} +keyTmpLocation :: Key -> DavLocation +keyTmpLocation = addTrailingPathSeparator . tmpLocation . keyFile + +tmpLocation :: FilePath -> DavLocation +tmpLocation f = tmpDir f + +tmpDir :: DavLocation +tmpDir = "tmp" + +locationParent :: String -> Maybe String +locationParent loc + | loc `elem` tops = Nothing + | otherwise = Just (takeDirectory loc) + where + tops = ["/", "", "."] + +locationUrl :: URLString -> DavLocation -> URLString +locationUrl baseurl loc = baseurl loc diff --git a/Remote/WebDAV/DavUrl.hs b/Remote/WebDAV/DavUrl.hs deleted file mode 100644 index 4862c4f37d..0000000000 --- a/Remote/WebDAV/DavUrl.hs +++ /dev/null @@ -1,44 +0,0 @@ -{- WebDAV urls. - - - - Copyright 2014 Joey Hess - - - - Licensed under the GNU GPL version 3 or higher. - -} - -{-# LANGUAGE CPP #-} - -module Remote.WebDAV.DavUrl where - -import Types -import Locations - -import Network.URI (normalizePathSegments) -import System.FilePath.Posix -#ifdef mingw32_HOST_OS -import Data.String.Utils -#endif - -type DavUrl = String - -{- The directory where files(s) for a key are stored. -} -davLocation :: DavUrl -> Key -> DavUrl -davLocation baseurl k = addTrailingPathSeparator $ - davUrl baseurl $ hashdir keyFile k - where -#ifndef mingw32_HOST_OS - hashdir = hashDirLower k -#else - hashdir = replace "\\" "/" (hashDirLower k) -#endif - -{- Where we store temporary data for a key as it's being uploaded. -} -tmpLocation :: DavUrl -> Key -> DavUrl -tmpLocation baseurl k = addTrailingPathSeparator $ - davUrl baseurl $ "tmp" keyFile k - -davUrl :: DavUrl -> FilePath -> DavUrl -davUrl baseurl file = baseurl file - -urlParent :: DavUrl -> DavUrl -urlParent url = dropTrailingPathSeparator $ - normalizePathSegments (dropTrailingPathSeparator url ++ "/..") diff --git a/debian/changelog b/debian/changelog index 6d13c96372..3b54458c39 100644 --- a/debian/changelog +++ b/debian/changelog @@ -16,7 +16,9 @@ git-annex (5.20140718) UNRELEASED; urgency=medium were incompletely repaired before. * Fix cost calculation for non-encrypted remotes. * Display exception message when a transfer fails due to an exception. - * WebDAV: Dropped support for DAV before 0.6.1. + * WebDAV: Sped up by avoiding making multiple http connections + when storing a file. + * WebDAV: Dropped support for DAV before 0.8. * testremote: New command to test uploads/downloads to a remote. * Dropping an object from a bup special remote now deletes the git branch for the object, although of course the object's content cannot be deleted diff --git a/debian/control b/debian/control index e37f7d05ee..66d340e3cf 100644 --- a/debian/control +++ b/debian/control @@ -14,7 +14,7 @@ Build-Depends: libghc-dataenc-dev, libghc-utf8-string-dev, libghc-hs3-dev (>= 0.5.6), - libghc-dav-dev (>= 0.6.1) [amd64 i386 kfreebsd-amd64 kfreebsd-i386 powerpc], + libghc-dav-dev (>= 0.8) [amd64 i386 kfreebsd-amd64 kfreebsd-i386 powerpc], libghc-quickcheck2-dev, libghc-monad-control-dev (>= 0.3), libghc-exceptions-dev, diff --git a/git-annex.cabal b/git-annex.cabal index 2a39489d40..c3a1c161db 100644 --- a/git-annex.cabal +++ b/git-annex.cabal @@ -142,8 +142,8 @@ Executable git-annex CPP-Options: -DWITH_S3 if flag(WebDAV) - Build-Depends: DAV (> 0.6), - http-client, http-conduit, http-types, lifted-base + Build-Depends: DAV (> 0.8), + http-client, http-conduit, http-types, lifted-base, transformers CPP-Options: -DWITH_WEBDAV if flag(Assistant) && ! os(solaris) From fc4b3cdcce66363305b806db1a93fc6824b87905 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Thu, 7 Aug 2014 18:32:07 -0400 Subject: [PATCH 30/44] webdav: reuse http connection when operating on the chunks of a file For both new and legacy chunks. Massive speed up! This commit was sponsored by Dominik Wagenknecht. --- Remote/WebDAV.hs | 187 +++++++++++++++++++++++++---------------------- git-annex.cabal | 2 +- 2 files changed, 101 insertions(+), 88 deletions(-) diff --git a/Remote/WebDAV.hs b/Remote/WebDAV.hs index a77deffc53..d81b765106 100644 --- a/Remote/WebDAV.hs +++ b/Remote/WebDAV.hs @@ -30,11 +30,9 @@ import Creds import Utility.Metered import Utility.Url (URLString) import Annex.UUID +import Annex.Exception import Remote.WebDAV.DavLocation -type DavUser = B8.ByteString -type DavPass = B8.ByteString - remote :: RemoteType remote = RemoteType { typename = "webdav", @@ -47,10 +45,10 @@ gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remot gen r u c gc = new <$> remoteCost gc expensiveRemoteCost where new cst = Just $ specialRemote c - (prepareStore this chunkconfig) - (prepareRetrieve this chunkconfig) - (prepareRemove this) - (prepareCheckPresent this chunkconfig) + (prepareDAV this $ store chunkconfig) + (prepareDAV this $ retrieve chunkconfig) + (prepareDAV this $ remove) + (prepareDAV this $ checkKey this chunkconfig) this where this = Remote { @@ -88,30 +86,34 @@ webdavSetup mu mcreds c = do c'' <- setRemoteCredPair c' (davCreds u) creds return (c'', u) -prepareStore :: Remote -> ChunkConfig -> Preparer Storer -prepareStore r chunkconfig = simplyPrepare $ fileStorer $ \k f p -> - davAction r False $ \(baseurl, user, pass) -> liftIO $ - withMeteredFile f p $ - storeHelper chunkconfig k baseurl user pass +-- Opens a http connection to the DAV server, which will be reused +-- each time the helper is called. +prepareDAV :: Remote -> (Maybe DavHandle -> helper) -> Preparer helper +prepareDAV = resourcePrepare . const . withDAVHandle -storeHelper :: ChunkConfig -> Key -> URLString -> DavUser -> DavPass -> L.ByteString -> IO Bool -storeHelper chunkconfig k baseurl user pass b = do +store :: ChunkConfig -> Maybe DavHandle -> Storer +store _ Nothing = byteStorer $ \_k _b _p -> return False +store chunkconfig (Just dav) = fileStorer $ \k f p -> liftIO $ + withMeteredFile f p $ storeHelper chunkconfig k dav + +storeHelper :: ChunkConfig -> Key -> DavHandle -> L.ByteString -> IO Bool +storeHelper chunkconfig k dav b = do case chunkconfig of LegacyChunks chunksize -> do let storehttp l b' = do - void $ goDAV baseurl user pass $ do + void $ goDAV dav $ do maybe noop (void . mkColRecursive) (locationParent l) inLocation l $ putContentM (contentType, b') let storer locs = Legacy.storeChunked chunksize locs storehttp b let recorder l s = storehttp l (L8.fromString s) - let finalizer tmp' dest' = goDAV baseurl user pass $ - finalizeStore baseurl tmp' (fromJust $ locationParent dest') + let finalizer tmp' dest' = goDAV dav $ + finalizeStore (baseURL dav) tmp' (fromJust $ locationParent dest') Legacy.storeChunks k tmp dest storer recorder finalizer - _ -> goDAV baseurl user pass $ do + _ -> goDAV dav $ do void $ mkColRecursive tmpDir inLocation tmp $ putContentM (contentType, b) - finalizeStore baseurl tmp dest + finalizeStore (baseURL dav) tmp dest return True where tmp = keyTmpLocation k @@ -126,77 +128,71 @@ finalizeStore baseurl tmp dest = do retrieveCheap :: Key -> FilePath -> Annex Bool retrieveCheap _ _ = return False -prepareRetrieve :: Remote -> ChunkConfig -> Preparer Retriever -prepareRetrieve r chunkconfig = simplyPrepare $ fileRetriever $ \d k p -> - davAction r onerr $ \(baseurl, user, pass) -> liftIO $ - withStoredFiles chunkconfig k baseurl user pass onerr $ \locs -> do - Legacy.meteredWriteFileChunks p d locs $ \l -> do - mb <- goDAV baseurl user pass $ safely $ - inLocation l $ - snd <$> getContentM - case mb of - Nothing -> onerr - Just b -> return b +retrieve :: ChunkConfig -> Maybe DavHandle -> Retriever +retrieve _ Nothing = error "unable to connect" +retrieve chunkconfig (Just dav) = fileRetriever $ \d k p -> liftIO $ + withStoredFiles chunkconfig k dav onerr $ \locs -> do + Legacy.meteredWriteFileChunks p d locs $ \l -> do + mb <- goDAV dav $ safely $ + inLocation l $ + snd <$> getContentM + case mb of + Nothing -> onerr + Just b -> return b where onerr = error "download failed" -prepareRemove :: Remote -> Preparer Remover -prepareRemove r = simplyPrepare $ \k -> - davAction r False $ \(baseurl, user, pass) -> liftIO $ do - -- Delete the key's whole directory, including any - -- legacy chunked files, etc, in a single action. - ret <- goDAV baseurl user pass $ safely $ - inLocation (keyLocation k) delContentM - return (isJust ret) +remove :: Maybe DavHandle -> Remover +remove Nothing _ = return False +remove (Just dav) k = liftIO $ do + -- Delete the key's whole directory, including any + -- legacy chunked files, etc, in a single action. + ret <- goDAV dav $ safely $ + inLocation (keyLocation k) delContentM + return (isJust ret) -prepareCheckPresent :: Remote -> ChunkConfig -> Preparer CheckPresent -prepareCheckPresent r chunkconfig = simplyPrepare $ checkKey r chunkconfig - -checkKey :: Remote -> ChunkConfig -> Key -> Annex Bool -checkKey r chunkconfig k = davAction r noconn (either error id <$$> go) +checkKey :: Remote -> ChunkConfig -> Maybe DavHandle -> CheckPresent +checkKey r _ Nothing _ = error $ name r ++ " not configured" +checkKey r chunkconfig (Just dav) k = either error id <$> go where - noconn = error $ name r ++ " not configured" - - go (baseurl, user, pass) = do + go = do showAction $ "checking " ++ name r - liftIO $ withStoredFiles chunkconfig k baseurl user pass onerr check - where - check [] = return $ Right True - check (l:ls) = do - v <- goDAV baseurl user pass $ existsDAV l - if v == Right True - then check ls - else return v + liftIO $ withStoredFiles chunkconfig k dav onerr check - {- Failed to read the chunkcount file; see if it's missing, - - or if there's a problem accessing it, - - or perhaps this was an intermittent error. -} - onerr f = do - v <- goDAV baseurl user pass $ existsDAV f - return $ if v == Right True - then Left $ "failed to read " ++ f - else v + check [] = return $ Right True + check (l:ls) = do + v <- goDAV dav $ existsDAV l + if v == Right True + then check ls + else return v + + {- Failed to read the chunkcount file; see if it's missing, + - or if there's a problem accessing it, + - or perhaps this was an intermittent error. -} + onerr f = do + v <- goDAV dav $ existsDAV f + return $ if v == Right True + then Left $ "failed to read " ++ f + else v withStoredFiles :: ChunkConfig -> Key - -> URLString - -> DavUser - -> DavPass + -> DavHandle -> (DavLocation -> IO a) -> ([DavLocation] -> IO a) -> IO a -withStoredFiles chunkconfig k baseurl user pass onerr a = case chunkconfig of +withStoredFiles chunkconfig k dav onerr a = case chunkconfig of LegacyChunks _ -> do let chunkcount = keyloc ++ Legacy.chunkCount - v <- goDAV baseurl user pass $ safely $ + v <- goDAV dav $ safely $ inLocation chunkcount $ snd <$> getContentM case v of Just s -> a $ Legacy.listChunks keyloc $ L8.toString s Nothing -> do chunks <- Legacy.probeChunks keyloc $ \f -> - (== Right True) <$> goDAV baseurl user pass (existsDAV f) + (== Right True) <$> goDAV dav (existsDAV f) if null chunks then onerr chunkcount else a chunks @@ -204,20 +200,19 @@ withStoredFiles chunkconfig k baseurl user pass onerr a = case chunkconfig of where keyloc = keyLocation k ++ keyFile k -davAction :: Remote -> a -> ((DavLocation, DavUser, DavPass) -> Annex a) -> Annex a -davAction r unconfigured action = do - mcreds <- getCreds (config r) (uuid r) - case (mcreds, configUrl r) of - (Just (user, pass), Just url) -> - action (url, toDavUser user, toDavPass pass) - _ -> return unconfigured - configUrl :: Remote -> Maybe URLString configUrl r = fixup <$> M.lookup "url" (config r) where -- box.com DAV url changed fixup = replace "https://www.box.com/dav/" "https://dav.box.com/dav/" +type DavUser = B8.ByteString +type DavPass = B8.ByteString + +baseURL :: DavHandle -> URLString +baseURL (DavHandle _ _ _ u) = u + + toDavUser :: String -> DavUser toDavUser = B8.fromString @@ -234,7 +229,8 @@ toDavPass = B8.fromString testDav :: URLString -> Maybe CredPair -> Annex () testDav url (Just (u, p)) = do showSideAction "testing WebDAV server" - test $ liftIO $ goDAV url user pass $ do + test $ liftIO $ evalDAVT url $ do + prepDAV user pass makeParentDirs inLocation tmpDir $ void mkCol inLocation (tmpLocation "git-annex-test") $ do @@ -325,15 +321,32 @@ safely :: DAVT IO a -> DAVT IO (Maybe a) safely a = (Just <$> a) `EL.catch` (\(_ :: EL.SomeException) -> return Nothing) -goDAV :: URLString -> DavUser -> DavPass -> DAVT IO a -> IO a -goDAV url user pass a = choke $ evalDAVT url $ do - setResponseTimeout Nothing -- disable default (5 second!) timeout - setCreds user pass +choke :: IO (Either String a) -> IO a +choke f = do + x <- f + case x of + Left e -> error e + Right r -> return r + +data DavHandle = DavHandle DAVContext DavUser DavPass URLString + +withDAVHandle :: Remote -> (Maybe DavHandle -> Annex a) -> Annex a +withDAVHandle r a = do + mcreds <- getCreds (config r) (uuid r) + case (mcreds, configUrl r) of + (Just (user, pass), Just baseurl) -> + bracketIO (mkDAVContext baseurl) closeDAVContext $ \ctx -> + a (Just (DavHandle ctx (toDavUser user) (toDavPass pass) baseurl)) + _ -> a Nothing + +goDAV :: DavHandle -> DAVT IO a -> IO a +goDAV (DavHandle ctx user pass _) a = choke $ run $ do + prepDAV user pass a where - choke :: IO (Either String a) -> IO a - choke f = do - x <- f - case x of - Left e -> error e - Right r -> return r + run = fst <$$> runDAVContext ctx + +prepDAV :: DavUser -> DavPass -> DAVT IO () +prepDAV user pass = do + setResponseTimeout Nothing -- disable default (5 second!) timeout + setCreds user pass diff --git a/git-annex.cabal b/git-annex.cabal index c3a1c161db..8f36bfe486 100644 --- a/git-annex.cabal +++ b/git-annex.cabal @@ -142,7 +142,7 @@ Executable git-annex CPP-Options: -DWITH_S3 if flag(WebDAV) - Build-Depends: DAV (> 0.8), + Build-Depends: DAV (>= 0.8), http-client, http-conduit, http-types, lifted-base, transformers CPP-Options: -DWITH_WEBDAV From 2dd8dab314c95233a859d7aab5aa1c8ba47ef6f1 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Thu, 7 Aug 2014 19:32:23 -0400 Subject: [PATCH 31/44] WebDAV: Avoid buffering whole file in memory when uploading. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The httpStorer will later also be used by S3. This commit was sponsored by Torbjørn Thorsen. --- Remote/Helper/Http.hs | 39 +++++++++++++++++++++++++++++++++++++++ Remote/WebDAV.hs | 43 ++++++++++++++++++++++--------------------- debian/changelog | 1 + 3 files changed, 62 insertions(+), 21 deletions(-) create mode 100644 Remote/Helper/Http.hs diff --git a/Remote/Helper/Http.hs b/Remote/Helper/Http.hs new file mode 100644 index 0000000000..945e5cd991 --- /dev/null +++ b/Remote/Helper/Http.hs @@ -0,0 +1,39 @@ +{- helpers for remotes using http + - + - Copyright 2014 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Remote.Helper.Http where + +import Common.Annex +import Types.StoreRetrieve +import Utility.Metered +import Remote.Helper.Special +import Network.HTTP.Client (RequestBody(..)) + +import qualified Data.ByteString.Lazy as L +import qualified Data.ByteString as S +import Control.Concurrent + +-- A storer that expects to be provided with a http RequestBody containing +-- the content to store. +-- +-- Implemented as a fileStorer, so that the content can be streamed +-- from the file in constant space. +httpStorer :: (Key -> RequestBody -> Annex Bool) -> Storer +httpStorer a = fileStorer $ \k f m -> do + size <- liftIO $ (fromIntegral . fileSize <$> getFileStatus f :: IO Integer) + let streamer sink = withMeteredFile f m $ \b -> do + mvar <- newMVar $ L.toChunks b + let getnextchunk = modifyMVar mvar $ pure . pop + sink getnextchunk + let body = RequestBodyStream (fromInteger size) streamer + a k body + where + pop [] = ([], S.empty) + pop (c:cs) = (cs, c) + +--httpRetriever :: (Key -> Annex Response) -> Retriever +--httpRetriever a = byteRetriever $ \k sink diff --git a/Remote/WebDAV.hs b/Remote/WebDAV.hs index d81b765106..b70001ddba 100644 --- a/Remote/WebDAV.hs +++ b/Remote/WebDAV.hs @@ -25,6 +25,7 @@ import qualified Git import Config import Config.Cost import Remote.Helper.Special +import Remote.Helper.Http import qualified Remote.Helper.Chunked.Legacy as Legacy import Creds import Utility.Metered @@ -93,29 +94,29 @@ prepareDAV = resourcePrepare . const . withDAVHandle store :: ChunkConfig -> Maybe DavHandle -> Storer store _ Nothing = byteStorer $ \_k _b _p -> return False -store chunkconfig (Just dav) = fileStorer $ \k f p -> liftIO $ - withMeteredFile f p $ storeHelper chunkconfig k dav +store (LegacyChunks chunksize) (Just dav) = fileStorer $ \k f p -> liftIO $ + withMeteredFile f p $ storeLegacyChunked chunksize k dav +store _ (Just dav) = httpStorer $ \k reqbody -> liftIO $ goDAV dav $ do + let tmp = keyTmpLocation k + let dest = keyLocation k ++ keyFile k + void $ mkColRecursive tmpDir + inLocation tmp $ + putContentM' (contentType, reqbody) + finalizeStore (baseURL dav) tmp dest + return True -storeHelper :: ChunkConfig -> Key -> DavHandle -> L.ByteString -> IO Bool -storeHelper chunkconfig k dav b = do - case chunkconfig of - LegacyChunks chunksize -> do - let storehttp l b' = do - void $ goDAV dav $ do - maybe noop (void . mkColRecursive) (locationParent l) - inLocation l $ putContentM (contentType, b') - let storer locs = Legacy.storeChunked chunksize locs storehttp b - let recorder l s = storehttp l (L8.fromString s) - let finalizer tmp' dest' = goDAV dav $ - finalizeStore (baseURL dav) tmp' (fromJust $ locationParent dest') - Legacy.storeChunks k tmp dest storer recorder finalizer - _ -> goDAV dav $ do - void $ mkColRecursive tmpDir - inLocation tmp $ - putContentM (contentType, b) - finalizeStore (baseURL dav) tmp dest - return True +storeLegacyChunked :: ChunkSize -> Key -> DavHandle -> L.ByteString -> IO Bool +storeLegacyChunked chunksize k dav b = + Legacy.storeChunks k tmp dest storer recorder finalizer where + storehttp l b' = void $ goDAV dav $ do + maybe noop (void . mkColRecursive) (locationParent l) + inLocation l $ putContentM (contentType, b') + storer locs = Legacy.storeChunked chunksize locs storehttp b + recorder l s = storehttp l (L8.fromString s) + finalizer tmp' dest' = goDAV dav $ + finalizeStore (baseURL dav) tmp' (fromJust $ locationParent dest') + tmp = keyTmpLocation k dest = keyLocation k ++ keyFile k diff --git a/debian/changelog b/debian/changelog index 3b54458c39..1b981c08d3 100644 --- a/debian/changelog +++ b/debian/changelog @@ -18,6 +18,7 @@ git-annex (5.20140718) UNRELEASED; urgency=medium * Display exception message when a transfer fails due to an exception. * WebDAV: Sped up by avoiding making multiple http connections when storing a file. + * WebDAV: Avoid buffering whole file in memory when uploading. * WebDAV: Dropped support for DAV before 0.8. * testremote: New command to test uploads/downloads to a remote. * Dropping an object from a bup special remote now deletes the git branch From 8e3d62dd5d4d753b2fe7e9b9564224744328e93b Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Thu, 7 Aug 2014 21:01:07 -0400 Subject: [PATCH 32/44] generalized using the extensions package --- Utility/Exception.hs | 49 ++++++++++++++++++++++++++------------------ 1 file changed, 29 insertions(+), 20 deletions(-) diff --git a/Utility/Exception.hs b/Utility/Exception.hs index 1fecf65d56..13c9d508a4 100644 --- a/Utility/Exception.hs +++ b/Utility/Exception.hs @@ -1,6 +1,6 @@ {- Simple IO exception handling (and some more) - - - Copyright 2011-2012 Joey Hess + - Copyright 2011-2014 Joey Hess - - License: BSD-2-clause -} @@ -9,51 +9,60 @@ module Utility.Exception where -import Control.Exception -import qualified Control.Exception as E -import Control.Applicative +import Control.Exception (IOException, AsyncException) +import Control.Monad.Catch import Control.Monad import System.IO.Error (isDoesNotExistError) import Utility.Data {- Catches IO errors and returns a Bool -} -catchBoolIO :: IO Bool -> IO Bool +catchBoolIO :: MonadCatch m => m Bool -> m Bool catchBoolIO = catchDefaultIO False {- Catches IO errors and returns a Maybe -} -catchMaybeIO :: IO a -> IO (Maybe a) -catchMaybeIO a = catchDefaultIO Nothing $ Just <$> a +catchMaybeIO :: MonadCatch m => m a -> m (Maybe a) +catchMaybeIO a = do + catchDefaultIO Nothing $ do + v <- a + return (Just v) {- Catches IO errors and returns a default value. -} -catchDefaultIO :: a -> IO a -> IO a +catchDefaultIO :: MonadCatch m => a -> m a -> m a catchDefaultIO def a = catchIO a (const $ return def) {- Catches IO errors and returns the error message. -} -catchMsgIO :: IO a -> IO (Either String a) -catchMsgIO a = either (Left . show) Right <$> tryIO a +catchMsgIO :: MonadCatch m => m a -> m (Either String a) +catchMsgIO a = do + v <- tryIO a + return $ either (Left . show) Right v {- catch specialized for IO errors only -} -catchIO :: IO a -> (IOException -> IO a) -> IO a -catchIO = E.catch +catchIO :: MonadCatch m => m a -> (IOException -> m a) -> m a +catchIO = catch {- try specialized for IO errors only -} -tryIO :: IO a -> IO (Either IOException a) +tryIO :: MonadCatch m => m a -> m (Either IOException a) tryIO = try {- Catches all exceptions except for async exceptions. - This is often better to use than catching them all, so that - ThreadKilled and UserInterrupt get through. -} -catchNonAsync :: IO a -> (SomeException -> IO a) -> IO a +catchNonAsync :: MonadCatch m => m a -> (SomeException -> m a) -> m a catchNonAsync a onerr = a `catches` - [ Handler (\ (e :: AsyncException) -> throw e) + [ Handler (\ (e :: AsyncException) -> throwM e) , Handler (\ (e :: SomeException) -> onerr e) ] -tryNonAsync :: IO a -> IO (Either SomeException a) -tryNonAsync a = (Right <$> a) `catchNonAsync` (return . Left) +tryNonAsync :: MonadCatch m => m a -> m (Either SomeException a) +tryNonAsync a = go `catchNonAsync` (return . Left) + where + go = do + v <- a + return (Right v) {- Catches only DoesNotExist exceptions, and lets all others through. -} -tryWhenExists :: IO a -> IO (Maybe a) -tryWhenExists a = eitherToMaybe <$> - tryJust (guard . isDoesNotExistError) a +tryWhenExists :: MonadCatch m => m a -> m (Maybe a) +tryWhenExists a = do + v <- tryJust (guard . isDoesNotExistError) a + return (eitherToMaybe v) From c784ef4586fc827d128ccf920398de8a52c063f4 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Thu, 7 Aug 2014 21:55:44 -0400 Subject: [PATCH 33/44] unify exception handling into Utility.Exception Removed old extensible-exceptions, only needed for very old ghc. Made webdav use Utility.Exception, to work after some changes in DAV's exception handling. Removed Annex.Exception. Mostly this was trivial, but note that tryAnnex is replaced with tryNonAsync and catchAnnex replaced with catchNonAsync. In theory that could be a behavior change, since the former caught all exceptions, and the latter don't catch async exceptions. However, in practice, nothing in the Annex monad uses async exceptions. Grepping for throwTo and killThread only find stuff in the assistant, which does not seem related. Command.Add.undo is changed to accept a SomeException, and things that use it for rollback now catch non-async exceptions, rather than only IOExceptions. --- Annex.hs | 6 ++- Annex/Content.hs | 5 +-- Annex/Direct.hs | 3 +- Annex/Drop.hs | 3 +- Annex/Environment.hs | 3 +- Annex/Exception.hs | 63 ------------------------------ Annex/Index.hs | 3 +- Annex/Journal.hs | 1 - Annex/LockFile.hs | 1 - Annex/Perms.hs | 5 +-- Annex/ReplaceFile.hs | 3 +- Annex/Transfer.hs | 3 +- Annex/View.hs | 6 +-- Assistant/Pairing/Network.hs | 1 - Assistant/Threads/Committer.hs | 3 +- Assistant/Threads/Cronner.hs | 6 +-- Assistant/Threads/SanityChecker.hs | 3 +- Assistant/Threads/Watcher.hs | 4 +- Assistant/Threads/XMPPClient.hs | 18 ++++----- Assistant/XMPP/Client.hs | 7 ++-- Assistant/XMPP/Git.hs | 18 ++++----- CmdLine/Action.hs | 9 ++--- Command/Add.hs | 17 ++++---- Command/Direct.hs | 5 +-- Command/FuzzTest.hs | 3 +- Command/Indirect.hs | 7 +--- Command/Map.hs | 3 +- Command/Move.hs | 10 ++--- Command/PreCommit.hs | 1 - Command/TestRemote.hs | 1 - Command/Vicfg.hs | 4 +- Common.hs | 1 - Crypto.hs | 1 - Git/Config.hs | 1 - Git/UpdateIndex.hs | 2 - Limit.hs | 8 ++-- Logs/Transfer.hs | 3 +- Messages.hs | 2 +- Remote.hs | 10 ++--- Remote/Ddar.hs | 1 - Remote/External.hs | 3 +- Remote/External/Types.hs | 1 - Remote/GCrypt.hs | 2 +- Remote/Git.hs | 4 +- Remote/Helper/Chunked.hs | 15 ++++--- Remote/Helper/Special.hs | 4 +- Remote/WebDAV.hs | 12 +++--- RemoteDaemon/Transport/Ssh.hs | 6 +-- Test.hs | 3 +- Utility/Directory.hs | 3 +- Utility/Exception.hs | 28 +++++++++++-- Utility/FileMode.hs | 1 - Utility/Gpg.hs | 1 - Utility/Matcher.hs | 8 ++-- Utility/Parallel.hs | 1 - Utility/Tmp.hs | 15 ++++--- Utility/Url.hs | 8 ++-- Utility/WebApp.hs | 4 -- debian/control | 1 - git-annex.cabal | 5 +-- 60 files changed, 142 insertions(+), 237 deletions(-) delete mode 100644 Annex/Exception.hs diff --git a/Annex.hs b/Annex.hs index bb271c5e83..b915e852bc 100644 --- a/Annex.hs +++ b/Annex.hs @@ -64,14 +64,16 @@ import Utility.Quvi (QuviVersion) import Utility.InodeCache import "mtl" Control.Monad.Reader -import Control.Monad.Catch import Control.Concurrent import qualified Data.Map as M import qualified Data.Set as S {- git-annex's monad is a ReaderT around an AnnexState stored in a MVar. - - This allows modifying the state in an exception-safe fashion. - The MVar is not exposed outside this module. + - + - Note that when an Annex action fails and the exception is caught, + - ny changes the action has made to the AnnexState are retained, + - due to the use of the MVar to store the state. -} newtype Annex a = Annex { runAnnex :: ReaderT (MVar AnnexState) IO a } deriving ( diff --git a/Annex/Content.hs b/Annex/Content.hs index eb84f2fe9d..b51e15827b 100644 --- a/Annex/Content.hs +++ b/Annex/Content.hs @@ -56,7 +56,6 @@ import Annex.Perms import Annex.Link import Annex.Content.Direct import Annex.ReplaceFile -import Annex.Exception #ifdef mingw32_HOST_OS import Utility.WinLock @@ -167,7 +166,7 @@ lockContent key a = do contentfile <- calcRepo $ gitAnnexLocation key lockfile <- contentLockFile key maybe noop setuplockfile lockfile - bracketAnnex (liftIO $ lock contentfile lockfile) (unlock lockfile) (const a) + bracket (liftIO $ lock contentfile lockfile) (unlock lockfile) (const a) where alreadylocked = error "content is locked" setuplockfile lockfile = modifyContent lockfile $ @@ -420,7 +419,7 @@ withObjectLoc key indirect direct = ifM isDirect cleanObjectLoc :: Key -> Annex () -> Annex () cleanObjectLoc key cleaner = do file <- calcRepo $ gitAnnexLocation key - void $ tryAnnexIO $ thawContentDir file + void $ tryIO $ thawContentDir file cleaner liftIO $ removeparents file (3 :: Int) where diff --git a/Annex/Direct.hs b/Annex/Direct.hs index e6b941e0f9..3745993698 100644 --- a/Annex/Direct.hs +++ b/Annex/Direct.hs @@ -32,7 +32,6 @@ import Utility.InodeCache import Utility.CopyFile import Annex.Perms import Annex.ReplaceFile -import Annex.Exception import Annex.VariantFile import Git.Index import Annex.Index @@ -252,7 +251,7 @@ mergeDirectCleanup d oldref = do go makeabs getsha getmode a araw (f, item) | getsha item == nullSha = noop | otherwise = void $ - tryAnnex . maybe (araw item makeabs f) (\k -> void $ a item makeabs k f) + tryNonAsync . maybe (araw item makeabs f) (\k -> void $ a item makeabs k f) =<< catKey (getsha item) (getmode item) moveout _ _ = removeDirect diff --git a/Annex/Drop.hs b/Annex/Drop.hs index 71263dc618..c5a3fbe5fb 100644 --- a/Annex/Drop.hs +++ b/Annex/Drop.hs @@ -16,7 +16,6 @@ import qualified Remote import qualified Command.Drop import Command import Annex.Wanted -import Annex.Exception import Config import Annex.Content.Direct @@ -120,5 +119,5 @@ handleDropsFrom locs rs reason fromhere key afile knownpresentremote runner = do slocs = S.fromList locs - safely a = either (const False) id <$> tryAnnex a + safely a = either (const False) id <$> tryNonAsync a diff --git a/Annex/Environment.hs b/Annex/Environment.hs index 4b8d384642..bc97c17b70 100644 --- a/Annex/Environment.hs +++ b/Annex/Environment.hs @@ -13,7 +13,6 @@ import Common.Annex import Utility.UserInfo import qualified Git.Config import Config -import Annex.Exception #ifndef mingw32_HOST_OS import Utility.Env @@ -58,7 +57,7 @@ checkEnvironmentIO = {- Runs an action that commits to the repository, and if it fails, - sets user.email and user.name to a dummy value and tries the action again. -} ensureCommit :: Annex a -> Annex a -ensureCommit a = either retry return =<< tryAnnex a +ensureCommit a = either retry return =<< tryNonAsync a where retry _ = do name <- liftIO myUserName diff --git a/Annex/Exception.hs b/Annex/Exception.hs deleted file mode 100644 index 5ecbd28a07..0000000000 --- a/Annex/Exception.hs +++ /dev/null @@ -1,63 +0,0 @@ -{- exception handling in the git-annex monad - - - - Note that when an Annex action fails and the exception is handled - - by these functions, any changes the action has made to the - - AnnexState are retained. This works because the Annex monad - - internally stores the AnnexState in a MVar. - - - - Copyright 2011-2014 Joey Hess - - - - Licensed under the GNU GPL version 3 or higher. - -} - -{-# LANGUAGE PackageImports #-} -{-# LANGUAGE ScopedTypeVariables #-} - -module Annex.Exception ( - bracketIO, - bracketAnnex, - tryAnnex, - tryAnnexIO, - throwAnnex, - catchAnnex, - catchNonAsyncAnnex, - tryNonAsyncAnnex, -) where - -import qualified Control.Monad.Catch as M -import Control.Exception - -import Common.Annex - -{- Runs an Annex action, with setup and cleanup both in the IO monad. -} -bracketIO :: IO v -> (v -> IO b) -> (v -> Annex a) -> Annex a -bracketIO setup cleanup = M.bracket (liftIO setup) (liftIO . cleanup) - -bracketAnnex :: Annex v -> (v -> Annex b) -> (v -> Annex a) -> Annex a -bracketAnnex = M.bracket - -{- try in the Annex monad -} -tryAnnex :: Annex a -> Annex (Either SomeException a) -tryAnnex = M.try - -{- try in the Annex monad, but only catching IO exceptions -} -tryAnnexIO :: Annex a -> Annex (Either IOException a) -tryAnnexIO = M.try - -{- throw in the Annex monad -} -throwAnnex :: Exception e => e -> Annex a -throwAnnex = M.throwM - -{- catch in the Annex monad -} -catchAnnex :: Exception e => Annex a -> (e -> Annex a) -> Annex a -catchAnnex = M.catch - -{- catchs all exceptions except for async exceptions -} -catchNonAsyncAnnex :: Annex a -> (SomeException -> Annex a) -> Annex a -catchNonAsyncAnnex a onerr = a `M.catches` - [ M.Handler (\ (e :: AsyncException) -> throwAnnex e) - , M.Handler (\ (e :: SomeException) -> onerr e) - ] - -tryNonAsyncAnnex :: Annex a -> Annex (Either SomeException a) -tryNonAsyncAnnex a = (Right <$> a) `catchNonAsyncAnnex` (return . Left) diff --git a/Annex/Index.hs b/Annex/Index.hs index af0cab45e4..7757a412b0 100644 --- a/Annex/Index.hs +++ b/Annex/Index.hs @@ -18,7 +18,6 @@ import Common.Annex import Git.Types import qualified Annex import Utility.Env -import Annex.Exception {- Runs an action using a different git index file. -} withIndexFile :: FilePath -> Annex a -> Annex a @@ -26,7 +25,7 @@ withIndexFile f a = do g <- gitRepo g' <- liftIO $ addGitEnv g "GIT_INDEX_FILE" f - r <- tryAnnex $ do + r <- tryNonAsync $ do Annex.changeState $ \s -> s { Annex.repo = g' } a Annex.changeState $ \s -> s { Annex.repo = (Annex.repo s) { gitEnv = gitEnv g} } diff --git a/Annex/Journal.hs b/Annex/Journal.hs index f34a7be1bf..798bcba292 100644 --- a/Annex/Journal.hs +++ b/Annex/Journal.hs @@ -14,7 +14,6 @@ module Annex.Journal where import Common.Annex -import Annex.Exception import qualified Git import Annex.Perms import Annex.LockFile diff --git a/Annex/LockFile.hs b/Annex/LockFile.hs index 8114e94f2b..dc4f82f984 100644 --- a/Annex/LockFile.hs +++ b/Annex/LockFile.hs @@ -18,7 +18,6 @@ import Common.Annex import Annex import Types.LockPool import qualified Git -import Annex.Exception import Annex.Perms import qualified Data.Map as M diff --git a/Annex/Perms.hs b/Annex/Perms.hs index e3a2fa65a2..3430554c71 100644 --- a/Annex/Perms.hs +++ b/Annex/Perms.hs @@ -21,7 +21,6 @@ import Common.Annex import Utility.FileMode import Git.SharedRepository import qualified Annex -import Annex.Exception import Config import System.Posix.Types @@ -120,6 +119,6 @@ createContentDir dest = do modifyContent :: FilePath -> Annex a -> Annex a modifyContent f a = do createContentDir f -- also thaws it - v <- tryAnnex a + v <- tryNonAsync a freezeContentDir f - either throwAnnex return v + either throwM return v diff --git a/Annex/ReplaceFile.hs b/Annex/ReplaceFile.hs index e734c4d64b..8776762e97 100644 --- a/Annex/ReplaceFile.hs +++ b/Annex/ReplaceFile.hs @@ -9,7 +9,6 @@ module Annex.ReplaceFile where import Common.Annex import Annex.Perms -import Annex.Exception {- Replaces a possibly already existing file with a new version, - atomically, by running an action. @@ -31,7 +30,7 @@ replaceFileOr :: FilePath -> (FilePath -> Annex ()) -> (FilePath -> Annex ()) -> replaceFileOr file action rollback = do tmpdir <- fromRepo gitAnnexTmpMiscDir void $ createAnnexDirectory tmpdir - bracketAnnex (liftIO $ setup tmpdir) rollback $ \tmpfile -> do + bracket (liftIO $ setup tmpdir) rollback $ \tmpfile -> do action tmpfile liftIO $ catchIO (rename tmpfile file) (fallback tmpfile) where diff --git a/Annex/Transfer.hs b/Annex/Transfer.hs index 001539adcc..ebc8e8b899 100644 --- a/Annex/Transfer.hs +++ b/Annex/Transfer.hs @@ -20,7 +20,6 @@ import Common.Annex import Logs.Transfer as X import Annex.Notification as X import Annex.Perms -import Annex.Exception import Utility.Metered #ifdef mingw32_HOST_OS import Utility.WinLock @@ -103,7 +102,7 @@ runTransfer t file shouldretry a = do void $ tryIO $ removeFile $ transferLockFile tfile #endif retry oldinfo metervar run = do - v <- tryAnnex run + v <- tryNonAsync run case v of Right b -> return b Left e -> do diff --git a/Annex/View.hs b/Annex/View.hs index b969816128..a1d873f500 100644 --- a/Annex/View.hs +++ b/Annex/View.hs @@ -410,19 +410,19 @@ withViewChanges addmeta removemeta = do where handleremovals item | DiffTree.srcsha item /= nullSha = - handle item removemeta + handlechange item removemeta =<< catKey (DiffTree.srcsha item) (DiffTree.srcmode item) | otherwise = noop handleadds makeabs item | DiffTree.dstsha item /= nullSha = - handle item addmeta + handlechange item addmeta =<< ifM isDirect ( catKey (DiffTree.dstsha item) (DiffTree.dstmode item) -- optimisation , isAnnexLink $ makeabs $ DiffTree.file item ) | otherwise = noop - handle item a = maybe noop + handlechange item a = maybe noop (void . commandAction . a (getTopFilePath $ DiffTree.file item)) {- Generates a branch for a view. This is done using a different index diff --git a/Assistant/Pairing/Network.hs b/Assistant/Pairing/Network.hs index 6c625f8814..4bb6088b11 100644 --- a/Assistant/Pairing/Network.hs +++ b/Assistant/Pairing/Network.hs @@ -20,7 +20,6 @@ import Utility.Verifiable import Network.Multicast import Network.Info import Network.Socket -import Control.Exception (bracket) import qualified Data.Map as M import Control.Concurrent diff --git a/Assistant/Threads/Committer.hs b/Assistant/Threads/Committer.hs index afe4aa1442..4a47a9e2c9 100644 --- a/Assistant/Threads/Committer.hs +++ b/Assistant/Threads/Committer.hs @@ -27,7 +27,6 @@ import qualified Utility.Lsof as Lsof import qualified Utility.DirWatcher as DirWatcher import Types.KeySource import Config -import Annex.Exception import Annex.Content import Annex.Link import Annex.CatFile @@ -217,7 +216,7 @@ commitStaged :: Annex Bool commitStaged = do {- This could fail if there's another commit being made by - something else. -} - v <- tryAnnex Annex.Queue.flush + v <- tryNonAsync Annex.Queue.flush case v of Left _ -> return False Right _ -> do diff --git a/Assistant/Threads/Cronner.hs b/Assistant/Threads/Cronner.hs index 55b3ca2f10..0fe7f58f4c 100644 --- a/Assistant/Threads/Cronner.hs +++ b/Assistant/Threads/Cronner.hs @@ -191,10 +191,10 @@ runActivity' urlrenderer (ScheduledSelfFsck _ d) = do mapM_ reget =<< liftAnnex (dirKeys gitAnnexBadDir) where reget k = queueTransfers "fsck found bad file; redownloading" Next k Nothing Download -runActivity' urlrenderer (ScheduledRemoteFsck u s d) = handle =<< liftAnnex (remoteFromUUID u) +runActivity' urlrenderer (ScheduledRemoteFsck u s d) = dispatch =<< liftAnnex (remoteFromUUID u) where - handle Nothing = debug ["skipping remote fsck of uuid without a configured remote", fromUUID u, fromSchedule s] - handle (Just rmt) = void $ case Remote.remoteFsck rmt of + dispatch Nothing = debug ["skipping remote fsck of uuid without a configured remote", fromUUID u, fromSchedule s] + dispatch (Just rmt) = void $ case Remote.remoteFsck rmt of Nothing -> go rmt $ do program <- readProgramFile void $ batchCommand program $ diff --git a/Assistant/Threads/SanityChecker.hs b/Assistant/Threads/SanityChecker.hs index b623183823..dce2c2db75 100644 --- a/Assistant/Threads/SanityChecker.hs +++ b/Assistant/Threads/SanityChecker.hs @@ -40,7 +40,6 @@ import Logs.Transfer import Config.Files import Utility.DiskFree import qualified Annex -import Annex.Exception #ifdef WITH_WEBAPP import Assistant.WebApp.Types #endif @@ -85,7 +84,7 @@ sanityCheckerStartupThread startupdelay = namedThreadUnchecked "SanityCheckerSta liftIO $ fixUpSshRemotes {- Clean up old temp files. -} - void $ liftAnnex $ tryAnnex $ do + void $ liftAnnex $ tryNonAsync $ do cleanOldTmpMisc cleanReallyOldTmp diff --git a/Assistant/Threads/Watcher.hs b/Assistant/Threads/Watcher.hs index 91e0fc6196..fe9a95471b 100644 --- a/Assistant/Threads/Watcher.hs +++ b/Assistant/Threads/Watcher.hs @@ -104,13 +104,13 @@ runWatcher = do , errHook = errhook } scanevents <- liftAnnex $ annexStartupScan <$> Annex.getGitConfig - handle <- liftIO $ watchDir "." ignored scanevents hooks startup + h <- liftIO $ watchDir "." ignored scanevents hooks startup debug [ "watching", "."] {- Let the DirWatcher thread run until signalled to pause it, - then wait for a resume signal, and restart. -} waitFor PauseWatcher $ do - liftIO $ stopWatchDir handle + liftIO $ stopWatchDir h waitFor ResumeWatcher runWatcher where hook a = Just <$> asIO2 (runHandler a) diff --git a/Assistant/Threads/XMPPClient.hs b/Assistant/Threads/XMPPClient.hs index 39b0459b7b..2f70b508f6 100644 --- a/Assistant/Threads/XMPPClient.hs +++ b/Assistant/Threads/XMPPClient.hs @@ -117,7 +117,7 @@ xmppClient urlrenderer d creds xmppuuid = void $ liftIO $ atomically . swapTMVar lasttraffic =<< getCurrentTime inAssistant $ debug ["received:", show $ map logXMPPEvent l] - mapM_ (handle selfjid) l + mapM_ (handlemsg selfjid) l sendpings selfjid lasttraffic = forever $ do putStanza pingstanza @@ -133,21 +133,21 @@ xmppClient urlrenderer d creds xmppuuid = - cause traffic, so good enough. -} pingstanza = xmppPing selfjid - handle selfjid (PresenceMessage p) = do + handlemsg selfjid (PresenceMessage p) = do void $ inAssistant $ updateBuddyList (updateBuddies p) <<~ buddyList resendImportantMessages selfjid p - handle _ (GotNetMessage QueryPresence) = putStanza gitAnnexSignature - handle _ (GotNetMessage (NotifyPush us)) = void $ inAssistant $ pull us - handle selfjid (GotNetMessage (PairingNotification stage c u)) = + handlemsg _ (GotNetMessage QueryPresence) = putStanza gitAnnexSignature + handlemsg _ (GotNetMessage (NotifyPush us)) = void $ inAssistant $ pull us + handlemsg selfjid (GotNetMessage (PairingNotification stage c u)) = maybe noop (inAssistant . pairMsgReceived urlrenderer stage u selfjid) (parseJID c) - handle _ (GotNetMessage m@(Pushing _ pushstage)) + handlemsg _ (GotNetMessage m@(Pushing _ pushstage)) | isPushNotice pushstage = inAssistant $ handlePushNotice m | isPushInitiation pushstage = inAssistant $ queuePushInitiation m | otherwise = inAssistant $ storeInbox m - handle _ (Ignorable _) = noop - handle _ (Unknown _) = noop - handle _ (ProtocolError _) = noop + handlemsg _ (Ignorable _) = noop + handlemsg _ (Unknown _) = noop + handlemsg _ (ProtocolError _) = noop resendImportantMessages selfjid (Presence { presenceFrom = Just jid }) = do let c = formatJID jid diff --git a/Assistant/XMPP/Client.hs b/Assistant/XMPP/Client.hs index 677bb2ff31..314ace64ab 100644 --- a/Assistant/XMPP/Client.hs +++ b/Assistant/XMPP/Client.hs @@ -15,7 +15,6 @@ import Network.Protocol.XMPP import Network import Control.Concurrent import qualified Data.Text as T -import Control.Exception (SomeException) {- Everything we need to know to connect to an XMPP server. -} data XMPPCreds = XMPPCreds @@ -34,18 +33,18 @@ connectXMPP c a = case parseJID (xmppJID c) of {- Do a SRV lookup, but if it fails, fall back to the cached xmppHostname. -} connectXMPP' :: JID -> XMPPCreds -> (JID -> XMPP a) -> IO [(HostPort, Either SomeException ())] -connectXMPP' jid c a = reverse <$> (handle =<< lookupSRV srvrecord) +connectXMPP' jid c a = reverse <$> (handlesrv =<< lookupSRV srvrecord) where srvrecord = mkSRVTcp "xmpp-client" $ T.unpack $ strDomain $ jidDomain jid serverjid = JID Nothing (jidDomain jid) Nothing - handle [] = do + handlesrv [] = do let h = xmppHostname c let p = PortNumber $ fromIntegral $ xmppPort c r <- run h p $ a jid return [r] - handle srvs = go [] srvs + handlesrv srvs = go [] srvs go l [] = return l go l ((h,p):rest) = do diff --git a/Assistant/XMPP/Git.hs b/Assistant/XMPP/Git.hs index 301aa71853..19050c7d01 100644 --- a/Assistant/XMPP/Git.hs +++ b/Assistant/XMPP/Git.hs @@ -150,16 +150,16 @@ xmppPush cid gitpush = do SendPackOutput seqnum' b toxmpp seqnum' inh - fromxmpp outh controlh = withPushMessagesInSequence cid SendPack handle + fromxmpp outh controlh = withPushMessagesInSequence cid SendPack handlemsg where - handle (Just (Pushing _ (ReceivePackOutput _ b))) = + handlemsg (Just (Pushing _ (ReceivePackOutput _ b))) = liftIO $ writeChunk outh b - handle (Just (Pushing _ (ReceivePackDone exitcode))) = + handlemsg (Just (Pushing _ (ReceivePackDone exitcode))) = liftIO $ do hPrint controlh exitcode hFlush controlh - handle (Just _) = noop - handle Nothing = do + handlemsg (Just _) = noop + handlemsg Nothing = do debug ["timeout waiting for git receive-pack output via XMPP"] -- Send a synthetic exit code to git-annex -- xmppgit, which will exit and cause git push @@ -264,12 +264,12 @@ xmppReceivePack cid = do let seqnum' = succ seqnum sendNetMessage $ Pushing cid $ ReceivePackOutput seqnum' b relaytoxmpp seqnum' outh - relayfromxmpp inh = withPushMessagesInSequence cid ReceivePack handle + relayfromxmpp inh = withPushMessagesInSequence cid ReceivePack handlemsg where - handle (Just (Pushing _ (SendPackOutput _ b))) = + handlemsg (Just (Pushing _ (SendPackOutput _ b))) = liftIO $ writeChunk inh b - handle (Just _) = noop - handle Nothing = do + handlemsg (Just _) = noop + handlemsg Nothing = do debug ["timeout waiting for git send-pack output via XMPP"] -- closing the handle will make git receive-pack exit liftIO $ do diff --git a/CmdLine/Action.hs b/CmdLine/Action.hs index 247c658bcd..db4f768ac3 100644 --- a/CmdLine/Action.hs +++ b/CmdLine/Action.hs @@ -13,7 +13,6 @@ import Common.Annex import qualified Annex import Types.Command import qualified Annex.Queue -import Annex.Exception type CommandActionRunner = CommandStart -> CommandCleanup @@ -37,14 +36,14 @@ performCommandAction Command { cmdseek = seek, cmdcheck = c, cmdname = name } pa - - This should only be run in the seek stage. -} commandAction :: CommandActionRunner -commandAction a = handle =<< tryAnnexIO go +commandAction a = account =<< tryIO go where go = do Annex.Queue.flushWhenFull callCommandAction a - handle (Right True) = return True - handle (Right False) = incerr - handle (Left err) = do + account (Right True) = return True + account (Right False) = incerr + account (Left err) = do showErr err showEndFail incerr diff --git a/Command/Add.hs b/Command/Add.hs index ae895464e9..5c70545430 100644 --- a/Command/Add.hs +++ b/Command/Add.hs @@ -10,7 +10,6 @@ module Command.Add where import Common.Annex -import Annex.Exception import Command import Types.KeySource import Backend @@ -33,6 +32,8 @@ import Annex.FileMatcher import Annex.ReplaceFile import Utility.Tmp +import Control.Exception (IOException) + def :: [Command] def = [notBareRepo $ withOptions [includeDotFilesOption] $ command "add" paramPaths seek SectionCommon @@ -103,7 +104,7 @@ lockDown = either (\e -> showErr e >> return Nothing) (return . Just) <=< lockDo lockDown' :: FilePath -> Annex (Either IOException KeySource) lockDown' file = ifM crippledFileSystem ( withTSDelta $ liftIO . tryIO . nohardlink - , tryAnnexIO $ do + , tryIO $ do tmp <- fromRepo gitAnnexTmpMiscDir createAnnexDirectory tmp go tmp @@ -167,7 +168,7 @@ ingest (Just source) = withTSDelta $ \delta -> do ) goindirect (Just (key, _)) mcache ms = do - catchAnnex (moveAnnex key $ contentLocation source) + catchNonAsync (moveAnnex key $ contentLocation source) (undo (keyFilename source) key) maybe noop (genMetaData key (keyFilename source)) ms liftIO $ nukeFile $ keyFilename source @@ -206,23 +207,23 @@ perform file = lockDown file >>= ingest >>= go {- On error, put the file back so it doesn't seem to have vanished. - This can be called before or after the symlink is in place. -} -undo :: FilePath -> Key -> IOException -> Annex a +undo :: FilePath -> Key -> SomeException -> Annex a undo file key e = do whenM (inAnnex key) $ do liftIO $ nukeFile file - catchAnnex (fromAnnex key file) tryharder + catchNonAsync (fromAnnex key file) tryharder logStatus key InfoMissing - throwAnnex e + throwM e where -- fromAnnex could fail if the file ownership is weird - tryharder :: IOException -> Annex () + tryharder :: SomeException -> Annex () tryharder _ = do src <- calcRepo $ gitAnnexLocation key liftIO $ moveFile src file {- Creates the symlink to the annexed content, returns the link target. -} link :: FilePath -> Key -> Maybe InodeCache -> Annex String -link file key mcache = flip catchAnnex (undo file key) $ do +link file key mcache = flip catchNonAsync (undo file key) $ do l <- inRepo $ gitAnnexLink file key replaceFile file $ makeAnnexLink l diff --git a/Command/Direct.hs b/Command/Direct.hs index a5165a4a2a..c64ef6e562 100644 --- a/Command/Direct.hs +++ b/Command/Direct.hs @@ -7,8 +7,6 @@ module Command.Direct where -import Control.Exception.Extensible - import Common.Annex import Command import qualified Git @@ -16,7 +14,6 @@ import qualified Git.LsFiles import qualified Git.Branch import Config import Annex.Direct -import Annex.Exception def :: [Command] def = [notBareRepo $ noDaemonRunning $ @@ -52,7 +49,7 @@ perform = do Nothing -> noop Just a -> do showStart "direct" f - r' <- tryAnnex a + r' <- tryNonAsync a case r' of Left e -> warnlocked e Right _ -> showEndOk diff --git a/Command/FuzzTest.hs b/Command/FuzzTest.hs index d673541fbb..7075aeddcc 100644 --- a/Command/FuzzTest.hs +++ b/Command/FuzzTest.hs @@ -13,7 +13,6 @@ import Command import qualified Git.Config import Config import Utility.ThreadScheduler -import Annex.Exception import Utility.DiskFree import Data.Time.Clock @@ -56,7 +55,7 @@ fuzz :: Handle -> Annex () fuzz logh = do action <- genFuzzAction record logh $ flip Started action - result <- tryAnnex $ runFuzzAction action + result <- tryNonAsync $ runFuzzAction action record logh $ flip Finished $ either (const False) (const True) result diff --git a/Command/Indirect.hs b/Command/Indirect.hs index 4ce4c2c383..e146f13b79 100644 --- a/Command/Indirect.hs +++ b/Command/Indirect.hs @@ -7,8 +7,6 @@ module Command.Indirect where -import Control.Exception.Extensible - import Common.Annex import Command import qualified Git @@ -21,7 +19,6 @@ import Annex.Direct import Annex.Content import Annex.Content.Direct import Annex.CatFile -import Annex.Exception import Annex.Init import qualified Command.Add @@ -88,12 +85,12 @@ perform = do removeInodeCache k removeAssociatedFiles k whenM (liftIO $ not . isSymbolicLink <$> getSymbolicLinkStatus f) $ do - v <-tryAnnexIO (moveAnnex k f) + v <- tryNonAsync (moveAnnex k f) case v of Right _ -> do l <- inRepo $ gitAnnexLink f k liftIO $ createSymbolicLink l f - Left e -> catchAnnex (Command.Add.undo f k e) + Left e -> catchNonAsync (Command.Add.undo f k e) warnlocked showEndOk diff --git a/Command/Map.hs b/Command/Map.hs index 5a32d7f524..a62c3e1ad9 100644 --- a/Command/Map.hs +++ b/Command/Map.hs @@ -7,7 +7,6 @@ module Command.Map where -import Control.Exception.Extensible import qualified Data.Map as M import Common.Annex @@ -247,7 +246,7 @@ combineSame = map snd . nubBy sameuuid . map pair safely :: IO Git.Repo -> IO (Maybe Git.Repo) safely a = do - result <- try a :: IO (Either SomeException Git.Repo) + result <- tryNonAsync a case result of Left _ -> return Nothing Right r' -> return $ Just r' diff --git a/Command/Move.hs b/Command/Move.hs index 396ea4afce..3d9646dea8 100644 --- a/Command/Move.hs +++ b/Command/Move.hs @@ -152,17 +152,17 @@ fromOk src key = go =<< Annex.getState Annex.force fromPerform :: Remote -> Bool -> Key -> AssociatedFile -> CommandPerform fromPerform src move key afile = moveLock move key $ ifM (inAnnex key) - ( handle move True - , handle move =<< go + ( dispatch move True + , dispatch move =<< go ) where go = notifyTransfer Download afile $ download (Remote.uuid src) key afile noRetry $ \p -> do showAction $ "from " ++ Remote.name src getViaTmp key $ \t -> Remote.retrieveKeyFile src key afile t p - handle _ False = stop -- failed - handle False True = next $ return True -- copy complete - handle True True = do -- finish moving + dispatch _ False = stop -- failed + dispatch False True = next $ return True -- copy complete + dispatch True True = do -- finish moving ok <- Remote.removeKey src key next $ Command.Drop.cleanupRemote key src ok diff --git a/Command/PreCommit.hs b/Command/PreCommit.hs index 412b9ae08e..09ff042aa6 100644 --- a/Command/PreCommit.hs +++ b/Command/PreCommit.hs @@ -19,7 +19,6 @@ import Annex.Hook import Annex.View import Annex.View.ViewedFile import Annex.Perms -import Annex.Exception import Logs.View import Logs.MetaData import Types.View diff --git a/Command/TestRemote.hs b/Command/TestRemote.hs index 463c4d3595..cb36b66bad 100644 --- a/Command/TestRemote.hs +++ b/Command/TestRemote.hs @@ -31,7 +31,6 @@ import Locations import Test.Tasty import Test.Tasty.Runners import Test.Tasty.HUnit -import Control.Exception import "crypto-api" Crypto.Random import qualified Data.ByteString as B import qualified Data.ByteString.Lazy as L diff --git a/Command/Vicfg.hs b/Command/Vicfg.hs index 5ec6bbf723..1f16955360 100644 --- a/Command/Vicfg.hs +++ b/Command/Vicfg.hs @@ -217,7 +217,7 @@ parseCfg curcfg = go [] curcfg . lines | null l = Right cfg | "#" `isPrefixOf` l = Right cfg | null setting || null f = Left "missing field" - | otherwise = handle cfg f setting value' + | otherwise = parsed cfg f setting value' where (setting, rest) = separate isSpace l (r, value) = separate (== '=') rest @@ -225,7 +225,7 @@ parseCfg curcfg = go [] curcfg . lines f = reverse $ trimspace $ reverse $ trimspace r trimspace = dropWhile isSpace - handle cfg f setting value + parsed cfg f setting value | setting == "trust" = case readTrustLevel value of Nothing -> badval "trust value" value Just t -> diff --git a/Common.hs b/Common.hs index 0f3dc71d04..76e8d5133c 100644 --- a/Common.hs +++ b/Common.hs @@ -6,7 +6,6 @@ import Control.Monad as X import Control.Monad.IfElse as X import Control.Applicative as X import "mtl" Control.Monad.State.Strict as X (liftIO) -import Control.Exception.Extensible as X (IOException) import Data.Maybe as X import Data.List as X hiding (head, tail, init, last) diff --git a/Crypto.hs b/Crypto.hs index 10d6e5cef4..8d4d4f04fa 100644 --- a/Crypto.hs +++ b/Crypto.hs @@ -38,7 +38,6 @@ import Data.ByteString.Lazy.UTF8 (fromString) import Control.Applicative import qualified Data.Map as M import Control.Monad.IO.Class -import Control.Monad.Catch (MonadMask) import Common.Annex import qualified Utility.Gpg as Gpg diff --git a/Git/Config.hs b/Git/Config.hs index d998fd1e29..171c3e6c65 100644 --- a/Git/Config.hs +++ b/Git/Config.hs @@ -9,7 +9,6 @@ module Git.Config where import qualified Data.Map as M import Data.Char -import Control.Exception.Extensible import Common import Git diff --git a/Git/UpdateIndex.hs b/Git/UpdateIndex.hs index 7de2f1be34..ecd154aa04 100644 --- a/Git/UpdateIndex.hs +++ b/Git/UpdateIndex.hs @@ -29,8 +29,6 @@ import Git.Command import Git.FilePath import Git.Sha -import Control.Exception (bracket) - {- Streamers are passed a callback and should feed it lines in the form - read by update-index, and generated by ls-tree. -} type Streamer = (String -> IO ()) -> IO () diff --git a/Limit.hs b/Limit.hs index 9ac849bcec..89dd9d33e1 100644 --- a/Limit.hs +++ b/Limit.hs @@ -152,8 +152,8 @@ limitCopies want = case split ":" want of go num good = case readish num of Nothing -> Left "bad number for copies" Just n -> Right $ \notpresent -> checkKey $ - handle n good notpresent - handle n good notpresent key = do + go' n good notpresent + go' n good notpresent key = do us <- filter (`S.notMember` notpresent) <$> (filterM good =<< Remote.keyLocations key) return $ length us >= n @@ -170,10 +170,10 @@ addLackingCopies approx = addLimit . limitLackingCopies approx limitLackingCopies :: Bool -> MkLimit Annex limitLackingCopies approx want = case readish want of Just needed -> Right $ \notpresent mi -> flip checkKey mi $ - handle mi needed notpresent + go mi needed notpresent Nothing -> Left "bad value for number of lacking copies" where - handle mi needed notpresent key = do + go mi needed notpresent key = do NumCopies numcopies <- if approx then approxNumCopies else case mi of diff --git a/Logs/Transfer.hs b/Logs/Transfer.hs index c96d9cd1e7..b6279ccbab 100644 --- a/Logs/Transfer.hs +++ b/Logs/Transfer.hs @@ -11,7 +11,6 @@ module Logs.Transfer where import Common.Annex import Annex.Perms -import Annex.Exception import qualified Git import Types.Key import Utility.Metered @@ -94,7 +93,7 @@ percentComplete (Transfer { transferKey = key }) info = mkProgressUpdater :: Transfer -> TransferInfo -> Annex (MeterUpdate, FilePath, MVar Integer) mkProgressUpdater t info = do tfile <- fromRepo $ transferFile t - _ <- tryAnnex $ createAnnexDirectory $ takeDirectory tfile + _ <- tryNonAsync $ createAnnexDirectory $ takeDirectory tfile mvar <- liftIO $ newMVar 0 return (liftIO . updater tfile mvar, tfile, mvar) where diff --git a/Messages.hs b/Messages.hs index 9f473110ac..f27755f3a4 100644 --- a/Messages.hs +++ b/Messages.hs @@ -47,7 +47,7 @@ import System.Log.Handler (setFormatter, LogHandler) import System.Log.Handler.Simple import qualified Data.Set as S -import Common +import Common hiding (handle) import Types import Types.Messages import qualified Messages.JSON as JSON diff --git a/Remote.hs b/Remote.hs index 5ee75823f5..8a8eb64df0 100644 --- a/Remote.hs +++ b/Remote.hs @@ -56,7 +56,6 @@ import Data.Ord import Common.Annex import Types.Remote import qualified Annex -import Annex.Exception import Annex.UUID import Logs.UUID import Logs.Trust @@ -114,10 +113,10 @@ byNameWithUUID = checkuuid <=< byName byName' :: RemoteName -> Annex (Either String Remote) byName' "" = return $ Left "no remote specified" -byName' n = handle . filter matching <$> remoteList +byName' n = go . filter matching <$> remoteList where - handle [] = Left $ "there is no available git remote named \"" ++ n ++ "\"" - handle (match:_) = Right match + go [] = Left $ "there is no available git remote named \"" ++ n ++ "\"" + go (match:_) = Right match matching r = n == name r || toUUID n == uuid r {- Only matches remote name, not UUID -} @@ -315,8 +314,7 @@ isXMPPRemote remote = Git.repoIsUrl r && "xmpp::" `isPrefixOf` Git.repoLocation r = repo remote hasKey :: Remote -> Key -> Annex (Either String Bool) -hasKey r k = either (Left . show) Right - <$> tryNonAsyncAnnex (checkPresent r k) +hasKey r k = either (Left . show) Right <$> tryNonAsync (checkPresent r k) hasKeyCheap :: Remote -> Bool hasKeyCheap = checkPresentCheap diff --git a/Remote/Ddar.hs b/Remote/Ddar.hs index fba05312be..beeb4d7cc0 100644 --- a/Remote/Ddar.hs +++ b/Remote/Ddar.hs @@ -8,7 +8,6 @@ module Remote.Ddar (remote) where -import Control.Exception import qualified Data.Map as M import qualified Data.ByteString.Lazy as L import System.IO.Error diff --git a/Remote/External.hs b/Remote/External.hs index f326f26ba5..4fb760afdc 100644 --- a/Remote/External.hs +++ b/Remote/External.hs @@ -21,7 +21,6 @@ import Logs.PreferredContent.Raw import Logs.RemoteState import Config.Cost import Annex.UUID -import Annex.Exception import Creds import Control.Concurrent.STM @@ -137,7 +136,7 @@ checkKey external k = either error id <$> go _ -> Nothing safely :: Annex Bool -> Annex Bool -safely a = go =<< tryAnnex a +safely a = go =<< tryNonAsync a where go (Right r) = return r go (Left e) = do diff --git a/Remote/External/Types.hs b/Remote/External/Types.hs index 983764f701..3a69ae9ea6 100644 --- a/Remote/External/Types.hs +++ b/Remote/External/Types.hs @@ -32,7 +32,6 @@ module Remote.External.Types ( ) where import Common.Annex -import Annex.Exception import Types.Key (file2key, key2file) import Types.StandardGroups (PreferredContentExpression) import Utility.Metered (BytesProcessed(..)) diff --git a/Remote/GCrypt.hs b/Remote/GCrypt.hs index 55a7758112..8891977f73 100644 --- a/Remote/GCrypt.hs +++ b/Remote/GCrypt.hs @@ -15,7 +15,7 @@ module Remote.GCrypt ( import qualified Data.Map as M import qualified Data.ByteString.Lazy as L -import Control.Exception.Extensible +import Control.Exception import Common.Annex import Types.Remote diff --git a/Remote/Git.hs b/Remote/Git.hs index da5ca4c4a5..34c60d98fe 100644 --- a/Remote/Git.hs +++ b/Remote/Git.hs @@ -27,7 +27,6 @@ import qualified Annex import Logs.Presence import Annex.Transfer import Annex.UUID -import Annex.Exception import qualified Annex.Content import qualified Annex.BranchState import qualified Annex.Branch @@ -56,7 +55,6 @@ import Creds import Control.Concurrent import Control.Concurrent.MSampleVar import qualified Data.Map as M -import Control.Exception.Extensible remote :: RemoteType remote = RemoteType { @@ -281,7 +279,7 @@ tryGitConfigRead r s <- Annex.new r Annex.eval s $ do Annex.BranchState.disableUpdate - void $ tryAnnex $ ensureInitialized + void $ tryNonAsync $ ensureInitialized Annex.getState Annex.repo {- Checks if a given remote has the content for a key in its annex. -} diff --git a/Remote/Helper/Chunked.hs b/Remote/Helper/Chunked.hs index 953c533b66..5e4ea111f6 100644 --- a/Remote/Helper/Chunked.hs +++ b/Remote/Helper/Chunked.hs @@ -24,7 +24,6 @@ import Logs.Chunk import Utility.Metered import Crypto (EncKey) import Backend (isStableKey) -import Annex.Exception import qualified Data.ByteString.Lazy as L import qualified Data.Map as M @@ -172,7 +171,7 @@ seekResume h chunkkeys checker = do liftIO $ hSeek h AbsoluteSeek sz return (cks, toBytesProcessed sz) | otherwise = do - v <- tryNonAsyncAnnex (checker k) + v <- tryNonAsync (checker k) case v of Right True -> check pos' cks' sz @@ -231,7 +230,7 @@ retrieveChunks retriever u chunkconfig encryptor basek dest basep sink -- Optimisation: Try the unchunked key first, to avoid -- looking in the git-annex branch for chunk counts -- that are likely not there. - getunchunked `catchNonAsyncAnnex` + getunchunked `catchNonAsync` const (go =<< chunkKeysOnly u basek) | otherwise = go =<< chunkKeys u chunkconfig basek where @@ -241,7 +240,7 @@ retrieveChunks retriever u chunkconfig encryptor basek dest basep sink let ls' = maybe ls (setupResume ls) currsize if any null ls' then return True -- dest is already complete - else firstavail currsize ls' `catchNonAsyncAnnex` giveup + else firstavail currsize ls' `catchNonAsync` giveup giveup e = do warning (show e) @@ -251,20 +250,20 @@ retrieveChunks retriever u chunkconfig encryptor basek dest basep sink firstavail currsize ([]:ls) = firstavail currsize ls firstavail currsize ((k:ks):ls) | k == basek = getunchunked - `catchNonAsyncAnnex` (const $ firstavail currsize ls) + `catchNonAsync` (const $ firstavail currsize ls) | otherwise = do let offset = resumeOffset currsize k let p = maybe basep (offsetMeterUpdate basep . toBytesProcessed) offset - v <- tryNonAsyncAnnex $ + v <- tryNonAsync $ retriever (encryptor k) p $ \content -> bracketIO (maybe opennew openresume offset) hClose $ \h -> do void $ tosink (Just h) p content let sz = toBytesProcessed $ fromMaybe 0 $ keyChunkSize k getrest p h sz sz ks - `catchNonAsyncAnnex` giveup + `catchNonAsync` giveup case v of Left e | null ls -> giveup e @@ -372,7 +371,7 @@ checkPresentChunks checker u chunkconfig encryptor basek Right False -> return $ Right False Left e -> return $ Left $ show e - check = tryNonAsyncAnnex . checker . encryptor + check = tryNonAsync . checker . encryptor {- A key can be stored in a remote unchunked, or as a list of chunked keys. - This can be the case whether or not the remote is currently configured diff --git a/Remote/Helper/Special.hs b/Remote/Helper/Special.hs index fc0e11d2f3..ba9ff4fb40 100644 --- a/Remote/Helper/Special.hs +++ b/Remote/Helper/Special.hs @@ -42,13 +42,11 @@ import Remote.Helper.Chunked as X import Remote.Helper.Encryptable as X import Remote.Helper.Messages import Annex.Content -import Annex.Exception import qualified Git import qualified Git.Command import qualified Git.Construct import qualified Data.ByteString.Lazy as L -import Control.Exception (bracket) import qualified Data.Map as M {- Special remotes don't have a configured url, so Git.Repo does not @@ -174,7 +172,7 @@ specialRemote' cfg c preparestorer prepareretriever prepareremover preparecheckp cip = cipherKey c gpgopts = getGpgEncParams encr - safely a = catchNonAsyncAnnex a (\e -> warning (show e) >> return False) + safely a = catchNonAsync a (\e -> warning (show e) >> return False) -- chunk, then encrypt, then feed to the storer storeKeyGen k p enc = safely $ preparestorer k $ safely . go diff --git a/Remote/WebDAV.hs b/Remote/WebDAV.hs index b70001ddba..4caebaf214 100644 --- a/Remote/WebDAV.hs +++ b/Remote/WebDAV.hs @@ -14,10 +14,10 @@ import qualified Data.Map as M import qualified Data.ByteString.Lazy as L import qualified Data.ByteString.UTF8 as B8 import qualified Data.ByteString.Lazy.UTF8 as L8 -import qualified Control.Exception.Lifted as EL import Network.HTTP.Client (HttpException(..)) import Network.HTTP.Types import System.IO.Error +import Control.Monad.Catch import Common.Annex import Types.Remote @@ -31,7 +31,6 @@ import Creds import Utility.Metered import Utility.Url (URLString) import Annex.UUID -import Annex.Exception import Remote.WebDAV.DavLocation remote :: RemoteType @@ -301,11 +300,11 @@ moveDAV baseurl src dest = inLocation src $ moveContentM newurl newurl = B8.fromString (locationUrl baseurl dest) existsDAV :: DavLocation -> DAVT IO (Either String Bool) -existsDAV l = inLocation l check `EL.catch` (\(e :: EL.SomeException) -> return (Left $ show e)) +existsDAV l = inLocation l check `catchNonAsync` (\e -> return (Left $ show e)) where check = do setDepth Nothing - EL.catchJust + catchJust (matchStatusCodeException notFound404) (getPropsM >> ispresent True) (const $ ispresent False) @@ -319,8 +318,7 @@ matchStatusCodeException _ _ = Nothing -- Ignores any exceptions when performing a DAV action. safely :: DAVT IO a -> DAVT IO (Maybe a) -safely a = (Just <$> a) - `EL.catch` (\(_ :: EL.SomeException) -> return Nothing) +safely = eitherToMaybe <$$> tryNonAsync choke :: IO (Either String a) -> IO a choke f = do @@ -336,7 +334,7 @@ withDAVHandle r a = do mcreds <- getCreds (config r) (uuid r) case (mcreds, configUrl r) of (Just (user, pass), Just baseurl) -> - bracketIO (mkDAVContext baseurl) closeDAVContext $ \ctx -> + withDAVContext baseurl $ \ctx -> a (Just (DavHandle ctx (toDavUser user) (toDavPass pass) baseurl)) _ -> a Nothing diff --git a/RemoteDaemon/Transport/Ssh.hs b/RemoteDaemon/Transport/Ssh.hs index 65c3138525..db6b6127cc 100644 --- a/RemoteDaemon/Transport/Ssh.hs +++ b/RemoteDaemon/Transport/Ssh.hs @@ -108,10 +108,10 @@ data Status = Stopping | ConnectionClosed {- Make connection robustly, with exponentioal backoff on failure. -} robustly :: Int -> IO Status -> IO () -robustly backoff a = handle =<< catchDefaultIO ConnectionClosed a +robustly backoff a = caught =<< catchDefaultIO ConnectionClosed a where - handle Stopping = return () - handle ConnectionClosed = do + caught Stopping = return () + caught ConnectionClosed = do threadDelaySeconds (Seconds backoff) robustly increasedbackoff a diff --git a/Test.hs b/Test.hs index 5032038ad9..9a34835cc9 100644 --- a/Test.hs +++ b/Test.hs @@ -20,7 +20,6 @@ import Options.Applicative hiding (command) #if MIN_VERSION_optparse_applicative(0,8,0) import qualified Options.Applicative.Types as Opt #endif -import Control.Exception.Extensible import qualified Data.Map as M import qualified Text.JSON @@ -1444,7 +1443,7 @@ indir testenv dir a = do (try a::IO (Either SomeException ())) case r of Right () -> return () - Left e -> throw e + Left e -> throwM e setuprepo :: TestEnv -> FilePath -> IO FilePath setuprepo testenv dir = do diff --git a/Utility/Directory.hs b/Utility/Directory.hs index ade5ef8111..a4429d5b96 100644 --- a/Utility/Directory.hs +++ b/Utility/Directory.hs @@ -11,7 +11,6 @@ module Utility.Directory where import System.IO.Error import System.Directory -import Control.Exception (throw, bracket) import Control.Monad import Control.Monad.IfElse import System.FilePath @@ -114,7 +113,7 @@ moveFile src dest = tryIO (rename src dest) >>= onrename whenM (isdir dest) rethrow viaTmp mv dest undefined where - rethrow = throw e + rethrow = throwM e mv tmp _ = do ok <- boolSystem "mv" [Param "-f", Param src, Param tmp] unless ok $ do diff --git a/Utility/Exception.hs b/Utility/Exception.hs index 13c9d508a4..802e9e24b2 100644 --- a/Utility/Exception.hs +++ b/Utility/Exception.hs @@ -7,11 +7,25 @@ {-# LANGUAGE ScopedTypeVariables #-} -module Utility.Exception where +module Utility.Exception ( + module X, + catchBoolIO, + catchMaybeIO, + catchDefaultIO, + catchMsgIO, + catchIO, + tryIO, + bracketIO, + catchNonAsync, + tryNonAsync, + tryWhenExists, +) where +import Control.Monad.Catch as X hiding (Handler) +import qualified Control.Monad.Catch as M import Control.Exception (IOException, AsyncException) -import Control.Monad.Catch import Control.Monad +import Control.Monad.IO.Class (liftIO, MonadIO) import System.IO.Error (isDoesNotExistError) import Utility.Data @@ -44,14 +58,20 @@ catchIO = catch tryIO :: MonadCatch m => m a -> m (Either IOException a) tryIO = try +{- bracket with setup and cleanup actions lifted to IO. + - + - Note that unlike catchIO and tryIO, this catches all exceptions. -} +bracketIO :: (MonadMask m, MonadIO m) => IO v -> (v -> IO b) -> (v -> m a) -> m a +bracketIO setup cleanup = bracket (liftIO setup) (liftIO . cleanup) + {- Catches all exceptions except for async exceptions. - This is often better to use than catching them all, so that - ThreadKilled and UserInterrupt get through. -} catchNonAsync :: MonadCatch m => m a -> (SomeException -> m a) -> m a catchNonAsync a onerr = a `catches` - [ Handler (\ (e :: AsyncException) -> throwM e) - , Handler (\ (e :: SomeException) -> onerr e) + [ M.Handler (\ (e :: AsyncException) -> throwM e) + , M.Handler (\ (e :: SomeException) -> onerr e) ] tryNonAsync :: MonadCatch m => m a -> m (Either SomeException a) diff --git a/Utility/FileMode.hs b/Utility/FileMode.hs index c2ef683a81..832250bde7 100644 --- a/Utility/FileMode.hs +++ b/Utility/FileMode.hs @@ -11,7 +11,6 @@ module Utility.FileMode where import System.IO import Control.Monad -import Control.Exception (bracket) import System.PosixCompat.Types import Utility.PosixFiles #ifndef mingw32_HOST_OS diff --git a/Utility/Gpg.hs b/Utility/Gpg.hs index 410259b11b..dfca827786 100644 --- a/Utility/Gpg.hs +++ b/Utility/Gpg.hs @@ -13,7 +13,6 @@ import Control.Applicative import Control.Concurrent import Control.Monad.IO.Class import qualified Data.Map as M -import Control.Monad.Catch (bracket, MonadMask) import Common import qualified Build.SysConfig as SysConfig diff --git a/Utility/Matcher.hs b/Utility/Matcher.hs index 1ee224ffc6..76f8903f5c 100644 --- a/Utility/Matcher.hs +++ b/Utility/Matcher.hs @@ -102,13 +102,13 @@ findClose l = in (Group (reverse g), rest) where go c [] = (c, []) -- not picky about extra Close - go c (t:ts) = handle t + go c (t:ts) = dispatch t where - handle Close = (c, ts) - handle Open = + dispatch Close = (c, ts) + dispatch Open = let (c', ts') = go [] ts in go (Group (reverse c') : c) ts' - handle _ = go (One t:c) ts + dispatch _ = go (One t:c) ts {- Checks if a Matcher matches, using a supplied function to check - the value of Operations. -} diff --git a/Utility/Parallel.hs b/Utility/Parallel.hs index 239c81e7b5..7966811aba 100644 --- a/Utility/Parallel.hs +++ b/Utility/Parallel.hs @@ -10,7 +10,6 @@ module Utility.Parallel where import Common import Control.Concurrent -import Control.Exception {- Runs an action in parallel with a set of values, in a set of threads. - In order for the actions to truely run in parallel, requires GHC's diff --git a/Utility/Tmp.hs b/Utility/Tmp.hs index 7da5cc2847..edd82f5ac2 100644 --- a/Utility/Tmp.hs +++ b/Utility/Tmp.hs @@ -14,7 +14,6 @@ import System.Directory import Control.Monad.IfElse import System.FilePath import Control.Monad.IO.Class -import Control.Monad.Catch (bracket, MonadMask) import Utility.Exception import Utility.FileSystemEncoding @@ -33,11 +32,11 @@ viaTmp a file content = bracket setup cleanup use setup = do createDirectoryIfMissing True dir openTempFile dir template - cleanup (tmpfile, handle) = do - _ <- tryIO $ hClose handle + cleanup (tmpfile, h) = do + _ <- tryIO $ hClose h tryIO $ removeFile tmpfile - use (tmpfile, handle) = do - hClose handle + use (tmpfile, h) = do + hClose h a tmpfile content rename tmpfile file @@ -54,10 +53,10 @@ withTmpFileIn :: (MonadIO m, MonadMask m) => FilePath -> Template -> (FilePath - withTmpFileIn tmpdir template a = bracket create remove use where create = liftIO $ openTempFile tmpdir template - remove (name, handle) = liftIO $ do - hClose handle + remove (name, h) = liftIO $ do + hClose h catchBoolIO (removeFile name >> return True) - use (name, handle) = a name handle + use (name, h) = a name h {- Runs an action with a tmp directory located within the system's tmp - directory (or within "." if there is none), then removes the tmp diff --git a/Utility/Url.hs b/Utility/Url.hs index bf2d3859c4..4137a5d8bb 100644 --- a/Utility/Url.hs +++ b/Utility/Url.hs @@ -51,11 +51,11 @@ checkBoth url expected_size uo = do v <- check url expected_size uo return (fst v && snd v) check :: URLString -> Maybe Integer -> UrlOptions -> IO (Bool, Bool) -check url expected_size = handle <$$> exists url +check url expected_size = go <$$> exists url where - handle (False, _) = (False, False) - handle (True, Nothing) = (True, True) - handle (True, s) = case expected_size of + go (False, _) = (False, False) + go (True, Nothing) = (True, True) + go (True, s) = case expected_size of Just _ -> (True, expected_size == s) Nothing -> (True, True) diff --git a/Utility/WebApp.hs b/Utility/WebApp.hs index 0f3378a15a..6bcfce9196 100644 --- a/Utility/WebApp.hs +++ b/Utility/WebApp.hs @@ -38,10 +38,6 @@ import Data.Byteable #ifdef __ANDROID__ import Data.Endian #endif -#if defined(__ANDROID__) || defined (mingw32_HOST_OS) -#else -import Control.Exception (bracketOnError) -#endif localhost :: HostName localhost = "localhost" diff --git a/debian/control b/debian/control index 66d340e3cf..821629297e 100644 --- a/debian/control +++ b/debian/control @@ -26,7 +26,6 @@ Build-Depends: libghc-ifelse-dev, libghc-bloomfilter-dev, libghc-edit-distance-dev, - libghc-extensible-exceptions-dev, libghc-hinotify-dev [linux-any], libghc-stm-dev (>= 2.3), libghc-dbus-dev (>= 0.10.3) [linux-any], diff --git a/git-annex.cabal b/git-annex.cabal index 8f36bfe486..8dd42ee2f6 100644 --- a/git-annex.cabal +++ b/git-annex.cabal @@ -96,8 +96,7 @@ Executable git-annex Main-Is: git-annex.hs Build-Depends: MissingH, hslogger, directory, filepath, containers, utf8-string, network (>= 2.0), mtl (>= 2), - bytestring, old-locale, time, HTTP, - extensible-exceptions, dataenc, SHA, process, json, + bytestring, old-locale, time, HTTP, dataenc, SHA, process, json, base (>= 4.5 && < 4.9), monad-control, exceptions (>= 0.5), IfElse, text, QuickCheck >= 2.1, bloomfilter, edit-distance, process, SafeSemaphore, uuid, random, dlist, unix-compat, async, stm (>= 2.3), @@ -143,7 +142,7 @@ Executable git-annex if flag(WebDAV) Build-Depends: DAV (>= 0.8), - http-client, http-conduit, http-types, lifted-base, transformers + http-client, http-conduit, http-types CPP-Options: -DWITH_WEBDAV if flag(Assistant) && ! os(solaris) From d3d30d2bf3838fadaeec28ab203f82fb3dc7929a Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Thu, 7 Aug 2014 22:11:28 -0400 Subject: [PATCH 34/44] need transformers for Utility.Exception --- debian/control | 1 + git-annex.cabal | 4 ++-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/debian/control b/debian/control index 821629297e..46adc02cf0 100644 --- a/debian/control +++ b/debian/control @@ -18,6 +18,7 @@ Build-Depends: libghc-quickcheck2-dev, libghc-monad-control-dev (>= 0.3), libghc-exceptions-dev, + libghc-transformers-dev, libghc-unix-compat-dev, libghc-dlist-dev, libghc-uuid-dev, diff --git a/git-annex.cabal b/git-annex.cabal index 8dd42ee2f6..dc5503819d 100644 --- a/git-annex.cabal +++ b/git-annex.cabal @@ -97,7 +97,7 @@ Executable git-annex Build-Depends: MissingH, hslogger, directory, filepath, containers, utf8-string, network (>= 2.0), mtl (>= 2), bytestring, old-locale, time, HTTP, dataenc, SHA, process, json, - base (>= 4.5 && < 4.9), monad-control, exceptions (>= 0.5), + base (>= 4.5 && < 4.9), monad-control, exceptions (>= 0.5), transformers, IfElse, text, QuickCheck >= 2.1, bloomfilter, edit-distance, process, SafeSemaphore, uuid, random, dlist, unix-compat, async, stm (>= 2.3), data-default, case-insensitive @@ -188,7 +188,7 @@ Executable git-annex if flag(Webapp) Build-Depends: yesod, yesod-default, yesod-static, yesod-form, yesod-core, - http-types, transformers, wai, wai-extra, warp, warp-tls, + http-types, wai, wai-extra, warp, warp-tls, blaze-builder, crypto-api, hamlet, clientsession, template-haskell, data-default, aeson, path-pieces, shakespeare From 871b6cb88692b4f39c03eea7ccfee841cf5fb6d7 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Fri, 8 Aug 2014 12:50:51 -0400 Subject: [PATCH 35/44] DAV version turns out to be 1.0. --- debian/changelog | 2 +- git-annex.cabal | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/debian/changelog b/debian/changelog index 1b981c08d3..26c30451fc 100644 --- a/debian/changelog +++ b/debian/changelog @@ -19,7 +19,7 @@ git-annex (5.20140718) UNRELEASED; urgency=medium * WebDAV: Sped up by avoiding making multiple http connections when storing a file. * WebDAV: Avoid buffering whole file in memory when uploading. - * WebDAV: Dropped support for DAV before 0.8. + * WebDAV: Dropped support for DAV before 1.0. * testremote: New command to test uploads/downloads to a remote. * Dropping an object from a bup special remote now deletes the git branch for the object, although of course the object's content cannot be deleted diff --git a/git-annex.cabal b/git-annex.cabal index dc5503819d..be1cdbb968 100644 --- a/git-annex.cabal +++ b/git-annex.cabal @@ -141,7 +141,7 @@ Executable git-annex CPP-Options: -DWITH_S3 if flag(WebDAV) - Build-Depends: DAV (>= 0.8), + Build-Depends: DAV (>= 1.0), http-client, http-conduit, http-types CPP-Options: -DWITH_WEBDAV From fc17cf852ed31154c26801a365da290dd6417e27 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Fri, 8 Aug 2014 13:17:24 -0400 Subject: [PATCH 36/44] further break out legacy chunking code --- Remote/WebDAV.hs | 161 ++++++++++++++++++++++++++--------------------- 1 file changed, 89 insertions(+), 72 deletions(-) diff --git a/Remote/WebDAV.hs b/Remote/WebDAV.hs index 4caebaf214..e7c08c8006 100644 --- a/Remote/WebDAV.hs +++ b/Remote/WebDAV.hs @@ -104,21 +104,6 @@ store _ (Just dav) = httpStorer $ \k reqbody -> liftIO $ goDAV dav $ do finalizeStore (baseURL dav) tmp dest return True -storeLegacyChunked :: ChunkSize -> Key -> DavHandle -> L.ByteString -> IO Bool -storeLegacyChunked chunksize k dav b = - Legacy.storeChunks k tmp dest storer recorder finalizer - where - storehttp l b' = void $ goDAV dav $ do - maybe noop (void . mkColRecursive) (locationParent l) - inLocation l $ putContentM (contentType, b') - storer locs = Legacy.storeChunked chunksize locs storehttp b - recorder l s = storehttp l (L8.fromString s) - finalizer tmp' dest' = goDAV dav $ - finalizeStore (baseURL dav) tmp' (fromJust $ locationParent dest') - - tmp = keyTmpLocation k - dest = keyLocation k ++ keyFile k - finalizeStore :: URLString -> DavLocation -> DavLocation -> DAVT IO () finalizeStore baseurl tmp dest = do inLocation dest $ void $ safely $ delContentM @@ -130,17 +115,18 @@ retrieveCheap _ _ = return False retrieve :: ChunkConfig -> Maybe DavHandle -> Retriever retrieve _ Nothing = error "unable to connect" -retrieve chunkconfig (Just dav) = fileRetriever $ \d k p -> liftIO $ - withStoredFiles chunkconfig k dav onerr $ \locs -> do - Legacy.meteredWriteFileChunks p d locs $ \l -> do - mb <- goDAV dav $ safely $ - inLocation l $ - snd <$> getContentM - case mb of - Nothing -> onerr - Just b -> return b - where - onerr = error "download failed" +retrieve (LegacyChunks _) (Just dav) = retrieveLegacyChunked dav +retrieve _ (Just dav) = fileRetriever $ \d k p -> liftIO $ + meteredWriteFile p d =<< getDAV dav (keyLocation k ++ keyFile k) + +getDAV :: DavHandle -> DavLocation -> IO L.ByteString +getDAV dav l = do + mb <- goDAV dav $ safely $ + inLocation l $ + snd <$> getContentM + case mb of + Nothing -> error "download failed" + Just b -> return b remove :: Maybe DavHandle -> Remover remove Nothing _ = return False @@ -153,52 +139,14 @@ remove (Just dav) k = liftIO $ do checkKey :: Remote -> ChunkConfig -> Maybe DavHandle -> CheckPresent checkKey r _ Nothing _ = error $ name r ++ " not configured" -checkKey r chunkconfig (Just dav) k = either error id <$> go - where - go = do - showAction $ "checking " ++ name r - liftIO $ withStoredFiles chunkconfig k dav onerr check - - check [] = return $ Right True - check (l:ls) = do - v <- goDAV dav $ existsDAV l - if v == Right True - then check ls - else return v - - {- Failed to read the chunkcount file; see if it's missing, - - or if there's a problem accessing it, - - or perhaps this was an intermittent error. -} - onerr f = do - v <- goDAV dav $ existsDAV f - return $ if v == Right True - then Left $ "failed to read " ++ f - else v - -withStoredFiles - :: ChunkConfig - -> Key - -> DavHandle - -> (DavLocation -> IO a) - -> ([DavLocation] -> IO a) - -> IO a -withStoredFiles chunkconfig k dav onerr a = case chunkconfig of - LegacyChunks _ -> do - let chunkcount = keyloc ++ Legacy.chunkCount - v <- goDAV dav $ safely $ - inLocation chunkcount $ - snd <$> getContentM - case v of - Just s -> a $ Legacy.listChunks keyloc $ L8.toString s - Nothing -> do - chunks <- Legacy.probeChunks keyloc $ \f -> - (== Right True) <$> goDAV dav (existsDAV f) - if null chunks - then onerr chunkcount - else a chunks - _ -> a [keyloc] - where - keyloc = keyLocation k ++ keyFile k +checkKey r chunkconfig (Just dav) k = do + showAction $ "checking " ++ name r + case chunkconfig of + LegacyChunks _ -> checkKeyLegacyChunked dav k + _ -> do + v <- liftIO $ goDAV dav $ + existsDAV (keyLocation k ++ keyFile k) + either error return v configUrl :: Remote -> Maybe URLString configUrl r = fixup <$> M.lookup "url" (config r) @@ -349,3 +297,72 @@ prepDAV :: DavUser -> DavPass -> DAVT IO () prepDAV user pass = do setResponseTimeout Nothing -- disable default (5 second!) timeout setCreds user pass + +-- +-- Legacy chunking code, to be removed eventually. +-- + +storeLegacyChunked :: ChunkSize -> Key -> DavHandle -> L.ByteString -> IO Bool +storeLegacyChunked chunksize k dav b = + Legacy.storeChunks k tmp dest storer recorder finalizer + where + storehttp l b' = void $ goDAV dav $ do + maybe noop (void . mkColRecursive) (locationParent l) + inLocation l $ putContentM (contentType, b') + storer locs = Legacy.storeChunked chunksize locs storehttp b + recorder l s = storehttp l (L8.fromString s) + finalizer tmp' dest' = goDAV dav $ + finalizeStore (baseURL dav) tmp' (fromJust $ locationParent dest') + + tmp = keyTmpLocation k + dest = keyLocation k ++ keyFile k + +retrieveLegacyChunked :: DavHandle -> Retriever +retrieveLegacyChunked dav = fileRetriever $ \d k p -> liftIO $ + withStoredFilesLegacyChunked k dav onerr $ \locs -> + Legacy.meteredWriteFileChunks p d locs $ + getDAV dav + where + onerr = error "download failed" + +checkKeyLegacyChunked :: DavHandle -> CheckPresent +checkKeyLegacyChunked dav k = liftIO $ + either error id <$> withStoredFilesLegacyChunked k dav onerr check + where + check [] = return $ Right True + check (l:ls) = do + v <- goDAV dav $ existsDAV l + if v == Right True + then check ls + else return v + + {- Failed to read the chunkcount file; see if it's missing, + - or if there's a problem accessing it, + - or perhaps this was an intermittent error. -} + onerr f = do + v <- goDAV dav $ existsDAV f + return $ if v == Right True + then Left $ "failed to read " ++ f + else v + +withStoredFilesLegacyChunked + :: Key + -> DavHandle + -> (DavLocation -> IO a) + -> ([DavLocation] -> IO a) + -> IO a +withStoredFilesLegacyChunked k dav onerr a = do + let chunkcount = keyloc ++ Legacy.chunkCount + v <- goDAV dav $ safely $ + inLocation chunkcount $ + snd <$> getContentM + case v of + Just s -> a $ Legacy.listChunks keyloc $ L8.toString s + Nothing -> do + chunks <- Legacy.probeChunks keyloc $ \f -> + (== Right True) <$> goDAV dav (existsDAV f) + if null chunks + then onerr chunkcount + else a chunks + where + keyloc = keyLocation k ++ keyFile k From c3f85124757a10791adfe2eca4a201fa2649b7a6 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Fri, 8 Aug 2014 13:40:55 -0400 Subject: [PATCH 37/44] WebDAV: Avoid buffering whole file in memory when downloading. httpBodyRetriever will later also be used by S3 This commit was sponsored by Ethan Aubin. --- Remote/Helper/Http.hs | 20 +++++++++++++++++--- Remote/WebDAV.hs | 30 +++++++++++++----------------- Remote/WebDAV/DavLocation.hs | 7 +++++-- debian/changelog | 3 ++- 4 files changed, 37 insertions(+), 23 deletions(-) diff --git a/Remote/Helper/Http.hs b/Remote/Helper/Http.hs index 945e5cd991..d4882b8c8f 100644 --- a/Remote/Helper/Http.hs +++ b/Remote/Helper/Http.hs @@ -11,7 +11,7 @@ import Common.Annex import Types.StoreRetrieve import Utility.Metered import Remote.Helper.Special -import Network.HTTP.Client (RequestBody(..)) +import Network.HTTP.Client (RequestBody(..), Response, responseBody, BodyReader) import qualified Data.ByteString.Lazy as L import qualified Data.ByteString as S @@ -35,5 +35,19 @@ httpStorer a = fileStorer $ \k f m -> do pop [] = ([], S.empty) pop (c:cs) = (cs, c) ---httpRetriever :: (Key -> Annex Response) -> Retriever ---httpRetriever a = byteRetriever $ \k sink +-- Reads the http body and stores it to the specified file, updating the +-- meter as it goes. +httpBodyRetriever :: FilePath -> MeterUpdate -> Response BodyReader -> IO () +httpBodyRetriever dest meterupdate resp = + bracket (openBinaryFile dest WriteMode) hClose (go zeroBytesProcessed) + where + reader = responseBody resp + go sofar h = do + b <- reader + if S.null b + then return () + else do + let sofar' = addBytesProcessed sofar $ S.length b + S.hPut h b + meterupdate sofar' + go sofar' h diff --git a/Remote/WebDAV.hs b/Remote/WebDAV.hs index e7c08c8006..2c621b6333 100644 --- a/Remote/WebDAV.hs +++ b/Remote/WebDAV.hs @@ -97,7 +97,7 @@ store (LegacyChunks chunksize) (Just dav) = fileStorer $ \k f p -> liftIO $ withMeteredFile f p $ storeLegacyChunked chunksize k dav store _ (Just dav) = httpStorer $ \k reqbody -> liftIO $ goDAV dav $ do let tmp = keyTmpLocation k - let dest = keyLocation k ++ keyFile k + let dest = keyLocation k void $ mkColRecursive tmpDir inLocation tmp $ putContentM' (contentType, reqbody) @@ -117,16 +117,10 @@ retrieve :: ChunkConfig -> Maybe DavHandle -> Retriever retrieve _ Nothing = error "unable to connect" retrieve (LegacyChunks _) (Just dav) = retrieveLegacyChunked dav retrieve _ (Just dav) = fileRetriever $ \d k p -> liftIO $ - meteredWriteFile p d =<< getDAV dav (keyLocation k ++ keyFile k) - -getDAV :: DavHandle -> DavLocation -> IO L.ByteString -getDAV dav l = do - mb <- goDAV dav $ safely $ - inLocation l $ - snd <$> getContentM - case mb of - Nothing -> error "download failed" - Just b -> return b + goDAV dav $ + inLocation (keyLocation k) $ + withContentM $ + httpBodyRetriever d p remove :: Maybe DavHandle -> Remover remove Nothing _ = return False @@ -134,7 +128,7 @@ remove (Just dav) k = liftIO $ do -- Delete the key's whole directory, including any -- legacy chunked files, etc, in a single action. ret <- goDAV dav $ safely $ - inLocation (keyLocation k) delContentM + inLocation (keyDir k) delContentM return (isJust ret) checkKey :: Remote -> ChunkConfig -> Maybe DavHandle -> CheckPresent @@ -145,7 +139,7 @@ checkKey r chunkconfig (Just dav) k = do LegacyChunks _ -> checkKeyLegacyChunked dav k _ -> do v <- liftIO $ goDAV dav $ - existsDAV (keyLocation k ++ keyFile k) + existsDAV (keyLocation k) either error return v configUrl :: Remote -> Maybe URLString @@ -315,13 +309,15 @@ storeLegacyChunked chunksize k dav b = finalizeStore (baseURL dav) tmp' (fromJust $ locationParent dest') tmp = keyTmpLocation k - dest = keyLocation k ++ keyFile k + dest = keyLocation k retrieveLegacyChunked :: DavHandle -> Retriever retrieveLegacyChunked dav = fileRetriever $ \d k p -> liftIO $ withStoredFilesLegacyChunked k dav onerr $ \locs -> - Legacy.meteredWriteFileChunks p d locs $ - getDAV dav + Legacy.meteredWriteFileChunks p d locs $ \l -> + goDAV dav $ + inLocation l $ + snd <$> getContentM where onerr = error "download failed" @@ -365,4 +361,4 @@ withStoredFilesLegacyChunked k dav onerr a = do then onerr chunkcount else a chunks where - keyloc = keyLocation k ++ keyFile k + keyloc = keyLocation k diff --git a/Remote/WebDAV/DavLocation.hs b/Remote/WebDAV/DavLocation.hs index 3b52f3a64c..33c3aa0790 100644 --- a/Remote/WebDAV/DavLocation.hs +++ b/Remote/WebDAV/DavLocation.hs @@ -29,8 +29,8 @@ inLocation :: (MonadIO m) => DavLocation -> DAVT m a -> DAVT m a inLocation d = inDAVLocation ( d) {- The directory where files(s) for a key are stored. -} -keyLocation :: Key -> DavLocation -keyLocation k = addTrailingPathSeparator $ hashdir keyFile k +keyDir :: Key -> DavLocation +keyDir k = addTrailingPathSeparator $ hashdir keyFile k where #ifndef mingw32_HOST_OS hashdir = hashDirLower k @@ -38,6 +38,9 @@ keyLocation k = addTrailingPathSeparator $ hashdir keyFile k hashdir = replace "\\" "/" (hashDirLower k) #endif +keyLocation :: Key -> DavLocation +keyLocation k = keyDir k ++ keyFile k + {- Where we store temporary data for a key as it's being uploaded. -} keyTmpLocation :: Key -> DavLocation keyTmpLocation = addTrailingPathSeparator . tmpLocation . keyFile diff --git a/debian/changelog b/debian/changelog index 26c30451fc..3a8ab302e1 100644 --- a/debian/changelog +++ b/debian/changelog @@ -18,7 +18,8 @@ git-annex (5.20140718) UNRELEASED; urgency=medium * Display exception message when a transfer fails due to an exception. * WebDAV: Sped up by avoiding making multiple http connections when storing a file. - * WebDAV: Avoid buffering whole file in memory when uploading. + * WebDAV: Avoid buffering whole file in memory when uploading and + downloading. * WebDAV: Dropped support for DAV before 1.0. * testremote: New command to test uploads/downloads to a remote. * Dropping an object from a bup special remote now deletes the git branch From 2d344edf60271e233a42a1444a8eaf7f79266bed Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Fri, 8 Aug 2014 13:47:44 -0400 Subject: [PATCH 38/44] increave dav build-dep --- debian/control | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/debian/control b/debian/control index 46adc02cf0..522b7c5cce 100644 --- a/debian/control +++ b/debian/control @@ -14,7 +14,7 @@ Build-Depends: libghc-dataenc-dev, libghc-utf8-string-dev, libghc-hs3-dev (>= 0.5.6), - libghc-dav-dev (>= 0.8) [amd64 i386 kfreebsd-amd64 kfreebsd-i386 powerpc], + libghc-dav-dev (>= 1.0) [amd64 i386 kfreebsd-amd64 kfreebsd-i386 powerpc], libghc-quickcheck2-dev, libghc-monad-control-dev (>= 0.3), libghc-exceptions-dev, From ec11e0b89ad96f2eda0d213e46a342a1c7e370cd Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Fri, 8 Aug 2014 13:55:09 -0400 Subject: [PATCH 39/44] fix build warning --- Assistant/Threads/NetWatcher.hs | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/Assistant/Threads/NetWatcher.hs b/Assistant/Threads/NetWatcher.hs index f8c456aacf..09cddd3245 100644 --- a/Assistant/Threads/NetWatcher.hs +++ b/Assistant/Threads/NetWatcher.hs @@ -117,7 +117,7 @@ listenNMConnections client setconnected = #else listen client matcher #endif - $ \event -> mapM_ handle + $ \event -> mapM_ handleevent (map dictionaryItems $ mapMaybe fromVariant $ signalBody event) where matcher = matchAny @@ -128,7 +128,7 @@ listenNMConnections client setconnected = nm_activatingconnection_key = toVariant ("ActivatingConnection" :: String) noconnections = Just $ toVariant $ toVariant ([] :: [ObjectPath]) rootconnection = Just $ toVariant $ toVariant $ objectPath_ "/" - handle m + handleevent m | lookup nm_active_connections_key m == noconnections = setconnected False | lookup nm_activatingconnection_key m == rootconnection = @@ -150,7 +150,7 @@ listenWicdConnections client setconnected = do match connmatcher $ \event -> when (any (== wicd_success) (signalBody event)) $ setconnected True - match statusmatcher $ \event -> handle (signalBody event) + match statusmatcher $ \event -> handleevent (signalBody event) where connmatcher = matchAny { matchInterface = Just "org.wicd.daemon" @@ -162,7 +162,7 @@ listenWicdConnections client setconnected = do } wicd_success = toVariant ("success" :: String) wicd_disconnected = toVariant [toVariant ("" :: String)] - handle status + handleevent status | any (== wicd_disconnected) status = setconnected False | otherwise = noop match matcher a = From 6cb9e5c32f46d0d7bcfdc96c7d05a93c08e9d20a Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Fri, 8 Aug 2014 14:19:08 -0400 Subject: [PATCH 40/44] show missing url= parameter error sooner --- Remote/WebDAV.hs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/Remote/WebDAV.hs b/Remote/WebDAV.hs index 2c621b6333..af3af79020 100644 --- a/Remote/WebDAV.hs +++ b/Remote/WebDAV.hs @@ -77,8 +77,9 @@ gen r u c gc = new <$> remoteCost gc expensiveRemoteCost webdavSetup :: Maybe UUID -> Maybe CredPair -> RemoteConfig -> Annex (RemoteConfig, UUID) webdavSetup mu mcreds c = do u <- maybe (liftIO genUUID) return mu - let url = fromMaybe (error "Specify url=") $ - M.lookup "url" c + url <- case M.lookup "url" c of + Nothing -> error "Specify url=" + Just url -> return url c' <- encryptionSetup c creds <- maybe (getCreds c' u) (return . Just) mcreds testDav url creds From 0260ee43e63c98dc43a6c47603f702d2b0449b17 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Fri, 8 Aug 2014 14:57:05 -0400 Subject: [PATCH 41/44] fix removeKey when not present --- Remote/WebDAV.hs | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/Remote/WebDAV.hs b/Remote/WebDAV.hs index af3af79020..d344e0a745 100644 --- a/Remote/WebDAV.hs +++ b/Remote/WebDAV.hs @@ -128,9 +128,16 @@ remove Nothing _ = return False remove (Just dav) k = liftIO $ do -- Delete the key's whole directory, including any -- legacy chunked files, etc, in a single action. - ret <- goDAV dav $ safely $ - inLocation (keyDir k) delContentM - return (isJust ret) + let d = keyDir k + goDAV dav $ do + v <- safely $ inLocation d delContentM + case v of + Just _ -> return True + Nothing -> do + v' <- existsDAV d + case v' of + Right False -> return True + _ -> return False checkKey :: Remote -> ChunkConfig -> Maybe DavHandle -> CheckPresent checkKey r _ Nothing _ = error $ name r ++ " not configured" From 1f8c170c953cefa8eccb6c3ba50b1b05494060b1 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Fri, 8 Aug 2014 17:17:23 -0400 Subject: [PATCH 42/44] http-conduit not used for dav --- git-annex.cabal | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/git-annex.cabal b/git-annex.cabal index be1cdbb968..5154b27dd8 100644 --- a/git-annex.cabal +++ b/git-annex.cabal @@ -142,7 +142,7 @@ Executable git-annex if flag(WebDAV) Build-Depends: DAV (>= 1.0), - http-client, http-conduit, http-types + http-client, http-types CPP-Options: -DWITH_WEBDAV if flag(Assistant) && ! os(solaris) From 1dd3232e8e90b2aa49b670284eb90f7d500eae57 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Fri, 8 Aug 2014 17:17:36 -0400 Subject: [PATCH 43/44] check for 200 response --- Remote/Helper/Http.hs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/Remote/Helper/Http.hs b/Remote/Helper/Http.hs index d4882b8c8f..f1d576d1c9 100644 --- a/Remote/Helper/Http.hs +++ b/Remote/Helper/Http.hs @@ -11,7 +11,8 @@ import Common.Annex import Types.StoreRetrieve import Utility.Metered import Remote.Helper.Special -import Network.HTTP.Client (RequestBody(..), Response, responseBody, BodyReader) +import Network.HTTP.Client (RequestBody(..), Response, responseStatus, responseBody, BodyReader) +import Network.HTTP.Types import qualified Data.ByteString.Lazy as L import qualified Data.ByteString as S @@ -38,8 +39,9 @@ httpStorer a = fileStorer $ \k f m -> do -- Reads the http body and stores it to the specified file, updating the -- meter as it goes. httpBodyRetriever :: FilePath -> MeterUpdate -> Response BodyReader -> IO () -httpBodyRetriever dest meterupdate resp = - bracket (openBinaryFile dest WriteMode) hClose (go zeroBytesProcessed) +httpBodyRetriever dest meterupdate resp + | responseStatus resp /= ok200 = error $ show $ responseStatus resp + | otherwise = bracket (openBinaryFile dest WriteMode) hClose (go zeroBytesProcessed) where reader = responseBody resp go sofar h = do From 4f1ba9a23d6ba36501deeeac3994d3c1681454a0 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Fri, 8 Aug 2014 19:18:08 -0400 Subject: [PATCH 44/44] fix checkPresent error handling for non-present local git repos guardUsable r (error "foo") *returned* an error, rather than throwing it --- Remote/GCrypt.hs | 6 +++--- Remote/Git.hs | 23 ++++++++++++----------- Remote/Helper/Git.hs | 6 +++--- 3 files changed, 18 insertions(+), 17 deletions(-) diff --git a/Remote/GCrypt.hs b/Remote/GCrypt.hs index 8891977f73..5edb3d0227 100644 --- a/Remote/GCrypt.hs +++ b/Remote/GCrypt.hs @@ -306,7 +306,7 @@ setGcryptEncryption c remotename = do store :: Remote -> Remote.Rsync.RsyncOpts -> Storer store r rsyncopts | not $ Git.repoIsUrl (repo r) = - byteStorer $ \k b p -> guardUsable (repo r) False $ liftIO $ do + byteStorer $ \k b p -> guardUsable (repo r) (return False) $ liftIO $ do let tmpdir = Git.repoLocation (repo r) "tmp" keyFile k void $ tryIO $ createDirectoryIfMissing True tmpdir let tmpf = tmpdir keyFile k @@ -323,7 +323,7 @@ store r rsyncopts retrieve :: Remote -> Remote.Rsync.RsyncOpts -> Retriever retrieve r rsyncopts | not $ Git.repoIsUrl (repo r) = byteRetriever $ \k sink -> - guardUsable (repo r) False $ + guardUsable (repo r) (return False) $ sink =<< liftIO (L.readFile $ gCryptLocation r k) | Git.repoIsSsh (repo r) = if isShell r then fileRetriever $ \f k p -> @@ -335,7 +335,7 @@ retrieve r rsyncopts remove :: Remote -> Remote.Rsync.RsyncOpts -> Remover remove r rsyncopts k - | not $ Git.repoIsUrl (repo r) = guardUsable (repo r) False $ + | not $ Git.repoIsUrl (repo r) = guardUsable (repo r) (return False) $ liftIO $ Remote.Directory.removeDirGeneric (Git.repoLocation (repo r)) (parentDir (gCryptLocation r k)) | Git.repoIsSsh (repo r) = shellOrRsync r removeshell removersync | otherwise = unsupportedUrl diff --git a/Remote/Git.hs b/Remote/Git.hs index 34c60d98fe..20955ff5ba 100644 --- a/Remote/Git.hs +++ b/Remote/Git.hs @@ -319,14 +319,15 @@ keyUrls r key = map tourl locs' dropKey :: Remote -> Key -> Annex Bool dropKey r key | not $ Git.repoIsUrl (repo r) = - guardUsable (repo r) False $ commitOnCleanup r $ onLocal r $ do - ensureInitialized - whenM (Annex.Content.inAnnex key) $ do - Annex.Content.lockContent key $ - Annex.Content.removeAnnex key - logStatus key InfoMissing - Annex.Content.saveState True - return True + guardUsable (repo r) (return False) $ + commitOnCleanup r $ onLocal r $ do + ensureInitialized + whenM (Annex.Content.inAnnex key) $ do + Annex.Content.lockContent key $ + Annex.Content.removeAnnex key + logStatus key InfoMissing + Annex.Content.saveState True + return True | Git.repoIsHttp (repo r) = error "dropping from http remote not supported" | otherwise = commitOnCleanup r $ Ssh.dropKey (repo r) key @@ -335,7 +336,7 @@ copyFromRemote :: Remote -> Key -> AssociatedFile -> FilePath -> MeterUpdate -> copyFromRemote r key file dest _p = copyFromRemote' r key file dest copyFromRemote' :: Remote -> Key -> AssociatedFile -> FilePath -> Annex Bool copyFromRemote' r key file dest - | not $ Git.repoIsUrl (repo r) = guardUsable (repo r) False $ do + | not $ Git.repoIsUrl (repo r) = guardUsable (repo r) (return False) $ do params <- Ssh.rsyncParams r Download u <- getUUID -- run copy from perspective of remote @@ -409,7 +410,7 @@ copyFromRemote' r key file dest copyFromRemoteCheap :: Remote -> Key -> FilePath -> Annex Bool #ifndef mingw32_HOST_OS copyFromRemoteCheap r key file - | not $ Git.repoIsUrl (repo r) = guardUsable (repo r) False $ do + | not $ Git.repoIsUrl (repo r) = guardUsable (repo r) (return False) $ do loc <- liftIO $ gitAnnexLocation key (repo r) $ fromJust $ remoteGitConfig $ gitconfig r liftIO $ catchBoolIO $ createSymbolicLink loc file >> return True @@ -427,7 +428,7 @@ copyFromRemoteCheap _ _ _ = return False copyToRemote :: Remote -> Key -> AssociatedFile -> MeterUpdate -> Annex Bool copyToRemote r key file p | not $ Git.repoIsUrl (repo r) = - guardUsable (repo r) False $ commitOnCleanup r $ + guardUsable (repo r) (return False) $ commitOnCleanup r $ copylocal =<< Annex.Content.prepSendAnnex key | Git.repoIsSsh (repo r) = commitOnCleanup r $ Annex.Content.sendAnnex key noop $ \object -> do diff --git a/Remote/Helper/Git.hs b/Remote/Helper/Git.hs index d76cb2ee78..b405fd3584 100644 --- a/Remote/Helper/Git.hs +++ b/Remote/Helper/Git.hs @@ -26,7 +26,7 @@ availabilityCalc r {- Avoids performing an action on a local repository that's not usable. - Does not check that the repository is still available on disk. -} -guardUsable :: Git.Repo -> a -> Annex a -> Annex a -guardUsable r onerr a - | Git.repoIsLocalUnknown r = return onerr +guardUsable :: Git.Repo -> Annex a -> Annex a -> Annex a +guardUsable r fallback a + | Git.repoIsLocalUnknown r = fallback | otherwise = a