better streaming while encrypting/decrypting

Both the directory and webdav special remotes used to have to buffer
the whole file contents before it could be decrypted, as they read
from chunks. Now the chunks are streamed through gpg with no buffering.
This commit is contained in:
Joey Hess 2012-11-18 15:27:44 -04:00
parent 9ac7473c67
commit 81379bb29c
11 changed files with 79 additions and 75 deletions

View file

@ -40,9 +40,9 @@ setRemoteCredPair c storage = go =<< getRemoteCredPair c storage
mcipher <- remoteCipher c
case (mcipher, credPairRemoteKey storage) of
(Just cipher, Just key) | isTrustedCipher c -> do
s <- liftIO $ withEncryptedContent cipher
(return $ L.pack $ encodeCredPair creds)
(return . L.unpack)
s <- liftIO $ encrypt cipher
(feedBytes $ L.pack $ encodeCredPair creds)
(readBytes $ return . L.unpack)
return $ M.insert key (toB64 s) c
_ -> do
writeCacheCredPair creds storage
@ -62,7 +62,9 @@ getRemoteCredPair c storage = maybe fromcache (return . Just) =<< fromenv
mcipher <- remoteCipher c
case (M.lookup key c, mcipher) of
(Just enccreds, Just cipher) -> do
creds <- liftIO $ decrypt enccreds cipher
creds <- liftIO $ decrypt cipher
(feedBytes $ L.pack $ fromB64 enccreds)
(readBytes $ return . L.unpack)
case decodeCredPair creds of
Just credpair -> do
writeCacheCredPair credpair storage
@ -70,9 +72,6 @@ getRemoteCredPair c storage = maybe fromcache (return . Just) =<< fromenv
_ -> do error $ "bad " ++ key
_ -> return Nothing
Nothing -> return Nothing
decrypt enccreds cipher = withDecryptedContent cipher
(return $ L.pack $ fromB64 enccreds)
(return . L.unpack)
{- Gets a CredPair from the environment. -}
getEnvCredPair :: CredPairStorage -> IO (Maybe CredPair)

View file

@ -18,10 +18,11 @@ module Crypto (
describeCipher,
decryptCipher,
encryptKey,
withEncryptedHandle,
withDecryptedHandle,
withEncryptedContent,
withDecryptedContent,
feedFile,
feedBytes,
readBytes,
encrypt,
decrypt,
prop_hmacWithCipher_sane
) where
@ -90,10 +91,9 @@ describeCipher (EncryptedCipher _ (KeyIds ks)) =
encryptCipher :: Cipher -> KeyIds -> IO StorableCipher
encryptCipher (Cipher c) (KeyIds ks) = do
let ks' = nub $ sort ks -- gpg complains about duplicate recipient keyids
encipher <- Gpg.pipeStrict (encrypt++recipients ks') c
encipher <- Gpg.pipeStrict ([ Params "--encrypt" ] ++ recipients ks') c
return $ EncryptedCipher encipher (KeyIds ks')
where
encrypt = [ Params "--encrypt" ]
recipients l = force_recipients :
concatMap (\k -> [Param "--recipient", Param k]) l
-- Force gpg to only encrypt to the specified
@ -103,9 +103,7 @@ encryptCipher (Cipher c) (KeyIds ks) = do
{- Decrypting an EncryptedCipher is expensive; the Cipher should be cached. -}
decryptCipher :: StorableCipher -> IO Cipher
decryptCipher (SharedCipher t) = return $ Cipher t
decryptCipher (EncryptedCipher t _) = Cipher <$> Gpg.pipeStrict decrypt t
where
decrypt = [ Param "--decrypt" ]
decryptCipher (EncryptedCipher t _) = Cipher <$> Gpg.pipeStrict [ Param "--decrypt" ] t
{- Generates an encrypted form of a Key. The encryption does not need to be
- reversable, nor does it need to be the same type of encryption used
@ -118,31 +116,27 @@ encryptKey c k = Key
, keyMtime = Nothing -- to avoid leaking data
}
{- Runs an action, passing it a handle from which it can
- stream encrypted content. -}
withEncryptedHandle :: Cipher -> IO L.ByteString -> (Handle -> IO a) -> IO a
withEncryptedHandle = Gpg.passphraseHandle [Params "--symmetric --force-mdc"] . cipherPassphrase
type Feeder = Handle -> IO ()
type Reader a = Handle -> IO a
{- Runs an action, passing it a handle from which it can
- stream decrypted content. -}
withDecryptedHandle :: Cipher -> IO L.ByteString -> (Handle -> IO a) -> IO a
withDecryptedHandle = Gpg.passphraseHandle [Param "--decrypt"] . cipherPassphrase
feedFile :: FilePath -> Feeder
feedFile f h = L.hPut h =<< L.readFile f
{- Streams encrypted content to an action. -}
withEncryptedContent :: Cipher -> IO L.ByteString -> (L.ByteString -> IO a) -> IO a
withEncryptedContent = pass withEncryptedHandle
feedBytes :: L.ByteString -> Feeder
feedBytes = flip L.hPut
{- Streams decrypted content to an action. -}
withDecryptedContent :: Cipher -> IO L.ByteString -> (L.ByteString -> IO a) -> IO a
withDecryptedContent = pass withDecryptedHandle
readBytes :: (L.ByteString -> IO a) -> Reader a
readBytes a h = L.hGetContents h >>= a
pass
:: (Cipher -> IO L.ByteString -> (Handle -> IO a) -> IO a)
-> Cipher
-> IO L.ByteString
-> (L.ByteString -> IO a)
-> IO a
pass to n s a = to n s $ a <=< L.hGetContents
{- Runs a Feeder action, that generates content that is encrypted with the
- Cipher, and read by the Reader action. -}
encrypt :: Cipher -> Feeder -> Reader a -> IO a
encrypt = Gpg.feedRead [Params "--symmetric --force-mdc"] . cipherPassphrase
{- Runs a Feeder action, that generates content that is decrypted with the
- Cipher, and read by the Reader action. -}
decrypt :: Cipher -> Feeder -> Reader a -> IO a
decrypt = Gpg.feedRead [Param "--decrypt"] . cipherPassphrase
hmacWithCipher :: Cipher -> String -> String
hmacWithCipher c = hmacWithCipher' (cipherHmac c)

View file

@ -125,7 +125,7 @@ storeEncrypted r buprepo (cipher, enck) k _p = do
src <- inRepo $ gitAnnexLocation k
params <- bupSplitParams r buprepo enck []
liftIO $ catchBoolIO $
withEncryptedHandle cipher (L.readFile src) $ \h ->
encrypt cipher (feedFile src) $ \h ->
pipeBup params (Just h) Nothing
retrieve :: BupRepo -> Key -> AssociatedFile -> FilePath -> Annex Bool
@ -141,7 +141,8 @@ retrieveCheap _ _ _ = return False
retrieveEncrypted :: BupRepo -> (Cipher, Key) -> Key -> FilePath -> Annex Bool
retrieveEncrypted buprepo (cipher, enck) _ f = liftIO $ catchBoolIO $
withHandle StdoutHandle createProcessSuccess p $ \h -> do
withDecryptedContent cipher (L.hGetContents h) $ L.writeFile f
decrypt cipher (\toh -> L.hPut toh =<< L.hGetContents h) $
readBytes $ L.writeFile f
return True
where
params = bupParams "join" buprepo [Param $ bupRef enck]

View file

@ -118,13 +118,13 @@ storeEncrypted d chunksize (cipher, enck) k p = do
src <- inRepo $ gitAnnexLocation k
metered (Just p) k $ \meterupdate ->
storeHelper d chunksize enck $ \dests ->
withEncryptedContent cipher (L.readFile src) $ \s ->
encrypt cipher (feedFile src) $ readBytes $ \b ->
case chunksize of
Nothing -> do
let dest = Prelude.head dests
meteredWriteFile meterupdate dest s
meteredWriteFile meterupdate dest b
return [dest]
Just _ -> storeSplit meterupdate chunksize dests s
Just _ -> storeSplit meterupdate chunksize dests b
{- Splits a ByteString into chunks and writes to dests, obeying configured
- chunk size (not to be confused with the L.ByteString chunk size).
@ -192,9 +192,11 @@ retrieveEncrypted :: FilePath -> ChunkSize -> (Cipher, Key) -> Key -> FilePath -
retrieveEncrypted d chunksize (cipher, enck) k f = metered Nothing k $ \meterupdate ->
liftIO $ withStoredFiles chunksize d enck $ \files ->
catchBoolIO $ do
withDecryptedContent cipher (L.concat <$> mapM L.readFile files) $
meteredWriteFile meterupdate f
decrypt cipher (feeder files) $
readBytes $ meteredWriteFile meterupdate f
return True
where
feeder files h = forM_ files $ \file -> L.hPut h =<< L.readFile file
retrieveCheap :: FilePath -> ChunkSize -> Key -> FilePath -> Annex Bool
retrieveCheap _ (Just _) _ _ = return False -- no cheap retrieval for chunks

View file

@ -81,12 +81,11 @@ remoteCipher c = go $ extractCipher c
cache <- Annex.getState Annex.ciphers
case M.lookup encipher cache of
Just cipher -> return $ Just cipher
Nothing -> decrypt encipher cache
decrypt encipher cache = do
showNote "gpg"
cipher <- liftIO $ decryptCipher encipher
Annex.changeState (\s -> s { Annex.ciphers = M.insert encipher cipher cache })
return $ Just cipher
Nothing -> do
showNote "gpg"
cipher <- liftIO $ decryptCipher encipher
Annex.changeState (\s -> s { Annex.ciphers = M.insert encipher cipher cache })
return $ Just cipher
{- Checks if there is a trusted (non-shared) cipher. -}
isTrustedCipher :: RemoteConfig -> Bool
@ -96,9 +95,9 @@ isTrustedCipher c =
{- Gets encryption Cipher, and encrypted version of Key. -}
cipherKey :: Maybe RemoteConfig -> Key -> Annex (Maybe (Cipher, Key))
cipherKey Nothing _ = return Nothing
cipherKey (Just c) k = maybe Nothing encrypt <$> remoteCipher c
cipherKey (Just c) k = maybe Nothing make <$> remoteCipher c
where
encrypt ciphertext = Just (ciphertext, encryptKey ciphertext k)
make ciphertext = Just (ciphertext, encryptKey ciphertext k)
{- Stores an StorableCipher in a remote's configuration. -}
storeCipher :: RemoteConfig -> StorableCipher -> RemoteConfig

View file

@ -108,7 +108,8 @@ store h k _f _p = do
storeEncrypted :: String -> (Cipher, Key) -> Key -> MeterUpdate -> Annex Bool
storeEncrypted h (cipher, enck) k _p = withTmp enck $ \tmp -> do
src <- inRepo $ gitAnnexLocation k
liftIO $ withEncryptedContent cipher (L.readFile src) $ L.writeFile tmp
liftIO $ encrypt cipher (feedFile src) $
readBytes $ L.writeFile tmp
runHook h "store" enck (Just tmp) $ return True
retrieve :: String -> Key -> AssociatedFile -> FilePath -> Annex Bool
@ -120,7 +121,8 @@ retrieveCheap _ _ _ = return False
retrieveEncrypted :: String -> (Cipher, Key) -> Key -> FilePath -> Annex Bool
retrieveEncrypted h (cipher, enck) _ f = withTmp enck $ \tmp ->
runHook h "retrieve" enck (Just tmp) $ liftIO $ catchBoolIO $ do
withDecryptedContent cipher (L.readFile tmp) $ L.writeFile f
decrypt cipher (feedFile tmp) $
readBytes $ L.writeFile f
return True
remove :: String -> Key -> Annex Bool

View file

@ -110,7 +110,8 @@ store o k _f p = rsyncSend o p k <=< inRepo $ gitAnnexLocation k
storeEncrypted :: RsyncOpts -> (Cipher, Key) -> Key -> MeterUpdate -> Annex Bool
storeEncrypted o (cipher, enck) k p = withTmp enck $ \tmp -> do
src <- inRepo $ gitAnnexLocation k
liftIO $ withEncryptedContent cipher (L.readFile src) $ L.writeFile tmp
liftIO $ decrypt cipher (feedFile src) $
readBytes $ L.writeFile tmp
rsyncSend o p enck tmp
retrieve :: RsyncOpts -> Key -> AssociatedFile -> FilePath -> Annex Bool
@ -128,7 +129,8 @@ retrieveEncrypted :: RsyncOpts -> (Cipher, Key) -> Key -> FilePath -> Annex Bool
retrieveEncrypted o (cipher, enck) _ f = withTmp enck $ \tmp -> do
ifM (retrieve o enck undefined tmp)
( liftIO $ catchBoolIO $ do
withDecryptedContent cipher (L.readFile tmp) $ L.writeFile f
decrypt cipher (feedFile tmp) $
readBytes $ L.writeFile f
return True
, return False
)

View file

@ -122,7 +122,8 @@ storeEncrypted r (cipher, enck) k _p = s3Action r False $ \(conn, bucket) ->
-- (An alternative would be chunking to to a constant size.)
withTmp enck $ \tmp -> do
f <- inRepo $ gitAnnexLocation k
liftIO $ withEncryptedContent cipher (L.readFile f) $ L.writeFile tmp
liftIO $ encrypt cipher (feedFile f) $
readBytes $ L.writeFile tmp
res <- liftIO $ storeHelper (conn, bucket) r enck tmp
s3Bool res
@ -162,8 +163,8 @@ retrieveEncrypted :: Remote -> (Cipher, Key) -> Key -> FilePath -> Annex Bool
retrieveEncrypted r (cipher, enck) _ f = s3Action r False $ \(conn, bucket) -> do
res <- liftIO $ getObject conn $ bucketKey r bucket enck
case res of
Right o -> liftIO $
withDecryptedContent cipher (return $ obj_data o) $ \content -> do
Right o -> liftIO $ decrypt cipher (feedBytes $ obj_data o) $
readBytes $ \content -> do
L.writeFile f content
return True
Left e -> s3Warning e

View file

@ -93,8 +93,8 @@ storeEncrypted :: Remote -> (Cipher, Key) -> Key -> MeterUpdate -> Annex Bool
storeEncrypted r (cipher, enck) k _p = davAction r False $ \(baseurl, user, pass) -> do
let url = davLocation baseurl enck
f <- inRepo $ gitAnnexLocation k
liftIO $ withEncryptedContent cipher (L.readFile f) $
storeHelper r url user pass
liftIO $ encrypt cipher (feedFile f) $
readBytes $ storeHelper r url user pass
storeHelper :: Remote -> DavUrl -> DavUser -> DavPass -> L.ByteString -> IO Bool
storeHelper r urlbase user pass b = catchBoolIO $ do
@ -133,18 +133,20 @@ retrieveEncrypted :: Remote -> (Cipher, Key) -> Key -> FilePath -> Annex Bool
retrieveEncrypted r (cipher, enck) k d = metered Nothing k $ \meterupdate ->
davAction r False $ \(baseurl, user, pass) -> liftIO $ catchBoolIO $
withStoredFiles r enck baseurl user pass onerr $ \urls -> do
withDecryptedContent cipher (L.concat <$> feeder user pass urls []) $
meteredWriteFile meterupdate d
decrypt cipher (feeder user pass urls) $
readBytes $ meteredWriteFile meterupdate d
return True
where
onerr _ = return False
feeder _ _ [] c = return $ reverse c
feeder user pass (url:urls) c = do
feeder _ _ [] _ = noop
feeder user pass (url:urls) h = do
mb <- davGetUrlContent url user pass
case mb of
Nothing -> throwIO "download failed"
Just b -> feeder user pass urls (b:c)
Just b -> do
L.hPut h b
feeder user pass urls h
remove :: Remote -> Key -> Annex Bool
remove r k = davAction r False $ \(baseurl, user, pass) -> liftIO $ do

View file

@ -7,7 +7,6 @@
module Utility.Gpg where
import qualified Data.ByteString.Lazy as L
import System.Posix.Types
import Control.Applicative
import Control.Concurrent
@ -54,14 +53,15 @@ pipeStrict params input = do
hClose to
hGetContentsStrict from
{- Runs gpg with some parameters, first feeding it a passphrase via
- --passphrase-fd, then feeding it an input, and passing a handle
- to its output to an action.
{- Runs gpg with some parameters. First sends it a passphrase via
- --passphrase-fd. Then runs a feeder action that is passed a handle and
- should write to it all the data to input to gpg. Finally, runs
- a reader action that is passed a handle to gpg's output.
-
- Note that to avoid deadlock with the cleanup stage,
- the action must fully consume gpg's input before returning. -}
passphraseHandle :: [CommandParam] -> String -> IO L.ByteString -> (Handle -> IO a) -> IO a
passphraseHandle params passphrase a b = do
- the reader must fully consume gpg's input before returning. -}
feedRead :: [CommandParam] -> String -> (Handle -> IO ()) -> (Handle -> IO a) -> IO a
feedRead params passphrase feeder reader = do
-- pipe the passphrase into gpg on a fd
(frompipe, topipe) <- createPipe
void $ forkIO $ do
@ -77,9 +77,9 @@ passphraseHandle params passphrase a b = do
where
go (to, from) = do
void $ forkIO $ do
L.hPut to =<< a
feeder to
hClose to
b from
reader from
{- Finds gpg public keys matching some string. (Could be an email address,
- a key id, or a name. -}

2
debian/changelog vendored
View file

@ -7,6 +7,8 @@ git-annex (3.20121113) UNRELEASED; urgency=low
added, including when new repository configurations are pushed in from
remotes.
* OSX: Fix RunAtLoad value in plist file.
* Getting a file from chunked directory special remotes no longer buffers
it all in memory.
-- Joey Hess <joeyh@debian.org> Tue, 13 Nov 2012 13:17:07 -0400