better streaming while encrypting/decrypting

Both the directory and webdav special remotes used to have to buffer
the whole file contents before it could be decrypted, as they read
from chunks. Now the chunks are streamed through gpg with no buffering.
This commit is contained in:
Joey Hess 2012-11-18 15:27:44 -04:00
parent 9ac7473c67
commit 81379bb29c
11 changed files with 79 additions and 75 deletions

View file

@ -40,9 +40,9 @@ setRemoteCredPair c storage = go =<< getRemoteCredPair c storage
mcipher <- remoteCipher c mcipher <- remoteCipher c
case (mcipher, credPairRemoteKey storage) of case (mcipher, credPairRemoteKey storage) of
(Just cipher, Just key) | isTrustedCipher c -> do (Just cipher, Just key) | isTrustedCipher c -> do
s <- liftIO $ withEncryptedContent cipher s <- liftIO $ encrypt cipher
(return $ L.pack $ encodeCredPair creds) (feedBytes $ L.pack $ encodeCredPair creds)
(return . L.unpack) (readBytes $ return . L.unpack)
return $ M.insert key (toB64 s) c return $ M.insert key (toB64 s) c
_ -> do _ -> do
writeCacheCredPair creds storage writeCacheCredPair creds storage
@ -62,7 +62,9 @@ getRemoteCredPair c storage = maybe fromcache (return . Just) =<< fromenv
mcipher <- remoteCipher c mcipher <- remoteCipher c
case (M.lookup key c, mcipher) of case (M.lookup key c, mcipher) of
(Just enccreds, Just cipher) -> do (Just enccreds, Just cipher) -> do
creds <- liftIO $ decrypt enccreds cipher creds <- liftIO $ decrypt cipher
(feedBytes $ L.pack $ fromB64 enccreds)
(readBytes $ return . L.unpack)
case decodeCredPair creds of case decodeCredPair creds of
Just credpair -> do Just credpair -> do
writeCacheCredPair credpair storage writeCacheCredPair credpair storage
@ -70,9 +72,6 @@ getRemoteCredPair c storage = maybe fromcache (return . Just) =<< fromenv
_ -> do error $ "bad " ++ key _ -> do error $ "bad " ++ key
_ -> return Nothing _ -> return Nothing
Nothing -> return Nothing Nothing -> return Nothing
decrypt enccreds cipher = withDecryptedContent cipher
(return $ L.pack $ fromB64 enccreds)
(return . L.unpack)
{- Gets a CredPair from the environment. -} {- Gets a CredPair from the environment. -}
getEnvCredPair :: CredPairStorage -> IO (Maybe CredPair) getEnvCredPair :: CredPairStorage -> IO (Maybe CredPair)

View file

@ -18,10 +18,11 @@ module Crypto (
describeCipher, describeCipher,
decryptCipher, decryptCipher,
encryptKey, encryptKey,
withEncryptedHandle, feedFile,
withDecryptedHandle, feedBytes,
withEncryptedContent, readBytes,
withDecryptedContent, encrypt,
decrypt,
prop_hmacWithCipher_sane prop_hmacWithCipher_sane
) where ) where
@ -90,10 +91,9 @@ describeCipher (EncryptedCipher _ (KeyIds ks)) =
encryptCipher :: Cipher -> KeyIds -> IO StorableCipher encryptCipher :: Cipher -> KeyIds -> IO StorableCipher
encryptCipher (Cipher c) (KeyIds ks) = do encryptCipher (Cipher c) (KeyIds ks) = do
let ks' = nub $ sort ks -- gpg complains about duplicate recipient keyids let ks' = nub $ sort ks -- gpg complains about duplicate recipient keyids
encipher <- Gpg.pipeStrict (encrypt++recipients ks') c encipher <- Gpg.pipeStrict ([ Params "--encrypt" ] ++ recipients ks') c
return $ EncryptedCipher encipher (KeyIds ks') return $ EncryptedCipher encipher (KeyIds ks')
where where
encrypt = [ Params "--encrypt" ]
recipients l = force_recipients : recipients l = force_recipients :
concatMap (\k -> [Param "--recipient", Param k]) l concatMap (\k -> [Param "--recipient", Param k]) l
-- Force gpg to only encrypt to the specified -- Force gpg to only encrypt to the specified
@ -103,9 +103,7 @@ encryptCipher (Cipher c) (KeyIds ks) = do
{- Decrypting an EncryptedCipher is expensive; the Cipher should be cached. -} {- Decrypting an EncryptedCipher is expensive; the Cipher should be cached. -}
decryptCipher :: StorableCipher -> IO Cipher decryptCipher :: StorableCipher -> IO Cipher
decryptCipher (SharedCipher t) = return $ Cipher t decryptCipher (SharedCipher t) = return $ Cipher t
decryptCipher (EncryptedCipher t _) = Cipher <$> Gpg.pipeStrict decrypt t decryptCipher (EncryptedCipher t _) = Cipher <$> Gpg.pipeStrict [ Param "--decrypt" ] t
where
decrypt = [ Param "--decrypt" ]
{- Generates an encrypted form of a Key. The encryption does not need to be {- Generates an encrypted form of a Key. The encryption does not need to be
- reversable, nor does it need to be the same type of encryption used - reversable, nor does it need to be the same type of encryption used
@ -118,31 +116,27 @@ encryptKey c k = Key
, keyMtime = Nothing -- to avoid leaking data , keyMtime = Nothing -- to avoid leaking data
} }
{- Runs an action, passing it a handle from which it can type Feeder = Handle -> IO ()
- stream encrypted content. -} type Reader a = Handle -> IO a
withEncryptedHandle :: Cipher -> IO L.ByteString -> (Handle -> IO a) -> IO a
withEncryptedHandle = Gpg.passphraseHandle [Params "--symmetric --force-mdc"] . cipherPassphrase
{- Runs an action, passing it a handle from which it can feedFile :: FilePath -> Feeder
- stream decrypted content. -} feedFile f h = L.hPut h =<< L.readFile f
withDecryptedHandle :: Cipher -> IO L.ByteString -> (Handle -> IO a) -> IO a
withDecryptedHandle = Gpg.passphraseHandle [Param "--decrypt"] . cipherPassphrase
{- Streams encrypted content to an action. -} feedBytes :: L.ByteString -> Feeder
withEncryptedContent :: Cipher -> IO L.ByteString -> (L.ByteString -> IO a) -> IO a feedBytes = flip L.hPut
withEncryptedContent = pass withEncryptedHandle
{- Streams decrypted content to an action. -} readBytes :: (L.ByteString -> IO a) -> Reader a
withDecryptedContent :: Cipher -> IO L.ByteString -> (L.ByteString -> IO a) -> IO a readBytes a h = L.hGetContents h >>= a
withDecryptedContent = pass withDecryptedHandle
pass {- Runs a Feeder action, that generates content that is encrypted with the
:: (Cipher -> IO L.ByteString -> (Handle -> IO a) -> IO a) - Cipher, and read by the Reader action. -}
-> Cipher encrypt :: Cipher -> Feeder -> Reader a -> IO a
-> IO L.ByteString encrypt = Gpg.feedRead [Params "--symmetric --force-mdc"] . cipherPassphrase
-> (L.ByteString -> IO a)
-> IO a {- Runs a Feeder action, that generates content that is decrypted with the
pass to n s a = to n s $ a <=< L.hGetContents - Cipher, and read by the Reader action. -}
decrypt :: Cipher -> Feeder -> Reader a -> IO a
decrypt = Gpg.feedRead [Param "--decrypt"] . cipherPassphrase
hmacWithCipher :: Cipher -> String -> String hmacWithCipher :: Cipher -> String -> String
hmacWithCipher c = hmacWithCipher' (cipherHmac c) hmacWithCipher c = hmacWithCipher' (cipherHmac c)

View file

@ -125,7 +125,7 @@ storeEncrypted r buprepo (cipher, enck) k _p = do
src <- inRepo $ gitAnnexLocation k src <- inRepo $ gitAnnexLocation k
params <- bupSplitParams r buprepo enck [] params <- bupSplitParams r buprepo enck []
liftIO $ catchBoolIO $ liftIO $ catchBoolIO $
withEncryptedHandle cipher (L.readFile src) $ \h -> encrypt cipher (feedFile src) $ \h ->
pipeBup params (Just h) Nothing pipeBup params (Just h) Nothing
retrieve :: BupRepo -> Key -> AssociatedFile -> FilePath -> Annex Bool retrieve :: BupRepo -> Key -> AssociatedFile -> FilePath -> Annex Bool
@ -141,7 +141,8 @@ retrieveCheap _ _ _ = return False
retrieveEncrypted :: BupRepo -> (Cipher, Key) -> Key -> FilePath -> Annex Bool retrieveEncrypted :: BupRepo -> (Cipher, Key) -> Key -> FilePath -> Annex Bool
retrieveEncrypted buprepo (cipher, enck) _ f = liftIO $ catchBoolIO $ retrieveEncrypted buprepo (cipher, enck) _ f = liftIO $ catchBoolIO $
withHandle StdoutHandle createProcessSuccess p $ \h -> do withHandle StdoutHandle createProcessSuccess p $ \h -> do
withDecryptedContent cipher (L.hGetContents h) $ L.writeFile f decrypt cipher (\toh -> L.hPut toh =<< L.hGetContents h) $
readBytes $ L.writeFile f
return True return True
where where
params = bupParams "join" buprepo [Param $ bupRef enck] params = bupParams "join" buprepo [Param $ bupRef enck]

View file

@ -118,13 +118,13 @@ storeEncrypted d chunksize (cipher, enck) k p = do
src <- inRepo $ gitAnnexLocation k src <- inRepo $ gitAnnexLocation k
metered (Just p) k $ \meterupdate -> metered (Just p) k $ \meterupdate ->
storeHelper d chunksize enck $ \dests -> storeHelper d chunksize enck $ \dests ->
withEncryptedContent cipher (L.readFile src) $ \s -> encrypt cipher (feedFile src) $ readBytes $ \b ->
case chunksize of case chunksize of
Nothing -> do Nothing -> do
let dest = Prelude.head dests let dest = Prelude.head dests
meteredWriteFile meterupdate dest s meteredWriteFile meterupdate dest b
return [dest] return [dest]
Just _ -> storeSplit meterupdate chunksize dests s Just _ -> storeSplit meterupdate chunksize dests b
{- Splits a ByteString into chunks and writes to dests, obeying configured {- Splits a ByteString into chunks and writes to dests, obeying configured
- chunk size (not to be confused with the L.ByteString chunk size). - chunk size (not to be confused with the L.ByteString chunk size).
@ -192,9 +192,11 @@ retrieveEncrypted :: FilePath -> ChunkSize -> (Cipher, Key) -> Key -> FilePath -
retrieveEncrypted d chunksize (cipher, enck) k f = metered Nothing k $ \meterupdate -> retrieveEncrypted d chunksize (cipher, enck) k f = metered Nothing k $ \meterupdate ->
liftIO $ withStoredFiles chunksize d enck $ \files -> liftIO $ withStoredFiles chunksize d enck $ \files ->
catchBoolIO $ do catchBoolIO $ do
withDecryptedContent cipher (L.concat <$> mapM L.readFile files) $ decrypt cipher (feeder files) $
meteredWriteFile meterupdate f readBytes $ meteredWriteFile meterupdate f
return True return True
where
feeder files h = forM_ files $ \file -> L.hPut h =<< L.readFile file
retrieveCheap :: FilePath -> ChunkSize -> Key -> FilePath -> Annex Bool retrieveCheap :: FilePath -> ChunkSize -> Key -> FilePath -> Annex Bool
retrieveCheap _ (Just _) _ _ = return False -- no cheap retrieval for chunks retrieveCheap _ (Just _) _ _ = return False -- no cheap retrieval for chunks

View file

@ -81,12 +81,11 @@ remoteCipher c = go $ extractCipher c
cache <- Annex.getState Annex.ciphers cache <- Annex.getState Annex.ciphers
case M.lookup encipher cache of case M.lookup encipher cache of
Just cipher -> return $ Just cipher Just cipher -> return $ Just cipher
Nothing -> decrypt encipher cache Nothing -> do
decrypt encipher cache = do showNote "gpg"
showNote "gpg" cipher <- liftIO $ decryptCipher encipher
cipher <- liftIO $ decryptCipher encipher Annex.changeState (\s -> s { Annex.ciphers = M.insert encipher cipher cache })
Annex.changeState (\s -> s { Annex.ciphers = M.insert encipher cipher cache }) return $ Just cipher
return $ Just cipher
{- Checks if there is a trusted (non-shared) cipher. -} {- Checks if there is a trusted (non-shared) cipher. -}
isTrustedCipher :: RemoteConfig -> Bool isTrustedCipher :: RemoteConfig -> Bool
@ -96,9 +95,9 @@ isTrustedCipher c =
{- Gets encryption Cipher, and encrypted version of Key. -} {- Gets encryption Cipher, and encrypted version of Key. -}
cipherKey :: Maybe RemoteConfig -> Key -> Annex (Maybe (Cipher, Key)) cipherKey :: Maybe RemoteConfig -> Key -> Annex (Maybe (Cipher, Key))
cipherKey Nothing _ = return Nothing cipherKey Nothing _ = return Nothing
cipherKey (Just c) k = maybe Nothing encrypt <$> remoteCipher c cipherKey (Just c) k = maybe Nothing make <$> remoteCipher c
where where
encrypt ciphertext = Just (ciphertext, encryptKey ciphertext k) make ciphertext = Just (ciphertext, encryptKey ciphertext k)
{- Stores an StorableCipher in a remote's configuration. -} {- Stores an StorableCipher in a remote's configuration. -}
storeCipher :: RemoteConfig -> StorableCipher -> RemoteConfig storeCipher :: RemoteConfig -> StorableCipher -> RemoteConfig

View file

@ -108,7 +108,8 @@ store h k _f _p = do
storeEncrypted :: String -> (Cipher, Key) -> Key -> MeterUpdate -> Annex Bool storeEncrypted :: String -> (Cipher, Key) -> Key -> MeterUpdate -> Annex Bool
storeEncrypted h (cipher, enck) k _p = withTmp enck $ \tmp -> do storeEncrypted h (cipher, enck) k _p = withTmp enck $ \tmp -> do
src <- inRepo $ gitAnnexLocation k src <- inRepo $ gitAnnexLocation k
liftIO $ withEncryptedContent cipher (L.readFile src) $ L.writeFile tmp liftIO $ encrypt cipher (feedFile src) $
readBytes $ L.writeFile tmp
runHook h "store" enck (Just tmp) $ return True runHook h "store" enck (Just tmp) $ return True
retrieve :: String -> Key -> AssociatedFile -> FilePath -> Annex Bool retrieve :: String -> Key -> AssociatedFile -> FilePath -> Annex Bool
@ -120,7 +121,8 @@ retrieveCheap _ _ _ = return False
retrieveEncrypted :: String -> (Cipher, Key) -> Key -> FilePath -> Annex Bool retrieveEncrypted :: String -> (Cipher, Key) -> Key -> FilePath -> Annex Bool
retrieveEncrypted h (cipher, enck) _ f = withTmp enck $ \tmp -> retrieveEncrypted h (cipher, enck) _ f = withTmp enck $ \tmp ->
runHook h "retrieve" enck (Just tmp) $ liftIO $ catchBoolIO $ do runHook h "retrieve" enck (Just tmp) $ liftIO $ catchBoolIO $ do
withDecryptedContent cipher (L.readFile tmp) $ L.writeFile f decrypt cipher (feedFile tmp) $
readBytes $ L.writeFile f
return True return True
remove :: String -> Key -> Annex Bool remove :: String -> Key -> Annex Bool

View file

@ -110,7 +110,8 @@ store o k _f p = rsyncSend o p k <=< inRepo $ gitAnnexLocation k
storeEncrypted :: RsyncOpts -> (Cipher, Key) -> Key -> MeterUpdate -> Annex Bool storeEncrypted :: RsyncOpts -> (Cipher, Key) -> Key -> MeterUpdate -> Annex Bool
storeEncrypted o (cipher, enck) k p = withTmp enck $ \tmp -> do storeEncrypted o (cipher, enck) k p = withTmp enck $ \tmp -> do
src <- inRepo $ gitAnnexLocation k src <- inRepo $ gitAnnexLocation k
liftIO $ withEncryptedContent cipher (L.readFile src) $ L.writeFile tmp liftIO $ decrypt cipher (feedFile src) $
readBytes $ L.writeFile tmp
rsyncSend o p enck tmp rsyncSend o p enck tmp
retrieve :: RsyncOpts -> Key -> AssociatedFile -> FilePath -> Annex Bool retrieve :: RsyncOpts -> Key -> AssociatedFile -> FilePath -> Annex Bool
@ -128,7 +129,8 @@ retrieveEncrypted :: RsyncOpts -> (Cipher, Key) -> Key -> FilePath -> Annex Bool
retrieveEncrypted o (cipher, enck) _ f = withTmp enck $ \tmp -> do retrieveEncrypted o (cipher, enck) _ f = withTmp enck $ \tmp -> do
ifM (retrieve o enck undefined tmp) ifM (retrieve o enck undefined tmp)
( liftIO $ catchBoolIO $ do ( liftIO $ catchBoolIO $ do
withDecryptedContent cipher (L.readFile tmp) $ L.writeFile f decrypt cipher (feedFile tmp) $
readBytes $ L.writeFile f
return True return True
, return False , return False
) )

View file

@ -122,7 +122,8 @@ storeEncrypted r (cipher, enck) k _p = s3Action r False $ \(conn, bucket) ->
-- (An alternative would be chunking to to a constant size.) -- (An alternative would be chunking to to a constant size.)
withTmp enck $ \tmp -> do withTmp enck $ \tmp -> do
f <- inRepo $ gitAnnexLocation k f <- inRepo $ gitAnnexLocation k
liftIO $ withEncryptedContent cipher (L.readFile f) $ L.writeFile tmp liftIO $ encrypt cipher (feedFile f) $
readBytes $ L.writeFile tmp
res <- liftIO $ storeHelper (conn, bucket) r enck tmp res <- liftIO $ storeHelper (conn, bucket) r enck tmp
s3Bool res s3Bool res
@ -162,8 +163,8 @@ retrieveEncrypted :: Remote -> (Cipher, Key) -> Key -> FilePath -> Annex Bool
retrieveEncrypted r (cipher, enck) _ f = s3Action r False $ \(conn, bucket) -> do retrieveEncrypted r (cipher, enck) _ f = s3Action r False $ \(conn, bucket) -> do
res <- liftIO $ getObject conn $ bucketKey r bucket enck res <- liftIO $ getObject conn $ bucketKey r bucket enck
case res of case res of
Right o -> liftIO $ Right o -> liftIO $ decrypt cipher (feedBytes $ obj_data o) $
withDecryptedContent cipher (return $ obj_data o) $ \content -> do readBytes $ \content -> do
L.writeFile f content L.writeFile f content
return True return True
Left e -> s3Warning e Left e -> s3Warning e

View file

@ -93,8 +93,8 @@ storeEncrypted :: Remote -> (Cipher, Key) -> Key -> MeterUpdate -> Annex Bool
storeEncrypted r (cipher, enck) k _p = davAction r False $ \(baseurl, user, pass) -> do storeEncrypted r (cipher, enck) k _p = davAction r False $ \(baseurl, user, pass) -> do
let url = davLocation baseurl enck let url = davLocation baseurl enck
f <- inRepo $ gitAnnexLocation k f <- inRepo $ gitAnnexLocation k
liftIO $ withEncryptedContent cipher (L.readFile f) $ liftIO $ encrypt cipher (feedFile f) $
storeHelper r url user pass readBytes $ storeHelper r url user pass
storeHelper :: Remote -> DavUrl -> DavUser -> DavPass -> L.ByteString -> IO Bool storeHelper :: Remote -> DavUrl -> DavUser -> DavPass -> L.ByteString -> IO Bool
storeHelper r urlbase user pass b = catchBoolIO $ do storeHelper r urlbase user pass b = catchBoolIO $ do
@ -133,18 +133,20 @@ retrieveEncrypted :: Remote -> (Cipher, Key) -> Key -> FilePath -> Annex Bool
retrieveEncrypted r (cipher, enck) k d = metered Nothing k $ \meterupdate -> retrieveEncrypted r (cipher, enck) k d = metered Nothing k $ \meterupdate ->
davAction r False $ \(baseurl, user, pass) -> liftIO $ catchBoolIO $ davAction r False $ \(baseurl, user, pass) -> liftIO $ catchBoolIO $
withStoredFiles r enck baseurl user pass onerr $ \urls -> do withStoredFiles r enck baseurl user pass onerr $ \urls -> do
withDecryptedContent cipher (L.concat <$> feeder user pass urls []) $ decrypt cipher (feeder user pass urls) $
meteredWriteFile meterupdate d readBytes $ meteredWriteFile meterupdate d
return True return True
where where
onerr _ = return False onerr _ = return False
feeder _ _ [] c = return $ reverse c feeder _ _ [] _ = noop
feeder user pass (url:urls) c = do feeder user pass (url:urls) h = do
mb <- davGetUrlContent url user pass mb <- davGetUrlContent url user pass
case mb of case mb of
Nothing -> throwIO "download failed" Nothing -> throwIO "download failed"
Just b -> feeder user pass urls (b:c) Just b -> do
L.hPut h b
feeder user pass urls h
remove :: Remote -> Key -> Annex Bool remove :: Remote -> Key -> Annex Bool
remove r k = davAction r False $ \(baseurl, user, pass) -> liftIO $ do remove r k = davAction r False $ \(baseurl, user, pass) -> liftIO $ do

View file

@ -7,7 +7,6 @@
module Utility.Gpg where module Utility.Gpg where
import qualified Data.ByteString.Lazy as L
import System.Posix.Types import System.Posix.Types
import Control.Applicative import Control.Applicative
import Control.Concurrent import Control.Concurrent
@ -54,14 +53,15 @@ pipeStrict params input = do
hClose to hClose to
hGetContentsStrict from hGetContentsStrict from
{- Runs gpg with some parameters, first feeding it a passphrase via {- Runs gpg with some parameters. First sends it a passphrase via
- --passphrase-fd, then feeding it an input, and passing a handle - --passphrase-fd. Then runs a feeder action that is passed a handle and
- to its output to an action. - should write to it all the data to input to gpg. Finally, runs
- a reader action that is passed a handle to gpg's output.
- -
- Note that to avoid deadlock with the cleanup stage, - Note that to avoid deadlock with the cleanup stage,
- the action must fully consume gpg's input before returning. -} - the reader must fully consume gpg's input before returning. -}
passphraseHandle :: [CommandParam] -> String -> IO L.ByteString -> (Handle -> IO a) -> IO a feedRead :: [CommandParam] -> String -> (Handle -> IO ()) -> (Handle -> IO a) -> IO a
passphraseHandle params passphrase a b = do feedRead params passphrase feeder reader = do
-- pipe the passphrase into gpg on a fd -- pipe the passphrase into gpg on a fd
(frompipe, topipe) <- createPipe (frompipe, topipe) <- createPipe
void $ forkIO $ do void $ forkIO $ do
@ -77,9 +77,9 @@ passphraseHandle params passphrase a b = do
where where
go (to, from) = do go (to, from) = do
void $ forkIO $ do void $ forkIO $ do
L.hPut to =<< a feeder to
hClose to hClose to
b from reader from
{- Finds gpg public keys matching some string. (Could be an email address, {- Finds gpg public keys matching some string. (Could be an email address,
- a key id, or a name. -} - a key id, or a name. -}

2
debian/changelog vendored
View file

@ -7,6 +7,8 @@ git-annex (3.20121113) UNRELEASED; urgency=low
added, including when new repository configurations are pushed in from added, including when new repository configurations are pushed in from
remotes. remotes.
* OSX: Fix RunAtLoad value in plist file. * OSX: Fix RunAtLoad value in plist file.
* Getting a file from chunked directory special remotes no longer buffers
it all in memory.
-- Joey Hess <joeyh@debian.org> Tue, 13 Nov 2012 13:17:07 -0400 -- Joey Hess <joeyh@debian.org> Tue, 13 Nov 2012 13:17:07 -0400