From 1755c5de40c9fc8634e5985e1113179f046b8f13 Mon Sep 17 00:00:00 2001
From: Joey Hess
Date: Fri, 25 Jul 2014 15:12:51 -0400
Subject: [PATCH 01/54] thought about chunk key hashing

---
 doc/design/assistant/chunks.mdwn | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/doc/design/assistant/chunks.mdwn b/doc/design/assistant/chunks.mdwn
index d7517243ca..b5a86ef82c 100644
--- a/doc/design/assistant/chunks.mdwn
+++ b/doc/design/assistant/chunks.mdwn
@@ -209,3 +209,12 @@ cannot check exact file sizes.
 If padding is enabled, gpg compression should be disabled, to not leak
 clues about how well the files compress and so what kind of file it is.
+
+## chunk key hashing
+
+A chunk key should hash into the same directory structure as its parent
+key. This will avoid lots of extra hash directories when using chunking
+with non-encrypted keys.
+
+Won't happen when the key is encrypted, but that is good; hashing to the
+same bucket then would allow statistical correlation.

From 8f93982df6ff6b22d076842b915c325a6c3ca486 Mon Sep 17 00:00:00 2001
From: Joey Hess
Date: Fri, 25 Jul 2014 16:09:23 -0400
Subject: [PATCH 02/54] use same hash directories for chunked key as are used
 for its parent

This avoids a proliferation of hash directories when using new-style
chunking, and should improve performance since chunks are accessed in
sequence and so should have a common locality.

Of course, when a chunked key is encrypted, its hash directories have no
relation to the parent key.

This commit was sponsored by Christian Kellermann.
---
 Locations.hs               |  5 +++--
 Types/Key.hs               | 12 ++++++++++++
 doc/internals/hashing.mdwn |  5 +++++
 3 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/Locations.hs b/Locations.hs
index d397a97be6..0369c7a1c3 100644
--- a/Locations.hs
+++ b/Locations.hs
@@ -421,6 +421,7 @@ keyPaths key = map (keyPath key) annexHashes
 - which do not allow using a directory "XX" when "xx" already exists.
 - To support that, most repositories use the lower case hash for new data. -}
 type Hasher = Key -> FilePath
+
 annexHashes :: [Hasher]
 annexHashes = [hashDirLower, hashDirMixed]
@@ -428,12 +429,12 @@ hashDirMixed :: Hasher
 hashDirMixed k = addTrailingPathSeparator $ take 2 dir </> drop 2 dir
   where
 	dir = take 4 $ display_32bits_as_dir =<< [a,b,c,d]
-	ABCD (a,b,c,d) = md5 $ md5FilePath $ key2file k
+	ABCD (a,b,c,d) = md5 $ md5FilePath $ key2file $ nonChunkKey k
 
 hashDirLower :: Hasher
 hashDirLower k = addTrailingPathSeparator $ take 3 dir </> drop 3 dir
   where
-	dir = take 6 $ md5s $ md5FilePath $ key2file k
+	dir = take 6 $ md5s $ md5FilePath $ key2file $ nonChunkKey k
 
 {- modified version of display_32bits_as_hex from Data.Hash.MD5
  - Copyright (C) 2001 Ian Lynagh
diff --git a/Types/Key.hs b/Types/Key.hs
index 90f66f23ed..3015b1e86b 100644
--- a/Types/Key.hs
+++ b/Types/Key.hs
@@ -13,6 +13,8 @@ module Types.Key (
 	stubKey,
 	key2file,
 	file2key,
+	isChunkKey,
+	nonChunkKey,
 
 	prop_idempotent_key_encode,
 	prop_idempotent_key_decode
@@ -47,6 +49,16 @@ stubKey = Key
 	, keyChunkNum = Nothing
 	}
 
+isChunkKey :: Key -> Bool
+isChunkKey k = isJust (keyChunkSize k) && isJust (keyChunkNum k)
+
+-- Gets the parent of a chunk key.
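+-- For example (an illustrative key, assuming the usual serialization of
+-- the chunk fields as the S and C parts): the chunk key
+-- SHA256-s10000-S4000-C2--d3b07... has the parent SHA256-s10000--d3b07...,
+-- so both hash into the same directory.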
+nonChunkKey :: Key -> Key +nonChunkKey k = k + { keyChunkSize = Nothing + , keyChunkNum = Nothing + } + fieldSep :: Char fieldSep = '-' diff --git a/doc/internals/hashing.mdwn b/doc/internals/hashing.mdwn index cc4bc6456e..bdc259b634 100644 --- a/doc/internals/hashing.mdwn +++ b/doc/internals/hashing.mdwn @@ -36,3 +36,8 @@ string, but where that would normally encode the bits using the 16 characters 0-9a-f, this instead uses the 32 characters "0123456789zqjxkmvwgpfZQJXKMVWGPF". The first 2 letters of the resulting string are the first directory, and the second 2 are the second directory. + +## chunk keys + +The same hash directory is used for a chunk key as would be used for the +key that it's a chunk of. From ab4cce411425ef456ed706fd25fc209a98210abe Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Fri, 25 Jul 2014 16:20:32 -0400 Subject: [PATCH 03/54] core implementation of new style chunking Not yet used by any special remotes, but should not be too hard to add it to most of them. storeChunks is the hairy bit! It's loosely based on Remote.Directory.storeLegacyChunked. The object is read in using a lazy bytestring, which is streamed though, creating chunks as needed, without ever buffering more than 1 chunk in memory. Getting the progress meter update to work right was also fun, since progress meter values are absolute. Finessed by constructing an offset meter. This commit was sponsored by Richard Collins. --- Remote/Helper/Chunked.hs | 104 ++++++++++++++++++++++++++++++++++++++- Utility/Metered.hs | 16 ++++++ 2 files changed, 119 insertions(+), 1 deletion(-) diff --git a/Remote/Helper/Chunked.hs b/Remote/Helper/Chunked.hs index 031ff63d64..e984001003 100644 --- a/Remote/Helper/Chunked.hs +++ b/Remote/Helper/Chunked.hs @@ -9,16 +9,21 @@ module Remote.Helper.Chunked ( ChunkSize , ChunkConfig(..) , chunkConfig + , storeChunks + , chunkKeys , meteredWriteFileChunks ) where import Common.Annex import Utility.DataUnits import Types.Remote -import Logs.Chunk.Pure (ChunkSize) +import Types.Key +import Logs.Chunk.Pure (ChunkSize, ChunkCount) +import Logs.Chunk import Utility.Metered import qualified Data.ByteString.Lazy as L +import qualified Data.ByteString as S import qualified Data.Map as M data ChunkConfig @@ -38,6 +43,103 @@ chunkConfig m = Just size | size > 0 -> fromInteger size _ -> error ("bad " ++ f) +-- An infinite stream of chunk keys, starting from chunk 1. +newtype ChunkKeyStream = ChunkKeyStream [Key] + +chunkKeyStream :: Key -> ChunkSize -> ChunkKeyStream +chunkKeyStream basek chunksize = ChunkKeyStream $ map mk [1..] + where + mk chunknum = sizedk { keyChunkNum = Just chunknum } + sizedk = basek { keyChunkSize = Just (toInteger chunksize) } + +nextChunkKeyStream :: ChunkKeyStream -> (Key, ChunkKeyStream) +nextChunkKeyStream (ChunkKeyStream (k:l)) = (k, ChunkKeyStream l) +nextChunkKeyStream (ChunkKeyStream []) = undefined -- stream is infinite! + +takeChunkKeyStream :: ChunkCount -> ChunkKeyStream -> [Key] +takeChunkKeyStream n (ChunkKeyStream l) = genericTake n l + +-- Number of chunks already consumed from the stream. +numChunks :: ChunkKeyStream -> Integer +numChunks = pred . fromJust . keyChunkNum . fst . nextChunkKeyStream + +{- Slits up the key's content into chunks, passing each chunk to + - the storer action, along with a unique chunk key. + - + - Note that the storer action is responsible for catching any + - exceptions it may encounter. + - + - A progress meter display is set up, and the storer action + - is passed a callback to update it. 
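 - (Progress meter values are absolute, so the callback handed to the
 - storer is offset by the bytes already stored in earlier chunks; see
 - offsetMeterUpdate in Utility.Metered.)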
+ - + - Once all chunks are successfully stored, updates the chunk log. + -} +storeChunks :: UUID -> ChunkConfig -> Key -> FilePath -> MeterUpdate -> (Key -> L.ByteString -> MeterUpdate -> Annex Bool) -> Annex Bool +storeChunks u chunkconfig k f p storer = metered (Just p) k $ \meterupdate -> + either (\e -> liftIO (print e) >> return False) (go meterupdate) + =<< (liftIO $ tryIO $ L.readFile f) + where + go meterupdate b = case chunkconfig of + (UnpaddedChunks chunksize) | not (isChunkKey k) -> + gochunks meterupdate chunksize b (chunkKeyStream k chunksize) + _ -> storer k b meterupdate + + gochunks :: MeterUpdate -> ChunkSize -> L.ByteString -> ChunkKeyStream -> Annex Bool + gochunks meterupdate chunksize lb = + loop zeroBytesProcessed chunksize (L.toChunks lb) [] + where + loop bytesprocessed sz [] c chunkkeys + -- Always store at least one chunk, + -- even for empty content. + | not (null c) || numchunks == 0 = + storechunk bytesprocessed sz [] c chunkkeys + | otherwise = do + chunksStored u k chunksize numchunks + return True + where + numchunks = numChunks chunkkeys + loop bytesprocessed sz (b:bs) c chunkkeys + | s <= sz || sz == chunksize = + loop bytesprocessed sz' bs (b:c) chunkkeys + | otherwise = + storechunk bytesprocessed sz' bs (b:c) chunkkeys + where + s = fromIntegral (S.length b) + sz' = sz - s + + storechunk bytesprocessed sz bs c chunkkeys = do + let (chunkkey, chunkkeys') = nextChunkKeyStream chunkkeys + ifM (storer chunkkey (L.fromChunks $ reverse c) meterupdate') + ( do + let bytesprocessed' = addBytesProcessed bytesprocessed (chunksize - sz) + loop bytesprocessed' chunksize bs [] chunkkeys' + , return False + ) + where + {- The MeterUpdate that is passed to the action + - storing a chunk is offset, so that it reflects + - the total bytes that have already been stored + - in previous chunks. -} + meterupdate' = offsetMeterUpdate meterupdate bytesprocessed + +-- retrieveChunks :: UUID -> ChunkConfig -> Key -> Annex + +{- A key can be stored in a remote unchunked, or as a list of chunked keys. + - It's even possible for a remote to have the same key stored multiple + - times with different chunk sizes. This finds all possible lists of keys + - that might be on the remote that can be combined to get back the + - requested key. + -} +chunkKeys :: UUID -> ChunkConfig -> Key -> Annex [[Key]] +chunkKeys u (UnpaddedChunks _) k = do + chunklists <- map (toChunkList k) <$> getCurrentChunks u k + return ([k]:chunklists) +chunkKeys _ _ k = pure [[k]] + +toChunkList :: Key -> (ChunkSize, ChunkCount) -> [Key] +toChunkList k (chunksize, chunkcount) = takeChunkKeyStream chunkcount $ + chunkKeyStream k chunksize + {- Writes a series of chunks to a file. The feeder is called to get - each chunk. -} meteredWriteFileChunks :: MeterUpdate -> FilePath -> [v] -> (v -> IO L.ByteString) -> IO () diff --git a/Utility/Metered.hs b/Utility/Metered.hs index 0d94c1c5c6..bca7f58e79 100644 --- a/Utility/Metered.hs +++ b/Utility/Metered.hs @@ -16,6 +16,7 @@ import qualified Data.ByteString as S import System.IO.Unsafe import Foreign.Storable (Storable(sizeOf)) import System.Posix.Types +import Data.Int {- An action that can be run repeatedly, updating it on the bytes processed. 
- @@ -31,6 +32,10 @@ class AsBytesProcessed a where toBytesProcessed :: a -> BytesProcessed fromBytesProcessed :: BytesProcessed -> a +instance AsBytesProcessed BytesProcessed where + toBytesProcessed = id + fromBytesProcessed = id + instance AsBytesProcessed Integer where toBytesProcessed i = BytesProcessed i fromBytesProcessed (BytesProcessed i) = i @@ -39,6 +44,10 @@ instance AsBytesProcessed Int where toBytesProcessed i = BytesProcessed $ toInteger i fromBytesProcessed (BytesProcessed i) = fromInteger i +instance AsBytesProcessed Int64 where + toBytesProcessed i = BytesProcessed $ toInteger i + fromBytesProcessed (BytesProcessed i) = fromInteger i + instance AsBytesProcessed FileOffset where toBytesProcessed sz = BytesProcessed $ toInteger sz fromBytesProcessed (BytesProcessed sz) = fromInteger sz @@ -77,6 +86,13 @@ meteredWriteFile :: MeterUpdate -> FilePath -> L.ByteString -> IO () meteredWriteFile meterupdate f b = withBinaryFile f WriteMode $ \h -> meteredWrite meterupdate h b +{- Applies an offset to a MeterUpdate. This can be useful when + - performing a sequence of actions, such as multiple meteredWriteFiles, + - that all update a common meter progressively. + -} +offsetMeterUpdate :: MeterUpdate -> BytesProcessed -> MeterUpdate +offsetMeterUpdate base offset = \n -> base (offset `addBytesProcessed` n) + {- This is like L.hGetContents, but after each chunk is read, a meter - is updated based on the size of the chunk. - From 9e8a4a09504e1f963c4a9f8f5d000320f6288f5f Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Fri, 25 Jul 2014 16:21:01 -0400 Subject: [PATCH 04/54] support new style chunking in directory special remote MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Only when storing non-encrypted so far, not retrieving or checking if a key is present or removing. This commit was sponsored by Renaud Casenave-Péré. 
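
A tiny sketch of how the offset meter added in the previous patch composes
per-chunk progress into whole-file progress (illustrative only: demoMeter
is an invented name; toBytesProcessed and offsetMeterUpdate are the
Utility.Metered API shown above, with MeterUpdate taking the absolute
BytesProcessed):

    -- Report absolute progress for two 1000 byte chunks through one meter.
    demoMeter :: MeterUpdate -> IO ()
    demoMeter meter = do
        -- chunk 1 reports positions relative to its own start
        meter (toBytesProcessed (1000 :: Integer))
        -- chunk 2's callback is offset by chunk 1's size, so its
        -- local position 1000 is reported as 2000 bytes in total
        let meter' = offsetMeterUpdate meter (toBytesProcessed (1000 :: Integer))
        meter' (toBytesProcessed (1000 :: Integer))
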
--- Remote/Directory.hs | 124 ++++++++++++++++++++++---------------------- 1 file changed, 61 insertions(+), 63 deletions(-) diff --git a/Remote/Directory.hs b/Remote/Directory.hs index 62c01e370c..3305f712b3 100644 --- a/Remote/Directory.hs +++ b/Remote/Directory.hs @@ -43,16 +43,16 @@ gen r u c gc = do let chunkconfig = chunkConfig c return $ Just $ encryptableRemote c (storeEncrypted dir (getGpgEncParams (c,gc)) chunkconfig) - (retrieveEncrypted dir chunkconfig) + (retrieveEncrypted u dir chunkconfig) Remote { uuid = u, cost = cst, name = Git.repoDescribe r, - storeKey = store dir chunkconfig, - retrieveKeyFile = retrieve dir chunkconfig, - retrieveKeyFileCheap = retrieveCheap dir chunkconfig, + storeKey = store u dir chunkconfig, + retrieveKeyFile = retrieve u dir chunkconfig, + retrieveKeyFileCheap = retrieveCheap u dir chunkconfig, removeKey = remove dir, - hasKey = checkPresent dir chunkconfig, + hasKey = checkPresent u dir chunkconfig, hasKeyCheap = True, whereisKey = Nothing, remoteFsck = Nothing, @@ -97,9 +97,9 @@ storeDir d k = addTrailingPathSeparator $ d hashDirLower k keyFile k tmpDir :: FilePath -> Key -> FilePath tmpDir d k = addTrailingPathSeparator $ d "tmp" keyFile k -withCheckedFiles :: (FilePath -> IO Bool) -> ChunkConfig -> FilePath -> Key -> ([FilePath] -> IO Bool) -> IO Bool -withCheckedFiles _ _ [] _ _ = return False -withCheckedFiles check (LegacyChunks _) d k a = go $ locations d k +withCheckedFiles :: (FilePath -> IO Bool) -> UUID -> ChunkConfig -> FilePath -> Key -> ([FilePath] -> IO Bool) -> IO Bool +withCheckedFiles _ _ _ [] _ _ = return False +withCheckedFiles check _ (LegacyChunks _) d k a = go $ locations d k where go [] = return False go (f:fs) = do @@ -115,33 +115,20 @@ withCheckedFiles check (LegacyChunks _) d k a = go $ locations d k then go fs else a chunks ) -withCheckedFiles check _ d k a = go $ locations d k +withCheckedFiles check u chunkconfig d k a = + go $ locations d k where go [] = return False go (f:fs) = ifM (check f) ( a [f] , go fs ) -withStoredFiles :: ChunkConfig -> FilePath -> Key -> ([FilePath] -> IO Bool) -> IO Bool +withStoredFiles :: UUID -> ChunkConfig -> FilePath -> Key -> ([FilePath] -> IO Bool) -> IO Bool withStoredFiles = withCheckedFiles doesFileExist -store :: FilePath -> ChunkConfig -> Key -> AssociatedFile -> MeterUpdate -> Annex Bool -store d chunkconfig k _f p = sendAnnex k (void $ remove d k) $ \src -> - metered (Just p) k $ \meterupdate -> - storeHelper d chunkconfig k k $ \dests -> - case chunkconfig of - LegacyChunks chunksize -> - storeLegacyChunked meterupdate chunksize dests - =<< L.readFile src - _ -> do - let dest = Prelude.head dests - meteredWriteFile meterupdate dest - =<< L.readFile src - return [dest] - -storeEncrypted :: FilePath -> [CommandParam] -> ChunkConfig -> (Cipher, Key) -> Key -> MeterUpdate -> Annex Bool -storeEncrypted d gpgOpts chunkconfig (cipher, enck) k p = sendAnnex k (void $ remove d enck) $ \src -> - metered (Just p) k $ \meterupdate -> - storeHelper d chunkconfig enck k $ \dests -> - encrypt gpgOpts cipher (feedFile src) $ readBytes $ \b -> +store :: UUID -> FilePath -> ChunkConfig -> Key -> AssociatedFile -> MeterUpdate -> Annex Bool +store u d chunkconfig k _f p = whenDiskAvail d k $ + sendAnnex k (void $ remove d k) $ \src -> + storeChunks u chunkconfig k src p $ \k' b meterupdate -> + storeHelper d chunkconfig k' $ \dests -> case chunkconfig of LegacyChunks chunksize -> storeLegacyChunked meterupdate chunksize dests b @@ -150,14 +137,27 @@ storeEncrypted d gpgOpts chunkconfig 
(cipher, enck) k p = sendAnnex k (void $ re meteredWriteFile meterupdate dest b return [dest] +storeEncrypted :: FilePath -> [CommandParam] -> ChunkConfig -> (Cipher, Key) -> Key -> MeterUpdate -> Annex Bool +storeEncrypted d gpgOpts chunkconfig (cipher, enck) k p = whenDiskAvail d k $ + sendAnnex k (void $ remove d enck) $ \src -> + metered (Just p) k $ \meterupdate -> + storeHelper d chunkconfig enck $ \dests -> + encrypt gpgOpts cipher (feedFile src) $ readBytes $ \b -> + case chunkconfig of + LegacyChunks chunksize -> + storeLegacyChunked meterupdate chunksize dests b + _ -> do + let dest = Prelude.head dests + meteredWriteFile meterupdate dest b + return [dest] + {- Splits a ByteString into chunks and writes to dests, obeying configured - - chunk size (not to be confused with the L.ByteString chunk size). - - Note: Must always write at least one file, even for empty ByteString. -} + - chunk size (not to be confused with the L.ByteString chunk size). -} storeLegacyChunked :: MeterUpdate -> ChunkSize -> [FilePath] -> L.ByteString -> IO [FilePath] storeLegacyChunked _ _ [] _ = error "bad storeLegacyChunked call" storeLegacyChunked meterupdate chunksize alldests@(firstdest:_) b | L.null b = do - -- must always write at least one file, even for empty + -- always write at least one file, even for empty L.writeFile firstdest b return [firstdest] | otherwise = storeLegacyChunked' meterupdate chunksize alldests (L.toChunks b) [] @@ -181,28 +181,25 @@ storeLegacyChunked' meterupdate chunksize (d:dests) bs c = do feed bytes' (sz - s) ls h else return (l:ls) -storeHelper :: FilePath -> ChunkConfig -> Key -> Key -> ([FilePath] -> IO [FilePath]) -> Annex Bool -storeHelper d chunkconfig key origkey storer = check <&&> liftIO go - where - tmpdir = tmpDir d key - destdir = storeDir d key +{- An encrypted key does not have a known size, so the unencrypted + - key should always be passed. -} +whenDiskAvail :: FilePath -> Key -> Annex Bool -> Annex Bool +whenDiskAvail d k a = checkDiskSpace (Just d) k 0 <&&> a - {- An encrypted key does not have a known size, - - so check that the size of the original key is available as free - - space. 
-} - check = do - liftIO $ createDirectoryIfMissing True tmpdir - checkDiskSpace (Just tmpdir) origkey 0 - - go = case chunkconfig of - NoChunks -> flip catchNonAsync (\e -> print e >> return False) $ do +storeHelper :: FilePath -> ChunkConfig -> Key -> ([FilePath] -> IO [FilePath]) -> Annex Bool +storeHelper d chunkconfig key storer = liftIO $ do + void $ liftIO $ tryIO $ createDirectoryIfMissing True tmpdir + case chunkconfig of + LegacyChunks _ -> Legacy.storeChunks key tmpdir destdir storer recorder finalizer + _ -> flip catchNonAsync (\e -> print e >> return False) $ do let tmpf = tmpdir keyFile key void $ storer [tmpf] finalizer tmpdir destdir return True - UnpaddedChunks _ -> error "TODO: storeHelper with UnpaddedChunks" - LegacyChunks _ -> Legacy.storeChunks key tmpdir destdir storer recorder finalizer - + where + tmpdir = tmpDir d key + destdir = storeDir d key + finalizer tmp dest = do void $ tryIO $ allowWrite dest -- may already exist void $ tryIO $ removeDirectoryRecursive dest -- or not exist @@ -218,16 +215,16 @@ storeHelper d chunkconfig key origkey storer = check <&&> liftIO go writeFile f s void $ tryIO $ preventWrite f -retrieve :: FilePath -> ChunkConfig -> Key -> AssociatedFile -> FilePath -> MeterUpdate -> Annex Bool -retrieve d chunkconfig k _ f p = metered (Just p) k $ \meterupdate -> - liftIO $ withStoredFiles chunkconfig d k $ \files -> +retrieve :: UUID -> FilePath -> ChunkConfig -> Key -> AssociatedFile -> FilePath -> MeterUpdate -> Annex Bool +retrieve u d chunkconfig k _ f p = metered (Just p) k $ \meterupdate -> + liftIO $ withStoredFiles u chunkconfig d k $ \files -> catchBoolIO $ do meteredWriteFileChunks meterupdate f files L.readFile return True -retrieveEncrypted :: FilePath -> ChunkConfig -> (Cipher, Key) -> Key -> FilePath -> MeterUpdate -> Annex Bool -retrieveEncrypted d chunkconfig (cipher, enck) k f p = metered (Just p) k $ \meterupdate -> - liftIO $ withStoredFiles chunkconfig d enck $ \files -> +retrieveEncrypted :: UUID -> FilePath -> ChunkConfig -> (Cipher, Key) -> Key -> FilePath -> MeterUpdate -> Annex Bool +retrieveEncrypted u d chunkconfig (cipher, enck) k f p = metered (Just p) k $ \meterupdate -> + liftIO $ withStoredFiles u chunkconfig d enck $ \files -> catchBoolIO $ do decrypt cipher (feeder files) $ readBytes $ meteredWriteFile meterupdate f @@ -235,17 +232,18 @@ retrieveEncrypted d chunkconfig (cipher, enck) k f p = metered (Just p) k $ \met where feeder files h = forM_ files $ L.hPut h <=< L.readFile -retrieveCheap :: FilePath -> ChunkConfig -> Key -> FilePath -> Annex Bool +retrieveCheap :: UUID -> FilePath -> ChunkConfig -> Key -> FilePath -> Annex Bool -- no cheap retrieval for chunks -retrieveCheap _ (UnpaddedChunks _) _ _ = return False -retrieveCheap _ (LegacyChunks _) _ _ = return False +retrieveCheap _ _ (UnpaddedChunks _) _ _ = return False +retrieveCheap _ _ (LegacyChunks _) _ _ = return False #ifndef mingw32_HOST_OS -retrieveCheap d ck k f = liftIO $ withStoredFiles ck d k go +retrieveCheap u d ck k f = liftIO $ withStoredFiles u ck d k go where - go [file] = catchBoolIO $ createSymbolicLink file f >> return True + go [file] = catchBoolIO $ + createSymbolicLink file f >> return True go _files = return False #else -retrieveCheap _ _ _ _ = return False +retrieveCheap _ _ _ _ _ = return False #endif remove :: FilePath -> Key -> Annex Bool @@ -262,6 +260,6 @@ remove d k = liftIO $ do where dir = storeDir d k -checkPresent :: FilePath -> ChunkConfig -> Key -> Annex (Either String Bool) -checkPresent d chunkconfig k = liftIO $ 
catchMsgIO $ withStoredFiles chunkconfig d k $ +checkPresent :: UUID -> FilePath -> ChunkConfig -> Key -> Annex (Either String Bool) +checkPresent u d chunkconfig k = liftIO $ catchMsgIO $ withStoredFiles u chunkconfig d k $ const $ return True -- withStoredFiles checked that it exists From cf83697c33b8af93c528da9eadab6f60950ce540 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Sat, 26 Jul 2014 12:04:35 -0400 Subject: [PATCH 05/54] reorg --- Remote/Helper/Chunked.hs | 12 +++++------- Remote/Helper/Encryptable.hs | 28 ++++++++++++---------------- 2 files changed, 17 insertions(+), 23 deletions(-) diff --git a/Remote/Helper/Chunked.hs b/Remote/Helper/Chunked.hs index e984001003..e298299ce0 100644 --- a/Remote/Helper/Chunked.hs +++ b/Remote/Helper/Chunked.hs @@ -63,16 +63,12 @@ takeChunkKeyStream n (ChunkKeyStream l) = genericTake n l numChunks :: ChunkKeyStream -> Integer numChunks = pred . fromJust . keyChunkNum . fst . nextChunkKeyStream -{- Slits up the key's content into chunks, passing each chunk to - - the storer action, along with a unique chunk key. +{- Splits up the key's content into chunks, passing each chunk to + - the storer action, along with a corresponding chunk key and a + - progress meter update callback. - - Note that the storer action is responsible for catching any - exceptions it may encounter. - - - - A progress meter display is set up, and the storer action - - is passed a callback to update it. - - - - Once all chunks are successfully stored, updates the chunk log. -} storeChunks :: UUID -> ChunkConfig -> Key -> FilePath -> MeterUpdate -> (Key -> L.ByteString -> MeterUpdate -> Annex Bool) -> Annex Bool storeChunks u chunkconfig k f p storer = metered (Just p) k $ \meterupdate -> @@ -93,6 +89,8 @@ storeChunks u chunkconfig k f p storer = metered (Just p) k $ \meterupdate -> -- even for empty content. | not (null c) || numchunks == 0 = storechunk bytesprocessed sz [] c chunkkeys + -- Once all chunks are successfully stored, + -- update the chunk log. | otherwise = do chunksStored u k chunksize numchunks return True diff --git a/Remote/Helper/Encryptable.hs b/Remote/Helper/Encryptable.hs index 41174cf7c0..c450a10842 100644 --- a/Remote/Helper/Encryptable.hs +++ b/Remote/Helper/Encryptable.hs @@ -80,28 +80,24 @@ encryptableRemote -> ((Cipher, Key) -> Key -> FilePath -> MeterUpdate -> Annex Bool) -> Remote -> Remote -encryptableRemote c storeKeyEncrypted retrieveKeyFileEncrypted r = - r { - storeKey = store, - retrieveKeyFile = retrieve, - retrieveKeyFileCheap = retrieveCheap, - removeKey = withkey $ removeKey r, - hasKey = withkey $ hasKey r, - cost = maybe - (cost r) - (const $ cost r + encryptedRemoteCostAdj) - (extractCipher c) - } - where - store k f p = cip k >>= maybe +encryptableRemote c storeKeyEncrypted retrieveKeyFileEncrypted r = r + { storeKey = \k f p -> cip k >>= maybe (storeKey r k f p) (\enck -> storeKeyEncrypted enck k p) - retrieve k f d p = cip k >>= maybe + , retrieveKeyFile = \k f d p -> cip k >>= maybe (retrieveKeyFile r k f d p) (\enck -> retrieveKeyFileEncrypted enck k d p) - retrieveCheap k d = cip k >>= maybe + , retrieveKeyFileCheap = \k d -> cip k >>= maybe (retrieveKeyFileCheap r k d) (\_ -> return False) + , removeKey = withkey $ removeKey r + , hasKey = withkey $ hasKey r + , cost = maybe + (cost r) + (const $ cost r + encryptedRemoteCostAdj) + (extractCipher c) + } + where withkey a k = cip k >>= maybe (a k) (a . 
snd) cip = cipherKey c From 904859d676d97eec3d5057404eac0ab6c2ec9e3a Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Sat, 26 Jul 2014 13:25:06 -0400 Subject: [PATCH 06/54] wording --- Types/Remote.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Types/Remote.hs b/Types/Remote.hs index 2ddb68dfb8..584f3d044c 100644 --- a/Types/Remote.hs +++ b/Types/Remote.hs @@ -56,7 +56,7 @@ data RemoteA a = Remote { name :: RemoteName, -- Remotes have a use cost; higher is more expensive cost :: Cost, - -- Transfers a key to the remote. + -- Transfers a key's contents from disk to the remote. storeKey :: Key -> AssociatedFile -> MeterUpdate -> a Bool, -- Retrieves a key's contents to a file. -- (The MeterUpdate does not need to be used if it retrieves From d4d68f57e50125eba0d38adcf26e4b825e048cce Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Sat, 26 Jul 2014 20:11:41 -0400 Subject: [PATCH 07/54] finish up basic chunked remote groundwork Chunk retrieval and reassembly, removal, and checking if all necessary chunks are present. This commit was sponsored by Damien Raude-Morvan. --- Remote/Helper/Chunked.hs | 135 +++++++++++++++++++++++++++----- Remote/Helper/Chunked/Legacy.hs | 10 +++ 2 files changed, 125 insertions(+), 20 deletions(-) diff --git a/Remote/Helper/Chunked.hs b/Remote/Helper/Chunked.hs index e298299ce0..18dfe8aeee 100644 --- a/Remote/Helper/Chunked.hs +++ b/Remote/Helper/Chunked.hs @@ -5,14 +5,16 @@ - Licensed under the GNU GPL version 3 or higher. -} -module Remote.Helper.Chunked - ( ChunkSize - , ChunkConfig(..) - , chunkConfig - , storeChunks - , chunkKeys - , meteredWriteFileChunks - ) where +module Remote.Helper.Chunked ( + ChunkSize, + ChunkConfig(..), + chunkConfig, + storeChunks, + chunkKeys, + removeChunks, + retrieveChunks, + hasKeyChunks, +) where import Common.Annex import Utility.DataUnits @@ -21,6 +23,7 @@ import Types.Key import Logs.Chunk.Pure (ChunkSize, ChunkCount) import Logs.Chunk import Utility.Metered +import Crypto (EncKey) import qualified Data.ByteString.Lazy as L import qualified Data.ByteString as S @@ -69,8 +72,10 @@ numChunks = pred . fromJust . keyChunkNum . fst . nextChunkKeyStream - - Note that the storer action is responsible for catching any - exceptions it may encounter. + - + - This action may be called on a chunked key. It will simply store it. 
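+ - (For example, with chunk=10 megabytes, a 25 megabyte object is stored
+ - as 3 chunk keys, numbered 1 to 3; once all are stored, the chunk log
+ - records that count and chunk size for the parent key.)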
-} -storeChunks :: UUID -> ChunkConfig -> Key -> FilePath -> MeterUpdate -> (Key -> L.ByteString -> MeterUpdate -> Annex Bool) -> Annex Bool +storeChunks :: UUID -> ChunkConfig -> Key -> FilePath -> MeterUpdate -> (Key -> L.ByteString -> MeterUpdate -> IO Bool) -> Annex Bool storeChunks u chunkconfig k f p storer = metered (Just p) k $ \meterupdate -> either (\e -> liftIO (print e) >> return False) (go meterupdate) =<< (liftIO $ tryIO $ L.readFile f) @@ -78,7 +83,7 @@ storeChunks u chunkconfig k f p storer = metered (Just p) k $ \meterupdate -> go meterupdate b = case chunkconfig of (UnpaddedChunks chunksize) | not (isChunkKey k) -> gochunks meterupdate chunksize b (chunkKeyStream k chunksize) - _ -> storer k b meterupdate + _ -> liftIO $ storer k b meterupdate gochunks :: MeterUpdate -> ChunkSize -> L.ByteString -> ChunkKeyStream -> Annex Bool gochunks meterupdate chunksize lb = @@ -107,7 +112,7 @@ storeChunks u chunkconfig k f p storer = metered (Just p) k $ \meterupdate -> storechunk bytesprocessed sz bs c chunkkeys = do let (chunkkey, chunkkeys') = nextChunkKeyStream chunkkeys - ifM (storer chunkkey (L.fromChunks $ reverse c) meterupdate') + ifM (liftIO $ storer chunkkey (L.fromChunks $ reverse c) meterupdate') ( do let bytesprocessed' = addBytesProcessed bytesprocessed (chunksize - sz) loop bytesprocessed' chunksize bs [] chunkkeys' @@ -129,19 +134,109 @@ storeChunks u chunkconfig k f p storer = metered (Just p) k $ \meterupdate -> - requested key. -} chunkKeys :: UUID -> ChunkConfig -> Key -> Annex [[Key]] -chunkKeys u (UnpaddedChunks _) k = do +chunkKeys u (UnpaddedChunks _) k | not (isChunkKey k) = do chunklists <- map (toChunkList k) <$> getCurrentChunks u k - return ([k]:chunklists) + -- Probably using the chunklists, but the unchunked + -- key could be present. + return (chunklists ++ [[k]]) chunkKeys _ _ k = pure [[k]] toChunkList :: Key -> (ChunkSize, ChunkCount) -> [Key] toChunkList k (chunksize, chunkcount) = takeChunkKeyStream chunkcount $ chunkKeyStream k chunksize -{- Writes a series of chunks to a file. The feeder is called to get - - each chunk. -} -meteredWriteFileChunks :: MeterUpdate -> FilePath -> [v] -> (v -> IO L.ByteString) -> IO () -meteredWriteFileChunks meterupdate dest chunks feeder = - withBinaryFile dest WriteMode $ \h -> - forM_ chunks $ - meteredWrite meterupdate h <=< feeder +{- Removes all chunks of a key from a remote, by calling a remover + - action on each. The remover action should succeed even if asked to + - remove a key that is not present on the remote. + - + - This action may be called on a chunked key. It will simply remove it. + -} +removeChunks :: (Key -> Annex Bool) -> UUID -> ChunkConfig -> EncKey -> Key -> Annex Bool +removeChunks remover u chunkconfig encryptor k = do + ls <- chunkKeys u chunkconfig k + ok <- and <$> mapM (remover . encryptor) (concat ls) + when ok $ + case chunkconfig of + (UnpaddedChunks _) | not (isChunkKey k) -> do + let chunksizes = catMaybes $ map (keyChunkSize <=< headMaybe) ls + forM_ chunksizes $ chunksRemoved u k . fromIntegral + _ -> noop + return ok + +{- Retrieves a key from a remote, using a retriever action that + - streams it to a ByteString. + - + - When the remote is chunked, tries each of the options returned by + - chunkKeys until it finds one where the retriever successfully + - gets the first key in the list. The content of that key, and any + - other chunks in the list is fed to the sink. + - + - If retrival of one of the subsequent chunks throws an exception, + - gives up and returns False. 
Note that partial data may have been
+ - written to the sink in this case.
+ -}
+retrieveChunks
+	:: (Key -> IO L.ByteString)
+	-> UUID
+	-> ChunkConfig
+	-> EncKey
+	-> Key
+	-> MeterUpdate
+	-> (MeterUpdate -> L.ByteString -> IO ())
+	-> Annex Bool
+retrieveChunks retriever u chunkconfig encryptor basek basep sink = do
+	ls <- chunkKeys u chunkconfig basek
+	liftIO $ flip catchNonAsync giveup (firstavail ls)
+  where
+	giveup e = print e >> return False
+
+	firstavail [] = return False
+	firstavail ([]:ls) = firstavail ls
+	firstavail ((k:ks):ls) = do
+		v <- tryNonAsync $ retriever (encryptor k)
+		case v of
+			Left e
+				| null ls -> giveup e
+				| otherwise -> firstavail ls
+			Right b -> do
+				sink basep b
+				let sz = toBytesProcessed $
+					fromMaybe 0 $ keyChunkSize k
+				getrest sz sz ks
+
+	getrest _ _ [] = return True
+	getrest sz bytesprocessed (k:ks) = do
+		let p = offsetMeterUpdate basep bytesprocessed
+		sink p =<< retriever (encryptor k)
+		getrest sz (addBytesProcessed bytesprocessed sz) ks
+
+{- Checks if a key is present in a remote. This requires any one
+ - of the lists of options returned by chunkKeys to all check out
+ - as being present using the checker action.
+ -}
+hasKeyChunks
+	:: (Key -> Annex (Either String Bool))
+	-> UUID
+	-> ChunkConfig
+	-> EncKey
+	-> Key
+	-> Annex (Either String Bool)
+hasKeyChunks checker u chunkconfig encryptor basek =
+	checklists impossible =<< chunkKeys u chunkconfig basek
+  where
+	checklists lastfailmsg [] = return $ Left lastfailmsg
+	checklists _ (l:ls)
+		| not (null l) =
+			either (`checklists` ls) (return . Right)
+				=<< checkchunks l
+		| otherwise = checklists impossible ls
+
+	checkchunks :: [Key] -> Annex (Either String Bool)
+	checkchunks [] = return (Right True)
+	checkchunks (k:ks) = do
+		v <- checker (encryptor k)
+		if v == Right True
+			then checkchunks ks
+			else return v
+
+	impossible = "no recorded chunks"
diff --git a/Remote/Helper/Chunked/Legacy.hs b/Remote/Helper/Chunked/Legacy.hs
index 1ec0eb68f2..e435851db1 100644
--- a/Remote/Helper/Chunked/Legacy.hs
+++ b/Remote/Helper/Chunked/Legacy.hs
@@ -9,6 +9,7 @@ module Remote.Helper.Chunked.Legacy where
 
 import Common.Annex
 import Remote.Helper.Chunked
+import Utility.Metered
 
 import qualified Data.ByteString.Lazy as L
 import qualified Control.Exception as E
@@ -114,3 +115,12 @@ storeChunked chunksize dests storer content = either onerr return
 			let (chunk, b') = L.splitAt sz b
 			storer d chunk
 			storechunks sz (d:useddests) ds b'
+
+{- Writes a series of chunks to a file. The feeder is called to get
+ - each chunk.
+ -}
+meteredWriteFileChunks :: MeterUpdate -> FilePath -> [v] -> (v -> IO L.ByteString) -> IO ()
+meteredWriteFileChunks meterupdate dest chunks feeder =
+	withBinaryFile dest WriteMode $ \h ->
+		forM_ chunks $
+			meteredWrite meterupdate h <=< feeder

From 1400cbb032f41f2769f6fe78786900c6688eab27 Mon Sep 17 00:00:00 2001
From: Joey Hess
Date: Sat, 26 Jul 2014 20:14:09 -0400
Subject: [PATCH 08/54] Support for remotes that are chunkable and
 encryptable.

I'd have liked to keep these two concepts entirely separate, but they
are entangled: Storing a key in an encrypted and chunked remote needs to
generate chunk keys, encrypt the keys, chunk the data, encrypt the
chunks, and send them to the remote. Similarly for retrieval, etc.

So, here's an implementation of all of that.

The total win here is that every remote was implementing encrypted
storage and retrieval, and now it can move into this single place.
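
The composition is easiest to see on plain ByteStrings. A toy,
self-contained sketch, not the real API: chunksOf and toyEncrypt stand in
for the chunker and for gpg, and the ordering -- chunk first, then encrypt
each chunk -- is the point:

    import qualified Data.ByteString.Lazy as L
    import Data.Bits (xor)

    -- toy stand-in for the chunker
    chunksOf :: Int -> L.ByteString -> [L.ByteString]
    chunksOf n b
        | L.null b = []
        | otherwise = c : chunksOf n rest
      where
        (c, rest) = L.splitAt (fromIntegral n) b

    -- toy stand-in for encryption
    toyEncrypt :: L.ByteString -> L.ByteString
    toyEncrypt = L.map (`xor` 0x42)

    -- chunk, then encrypt each chunk, then hand to a storer action
    storeToy :: (Integer -> L.ByteString -> IO ()) -> L.ByteString -> IO ()
    storeToy storer b = mapM_ (\(n, c) -> storer n (toyEncrypt c)) $
        zip [1..] (chunksOf 10 b)
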
I expect this to result in several hundred lines of code being removed from git-annex eventually! This commit was sponsored by Henrik Ahlgren. --- Crypto.hs | 5 +- Remote/Directory/LegacyChunked.hs | 110 +++++++++++++++++++++++++ Remote/Helper/ChunkedEncryptable.hs | 121 ++++++++++++++++++++++++++++ Remote/Helper/Encryptable.hs | 33 ++++---- 4 files changed, 254 insertions(+), 15 deletions(-) create mode 100644 Remote/Directory/LegacyChunked.hs create mode 100644 Remote/Helper/ChunkedEncryptable.hs diff --git a/Crypto.hs b/Crypto.hs index 0bfa81db2e..89b47f3184 100644 --- a/Crypto.hs +++ b/Crypto.hs @@ -13,6 +13,7 @@ module Crypto ( Cipher, KeyIds(..), + EncKey, StorableCipher(..), genEncryptedCipher, genSharedCipher, @@ -138,10 +139,12 @@ decryptCipher (EncryptedCipher t variant _) = Hybrid -> Cipher PubKey -> MacOnlyCipher +type EncKey = Key -> Key + {- Generates an encrypted form of a Key. The encryption does not need to be - reversable, nor does it need to be the same type of encryption used - on content. It does need to be repeatable. -} -encryptKey :: Mac -> Cipher -> Key -> Key +encryptKey :: Mac -> Cipher -> EncKey encryptKey mac c k = stubKey { keyName = macWithCipher mac c (key2file k) , keyBackendName = "GPG" ++ showMac mac diff --git a/Remote/Directory/LegacyChunked.hs b/Remote/Directory/LegacyChunked.hs new file mode 100644 index 0000000000..df6d94d04a --- /dev/null +++ b/Remote/Directory/LegacyChunked.hs @@ -0,0 +1,110 @@ +{- Legacy chunksize support for directory special remote. + - + - Can be removed eventually. + - + - Copyright 2011-2012 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +module Remote.Directory.LegacyChunked where + +import qualified Data.ByteString.Lazy as L +import qualified Data.ByteString as S + +import Common.Annex +import Utility.FileMode +import Remote.Helper.ChunkedEncryptable +import qualified Remote.Helper.Chunked.Legacy as Legacy +import Annex.Perms +import Utility.Metered + +withCheckedFiles :: (FilePath -> IO Bool) -> FilePath -> (FilePath -> Key -> [FilePath]) -> Key -> ([FilePath] -> IO Bool) -> IO Bool +withCheckedFiles _ [] _locations _ _ = return False +withCheckedFiles check d locations k a = go $ locations d k + where + go [] = return False + go (f:fs) = do + let chunkcount = f ++ Legacy.chunkCount + ifM (check chunkcount) + ( do + chunks <- Legacy.listChunks f <$> readFile chunkcount + ifM (allM check chunks) + ( a chunks , return False ) + , do + chunks <- Legacy.probeChunks f check + if null chunks + then go fs + else a chunks + ) +withStoredFiles :: FilePath -> (FilePath -> Key -> [FilePath]) -> Key -> ([FilePath] -> IO Bool) -> IO Bool +withStoredFiles = withCheckedFiles doesFileExist + +{- Splits a ByteString into chunks and writes to dests, obeying configured + - chunk size (not to be confused with the L.ByteString chunk size). 
-} +storeLegacyChunked :: MeterUpdate -> ChunkSize -> [FilePath] -> L.ByteString -> IO [FilePath] +storeLegacyChunked _ _ [] _ = error "bad storeLegacyChunked call" +storeLegacyChunked meterupdate chunksize alldests@(firstdest:_) b + | L.null b = do + -- always write at least one file, even for empty + L.writeFile firstdest b + return [firstdest] + | otherwise = storeLegacyChunked' meterupdate chunksize alldests (L.toChunks b) [] +storeLegacyChunked' :: MeterUpdate -> ChunkSize -> [FilePath] -> [S.ByteString] -> [FilePath] -> IO [FilePath] +storeLegacyChunked' _ _ [] _ _ = error "ran out of dests" +storeLegacyChunked' _ _ _ [] c = return $ reverse c +storeLegacyChunked' meterupdate chunksize (d:dests) bs c = do + bs' <- withFile d WriteMode $ + feed zeroBytesProcessed chunksize bs + storeLegacyChunked' meterupdate chunksize dests bs' (d:c) + where + feed _ _ [] _ = return [] + feed bytes sz (l:ls) h = do + let len = S.length l + let s = fromIntegral len + if s <= sz || sz == chunksize + then do + S.hPut h l + let bytes' = addBytesProcessed bytes len + meterupdate bytes' + feed bytes' (sz - s) ls h + else return (l:ls) + +storeHelper :: (FilePath -> FilePath -> IO ()) -> Key -> ([FilePath] -> IO [FilePath]) -> FilePath -> FilePath -> IO Bool +storeHelper finalizer key storer tmpdir destdir = do + void $ liftIO $ tryIO $ createDirectoryIfMissing True tmpdir + Legacy.storeChunks key tmpdir destdir storer recorder finalizer + where + recorder f s = do + void $ tryIO $ allowWrite f + writeFile f s + void $ tryIO $ preventWrite f + +store :: ChunkSize -> (FilePath -> FilePath -> IO ()) -> Key -> L.ByteString -> MeterUpdate -> FilePath -> FilePath -> IO Bool +store chunksize finalizer k b p = storeHelper finalizer k $ \dests -> + storeLegacyChunked p chunksize dests b + +{- Need to get a single ByteString containing every chunk. + - Done very innefficiently, by writing to a temp file. + - :/ This is legacy code.. + -} +retrieve :: (FilePath -> Key -> [FilePath]) -> FilePath -> PrepareRetriever +retrieve locations d basek = do + showLongNote "This remote uses the deprecated chunksize setting. So this will be quite slow." + tmpdir <- fromRepo $ gitAnnexTmpMiscDir + createAnnexDirectory tmpdir + let tmp = tmpdir keyFile basek ++ ".directorylegacy.tmp" + return $ Just $ \k -> do + void $ withStoredFiles d locations k $ \fs -> do + forM_ fs $ + S.appendFile tmp <=< S.readFile + return True + b <- L.readFile tmp + nukeFile tmp + return b + +checkPresent :: FilePath -> (FilePath -> Key -> [FilePath]) -> Key -> Annex (Either String Bool) +checkPresent d locations k = liftIO $ catchMsgIO $ + withStoredFiles d locations k $ + -- withStoredFiles checked that it exists + const $ return True diff --git a/Remote/Helper/ChunkedEncryptable.hs b/Remote/Helper/ChunkedEncryptable.hs new file mode 100644 index 0000000000..740da58b91 --- /dev/null +++ b/Remote/Helper/ChunkedEncryptable.hs @@ -0,0 +1,121 @@ +{- Remotes that support both chunking and encryption. + - + - Copyright 2014 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. 
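+ -
+ - The pattern: a remote supplies a PrepareStorer and a PrepareRetriever,
+ - and this module layers chunking (Remote.Helper.Chunked) and encryption
+ - (Remote.Helper.Encryptable) on top of them, so individual remotes no
+ - longer have to implement either themselves.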
+ -} + +module Remote.Helper.ChunkedEncryptable ( + chunkedEncryptableRemote, + PrepareStorer, + Storer, + PrepareRetriever, + Retriever, + storeKeyDummy, + retreiveKeyFileDummy, + module X +) where + +import qualified Data.ByteString.Lazy as L + +import Common.Annex +import Types.Remote +import Crypto +import Config.Cost +import Utility.Metered +import Remote.Helper.Chunked as X +import Remote.Helper.Encryptable as X +import Annex.Content +import Annex.Exception + +-- Prepares to store a Key, and returns a Storer action if possible. +type PrepareStorer = Key -> Annex (Maybe Storer) + +-- Stores a Key, which may be encrypted and/or a chunk key. +type Storer = Key -> L.ByteString -> MeterUpdate -> IO Bool + +-- Prepares to retrieve a Key, and returns a Retriever action if possible. +type PrepareRetriever = Key -> Annex (Maybe Retriever) + +-- Retrieves a Key, which may be encrypted and/or a chunk key. +-- Throws exception if key is not present, or remote is not accessible. +type Retriever = Key -> IO L.ByteString + +{- Modifies a base Remote to support both chunking and encryption. + -} +chunkedEncryptableRemote + :: RemoteConfig + -> PrepareStorer + -> PrepareRetriever + -> Remote + -> Remote +chunkedEncryptableRemote c preparestorer prepareretriever r = encr + where + encr = r + { storeKey = \k _f p -> cip >>= storeKeyGen k p + , retrieveKeyFile = \k _f d p -> cip >>= retrieveKeyFileGen k d p + , retrieveKeyFileCheap = \k d -> cip >>= maybe + (retrieveKeyFileCheap r k d) + (\_ -> return False) + , removeKey = \k -> cip >>= removeKeyGen k + , hasKey = \k -> cip >>= hasKeyGen k + , cost = maybe + (cost r) + (const $ cost r + encryptedRemoteCostAdj) + (extractCipher c) + } + cip = cipherKey c + chunkconfig = chunkConfig c + gpgopts = getGpgEncParams encr + + -- chunk, then encrypt, then feed to the storer + storeKeyGen k p enc = maybe (return False) go =<< preparestorer k + where + go storer = sendAnnex k rollback $ \src -> + metered (Just p) k $ \p' -> + storeChunks (uuid r) chunkconfig k src p' $ + storechunk storer + rollback = void $ removeKey encr k + storechunk storer k' b p' = case enc of + Nothing -> storer k' b p' + Just (cipher, enck) -> + encrypt gpgopts cipher (feedBytes b) $ + readBytes $ \encb -> + storer (enck k') encb p' + + -- call retriever to get chunks; decrypt them; stream to dest file + retrieveKeyFileGen k dest p enc = + maybe (return False) go =<< prepareretriever k + where + go retriever = metered (Just p) k $ \p' -> + bracketIO (openBinaryFile dest WriteMode) hClose $ \h -> + retrieveChunks retriever (uuid r) chunkconfig enck k p' $ + sink h + sink h p' b = do + let write = meteredWrite p' h + case enc of + Nothing -> write b + Just (cipher, _) -> + decrypt cipher (feedBytes b) $ + readBytes write + enck = maybe id snd enc + + removeKeyGen k enc = removeChunks remover (uuid r) chunkconfig enck k + where + enck = maybe id snd enc + remover = removeKey r + + hasKeyGen k enc = hasKeyChunks checker (uuid r) chunkconfig enck k + where + enck = maybe id snd enc + checker = hasKey r + +{- The base Remote that is provided to chunkedEncryptableRemote + - needs to have storeKey and retreiveKeyFile methods, but they are + - never actually used (since chunkedEncryptableRemote replaces + - them). Here are some dummy ones. 
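+ -
+ - A sketch of typical use (cf. Remote/Directory.hs):
+ -
+ - > chunkedEncryptableRemote c prepareStore prepareRetrieve $ baseremote
+ - >	{ storeKey = storeKeyDummy
+ - >	, retrieveKeyFile = retreiveKeyFileDummy
+ - >	}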
+ -} +storeKeyDummy :: Key -> AssociatedFile -> MeterUpdate -> Annex Bool +storeKeyDummy _ _ _ = return False +retreiveKeyFileDummy :: Key -> AssociatedFile -> FilePath -> MeterUpdate -> Annex Bool +retreiveKeyFileDummy _ _ _ _ = return False diff --git a/Remote/Helper/Encryptable.hs b/Remote/Helper/Encryptable.hs index c450a10842..9da5e641d2 100644 --- a/Remote/Helper/Encryptable.hs +++ b/Remote/Helper/Encryptable.hs @@ -70,10 +70,8 @@ encryptionSetup c = maybe genCipher updateCipher $ extractCipher c -- remotes (while being backward-compatible). [ "keyid", "keyid+", "keyid-", "highRandomQuality" ] -{- Modifies a Remote to support encryption. - - - - Two additional functions must be provided by the remote, - - to support storing and retrieving encrypted content. -} +{- Modifies a Remote to support encryption. -} +-- TODO: deprecated encryptableRemote :: RemoteConfig -> ((Cipher, Key) -> Key -> MeterUpdate -> Annex Bool) @@ -83,23 +81,30 @@ encryptableRemote encryptableRemote c storeKeyEncrypted retrieveKeyFileEncrypted r = r { storeKey = \k f p -> cip k >>= maybe (storeKey r k f p) - (\enck -> storeKeyEncrypted enck k p) + (\v -> storeKeyEncrypted v k p) , retrieveKeyFile = \k f d p -> cip k >>= maybe (retrieveKeyFile r k f d p) - (\enck -> retrieveKeyFileEncrypted enck k d p) + (\v -> retrieveKeyFileEncrypted v k d p) , retrieveKeyFileCheap = \k d -> cip k >>= maybe (retrieveKeyFileCheap r k d) (\_ -> return False) - , removeKey = withkey $ removeKey r - , hasKey = withkey $ hasKey r + , removeKey = \k -> cip k >>= maybe + (removeKey r k) + (\(_, enckey) -> removeKey r enckey) + , hasKey = \k -> cip k >>= maybe + (hasKey r k) + (\(_, enckey) -> hasKey r enckey) , cost = maybe (cost r) (const $ cost r + encryptedRemoteCostAdj) (extractCipher c) } where - withkey a k = cip k >>= maybe (a k) (a . snd) - cip = cipherKey c + cip k = do + v <- cipherKey c + return $ case v of + Nothing -> Nothing + Just (cipher, enck) -> Just (cipher, enck k) {- Gets encryption Cipher. The decrypted Ciphers are cached in the Annex - state. -} @@ -132,11 +137,11 @@ embedCreds c | isJust (M.lookup "cipherkeys" c) && isJust (M.lookup "cipher" c) = True | otherwise = False -{- Gets encryption Cipher, and encrypted version of Key. -} -cipherKey :: RemoteConfig -> Key -> Annex (Maybe (Cipher, Key)) -cipherKey c k = fmap make <$> remoteCipher c +{- Gets encryption Cipher, and key encryptor. -} +cipherKey :: RemoteConfig -> Annex (Maybe (Cipher, EncKey)) +cipherKey c = fmap make <$> remoteCipher c where - make ciphertext = (ciphertext, encryptKey mac ciphertext k) + make ciphertext = (ciphertext, encryptKey mac ciphertext) mac = fromMaybe defaultMac $ M.lookup "mac" c >>= readMac {- Stores an StorableCipher in a remote's configuration. -} From b2922c1d6d982cca9438124fa6c96312bbae62d5 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Sat, 26 Jul 2014 20:19:24 -0400 Subject: [PATCH 09/54] convert directory special remote to using ChunkedEncryptable And clean up legacy chunking code, which is in its own module now. So much cleaner! This commit was sponsored by Henrik Ahlgren --- Remote/Directory.hs | 199 ++++++++++++-------------------------------- 1 file changed, 53 insertions(+), 146 deletions(-) diff --git a/Remote/Directory.hs b/Remote/Directory.hs index 3305f712b3..ae2c43200c 100644 --- a/Remote/Directory.hs +++ b/Remote/Directory.hs @@ -1,6 +1,6 @@ {- A "remote" that is just a filesystem directory. - - - Copyright 2011-2012 Joey Hess + - Copyright 2011-2014 Joey Hess - - Licensed under the GNU GPL version 3 or higher. 
 -}
 
@@ -10,7 +10,6 @@ module Remote.Directory (remote) where
 
 import qualified Data.ByteString.Lazy as L
-import qualified Data.ByteString as S
 import qualified Data.Map as M
 
 import Common.Annex
@@ -21,10 +20,8 @@ import Config.Cost
 import Config
 import Utility.FileMode
 import Remote.Helper.Special
-import Remote.Helper.Encryptable
-import Remote.Helper.Chunked
-import qualified Remote.Helper.Chunked.Legacy as Legacy
-import Crypto
+import Remote.Helper.ChunkedEncryptable
+import qualified Remote.Directory.LegacyChunked as Legacy
 import Annex.Content
 import Annex.UUID
 import Utility.Metered
@@ -41,18 +38,18 @@ gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> Annex (Maybe Remot
 gen r u c gc = do
 	cst <- remoteCost gc cheapRemoteCost
 	let chunkconfig = chunkConfig c
-	return $ Just $ encryptableRemote c
-		(storeEncrypted dir (getGpgEncParams (c,gc)) chunkconfig)
-		(retrieveEncrypted u dir chunkconfig)
+	return $ Just $ chunkedEncryptableRemote c
+		(prepareStore dir chunkconfig)
+		(retrieve dir chunkconfig)
 		Remote {
 			uuid = u,
 			cost = cst,
 			name = Git.repoDescribe r,
-			storeKey = store u dir chunkconfig,
-			retrieveKeyFile = retrieve u dir chunkconfig,
-			retrieveKeyFileCheap = retrieveCheap u dir chunkconfig,
+			storeKey = storeKeyDummy,
+			retrieveKeyFile = retreiveKeyFileDummy,
+			retrieveKeyFileCheap = retrieveCheap dir chunkconfig,
 			removeKey = remove dir,
-			hasKey = checkPresent u dir chunkconfig,
+			hasKey = checkPresent dir chunkconfig,
 			hasKeyCheap = True,
 			whereisKey = Nothing,
 			remoteFsck = Nothing,
@@ -84,122 +81,50 @@ directorySetup mu _ c = do
 	gitConfigSpecialRemote u c' "directory" absdir
 	return (M.delete "directory" c', u)
 
-{- Locations to try to access a given Key in the Directory.
- - We try more than since we used to write to different hash directories. -}
+{- Locations to try to access a given Key in the directory.
+ - We try more than one since we used to write to different hash
+ - directories. -}
 locations :: FilePath -> Key -> [FilePath]
 locations d k = map (d </>) (keyPaths k)
 
+{- Returns the location of a Key in the directory. If the key is
+ - present, returns the location that is actually used, otherwise
+ - returns the first, default location. -}
+getLocation :: FilePath -> Key -> IO FilePath
+getLocation d k = do
+	let locs = locations d k
+	fromMaybe (Prelude.head locs) <$> firstM doesFileExist locs
+
 {- Directory where the file(s) for a key are stored. -}
 storeDir :: FilePath -> Key -> FilePath
 storeDir d k = addTrailingPathSeparator $ d </> hashDirLower k </> keyFile k
 
-{- Where we store temporary data for a key as it's being uploaded. -}
+{- Where we store temporary data for a key, in the directory, as it's being
+ - written.
-} tmpDir :: FilePath -> Key -> FilePath tmpDir d k = addTrailingPathSeparator $ d "tmp" keyFile k -withCheckedFiles :: (FilePath -> IO Bool) -> UUID -> ChunkConfig -> FilePath -> Key -> ([FilePath] -> IO Bool) -> IO Bool -withCheckedFiles _ _ _ [] _ _ = return False -withCheckedFiles check _ (LegacyChunks _) d k a = go $ locations d k - where - go [] = return False - go (f:fs) = do - let chunkcount = f ++ Legacy.chunkCount - ifM (check chunkcount) - ( do - chunks <- Legacy.listChunks f <$> readFile chunkcount - ifM (allM check chunks) - ( a chunks , return False ) - , do - chunks <- Legacy.probeChunks f check - if null chunks - then go fs - else a chunks - ) -withCheckedFiles check u chunkconfig d k a = - go $ locations d k - where - go [] = return False - go (f:fs) = ifM (check f) ( a [f] , go fs ) +{- Check if there is enough free disk space in the remote's directory to + - store the key. Note that the unencrypted key size is checked. -} +prepareStore :: FilePath -> ChunkConfig -> PrepareStorer +prepareStore d chunkconfig k = ifM (checkDiskSpace (Just d) k 0) + ( return $ Just (store d chunkconfig) + , return Nothing + ) -withStoredFiles :: UUID -> ChunkConfig -> FilePath -> Key -> ([FilePath] -> IO Bool) -> IO Bool -withStoredFiles = withCheckedFiles doesFileExist - -store :: UUID -> FilePath -> ChunkConfig -> Key -> AssociatedFile -> MeterUpdate -> Annex Bool -store u d chunkconfig k _f p = whenDiskAvail d k $ - sendAnnex k (void $ remove d k) $ \src -> - storeChunks u chunkconfig k src p $ \k' b meterupdate -> - storeHelper d chunkconfig k' $ \dests -> - case chunkconfig of - LegacyChunks chunksize -> - storeLegacyChunked meterupdate chunksize dests b - _ -> do - let dest = Prelude.head dests - meteredWriteFile meterupdate dest b - return [dest] - -storeEncrypted :: FilePath -> [CommandParam] -> ChunkConfig -> (Cipher, Key) -> Key -> MeterUpdate -> Annex Bool -storeEncrypted d gpgOpts chunkconfig (cipher, enck) k p = whenDiskAvail d k $ - sendAnnex k (void $ remove d enck) $ \src -> - metered (Just p) k $ \meterupdate -> - storeHelper d chunkconfig enck $ \dests -> - encrypt gpgOpts cipher (feedFile src) $ readBytes $ \b -> - case chunkconfig of - LegacyChunks chunksize -> - storeLegacyChunked meterupdate chunksize dests b - _ -> do - let dest = Prelude.head dests - meteredWriteFile meterupdate dest b - return [dest] - -{- Splits a ByteString into chunks and writes to dests, obeying configured - - chunk size (not to be confused with the L.ByteString chunk size). 
-} -storeLegacyChunked :: MeterUpdate -> ChunkSize -> [FilePath] -> L.ByteString -> IO [FilePath] -storeLegacyChunked _ _ [] _ = error "bad storeLegacyChunked call" -storeLegacyChunked meterupdate chunksize alldests@(firstdest:_) b - | L.null b = do - -- always write at least one file, even for empty - L.writeFile firstdest b - return [firstdest] - | otherwise = storeLegacyChunked' meterupdate chunksize alldests (L.toChunks b) [] -storeLegacyChunked' :: MeterUpdate -> ChunkSize -> [FilePath] -> [S.ByteString] -> [FilePath] -> IO [FilePath] -storeLegacyChunked' _ _ [] _ _ = error "ran out of dests" -storeLegacyChunked' _ _ _ [] c = return $ reverse c -storeLegacyChunked' meterupdate chunksize (d:dests) bs c = do - bs' <- withFile d WriteMode $ - feed zeroBytesProcessed chunksize bs - storeLegacyChunked' meterupdate chunksize dests bs' (d:c) - where - feed _ _ [] _ = return [] - feed bytes sz (l:ls) h = do - let len = S.length l - let s = fromIntegral len - if s <= sz || sz == chunksize - then do - S.hPut h l - let bytes' = addBytesProcessed bytes len - meterupdate bytes' - feed bytes' (sz - s) ls h - else return (l:ls) - -{- An encrypted key does not have a known size, so the unencrypted - - key should always be passed. -} -whenDiskAvail :: FilePath -> Key -> Annex Bool -> Annex Bool -whenDiskAvail d k a = checkDiskSpace (Just d) k 0 <&&> a - -storeHelper :: FilePath -> ChunkConfig -> Key -> ([FilePath] -> IO [FilePath]) -> Annex Bool -storeHelper d chunkconfig key storer = liftIO $ do +store :: FilePath -> ChunkConfig -> Storer +store d chunkconfig k b p = do void $ liftIO $ tryIO $ createDirectoryIfMissing True tmpdir case chunkconfig of - LegacyChunks _ -> Legacy.storeChunks key tmpdir destdir storer recorder finalizer + LegacyChunks chunksize -> Legacy.store chunksize finalizer k b p tmpdir destdir _ -> flip catchNonAsync (\e -> print e >> return False) $ do - let tmpf = tmpdir keyFile key - void $ storer [tmpf] + let tmpf = tmpdir keyFile k + meteredWriteFile p tmpf b finalizer tmpdir destdir return True where - tmpdir = tmpDir d key - destdir = storeDir d key - + tmpdir = tmpDir d k + destdir = storeDir d k finalizer tmp dest = do void $ tryIO $ allowWrite dest -- may already exist void $ tryIO $ removeDirectoryRecursive dest -- or not exist @@ -209,41 +134,22 @@ storeHelper d chunkconfig key storer = liftIO $ do void $ tryIO $ do mapM_ preventWrite =<< dirContents dest preventWrite dest - - recorder f s = do - void $ tryIO $ allowWrite f - writeFile f s - void $ tryIO $ preventWrite f -retrieve :: UUID -> FilePath -> ChunkConfig -> Key -> AssociatedFile -> FilePath -> MeterUpdate -> Annex Bool -retrieve u d chunkconfig k _ f p = metered (Just p) k $ \meterupdate -> - liftIO $ withStoredFiles u chunkconfig d k $ \files -> - catchBoolIO $ do - meteredWriteFileChunks meterupdate f files L.readFile - return True +retrieve :: FilePath -> ChunkConfig -> PrepareRetriever +retrieve d (LegacyChunks _) basek = Legacy.retrieve locations d basek +retrieve d _ _ = return $ Just $ \k -> L.readFile =<< getLocation d k -retrieveEncrypted :: UUID -> FilePath -> ChunkConfig -> (Cipher, Key) -> Key -> FilePath -> MeterUpdate -> Annex Bool -retrieveEncrypted u d chunkconfig (cipher, enck) k f p = metered (Just p) k $ \meterupdate -> - liftIO $ withStoredFiles u chunkconfig d enck $ \files -> - catchBoolIO $ do - decrypt cipher (feeder files) $ - readBytes $ meteredWriteFile meterupdate f - return True - where - feeder files h = forM_ files $ L.hPut h <=< L.readFile - -retrieveCheap :: UUID -> FilePath 
-> ChunkConfig -> Key -> FilePath -> Annex Bool --- no cheap retrieval for chunks -retrieveCheap _ _ (UnpaddedChunks _) _ _ = return False -retrieveCheap _ _ (LegacyChunks _) _ _ = return False +retrieveCheap :: FilePath -> ChunkConfig -> Key -> FilePath -> Annex Bool +-- no cheap retrieval possible for chunks +retrieveCheap _ (UnpaddedChunks _) _ _ = return False +retrieveCheap _ (LegacyChunks _) _ _ = return False #ifndef mingw32_HOST_OS -retrieveCheap u d ck k f = liftIO $ withStoredFiles u ck d k go - where - go [file] = catchBoolIO $ - createSymbolicLink file f >> return True - go _files = return False +retrieveCheap d NoChunks k f = liftIO $ catchBoolIO $ do + file <- getLocation d k + createSymbolicLink file f + return True #else -retrieveCheap _ _ _ _ _ = return False +retrieveCheap _ _ _ _ = return False #endif remove :: FilePath -> Key -> Annex Bool @@ -260,6 +166,7 @@ remove d k = liftIO $ do where dir = storeDir d k -checkPresent :: UUID -> FilePath -> ChunkConfig -> Key -> Annex (Either String Bool) -checkPresent u d chunkconfig k = liftIO $ catchMsgIO $ withStoredFiles u chunkconfig d k $ - const $ return True -- withStoredFiles checked that it exists +checkPresent :: FilePath -> ChunkConfig -> Key -> Annex (Either String Bool) +checkPresent d (LegacyChunks _) k = Legacy.checkPresent d locations k +checkPresent d _ k = liftIO $ catchMsgIO $ + anyM doesFileExist (locations d k) From 34c6fdf5e36465464543daf8176508e792da5cc1 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Sat, 26 Jul 2014 20:21:10 -0400 Subject: [PATCH 10/54] fix build --- Remote/WebDAV.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Remote/WebDAV.hs b/Remote/WebDAV.hs index 4be7e47016..36df609459 100644 --- a/Remote/WebDAV.hs +++ b/Remote/WebDAV.hs @@ -140,7 +140,7 @@ retrieve :: Remote -> Key -> AssociatedFile -> FilePath -> MeterUpdate -> Annex retrieve r k _f d p = metered (Just p) k $ \meterupdate -> davAction r False $ \(baseurl, user, pass) -> liftIO $ catchBoolIO $ withStoredFiles r k baseurl user pass onerr $ \urls -> do - meteredWriteFileChunks meterupdate d urls $ \url -> do + Legacy.meteredWriteFileChunks meterupdate d urls $ \url -> do mb <- getDAV url user pass case mb of Nothing -> throwIO "download failed" From adb6ca62caa60033e3f7d8043bc4664399959a80 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Sat, 26 Jul 2014 20:21:36 -0400 Subject: [PATCH 11/54] fix build --- Remote/Glacier.hs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/Remote/Glacier.hs b/Remote/Glacier.hs index 00be9e1a96..bf8f050610 100644 --- a/Remote/Glacier.hs +++ b/Remote/Glacier.hs @@ -282,7 +282,8 @@ jobList r keys = go =<< glacierEnv (config r) (uuid r) then return nada else do enckeys <- forM keys $ \k -> - maybe k snd <$> cipherKey (config r) k + maybe k (\(_, enck) -> enck k) + <$> cipherKey (config r) let keymap = M.fromList $ zip enckeys keys let convert = mapMaybe (`M.lookup` keymap) return (convert succeeded, convert failed) From 275e284dda93e5215b3e85039f371709a465ddc9 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Sat, 26 Jul 2014 20:21:49 -0400 Subject: [PATCH 12/54] doc update for new chunking --- debian/changelog | 4 ++++ doc/special_remotes/directory.mdwn | 12 ++++++++---- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/debian/changelog b/debian/changelog index bbdea7b7b6..d5c000003f 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,5 +1,9 @@ git-annex (5.20140718) UNRELEASED; urgency=medium + * New chunk= option to chunk files stored in directory 
remotes. + * The old chunksize= option is deprecated. Do not use for new remotes! + * Legacy code for directory remotes using the old chunksize= option + will keep them working, but more slowly than before. * webapp: Automatically install Konqueror integration scripts to get and drop files. * repair: Removing bad objects could leave fsck finding no more diff --git a/doc/special_remotes/directory.mdwn b/doc/special_remotes/directory.mdwn index 96d5938213..de7ab904e5 100644 --- a/doc/special_remotes/directory.mdwn +++ b/doc/special_remotes/directory.mdwn @@ -25,13 +25,17 @@ remote: * `keyid` - Specifies the gpg key to use for [[encryption]]. -* `chunksize` - Avoid storing files larger than the specified size in the +* `chunk` - Avoid storing files larger than the specified size in the directory. For use on directories on mount points that have file size limitations. The default is to never chunk files. The value can use specified using any commonly used units. - Example: `chunksize=100 megabytes` - Note that enabling chunking on an existing remote with non-chunked - files is not recommended; nor is changing the chunksize. + Example: `chunk=100 megabytes` + Note that chunking can be disabled later by setting chunk=0, + and can also safely be changed to a different size as needed. + +* `chunksize` - Deprecated version of chunk parameter above. + Do not use for new remotes. It is not safe to change the chunksize + setting of an existing remote. Setup example: From 67975bf50d2c998b9824c2c57178bcf45515292f Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Sat, 26 Jul 2014 22:25:50 -0400 Subject: [PATCH 13/54] fix fallback to other chunk size when first does not have it --- Remote/Helper/Chunked.hs | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/Remote/Helper/Chunked.hs b/Remote/Helper/Chunked.hs index 18dfe8aeee..baf47f067f 100644 --- a/Remote/Helper/Chunked.hs +++ b/Remote/Helper/Chunked.hs @@ -221,14 +221,17 @@ hasKeyChunks -> EncKey -> Key -> Annex (Either String Bool) -hasKeyChunks checker u chunkconfig encryptor basek = +hasKeyChunks checker u chunkconfig encryptor basek = do checklists impossible =<< chunkKeys u chunkconfig basek where checklists lastfailmsg [] = return $ Left lastfailmsg checklists _ (l:ls) - | not (null l) = - either (`checklists` ls) (return . Right) - =<< checkchunks l + | not (null l) = do + v <- checkchunks l + case v of + Left e -> checklists e ls + Right True -> return (Right True) + Right False -> checklists impossible ls | otherwise = checklists impossible ls checkchunks :: [Key] -> Annex (Either String Bool) From 86e8532c0a899882acd66772fc0bd6391ee386a3 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Sat, 26 Jul 2014 22:34:10 -0400 Subject: [PATCH 14/54] allM has slightly better memory use --- Remote/Helper/Chunked.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Remote/Helper/Chunked.hs b/Remote/Helper/Chunked.hs index baf47f067f..4cb20b875b 100644 --- a/Remote/Helper/Chunked.hs +++ b/Remote/Helper/Chunked.hs @@ -154,7 +154,7 @@ toChunkList k (chunksize, chunkcount) = takeChunkKeyStream chunkcount $ removeChunks :: (Key -> Annex Bool) -> UUID -> ChunkConfig -> EncKey -> Key -> Annex Bool removeChunks remover u chunkconfig encryptor k = do ls <- chunkKeys u chunkconfig k - ok <- and <$> mapM (remover . encryptor) (concat ls) + ok <- allM (remover . 
encryptor) (concat ls) when ok $ case chunkconfig of (UnpaddedChunks _) | not (isChunkKey k) -> do From 93be3296fceacdf55ddde62b587f467179790f62 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Sat, 26 Jul 2014 22:47:52 -0400 Subject: [PATCH 15/54] fix another fallback bug --- Remote/Helper/Chunked.hs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/Remote/Helper/Chunked.hs b/Remote/Helper/Chunked.hs index 4cb20b875b..5a52a1f4bd 100644 --- a/Remote/Helper/Chunked.hs +++ b/Remote/Helper/Chunked.hs @@ -146,7 +146,9 @@ toChunkList k (chunksize, chunkcount) = takeChunkKeyStream chunkcount $ chunkKeyStream k chunksize {- Removes all chunks of a key from a remote, by calling a remover - - action on each. The remover action should succeed even if asked to + - action on each. + - + - The remover action should succeed even if asked to - remove a key that is not present on the remote. - - This action may be called on a chunked key. It will simply remove it. @@ -231,7 +233,9 @@ hasKeyChunks checker u chunkconfig encryptor basek = do case v of Left e -> checklists e ls Right True -> return (Right True) - Right False -> checklists impossible ls + Right False + | null ls -> return (Right False) + | otherwise -> checklists impossible ls | otherwise = checklists impossible ls checkchunks :: [Key] -> Annex (Either String Bool) From 0d89b65bfc59d7571346114492e7ced4e12a261b Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Sat, 26 Jul 2014 22:52:47 -0400 Subject: [PATCH 16/54] fix key checking when a directory special remote's directory is missing The best thing to do in this case is return Left, so that anything that tries to access it will fail. --- Remote/Directory.hs | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/Remote/Directory.hs b/Remote/Directory.hs index ae2c43200c..c30c3c2639 100644 --- a/Remote/Directory.hs +++ b/Remote/Directory.hs @@ -168,5 +168,11 @@ remove d k = liftIO $ do checkPresent :: FilePath -> ChunkConfig -> Key -> Annex (Either String Bool) checkPresent d (LegacyChunks _) k = Legacy.checkPresent d locations k -checkPresent d _ k = liftIO $ catchMsgIO $ - anyM doesFileExist (locations d k) +checkPresent d _ k = liftIO $ do + v <- catchMsgIO $ anyM doesFileExist (locations d k) + case v of + Right False -> ifM (doesDirectoryExist d) + ( return v + , return $ Left $ "directory " ++ d ++ " is not accessible" + ) + _ -> return v From 867fd116a7bb0a4c791def566f60a7d166339f6d Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Sat, 26 Jul 2014 23:01:44 -0400 Subject: [PATCH 17/54] better exception display --- Assistant/Threads/Watcher.hs | 2 +- Remote/Directory.hs | 4 ++-- Remote/Helper/Chunked.hs | 6 ++++-- Remote/Helper/Chunked/Legacy.hs | 4 ++-- Remote/WebDAV.hs | 2 +- 5 files changed, 10 insertions(+), 8 deletions(-) diff --git a/Assistant/Threads/Watcher.hs b/Assistant/Threads/Watcher.hs index 0ed1bd22f9..91e0fc6196 100644 --- a/Assistant/Threads/Watcher.hs +++ b/Assistant/Threads/Watcher.hs @@ -184,7 +184,7 @@ runHandler :: Handler -> FilePath -> Maybe FileStatus -> Assistant () runHandler handler file filestatus = void $ do r <- tryIO <~> handler (normalize file) filestatus case r of - Left e -> liftIO $ print e + Left e -> liftIO $ warningIO $ show e Right Nothing -> noop Right (Just change) -> do -- Just in case the commit thread is not diff --git a/Remote/Directory.hs b/Remote/Directory.hs index c30c3c2639..6b6a4b1cec 100644 --- a/Remote/Directory.hs +++ b/Remote/Directory.hs @@ -114,10 +114,10 @@ prepareStore d chunkconfig k = ifM 
(checkDiskSpace (Just d) k 0)
 
 store :: FilePath -> ChunkConfig -> Storer
 store d chunkconfig k b p = do
-	void $ liftIO $ tryIO $ createDirectoryIfMissing True tmpdir
+	void $ tryIO $ createDirectoryIfMissing True tmpdir
 	case chunkconfig of
 		LegacyChunks chunksize -> Legacy.store chunksize finalizer k b p tmpdir destdir
-		_ -> flip catchNonAsync (\e -> print e >> return False) $ do
+		_ -> flip catchNonAsync (\e -> warningIO (show e) >> return False) $ do
 			let tmpf = tmpdir </> keyFile k
 			meteredWriteFile p tmpf b
 			finalizer tmpdir destdir
diff --git a/Remote/Helper/Chunked.hs b/Remote/Helper/Chunked.hs
index 5a52a1f4bd..5fa6c55efa 100644
--- a/Remote/Helper/Chunked.hs
+++ b/Remote/Helper/Chunked.hs
@@ -77,7 +77,7 @@ numChunks = pred . fromJust . keyChunkNum . fst . nextChunkKeyStream
 -}
 storeChunks :: UUID -> ChunkConfig -> Key -> FilePath -> MeterUpdate -> (Key -> L.ByteString -> MeterUpdate -> IO Bool) -> Annex Bool
 storeChunks u chunkconfig k f p storer = metered (Just p) k $ \meterupdate ->
-	either (\e -> liftIO (print e) >> return False) (go meterupdate)
+	either (\e -> warning (show e) >> return False) (go meterupdate)
 		=<< (liftIO $ tryIO $ L.readFile f)
@@ -190,7 +190,9 @@ retrieveChunks retriever u chunkconfig encryptor basek basep sink = do
 	ls <- chunkKeys u chunkconfig basek
 	liftIO $ flip catchNonAsync giveup (firstavail ls)
  where
-	giveup e = print e >> return False
+	giveup e = do
+		warningIO (show e)
+		return False
 
 	firstavail [] = return False
 	firstavail ([]:ls) = firstavail ls
diff --git a/Remote/Helper/Chunked/Legacy.hs b/Remote/Helper/Chunked/Legacy.hs
index e435851db1..4f402705a7 100644
--- a/Remote/Helper/Chunked/Legacy.hs
+++ b/Remote/Helper/Chunked/Legacy.hs
@@ -74,7 +74,7 @@ storeChunks key tmp dest storer recorder finalizer = either onerr return
 		finalizer tmp dest
 		return (not $ null stored)
 	onerr e = do
-		print e
+		warningIO (show e)
 		return False
 
 	basef = tmp ++ keyFile key
@@ -105,7 +105,7 @@ storeChunked chunksize dests storer content = either onerr return
 	| otherwise = storechunks sz [] dests content
 
 	onerr e = do
-		print e
+		warningIO (show e)
 		return []
 
 	storechunks _ _ [] _ = return [] -- ran out of dests
diff --git a/Remote/WebDAV.hs b/Remote/WebDAV.hs
index 36df609459..31e4225e40 100644
--- a/Remote/WebDAV.hs
+++ b/Remote/WebDAV.hs
@@ -113,7 +113,7 @@ storeHelper :: Remote -> Key -> DavUrl -> DavUser -> DavPass -> L.ByteString ->
 storeHelper r k baseurl user pass b = catchBoolIO $ do
 	mkdirRecursiveDAV tmpurl user pass
 	case chunkconfig of
-		NoChunks -> flip catchNonAsync (\e -> print e >> return False) $ do
+		NoChunks -> flip catchNonAsync (\e -> warningIO (show e) >> return False) $ do
 			storehttp tmpurl b
 			finalizer tmpurl keyurl
 			return True

From 74963550317fb1b77cfeda13282bb35455b376d1 Mon Sep 17 00:00:00 2001
From: Joey Hess
Date: Sat, 26 Jul 2014 23:24:27 -0400
Subject: [PATCH 18/54] add some more exception handling primitives

---
 Annex/Exception.hs | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/Annex/Exception.hs b/Annex/Exception.hs
index 41a9ed9216..5ecbd28a07 100644
--- a/Annex/Exception.hs
+++ b/Annex/Exception.hs
@@ -5,12 +5,13 @@
 - AnnexState are retained. This works because the Annex monad
 - internally stores the AnnexState in a MVar.
 -
- - Copyright 2011-2013 Joey Hess
+ - Copyright 2011-2014 Joey Hess
 -
 - Licensed under the GNU GPL version 3 or higher.
-}
 
 {-# LANGUAGE PackageImports #-}
+{-# LANGUAGE ScopedTypeVariables #-}
 
 module Annex.Exception (
 	bracketIO,
@@ -19,6 +20,8 @@ module Annex.Exception (
 	tryAnnexIO,
 	throwAnnex,
 	catchAnnex,
+	catchNonAsyncAnnex,
+	tryNonAsyncAnnex,
 ) where
 
 import qualified Control.Monad.Catch as M
@@ -48,3 +51,13 @@ throwAnnex = M.throwM
 {- catch in the Annex monad -}
 catchAnnex :: Exception e => Annex a -> (e -> Annex a) -> Annex a
 catchAnnex = M.catch
+
+{- catches all exceptions except for async exceptions -}
+catchNonAsyncAnnex :: Annex a -> (SomeException -> Annex a) -> Annex a
+catchNonAsyncAnnex a onerr = a `M.catches`
+	[ M.Handler (\ (e :: AsyncException) -> throwAnnex e)
+	, M.Handler (\ (e :: SomeException) -> onerr e)
+	]
+
+tryNonAsyncAnnex :: Annex a -> Annex (Either SomeException a)
+tryNonAsyncAnnex a = (Right <$> a) `catchNonAsyncAnnex` (return . Left)

From 9a8c4bb21f99cee000b99be9e629513def6a459c Mon Sep 17 00:00:00 2001
From: Joey Hess
Date: Sat, 26 Jul 2014 23:26:10 -0400
Subject: [PATCH 19/54] improve exception handling

Push it down from needing to be done in every Storer, to being checked
once inside ChunkedEncryptable.

Also, catch exceptions from PrepareStorer and PrepareRetriever,
just in case..
---
 Remote/Directory.hs                 |  2 +-
 Remote/Helper/Chunked.hs            | 14 +++++++++-----
 Remote/Helper/ChunkedEncryptable.hs | 10 ++++++++--
 3 files changed, 18 insertions(+), 8 deletions(-)

diff --git a/Remote/Directory.hs b/Remote/Directory.hs
index 6b6a4b1cec..2ebf608cb0 100644
--- a/Remote/Directory.hs
+++ b/Remote/Directory.hs
@@ -117,7 +117,7 @@ store d chunkconfig k b p = do
 	void $ tryIO $ createDirectoryIfMissing True tmpdir
 	case chunkconfig of
 		LegacyChunks chunksize -> Legacy.store chunksize finalizer k b p tmpdir destdir
-		_ -> flip catchNonAsync (\e -> warningIO (show e) >> return False) $ do
+		_ -> do
 			let tmpf = tmpdir </> keyFile k
 			meteredWriteFile p tmpf b
 			finalizer tmpdir destdir
diff --git a/Remote/Helper/Chunked.hs b/Remote/Helper/Chunked.hs
index 5fa6c55efa..3eab0947a7 100644
--- a/Remote/Helper/Chunked.hs
+++ b/Remote/Helper/Chunked.hs
@@ -70,12 +70,16 @@ numChunks = pred . fromJust . keyChunkNum . fst . nextChunkKeyStream
 - the storer action, along with a corresponding chunk key and a
 - progress meter update callback.
 -
- - Note that the storer action is responsible for catching any
- - exceptions it may encounter.
- -
 - This action may be called on a chunked key. It will simply store it.
 -}
-storeChunks :: UUID -> ChunkConfig -> Key -> FilePath -> MeterUpdate -> (Key -> L.ByteString -> MeterUpdate -> IO Bool) -> Annex Bool
+storeChunks
+	:: UUID
+	-> ChunkConfig
+	-> Key
+	-> FilePath
+	-> MeterUpdate
+	-> (Key -> L.ByteString -> MeterUpdate -> IO Bool)
+	-> Annex Bool
 storeChunks u chunkconfig k f p storer = metered (Just p) k $ \meterupdate ->
 	either (\e -> warning (show e) >> return False) (go meterupdate)
 		=<< (liftIO $ tryIO $ L.readFile f)
@@ -188,7 +192,7 @@ retrieveChunks
 	-> Annex Bool
 retrieveChunks retriever u chunkconfig encryptor basek basep sink = do
 	ls <- chunkKeys u chunkconfig basek
-	liftIO $ flip catchNonAsync giveup (firstavail ls)
+	liftIO $ firstavail ls `catchNonAsync` giveup
  where
 	giveup e = do
 		warningIO (show e)
diff --git a/Remote/Helper/ChunkedEncryptable.hs b/Remote/Helper/ChunkedEncryptable.hs
index 740da58b91..cfa92406e7 100644
--- a/Remote/Helper/ChunkedEncryptable.hs
+++ b/Remote/Helper/ChunkedEncryptable.hs
@@ -29,12 +29,15 @@ import Annex.Content
 import Annex.Exception
 
 -- Prepares to store a Key, and returns a Storer action if possible.
+-- May throw exceptions.
 type PrepareStorer = Key -> Annex (Maybe Storer)
 
 -- Stores a Key, which may be encrypted and/or a chunk key.
+-- May throw exceptions.
 type Storer = Key -> L.ByteString -> MeterUpdate -> IO Bool
 
 -- Prepares to retrieve a Key, and returns a Retriever action if possible.
+-- May throw exceptions.
 type PrepareRetriever = Key -> Annex (Maybe Retriever)
 
 -- Retrieves a Key, which may be encrypted and/or a chunk key.
@@ -68,8 +71,11 @@ chunkedEncryptableRemote c preparestorer prepareretriever r = encr
 	chunkconfig = chunkConfig c
 	gpgopts = getGpgEncParams encr
 
+	safely a = catchNonAsyncAnnex a (\e -> warning (show e) >> return False)
+
 	-- chunk, then encrypt, then feed to the storer
-	storeKeyGen k p enc = maybe (return False) go =<< preparestorer k
+	storeKeyGen k p enc = safely $
+		maybe (return False) go =<< preparestorer k
 	  where
 		go storer = sendAnnex k rollback $ \src ->
 			metered (Just p) k $ \p' ->
@@ -84,7 +90,7 @@ chunkedEncryptableRemote c preparestorer prepareretriever r = encr
 					storer (enck k') encb p'
 
 	-- call retriever to get chunks; decrypt them; stream to dest file
-	retrieveKeyFileGen k dest p enc =
+	retrieveKeyFileGen k dest p enc = safely $
 		maybe (return False) go =<< prepareretriever k
 	  where
 		go retriever = metered (Just p) k $ \p' ->

From 7db60269eb7c362192e7b8608276ebd9fad01ebd Mon Sep 17 00:00:00 2001
From: Joey Hess
Date: Sat, 26 Jul 2014 23:39:51 -0400
Subject: [PATCH 20/54] update docs for chunking

---
 doc/chunking.mdwn                             | 22 +++++++++++++++++++
 doc/special_remotes/directory.mdwn            |  8 +------
 doc/special_remotes/webdav.mdwn               | 12 +++++-----
 .../using_box.com_as_a_special_remote.mdwn    |  6 ++---
 4 files changed, 31 insertions(+), 17 deletions(-)
 create mode 100644 doc/chunking.mdwn

diff --git a/doc/chunking.mdwn b/doc/chunking.mdwn
new file mode 100644
index 0000000000..1be1fbef6a
--- /dev/null
+++ b/doc/chunking.mdwn
@@ -0,0 +1,22 @@
+Some [[special_remotes]] have support for breaking large files up into
+chunks that are stored on the remote.
+
+This can be useful to work around limitations on the size of files
+on the remote.
+
+Note that git-annex has to buffer chunks in memory before they are sent to
+a remote. So, using a large chunk size will make it use more memory.
+
+To enable chunking, pass a `chunk=XXmb` parameter to `git annex
+initremote`.
+
+To disable chunking of a remote that was using chunking,
+pass `chunk=0` to `git annex enableremote`. Any content already stored on
+the remote using chunks will continue to be accessed via chunks, this
+just prevents using chunks when storing new content.
+
+To change the chunk size, pass a `chunk=XXmb` parameter to
+`git annex enableremote`. This only affects the chunk size used when
+storing new content.
+
+See also: [[design document|design/assistant/chunks]]
diff --git a/doc/special_remotes/directory.mdwn b/doc/special_remotes/directory.mdwn
index de7ab904e5..6279024ec5 100644
--- a/doc/special_remotes/directory.mdwn
+++ b/doc/special_remotes/directory.mdwn
@@ -25,13 +25,7 @@ remote:
 
 * `keyid` - Specifies the gpg key to use for [[encryption]].
 
-* `chunk` - Avoid storing files larger than the specified size in the
-  directory. For use on directories on mount points that have file size
-  limitations. The default is to never chunk files.
-  The value can use specified using any commonly used units.
-  Example: `chunk=100 megabytes`
-  Note that chunking can be disabled later by setting chunk=0,
-  and can also safely be changed to a different size as needed.
+* `chunk` - Enables [[chunking]] when storing large files.
 
 * `chunksize` - Deprecated version of chunk parameter above.
   Do not use for new remotes. It is not safe to change the chunksize
   setting of an existing remote.
 
 Setup example:
 
diff --git a/doc/special_remotes/webdav.mdwn b/doc/special_remotes/webdav.mdwn
index 871540a979..64eed5d0b8 100644
--- a/doc/special_remotes/webdav.mdwn
+++ b/doc/special_remotes/webdav.mdwn
@@ -29,13 +29,11 @@ the webdav remote.
   be created as needed. Use of a https URL is strongly
   encouraged, since HTTP basic authentication is used.
 
-* `chunksize` - Avoid storing files larger than the specified size in
-  WebDAV. For use when the WebDAV server has file size
-  limitations. The default is to never chunk files.
-  The value can use specified using any commonly used units.
-  Example: `chunksize=75 megabytes`
-  Note that enabling chunking on an existing remote with non-chunked
-  files is not recommended, nor is changing the chunksize.
+* `chunk` - Enables [[chunking]] when storing large files.
+
+* `chunksize` - Deprecated version of chunk parameter above.
+  Do not use for new remotes. It is not safe to change the chunksize
+  setting of an existing remote.
 
 Setup example:
 
diff --git a/doc/tips/using_box.com_as_a_special_remote.mdwn b/doc/tips/using_box.com_as_a_special_remote.mdwn
index ac59834f58..149d1f8247 100644
--- a/doc/tips/using_box.com_as_a_special_remote.mdwn
+++ b/doc/tips/using_box.com_as_a_special_remote.mdwn
@@ -5,9 +5,9 @@ for providing 50 gb of free storage if you sign up with its Android client.
 
 git-annex can use Box as a [[special remote|special_remotes]].
 Recent versions of git-annex make this very easy to set up:
 
-	WEBDAV_USERNAME=you@example.com WEBDAV_PASSWORD=xxxxxxx git annex initremote box.com type=webdav url=https://dav.box.com/dav/git-annex chunksize=75mb encryption=shared
+	WEBDAV_USERNAME=you@example.com WEBDAV_PASSWORD=xxxxxxx git annex initremote box.com type=webdav url=https://dav.box.com/dav/git-annex chunk=50mb encryption=shared
 
-Note the use of chunksize; Box has a 100 mb maximum file size, and this
+Note the use of [[chunking]]; Box has a 100 mb maximum file size, and this
 breaks up large files into chunks before that limit is reached.
 
 # old davfs2 method
@@ -58,7 +58,7 @@ Create the special remote, in your git-annex repository.
 ** This example is non-encrypted; fill in your gpg key ID for a securely
 encrypted special remote! **
 
-	git annex initremote box.com type=directory directory=/media/box.com chunksize=2mb encryption=none
+	git annex initremote box.com type=directory directory=/media/box.com chunk=2mb encryption=none
 
 Now git-annex can copy files to box.com, get files from it, etc, just like
 with any other special remote.

From f3e47b16a5c5a6c782c9c169dd00b9e25c9d3e15 Mon Sep 17 00:00:00 2001
From: Joey Hess
Date: Sun, 27 Jul 2014 00:30:04 -0400
Subject: [PATCH 21/54] better Preparer interface

This will allow things like WebDAV to open a single persistent connection
and reuse it for all the chunked data.

The crazy types allow for some nice code reuse.
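
As a rough illustration of the intended use, a minimal sketch only, not
code from this patch; Connection, openConnection, closeConnection and
uploadChunk are hypothetical stand-ins for something like a WebDAV
session:

	-- Sketch: the Preparer runs expensive setup once, and the Storer
	-- closure it passes to the callback reuses that setup for every
	-- chunk of the key.
	prepareStore :: RemoteConfig -> Preparer Storer
	prepareStore c _k a = bracketIO (openConnection c) closeConnection go
	  where
		-- Every chunk is uploaded over the same connection.
		go conn = a $ Just $ \k b p -> uploadChunk conn k b p

When the callback returns, bracketIO closes the connection, even if
storing a chunk threw an exception.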
---
 Remote/Directory.hs                 | 16 +++++-----
 Remote/Directory/LegacyChunked.hs   |  8 +++--
 Remote/Helper/ChunkedEncryptable.hs | 46 ++++++++++++++++++-----------
 3 files changed, 41 insertions(+), 29 deletions(-)

diff --git a/Remote/Directory.hs b/Remote/Directory.hs
index 2ebf608cb0..cb7553fe25 100644
--- a/Remote/Directory.hs
+++ b/Remote/Directory.hs
@@ -6,6 +6,7 @@
 -}
 
 {-# LANGUAGE CPP #-}
+{-# LANGUAGE Rank2Types #-}
 
 module Remote.Directory (remote) where
 
@@ -106,11 +107,10 @@ tmpDir d k = addTrailingPathSeparator $ d </> "tmp" </> keyFile k
 
 {- Check if there is enough free disk space in the remote's directory to
 - store the key. Note that the unencrypted key size is checked. -}
-prepareStore :: FilePath -> ChunkConfig -> PrepareStorer
-prepareStore d chunkconfig k = ifM (checkDiskSpace (Just d) k 0)
-	( return $ Just (store d chunkconfig)
-	, return Nothing
-	)
+prepareStore :: FilePath -> ChunkConfig -> Preparer Storer
+prepareStore d chunkconfig = checkPrepare
+	(\k -> checkDiskSpace (Just d) k 0)
+	(store d chunkconfig)
 
 store :: FilePath -> ChunkConfig -> Storer
 store d chunkconfig k b p = do
@@ -135,9 +135,9 @@ store d chunkconfig k b p = do
 		mapM_ preventWrite =<< dirContents dest
 		preventWrite dest
 
-retrieve :: FilePath -> ChunkConfig -> PrepareRetriever
-retrieve d (LegacyChunks _) basek = Legacy.retrieve locations d basek
-retrieve d _ _ = return $ Just $ \k -> L.readFile =<< getLocation d k
+retrieve :: FilePath -> ChunkConfig -> Preparer Retriever
+retrieve d (LegacyChunks _) = Legacy.retrieve locations d
+retrieve d _ = simplyPrepare $ \k -> L.readFile =<< getLocation d k
 
 retrieveCheap :: FilePath -> ChunkConfig -> Key -> FilePath -> Annex Bool
 -- no cheap retrieval possible for chunks
diff --git a/Remote/Directory/LegacyChunked.hs b/Remote/Directory/LegacyChunked.hs
index df6d94d04a..c7b8ad52c3 100644
--- a/Remote/Directory/LegacyChunked.hs
+++ b/Remote/Directory/LegacyChunked.hs
@@ -7,6 +7,8 @@
 - Licensed under the GNU GPL version 3 or higher.
 -}
 
+{-# LANGUAGE Rank2Types #-}
+
 module Remote.Directory.LegacyChunked where
 
 import qualified Data.ByteString.Lazy as L
@@ -88,13 +90,13 @@ store chunksize finalizer k b p = storeHelper finalizer k $ \dests ->
 - Done very innefficiently, by writing to a temp file.
 - :/ This is legacy code.. -}
-retrieve :: (FilePath -> Key -> [FilePath]) -> FilePath -> PrepareRetriever
-retrieve locations d basek = do
+retrieve :: (FilePath -> Key -> [FilePath]) -> FilePath -> Preparer Retriever
+retrieve locations d basek a = do
 	showLongNote "This remote uses the deprecated chunksize setting. So this will be quite slow."
 	tmpdir <- fromRepo $ gitAnnexTmpMiscDir
 	createAnnexDirectory tmpdir
 	let tmp = tmpdir </> keyFile basek ++ ".directorylegacy.tmp"
-	return $ Just $ \k -> do
+	a $ Just $ \k -> do
 		void $ withStoredFiles d locations k $ \fs -> do
 			forM_ fs $ S.appendFile tmp <=< S.readFile
 
diff --git a/Remote/Helper/ChunkedEncryptable.hs b/Remote/Helper/ChunkedEncryptable.hs
index cfa92406e7..ac89178510 100644
--- a/Remote/Helper/ChunkedEncryptable.hs
+++ b/Remote/Helper/ChunkedEncryptable.hs
@@ -5,12 +5,15 @@
 - Licensed under the GNU GPL version 3 or higher.
-} +{-# LANGUAGE Rank2Types #-} + module Remote.Helper.ChunkedEncryptable ( - chunkedEncryptableRemote, - PrepareStorer, + Preparer, + simplyPrepare, + checkPrepare, Storer, - PrepareRetriever, Retriever, + chunkedEncryptableRemote, storeKeyDummy, retreiveKeyFileDummy, module X @@ -28,18 +31,23 @@ import Remote.Helper.Encryptable as X import Annex.Content import Annex.Exception --- Prepares to store a Key, and returns a Storer action if possible. --- May throw exceptions. -type PrepareStorer = Key -> Annex (Maybe Storer) +-- Prepares for and then runs an action that will act on a Key, +-- passing it a helper when the preparation is successful. +type Preparer helper = forall a. Key -> (Maybe helper -> Annex a) -> Annex a + +simplyPrepare :: helper -> Preparer helper +simplyPrepare helper _ a = a $ Just helper + +checkPrepare :: (Key -> Annex Bool) -> helper -> Preparer helper +checkPrepare checker helper k a = ifM (checker k) + ( a (Just helper) + , a Nothing + ) -- Stores a Key, which may be encrypted and/or a chunk key. -- May throw exceptions. type Storer = Key -> L.ByteString -> MeterUpdate -> IO Bool --- Prepares to retrieve a Key, and returns a Retriever action if possible. --- May throw exceptions. -type PrepareRetriever = Key -> Annex (Maybe Retriever) - -- Retrieves a Key, which may be encrypted and/or a chunk key. -- Throws exception if key is not present, or remote is not accessible. type Retriever = Key -> IO L.ByteString @@ -48,8 +56,8 @@ type Retriever = Key -> IO L.ByteString -} chunkedEncryptableRemote :: RemoteConfig - -> PrepareStorer - -> PrepareRetriever + -> Preparer Storer + -> Preparer Retriever -> Remote -> Remote chunkedEncryptableRemote c preparestorer prepareretriever r = encr @@ -74,13 +82,14 @@ chunkedEncryptableRemote c preparestorer prepareretriever r = encr safely a = catchNonAsyncAnnex a (\e -> warning (show e) >> return False) -- chunk, then encrypt, then feed to the storer - storeKeyGen k p enc = safely $ - maybe (return False) go =<< preparestorer k + storeKeyGen k p enc = + safely $ preparestorer k $ safely . go where - go storer = sendAnnex k rollback $ \src -> + go (Just storer) = sendAnnex k rollback $ \src -> metered (Just p) k $ \p' -> storeChunks (uuid r) chunkconfig k src p' $ storechunk storer + go Nothing = return False rollback = void $ removeKey encr k storechunk storer k' b p' = case enc of Nothing -> storer k' b p' @@ -90,13 +99,14 @@ chunkedEncryptableRemote c preparestorer prepareretriever r = encr storer (enck k') encb p' -- call retriever to get chunks; decrypt them; stream to dest file - retrieveKeyFileGen k dest p enc = safely $ - maybe (return False) go =<< prepareretriever k + retrieveKeyFileGen k dest p enc = + safely $ prepareretriever k $ safely . go where - go retriever = metered (Just p) k $ \p' -> + go (Just retriever) = metered (Just p) k $ \p' -> bracketIO (openBinaryFile dest WriteMode) hClose $ \h -> retrieveChunks retriever (uuid r) chunkconfig enck k p' $ sink h + go Nothing = return False sink h p' b = do let write = meteredWrite p' h case enc of From c3af4897c0484a09193f8b973c2cd4344c478491 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Sun, 27 Jul 2014 01:18:38 -0400 Subject: [PATCH 22/54] faster storeChunks No need to process each L.ByteString chunk, instead ask it to split. Doesn't seem to have really sped things up much, but it also made the code simpler. Note that this does (and already did) buffer in memory. 
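
For reference, the core of that approach as a standalone sketch;
splitChunks is not a function in this patch, just a simplified
illustration of how L.splitAt carries the chunking:

	import qualified Data.ByteString.Lazy as L
	import Data.Int (Int64)

	-- Lazily split a ByteString into chunks of at most the given size.
	-- L.splitAt does not force the rest of the input, so a consumer
	-- that handles the chunks one at a time keeps at most one chunk
	-- buffered in memory.
	splitChunks :: Int64 -> L.ByteString -> [L.ByteString]
	splitChunks chunksize b
		| L.null b = []
		| otherwise = chunk : splitChunks chunksize rest
	  where
		(chunk, rest) = L.splitAt chunksize b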
It seems that only the directory special remote could take advantage of streaming chunks to files w/o buffering, so probably won't add an interface to allow for that. --- Remote/Helper/Chunked.hs | 54 ++++++++++++++++++---------------------- 1 file changed, 24 insertions(+), 30 deletions(-) diff --git a/Remote/Helper/Chunked.hs b/Remote/Helper/Chunked.hs index 3eab0947a7..65d5892c32 100644 --- a/Remote/Helper/Chunked.hs +++ b/Remote/Helper/Chunked.hs @@ -26,7 +26,6 @@ import Utility.Metered import Crypto (EncKey) import qualified Data.ByteString.Lazy as L -import qualified Data.ByteString as S import qualified Data.Map as M data ChunkConfig @@ -70,6 +69,14 @@ numChunks = pred . fromJust . keyChunkNum . fst . nextChunkKeyStream - the storer action, along with a corresponding chunk key and a - progress meter update callback. - + - This buffers each chunk in memory, so can use a lot of memory + - with a large ChunkSize. + - More optimal versions of this can be written, that rely + - on L.toChunks to split the lazy bytestring into chunks (typically + - smaller than the ChunkSize), and eg, write those chunks to a Handle. + - But this is the best that can be done with the storer interface that + - writes a whole L.ByteString at a time. + - - This action may be called on a chunked key. It will simply store it. -} storeChunks @@ -90,39 +97,26 @@ storeChunks u chunkconfig k f p storer = metered (Just p) k $ \meterupdate -> _ -> liftIO $ storer k b meterupdate gochunks :: MeterUpdate -> ChunkSize -> L.ByteString -> ChunkKeyStream -> Annex Bool - gochunks meterupdate chunksize lb = - loop zeroBytesProcessed chunksize (L.toChunks lb) [] + gochunks meterupdate chunksize = loop zeroBytesProcessed . splitchunk where - loop bytesprocessed sz [] c chunkkeys - -- Always store at least one chunk, - -- even for empty content. - | not (null c) || numchunks == 0 = - storechunk bytesprocessed sz [] c chunkkeys - -- Once all chunks are successfully stored, - -- update the chunk log. - | otherwise = do + splitchunk = L.splitAt chunksize + + loop bytesprocessed (chunk, bs) chunkkeys + | L.null chunk && numchunks > 0 = do + -- Once all chunks are successfully + -- stored, update the chunk log. 
chunksStored u k chunksize numchunks return True + | otherwise = do + let (chunkkey, chunkkeys') = nextChunkKeyStream chunkkeys + ifM (liftIO $ storer chunkkey chunk meterupdate') + ( do + let bytesprocessed' = addBytesProcessed bytesprocessed (L.length chunk) + loop bytesprocessed' (splitchunk bs) chunkkeys' + , return False + ) where - numchunks = numChunks chunkkeys - loop bytesprocessed sz (b:bs) c chunkkeys - | s <= sz || sz == chunksize = - loop bytesprocessed sz' bs (b:c) chunkkeys - | otherwise = - storechunk bytesprocessed sz' bs (b:c) chunkkeys - where - s = fromIntegral (S.length b) - sz' = sz - s - - storechunk bytesprocessed sz bs c chunkkeys = do - let (chunkkey, chunkkeys') = nextChunkKeyStream chunkkeys - ifM (liftIO $ storer chunkkey (L.fromChunks $ reverse c) meterupdate') - ( do - let bytesprocessed' = addBytesProcessed bytesprocessed (chunksize - sz) - loop bytesprocessed' chunksize bs [] chunkkeys' - , return False - ) - where + numchunks = numChunks chunkkeys {- The MeterUpdate that is passed to the action - storing a chunk is offset, so that it reflects - the total bytes that have already been stored From bffd0e34b311c800d47d6ea441742661a0fbd302 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Sun, 27 Jul 2014 01:22:51 -0400 Subject: [PATCH 23/54] comment typo --- Remote/Helper/Encryptable.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Remote/Helper/Encryptable.hs b/Remote/Helper/Encryptable.hs index 9da5e641d2..65a3ba284d 100644 --- a/Remote/Helper/Encryptable.hs +++ b/Remote/Helper/Encryptable.hs @@ -66,7 +66,7 @@ encryptionSetup c = maybe genCipher updateCipher $ extractCipher c c' = foldr M.delete c -- git-annex used to remove 'encryption' as well, since -- it was redundant; we now need to keep it for - -- public-key incryption, hence we leave it on newer + -- public-key encryption, hence we leave it on newer -- remotes (while being backward-compatible). [ "keyid", "keyid+", "keyid-", "highRandomQuality" ] From 7afb057d60845a56c6f637151efe5464222ca00e Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Sun, 27 Jul 2014 01:24:34 -0400 Subject: [PATCH 24/54] reorg --- Remote/Helper/Chunked.hs | 39 ++++++++++++++++++--------------------- 1 file changed, 18 insertions(+), 21 deletions(-) diff --git a/Remote/Helper/Chunked.hs b/Remote/Helper/Chunked.hs index 65d5892c32..8790c69007 100644 --- a/Remote/Helper/Chunked.hs +++ b/Remote/Helper/Chunked.hs @@ -10,7 +10,6 @@ module Remote.Helper.Chunked ( ChunkConfig(..), chunkConfig, storeChunks, - chunkKeys, removeChunks, retrieveChunks, hasKeyChunks, @@ -123,26 +122,6 @@ storeChunks u chunkconfig k f p storer = metered (Just p) k $ \meterupdate -> - in previous chunks. -} meterupdate' = offsetMeterUpdate meterupdate bytesprocessed --- retrieveChunks :: UUID -> ChunkConfig -> Key -> Annex - -{- A key can be stored in a remote unchunked, or as a list of chunked keys. - - It's even possible for a remote to have the same key stored multiple - - times with different chunk sizes. This finds all possible lists of keys - - that might be on the remote that can be combined to get back the - - requested key. - -} -chunkKeys :: UUID -> ChunkConfig -> Key -> Annex [[Key]] -chunkKeys u (UnpaddedChunks _) k | not (isChunkKey k) = do - chunklists <- map (toChunkList k) <$> getCurrentChunks u k - -- Probably using the chunklists, but the unchunked - -- key could be present. 
- return (chunklists ++ [[k]]) -chunkKeys _ _ k = pure [[k]] - -toChunkList :: Key -> (ChunkSize, ChunkCount) -> [Key] -toChunkList k (chunksize, chunkcount) = takeChunkKeyStream chunkcount $ - chunkKeyStream k chunksize - {- Removes all chunks of a key from a remote, by calling a remover - action on each. - @@ -247,3 +226,21 @@ hasKeyChunks checker u chunkconfig encryptor basek = do else return v impossible = "no recorded chunks" + +{- A key can be stored in a remote unchunked, or as a list of chunked keys. + - It's even possible for a remote to have the same key stored multiple + - times with different chunk sizes. This finds all possible lists of keys + - that might be on the remote that can be combined to get back the + - requested key. + -} +chunkKeys :: UUID -> ChunkConfig -> Key -> Annex [[Key]] +chunkKeys u (UnpaddedChunks _) k | not (isChunkKey k) = do + chunklists <- map (toChunkList k) <$> getCurrentChunks u k + -- Probably using the chunklists, but the unchunked + -- key could be present. + return (chunklists ++ [[k]]) +chunkKeys _ _ k = pure [[k]] + +toChunkList :: Key -> (ChunkSize, ChunkCount) -> [Key] +toChunkList k (chunksize, chunkcount) = takeChunkKeyStream chunkcount $ + chunkKeyStream k chunksize From 2996f0eb05f8ceb0e341faedc43c3e1bf73c5950 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Sun, 27 Jul 2014 02:13:51 -0400 Subject: [PATCH 25/54] use existing chunks even when chunk=0 When chunk=0, always try the unchunked key first. This avoids the overhead of needing to read the git-annex branch to find the chunkcount. However, if the unchunked key is not present, go on and try the chunks. Also, when removing a chunked key, update the chunkcounts even when chunk=0. --- Remote/Helper/Chunked.hs | 69 ++++++++++++++++++++++++++-------------- 1 file changed, 45 insertions(+), 24 deletions(-) diff --git a/Remote/Helper/Chunked.hs b/Remote/Helper/Chunked.hs index 8790c69007..3415c2df62 100644 --- a/Remote/Helper/Chunked.hs +++ b/Remote/Helper/Chunked.hs @@ -23,6 +23,7 @@ import Logs.Chunk.Pure (ChunkSize, ChunkCount) import Logs.Chunk import Utility.Metered import Crypto (EncKey) +import Annex.Exception import qualified Data.ByteString.Lazy as L import qualified Data.Map as M @@ -32,6 +33,10 @@ data ChunkConfig | UnpaddedChunks ChunkSize | LegacyChunks ChunkSize +noChunks :: ChunkConfig -> Bool +noChunks NoChunks = True +noChunks _ = False + chunkConfig :: RemoteConfig -> ChunkConfig chunkConfig m = case M.lookup "chunksize" m of @@ -75,8 +80,6 @@ numChunks = pred . fromJust . keyChunkNum . fst . nextChunkKeyStream - smaller than the ChunkSize), and eg, write those chunks to a Handle. - But this is the best that can be done with the storer interface that - writes a whole L.ByteString at a time. - - - - This action may be called on a chunked key. It will simply store it. -} storeChunks :: UUID @@ -91,7 +94,7 @@ storeChunks u chunkconfig k f p storer = metered (Just p) k $ \meterupdate -> =<< (liftIO $ tryIO $ L.readFile f) where go meterupdate b = case chunkconfig of - (UnpaddedChunks chunksize) | not (isChunkKey k) -> + (UnpaddedChunks chunksize) -> gochunks meterupdate chunksize b (chunkKeyStream k chunksize) _ -> liftIO $ storer k b meterupdate @@ -134,12 +137,9 @@ removeChunks :: (Key -> Annex Bool) -> UUID -> ChunkConfig -> EncKey -> Key -> A removeChunks remover u chunkconfig encryptor k = do ls <- chunkKeys u chunkconfig k ok <- allM (remover . 
encryptor) (concat ls) - when ok $ - case chunkconfig of - (UnpaddedChunks _) | not (isChunkKey k) -> do - let chunksizes = catMaybes $ map (keyChunkSize <=< headMaybe) ls - forM_ chunksizes $ chunksRemoved u k . fromIntegral - _ -> noop + when ok $ do + let chunksizes = catMaybes $ map (keyChunkSize <=< headMaybe) ls + forM_ chunksizes $ chunksRemoved u k . fromIntegral return ok {- Retrieves a key from a remote, using a retriever action that @@ -163,10 +163,17 @@ retrieveChunks -> MeterUpdate -> (MeterUpdate -> L.ByteString -> IO ()) -> Annex Bool -retrieveChunks retriever u chunkconfig encryptor basek basep sink = do - ls <- chunkKeys u chunkconfig basek - liftIO $ firstavail ls `catchNonAsync` giveup +retrieveChunks retriever u chunkconfig encryptor basek basep sink + | noChunks chunkconfig = + -- Optimisation: Try the unchunked key first, to avoid + -- looking in the git-annex branch for chunk counts. + liftIO (retriever (encryptor basek) >>= sink basep >> return True) + `catchNonAsyncAnnex` + const (go =<< chunkKeysOnly u basek) + | otherwise = go =<< chunkKeys u chunkconfig basek where + go ls = liftIO $ firstavail ls `catchNonAsync` giveup + giveup e = do warningIO (show e) return False @@ -202,8 +209,15 @@ hasKeyChunks -> EncKey -> Key -> Annex (Either String Bool) -hasKeyChunks checker u chunkconfig encryptor basek = do - checklists impossible =<< chunkKeys u chunkconfig basek +hasKeyChunks checker u chunkconfig encryptor basek + | noChunks chunkconfig = + -- Optimisation: Try the unchunked key first, to avoid + -- looking in the git-annex branch for chunk counts. + ifM ((Right True ==) <$> checker (encryptor basek)) + ( return (Right True) + , checklists impossible =<< chunkKeysOnly u basek + ) + | otherwise = checklists impossible =<< chunkKeys u chunkconfig basek where checklists lastfailmsg [] = return $ Left lastfailmsg checklists _ (l:ls) @@ -228,18 +242,25 @@ hasKeyChunks checker u chunkconfig encryptor basek = do impossible = "no recorded chunks" {- A key can be stored in a remote unchunked, or as a list of chunked keys. - - It's even possible for a remote to have the same key stored multiple - - times with different chunk sizes. This finds all possible lists of keys - - that might be on the remote that can be combined to get back the - - requested key. + - This can be the case whether or not the remote is currently configured + - to use chunking. + - + - It's even possible for a remote to have the same key stored multiple + - times with different chunk sizes! + - + - This finds all possible lists of keys that might be on the remote that + - can be combined to get back the requested key, in order from most to + - least likely to exist. -} chunkKeys :: UUID -> ChunkConfig -> Key -> Annex [[Key]] -chunkKeys u (UnpaddedChunks _) k | not (isChunkKey k) = do - chunklists <- map (toChunkList k) <$> getCurrentChunks u k - -- Probably using the chunklists, but the unchunked - -- key could be present. 
- return (chunklists ++ [[k]]) -chunkKeys _ _ k = pure [[k]] +chunkKeys u chunkconfig k = do + l <- chunkKeysOnly u k + return $ if noChunks chunkconfig + then [k] : l + else l ++ [[k]] + +chunkKeysOnly :: UUID -> Key -> Annex [[Key]] +chunkKeysOnly u k = map (toChunkList k) <$> getCurrentChunks u k toChunkList :: Key -> (ChunkSize, ChunkCount) -> [Key] toChunkList k (chunksize, chunkcount) = takeChunkKeyStream chunkcount $ From aad8cfe718576a92a845f3d00a47ff8c98d963c6 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Sun, 27 Jul 2014 12:24:12 -0400 Subject: [PATCH 26/54] use map for faster backend name lookup --- Backend.hs | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/Backend.hs b/Backend.hs index dded0d0055..245e79ec20 100644 --- a/Backend.hs +++ b/Backend.hs @@ -32,6 +32,8 @@ import qualified Backend.Hash import qualified Backend.WORM import qualified Backend.URL +import qualified Data.Map as M + list :: [Backend] list = Backend.Hash.backends ++ Backend.WORM.backends ++ Backend.URL.backends @@ -116,7 +118,9 @@ lookupBackendName :: String -> Backend lookupBackendName s = fromMaybe unknown $ maybeLookupBackendName s where unknown = error $ "unknown backend " ++ s + maybeLookupBackendName :: String -> Maybe Backend -maybeLookupBackendName s = headMaybe matches - where - matches = filter (\b -> s == B.name b) list +maybeLookupBackendName s = M.lookup s nameMap + +nameMap :: M.Map String Backend +nameMap = M.fromList $ zip (map B.name list) list From 13bbb61a51f7f44696af96a00f77e137dc68b7f6 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Sun, 27 Jul 2014 12:33:46 -0400 Subject: [PATCH 27/54] add key stability checking interface Needed for resuming from chunks. Url keys are considered not stable. I considered treating url keys with a known size as stable, but just don't feel that is enough information. --- Backend.hs | 7 ++++++- Backend/Hash.hs | 1 + Backend/URL.hs | 3 +++ Backend/WORM.hs | 1 + Types/Backend.hs | 7 +++++++ 5 files changed, 18 insertions(+), 1 deletion(-) diff --git a/Backend.hs b/Backend.hs index 245e79ec20..99752c66ff 100644 --- a/Backend.hs +++ b/Backend.hs @@ -14,7 +14,8 @@ module Backend ( isAnnexLink, chooseBackend, lookupBackendName, - maybeLookupBackendName + maybeLookupBackendName, + checkStableKey, ) where import Common.Annex @@ -124,3 +125,7 @@ maybeLookupBackendName s = M.lookup s nameMap nameMap :: M.Map String Backend nameMap = M.fromList $ zip (map B.name list) list + +checkStableKey :: Key -> Bool +checkStableKey k = maybe False (`B.isStableKey` k) + (maybeLookupBackendName (keyBackendName k)) diff --git a/Backend/Hash.hs b/Backend/Hash.hs index 3ff496271a..91267ed67e 100644 --- a/Backend/Hash.hs +++ b/Backend/Hash.hs @@ -45,6 +45,7 @@ genBackend hash = Just Backend , fsckKey = Just $ checkKeyChecksum hash , canUpgradeKey = Just needsUpgrade , fastMigrate = Just trivialMigrate + , isStableKey = const True } genBackendE :: Hash -> Maybe Backend diff --git a/Backend/URL.hs b/Backend/URL.hs index 4233c56bc0..2c2988ac02 100644 --- a/Backend/URL.hs +++ b/Backend/URL.hs @@ -25,6 +25,9 @@ backend = Backend , fsckKey = Nothing , canUpgradeKey = Nothing , fastMigrate = Nothing + -- The content of an url can change at any time, so URL keys are + -- not stable. + , isStableKey = const False } {- Every unique url has a corresponding key. 
-}
 
diff --git a/Backend/WORM.hs b/Backend/WORM.hs
index fdeea6f89a..c972602ad0 100644
--- a/Backend/WORM.hs
+++ b/Backend/WORM.hs
@@ -23,6 +23,7 @@ backend = Backend
 	, fsckKey = Nothing
 	, canUpgradeKey = Nothing
 	, fastMigrate = Nothing
+	, isStableKey = const True
 	}
 
 {- The key includes the file size, modification time, and the
diff --git a/Types/Backend.hs b/Types/Backend.hs
index 7eb59b6e28..5c5855bc3d 100644
--- a/Types/Backend.hs
+++ b/Types/Backend.hs
@@ -15,9 +15,16 @@ import Types.KeySource
 data BackendA a = Backend
 	{ name :: String
 	, getKey :: KeySource -> a (Maybe Key)
+	-- Checks the content of a key.
 	, fsckKey :: Maybe (Key -> FilePath -> a Bool)
+	-- Checks if a key can be upgraded to a better form.
 	, canUpgradeKey :: Maybe (Key -> Bool)
+	-- Checks if there is a fast way to migrate a key to a different
+	-- backend (ie, without re-hashing).
 	, fastMigrate :: Maybe (Key -> BackendA a -> Maybe Key)
+	-- Checks if a key is known (or assumed) to always refer to the
+	-- same data.
+	, isStableKey :: Key -> Bool
 	}
 
 instance Show (BackendA a) where

From 9d4a766cd7b8e8b0fc7cd27b08249e4161b5380a Mon Sep 17 00:00:00 2001
From: Joey Hess
Date: Sun, 27 Jul 2014 18:52:42 -0400
Subject: [PATCH 28/54] resume interrupted chunked downloads
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Leverage the new chunked remotes to automatically resume downloads.
Sort of like rsync, although of course not as efficient since this
needs to start at a chunk boundary.

But, unlike rsync, this method will work for S3, WebDAV, external
special remotes, etc, etc. Only directory special remotes so far, but
many more soon!

This implementation will also properly handle starting a download
from one remote, interrupting, and resuming from another one, and so on.

(Resuming interrupted chunked uploads is similarly doable, although
slightly more expensive.)

This commit was sponsored by Thomas Djärv.
---
 Remote/Helper/Chunked.hs            | 97 ++++++++++++++++++++++-------
 Remote/Helper/ChunkedEncryptable.hs |  4 +-
 Types/Key.hs                        | 11 ++--
 Utility/Metered.hs                  |  2 +-
 debian/changelog                    |  4 +-
 doc/chunking.mdwn                   |  2 +
 6 files changed, 88 insertions(+), 32 deletions(-)

diff --git a/Remote/Helper/Chunked.hs b/Remote/Helper/Chunked.hs
index 3415c2df62..9ba6d9cbd6 100644
--- a/Remote/Helper/Chunked.hs
+++ b/Remote/Helper/Chunked.hs
@@ -1,6 +1,6 @@
 {- git-annex chunked remotes
 -
- - Copyright 2012-2014 Joey Hess
+ - Copyright 2014 Joey Hess
 -
 - Licensed under the GNU GPL version 3 or higher.
 -}
@@ -27,6 +27,7 @@ import Annex.Exception
 
 import qualified Data.ByteString.Lazy as L
 import qualified Data.Map as M
+import Control.Exception
 
 data ChunkConfig
 	= NoChunks
@@ -147,12 +148,16 @@ removeChunks remover u chunkconfig encryptor k = do
 -
 - When the remote is chunked, tries each of the options returned by
 - chunkKeys until it finds one where the retriever successfully
- - gets the first key in the list. The content of that key, and any
+ - gets the first chunked key. The content of that key, and any
 - other chunks in the list is fed to the sink.
 -
 - If retrival of one of the subsequent chunks throws an exception,
 - gives up and returns False. Note that partial data may have been
 - written to the sink in this case.
+ -
+ - Resuming is supported when using chunks. When the destination file
+ - already exists, it skips to the next chunked key that would be needed
+ - to resume.
-} retrieveChunks :: (Key -> IO L.ByteString) @@ -160,43 +165,88 @@ retrieveChunks -> ChunkConfig -> EncKey -> Key + -> FilePath -> MeterUpdate - -> (MeterUpdate -> L.ByteString -> IO ()) + -> (Handle -> MeterUpdate -> L.ByteString -> IO ()) -> Annex Bool -retrieveChunks retriever u chunkconfig encryptor basek basep sink +retrieveChunks retriever u chunkconfig encryptor basek dest basep sink | noChunks chunkconfig = -- Optimisation: Try the unchunked key first, to avoid - -- looking in the git-annex branch for chunk counts. - liftIO (retriever (encryptor basek) >>= sink basep >> return True) - `catchNonAsyncAnnex` - const (go =<< chunkKeysOnly u basek) + -- looking in the git-annex branch for chunk counts + -- that are likely not there. + getunchunked `catchNonAsyncAnnex` + const (go =<< chunkKeysOnly u basek) | otherwise = go =<< chunkKeys u chunkconfig basek where - go ls = liftIO $ firstavail ls `catchNonAsync` giveup + go ls = liftIO $ do + currsize <- catchMaybeIO $ + toInteger . fileSize <$> getFileStatus dest + let ls' = maybe ls (setupResume ls) currsize + firstavail currsize ls' `catchNonAsync` giveup giveup e = do warningIO (show e) return False - firstavail [] = return False - firstavail ([]:ls) = firstavail ls - firstavail ((k:ks):ls) = do + firstavail _ [] = return False + firstavail currsize ([]:ls) = firstavail currsize ls + firstavail currsize ((k:ks):ls) = do v <- tryNonAsync $ retriever (encryptor k) case v of Left e | null ls -> giveup e - | otherwise -> firstavail ls + | otherwise -> firstavail currsize ls Right b -> do - sink basep b - let sz = toBytesProcessed $ - fromMaybe 0 $ keyChunkSize k - getrest sz sz ks + let offset = resumeOffset currsize k + let p = maybe basep + (offsetMeterUpdate basep . toBytesProcessed) + offset + bracket (maybe opennew openresume offset) hClose $ \h -> do + sink h p b + let sz = toBytesProcessed $ + fromMaybe 0 $ keyChunkSize k + getrest p h sz sz ks - getrest _ _ [] = return True - getrest sz bytesprocessed (k:ks) = do - let p = offsetMeterUpdate basep bytesprocessed - sink p =<< retriever (encryptor k) - getrest sz (addBytesProcessed bytesprocessed sz) ks + getrest _ _ _ _ [] = return True + getrest p h sz bytesprocessed (k:ks) = do + let p' = offsetMeterUpdate p bytesprocessed + sink h p' =<< retriever (encryptor k) + getrest p h sz (addBytesProcessed bytesprocessed sz) ks + + getunchunked = liftIO $ bracket opennew hClose $ \h -> do + retriever (encryptor basek) >>= sink h basep + return True + + opennew = openBinaryFile dest WriteMode + + -- Open the file and seek to the start point in order to resume. + openresume startpoint = do + -- ReadWriteMode allows seeking; AppendMode does not. + h <- openBinaryFile dest ReadWriteMode + hSeek h AbsoluteSeek startpoint + return h + +{- Can resume when the chunk's offset is at or before the end of + - the dest file. -} +resumeOffset :: Maybe Integer -> Key -> Maybe Integer +resumeOffset Nothing _ = Nothing +resumeOffset currsize k + | offset <= currsize = offset + | otherwise = Nothing + where + offset = chunkKeyOffset k + +{- Drops chunks that are already present in a file, based on its size. + - Keeps any non-chunk keys. + -} +setupResume :: [[Key]] -> Integer -> [[Key]] +setupResume ls currsize = map dropunneeded ls + where + dropunneeded [] = [] + dropunneeded l@(k:_) = case keyChunkSize k of + Just chunksize | chunksize > 0 -> + genericDrop (currsize `div` chunksize) l + _ -> l {- Checks if a key is present in a remote. 
This requires any one - of the lists of options returned by chunkKeys to all check out @@ -212,7 +262,8 @@ hasKeyChunks hasKeyChunks checker u chunkconfig encryptor basek | noChunks chunkconfig = -- Optimisation: Try the unchunked key first, to avoid - -- looking in the git-annex branch for chunk counts. + -- looking in the git-annex branch for chunk counts + -- that are likely not there. ifM ((Right True ==) <$> checker (encryptor basek)) ( return (Right True) , checklists impossible =<< chunkKeysOnly u basek diff --git a/Remote/Helper/ChunkedEncryptable.hs b/Remote/Helper/ChunkedEncryptable.hs index ac89178510..66e02da12b 100644 --- a/Remote/Helper/ChunkedEncryptable.hs +++ b/Remote/Helper/ChunkedEncryptable.hs @@ -103,9 +103,7 @@ chunkedEncryptableRemote c preparestorer prepareretriever r = encr safely $ prepareretriever k $ safely . go where go (Just retriever) = metered (Just p) k $ \p' -> - bracketIO (openBinaryFile dest WriteMode) hClose $ \h -> - retrieveChunks retriever (uuid r) chunkconfig enck k p' $ - sink h + retrieveChunks retriever (uuid r) chunkconfig enck k dest p' sink go Nothing = return False sink h p' b = do let write = meteredWrite p' h diff --git a/Types/Key.hs b/Types/Key.hs index 3015b1e86b..154e813ffd 100644 --- a/Types/Key.hs +++ b/Types/Key.hs @@ -13,8 +13,8 @@ module Types.Key ( stubKey, key2file, file2key, - isChunkKey, nonChunkKey, + chunkKeyOffset, prop_idempotent_key_encode, prop_idempotent_key_decode @@ -49,9 +49,6 @@ stubKey = Key , keyChunkNum = Nothing } -isChunkKey :: Key -> Bool -isChunkKey k = isJust (keyChunkSize k) && isJust (keyChunkNum k) - -- Gets the parent of a chunk key. nonChunkKey :: Key -> Key nonChunkKey k = k @@ -59,6 +56,12 @@ nonChunkKey k = k , keyChunkNum = Nothing } +-- Where a chunk key is offset within its parent. +chunkKeyOffset :: Key -> Maybe Integer +chunkKeyOffset k = (*) + <$> keyChunkSize k + <*> (pred <$> keyChunkNum k) + fieldSep :: Char fieldSep = '-' diff --git a/Utility/Metered.hs b/Utility/Metered.hs index bca7f58e79..cc07f9c351 100644 --- a/Utility/Metered.hs +++ b/Utility/Metered.hs @@ -88,7 +88,7 @@ meteredWriteFile meterupdate f b = withBinaryFile f WriteMode $ \h -> {- Applies an offset to a MeterUpdate. This can be useful when - performing a sequence of actions, such as multiple meteredWriteFiles, - - that all update a common meter progressively. + - that all update a common meter progressively. Or when resuming. -} offsetMeterUpdate :: MeterUpdate -> BytesProcessed -> MeterUpdate offsetMeterUpdate base offset = \n -> base (offset `addBytesProcessed` n) diff --git a/debian/changelog b/debian/changelog index d5c000003f..c85247b69a 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,7 +1,9 @@ git-annex (5.20140718) UNRELEASED; urgency=medium * New chunk= option to chunk files stored in directory remotes. - * The old chunksize= option is deprecated. Do not use for new remotes! + * Partially transferred files are automatically resumed when using + chunked remotes! + * The old chunksize= option is deprecated. Do not use for new remotes. * Legacy code for directory remotes using the old chunksize= option will keep them working, but more slowly than before. * webapp: Automatically install Konqueror integration scripts diff --git a/doc/chunking.mdwn b/doc/chunking.mdwn index 1be1fbef6a..d1dce317c0 100644 --- a/doc/chunking.mdwn +++ b/doc/chunking.mdwn @@ -4,6 +4,8 @@ chunks that are stored on the remote. This can be useful to work around limitations on the size of files on the remote. 
+Chunking also allows for resuming interrupted downloads and uploads. + Note that git-annex has to buffer chunks in memory before they are sent to a remote. So, using a large chunk size will make it use more memory. From 80cc554c82c7559c671388804f761e99fd045b3b Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Mon, 28 Jul 2014 13:19:08 -0400 Subject: [PATCH 29/54] add ChunkMethod type and make Logs.Chunk use it, rather than assuming fixed size chunks (so eg, rolling hash chunks can be supported later) If a newer git-annex starts logging something else in the chunk log, it won't be used by this version, but it will be preserved when updating the log. --- Logs/Chunk.hs | 27 +++++++++++++++++---------- Logs/Chunk/Pure.hs | 24 +++++++++++++++++++----- Remote/Helper/Chunked.hs | 12 ++++++------ 3 files changed, 42 insertions(+), 21 deletions(-) diff --git a/Logs/Chunk.hs b/Logs/Chunk.hs index 76da509477..a3e18efc19 100644 --- a/Logs/Chunk.hs +++ b/Logs/Chunk.hs @@ -15,7 +15,14 @@ - Licensed under the GNU GPL version 3 or higher. -} -module Logs.Chunk where +module Logs.Chunk ( + ChunkMethod(..), + ChunkSize, + ChunkCount, + chunksStored, + chunksRemoved, + getCurrentChunks, +) where import Common.Annex import Logs @@ -26,19 +33,19 @@ import Logs.Chunk.Pure import qualified Data.Map as M import Data.Time.Clock.POSIX -chunksStored :: UUID -> Key -> ChunkSize -> ChunkCount -> Annex () -chunksStored u k chunksize chunkcount = do +chunksStored :: UUID -> Key -> ChunkMethod -> ChunkCount -> Annex () +chunksStored u k chunkmethod chunkcount = do ts <- liftIO getPOSIXTime Annex.Branch.change (chunkLogFile k) $ - showLog . changeMapLog ts (u, chunksize) chunkcount . parseLog + showLog . changeMapLog ts (u, chunkmethod) chunkcount . parseLog -chunksRemoved :: UUID -> Key -> ChunkSize -> Annex () -chunksRemoved u k chunksize = chunksStored u k chunksize 0 +chunksRemoved :: UUID -> Key -> ChunkMethod -> Annex () +chunksRemoved u k chunkmethod = chunksStored u k chunkmethod 0 -getCurrentChunks :: UUID -> Key -> Annex [(ChunkSize, ChunkCount)] +getCurrentChunks :: UUID -> Key -> Annex [(ChunkMethod, ChunkCount)] getCurrentChunks u k = select . parseLog <$> Annex.Branch.get (chunkLogFile k) where - select = filter (\(_sz, ct) -> ct > 0) - . map (\((_ku, sz), l) -> (sz, value l)) + select = filter (\(_m, ct) -> ct > 0) + . map (\((_ku, m), l) -> (m, value l)) . M.toList - . M.filterWithKey (\(ku, _sz) _ -> ku == u) + . M.filterWithKey (\(ku, _m) _ -> ku == u) diff --git a/Logs/Chunk/Pure.hs b/Logs/Chunk/Pure.hs index 9bbfb868ca..080a5a08b9 100644 --- a/Logs/Chunk/Pure.hs +++ b/Logs/Chunk/Pure.hs @@ -6,7 +6,8 @@ -} module Logs.Chunk.Pure - ( ChunkSize + ( ChunkMethod(..) + , ChunkSize , ChunkCount , ChunkLog , parseLog @@ -17,24 +18,37 @@ import Common.Annex import Logs.MapLog import Data.Int +-- Currently chunks are all fixed size, but other chunking methods +-- may be added. 
+data ChunkMethod = FixedSizeChunks ChunkSize | UnknownChunks String + deriving (Ord, Eq) + type ChunkSize = Int64 +-- 0 when chunks are no longer present type ChunkCount = Integer -type ChunkLog = MapLog (UUID, ChunkSize) ChunkCount +type ChunkLog = MapLog (UUID, ChunkMethod) ChunkCount + +parseChunkMethod :: String -> ChunkMethod +parseChunkMethod s = maybe (UnknownChunks s) FixedSizeChunks (readish s) + +showChunkMethod :: ChunkMethod -> String +showChunkMethod (FixedSizeChunks sz) = show sz +showChunkMethod (UnknownChunks s) = s parseLog :: String -> ChunkLog parseLog = parseMapLog fieldparser valueparser where fieldparser s = - let (u,sz) = separate (== sep) s - in (,) <$> pure (toUUID u) <*> readish sz + let (u,m) = separate (== sep) s + in Just (toUUID u, parseChunkMethod m) valueparser = readish showLog :: ChunkLog -> String showLog = showMapLog fieldshower valueshower where - fieldshower (u, sz) = fromUUID u ++ sep : show sz + fieldshower (u, m) = fromUUID u ++ sep : showChunkMethod m valueshower = show sep :: Char diff --git a/Remote/Helper/Chunked.hs b/Remote/Helper/Chunked.hs index 9ba6d9cbd6..11cd42c906 100644 --- a/Remote/Helper/Chunked.hs +++ b/Remote/Helper/Chunked.hs @@ -19,7 +19,6 @@ import Common.Annex import Utility.DataUnits import Types.Remote import Types.Key -import Logs.Chunk.Pure (ChunkSize, ChunkCount) import Logs.Chunk import Utility.Metered import Crypto (EncKey) @@ -108,7 +107,7 @@ storeChunks u chunkconfig k f p storer = metered (Just p) k $ \meterupdate -> | L.null chunk && numchunks > 0 = do -- Once all chunks are successfully -- stored, update the chunk log. - chunksStored u k chunksize numchunks + chunksStored u k (FixedSizeChunks chunksize) numchunks return True | otherwise = do let (chunkkey, chunkkeys') = nextChunkKeyStream chunkkeys @@ -140,7 +139,7 @@ removeChunks remover u chunkconfig encryptor k = do ok <- allM (remover . encryptor) (concat ls) when ok $ do let chunksizes = catMaybes $ map (keyChunkSize <=< headMaybe) ls - forM_ chunksizes $ chunksRemoved u k . fromIntegral + forM_ chunksizes $ chunksRemoved u k . FixedSizeChunks . fromIntegral return ok {- Retrieves a key from a remote, using a retriever action that @@ -313,6 +312,7 @@ chunkKeys u chunkconfig k = do chunkKeysOnly :: UUID -> Key -> Annex [[Key]] chunkKeysOnly u k = map (toChunkList k) <$> getCurrentChunks u k -toChunkList :: Key -> (ChunkSize, ChunkCount) -> [Key] -toChunkList k (chunksize, chunkcount) = takeChunkKeyStream chunkcount $ - chunkKeyStream k chunksize +toChunkList :: Key -> (ChunkMethod, ChunkCount) -> [Key] +toChunkList k (FixedSizeChunks chunksize, chunkcount) = + takeChunkKeyStream chunkcount $ chunkKeyStream k chunksize +toChunkList _ (UnknownChunks _, _) = [] From 153ace45241cc56431e8fa4803858981ea6f2629 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Mon, 28 Jul 2014 14:14:01 -0400 Subject: [PATCH 30/54] fix handling of removal of keys that are not present --- Remote/Directory.hs | 8 +++++++- Types/Remote.hs | 2 +- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/Remote/Directory.hs b/Remote/Directory.hs index cb7553fe25..b107c18e90 100644 --- a/Remote/Directory.hs +++ b/Remote/Directory.hs @@ -160,9 +160,15 @@ remove d k = liftIO $ do - before it can delete them. -} void $ tryIO $ mapM_ allowWrite =<< dirContents dir #endif - catchBoolIO $ do + ok <- catchBoolIO $ do removeDirectoryRecursive dir return True + {- Removing the subdirectory will fail if it doesn't exist. 
+ - But, we want to succeed in that case, as long as the directory
+ - remote's top-level directory does exist. -}
+	if ok
+		then return ok
+		else doesDirectoryExist d <&&> (not <$> doesDirectoryExist dir)
  where
	dir = storeDir d k
diff --git a/Types/Remote.hs b/Types/Remote.hs
index 584f3d044c..9c2a69effd 100644
--- a/Types/Remote.hs
+++ b/Types/Remote.hs
@@ -64,7 +64,7 @@ data RemoteA a = Remote {
	retrieveKeyFile :: Key -> AssociatedFile -> FilePath -> MeterUpdate -> a Bool,
	-- retrieves a key's contents to a tmp file, if it can be done cheaply
	retrieveKeyFileCheap :: Key -> FilePath -> a Bool,
-	-- removes a key's contents
+	-- removes a key's contents (succeeds if the contents are not present)
	removeKey :: Key -> a Bool,
	-- Checks if a key is present in the remote; if the remote
	-- cannot be accessed returns a Left error message.

From 58f727afddd09073b0f4cbad8e174ad186d4152d Mon Sep 17 00:00:00 2001
From: Joey Hess
Date: Mon, 28 Jul 2014 14:18:08 -0400
Subject: [PATCH 31/54] resume interrupted chunked uploads

Leverage the new chunked remotes to automatically resume uploads.
Sort of like rsync, although of course not as efficient since this
needs to start at a chunk boundary. But, unlike rsync, this method will
work for S3, WebDAV, external special remotes, etc, etc. Only directory
special remotes so far, but many more soon!

This implementation will also allow starting an upload from one
repository, interrupting it, and then resuming the upload to the same
remote from an entirely different repository.

Note that I added a comment that storeKey should atomically move the
content into place once it's all received. This was already an
undocumented requirement -- it's necessary for hasKey to work reliably.
This resume code just uses hasKey to find the first chunk that's
missing.

Note that if there are two uploads of the same key to the same chunked
remote, one might resume at the point the other had gotten to, but both
will then redundantly upload. As before.

In the non-resume case, this adds one hasKey call per storeKey, and
only if the remote is configured to use chunks. Future work: Try to
eliminate that hasKey. Notice that eg, `git annex copy --to` checks if
the key is present before sending it, so is already running hasKey..
which could perhaps be cached and reused.

However, this additional overhead is not very large compared with
transferring an entire large file, and the ability to resume is
certainly worth it. There is an optimisation in place for small files,
that avoids trying to resume if the whole file fits within one chunk.

This commit was sponsored by Georg Bauer.
---
 Remote/Helper/Chunked.hs | 73 +++++++++++++++++++++++++----
 Remote/Helper/ChunkedEncryptable.hs | 26 +++++-----
 Types/Remote.hs | 2 +
 3 files changed, 80 insertions(+), 21 deletions(-)

diff --git a/Remote/Helper/Chunked.hs b/Remote/Helper/Chunked.hs
index 11cd42c906..3f591ae39e 100644
--- a/Remote/Helper/Chunked.hs
+++ b/Remote/Helper/Chunked.hs
@@ -73,6 +73,9 @@ numChunks = pred . fromJust . keyChunkNum . fst . nextChunkKeyStream
 - the storer action, along with a corresponding chunk key and a
 - progress meter update callback.
 -
+ - To support resuming, the checker is used to find the first missing
+ - chunk key. Storing starts from that chunk.
+ -
 - This buffers each chunk in memory, so can use a lot of memory
 - with a large ChunkSize. 
- More optimal versions of this can be written, that rely @@ -88,18 +91,31 @@ storeChunks -> FilePath -> MeterUpdate -> (Key -> L.ByteString -> MeterUpdate -> IO Bool) + -> (Key -> Annex (Either String Bool)) -> Annex Bool -storeChunks u chunkconfig k f p storer = metered (Just p) k $ \meterupdate -> - either (\e -> warning (show e) >> return False) (go meterupdate) - =<< (liftIO $ tryIO $ L.readFile f) +storeChunks u chunkconfig k f p storer checker = bracketIO open close go where - go meterupdate b = case chunkconfig of - (UnpaddedChunks chunksize) -> - gochunks meterupdate chunksize b (chunkKeyStream k chunksize) - _ -> liftIO $ storer k b meterupdate + open = tryIO $ openBinaryFile f ReadMode - gochunks :: MeterUpdate -> ChunkSize -> L.ByteString -> ChunkKeyStream -> Annex Bool - gochunks meterupdate chunksize = loop zeroBytesProcessed . splitchunk + close (Right h) = hClose h + close (Left _) = noop + + go (Left e) = do + warning (show e) + return False + go (Right h) = metered (Just p) k $ \meterupdate -> + case chunkconfig of + (UnpaddedChunks chunksize) -> do + let chunkkeys = chunkKeyStream k chunksize + (chunkkeys', startpos) <- seekResume h chunkkeys checker + b <- liftIO $ L.hGetContents h + gochunks meterupdate startpos chunksize b chunkkeys' + _ -> liftIO $ do + b <- L.hGetContents h + storer k b meterupdate + + gochunks :: MeterUpdate -> BytesProcessed -> ChunkSize -> L.ByteString -> ChunkKeyStream -> Annex Bool + gochunks meterupdate startpos chunksize = loop startpos . splitchunk where splitchunk = L.splitAt chunksize @@ -125,6 +141,45 @@ storeChunks u chunkconfig k f p storer = metered (Just p) k $ \meterupdate -> - in previous chunks. -} meterupdate' = offsetMeterUpdate meterupdate bytesprocessed +{- Check if any of the chunk keys are present. If found, seek forward + - in the Handle, so it will be read starting at the first missing chunk. + - Returns the ChunkKeyStream truncated to start at the first missing + - chunk, and the number of bytes skipped due to resuming. + - + - As an optimisation, if the file fits into a single chunk, there's no need + - to check if that chunk is present -- we know it's not, because otherwise + - the whole file would be present and there would be no reason to try to + - store it. + -} +seekResume + :: Handle + -> ChunkKeyStream + -> (Key -> Annex (Either String Bool)) + -> Annex (ChunkKeyStream, BytesProcessed) +seekResume h chunkkeys checker = do + sz <- liftIO (hFileSize h) + if sz <= fromMaybe 0 (keyChunkSize $ fst $ nextChunkKeyStream chunkkeys) + then return (chunkkeys, zeroBytesProcessed) + else check 0 chunkkeys sz + where + check pos cks sz + | pos >= sz = do + -- All chunks are already stored! + liftIO $ hSeek h AbsoluteSeek sz + return (cks', toBytesProcessed sz) + | otherwise = do + v <- checker k + case v of + Right True -> + check pos' cks' sz + _ -> do + when (pos > 0) $ + liftIO $ hSeek h AbsoluteSeek pos + return (cks, toBytesProcessed pos) + where + (k, cks') = nextChunkKeyStream cks + pos' = pos + fromMaybe 0 (keyChunkSize k) + {- Removes all chunks of a key from a remote, by calling a remover - action on each. 
- diff --git a/Remote/Helper/ChunkedEncryptable.hs b/Remote/Helper/ChunkedEncryptable.hs index 66e02da12b..402a64891e 100644 --- a/Remote/Helper/ChunkedEncryptable.hs +++ b/Remote/Helper/ChunkedEncryptable.hs @@ -60,19 +60,19 @@ chunkedEncryptableRemote -> Preparer Retriever -> Remote -> Remote -chunkedEncryptableRemote c preparestorer prepareretriever r = encr +chunkedEncryptableRemote c preparestorer prepareretriever baser = encr where - encr = r + encr = baser { storeKey = \k _f p -> cip >>= storeKeyGen k p , retrieveKeyFile = \k _f d p -> cip >>= retrieveKeyFileGen k d p , retrieveKeyFileCheap = \k d -> cip >>= maybe - (retrieveKeyFileCheap r k d) + (retrieveKeyFileCheap baser k d) (\_ -> return False) , removeKey = \k -> cip >>= removeKeyGen k , hasKey = \k -> cip >>= hasKeyGen k , cost = maybe - (cost r) - (const $ cost r + encryptedRemoteCostAdj) + (cost baser) + (const $ cost baser + encryptedRemoteCostAdj) (extractCipher c) } cip = cipherKey c @@ -87,8 +87,9 @@ chunkedEncryptableRemote c preparestorer prepareretriever r = encr where go (Just storer) = sendAnnex k rollback $ \src -> metered (Just p) k $ \p' -> - storeChunks (uuid r) chunkconfig k src p' $ - storechunk storer + storeChunks (uuid baser) chunkconfig k src p' + (storechunk storer) + (hasKey baser) go Nothing = return False rollback = void $ removeKey encr k storechunk storer k' b p' = case enc of @@ -103,7 +104,8 @@ chunkedEncryptableRemote c preparestorer prepareretriever r = encr safely $ prepareretriever k $ safely . go where go (Just retriever) = metered (Just p) k $ \p' -> - retrieveChunks retriever (uuid r) chunkconfig enck k dest p' sink + retrieveChunks retriever (uuid baser) chunkconfig + enck k dest p' sink go Nothing = return False sink h p' b = do let write = meteredWrite p' h @@ -114,15 +116,15 @@ chunkedEncryptableRemote c preparestorer prepareretriever r = encr readBytes write enck = maybe id snd enc - removeKeyGen k enc = removeChunks remover (uuid r) chunkconfig enck k + removeKeyGen k enc = removeChunks remover (uuid baser) chunkconfig enck k where enck = maybe id snd enc - remover = removeKey r + remover = removeKey baser - hasKeyGen k enc = hasKeyChunks checker (uuid r) chunkconfig enck k + hasKeyGen k enc = hasKeyChunks checker (uuid baser) chunkconfig enck k where enck = maybe id snd enc - checker = hasKey r + checker = hasKey baser {- The base Remote that is provided to chunkedEncryptableRemote - needs to have storeKey and retreiveKeyFile methods, but they are diff --git a/Types/Remote.hs b/Types/Remote.hs index 9c2a69effd..805b984740 100644 --- a/Types/Remote.hs +++ b/Types/Remote.hs @@ -57,6 +57,8 @@ data RemoteA a = Remote { -- Remotes have a use cost; higher is more expensive cost :: Cost, -- Transfers a key's contents from disk to the remote. + -- The key should not appear to be present on the remote until + -- all of its contents have been transferred. storeKey :: Key -> AssociatedFile -> MeterUpdate -> a Bool, -- Retrieves a key's contents to a file. 
-- (The MeterUpdate does not need to be used if it retrieves From 2474cf0032e1ea26471d9f6fd0ed9d308c64fbad Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Mon, 28 Jul 2014 14:37:22 -0400 Subject: [PATCH 32/54] make explicit the implicit requirement that CHECKPRESENT not say a key is present until it's all done being stored --- doc/design/external_special_remote_protocol.mdwn | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/doc/design/external_special_remote_protocol.mdwn b/doc/design/external_special_remote_protocol.mdwn index 6fe09ff7cc..01ffe7fd45 100644 --- a/doc/design/external_special_remote_protocol.mdwn +++ b/doc/design/external_special_remote_protocol.mdwn @@ -101,12 +101,14 @@ The following requests *must* all be supported by the special remote. Tells the special remote it's time to prepare itself to be used. Only INITREMOTE can come before this. * `TRANSFER STORE|RETRIEVE Key File` - Requests the transfer of a key. For Send, the File is the file to upload; - for Receive the File is where to store the download. + Requests the transfer of a key. For STORE, the File is the file to upload; + for RETRIEVE the File is where to store the download. Note that the File should not influence the filename used on the remote. The filename will not contain any whitespace. + Note that it's important that, while a Key is being stored, CHECKPRESENT + not indicate it's present until all the data has been transferred. Multiple transfers might be requested by git-annex, but it's fine for the - program to serialize them and only do one at a time. + program to serialize them and only do one at a time. * `CHECKPRESENT Key` Requests the remote check if a key is present in it. * `REMOVE Key` @@ -286,7 +288,6 @@ start a new process the next time it needs to use a remote. the remote. However, \n and probably \0 need to be escaped somehow in the file data, which adds complication. * uuid discovery during INITREMOTE. -* Support for splitting files into chunks. * Support for getting and setting the list of urls that can be associated with a key. * Hook into webapp. Needs a way to provide some kind of prompt to the user From 216fdbd6bd064b53709aec40781449ba2420f105 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Tue, 29 Jul 2014 15:07:32 -0400 Subject: [PATCH 33/54] fix non-checked hasKeyChunks --- Remote/Helper/Chunked.hs | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) diff --git a/Remote/Helper/Chunked.hs b/Remote/Helper/Chunked.hs index 3f591ae39e..708d878001 100644 --- a/Remote/Helper/Chunked.hs +++ b/Remote/Helper/Chunked.hs @@ -320,21 +320,20 @@ hasKeyChunks checker u chunkconfig encryptor basek -- that are likely not there. 
ifM ((Right True ==) <$> checker (encryptor basek)) ( return (Right True) - , checklists impossible =<< chunkKeysOnly u basek + , checklists Nothing =<< chunkKeysOnly u basek ) - | otherwise = checklists impossible =<< chunkKeys u chunkconfig basek + | otherwise = checklists Nothing =<< chunkKeys u chunkconfig basek where - checklists lastfailmsg [] = return $ Left lastfailmsg - checklists _ (l:ls) + checklists Nothing [] = return (Right False) + checklists (Just deferrederror) [] = return (Left deferrederror) + checklists d (l:ls) | not (null l) = do v <- checkchunks l case v of - Left e -> checklists e ls + Left e -> checklists (Just e) ls Right True -> return (Right True) - Right False - | null ls -> return (Right False) - | otherwise -> checklists impossible ls - | otherwise = checklists impossible ls + Right False -> checklists Nothing ls + | otherwise = checklists d ls checkchunks :: [Key] -> Annex (Either String Bool) checkchunks [] = return (Right True) @@ -344,8 +343,6 @@ hasKeyChunks checker u chunkconfig encryptor basek then checkchunks ks else return v - impossible = "no recorded chunks" - {- A key can be stored in a remote unchunked, or as a list of chunked keys. - This can be the case whether or not the remote is currently configured - to use chunking. From f5af470875be6c9d92747ca70bd3486b6ac7acf8 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Tue, 29 Jul 2014 14:53:17 -0400 Subject: [PATCH 34/54] add ContentSource type, for remotes that act on files rather than ByteStrings Note that currently nothing cleans up a ContentSource's file, when eg, retrieving chunks. --- Remote/Directory.hs | 7 ++-- Remote/Directory/LegacyChunked.hs | 2 +- Remote/Helper/Chunked.hs | 46 +++++++++++++----------- Remote/Helper/ChunkedEncryptable.hs | 42 ++++++++++------------ Types/StoreRetrieve.hs | 54 +++++++++++++++++++++++++++++ 5 files changed, 102 insertions(+), 49 deletions(-) create mode 100644 Types/StoreRetrieve.hs diff --git a/Remote/Directory.hs b/Remote/Directory.hs index b107c18e90..5d8a040d43 100644 --- a/Remote/Directory.hs +++ b/Remote/Directory.hs @@ -110,9 +110,9 @@ tmpDir d k = addTrailingPathSeparator $ d "tmp" keyFile k prepareStore :: FilePath -> ChunkConfig -> Preparer Storer prepareStore d chunkconfig = checkPrepare (\k -> checkDiskSpace (Just d) k 0) - (store d chunkconfig) + (byteStorer $ store d chunkconfig) -store :: FilePath -> ChunkConfig -> Storer +store :: FilePath -> ChunkConfig -> Key -> L.ByteString -> MeterUpdate -> IO Bool store d chunkconfig k b p = do void $ tryIO $ createDirectoryIfMissing True tmpdir case chunkconfig of @@ -137,7 +137,8 @@ store d chunkconfig k b p = do retrieve :: FilePath -> ChunkConfig -> Preparer Retriever retrieve d (LegacyChunks _) = Legacy.retrieve locations d -retrieve d _ = simplyPrepare $ \k -> L.readFile =<< getLocation d k +retrieve d _ = simplyPrepare $ byteRetriever $ + \k -> L.readFile =<< getLocation d k retrieveCheap :: FilePath -> ChunkConfig -> Key -> FilePath -> Annex Bool -- no cheap retrieval possible for chunks diff --git a/Remote/Directory/LegacyChunked.hs b/Remote/Directory/LegacyChunked.hs index c7b8ad52c3..af846a2e6c 100644 --- a/Remote/Directory/LegacyChunked.hs +++ b/Remote/Directory/LegacyChunked.hs @@ -96,7 +96,7 @@ retrieve locations d basek a = do tmpdir <- fromRepo $ gitAnnexTmpMiscDir createAnnexDirectory tmpdir let tmp = tmpdir keyFile basek ++ ".directorylegacy.tmp" - a $ Just $ \k -> do + a $ Just $ byteRetriever $ \k -> do void $ withStoredFiles d locations k $ \fs -> do forM_ fs $ S.appendFile tmp <=< 
S.readFile diff --git a/Remote/Helper/Chunked.hs b/Remote/Helper/Chunked.hs index 708d878001..70e541cce1 100644 --- a/Remote/Helper/Chunked.hs +++ b/Remote/Helper/Chunked.hs @@ -17,6 +17,7 @@ module Remote.Helper.Chunked ( import Common.Annex import Utility.DataUnits +import Types.StoreRetrieve import Types.Remote import Types.Key import Logs.Chunk @@ -90,29 +91,31 @@ storeChunks -> Key -> FilePath -> MeterUpdate - -> (Key -> L.ByteString -> MeterUpdate -> IO Bool) + -> (Key -> ContentSource -> MeterUpdate -> IO Bool) -> (Key -> Annex (Either String Bool)) -> Annex Bool -storeChunks u chunkconfig k f p storer checker = bracketIO open close go +storeChunks u chunkconfig k f p storer checker = + case chunkconfig of + (UnpaddedChunks chunksize) -> + bracketIO open close (go chunksize) + _ -> showprogress $ + liftIO . storer k (FileContent f) where + showprogress = metered (Just p) k + open = tryIO $ openBinaryFile f ReadMode close (Right h) = hClose h close (Left _) = noop - go (Left e) = do + go _ (Left e) = do warning (show e) return False - go (Right h) = metered (Just p) k $ \meterupdate -> - case chunkconfig of - (UnpaddedChunks chunksize) -> do - let chunkkeys = chunkKeyStream k chunksize - (chunkkeys', startpos) <- seekResume h chunkkeys checker - b <- liftIO $ L.hGetContents h - gochunks meterupdate startpos chunksize b chunkkeys' - _ -> liftIO $ do - b <- L.hGetContents h - storer k b meterupdate + go chunksize (Right h) = showprogress $ \meterupdate -> do + let chunkkeys = chunkKeyStream k chunksize + (chunkkeys', startpos) <- seekResume h chunkkeys checker + b <- liftIO $ L.hGetContents h + gochunks meterupdate startpos chunksize b chunkkeys' gochunks :: MeterUpdate -> BytesProcessed -> ChunkSize -> L.ByteString -> ChunkKeyStream -> Annex Bool gochunks meterupdate startpos chunksize = loop startpos . splitchunk @@ -127,7 +130,7 @@ storeChunks u chunkconfig k f p storer checker = bracketIO open close go return True | otherwise = do let (chunkkey, chunkkeys') = nextChunkKeyStream chunkkeys - ifM (liftIO $ storer chunkkey chunk meterupdate') + ifM (liftIO $ storer chunkkey (ByteContent chunk) meterupdate') ( do let bytesprocessed' = addBytesProcessed bytesprocessed (L.length chunk) loop bytesprocessed' (splitchunk bs) chunkkeys' @@ -197,8 +200,7 @@ removeChunks remover u chunkconfig encryptor k = do forM_ chunksizes $ chunksRemoved u k . FixedSizeChunks . fromIntegral return ok -{- Retrieves a key from a remote, using a retriever action that - - streams it to a ByteString. +{- Retrieves a key from a remote, using a retriever action. - - When the remote is chunked, tries each of the options returned by - chunkKeys until it finds one where the retriever successfully @@ -214,7 +216,7 @@ removeChunks remover u chunkconfig encryptor k = do - to resume. -} retrieveChunks - :: (Key -> IO L.ByteString) + :: Retriever -> UUID -> ChunkConfig -> EncKey @@ -250,13 +252,13 @@ retrieveChunks retriever u chunkconfig encryptor basek dest basep sink Left e | null ls -> giveup e | otherwise -> firstavail currsize ls - Right b -> do + Right content -> do let offset = resumeOffset currsize k let p = maybe basep (offsetMeterUpdate basep . 
toBytesProcessed) offset bracket (maybe opennew openresume offset) hClose $ \h -> do - sink h p b + withBytes content $ sink h p let sz = toBytesProcessed $ fromMaybe 0 $ keyChunkSize k getrest p h sz sz ks @@ -264,11 +266,13 @@ retrieveChunks retriever u chunkconfig encryptor basek dest basep sink getrest _ _ _ _ [] = return True getrest p h sz bytesprocessed (k:ks) = do let p' = offsetMeterUpdate p bytesprocessed - sink h p' =<< retriever (encryptor k) + content <- retriever (encryptor k) + withBytes content $ sink h p' getrest p h sz (addBytesProcessed bytesprocessed sz) ks getunchunked = liftIO $ bracket opennew hClose $ \h -> do - retriever (encryptor basek) >>= sink h basep + content <- retriever (encryptor basek) + withBytes content $ sink h basep return True opennew = openBinaryFile dest WriteMode diff --git a/Remote/Helper/ChunkedEncryptable.hs b/Remote/Helper/ChunkedEncryptable.hs index 402a64891e..b851ecd94e 100644 --- a/Remote/Helper/ChunkedEncryptable.hs +++ b/Remote/Helper/ChunkedEncryptable.hs @@ -5,23 +5,28 @@ - Licensed under the GNU GPL version 3 or higher. -} -{-# LANGUAGE Rank2Types #-} +{-# LANGUAGE RankNTypes #-} +{-# LANGUAGE ImpredicativeTypes #-} +{-# LANGUAGE FlexibleContexts #-} module Remote.Helper.ChunkedEncryptable ( Preparer, - simplyPrepare, - checkPrepare, Storer, Retriever, + simplyPrepare, + checkPrepare, + fileStorer, + byteStorer, + fileRetriever, + byteRetriever, chunkedEncryptableRemote, storeKeyDummy, retreiveKeyFileDummy, module X ) where -import qualified Data.ByteString.Lazy as L - import Common.Annex +import Types.StoreRetrieve import Types.Remote import Crypto import Config.Cost @@ -31,10 +36,6 @@ import Remote.Helper.Encryptable as X import Annex.Content import Annex.Exception --- Prepares for and then runs an action that will act on a Key, --- passing it a helper when the preparation is successful. -type Preparer helper = forall a. Key -> (Maybe helper -> Annex a) -> Annex a - simplyPrepare :: helper -> Preparer helper simplyPrepare helper _ a = a $ Just helper @@ -44,14 +45,6 @@ checkPrepare checker helper k a = ifM (checker k) , a Nothing ) --- Stores a Key, which may be encrypted and/or a chunk key. --- May throw exceptions. -type Storer = Key -> L.ByteString -> MeterUpdate -> IO Bool - --- Retrieves a Key, which may be encrypted and/or a chunk key. --- Throws exception if key is not present, or remote is not accessible. -type Retriever = Key -> IO L.ByteString - {- Modifies a base Remote to support both chunking and encryption. 
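 -
 - Storing layers as: split the content into chunks, encrypt each chunk
 - (when a cipher is configured), and hand each resulting piece to the
 - base remote's storer. Retrieval reverses the layering.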
-} chunkedEncryptableRemote @@ -88,16 +81,17 @@ chunkedEncryptableRemote c preparestorer prepareretriever baser = encr go (Just storer) = sendAnnex k rollback $ \src -> metered (Just p) k $ \p' -> storeChunks (uuid baser) chunkconfig k src p' - (storechunk storer) + (storechunk enc storer) (hasKey baser) go Nothing = return False rollback = void $ removeKey encr k - storechunk storer k' b p' = case enc of - Nothing -> storer k' b p' - Just (cipher, enck) -> - encrypt gpgopts cipher (feedBytes b) $ - readBytes $ \encb -> - storer (enck k') encb p' + + storechunk Nothing storer k content p = storer k content p + storechunk (Just (cipher, enck)) storer k content p = + withBytes content $ \b -> + encrypt gpgopts cipher (feedBytes b) $ + readBytes $ \encb -> + storer (enck k) (ByteContent encb) p -- call retriever to get chunks; decrypt them; stream to dest file retrieveKeyFileGen k dest p enc = diff --git a/Types/StoreRetrieve.hs b/Types/StoreRetrieve.hs new file mode 100644 index 0000000000..2520d63092 --- /dev/null +++ b/Types/StoreRetrieve.hs @@ -0,0 +1,54 @@ +{- Types for Storer and Retriever + - + - Copyright 2014 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. + -} + +{-# LANGUAGE Rank2Types #-} + +module Types.StoreRetrieve where + +import Common.Annex +import Utility.Metered +import Utility.Tmp + +import qualified Data.ByteString.Lazy as L + +-- Prepares for and then runs an action that will act on a Key's +-- content, passing it a helper when the preparation is successful. +type Preparer helper = forall a. Key -> (Maybe helper -> Annex a) -> Annex a + +-- A source of a Key's content. +data ContentSource + = FileContent FilePath + | ByteContent L.ByteString + +-- Action that stores a Key's content on a remote. +-- Can throw exceptions. +type Storer = Key -> ContentSource -> MeterUpdate -> IO Bool + +-- Action that retrieves a Key's content from a remote. +-- Throws exception if key is not present, or remote is not accessible. +type Retriever = Key -> IO ContentSource + +fileStorer :: (Key -> FilePath -> MeterUpdate -> IO Bool) -> Storer +fileStorer a k (FileContent f) m = a k f m +fileStorer a k (ByteContent b) m = do + withTmpFile "tmpXXXXXX" $ \f h -> do + L.hPut h b + hClose h + a k f m + +byteStorer :: (Key -> L.ByteString -> MeterUpdate -> IO Bool) -> Storer +byteStorer a k c m = withBytes c $ \b -> a k b m + +withBytes :: ContentSource -> (L.ByteString -> IO a) -> IO a +withBytes (ByteContent b) a = a b +withBytes (FileContent f) a = a =<< L.readFile f + +fileRetriever :: (Key -> IO FilePath) -> Retriever +fileRetriever a k = FileContent <$> a k + +byteRetriever :: (Key -> IO L.ByteString) -> Retriever +byteRetriever a k = ByteContent <$> a k From 1d263e1e7e2030938c8c00aaaafdf549ada2ec94 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Tue, 29 Jul 2014 16:22:19 -0400 Subject: [PATCH 35/54] lift types from IO to Annex Some remotes like External need to run store and retrieve actions in Annex, not IO. In order to do that lift, I had to dive pretty deep into the utilities, making Utility.Gpg and Utility.Tmp be partly converted to using MonadIO, and Control.Monad.Catch for exception handling. There should be no behavior changes in this commit. This commit was sponsored by Michael Barabanov. 
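As a rough sketch of the lifting pattern (illustrative names; not code
from this commit), a bracket-style helper that was hardcoded to IO
generalizes by constraining the monad to MonadIO and MonadMask, and
lifting only the genuinely IO pieces:

    import Control.Monad.IO.Class (MonadIO, liftIO)
    import Control.Monad.Catch (MonadMask, bracket)
    import System.IO

    -- Before: withFoo :: FilePath -> (Handle -> IO a) -> IO a
    withFoo :: (MonadIO m, MonadMask m) => FilePath -> (Handle -> m a) -> m a
    withFoo f use = bracket
        (liftIO $ openFile f ReadMode) -- acquire in IO, lifted into m
        (liftIO . hClose)              -- release runs even on exception
        use                            -- body runs directly in m

Callers in plain IO keep working, and any monad with MonadIO and
MonadMask instances can use such helpers directly.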
--- Crypto.hs | 15 +++++++------ Remote/Directory.hs | 6 +++--- Remote/Directory/LegacyChunked.hs | 2 +- Remote/Helper/Chunked.hs | 28 +++++++++++------------- Types/StoreRetrieve.hs | 22 +++++++++---------- Utility/Gpg.hs | 36 ++++++++++++++++++++----------- Utility/Process.hs | 1 + Utility/Tmp.hs | 13 +++++------ 8 files changed, 68 insertions(+), 55 deletions(-) diff --git a/Crypto.hs b/Crypto.hs index 89b47f3184..dcefc2959a 100644 --- a/Crypto.hs +++ b/Crypto.hs @@ -3,12 +3,13 @@ - Currently using gpg; could later be modified to support different - crypto backends if neccessary. - - - Copyright 2011-2012 Joey Hess + - Copyright 2011-2014 Joey Hess - - Licensed under the GNU GPL version 3 or higher. -} {-# LANGUAGE FlexibleInstances #-} +{-# LANGUAGE Rank2Types #-} module Crypto ( Cipher, @@ -35,6 +36,8 @@ import qualified Data.ByteString.Lazy as L import Data.ByteString.Lazy.UTF8 (fromString) import Control.Applicative import qualified Data.Map as M +import Control.Monad.IO.Class +import Control.Monad.Catch (MonadMask) import Common.Annex import qualified Utility.Gpg as Gpg @@ -151,7 +154,7 @@ encryptKey mac c k = stubKey } type Feeder = Handle -> IO () -type Reader a = Handle -> IO a +type Reader m a = Handle -> m a feedFile :: FilePath -> Feeder feedFile f h = L.hPut h =<< L.readFile f @@ -159,8 +162,8 @@ feedFile f h = L.hPut h =<< L.readFile f feedBytes :: L.ByteString -> Feeder feedBytes = flip L.hPut -readBytes :: (L.ByteString -> IO a) -> Reader a -readBytes a h = L.hGetContents h >>= a +readBytes :: (MonadIO m) => (L.ByteString -> m a) -> Reader m a +readBytes a h = liftIO (L.hGetContents h) >>= a {- Runs a Feeder action, that generates content that is symmetrically - encrypted with the Cipher (unless it is empty, in which case @@ -168,7 +171,7 @@ readBytes a h = L.hGetContents h >>= a - read by the Reader action. Note: For public-key encryption, - recipients MUST be included in 'params' (for instance using - 'getGpgEncParams'). -} -encrypt :: [CommandParam] -> Cipher -> Feeder -> Reader a -> IO a +encrypt :: (MonadIO m, MonadMask m) => [CommandParam] -> Cipher -> Feeder -> Reader m a -> m a encrypt params cipher = case cipher of Cipher{} -> Gpg.feedRead (params ++ Gpg.stdEncryptionParams True) $ cipherPassphrase cipher @@ -177,7 +180,7 @@ encrypt params cipher = case cipher of {- Runs a Feeder action, that generates content that is decrypted with the - Cipher (or using a private key if the Cipher is empty), and read by the - Reader action. 
-} -decrypt :: Cipher -> Feeder -> Reader a -> IO a +decrypt :: (MonadIO m, MonadMask m) => Cipher -> Feeder -> Reader m a -> m a decrypt cipher = case cipher of Cipher{} -> Gpg.feedRead [Param "--decrypt"] $ cipherPassphrase cipher MacOnlyCipher{} -> Gpg.pipeLazy [Param "--decrypt"] diff --git a/Remote/Directory.hs b/Remote/Directory.hs index 5d8a040d43..9f27759659 100644 --- a/Remote/Directory.hs +++ b/Remote/Directory.hs @@ -112,8 +112,8 @@ prepareStore d chunkconfig = checkPrepare (\k -> checkDiskSpace (Just d) k 0) (byteStorer $ store d chunkconfig) -store :: FilePath -> ChunkConfig -> Key -> L.ByteString -> MeterUpdate -> IO Bool -store d chunkconfig k b p = do +store :: FilePath -> ChunkConfig -> Key -> L.ByteString -> MeterUpdate -> Annex Bool +store d chunkconfig k b p = liftIO $ do void $ tryIO $ createDirectoryIfMissing True tmpdir case chunkconfig of LegacyChunks chunksize -> Legacy.store chunksize finalizer k b p tmpdir destdir @@ -138,7 +138,7 @@ store d chunkconfig k b p = do retrieve :: FilePath -> ChunkConfig -> Preparer Retriever retrieve d (LegacyChunks _) = Legacy.retrieve locations d retrieve d _ = simplyPrepare $ byteRetriever $ - \k -> L.readFile =<< getLocation d k + \k -> liftIO $ L.readFile =<< getLocation d k retrieveCheap :: FilePath -> ChunkConfig -> Key -> FilePath -> Annex Bool -- no cheap retrieval possible for chunks diff --git a/Remote/Directory/LegacyChunked.hs b/Remote/Directory/LegacyChunked.hs index af846a2e6c..312119f4e6 100644 --- a/Remote/Directory/LegacyChunked.hs +++ b/Remote/Directory/LegacyChunked.hs @@ -96,7 +96,7 @@ retrieve locations d basek a = do tmpdir <- fromRepo $ gitAnnexTmpMiscDir createAnnexDirectory tmpdir let tmp = tmpdir keyFile basek ++ ".directorylegacy.tmp" - a $ Just $ byteRetriever $ \k -> do + a $ Just $ byteRetriever $ \k -> liftIO $ do void $ withStoredFiles d locations k $ \fs -> do forM_ fs $ S.appendFile tmp <=< S.readFile diff --git a/Remote/Helper/Chunked.hs b/Remote/Helper/Chunked.hs index 70e541cce1..ccdd352713 100644 --- a/Remote/Helper/Chunked.hs +++ b/Remote/Helper/Chunked.hs @@ -27,7 +27,6 @@ import Annex.Exception import qualified Data.ByteString.Lazy as L import qualified Data.Map as M -import Control.Exception data ChunkConfig = NoChunks @@ -91,15 +90,14 @@ storeChunks -> Key -> FilePath -> MeterUpdate - -> (Key -> ContentSource -> MeterUpdate -> IO Bool) + -> (Key -> ContentSource -> MeterUpdate -> Annex Bool) -> (Key -> Annex (Either String Bool)) -> Annex Bool storeChunks u chunkconfig k f p storer checker = case chunkconfig of (UnpaddedChunks chunksize) -> bracketIO open close (go chunksize) - _ -> showprogress $ - liftIO . storer k (FileContent f) + _ -> showprogress $ storer k (FileContent f) where showprogress = metered (Just p) k @@ -130,7 +128,7 @@ storeChunks u chunkconfig k f p storer checker = return True | otherwise = do let (chunkkey, chunkkeys') = nextChunkKeyStream chunkkeys - ifM (liftIO $ storer chunkkey (ByteContent chunk) meterupdate') + ifM (storer chunkkey (ByteContent chunk) meterupdate') ( do let bytesprocessed' = addBytesProcessed bytesprocessed (L.length chunk) loop bytesprocessed' (splitchunk bs) chunkkeys' @@ -234,20 +232,20 @@ retrieveChunks retriever u chunkconfig encryptor basek dest basep sink const (go =<< chunkKeysOnly u basek) | otherwise = go =<< chunkKeys u chunkconfig basek where - go ls = liftIO $ do - currsize <- catchMaybeIO $ + go ls = do + currsize <- liftIO $ catchMaybeIO $ toInteger . 
fileSize <$> getFileStatus dest let ls' = maybe ls (setupResume ls) currsize - firstavail currsize ls' `catchNonAsync` giveup + firstavail currsize ls' `catchNonAsyncAnnex` giveup giveup e = do - warningIO (show e) + warning (show e) return False firstavail _ [] = return False firstavail currsize ([]:ls) = firstavail currsize ls firstavail currsize ((k:ks):ls) = do - v <- tryNonAsync $ retriever (encryptor k) + v <- tryNonAsyncAnnex $ retriever (encryptor k) case v of Left e | null ls -> giveup e @@ -257,8 +255,8 @@ retrieveChunks retriever u chunkconfig encryptor basek dest basep sink let p = maybe basep (offsetMeterUpdate basep . toBytesProcessed) offset - bracket (maybe opennew openresume offset) hClose $ \h -> do - withBytes content $ sink h p + bracketIO (maybe opennew openresume offset) hClose $ \h -> do + withBytes content $ liftIO . sink h p let sz = toBytesProcessed $ fromMaybe 0 $ keyChunkSize k getrest p h sz sz ks @@ -267,12 +265,12 @@ retrieveChunks retriever u chunkconfig encryptor basek dest basep sink getrest p h sz bytesprocessed (k:ks) = do let p' = offsetMeterUpdate p bytesprocessed content <- retriever (encryptor k) - withBytes content $ sink h p' + withBytes content $ liftIO . sink h p' getrest p h sz (addBytesProcessed bytesprocessed sz) ks - getunchunked = liftIO $ bracket opennew hClose $ \h -> do + getunchunked = bracketIO opennew hClose $ \h -> do content <- retriever (encryptor basek) - withBytes content $ sink h basep + withBytes content $ liftIO . sink h basep return True opennew = openBinaryFile dest WriteMode diff --git a/Types/StoreRetrieve.hs b/Types/StoreRetrieve.hs index 2520d63092..ccbf99e3f5 100644 --- a/Types/StoreRetrieve.hs +++ b/Types/StoreRetrieve.hs @@ -26,29 +26,29 @@ data ContentSource -- Action that stores a Key's content on a remote. -- Can throw exceptions. -type Storer = Key -> ContentSource -> MeterUpdate -> IO Bool +type Storer = Key -> ContentSource -> MeterUpdate -> Annex Bool -- Action that retrieves a Key's content from a remote. -- Throws exception if key is not present, or remote is not accessible. 
-type Retriever = Key -> IO ContentSource +type Retriever = Key -> Annex ContentSource -fileStorer :: (Key -> FilePath -> MeterUpdate -> IO Bool) -> Storer +fileStorer :: (Key -> FilePath -> MeterUpdate -> Annex Bool) -> Storer fileStorer a k (FileContent f) m = a k f m -fileStorer a k (ByteContent b) m = do - withTmpFile "tmpXXXXXX" $ \f h -> do +fileStorer a k (ByteContent b) m = withTmpFile "tmpXXXXXX" $ \f h -> do + liftIO $ do L.hPut h b hClose h - a k f m + a k f m -byteStorer :: (Key -> L.ByteString -> MeterUpdate -> IO Bool) -> Storer +byteStorer :: (Key -> L.ByteString -> MeterUpdate -> Annex Bool) -> Storer byteStorer a k c m = withBytes c $ \b -> a k b m -withBytes :: ContentSource -> (L.ByteString -> IO a) -> IO a +withBytes :: ContentSource -> (L.ByteString -> Annex a) -> Annex a withBytes (ByteContent b) a = a b -withBytes (FileContent f) a = a =<< L.readFile f +withBytes (FileContent f) a = a =<< liftIO (L.readFile f) -fileRetriever :: (Key -> IO FilePath) -> Retriever +fileRetriever :: (Key -> Annex FilePath) -> Retriever fileRetriever a k = FileContent <$> a k -byteRetriever :: (Key -> IO L.ByteString) -> Retriever +byteRetriever :: (Key -> Annex L.ByteString) -> Retriever byteRetriever a k = ByteContent <$> a k diff --git a/Utility/Gpg.hs b/Utility/Gpg.hs index a00bf99da5..410259b11b 100644 --- a/Utility/Gpg.hs +++ b/Utility/Gpg.hs @@ -11,14 +11,15 @@ module Utility.Gpg where import Control.Applicative import Control.Concurrent +import Control.Monad.IO.Class import qualified Data.Map as M +import Control.Monad.Catch (bracket, MonadMask) import Common import qualified Build.SysConfig as SysConfig #ifndef mingw32_HOST_OS import System.Posix.Types -import Control.Exception (bracket) import System.Path import Utility.Env #else @@ -104,18 +105,18 @@ pipeStrict params input = do - - Note that to avoid deadlock with the cleanup stage, - the reader must fully consume gpg's input before returning. -} -feedRead :: [CommandParam] -> String -> (Handle -> IO ()) -> (Handle -> IO a) -> IO a +feedRead :: (MonadIO m, MonadMask m) => [CommandParam] -> String -> (Handle -> IO ()) -> (Handle -> m a) -> m a feedRead params passphrase feeder reader = do #ifndef mingw32_HOST_OS -- pipe the passphrase into gpg on a fd - (frompipe, topipe) <- createPipe - void $ forkIO $ do + (frompipe, topipe) <- liftIO createPipe + liftIO $ void $ forkIO $ do toh <- fdToHandle topipe hPutStrLn toh passphrase hClose toh let Fd pfd = frompipe let passphrasefd = [Param "--passphrase-fd", Param $ show pfd] - closeFd frompipe `after` go (passphrasefd ++ params) + liftIO (closeFd frompipe) `after` go (passphrasefd ++ params) #else -- store the passphrase in a temp file for gpg withTmpFile "gpg" $ \tmpfile h -> do @@ -128,15 +129,24 @@ feedRead params passphrase feeder reader = do go params' = pipeLazy params' feeder reader {- Like feedRead, but without passphrase. -} -pipeLazy :: [CommandParam] -> (Handle -> IO ()) -> (Handle -> IO a) -> IO a +pipeLazy :: (MonadIO m, MonadMask m) => [CommandParam] -> (Handle -> IO ()) -> (Handle -> m a) -> m a pipeLazy params feeder reader = do - params' <- stdParams $ Param "--batch" : params - withBothHandles createProcessSuccess (proc gpgcmd params') - $ \(to, from) -> do - void $ forkIO $ do - feeder to - hClose to - reader from + params' <- liftIO $ stdParams $ Param "--batch" : params + let p = (proc gpgcmd params') + { std_in = CreatePipe + , std_out = CreatePipe + , std_err = Inherit + } + bracket (setup p) (cleanup p) go + where + setup = liftIO . 
createProcess
+	cleanup p (_, _, _, pid) = liftIO $ forceSuccessProcess p pid
+	go p = do
+		let (to, from) = bothHandles p
+		liftIO $ void $ forkIO $ do
+			feeder to
+			hClose to
+		reader from
 
 {- Finds gpg public keys matching some string. (Could be an email address,
  - a key id, or a name; See the section 'HOW TO SPECIFY A USER ID' of
diff --git a/Utility/Process.hs b/Utility/Process.hs
index 1f722af817..e25618eba7 100644
--- a/Utility/Process.hs
+++ b/Utility/Process.hs
@@ -31,6 +31,7 @@ module Utility.Process (
	stdinHandle,
	stdoutHandle,
	stderrHandle,
+	bothHandles,
	processHandle,
	devNull,
 ) where
diff --git a/Utility/Tmp.hs b/Utility/Tmp.hs
index bed30bb4d4..7da5cc2847 100644
--- a/Utility/Tmp.hs
+++ b/Utility/Tmp.hs
@@ -9,11 +9,12 @@
 
 module Utility.Tmp where
 
-import Control.Exception (bracket)
 import System.IO
 import System.Directory
 import Control.Monad.IfElse
 import System.FilePath
+import Control.Monad.IO.Class
+import Control.Monad.Catch (bracket, MonadMask)
 
 import Utility.Exception
 import Utility.FileSystemEncoding
@@ -42,18 +43,18 @@ viaTmp a file content = bracket setup cleanup use
 
 {- Runs an action with a tmp file located in the system's tmp directory
 - (or in "." if there is none) then removes the file. -}
-withTmpFile :: Template -> (FilePath -> Handle -> IO a) -> IO a
+withTmpFile :: (MonadIO m, MonadMask m) => Template -> (FilePath -> Handle -> m a) -> m a
 withTmpFile template a = do
-	tmpdir <- catchDefaultIO "." getTemporaryDirectory
+	tmpdir <- liftIO $ catchDefaultIO "." getTemporaryDirectory
	withTmpFileIn tmpdir template a
 
 {- Runs an action with a tmp file located in the specified directory,
 - then removes the file. -}
-withTmpFileIn :: FilePath -> Template -> (FilePath -> Handle -> IO a) -> IO a
+withTmpFileIn :: (MonadIO m, MonadMask m) => FilePath -> Template -> (FilePath -> Handle -> m a) -> m a
 withTmpFileIn tmpdir template a = bracket create remove use
	where
-		create = openTempFile tmpdir template
-		remove (name, handle) = do
+		create = liftIO $ openTempFile tmpdir template
+		remove (name, handle) = liftIO $ do
			hClose handle
			catchBoolIO (removeFile name >> return True)
		use (name, handle) = a name handle

From 47e522979cebc92aa84bb3c9d123bc4daf1f30cc Mon Sep 17 00:00:00 2001
From: Joey Hess
Date: Tue, 29 Jul 2014 17:17:41 -0400
Subject: [PATCH 36/54] allow Retriever action to update the progress meter

Needed for eg, Remote.External. Generally, any Retriever that stores
content in a file is responsible for updating the meter, while ones that
produce a lazy bytestring cannot update the meter, so are not asked to.
---
 Annex/Content.hs | 5 ++++-
 Remote/Directory.hs | 4 ++--
 Remote/Helper/Chunked.hs | 33 +++++++++++++++++++----------
 Remote/Helper/ChunkedEncryptable.hs | 9 ++++++--
 Types/StoreRetrieve.hs | 24 ++++++++++-----------
 5 files changed, 46 insertions(+), 29 deletions(-)

diff --git a/Annex/Content.hs b/Annex/Content.hs
index 8ad3d5e65e..6975f322f9 100644
--- a/Annex/Content.hs
+++ b/Annex/Content.hs
@@ -264,7 +264,10 @@ prepTmp key = do
	createAnnexDirectory (parentDir tmp)
	return tmp
 
-{- Creates a temp file, runs an action on it, and cleans up the temp file. -}
+{- Creates a temp file for a key, runs an action on it, and cleans up
+ - the temp file. If the action throws an exception, the temp file is
+ - left behind, which allows for resuming. 
+ -} withTmp :: Key -> (FilePath -> Annex a) -> Annex a withTmp key action = do tmp <- prepTmp key diff --git a/Remote/Directory.hs b/Remote/Directory.hs index 9f27759659..37942a295d 100644 --- a/Remote/Directory.hs +++ b/Remote/Directory.hs @@ -137,8 +137,8 @@ store d chunkconfig k b p = liftIO $ do retrieve :: FilePath -> ChunkConfig -> Preparer Retriever retrieve d (LegacyChunks _) = Legacy.retrieve locations d -retrieve d _ = simplyPrepare $ byteRetriever $ - \k -> liftIO $ L.readFile =<< getLocation d k +retrieve d _ = simplyPrepare $ byteRetriever $ \k -> + liftIO $ L.readFile =<< getLocation d k retrieveCheap :: FilePath -> ChunkConfig -> Key -> FilePath -> Annex Bool -- no cheap retrieval possible for chunks diff --git a/Remote/Helper/Chunked.hs b/Remote/Helper/Chunked.hs index ccdd352713..102ced8f44 100644 --- a/Remote/Helper/Chunked.hs +++ b/Remote/Helper/Chunked.hs @@ -221,7 +221,7 @@ retrieveChunks -> Key -> FilePath -> MeterUpdate - -> (Handle -> MeterUpdate -> L.ByteString -> IO ()) + -> (Handle -> Maybe MeterUpdate -> L.ByteString -> IO ()) -> Annex Bool retrieveChunks retriever u chunkconfig encryptor basek dest basep sink | noChunks chunkconfig = @@ -245,18 +245,18 @@ retrieveChunks retriever u chunkconfig encryptor basek dest basep sink firstavail _ [] = return False firstavail currsize ([]:ls) = firstavail currsize ls firstavail currsize ((k:ks):ls) = do - v <- tryNonAsyncAnnex $ retriever (encryptor k) + let offset = resumeOffset currsize k + let p = maybe basep + (offsetMeterUpdate basep . toBytesProcessed) + offset + v <- tryNonAsyncAnnex $ retriever (encryptor k) p case v of Left e | null ls -> giveup e | otherwise -> firstavail currsize ls Right content -> do - let offset = resumeOffset currsize k - let p = maybe basep - (offsetMeterUpdate basep . toBytesProcessed) - offset bracketIO (maybe opennew openresume offset) hClose $ \h -> do - withBytes content $ liftIO . sink h p + tosink h p content let sz = toBytesProcessed $ fromMaybe 0 $ keyChunkSize k getrest p h sz sz ks @@ -264,13 +264,11 @@ retrieveChunks retriever u chunkconfig encryptor basek dest basep sink getrest _ _ _ _ [] = return True getrest p h sz bytesprocessed (k:ks) = do let p' = offsetMeterUpdate p bytesprocessed - content <- retriever (encryptor k) - withBytes content $ liftIO . sink h p' + tosink h p' =<< retriever (encryptor k) p' getrest p h sz (addBytesProcessed bytesprocessed sz) ks getunchunked = bracketIO opennew hClose $ \h -> do - content <- retriever (encryptor basek) - withBytes content $ liftIO . sink h basep + tosink h basep =<< retriever (encryptor basek) basep return True opennew = openBinaryFile dest WriteMode @@ -282,6 +280,19 @@ retrieveChunks retriever u chunkconfig encryptor basek dest basep sink hSeek h AbsoluteSeek startpoint return h + {- Progress meter updating is a bit tricky: If the Retriever + - populates a file, it is responsible for updating progress + - as the file is being retrieved. + - + - However, if the Retriever generates a lazy ByteString, + - it is not responsible for updating progress (often it cannot). + - Instead, the sink is passed a meter to update as it consumes + - the ByteString. -} + tosink h p (ByteContent b) = liftIO $ + sink h (Just p) b + tosink h _ (FileContent f) = liftIO $ + sink h Nothing =<< L.readFile f + {- Can resume when the chunk's offset is at or before the end of - the dest file. 
-} resumeOffset :: Maybe Integer -> Key -> Maybe Integer diff --git a/Remote/Helper/ChunkedEncryptable.hs b/Remote/Helper/ChunkedEncryptable.hs index b851ecd94e..024a533094 100644 --- a/Remote/Helper/ChunkedEncryptable.hs +++ b/Remote/Helper/ChunkedEncryptable.hs @@ -14,6 +14,7 @@ module Remote.Helper.ChunkedEncryptable ( Storer, Retriever, simplyPrepare, + ContentSource, checkPrepare, fileStorer, byteStorer, @@ -36,6 +37,8 @@ import Remote.Helper.Encryptable as X import Annex.Content import Annex.Exception +import qualified Data.ByteString.Lazy as L + simplyPrepare :: helper -> Preparer helper simplyPrepare helper _ a = a $ Just helper @@ -101,8 +104,10 @@ chunkedEncryptableRemote c preparestorer prepareretriever baser = encr retrieveChunks retriever (uuid baser) chunkconfig enck k dest p' sink go Nothing = return False - sink h p' b = do - let write = meteredWrite p' h + sink h mp b = do + let write = case mp of + Just p' -> meteredWrite p' h + Nothing -> L.hPut h case enc of Nothing -> write b Just (cipher, _) -> diff --git a/Types/StoreRetrieve.hs b/Types/StoreRetrieve.hs index ccbf99e3f5..dfee207580 100644 --- a/Types/StoreRetrieve.hs +++ b/Types/StoreRetrieve.hs @@ -10,8 +10,8 @@ module Types.StoreRetrieve where import Common.Annex +import Annex.Content import Utility.Metered -import Utility.Tmp import qualified Data.ByteString.Lazy as L @@ -30,25 +30,23 @@ type Storer = Key -> ContentSource -> MeterUpdate -> Annex Bool -- Action that retrieves a Key's content from a remote. -- Throws exception if key is not present, or remote is not accessible. -type Retriever = Key -> Annex ContentSource +type Retriever = Key -> MeterUpdate -> Annex ContentSource fileStorer :: (Key -> FilePath -> MeterUpdate -> Annex Bool) -> Storer fileStorer a k (FileContent f) m = a k f m -fileStorer a k (ByteContent b) m = withTmpFile "tmpXXXXXX" $ \f h -> do - liftIO $ do - L.hPut h b - hClose h - a k f m +fileStorer a k (ByteContent b) m = withTmp k $ \tmp -> do + liftIO $ L.writeFile tmp b + a k tmp m byteStorer :: (Key -> L.ByteString -> MeterUpdate -> Annex Bool) -> Storer byteStorer a k c m = withBytes c $ \b -> a k b m +fileRetriever :: (Key -> MeterUpdate -> Annex FilePath) -> Retriever +fileRetriever a k m = FileContent <$> a k m + +byteRetriever :: (Key -> Annex L.ByteString) -> Retriever +byteRetriever a k _m = ByteContent <$> a k + withBytes :: ContentSource -> (L.ByteString -> Annex a) -> Annex a withBytes (ByteContent b) a = a b withBytes (FileContent f) a = a =<< liftIO (L.readFile f) - -fileRetriever :: (Key -> Annex FilePath) -> Retriever -fileRetriever a k = FileContent <$> a k - -byteRetriever :: (Key -> Annex L.ByteString) -> Retriever -byteRetriever a k = ByteContent <$> a k From bc9e4697b96ce0182aa43130cb421410a7aec343 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Tue, 29 Jul 2014 18:40:40 -0400 Subject: [PATCH 37/54] better type for Retriever Putting a callback in the Retriever type allows for the callback to remove the retrieved file when it's done with it. I did not really want to make Retriever be fixed to Annex Bool, but when I tried to use Annex a, I got into some type of type mess. 
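To illustrate the resulting shape (a hypothetical consumer, not part of
this commit): the ContentSource must be fully consumed inside the
callback, since the retriever may clean up a temp file it handed out as
soon as the callback returns.

    import qualified Data.ByteString.Lazy as L

    -- Hypothetical: retrieve a key's content and print its size.
    sizeOf :: Retriever -> Key -> MeterUpdate -> Annex Bool
    sizeOf retriever k p = retriever k p $ \content -> do
        b <- case content of
            ByteContent bs -> return bs
            FileContent f -> liftIO $ L.readFile f
        liftIO $ print (L.length b) -- forces the whole ByteString
        return True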
--- Annex/Content.hs | 1 + Remote/Helper/Chunked.hs | 25 +++++++++++-------- Remote/Helper/ChunkedEncryptable.hs | 37 +++++++++++++++++++++++++---- Types/StoreRetrieve.hs | 27 ++++----------------- 4 files changed, 53 insertions(+), 37 deletions(-) diff --git a/Annex/Content.hs b/Annex/Content.hs index 6975f322f9..eb84f2fe9d 100644 --- a/Annex/Content.hs +++ b/Annex/Content.hs @@ -16,6 +16,7 @@ module Annex.Content ( getViaTmpChecked, getViaTmpUnchecked, prepGetViaTmpChecked, + prepTmp, withTmp, checkDiskSpace, moveAnnex, diff --git a/Remote/Helper/Chunked.hs b/Remote/Helper/Chunked.hs index 102ced8f44..ae949abc37 100644 --- a/Remote/Helper/Chunked.hs +++ b/Remote/Helper/Chunked.hs @@ -249,26 +249,28 @@ retrieveChunks retriever u chunkconfig encryptor basek dest basep sink let p = maybe basep (offsetMeterUpdate basep . toBytesProcessed) offset - v <- tryNonAsyncAnnex $ retriever (encryptor k) p - case v of - Left e - | null ls -> giveup e - | otherwise -> firstavail currsize ls - Right content -> do + v <- tryNonAsyncAnnex $ + retriever (encryptor k) p $ \content -> bracketIO (maybe opennew openresume offset) hClose $ \h -> do tosink h p content let sz = toBytesProcessed $ fromMaybe 0 $ keyChunkSize k getrest p h sz sz ks + `catchNonAsyncAnnex` giveup + case v of + Left e + | null ls -> giveup e + | otherwise -> firstavail currsize ls + Right r -> return r getrest _ _ _ _ [] = return True getrest p h sz bytesprocessed (k:ks) = do let p' = offsetMeterUpdate p bytesprocessed - tosink h p' =<< retriever (encryptor k) p' + retriever (encryptor k) p' $ tosink h p' getrest p h sz (addBytesProcessed bytesprocessed sz) ks getunchunked = bracketIO opennew hClose $ \h -> do - tosink h basep =<< retriever (encryptor basek) basep + retriever (encryptor basek) basep $ tosink h basep return True opennew = openBinaryFile dest WriteMode @@ -288,10 +290,13 @@ retrieveChunks retriever u chunkconfig encryptor basek dest basep sink - it is not responsible for updating progress (often it cannot). - Instead, the sink is passed a meter to update as it consumes - the ByteString. -} - tosink h p (ByteContent b) = liftIO $ + tosink h p (ByteContent b) = liftIO $ do sink h (Just p) b - tosink h _ (FileContent f) = liftIO $ + return True + tosink h _ (FileContent f) = liftIO $ do sink h Nothing =<< L.readFile f + nukeFile h + return True {- Can resume when the chunk's offset is at or before the end of - the dest file. -} diff --git a/Remote/Helper/ChunkedEncryptable.hs b/Remote/Helper/ChunkedEncryptable.hs index 024a533094..550a6934bd 100644 --- a/Remote/Helper/ChunkedEncryptable.hs +++ b/Remote/Helper/ChunkedEncryptable.hs @@ -6,8 +6,6 @@ -} {-# LANGUAGE RankNTypes #-} -{-# LANGUAGE ImpredicativeTypes #-} -{-# LANGUAGE FlexibleContexts #-} module Remote.Helper.ChunkedEncryptable ( Preparer, @@ -39,17 +37,48 @@ import Annex.Exception import qualified Data.ByteString.Lazy as L +-- Use when nothing needs to be done to prepare a helper. simplyPrepare :: helper -> Preparer helper simplyPrepare helper _ a = a $ Just helper +-- Use to run a check when preparing a helper. checkPrepare :: (Key -> Annex Bool) -> helper -> Preparer helper checkPrepare checker helper k a = ifM (checker k) ( a (Just helper) , a Nothing ) -{- Modifies a base Remote to support both chunking and encryption. - -} +-- A Storer that expects to be provided with a file containing +-- the content of the key to store. 
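+-- (Given ByteContent instead, it spools the bytes to a temp file for
+-- the key and passes that, so implementations only deal with files.)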
+fileStorer :: (Key -> FilePath -> MeterUpdate -> Annex Bool) -> Storer +fileStorer a k (FileContent f) m = a k f m +fileStorer a k (ByteContent b) m = withTmp k $ \f -> do + liftIO $ L.writeFile f b + a k f m + +-- A Storer that expects to be provided with a L.ByteString of +-- the content to store. +byteStorer :: (Key -> L.ByteString -> MeterUpdate -> Annex Bool) -> Storer +byteStorer a k c m = withBytes c $ \b -> a k b m + +-- A Retriever that writes the content of a Key to a provided file. +-- It is responsible for updating the progress meter as it retrieves data. +fileRetriever :: (FilePath -> Key -> MeterUpdate -> Annex ()) -> Retriever +fileRetriever a k m callback = bracketAnnex (prepTmp k) (liftIO . nukeFile) go + where + go f = do + a f k m + callback (FileContent f) + +-- A Retriever that generates a L.ByteString containing the Key's content. +byteRetriever :: (Key -> Annex L.ByteString) -> Retriever +byteRetriever a k _m callback = callback =<< (ByteContent <$> a k) + +withBytes :: ContentSource -> (L.ByteString -> Annex a) -> Annex a +withBytes (ByteContent b) a = a b +withBytes (FileContent f) a = a =<< liftIO (L.readFile f) + +-- Modifies a base Remote to support both chunking and encryption. chunkedEncryptableRemote :: RemoteConfig -> Preparer Storer diff --git a/Types/StoreRetrieve.hs b/Types/StoreRetrieve.hs index dfee207580..0ee2fd5019 100644 --- a/Types/StoreRetrieve.hs +++ b/Types/StoreRetrieve.hs @@ -1,4 +1,4 @@ -{- Types for Storer and Retriever +{- Types for Storer and Retriever actions for remotes. - - Copyright 2014 Joey Hess - @@ -10,7 +10,6 @@ module Types.StoreRetrieve where import Common.Annex -import Annex.Content import Utility.Metered import qualified Data.ByteString.Lazy as L @@ -28,25 +27,7 @@ data ContentSource -- Can throw exceptions. type Storer = Key -> ContentSource -> MeterUpdate -> Annex Bool --- Action that retrieves a Key's content from a remote. +-- Action that retrieves a Key's content from a remote, passing it to a +-- callback. -- Throws exception if key is not present, or remote is not accessible. -type Retriever = Key -> MeterUpdate -> Annex ContentSource - -fileStorer :: (Key -> FilePath -> MeterUpdate -> Annex Bool) -> Storer -fileStorer a k (FileContent f) m = a k f m -fileStorer a k (ByteContent b) m = withTmp k $ \tmp -> do - liftIO $ L.writeFile tmp b - a k tmp m - -byteStorer :: (Key -> L.ByteString -> MeterUpdate -> Annex Bool) -> Storer -byteStorer a k c m = withBytes c $ \b -> a k b m - -fileRetriever :: (Key -> MeterUpdate -> Annex FilePath) -> Retriever -fileRetriever a k m = FileContent <$> a k m - -byteRetriever :: (Key -> Annex L.ByteString) -> Retriever -byteRetriever a k _m = ByteContent <$> a k - -withBytes :: ContentSource -> (L.ByteString -> Annex a) -> Annex a -withBytes (ByteContent b) a = a b -withBytes (FileContent f) a = a =<< liftIO (L.readFile f) +type Retriever = Key -> MeterUpdate -> (ContentSource -> Annex Bool) -> Annex Bool From c0dc134cded6078bb2e5fa2d4420b9cc09a292f7 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Tue, 29 Jul 2014 18:47:26 -0400 Subject: [PATCH 38/54] support chunking for all external special remotes! Removing code and at the same time adding great features, including upload/download resuming. This commit was sponsored by Romain Lenglet. 
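For other special remotes, adopting this should be mostly boilerplate.
A minimal sketch of the two actions a backend now has to provide
(hypothetical toy backend, assuming the usual imports plus keyFile from
Locations; compare with the External changes below):

    -- Keeps each key (or chunk key, or encrypted chunk key) as a file
    -- named after the key, under a fixed directory. fileStorer spools
    -- ByteContent chunks to a temp file first, so this code only ever
    -- sees a file on disk.
    storeToy :: FilePath -> Storer
    storeToy topdir = fileStorer $ \k src _p -> liftIO $ do
        copyFile src (topdir </> keyFile k)
        return True

    retrieveToy :: FilePath -> Retriever
    retrieveToy topdir = byteRetriever $ \k ->
        liftIO $ L.readFile (topdir </> keyFile k)

chunkedEncryptableRemote then layers chunking, encryption, and resuming
on top of these two actions.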
--- Remote/External.hs | 60 ++++++++-------------------------------- Remote/Helper/Chunked.hs | 2 +- debian/changelog | 3 +- 3 files changed, 15 insertions(+), 50 deletions(-) diff --git a/Remote/External.hs b/Remote/External.hs index 464e9b57e9..1c22a589bf 100644 --- a/Remote/External.hs +++ b/Remote/External.hs @@ -15,14 +15,12 @@ import Types.CleanupActions import qualified Git import Config import Remote.Helper.Special -import Remote.Helper.Encryptable -import Crypto +import Remote.Helper.ChunkedEncryptable import Utility.Metered import Logs.Transfer import Logs.PreferredContent.Raw import Logs.RemoteState import Config.Cost -import Annex.Content import Annex.UUID import Annex.Exception import Creds @@ -30,7 +28,6 @@ import Creds import Control.Concurrent.STM import System.Log.Logger (debugM) import qualified Data.Map as M -import qualified Data.ByteString.Lazy as L remote :: RemoteType remote = RemoteType { @@ -46,15 +43,15 @@ gen r u c gc = do Annex.addCleanup (RemoteCleanup u) $ stopExternal external cst <- getCost external r gc avail <- getAvailability external r gc - return $ Just $ encryptableRemote c - (storeEncrypted external $ getGpgEncParams (c,gc)) - (retrieveEncrypted external) + return $ Just $ chunkedEncryptableRemote c + (simplyPrepare $ store external) + (simplyPrepare $ retrieve external) Remote { uuid = u, cost = cst, name = Git.repoDescribe r, - storeKey = store external, - retrieveKeyFile = retrieve external, + storeKey = storeKeyDummy, + retrieveKeyFile = retreiveKeyFileDummy, retrieveKeyFileCheap = \_ _ -> return False, removeKey = remove external, hasKey = checkPresent external, @@ -90,25 +87,8 @@ externalSetup mu _ c = do gitConfigSpecialRemote u c'' "externaltype" externaltype return (c'', u) -store :: External -> Key -> AssociatedFile -> MeterUpdate -> Annex Bool -store external k _f p = sendAnnex k rollback $ \f -> - metered (Just p) k $ - storeHelper external k f - where - rollback = void $ remove external k - -storeEncrypted :: External -> [CommandParam] -> (Cipher, Key) -> Key -> MeterUpdate -> Annex Bool -storeEncrypted external gpgOpts (cipher, enck) k p = withTmp enck $ \tmp -> - sendAnnex k rollback $ \src -> do - metered (Just p) k $ \meterupdate -> do - liftIO $ encrypt gpgOpts cipher (feedFile src) $ - readBytes $ L.writeFile tmp - storeHelper external enck tmp meterupdate - where - rollback = void $ remove external enck - -storeHelper :: External -> Key -> FilePath -> MeterUpdate -> Annex Bool -storeHelper external k f p = safely $ +store :: External -> Storer +store external = fileStorer $ \k f p -> handleRequest external (TRANSFER Upload k f) (Just p) $ \resp -> case resp of TRANSFER_SUCCESS Upload k' | k == k' -> @@ -119,31 +99,15 @@ storeHelper external k f p = safely $ return False _ -> Nothing -retrieve :: External -> Key -> AssociatedFile -> FilePath -> MeterUpdate -> Annex Bool -retrieve external k _f d p = metered (Just p) k $ - retrieveHelper external k d - -retrieveEncrypted :: External -> (Cipher, Key) -> Key -> FilePath -> MeterUpdate -> Annex Bool -retrieveEncrypted external (cipher, enck) k f p = withTmp enck $ \tmp -> - metered (Just p) k $ \meterupdate -> - ifM (retrieveHelper external enck tmp meterupdate) - ( liftIO $ catchBoolIO $ do - decrypt cipher (feedFile tmp) $ - readBytes $ L.writeFile f - return True - , return False - ) - -retrieveHelper :: External -> Key -> FilePath -> MeterUpdate -> Annex Bool -retrieveHelper external k d p = safely $ +retrieve :: External -> Retriever +retrieve external = fileRetriever $ \d k p -> 
handleRequest external (TRANSFER Download k d) (Just p) $ \resp -> case resp of TRANSFER_SUCCESS Download k' - | k == k' -> Just $ return True + | k == k' -> Just $ return () TRANSFER_FAILURE Download k' errmsg | k == k' -> Just $ do - warning errmsg - return False + error errmsg _ -> Nothing remove :: External -> Key -> Annex Bool diff --git a/Remote/Helper/Chunked.hs b/Remote/Helper/Chunked.hs index ae949abc37..b7522aa89c 100644 --- a/Remote/Helper/Chunked.hs +++ b/Remote/Helper/Chunked.hs @@ -295,7 +295,7 @@ retrieveChunks retriever u chunkconfig encryptor basek dest basep sink return True tosink h _ (FileContent f) = liftIO $ do sink h Nothing =<< L.readFile f - nukeFile h + nukeFile f return True {- Can resume when the chunk's offset is at or before the end of diff --git a/debian/changelog b/debian/changelog index c85247b69a..32704de56f 100644 --- a/debian/changelog +++ b/debian/changelog @@ -1,6 +1,7 @@ git-annex (5.20140718) UNRELEASED; urgency=medium - * New chunk= option to chunk files stored in directory remotes. + * New chunk= option to chunk files stored in special remotes. + Currently supported by: directory, and all external special remotes. * Partially transferred files are automatically resumed when using chunked remotes! * The old chunksize= option is deprecated. Do not use for new remotes. From 53b87a859e98ef545e03d60f77fc4e552fe5f893 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Tue, 29 Jul 2014 20:10:14 -0400 Subject: [PATCH 39/54] optimise case of remote that retrieves FileContent, when chunks and encryption are not being used No need to read whole FileContent only to write it back out to a file in this case. Can just rename! Yay. Also indidentially, fixed an attempt to open a file for write that was already opened for write, which caused a crash and deadlock. --- Remote/Helper/Chunked.hs | 63 ++++++++++++++------------- Remote/Helper/ChunkedEncryptable.hs | 67 +++++++++++++++++++++-------- Types/StoreRetrieve.hs | 4 ++ 3 files changed, 84 insertions(+), 50 deletions(-) diff --git a/Remote/Helper/Chunked.hs b/Remote/Helper/Chunked.hs index b7522aa89c..00c089e802 100644 --- a/Remote/Helper/Chunked.hs +++ b/Remote/Helper/Chunked.hs @@ -221,7 +221,7 @@ retrieveChunks -> Key -> FilePath -> MeterUpdate - -> (Handle -> Maybe MeterUpdate -> L.ByteString -> IO ()) + -> (Maybe Handle -> Maybe MeterUpdate -> ContentSource -> Annex Bool) -> Annex Bool retrieveChunks retriever u chunkconfig encryptor basek dest basep sink | noChunks chunkconfig = @@ -244,34 +244,37 @@ retrieveChunks retriever u chunkconfig encryptor basek dest basep sink firstavail _ [] = return False firstavail currsize ([]:ls) = firstavail currsize ls - firstavail currsize ((k:ks):ls) = do - let offset = resumeOffset currsize k - let p = maybe basep - (offsetMeterUpdate basep . toBytesProcessed) - offset - v <- tryNonAsyncAnnex $ - retriever (encryptor k) p $ \content -> - bracketIO (maybe opennew openresume offset) hClose $ \h -> do - tosink h p content - let sz = toBytesProcessed $ - fromMaybe 0 $ keyChunkSize k - getrest p h sz sz ks - `catchNonAsyncAnnex` giveup - case v of - Left e - | null ls -> giveup e - | otherwise -> firstavail currsize ls - Right r -> return r + firstavail currsize ((k:ks):ls) + | k == basek = getunchunked + `catchNonAsyncAnnex` (const $ firstavail currsize ls) + | otherwise = do + let offset = resumeOffset currsize k + let p = maybe basep + (offsetMeterUpdate basep . 
toBytesProcessed) + offset + v <- tryNonAsyncAnnex $ + retriever (encryptor k) p $ \content -> + bracketIO (maybe opennew openresume offset) hClose $ \h -> do + void $ tosink (Just h) p content + let sz = toBytesProcessed $ + fromMaybe 0 $ keyChunkSize k + getrest p h sz sz ks + `catchNonAsyncAnnex` giveup + case v of + Left e + | null ls -> giveup e + | otherwise -> firstavail currsize ls + Right r -> return r getrest _ _ _ _ [] = return True getrest p h sz bytesprocessed (k:ks) = do let p' = offsetMeterUpdate p bytesprocessed - retriever (encryptor k) p' $ tosink h p' - getrest p h sz (addBytesProcessed bytesprocessed sz) ks + ifM (retriever (encryptor k) p' $ tosink (Just h) p') + ( getrest p h sz (addBytesProcessed bytesprocessed sz) ks + , giveup "chunk retrieval failed" + ) - getunchunked = bracketIO opennew hClose $ \h -> do - retriever (encryptor basek) basep $ tosink h basep - return True + getunchunked = retriever (encryptor basek) basep $ tosink Nothing basep opennew = openBinaryFile dest WriteMode @@ -290,13 +293,11 @@ retrieveChunks retriever u chunkconfig encryptor basek dest basep sink - it is not responsible for updating progress (often it cannot). - Instead, the sink is passed a meter to update as it consumes - the ByteString. -} - tosink h p (ByteContent b) = liftIO $ do - sink h (Just p) b - return True - tosink h _ (FileContent f) = liftIO $ do - sink h Nothing =<< L.readFile f - nukeFile f - return True + tosink h p content = sink h p' content + where + p' + | isByteContent content = Just p + | otherwise = Nothing {- Can resume when the chunk's offset is at or before the end of - the dest file. -} diff --git a/Remote/Helper/ChunkedEncryptable.hs b/Remote/Helper/ChunkedEncryptable.hs index 550a6934bd..ca73802bae 100644 --- a/Remote/Helper/ChunkedEncryptable.hs +++ b/Remote/Helper/ChunkedEncryptable.hs @@ -18,9 +18,9 @@ module Remote.Helper.ChunkedEncryptable ( byteStorer, fileRetriever, byteRetriever, - chunkedEncryptableRemote, storeKeyDummy, retreiveKeyFileDummy, + chunkedEncryptableRemote, module X ) where @@ -36,6 +36,7 @@ import Annex.Content import Annex.Exception import qualified Data.ByteString.Lazy as L +import Control.Exception (bracket) -- Use when nothing needs to be done to prepare a helper. simplyPrepare :: helper -> Preparer helper @@ -78,6 +79,16 @@ withBytes :: ContentSource -> (L.ByteString -> Annex a) -> Annex a withBytes (ByteContent b) a = a b withBytes (FileContent f) a = a =<< liftIO (L.readFile f) +{- The base Remote that is provided to chunkedEncryptableRemote + - needs to have storeKey and retreiveKeyFile methods, but they are + - never actually used (since chunkedEncryptableRemote replaces + - them). Here are some dummy ones. + -} +storeKeyDummy :: Key -> AssociatedFile -> MeterUpdate -> Annex Bool +storeKeyDummy _ _ _ = return False +retreiveKeyFileDummy :: Key -> AssociatedFile -> FilePath -> MeterUpdate -> Annex Bool +retreiveKeyFileDummy _ _ _ _ = return False + -- Modifies a base Remote to support both chunking and encryption. 
chunkedEncryptableRemote :: RemoteConfig @@ -131,17 +142,8 @@ chunkedEncryptableRemote c preparestorer prepareretriever baser = encr where go (Just retriever) = metered (Just p) k $ \p' -> retrieveChunks retriever (uuid baser) chunkconfig - enck k dest p' sink + enck k dest p' (sink dest enc) go Nothing = return False - sink h mp b = do - let write = case mp of - Just p' -> meteredWrite p' h - Nothing -> L.hPut h - case enc of - Nothing -> write b - Just (cipher, _) -> - decrypt cipher (feedBytes b) $ - readBytes write enck = maybe id snd enc removeKeyGen k enc = removeChunks remover (uuid baser) chunkconfig enck k @@ -154,12 +156,39 @@ chunkedEncryptableRemote c preparestorer prepareretriever baser = encr enck = maybe id snd enc checker = hasKey baser -{- The base Remote that is provided to chunkedEncryptableRemote - - needs to have storeKey and retreiveKeyFile methods, but they are - - never actually used (since chunkedEncryptableRemote replaces - - them). Here are some dummy ones. +{- Sink callback for retrieveChunks. Stores the file content into the + - provided Handle, decrypting it first if necessary. + - + - If the remote did not store the content using chunks, no Handle + - will be provided, and it's up to us to open the destination file. + - + - Note that when neither chunking nor encryption is used, and the remote + - provides FileContent, that file only needs to be renamed + - into place. (And it may even already be in the right place..) -} -storeKeyDummy :: Key -> AssociatedFile -> MeterUpdate -> Annex Bool -storeKeyDummy _ _ _ = return False -retreiveKeyFileDummy :: Key -> AssociatedFile -> FilePath -> MeterUpdate -> Annex Bool -retreiveKeyFileDummy _ _ _ _ = return False +sink + :: FilePath + -> Maybe (Cipher, EncKey) + -> Maybe Handle + -> Maybe MeterUpdate + -> ContentSource + -> Annex Bool +sink dest enc mh mp content = do + case (enc, mh, content) of + (Nothing, Nothing, FileContent f) + | f == dest -> noop + | otherwise -> liftIO $ moveFile f dest + (Just (cipher, _), _, _) -> + withBytes content $ \b -> + decrypt cipher (feedBytes b) $ + readBytes write + (Nothing, _, _) -> withBytes content write + return True + where + write b = case mh of + Just h -> liftIO $ b `streamto` h + Nothing -> liftIO $ bracket opendest hClose (b `streamto`) + streamto b h = case mp of + Just p -> meteredWrite p h b + Nothing -> L.hPut h b + opendest = openBinaryFile dest WriteMode diff --git a/Types/StoreRetrieve.hs b/Types/StoreRetrieve.hs index 0ee2fd5019..33f66efb19 100644 --- a/Types/StoreRetrieve.hs +++ b/Types/StoreRetrieve.hs @@ -23,6 +23,10 @@ data ContentSource = FileContent FilePath | ByteContent L.ByteString +isByteContent :: ContentSource -> Bool +isByteContent (ByteContent _) = True +isByteContent (FileContent _) = False + -- Action that stores a Key's content on a remote. -- Can throw exceptions. 
type Storer = Key -> ContentSource -> MeterUpdate -> Annex Bool From 444944c7a9a0921c51e99948d2a78f0e5fda87b3 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Tue, 29 Jul 2014 20:27:13 -0400 Subject: [PATCH 40/54] fix cleanup of FileContents once done with them when retrieving --- Remote/Helper/ChunkedEncryptable.hs | 28 +++++++++++++++++----------- 1 file changed, 17 insertions(+), 11 deletions(-) diff --git a/Remote/Helper/ChunkedEncryptable.hs b/Remote/Helper/ChunkedEncryptable.hs index ca73802bae..2a844212b5 100644 --- a/Remote/Helper/ChunkedEncryptable.hs +++ b/Remote/Helper/ChunkedEncryptable.hs @@ -65,20 +65,15 @@ byteStorer a k c m = withBytes c $ \b -> a k b m -- A Retriever that writes the content of a Key to a provided file. -- It is responsible for updating the progress meter as it retrieves data. fileRetriever :: (FilePath -> Key -> MeterUpdate -> Annex ()) -> Retriever -fileRetriever a k m callback = bracketAnnex (prepTmp k) (liftIO . nukeFile) go - where - go f = do - a f k m - callback (FileContent f) +fileRetriever a k m callback = do + f <- prepTmp k + a f k m + callback (FileContent f) -- A Retriever that generates a L.ByteString containing the Key's content. byteRetriever :: (Key -> Annex L.ByteString) -> Retriever byteRetriever a k _m callback = callback =<< (ByteContent <$> a k) -withBytes :: ContentSource -> (L.ByteString -> Annex a) -> Annex a -withBytes (ByteContent b) a = a b -withBytes (FileContent f) a = a =<< liftIO (L.readFile f) - {- The base Remote that is provided to chunkedEncryptableRemote - needs to have storeKey and retreiveKeyFile methods, but they are - never actually used (since chunkedEncryptableRemote replaces @@ -178,11 +173,18 @@ sink dest enc mh mp content = do (Nothing, Nothing, FileContent f) | f == dest -> noop | otherwise -> liftIO $ moveFile f dest - (Just (cipher, _), _, _) -> + (Just (cipher, _), _, ByteContent b) -> + decrypt cipher (feedBytes b) $ + readBytes write + (Just (cipher, _), _, FileContent f) -> do withBytes content $ \b -> decrypt cipher (feedBytes b) $ readBytes write + liftIO $ nukeFile f + (Nothing, _, FileContent f) -> do + withBytes content write + liftIO $ nukeFile f + (Nothing, _, ByteContent b) -> write b return True where write b = case mh of @@ -192,3 +194,7 @@ sink dest enc mh mp content = do Just p -> meteredWrite p h b Nothing -> L.hPut h b opendest = openBinaryFile dest WriteMode + +withBytes :: ContentSource -> (L.ByteString -> Annex a) -> Annex a +withBytes (ByteContent b) a = a b +withBytes (FileContent f) a = a =<< liftIO (L.readFile f) From a963d790d3a240b226731c5d7cd1b339b77d1209 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Tue, 29 Jul 2014 20:31:16 -0400 Subject: [PATCH 41/54] update progress after each chunk, at least This way, when the remote implementation neglects to update progress, there will still be a somewhat useful progress display, as long as chunks are used.
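(A minimal standalone sketch of the idea above, for illustration only — simplified stand-ins for the Utility.Metered types, not code from the patch below. Since meters receive absolute totals, poking an offset-shifted meter with zero before each chunk yields at least chunk-granularity progress even when the per-chunk transfer never updates it.)

    import Control.Monad (foldM_)

    -- Simplified stand-ins; the real types live in Utility.Metered.
    type BytesProcessed = Integer
    type MeterUpdate = BytesProcessed -> IO () -- receives absolute totals

    -- Shift a meter by a fixed offset, like offsetMeterUpdate.
    offsetMeterUpdate :: MeterUpdate -> BytesProcessed -> MeterUpdate
    offsetMeterUpdate base offset n = base (offset + n)

    -- Even when storeaction ignores the meter it is handed, calling the
    -- offset meter with 0 first reports that this chunk's transfer began.
    storeAll :: MeterUpdate -> BytesProcessed -> [String] -> (String -> MeterUpdate -> IO ()) -> IO ()
    storeAll basemeter chunksize chunks storeaction = foldM_ go 0 chunks
      where
        go offset chunk = do
            let meter' = offsetMeterUpdate basemeter offset
            meter' 0
            storeaction chunk meter'
            return (offset + chunksize)

    main :: IO ()
    main = storeAll (putStrLn . ("bytes: " ++) . show) 5 ["hello", "world"] (\_ _ -> return ())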
--- Remote/Helper/Chunked.hs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/Remote/Helper/Chunked.hs b/Remote/Helper/Chunked.hs index 00c089e802..d9ab3c3e63 100644 --- a/Remote/Helper/Chunked.hs +++ b/Remote/Helper/Chunked.hs @@ -127,6 +127,7 @@ storeChunks u chunkconfig k f p storer checker = chunksStored u k (FixedSizeChunks chunksize) numchunks return True | otherwise = do + liftIO $ meterupdate' zeroBytesProcessed let (chunkkey, chunkkeys') = nextChunkKeyStream chunkkeys ifM (storer chunkkey (ByteContent chunk) meterupdate') ( do @@ -269,6 +270,7 @@ retrieveChunks retriever u chunkconfig encryptor basek dest basep sink getrest _ _ _ _ [] = return True getrest p h sz bytesprocessed (k:ks) = do let p' = offsetMeterUpdate p bytesprocessed + liftIO $ p' zeroBytesProcessed ifM (retriever (encryptor k) p' $ tosink (Just h) p') ( getrest p h sz (addBytesProcessed bytesprocessed sz) ks , giveup "chunk retrieval failed" @@ -292,7 +294,8 @@ retrieveChunks retriever u chunkconfig encryptor basek dest basep sink - However, if the Retriever generates a lazy ByteString, - it is not responsible for updating progress (often it cannot). - Instead, the sink is passed a meter to update as it consumes - - the ByteString. -} + - the ByteString. + -} tosink h p content = sink h p' content where p' From 89416ba2d925a3bcdc38696bcbde62d3c5065af3 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 30 Jul 2014 10:34:39 -0400 Subject: [PATCH 42/54] only chunk stable keys The content of unstable keys can potentially be different in different repos, so eg, resuming a chunked upload started by another repo would corrupt data. --- Backend.hs | 6 +++--- Remote/Helper/Chunked.hs | 3 ++- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/Backend.hs b/Backend.hs index 99752c66ff..0fcaaa7ed8 100644 --- a/Backend.hs +++ b/Backend.hs @@ -15,7 +15,7 @@ module Backend ( chooseBackend, lookupBackendName, maybeLookupBackendName, - checkStableKey, + isStableKey, ) where import Common.Annex @@ -126,6 +126,6 @@ maybeLookupBackendName s = M.lookup s nameMap nameMap :: M.Map String Backend nameMap = M.fromList $ zip (map B.name list) list -checkStableKey :: Key -> Bool -checkStableKey k = maybe False (`B.isStableKey` k) +isStableKey :: Key -> Bool +isStableKey k = maybe False (`B.isStableKey` k) (maybeLookupBackendName (keyBackendName k)) diff --git a/Remote/Helper/Chunked.hs b/Remote/Helper/Chunked.hs index d9ab3c3e63..fcfe06b20d 100644 --- a/Remote/Helper/Chunked.hs +++ b/Remote/Helper/Chunked.hs @@ -23,6 +23,7 @@ import Types.Key import Logs.Chunk import Utility.Metered import Crypto (EncKey) +import Backend (isStableKey) import Annex.Exception import qualified Data.ByteString.Lazy as L @@ -95,7 +96,7 @@ storeChunks -> Annex Bool storeChunks u chunkconfig k f p storer checker = case chunkconfig of - (UnpaddedChunks chunksize) -> + (UnpaddedChunks chunksize) | isStableKey k -> bracketIO open close (go chunksize) _ -> showprogress $ storer k (FileContent f) where From b5ac627fee94860f9fc4356e8b58e7aa1185f6d5 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 30 Jul 2014 11:19:05 -0400 Subject: [PATCH 43/54] WebDAV: Dropped support for DAV before 0.6.1. 
0.6.1 is in testing, and stable does not have DAV at all, so I can dispense with this compatibility code --- Remote/WebDAV.hs | 45 ++------------------------------------------- debian/changelog | 1 + git-annex.cabal | 2 +- 3 files changed, 4 insertions(+), 44 deletions(-) diff --git a/Remote/WebDAV.hs b/Remote/WebDAV.hs index 31e4225e40..d6644cdc70 100644 --- a/Remote/WebDAV.hs +++ b/Remote/WebDAV.hs @@ -5,7 +5,7 @@ - Licensed under the GNU GPL version 3 or higher. -} -{-# LANGUAGE ScopedTypeVariables, CPP #-} +{-# LANGUAGE ScopedTypeVariables #-} module Remote.WebDAV (remote, davCreds, configUrl) where @@ -16,11 +16,7 @@ import qualified Data.ByteString.Lazy.UTF8 as L8 import qualified Data.ByteString.Lazy as L import qualified Control.Exception as E import qualified Control.Exception.Lifted as EL -#if MIN_VERSION_DAV(0,6,0) import Network.HTTP.Client (HttpException(..)) -#else -import Network.HTTP.Conduit (HttpException(..)) -#endif import Network.HTTP.Types import System.Log.Logger (debugM) import System.IO.Error @@ -308,57 +304,37 @@ debugDAV :: DavUrl -> String -> IO () debugDAV msg url = debugM "DAV" $ msg ++ " " ++ url {--------------------------------------------------------------------- - - Low-level DAV operations, using the new DAV monad when available. + - Low-level DAV operations. ---------------------------------------------------------------------} putDAV :: DavUrl -> DavUser -> DavPass -> L.ByteString -> IO () putDAV url user pass b = do debugDAV "PUT" url -#if MIN_VERSION_DAV(0,6,0) goDAV url user pass $ putContentM (contentType, b) -#else - putContent url user pass (contentType, b) -#endif getDAV :: DavUrl -> DavUser -> DavPass -> IO (Maybe L.ByteString) getDAV url user pass = do debugDAV "GET" url eitherToMaybe <$> tryNonAsync go where -#if MIN_VERSION_DAV(0,6,0) go = goDAV url user pass $ snd <$> getContentM -#else - go = snd . snd <$> getPropsAndContent url user pass -#endif deleteDAV :: DavUrl -> DavUser -> DavPass -> IO () deleteDAV url user pass = do debugDAV "DELETE" url -#if MIN_VERSION_DAV(0,6,0) goDAV url user pass delContentM -#else - deleteContent url user pass -#endif moveDAV :: DavUrl -> DavUrl -> DavUser -> DavPass -> IO () moveDAV url newurl user pass = do debugDAV ("MOVE to " ++ newurl ++ " from ") url -#if MIN_VERSION_DAV(0,6,0) goDAV url user pass $ moveContentM newurl' -#else - moveContent url newurl' user pass -#endif where newurl' = B8.fromString newurl mkdirDAV :: DavUrl -> DavUser -> DavPass -> IO Bool mkdirDAV url user pass = do debugDAV "MKDIR" url -#if MIN_VERSION_DAV(0,6,0) goDAV url user pass mkCol -#else - makeCollection url user pass -#endif existsDAV :: DavUrl -> DavUser -> DavPass -> IO (Either String Bool) existsDAV url user pass = do @@ -366,35 +342,19 @@ existsDAV url user pass = do either (Left . show) id <$> tryNonAsync check where ispresent = return . Right -#if MIN_VERSION_DAV(0,6,0) check = goDAV url user pass $ do setDepth Nothing EL.catchJust (matchStatusCodeException notFound404) (getPropsM >> ispresent True) (const $ ispresent False) -#else - check = E.catchJust - (matchStatusCodeException notFound404) -#if ! MIN_VERSION_DAV(0,4,0) - (getProps url user pass >> ispresent True) -#else - (getProps url user pass Nothing >> ispresent True) -#endif - (const $ ispresent False) -#endif matchStatusCodeException :: Status -> HttpException -> Maybe () -#if MIN_VERSION_DAV(0,6,0) matchStatusCodeException want (StatusCodeException s _ _) -#else -matchStatusCodeException want (StatusCodeException s _) -#endif | s == want = Just () | otherwise = Nothing matchStatusCodeException _ _ = Nothing -#if MIN_VERSION_DAV(0,6,0) goDAV :: DavUrl -> DavUser -> DavPass -> DAVT IO a -> IO a goDAV url user pass a = choke $ evalDAVT url $ do setResponseTimeout Nothing -- disable default (5 second!) timeout setCreds user pass a where choke :: IO (Either String a) -> IO a choke f = do x <- f case x of Left e -> error e Right r -> return r -#endif diff --git a/debian/changelog b/debian/changelog index 32704de56f..bacd890801 100644 --- a/debian/changelog +++ b/debian/changelog @@ -14,6 +14,7 @@ git-annex (5.20140718) UNRELEASED; urgency=medium Fix this, including support for fixing up repositories that were incompletely repaired before. * Fix cost calculation for non-encrypted remotes. + * WebDAV: Dropped support for DAV before 0.6.1. -- Joey Hess Mon, 21 Jul 2014 14:41:26 -0400 diff --git a/git-annex.cabal b/git-annex.cabal index ba23d281ec..0d0d979ea0 100644 --- a/git-annex.cabal +++ b/git-annex.cabal @@ -142,7 +142,7 @@ Executable git-annex CPP-Options: -DWITH_S3 if flag(WebDAV) - Build-Depends: DAV ((>= 0.3 && < 0.6) || > 0.6), + Build-Depends: DAV (> 0.6), http-client, http-conduit, http-types, lifted-base CPP-Options: -DWITH_WEBDAV From 5848793d6b36e5ce57729a60659149d6723f1836 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 30 Jul 2014 12:04:00 -0400 Subject: [PATCH 44/54] chunk size advice --- doc/chunking.mdwn | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) diff --git a/doc/chunking.mdwn b/doc/chunking.mdwn index d1dce317c0..87408f8e12 100644 --- a/doc/chunking.mdwn +++ b/doc/chunking.mdwn @@ -9,15 +9,22 @@ Chunking also allows for resuming interrupted downloads and uploads. Note that git-annex has to buffer chunks in memory before they are sent to a remote. So, using a large chunk size will make it use more memory. -To enable chunking, pass a `chunk=XXmb` parameter to `git annex -initremote`. +To enable chunking, pass a `chunk=nnMiB` parameter to `git annex +initremote`, specifying the chunk size. + +Good chunk sizes will depend on the remote, but a good starting place +is probably `1MiB`. Very large chunks are problematic, both because +git-annex needs to buffer one chunk in memory when uploading, and because +a larger chunk will make resuming interrupted transfers less efficient. +On the other hand, when a file is split into a great many chunks, +there can be increased overhead of making many requests to the remote. To disable chunking of a remote that was using chunking, pass `chunk=0` to `git annex enableremote`. Any content already stored on the remote using chunks will continue to be accessed via chunks, this just prevents using chunks when storing new content. -To change the chunk size, pass a `chunk=XXmb` parameter to +To change the chunk size, pass a `chunk=nnMiB` parameter to `git annex enableremote`. This only affects the chunk size used when storing new content.
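(Illustration only, not part of the patches: a hypothetical session following the advice above. The remote name and directory are invented; chunk= is the parameter the documentation describes.)

    git annex initremote usbdrive type=directory directory=/media/usb encryption=none chunk=1MiB

    git annex enableremote usbdrive chunk=10MiB

The second command switches the chunk size used for newly stored content; as documented above, content already stored on the remote continues to be accessed via its original chunks.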
From c03e1c56482a84323883cafe060f1b9bc228a6de Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Fri, 1 Aug 2014 12:49:26 -0400 Subject: [PATCH 45/54] add new section for testing commands --- Command/FuzzTest.hs | 2 +- Command/Test.hs | 2 +- Types/Command.hs | 2 ++ debian/changelog | 1 + doc/git-annex.mdwn | 21 +++++++++++++++------ 5 files changed, 20 insertions(+), 8 deletions(-) diff --git a/Command/FuzzTest.hs b/Command/FuzzTest.hs index 08103edc87..d673541fbb 100644 --- a/Command/FuzzTest.hs +++ b/Command/FuzzTest.hs @@ -22,7 +22,7 @@ import Test.QuickCheck import Control.Concurrent def :: [Command] -def = [ notBareRepo $ command "fuzztest" paramNothing seek SectionPlumbing +def = [ notBareRepo $ command "fuzztest" paramNothing seek SectionTesting "generates fuzz test files"] seek :: CommandSeek diff --git a/Command/Test.hs b/Command/Test.hs index ee72201423..08e9d1b6ee 100644 --- a/Command/Test.hs +++ b/Command/Test.hs @@ -13,7 +13,7 @@ import Messages def :: [Command] def = [ noRepo startIO $ dontCheck repoExists $ - command "test" paramNothing seek SectionPlumbing + command "test" paramNothing seek SectionTesting "run built-in test suite"] seek :: CommandSeek diff --git a/Types/Command.hs b/Types/Command.hs index 0df7c82e63..1f84561940 100644 --- a/Types/Command.hs +++ b/Types/Command.hs @@ -69,6 +69,7 @@ data CommandSection | SectionMetaData | SectionUtility | SectionPlumbing + | SectionTesting deriving (Eq, Ord, Enum, Bounded) descSection :: CommandSection -> String @@ -79,3 +80,4 @@ descSection SectionQuery = "Query commands" descSection SectionMetaData = "Metadata commands" descSection SectionUtility = "Utility commands" descSection SectionPlumbing = "Plumbing commands" +descSection SectionTesting = "Testing commands" diff --git a/debian/changelog b/debian/changelog index bacd890801..eb399dfee5 100644 --- a/debian/changelog +++ b/debian/changelog @@ -15,6 +15,7 @@ git-annex (5.20140718) UNRELEASED; urgency=medium were incompletely repaired before. * Fix cost calculation for non-encrypted remotes. * WebDAV: Dropped support for DAV before 0.6.1. + * testremote: New command. -- Joey Hess Mon, 21 Jul 2014 14:41:26 -0400 diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn index 8ba3558d32..9158b54e00 100644 --- a/doc/git-annex.mdwn +++ b/doc/git-annex.mdwn @@ -949,12 +949,6 @@ subdirectories). Merge conflicts between two files that are not annexed will not be automatically resolved. -* `test` - - This runs git-annex's built-in test suite. - - There are several parameters, provided by Haskell's tasty test framework. - * `remotedaemon` Detects when network remotes have received git pushes and fetches from them. @@ -963,6 +957,21 @@ subdirectories). This command is used internally to perform git pulls over XMPP. +# TESTING COMMANDS + +* `test` + + This runs git-annex's built-in test suite. + + There are several parameters, provided by Haskell's tasty test framework. + Pass --help for details. + +* `fuzztest` + + Generates random changes to files in the current repository, + for use in testing the assistant. This is dangerous, so it will not + do anything unless --forced. + # OPTIONS * `--force` From 9720ee9e5635895ae393047b567eaf637f070d18 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Fri, 1 Aug 2014 15:09:49 -0400 Subject: [PATCH 46/54] testremote: New command to test uploads/downloads to a remote. This only performs some basic tests so far; no testing of chunking or resuming. 
Also, the existing encryption type of the remote is used; it would be good later to derive an encrypted and a non-encrypted version of the remote and test them both. This commit was sponsored by Joseph Liu. --- Backend/Hash.hs | 38 +++++++++---- CmdLine/GitAnnex.hs | 6 +- Command/TestRemote.hs | 125 ++++++++++++++++++++++++++++++++++++++++++ Utility/Metered.hs | 3 + debian/changelog | 2 +- doc/git-annex.mdwn | 8 +++ git-annex.cabal | 2 +- 7 files changed, 169 insertions(+), 15 deletions(-) create mode 100644 Command/TestRemote.hs diff --git a/Backend/Hash.hs b/Backend/Hash.hs index 91267ed67e..62d0a0fca5 100644 --- a/Backend/Hash.hs +++ b/Backend/Hash.hs @@ -7,7 +7,10 @@ {-# LANGUAGE CPP #-} -module Backend.Hash (backends) where +module Backend.Hash ( + backends, + testKeyBackend, +) where import Common.Annex import qualified Annex @@ -36,10 +39,10 @@ hashes = concat {- The SHA256E backend is the default, so genBackendE comes first. -} backends :: [Backend] -backends = catMaybes $ map genBackendE hashes ++ map genBackend hashes +backends = map genBackendE hashes ++ map genBackend hashes -genBackend :: Hash -> Maybe Backend -genBackend hash = Just Backend +genBackend :: Hash -> Backend +genBackend hash = Backend { name = hashName hash , getKey = keyValue hash , fsckKey = Just $ checkKeyChecksum hash @@ -48,13 +51,11 @@ genBackend hash = Just Backend , isStableKey = const True } -genBackendE :: Hash -> Maybe Backend -genBackendE hash = do - b <- genBackend hash - return $ b - { name = hashNameE hash - , getKey = keyValueE hash - } +genBackendE :: Hash -> Backend +genBackendE hash = (genBackend hash) + { name = hashNameE hash + , getKey = keyValueE hash + } hashName :: Hash -> String hashName (SHAHash size) = "SHA" ++ show size @@ -176,3 +177,18 @@ skeinHasher hashsize | hashsize == 512 = show . skein512 #endif | otherwise = error $ "unsupported skein size " ++ show hashsize + +{- A varient of the SHA256E backend, for testing that needs special keys + - that cannot collide with legitimate keys in the repository. + - + - This is accomplished by appending a special extension to the key, + - that is not one that selectExtension would select (due to being too + - long). + -} +testKeyBackend :: Backend +testKeyBackend = + let b = genBackendE (SHAHash 256) + in b { getKey = (fmap addE) <$$> getKey b } + where + addE k = k { keyName = keyName k ++ longext } + longext = ".this-is-a-test-key" diff --git a/CmdLine/GitAnnex.hs b/CmdLine/GitAnnex.hs index 4c9377df9d..80a784dd77 100644 --- a/CmdLine/GitAnnex.hs +++ b/CmdLine/GitAnnex.hs @@ -96,9 +96,10 @@ import qualified Command.XMPPGit #endif import qualified Command.RemoteDaemon #endif -import qualified Command.Test #ifdef WITH_TESTSUITE +import qualified Command.Test import qualified Command.FuzzTest +import qualified Command.TestRemote #endif #ifdef WITH_EKG import System.Remote.Monitoring @@ -187,9 +188,10 @@ cmds = concat #endif , Command.RemoteDaemon.def #endif - , Command.Test.def #ifdef WITH_TESTSUITE + , Command.Test.def , Command.FuzzTest.def + , Command.TestRemote.def #endif ] diff --git a/Command/TestRemote.hs b/Command/TestRemote.hs new file mode 100644 index 0000000000..aedb8562d6 --- /dev/null +++ b/Command/TestRemote.hs @@ -0,0 +1,125 @@ +{- git-annex command + - + - Copyright 2014 Joey Hess + - + - Licensed under the GNU GPL version 3 or higher. 
+ -} + +module Command.TestRemote where + +import Common +import Command +import qualified Annex +import qualified Remote +import Types +import Types.Key (key2file, keyBackendName, keySize) +import Types.Backend (getKey, fsckKey) +import Types.KeySource +import Annex.Content +import Backend +import qualified Backend.Hash +import Utility.Tmp +import Utility.Metered +import Messages +import Types.Messages + +import Test.Tasty +import Test.Tasty.Runners +import Test.Tasty.HUnit +import "crypto-api" Crypto.Random +import qualified Data.ByteString as B + +def :: [Command] +def = [ command "testremote" paramRemote seek SectionTesting + "test transfers to/from a remote"] + +seek :: CommandSeek +seek = withWords start + +start :: [String] -> CommandStart +start ws = do + let name = unwords ws + showStart "testremote" name + r <- either error id <$> Remote.byName' name + showSideAction "generating test keys" + ks <- testKeys + next $ perform r ks + +perform :: Remote -> [Key] -> CommandPerform +perform r ks = do + st <- Annex.getState id + let tests = testGroup "Remote Tests" $ + map (\k -> testGroup (descSize k) (testList st r k)) ks + ok <- case tryIngredients [consoleTestReporter] mempty tests of + Nothing -> error "No tests found!?" + Just act -> liftIO act + next $ cleanup r ks ok + where + descSize k = "key size " ++ show (keySize k) + +testList :: Annex.AnnexState -> Remote -> Key -> [TestTree] +testList st r k = + [ check "removeKey when not present" $ + Remote.removeKey r k + , present False + , check "storeKey" $ + Remote.storeKey r k Nothing nullMeterUpdate + , present True + , check "storeKey when already present" $ + Remote.storeKey r k Nothing nullMeterUpdate + , present True + , check "retrieveKeyFile" $ do + removeAnnex k + getViaTmp k $ \dest -> + Remote.retrieveKeyFile r k Nothing dest nullMeterUpdate + , check "fsck downloaded object" $ do + case maybeLookupBackendName (keyBackendName k) of + Nothing -> return True + Just b -> case fsckKey b of + Nothing -> return True + Just fscker -> fscker k (key2file k) + , check "removeKey when present" $ + Remote.removeKey r k + , present False + ] + where + check desc a = testCase desc $ + Annex.eval st (Annex.setOutput QuietOutput >> a) @? "failed" + present b = check ("present " ++ show b) $ + (== Right b) <$> Remote.hasKey r k + +cleanup :: Remote -> [Key] -> Bool -> CommandCleanup +cleanup r ks ok = do + forM_ ks (Remote.removeKey r) + forM_ ks removeAnnex + return ok + +-- Generate random keys of several interesting sizes, assuming a chunk +-- size that is a uniform divisor of 1 MB. +testKeys :: Annex [Key] +testKeys = mapM randKey + [ 0 -- empty key is a special case when chunking + , mb + , mb + 1 + , mb - 1 + , mb + mb + ] + where + mb = 1024 * 2014 + +randKey :: Int -> Annex Key +randKey sz = withTmpFile "randkey" $ \f h -> do + gen <- liftIO (newGenIO :: IO SystemRandom) + case genBytes sz gen of + Left e -> error $ "failed to generate random key: " ++ show e + Right (rand, _) -> liftIO $ B.hPut h rand + liftIO $ hClose h + let ks = KeySource + { keyFilename = f + , contentLocation = f + , inodeCache = Nothing + } + k <- fromMaybe (error "failed to generate random key") + <$> getKey Backend.Hash.testKeyBackend ks + moveAnnex k f + return k diff --git a/Utility/Metered.hs b/Utility/Metered.hs index cc07f9c351..4618aecfe4 100644 --- a/Utility/Metered.hs +++ b/Utility/Metered.hs @@ -24,6 +24,9 @@ import Data.Int - far, *not* an incremental amount since the last call. 
-} type MeterUpdate = (BytesProcessed -> IO ()) +nullMeterUpdate :: MeterUpdate +nullMeterUpdate _ = return () + {- Total number of bytes processed so far. -} newtype BytesProcessed = BytesProcessed Integer deriving (Eq, Ord, Show) diff --git a/debian/changelog b/debian/changelog index eb399dfee5..f8b700ae73 100644 --- a/debian/changelog +++ b/debian/changelog @@ -15,7 +15,7 @@ git-annex (5.20140718) UNRELEASED; urgency=medium were incompletely repaired before. * Fix cost calculation for non-encrypted remotes. * WebDAV: Dropped support for DAV before 0.6.1. - * testremote: New command. + * testremote: New command to test uploads/downloads to a remote. -- Joey Hess Mon, 21 Jul 2014 14:41:26 -0400 diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn index 9158b54e00..d618a619a5 100644 --- a/doc/git-annex.mdwn +++ b/doc/git-annex.mdwn @@ -966,6 +966,14 @@ subdirectories). There are several parameters, provided by Haskell's tasty test framework. Pass --help for details. +* `testremote remote` + + This tests a remote by generating some random objects and sending them to + the remote, then redownloading them, removing them from the remote, etc. + + It's safe to run in an existing repository (the repository contents are + not altered), although it may perform expensive data transfers. + * `fuzztest` Generates random changes to files in the current repository, diff --git a/git-annex.cabal b/git-annex.cabal index 0d0d979ea0..2a39489d40 100644 --- a/git-annex.cabal +++ b/git-annex.cabal @@ -124,7 +124,7 @@ Executable git-annex if flag(TestSuite) Build-Depends: tasty (>= 0.7), tasty-hunit, tasty-quickcheck, tasty-rerun, - optparse-applicative + optparse-applicative, crypto-api CPP-Options: -DWITH_TESTSUITE if flag(TDFA) From 8fce4e4bd7d452867ee4dfcd9769360d86be361e Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Fri, 1 Aug 2014 15:36:11 -0400 Subject: [PATCH 47/54] fix chunk=0 Found by testremote --- Remote/Helper/Chunked.hs | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/Remote/Helper/Chunked.hs b/Remote/Helper/Chunked.hs index fcfe06b20d..129db3281a 100644 --- a/Remote/Helper/Chunked.hs +++ b/Remote/Helper/Chunked.hs @@ -33,6 +33,7 @@ data ChunkConfig = NoChunks | UnpaddedChunks ChunkSize | LegacyChunks ChunkSize + deriving (Show) noChunks :: ChunkConfig -> Bool noChunks NoChunks = True @@ -43,12 +44,14 @@ chunkConfig m = case M.lookup "chunksize" m of Nothing -> case M.lookup "chunk" m of Nothing -> NoChunks - Just v -> UnpaddedChunks $ readsz v "chunk" - Just v -> LegacyChunks $ readsz v "chunksize" + Just v -> readsz UnpaddedChunks v "chunk" + Just v -> readsz LegacyChunks v "chunksize" where - readsz v f = case readSize dataUnits v of - Just size | size > 0 -> fromInteger size - _ -> error ("bad " ++ f) + readsz c v f = case readSize dataUnits v of + Just size + | size == 0 -> NoChunks + | size > 0 -> c (fromInteger size) + _ -> error $ "bad configuration " ++ f ++ "=" ++ v From 9636cfd9e1d0c567e1a1ed5245055d5139983db9 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Fri, 1 Aug 2014 16:29:39 -0400 Subject: [PATCH 48/54] fix a fencepost bug when resuming chunked store at end Discovered thanks to testremote command!
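(To make the fencepost concrete, a standalone toy model of the chunk key stream from earlier in this series — reconstructed for illustration with bare Integers, not the patched module. numChunks derives the number of chunks consumed from the next chunk number, so handing back a stream advanced one key past the last verified chunk, as the old cks' appears to have done, over-counts by one when a resume finds every chunk already stored; the fix below returns the unadvanced cks instead.)

    newtype ChunkKeyStream = ChunkKeyStream [Integer]

    chunkKeyStream :: ChunkKeyStream
    chunkKeyStream = ChunkKeyStream [1..] -- chunk numbers count up from 1

    nextChunkKeyStream :: ChunkKeyStream -> (Integer, ChunkKeyStream)
    nextChunkKeyStream (ChunkKeyStream (k:l)) = (k, ChunkKeyStream l)
    nextChunkKeyStream (ChunkKeyStream []) = undefined -- stream is infinite

    -- Chunks consumed so far = next chunk number, minus one.
    numChunks :: ChunkKeyStream -> Integer
    numChunks = pred . fst . nextChunkKeyStream

    main :: IO ()
    main = do
        -- a resume verifies that chunk 1, the only chunk, is already stored:
        let (_, afterlast) = nextChunkKeyStream chunkKeyStream
        print (numChunks afterlast) -- 1, the correct count
        -- consuming one more key from the stream over-counts:
        let (_, onetoofar) = nextChunkKeyStream afterlast
        print (numChunks onetoofar) -- 2, though only one chunk exists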
--- Remote/Helper/Chunked.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Remote/Helper/Chunked.hs b/Remote/Helper/Chunked.hs index 129db3281a..9482153d10 100644 --- a/Remote/Helper/Chunked.hs +++ b/Remote/Helper/Chunked.hs @@ -172,7 +172,7 @@ seekResume h chunkkeys checker = do | pos >= sz = do -- All chunks are already stored! liftIO $ hSeek h AbsoluteSeek sz - return (cks', toBytesProcessed sz) + return (cks, toBytesProcessed sz) | otherwise = do v <- checker k case v of From f4f82e2741eff86f8295faad1ad3227aaf5ac959 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Fri, 1 Aug 2014 16:30:23 -0400 Subject: [PATCH 49/54] deriving Show --- Logs/Chunk/Pure.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Logs/Chunk/Pure.hs b/Logs/Chunk/Pure.hs index 080a5a08b9..26fdd63c2a 100644 --- a/Logs/Chunk/Pure.hs +++ b/Logs/Chunk/Pure.hs @@ -21,7 +21,7 @@ import Data.Int -- Currently chunks are all fixed size, but other chunking methods -- may be added. data ChunkMethod = FixedSizeChunks ChunkSize | UnknownChunks String - deriving (Ord, Eq) + deriving (Ord, Eq, Show) type ChunkSize = Int64 From 20d7295386fda0c9f30e7aca2045c1d7eb4807a1 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Fri, 1 Aug 2014 16:50:24 -0400 Subject: [PATCH 50/54] improve testremote command, adding chunk size testing And also a --size parameter to configure the basic object size. --- Command/TestRemote.hs | 88 +++++++++++++++++++++++++++++-------------- doc/git-annex.mdwn | 2 + 2 files changed, 62 insertions(+), 28 deletions(-) diff --git a/Command/TestRemote.hs b/Command/TestRemote.hs index aedb8562d6..6dde4b9f03 100644 --- a/Command/TestRemote.hs +++ b/Command/TestRemote.hs @@ -11,6 +11,7 @@ import Common import Command import qualified Annex import qualified Remote +import qualified Types.Remote as Remote import Types import Types.Key (key2file, keyBackendName, keySize) import Types.Backend (getKey, fsckKey) @@ -20,45 +21,72 @@ import Backend import qualified Backend.Hash import Utility.Tmp import Utility.Metered +import Utility.DataUnits import Messages import Types.Messages +import Remote.Helper.Chunked import Test.Tasty import Test.Tasty.Runners import Test.Tasty.HUnit import "crypto-api" Crypto.Random import qualified Data.ByteString as B +import qualified Data.Map as M def :: [Command] -def = [ command "testremote" paramRemote seek SectionTesting - "test transfers to/from a remote"] +def = [ withOptions [sizeOption] $ + command "testremote" paramRemote seek SectionTesting + "test transfers to/from a remote"] + +sizeOption :: Option +sizeOption = fieldOption [] "size" paramSize "base key size (default 1MiB)" seek :: CommandSeek -seek = withWords start +seek ps = do + basesz <- fromInteger . fromMaybe (1024 * 1024) + <$> getOptionField sizeOption (pure . 
getsize) + withWords (start basesz) ps + where + getsize v = v >>= readSize dataUnits -start :: [String] -> CommandStart -start ws = do +start :: Int -> [String] -> CommandStart +start basesz ws = do let name = unwords ws showStart "testremote" name r <- either error id <$> Remote.byName' name showSideAction "generating test keys" - ks <- testKeys - next $ perform r ks + ks <- mapM randKey (keySizes basesz) + rs <- catMaybes <$> mapM (adjustChunkSize r) (chunkSizes basesz) + next $ perform rs ks -perform :: Remote -> [Key] -> CommandPerform -perform r ks = do +perform :: [Remote] -> [Key] -> CommandPerform +perform rs ks = do st <- Annex.getState id let tests = testGroup "Remote Tests" $ - map (\k -> testGroup (descSize k) (testList st r k)) ks + [ testGroup (desc r k) (test st r k) | k <- ks, r <- rs ] ok <- case tryIngredients [consoleTestReporter] mempty tests of Nothing -> error "No tests found!?" Just act -> liftIO act - next $ cleanup r ks ok + next $ cleanup rs ks ok where - descSize k = "key size " ++ show (keySize k) + desc r' k = unwords + [ "key size" + , show (keySize k) + , "chunk size" + , show (chunkConfig (Remote.config r')) + ] -testList :: Annex.AnnexState -> Remote -> Key -> [TestTree] -testList st r k = +-- To adjust a Remote to use a new chunk size, have to re-generate it with +-- a modified config. +adjustChunkSize :: Remote -> Int -> Annex (Maybe Remote) +adjustChunkSize r chunksize = Remote.generate (Remote.remotetype r) + (Remote.repo r) + (Remote.uuid r) + (M.insert "chunk" (show chunksize) (Remote.config r)) + (Remote.gitconfig r) + +test :: Annex.AnnexState -> Remote -> Key -> [TestTree] +test st r k = [ check "removeKey when not present" $ Remote.removeKey r k , present False @@ -88,24 +116,28 @@ testList st r k = present b = check ("present " ++ show b) $ (== Right b) <$> Remote.hasKey r k -cleanup :: Remote -> [Key] -> Bool -> CommandCleanup -cleanup r ks ok = do - forM_ ks (Remote.removeKey r) +cleanup :: [Remote] -> [Key] -> Bool -> CommandCleanup +cleanup rs ks ok = do + forM_ rs $ \r -> forM_ ks (Remote.removeKey r) forM_ ks removeAnnex return ok --- Generate random keys of several interesting sizes, assuming a chunk --- size that is a uniform divisor of 1 MB. -testKeys :: Annex [Key] -testKeys = mapM randKey - [ 0 -- empty key is a special case when chunking - , mb - , mb + 1 - , mb - 1 - , mb + mb +chunkSizes :: Int -> [Int] +chunkSizes base = + [ 0 -- no chunking + , base `div` 100 + , base `div` 1000 + , base + ] + +keySizes :: Int -> [Int] +keySizes base = filter (>= 0) + [ 0 -- empty key is a special case when chunking + , base + , base + 1 + , base - 1 + , base * 2 ] - where - mb = 1024 * 2014 randKey :: Int -> Annex Key randKey sz = withTmpFile "randkey" $ \f h -> do diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn index d618a619a5..de5e042331 100644 --- a/doc/git-annex.mdwn +++ b/doc/git-annex.mdwn @@ -974,6 +974,8 @@ subdirectories). It's safe to run in an existing repository (the repository contents are not altered), although it may perform expensive data transfers. + The --size option can be used to tune the size of the generated objects. + * `fuzztest` Generates random changes to files in the current repository, From 3991327d0923dde47459d06f13577d97f47a7475 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Fri, 1 Aug 2014 17:16:20 -0400 Subject: [PATCH 51/54] testremote: Test retrieveKeyFile resume And fixed a bug found by these tests; retrieveKeyFile would fail when the dest file was already complete. 
This commit was sponsored by Bradley Unterrheiner. --- Command/TestRemote.hs | 58 +++++++++++++++++++++++++++++----------- Remote/Helper/Chunked.hs | 4 ++- 2 files changed, 45 insertions(+), 17 deletions(-) diff --git a/Command/TestRemote.hs b/Command/TestRemote.hs index 6dde4b9f03..186d067d6d 100644 --- a/Command/TestRemote.hs +++ b/Command/TestRemote.hs @@ -22,15 +22,19 @@ import qualified Backend.Hash import Utility.Tmp import Utility.Metered import Utility.DataUnits +import Utility.CopyFile import Messages import Types.Messages import Remote.Helper.Chunked +import Locations import Test.Tasty import Test.Tasty.Runners import Test.Tasty.HUnit +import Control.Exception import "crypto-api" Crypto.Random import qualified Data.ByteString as B +import qualified Data.ByteString.Lazy as L import qualified Data.Map as M def :: [Command] @@ -87,27 +91,40 @@ adjustChunkSize r chunksize = Remote.generate (Remote.remotetype r) test :: Annex.AnnexState -> Remote -> Key -> [TestTree] test st r k = - [ check "removeKey when not present" $ - Remote.removeKey r k + [ check "removeKey when not present" remove , present False - , check "storeKey" $ - Remote.storeKey r k Nothing nullMeterUpdate + , check "storeKey" store , present True - , check "storeKey when already present" $ - Remote.storeKey r k Nothing nullMeterUpdate + , check "storeKey when already present" store , present True , check "retrieveKeyFile" $ do removeAnnex k - getViaTmp k $ \dest -> - Remote.retrieveKeyFile r k Nothing dest nullMeterUpdate - , check "fsck downloaded object" $ do - case maybeLookupBackendName (keyBackendName k) of - Nothing -> return True - Just b -> case fsckKey b of - Nothing -> return True - Just fscker -> fscker k (key2file k) - , check "removeKey when present" $ - Remote.removeKey r k + get + , check "fsck downloaded object" fsck + , check "retrieveKeyFile resume from 33%" $ do + loc <- Annex.calcRepo (gitAnnexLocation k) + tmp <- prepTmp k + partial <- liftIO $ bracket (openBinaryFile loc ReadMode) hClose $ \h -> do + sz <- hFileSize h + L.hGet h $ fromInteger $ sz `div` 3 + liftIO $ L.writeFile tmp partial + removeAnnex k + get + , check "fsck downloaded object" fsck + , check "retrieveKeyFile resume from 0" $ do + tmp <- prepTmp k + liftIO $ writeFile tmp "" + removeAnnex k + get + , check "fsck downloaded object" fsck + , check "retrieveKeyFile resume from end" $ do + loc <- Annex.calcRepo (gitAnnexLocation k) + tmp <- prepTmp k + void $ liftIO $ copyFileExternal loc tmp + removeAnnex k + get + , check "fsck downloaded object" fsck + , check "removeKey when present" remove , present False ] where @@ -115,6 +132,15 @@ test st r k = Annex.eval st (Annex.setOutput QuietOutput >> a) @? "failed" present b = check ("present " ++ show b) $ (== Right b) <$> Remote.hasKey r k + fsck = case maybeLookupBackendName (keyBackendName k) of + Nothing -> return True + Just b -> case fsckKey b of + Nothing -> return True + Just fscker -> fscker k (key2file k) + get = getViaTmp k $ \dest -> + Remote.retrieveKeyFile r k Nothing dest nullMeterUpdate + store = Remote.storeKey r k Nothing nullMeterUpdate + remove = Remote.removeKey r k cleanup :: [Remote] -> [Key] -> Bool -> CommandCleanup cleanup rs ks ok = do diff --git a/Remote/Helper/Chunked.hs b/Remote/Helper/Chunked.hs index 9482153d10..2a156ddc54 100644 --- a/Remote/Helper/Chunked.hs +++ b/Remote/Helper/Chunked.hs @@ -241,7 +241,9 @@ retrieveChunks retriever u chunkconfig encryptor basek dest basep sink currsize <- liftIO $ catchMaybeIO $ toInteger . 
fileSize <$> getFileStatus dest let ls' = maybe ls (setupResume ls) currsize - firstavail currsize ls' `catchNonAsyncAnnex` giveup + if any (== 0) (map length ls') + then return True -- dest is already complete + else firstavail currsize ls' `catchNonAsyncAnnex` giveup giveup e = do warning (show e) From de0da0aece589190473e896e1798de84fdcabe22 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Fri, 1 Aug 2014 17:18:39 -0400 Subject: [PATCH 52/54] minor optimisation --- Remote/Helper/Chunked.hs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/Remote/Helper/Chunked.hs b/Remote/Helper/Chunked.hs index 2a156ddc54..0d786c98dd 100644 --- a/Remote/Helper/Chunked.hs +++ b/Remote/Helper/Chunked.hs @@ -241,7 +241,7 @@ retrieveChunks retriever u chunkconfig encryptor basek dest basep sink currsize <- liftIO $ catchMaybeIO $ toInteger . fileSize <$> getFileStatus dest let ls' = maybe ls (setupResume ls) currsize - if any (== 0) (map length ls') + if any null ls' then return True -- dest is already complete else firstavail currsize ls' `catchNonAsyncAnnex` giveup From 50a1cac24ffadb380a83be36337a0cd65d431a08 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Fri, 1 Aug 2014 17:52:09 -0400 Subject: [PATCH 53/54] fix example external remote script to write files atomically --- doc/special_remotes/external/example.sh | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/doc/special_remotes/external/example.sh b/doc/special_remotes/external/example.sh index 5152ccc281..8fed9f4aa3 100755 --- a/doc/special_remotes/external/example.sh +++ b/doc/special_remotes/external/example.sh @@ -128,14 +128,25 @@ while read line; do STORE) # Store the file to a location # based on the key. - # XXX when possible, send PROGRESS + # XXX when at all possible, send PROGRESS calclocation "$key" mkdir -p "$(dirname "$LOC")" - if runcmd cp "$file" "$LOC"; then + # Store in temp file first, so that + # CHECKPRESENT does not see it + # until it is all stored. + mkdir -p "$mydirectory/tmp" + tmp="$mydirectory/tmp/$key" + if runcmd cp "$file" "$tmp" \ + && runcmd mv -f "$tmp" "$LOC"; then echo TRANSFER-SUCCESS STORE "$key" else echo TRANSFER-FAILURE STORE "$key" fi + + mkdir -p "$(dirname "$LOC")" + # The file may already exist, so + # make sure we can overwrite it. 
+ chmod 644 "$LOC" 2>/dev/null || true ;; RETRIEVE) # Retrieve from a location based on From 1ee24a03661bc03cff6438ab361daa272877bad2 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Fri, 1 Aug 2014 17:52:40 -0400 Subject: [PATCH 54/54] testremote now tests with and without encryption --- Command/TestRemote.hs | 33 +++++++++++++++++++++++---------- doc/git-annex.mdwn | 4 ++++ 2 files changed, 27 insertions(+), 10 deletions(-) diff --git a/Command/TestRemote.hs b/Command/TestRemote.hs index 186d067d6d..29a2e809cd 100644 --- a/Command/TestRemote.hs +++ b/Command/TestRemote.hs @@ -61,7 +61,8 @@ start basesz ws = do showSideAction "generating test keys" ks <- mapM randKey (keySizes basesz) rs <- catMaybes <$> mapM (adjustChunkSize r) (chunkSizes basesz) - next $ perform rs ks + rs' <- concat <$> mapM encryptionVariants rs + next $ perform rs' ks perform :: [Remote] -> [Key] -> CommandPerform perform rs ks = do @@ -73,20 +74,32 @@ perform rs ks = do Just act -> liftIO act next $ cleanup rs ks ok where - desc r' k = unwords - [ "key size" - , show (keySize k) - , "chunk size" - , show (chunkConfig (Remote.config r')) + desc r' k = intercalate "; " $ map unwords + [ [ "key size", show (keySize k) ] + , [ show (chunkConfig (Remote.config r')) ] + , ["encryption", fromMaybe "none" (M.lookup "encryption" (Remote.config r'))] ] --- To adjust a Remote to use a new chunk size, have to re-generate it with --- a modified config. adjustChunkSize :: Remote -> Int -> Annex (Maybe Remote) -adjustChunkSize r chunksize = Remote.generate (Remote.remotetype r) +adjustChunkSize r chunksize = adjustRemoteConfig r + (M.insert "chunk" (show chunksize)) + +-- Variants of a remote with no encryption, and with simple shared +-- encryption. Gpg key based encryption is not tested. +encryptionVariants :: Remote -> Annex [Remote] +encryptionVariants r = do + noenc <- adjustRemoteConfig r (M.insert "encryption" "none") + sharedenc <- adjustRemoteConfig r $ + M.insert "encryption" "shared" . + M.insert "highRandomQuality" "false" + return $ catMaybes [noenc, sharedenc] + +-- Regenerate a remote with a modified config. +adjustRemoteConfig :: Remote -> (Remote.RemoteConfig -> Remote.RemoteConfig) -> Annex (Maybe Remote) +adjustRemoteConfig r adjustconfig = Remote.generate (Remote.remotetype r) (Remote.repo r) (Remote.uuid r) - (M.insert "chunk" (show chunksize) (Remote.config r)) + (adjustconfig (Remote.config r)) (Remote.gitconfig r) test :: Annex.AnnexState -> Remote -> Key -> [TestTree] diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn index de5e042331..ba851eef86 100644 --- a/doc/git-annex.mdwn +++ b/doc/git-annex.mdwn @@ -976,6 +976,10 @@ subdirectories). The --size option can be used to tune the size of the generated objects. + Testing a single remote will use the remote's configuration, + automatically varying the chunk sizes, and with simple shared encryption + enabled and disabled. + * `fuzztest` Generates random changes to files in the current repository,
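(Illustration only: a hypothetical invocation of the finished command, with an invented remote name; --size is the option added in PATCH 50.)

    git annex testremote usbdrive --size=500KiB

Each generated key is then exercised against every combination of the derived chunk sizes and the encryption=none / encryption=shared variants that this final patch sets up.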