2012-11-16 21:58:08 +00:00
|
|
|
{- git-annex chunked remotes
|
|
|
|
-
|
2014-07-24 18:49:22 +00:00
|
|
|
- Copyright 2012-2014 Joey Hess <joey@kitenet.net>
|
2012-11-16 21:58:08 +00:00
|
|
|
-
|
|
|
|
- Licensed under the GNU GPL version 3 or higher.
|
|
|
|
-}
|
|
|
|
|
2014-07-27 00:11:41 +00:00
|
|
|
module Remote.Helper.Chunked (
|
|
|
|
ChunkSize,
|
|
|
|
ChunkConfig(..),
|
|
|
|
chunkConfig,
|
|
|
|
storeChunks,
|
|
|
|
removeChunks,
|
|
|
|
retrieveChunks,
|
|
|
|
hasKeyChunks,
|
|
|
|
) where
|
2012-11-16 21:58:08 +00:00
|
|
|
|
2014-07-24 20:42:35 +00:00
|
|
|
import Common.Annex
|
2012-11-16 21:58:08 +00:00
|
|
|
import Utility.DataUnits
|
|
|
|
import Types.Remote
|
2014-07-25 20:20:32 +00:00
|
|
|
import Types.Key
|
|
|
|
import Logs.Chunk.Pure (ChunkSize, ChunkCount)
|
|
|
|
import Logs.Chunk
|
2014-07-24 20:42:35 +00:00
|
|
|
import Utility.Metered
|
2014-07-27 00:11:41 +00:00
|
|
|
import Crypto (EncKey)
|
2012-11-16 21:58:08 +00:00
|
|
|
|
2014-07-24 20:42:35 +00:00
|
|
|
import qualified Data.ByteString.Lazy as L
|
2012-11-16 21:58:08 +00:00
|
|
|
import qualified Data.Map as M
|
|
|
|
|
2014-07-24 18:49:22 +00:00
|
|
|
{- How a remote is configured to split up the content of keys.
 -
 - NoChunks is used when neither the "chunk" nor the "chunksize"
 - setting is present in the remote's configuration.
 - UnpaddedChunks is selected by the "chunk" setting.
 - LegacyChunks is selected by the "chunksize" setting.
 -}
data ChunkConfig
    = NoChunks
    | UnpaddedChunks ChunkSize
    | LegacyChunks ChunkSize
|
2012-11-16 21:58:08 +00:00
|
|
|
|
2014-07-24 18:49:22 +00:00
|
|
|
{- Works out the ChunkConfig of a remote from its configuration.
 -
 - A "chunksize" setting takes precedence over a "chunk" setting.
 - A setting whose value does not parse to a size greater than zero
 - results in an error naming the bad setting.
 -}
chunkConfig :: RemoteConfig -> ChunkConfig
chunkConfig cfg = case (setting "chunksize", setting "chunk") of
    (Just legacy, _) -> LegacyChunks (parsesize "chunksize" legacy)
    (Nothing, Just unpadded) -> UnpaddedChunks (parsesize "chunk" unpadded)
    (Nothing, Nothing) -> NoChunks
  where
    setting name = M.lookup name cfg

    parsesize name val = case readSize dataUnits val of
        Just n | n > 0 -> fromInteger n
        _ -> error ("bad " ++ name)
|
2014-07-24 20:42:35 +00:00
|
|
|
|
2014-07-25 20:20:32 +00:00
|
|
|
{- A never-ending list of the chunk keys of a key. The first key in
 - the list is the one for chunk number 1. -}
newtype ChunkKeyStream = ChunkKeyStream [Key]
|
|
|
|
|
|
|
|
{- Generates the stream of chunk keys for a key. Every key in the
 - stream has the chunk size set, and the chunk number counts up
 - from 1. -}
chunkKeyStream :: Key -> ChunkSize -> ChunkKeyStream
chunkKeyStream basek chunksize =
    ChunkKeyStream [ numbered n | n <- [1..] ]
  where
    withsize = basek { keyChunkSize = Just (toInteger chunksize) }
    numbered n = withsize { keyChunkNum = Just n }
|
|
|
|
|
|
|
|
{- Takes the next chunk key off the front of the stream, returning it
 - along with the remainder of the stream.
 -
 - Streams built by chunkKeyStream never run out of keys, so the empty
 - case is not expected to be reached. If it ever is, fail with a
 - message that identifies the problem, rather than a bare undefined.
 -}
nextChunkKeyStream :: ChunkKeyStream -> (Key, ChunkKeyStream)
nextChunkKeyStream (ChunkKeyStream (k:l)) = (k, ChunkKeyStream l)
nextChunkKeyStream (ChunkKeyStream []) =
    error "nextChunkKeyStream: empty ChunkKeyStream (stream is supposed to be infinite)"
|
|
|
|
|
|
|
|
{- Gets the given number of chunk keys from the front of the stream. -}
takeChunkKeyStream :: ChunkCount -> ChunkKeyStream -> [Key]
takeChunkKeyStream count stream = case stream of
    ChunkKeyStream keys -> genericTake count keys
|
|
|
|
|
|
|
|
{- Number of chunks that have already been consumed from the stream.
 - This is one less than the chunk number of the next key in the
 - stream. -}
numChunks :: ChunkKeyStream -> Integer
numChunks stream = pred (fromJust (keyChunkNum nextkey))
  where
    (nextkey, _) = nextChunkKeyStream stream
|
|
|
|
|
2014-07-26 16:04:35 +00:00
|
|
|
{- Stores the content of a key, read from a file, using the storer
 - action.
 -
 - When the remote uses UnpaddedChunks, the content is split into
 - chunks. Each chunk is passed to the storer, along with its chunk
 - key and a progress meter update callback. Once every chunk has been
 - stored successfully, the chunk log is updated. If storing a chunk
 - fails, the remaining chunks are not stored and the result is False.
 -
 - Each chunk is buffered in memory, so a large ChunkSize can use a
 - lot of memory. More optimal versions could rely on L.toChunks to
 - split the lazy bytestring into pieces (typically smaller than the
 - ChunkSize), and eg, write those to a Handle. But this is the best
 - that can be done with a storer interface that writes a whole
 - L.ByteString at a time.
 -
 - This action may be called on a chunked key. It will simply store it.
 - The same happens with any other ChunkConfig.
 -
 - A failure to open the file is displayed as a warning, and the
 - result is False.
 -}
storeChunks
    :: UUID
    -> ChunkConfig
    -> Key
    -> FilePath
    -> MeterUpdate
    -> (Key -> L.ByteString -> MeterUpdate -> IO Bool)
    -> Annex Bool
storeChunks u chunkconfig k f p storer = metered (Just p) k $ \meterupdate -> do
    v <- liftIO $ tryIO $ L.readFile f
    case v of
        Left e -> do
            warning (show e)
            return False
        Right b -> store meterupdate b
  where
    store meterupdate b = case chunkconfig of
        UnpaddedChunks chunksize
            | isChunkKey k -> whole
            | otherwise ->
                storechunked meterupdate chunksize b (chunkKeyStream k chunksize)
        _ -> whole
      where
        whole = liftIO $ storer k b meterupdate

    storechunked :: MeterUpdate -> ChunkSize -> L.ByteString -> ChunkKeyStream -> Annex Bool
    storechunked meterupdate chunksize content keystream =
        loop zeroBytesProcessed (split content) keystream
      where
        split = L.splitAt chunksize

        loop done (chunk, rest) keys
            | L.null chunk && stored > 0 = do
                -- Nothing is left to store, and at least one
                -- chunk was stored, so record the chunks in
                -- the chunk log.
                chunksStored u k chunksize stored
                return True
            | otherwise = do
                ok <- liftIO $ storer chunkkey chunk offsetmeter
                if ok
                    then loop (addBytesProcessed done (L.length chunk)) (split rest) keys'
                    else return False
          where
            stored = numChunks keys
            (chunkkey, keys') = nextChunkKeyStream keys
            {- The meter update given to the storer is offset by
             - the bytes stored in earlier chunks, so that it
             - reflects the total progress. -}
            offsetmeter = offsetMeterUpdate meterupdate done
|
|
|
|
|
2014-07-27 00:11:41 +00:00
|
|
|
{- Removes a key from a remote, by running the remover action on every
 - key in every list returned by chunkKeys.
 -
 - The remover action should succeed even when asked to remove a key
 - that is not present on the remote.
 -
 - When everything was removed, and the remote uses UnpaddedChunks,
 - chunksRemoved is called for the chunk size of the first key of
 - each list.
 -
 - This action may be called on a chunked key. It will simply remove it.
 -}
removeChunks :: (Key -> Annex Bool) -> UUID -> ChunkConfig -> EncKey -> Key -> Annex Bool
removeChunks remover u chunkconfig encryptor k = do
    keylists <- chunkKeys u chunkconfig k
    removed <- allM (\key -> remover (encryptor key)) (concat keylists)
    when removed $
        updatelog keylists
    return removed
  where
    updatelog keylists = case chunkconfig of
        UnpaddedChunks _
            | isChunkKey k -> noop
            | otherwise ->
                forM_ (sizes keylists) $ \sz ->
                    chunksRemoved u k (fromIntegral sz)
        _ -> noop

    -- Chunk size of the first key of each list, for the lists whose
    -- first key has one.
    sizes keylists = [ sz | Just sz <- map (keyChunkSize <=< headMaybe) keylists ]
|
|
|
|
|
|
|
|
{- Retrieves a key from a remote. The retriever action streams the
 - content of a key to a ByteString, which is fed to the sink.
 -
 - Each list of keys returned by chunkKeys is tried in turn, until one
 - is found where the retriever successfully gets the first key of the
 - list. The content of that key, followed by the content of the other
 - keys in that list, is fed to the sink.
 -
 - If retrieval of one of the subsequent keys throws an exception,
 - this gives up and returns False. Note that partial data may have
 - been written to the sink in this case.
 -}
retrieveChunks
    :: (Key -> IO L.ByteString)
    -> UUID
    -> ChunkConfig
    -> EncKey
    -> Key
    -> MeterUpdate
    -> (MeterUpdate -> L.ByteString -> IO ())
    -> Annex Bool
retrieveChunks retriever u chunkconfig encryptor basek basep sink = do
    candidates <- chunkKeys u chunkconfig basek
    liftIO $ tryeach candidates `catchNonAsync` failed
  where
    failed e = warningIO (show e) >> return False

    fetch = retriever . encryptor

    tryeach [] = return False
    tryeach ([]:others) = tryeach others
    tryeach ((k:ks):others) = do
        v <- tryNonAsync (fetch k)
        case v of
            Right b -> do
                sink basep b
                let sz = toBytesProcessed (fromMaybe 0 (keyChunkSize k))
                fetchrest sz sz ks
            Left e
                | null others -> failed e
                | otherwise -> tryeach others

    -- The meter update used for each remaining key is offset by the
    -- chunk size multiplied by the number of keys already retrieved.
    fetchrest _ _ [] = return True
    fetchrest sz done (k:ks) = do
        b <- fetch k
        sink (offsetMeterUpdate basep done) b
        fetchrest sz (addBytesProcessed done sz) ks
|
|
|
|
|
|
|
|
{- Checks if a key is present in a remote. The key is present when,
 - for any one of the lists of keys returned by chunkKeys, the checker
 - action finds every key in the list to be present.
 -
 - When the checker fails on a list, the remaining lists are tried,
 - and its failure message is the result if none are left.
 -}
hasKeyChunks
    :: (Key -> Annex (Either String Bool))
    -> UUID
    -> ChunkConfig
    -> EncKey
    -> Key
    -> Annex (Either String Bool)
hasKeyChunks checker u chunkconfig encryptor basek =
    chunkKeys u chunkconfig basek >>= checklists impossible
  where
    impossible = "no recorded chunks"

    checklists lastfailmsg [] = return (Left lastfailmsg)
    -- An empty list of keys says nothing; move on to the next list.
    checklists _ ([]:ls) = checklists impossible ls
    checklists _ (l:ls) = do
        v <- checkchunks l
        case v of
            Right True -> return (Right True)
            Right False
                | null ls -> return (Right False)
                | otherwise -> checklists impossible ls
            Left e -> checklists e ls

    -- Stops at the first key that is not found to be present,
    -- returning what the checker said about that key.
    checkchunks :: [Key] -> Annex (Either String Bool)
    checkchunks [] = return (Right True)
    checkchunks (k:ks) = do
        v <- checker (encryptor k)
        case v of
            Right True -> checkchunks ks
            _ -> return v
|
2014-07-27 05:24:34 +00:00
|
|
|
|
|
|
|
{- A remote can hold a key unchunked, or as a list of chunk keys.
 - The same key can even be stored in a remote multiple times, using
 - different chunk sizes. This finds every list of keys that might be
 - on the remote, and that can be combined to get back the requested
 - key.
 -
 - The lists made from the chunks returned by getCurrentChunks come
 - first; the unchunked key is always the last option. A chunk key,
 - or a remote not using UnpaddedChunks, has only the key itself.
 -}
chunkKeys :: UUID -> ChunkConfig -> Key -> Annex [[Key]]
chunkKeys u chunkconfig k = case chunkconfig of
    UnpaddedChunks _
        | isChunkKey k -> unchunked
        | otherwise -> do
            recorded <- getCurrentChunks u k
            -- The chunk lists are probably what is in use, but
            -- the unchunked key could be present too.
            return (map (toChunkList k) recorded ++ [[k]])
    _ -> unchunked
  where
    unchunked = pure [[k]]
|
|
|
|
|
|
|
|
{- The list of chunk keys of a key, given the chunk size and the
 - number of chunks. -}
toChunkList :: Key -> (ChunkSize, ChunkCount) -> [Key]
toChunkList k (chunksize, chunkcount) =
    takeChunkKeyStream chunkcount stream
  where
    stream = chunkKeyStream k chunksize
|