git-annex/Remote/Helper/Chunked.hs

144 lines
4.8 KiB
Haskell

{- git-annex chunked remotes
-
- Copyright 2012 Joey Hess <joey@kitenet.net>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Remote.Helper.Chunked where
import Common.Annex
import Utility.DataUnits
import Types.Remote
import Utility.Metered
import qualified Data.Map as M
import qualified Data.ByteString.Lazy as L
import Data.Int
import qualified Control.Exception as E
type ChunkSize = Maybe Int64
{- Gets a remote's configured chunk size. -}
chunkSize :: RemoteConfig -> ChunkSize
chunkSize m =
case M.lookup "chunksize" m of
Nothing -> Nothing
Just v -> case readSize dataUnits v of
Nothing -> error "bad chunksize"
Just size
| size <= 0 -> error "bad chunksize"
| otherwise -> Just $ fromInteger size
{- This is an extension that's added to the usual file (or whatever)
- where the remote stores a key. -}
type ChunkExt = String
{- A record of the number of chunks used.
-
- While this can be guessed at based on the size of the key, encryption
- makes that larger. Also, using this helps deal with changes to chunksize
- over the life of a remote.
-}
chunkCount :: ChunkExt
chunkCount = ".chunkcount"
{- An infinite stream of extensions to use for chunks. -}
chunkStream :: [ChunkExt]
chunkStream = map (\n -> ".chunk" ++ show n) [1 :: Integer ..]
{- Parses the String from the chunkCount file, and returns the files that
- are used to store the chunks. -}
listChunks :: FilePath -> String -> [FilePath]
listChunks basedest chunkcount = take count $ map (basedest ++) chunkStream
where
count = fromMaybe 0 $ readish chunkcount
{- For use when there is no chunkCount file; uses the action to find
- chunks, and returns them, or Nothing if none found. Relies on
- storeChunks's finalizer atomically moving the chunks into place once all
- are written.
-
- This is only needed to work around a bug that caused the chunkCount file
- not to be written.
-}
probeChunks :: FilePath -> (FilePath -> IO Bool) -> IO [FilePath]
probeChunks basedest check = go [] $ map (basedest ++) chunkStream
where
go l [] = return (reverse l)
go l (c:cs) = ifM (check c)
( go (c:l) cs
, go l []
)
{- Given the base destination to use to store a value,
- generates a stream of temporary destinations (just one when not chunking)
- and passes it to an action, which should chunk and store the data,
- and return the destinations it stored to, or [] on error. Then
- calls the recorder to write the chunk count (if chunking). Finally, the
- finalizer is called to rename the tmp into the dest
- (and do any other cleanup).
-}
storeChunks :: Key -> FilePath -> FilePath -> ChunkSize -> ([FilePath] -> IO [FilePath]) -> (FilePath -> String -> IO ()) -> (FilePath -> FilePath -> IO ()) -> IO Bool
storeChunks key tmp dest chunksize storer recorder finalizer = either onerr return
=<< (E.try go :: IO (Either E.SomeException Bool))
where
go = do
stored <- storer tmpdests
when (isJust chunksize) $ do
let chunkcount = basef ++ chunkCount
recorder chunkcount (show $ length stored)
finalizer tmp dest
return (not $ null stored)
onerr e = do
print e
return False
basef = tmp ++ keyFile key
tmpdests
| isNothing chunksize = [basef]
| otherwise = map (basef ++ ) chunkStream
{- Given a list of destinations to use, chunks the data according to the
- ChunkSize, and runs the storer action to store each chunk. Returns
- the destinations where data was stored, or [] on error.
-
- This buffers each chunk in memory.
- More optimal versions of this can be written, that rely
- on L.toChunks to split the lazy bytestring into chunks (typically
- smaller than the ChunkSize), and eg, write those chunks to a Handle.
- But this is the best that can be done with the storer interface that
- writes a whole L.ByteString at a time.
-}
storeChunked :: ChunkSize -> [FilePath] -> (FilePath -> L.ByteString -> IO ()) -> L.ByteString -> IO [FilePath]
storeChunked chunksize dests storer content = either onerr return
=<< (E.try (go chunksize dests) :: IO (Either E.SomeException [FilePath]))
where
go _ [] = return [] -- no dests!?
go Nothing (d:_) = do
storer d content
return [d]
go (Just sz) _
-- always write a chunk, even if the data is 0 bytes
| L.null content = go Nothing dests
| otherwise = storechunks sz [] dests content
onerr e = do
print e
return []
storechunks _ _ [] _ = return [] -- ran out of dests
storechunks sz useddests (d:ds) b
| L.null b = return $ reverse useddests
| otherwise = do
let (chunk, b') = L.splitAt sz b
storer d chunk
storechunks sz (d:useddests) ds b'
{- Writes a series of chunks to a file. The feeder is called to get
- each chunk. -}
meteredWriteFileChunks :: MeterUpdate -> FilePath -> [v] -> (v -> IO L.ByteString) -> IO ()
meteredWriteFileChunks meterupdate dest chunks feeder =
withBinaryFile dest WriteMode $ \h ->
forM_ chunks $
meteredWrite meterupdate h <=< feeder