generic chunked content helper
However, directory still uses its optimzed chunked file writer, as it uses less memory than the generic one in the helper.
This commit is contained in:
parent
154a832223
commit
92d5d81c2c
2 changed files with 137 additions and 60 deletions
|
@ -19,8 +19,8 @@ import Config
|
||||||
import Utility.FileMode
|
import Utility.FileMode
|
||||||
import Remote.Helper.Special
|
import Remote.Helper.Special
|
||||||
import Remote.Helper.Encryptable
|
import Remote.Helper.Encryptable
|
||||||
|
import Remote.Helper.Chunked
|
||||||
import Crypto
|
import Crypto
|
||||||
import Utility.DataUnits
|
|
||||||
import Data.Int
|
import Data.Int
|
||||||
import Annex.Content
|
import Annex.Content
|
||||||
|
|
||||||
|
@ -58,19 +58,6 @@ gen r u c = do
|
||||||
remotetype = remote
|
remotetype = remote
|
||||||
}
|
}
|
||||||
|
|
||||||
type ChunkSize = Maybe Int64
|
|
||||||
|
|
||||||
chunkSize :: Maybe RemoteConfig -> ChunkSize
|
|
||||||
chunkSize Nothing = Nothing
|
|
||||||
chunkSize (Just m) =
|
|
||||||
case M.lookup "chunksize" m of
|
|
||||||
Nothing -> Nothing
|
|
||||||
Just v -> case readSize dataUnits v of
|
|
||||||
Nothing -> error "bad chunksize"
|
|
||||||
Just size
|
|
||||||
| size <= 0 -> error "bad chunksize"
|
|
||||||
| otherwise -> Just $ fromInteger size
|
|
||||||
|
|
||||||
directorySetup :: UUID -> RemoteConfig -> Annex RemoteConfig
|
directorySetup :: UUID -> RemoteConfig -> Annex RemoteConfig
|
||||||
directorySetup u c = do
|
directorySetup u c = do
|
||||||
-- verify configuration is sane
|
-- verify configuration is sane
|
||||||
|
@ -89,14 +76,6 @@ directorySetup u c = do
|
||||||
locations :: FilePath -> Key -> [FilePath]
|
locations :: FilePath -> Key -> [FilePath]
|
||||||
locations d k = map (d </>) (keyPaths k)
|
locations d k = map (d </>) (keyPaths k)
|
||||||
|
|
||||||
{- An infinite stream of chunks to use for a given file. -}
|
|
||||||
chunkStream :: FilePath -> [FilePath]
|
|
||||||
chunkStream f = map (\n -> f ++ ".chunk" ++ show n) [1 :: Integer ..]
|
|
||||||
|
|
||||||
{- A file that records the number of chunks used. -}
|
|
||||||
chunkCount :: FilePath -> FilePath
|
|
||||||
chunkCount f = f ++ ".chunkcount"
|
|
||||||
|
|
||||||
withCheckedFiles :: (FilePath -> IO Bool) -> ChunkSize -> FilePath -> Key -> ([FilePath] -> IO Bool) -> IO Bool
|
withCheckedFiles :: (FilePath -> IO Bool) -> ChunkSize -> FilePath -> Key -> ([FilePath] -> IO Bool) -> IO Bool
|
||||||
withCheckedFiles _ _ [] _ _ = return False
|
withCheckedFiles _ _ [] _ _ = return False
|
||||||
withCheckedFiles check Nothing d k a = go $ locations d k
|
withCheckedFiles check Nothing d k a = go $ locations d k
|
||||||
|
@ -107,18 +86,14 @@ withCheckedFiles check (Just _) d k a = go $ locations d k
|
||||||
where
|
where
|
||||||
go [] = return False
|
go [] = return False
|
||||||
go (f:fs) = do
|
go (f:fs) = do
|
||||||
let chunkcount = chunkCount f
|
let chunkcount = f ++ chunkCount
|
||||||
ifM (check chunkcount)
|
ifM (check chunkcount)
|
||||||
( do
|
( do
|
||||||
count <- readcount chunkcount
|
chunks <- getChunks f <$> readFile chunkcount
|
||||||
let chunks = take count $ chunkStream f
|
|
||||||
ifM (all id <$> mapM check chunks)
|
ifM (all id <$> mapM check chunks)
|
||||||
( a chunks , return False )
|
( a chunks , return False )
|
||||||
, go fs
|
, go fs
|
||||||
)
|
)
|
||||||
readcount f = fromMaybe (error $ "cannot parse " ++ f)
|
|
||||||
. (readish :: String -> Maybe Int)
|
|
||||||
<$> readFile f
|
|
||||||
|
|
||||||
withStoredFiles :: ChunkSize -> FilePath -> Key -> ([FilePath] -> IO Bool) -> IO Bool
|
withStoredFiles :: ChunkSize -> FilePath -> Key -> ([FilePath] -> IO Bool) -> IO Bool
|
||||||
withStoredFiles = withCheckedFiles doesFileExist
|
withStoredFiles = withCheckedFiles doesFileExist
|
||||||
|
@ -203,45 +178,26 @@ meteredWriteFile' meterupdate dest startstate feeder =
|
||||||
meterupdate $ toInteger $ S.length c
|
meterupdate $ toInteger $ S.length c
|
||||||
feed state cs h
|
feed state cs h
|
||||||
|
|
||||||
{- Generates a list of destinations to write to in order to store a key.
|
|
||||||
- When chunksize is specified, this list will be a list of chunks.
|
|
||||||
- The action should store the file, and return a list of the destinations
|
|
||||||
- it stored it to, or [] on error.
|
|
||||||
- The stored files are only put into their final place once storage is
|
|
||||||
- complete.
|
|
||||||
-}
|
|
||||||
storeHelper :: FilePath -> ChunkSize -> Key -> ([FilePath] -> IO [FilePath]) -> Annex Bool
|
storeHelper :: FilePath -> ChunkSize -> Key -> ([FilePath] -> IO [FilePath]) -> Annex Bool
|
||||||
storeHelper d chunksize key a = prep <&&> check <&&> go
|
storeHelper d chunksize key storer = check <&&> go
|
||||||
where
|
where
|
||||||
desttemplate = Prelude.head $ locations d key
|
basedest = Prelude.head $ locations d key
|
||||||
dir = parentDir desttemplate
|
dir = parentDir basedest
|
||||||
tmpdests = case chunksize of
|
|
||||||
Nothing -> [desttemplate ++ tmpprefix]
|
|
||||||
Just _ -> map (++ tmpprefix) (chunkStream desttemplate)
|
|
||||||
tmpprefix = ".tmp"
|
|
||||||
detmpprefix f = take (length f - tmpprefixlen) f
|
|
||||||
tmpprefixlen = length tmpprefix
|
|
||||||
prep = liftIO $ catchBoolIO $ do
|
|
||||||
createDirectoryIfMissing True dir
|
|
||||||
allowWrite dir
|
|
||||||
return True
|
|
||||||
{- The size is not exactly known when encrypting the key;
|
{- The size is not exactly known when encrypting the key;
|
||||||
- this assumes that at least the size of the key is
|
- this assumes that at least the size of the key is
|
||||||
- needed as free space. -}
|
- needed as free space. -}
|
||||||
check = checkDiskSpace (Just dir) key 0
|
check = checkDiskSpace (Just dir) key 0
|
||||||
go = liftIO $ catchBoolIO $ do
|
go = liftIO $ catchBoolIO $ do
|
||||||
stored <- a tmpdests
|
createDirectoryIfMissing True dir
|
||||||
forM_ stored $ \f -> do
|
allowWrite dir
|
||||||
let dest = detmpprefix f
|
preventWrite dir `after` storeChunks basedest chunksize storer recorder finalizer
|
||||||
renameFile f dest
|
finalizer f dest = do
|
||||||
preventWrite dest
|
renameFile f dest
|
||||||
when (chunksize /= Nothing) $ do
|
preventWrite dest
|
||||||
let chunkcount = chunkCount desttemplate
|
recorder f s = do
|
||||||
_ <- tryIO $ allowWrite chunkcount
|
void $ tryIO $ allowWrite f
|
||||||
writeFile chunkcount (show $ length stored)
|
writeFile f s
|
||||||
preventWrite chunkcount
|
preventWrite f
|
||||||
preventWrite dir
|
|
||||||
return (not $ null stored)
|
|
||||||
|
|
||||||
retrieve :: FilePath -> ChunkSize -> Key -> AssociatedFile -> FilePath -> Annex Bool
|
retrieve :: FilePath -> ChunkSize -> Key -> AssociatedFile -> FilePath -> Annex Bool
|
||||||
retrieve d chunksize k _ f = metered Nothing k $ \meterupdate ->
|
retrieve d chunksize k _ f = metered Nothing k $ \meterupdate ->
|
||||||
|
|
121
Remote/Helper/Chunked.hs
Normal file
121
Remote/Helper/Chunked.hs
Normal file
|
@ -0,0 +1,121 @@
|
||||||
|
{- git-annex chunked remotes
|
||||||
|
-
|
||||||
|
- Copyright 2012 Joey Hess <joey@kitenet.net>
|
||||||
|
-
|
||||||
|
- Licensed under the GNU GPL version 3 or higher.
|
||||||
|
-}
|
||||||
|
|
||||||
|
module Remote.Helper.Chunked where
|
||||||
|
|
||||||
|
import Common.Annex
|
||||||
|
import Utility.DataUnits
|
||||||
|
import Types.Remote
|
||||||
|
|
||||||
|
import qualified Data.Map as M
|
||||||
|
import qualified Data.ByteString.Lazy as L
|
||||||
|
import Data.Int
|
||||||
|
import qualified Control.Exception as E
|
||||||
|
|
||||||
|
type ChunkSize = Maybe Int64
|
||||||
|
|
||||||
|
{- Gets a remote's configured chunk size. -}
|
||||||
|
chunkSize :: Maybe RemoteConfig -> ChunkSize
|
||||||
|
chunkSize Nothing = Nothing
|
||||||
|
chunkSize (Just m) =
|
||||||
|
case M.lookup "chunksize" m of
|
||||||
|
Nothing -> Nothing
|
||||||
|
Just v -> case readSize dataUnits v of
|
||||||
|
Nothing -> error "bad chunksize"
|
||||||
|
Just size
|
||||||
|
| size <= 0 -> error "bad chunksize"
|
||||||
|
| otherwise -> Just $ fromInteger size
|
||||||
|
|
||||||
|
{- This is an extension that's added to the usual file (or whatever)
|
||||||
|
- where the remote stores a key. -}
|
||||||
|
type ChunkExt = String
|
||||||
|
|
||||||
|
{- A record of the number of chunks used.
|
||||||
|
-
|
||||||
|
- While this can be guessed at based on the size of the key, encryption
|
||||||
|
- makes that larger. Also, using this helps deal with changes to chunksize
|
||||||
|
- over the life of a remote.
|
||||||
|
-}
|
||||||
|
chunkCount :: ChunkExt
|
||||||
|
chunkCount = ".chunkcount"
|
||||||
|
|
||||||
|
{- Parses the String from the chunkCount file, and returns the files that
|
||||||
|
- are used to store the chunks. -}
|
||||||
|
getChunks :: FilePath -> String -> [FilePath]
|
||||||
|
getChunks basedest chunkcount = take count $ map (basedest ++) chunkStream
|
||||||
|
where
|
||||||
|
count = fromMaybe 0 $ readish chunkcount
|
||||||
|
|
||||||
|
{- An infinite stream of extensions to use for chunks. -}
|
||||||
|
chunkStream :: [ChunkExt]
|
||||||
|
chunkStream = map (\n -> ".chunk" ++ show n) [1 :: Integer ..]
|
||||||
|
|
||||||
|
{- Given the base destination to use to store a value,
|
||||||
|
- generates a stream of temporary destinations (just one when not chunking)
|
||||||
|
- and passes it to an action, which should chunk and store the data,
|
||||||
|
- and return the destinations it stored to, or [] on error.
|
||||||
|
-
|
||||||
|
- Then calles the finalizer to rename the temporary destinations into
|
||||||
|
- their final places (and do any other cleanup), and writes the chunk count
|
||||||
|
- (if chunking)
|
||||||
|
-}
|
||||||
|
storeChunks :: FilePath -> ChunkSize -> ([FilePath] -> IO [FilePath]) -> (FilePath -> String -> IO ()) -> (FilePath -> FilePath -> IO ()) -> IO Bool
|
||||||
|
storeChunks basedest chunksize storer recorder finalizer =
|
||||||
|
either (const $ return False) return
|
||||||
|
=<< (E.try go :: IO (Either E.SomeException Bool))
|
||||||
|
where
|
||||||
|
go = do
|
||||||
|
stored <- storer tmpdests
|
||||||
|
forM_ stored $ \d -> do
|
||||||
|
let dest = detmpprefix d
|
||||||
|
finalizer d dest
|
||||||
|
when (chunksize /= Nothing) $ do
|
||||||
|
let chunkcount = basedest ++ chunkCount
|
||||||
|
recorder chunkcount (show $ length stored)
|
||||||
|
return (not $ null stored)
|
||||||
|
|
||||||
|
tmpprefix = ".tmp"
|
||||||
|
detmpprefix f = take (length f - tmpprefixlen) f
|
||||||
|
tmpprefixlen = length tmpprefix
|
||||||
|
tmpdests
|
||||||
|
| chunksize == Nothing = [basedest ++ tmpprefix]
|
||||||
|
| otherwise = map (++ tmpprefix) $ map (basedest ++) chunkStream
|
||||||
|
|
||||||
|
{- Given a list of destinations to use, chunks the data according to the
|
||||||
|
- ChunkSize, and runs the storer action to store each chunk. Returns
|
||||||
|
- the destinations where data was stored, or [] on error.
|
||||||
|
-
|
||||||
|
- This buffers each chunk in memory.
|
||||||
|
- More optimal versions of this can be written, that rely
|
||||||
|
- on L.toChunks to split the lazy bytestring into chunks (typically
|
||||||
|
- smaller than the ChunkSize), and eg, write those chunks to a Handle.
|
||||||
|
- But this is the best that can be done with the storer interface that
|
||||||
|
- writes a whole L.ByteString at a time.
|
||||||
|
-}
|
||||||
|
storeChunked :: ChunkSize -> [FilePath] -> (FilePath -> L.ByteString -> IO ()) -> L.ByteString -> IO [FilePath]
|
||||||
|
storeChunked chunksize dests storer content =
|
||||||
|
either (const $ return []) return
|
||||||
|
=<< (E.try (go chunksize dests) :: IO (Either E.SomeException [FilePath]))
|
||||||
|
where
|
||||||
|
go _ [] = return [] -- no dests!?
|
||||||
|
|
||||||
|
go Nothing (d:_) = do
|
||||||
|
storer d content
|
||||||
|
return [d]
|
||||||
|
|
||||||
|
go (Just sz) _
|
||||||
|
-- always write a chunk, even if the data is 0 bytes
|
||||||
|
| L.null content = go Nothing dests
|
||||||
|
| otherwise = storechunks sz [] dests content
|
||||||
|
|
||||||
|
storechunks _ _ [] _ = return [] -- ran out of dests
|
||||||
|
storechunks sz useddests (d:ds) b
|
||||||
|
| L.null b = return $ reverse useddests
|
||||||
|
| otherwise = do
|
||||||
|
let (chunk, b') = L.splitAt sz b
|
||||||
|
storer d chunk
|
||||||
|
storechunks sz (d:useddests) ds b'
|
Loading…
Add table
Add a link
Reference in a new issue