2014-07-24 18:49:22 +00:00
|
|
|
{- legacy git-annex chunked remotes
|
|
|
|
-
|
2015-01-21 16:50:09 +00:00
|
|
|
- Copyright 2012 Joey Hess <id@joeyh.name>
|
2014-07-24 18:49:22 +00:00
|
|
|
-
|
2019-03-13 19:48:14 +00:00
|
|
|
- Licensed under the GNU AGPL version 3 or higher.
|
2014-07-24 18:49:22 +00:00
|
|
|
-}
|
|
|
|
|
|
|
|
module Remote.Helper.Chunked.Legacy where
|
|
|
|
|
2016-01-20 20:36:33 +00:00
|
|
|
import Annex.Common
|
2014-07-24 20:42:35 +00:00
|
|
|
import Remote.Helper.Chunked
|
2014-07-27 00:11:41 +00:00
|
|
|
import Utility.Metered
|
2014-07-24 18:49:22 +00:00
|
|
|
|
2021-02-09 21:03:27 +00:00
|
|
|
import qualified Data.ByteString as S
|
2014-07-24 18:49:22 +00:00
|
|
|
import qualified Data.ByteString.Lazy as L
|
|
|
|
|
|
|
|
{- This is an extension that's added to the usual file (or whatever)
|
|
|
|
- where the remote stores a key. -}
|
|
|
|
type ChunkExt = String
|
|
|
|
|
|
|
|
{- A record of the number of chunks used.
|
|
|
|
-
|
|
|
|
- While this can be guessed at based on the size of the key, encryption
|
|
|
|
- makes that larger. Also, using this helps deal with changes to chunksize
|
|
|
|
- over the life of a remote.
|
|
|
|
-}
|
|
|
|
chunkCount :: ChunkExt
|
|
|
|
chunkCount = ".chunkcount"
|
|
|
|
|
|
|
|
{- An infinite stream of extensions to use for chunks. -}
|
|
|
|
chunkStream :: [ChunkExt]
|
|
|
|
chunkStream = map (\n -> ".chunk" ++ show n) [1 :: Integer ..]
|
|
|
|
|
|
|
|
{- Parses the String from the chunkCount file, and returns the files that
|
|
|
|
- are used to store the chunks. -}
|
|
|
|
listChunks :: FilePath -> String -> [FilePath]
|
|
|
|
listChunks basedest chunkcount = take count $ map (basedest ++) chunkStream
|
|
|
|
where
|
|
|
|
count = fromMaybe 0 $ readish chunkcount
|
|
|
|
|
|
|
|
{- For use when there is no chunkCount file; uses the action to find
|
|
|
|
- chunks, and returns them, or Nothing if none found. Relies on
|
|
|
|
- storeChunks's finalizer atomically moving the chunks into place once all
|
|
|
|
- are written.
|
|
|
|
-
|
|
|
|
- This is only needed to work around a bug that caused the chunkCount file
|
|
|
|
- not to be written.
|
|
|
|
-}
|
|
|
|
probeChunks :: FilePath -> (FilePath -> IO Bool) -> IO [FilePath]
|
|
|
|
probeChunks basedest check = go [] $ map (basedest ++) chunkStream
|
|
|
|
where
|
|
|
|
go l [] = return (reverse l)
|
|
|
|
go l (c:cs) = ifM (check c)
|
|
|
|
( go (c:l) cs
|
|
|
|
, go l []
|
|
|
|
)
|
|
|
|
|
|
|
|
{- Given the base destination to use to store a value,
|
|
|
|
- generates a stream of temporary destinations,
|
|
|
|
- and passes it to an action, which should chunk and store the data,
|
|
|
|
- and return the destinations it stored to, or [] on error. Then
|
|
|
|
- calls the recorder to write the chunk count. Finally, the
|
|
|
|
- finalizer is called to rename the tmp into the dest
|
|
|
|
- (and do any other cleanup).
|
|
|
|
-}
|
2020-05-13 18:03:00 +00:00
|
|
|
storeChunks :: Key -> FilePath -> FilePath -> ([FilePath] -> IO [FilePath]) -> (FilePath -> String -> IO ()) -> (FilePath -> FilePath -> IO ()) -> IO ()
|
|
|
|
storeChunks key tmp dest storer recorder finalizer = do
|
|
|
|
stored <- storer tmpdests
|
|
|
|
let chunkcount = basef ++ chunkCount
|
|
|
|
recorder chunkcount (show $ length stored)
|
|
|
|
finalizer tmp dest
|
|
|
|
when (null stored) $
|
|
|
|
giveup "no chunks were stored"
|
2014-07-24 18:49:22 +00:00
|
|
|
where
|
2019-12-18 20:45:03 +00:00
|
|
|
basef = tmp ++ fromRawFilePath (keyFile key)
|
2014-07-24 18:49:22 +00:00
|
|
|
tmpdests = map (basef ++ ) chunkStream
|
|
|
|
|
|
|
|
{- Given a list of destinations to use, chunks the data according to the
|
|
|
|
- ChunkSize, and runs the storer action to store each chunk. Returns
|
|
|
|
- the destinations where data was stored, or [] on error.
|
|
|
|
-
|
|
|
|
- This buffers each chunk in memory.
|
|
|
|
- More optimal versions of this can be written, that rely
|
|
|
|
- on L.toChunks to split the lazy bytestring into chunks (typically
|
|
|
|
- smaller than the ChunkSize), and eg, write those chunks to a Handle.
|
|
|
|
- But this is the best that can be done with the storer interface that
|
|
|
|
- writes a whole L.ByteString at a time.
|
|
|
|
-}
|
2020-12-02 18:57:43 +00:00
|
|
|
storeChunked :: (Annex () -> IO ()) -> ChunkSize -> [FilePath] -> (FilePath -> L.ByteString -> IO ()) -> L.ByteString -> IO [FilePath]
|
|
|
|
storeChunked annexrunner chunksize dests storer content =
|
2020-06-05 19:16:57 +00:00
|
|
|
either onerr return =<< tryNonAsync (go (Just chunksize) dests)
|
2014-07-24 18:49:22 +00:00
|
|
|
where
|
|
|
|
go _ [] = return [] -- no dests!?
|
|
|
|
go Nothing (d:_) = do
|
|
|
|
storer d content
|
|
|
|
return [d]
|
|
|
|
go (Just sz) _
|
|
|
|
-- always write a chunk, even if the data is 0 bytes
|
|
|
|
| L.null content = go Nothing dests
|
|
|
|
| otherwise = storechunks sz [] dests content
|
|
|
|
|
|
|
|
onerr e = do
|
filter out control characters in warning messages
Converted warning and similar to use StringContainingQuotedPath. Most
warnings are static strings, some do refer to filepaths that need to be
quoted, and others don't need quoting.
Note that, since quote filters out control characters of even
UnquotedString, this makes all warnings safe, even when an attacker
sneaks in a control character in some other way.
When json is being output, no quoting is done, since json gets its own
quoting.
This does, as a side effect, make warning messages in json output not
be indented. The indentation is only needed to offset warning messages
underneath the display of the file they apply to, so that's ok.
Sponsored-by: Brett Eisenberg on Patreon
2023-04-10 18:47:32 +00:00
|
|
|
annexrunner $ warning (UnquotedString (show e))
|
2014-07-24 18:49:22 +00:00
|
|
|
return []
|
|
|
|
|
|
|
|
storechunks _ _ [] _ = return [] -- ran out of dests
|
|
|
|
storechunks sz useddests (d:ds) b
|
|
|
|
| L.null b = return $ reverse useddests
|
|
|
|
| otherwise = do
|
|
|
|
let (chunk, b') = L.splitAt sz b
|
|
|
|
storer d chunk
|
|
|
|
storechunks sz (d:useddests) ds b'
|
2014-07-27 00:11:41 +00:00
|
|
|
|
|
|
|
{- Writes a series of chunks to a file. The feeder is called to get
|
|
|
|
- each chunk.
|
|
|
|
-}
|
|
|
|
meteredWriteFileChunks :: MeterUpdate -> FilePath -> [v] -> (v -> IO L.ByteString) -> IO ()
|
|
|
|
meteredWriteFileChunks meterupdate dest chunks feeder =
|
|
|
|
withBinaryFile dest WriteMode $ \h ->
|
|
|
|
forM_ chunks $
|
2021-02-09 21:03:27 +00:00
|
|
|
meteredWrite meterupdate (S.hPut h) <=< feeder
|