git-annex/Utility/Metered.hs
Joey Hess a787cead35 bittorrent: Fix mojibake introduced in parsing arai2c progress output.
hGetSomeString reads one byte at a time, so unicode bytes are not composed.
The problem comes when outputting that to the console with hPut; that
tried to apply the handle's encoding, and so we get mojibake.

Instead, use ByteStrings, and only convert it to a string for parsing, not
for display.

Note that there are a couple of other things that use hGetSomeString,
which I've left as-is for now.
2015-02-10 12:34:34 -04:00

180 lines
6 KiB
Haskell

{- Metered IO
-
- Copyright 2012-2105 Joey Hess <id@joeyh.name>
-
- License: BSD-2-clause
-}
{-# LANGUAGE TypeSynonymInstances #-}
module Utility.Metered where
import Common
import qualified Data.ByteString.Lazy as L
import qualified Data.ByteString as S
import System.IO.Unsafe
import Foreign.Storable (Storable(sizeOf))
import System.Posix.Types
import Data.Int
import Data.Bits.Utils
{- An action that can be run repeatedly, updating it on the bytes processed.
-
- Note that each call receives the total number of bytes processed, so
- far, *not* an incremental amount since the last call. -}
type MeterUpdate = (BytesProcessed -> IO ())
nullMeterUpdate :: MeterUpdate
nullMeterUpdate _ = return ()
{- Total number of bytes processed so far. -}
newtype BytesProcessed = BytesProcessed Integer
deriving (Eq, Ord, Show)
class AsBytesProcessed a where
toBytesProcessed :: a -> BytesProcessed
fromBytesProcessed :: BytesProcessed -> a
instance AsBytesProcessed BytesProcessed where
toBytesProcessed = id
fromBytesProcessed = id
instance AsBytesProcessed Integer where
toBytesProcessed i = BytesProcessed i
fromBytesProcessed (BytesProcessed i) = i
instance AsBytesProcessed Int where
toBytesProcessed i = BytesProcessed $ toInteger i
fromBytesProcessed (BytesProcessed i) = fromInteger i
instance AsBytesProcessed Int64 where
toBytesProcessed i = BytesProcessed $ toInteger i
fromBytesProcessed (BytesProcessed i) = fromInteger i
instance AsBytesProcessed FileOffset where
toBytesProcessed sz = BytesProcessed $ toInteger sz
fromBytesProcessed (BytesProcessed sz) = fromInteger sz
addBytesProcessed :: AsBytesProcessed v => BytesProcessed -> v -> BytesProcessed
addBytesProcessed (BytesProcessed i) v =
let (BytesProcessed n) = toBytesProcessed v
in BytesProcessed $! i + n
zeroBytesProcessed :: BytesProcessed
zeroBytesProcessed = BytesProcessed 0
{- Sends the content of a file to an action, updating the meter as it's
- consumed. -}
withMeteredFile :: FilePath -> MeterUpdate -> (L.ByteString -> IO a) -> IO a
withMeteredFile f meterupdate a = withBinaryFile f ReadMode $ \h ->
hGetContentsMetered h meterupdate >>= a
{- Sends the content of a file to a Handle, updating the meter as it's
- written. -}
streamMeteredFile :: FilePath -> MeterUpdate -> Handle -> IO ()
streamMeteredFile f meterupdate h = withMeteredFile f meterupdate $ L.hPut h
{- Writes a ByteString to a Handle, updating a meter as it's written. -}
meteredWrite :: MeterUpdate -> Handle -> L.ByteString -> IO ()
meteredWrite meterupdate h = go zeroBytesProcessed . L.toChunks
where
go _ [] = return ()
go sofar (c:cs) = do
S.hPut h c
let sofar' = addBytesProcessed sofar $ S.length c
meterupdate sofar'
go sofar' cs
meteredWriteFile :: MeterUpdate -> FilePath -> L.ByteString -> IO ()
meteredWriteFile meterupdate f b = withBinaryFile f WriteMode $ \h ->
meteredWrite meterupdate h b
{- Applies an offset to a MeterUpdate. This can be useful when
- performing a sequence of actions, such as multiple meteredWriteFiles,
- that all update a common meter progressively. Or when resuming.
-}
offsetMeterUpdate :: MeterUpdate -> BytesProcessed -> MeterUpdate
offsetMeterUpdate base offset = \n -> base (offset `addBytesProcessed` n)
{- This is like L.hGetContents, but after each chunk is read, a meter
- is updated based on the size of the chunk.
-
- All the usual caveats about using unsafeInterleaveIO apply to the
- meter updates, so use caution.
-}
hGetContentsMetered :: Handle -> MeterUpdate -> IO L.ByteString
hGetContentsMetered h = hGetUntilMetered h (const True)
{- Reads from the Handle, updating the meter after each chunk.
-
- Note that the meter update is run in unsafeInterleaveIO, which means that
- it can be run at any time. It's even possible for updates to run out
- of order, as different parts of the ByteString are consumed.
-
- Stops at EOF, or when keepgoing evaluates to False.
- Closes the Handle at EOF, but otherwise leaves it open.
-}
hGetUntilMetered :: Handle -> (Integer -> Bool) -> MeterUpdate -> IO L.ByteString
hGetUntilMetered h keepgoing meterupdate = lazyRead zeroBytesProcessed
where
lazyRead sofar = unsafeInterleaveIO $ loop sofar
loop sofar = do
c <- S.hGet h defaultChunkSize
if S.null c
then do
hClose h
return $ L.empty
else do
let sofar' = addBytesProcessed sofar (S.length c)
meterupdate sofar'
if keepgoing (fromBytesProcessed sofar')
then do
{- unsafeInterleaveIO causes this to be
- deferred until the data is read from the
- ByteString. -}
cs <- lazyRead sofar'
return $ L.append (L.fromChunks [c]) cs
else return $ L.fromChunks [c]
{- Same default chunk size Lazy ByteStrings use. -}
defaultChunkSize :: Int
defaultChunkSize = 32 * k - chunkOverhead
where
k = 1024
chunkOverhead = 2 * sizeOf (undefined :: Int) -- GHC specific
{- Parses the String looking for a command's progress output, and returns
- Maybe the number of bytes rsynced so far, and any any remainder of the
- string that could be an incomplete progress output. That remainder
- should be prepended to future output, and fed back in. This interface
- allows the command's output to be read in any desired size chunk, or
- even one character at a time.
-}
type ProgressParser = String -> (Maybe BytesProcessed, String)
{- Runs a command and runs a ProgressParser on its output, in order
- to update the meter. The command's output is also sent to stdout. -}
commandMeter :: ProgressParser -> MeterUpdate -> FilePath -> [CommandParam] -> IO Bool
commandMeter progressparser meterupdate cmd params = liftIO $ catchBoolIO $
withHandle StdoutHandle createProcessSuccess p $
feedprogress zeroBytesProcessed []
where
p = proc cmd (toCommand params)
feedprogress prev buf h = do
b <- S.hGetSome h 80
if S.null b
then return True
else do
S.hPut stdout b
hFlush stdout
let s = w82s (S.unpack b)
let (mbytes, buf') = progressparser (buf++s)
case mbytes of
Nothing -> feedprogress prev buf' h
(Just bytes) -> do
when (bytes /= prev) $
meterupdate bytes
feedprogress bytes buf' h