better handling of finishing up incomplete incremental verify

Now it's run in VerifyStage.

I thought about keeping the file handle open, and resuming reading where
tailVerify left off. But that risks leaking open file handles, until the
GC closes them, if the deferred verification does not get resumed. Since
that could perhaps happen if there's an exception somewhere, I decided
that was too unsafe.

Instead, re-open the file, seek, and resume.

Sponsored-by: Dartmouth College's DANDI project
This commit is contained in:
Joey Hess 2021-08-16 14:50:21 -04:00
parent e0b7f391bd
commit c4aba8e032
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
7 changed files with 114 additions and 88 deletions

View file

@ -37,7 +37,6 @@ import Control.Concurrent.STM
import qualified System.INotify as INotify import qualified System.INotify as INotify
import qualified Data.ByteString as S import qualified Data.ByteString as S
import qualified System.FilePath.ByteString as P import qualified System.FilePath.ByteString as P
import Data.Time.Clock.POSIX
#endif #endif
data VerifyConfig = AlwaysVerify | NoVerify | RemoteVerify Remote | DefaultVerify data VerifyConfig = AlwaysVerify | NoVerify | RemoteVerify Remote | DefaultVerify
@ -82,23 +81,69 @@ verifyKeyContentPostRetrieval rsp v verification k f = case (rsp, verification)
, return True , return True
) )
(_, MustVerify) -> verify (_, MustVerify) -> verify
(_, IncompleteVerify _) -> ifM (shouldVerify v)
( verify
, return True
)
where where
verify = enteringStage VerifyStage $ verifyKeyContent k f verify = enteringStage VerifyStage $
case verification of
IncompleteVerify iv -> resumeVerifyKeyContent k f iv
_ -> verifyKeyContent k f
verifyKeyContent :: Key -> RawFilePath -> Annex Bool verifyKeyContent :: Key -> RawFilePath -> Annex Bool
verifyKeyContent k f = verifysize <&&> verifycontent verifyKeyContent k f = verifyKeySize k f <&&> verifyKeyContent' k f
where
verifysize = case fromKey keySize k of verifyKeyContent' :: Key -> RawFilePath -> Annex Bool
Nothing -> return True verifyKeyContent' k f =
Just size -> do Backend.maybeLookupBackendVariety (fromKey keyVariety k) >>= \case
size' <- liftIO $ catchDefaultIO 0 $ getFileSize f
return (size' == size)
verifycontent = Backend.maybeLookupBackendVariety (fromKey keyVariety k) >>= \case
Nothing -> return True Nothing -> return True
Just b -> case Types.Backend.verifyKeyContent b of Just b -> case Types.Backend.verifyKeyContent b of
Nothing -> return True Nothing -> return True
Just verifier -> verifier k f Just verifier -> verifier k f
resumeVerifyKeyContent :: Key -> RawFilePath -> IncrementalVerifier -> Annex Bool
resumeVerifyKeyContent k f iv = liftIO (positionIncremental iv) >>= \case
Nothing -> fallback
Just endpos -> do
fsz <- liftIO $ catchDefaultIO 0 $ getFileSize f
if fsz < endpos
then fallback
else case fromKey keySize k of
Just size | fsz /= size -> return False
_ -> go fsz endpos
where
fallback = verifyKeyContent k f
go fsz endpos
| fsz == endpos =
liftIO $ catchDefaultIO False $
finalizeIncremental iv
| otherwise = do
showAction (descVerify iv)
liftIO $ catchDefaultIO False $
withBinaryFile (fromRawFilePath f) ReadMode $ \h -> do
hSeek h AbsoluteSeek endpos
feedincremental h
finalizeIncremental iv
feedincremental h = do
b <- S.hGetSome h chunk
if S.null b
then return ()
else do
updateIncremental iv b
feedincremental h
chunk = 65536
verifyKeySize :: Key -> RawFilePath -> Annex Bool
verifyKeySize k f = case fromKey keySize k of
Just size -> do
size' <- liftIO $ catchDefaultIO 0 $ getFileSize f
return (size' == size)
Nothing -> return True
warnUnverifiableInsecure :: Key -> Annex () warnUnverifiableInsecure :: Key -> Annex ()
warnUnverifiableInsecure k = warning $ unwords warnUnverifiableInsecure k = warning $ unwords
[ "Getting " ++ kv ++ " keys with this remote is not secure;" [ "Getting " ++ kv ++ " keys with this remote is not secure;"
@ -127,7 +172,9 @@ startVerifyKeyContentIncrementally verifyconfig k =
-- | Reads the file as it grows, and feeds it to the incremental verifier. -- | Reads the file as it grows, and feeds it to the incremental verifier.
-- --
-- The TMVar must start out empty, and be filled once whatever is -- The TMVar must start out empty, and be filled once whatever is
-- writing to the file finishes. -- writing to the file finishes. Once the writer finishes, this returns
-- quickly. It may not feed the entire content of the file to the
-- incremental verifier.
-- --
-- The file does not need to exist yet when this is called. It will wait -- The file does not need to exist yet when this is called. It will wait
-- for the file to appear before opening it and starting verification. -- for the file to appear before opening it and starting verification.
@ -155,22 +202,12 @@ startVerifyKeyContentIncrementally verifyconfig k =
-- and if the disk is slow, the reader may never catch up to the writer, -- and if the disk is slow, the reader may never catch up to the writer,
-- and the disk cache may never speed up reads. So this should only be -- and the disk cache may never speed up reads. So this should only be
-- used when there's not a better way to incrementally verify. -- used when there's not a better way to incrementally verify.
-- tailVerify :: IncrementalVerifier -> RawFilePath -> TMVar () -> IO ()
-- If the writer gets far ahead, this can still need to do a significant
-- amount off work once the writer is finished. That could lead to a long
-- pause with no indication to the user about what is being done. To deal
-- with this problem, it will do at most half a second of work after the
-- writer has finished. If there is more work still to do, it returns an IO
-- action that will complete the work. This way, the caller can display
-- something appropriate while that is running.
tailVerify :: IncrementalVerifier -> RawFilePath -> TMVar () -> IO (Maybe (IO ()))
#if WITH_INOTIFY #if WITH_INOTIFY
tailVerify iv f finished = tailVerify iv f finished =
tryNonAsync go >>= \case tryNonAsync go >>= \case
Right r -> return r Right r -> return r
Left _ -> do Left _ -> failIncremental iv
failIncremental iv
return Nothing
where where
-- Watch the directory containing the file, and wait for -- Watch the directory containing the file, and wait for
-- the file to be modified. It's possible that the file already -- the file to be modified. It's possible that the file already
@ -196,19 +233,15 @@ tailVerify iv f finished =
let stop w = do let stop w = do
cleanup w cleanup w
failIncremental iv failIncremental iv
return Nothing
waitopen modified >>= \case waitopen modified >>= \case
Nothing -> stop wd Nothing -> stop wd
Just h -> do Just h -> do
cleanup wd cleanup wd
wf <- inotifyfilechange i signalmodified wf <- inotifyfilechange i signalmodified
tryNonAsync (follow h modified) >>= \case tryNonAsync (follow h modified) >>= \case
Left _ -> do Left _ -> stop wf
Right () -> cleanup wf
hClose h hClose h
stop wf
Right r -> do
cleanup wf
return r
waitopen modified = do waitopen modified = do
v <- atomically $ v <- atomically $
@ -233,47 +266,19 @@ tailVerify iv f finished =
-- or until we're told it is done being -- or until we're told it is done being
-- written. -- written.
cont <- atomically $ cont <- atomically $
((const (follow h modified)) (const (follow h modified)
<$> takeTMVar modified) <$> takeTMVar modified)
`orElse` `orElse`
((const (finish h =<< getPOSIXTime)) (const (return ())
<$> takeTMVar finished) <$> takeTMVar finished)
cont cont
else do else do
updateIncremental iv b updateIncremental iv b
atomically (tryTakeTMVar finished) >>= \case atomically (tryTakeTMVar finished) >>= \case
Nothing -> follow h modified Nothing -> follow h modified
Just () -> finish h =<< getPOSIXTime Just () -> return ()
-- We've been told the file is done being written to, but we
-- may not have reached the end of it yet.
finish h starttime = do
b <- S.hGet h chunk
if S.null b
then do
hClose h
return Nothing
else do
updateIncremental iv b
now <- getPOSIXTime
if now - starttime > 0.5
then return $ Just $
tryNonAsync (deferredfinish h) >>= \case
Right () -> noop
Left _ -> failIncremental iv
else finish h starttime
deferredfinish h = do
b <- S.hGet h chunk
if S.null b
then hClose h
else do
updateIncremental iv b
deferredfinish h
chunk = 65536 chunk = 65536
#else #else
tailVerify iv _ _ = do tailVerify iv _ _ = failIncremental iv
failIncremental iv
return Nothing
#endif #endif

View file

@ -6,6 +6,7 @@
-} -}
{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE BangPatterns #-}
module Backend.Hash ( module Backend.Hash (
backends, backends,
@ -280,19 +281,25 @@ md5Hasher = mkHasher md5 md5_context
mkIncrementalVerifier :: HashAlgorithm h => Context h -> Key -> IO IncrementalVerifier mkIncrementalVerifier :: HashAlgorithm h => Context h -> Key -> IO IncrementalVerifier
mkIncrementalVerifier ctx key = do mkIncrementalVerifier ctx key = do
v <- newIORef (Just ctx) v <- newIORef (Just (ctx, 0))
return $ IncrementalVerifier return $ IncrementalVerifier
{ updateIncremental = \b -> { updateIncremental = \b ->
modifyIORef' v $ \case modifyIORef' v $ \case
Just ctx' -> Just (hashUpdate ctx' b) (Just (ctx', n)) ->
let !ctx'' = hashUpdate ctx' b
!n' = n + fromIntegral (S.length b)
in (Just (ctx'', n'))
Nothing -> Nothing Nothing -> Nothing
, finalizeIncremental = , finalizeIncremental =
readIORef v >>= \case readIORef v >>= \case
Just ctx' -> do (Just (ctx', _)) -> do
let digest = hashFinalize ctx' let digest = hashFinalize ctx'
return $ sameCheckSum key (show digest) return $ sameCheckSum key (show digest)
Nothing -> return False Nothing -> return False
, failIncremental = writeIORef v Nothing , failIncremental = writeIORef v Nothing
, positionIncremental = readIORef v >>= \case
Just (_, n) -> return (Just n)
Nothing -> return Nothing
, descVerify = descChecksum , descVerify = descChecksum
} }

View file

@ -279,7 +279,8 @@ retrieveChunks retriever u vc chunkconfig encryptor basek dest basep enc encc
currsize <- liftIO $ catchMaybeIO $ getFileSize (toRawFilePath dest) currsize <- liftIO $ catchMaybeIO $ getFileSize (toRawFilePath dest)
let ls' = maybe ls (setupResume ls) currsize let ls' = maybe ls (setupResume ls) currsize
if any null ls' if any null ls'
then finalize Nothing -- dest is already complete -- dest is already complete
then finalize (Right Nothing)
else finalize =<< firstavail pe currsize ls' else finalize =<< firstavail pe currsize ls'
firstavail Nothing _ [] = giveup "unable to determine the chunks to use for this remote" firstavail Nothing _ [] = giveup "unable to determine the chunks to use for this remote"
@ -306,7 +307,7 @@ retrieveChunks retriever u vc chunkconfig encryptor basek dest basep enc encc
| otherwise -> firstavail (Just e) currsize ls | otherwise -> firstavail (Just e) currsize ls
Right r -> return r Right r -> return r
getrest _ _ iv _ _ [] = return iv getrest _ _ iv _ _ [] = return (Right iv)
getrest p h iv sz bytesprocessed (k:ks) = do getrest p h iv sz bytesprocessed (k:ks) = do
let p' = offsetMeterUpdate p bytesprocessed let p' = offsetMeterUpdate p bytesprocessed
liftIO $ p' zeroBytesProcessed liftIO $ p' zeroBytesProcessed
@ -317,14 +318,21 @@ retrieveChunks retriever u vc chunkconfig encryptor basek dest basep enc encc
getunchunked = do getunchunked = do
iv <- startVerifyKeyContentIncrementally vc basek iv <- startVerifyKeyContentIncrementally vc basek
case enc of case enc of
Just _ -> retriever (encryptor basek) basep Nothing $ Just _ -> do
retriever (encryptor basek) basep Nothing $
retrieved iv Nothing basep retrieved iv Nothing basep
return (Right iv)
-- Not chunked and not encrypted, so ask the -- Not chunked and not encrypted, so ask the
-- retriever to incrementally verify when it -- retriever to incrementally verify when it
-- retrieves to a file. -- retrieves to a file. It may not finish
Nothing -> retriever (encryptor basek) basep iv $ -- passing the whole file content to the
-- incremental verifier though.
Nothing -> do
retriever (encryptor basek) basep iv $
retrieved iv Nothing basep retrieved iv Nothing basep
return iv return $ case iv of
Nothing -> Right iv
Just iv' -> Left (IncompleteVerify iv')
opennew = do opennew = do
iv <- startVerifyKeyContentIncrementally vc basek iv <- startVerifyKeyContentIncrementally vc basek
@ -357,12 +365,13 @@ retrieveChunks retriever u vc chunkconfig encryptor basek dest basep enc encc
| isByteContent content = Just p | isByteContent content = Just p
| otherwise = Nothing | otherwise = Nothing
finalize Nothing = return UnVerified finalize (Right Nothing) = return UnVerified
finalize (Just iv) = finalize (Right (Just iv)) =
ifM (liftIO $ finalizeIncremental iv) ifM (liftIO $ finalizeIncremental iv)
( return Verified ( return Verified
, return UnVerified , return UnVerified
) )
finalize (Left v) = return v
{- Writes retrieved file content to the provided Handle, decrypting it {- Writes retrieved file content to the provided Handle, decrypting it
- first if necessary. - first if necessary.

View file

@ -113,7 +113,8 @@ byteRetriever a k _m _miv callback = a k (callback . ByteContent)
-- A Retriever that writes the content of a Key to a provided file. -- A Retriever that writes the content of a Key to a provided file.
-- The action is responsible for updating the progress meter as it -- The action is responsible for updating the progress meter as it
-- retrieves data. The incremental verifier is updated in the background as -- retrieves data. The incremental verifier is updated in the background as
-- the action writes to the file. -- the action writes to the file, but may not be updated with the entire
-- content of the file.
fileRetriever :: (FilePath -> Key -> MeterUpdate -> Annex ()) -> Retriever fileRetriever :: (FilePath -> Key -> MeterUpdate -> Annex ()) -> Retriever
fileRetriever a k m miv callback = do fileRetriever a k m miv callback = do
f <- prepTmp k f <- prepTmp k
@ -123,13 +124,10 @@ fileRetriever a k m miv callback = do
Just iv -> do Just iv -> do
finished <- liftIO newEmptyTMVarIO finished <- liftIO newEmptyTMVarIO
t <- liftIO $ async $ tailVerify iv f finished t <- liftIO $ async $ tailVerify iv f finished
retrieve let finishtail = do
liftIO $ atomically $ putTMVar finished () liftIO $ atomically $ putTMVar finished ()
liftIO (wait t) >>= \case liftIO (wait t)
Nothing -> noop retrieve `finally` finishtail
Just deferredverify -> do
showAction (descVerify iv)
liftIO deferredverify
pruneTmpWorkDirBefore f (callback . FileContent . fromRawFilePath) pruneTmpWorkDirBefore f (callback . FileContent . fromRawFilePath)
{- The base Remote that is provided to specialRemote needs to have {- The base Remote that is provided to specialRemote needs to have

View file

@ -52,6 +52,10 @@ data IncrementalVerifier = IncrementalVerifier
-- if the hash verified. -- if the hash verified.
, failIncremental :: IO () , failIncremental :: IO ()
-- ^ Call if the incremental verification needs to fail. -- ^ Call if the incremental verification needs to fail.
, positionIncremental :: IO (Maybe Integer)
-- ^ Returns the number of bytes that have been fed to this
-- incremental verifier so far. (Nothing if failIncremental was
-- called.)
, descVerify :: String , descVerify :: String
-- ^ A description of what is done to verify the content. -- ^ A description of what is done to verify the content.
} }

View file

@ -2,7 +2,7 @@
- -
- Most things should not need this, using Types instead - Most things should not need this, using Types instead
- -
- Copyright 2011-2020 Joey Hess <id@joeyh.name> - Copyright 2011-2021 Joey Hess <id@joeyh.name>
- -
- Licensed under the GNU AGPL version 3 or higher. - Licensed under the GNU AGPL version 3 or higher.
-} -}
@ -41,6 +41,7 @@ import Types.NumCopies
import Types.Export import Types.Export
import Types.Import import Types.Import
import Types.RemoteConfig import Types.RemoteConfig
import Types.Backend (IncrementalVerifier)
import Config.Cost import Config.Cost
import Utility.Metered import Utility.Metered
import Git.Types (RemoteName) import Git.Types (RemoteName)
@ -203,7 +204,9 @@ data Verification
| MustVerify | MustVerify
-- ^ Content likely to have been altered during transfer, -- ^ Content likely to have been altered during transfer,
-- verify even if verification is normally disabled -- verify even if verification is normally disabled
deriving (Show) | IncompleteVerify IncrementalVerifier
-- ^ Content was partially verified during transfer, but
-- the verification is not complete.
unVerified :: Monad m => m a -> m (a, Verification) unVerified :: Monad m => m a -> m (a, Verification)
unVerified a = do unVerified a = do

View file

@ -35,8 +35,8 @@ type Storer = Key -> ContentSource -> MeterUpdate -> Annex ()
-- --
-- When it retrieves FileContent, it is responsible for updating the -- When it retrieves FileContent, it is responsible for updating the
-- MeterUpdate. And when the IncrementalVerifier is passed to it, -- MeterUpdate. And when the IncrementalVerifier is passed to it,
-- and it retrieves FileContent, it should feed the content to the -- and it retrieves FileContent, it can feed some or all of the file's
-- verifier before running the callback. -- content to the verifier before running the callback.
-- This should not be done when it retrieves ByteContent. -- This should not be done when it retrieves ByteContent.
type Retriever = forall a. type Retriever = forall a.
Key -> MeterUpdate -> Maybe IncrementalVerifier Key -> MeterUpdate -> Maybe IncrementalVerifier