add tailVerify
Not yet used, but this will let all remotes verify incrementally if it's acceptable to pay the performance price. See comment for details of when it will perform badly. I anticipate using this for all special remotes that use fileRetriever. Except perhaps for a few like GitLFS that could feed the incremental verifier themselves despite using that. Sponsored-by: Dartmouth College's DANDI project
This commit is contained in:
parent
2e54564061
commit
b6efba8139
1 changed files with 134 additions and 0 deletions
134
Annex/Verify.hs
134
Annex/Verify.hs
|
@ -5,6 +5,8 @@
|
|||
- Licensed under the GNU AGPL version 3 or higher.
|
||||
-}
|
||||
|
||||
{-# LANGUAGE CPP #-}
|
||||
|
||||
module Annex.Verify (
|
||||
VerifyConfig(..),
|
||||
shouldVerify,
|
||||
|
@ -16,6 +18,7 @@ module Annex.Verify (
|
|||
isVerifiable,
|
||||
startVerifyKeyContentIncrementally,
|
||||
IncrementalVerifier(..),
|
||||
tailVerify,
|
||||
) where
|
||||
|
||||
import Annex.Common
|
||||
|
@ -29,6 +32,12 @@ import Annex.WorkerPool
|
|||
import Types.WorkerPool
|
||||
import Types.Key
|
||||
|
||||
#if WITH_INOTIFY
|
||||
import qualified System.INotify as INotify
|
||||
import Control.Concurrent.STM
|
||||
import qualified Data.ByteString as S
|
||||
#endif
|
||||
|
||||
data VerifyConfig = AlwaysVerify | NoVerify | RemoteVerify Remote | DefaultVerify
|
||||
|
||||
shouldVerify :: VerifyConfig -> Annex Bool
|
||||
|
@ -112,3 +121,128 @@ startVerifyKeyContentIncrementally verifyconfig k =
|
|||
Nothing -> return Nothing
|
||||
, return Nothing
|
||||
)
|
||||
|
||||
-- | Reads the file as it grows, and feeds it to the incremental verifier.
|
||||
--
|
||||
-- The TMVar must start out empty, and be filled once whatever is
|
||||
-- writing to the file finishes.
|
||||
--
|
||||
-- The file does not need to exist yet when this is called. It will wait
|
||||
-- for the file to appear before opening it and starting verification.
|
||||
--
|
||||
-- Note that there are situations where the file may fail to verify despite
|
||||
-- having the correct content. For example, when the file is written out
|
||||
-- of order, or gets replaced part way through. To deal with such cases,
|
||||
-- when False is returned, it should not be treated as if the file's
|
||||
-- content is known to be incorrect, but instead as an indication that the
|
||||
-- file should be verified again, once it's done being written to.
|
||||
--
|
||||
-- Also, this is not supported for all OSs, and will return False on OS's
|
||||
-- where it is not supported.
|
||||
--
|
||||
-- This should be fairly efficient, reading from the disk cache,
|
||||
-- as long as the writer does not get very far ahead of it. However,
|
||||
-- there are situations where it would be much less expensive to verify
|
||||
-- chunks as they are being written. For example, when resuming with
|
||||
-- a lot of content in the file, all that content needs to be read,
|
||||
-- and if the disk is slow, the reader may never catch up to the writer,
|
||||
-- and the disk cache may never speed up reads. So this should only be
|
||||
-- used when there's not a better way to incrementally verify.
|
||||
tailVerify :: IncrementalVerifier -> FilePath -> TMVar () -> IO Bool
|
||||
#if WITH_INOTIFY
|
||||
tailVerify iv f finished =
|
||||
catchDefaultIO False $ INotify.withINotify $ \i ->
|
||||
bracket (waitforfiletoexist i) (maybe noop hClose) (go i)
|
||||
where
|
||||
rf = toRawFilePath f
|
||||
d = toRawFilePath (takeDirectory f)
|
||||
|
||||
waitforfiletoexist i = tryIO (openBinaryFile f ReadMode) >>= \case
|
||||
Right h -> return (Just h)
|
||||
Left _ -> do
|
||||
hv <- newEmptyTMVarIO
|
||||
wd <- inotifycreate i $
|
||||
tryIO (openBinaryFile f ReadMode) >>= \case
|
||||
Right h ->
|
||||
unlessM (atomically $ tryPutTMVar hv h) $
|
||||
hClose h
|
||||
Left _ -> return ()
|
||||
-- Wait for the file to appear, or for a signal
|
||||
-- that the file is finished being written.
|
||||
--
|
||||
-- The TMVar is left full to prevent the file
|
||||
-- being opened again if the inotify event
|
||||
-- fires more than once.
|
||||
v <- atomically $
|
||||
(Just <$> readTMVar hv)
|
||||
`orElse`
|
||||
((const Nothing) <$> readTMVar finished)
|
||||
INotify.removeWatch wd
|
||||
return v
|
||||
|
||||
inotifycreate i cont = INotify.addWatch i evs d $ \case
|
||||
-- Ignore changes to other files in the directory.
|
||||
INotify.Created { INotify.filePath = fn }
|
||||
| fn /= rf -> noop
|
||||
INotify.MovedIn { INotify.filePath = fn }
|
||||
| fn /= rf -> noop
|
||||
INotify.Opened { INotify.maybeFilePath = fn }
|
||||
| fn /= Just rf -> noop
|
||||
INotify.Modified { INotify.maybeFilePath = fn }
|
||||
| fn /= Just rf -> noop
|
||||
_ -> cont
|
||||
where
|
||||
evs =
|
||||
[ INotify.Create
|
||||
, INotify.MoveIn
|
||||
, INotify.Move
|
||||
, INotify.Open
|
||||
, INotify.Modify
|
||||
]
|
||||
|
||||
go i (Just h) = do
|
||||
modified <- newEmptyTMVarIO
|
||||
wd <- INotify.addWatch i [INotify.Modify] rf $ \_event ->
|
||||
atomically $ void $ tryPutTMVar modified ()
|
||||
r <- follow h modified
|
||||
INotify.removeWatch wd
|
||||
return r
|
||||
-- File never showed up, but we've been told it's done being
|
||||
-- written to.
|
||||
go _ Nothing = return False
|
||||
|
||||
follow h modified = do
|
||||
b <- S.hGetNonBlocking h chunk
|
||||
if S.null b
|
||||
then do
|
||||
-- We've caught up to the writer.
|
||||
-- Wait for the file to get modified again,
|
||||
-- or until we're told it is done being
|
||||
-- written.
|
||||
cont <- atomically $
|
||||
((const (follow h modified))
|
||||
<$> takeTMVar modified)
|
||||
`orElse`
|
||||
((const (finish h))
|
||||
<$> readTMVar finished)
|
||||
cont
|
||||
else do
|
||||
updateIncremental iv b
|
||||
follow h modified
|
||||
|
||||
-- We've been told the file is done being written to, but we
|
||||
-- may not have reached the end of it yet. Read until EOF.
|
||||
finish h = do
|
||||
b <- S.hGet h chunk
|
||||
if S.null b
|
||||
then finalizeIncremental iv
|
||||
else do
|
||||
updateIncremental iv b
|
||||
finish h
|
||||
|
||||
|
||||
chunk = 65536
|
||||
#else
|
||||
tailVerify _ _ _ = return False -- not supported
|
||||
#endif
|
||||
|
||||
|
|
Loading…
Reference in a new issue