incremental verification for retrieval from all export remotes

Only for export remotes so far, not export/import.

Sponsored-by: Dartmouth College's Datalad project
This commit is contained in:
Joey Hess 2022-05-09 13:18:47 -04:00
parent 90950a37e5
commit 2f2701137d
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
11 changed files with 73 additions and 54 deletions
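
The pattern applied to each export remote's retrieveExportM below is to wrap the
existing download in verifyKeyContentIncrementally and, when the remote's transfer
tool writes the destination file on its own, to also wrap it in tailVerify so the
file is hashed as it grows. A minimal sketch of that shape, using the
verifyKeyContentIncrementally and tailVerify signatures added in Annex.Verify below;
downloadToDest is a hypothetical stand-in for a remote's existing retrieval action,
and the real retrieveExportM implementations take extra remote-specific arguments:

    retrieveExportM :: Key -> ExportLocation -> FilePath -> MeterUpdate -> Annex Verification
    retrieveExportM k loc dest p =
        verifyKeyContentIncrementally AlwaysVerify k $ \iv ->
            -- tailVerify reads dest as the writer fills it and feeds that to
            -- the incremental verifier; with Nothing it just runs the writer.
            tailVerify iv (toRawFilePath dest) $
                downloadToDest loc dest p
      where
        -- hypothetical; stands in for e.g. retrieve' (adb) or rsyncRetrieve (rsync)
        downloadToDest :: ExportLocation -> FilePath -> MeterUpdate -> Annex ()
        downloadToDest _ _ _ = giveup "stand-in only"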


@@ -1,6 +1,6 @@
{- verification
-
- Copyright 2010-2021 Joey Hess <id@joeyh.name>
- Copyright 2010-2022 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
@@ -17,6 +17,7 @@ module Annex.Verify (
isVerifiable,
startVerifyKeyContentIncrementally,
finishVerifyKeyContentIncrementally,
verifyKeyContentIncrementally,
IncrementalVerifier(..),
tailVerify,
) where
@@ -34,6 +35,7 @@ import Types.WorkerPool
import Types.Key
import Control.Concurrent.STM
import Control.Concurrent.Async
import qualified Data.ByteString as S
#if WITH_INOTIFY
import qualified System.INotify as INotify
@@ -186,12 +188,17 @@ finishVerifyKeyContentIncrementally (Just iv) =
-- Incremental verification was not able to be done.
Nothing -> return (True, UnVerified)
-- | Reads the file as it grows, and feeds it to the incremental verifier.
verifyKeyContentIncrementally :: VerifyConfig -> Key -> (Maybe IncrementalVerifier -> Annex ()) -> Annex Verification
verifyKeyContentIncrementally verifyconfig k a = do
miv <- startVerifyKeyContentIncrementally verifyconfig k
a miv
snd <$> finishVerifyKeyContentIncrementally miv
-- | Runs a writer action that retrieves to a file. In another thread,
-- reads the file as it grows, and feeds it to the incremental verifier.
--
-- The TMVar must start out empty, and be filled once whatever is
-- writing to the file finishes. Once the writer finishes, this returns
-- quickly. It may not feed the entire content of the file to the
-- incremental verifier.
-- Once the writer finishes, this returns quickly. It may not feed
-- the entire content of the file to the incremental verifier.
--
-- The file does not need to exist yet when this is called. It will wait
-- for the file to appear before opening it and starting verification.
@@ -223,9 +230,19 @@ finishVerifyKeyContentIncrementally (Just iv) =
-- and if the disk is slow, the reader may never catch up to the writer,
-- and the disk cache may never speed up reads. So this should only be
-- used when there's not a better way to incrementally verify.
tailVerify :: IncrementalVerifier -> RawFilePath -> TMVar () -> IO ()
tailVerify :: Maybe IncrementalVerifier -> RawFilePath -> Annex a -> Annex a
tailVerify (Just iv) f writer = do
finished <- liftIO newEmptyTMVarIO
t <- liftIO $ async $ tailVerify' iv f finished
let finishtail = do
liftIO $ atomically $ putTMVar finished ()
liftIO (wait t)
writer `finally` finishtail
tailVerify Nothing _ writer = writer
tailVerify' :: IncrementalVerifier -> RawFilePath -> TMVar () -> IO ()
#if WITH_INOTIFY
tailVerify iv f finished =
tailVerify' iv f finished =
tryNonAsync go >>= \case
Right r -> return r
Left _ -> unableIncrementalVerifier iv
@@ -312,5 +329,5 @@ tailVerify iv f finished =
chunk = 65536
#else
tailVerify iv _ _ = unableIncrementalVerifier iv
tailVerify' iv _ _ = unableIncrementalVerifier iv
#endif
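
tailVerify covers remotes whose transfer program writes the destination file
itself. When the retrieval code handles the downloaded bytes directly (as the
Directory, S3, WebDAV and httpalso changes below do by passing iv to fileCopier,
retrieveHelper or downloadAction), it can feed the verifier without tailing the
file. A rough sketch of that direct style, assuming the updateIncrementalVerifier
field of IncrementalVerifier from Utility.Hash feeds one chunk to the hasher, and
assuming System.IO plus qualified Data.ByteString as S imports; downloadChunks is
a hypothetical source of the downloaded bytes:

    -- Hypothetical downloader: each chunk is written to the destination
    -- and, when an incremental verifier is in use, also fed to it.
    downloadFeedingVerifier :: FilePath -> Maybe IncrementalVerifier -> Annex ()
    downloadFeedingVerifier dest miv = liftIO $
        withBinaryFile dest WriteMode $ \h ->
            downloadChunks $ \chunk -> do
                S.hPut h chunk
                -- assumed field name; feeds the chunk to the hash being built up
                maybe noop (\iv -> updateIncrementalVerifier iv chunk) miv

    -- hypothetical; stands in for whatever streams the remote's content
    downloadChunks :: (S.ByteString -> IO ()) -> IO ()
    downloadChunks _ = return ()

A caller then gets a Verification back with
verifyKeyContentIncrementally vc k (downloadFeedingVerifier dest).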


@@ -1,6 +1,6 @@
{- Remote on Android device accessed using adb.
-
- Copyright 2018-2020 Joey Hess <id@joeyh.name>
- Copyright 2018-2022 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
@@ -22,6 +22,7 @@ import Annex.UUID
import Utility.Metered
import Types.ProposedAccepted
import Annex.SpecialRemote.Config
import Annex.Verify
import qualified Data.Map as M
import qualified System.FilePath.Posix as Posix
@@ -256,9 +257,10 @@ storeExportM serial adir src _k loc _p =
dest = androidExportLocation adir loc
retrieveExportM :: AndroidSerial -> AndroidPath -> Key -> ExportLocation -> FilePath -> MeterUpdate -> Annex Verification
retrieveExportM serial adir _k loc dest _p = do
retrieveExportM serial adir k loc dest _p =
verifyKeyContentIncrementally AlwaysVerify k $ \iv ->
tailVerify iv (toRawFilePath dest) $
retrieve' serial src dest
return UnVerified
where
src = androidExportLocation adir loc


@@ -36,6 +36,7 @@ import Annex.CopyFile
import Annex.Content
import Annex.Perms
import Annex.UUID
import Annex.Verify
import Backend
import Types.KeySource
import Types.ProposedAccepted
@@ -317,9 +318,9 @@ storeExportM d cow src _k loc p = do
go tmp () = void $ fileCopier cow src tmp p Nothing
retrieveExportM :: RawFilePath -> CopyCoWTried -> Key -> ExportLocation -> FilePath -> MeterUpdate -> Annex Verification
retrieveExportM d cow _k loc dest p = do
void $ fileCopier cow src dest p Nothing
return UnVerified
retrieveExportM d cow k loc dest p =
verifyKeyContentIncrementally AlwaysVerify k $ \iv ->
void $ fileCopier cow src dest p iv
where
src = fromRawFilePath $ exportPath d loc


@@ -1,6 +1,6 @@
{- External special remote interface.
-
- Copyright 2013-2020 Joey Hess <id@joeyh.name>
- Copyright 2013-2022 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
@@ -37,6 +37,7 @@ import Config.Cost
import Annex.Content
import Annex.Url
import Annex.UUID
import Annex.Verify
import Creds
import Control.Concurrent.STM
@@ -292,9 +293,10 @@ storeExportM external f k loc p = either giveup return =<< go
req sk = TRANSFEREXPORT Upload sk f
retrieveExportM :: External -> Key -> ExportLocation -> FilePath -> MeterUpdate -> Annex Verification
retrieveExportM external k loc d p = do
retrieveExportM external k loc dest p = do
verifyKeyContentIncrementally AlwaysVerify k $ \iv ->
tailVerify iv (toRawFilePath dest) $
either giveup return =<< go
return UnVerified
where
go = handleRequestExport external loc req k (Just p) $ \resp -> case resp of
TRANSFER_SUCCESS Download k'
@@ -304,7 +306,7 @@ retrieveExportM external k loc d p = do
UNSUPPORTED_REQUEST ->
result $ Left "TRANSFEREXPORT not implemented by external special remote"
_ -> Nothing
req sk = TRANSFEREXPORT Download sk d
req sk = TRANSFEREXPORT Download sk dest
checkPresentExportM :: External -> Key -> ExportLocation -> Annex Bool
checkPresentExportM external k loc = either giveup id <$> go


@@ -491,14 +491,12 @@ copyFromRemote r st key file dest meterupdate vc = do
copyFromRemote'' :: Git.Repo -> Remote -> State -> Key -> AssociatedFile -> FilePath -> MeterUpdate -> VerifyConfig -> Annex Verification
copyFromRemote'' repo r st@(State connpool _ _ _ _) key file dest meterupdate vc
| Git.repoIsHttp repo = do
iv <- startVerifyKeyContentIncrementally vc key
| Git.repoIsHttp repo = verifyKeyContentIncrementally vc key $ \iv -> do
gc <- Annex.getGitConfig
ok <- Url.withUrlOptionsPromptingCreds $
Annex.Content.downloadUrl False key meterupdate iv (keyUrls gc repo r key) dest
unless ok $
giveup "failed to download content"
snd <$> finishVerifyKeyContentIncrementally iv
| not $ Git.repoIsUrl repo = guardUsable repo (giveup "cannot access remote") $ do
u <- getUUID
hardlink <- wantHardLink


@@ -56,8 +56,6 @@ import Git.Types
import qualified Data.ByteString as S
import qualified Data.ByteString.Lazy as L
import qualified Data.Map as M
import Control.Concurrent.STM
import Control.Concurrent.Async
{- Special remotes don't have a configured url, so Git.Repo does not
- automatically generate remotes for them. This looks for a different
@@ -117,17 +115,9 @@ byteRetriever a k _m _miv callback = a k (callback . ByteContent)
-- the action writes to the file, but may not be updated with the entire
-- content of the file.
fileRetriever :: (RawFilePath -> Key -> MeterUpdate -> Annex ()) -> Retriever
fileRetriever a = fileRetriever' $ \f k m miv -> do
fileRetriever a = fileRetriever' $ \f k m miv ->
let retrieve = a f k m
case miv of
Nothing -> retrieve
Just iv -> do
finished <- liftIO newEmptyTMVarIO
t <- liftIO $ async $ tailVerify iv f finished
let finishtail = do
liftIO $ atomically $ putTMVar finished ()
liftIO (wait t)
retrieve `finally` finishtail
in tailVerify miv f retrieve
{- A Retriever that writes the content of a Key to a provided file.
- The action is responsible for updating the progress meter and the
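
The effect of this refactoring is that any special remote whose Retriever is
built with fileRetriever now gets the tail-and-verify behaviour with no code of
its own. A minimal sketch, where myDownload is a hypothetical stand-in for a
remote's download action:

    myRetriever :: Retriever
    myRetriever = fileRetriever $ \f k p ->
        -- fileRetriever wraps this action in tailVerify (via fileRetriever'),
        -- so while myDownload writes the file at f, the growing file is fed
        -- to the incremental verifier when one is in use.
        myDownload k (fromRawFilePath f) p

    -- hypothetical download action
    myDownload :: Key -> FilePath -> MeterUpdate -> Annex ()
    myDownload _ _ _ = giveup "stand-in only"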


@@ -116,14 +116,13 @@ httpAlsoSetup _ (Just u) _ c gc = do
downloadKey :: Maybe URLString -> LearnedLayout -> Key -> AssociatedFile -> FilePath -> MeterUpdate -> VerifyConfig -> Annex Verification
downloadKey baseurl ll key _af dest p vc = do
iv <- startVerifyKeyContentIncrementally vc key
verifyKeyContentIncrementally vc key $ \iv ->
downloadAction dest p iv key (keyUrlAction baseurl ll key)
snd <$> finishVerifyKeyContentIncrementally iv
retriveExportHttpAlso :: Maybe URLString -> Key -> ExportLocation -> FilePath -> MeterUpdate -> Annex Verification
retriveExportHttpAlso baseurl key loc dest p = do
downloadAction dest p Nothing key (exportLocationUrlAction baseurl loc)
return UnVerified
verifyKeyContentIncrementally AlwaysVerify key $ \iv ->
downloadAction dest p iv key (exportLocationUrlAction baseurl loc)
downloadAction :: FilePath -> MeterUpdate -> Maybe IncrementalVerifier -> Key -> ((URLString -> Annex (Either String ())) -> Annex (Either String ())) -> Annex ()
downloadAction dest p iv key run =


@@ -46,6 +46,7 @@ import Annex.DirHashes
import Utility.Tmp.Dir
import Utility.SshHost
import Annex.SpecialRemote.Config
import Annex.Verify
import qualified Data.Map as M
@@ -317,9 +318,10 @@ storeExportM o src _k loc meterupdate =
populatedest = liftIO . createLinkOrCopy src
retrieveExportM :: RsyncOpts -> Key -> ExportLocation -> FilePath -> MeterUpdate -> Annex Verification
retrieveExportM o _k loc dest p = do
retrieveExportM o k loc dest p =
verifyKeyContentIncrementally AlwaysVerify k $ \iv ->
tailVerify iv (toRawFilePath dest) $
rsyncRetrieve o [rsyncurl] dest (Just p)
return UnVerified
where
rsyncurl = mkRsyncUrl o (fromRawFilePath (fromExportLocation loc))


@@ -60,13 +60,13 @@ import Types.MetaData
import Types.ProposedAccepted
import Types.NumCopies
import Utility.Metered
import Utility.Hash (IncrementalVerifier)
import Utility.DataUnits
import Annex.Content
import qualified Annex.Url as Url
import Utility.Url (extractFromResourceT)
import Annex.Url (getUrlOptions, withUrlOptions, UrlOptions(..))
import Utility.Env
import Annex.Verify
type BucketName = String
type BucketObject = String
@@ -496,15 +496,14 @@ storeExportS3' hv r rs info magic f k loc p = withS3Handle hv $ \case
return (metag, mvid)
retrieveExportS3 :: S3HandleVar -> Remote -> S3Info -> Key -> ExportLocation -> FilePath -> MeterUpdate -> Annex Verification
retrieveExportS3 hv r info _k loc f p = do
retrieveExportS3 hv r info k loc f p = verifyKeyContentIncrementally AlwaysVerify k $ \iv ->
withS3Handle hv $ \case
Just h -> retrieveHelper info h (Left (T.pack exportloc)) f p Nothing
Just h -> retrieveHelper info h (Left (T.pack exportloc)) f p iv
Nothing -> case getPublicUrlMaker info of
Just geturl -> either giveup return =<<
Url.withUrlOptions
(Url.download' p Nothing (geturl exportloc) f)
(Url.download' p iv (geturl exportloc) f)
Nothing -> giveup $ needS3Creds (uuid r)
return UnVerified
where
exportloc = bucketExportLocation info loc


@@ -40,7 +40,7 @@ import qualified Remote.Helper.Chunked.Legacy as Legacy
import Creds
import Utility.Metered
import Utility.Url (URLString, matchStatusCodeException, matchHttpExceptionContent)
import Utility.Hash (IncrementalVerifier(..))
import Annex.Verify
import Annex.UUID
import Remote.WebDAV.DavLocation
import Types.ProposedAccepted
@@ -219,10 +219,10 @@ storeExportDav hdl f k loc p = case exportLocation loc of
Left err -> giveup err
retrieveExportDav :: DavHandleVar -> Key -> ExportLocation -> FilePath -> MeterUpdate -> Annex Verification
retrieveExportDav hdl _k loc d p = case exportLocation loc of
Right src -> withDavHandle hdl $ \h -> runExport h $ \_dav -> do
retrieveHelper src d p Nothing
return UnVerified
retrieveExportDav hdl k loc d p = case exportLocation loc of
Right src -> verifyKeyContentIncrementally AlwaysVerify k $ \iv ->
withDavHandle hdl $ \h -> runExport h $ \_dav ->
retrieveHelper src d p iv
Left err -> giveup err
checkPresentExportDav :: DavHandleVar -> Remote -> Key -> ExportLocation -> Annex Bool


@@ -0,0 +1,9 @@
[[!comment format=mdwn
username="joey"
subject="""comment 17"""
date="2022-05-09T17:48:31Z"
content="""
Update: incremental hashing is also now done for all export remotes.
Only import (and export+import) remotes don't support incremental hashing
now.
"""]]