incremental verification for retrieval from all export remotes

Only for export remotes so far, not export/import.

Sponsored-by: Dartmouth College's Datalad project
This commit is contained in:
Joey Hess 2022-05-09 13:18:47 -04:00
parent 90950a37e5
commit 2f2701137d
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
11 changed files with 73 additions and 54 deletions

View file

@ -1,6 +1,6 @@
{- verification {- verification
- -
- Copyright 2010-2021 Joey Hess <id@joeyh.name> - Copyright 2010-2022 Joey Hess <id@joeyh.name>
- -
- Licensed under the GNU AGPL version 3 or higher. - Licensed under the GNU AGPL version 3 or higher.
-} -}
@ -17,6 +17,7 @@ module Annex.Verify (
isVerifiable, isVerifiable,
startVerifyKeyContentIncrementally, startVerifyKeyContentIncrementally,
finishVerifyKeyContentIncrementally, finishVerifyKeyContentIncrementally,
verifyKeyContentIncrementally,
IncrementalVerifier(..), IncrementalVerifier(..),
tailVerify, tailVerify,
) where ) where
@ -34,6 +35,7 @@ import Types.WorkerPool
import Types.Key import Types.Key
import Control.Concurrent.STM import Control.Concurrent.STM
import Control.Concurrent.Async
import qualified Data.ByteString as S import qualified Data.ByteString as S
#if WITH_INOTIFY #if WITH_INOTIFY
import qualified System.INotify as INotify import qualified System.INotify as INotify
@ -186,12 +188,17 @@ finishVerifyKeyContentIncrementally (Just iv) =
-- Incremental verification was not able to be done. -- Incremental verification was not able to be done.
Nothing -> return (True, UnVerified) Nothing -> return (True, UnVerified)
-- | Reads the file as it grows, and feeds it to the incremental verifier. verifyKeyContentIncrementally :: VerifyConfig -> Key -> (Maybe IncrementalVerifier -> Annex ()) -> Annex Verification
verifyKeyContentIncrementally verifyconfig k a = do
miv <- startVerifyKeyContentIncrementally verifyconfig k
a miv
snd <$> finishVerifyKeyContentIncrementally miv
-- | Runs a writer action that retrieves to a file. In another thread,
-- reads the file as it grows, and feeds it to the incremental verifier.
-- --
-- The TMVar must start out empty, and be filled once whatever is -- Once the writer finishes, this returns quickly. It may not feed
-- writing to the file finishes. Once the writer finishes, this returns -- the entire content of the file to the incremental verifier.
-- quickly. It may not feed the entire content of the file to the
-- incremental verifier.
-- --
-- The file does not need to exist yet when this is called. It will wait -- The file does not need to exist yet when this is called. It will wait
-- for the file to appear before opening it and starting verification. -- for the file to appear before opening it and starting verification.
@ -223,9 +230,19 @@ finishVerifyKeyContentIncrementally (Just iv) =
-- and if the disk is slow, the reader may never catch up to the writer, -- and if the disk is slow, the reader may never catch up to the writer,
-- and the disk cache may never speed up reads. So this should only be -- and the disk cache may never speed up reads. So this should only be
-- used when there's not a better way to incrementally verify. -- used when there's not a better way to incrementally verify.
tailVerify :: IncrementalVerifier -> RawFilePath -> TMVar () -> IO () tailVerify :: Maybe IncrementalVerifier -> RawFilePath -> Annex a -> Annex a
tailVerify (Just iv) f writer = do
finished <- liftIO newEmptyTMVarIO
t <- liftIO $ async $ tailVerify' iv f finished
let finishtail = do
liftIO $ atomically $ putTMVar finished ()
liftIO (wait t)
writer `finally` finishtail
tailVerify Nothing _ writer = writer
tailVerify' :: IncrementalVerifier -> RawFilePath -> TMVar () -> IO ()
#if WITH_INOTIFY #if WITH_INOTIFY
tailVerify iv f finished = tailVerify' iv f finished =
tryNonAsync go >>= \case tryNonAsync go >>= \case
Right r -> return r Right r -> return r
Left _ -> unableIncrementalVerifier iv Left _ -> unableIncrementalVerifier iv
@ -312,5 +329,5 @@ tailVerify iv f finished =
chunk = 65536 chunk = 65536
#else #else
tailVerify iv _ _ = unableIncrementalVerifier iv tailVerify' iv _ _ = unableIncrementalVerifier iv
#endif #endif

View file

@ -1,6 +1,6 @@
{- Remote on Android device accessed using adb. {- Remote on Android device accessed using adb.
- -
- Copyright 2018-2020 Joey Hess <id@joeyh.name> - Copyright 2018-2022 Joey Hess <id@joeyh.name>
- -
- Licensed under the GNU AGPL version 3 or higher. - Licensed under the GNU AGPL version 3 or higher.
-} -}
@ -22,6 +22,7 @@ import Annex.UUID
import Utility.Metered import Utility.Metered
import Types.ProposedAccepted import Types.ProposedAccepted
import Annex.SpecialRemote.Config import Annex.SpecialRemote.Config
import Annex.Verify
import qualified Data.Map as M import qualified Data.Map as M
import qualified System.FilePath.Posix as Posix import qualified System.FilePath.Posix as Posix
@ -256,9 +257,10 @@ storeExportM serial adir src _k loc _p =
dest = androidExportLocation adir loc dest = androidExportLocation adir loc
retrieveExportM :: AndroidSerial -> AndroidPath -> Key -> ExportLocation -> FilePath -> MeterUpdate -> Annex Verification retrieveExportM :: AndroidSerial -> AndroidPath -> Key -> ExportLocation -> FilePath -> MeterUpdate -> Annex Verification
retrieveExportM serial adir _k loc dest _p = do retrieveExportM serial adir k loc dest _p =
verifyKeyContentIncrementally AlwaysVerify k $ \iv ->
tailVerify iv (toRawFilePath dest) $
retrieve' serial src dest retrieve' serial src dest
return UnVerified
where where
src = androidExportLocation adir loc src = androidExportLocation adir loc

View file

@ -36,6 +36,7 @@ import Annex.CopyFile
import Annex.Content import Annex.Content
import Annex.Perms import Annex.Perms
import Annex.UUID import Annex.UUID
import Annex.Verify
import Backend import Backend
import Types.KeySource import Types.KeySource
import Types.ProposedAccepted import Types.ProposedAccepted
@ -317,9 +318,9 @@ storeExportM d cow src _k loc p = do
go tmp () = void $ fileCopier cow src tmp p Nothing go tmp () = void $ fileCopier cow src tmp p Nothing
retrieveExportM :: RawFilePath -> CopyCoWTried -> Key -> ExportLocation -> FilePath -> MeterUpdate -> Annex Verification retrieveExportM :: RawFilePath -> CopyCoWTried -> Key -> ExportLocation -> FilePath -> MeterUpdate -> Annex Verification
retrieveExportM d cow _k loc dest p = do retrieveExportM d cow k loc dest p =
void $ fileCopier cow src dest p Nothing verifyKeyContentIncrementally AlwaysVerify k $ \iv ->
return UnVerified void $ fileCopier cow src dest p iv
where where
src = fromRawFilePath $ exportPath d loc src = fromRawFilePath $ exportPath d loc

View file

@ -1,6 +1,6 @@
{- External special remote interface. {- External special remote interface.
- -
- Copyright 2013-2020 Joey Hess <id@joeyh.name> - Copyright 2013-2022 Joey Hess <id@joeyh.name>
- -
- Licensed under the GNU AGPL version 3 or higher. - Licensed under the GNU AGPL version 3 or higher.
-} -}
@ -37,6 +37,7 @@ import Config.Cost
import Annex.Content import Annex.Content
import Annex.Url import Annex.Url
import Annex.UUID import Annex.UUID
import Annex.Verify
import Creds import Creds
import Control.Concurrent.STM import Control.Concurrent.STM
@ -292,9 +293,10 @@ storeExportM external f k loc p = either giveup return =<< go
req sk = TRANSFEREXPORT Upload sk f req sk = TRANSFEREXPORT Upload sk f
retrieveExportM :: External -> Key -> ExportLocation -> FilePath -> MeterUpdate -> Annex Verification retrieveExportM :: External -> Key -> ExportLocation -> FilePath -> MeterUpdate -> Annex Verification
retrieveExportM external k loc d p = do retrieveExportM external k loc dest p = do
verifyKeyContentIncrementally AlwaysVerify k $ \iv ->
tailVerify iv (toRawFilePath dest) $
either giveup return =<< go either giveup return =<< go
return UnVerified
where where
go = handleRequestExport external loc req k (Just p) $ \resp -> case resp of go = handleRequestExport external loc req k (Just p) $ \resp -> case resp of
TRANSFER_SUCCESS Download k' TRANSFER_SUCCESS Download k'
@ -304,7 +306,7 @@ retrieveExportM external k loc d p = do
UNSUPPORTED_REQUEST -> UNSUPPORTED_REQUEST ->
result $ Left "TRANSFEREXPORT not implemented by external special remote" result $ Left "TRANSFEREXPORT not implemented by external special remote"
_ -> Nothing _ -> Nothing
req sk = TRANSFEREXPORT Download sk d req sk = TRANSFEREXPORT Download sk dest
checkPresentExportM :: External -> Key -> ExportLocation -> Annex Bool checkPresentExportM :: External -> Key -> ExportLocation -> Annex Bool
checkPresentExportM external k loc = either giveup id <$> go checkPresentExportM external k loc = either giveup id <$> go

View file

@ -491,14 +491,12 @@ copyFromRemote r st key file dest meterupdate vc = do
copyFromRemote'' :: Git.Repo -> Remote -> State -> Key -> AssociatedFile -> FilePath -> MeterUpdate -> VerifyConfig -> Annex Verification copyFromRemote'' :: Git.Repo -> Remote -> State -> Key -> AssociatedFile -> FilePath -> MeterUpdate -> VerifyConfig -> Annex Verification
copyFromRemote'' repo r st@(State connpool _ _ _ _) key file dest meterupdate vc copyFromRemote'' repo r st@(State connpool _ _ _ _) key file dest meterupdate vc
| Git.repoIsHttp repo = do | Git.repoIsHttp repo = verifyKeyContentIncrementally vc key $ \iv -> do
iv <- startVerifyKeyContentIncrementally vc key
gc <- Annex.getGitConfig gc <- Annex.getGitConfig
ok <- Url.withUrlOptionsPromptingCreds $ ok <- Url.withUrlOptionsPromptingCreds $
Annex.Content.downloadUrl False key meterupdate iv (keyUrls gc repo r key) dest Annex.Content.downloadUrl False key meterupdate iv (keyUrls gc repo r key) dest
unless ok $ unless ok $
giveup "failed to download content" giveup "failed to download content"
snd <$> finishVerifyKeyContentIncrementally iv
| not $ Git.repoIsUrl repo = guardUsable repo (giveup "cannot access remote") $ do | not $ Git.repoIsUrl repo = guardUsable repo (giveup "cannot access remote") $ do
u <- getUUID u <- getUUID
hardlink <- wantHardLink hardlink <- wantHardLink

View file

@ -56,8 +56,6 @@ import Git.Types
import qualified Data.ByteString as S import qualified Data.ByteString as S
import qualified Data.ByteString.Lazy as L import qualified Data.ByteString.Lazy as L
import qualified Data.Map as M import qualified Data.Map as M
import Control.Concurrent.STM
import Control.Concurrent.Async
{- Special remotes don't have a configured url, so Git.Repo does not {- Special remotes don't have a configured url, so Git.Repo does not
- automatically generate remotes for them. This looks for a different - automatically generate remotes for them. This looks for a different
@ -117,17 +115,9 @@ byteRetriever a k _m _miv callback = a k (callback . ByteContent)
-- the action writes to the file, but may not be updated with the entire -- the action writes to the file, but may not be updated with the entire
-- content of the file. -- content of the file.
fileRetriever :: (RawFilePath -> Key -> MeterUpdate -> Annex ()) -> Retriever fileRetriever :: (RawFilePath -> Key -> MeterUpdate -> Annex ()) -> Retriever
fileRetriever a = fileRetriever' $ \f k m miv -> do fileRetriever a = fileRetriever' $ \f k m miv ->
let retrieve = a f k m let retrieve = a f k m
case miv of in tailVerify miv f retrieve
Nothing -> retrieve
Just iv -> do
finished <- liftIO newEmptyTMVarIO
t <- liftIO $ async $ tailVerify iv f finished
let finishtail = do
liftIO $ atomically $ putTMVar finished ()
liftIO (wait t)
retrieve `finally` finishtail
{- A Retriever that writes the content of a Key to a provided file. {- A Retriever that writes the content of a Key to a provided file.
- The action is responsible for updating the progress meter and the - The action is responsible for updating the progress meter and the

View file

@ -116,14 +116,13 @@ httpAlsoSetup _ (Just u) _ c gc = do
downloadKey :: Maybe URLString -> LearnedLayout -> Key -> AssociatedFile -> FilePath -> MeterUpdate -> VerifyConfig -> Annex Verification downloadKey :: Maybe URLString -> LearnedLayout -> Key -> AssociatedFile -> FilePath -> MeterUpdate -> VerifyConfig -> Annex Verification
downloadKey baseurl ll key _af dest p vc = do downloadKey baseurl ll key _af dest p vc = do
iv <- startVerifyKeyContentIncrementally vc key verifyKeyContentIncrementally vc key $ \iv ->
downloadAction dest p iv key (keyUrlAction baseurl ll key) downloadAction dest p iv key (keyUrlAction baseurl ll key)
snd <$> finishVerifyKeyContentIncrementally iv
retriveExportHttpAlso :: Maybe URLString -> Key -> ExportLocation -> FilePath -> MeterUpdate -> Annex Verification retriveExportHttpAlso :: Maybe URLString -> Key -> ExportLocation -> FilePath -> MeterUpdate -> Annex Verification
retriveExportHttpAlso baseurl key loc dest p = do retriveExportHttpAlso baseurl key loc dest p = do
downloadAction dest p Nothing key (exportLocationUrlAction baseurl loc) verifyKeyContentIncrementally AlwaysVerify key $ \iv ->
return UnVerified downloadAction dest p iv key (exportLocationUrlAction baseurl loc)
downloadAction :: FilePath -> MeterUpdate -> Maybe IncrementalVerifier -> Key -> ((URLString -> Annex (Either String ())) -> Annex (Either String ())) -> Annex () downloadAction :: FilePath -> MeterUpdate -> Maybe IncrementalVerifier -> Key -> ((URLString -> Annex (Either String ())) -> Annex (Either String ())) -> Annex ()
downloadAction dest p iv key run = downloadAction dest p iv key run =

View file

@ -46,6 +46,7 @@ import Annex.DirHashes
import Utility.Tmp.Dir import Utility.Tmp.Dir
import Utility.SshHost import Utility.SshHost
import Annex.SpecialRemote.Config import Annex.SpecialRemote.Config
import Annex.Verify
import qualified Data.Map as M import qualified Data.Map as M
@ -317,9 +318,10 @@ storeExportM o src _k loc meterupdate =
populatedest = liftIO . createLinkOrCopy src populatedest = liftIO . createLinkOrCopy src
retrieveExportM :: RsyncOpts -> Key -> ExportLocation -> FilePath -> MeterUpdate -> Annex Verification retrieveExportM :: RsyncOpts -> Key -> ExportLocation -> FilePath -> MeterUpdate -> Annex Verification
retrieveExportM o _k loc dest p = do retrieveExportM o k loc dest p =
verifyKeyContentIncrementally AlwaysVerify k $ \iv ->
tailVerify iv (toRawFilePath dest) $
rsyncRetrieve o [rsyncurl] dest (Just p) rsyncRetrieve o [rsyncurl] dest (Just p)
return UnVerified
where where
rsyncurl = mkRsyncUrl o (fromRawFilePath (fromExportLocation loc)) rsyncurl = mkRsyncUrl o (fromRawFilePath (fromExportLocation loc))

View file

@ -60,13 +60,13 @@ import Types.MetaData
import Types.ProposedAccepted import Types.ProposedAccepted
import Types.NumCopies import Types.NumCopies
import Utility.Metered import Utility.Metered
import Utility.Hash (IncrementalVerifier)
import Utility.DataUnits import Utility.DataUnits
import Annex.Content import Annex.Content
import qualified Annex.Url as Url import qualified Annex.Url as Url
import Utility.Url (extractFromResourceT) import Utility.Url (extractFromResourceT)
import Annex.Url (getUrlOptions, withUrlOptions, UrlOptions(..)) import Annex.Url (getUrlOptions, withUrlOptions, UrlOptions(..))
import Utility.Env import Utility.Env
import Annex.Verify
type BucketName = String type BucketName = String
type BucketObject = String type BucketObject = String
@ -496,15 +496,14 @@ storeExportS3' hv r rs info magic f k loc p = withS3Handle hv $ \case
return (metag, mvid) return (metag, mvid)
retrieveExportS3 :: S3HandleVar -> Remote -> S3Info -> Key -> ExportLocation -> FilePath -> MeterUpdate -> Annex Verification retrieveExportS3 :: S3HandleVar -> Remote -> S3Info -> Key -> ExportLocation -> FilePath -> MeterUpdate -> Annex Verification
retrieveExportS3 hv r info _k loc f p = do retrieveExportS3 hv r info k loc f p = verifyKeyContentIncrementally AlwaysVerify k $ \iv ->
withS3Handle hv $ \case withS3Handle hv $ \case
Just h -> retrieveHelper info h (Left (T.pack exportloc)) f p Nothing Just h -> retrieveHelper info h (Left (T.pack exportloc)) f p iv
Nothing -> case getPublicUrlMaker info of Nothing -> case getPublicUrlMaker info of
Just geturl -> either giveup return =<< Just geturl -> either giveup return =<<
Url.withUrlOptions Url.withUrlOptions
(Url.download' p Nothing (geturl exportloc) f) (Url.download' p iv (geturl exportloc) f)
Nothing -> giveup $ needS3Creds (uuid r) Nothing -> giveup $ needS3Creds (uuid r)
return UnVerified
where where
exportloc = bucketExportLocation info loc exportloc = bucketExportLocation info loc

View file

@ -40,7 +40,7 @@ import qualified Remote.Helper.Chunked.Legacy as Legacy
import Creds import Creds
import Utility.Metered import Utility.Metered
import Utility.Url (URLString, matchStatusCodeException, matchHttpExceptionContent) import Utility.Url (URLString, matchStatusCodeException, matchHttpExceptionContent)
import Utility.Hash (IncrementalVerifier(..)) import Annex.Verify
import Annex.UUID import Annex.UUID
import Remote.WebDAV.DavLocation import Remote.WebDAV.DavLocation
import Types.ProposedAccepted import Types.ProposedAccepted
@ -219,10 +219,10 @@ storeExportDav hdl f k loc p = case exportLocation loc of
Left err -> giveup err Left err -> giveup err
retrieveExportDav :: DavHandleVar -> Key -> ExportLocation -> FilePath -> MeterUpdate -> Annex Verification retrieveExportDav :: DavHandleVar -> Key -> ExportLocation -> FilePath -> MeterUpdate -> Annex Verification
retrieveExportDav hdl _k loc d p = case exportLocation loc of retrieveExportDav hdl k loc d p = case exportLocation loc of
Right src -> withDavHandle hdl $ \h -> runExport h $ \_dav -> do Right src -> verifyKeyContentIncrementally AlwaysVerify k $ \iv ->
retrieveHelper src d p Nothing withDavHandle hdl $ \h -> runExport h $ \_dav ->
return UnVerified retrieveHelper src d p iv
Left err -> giveup err Left err -> giveup err
checkPresentExportDav :: DavHandleVar -> Remote -> Key -> ExportLocation -> Annex Bool checkPresentExportDav :: DavHandleVar -> Remote -> Key -> ExportLocation -> Annex Bool

View file

@ -0,0 +1,9 @@
[[!comment format=mdwn
username="joey"
subject="""comment 17"""
date="2022-05-09T17:48:31Z"
content="""
Update: incremental hashing is also now done for all export remotes.
Only import (and export+import) remotes don't support incremental hashing
now.
"""]]