incremental verification for retrieval from all export remotes
Only for export remotes so far, not export/import. Sponsored-by: Dartmouth College's Datalad project
This commit is contained in:
parent
90950a37e5
commit
2f2701137d
11 changed files with 73 additions and 54 deletions
|
@ -1,6 +1,6 @@
|
|||
{- verification
|
||||
-
|
||||
- Copyright 2010-2021 Joey Hess <id@joeyh.name>
|
||||
- Copyright 2010-2022 Joey Hess <id@joeyh.name>
|
||||
-
|
||||
- Licensed under the GNU AGPL version 3 or higher.
|
||||
-}
|
||||
|
@ -17,6 +17,7 @@ module Annex.Verify (
|
|||
isVerifiable,
|
||||
startVerifyKeyContentIncrementally,
|
||||
finishVerifyKeyContentIncrementally,
|
||||
verifyKeyContentIncrementally,
|
||||
IncrementalVerifier(..),
|
||||
tailVerify,
|
||||
) where
|
||||
|
@ -34,6 +35,7 @@ import Types.WorkerPool
|
|||
import Types.Key
|
||||
|
||||
import Control.Concurrent.STM
|
||||
import Control.Concurrent.Async
|
||||
import qualified Data.ByteString as S
|
||||
#if WITH_INOTIFY
|
||||
import qualified System.INotify as INotify
|
||||
|
@ -186,12 +188,17 @@ finishVerifyKeyContentIncrementally (Just iv) =
|
|||
-- Incremental verification was not able to be done.
|
||||
Nothing -> return (True, UnVerified)
|
||||
|
||||
-- | Reads the file as it grows, and feeds it to the incremental verifier.
|
||||
verifyKeyContentIncrementally :: VerifyConfig -> Key -> (Maybe IncrementalVerifier -> Annex ()) -> Annex Verification
|
||||
verifyKeyContentIncrementally verifyconfig k a = do
|
||||
miv <- startVerifyKeyContentIncrementally verifyconfig k
|
||||
a miv
|
||||
snd <$> finishVerifyKeyContentIncrementally miv
|
||||
|
||||
-- | Runs a writer action that retrieves to a file. In another thread,
|
||||
-- reads the file as it grows, and feeds it to the incremental verifier.
|
||||
--
|
||||
-- The TMVar must start out empty, and be filled once whatever is
|
||||
-- writing to the file finishes. Once the writer finishes, this returns
|
||||
-- quickly. It may not feed the entire content of the file to the
|
||||
-- incremental verifier.
|
||||
-- Once the writer finishes, this returns quickly. It may not feed
|
||||
-- the entire content of the file to the incremental verifier.
|
||||
--
|
||||
-- The file does not need to exist yet when this is called. It will wait
|
||||
-- for the file to appear before opening it and starting verification.
|
||||
|
@ -223,9 +230,19 @@ finishVerifyKeyContentIncrementally (Just iv) =
|
|||
-- and if the disk is slow, the reader may never catch up to the writer,
|
||||
-- and the disk cache may never speed up reads. So this should only be
|
||||
-- used when there's not a better way to incrementally verify.
|
||||
tailVerify :: IncrementalVerifier -> RawFilePath -> TMVar () -> IO ()
|
||||
tailVerify :: Maybe IncrementalVerifier -> RawFilePath -> Annex a -> Annex a
|
||||
tailVerify (Just iv) f writer = do
|
||||
finished <- liftIO newEmptyTMVarIO
|
||||
t <- liftIO $ async $ tailVerify' iv f finished
|
||||
let finishtail = do
|
||||
liftIO $ atomically $ putTMVar finished ()
|
||||
liftIO (wait t)
|
||||
writer `finally` finishtail
|
||||
tailVerify Nothing _ writer = writer
|
||||
|
||||
tailVerify' :: IncrementalVerifier -> RawFilePath -> TMVar () -> IO ()
|
||||
#if WITH_INOTIFY
|
||||
tailVerify iv f finished =
|
||||
tailVerify' iv f finished =
|
||||
tryNonAsync go >>= \case
|
||||
Right r -> return r
|
||||
Left _ -> unableIncrementalVerifier iv
|
||||
|
@ -312,5 +329,5 @@ tailVerify iv f finished =
|
|||
|
||||
chunk = 65536
|
||||
#else
|
||||
tailVerify iv _ _ = unableIncrementalVerifier iv
|
||||
tailVerify' iv _ _ = unableIncrementalVerifier iv
|
||||
#endif
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
{- Remote on Android device accessed using adb.
|
||||
-
|
||||
- Copyright 2018-2020 Joey Hess <id@joeyh.name>
|
||||
- Copyright 2018-2022 Joey Hess <id@joeyh.name>
|
||||
-
|
||||
- Licensed under the GNU AGPL version 3 or higher.
|
||||
-}
|
||||
|
@ -22,6 +22,7 @@ import Annex.UUID
|
|||
import Utility.Metered
|
||||
import Types.ProposedAccepted
|
||||
import Annex.SpecialRemote.Config
|
||||
import Annex.Verify
|
||||
|
||||
import qualified Data.Map as M
|
||||
import qualified System.FilePath.Posix as Posix
|
||||
|
@ -256,9 +257,10 @@ storeExportM serial adir src _k loc _p =
|
|||
dest = androidExportLocation adir loc
|
||||
|
||||
retrieveExportM :: AndroidSerial -> AndroidPath -> Key -> ExportLocation -> FilePath -> MeterUpdate -> Annex Verification
|
||||
retrieveExportM serial adir _k loc dest _p = do
|
||||
retrieve' serial src dest
|
||||
return UnVerified
|
||||
retrieveExportM serial adir k loc dest _p =
|
||||
verifyKeyContentIncrementally AlwaysVerify k $ \iv ->
|
||||
tailVerify iv (toRawFilePath dest) $
|
||||
retrieve' serial src dest
|
||||
where
|
||||
src = androidExportLocation adir loc
|
||||
|
||||
|
|
|
@ -36,6 +36,7 @@ import Annex.CopyFile
|
|||
import Annex.Content
|
||||
import Annex.Perms
|
||||
import Annex.UUID
|
||||
import Annex.Verify
|
||||
import Backend
|
||||
import Types.KeySource
|
||||
import Types.ProposedAccepted
|
||||
|
@ -317,9 +318,9 @@ storeExportM d cow src _k loc p = do
|
|||
go tmp () = void $ fileCopier cow src tmp p Nothing
|
||||
|
||||
retrieveExportM :: RawFilePath -> CopyCoWTried -> Key -> ExportLocation -> FilePath -> MeterUpdate -> Annex Verification
|
||||
retrieveExportM d cow _k loc dest p = do
|
||||
void $ fileCopier cow src dest p Nothing
|
||||
return UnVerified
|
||||
retrieveExportM d cow k loc dest p =
|
||||
verifyKeyContentIncrementally AlwaysVerify k $ \iv ->
|
||||
void $ fileCopier cow src dest p iv
|
||||
where
|
||||
src = fromRawFilePath $ exportPath d loc
|
||||
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
{- External special remote interface.
|
||||
-
|
||||
- Copyright 2013-2020 Joey Hess <id@joeyh.name>
|
||||
- Copyright 2013-2022 Joey Hess <id@joeyh.name>
|
||||
-
|
||||
- Licensed under the GNU AGPL version 3 or higher.
|
||||
-}
|
||||
|
@ -37,6 +37,7 @@ import Config.Cost
|
|||
import Annex.Content
|
||||
import Annex.Url
|
||||
import Annex.UUID
|
||||
import Annex.Verify
|
||||
import Creds
|
||||
|
||||
import Control.Concurrent.STM
|
||||
|
@ -292,9 +293,10 @@ storeExportM external f k loc p = either giveup return =<< go
|
|||
req sk = TRANSFEREXPORT Upload sk f
|
||||
|
||||
retrieveExportM :: External -> Key -> ExportLocation -> FilePath -> MeterUpdate -> Annex Verification
|
||||
retrieveExportM external k loc d p = do
|
||||
either giveup return =<< go
|
||||
return UnVerified
|
||||
retrieveExportM external k loc dest p = do
|
||||
verifyKeyContentIncrementally AlwaysVerify k $ \iv ->
|
||||
tailVerify iv (toRawFilePath dest) $
|
||||
either giveup return =<< go
|
||||
where
|
||||
go = handleRequestExport external loc req k (Just p) $ \resp -> case resp of
|
||||
TRANSFER_SUCCESS Download k'
|
||||
|
@ -304,7 +306,7 @@ retrieveExportM external k loc d p = do
|
|||
UNSUPPORTED_REQUEST ->
|
||||
result $ Left "TRANSFEREXPORT not implemented by external special remote"
|
||||
_ -> Nothing
|
||||
req sk = TRANSFEREXPORT Download sk d
|
||||
req sk = TRANSFEREXPORT Download sk dest
|
||||
|
||||
checkPresentExportM :: External -> Key -> ExportLocation -> Annex Bool
|
||||
checkPresentExportM external k loc = either giveup id <$> go
|
||||
|
|
|
@ -491,14 +491,12 @@ copyFromRemote r st key file dest meterupdate vc = do
|
|||
|
||||
copyFromRemote'' :: Git.Repo -> Remote -> State -> Key -> AssociatedFile -> FilePath -> MeterUpdate -> VerifyConfig -> Annex Verification
|
||||
copyFromRemote'' repo r st@(State connpool _ _ _ _) key file dest meterupdate vc
|
||||
| Git.repoIsHttp repo = do
|
||||
iv <- startVerifyKeyContentIncrementally vc key
|
||||
| Git.repoIsHttp repo = verifyKeyContentIncrementally vc key $ \iv -> do
|
||||
gc <- Annex.getGitConfig
|
||||
ok <- Url.withUrlOptionsPromptingCreds $
|
||||
Annex.Content.downloadUrl False key meterupdate iv (keyUrls gc repo r key) dest
|
||||
unless ok $
|
||||
giveup "failed to download content"
|
||||
snd <$> finishVerifyKeyContentIncrementally iv
|
||||
| not $ Git.repoIsUrl repo = guardUsable repo (giveup "cannot access remote") $ do
|
||||
u <- getUUID
|
||||
hardlink <- wantHardLink
|
||||
|
|
|
@ -56,8 +56,6 @@ import Git.Types
|
|||
import qualified Data.ByteString as S
|
||||
import qualified Data.ByteString.Lazy as L
|
||||
import qualified Data.Map as M
|
||||
import Control.Concurrent.STM
|
||||
import Control.Concurrent.Async
|
||||
|
||||
{- Special remotes don't have a configured url, so Git.Repo does not
|
||||
- automatically generate remotes for them. This looks for a different
|
||||
|
@ -117,17 +115,9 @@ byteRetriever a k _m _miv callback = a k (callback . ByteContent)
|
|||
-- the action writes to the file, but may not be updated with the entire
|
||||
-- content of the file.
|
||||
fileRetriever :: (RawFilePath -> Key -> MeterUpdate -> Annex ()) -> Retriever
|
||||
fileRetriever a = fileRetriever' $ \f k m miv -> do
|
||||
fileRetriever a = fileRetriever' $ \f k m miv ->
|
||||
let retrieve = a f k m
|
||||
case miv of
|
||||
Nothing -> retrieve
|
||||
Just iv -> do
|
||||
finished <- liftIO newEmptyTMVarIO
|
||||
t <- liftIO $ async $ tailVerify iv f finished
|
||||
let finishtail = do
|
||||
liftIO $ atomically $ putTMVar finished ()
|
||||
liftIO (wait t)
|
||||
retrieve `finally` finishtail
|
||||
in tailVerify miv f retrieve
|
||||
|
||||
{- A Retriever that writes the content of a Key to a provided file.
|
||||
- The action is responsible for updating the progress meter and the
|
||||
|
|
|
@ -116,14 +116,13 @@ httpAlsoSetup _ (Just u) _ c gc = do
|
|||
|
||||
downloadKey :: Maybe URLString -> LearnedLayout -> Key -> AssociatedFile -> FilePath -> MeterUpdate -> VerifyConfig -> Annex Verification
|
||||
downloadKey baseurl ll key _af dest p vc = do
|
||||
iv <- startVerifyKeyContentIncrementally vc key
|
||||
downloadAction dest p iv key (keyUrlAction baseurl ll key)
|
||||
snd <$> finishVerifyKeyContentIncrementally iv
|
||||
verifyKeyContentIncrementally vc key $ \iv ->
|
||||
downloadAction dest p iv key (keyUrlAction baseurl ll key)
|
||||
|
||||
retriveExportHttpAlso :: Maybe URLString -> Key -> ExportLocation -> FilePath -> MeterUpdate -> Annex Verification
|
||||
retriveExportHttpAlso baseurl key loc dest p = do
|
||||
downloadAction dest p Nothing key (exportLocationUrlAction baseurl loc)
|
||||
return UnVerified
|
||||
verifyKeyContentIncrementally AlwaysVerify key $ \iv ->
|
||||
downloadAction dest p iv key (exportLocationUrlAction baseurl loc)
|
||||
|
||||
downloadAction :: FilePath -> MeterUpdate -> Maybe IncrementalVerifier -> Key -> ((URLString -> Annex (Either String ())) -> Annex (Either String ())) -> Annex ()
|
||||
downloadAction dest p iv key run =
|
||||
|
|
|
@ -46,6 +46,7 @@ import Annex.DirHashes
|
|||
import Utility.Tmp.Dir
|
||||
import Utility.SshHost
|
||||
import Annex.SpecialRemote.Config
|
||||
import Annex.Verify
|
||||
|
||||
import qualified Data.Map as M
|
||||
|
||||
|
@ -317,9 +318,10 @@ storeExportM o src _k loc meterupdate =
|
|||
populatedest = liftIO . createLinkOrCopy src
|
||||
|
||||
retrieveExportM :: RsyncOpts -> Key -> ExportLocation -> FilePath -> MeterUpdate -> Annex Verification
|
||||
retrieveExportM o _k loc dest p = do
|
||||
rsyncRetrieve o [rsyncurl] dest (Just p)
|
||||
return UnVerified
|
||||
retrieveExportM o k loc dest p =
|
||||
verifyKeyContentIncrementally AlwaysVerify k $ \iv ->
|
||||
tailVerify iv (toRawFilePath dest) $
|
||||
rsyncRetrieve o [rsyncurl] dest (Just p)
|
||||
where
|
||||
rsyncurl = mkRsyncUrl o (fromRawFilePath (fromExportLocation loc))
|
||||
|
||||
|
|
|
@ -60,13 +60,13 @@ import Types.MetaData
|
|||
import Types.ProposedAccepted
|
||||
import Types.NumCopies
|
||||
import Utility.Metered
|
||||
import Utility.Hash (IncrementalVerifier)
|
||||
import Utility.DataUnits
|
||||
import Annex.Content
|
||||
import qualified Annex.Url as Url
|
||||
import Utility.Url (extractFromResourceT)
|
||||
import Annex.Url (getUrlOptions, withUrlOptions, UrlOptions(..))
|
||||
import Utility.Env
|
||||
import Annex.Verify
|
||||
|
||||
type BucketName = String
|
||||
type BucketObject = String
|
||||
|
@ -496,15 +496,14 @@ storeExportS3' hv r rs info magic f k loc p = withS3Handle hv $ \case
|
|||
return (metag, mvid)
|
||||
|
||||
retrieveExportS3 :: S3HandleVar -> Remote -> S3Info -> Key -> ExportLocation -> FilePath -> MeterUpdate -> Annex Verification
|
||||
retrieveExportS3 hv r info _k loc f p = do
|
||||
retrieveExportS3 hv r info k loc f p = verifyKeyContentIncrementally AlwaysVerify k $ \iv ->
|
||||
withS3Handle hv $ \case
|
||||
Just h -> retrieveHelper info h (Left (T.pack exportloc)) f p Nothing
|
||||
Just h -> retrieveHelper info h (Left (T.pack exportloc)) f p iv
|
||||
Nothing -> case getPublicUrlMaker info of
|
||||
Just geturl -> either giveup return =<<
|
||||
Url.withUrlOptions
|
||||
(Url.download' p Nothing (geturl exportloc) f)
|
||||
(Url.download' p iv (geturl exportloc) f)
|
||||
Nothing -> giveup $ needS3Creds (uuid r)
|
||||
return UnVerified
|
||||
where
|
||||
exportloc = bucketExportLocation info loc
|
||||
|
||||
|
|
|
@ -40,7 +40,7 @@ import qualified Remote.Helper.Chunked.Legacy as Legacy
|
|||
import Creds
|
||||
import Utility.Metered
|
||||
import Utility.Url (URLString, matchStatusCodeException, matchHttpExceptionContent)
|
||||
import Utility.Hash (IncrementalVerifier(..))
|
||||
import Annex.Verify
|
||||
import Annex.UUID
|
||||
import Remote.WebDAV.DavLocation
|
||||
import Types.ProposedAccepted
|
||||
|
@ -219,10 +219,10 @@ storeExportDav hdl f k loc p = case exportLocation loc of
|
|||
Left err -> giveup err
|
||||
|
||||
retrieveExportDav :: DavHandleVar -> Key -> ExportLocation -> FilePath -> MeterUpdate -> Annex Verification
|
||||
retrieveExportDav hdl _k loc d p = case exportLocation loc of
|
||||
Right src -> withDavHandle hdl $ \h -> runExport h $ \_dav -> do
|
||||
retrieveHelper src d p Nothing
|
||||
return UnVerified
|
||||
retrieveExportDav hdl k loc d p = case exportLocation loc of
|
||||
Right src -> verifyKeyContentIncrementally AlwaysVerify k $ \iv ->
|
||||
withDavHandle hdl $ \h -> runExport h $ \_dav ->
|
||||
retrieveHelper src d p iv
|
||||
Left err -> giveup err
|
||||
|
||||
checkPresentExportDav :: DavHandleVar -> Remote -> Key -> ExportLocation -> Annex Bool
|
||||
|
|
|
@ -0,0 +1,9 @@
|
|||
[[!comment format=mdwn
|
||||
username="joey"
|
||||
subject="""comment 17"""
|
||||
date="2022-05-09T17:48:31Z"
|
||||
content="""
|
||||
Update: incremental hashing is also now done for all export remotes.
|
||||
Only import (and export+import) remotes don't support incremental hashing
|
||||
now.
|
||||
"""]]
|
Loading…
Reference in a new issue