incremental verification for retrieval from import remotes

Sponsored-by: Dartmouth College's Datalad project
This commit is contained in:
Joey Hess 2022-05-09 15:38:21 -04:00
parent 2f2701137d
commit e8a601aa24
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
12 changed files with 129 additions and 83 deletions

View file

@ -338,15 +338,23 @@ listImportableContentsM serial adir c = adbfind >>= \case
-- connection is resonably fast, it's probably as good as
-- git's handling of similar situations with files being modified while
-- it's updating the working tree for a merge.
retrieveExportWithContentIdentifierM :: AndroidSerial -> AndroidPath -> ExportLocation -> ContentIdentifier -> FilePath -> Annex Key -> MeterUpdate -> Annex Key
retrieveExportWithContentIdentifierM serial adir loc cid dest mkkey _p = do
retrieve' serial src dest
k <- mkkey
currcid <- getExportContentIdentifier serial adir loc
if currcid == Right (Just cid)
then return k
else giveup "the file on the android device has changed"
retrieveExportWithContentIdentifierM :: AndroidSerial -> AndroidPath -> ExportLocation -> ContentIdentifier -> FilePath -> Either Key (Annex Key) -> MeterUpdate -> Annex (Key, Verification)
retrieveExportWithContentIdentifierM serial adir loc cid dest gk _p = do
case gk of
Right mkkey -> do
go
k <- mkkey
return (k, UnVerified)
Left k -> do
v <- verifyKeyContentIncrementally DefaultVerify k
(\iv -> tailVerify iv (toRawFilePath dest) go)
return (k, v)
where
go = do
retrieve' serial src dest
currcid <- getExportContentIdentifier serial adir loc
when (currcid /= Right (Just cid)) $
giveup "the file on the android device has changed"
src = androidExportLocation adir loc
storeExportWithContentIdentifierM :: AndroidSerial -> AndroidPath -> FilePath -> Key -> ExportLocation -> [ContentIdentifier] -> MeterUpdate -> Annex ContentIdentifier

View file

@ -29,6 +29,7 @@ import Utility.Metered
import Logs.Export
import qualified Remote.Helper.ThirdPartyPopulated as ThirdPartyPopulated
import Utility.Env
import Annex.Verify
import Data.Either
import Text.Read
@ -370,10 +371,20 @@ checkPresentExportWithContentIdentifierM borgrepo _ loc _ = prompt $ liftIO $ do
, giveup $ "Unable to access borg repository " ++ locBorgRepo borgrepo
)
retrieveExportWithContentIdentifierM :: BorgRepo -> ImportLocation -> ContentIdentifier -> FilePath -> Annex Key -> MeterUpdate -> Annex Key
retrieveExportWithContentIdentifierM borgrepo loc _ dest mkk _ = do
retrieveExportWithContentIdentifierM :: BorgRepo -> ImportLocation -> ContentIdentifier -> FilePath -> Either Key (Annex Key) -> MeterUpdate -> Annex (Key, Verification)
retrieveExportWithContentIdentifierM borgrepo loc _ dest gk _ = do
showOutput
prompt $ withOtherTmp $ \othertmp -> liftIO $ do
case gk of
Right mkkey -> do
go
k <- mkkey
return (k, UnVerified)
Left k -> do
v <- verifyKeyContentIncrementally DefaultVerify k
(\iv -> tailVerify iv (toRawFilePath dest) go)
return (k, v)
where
go = prompt $ withOtherTmp $ \othertmp -> liftIO $ do
-- borgrepo could be relative, and borg has to be run
-- in the temp directory to get it to write there
absborgrepo <- absBorgRepo borgrepo
@ -398,6 +409,5 @@ retrieveExportWithContentIdentifierM borgrepo loc _ dest mkk _ = do
-- combine with </>
moveFile (fromRawFilePath othertmp </> fromRawFilePath archivefile) dest
removeDirectoryRecursive (fromRawFilePath othertmp)
mkk
where
(archivename, archivefile) = extractImportLocation loc

View file

@ -198,9 +198,9 @@ storeKeyM d chunkconfig cow k c m =
(fromRawFilePath destdir)
in byteStorer go k c m
NoChunks ->
let go _k src p = do
let go _k src p = liftIO $ do
void $ fileCopier cow src tmpf p Nothing
liftIO $ finalizeStoreGeneric d tmpdir destdir
finalizeStoreGeneric d tmpdir destdir
in fileStorer go k c m
_ ->
let go _k b p = liftIO $ do
@ -242,7 +242,7 @@ retrieveKeyFileM :: RawFilePath -> ChunkConfig -> CopyCoWTried -> Retriever
retrieveKeyFileM d (LegacyChunks _) _ = Legacy.retrieve locations d
retrieveKeyFileM d NoChunks cow = fileRetriever' $ \dest k p iv -> do
src <- liftIO $ fromRawFilePath <$> getLocation d k
void $ fileCopier cow src (fromRawFilePath dest) p iv
void $ liftIO $ fileCopier cow src (fromRawFilePath dest) p iv
retrieveKeyFileM d _ _ = byteRetriever $ \k sink ->
sink =<< liftIO (L.readFile . fromRawFilePath =<< getLocation d k)
@ -315,12 +315,12 @@ storeExportM d cow src _k loc p = do
viaTmp go (fromRawFilePath dest) ()
where
dest = exportPath d loc
go tmp () = void $ fileCopier cow src tmp p Nothing
go tmp () = void $ liftIO $ fileCopier cow src tmp p Nothing
retrieveExportM :: RawFilePath -> CopyCoWTried -> Key -> ExportLocation -> FilePath -> MeterUpdate -> Annex Verification
retrieveExportM d cow k loc dest p =
verifyKeyContentIncrementally AlwaysVerify k $ \iv ->
void $ fileCopier cow src dest p iv
void $ liftIO $ fileCopier cow src dest p iv
where
src = fromRawFilePath $ exportPath d loc
@ -413,25 +413,31 @@ importKeyM ii dir loc cid sz p = do
, inodeCache = Nothing
}
retrieveExportWithContentIdentifierM :: IgnoreInodes -> RawFilePath -> CopyCoWTried -> ExportLocation -> ContentIdentifier -> FilePath -> Annex Key -> MeterUpdate -> Annex Key
retrieveExportWithContentIdentifierM ii dir cow loc cid dest mkkey p =
precheck docopy
retrieveExportWithContentIdentifierM :: IgnoreInodes -> RawFilePath -> CopyCoWTried -> ExportLocation -> ContentIdentifier -> FilePath -> Either Key (Annex Key) -> MeterUpdate -> Annex (Key, Verification)
retrieveExportWithContentIdentifierM ii dir cow loc cid dest gk p =
case gk of
Right mkkey -> do
go Nothing
k <- mkkey
return (k, UnVerified)
Left k -> do
v <- verifyKeyContentIncrementally DefaultVerify k go
return (k, v)
where
f = exportPath dir loc
f' = fromRawFilePath f
go iv = precheck (docopy iv)
docopy = ifM (liftIO $ tryCopyCoW cow f' dest p)
( do
k <- mkkey
postcheckcow (return k)
, docopynoncow
docopy iv = ifM (liftIO $ tryCopyCoW cow f' dest p)
( postcheckcow (liftIO $ maybe noop unableIncrementalVerifier iv)
, docopynoncow iv
)
docopynoncow = do
docopynoncow iv = do
#ifndef mingw32_HOST_OS
let open = do
-- Need a duplicate fd for the post check, since
-- hGetContentsMetered closes its handle.
-- Need a duplicate fd for the post check.
fd <- openFd f' ReadOnly Nothing defaultFileFlags
dupfd <- dup fd
h <- fdToHandle fd
@ -445,12 +451,11 @@ retrieveExportWithContentIdentifierM ii dir cow loc cid dest mkkey p =
let close = hClose
bracketIO open close $ \h -> do
#endif
liftIO $ hGetContentsMetered h p >>= L.writeFile dest
k <- mkkey
liftIO $ fileContentCopier h dest p iv
#ifndef mingw32_HOST_OS
postchecknoncow dupfd (return k)
postchecknoncow dupfd (return ())
#else
postchecknoncow (return k)
postchecknoncow (return ())
#endif
-- Check before copy, to avoid expensive copy of wrong file
@ -500,7 +505,7 @@ storeExportWithContentIdentifierM ii dir cow src _k loc overwritablecids p = do
liftIO $ createDirectoryUnder dir (toRawFilePath destdir)
withTmpFileIn destdir template $ \tmpf tmph -> do
liftIO $ hClose tmph
void $ fileCopier cow src tmpf p Nothing
void $ liftIO $ fileCopier cow src tmpf p Nothing
let tmpf' = toRawFilePath tmpf
resetAnnexFilePerm tmpf'
liftIO (getFileStatus tmpf) >>= liftIO . mkContentIdentifier ii tmpf' >>= \case

View file

@ -715,7 +715,7 @@ mkFileCopier remotewanthardlink (State _ _ copycowtried _ _) = do
where
copier src dest k p check verifyconfig = do
iv <- startVerifyKeyContentIncrementally verifyconfig k
fileCopier copycowtried src dest p iv >>= \case
liftIO (fileCopier copycowtried src dest p iv) >>= \case
Copied -> ifM check
( finishVerifyKeyContentIncrementally iv
, do

View file

@ -360,8 +360,7 @@ adjustExportImport' isexport isimport r rs = do
getkeycids ciddbv k >>= \case
(cid:_) -> do
l <- getfirstexportloc dbv k
void $ retrieveExportWithContentIdentifier (importActions r) l cid dest (pure k) p
return UnVerified
snd <$> retrieveExportWithContentIdentifier (importActions r) l cid dest (Left k) p
-- In case a content identifier is somehow missing,
-- try this instead.
[] -> if isexport

View file

@ -649,22 +649,31 @@ mkImportableContentsVersioned info = build . groupfiles
| otherwise =
i : removemostrecent mtime rest
retrieveExportWithContentIdentifierS3 :: S3HandleVar -> Remote -> RemoteStateHandle -> S3Info -> ExportLocation -> ContentIdentifier -> FilePath -> Annex Key -> MeterUpdate -> Annex Key
retrieveExportWithContentIdentifierS3 hv r rs info loc cid dest mkkey p = withS3Handle hv $ \case
Just h -> do
rewritePreconditionException $ retrieveHelper' h dest p Nothing $
limitGetToContentIdentifier cid $
S3.getObject (bucket info) o
k <- mkkey
case extractContentIdentifier cid o of
Right vid -> do
vids <- getS3VersionID rs k
unless (vid `elem` map Just vids) $
setS3VersionID info rs k vid
Left _ -> noop
return k
Nothing -> giveup $ needS3Creds (uuid r)
retrieveExportWithContentIdentifierS3 :: S3HandleVar -> Remote -> RemoteStateHandle -> S3Info -> ExportLocation -> ContentIdentifier -> FilePath -> Either Key (Annex Key) -> MeterUpdate -> Annex (Key, Verification)
retrieveExportWithContentIdentifierS3 hv r rs info loc cid dest gk p =
case gk of
Right _mkkey -> do
k <- go Nothing
return (k, UnVerified)
Left k -> do
v <- verifyKeyContentIncrementally DefaultVerify k
(void . go)
return (k, v)
where
go iv = withS3Handle hv $ \case
Just h -> do
rewritePreconditionException $ retrieveHelper' h dest p iv $
limitGetToContentIdentifier cid $
S3.getObject (bucket info) o
k <- either return id gk
case extractContentIdentifier cid o of
Right vid -> do
vids <- getS3VersionID rs k
unless (vid `elem` map Just vids) $
setS3VersionID info rs k vid
Left _ -> noop
return k
Nothing -> giveup $ needS3Creds (uuid r)
o = T.pack $ bucketExportLocation info loc
{- Catch exception getObject returns when a precondition is not met,