incremental verify for directory special remote

Added fileRetriever', which will let the remaining special remotes
eventually also support incremental verify.

Sponsored-by: Dartmouth College's DANDI project
Joey Hess 2021-08-16 16:22:00 -04:00
parent a644f729ce
commit b1622eb932
13 changed files with 39 additions and 27 deletions
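
For orientation before the per-file changes: the commit splits the old fileRetriever into a thin wrapper around a new fileRetriever', whose action is also handed the incremental verifier. A minimal sketch of the resulting API, with the signatures taken from the Remote/Helper/Special.hs hunk below:

    -- After this commit (signatures as in the diff below):
    fileRetriever  :: (RawFilePath -> Key -> MeterUpdate -> Annex ()) -> Retriever
    fileRetriever' :: (RawFilePath -> Key -> MeterUpdate -> Maybe IncrementalVerifier -> Annex ()) -> Retriever

    -- fileRetriever keeps its old behavior: the verifier is fed in the
    -- background by tailing the file as the action writes it. A remote
    -- whose action can feed (or fail) the verifier itself, as
    -- Remote/Directory.hs does below, uses fileRetriever' directly and
    -- passes iv through instead of Nothing.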

Annex/CopyFile.hs

@@ -56,7 +56,7 @@ data CopyMethod = CopiedCoW | Copied
  - being copied. But it is not finalized at the end.
  -
  - When copy-on-write is used, the IncrementalVerifier is not fed
- - the content of the file.
+ - the content of the file, and verification using it will fail.
  -
  - Note that, when the destination file already exists, it's read both
  - to start calculating the hash, and also to verify that its content is
@@ -71,7 +71,11 @@ fileCopier _ src dest meterupdate iv = docopy
 #else
 fileCopier copycowtried src dest meterupdate iv =
 	ifM (liftIO $ tryCopyCoW copycowtried src dest meterupdate)
-		( return CopiedCoW
+		( do
+			-- Make sure the incremental verifier fails,
+			-- since we did not feed it.
+			liftIO $ maybe noop failIncremental iv
+			return CopiedCoW
 		, docopy
 		)
 #endif
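
Why the CoW branch must fail the verifier: a reflink copy clones the file inside the kernel, so no content ever passes through this process and the verifier is never fed. A sketch of the contract, assuming a record shape for IncrementalVerifier (only failIncremental actually appears in this diff; updateIncremental is an assumed field name):

    import Data.ByteString (ByteString)

    -- Assumed shape, for illustration only; the real IncrementalVerifier
    -- lives elsewhere in git-annex and is not part of this diff.
    data IncrementalVerifier = IncrementalVerifier
        { updateIncremental :: ByteString -> IO ()  -- feed each chunk as it is written
        , failIncremental   :: IO ()                -- mark verification as failed
        }

    -- The plain copy loop can call updateIncremental on every chunk it
    -- writes. The CoW path writes no chunks itself, and therefore calls
    -- failIncremental, as the hunk above does, rather than letting the
    -- verifier finalize having seen none of the content.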

CHANGELOG

@@ -11,7 +11,7 @@ git-annex (8.20210804) UNRELEASED; urgency=medium
   * Special remotes now checksum content while it is being retrieved,
     instead of in a separate pass at the end. This is supported for all
     special remotes on Linux (except for web and bittorrent), and for a
-    few on other OSs (bup, ddar, gcrypt, glacier).
+    few on other OSs (directory, bup, ddar, gcrypt, glacier).
 
  -- Joey Hess <id@joeyh.name>  Tue, 03 Aug 2021 12:22:45 -0400

Remote/Adb.hs

@@ -191,7 +191,7 @@ store'' serial dest src canoverwrite = checkAdbInPath False $ do
 retrieve :: AndroidSerial -> AndroidPath -> Retriever
 retrieve serial adir = fileRetriever $ \dest k _p ->
 	let src = androidLocation adir k
-	in retrieve' serial src dest
+	in retrieve' serial src (fromRawFilePath dest)
 
 retrieve' :: AndroidSerial -> AndroidPath -> FilePath -> Annex ()
 retrieve' serial src dest =

Remote/Directory.hs

@@ -232,9 +232,9 @@ finalizeStoreGeneric d tmp dest = do
 retrieveKeyFileM :: RawFilePath -> ChunkConfig -> CopyCoWTried -> Retriever
 retrieveKeyFileM d (LegacyChunks _) _ = Legacy.retrieve locations d
-retrieveKeyFileM d NoChunks cow = fileRetriever $ \dest k p -> do
+retrieveKeyFileM d NoChunks cow = fileRetriever' $ \dest k p iv -> do
 	src <- liftIO $ fromRawFilePath <$> getLocation d k
-	void $ fileCopier cow src dest p Nothing
+	void $ fileCopier cow src (fromRawFilePath dest) p iv
 retrieveKeyFileM d _ _ = byteRetriever $ \k sink ->
 	sink =<< liftIO (L.readFile . fromRawFilePath =<< getLocation d k)

Remote/External.hs

@@ -237,7 +237,7 @@ retrieveKeyFileM :: External -> Retriever
 retrieveKeyFileM external = fileRetriever $ \d k p ->
 	either giveup return =<< go d k p
   where
-	go d k p = handleRequestKey external (\sk -> TRANSFER Download sk d) k (Just p) $ \resp ->
+	go d k p = handleRequestKey external (\sk -> TRANSFER Download sk (fromRawFilePath d)) k (Just p) $ \resp ->
 		case resp of
 			TRANSFER_SUCCESS Download k'
 				| k == k' -> result $ Right ()
@@ -810,7 +810,7 @@ checkUrlM external url =
 retrieveUrl :: Retriever
 retrieveUrl = fileRetriever $ \f k p -> do
 	us <- getWebUrls k
-	unlessM (withUrlOptions $ downloadUrl k p us f) $
+	unlessM (withUrlOptions $ downloadUrl k p us (fromRawFilePath f)) $
 		giveup "failed to download content"
 
 checkKeyUrl :: CheckPresent

Remote/GCrypt.hs

@@ -418,7 +418,8 @@ retrieve' repo r rsyncopts accessmethod
 		sink =<< liftIO (L.readFile $ gCryptLocation repo k)
 	| Git.repoIsSsh repo = if accessShell r
 		then fileRetriever $ \f k p -> do
-			ps <- Ssh.rsyncParamsRemote False r Download k f
+			ps <- Ssh.rsyncParamsRemote False r Download k
+				(fromRawFilePath f)
 				(AssociatedFile Nothing)
 			oh <- mkOutputHandler
 			unlessM (Ssh.rsyncHelper oh (Just p) ps) $

Remote/GitLFS.hs

@@ -497,7 +497,7 @@ retrieve rs h = fileRetriever $ \dest k p -> getLFSEndpoint LFS.RequestDownload
 		Nothing -> giveup "unable to parse git-lfs server download url"
 		Just req -> do
 			uo <- getUrlOptions
-			liftIO $ downloadConduit p req dest uo
+			liftIO $ downloadConduit p req (fromRawFilePath dest) uo
 
 -- Since git-lfs does not support removing content, nothing needs to be
 -- done to lock content in the remote, except for checking that the content

Remote/Helper/Special.hs

@@ -20,6 +20,7 @@ module Remote.Helper.Special (
 	fileStorer,
 	byteStorer,
 	fileRetriever,
+	fileRetriever',
 	byteRetriever,
 	storeKeyDummy,
 	retrieveKeyFileDummy,
@@ -115,10 +116,9 @@ byteRetriever a k _m _miv callback = a k (callback . ByteContent)
 -- retrieves data. The incremental verifier is updated in the background as
 -- the action writes to the file, but may not be updated with the entire
 -- content of the file.
-fileRetriever :: (FilePath -> Key -> MeterUpdate -> Annex ()) -> Retriever
-fileRetriever a k m miv callback = do
-	f <- prepTmp k
-	let retrieve = a (fromRawFilePath f) k m
+fileRetriever :: (RawFilePath -> Key -> MeterUpdate -> Annex ()) -> Retriever
+fileRetriever a = fileRetriever' $ \f k m miv -> do
+	let retrieve = a f k m
 	case miv of
 		Nothing -> retrieve
 		Just iv -> do
@@ -128,6 +128,15 @@ fileRetriever a k m miv callback = do
 			liftIO $ atomically $ putTMVar finished ()
 			liftIO (wait t)
 		retrieve `finally` finishtail
+
+{- A Retriever that writes the content of a Key to a provided file.
+ - The action is responsible for updating the progress meter and the
+ - incremental verifier as it retrieves data.
+ -}
+fileRetriever' :: (RawFilePath -> Key -> MeterUpdate -> Maybe IncrementalVerifier -> Annex ()) -> Retriever
+fileRetriever' a k m miv callback = do
+	f <- prepTmp k
+	a f k m miv
+	pruneTmpWorkDirBefore f (callback . FileContent . fromRawFilePath)
 
 {- The base Remote that is provided to specialRemote needs to have
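
The diff elides the middle of fileRetriever's Just iv branch between the two hunks above. Based on the surrounding context lines, the tail-verify pattern it uses looks roughly like this (a reconstruction sketch; tailVerify's exact signature is not shown in this diff):

    Just iv -> do
        -- sketch of the elided lines: follow the file in the background,
        -- feeding iv as the action writes, and make the action wait for
        -- that background reader before returning
        finished <- liftIO newEmptyTMVarIO
        t <- liftIO $ async $ tailVerify iv f finished
        let finishtail = do
                liftIO $ atomically $ putTMVar finished ()
                liftIO (wait t)
        retrieve `finally` finishtail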

Remote/Hook.hs

@@ -162,7 +162,7 @@ store h = fileStorer $ \k src _p -> runHook h "store" k (Just src)
 retrieve :: HookName -> Retriever
 retrieve h = fileRetriever $ \d k _p ->
-	unlessM (runHook' h "retrieve" k (Just d) $ return True) $
+	unlessM (runHook' h "retrieve" k (Just (fromRawFilePath d)) $ return True) $
 		giveup "failed to retrieve content"
 
 remove :: HookName -> Remover

Remote/Rsync.hs

@@ -235,8 +235,8 @@ storeGeneric' o meterupdate basedest populatedest = withRsyncScratchDir $ \tmp -
 		]
 	else return False
 
-retrieve :: RsyncOpts -> FilePath -> Key -> MeterUpdate -> Annex ()
-retrieve o f k p = rsyncRetrieveKey o k f (Just p)
+retrieve :: RsyncOpts -> RawFilePath -> Key -> MeterUpdate -> Annex ()
+retrieve o f k p = rsyncRetrieveKey o k (fromRawFilePath f) (Just p)
 
 retrieveCheap :: RsyncOpts -> Key -> AssociatedFile -> FilePath -> Annex ()
 retrieveCheap o k _af f = ifM (preseedTmp k f)

Remote/S3.hs

@@ -407,13 +407,13 @@ retrieve hv r rs c info = fileRetriever $ \f k p -> withS3Handle hv $ \case
 			Left failreason -> do
 				warning failreason
 				giveup "cannot download content"
-			Right loc -> retrieveHelper info h loc f p
+			Right loc -> retrieveHelper info h loc (fromRawFilePath f) p
 	Nothing ->
 		getPublicWebUrls' (uuid r) rs info c k >>= \case
 			Left failreason -> do
 				warning failreason
 				giveup "cannot download content"
-			Right us -> unlessM (withUrlOptions $ downloadUrl k p us f) $
+			Right us -> unlessM (withUrlOptions $ downloadUrl k p us (fromRawFilePath f)) $
 				giveup "failed to download content"
 
 retrieveHelper :: S3Info -> S3Handle -> (Either S3.Object S3VersionID) -> FilePath -> MeterUpdate -> Annex ()

Remote/WebDAV.hs

@@ -170,9 +170,9 @@ finalizeStore dav tmp dest = do
 retrieve :: DavHandleVar -> ChunkConfig -> Retriever
 retrieve hv cc = fileRetriever $ \d k p ->
 	withDavHandle hv $ \dav -> case cc of
-		LegacyChunks _ -> retrieveLegacyChunked d k p dav
+		LegacyChunks _ -> retrieveLegacyChunked (fromRawFilePath d) k p dav
 		_ -> liftIO $
-			goDAV dav $ retrieveHelper (keyLocation k) d p
+			goDAV dav $ retrieveHelper (keyLocation k) (fromRawFilePath d) p
 
 retrieveHelper :: DavLocation -> FilePath -> MeterUpdate -> DAVT IO ()
 retrieveHelper loc d p = do


@@ -6,12 +6,10 @@
 The concurrency problem is fixed now.
 
 As well as the web special remote, these do not do incremental hashing
-still: directory, gitlfs, webdav, S3.
-
-The ones that do are: external, adb, gcrypt, hook, rsync
-
-The issue with directory etc is that they open the file
+still: gitlfs, webdav, S3. Problem is, these open the file
 for write. This prevents tailVerify re-opening it for read, because the
 haskell RTS actually does not allow opening a file for read that it has
-open for write.
+open for write. This problem has already been fixed for directory.
+
+The ones that do are: external, adb, gcrypt, hook, rsync, directory
 """]]