incremental verify for directory special remote

Added fileRetriever', which will let the remaining special remotes
eventually also support incremental verify.

Sponsored-by: Dartmouth College's DANDI project
Joey Hess · 2021-08-16 16:22:00 -04:00
commit b1622eb932 · parent a644f729ce
13 changed files with 39 additions and 27 deletions
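
For orientation, the two retriever constructors in Remote/Helper/Special.hs
after this commit, with signatures as they appear in the hunks below (the
comments are editorial, not from the source):

    -- Old interface: the helper itself drives the IncrementalVerifier,
    -- by tailing the file as the action writes it.
    fileRetriever :: (RawFilePath -> Key -> MeterUpdate -> Annex ()) -> Retriever

    -- New interface: the action receives the Maybe IncrementalVerifier
    -- and is responsible for updating it (or failing it) as it retrieves.
    fileRetriever' :: (RawFilePath -> Key -> MeterUpdate -> Maybe IncrementalVerifier -> Annex ()) -> Retriever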

Annex/CopyFile.hs

@@ -56,7 +56,7 @@ data CopyMethod = CopiedCoW | Copied
  - being copied. But it is not finalized at the end.
  -
  - When copy-on-write is used, the IncrementalVerifier is not fed
- - the content of the file.
+ - the content of the file, and verification using it will fail.
  -
  - Note that, when the destination file already exists, it's read both
  - to start calculating the hash, and also to verify that its content is
@@ -71,7 +71,11 @@ fileCopier _ src dest meterupdate iv = docopy
 #else
 fileCopier copycowtried src dest meterupdate iv =
 	ifM (liftIO $ tryCopyCoW copycowtried src dest meterupdate)
-		( return CopiedCoW
+		( do
+			-- Make sure the incremental verifier fails,
+			-- since we did not feed it.
+			liftIO $ maybe noop failIncremental iv
+			return CopiedCoW
 		, docopy
 		)
 #endif
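
The failIncremental call above matters because a reflink (copy-on-write)
copy never streams the file's bytes through the verifier; left untouched,
the verifier could later report a bogus success over a hash of nothing.
A minimal self-contained sketch of that invariant, with a toy type
standing in for git-annex's IncrementalVerifier:

    import Data.IORef
    import qualified Data.ByteString as B

    -- Toy stand-in for IncrementalVerifier: it can be fed bytes, or be
    -- marked failed so verification falls back to re-reading the file.
    data IV = IV
        { ivUpdate :: B.ByteString -> IO ()
        , ivFailed :: IORef Bool
        }

    -- Analogue of failIncremental in the hunk above.
    failIV :: IV -> IO ()
    failIV iv = writeIORef (ivFailed iv) True

    main :: IO ()
    main = do
        failed <- newIORef False
        let iv = IV (\_ -> return ()) failed
        -- Copy-on-write path: the data was reflinked, never fed to
        -- ivUpdate, so poison the verifier just as fileCopier now does.
        maybe (return ()) failIV (Just iv)
        readIORef failed >>= print  -- True: a later verify must re-read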

CHANGELOG

@@ -11,7 +11,7 @@ git-annex (8.20210804) UNRELEASED; urgency=medium
   * Special remotes now checksum content while it is being retrieved,
     instead of in a separate pass at the end. This is supported for all
     special remotes on Linux (except for web and bittorrent), and for a
-    few on other OSs (bup, ddar, gcrypt, glacier).
+    few on other OSs (directory, bup, ddar, gcrypt, glacier).
 
  -- Joey Hess <id@joeyh.name>  Tue, 03 Aug 2021 12:22:45 -0400

Remote/Adb.hs

@@ -191,7 +191,7 @@ store'' serial dest src canoverwrite = checkAdbInPath False $ do
 retrieve :: AndroidSerial -> AndroidPath -> Retriever
 retrieve serial adir = fileRetriever $ \dest k _p ->
 	let src = androidLocation adir k
-	in retrieve' serial src dest
+	in retrieve' serial src (fromRawFilePath dest)
 
 retrieve' :: AndroidSerial -> AndroidPath -> FilePath -> Annex ()
 retrieve' serial src dest =
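
Most of the remaining hunks are the same mechanical change: fileRetriever
now hands the action a RawFilePath, so call sites that still want a
String-based FilePath convert at the boundary. A simplified sketch of what
those conversions do (git-annex's real fromRawFilePath is encoding-aware;
B8.unpack here is a lossy stand-in):

    import qualified Data.ByteString.Char8 as B8

    -- RawFilePath is a ByteString-backed path, as in the posix
    -- ByteString API.
    type RawFilePath = B8.ByteString

    fromRawFilePath :: RawFilePath -> FilePath
    fromRawFilePath = B8.unpack  -- simplified; not encoding-aware

    toRawFilePath :: FilePath -> RawFilePath
    toRawFilePath = B8.pack      -- simplified; not encoding-aware

    main :: IO ()
    main = putStrLn (fromRawFilePath (toRawFilePath "/tmp/example"))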

Remote/Directory.hs

@@ -232,9 +232,9 @@ finalizeStoreGeneric d tmp dest = do
 
 retrieveKeyFileM :: RawFilePath -> ChunkConfig -> CopyCoWTried -> Retriever
 retrieveKeyFileM d (LegacyChunks _) _ = Legacy.retrieve locations d
-retrieveKeyFileM d NoChunks cow = fileRetriever $ \dest k p -> do
+retrieveKeyFileM d NoChunks cow = fileRetriever' $ \dest k p iv -> do
 	src <- liftIO $ fromRawFilePath <$> getLocation d k
-	void $ fileCopier cow src dest p Nothing
+	void $ fileCopier cow src (fromRawFilePath dest) p iv
 retrieveKeyFileM d _ _ = byteRetriever $ \k sink ->
 	sink =<< liftIO (L.readFile . fromRawFilePath =<< getLocation d k)
 
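
This hunk is the point of the commit: the directory remote switches to
fileRetriever' and threads the verifier into fileCopier, so the non-CoW
copy path hashes each chunk as it is written instead of re-reading the
whole file afterwards. A self-contained sketch of that copy-and-feed loop
(names hypothetical; git-annex's fileCopier is more involved):

    import Data.IORef
    import qualified Data.ByteString as B
    import System.IO

    -- Copy src to dest, handing every written chunk to the feed action
    -- (the incremental verifier's update function, in the real code).
    copyFeeding :: (B.ByteString -> IO ()) -> FilePath -> FilePath -> IO ()
    copyFeeding feed src dest =
        withBinaryFile src ReadMode $ \hin ->
        withBinaryFile dest WriteMode $ \hout ->
            let loop = do
                    chunk <- B.hGetSome hin 65536
                    if B.null chunk
                        then return ()
                        else do
                            B.hPut hout chunk
                            feed chunk  -- verifier sees bytes during the copy
                            loop
            in loop

    main :: IO ()
    main = do
        B.writeFile "/tmp/src.demo" (B.replicate 5 120)
        seen <- newIORef (0 :: Int)
        copyFeeding (\c -> modifyIORef' seen (+ B.length c))
            "/tmp/src.demo" "/tmp/dest.demo"
        readIORef seen >>= print  -- 5: no separate hashing pass needed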

Remote/External.hs

@@ -237,7 +237,7 @@ retrieveKeyFileM :: External -> Retriever
 retrieveKeyFileM external = fileRetriever $ \d k p ->
 	either giveup return =<< go d k p
   where
-	go d k p = handleRequestKey external (\sk -> TRANSFER Download sk d) k (Just p) $ \resp ->
+	go d k p = handleRequestKey external (\sk -> TRANSFER Download sk (fromRawFilePath d)) k (Just p) $ \resp ->
 		case resp of
 			TRANSFER_SUCCESS Download k'
 				| k == k' -> result $ Right ()
@@ -810,7 +810,7 @@ checkUrlM external url =
 retrieveUrl :: Retriever
 retrieveUrl = fileRetriever $ \f k p -> do
 	us <- getWebUrls k
-	unlessM (withUrlOptions $ downloadUrl k p us f) $
+	unlessM (withUrlOptions $ downloadUrl k p us (fromRawFilePath f)) $
 		giveup "failed to download content"
 
 checkKeyUrl :: CheckPresent

Remote/GCrypt.hs

@@ -418,7 +418,8 @@ retrieve' repo r rsyncopts accessmethod
 		sink =<< liftIO (L.readFile $ gCryptLocation repo k)
 	| Git.repoIsSsh repo = if accessShell r
 		then fileRetriever $ \f k p -> do
-			ps <- Ssh.rsyncParamsRemote False r Download k f
+			ps <- Ssh.rsyncParamsRemote False r Download k
+				(fromRawFilePath f)
 				(AssociatedFile Nothing)
 			oh <- mkOutputHandler
 			unlessM (Ssh.rsyncHelper oh (Just p) ps) $

Remote/GitLFS.hs

@@ -497,7 +497,7 @@ retrieve rs h = fileRetriever $ \dest k p -> getLFSEndpoint LFS.RequestDownload
 		Nothing -> giveup "unable to parse git-lfs server download url"
 		Just req -> do
 			uo <- getUrlOptions
-			liftIO $ downloadConduit p req dest uo
+			liftIO $ downloadConduit p req (fromRawFilePath dest) uo
 
 -- Since git-lfs does not support removing content, nothing needs to be
 -- done to lock content in the remote, except for checking that the content

Remote/Helper/Special.hs

@@ -20,6 +20,7 @@ module Remote.Helper.Special (
 	fileStorer,
 	byteStorer,
 	fileRetriever,
+	fileRetriever',
 	byteRetriever,
 	storeKeyDummy,
 	retrieveKeyFileDummy,
@@ -115,10 +116,9 @@ byteRetriever a k _m _miv callback = a k (callback . ByteContent)
 -- retrieves data. The incremental verifier is updated in the background as
 -- the action writes to the file, but may not be updated with the entire
 -- content of the file.
-fileRetriever :: (FilePath -> Key -> MeterUpdate -> Annex ()) -> Retriever
-fileRetriever a k m miv callback = do
-	f <- prepTmp k
-	let retrieve = a (fromRawFilePath f) k m
+fileRetriever :: (RawFilePath -> Key -> MeterUpdate -> Annex ()) -> Retriever
+fileRetriever a = fileRetriever' $ \f k m miv -> do
+	let retrieve = a f k m
 	case miv of
 		Nothing -> retrieve
 		Just iv -> do
@@ -128,6 +128,15 @@ fileRetriever a k m miv callback = do
 				liftIO $ atomically $ putTMVar finished ()
 				liftIO (wait t)
 			retrieve `finally` finishtail
+
+{- A Retriever that writes the content of a Key to a provided file.
+ - The action is responsible for updating the progress meter and the
+ - incremental verifier as it retrieves data.
+ -}
+fileRetriever' :: (RawFilePath -> Key -> MeterUpdate -> Maybe IncrementalVerifier -> Annex ()) -> Retriever
+fileRetriever' a k m miv callback = do
+	f <- prepTmp k
+	a f k m miv
 	pruneTmpWorkDirBefore f (callback . FileContent . fromRawFilePath)
 
 {- The base Remote that is provided to specialRemote needs to have
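
A self-contained toy model of the refactoring (IO in place of Annex, local
simplified types, all names invented for this sketch): fileRetriever' owns
the shared plumbing, and fileRetriever becomes a wrapper over it. The real
fileRetriever also spawns tailVerify to feed the verifier from the growing
file; that concurrency is omitted here:

    import Data.IORef
    import qualified Data.ByteString as B

    type Key = String
    newtype IV = IV { ivFeed :: B.ByteString -> IO () }

    -- Shared plumbing: allocate a tmp file (stand-in for prepTmp), run
    -- the action, then hand the file to the callback (stand-in for
    -- pruneTmpWorkDirBefore f (callback . FileContent ...)).
    fileRetriever' :: (FilePath -> Key -> Maybe IV -> IO ())
        -> Key -> Maybe IV -> (FilePath -> IO r) -> IO r
    fileRetriever' a k miv callback = do
        let f = "/tmp/demo-" ++ k
        a f k miv
        callback f

    -- Old-style interface: the action never sees the verifier.
    fileRetriever :: (FilePath -> Key -> IO ())
        -> Key -> Maybe IV -> (FilePath -> IO r) -> IO r
    fileRetriever a = fileRetriever' $ \f k _miv -> a f k

    main :: IO ()
    main = do
        seen <- newIORef (0 :: Int)
        let iv = IV (\c -> modifyIORef' seen (+ B.length c))
        -- An action written against the new interface feeds iv itself:
        fileRetriever'
            (\f _k miv -> do
                let chunk = B.replicate 3 120
                B.writeFile f chunk
                maybe (return ()) (\v -> ivFeed v chunk) miv)
            "key1" (Just iv)
            (\f -> putStrLn ("retrieved to " ++ f))
        -- An old-style action cannot feed iv; the real helper would
        -- tail-verify the file instead (omitted in this model).
        fileRetriever
            (\f _k -> writeFile f "abc")
            "key2" (Just iv)
            (\f -> putStrLn ("retrieved to " ++ f))
        readIORef seen >>= print  -- 3: only the first action fed iv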

Remote/Hook.hs

@@ -162,7 +162,7 @@ store h = fileStorer $ \k src _p -> runHook h "store" k (Just src)
 
 retrieve :: HookName -> Retriever
 retrieve h = fileRetriever $ \d k _p ->
-	unlessM (runHook' h "retrieve" k (Just d) $ return True) $
+	unlessM (runHook' h "retrieve" k (Just (fromRawFilePath d)) $ return True) $
 		giveup "failed to retrieve content"
 
 remove :: HookName -> Remover

Remote/Rsync.hs

@@ -235,8 +235,8 @@ storeGeneric' o meterupdate basedest populatedest = withRsyncScratchDir $ \tmp -
 			]
 		else return False
 
-retrieve :: RsyncOpts -> FilePath -> Key -> MeterUpdate -> Annex ()
-retrieve o f k p = rsyncRetrieveKey o k f (Just p)
+retrieve :: RsyncOpts -> RawFilePath -> Key -> MeterUpdate -> Annex ()
+retrieve o f k p = rsyncRetrieveKey o k (fromRawFilePath f) (Just p)
 
 retrieveCheap :: RsyncOpts -> Key -> AssociatedFile -> FilePath -> Annex ()
 retrieveCheap o k _af f = ifM (preseedTmp k f)

Remote/S3.hs

@@ -407,13 +407,13 @@ retrieve hv r rs c info = fileRetriever $ \f k p -> withS3Handle hv $ \case
 			Left failreason -> do
 				warning failreason
 				giveup "cannot download content"
-			Right loc -> retrieveHelper info h loc f p
+			Right loc -> retrieveHelper info h loc (fromRawFilePath f) p
 	Nothing ->
 		getPublicWebUrls' (uuid r) rs info c k >>= \case
 			Left failreason -> do
 				warning failreason
 				giveup "cannot download content"
-			Right us -> unlessM (withUrlOptions $ downloadUrl k p us f) $
+			Right us -> unlessM (withUrlOptions $ downloadUrl k p us (fromRawFilePath f)) $
 				giveup "failed to download content"
 
 retrieveHelper :: S3Info -> S3Handle -> (Either S3.Object S3VersionID) -> FilePath -> MeterUpdate -> Annex ()

Remote/WebDAV.hs

@@ -170,9 +170,9 @@ finalizeStore dav tmp dest = do
 retrieve :: DavHandleVar -> ChunkConfig -> Retriever
 retrieve hv cc = fileRetriever $ \d k p ->
 	withDavHandle hv $ \dav -> case cc of
-		LegacyChunks _ -> retrieveLegacyChunked d k p dav
+		LegacyChunks _ -> retrieveLegacyChunked (fromRawFilePath d) k p dav
 		_ -> liftIO $
-			goDAV dav $ retrieveHelper (keyLocation k) d p
+			goDAV dav $ retrieveHelper (keyLocation k) (fromRawFilePath d) p
 
 retrieveHelper :: DavLocation -> FilePath -> MeterUpdate -> DAVT IO ()
 retrieveHelper loc d p = do


@@ -6,12 +6,10 @@
 The concurrency problem is fixed now.
 
 As well as the web special remote, these do not do incremental hashing
-still: directory, gitlfs, webdav, S3.
-
-The ones that do are: external, adb, gcrypt, hook, rsync
-
-The issue with directory etc is that they open the file
+still: gitlfs, webdav, S3. Problem is, these open the file
 for write. This prevents tailVerify re-opening it for read, because the
 haskell RTS actually does not allowing opening a file for read that it has
-open for write.
+open for write. This problem has already been fixed for directory.
+
+The ones that do are: external, adb, gcrypt, hook, rsync, directory
 """]]