diff --git a/Annex/CopyFile.hs b/Annex/CopyFile.hs index 038873749d..39077f6616 100644 --- a/Annex/CopyFile.hs +++ b/Annex/CopyFile.hs @@ -56,7 +56,7 @@ data CopyMethod = CopiedCoW | Copied - being copied. But it is not finalized at the end. - - When copy-on-write is used, the IncrementalVerifier is not fed - - the content of the file. + - the content of the file, and verification using it will fail. - - Note that, when the destination file already exists, it's read both - to start calculating the hash, and also to verify that its content is @@ -71,7 +71,11 @@ fileCopier _ src dest meterupdate iv = docopy #else fileCopier copycowtried src dest meterupdate iv = ifM (liftIO $ tryCopyCoW copycowtried src dest meterupdate) - ( return CopiedCoW + ( do + -- Make sure the incremental verifier fails, + -- since we did not feed it. + liftIO $ maybe noop failIncremental iv + return CopiedCoW , docopy ) #endif diff --git a/CHANGELOG b/CHANGELOG index ddb4573ec4..965a751ecc 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -11,7 +11,7 @@ git-annex (8.20210804) UNRELEASED; urgency=medium * Special remotes now checksum content while it is being retrieved, instead of in a separate pass at the end. This is supported for all special remotes on Linux (except for web and bittorrent), and for a - few on other OSs (bup, ddar, gcrypt, glacier). + few on other OSs (directory, bup, ddar, gcrypt, glacier). -- Joey Hess Tue, 03 Aug 2021 12:22:45 -0400 diff --git a/Remote/Adb.hs b/Remote/Adb.hs index f18dcf68b9..5f07fe8aa7 100644 --- a/Remote/Adb.hs +++ b/Remote/Adb.hs @@ -191,7 +191,7 @@ store'' serial dest src canoverwrite = checkAdbInPath False $ do retrieve :: AndroidSerial -> AndroidPath -> Retriever retrieve serial adir = fileRetriever $ \dest k _p -> let src = androidLocation adir k - in retrieve' serial src dest + in retrieve' serial src (fromRawFilePath dest) retrieve' :: AndroidSerial -> AndroidPath -> FilePath -> Annex () retrieve' serial src dest = diff --git a/Remote/Directory.hs b/Remote/Directory.hs index b3bcd01347..6a34df6bd0 100644 --- a/Remote/Directory.hs +++ b/Remote/Directory.hs @@ -232,9 +232,9 @@ finalizeStoreGeneric d tmp dest = do retrieveKeyFileM :: RawFilePath -> ChunkConfig -> CopyCoWTried -> Retriever retrieveKeyFileM d (LegacyChunks _) _ = Legacy.retrieve locations d -retrieveKeyFileM d NoChunks cow = fileRetriever $ \dest k p -> do +retrieveKeyFileM d NoChunks cow = fileRetriever' $ \dest k p iv -> do src <- liftIO $ fromRawFilePath <$> getLocation d k - void $ fileCopier cow src dest p Nothing + void $ fileCopier cow src (fromRawFilePath dest) p iv retrieveKeyFileM d _ _ = byteRetriever $ \k sink -> sink =<< liftIO (L.readFile . fromRawFilePath =<< getLocation d k) diff --git a/Remote/External.hs b/Remote/External.hs index 9d35048548..156b97d5dd 100644 --- a/Remote/External.hs +++ b/Remote/External.hs @@ -237,7 +237,7 @@ retrieveKeyFileM :: External -> Retriever retrieveKeyFileM external = fileRetriever $ \d k p -> either giveup return =<< go d k p where - go d k p = handleRequestKey external (\sk -> TRANSFER Download sk d) k (Just p) $ \resp -> + go d k p = handleRequestKey external (\sk -> TRANSFER Download sk (fromRawFilePath d)) k (Just p) $ \resp -> case resp of TRANSFER_SUCCESS Download k' | k == k' -> result $ Right () @@ -810,7 +810,7 @@ checkUrlM external url = retrieveUrl :: Retriever retrieveUrl = fileRetriever $ \f k p -> do us <- getWebUrls k - unlessM (withUrlOptions $ downloadUrl k p us f) $ + unlessM (withUrlOptions $ downloadUrl k p us (fromRawFilePath f)) $ giveup "failed to download content" checkKeyUrl :: CheckPresent diff --git a/Remote/GCrypt.hs b/Remote/GCrypt.hs index f2b3efe98c..3b5e35ecce 100644 --- a/Remote/GCrypt.hs +++ b/Remote/GCrypt.hs @@ -418,7 +418,8 @@ retrieve' repo r rsyncopts accessmethod sink =<< liftIO (L.readFile $ gCryptLocation repo k) | Git.repoIsSsh repo = if accessShell r then fileRetriever $ \f k p -> do - ps <- Ssh.rsyncParamsRemote False r Download k f + ps <- Ssh.rsyncParamsRemote False r Download k + (fromRawFilePath f) (AssociatedFile Nothing) oh <- mkOutputHandler unlessM (Ssh.rsyncHelper oh (Just p) ps) $ diff --git a/Remote/GitLFS.hs b/Remote/GitLFS.hs index 570cea6205..f3e3d37e49 100644 --- a/Remote/GitLFS.hs +++ b/Remote/GitLFS.hs @@ -497,7 +497,7 @@ retrieve rs h = fileRetriever $ \dest k p -> getLFSEndpoint LFS.RequestDownload Nothing -> giveup "unable to parse git-lfs server download url" Just req -> do uo <- getUrlOptions - liftIO $ downloadConduit p req dest uo + liftIO $ downloadConduit p req (fromRawFilePath dest) uo -- Since git-lfs does not support removing content, nothing needs to be -- done to lock content in the remote, except for checking that the content diff --git a/Remote/Helper/Special.hs b/Remote/Helper/Special.hs index 01f57c7340..5528470da1 100644 --- a/Remote/Helper/Special.hs +++ b/Remote/Helper/Special.hs @@ -20,6 +20,7 @@ module Remote.Helper.Special ( fileStorer, byteStorer, fileRetriever, + fileRetriever', byteRetriever, storeKeyDummy, retrieveKeyFileDummy, @@ -115,10 +116,9 @@ byteRetriever a k _m _miv callback = a k (callback . ByteContent) -- retrieves data. The incremental verifier is updated in the background as -- the action writes to the file, but may not be updated with the entire -- content of the file. -fileRetriever :: (FilePath -> Key -> MeterUpdate -> Annex ()) -> Retriever -fileRetriever a k m miv callback = do - f <- prepTmp k - let retrieve = a (fromRawFilePath f) k m +fileRetriever :: (RawFilePath -> Key -> MeterUpdate -> Annex ()) -> Retriever +fileRetriever a = fileRetriever' $ \f k m miv -> do + let retrieve = a f k m case miv of Nothing -> retrieve Just iv -> do @@ -128,6 +128,15 @@ fileRetriever a k m miv callback = do liftIO $ atomically $ putTMVar finished () liftIO (wait t) retrieve `finally` finishtail + +{- A Retriever that writes the content of a Key to a provided file. + - The action is responsible for updating the progress meter and the + - incremental verifier as it retrieves data. + -} +fileRetriever' :: (RawFilePath -> Key -> MeterUpdate -> Maybe IncrementalVerifier -> Annex ()) -> Retriever +fileRetriever' a k m miv callback = do + f <- prepTmp k + a f k m miv pruneTmpWorkDirBefore f (callback . FileContent . fromRawFilePath) {- The base Remote that is provided to specialRemote needs to have diff --git a/Remote/Hook.hs b/Remote/Hook.hs index 8e0d4dc014..3c715ac5fe 100644 --- a/Remote/Hook.hs +++ b/Remote/Hook.hs @@ -162,7 +162,7 @@ store h = fileStorer $ \k src _p -> runHook h "store" k (Just src) retrieve :: HookName -> Retriever retrieve h = fileRetriever $ \d k _p -> - unlessM (runHook' h "retrieve" k (Just d) $ return True) $ + unlessM (runHook' h "retrieve" k (Just (fromRawFilePath d)) $ return True) $ giveup "failed to retrieve content" remove :: HookName -> Remover diff --git a/Remote/Rsync.hs b/Remote/Rsync.hs index 60bddd651a..81d60311e4 100644 --- a/Remote/Rsync.hs +++ b/Remote/Rsync.hs @@ -235,8 +235,8 @@ storeGeneric' o meterupdate basedest populatedest = withRsyncScratchDir $ \tmp - ] else return False -retrieve :: RsyncOpts -> FilePath -> Key -> MeterUpdate -> Annex () -retrieve o f k p = rsyncRetrieveKey o k f (Just p) +retrieve :: RsyncOpts -> RawFilePath -> Key -> MeterUpdate -> Annex () +retrieve o f k p = rsyncRetrieveKey o k (fromRawFilePath f) (Just p) retrieveCheap :: RsyncOpts -> Key -> AssociatedFile -> FilePath -> Annex () retrieveCheap o k _af f = ifM (preseedTmp k f) diff --git a/Remote/S3.hs b/Remote/S3.hs index 07aa40da4b..f2897e658d 100644 --- a/Remote/S3.hs +++ b/Remote/S3.hs @@ -407,13 +407,13 @@ retrieve hv r rs c info = fileRetriever $ \f k p -> withS3Handle hv $ \case Left failreason -> do warning failreason giveup "cannot download content" - Right loc -> retrieveHelper info h loc f p + Right loc -> retrieveHelper info h loc (fromRawFilePath f) p Nothing -> getPublicWebUrls' (uuid r) rs info c k >>= \case Left failreason -> do warning failreason giveup "cannot download content" - Right us -> unlessM (withUrlOptions $ downloadUrl k p us f) $ + Right us -> unlessM (withUrlOptions $ downloadUrl k p us (fromRawFilePath f)) $ giveup "failed to download content" retrieveHelper :: S3Info -> S3Handle -> (Either S3.Object S3VersionID) -> FilePath -> MeterUpdate -> Annex () diff --git a/Remote/WebDAV.hs b/Remote/WebDAV.hs index 075b3ceee9..0001f08879 100644 --- a/Remote/WebDAV.hs +++ b/Remote/WebDAV.hs @@ -170,9 +170,9 @@ finalizeStore dav tmp dest = do retrieve :: DavHandleVar -> ChunkConfig -> Retriever retrieve hv cc = fileRetriever $ \d k p -> withDavHandle hv $ \dav -> case cc of - LegacyChunks _ -> retrieveLegacyChunked d k p dav + LegacyChunks _ -> retrieveLegacyChunked (fromRawFilePath d) k p dav _ -> liftIO $ - goDAV dav $ retrieveHelper (keyLocation k) d p + goDAV dav $ retrieveHelper (keyLocation k) (fromRawFilePath d) p retrieveHelper :: DavLocation -> FilePath -> MeterUpdate -> DAVT IO () retrieveHelper loc d p = do diff --git a/doc/todo/OPT__58_____34__bundle__34___get_+_check___40__of_checksum__41___in_a_single_operation/comment_16_fbbcf1d8b35078274cfe322cea6de21c._comment b/doc/todo/OPT__58_____34__bundle__34___get_+_check___40__of_checksum__41___in_a_single_operation/comment_16_fbbcf1d8b35078274cfe322cea6de21c._comment index 7bd5179c1f..fe50195cf3 100644 --- a/doc/todo/OPT__58_____34__bundle__34___get_+_check___40__of_checksum__41___in_a_single_operation/comment_16_fbbcf1d8b35078274cfe322cea6de21c._comment +++ b/doc/todo/OPT__58_____34__bundle__34___get_+_check___40__of_checksum__41___in_a_single_operation/comment_16_fbbcf1d8b35078274cfe322cea6de21c._comment @@ -6,12 +6,10 @@ The concurrency problem is fixed now. As well as the web special remote, these do not do incremental hashing -still: directory, gitlfs, webdav, S3. - -The ones that do are: external, adb, gcrypt, hook, rsync - -The issue with directory etc is that they open the file +still: gitlfs, webdav, S3. Problem is, these open the file for write. This prevents tailVerify re-opening it for read, because the haskell RTS actually does not allowing opening a file for read that it has -open for write. +open for write. This problem has already been fixed for directory. + +The ones that do are: external, adb, gcrypt, hook, rsync, directory """]]