incremental verify for directory special remote
Added fileRetriever', which will let the remaining special remotes
eventually also support incremental verify.

Sponsored-by: Dartmouth College's DANDI project
This commit is contained in:
parent  a644f729ce
commit  b1622eb932

13 changed files with 39 additions and 27 deletions
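The hunks below all hang off one refactor in Remote.Helper.Special:
fileRetriever, which hides the incremental verifier from the per-remote
action, is reimplemented on top of a new fileRetriever' that hands the
verifier to the action. The following toy model shows that relationship;
every type is a simplified stand-in, and only the two function names and
their general shape come from the diff.

-- Toy model of the fileRetriever / fileRetriever' split.
type RawFilePath = FilePath                      -- stand-in
type Key = String                                -- stand-in
type MeterUpdate = Integer -> IO ()              -- stand-in
data IncrementalVerifier = IncrementalVerifier   -- stand-in
type Retriever =
	Key -> MeterUpdate -> Maybe IncrementalVerifier -> IO FilePath

-- New primitive: prepares a temp file and passes the verifier through,
-- leaving the action responsible for feeding (or failing) it.
fileRetriever'
	:: (RawFilePath -> Key -> MeterUpdate -> Maybe IncrementalVerifier -> IO ())
	-> Retriever
fileRetriever' a k m miv = do
	let f = "tmp." ++ k          -- stand-in for prepTmp
	a f k m miv
	return f

-- Old interface, now a shim over the primitive: the action never sees
-- the verifier. (The real shim feeds it in the background instead of
-- ignoring it as this toy does.)
fileRetriever
	:: (RawFilePath -> Key -> MeterUpdate -> IO ())
	-> Retriever
fileRetriever a = fileRetriever' $ \f k m _miv -> a f k m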
@@ -56,7 +56,7 @@ data CopyMethod = CopiedCoW | Copied
  - being copied. But it is not finalized at the end.
  -
  - When copy-on-write is used, the IncrementalVerifier is not fed
- - the content of the file.
+ - the content of the file, and verification using it will fail.
  -
  - Note that, when the destination file already exists, it's read both
  - to start calculating the hash, and also to verify that its content is

@@ -71,7 +71,11 @@ fileCopier _ src dest meterupdate iv = docopy
 #else
 fileCopier copycowtried src dest meterupdate iv =
 	ifM (liftIO $ tryCopyCoW copycowtried src dest meterupdate)
-		( return CopiedCoW
+		( do
+			-- Make sure the incremental verifier fails,
+			-- since we did not feed it.
+			liftIO $ maybe noop failIncremental iv
+			return CopiedCoW
 		, docopy
 		)
 #endif

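A copy-on-write clone happens inside the filesystem, so none of the
file's bytes pass through fileCopier and the verifier is left with empty
hash state. Here is a standalone sketch of the invariant the new branch
enforces; the state names are invented, only failIncremental's role comes
from the hunk, and the fallback rationale is an inference.

import Data.IORef

-- Invented three-state model of a verifier's lifecycle.
data VerifierState = Unfed | Fed | Failed deriving (Eq, Show)

-- After a CoW clone succeeds, the verifier saw no bytes. Finalizing
-- its empty hash state would report a mismatch against perfectly good
-- content, so it is poisoned instead (failIncremental in the hunk),
-- presumably so a whole-file verification can run as the fallback.
afterCoWClone :: IORef VerifierState -> IO ()
afterCoWClone st = readIORef st >>= \s -> case s of
	Unfed -> writeIORef st Failed
	_ -> return ()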
@@ -11,7 +11,7 @@ git-annex (8.20210804) UNRELEASED; urgency=medium
   * Special remotes now checksum content while it is being retrieved,
     instead of in a separate pass at the end. This is supported for all
     special remotes on Linux (except for web and bittorrent), and for a
-    few on other OSs (bup, ddar, gcrypt, glacier).
+    few on other OSs (directory, bup, ddar, gcrypt, glacier).
 
  -- Joey Hess <id@joeyh.name>  Tue, 03 Aug 2021 12:22:45 -0400
 

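The changelog entry describes the user-visible effect: content is
checksummed as it streams in, not re-read in a second pass afterwards. A
minimal self-contained illustration of hashing-while-retrieving, using
cryptonite's incremental API; nothing here is git-annex code.

import qualified Crypto.Hash as H      -- from the cryptonite package
import qualified Data.ByteString as B
import System.IO

-- One pass: write each chunk and fold it into the hash context, so
-- the digest is ready the moment the transfer finishes.
retrieveHashing :: Handle -> Handle -> IO (H.Digest H.SHA256)
retrieveHashing src dest = go H.hashInit
  where
	go ctx = do
		chunk <- B.hGetSome src 65536
		if B.null chunk
			then return (H.hashFinalize ctx)
			else do
				B.hPut dest chunk
				go (H.hashUpdate ctx chunk)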
@@ -191,7 +191,7 @@ store'' serial dest src canoverwrite = checkAdbInPath False $ do
 retrieve :: AndroidSerial -> AndroidPath -> Retriever
 retrieve serial adir = fileRetriever $ \dest k _p ->
 	let src = androidLocation adir k
-	in retrieve' serial src dest
+	in retrieve' serial src (fromRawFilePath dest)
 
 retrieve' :: AndroidSerial -> AndroidPath -> FilePath -> Annex ()
 retrieve' serial src dest =

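This hunk, and most of the per-remote hunks after it, are the mechanical
half of the change: fileRetriever now hands its action a RawFilePath (a
ByteString-backed path), so call sites that still want the String-based
FilePath convert at the boundary with fromRawFilePath. A simplified
standalone equivalent of that conversion; git-annex's real helper honors
the filesystem encoding rather than assuming ASCII-safe bytes as this
sketch does.

import qualified Data.ByteString.Char8 as S8

-- ByteString-backed path, in the style of System.Posix.ByteString.
type RawFilePath = S8.ByteString

-- Simplified stand-in for git-annex's fromRawFilePath: decode only at
-- the edges where an API still wants the String-based FilePath.
fromRawFilePath :: RawFilePath -> FilePath
fromRawFilePath = S8.unpack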
@@ -232,9 +232,9 @@ finalizeStoreGeneric d tmp dest = do
 
 retrieveKeyFileM :: RawFilePath -> ChunkConfig -> CopyCoWTried -> Retriever
 retrieveKeyFileM d (LegacyChunks _) _ = Legacy.retrieve locations d
-retrieveKeyFileM d NoChunks cow = fileRetriever $ \dest k p -> do
+retrieveKeyFileM d NoChunks cow = fileRetriever' $ \dest k p iv -> do
 	src <- liftIO $ fromRawFilePath <$> getLocation d k
-	void $ fileCopier cow src dest p Nothing
+	void $ fileCopier cow src (fromRawFilePath dest) p iv
 retrieveKeyFileM d _ _ = byteRetriever $ \k sink ->
 	sink =<< liftIO (L.readFile . fromRawFilePath =<< getLocation d k)
 

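This is the hunk behind the commit title: the directory remote moves from
fileRetriever to fileRetriever' and threads the verifier (iv) into
fileCopier, so on the non-CoW path copying and hashing happen in a single
pass. A standalone sketch of a copier that keeps a verifier in step with
the copy; the feed callback is an assumed stand-in for the real
IncrementalVerifier, and none of this is git-annex code.

import qualified Data.ByteString as B
import System.IO

-- Copy src to dest, handing every chunk to the verifier's feed
-- function on the way through, so copying and hashing are one pass.
copyFeeding :: (B.ByteString -> IO ()) -> FilePath -> FilePath -> IO ()
copyFeeding feed src dest =
	withBinaryFile src ReadMode $ \hin ->
		withBinaryFile dest WriteMode $ \hout ->
			let loop = do
				chunk <- B.hGetSome hin 65536
				if B.null chunk
					then return ()
					else do
						B.hPut hout chunk
						feed chunk
						loop
			in loop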
@@ -237,7 +237,7 @@ retrieveKeyFileM :: External -> Retriever
 retrieveKeyFileM external = fileRetriever $ \d k p ->
 	either giveup return =<< go d k p
   where
-	go d k p = handleRequestKey external (\sk -> TRANSFER Download sk d) k (Just p) $ \resp ->
+	go d k p = handleRequestKey external (\sk -> TRANSFER Download sk (fromRawFilePath d)) k (Just p) $ \resp ->
 		case resp of
 			TRANSFER_SUCCESS Download k'
 				| k == k' -> result $ Right ()

@@ -810,7 +810,7 @@ checkUrlM external url =
 retrieveUrl :: Retriever
 retrieveUrl = fileRetriever $ \f k p -> do
 	us <- getWebUrls k
-	unlessM (withUrlOptions $ downloadUrl k p us f) $
+	unlessM (withUrlOptions $ downloadUrl k p us (fromRawFilePath f)) $
 		giveup "failed to download content"
 
 checkKeyUrl :: CheckPresent

@@ -418,7 +418,8 @@ retrieve' repo r rsyncopts accessmethod
 		sink =<< liftIO (L.readFile $ gCryptLocation repo k)
 	| Git.repoIsSsh repo = if accessShell r
 		then fileRetriever $ \f k p -> do
-			ps <- Ssh.rsyncParamsRemote False r Download k f
+			ps <- Ssh.rsyncParamsRemote False r Download k
+				(fromRawFilePath f)
 				(AssociatedFile Nothing)
 			oh <- mkOutputHandler
 			unlessM (Ssh.rsyncHelper oh (Just p) ps) $

@@ -497,7 +497,7 @@ retrieve rs h = fileRetriever $ \dest k p -> getLFSEndpoint LFS.RequestDownload
 		Nothing -> giveup "unable to parse git-lfs server download url"
 		Just req -> do
 			uo <- getUrlOptions
-			liftIO $ downloadConduit p req dest uo
+			liftIO $ downloadConduit p req (fromRawFilePath dest) uo
 
 -- Since git-lfs does not support removing content, nothing needs to be
 -- done to lock content in the remote, except for checking that the content

@@ -20,6 +20,7 @@ module Remote.Helper.Special (
 	fileStorer,
 	byteStorer,
 	fileRetriever,
+	fileRetriever',
 	byteRetriever,
 	storeKeyDummy,
 	retrieveKeyFileDummy,

@@ -115,10 +116,9 @@ byteRetriever a k _m _miv callback = a k (callback . ByteContent)
 -- retrieves data. The incremental verifier is updated in the background as
 -- the action writes to the file, but may not be updated with the entire
 -- content of the file.
-fileRetriever :: (FilePath -> Key -> MeterUpdate -> Annex ()) -> Retriever
-fileRetriever a k m miv callback = do
-	f <- prepTmp k
-	let retrieve = a (fromRawFilePath f) k m
+fileRetriever :: (RawFilePath -> Key -> MeterUpdate -> Annex ()) -> Retriever
+fileRetriever a = fileRetriever' $ \f k m miv -> do
+	let retrieve = a f k m
 	case miv of
 		Nothing -> retrieve
 		Just iv -> do

@@ -128,6 +128,15 @@ fileRetriever a k m miv callback = do
 			liftIO $ atomically $ putTMVar finished ()
 			liftIO (wait t)
 	retrieve `finally` finishtail
+
+{- A Retriever that writes the content of a Key to a provided file.
+ - The action is responsible for updating the progress meter and the
+ - incremental verifier as it retrieves data.
+ -}
+fileRetriever' :: (RawFilePath -> Key -> MeterUpdate -> Maybe IncrementalVerifier -> Annex ()) -> Retriever
+fileRetriever' a k m miv callback = do
+	f <- prepTmp k
+	a f k m miv
+	pruneTmpWorkDirBefore f (callback . FileContent . fromRawFilePath)
 
 {- The base Remote that is provided to specialRemote needs to have

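The context lines above (finished, wait t, retrieve `finally` finishtail)
are remnants of the machinery fileRetriever uses to feed the verifier
behind the action's back: a background thread tails the temp file while
the action writes it. Below is a standalone approximation of that shape,
with all names invented and details simplified; note the writer must be a
separate process, which is exactly the limitation discussed in the
comment at the end of this page.

import Control.Concurrent (threadDelay)
import Control.Concurrent.Async (async, wait)
import Control.Concurrent.STM
import Control.Exception (finally)
import qualified Data.ByteString as B
import System.IO

-- Run writer (an external process writing f; a Handle opened WriteMode
-- in this process would make the ReadMode open below fail with
-- "resource busy (file is locked)") while a background thread tails f
-- and feeds each chunk to the verifier callback.
tailFeed :: (B.ByteString -> IO ()) -> FilePath -> IO () -> IO ()
tailFeed feed f writer = do
	writeFile f ""                 -- ensure f exists before tailing
	finished <- newEmptyTMVarIO
	t <- async $ withBinaryFile f ReadMode (follow finished)
	writer `finally` do
		atomically (putTMVar finished ())
		wait t
  where
	follow finished h = do
		chunk <- B.hGetSome h 65536
		if B.null chunk
			then atomically (tryReadTMVar finished) >>= \done -> case done of
				Just () -> return ()            -- writer done; EOF is final
				Nothing -> threadDelay 50000 >> follow finished h
			else feed chunk >> follow finished h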
@@ -162,7 +162,7 @@ store h = fileStorer $ \k src _p -> runHook h "store" k (Just src)
 
 retrieve :: HookName -> Retriever
 retrieve h = fileRetriever $ \d k _p ->
-	unlessM (runHook' h "retrieve" k (Just d) $ return True) $
+	unlessM (runHook' h "retrieve" k (Just (fromRawFilePath d)) $ return True) $
 		giveup "failed to retrieve content"
 
 remove :: HookName -> Remover

@@ -235,8 +235,8 @@ storeGeneric' o meterupdate basedest populatedest = withRsyncScratchDir $ \tmp -
 		]
 	else return False
 
-retrieve :: RsyncOpts -> FilePath -> Key -> MeterUpdate -> Annex ()
-retrieve o f k p = rsyncRetrieveKey o k f (Just p)
+retrieve :: RsyncOpts -> RawFilePath -> Key -> MeterUpdate -> Annex ()
+retrieve o f k p = rsyncRetrieveKey o k (fromRawFilePath f) (Just p)
 
 retrieveCheap :: RsyncOpts -> Key -> AssociatedFile -> FilePath -> Annex ()
 retrieveCheap o k _af f = ifM (preseedTmp k f)

@@ -407,13 +407,13 @@ retrieve hv r rs c info = fileRetriever $ \f k p -> withS3Handle hv $ \case
 		Left failreason -> do
 			warning failreason
 			giveup "cannot download content"
-		Right loc -> retrieveHelper info h loc f p
+		Right loc -> retrieveHelper info h loc (fromRawFilePath f) p
 	Nothing ->
 		getPublicWebUrls' (uuid r) rs info c k >>= \case
 			Left failreason -> do
 				warning failreason
 				giveup "cannot download content"
-			Right us -> unlessM (withUrlOptions $ downloadUrl k p us f) $
+			Right us -> unlessM (withUrlOptions $ downloadUrl k p us (fromRawFilePath f)) $
 				giveup "failed to download content"
 
 retrieveHelper :: S3Info -> S3Handle -> (Either S3.Object S3VersionID) -> FilePath -> MeterUpdate -> Annex ()

@@ -170,9 +170,9 @@ finalizeStore dav tmp dest = do
 retrieve :: DavHandleVar -> ChunkConfig -> Retriever
 retrieve hv cc = fileRetriever $ \d k p ->
 	withDavHandle hv $ \dav -> case cc of
-		LegacyChunks _ -> retrieveLegacyChunked d k p dav
+		LegacyChunks _ -> retrieveLegacyChunked (fromRawFilePath d) k p dav
 		_ -> liftIO $
-			goDAV dav $ retrieveHelper (keyLocation k) d p
+			goDAV dav $ retrieveHelper (keyLocation k) (fromRawFilePath d) p
 
 retrieveHelper :: DavLocation -> FilePath -> MeterUpdate -> DAVT IO ()
 retrieveHelper loc d p = do

@@ -6,12 +6,10 @@
 The concurrency problem is fixed now.
 
 As well as the web special remote, these do not do incremental hashing
-still: directory, gitlfs, webdav, S3.
-
-The ones that do are: external, adb, gcrypt, hook, rsync
-
-The issue with directory etc is that they open the file
+still: gitlfs, webdav, S3. Problem is, these open the file
 for write. This prevents tailVerify re-opening it for read, because the
 haskell RTS actually does not allowing opening a file for read that it has
-open for write.
+open for write. This problem has already been fixed for directory.
+
+The ones that do are: external, adb, gcrypt, hook, rsync, directory
 """]]

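The locking behavior that comment describes is a GHC runtime rule:
Handle-level locking within one process allows many readers or one
writer, never both, so a file already open in WriteMode cannot be
reopened in ReadMode by the same process. A minimal repro:

import System.IO

main :: IO ()
main = do
	hw <- openFile "demo.tmp" WriteMode
	hPutStr hw "partial content"
	hFlush hw
	-- This second open throws an IOError along the lines of:
	--   demo.tmp: openFile: resource busy (file is locked)
	hr <- openFile "demo.tmp" ReadMode
	hGetContents hr >>= putStrLn
	hClose hr
	hClose hw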