incremental verify for directory special remote
Added fileRetriever', which will let the remaining special remotes eventually also support incremental verify. Sponsored-by: Dartmouth College's DANDI project
This commit is contained in:
parent
a644f729ce
commit
b1622eb932
13 changed files with 39 additions and 27 deletions
|
@ -56,7 +56,7 @@ data CopyMethod = CopiedCoW | Copied
|
||||||
- being copied. But it is not finalized at the end.
|
- being copied. But it is not finalized at the end.
|
||||||
-
|
-
|
||||||
- When copy-on-write is used, the IncrementalVerifier is not fed
|
- When copy-on-write is used, the IncrementalVerifier is not fed
|
||||||
- the content of the file.
|
- the content of the file, and verification using it will fail.
|
||||||
-
|
-
|
||||||
- Note that, when the destination file already exists, it's read both
|
- Note that, when the destination file already exists, it's read both
|
||||||
- to start calculating the hash, and also to verify that its content is
|
- to start calculating the hash, and also to verify that its content is
|
||||||
|
@ -71,7 +71,11 @@ fileCopier _ src dest meterupdate iv = docopy
|
||||||
#else
|
#else
|
||||||
fileCopier copycowtried src dest meterupdate iv =
|
fileCopier copycowtried src dest meterupdate iv =
|
||||||
ifM (liftIO $ tryCopyCoW copycowtried src dest meterupdate)
|
ifM (liftIO $ tryCopyCoW copycowtried src dest meterupdate)
|
||||||
( return CopiedCoW
|
( do
|
||||||
|
-- Make sure the incremental verifier fails,
|
||||||
|
-- since we did not feed it.
|
||||||
|
liftIO $ maybe noop failIncremental iv
|
||||||
|
return CopiedCoW
|
||||||
, docopy
|
, docopy
|
||||||
)
|
)
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -11,7 +11,7 @@ git-annex (8.20210804) UNRELEASED; urgency=medium
|
||||||
* Special remotes now checksum content while it is being retrieved,
|
* Special remotes now checksum content while it is being retrieved,
|
||||||
instead of in a separate pass at the end. This is supported for all
|
instead of in a separate pass at the end. This is supported for all
|
||||||
special remotes on Linux (except for web and bittorrent), and for a
|
special remotes on Linux (except for web and bittorrent), and for a
|
||||||
few on other OSs (bup, ddar, gcrypt, glacier).
|
few on other OSs (directory, bup, ddar, gcrypt, glacier).
|
||||||
|
|
||||||
-- Joey Hess <id@joeyh.name> Tue, 03 Aug 2021 12:22:45 -0400
|
-- Joey Hess <id@joeyh.name> Tue, 03 Aug 2021 12:22:45 -0400
|
||||||
|
|
||||||
|
|
|
@ -191,7 +191,7 @@ store'' serial dest src canoverwrite = checkAdbInPath False $ do
|
||||||
retrieve :: AndroidSerial -> AndroidPath -> Retriever
|
retrieve :: AndroidSerial -> AndroidPath -> Retriever
|
||||||
retrieve serial adir = fileRetriever $ \dest k _p ->
|
retrieve serial adir = fileRetriever $ \dest k _p ->
|
||||||
let src = androidLocation adir k
|
let src = androidLocation adir k
|
||||||
in retrieve' serial src dest
|
in retrieve' serial src (fromRawFilePath dest)
|
||||||
|
|
||||||
retrieve' :: AndroidSerial -> AndroidPath -> FilePath -> Annex ()
|
retrieve' :: AndroidSerial -> AndroidPath -> FilePath -> Annex ()
|
||||||
retrieve' serial src dest =
|
retrieve' serial src dest =
|
||||||
|
|
|
@ -232,9 +232,9 @@ finalizeStoreGeneric d tmp dest = do
|
||||||
|
|
||||||
retrieveKeyFileM :: RawFilePath -> ChunkConfig -> CopyCoWTried -> Retriever
|
retrieveKeyFileM :: RawFilePath -> ChunkConfig -> CopyCoWTried -> Retriever
|
||||||
retrieveKeyFileM d (LegacyChunks _) _ = Legacy.retrieve locations d
|
retrieveKeyFileM d (LegacyChunks _) _ = Legacy.retrieve locations d
|
||||||
retrieveKeyFileM d NoChunks cow = fileRetriever $ \dest k p -> do
|
retrieveKeyFileM d NoChunks cow = fileRetriever' $ \dest k p iv -> do
|
||||||
src <- liftIO $ fromRawFilePath <$> getLocation d k
|
src <- liftIO $ fromRawFilePath <$> getLocation d k
|
||||||
void $ fileCopier cow src dest p Nothing
|
void $ fileCopier cow src (fromRawFilePath dest) p iv
|
||||||
retrieveKeyFileM d _ _ = byteRetriever $ \k sink ->
|
retrieveKeyFileM d _ _ = byteRetriever $ \k sink ->
|
||||||
sink =<< liftIO (L.readFile . fromRawFilePath =<< getLocation d k)
|
sink =<< liftIO (L.readFile . fromRawFilePath =<< getLocation d k)
|
||||||
|
|
||||||
|
|
|
@ -237,7 +237,7 @@ retrieveKeyFileM :: External -> Retriever
|
||||||
retrieveKeyFileM external = fileRetriever $ \d k p ->
|
retrieveKeyFileM external = fileRetriever $ \d k p ->
|
||||||
either giveup return =<< go d k p
|
either giveup return =<< go d k p
|
||||||
where
|
where
|
||||||
go d k p = handleRequestKey external (\sk -> TRANSFER Download sk d) k (Just p) $ \resp ->
|
go d k p = handleRequestKey external (\sk -> TRANSFER Download sk (fromRawFilePath d)) k (Just p) $ \resp ->
|
||||||
case resp of
|
case resp of
|
||||||
TRANSFER_SUCCESS Download k'
|
TRANSFER_SUCCESS Download k'
|
||||||
| k == k' -> result $ Right ()
|
| k == k' -> result $ Right ()
|
||||||
|
@ -810,7 +810,7 @@ checkUrlM external url =
|
||||||
retrieveUrl :: Retriever
|
retrieveUrl :: Retriever
|
||||||
retrieveUrl = fileRetriever $ \f k p -> do
|
retrieveUrl = fileRetriever $ \f k p -> do
|
||||||
us <- getWebUrls k
|
us <- getWebUrls k
|
||||||
unlessM (withUrlOptions $ downloadUrl k p us f) $
|
unlessM (withUrlOptions $ downloadUrl k p us (fromRawFilePath f)) $
|
||||||
giveup "failed to download content"
|
giveup "failed to download content"
|
||||||
|
|
||||||
checkKeyUrl :: CheckPresent
|
checkKeyUrl :: CheckPresent
|
||||||
|
|
|
@ -418,7 +418,8 @@ retrieve' repo r rsyncopts accessmethod
|
||||||
sink =<< liftIO (L.readFile $ gCryptLocation repo k)
|
sink =<< liftIO (L.readFile $ gCryptLocation repo k)
|
||||||
| Git.repoIsSsh repo = if accessShell r
|
| Git.repoIsSsh repo = if accessShell r
|
||||||
then fileRetriever $ \f k p -> do
|
then fileRetriever $ \f k p -> do
|
||||||
ps <- Ssh.rsyncParamsRemote False r Download k f
|
ps <- Ssh.rsyncParamsRemote False r Download k
|
||||||
|
(fromRawFilePath f)
|
||||||
(AssociatedFile Nothing)
|
(AssociatedFile Nothing)
|
||||||
oh <- mkOutputHandler
|
oh <- mkOutputHandler
|
||||||
unlessM (Ssh.rsyncHelper oh (Just p) ps) $
|
unlessM (Ssh.rsyncHelper oh (Just p) ps) $
|
||||||
|
|
|
@ -497,7 +497,7 @@ retrieve rs h = fileRetriever $ \dest k p -> getLFSEndpoint LFS.RequestDownload
|
||||||
Nothing -> giveup "unable to parse git-lfs server download url"
|
Nothing -> giveup "unable to parse git-lfs server download url"
|
||||||
Just req -> do
|
Just req -> do
|
||||||
uo <- getUrlOptions
|
uo <- getUrlOptions
|
||||||
liftIO $ downloadConduit p req dest uo
|
liftIO $ downloadConduit p req (fromRawFilePath dest) uo
|
||||||
|
|
||||||
-- Since git-lfs does not support removing content, nothing needs to be
|
-- Since git-lfs does not support removing content, nothing needs to be
|
||||||
-- done to lock content in the remote, except for checking that the content
|
-- done to lock content in the remote, except for checking that the content
|
||||||
|
|
|
@ -20,6 +20,7 @@ module Remote.Helper.Special (
|
||||||
fileStorer,
|
fileStorer,
|
||||||
byteStorer,
|
byteStorer,
|
||||||
fileRetriever,
|
fileRetriever,
|
||||||
|
fileRetriever',
|
||||||
byteRetriever,
|
byteRetriever,
|
||||||
storeKeyDummy,
|
storeKeyDummy,
|
||||||
retrieveKeyFileDummy,
|
retrieveKeyFileDummy,
|
||||||
|
@ -115,10 +116,9 @@ byteRetriever a k _m _miv callback = a k (callback . ByteContent)
|
||||||
-- retrieves data. The incremental verifier is updated in the background as
|
-- retrieves data. The incremental verifier is updated in the background as
|
||||||
-- the action writes to the file, but may not be updated with the entire
|
-- the action writes to the file, but may not be updated with the entire
|
||||||
-- content of the file.
|
-- content of the file.
|
||||||
fileRetriever :: (FilePath -> Key -> MeterUpdate -> Annex ()) -> Retriever
|
fileRetriever :: (RawFilePath -> Key -> MeterUpdate -> Annex ()) -> Retriever
|
||||||
fileRetriever a k m miv callback = do
|
fileRetriever a = fileRetriever' $ \f k m miv -> do
|
||||||
f <- prepTmp k
|
let retrieve = a f k m
|
||||||
let retrieve = a (fromRawFilePath f) k m
|
|
||||||
case miv of
|
case miv of
|
||||||
Nothing -> retrieve
|
Nothing -> retrieve
|
||||||
Just iv -> do
|
Just iv -> do
|
||||||
|
@ -128,6 +128,15 @@ fileRetriever a k m miv callback = do
|
||||||
liftIO $ atomically $ putTMVar finished ()
|
liftIO $ atomically $ putTMVar finished ()
|
||||||
liftIO (wait t)
|
liftIO (wait t)
|
||||||
retrieve `finally` finishtail
|
retrieve `finally` finishtail
|
||||||
|
|
||||||
|
{- A Retriever that writes the content of a Key to a provided file.
|
||||||
|
- The action is responsible for updating the progress meter and the
|
||||||
|
- incremental verifier as it retrieves data.
|
||||||
|
-}
|
||||||
|
fileRetriever' :: (RawFilePath -> Key -> MeterUpdate -> Maybe IncrementalVerifier -> Annex ()) -> Retriever
|
||||||
|
fileRetriever' a k m miv callback = do
|
||||||
|
f <- prepTmp k
|
||||||
|
a f k m miv
|
||||||
pruneTmpWorkDirBefore f (callback . FileContent . fromRawFilePath)
|
pruneTmpWorkDirBefore f (callback . FileContent . fromRawFilePath)
|
||||||
|
|
||||||
{- The base Remote that is provided to specialRemote needs to have
|
{- The base Remote that is provided to specialRemote needs to have
|
||||||
|
|
|
@ -162,7 +162,7 @@ store h = fileStorer $ \k src _p -> runHook h "store" k (Just src)
|
||||||
|
|
||||||
retrieve :: HookName -> Retriever
|
retrieve :: HookName -> Retriever
|
||||||
retrieve h = fileRetriever $ \d k _p ->
|
retrieve h = fileRetriever $ \d k _p ->
|
||||||
unlessM (runHook' h "retrieve" k (Just d) $ return True) $
|
unlessM (runHook' h "retrieve" k (Just (fromRawFilePath d)) $ return True) $
|
||||||
giveup "failed to retrieve content"
|
giveup "failed to retrieve content"
|
||||||
|
|
||||||
remove :: HookName -> Remover
|
remove :: HookName -> Remover
|
||||||
|
|
|
@ -235,8 +235,8 @@ storeGeneric' o meterupdate basedest populatedest = withRsyncScratchDir $ \tmp -
|
||||||
]
|
]
|
||||||
else return False
|
else return False
|
||||||
|
|
||||||
retrieve :: RsyncOpts -> FilePath -> Key -> MeterUpdate -> Annex ()
|
retrieve :: RsyncOpts -> RawFilePath -> Key -> MeterUpdate -> Annex ()
|
||||||
retrieve o f k p = rsyncRetrieveKey o k f (Just p)
|
retrieve o f k p = rsyncRetrieveKey o k (fromRawFilePath f) (Just p)
|
||||||
|
|
||||||
retrieveCheap :: RsyncOpts -> Key -> AssociatedFile -> FilePath -> Annex ()
|
retrieveCheap :: RsyncOpts -> Key -> AssociatedFile -> FilePath -> Annex ()
|
||||||
retrieveCheap o k _af f = ifM (preseedTmp k f)
|
retrieveCheap o k _af f = ifM (preseedTmp k f)
|
||||||
|
|
|
@ -407,13 +407,13 @@ retrieve hv r rs c info = fileRetriever $ \f k p -> withS3Handle hv $ \case
|
||||||
Left failreason -> do
|
Left failreason -> do
|
||||||
warning failreason
|
warning failreason
|
||||||
giveup "cannot download content"
|
giveup "cannot download content"
|
||||||
Right loc -> retrieveHelper info h loc f p
|
Right loc -> retrieveHelper info h loc (fromRawFilePath f) p
|
||||||
Nothing ->
|
Nothing ->
|
||||||
getPublicWebUrls' (uuid r) rs info c k >>= \case
|
getPublicWebUrls' (uuid r) rs info c k >>= \case
|
||||||
Left failreason -> do
|
Left failreason -> do
|
||||||
warning failreason
|
warning failreason
|
||||||
giveup "cannot download content"
|
giveup "cannot download content"
|
||||||
Right us -> unlessM (withUrlOptions $ downloadUrl k p us f) $
|
Right us -> unlessM (withUrlOptions $ downloadUrl k p us (fromRawFilePath f)) $
|
||||||
giveup "failed to download content"
|
giveup "failed to download content"
|
||||||
|
|
||||||
retrieveHelper :: S3Info -> S3Handle -> (Either S3.Object S3VersionID) -> FilePath -> MeterUpdate -> Annex ()
|
retrieveHelper :: S3Info -> S3Handle -> (Either S3.Object S3VersionID) -> FilePath -> MeterUpdate -> Annex ()
|
||||||
|
|
|
@ -170,9 +170,9 @@ finalizeStore dav tmp dest = do
|
||||||
retrieve :: DavHandleVar -> ChunkConfig -> Retriever
|
retrieve :: DavHandleVar -> ChunkConfig -> Retriever
|
||||||
retrieve hv cc = fileRetriever $ \d k p ->
|
retrieve hv cc = fileRetriever $ \d k p ->
|
||||||
withDavHandle hv $ \dav -> case cc of
|
withDavHandle hv $ \dav -> case cc of
|
||||||
LegacyChunks _ -> retrieveLegacyChunked d k p dav
|
LegacyChunks _ -> retrieveLegacyChunked (fromRawFilePath d) k p dav
|
||||||
_ -> liftIO $
|
_ -> liftIO $
|
||||||
goDAV dav $ retrieveHelper (keyLocation k) d p
|
goDAV dav $ retrieveHelper (keyLocation k) (fromRawFilePath d) p
|
||||||
|
|
||||||
retrieveHelper :: DavLocation -> FilePath -> MeterUpdate -> DAVT IO ()
|
retrieveHelper :: DavLocation -> FilePath -> MeterUpdate -> DAVT IO ()
|
||||||
retrieveHelper loc d p = do
|
retrieveHelper loc d p = do
|
||||||
|
|
|
@ -6,12 +6,10 @@
|
||||||
The concurrency problem is fixed now.
|
The concurrency problem is fixed now.
|
||||||
|
|
||||||
As well as the web special remote, these do not do incremental hashing
|
As well as the web special remote, these do not do incremental hashing
|
||||||
still: directory, gitlfs, webdav, S3.
|
still: gitlfs, webdav, S3. Problem is, these open the file
|
||||||
|
|
||||||
The ones that do are: external, adb, gcrypt, hook, rsync
|
|
||||||
|
|
||||||
The issue with directory etc is that they open the file
|
|
||||||
for write. This prevents tailVerify re-opening it for read, because the
|
for write. This prevents tailVerify re-opening it for read, because the
|
||||||
haskell RTS actually does not allowing opening a file for read that it has
|
haskell RTS actually does not allowing opening a file for read that it has
|
||||||
open for write.
|
open for write. This problem has already been fixed for directory.
|
||||||
|
|
||||||
|
The ones that do are: external, adb, gcrypt, hook, rsync, directory
|
||||||
"""]]
|
"""]]
|
||||||
|
|
Loading…
Reference in a new issue