incremental verify for chunked remotes
Simply feed each chunk in turn to the incremental verifier. When resuming an interrupted retrieve, incremental verification is not done. Doing it would require reading the file up to the resume point and feeding that content to the incremental verifier, which seems easy to get wrong. It would also mean extra work before the transfer can start, which would complicate displaying progress, and might not appear to the user as if the transfer was resuming from where it left off. Instead, in that situation, return UnVerified, and let the verification be done in a separate pass. Granted, Annex.CopyFile does manage all that, but it's not complicated by dealing with chunks too. Sponsored-by: Dartmouth College's DANDI project
This commit is contained in:
parent
c20358b671
commit
7eb3742e4b
3 changed files with 47 additions and 30 deletions
|
@ -10,8 +10,7 @@ git-annex (8.20210804) UNRELEASED; urgency=medium
|
||||||
git-annex's own progress display.
|
git-annex's own progress display.
|
||||||
* Several special remotes verify content while it is being retrieved,
|
* Several special remotes verify content while it is being retrieved,
|
||||||
avoiding a separate checksum pass. They are: S3, bup, ddar,
|
avoiding a separate checksum pass. They are: S3, bup, ddar,
|
||||||
and gcrypt (with a local repository). This optimisation is not yet
|
and gcrypt (with a local repository).
|
||||||
available when chunks are used.
|
|
||||||
|
|
||||||
-- Joey Hess <id@joeyh.name> Tue, 03 Aug 2021 12:22:45 -0400
|
-- Joey Hess <id@joeyh.name> Tue, 03 Aug 2021 12:22:45 -0400
|
||||||
|
|
||||||
|
|
|
@ -269,33 +269,28 @@ retrieveChunks retriever u vc chunkconfig encryptor basek dest basep enc encc
|
||||||
-- that are likely not there.
|
-- that are likely not there.
|
||||||
iv <- startVerifyKeyContentIncrementally vc basek
|
iv <- startVerifyKeyContentIncrementally vc basek
|
||||||
tryNonAsync (getunchunked iv) >>= \case
|
tryNonAsync (getunchunked iv) >>= \case
|
||||||
Right Nothing -> return UnVerified
|
Right r -> finalize r
|
||||||
Right (Just iv') ->
|
Left e -> go (Just e)
|
||||||
ifM (liftIO $ finalizeIncremental iv')
|
=<< chunkKeysOnly u chunkconfig basek
|
||||||
( return Verified
|
| otherwise = go Nothing
|
||||||
, return UnVerified
|
=<< chunkKeys u chunkconfig basek
|
||||||
)
|
|
||||||
Left e -> do
|
|
||||||
go (Just e) =<< chunkKeysOnly u chunkconfig basek
|
|
||||||
return UnVerified
|
|
||||||
| otherwise = do
|
|
||||||
go Nothing =<< chunkKeys u chunkconfig basek
|
|
||||||
return UnVerified
|
|
||||||
where
|
where
|
||||||
go pe cks = do
|
go pe cks = do
|
||||||
let ls = map chunkKeyList cks
|
let ls = map chunkKeyList cks
|
||||||
currsize <- liftIO $ catchMaybeIO $ getFileSize (toRawFilePath dest)
|
currsize <- liftIO $ catchMaybeIO $ getFileSize (toRawFilePath dest)
|
||||||
let ls' = maybe ls (setupResume ls) currsize
|
let ls' = maybe ls (setupResume ls) currsize
|
||||||
if any null ls'
|
if any null ls'
|
||||||
then noop -- dest is already complete
|
then finalize Nothing -- dest is already complete
|
||||||
else firstavail pe currsize ls'
|
else finalize =<< firstavail pe currsize ls'
|
||||||
|
|
||||||
firstavail Nothing _ [] = giveup "unable to determine the chunks to use for this remote"
|
firstavail Nothing _ [] = giveup "unable to determine the chunks to use for this remote"
|
||||||
firstavail (Just e) _ [] = throwM e
|
firstavail (Just e) _ [] = throwM e
|
||||||
firstavail pe currsize ([]:ls) = firstavail pe currsize ls
|
firstavail pe currsize ([]:ls) = firstavail pe currsize ls
|
||||||
firstavail _ currsize ((k:ks):ls)
|
firstavail _ currsize ((k:ks):ls)
|
||||||
| k == basek = void (getunchunked Nothing)
|
| k == basek = do
|
||||||
`catchNonAsync` (\e -> firstavail (Just e) currsize ls)
|
iv <- startVerifyKeyContentIncrementally vc basek
|
||||||
|
getunchunked iv
|
||||||
|
`catchNonAsync` (\e -> firstavail (Just e) currsize ls)
|
||||||
| otherwise = do
|
| otherwise = do
|
||||||
let offset = resumeOffset currsize k
|
let offset = resumeOffset currsize k
|
||||||
let p = maybe basep
|
let p = maybe basep
|
||||||
|
@ -303,36 +298,42 @@ retrieveChunks retriever u vc chunkconfig encryptor basek dest basep enc encc
|
||||||
offset
|
offset
|
||||||
v <- tryNonAsync $
|
v <- tryNonAsync $
|
||||||
retriever (encryptor k) p $ \content ->
|
retriever (encryptor k) p $ \content ->
|
||||||
bracketIO (maybe opennew openresume offset) hClose $ \h -> do
|
bracket (maybe opennew openresume offset) (liftIO . hClose . fst) $ \(h, iv) -> do
|
||||||
void $ retrieved Nothing (Just h) p content
|
iv' <- retrieved iv (Just h) p content
|
||||||
let sz = toBytesProcessed $
|
let sz = toBytesProcessed $
|
||||||
fromMaybe 0 $ fromKey keyChunkSize k
|
fromMaybe 0 $ fromKey keyChunkSize k
|
||||||
getrest p h sz sz ks
|
getrest p h iv' sz sz ks
|
||||||
case v of
|
case v of
|
||||||
Left e
|
Left e
|
||||||
| null ls -> throwM e
|
| null ls -> throwM e
|
||||||
| otherwise -> firstavail (Just e) currsize ls
|
| otherwise -> firstavail (Just e) currsize ls
|
||||||
Right r -> return r
|
Right r -> return r
|
||||||
|
|
||||||
getrest _ _ _ _ [] = noop
|
getrest _ _ iv _ _ [] = return iv
|
||||||
getrest p h sz bytesprocessed (k:ks) = do
|
getrest p h iv sz bytesprocessed (k:ks) = do
|
||||||
let p' = offsetMeterUpdate p bytesprocessed
|
let p' = offsetMeterUpdate p bytesprocessed
|
||||||
liftIO $ p' zeroBytesProcessed
|
liftIO $ p' zeroBytesProcessed
|
||||||
retriever (encryptor k) p' $
|
iv' <- retriever (encryptor k) p' $
|
||||||
void . retrieved Nothing (Just h) p'
|
retrieved iv (Just h) p'
|
||||||
getrest p h sz (addBytesProcessed bytesprocessed sz) ks
|
getrest p h iv' sz (addBytesProcessed bytesprocessed sz) ks
|
||||||
|
|
||||||
getunchunked iv = retriever (encryptor basek) basep $
|
getunchunked iv = retriever (encryptor basek) basep $
|
||||||
retrieved iv Nothing basep
|
retrieved iv Nothing basep
|
||||||
|
|
||||||
opennew = openBinaryFile dest WriteMode
|
opennew = do
|
||||||
|
iv <- startVerifyKeyContentIncrementally vc basek
|
||||||
|
h <- liftIO $ openBinaryFile dest WriteMode
|
||||||
|
return (h, iv)
|
||||||
|
|
||||||
-- Open the file and seek to the start point in order to resume.
|
-- Open the file and seek to the start point in order to resume.
|
||||||
openresume startpoint = do
|
openresume startpoint = do
|
||||||
-- ReadWriteMode allows seeking; AppendMode does not.
|
-- ReadWriteMode allows seeking; AppendMode does not.
|
||||||
h <- openBinaryFile dest ReadWriteMode
|
h <- liftIO $ openBinaryFile dest ReadWriteMode
|
||||||
hSeek h AbsoluteSeek startpoint
|
liftIO $ hSeek h AbsoluteSeek startpoint
|
||||||
return h
|
-- No incremental verification when resuming, since that
|
||||||
|
-- would need to read up to the startpoint.
|
||||||
|
let iv = Nothing
|
||||||
|
return (h, iv)
|
||||||
|
|
||||||
{- Progress meter updating is a bit tricky: If the Retriever
|
{- Progress meter updating is a bit tricky: If the Retriever
|
||||||
- populates a file, it is responsible for updating progress
|
- populates a file, it is responsible for updating progress
|
||||||
|
@ -349,6 +350,13 @@ retrieveChunks retriever u vc chunkconfig encryptor basek dest basep enc encc
|
||||||
p'
|
p'
|
||||||
| isByteContent content = Just p
|
| isByteContent content = Just p
|
||||||
| otherwise = Nothing
|
| otherwise = Nothing
|
||||||
|
|
||||||
|
finalize Nothing = return UnVerified
|
||||||
|
finalize (Just iv) =
|
||||||
|
ifM (liftIO $ finalizeIncremental iv)
|
||||||
|
( return Verified
|
||||||
|
, return UnVerified
|
||||||
|
)
|
||||||
|
|
||||||
{- Writes retrieved file content to the provided Handle, decrypting it
|
{- Writes retrieved file content to the provided Handle, decrypting it
|
||||||
- first if necessary.
|
- first if necessary.
|
||||||
|
|
|
@ -0,0 +1,10 @@
|
||||||
|
[[!comment format=mdwn
|
||||||
|
username="joey"
|
||||||
|
subject="""comment 13"""
|
||||||
|
date="2021-08-11T18:16:41Z"
|
||||||
|
content="""
|
||||||
|
Some special remotes now support incremental verification. So far, limited to
|
||||||
|
ones that use the byteRetriever interface.
|
||||||
|
Others, that use fileRetriever, including external special remotes, still
|
||||||
|
need work.
|
||||||
|
"""]]
|
Loading…
Add table
Add a link
Reference in a new issue