diff --git a/CHANGELOG b/CHANGELOG index 580d5832ad..b54546442e 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -24,8 +24,8 @@ git-annex (8.20210128) UNRELEASED; urgency=medium * Include libkqueue.h file needed to build the assistant on BSDs. * Tahoe: Avoid verifying hash after download, since tahoe does sufficient verification itself. - * Checksum as content is received from a remote git-annex repository - over ssh/p2p protocols, rather than doing it in a second pass. + * Checksum as content is received from a remote git-annex repository, + rather than doing it in a second pass. * Bugfix: fsck --from a ssh remote did not actually check that the content on the remote is not corrupted. diff --git a/Remote/Git.hs b/Remote/Git.hs index ec20a3cc82..3ca65c4f65 100644 --- a/Remote/Git.hs +++ b/Remote/Git.hs @@ -62,7 +62,10 @@ import Annex.Path import Creds import Types.NumCopies import Types.ProposedAccepted +import Types.Backend +import Backend import Annex.Action +import Annex.Verify import Messages.Progress #ifndef mingw32_HOST_OS @@ -542,11 +545,12 @@ copyFromRemote'' repo forcersync r st@(State connpool _ _ _ _) key file dest met -- run copy from perspective of remote onLocalFast st $ Annex.Content.prepSendAnnex key >>= \case Just (object, checksuccess) -> do + let verify = Annex.Content.RemoteVerify r copier <- mkCopier hardlink st (ok, v) <- runTransfer (Transfer Download u (fromKey id key)) file Nothing stdRetry $ \p -> metered (Just (combineMeterUpdate p meterupdate)) key $ \_ p' -> - copier object dest key p' checksuccess + copier object dest key p' checksuccess verify if ok then return v else giveup "failed to retrieve content from remote" @@ -685,12 +689,12 @@ copyToRemote' repo r st@(State connpool duc _ _ _) key file meterupdate res <- onLocalFast st $ ifM (Annex.Content.inAnnex key) ( return True , runTransfer (Transfer Download u (fromKey id key)) file Nothing stdRetry $ \p -> do - copier <- mkCopier hardlink st let verify = Annex.Content.RemoteVerify r + copier <- mkCopier hardlink st let rsp = RetrievalAllKeysSecure res <- logStatusAfter key $ Annex.Content.getViaTmp rsp verify key file $ \dest -> metered (Just (combineMeterUpdate meterupdate p)) key $ \_ p' -> - copier object (fromRawFilePath dest) key p' (liftIO checksuccessio) + copier object (fromRawFilePath dest) key p' (liftIO checksuccessio) verify Annex.Content.saveState True return res ) @@ -825,7 +829,7 @@ wantHardLink = (annexHardLink <$> Annex.getGitConfig) -- from is implicitly trusted, so no expensive verification needs to be -- done. Also returns Verified if the key's content is verified while -- copying it. -type Copier = FilePath -> FilePath -> Key -> MeterUpdate -> Annex Bool -> Annex (Bool, Verification) +type Copier = FilePath -> FilePath -> Key -> MeterUpdate -> Annex Bool -> VerifyConfig -> Annex (Bool, Verification) mkCopier :: Bool -> State -> Annex Copier mkCopier remotewanthardlink st = do @@ -833,13 +837,13 @@ mkCopier remotewanthardlink st = do localwanthardlink <- wantHardLink let linker = \src dest -> createLink src dest >> return True if remotewanthardlink || localwanthardlink - then return $ \src dest k p check -> + then return $ \src dest k p check verifyconfig -> ifM (liftIO (catchBoolIO (linker src dest))) ( ifM check ( return (True, Verified) , return (False, UnVerified) ) - , copier src dest k p check + , copier src dest k p check verifyconfig ) else return copier @@ -922,9 +926,9 @@ newCopyCoWTried = CopyCoWTried <$> newEmptyMVar -} fileCopier :: State -> Copier #ifdef mingw32_HOST_OS -fileCopier _st src dest k meterupdate check = docopy +fileCopier _st src dest k meterupdate check verifyconfig = docopy #else -fileCopier st src dest k meterupdate check = +fileCopier st src dest k meterupdate check verifyconfig = -- If multiple threads reach this at the same time, they -- will both try CoW, which is acceptable. ifM (liftIO $ isEmptyMVar copycowtried) @@ -953,14 +957,16 @@ fileCopier st src dest k meterupdate check = dest' = toRawFilePath dest docopy = do + iv <- startVerifyKeyContentIncrementally verifyconfig k + -- The file might have had the write bit removed, -- so make sure we can write to it. void $ liftIO $ tryIO $ allowWrite dest' liftIO $ withBinaryFile dest ReadWriteMode $ \hdest -> withBinaryFile src ReadMode $ \hsrc -> do - sofar <- compareexisting hdest hsrc zeroBytesProcessed - docopy' hdest hsrc sofar + sofar <- compareexisting iv hdest hsrc zeroBytesProcessed + docopy' iv hdest hsrc sofar -- Copy src mode and mtime. mode <- liftIO $ fileMode <$> getFileStatus src @@ -969,24 +975,30 @@ fileCopier st src dest k meterupdate check = liftIO $ touch dest' mtime False ifM check - ( return (True, UnVerified) + ( case iv of + Just x -> ifM (liftIO $ finalizeIncremental x) + ( return (True, Verified) + , return (False, UnVerified) + ) + Nothing -> return (True, UnVerified) , return (False, UnVerified) ) - docopy' hdest hsrc sofar = do + docopy' iv hdest hsrc sofar = do s <- S.hGet hsrc defaultChunkSize if s == S.empty then return () else do let sofar' = addBytesProcessed sofar (S.length s) S.hPut hdest s + maybe noop (flip updateIncremental s) iv meterupdate sofar' - docopy' hdest hsrc sofar' + docopy' iv hdest hsrc sofar' -- Leaves hdest and hsrc seeked to wherever the two diverge, -- so typically hdest will be seeked to end, and hsrc to the same -- position. - compareexisting hdest hsrc sofar = do + compareexisting iv hdest hsrc sofar = do s <- S.hGet hdest defaultChunkSize if s == S.empty then return sofar @@ -994,9 +1006,10 @@ fileCopier st src dest k meterupdate check = s' <- getnoshort (S.length s) hsrc if s == s' then do + maybe noop (flip updateIncremental s) iv let sofar' = addBytesProcessed sofar (S.length s) meterupdate sofar' - compareexisting hdest hsrc sofar' + compareexisting iv hdest hsrc sofar' else do seekbefore hdest s seekbefore hsrc s' diff --git a/doc/todo/OPT__58_____34__bundle__34___get_+_check___40__of_checksum__41___in_a_single_operation/comment_10_695d1269ab20c66630ddfa2d8cbabbef._comment b/doc/todo/OPT__58_____34__bundle__34___get_+_check___40__of_checksum__41___in_a_single_operation/comment_10_695d1269ab20c66630ddfa2d8cbabbef._comment new file mode 100644 index 0000000000..4e754e0b88 --- /dev/null +++ b/doc/todo/OPT__58_____34__bundle__34___get_+_check___40__of_checksum__41___in_a_single_operation/comment_10_695d1269ab20c66630ddfa2d8cbabbef._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 10""" + date="2021-02-10T19:48:58Z" + content=""" +Incremental hashing implemented for local git remotes. + +Next step should be a special remote, such as directory, +that uses byteRetriever. Chunking and encryption will complicate them.. +"""]] diff --git a/doc/todo/OPT__58_____34__bundle__34___get_+_check___40__of_checksum__41___in_a_single_operation/comment_9_4f4f4a42adafe52207ed32a5d20e94be._comment b/doc/todo/OPT__58_____34__bundle__34___get_+_check___40__of_checksum__41___in_a_single_operation/comment_9_4f4f4a42adafe52207ed32a5d20e94be._comment index 8992a4c1d8..befeac793c 100644 --- a/doc/todo/OPT__58_____34__bundle__34___get_+_check___40__of_checksum__41___in_a_single_operation/comment_9_4f4f4a42adafe52207ed32a5d20e94be._comment +++ b/doc/todo/OPT__58_____34__bundle__34___get_+_check___40__of_checksum__41___in_a_single_operation/comment_9_4f4f4a42adafe52207ed32a5d20e94be._comment @@ -20,6 +20,6 @@ checksum. Urk: Using rsync currently protects against [[bugs/URL_key_potential_data_loss]], so the replacement would also need to -deal with that. Probably by refusing to resume a partial transfer of an -affected key. (Or it could just fall back to rsync for such keys.) +deal with that. Eg, by comparing the temp file content with the start of +the object when resuming. """]]