diff --git a/Annex/CopyFile.hs b/Annex/CopyFile.hs index b49f2ef45b..b4ed97dfd5 100644 --- a/Annex/CopyFile.hs +++ b/Annex/CopyFile.hs @@ -15,6 +15,8 @@ import Utility.CopyFile import Utility.FileMode import Utility.Touch import Utility.Hash (IncrementalVerifier(..)) +import Annex.Tmp +import Utility.Tmp import Control.Concurrent import qualified Data.ByteString as S @@ -28,23 +30,34 @@ newCopyCoWTried :: IO CopyCoWTried newCopyCoWTried = CopyCoWTried <$> newEmptyMVar {- Copies a file is copy-on-write is supported. Otherwise, returns False. -} -tryCopyCoW :: CopyCoWTried -> FilePath -> FilePath -> MeterUpdate -> IO Bool +tryCopyCoW :: CopyCoWTried -> FilePath -> FilePath -> MeterUpdate -> Annex Bool tryCopyCoW (CopyCoWTried copycowtried) src dest meterupdate = -- If multiple threads reach this at the same time, they -- will both try CoW, which is acceptable. - ifM (isEmptyMVar copycowtried) + ifM (liftIO $ isEmptyMVar copycowtried) ( do ok <- docopycow - void $ tryPutMVar copycowtried ok + void $ liftIO $ tryPutMVar copycowtried ok return ok - , ifM (readMVar copycowtried) + , ifM (liftIO $ readMVar copycowtried) ( docopycow , return False ) ) where - docopycow = watchFileSize dest meterupdate $ - copyCoW CopyTimeStamps src dest + -- copyCow needs a destination file that does not exist, + -- but the dest file might already. So use it with another + -- temp file, and if it succeeds, rename it into place. If it fails, + -- the dest file is left as-is, to support resuming. 
+ docopycow = withOtherTmp $ \othertmp -> liftIO $ + withTmpFileIn (fromRawFilePath othertmp) (takeFileName dest) $ \tmpdest _h -> do + copied <- watchFileSize tmpdest meterupdate $ + copyCoW CopyTimeStamps src tmpdest + if copied + then liftIO $ catchBoolIO $ do + rename tmpdest dest + return True + else return False data CopyMethod = CopiedCoW | Copied @@ -70,7 +83,7 @@ fileCopier :: CopyCoWTried -> FilePath -> FilePath -> MeterUpdate -> Maybe Incre fileCopier _ src dest meterupdate iv = docopy #else fileCopier copycowtried src dest meterupdate iv = - ifM (liftIO $ tryCopyCoW copycowtried src dest meterupdate) + ifM (tryCopyCoW copycowtried src dest meterupdate) ( do liftIO $ maybe noop unableIncremental iv return CopiedCoW diff --git a/CHANGELOG b/CHANGELOG index 538ae62a49..c1e6446bed 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -2,6 +2,8 @@ git-annex (8.20210904) UNRELEASED; urgency=medium * borg: Avoid trying to extract xattrs, ACLS, and bsdflags when retrieving from a borg repository. + * Resume where it left off when copying a file to/from a local git remote + was interrupted. -- Joey Hess Fri, 03 Sep 2021 12:02:55 -0400 diff --git a/Remote/Directory.hs b/Remote/Directory.hs index 6a34df6bd0..20d4027323 100644 --- a/Remote/Directory.hs +++ b/Remote/Directory.hs @@ -412,7 +412,7 @@ retrieveExportWithContentIdentifierM dir cow loc cid dest mkkey p = f = exportPath dir loc f' = fromRawFilePath f - docopy = ifM (liftIO $ tryCopyCoW cow f' dest p) + docopy = ifM (tryCopyCoW cow f' dest p) ( do k <- mkkey postcheckcow (return k) diff --git a/Utility/CopyFile.hs b/Utility/CopyFile.hs index ed2da15e5c..e91986593a 100644 --- a/Utility/CopyFile.hs +++ b/Utility/CopyFile.hs @@ -56,11 +56,13 @@ copyFileExternal meta src dest = do | otherwise = copyMetaDataParams meta {- When a filesystem supports CoW (and cp does), uses it to make - - an efficient copy of a file. Otherwise, returns False. -} + - an efficient copy of a file. Otherwise, returns False. 
+ - + - The dest file must not exist yet, or it will fail to make a CoW copy, + - and will return False. -} copyCoW :: CopyMetaData -> FilePath -> FilePath -> IO Bool copyCoW meta src dest | BuildInfo.cp_reflink_supported = do - void $ tryIO $ removeFile dest -- When CoW is not supported, cp will complain to stderr, -- so have to discard its stderr. ok <- catchBoolIO $ withNullHandle $ \nullh -> diff --git a/doc/bugs/copy_--to_with_local_git_remote_does_not_resume.mdwn b/doc/bugs/copy_--to_with_local_git_remote_does_not_resume.mdwn new file mode 100644 index 0000000000..7361200daf --- /dev/null +++ b/doc/bugs/copy_--to_with_local_git_remote_does_not_resume.mdwn @@ -0,0 +1,21 @@ +A copy --to a local git remote that gets interrupted and is run again does +not resume where it left off, but copies all the data again. + +This does not affect git remotes accessed over ssh. + +It's kind of hard to notice this, because normally a resume has to read +the src file and dest file, in order for incremental verification to +get started. But it is somewhat slower to do that than it is to re-write +the dest file from the start. And when annex.verify = false, it's a lot +slower. + +Looks like it's due to copyCoW unlinking the dest file. Since the first +file copy tries copyCoW to probe if that's supported, that happens. +And when resuming an interrupted copy, that probe will generally happen +with the file it was interrupted on. + +So, the solution seems like it would be to copyCoW to some other temp file, +and if it succeeds, rename it to the dest. +--[[Joey]] + +> [[fixed|done]] --[[Joey]]