From 63d508e8855b2e61a725c906ea17d1c7f4a2e125 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Tue, 21 Sep 2021 17:32:10 -0400 Subject: [PATCH 1/4] resume properly when copying a file to/from a local git remote is interrupted Probably this fixes a reversion, but I don't know what version broke it. This does use withOtherTmp for a temp file that could be quite large. Though albeit a reflink copy that will not actually take up any space as long as the file it was copied from still exists. So if the copy cow succeeds but git-annex is interrupted just before that temp file gets renamed into the usual .git/annex/tmp/ location, there is a risk that the other temp directory ends up cluttered with a larger temp file than later. It will eventually be cleaned up, and the changes of this being a problem are small, so this seems like an acceptable thing to do. Sponsored-by: Shae Erisson on Patreon --- Annex/CopyFile.hs | 27 ++++++++++++++----- CHANGELOG | 2 ++ Remote/Directory.hs | 2 +- Utility/CopyFile.hs | 6 +++-- ...with_local_git_remote_does_not_resume.mdwn | 21 +++++++++++++++ 5 files changed, 48 insertions(+), 10 deletions(-) create mode 100644 doc/bugs/copy_--to_with_local_git_remote_does_not_resume.mdwn diff --git a/Annex/CopyFile.hs b/Annex/CopyFile.hs index b49f2ef45b..b4ed97dfd5 100644 --- a/Annex/CopyFile.hs +++ b/Annex/CopyFile.hs @@ -15,6 +15,8 @@ import Utility.CopyFile import Utility.FileMode import Utility.Touch import Utility.Hash (IncrementalVerifier(..)) +import Annex.Tmp +import Utility.Tmp import Control.Concurrent import qualified Data.ByteString as S @@ -28,23 +30,34 @@ newCopyCoWTried :: IO CopyCoWTried newCopyCoWTried = CopyCoWTried <$> newEmptyMVar {- Copies a file is copy-on-write is supported. Otherwise, returns False. -} -tryCopyCoW :: CopyCoWTried -> FilePath -> FilePath -> MeterUpdate -> IO Bool +tryCopyCoW :: CopyCoWTried -> FilePath -> FilePath -> MeterUpdate -> Annex Bool tryCopyCoW (CopyCoWTried copycowtried) src dest meterupdate = -- If multiple threads reach this at the same time, they -- will both try CoW, which is acceptable. - ifM (isEmptyMVar copycowtried) + ifM (liftIO $ isEmptyMVar copycowtried) ( do ok <- docopycow - void $ tryPutMVar copycowtried ok + void $ liftIO $ tryPutMVar copycowtried ok return ok - , ifM (readMVar copycowtried) + , ifM (liftIO $ readMVar copycowtried) ( docopycow , return False ) ) where - docopycow = watchFileSize dest meterupdate $ - copyCoW CopyTimeStamps src dest + -- copyCow needs a destination file that does not exist, + -- but the dest file might already. So use it with another + -- temp file, and if it succeeds, rename it into place. If it fails, + -- the dest file is left as-is, to support resuming. + docopycow = withOtherTmp $ \othertmp -> liftIO $ + withTmpFileIn (fromRawFilePath othertmp) (takeFileName dest) $ \tmpdest _h -> do + copied <- watchFileSize tmpdest meterupdate $ + copyCoW CopyTimeStamps src tmpdest + if copied + then liftIO $ catchBoolIO $ do + rename tmpdest dest + return True + else return False data CopyMethod = CopiedCoW | Copied @@ -70,7 +83,7 @@ fileCopier :: CopyCoWTried -> FilePath -> FilePath -> MeterUpdate -> Maybe Incre fileCopier _ src dest meterupdate iv = docopy #else fileCopier copycowtried src dest meterupdate iv = - ifM (liftIO $ tryCopyCoW copycowtried src dest meterupdate) + ifM (tryCopyCoW copycowtried src dest meterupdate) ( do liftIO $ maybe noop unableIncremental iv return CopiedCoW diff --git a/CHANGELOG b/CHANGELOG index 538ae62a49..c1e6446bed 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -2,6 +2,8 @@ git-annex (8.20210904) UNRELEASED; urgency=medium * borg: Avoid trying to extract xattrs, ACLS, and bsdflags when retrieving from a borg repository. + * Resume where it left off when copying a file to/from a local git remote + was interrupted. -- Joey Hess Fri, 03 Sep 2021 12:02:55 -0400 diff --git a/Remote/Directory.hs b/Remote/Directory.hs index 6a34df6bd0..20d4027323 100644 --- a/Remote/Directory.hs +++ b/Remote/Directory.hs @@ -412,7 +412,7 @@ retrieveExportWithContentIdentifierM dir cow loc cid dest mkkey p = f = exportPath dir loc f' = fromRawFilePath f - docopy = ifM (liftIO $ tryCopyCoW cow f' dest p) + docopy = ifM (tryCopyCoW cow f' dest p) ( do k <- mkkey postcheckcow (return k) diff --git a/Utility/CopyFile.hs b/Utility/CopyFile.hs index ed2da15e5c..e91986593a 100644 --- a/Utility/CopyFile.hs +++ b/Utility/CopyFile.hs @@ -56,11 +56,13 @@ copyFileExternal meta src dest = do | otherwise = copyMetaDataParams meta {- When a filesystem supports CoW (and cp does), uses it to make - - an efficient copy of a file. Otherwise, returns False. -} + - an efficient copy of a file. Otherwise, returns False. + - + - The dest file must not exist yet, or it will fail to make a CoW copy, + - and will return False. -} copyCoW :: CopyMetaData -> FilePath -> FilePath -> IO Bool copyCoW meta src dest | BuildInfo.cp_reflink_supported = do - void $ tryIO $ removeFile dest -- When CoW is not supported, cp will complain to stderr, -- so have to discard its stderr. ok <- catchBoolIO $ withNullHandle $ \nullh -> diff --git a/doc/bugs/copy_--to_with_local_git_remote_does_not_resume.mdwn b/doc/bugs/copy_--to_with_local_git_remote_does_not_resume.mdwn new file mode 100644 index 0000000000..7361200daf --- /dev/null +++ b/doc/bugs/copy_--to_with_local_git_remote_does_not_resume.mdwn @@ -0,0 +1,21 @@ +A copy --to a local git remote that gets interrupted and is run again does +not resume where it left off, but copies all the data again. + +This does not affect git remotes accessed over ssh. + +It's kind of hard to notice this, because normally a resume, has to read +the src file and dest file, in order for incremental verification to +get started. But it is somewhat slower to do that than it is to re-write +the dest file from the start. And when annex.verify = false, it's a lot +slower. + +Looks like it's due to copyCoW unlinking the dest file. Since the first +file copy trues copyCoW to probe if that's supported, that happens. +And when resuming an interrupted copy, that probe will generally happen +with the file it was interrupted on. + +So, the solution seems like it would be to copyCoW to some other temp file, +and if it succeeds, rename it to the dest. +--[[Joey]] + +> [[fixed|done]] --[[Joey]] From b76a4453cbb09fc21dfc2788b0d8a9012060f508 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Tue, 21 Sep 2021 16:58:50 -0400 Subject: [PATCH 2/4] bwlimit branch --- doc/todo/bwlimit.mdwn | 3 +++ 1 file changed, 3 insertions(+) diff --git a/doc/todo/bwlimit.mdwn b/doc/todo/bwlimit.mdwn index 3821d93fa6..da1deffe69 100644 --- a/doc/todo/bwlimit.mdwn +++ b/doc/todo/bwlimit.mdwn @@ -9,3 +9,6 @@ second when it's running too fast. The way the progress reporting interface works, it will probably work to put the delay in there. --[[Joey]] [[confirmed]] + +> Implmentation in progress in the `bwlimit` branch. Seems to work, but see +> commit message for what still needs to be done. --[[Joey]] From 55b405a965bf6b61843c57aba63085942f381dc9 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 22 Sep 2021 10:41:39 -0400 Subject: [PATCH 3/4] fix remote git config vs global git config order Bug fix: Git configs such as annex.verify were incorrectly overriding per-remote git configs such as remote.name.annex-verify. This dates all the way back to 2013, commit 8a5b397ac47b21906f1fbb6254f5b70b3beb2a52, where hlint apparently somehow confused me into parsing in the wrong order. Before that it was correct. Amazing noone has noticed until now. Sponsored-by: Kevin Mueller on Patreon --- CHANGELOG | 3 +++ Types/GitConfig.hs | 6 ++++-- 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index c1e6446bed..9589632065 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,5 +1,8 @@ git-annex (8.20210904) UNRELEASED; urgency=medium + * Bug fix: Git configs such as annex.verify were incorrectly overriding + per-remote git configs such as remote.name.annex-verify. + (Reversion in version 4.20130323) * borg: Avoid trying to extract xattrs, ACLS, and bsdflags when retrieving from a borg repository. * Resume where it left off when copying a file to/from a local git remote diff --git a/Types/GitConfig.hs b/Types/GitConfig.hs index ab1060e060..5cdde68f37 100644 --- a/Types/GitConfig.hs +++ b/Types/GitConfig.hs @@ -440,8 +440,10 @@ extractRemoteGitConfig r remotename = do getmaybebool k = Git.Config.isTrueFalse' =<< getmaybe' k getmayberead k = readish =<< getmaybe k getmaybe = fmap fromConfigValue . getmaybe' - getmaybe' k = mplus (Git.Config.getMaybe (annexConfig k) r) - (Git.Config.getMaybe (remoteAnnexConfig remotename k) r) + getmaybe' k = + Git.Config.getMaybe (remoteAnnexConfig remotename k) r + <|> + Git.Config.getMaybe (annexConfig k) r getoptions k = fromMaybe [] $ words <$> getmaybe k notempty :: Maybe String -> Maybe String From 4fef94d76422c6f0c831eb2fdce4bc0f55750ef2 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 22 Sep 2021 10:46:10 -0400 Subject: [PATCH 4/4] simplify annex.stalldetection handling RemoteGitConfig parsing looks for annex.stalldetection when a remote does not have a per-remote config for it, so no need for a separate gobal config. Sponsored-by: Noam Kremen on Patreon --- Annex/Transfer.hs | 25 ++++++++++--------------- Assistant/TransferSlots.hs | 4 ++-- Types/GitConfig.hs | 4 ---- 3 files changed, 12 insertions(+), 21 deletions(-) diff --git a/Annex/Transfer.hs b/Annex/Transfer.hs index c57dbaf3ec..c6597baec2 100644 --- a/Annex/Transfer.hs +++ b/Annex/Transfer.hs @@ -19,7 +19,6 @@ module Annex.Transfer ( noRetry, stdRetry, pickRemote, - stallDetection, ) where import Annex.Common @@ -55,10 +54,11 @@ import Data.Ord -- Upload, supporting canceling detected stalls. upload :: Remote -> Key -> AssociatedFile -> RetryDecider -> NotifyWitness -> Annex Bool -upload r key f d witness = stallDetection r >>= \case - Nothing -> go (Just ProbeStallDetection) - Just StallDetectionDisabled -> go Nothing - Just sd -> runTransferrer sd r key f d Upload witness +upload r key f d witness = + case remoteAnnexStallDetection (Remote.gitconfig r) of + Nothing -> go (Just ProbeStallDetection) + Just StallDetectionDisabled -> go Nothing + Just sd -> runTransferrer sd r key f d Upload witness where go sd = upload' (Remote.uuid r) key f sd d (action . Remote.storeKey r key f) witness @@ -73,10 +73,11 @@ alwaysUpload u key f sd d a _witness = guardHaveUUID u $ -- Download, supporting canceling detected stalls. download :: Remote -> Key -> AssociatedFile -> RetryDecider -> NotifyWitness -> Annex Bool -download r key f d witness = logStatusAfter key $ stallDetection r >>= \case - Nothing -> go (Just ProbeStallDetection) - Just StallDetectionDisabled -> go Nothing - Just sd -> runTransferrer sd r key f d Download witness +download r key f d witness = logStatusAfter key $ + case remoteAnnexStallDetection (Remote.gitconfig r) of + Nothing -> go (Just ProbeStallDetection) + Just StallDetectionDisabled -> go Nothing + Just sd -> runTransferrer sd r key f d Download witness where go sd = getViaTmp (Remote.retrievalSecurityPolicy r) vc key f $ \dest -> download' (Remote.uuid r) key f sd d (go' dest) witness @@ -400,9 +401,3 @@ lessActiveFirst :: M.Map Remote Integer -> Remote -> Remote -> Ordering lessActiveFirst active a b | Remote.cost a == Remote.cost b = comparing (`M.lookup` active) a b | otherwise = comparing Remote.cost a b - -stallDetection :: Remote -> Annex (Maybe StallDetection) -stallDetection r = maybe globalcfg (pure . Just) remotecfg - where - globalcfg = annexStallDetection <$> Annex.getGitConfig - remotecfg = remoteAnnexStallDetection $ Remote.gitconfig r diff --git a/Assistant/TransferSlots.hs b/Assistant/TransferSlots.hs index add60706d6..f49a995ac4 100644 --- a/Assistant/TransferSlots.hs +++ b/Assistant/TransferSlots.hs @@ -24,7 +24,6 @@ import Assistant.Alert import Assistant.Alert.Utility import Assistant.Commits import Assistant.Drop -import Annex.Transfer (stallDetection) import Types.Transfer import Logs.Transfer import Logs.Location @@ -126,7 +125,8 @@ genTransfer t info = case transferRemote info of ( do debug [ "Transferring:" , describeTransfer t info ] notifyTransfer - sd <- liftAnnex $ stallDetection remote + let sd = remoteAnnexStallDetection + (Remote.gitconfig remote) return $ Just (t, info, go remote sd) , do debug [ "Skipping unnecessary transfer:", diff --git a/Types/GitConfig.hs b/Types/GitConfig.hs index 5cdde68f37..5c94743e89 100644 --- a/Types/GitConfig.hs +++ b/Types/GitConfig.hs @@ -123,7 +123,6 @@ data GitConfig = GitConfig , annexRetry :: Maybe Integer , annexForwardRetry :: Maybe Integer , annexRetryDelay :: Maybe Seconds - , annexStallDetection :: Maybe StallDetection , annexAllowedUrlSchemes :: S.Set Scheme , annexAllowedIPAddresses :: String , annexAllowUnverifiedDownloads :: Bool @@ -217,9 +216,6 @@ extractGitConfig configsource r = GitConfig , annexForwardRetry = getmayberead (annexConfig "forward-retry") , annexRetryDelay = Seconds <$> getmayberead (annexConfig "retrydelay") - , annexStallDetection = - either (const Nothing) id . parseStallDetection - =<< getmaybe (annexConfig "stalldetection") , annexAllowedUrlSchemes = S.fromList $ map mkScheme $ maybe ["http", "https", "ftp"] words $ getmaybe (annexConfig "security.allowed-url-schemes")