resume properly when copying a file to/from a local git remote is interrupted
Probably this fixes a reversion, but I don't know what version broke it. This does use withOtherTmp for a temp file that could be quite large. Though albeit a reflink copy that will not actually take up any space as long as the file it was copied from still exists. So if the copy cow succeeds but git-annex is interrupted just before that temp file gets renamed into the usual .git/annex/tmp/ location, there is a risk that the other temp directory ends up cluttered with a larger temp file than later. It will eventually be cleaned up, and the changes of this being a problem are small, so this seems like an acceptable thing to do. Sponsored-by: Shae Erisson on Patreon
This commit is contained in:
parent
c9dd63d67d
commit
63d508e885
5 changed files with 48 additions and 10 deletions
|
@ -15,6 +15,8 @@ import Utility.CopyFile
|
||||||
import Utility.FileMode
|
import Utility.FileMode
|
||||||
import Utility.Touch
|
import Utility.Touch
|
||||||
import Utility.Hash (IncrementalVerifier(..))
|
import Utility.Hash (IncrementalVerifier(..))
|
||||||
|
import Annex.Tmp
|
||||||
|
import Utility.Tmp
|
||||||
|
|
||||||
import Control.Concurrent
|
import Control.Concurrent
|
||||||
import qualified Data.ByteString as S
|
import qualified Data.ByteString as S
|
||||||
|
@ -28,23 +30,34 @@ newCopyCoWTried :: IO CopyCoWTried
|
||||||
newCopyCoWTried = CopyCoWTried <$> newEmptyMVar
|
newCopyCoWTried = CopyCoWTried <$> newEmptyMVar
|
||||||
|
|
||||||
{- Copies a file is copy-on-write is supported. Otherwise, returns False. -}
|
{- Copies a file is copy-on-write is supported. Otherwise, returns False. -}
|
||||||
tryCopyCoW :: CopyCoWTried -> FilePath -> FilePath -> MeterUpdate -> IO Bool
|
tryCopyCoW :: CopyCoWTried -> FilePath -> FilePath -> MeterUpdate -> Annex Bool
|
||||||
tryCopyCoW (CopyCoWTried copycowtried) src dest meterupdate =
|
tryCopyCoW (CopyCoWTried copycowtried) src dest meterupdate =
|
||||||
-- If multiple threads reach this at the same time, they
|
-- If multiple threads reach this at the same time, they
|
||||||
-- will both try CoW, which is acceptable.
|
-- will both try CoW, which is acceptable.
|
||||||
ifM (isEmptyMVar copycowtried)
|
ifM (liftIO $ isEmptyMVar copycowtried)
|
||||||
( do
|
( do
|
||||||
ok <- docopycow
|
ok <- docopycow
|
||||||
void $ tryPutMVar copycowtried ok
|
void $ liftIO $ tryPutMVar copycowtried ok
|
||||||
return ok
|
return ok
|
||||||
, ifM (readMVar copycowtried)
|
, ifM (liftIO $ readMVar copycowtried)
|
||||||
( docopycow
|
( docopycow
|
||||||
, return False
|
, return False
|
||||||
)
|
)
|
||||||
)
|
)
|
||||||
where
|
where
|
||||||
docopycow = watchFileSize dest meterupdate $
|
-- copyCow needs a destination file that does not exist,
|
||||||
copyCoW CopyTimeStamps src dest
|
-- but the dest file might already. So use it with another
|
||||||
|
-- temp file, and if it succeeds, rename it into place. If it fails,
|
||||||
|
-- the dest file is left as-is, to support resuming.
|
||||||
|
docopycow = withOtherTmp $ \othertmp -> liftIO $
|
||||||
|
withTmpFileIn (fromRawFilePath othertmp) (takeFileName dest) $ \tmpdest _h -> do
|
||||||
|
copied <- watchFileSize tmpdest meterupdate $
|
||||||
|
copyCoW CopyTimeStamps src tmpdest
|
||||||
|
if copied
|
||||||
|
then liftIO $ catchBoolIO $ do
|
||||||
|
rename tmpdest dest
|
||||||
|
return True
|
||||||
|
else return False
|
||||||
|
|
||||||
data CopyMethod = CopiedCoW | Copied
|
data CopyMethod = CopiedCoW | Copied
|
||||||
|
|
||||||
|
@ -70,7 +83,7 @@ fileCopier :: CopyCoWTried -> FilePath -> FilePath -> MeterUpdate -> Maybe Incre
|
||||||
fileCopier _ src dest meterupdate iv = docopy
|
fileCopier _ src dest meterupdate iv = docopy
|
||||||
#else
|
#else
|
||||||
fileCopier copycowtried src dest meterupdate iv =
|
fileCopier copycowtried src dest meterupdate iv =
|
||||||
ifM (liftIO $ tryCopyCoW copycowtried src dest meterupdate)
|
ifM (tryCopyCoW copycowtried src dest meterupdate)
|
||||||
( do
|
( do
|
||||||
liftIO $ maybe noop unableIncremental iv
|
liftIO $ maybe noop unableIncremental iv
|
||||||
return CopiedCoW
|
return CopiedCoW
|
||||||
|
|
|
@ -2,6 +2,8 @@ git-annex (8.20210904) UNRELEASED; urgency=medium
|
||||||
|
|
||||||
* borg: Avoid trying to extract xattrs, ACLS, and bsdflags when
|
* borg: Avoid trying to extract xattrs, ACLS, and bsdflags when
|
||||||
retrieving from a borg repository.
|
retrieving from a borg repository.
|
||||||
|
* Resume where it left off when copying a file to/from a local git remote
|
||||||
|
was interrupted.
|
||||||
|
|
||||||
-- Joey Hess <id@joeyh.name> Fri, 03 Sep 2021 12:02:55 -0400
|
-- Joey Hess <id@joeyh.name> Fri, 03 Sep 2021 12:02:55 -0400
|
||||||
|
|
||||||
|
|
|
@ -412,7 +412,7 @@ retrieveExportWithContentIdentifierM dir cow loc cid dest mkkey p =
|
||||||
f = exportPath dir loc
|
f = exportPath dir loc
|
||||||
f' = fromRawFilePath f
|
f' = fromRawFilePath f
|
||||||
|
|
||||||
docopy = ifM (liftIO $ tryCopyCoW cow f' dest p)
|
docopy = ifM (tryCopyCoW cow f' dest p)
|
||||||
( do
|
( do
|
||||||
k <- mkkey
|
k <- mkkey
|
||||||
postcheckcow (return k)
|
postcheckcow (return k)
|
||||||
|
|
|
@ -56,11 +56,13 @@ copyFileExternal meta src dest = do
|
||||||
| otherwise = copyMetaDataParams meta
|
| otherwise = copyMetaDataParams meta
|
||||||
|
|
||||||
{- When a filesystem supports CoW (and cp does), uses it to make
|
{- When a filesystem supports CoW (and cp does), uses it to make
|
||||||
- an efficient copy of a file. Otherwise, returns False. -}
|
- an efficient copy of a file. Otherwise, returns False.
|
||||||
|
-
|
||||||
|
- The dest file must not exist yet, or it will fail to make a CoW copy,
|
||||||
|
- and will return False. -}
|
||||||
copyCoW :: CopyMetaData -> FilePath -> FilePath -> IO Bool
|
copyCoW :: CopyMetaData -> FilePath -> FilePath -> IO Bool
|
||||||
copyCoW meta src dest
|
copyCoW meta src dest
|
||||||
| BuildInfo.cp_reflink_supported = do
|
| BuildInfo.cp_reflink_supported = do
|
||||||
void $ tryIO $ removeFile dest
|
|
||||||
-- When CoW is not supported, cp will complain to stderr,
|
-- When CoW is not supported, cp will complain to stderr,
|
||||||
-- so have to discard its stderr.
|
-- so have to discard its stderr.
|
||||||
ok <- catchBoolIO $ withNullHandle $ \nullh ->
|
ok <- catchBoolIO $ withNullHandle $ \nullh ->
|
||||||
|
|
|
@ -0,0 +1,21 @@
|
||||||
|
A copy --to a local git remote that gets interrupted and is run again does
|
||||||
|
not resume where it left off, but copies all the data again.
|
||||||
|
|
||||||
|
This does not affect git remotes accessed over ssh.
|
||||||
|
|
||||||
|
It's kind of hard to notice this, because normally a resume, has to read
|
||||||
|
the src file and dest file, in order for incremental verification to
|
||||||
|
get started. But it is somewhat slower to do that than it is to re-write
|
||||||
|
the dest file from the start. And when annex.verify = false, it's a lot
|
||||||
|
slower.
|
||||||
|
|
||||||
|
Looks like it's due to copyCoW unlinking the dest file. Since the first
|
||||||
|
file copy trues copyCoW to probe if that's supported, that happens.
|
||||||
|
And when resuming an interrupted copy, that probe will generally happen
|
||||||
|
with the file it was interrupted on.
|
||||||
|
|
||||||
|
So, the solution seems like it would be to copyCoW to some other temp file,
|
||||||
|
and if it succeeds, rename it to the dest.
|
||||||
|
--[[Joey]]
|
||||||
|
|
||||||
|
> [[fixed|done]] --[[Joey]]
|
Loading…
Add table
Reference in a new issue