CoW probing
Improved probing when CoW copies can be made between files on the same drive. Now supports CoW between BTRFS subvolumes. And, falls back to rsync instead of using cp when CoW won't work, eg copies between repos on the same EXT4 filesystem. Rather than trying cp --reflink=always for each file copied to a remote, it's tried once and if it fails it falls back to using rsync thereafter for the lifetime of the Remote object. That avoids overhead of calling cp which while small, will add up over a large number of files. This commit was sponsored by Boyd Stephen Smith Jr. on Patreon.
This commit is contained in:
parent
0dc26cd6f1
commit
21ff5e1e5a
6 changed files with 100 additions and 37 deletions
|
@ -23,7 +23,7 @@ tests =
|
||||||
, testCp "cp_a" "-a"
|
, testCp "cp_a" "-a"
|
||||||
, testCp "cp_p" "-p"
|
, testCp "cp_p" "-p"
|
||||||
, testCp "cp_preserve_timestamps" "--preserve=timestamps"
|
, testCp "cp_preserve_timestamps" "--preserve=timestamps"
|
||||||
, testCp "cp_reflink_auto" "--reflink=auto"
|
, testCp "cp_reflink_supported" "--reflink=auto"
|
||||||
, TestCase "xargs -0" $ testCmd "xargs_0" "xargs -0 </dev/null"
|
, TestCase "xargs -0" $ testCmd "xargs_0" "xargs -0 </dev/null"
|
||||||
, TestCase "rsync" $ testCmd "rsync" "rsync --version >/dev/null"
|
, TestCase "rsync" $ testCmd "rsync" "rsync --version >/dev/null"
|
||||||
, TestCase "curl" $ testCmd "curl" "curl --version >/dev/null"
|
, TestCase "curl" $ testCmd "curl" "curl --version >/dev/null"
|
||||||
|
|
|
@ -8,6 +8,10 @@ git-annex (7.20190709) UNRELEASED; urgency=medium
|
||||||
optimised for 4-way CPUs.
|
optimised for 4-way CPUs.
|
||||||
* Support running v7 upgrade in a repo where there is no branch checked
|
* Support running v7 upgrade in a repo where there is no branch checked
|
||||||
out, but HEAD is set directly to some other ref.
|
out, but HEAD is set directly to some other ref.
|
||||||
|
* Improved probing when CoW copies can be made between files on the same
|
||||||
|
drive. Now supports CoW between BTRFS subvolumes. And, falls back to rsync
|
||||||
|
instead of using cp when CoW won't work, eg copies between repos on the
|
||||||
|
same EXT4 filesystem.
|
||||||
|
|
||||||
-- Joey Hess <id@joeyh.name> Mon, 08 Jul 2019 08:59:54 -0400
|
-- Joey Hess <id@joeyh.name> Mon, 08 Jul 2019 08:59:54 -0400
|
||||||
|
|
||||||
|
|
|
@ -1,6 +1,6 @@
|
||||||
{- Standard git remotes.
|
{- Standard git remotes.
|
||||||
-
|
-
|
||||||
- Copyright 2011-2018 Joey Hess <id@joeyh.name>
|
- Copyright 2011-2019 Joey Hess <id@joeyh.name>
|
||||||
-
|
-
|
||||||
- Licensed under the GNU AGPL version 3 or higher.
|
- Licensed under the GNU AGPL version 3 or higher.
|
||||||
-}
|
-}
|
||||||
|
@ -335,7 +335,7 @@ inAnnex rmt st key = do
|
||||||
inAnnex' repo rmt st key
|
inAnnex' repo rmt st key
|
||||||
|
|
||||||
inAnnex' :: Git.Repo -> Remote -> State -> Key -> Annex Bool
|
inAnnex' :: Git.Repo -> Remote -> State -> Key -> Annex Bool
|
||||||
inAnnex' repo rmt (State connpool duc _) key
|
inAnnex' repo rmt (State connpool duc _ _) key
|
||||||
| Git.repoIsHttp repo = checkhttp
|
| Git.repoIsHttp repo = checkhttp
|
||||||
| Git.repoIsUrl repo = checkremote
|
| Git.repoIsUrl repo = checkremote
|
||||||
| otherwise = checklocal
|
| otherwise = checklocal
|
||||||
|
@ -382,7 +382,7 @@ dropKey r st key = do
|
||||||
(\e -> warning (show e) >> return False)
|
(\e -> warning (show e) >> return False)
|
||||||
|
|
||||||
dropKey' :: Git.Repo -> Remote -> State -> Key -> Annex Bool
|
dropKey' :: Git.Repo -> Remote -> State -> Key -> Annex Bool
|
||||||
dropKey' repo r (State connpool duc _) key
|
dropKey' repo r (State connpool duc _ _) key
|
||||||
| not $ Git.repoIsUrl repo = ifM duc
|
| not $ Git.repoIsUrl repo = ifM duc
|
||||||
( guardUsable repo (return False) $
|
( guardUsable repo (return False) $
|
||||||
commitOnCleanup repo r $ onLocalFast repo r $ do
|
commitOnCleanup repo r $ onLocalFast repo r $ do
|
||||||
|
@ -406,7 +406,7 @@ lockKey r st key callback = do
|
||||||
lockKey' repo r st key callback
|
lockKey' repo r st key callback
|
||||||
|
|
||||||
lockKey' :: Git.Repo -> Remote -> State -> Key -> (VerifiedCopy -> Annex r) -> Annex r
|
lockKey' :: Git.Repo -> Remote -> State -> Key -> (VerifiedCopy -> Annex r) -> Annex r
|
||||||
lockKey' repo r (State connpool duc _) key callback
|
lockKey' repo r (State connpool duc _ _) key callback
|
||||||
| not $ Git.repoIsUrl repo = ifM duc
|
| not $ Git.repoIsUrl repo = ifM duc
|
||||||
( guardUsable repo failedlock $ do
|
( guardUsable repo failedlock $ do
|
||||||
inorigrepo <- Annex.makeRunner
|
inorigrepo <- Annex.makeRunner
|
||||||
|
@ -474,7 +474,7 @@ copyFromRemote' forcersync r st key file dest meterupdate = do
|
||||||
copyFromRemote'' repo forcersync r st key file dest meterupdate
|
copyFromRemote'' repo forcersync r st key file dest meterupdate
|
||||||
|
|
||||||
copyFromRemote'' :: Git.Repo -> Bool -> Remote -> State -> Key -> AssociatedFile -> FilePath -> MeterUpdate -> Annex (Bool, Verification)
|
copyFromRemote'' :: Git.Repo -> Bool -> Remote -> State -> Key -> AssociatedFile -> FilePath -> MeterUpdate -> Annex (Bool, Verification)
|
||||||
copyFromRemote'' repo forcersync r (State connpool _ _) key file dest meterupdate
|
copyFromRemote'' repo forcersync r st@(State connpool _ _ _) key file dest meterupdate
|
||||||
| Git.repoIsHttp repo = unVerified $ do
|
| Git.repoIsHttp repo = unVerified $ do
|
||||||
gc <- Annex.getGitConfig
|
gc <- Annex.getGitConfig
|
||||||
Annex.Content.downloadUrl key meterupdate (keyUrls gc repo r key) dest
|
Annex.Content.downloadUrl key meterupdate (keyUrls gc repo r key) dest
|
||||||
|
@ -489,7 +489,7 @@ copyFromRemote'' repo forcersync r (State connpool _ _) key file dest meterupdat
|
||||||
case v of
|
case v of
|
||||||
Nothing -> return (False, UnVerified)
|
Nothing -> return (False, UnVerified)
|
||||||
Just (object, checksuccess) -> do
|
Just (object, checksuccess) -> do
|
||||||
copier <- mkCopier hardlink params
|
copier <- mkCopier hardlink st params
|
||||||
runTransfer (Transfer Download u key)
|
runTransfer (Transfer Download u key)
|
||||||
file stdRetry
|
file stdRetry
|
||||||
(\p -> copier object dest (combineMeterUpdate p meterupdate) checksuccess)
|
(\p -> copier object dest (combineMeterUpdate p meterupdate) checksuccess)
|
||||||
|
@ -600,7 +600,7 @@ copyToRemote r st key file meterupdate = do
|
||||||
copyToRemote' repo r st key file meterupdate
|
copyToRemote' repo r st key file meterupdate
|
||||||
|
|
||||||
copyToRemote' :: Git.Repo -> Remote -> State -> Key -> AssociatedFile -> MeterUpdate -> Annex Bool
|
copyToRemote' :: Git.Repo -> Remote -> State -> Key -> AssociatedFile -> MeterUpdate -> Annex Bool
|
||||||
copyToRemote' repo r (State connpool duc _) key file meterupdate
|
copyToRemote' repo r st@(State connpool duc _ _) key file meterupdate
|
||||||
| not $ Git.repoIsUrl repo = ifM duc
|
| not $ Git.repoIsUrl repo = ifM duc
|
||||||
( guardUsable repo (return False) $ commitOnCleanup repo r $
|
( guardUsable repo (return False) $ commitOnCleanup repo r $
|
||||||
copylocal =<< Annex.Content.prepSendAnnex key
|
copylocal =<< Annex.Content.prepSendAnnex key
|
||||||
|
@ -627,7 +627,7 @@ copyToRemote' repo r (State connpool duc _) key file meterupdate
|
||||||
( return True
|
( return True
|
||||||
, do
|
, do
|
||||||
ensureInitialized
|
ensureInitialized
|
||||||
copier <- mkCopier hardlink params
|
copier <- mkCopier hardlink st params
|
||||||
let verify = Annex.Content.RemoteVerify r
|
let verify = Annex.Content.RemoteVerify r
|
||||||
let rsp = RetrievalAllKeysSecure
|
let rsp = RetrievalAllKeysSecure
|
||||||
runTransfer (Transfer Download u key) file stdRetry $ \p ->
|
runTransfer (Transfer Download u key) file stdRetry $ \p ->
|
||||||
|
@ -704,27 +704,46 @@ onLocal repo r a = do
|
||||||
onLocalFast :: Git.Repo -> Remote -> Annex a -> Annex a
|
onLocalFast :: Git.Repo -> Remote -> Annex a -> Annex a
|
||||||
onLocalFast repo r a = onLocal repo r $ Annex.BranchState.disableUpdate >> a
|
onLocalFast repo r a = onLocal repo r $ Annex.BranchState.disableUpdate >> a
|
||||||
|
|
||||||
{- Copys a file with rsync unless both locations are on the same
|
-- To avoid the overhead of trying copy-on-write every time, it's tried
|
||||||
- filesystem. Then cp could be faster. -}
|
-- once and if it fails, is not tried again.
|
||||||
rsyncOrCopyFile :: [CommandParam] -> FilePath -> FilePath -> MeterUpdate -> Annex Bool
|
newtype CopyCoWTried = CopyCoWTried (MVar Bool)
|
||||||
rsyncOrCopyFile rsyncparams src dest p =
|
|
||||||
|
newCopyCoWTried :: IO CopyCoWTried
|
||||||
|
newCopyCoWTried = CopyCoWTried <$> newEmptyMVar
|
||||||
|
|
||||||
|
{- Copys a file. Uses copy-on-write if it is supported. Otherwise,
|
||||||
|
- uses rsync, so that interrupted copies can be resumed. -}
|
||||||
|
rsyncOrCopyFile :: State -> [CommandParam] -> FilePath -> FilePath -> MeterUpdate -> Annex Bool
|
||||||
|
rsyncOrCopyFile st rsyncparams src dest p =
|
||||||
#ifdef mingw32_HOST_OS
|
#ifdef mingw32_HOST_OS
|
||||||
-- rsync is only available on Windows in some inatallation methods,
|
-- rsync is only available on Windows in some installation methods,
|
||||||
-- and is not strictly needed here, so don't use it.
|
-- and is not strictly needed here, so don't use it.
|
||||||
docopy
|
docopywith copyFileExternal
|
||||||
where
|
where
|
||||||
#else
|
#else
|
||||||
ifM (sameDeviceIds src dest) (docopy, dorsync)
|
-- If multiple threads reach this at the same time, they
|
||||||
|
-- will both try CoW, which is acceptable.
|
||||||
|
ifM (liftIO $ isEmptyMVar copycowtried)
|
||||||
|
( do
|
||||||
|
ok <- docopycow
|
||||||
|
void $ liftIO $ tryPutMVar copycowtried ok
|
||||||
|
pure ok <||> dorsync
|
||||||
|
, ifM (liftIO $ readMVar copycowtried)
|
||||||
|
( docopycow <||> dorsync
|
||||||
|
, dorsync
|
||||||
|
)
|
||||||
|
)
|
||||||
where
|
where
|
||||||
sameDeviceIds a b = (==) <$> getDeviceId a <*> getDeviceId b
|
copycowtried = case st of
|
||||||
getDeviceId f = deviceID <$> liftIO (getFileStatus $ parentDir f)
|
State _ _ (CopyCoWTried v) _ -> v
|
||||||
dorsync = do
|
dorsync = do
|
||||||
oh <- mkOutputHandler
|
oh <- mkOutputHandler
|
||||||
Ssh.rsyncHelper oh (Just p) $
|
Ssh.rsyncHelper oh (Just p) $
|
||||||
rsyncparams ++ [File src, File dest]
|
rsyncparams ++ [File src, File dest]
|
||||||
|
docopycow = docopywith copyCoW
|
||||||
#endif
|
#endif
|
||||||
docopy = liftIO $ watchFileSize dest p $
|
docopywith a = liftIO $ watchFileSize dest p $
|
||||||
copyFileExternal CopyTimeStamps src dest
|
a CopyTimeStamps src dest
|
||||||
|
|
||||||
commitOnCleanup :: Git.Repo -> Remote -> Annex a -> Annex a
|
commitOnCleanup :: Git.Repo -> Remote -> Annex a -> Annex a
|
||||||
commitOnCleanup repo r a = go `after` a
|
commitOnCleanup repo r a = go `after` a
|
||||||
|
@ -768,10 +787,10 @@ wantHardLink = (annexHardLink <$> Annex.getGitConfig)
|
||||||
-- done.
|
-- done.
|
||||||
type Copier = FilePath -> FilePath -> MeterUpdate -> Annex Bool -> Annex (Bool, Verification)
|
type Copier = FilePath -> FilePath -> MeterUpdate -> Annex Bool -> Annex (Bool, Verification)
|
||||||
|
|
||||||
mkCopier :: Bool -> [CommandParam] -> Annex Copier
|
mkCopier :: Bool -> State -> [CommandParam] -> Annex Copier
|
||||||
mkCopier remotewanthardlink rsyncparams = do
|
mkCopier remotewanthardlink st rsyncparams = do
|
||||||
let copier = \src dest p check -> unVerified $
|
let copier = \src dest p check -> unVerified $
|
||||||
rsyncOrCopyFile rsyncparams src dest p <&&> check
|
rsyncOrCopyFile st rsyncparams src dest p <&&> check
|
||||||
localwanthardlink <- wantHardLink
|
localwanthardlink <- wantHardLink
|
||||||
let linker = \src dest -> createLink src dest >> return True
|
let linker = \src dest -> createLink src dest >> return True
|
||||||
ifM (pure (remotewanthardlink || localwanthardlink) <&&> not <$> isDirect)
|
ifM (pure (remotewanthardlink || localwanthardlink) <&&> not <$> isDirect)
|
||||||
|
@ -790,20 +809,21 @@ mkCopier remotewanthardlink rsyncparams = do
|
||||||
- This returns False when the repository UUID is not as expected. -}
|
- This returns False when the repository UUID is not as expected. -}
|
||||||
type DeferredUUIDCheck = Annex Bool
|
type DeferredUUIDCheck = Annex Bool
|
||||||
|
|
||||||
data State = State Ssh.P2PSshConnectionPool DeferredUUIDCheck (Annex (Git.Repo, GitConfig))
|
data State = State Ssh.P2PSshConnectionPool DeferredUUIDCheck CopyCoWTried (Annex (Git.Repo, GitConfig))
|
||||||
|
|
||||||
getRepoFromState :: State -> Annex Git.Repo
|
getRepoFromState :: State -> Annex Git.Repo
|
||||||
getRepoFromState (State _ _ a) = fst <$> a
|
getRepoFromState (State _ _ _ a) = fst <$> a
|
||||||
|
|
||||||
{- The config of the remote git repository, cached for speed. -}
|
{- The config of the remote git repository, cached for speed. -}
|
||||||
getGitConfigFromState :: State -> Annex GitConfig
|
getGitConfigFromState :: State -> Annex GitConfig
|
||||||
getGitConfigFromState (State _ _ a) = snd <$> a
|
getGitConfigFromState (State _ _ _ a) = snd <$> a
|
||||||
|
|
||||||
mkState :: Git.Repo -> UUID -> RemoteGitConfig -> Annex State
|
mkState :: Git.Repo -> UUID -> RemoteGitConfig -> Annex State
|
||||||
mkState r u gc = do
|
mkState r u gc = do
|
||||||
pool <- Ssh.mkP2PSshConnectionPool
|
pool <- Ssh.mkP2PSshConnectionPool
|
||||||
|
copycowtried <- liftIO newCopyCoWTried
|
||||||
(duc, getrepo) <- go
|
(duc, getrepo) <- go
|
||||||
return $ State pool duc getrepo
|
return $ State pool duc copycowtried getrepo
|
||||||
where
|
where
|
||||||
go
|
go
|
||||||
| remoteAnnexCheckUUID gc = return
|
| remoteAnnexCheckUUID gc = return
|
||||||
|
|
|
@ -1,12 +1,13 @@
|
||||||
{- file copying
|
{- file copying
|
||||||
-
|
-
|
||||||
- Copyright 2010-2014 Joey Hess <id@joeyh.name>
|
- Copyright 2010-2019 Joey Hess <id@joeyh.name>
|
||||||
-
|
-
|
||||||
- License: BSD-2-clause
|
- License: BSD-2-clause
|
||||||
-}
|
-}
|
||||||
|
|
||||||
module Utility.CopyFile (
|
module Utility.CopyFile (
|
||||||
copyFileExternal,
|
copyFileExternal,
|
||||||
|
copyCoW,
|
||||||
createLinkOrCopy,
|
createLinkOrCopy,
|
||||||
CopyMetaData(..)
|
CopyMetaData(..)
|
||||||
) where
|
) where
|
||||||
|
@ -22,6 +23,17 @@ data CopyMetaData
|
||||||
| CopyAllMetaData
|
| CopyAllMetaData
|
||||||
deriving (Eq)
|
deriving (Eq)
|
||||||
|
|
||||||
|
copyMetaDataParams :: CopyMetaData -> [CommandParam]
|
||||||
|
copyMetaDataParams meta = map snd $ filter fst
|
||||||
|
[ (allmeta && BuildInfo.cp_a, Param "-a")
|
||||||
|
, (allmeta && BuildInfo.cp_p && not BuildInfo.cp_a
|
||||||
|
, Param "-p")
|
||||||
|
, (not allmeta && BuildInfo.cp_preserve_timestamps
|
||||||
|
, Param "--preserve=timestamps")
|
||||||
|
]
|
||||||
|
where
|
||||||
|
allmeta = meta == CopyAllMetaData
|
||||||
|
|
||||||
{- The cp command is used, because I hate reinventing the wheel,
|
{- The cp command is used, because I hate reinventing the wheel,
|
||||||
- and because this allows easy access to features like cp --reflink. -}
|
- and because this allows easy access to features like cp --reflink. -}
|
||||||
copyFileExternal :: CopyMetaData -> FilePath -> FilePath -> IO Bool
|
copyFileExternal :: CopyMetaData -> FilePath -> FilePath -> IO Bool
|
||||||
|
@ -30,15 +42,33 @@ copyFileExternal meta src dest = do
|
||||||
removeFile dest
|
removeFile dest
|
||||||
boolSystem "cp" $ params ++ [File src, File dest]
|
boolSystem "cp" $ params ++ [File src, File dest]
|
||||||
where
|
where
|
||||||
params = map snd $ filter fst
|
params
|
||||||
[ (BuildInfo.cp_reflink_auto, Param "--reflink=auto")
|
| BuildInfo.cp_reflink_supported =
|
||||||
, (allmeta && BuildInfo.cp_a, Param "-a")
|
Param "--reflink=auto" : copyMetaDataParams meta
|
||||||
, (allmeta && BuildInfo.cp_p && not BuildInfo.cp_a
|
| otherwise = copyMetaDataParams meta
|
||||||
, Param "-p")
|
|
||||||
, (not allmeta && BuildInfo.cp_preserve_timestamps
|
{- When a filesystem supports CoW (and cp does), uses it to make
|
||||||
, Param "--preserve=timestamps")
|
- an efficient copy of a file. Otherwise, returns False. -}
|
||||||
]
|
copyCoW :: CopyMetaData -> FilePath -> FilePath -> IO Bool
|
||||||
allmeta = meta == CopyAllMetaData
|
copyCoW meta src dest
|
||||||
|
| BuildInfo.cp_reflink_supported = do
|
||||||
|
whenM (doesFileExist dest) $
|
||||||
|
removeFile dest
|
||||||
|
-- When CoW is not supported, cp will complain to stderr,
|
||||||
|
-- so have to discard its stderr.
|
||||||
|
ok <- catchBoolIO $ do
|
||||||
|
withQuietOutput createProcessSuccess $
|
||||||
|
proc "cp" $ toCommand $
|
||||||
|
params ++ [File src, File dest]
|
||||||
|
return True
|
||||||
|
-- When CoW is not supported, cp creates the destination
|
||||||
|
-- file but leaves it empty.
|
||||||
|
unless ok $
|
||||||
|
void $ tryIO $ removeFile dest
|
||||||
|
return ok
|
||||||
|
| otherwise = return False
|
||||||
|
where
|
||||||
|
params = Param "--reflink=always" : copyMetaDataParams meta
|
||||||
|
|
||||||
{- Create a hard link if the filesystem allows it, and fall back to copying
|
{- Create a hard link if the filesystem allows it, and fall back to copying
|
||||||
- the file. -}
|
- the file. -}
|
||||||
|
|
|
@ -17,3 +17,5 @@ If there is some generic benefit from `rsync`, could it may be at least be a con
|
||||||
|
|
||||||
|
|
||||||
[[!meta author="yoh"]]
|
[[!meta author="yoh"]]
|
||||||
|
|
||||||
|
> [[done]] --[[Joey]]
|
||||||
|
|
|
@ -0,0 +1,7 @@
|
||||||
|
[[!comment format=mdwn
|
||||||
|
username="joey"
|
||||||
|
subject="""comment 2"""
|
||||||
|
date="2019-07-17T18:13:17Z"
|
||||||
|
content="""
|
||||||
|
CoW probing implemented
|
||||||
|
"""]]
|
Loading…
Add table
Reference in a new issue