annex.fastcopy
Added annex.fastcopy and remote.name.annex-fastcopy config setting. When set, this allows the copy_file_range syscall to be used, which can eg allow for server-side copies on NFS. (For fastest copying, also disable annex.verify or remote.name.annex-verify.) This is a simple implementation, that does not handle resuming as well as it possibly could. It can be used with both local git remotes (including on NFS), and directory special remotes. Other types of remotes could in theory also support it, so I've left the config documented as a general thing.
This commit is contained in:
parent
6468a39e92
commit
73060eea51
10 changed files with 151 additions and 66 deletions
|
@ -1,6 +1,6 @@
|
|||
{- Copying files.
|
||||
-
|
||||
- Copyright 2011-2022 Joey Hess <id@joeyh.name>
|
||||
- Copyright 2011-2025 Joey Hess <id@joeyh.name>
|
||||
-
|
||||
- Licensed under the GNU AGPL version 3 or higher.
|
||||
-}
|
||||
|
@ -10,6 +10,7 @@
|
|||
module Annex.CopyFile where
|
||||
|
||||
import Annex.Common
|
||||
import qualified Annex
|
||||
import Utility.Metered
|
||||
import Utility.CopyFile
|
||||
import Utility.FileMode
|
||||
|
@ -77,6 +78,23 @@ tryCopyCoW (CopyCoWTried copycowtried) src dest meterupdate =
|
|||
|
||||
data CopyMethod = CopiedCoW | Copied
|
||||
|
||||
-- Should cp be allowed to copy the file with --reflink=auto?
|
||||
--
|
||||
-- The benefit is that this lets it use the copy_file_range
|
||||
-- syscall, which is not used with --reflink=always. The drawback is that
|
||||
-- the IncrementalVerifier is not updated, so verification, if it is done,
|
||||
-- will need to re-read the whole content of the file. And, interrupted
|
||||
-- copies are not resumed but are restarted from the beginning.
|
||||
--
|
||||
-- Using this will result in CopiedCow being returned even in cases
|
||||
-- where cp fell back to a slow copy.
|
||||
newtype FastCopy = FastCopy Bool
|
||||
|
||||
getFastCopy :: RemoteGitConfig -> Annex FastCopy
|
||||
getFastCopy gc = case remoteAnnexFastCopy gc of
|
||||
False -> FastCopy . annexFastCopy <$> Annex.getGitConfig
|
||||
True -> return (FastCopy True)
|
||||
|
||||
{- Copies from src to dest, updating a meter. Preserves mode and mtime.
|
||||
- Uses copy-on-write if it is supported. If the the destination already
|
||||
- exists, an interrupted copy will resume where it left off.
|
||||
|
@ -94,20 +112,30 @@ data CopyMethod = CopiedCoW | Copied
|
|||
- (eg when isStableKey is false), and doing this avoids getting a
|
||||
- corrupted file in such cases.
|
||||
-}
|
||||
fileCopier :: CopyCoWTried -> OsPath -> OsPath -> MeterUpdate -> Maybe IncrementalVerifier -> IO CopyMethod
|
||||
fileCopier :: CopyCoWTried -> FastCopy -> OsPath -> OsPath -> MeterUpdate -> Maybe IncrementalVerifier -> IO CopyMethod
|
||||
fileCopier copycowtried (FastCopy True) src dest meterupdate iv = do
|
||||
ok <- watchFileSize dest meterupdate $ const $
|
||||
copyFileExternal CopyTimeStamps src dest
|
||||
if ok
|
||||
then do
|
||||
maybe noop unableIncrementalVerifier iv
|
||||
return CopiedCoW
|
||||
else fileCopier copycowtried (FastCopy False) src dest meterupdate iv
|
||||
#ifdef mingw32_HOST_OS
|
||||
fileCopier _ src dest meterupdate iv = docopy
|
||||
fileCopier _ _ src dest meterupdate iv =
|
||||
fileCopier' src dest meterupdate iv
|
||||
#else
|
||||
fileCopier copycowtried src dest meterupdate iv =
|
||||
fileCopier copycowtried _ src dest meterupdate iv =
|
||||
ifM (tryCopyCoW copycowtried src dest meterupdate)
|
||||
( do
|
||||
maybe noop unableIncrementalVerifier iv
|
||||
return CopiedCoW
|
||||
, docopy
|
||||
, fileCopier' src dest meterupdate iv
|
||||
)
|
||||
#endif
|
||||
where
|
||||
docopy = do
|
||||
|
||||
fileCopier' :: OsPath -> OsPath -> MeterUpdate -> Maybe IncrementalVerifier -> IO CopyMethod
|
||||
fileCopier' src dest meterupdate iv = do
|
||||
-- The file might have had the write bit removed,
|
||||
-- so make sure we can write to it.
|
||||
void $ tryIO $ allowWrite dest
|
||||
|
@ -126,6 +154,7 @@ fileCopier copycowtried src dest meterupdate iv =
|
|||
|
||||
{- Copies content from a handle to a destination file. Does not
|
||||
- use copy-on-write, and does not copy file mode and mtime.
|
||||
- Updates the IncementalVerifier with the content it copies.
|
||||
-}
|
||||
fileContentCopier :: Handle -> OsPath -> MeterUpdate -> Maybe IncrementalVerifier -> IO ()
|
||||
fileContentCopier hsrc dest meterupdate iv =
|
||||
|
|
|
@ -10,6 +10,10 @@ git-annex (10.20250521) UNRELEASED; urgency=medium
|
|||
* map: Improve display of remote names.
|
||||
* Windows: Fix duplicate file bug that could occur when files were
|
||||
supposed to be moved across devices.
|
||||
* Added annex.fastcopy and remote.name.annex-fastcopy config setting.
|
||||
When set, this allows the copy_file_range syscall to be used, which
|
||||
can eg allow for server-side copies on NFS. (For fastest copying,
|
||||
also disable annex.verify or remote.name.annex-verify.)
|
||||
|
||||
-- Joey Hess <id@joeyh.name> Thu, 22 May 2025 12:43:38 -0400
|
||||
|
||||
|
|
|
@ -84,11 +84,12 @@ gen r u rc gc rs = do
|
|||
cst <- remoteCost gc c cheapRemoteCost
|
||||
let chunkconfig = getChunkConfig c
|
||||
cow <- liftIO newCopyCoWTried
|
||||
fastcopy <- getFastCopy gc
|
||||
let ii = IgnoreInodes $ fromMaybe True $
|
||||
getRemoteConfigValue ignoreinodesField c
|
||||
return $ Just $ specialRemote c
|
||||
(storeKeyM dir chunkconfig cow)
|
||||
(retrieveKeyFileM dir chunkconfig cow)
|
||||
(storeKeyM dir chunkconfig cow fastcopy)
|
||||
(retrieveKeyFileM dir chunkconfig cow fastcopy)
|
||||
(removeKeyM dir)
|
||||
(checkPresentM dir chunkconfig)
|
||||
Remote
|
||||
|
@ -105,8 +106,8 @@ gen r u rc gc rs = do
|
|||
, checkPresent = checkPresentDummy
|
||||
, checkPresentCheap = True
|
||||
, exportActions = ExportActions
|
||||
{ storeExport = storeExportM dir cow
|
||||
, retrieveExport = retrieveExportM dir cow
|
||||
{ storeExport = storeExportM dir cow fastcopy
|
||||
, retrieveExport = retrieveExportM dir cow fastcopy
|
||||
, removeExport = removeExportM dir
|
||||
, checkPresentExport = checkPresentExportM dir
|
||||
-- Not needed because removeExportLocation
|
||||
|
@ -118,7 +119,7 @@ gen r u rc gc rs = do
|
|||
{ listImportableContents = listImportableContentsM ii dir
|
||||
, importKey = Just (importKeyM ii dir)
|
||||
, retrieveExportWithContentIdentifier = retrieveExportWithContentIdentifierM ii dir cow
|
||||
, storeExportWithContentIdentifier = storeExportWithContentIdentifierM ii dir cow
|
||||
, storeExportWithContentIdentifier = storeExportWithContentIdentifierM ii dir cow fastcopy
|
||||
, removeExportWithContentIdentifier = removeExportWithContentIdentifierM ii dir
|
||||
-- Not needed because removeExportWithContentIdentifier
|
||||
-- auto-removes empty directories.
|
||||
|
@ -189,8 +190,8 @@ storeDir d k = addTrailingPathSeparator $
|
|||
|
||||
{- Check if there is enough free disk space in the remote's directory to
|
||||
- store the key. Note that the unencrypted key size is checked. -}
|
||||
storeKeyM :: OsPath -> ChunkConfig -> CopyCoWTried -> Storer
|
||||
storeKeyM d chunkconfig cow k c m =
|
||||
storeKeyM :: OsPath -> ChunkConfig -> CopyCoWTried -> FastCopy -> Storer
|
||||
storeKeyM d chunkconfig cow fastcopy k c m =
|
||||
ifM (checkDiskSpaceDirectory d k)
|
||||
( do
|
||||
void $ liftIO $ tryIO $ createDirectoryUnder [d] tmpdir
|
||||
|
@ -210,7 +211,7 @@ storeKeyM d chunkconfig cow k c m =
|
|||
in byteStorer go k c m
|
||||
NoChunks ->
|
||||
let go _k src p = liftIO $ do
|
||||
void $ fileCopier cow src tmpf p Nothing
|
||||
void $ fileCopier cow fastcopy src tmpf p Nothing
|
||||
finalizeStoreGeneric d tmpdir destdir
|
||||
in fileStorer go k c m
|
||||
_ ->
|
||||
|
@ -247,12 +248,12 @@ finalizeStoreGeneric d tmp dest = do
|
|||
mapM_ preventWrite =<< dirContents dest
|
||||
preventWrite dest
|
||||
|
||||
retrieveKeyFileM :: OsPath -> ChunkConfig -> CopyCoWTried -> Retriever
|
||||
retrieveKeyFileM d (LegacyChunks _) _ = Legacy.retrieve locations' d
|
||||
retrieveKeyFileM d NoChunks cow = fileRetriever' $ \dest k p iv -> do
|
||||
retrieveKeyFileM :: OsPath -> ChunkConfig -> CopyCoWTried -> FastCopy -> Retriever
|
||||
retrieveKeyFileM d (LegacyChunks _) _ _ = Legacy.retrieve locations' d
|
||||
retrieveKeyFileM d NoChunks cow fastcopy = fileRetriever' $ \dest k p iv -> do
|
||||
src <- liftIO $ getLocation d k
|
||||
void $ liftIO $ fileCopier cow src dest p iv
|
||||
retrieveKeyFileM d _ _ = byteRetriever $ \k sink ->
|
||||
void $ liftIO $ fileCopier cow fastcopy src dest p iv
|
||||
retrieveKeyFileM d _ _ _ = byteRetriever $ \k sink ->
|
||||
sink =<< liftIO (F.readFile =<< getLocation d k)
|
||||
|
||||
retrieveKeyFileCheapM :: OsPath -> ChunkConfig -> Maybe (Key -> AssociatedFile -> OsPath -> Annex ())
|
||||
|
@ -327,8 +328,8 @@ checkPresentGeneric' d check = ifM check
|
|||
)
|
||||
)
|
||||
|
||||
storeExportM :: OsPath -> CopyCoWTried -> OsPath -> Key -> ExportLocation -> MeterUpdate -> Annex ()
|
||||
storeExportM d cow src _k loc p = do
|
||||
storeExportM :: OsPath -> CopyCoWTried -> FastCopy -> OsPath -> Key -> ExportLocation -> MeterUpdate -> Annex ()
|
||||
storeExportM d cow fastcopy src _k loc p = do
|
||||
liftIO $ createDirectoryUnder [d] (takeDirectory dest)
|
||||
-- Write via temp file so that checkPresentGeneric will not
|
||||
-- see it until it's fully stored.
|
||||
|
@ -336,12 +337,12 @@ storeExportM d cow src _k loc p = do
|
|||
where
|
||||
dest = exportPath d loc
|
||||
go tmp () = void $ liftIO $
|
||||
fileCopier cow src tmp p Nothing
|
||||
fileCopier cow fastcopy src tmp p Nothing
|
||||
|
||||
retrieveExportM :: OsPath -> CopyCoWTried -> Key -> ExportLocation -> OsPath -> MeterUpdate -> Annex Verification
|
||||
retrieveExportM d cow k loc dest p =
|
||||
retrieveExportM :: OsPath -> CopyCoWTried -> FastCopy -> Key -> ExportLocation -> OsPath -> MeterUpdate -> Annex Verification
|
||||
retrieveExportM d cow fastcopy k loc dest p =
|
||||
verifyKeyContentIncrementally AlwaysVerify k $ \iv ->
|
||||
void $ liftIO $ fileCopier cow src dest p iv
|
||||
void $ liftIO $ fileCopier cow fastcopy src dest p iv
|
||||
where
|
||||
src = exportPath d loc
|
||||
|
||||
|
@ -533,13 +534,13 @@ retrieveExportWithContentIdentifierM ii dir cow loc cids dest gk p =
|
|||
=<< R.getSymbolicLinkStatus f'
|
||||
guardSameContentIdentifiers cont cids currcid
|
||||
|
||||
storeExportWithContentIdentifierM :: IgnoreInodes -> OsPath -> CopyCoWTried -> OsPath -> Key -> ExportLocation -> [ContentIdentifier] -> MeterUpdate -> Annex ContentIdentifier
|
||||
storeExportWithContentIdentifierM ii dir cow src _k loc overwritablecids p = do
|
||||
storeExportWithContentIdentifierM :: IgnoreInodes -> OsPath -> CopyCoWTried -> FastCopy -> OsPath -> Key -> ExportLocation -> [ContentIdentifier] -> MeterUpdate -> Annex ContentIdentifier
|
||||
storeExportWithContentIdentifierM ii dir cow fastcopy src _k loc overwritablecids p = do
|
||||
liftIO $ createDirectoryUnder [dir] destdir
|
||||
withTmpFileIn destdir template $ \tmpf tmph -> do
|
||||
let tmpf' = fromOsPath tmpf
|
||||
liftIO $ hClose tmph
|
||||
void $ liftIO $ fileCopier cow src tmpf p Nothing
|
||||
void $ liftIO $ fileCopier cow fastcopy src tmpf p Nothing
|
||||
resetAnnexFilePerm tmpf
|
||||
liftIO (R.getSymbolicLinkStatus tmpf') >>= liftIO . mkContentIdentifier ii tmpf >>= \case
|
||||
Nothing -> giveup "unable to generate content identifier"
|
||||
|
|
|
@ -440,7 +440,7 @@ inAnnex rmt st key = do
|
|||
inAnnex' repo rmt st key
|
||||
|
||||
inAnnex' :: Git.Repo -> Remote -> State -> Key -> Annex Bool
|
||||
inAnnex' repo rmt st@(State connpool duc _ _ _) key
|
||||
inAnnex' repo rmt st@(State connpool duc _ _ _ _) key
|
||||
| isP2PHttp rmt = checkp2phttp
|
||||
| Git.repoIsHttp repo = checkhttp
|
||||
| Git.repoIsUrl repo = checkremote
|
||||
|
@ -482,7 +482,7 @@ dropKey r st proof key = do
|
|||
dropKey' repo r st proof key
|
||||
|
||||
dropKey' :: Git.Repo -> Remote -> State -> Maybe SafeDropProof -> Key -> Annex ()
|
||||
dropKey' repo r st@(State connpool duc _ _ _) proof key
|
||||
dropKey' repo r st@(State connpool duc _ _ _ _) proof key
|
||||
| isP2PHttp r =
|
||||
clientRemoveWithProof proof key unabletoremove r >>= \case
|
||||
RemoveResultPlus True fanoutuuids ->
|
||||
|
@ -531,7 +531,7 @@ lockKey r st key callback = do
|
|||
lockKey' repo r st key callback
|
||||
|
||||
lockKey' :: Git.Repo -> Remote -> State -> Key -> (VerifiedCopy -> Annex r) -> Annex r
|
||||
lockKey' repo r st@(State connpool duc _ _ _) key callback
|
||||
lockKey' repo r st@(State connpool duc _ _ _ _) key callback
|
||||
| isP2PHttp r = do
|
||||
showLocking r
|
||||
p2pHttpClient r giveup (clientLockContent key) >>= \case
|
||||
|
@ -566,7 +566,7 @@ copyFromRemote r st key file dest meterupdate vc = do
|
|||
copyFromRemote'' repo r st key file dest meterupdate vc
|
||||
|
||||
copyFromRemote'' :: Git.Repo -> Remote -> State -> Key -> AssociatedFile -> OsPath -> MeterUpdate -> VerifyConfig -> Annex Verification
|
||||
copyFromRemote'' repo r st@(State connpool _ _ _ _) key af dest meterupdate vc
|
||||
copyFromRemote'' repo r st@(State connpool _ _ _ _ _) key af dest meterupdate vc
|
||||
| isP2PHttp r = copyp2phttp
|
||||
| Git.repoIsHttp repo = verifyKeyContentIncrementally vc key $ \iv -> do
|
||||
gc <- Annex.getGitConfig
|
||||
|
@ -642,7 +642,7 @@ copyToRemote r st key af o meterupdate = do
|
|||
copyToRemote' repo r st key af o meterupdate
|
||||
|
||||
copyToRemote' :: Git.Repo -> Remote -> State -> Key -> AssociatedFile -> Maybe OsPath -> MeterUpdate -> Annex ()
|
||||
copyToRemote' repo r st@(State connpool duc _ _ _) key af o meterupdate
|
||||
copyToRemote' repo r st@(State connpool duc _ _ _ _) key af o meterupdate
|
||||
| isP2PHttp r = prepsendwith copyp2phttp
|
||||
| not $ Git.repoIsUrl repo = ifM duc
|
||||
( guardUsable repo (giveup "cannot access remote") $ commitOnCleanup repo r st $
|
||||
|
@ -753,7 +753,7 @@ mkLocalRemoteAnnex repo = LocalRemoteAnnex repo <$> liftIO (newMVar [])
|
|||
- when possible.
|
||||
-}
|
||||
onLocal :: State -> Annex a -> Annex a
|
||||
onLocal (State _ _ _ _ lra) = onLocal' lra
|
||||
onLocal (State _ _ _ _ _ lra) = onLocal' lra
|
||||
|
||||
onLocalRepo :: Git.Repo -> Annex a -> Annex a
|
||||
onLocalRepo repo a = do
|
||||
|
@ -830,7 +830,7 @@ type FileCopier = OsPath -> OsPath -> Key -> MeterUpdate -> Annex Bool -> Verify
|
|||
-- done. Also returns Verified if the key's content is verified while
|
||||
-- copying it.
|
||||
mkFileCopier :: Bool -> State -> Annex FileCopier
|
||||
mkFileCopier remotewanthardlink (State _ _ copycowtried _ _) = do
|
||||
mkFileCopier remotewanthardlink (State _ _ copycowtried fastcopy _ _) = do
|
||||
localwanthardlink <- wantHardLink
|
||||
let linker = \src dest -> R.createLink (fromOsPath src) (fromOsPath dest) >> return True
|
||||
if remotewanthardlink || localwanthardlink
|
||||
|
@ -848,7 +848,7 @@ mkFileCopier remotewanthardlink (State _ _ copycowtried _ _) = do
|
|||
where
|
||||
copier src dest k p check verifyconfig = do
|
||||
iv <- startVerifyKeyContentIncrementally verifyconfig k
|
||||
liftIO (fileCopier copycowtried src dest p iv) >>= \case
|
||||
liftIO (fileCopier copycowtried fastcopy src dest p iv) >>= \case
|
||||
Copied -> ifM check
|
||||
( finishVerifyKeyContentIncrementally iv
|
||||
, do
|
||||
|
@ -864,24 +864,25 @@ mkFileCopier remotewanthardlink (State _ _ copycowtried _ _) = do
|
|||
- This returns False when the repository UUID is not as expected. -}
|
||||
type DeferredUUIDCheck = Annex Bool
|
||||
|
||||
data State = State Ssh.P2PShellConnectionPool DeferredUUIDCheck CopyCoWTried (Annex (Git.Repo, GitConfig)) LocalRemoteAnnex
|
||||
data State = State Ssh.P2PShellConnectionPool DeferredUUIDCheck CopyCoWTried FastCopy (Annex (Git.Repo, GitConfig)) LocalRemoteAnnex
|
||||
|
||||
getRepoFromState :: State -> Annex Git.Repo
|
||||
getRepoFromState (State _ _ _ a _) = fst <$> a
|
||||
getRepoFromState (State _ _ _ _ a _) = fst <$> a
|
||||
|
||||
#ifndef mingw32_HOST_OS
|
||||
{- The config of the remote git repository, cached for speed. -}
|
||||
getGitConfigFromState :: State -> Annex GitConfig
|
||||
getGitConfigFromState (State _ _ _ a _) = snd <$> a
|
||||
getGitConfigFromState (State _ _ _ _ a _) = snd <$> a
|
||||
#endif
|
||||
|
||||
mkState :: Git.Repo -> UUID -> RemoteGitConfig -> Annex State
|
||||
mkState r u gc = do
|
||||
pool <- Ssh.mkP2PShellConnectionPool
|
||||
copycowtried <- liftIO newCopyCoWTried
|
||||
fastcopy <- getFastCopy gc
|
||||
lra <- mkLocalRemoteAnnex r
|
||||
(duc, getrepo) <- go
|
||||
return $ State pool duc copycowtried getrepo lra
|
||||
return $ State pool duc copycowtried fastcopy getrepo lra
|
||||
where
|
||||
go
|
||||
| remoteAnnexCheckUUID gc = return
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
{- git-annex configuration
|
||||
-
|
||||
- Copyright 2012-2024 Joey Hess <id@joeyh.name>
|
||||
- Copyright 2012-2025 Joey Hess <id@joeyh.name>
|
||||
-
|
||||
- Licensed under the GNU AGPL version 3 or higher.
|
||||
-}
|
||||
|
@ -135,6 +135,7 @@ data GitConfig = GitConfig
|
|||
, annexDifferences :: Differences
|
||||
, annexUsedRefSpec :: Maybe RefSpec
|
||||
, annexVerify :: Bool
|
||||
, annexFastCopy :: Bool
|
||||
, annexPidLock :: Bool
|
||||
, annexPidLockTimeout :: Seconds
|
||||
, annexDbDir :: Maybe OsPath
|
||||
|
@ -241,6 +242,7 @@ extractGitConfig configsource r = GitConfig
|
|||
, annexUsedRefSpec = either (const Nothing) Just . parseRefSpec
|
||||
=<< getmaybe (annexConfig "used-refspec")
|
||||
, annexVerify = getbool (annexConfig "verify") True
|
||||
, annexFastCopy = getbool (annexConfig "fastcopy") False
|
||||
, annexPidLock = getbool (annexConfig "pidlock") False
|
||||
, annexPidLockTimeout = Seconds $ fromMaybe 300 $
|
||||
getmayberead (annexConfig "pidlocktimeout")
|
||||
|
@ -387,6 +389,7 @@ data RemoteGitConfig = RemoteGitConfig
|
|||
, remoteAnnexPush :: Bool
|
||||
, remoteAnnexReadOnly :: Bool
|
||||
, remoteAnnexVerify :: Bool
|
||||
, remoteAnnexFastCopy :: Bool
|
||||
, remoteAnnexCheckUUID :: Bool
|
||||
, remoteAnnexTrackingBranch :: Maybe Git.Ref
|
||||
, remoteAnnexTrustLevel :: Maybe String
|
||||
|
@ -466,6 +469,7 @@ extractRemoteGitConfig r remotename = do
|
|||
, remoteAnnexReadOnly = getbool ReadOnlyField False
|
||||
, remoteAnnexCheckUUID = getbool CheckUUIDField True
|
||||
, remoteAnnexVerify = getbool VerifyField True
|
||||
, remoteAnnexFastCopy = getbool FastCopyField False
|
||||
, remoteAnnexTrackingBranch = Git.Ref . encodeBS <$>
|
||||
( notempty (getmaybe TrackingBranchField)
|
||||
<|> notempty (getmaybe ExportTrackingField) -- old name
|
||||
|
@ -574,6 +578,7 @@ data RemoteGitConfigField
|
|||
| ReadOnlyField
|
||||
| CheckUUIDField
|
||||
| VerifyField
|
||||
| FastCopyField
|
||||
| TrackingBranchField
|
||||
| ExportTrackingField
|
||||
| TrustLevelField
|
||||
|
@ -644,6 +649,7 @@ remoteGitConfigField = \case
|
|||
ReadOnlyField -> inherited True "readonly"
|
||||
CheckUUIDField -> uninherited True "checkuuid"
|
||||
VerifyField -> inherited True "verify"
|
||||
FastCopyField -> inherited True "fastcopy"
|
||||
TrackingBranchField -> uninherited True "tracking-branch"
|
||||
ExportTrackingField -> uninherited True "export-tracking"
|
||||
TrustLevelField -> uninherited True "trustlevel"
|
||||
|
|
|
@ -43,7 +43,12 @@ copyMetaDataParams meta = map snd $ filter fst
|
|||
|
||||
{- The cp command is used, because I hate reinventing the wheel,
|
||||
- and because this allows easy access to features like cp --reflink
|
||||
- and preserving metadata. -}
|
||||
- and preserving metadata.
|
||||
-
|
||||
- This uses --reflink=auto when supported, which allows for fast copies
|
||||
- using reflinks or the copy_file_range syscall. Whatever cp thinks is
|
||||
- best. --reflink=auto is the default of recent versions of cp, but is
|
||||
- used explicitly to support older versions. -}
|
||||
copyFileExternal :: CopyMetaData -> OsPath -> OsPath -> IO Bool
|
||||
copyFileExternal meta src dest = do
|
||||
-- Delete any existing dest file because an unwritable file
|
||||
|
@ -81,8 +86,8 @@ copyCoW meta src dest
|
|||
| otherwise = return False
|
||||
where
|
||||
-- Note that in coreutils 9.0, cp uses CoW by default,
|
||||
-- without needing an option. This s only needed to support
|
||||
-- older versions.
|
||||
-- without needing an option. But, this makes it fail if it is
|
||||
-- unable to make a CoW copy.
|
||||
params = Param "--reflink=always" : copyMetaDataParams meta
|
||||
|
||||
{- Create a hard link if the filesystem allows it, and fall back to copying
|
||||
|
|
|
@ -1596,6 +1596,25 @@ Remotes are configured using these settings in `.git/config`.
|
|||
in some edge cases, where it's likely the case than an
|
||||
object was downloaded incorrectly, or when needed for security.
|
||||
|
||||
* `remote.<name>.annex-fastcopy`, `annex.fastcopy`
|
||||
|
||||
Set to "true" to optimize copying files to/from a remote.
|
||||
|
||||
Setting this allows `cp` to use the `copy_file_range` system call. (When
|
||||
the `cp` command supports it.) The benefit depends on the filesystem.
|
||||
For example, on NFS, this can allow a server-side copy to be done. On
|
||||
ext4, this avoids some memory transfer overhead, but the file still
|
||||
needs to be copied. There is no benefit on btrfs, since git-annex does
|
||||
the equivilantly fast CoW reflinking on btrfs by default.
|
||||
|
||||
This will do little to improve copy speed when git-annex needs to read
|
||||
the copied file in order to verify it. So for maximum speed, as well
|
||||
as setting this option, set `remote.<name>.annex-verify` or `annex.verify`
|
||||
to false.
|
||||
|
||||
Currently, setting this prevents resuming interrupted copies where
|
||||
they left off. An interrupted copy will be re-run from the beginning.
|
||||
|
||||
* `remote.<name>.annex-tracking-branch`
|
||||
|
||||
This is for use with special remotes that support exports and imports.
|
||||
|
|
|
@ -15,3 +15,5 @@ P.S.: Didn't want to call this a bug, mostly b/c the "real bug" isn't in annex e
|
|||
|
||||
[[!meta author=ben]]
|
||||
[[!tag projects/INM7]]
|
||||
|
||||
> [[done]] --[[Joey]]
|
||||
|
|
|
@ -9,14 +9,11 @@ or EXDEV when not supported. Then git-annex could use `cp --reflink=always`
|
|||
as a fallback.
|
||||
|
||||
However, `copy_file_range` is not necessarily inexpensive. Depending on the
|
||||
filesystem it can still need to read and write the whole file. And, rather
|
||||
than a single syscall copying the whole file, git-annex would need to call
|
||||
it repeatedly in chunks in order to display a progress bar. But, making a
|
||||
lot of syscalls against a NFS filesystem would be its own overhead.
|
||||
|
||||
So there seems to be a tradeoff between progress display and efficiency on
|
||||
NFS. And if the goal is to maximize speed for NFS with server-side copy,
|
||||
maybe progress bars are not important enough to have in that case?
|
||||
filesystem it can still need to read and write the whole file. So when
|
||||
using it, git-annex would need to poll the size of the file in order to
|
||||
update the progress bar. Or it could call the syscall repeatedly on
|
||||
chunks of the file, but on eg NFS that would add a lot of syscalls, so
|
||||
probably more overhead.
|
||||
|
||||
Also, it seems likely to me that you would certainly want to turn off
|
||||
annex.verify along with using `copy_file_range`, which is already a manual
|
||||
|
|
|
@ -0,0 +1,21 @@
|
|||
[[!comment format=mdwn
|
||||
username="joey"
|
||||
subject="""comment 4"""
|
||||
date="2025-06-03T18:43:21Z"
|
||||
content="""
|
||||
Implemented `remote.<name>.annex-fastcopy` and `annex.fastcopy`
|
||||
config settings.
|
||||
|
||||
These do just run `cp --reflink=auto`. So when a copy was interrupted,
|
||||
it won't resume where it left off. It would be possible to improve that,
|
||||
by making git-annex use `copy_file_range` itself, starting at the point it
|
||||
was interrupted. At least for keys where isStableKey is True, since
|
||||
git-annex won't be verifying when using this. But for the
|
||||
NFS use case, if the server is doing a server-side CoW copy, it's atomic
|
||||
anyway, so no need to handle resuming.
|
||||
|
||||
Since I don't currently know of a haskell binding to `copy_file_range` and
|
||||
since it's a low-level OS-specific thing, I have punted on handling
|
||||
resuming. This could be revisited, I just don't see the benefit in the
|
||||
current use case, and wanted to start from something relatively simple.
|
||||
"""]]
|
Loading…
Add table
Add a link
Reference in a new issue