diff --git a/Annex/CopyFile.hs b/Annex/CopyFile.hs index 83bc55e42a..9c9baf2e4f 100644 --- a/Annex/CopyFile.hs +++ b/Annex/CopyFile.hs @@ -1,6 +1,6 @@ {- Copying files. - - - Copyright 2011-2022 Joey Hess + - Copyright 2011-2025 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -10,6 +10,7 @@ module Annex.CopyFile where import Annex.Common +import qualified Annex import Utility.Metered import Utility.CopyFile import Utility.FileMode @@ -77,6 +78,23 @@ tryCopyCoW (CopyCoWTried copycowtried) src dest meterupdate = data CopyMethod = CopiedCoW | Copied +-- Should cp be allowed to copy the file with --reflink=auto? +-- +-- The benefit is that this lets it use the copy_file_range +-- syscall, which is not used with --reflink=always. The drawback is that +-- the IncrementalVerifier is not updated, so verification, if it is done, +-- will need to re-read the whole content of the file. And, interrupted +-- copies are not resumed but are restarted from the beginning. +-- +-- Using this will result in CopiedCow being returned even in cases +-- where cp fell back to a slow copy. +newtype FastCopy = FastCopy Bool + +getFastCopy :: RemoteGitConfig -> Annex FastCopy +getFastCopy gc = case remoteAnnexFastCopy gc of + False -> FastCopy . annexFastCopy <$> Annex.getGitConfig + True -> return (FastCopy True) + {- Copies from src to dest, updating a meter. Preserves mode and mtime. - Uses copy-on-write if it is supported. If the the destination already - exists, an interrupted copy will resume where it left off. @@ -94,38 +112,49 @@ data CopyMethod = CopiedCoW | Copied - (eg when isStableKey is false), and doing this avoids getting a - corrupted file in such cases. -} -fileCopier :: CopyCoWTried -> OsPath -> OsPath -> MeterUpdate -> Maybe IncrementalVerifier -> IO CopyMethod +fileCopier :: CopyCoWTried -> FastCopy -> OsPath -> OsPath -> MeterUpdate -> Maybe IncrementalVerifier -> IO CopyMethod +fileCopier copycowtried (FastCopy True) src dest meterupdate iv = do + ok <- watchFileSize dest meterupdate $ const $ + copyFileExternal CopyTimeStamps src dest + if ok + then do + maybe noop unableIncrementalVerifier iv + return CopiedCoW + else fileCopier copycowtried (FastCopy False) src dest meterupdate iv #ifdef mingw32_HOST_OS -fileCopier _ src dest meterupdate iv = docopy +fileCopier _ _ src dest meterupdate iv = + fileCopier' src dest meterupdate iv #else -fileCopier copycowtried src dest meterupdate iv = +fileCopier copycowtried _ src dest meterupdate iv = ifM (tryCopyCoW copycowtried src dest meterupdate) ( do maybe noop unableIncrementalVerifier iv return CopiedCoW - , docopy + , fileCopier' src dest meterupdate iv ) #endif - where - docopy = do - -- The file might have had the write bit removed, - -- so make sure we can write to it. - void $ tryIO $ allowWrite dest - F.withBinaryFile src ReadMode $ \hsrc -> - fileContentCopier hsrc dest meterupdate iv +fileCopier' :: OsPath -> OsPath -> MeterUpdate -> Maybe IncrementalVerifier -> IO CopyMethod +fileCopier' src dest meterupdate iv = do + -- The file might have had the write bit removed, + -- so make sure we can write to it. + void $ tryIO $ allowWrite dest + + F.withBinaryFile src ReadMode $ \hsrc -> + fileContentCopier hsrc dest meterupdate iv - -- Copy src mode and mtime. - mode <- fileMode <$> R.getFileStatus (fromOsPath src) - mtime <- utcTimeToPOSIXSeconds <$> getModificationTime src - let dest' = fromOsPath dest - R.setFileMode dest' mode - touch dest' mtime False + -- Copy src mode and mtime. + mode <- fileMode <$> R.getFileStatus (fromOsPath src) + mtime <- utcTimeToPOSIXSeconds <$> getModificationTime src + let dest' = fromOsPath dest + R.setFileMode dest' mode + touch dest' mtime False - return Copied + return Copied {- Copies content from a handle to a destination file. Does not - use copy-on-write, and does not copy file mode and mtime. + - Updates the IncementalVerifier with the content it copies. -} fileContentCopier :: Handle -> OsPath -> MeterUpdate -> Maybe IncrementalVerifier -> IO () fileContentCopier hsrc dest meterupdate iv = diff --git a/CHANGELOG b/CHANGELOG index 9b7ddf6c5e..55ca8b37ef 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -10,6 +10,10 @@ git-annex (10.20250521) UNRELEASED; urgency=medium * map: Improve display of remote names. * Windows: Fix duplicate file bug that could occur when files were supposed to be moved across devices. + * Added annex.fastcopy and remote.name.annex-fastcopy config setting. + When set, this allows the copy_file_range syscall to be used, which + can eg allow for server-side copies on NFS. (For fastest copying, + also disable annex.verify or remote.name.annex-verify.) -- Joey Hess Thu, 22 May 2025 12:43:38 -0400 diff --git a/Remote/Directory.hs b/Remote/Directory.hs index 372a485ba7..5392caafa3 100644 --- a/Remote/Directory.hs +++ b/Remote/Directory.hs @@ -84,11 +84,12 @@ gen r u rc gc rs = do cst <- remoteCost gc c cheapRemoteCost let chunkconfig = getChunkConfig c cow <- liftIO newCopyCoWTried + fastcopy <- getFastCopy gc let ii = IgnoreInodes $ fromMaybe True $ getRemoteConfigValue ignoreinodesField c return $ Just $ specialRemote c - (storeKeyM dir chunkconfig cow) - (retrieveKeyFileM dir chunkconfig cow) + (storeKeyM dir chunkconfig cow fastcopy) + (retrieveKeyFileM dir chunkconfig cow fastcopy) (removeKeyM dir) (checkPresentM dir chunkconfig) Remote @@ -105,8 +106,8 @@ gen r u rc gc rs = do , checkPresent = checkPresentDummy , checkPresentCheap = True , exportActions = ExportActions - { storeExport = storeExportM dir cow - , retrieveExport = retrieveExportM dir cow + { storeExport = storeExportM dir cow fastcopy + , retrieveExport = retrieveExportM dir cow fastcopy , removeExport = removeExportM dir , checkPresentExport = checkPresentExportM dir -- Not needed because removeExportLocation @@ -118,7 +119,7 @@ gen r u rc gc rs = do { listImportableContents = listImportableContentsM ii dir , importKey = Just (importKeyM ii dir) , retrieveExportWithContentIdentifier = retrieveExportWithContentIdentifierM ii dir cow - , storeExportWithContentIdentifier = storeExportWithContentIdentifierM ii dir cow + , storeExportWithContentIdentifier = storeExportWithContentIdentifierM ii dir cow fastcopy , removeExportWithContentIdentifier = removeExportWithContentIdentifierM ii dir -- Not needed because removeExportWithContentIdentifier -- auto-removes empty directories. @@ -189,8 +190,8 @@ storeDir d k = addTrailingPathSeparator $ {- Check if there is enough free disk space in the remote's directory to - store the key. Note that the unencrypted key size is checked. -} -storeKeyM :: OsPath -> ChunkConfig -> CopyCoWTried -> Storer -storeKeyM d chunkconfig cow k c m = +storeKeyM :: OsPath -> ChunkConfig -> CopyCoWTried -> FastCopy -> Storer +storeKeyM d chunkconfig cow fastcopy k c m = ifM (checkDiskSpaceDirectory d k) ( do void $ liftIO $ tryIO $ createDirectoryUnder [d] tmpdir @@ -210,7 +211,7 @@ storeKeyM d chunkconfig cow k c m = in byteStorer go k c m NoChunks -> let go _k src p = liftIO $ do - void $ fileCopier cow src tmpf p Nothing + void $ fileCopier cow fastcopy src tmpf p Nothing finalizeStoreGeneric d tmpdir destdir in fileStorer go k c m _ -> @@ -247,12 +248,12 @@ finalizeStoreGeneric d tmp dest = do mapM_ preventWrite =<< dirContents dest preventWrite dest -retrieveKeyFileM :: OsPath -> ChunkConfig -> CopyCoWTried -> Retriever -retrieveKeyFileM d (LegacyChunks _) _ = Legacy.retrieve locations' d -retrieveKeyFileM d NoChunks cow = fileRetriever' $ \dest k p iv -> do +retrieveKeyFileM :: OsPath -> ChunkConfig -> CopyCoWTried -> FastCopy -> Retriever +retrieveKeyFileM d (LegacyChunks _) _ _ = Legacy.retrieve locations' d +retrieveKeyFileM d NoChunks cow fastcopy = fileRetriever' $ \dest k p iv -> do src <- liftIO $ getLocation d k - void $ liftIO $ fileCopier cow src dest p iv -retrieveKeyFileM d _ _ = byteRetriever $ \k sink -> + void $ liftIO $ fileCopier cow fastcopy src dest p iv +retrieveKeyFileM d _ _ _ = byteRetriever $ \k sink -> sink =<< liftIO (F.readFile =<< getLocation d k) retrieveKeyFileCheapM :: OsPath -> ChunkConfig -> Maybe (Key -> AssociatedFile -> OsPath -> Annex ()) @@ -327,8 +328,8 @@ checkPresentGeneric' d check = ifM check ) ) -storeExportM :: OsPath -> CopyCoWTried -> OsPath -> Key -> ExportLocation -> MeterUpdate -> Annex () -storeExportM d cow src _k loc p = do +storeExportM :: OsPath -> CopyCoWTried -> FastCopy -> OsPath -> Key -> ExportLocation -> MeterUpdate -> Annex () +storeExportM d cow fastcopy src _k loc p = do liftIO $ createDirectoryUnder [d] (takeDirectory dest) -- Write via temp file so that checkPresentGeneric will not -- see it until it's fully stored. @@ -336,12 +337,12 @@ storeExportM d cow src _k loc p = do where dest = exportPath d loc go tmp () = void $ liftIO $ - fileCopier cow src tmp p Nothing + fileCopier cow fastcopy src tmp p Nothing -retrieveExportM :: OsPath -> CopyCoWTried -> Key -> ExportLocation -> OsPath -> MeterUpdate -> Annex Verification -retrieveExportM d cow k loc dest p = +retrieveExportM :: OsPath -> CopyCoWTried -> FastCopy -> Key -> ExportLocation -> OsPath -> MeterUpdate -> Annex Verification +retrieveExportM d cow fastcopy k loc dest p = verifyKeyContentIncrementally AlwaysVerify k $ \iv -> - void $ liftIO $ fileCopier cow src dest p iv + void $ liftIO $ fileCopier cow fastcopy src dest p iv where src = exportPath d loc @@ -533,13 +534,13 @@ retrieveExportWithContentIdentifierM ii dir cow loc cids dest gk p = =<< R.getSymbolicLinkStatus f' guardSameContentIdentifiers cont cids currcid -storeExportWithContentIdentifierM :: IgnoreInodes -> OsPath -> CopyCoWTried -> OsPath -> Key -> ExportLocation -> [ContentIdentifier] -> MeterUpdate -> Annex ContentIdentifier -storeExportWithContentIdentifierM ii dir cow src _k loc overwritablecids p = do +storeExportWithContentIdentifierM :: IgnoreInodes -> OsPath -> CopyCoWTried -> FastCopy -> OsPath -> Key -> ExportLocation -> [ContentIdentifier] -> MeterUpdate -> Annex ContentIdentifier +storeExportWithContentIdentifierM ii dir cow fastcopy src _k loc overwritablecids p = do liftIO $ createDirectoryUnder [dir] destdir withTmpFileIn destdir template $ \tmpf tmph -> do let tmpf' = fromOsPath tmpf liftIO $ hClose tmph - void $ liftIO $ fileCopier cow src tmpf p Nothing + void $ liftIO $ fileCopier cow fastcopy src tmpf p Nothing resetAnnexFilePerm tmpf liftIO (R.getSymbolicLinkStatus tmpf') >>= liftIO . mkContentIdentifier ii tmpf >>= \case Nothing -> giveup "unable to generate content identifier" diff --git a/Remote/Git.hs b/Remote/Git.hs index 1f8a02e7da..f4b5fccf05 100644 --- a/Remote/Git.hs +++ b/Remote/Git.hs @@ -440,7 +440,7 @@ inAnnex rmt st key = do inAnnex' repo rmt st key inAnnex' :: Git.Repo -> Remote -> State -> Key -> Annex Bool -inAnnex' repo rmt st@(State connpool duc _ _ _) key +inAnnex' repo rmt st@(State connpool duc _ _ _ _) key | isP2PHttp rmt = checkp2phttp | Git.repoIsHttp repo = checkhttp | Git.repoIsUrl repo = checkremote @@ -482,7 +482,7 @@ dropKey r st proof key = do dropKey' repo r st proof key dropKey' :: Git.Repo -> Remote -> State -> Maybe SafeDropProof -> Key -> Annex () -dropKey' repo r st@(State connpool duc _ _ _) proof key +dropKey' repo r st@(State connpool duc _ _ _ _) proof key | isP2PHttp r = clientRemoveWithProof proof key unabletoremove r >>= \case RemoveResultPlus True fanoutuuids -> @@ -531,7 +531,7 @@ lockKey r st key callback = do lockKey' repo r st key callback lockKey' :: Git.Repo -> Remote -> State -> Key -> (VerifiedCopy -> Annex r) -> Annex r -lockKey' repo r st@(State connpool duc _ _ _) key callback +lockKey' repo r st@(State connpool duc _ _ _ _) key callback | isP2PHttp r = do showLocking r p2pHttpClient r giveup (clientLockContent key) >>= \case @@ -566,7 +566,7 @@ copyFromRemote r st key file dest meterupdate vc = do copyFromRemote'' repo r st key file dest meterupdate vc copyFromRemote'' :: Git.Repo -> Remote -> State -> Key -> AssociatedFile -> OsPath -> MeterUpdate -> VerifyConfig -> Annex Verification -copyFromRemote'' repo r st@(State connpool _ _ _ _) key af dest meterupdate vc +copyFromRemote'' repo r st@(State connpool _ _ _ _ _) key af dest meterupdate vc | isP2PHttp r = copyp2phttp | Git.repoIsHttp repo = verifyKeyContentIncrementally vc key $ \iv -> do gc <- Annex.getGitConfig @@ -642,7 +642,7 @@ copyToRemote r st key af o meterupdate = do copyToRemote' repo r st key af o meterupdate copyToRemote' :: Git.Repo -> Remote -> State -> Key -> AssociatedFile -> Maybe OsPath -> MeterUpdate -> Annex () -copyToRemote' repo r st@(State connpool duc _ _ _) key af o meterupdate +copyToRemote' repo r st@(State connpool duc _ _ _ _) key af o meterupdate | isP2PHttp r = prepsendwith copyp2phttp | not $ Git.repoIsUrl repo = ifM duc ( guardUsable repo (giveup "cannot access remote") $ commitOnCleanup repo r st $ @@ -753,7 +753,7 @@ mkLocalRemoteAnnex repo = LocalRemoteAnnex repo <$> liftIO (newMVar []) - when possible. -} onLocal :: State -> Annex a -> Annex a -onLocal (State _ _ _ _ lra) = onLocal' lra +onLocal (State _ _ _ _ _ lra) = onLocal' lra onLocalRepo :: Git.Repo -> Annex a -> Annex a onLocalRepo repo a = do @@ -830,7 +830,7 @@ type FileCopier = OsPath -> OsPath -> Key -> MeterUpdate -> Annex Bool -> Verify -- done. Also returns Verified if the key's content is verified while -- copying it. mkFileCopier :: Bool -> State -> Annex FileCopier -mkFileCopier remotewanthardlink (State _ _ copycowtried _ _) = do +mkFileCopier remotewanthardlink (State _ _ copycowtried fastcopy _ _) = do localwanthardlink <- wantHardLink let linker = \src dest -> R.createLink (fromOsPath src) (fromOsPath dest) >> return True if remotewanthardlink || localwanthardlink @@ -848,7 +848,7 @@ mkFileCopier remotewanthardlink (State _ _ copycowtried _ _) = do where copier src dest k p check verifyconfig = do iv <- startVerifyKeyContentIncrementally verifyconfig k - liftIO (fileCopier copycowtried src dest p iv) >>= \case + liftIO (fileCopier copycowtried fastcopy src dest p iv) >>= \case Copied -> ifM check ( finishVerifyKeyContentIncrementally iv , do @@ -864,24 +864,25 @@ mkFileCopier remotewanthardlink (State _ _ copycowtried _ _) = do - This returns False when the repository UUID is not as expected. -} type DeferredUUIDCheck = Annex Bool -data State = State Ssh.P2PShellConnectionPool DeferredUUIDCheck CopyCoWTried (Annex (Git.Repo, GitConfig)) LocalRemoteAnnex +data State = State Ssh.P2PShellConnectionPool DeferredUUIDCheck CopyCoWTried FastCopy (Annex (Git.Repo, GitConfig)) LocalRemoteAnnex getRepoFromState :: State -> Annex Git.Repo -getRepoFromState (State _ _ _ a _) = fst <$> a +getRepoFromState (State _ _ _ _ a _) = fst <$> a #ifndef mingw32_HOST_OS {- The config of the remote git repository, cached for speed. -} getGitConfigFromState :: State -> Annex GitConfig -getGitConfigFromState (State _ _ _ a _) = snd <$> a +getGitConfigFromState (State _ _ _ _ a _) = snd <$> a #endif mkState :: Git.Repo -> UUID -> RemoteGitConfig -> Annex State mkState r u gc = do pool <- Ssh.mkP2PShellConnectionPool copycowtried <- liftIO newCopyCoWTried + fastcopy <- getFastCopy gc lra <- mkLocalRemoteAnnex r (duc, getrepo) <- go - return $ State pool duc copycowtried getrepo lra + return $ State pool duc copycowtried fastcopy getrepo lra where go | remoteAnnexCheckUUID gc = return diff --git a/Types/GitConfig.hs b/Types/GitConfig.hs index bc8cd4c1e7..35b07a50a3 100644 --- a/Types/GitConfig.hs +++ b/Types/GitConfig.hs @@ -1,6 +1,6 @@ {- git-annex configuration - - - Copyright 2012-2024 Joey Hess + - Copyright 2012-2025 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -135,6 +135,7 @@ data GitConfig = GitConfig , annexDifferences :: Differences , annexUsedRefSpec :: Maybe RefSpec , annexVerify :: Bool + , annexFastCopy :: Bool , annexPidLock :: Bool , annexPidLockTimeout :: Seconds , annexDbDir :: Maybe OsPath @@ -241,6 +242,7 @@ extractGitConfig configsource r = GitConfig , annexUsedRefSpec = either (const Nothing) Just . parseRefSpec =<< getmaybe (annexConfig "used-refspec") , annexVerify = getbool (annexConfig "verify") True + , annexFastCopy = getbool (annexConfig "fastcopy") False , annexPidLock = getbool (annexConfig "pidlock") False , annexPidLockTimeout = Seconds $ fromMaybe 300 $ getmayberead (annexConfig "pidlocktimeout") @@ -387,6 +389,7 @@ data RemoteGitConfig = RemoteGitConfig , remoteAnnexPush :: Bool , remoteAnnexReadOnly :: Bool , remoteAnnexVerify :: Bool + , remoteAnnexFastCopy :: Bool , remoteAnnexCheckUUID :: Bool , remoteAnnexTrackingBranch :: Maybe Git.Ref , remoteAnnexTrustLevel :: Maybe String @@ -466,6 +469,7 @@ extractRemoteGitConfig r remotename = do , remoteAnnexReadOnly = getbool ReadOnlyField False , remoteAnnexCheckUUID = getbool CheckUUIDField True , remoteAnnexVerify = getbool VerifyField True + , remoteAnnexFastCopy = getbool FastCopyField False , remoteAnnexTrackingBranch = Git.Ref . encodeBS <$> ( notempty (getmaybe TrackingBranchField) <|> notempty (getmaybe ExportTrackingField) -- old name @@ -574,6 +578,7 @@ data RemoteGitConfigField | ReadOnlyField | CheckUUIDField | VerifyField + | FastCopyField | TrackingBranchField | ExportTrackingField | TrustLevelField @@ -644,6 +649,7 @@ remoteGitConfigField = \case ReadOnlyField -> inherited True "readonly" CheckUUIDField -> uninherited True "checkuuid" VerifyField -> inherited True "verify" + FastCopyField -> inherited True "fastcopy" TrackingBranchField -> uninherited True "tracking-branch" ExportTrackingField -> uninherited True "export-tracking" TrustLevelField -> uninherited True "trustlevel" diff --git a/Utility/CopyFile.hs b/Utility/CopyFile.hs index 2a838ff735..4019da3a48 100644 --- a/Utility/CopyFile.hs +++ b/Utility/CopyFile.hs @@ -43,7 +43,12 @@ copyMetaDataParams meta = map snd $ filter fst {- The cp command is used, because I hate reinventing the wheel, - and because this allows easy access to features like cp --reflink - - and preserving metadata. -} + - and preserving metadata. + - + - This uses --reflink=auto when supported, which allows for fast copies + - using reflinks or the copy_file_range syscall. Whatever cp thinks is + - best. --reflink=auto is the default of recent versions of cp, but is + - used explicitly to support older versions. -} copyFileExternal :: CopyMetaData -> OsPath -> OsPath -> IO Bool copyFileExternal meta src dest = do -- Delete any existing dest file because an unwritable file @@ -81,8 +86,8 @@ copyCoW meta src dest | otherwise = return False where -- Note that in coreutils 9.0, cp uses CoW by default, - -- without needing an option. This s only needed to support - -- older versions. + -- without needing an option. But, this makes it fail if it is + -- unable to make a CoW copy. params = Param "--reflink=always" : copyMetaDataParams meta {- Create a hard link if the filesystem allows it, and fall back to copying diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn index c3b1ea0745..32be6281d0 100644 --- a/doc/git-annex.mdwn +++ b/doc/git-annex.mdwn @@ -1596,6 +1596,25 @@ Remotes are configured using these settings in `.git/config`. in some edge cases, where it's likely the case than an object was downloaded incorrectly, or when needed for security. +* `remote..annex-fastcopy`, `annex.fastcopy` + + Set to "true" to optimize copying files to/from a remote. + + Setting this allows `cp` to use the `copy_file_range` system call. (When + the `cp` command supports it.) The benefit depends on the filesystem. + For example, on NFS, this can allow a server-side copy to be done. On + ext4, this avoids some memory transfer overhead, but the file still + needs to be copied. There is no benefit on btrfs, since git-annex does + the equivilantly fast CoW reflinking on btrfs by default. + + This will do little to improve copy speed when git-annex needs to read + the copied file in order to verify it. So for maximum speed, as well + as setting this option, set `remote..annex-verify` or `annex.verify` + to false. + + Currently, setting this prevents resuming interrupted copies where + they left off. An interrupted copy will be re-run from the beginning. + * `remote..annex-tracking-branch` This is for use with special remotes that support exports and imports. @@ -1790,7 +1809,7 @@ Remotes are configured using these settings in `.git/config`. content of any file, even though its normal location tracking does not indicate that it does. This will cause git-annex to try to get all file contents from the remote. Can be useful in setting up a caching remote. - + * `remote..annex-proxy` Set to "true" to make the local repository able to act as a proxy to this diff --git a/doc/todo/use_copy__95__file__95__range_for_get_and_copy.mdwn b/doc/todo/use_copy__95__file__95__range_for_get_and_copy.mdwn index 72bb84fb96..4ced4f6bb5 100644 --- a/doc/todo/use_copy__95__file__95__range_for_get_and_copy.mdwn +++ b/doc/todo/use_copy__95__file__95__range_for_get_and_copy.mdwn @@ -15,3 +15,5 @@ P.S.: Didn't want to call this a bug, mostly b/c the "real bug" isn't in annex e [[!meta author=ben]] [[!tag projects/INM7]] + +> [[done]] --[[Joey]] diff --git a/doc/todo/use_copy__95__file__95__range_for_get_and_copy/comment_3_04c577bff624af761d997e5f9a8d951d._comment b/doc/todo/use_copy__95__file__95__range_for_get_and_copy/comment_3_04c577bff624af761d997e5f9a8d951d._comment index db3be71305..4a954f34ea 100644 --- a/doc/todo/use_copy__95__file__95__range_for_get_and_copy/comment_3_04c577bff624af761d997e5f9a8d951d._comment +++ b/doc/todo/use_copy__95__file__95__range_for_get_and_copy/comment_3_04c577bff624af761d997e5f9a8d951d._comment @@ -9,14 +9,11 @@ or EXDEV when not supported. Then git-annex could use `cp --reflink=always` as a fallback. However, `copy_file_range` is not necessarily inexpensive. Depending on the -filesystem it can still need to read and write the whole file. And, rather -than a single syscall copying the whole file, git-annex would need to call -it repeatedly in chunks in order to display a progress bar. But, making a -lot of syscalls against a NFS filesystem would be its own overhead. - -So there seems to be a tradeoff between progress display and efficiency on -NFS. And if the goal is to maximize speed for NFS with server-side copy, -maybe progress bars are not important enough to have in that case? +filesystem it can still need to read and write the whole file. So when +using it, git-annex would need to poll the size of the file in order to +update the progress bar. Or it could call the syscall repeatedly on +chunks of the file, but on eg NFS that would add a lot of syscalls, so +probably more overhead. Also, it seems likely to me that you would certainly want to turn off annex.verify along with using `copy_file_range`, which is already a manual diff --git a/doc/todo/use_copy__95__file__95__range_for_get_and_copy/comment_4_e48e458e7b0a951820bdb81772d5c56e._comment b/doc/todo/use_copy__95__file__95__range_for_get_and_copy/comment_4_e48e458e7b0a951820bdb81772d5c56e._comment new file mode 100644 index 0000000000..4b93954126 --- /dev/null +++ b/doc/todo/use_copy__95__file__95__range_for_get_and_copy/comment_4_e48e458e7b0a951820bdb81772d5c56e._comment @@ -0,0 +1,21 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 4""" + date="2025-06-03T18:43:21Z" + content=""" +Implemented `remote..annex-fastcopy` and `annex.fastcopy` +config settings. + +These do just run `cp --reflink=auto`. So when a copy was interrupted, +it won't resume where it left off. It would be possible to improve that, +by making git-annex use `copy_file_range` itself, starting at the point it +was interrupted. At least for keys where isStableKey is True, since +git-annex won't be verifying when using this. But for the +NFS use case, if the server is doing a server-side CoW copy, it's atomic +anyway, so no need to handle resuming. + +Since I don't currently know of a haskell binding to `copy_file_range` and +since it's a low-level OS-specific thing, I have punted on handling +resuming. This could be revisited, I just don't see the benefit in the +current use case, and wanted to start from something relatively simple. +"""]]