git-annex/Utility/CopyFile.hs
Joey Hess 73060eea51
annex.fastcopy
Added annex.fastcopy and remote.name.annex-fastcopy config settings. When
set, this allows the copy_file_range syscall to be used, which can, e.g., allow
for server-side copies on NFS. (For fastest copying, also disable
annex.verify or remote.name.annex-verify.)

This is a simple implementation that does not handle resuming as well as
it possibly could.

It can be used with both local git remotes (including on NFS), and
directory special remotes. Other types of remotes could in theory also
support it, so I've left the config documented as a general thing.
2025-06-03 15:01:38 -04:00

101 lines
3.4 KiB
Haskell

{- file copying
-
- Copyright 2010-2021 Joey Hess <id@joeyh.name>
-
- License: BSD-2-clause
-}
module Utility.CopyFile (
copyFileExternal,
copyCoW,
createLinkOrCopy,
CopyMetaData(..)
) where
import Common
import qualified BuildInfo
import qualified Utility.RawFilePath as R
{- Selects how much of the source file's metadata should be carried over
 - to the copy. -}
data CopyMetaData
  = CopyTimeStamps
    -- ^ Timestamps are copied when possible, and no other metadata.
    -- A symlink is not copied as a symlink; its content is copied.
  | CopyAllMetaData
    -- ^ All metadata is copied when possible.
  deriving Eq
{- Builds the cp options that preserve the requested metadata, limited to
 - the options that BuildInfo records as supported by cp. The result may
 - be empty when nothing suitable is supported. -}
copyMetaDataParams :: CopyMetaData -> [CommandParam]
copyMetaDataParams CopyAllMetaData
  -- cp -a may preserve xattrs that have special meaning, eg to NFS,
  -- and have even been observed to prevent later changing the
  -- permissions of the file. So ask cp not to preserve xattrs
  -- whenever -a is used and the option to do so is available.
  | BuildInfo.cp_a =
      Param "-a"
        : [ Param "--no-preserve=xattr"
          | BuildInfo.cp_no_preserve_xattr_supported
          ]
  -- Without -a, fall back to -p when cp has it.
  | BuildInfo.cp_p = [Param "-p"]
  | otherwise = []
copyMetaDataParams CopyTimeStamps
  | BuildInfo.cp_preserve_timestamps = [Param "--preserve=timestamps"]
  | otherwise = []
{- Copies src to dest by running the external cp command, rather than
 - reimplementing it, which also gives easy access to cp features such as
 - --reflink and metadata preservation.
 -
 - When cp supports it, --reflink=auto is passed, leaving cp free to pick
 - the fastest method it can (reflinks or the copy_file_range syscall).
 - Recent versions of cp default to that behavior; the option is given
 - explicitly for the benefit of older versions.
 -
 - The result is whatever boolSystem reports for the cp invocation. -}
copyFileExternal :: CopyMetaData -> OsPath -> OsPath -> IO Bool
copyFileExternal meta src dest = do
  -- An unwritable file already at dest would prevent cp from working,
  -- so try to remove it first. Failure to remove it is ignored.
  _ <- tryIO (removeFile dest)
  boolSystem "cp" (reflinkopt ++ copyMetaDataParams meta ++ operands)
  where
    reflinkopt = [ Param "--reflink=auto" | BuildInfo.cp_reflink_supported ]
    operands = [File (fromOsPath src), File (fromOsPath dest)]
{- Makes an efficient copy of a file using CoW, when both the filesystem
 - and cp support it. Returns False in all other cases.
 -
 - The dest file must not exist yet; if it does, the CoW copy fails
 - and False is returned.
 -}
copyCoW :: CopyMetaData -> OsPath -> OsPath -> IO Bool
copyCoW meta src dest
  | not BuildInfo.cp_reflink_supported = return False
  | otherwise = do
      copied <- catchBoolIO (withNullHandle runcp)
      -- When CoW is not supported, cp creates the destination file
      -- but leaves it empty, so clean that up.
      unless copied $ void $ tryIO $ removeFile dest
      return copied
  where
    runcp nullh =
      withCreateProcess (cpproc nullh) $ \_ _ _ pid ->
        checkSuccessProcess pid

    -- cp complains to stderr when CoW is not supported, so its output
    -- is sent to the null handle.
    cpproc nullh =
      (proc "cp" (toCommand cpparams))
        { std_out = UseHandle nullh
        , std_err = UseHandle nullh
        }

    -- Note that cp from coreutils 9.0 uses CoW by default, without
    -- needing an option. But --reflink=always makes cp fail when it is
    -- unable to make a CoW copy.
    cpparams =
      Param "--reflink=always"
        : copyMetaDataParams meta
        ++ [File (fromOsPath src), File (fromOsPath dest)]
{- Tries to hard link src to dest. If creating the link throws an IO
 - exception (eg, the filesystem does not allow it), copies the file
 - instead, with all metadata. -}
createLinkOrCopy :: OsPath -> OsPath -> IO Bool
createLinkOrCopy src dest =
  catchIO hardlink (\_ -> copyFileExternal CopyAllMetaData src dest)
  where
    hardlink =
      R.createLink (fromOsPath src) (fromOsPath dest) >> return True