annex.fastcopy

Added annex.fastcopy and remote.name.annex-fastcopy config setting. When
set, this allows the copy_file_range syscall to be used, which can eg allow
for server-side copies on NFS. (For fastest copying, also disable
annex.verify or remote.name.annex-verify.)

This is a simple implementation, that does not handle resuming as well as
it possibly could.

It can be used with both local git remotes (including on NFS), and
directory special remotes. Other types of remotes could in theory also
support it, so I've left the config documented as a general thing.
This commit is contained in:
Joey Hess 2025-06-03 15:01:38 -04:00
parent 6468a39e92
commit 73060eea51
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
10 changed files with 151 additions and 66 deletions

View file

@ -84,11 +84,12 @@ gen r u rc gc rs = do
cst <- remoteCost gc c cheapRemoteCost
let chunkconfig = getChunkConfig c
cow <- liftIO newCopyCoWTried
fastcopy <- getFastCopy gc
let ii = IgnoreInodes $ fromMaybe True $
getRemoteConfigValue ignoreinodesField c
return $ Just $ specialRemote c
(storeKeyM dir chunkconfig cow)
(retrieveKeyFileM dir chunkconfig cow)
(storeKeyM dir chunkconfig cow fastcopy)
(retrieveKeyFileM dir chunkconfig cow fastcopy)
(removeKeyM dir)
(checkPresentM dir chunkconfig)
Remote
@ -105,8 +106,8 @@ gen r u rc gc rs = do
, checkPresent = checkPresentDummy
, checkPresentCheap = True
, exportActions = ExportActions
{ storeExport = storeExportM dir cow
, retrieveExport = retrieveExportM dir cow
{ storeExport = storeExportM dir cow fastcopy
, retrieveExport = retrieveExportM dir cow fastcopy
, removeExport = removeExportM dir
, checkPresentExport = checkPresentExportM dir
-- Not needed because removeExportLocation
@ -118,7 +119,7 @@ gen r u rc gc rs = do
{ listImportableContents = listImportableContentsM ii dir
, importKey = Just (importKeyM ii dir)
, retrieveExportWithContentIdentifier = retrieveExportWithContentIdentifierM ii dir cow
, storeExportWithContentIdentifier = storeExportWithContentIdentifierM ii dir cow
, storeExportWithContentIdentifier = storeExportWithContentIdentifierM ii dir cow fastcopy
, removeExportWithContentIdentifier = removeExportWithContentIdentifierM ii dir
-- Not needed because removeExportWithContentIdentifier
-- auto-removes empty directories.
@ -189,8 +190,8 @@ storeDir d k = addTrailingPathSeparator $
{- Check if there is enough free disk space in the remote's directory to
- store the key. Note that the unencrypted key size is checked. -}
storeKeyM :: OsPath -> ChunkConfig -> CopyCoWTried -> Storer
storeKeyM d chunkconfig cow k c m =
storeKeyM :: OsPath -> ChunkConfig -> CopyCoWTried -> FastCopy -> Storer
storeKeyM d chunkconfig cow fastcopy k c m =
ifM (checkDiskSpaceDirectory d k)
( do
void $ liftIO $ tryIO $ createDirectoryUnder [d] tmpdir
@ -210,7 +211,7 @@ storeKeyM d chunkconfig cow k c m =
in byteStorer go k c m
NoChunks ->
let go _k src p = liftIO $ do
void $ fileCopier cow src tmpf p Nothing
void $ fileCopier cow fastcopy src tmpf p Nothing
finalizeStoreGeneric d tmpdir destdir
in fileStorer go k c m
_ ->
@ -247,12 +248,12 @@ finalizeStoreGeneric d tmp dest = do
mapM_ preventWrite =<< dirContents dest
preventWrite dest
retrieveKeyFileM :: OsPath -> ChunkConfig -> CopyCoWTried -> Retriever
retrieveKeyFileM d (LegacyChunks _) _ = Legacy.retrieve locations' d
retrieveKeyFileM d NoChunks cow = fileRetriever' $ \dest k p iv -> do
retrieveKeyFileM :: OsPath -> ChunkConfig -> CopyCoWTried -> FastCopy -> Retriever
retrieveKeyFileM d (LegacyChunks _) _ _ = Legacy.retrieve locations' d
retrieveKeyFileM d NoChunks cow fastcopy = fileRetriever' $ \dest k p iv -> do
src <- liftIO $ getLocation d k
void $ liftIO $ fileCopier cow src dest p iv
retrieveKeyFileM d _ _ = byteRetriever $ \k sink ->
void $ liftIO $ fileCopier cow fastcopy src dest p iv
retrieveKeyFileM d _ _ _ = byteRetriever $ \k sink ->
sink =<< liftIO (F.readFile =<< getLocation d k)
retrieveKeyFileCheapM :: OsPath -> ChunkConfig -> Maybe (Key -> AssociatedFile -> OsPath -> Annex ())
@ -327,8 +328,8 @@ checkPresentGeneric' d check = ifM check
)
)
storeExportM :: OsPath -> CopyCoWTried -> OsPath -> Key -> ExportLocation -> MeterUpdate -> Annex ()
storeExportM d cow src _k loc p = do
storeExportM :: OsPath -> CopyCoWTried -> FastCopy -> OsPath -> Key -> ExportLocation -> MeterUpdate -> Annex ()
storeExportM d cow fastcopy src _k loc p = do
liftIO $ createDirectoryUnder [d] (takeDirectory dest)
-- Write via temp file so that checkPresentGeneric will not
-- see it until it's fully stored.
@ -336,12 +337,12 @@ storeExportM d cow src _k loc p = do
where
dest = exportPath d loc
go tmp () = void $ liftIO $
fileCopier cow src tmp p Nothing
fileCopier cow fastcopy src tmp p Nothing
retrieveExportM :: OsPath -> CopyCoWTried -> Key -> ExportLocation -> OsPath -> MeterUpdate -> Annex Verification
retrieveExportM d cow k loc dest p =
retrieveExportM :: OsPath -> CopyCoWTried -> FastCopy -> Key -> ExportLocation -> OsPath -> MeterUpdate -> Annex Verification
retrieveExportM d cow fastcopy k loc dest p =
verifyKeyContentIncrementally AlwaysVerify k $ \iv ->
void $ liftIO $ fileCopier cow src dest p iv
void $ liftIO $ fileCopier cow fastcopy src dest p iv
where
src = exportPath d loc
@ -533,13 +534,13 @@ retrieveExportWithContentIdentifierM ii dir cow loc cids dest gk p =
=<< R.getSymbolicLinkStatus f'
guardSameContentIdentifiers cont cids currcid
storeExportWithContentIdentifierM :: IgnoreInodes -> OsPath -> CopyCoWTried -> OsPath -> Key -> ExportLocation -> [ContentIdentifier] -> MeterUpdate -> Annex ContentIdentifier
storeExportWithContentIdentifierM ii dir cow src _k loc overwritablecids p = do
storeExportWithContentIdentifierM :: IgnoreInodes -> OsPath -> CopyCoWTried -> FastCopy -> OsPath -> Key -> ExportLocation -> [ContentIdentifier] -> MeterUpdate -> Annex ContentIdentifier
storeExportWithContentIdentifierM ii dir cow fastcopy src _k loc overwritablecids p = do
liftIO $ createDirectoryUnder [dir] destdir
withTmpFileIn destdir template $ \tmpf tmph -> do
let tmpf' = fromOsPath tmpf
liftIO $ hClose tmph
void $ liftIO $ fileCopier cow src tmpf p Nothing
void $ liftIO $ fileCopier cow fastcopy src tmpf p Nothing
resetAnnexFilePerm tmpf
liftIO (R.getSymbolicLinkStatus tmpf') >>= liftIO . mkContentIdentifier ii tmpf >>= \case
Nothing -> giveup "unable to generate content identifier"

View file

@ -440,7 +440,7 @@ inAnnex rmt st key = do
inAnnex' repo rmt st key
inAnnex' :: Git.Repo -> Remote -> State -> Key -> Annex Bool
inAnnex' repo rmt st@(State connpool duc _ _ _) key
inAnnex' repo rmt st@(State connpool duc _ _ _ _) key
| isP2PHttp rmt = checkp2phttp
| Git.repoIsHttp repo = checkhttp
| Git.repoIsUrl repo = checkremote
@ -482,7 +482,7 @@ dropKey r st proof key = do
dropKey' repo r st proof key
dropKey' :: Git.Repo -> Remote -> State -> Maybe SafeDropProof -> Key -> Annex ()
dropKey' repo r st@(State connpool duc _ _ _) proof key
dropKey' repo r st@(State connpool duc _ _ _ _) proof key
| isP2PHttp r =
clientRemoveWithProof proof key unabletoremove r >>= \case
RemoveResultPlus True fanoutuuids ->
@ -531,7 +531,7 @@ lockKey r st key callback = do
lockKey' repo r st key callback
lockKey' :: Git.Repo -> Remote -> State -> Key -> (VerifiedCopy -> Annex r) -> Annex r
lockKey' repo r st@(State connpool duc _ _ _) key callback
lockKey' repo r st@(State connpool duc _ _ _ _) key callback
| isP2PHttp r = do
showLocking r
p2pHttpClient r giveup (clientLockContent key) >>= \case
@ -566,7 +566,7 @@ copyFromRemote r st key file dest meterupdate vc = do
copyFromRemote'' repo r st key file dest meterupdate vc
copyFromRemote'' :: Git.Repo -> Remote -> State -> Key -> AssociatedFile -> OsPath -> MeterUpdate -> VerifyConfig -> Annex Verification
copyFromRemote'' repo r st@(State connpool _ _ _ _) key af dest meterupdate vc
copyFromRemote'' repo r st@(State connpool _ _ _ _ _) key af dest meterupdate vc
| isP2PHttp r = copyp2phttp
| Git.repoIsHttp repo = verifyKeyContentIncrementally vc key $ \iv -> do
gc <- Annex.getGitConfig
@ -642,7 +642,7 @@ copyToRemote r st key af o meterupdate = do
copyToRemote' repo r st key af o meterupdate
copyToRemote' :: Git.Repo -> Remote -> State -> Key -> AssociatedFile -> Maybe OsPath -> MeterUpdate -> Annex ()
copyToRemote' repo r st@(State connpool duc _ _ _) key af o meterupdate
copyToRemote' repo r st@(State connpool duc _ _ _ _) key af o meterupdate
| isP2PHttp r = prepsendwith copyp2phttp
| not $ Git.repoIsUrl repo = ifM duc
( guardUsable repo (giveup "cannot access remote") $ commitOnCleanup repo r st $
@ -753,7 +753,7 @@ mkLocalRemoteAnnex repo = LocalRemoteAnnex repo <$> liftIO (newMVar [])
- when possible.
-}
onLocal :: State -> Annex a -> Annex a
onLocal (State _ _ _ _ lra) = onLocal' lra
onLocal (State _ _ _ _ _ lra) = onLocal' lra
onLocalRepo :: Git.Repo -> Annex a -> Annex a
onLocalRepo repo a = do
@ -830,7 +830,7 @@ type FileCopier = OsPath -> OsPath -> Key -> MeterUpdate -> Annex Bool -> Verify
-- done. Also returns Verified if the key's content is verified while
-- copying it.
mkFileCopier :: Bool -> State -> Annex FileCopier
mkFileCopier remotewanthardlink (State _ _ copycowtried _ _) = do
mkFileCopier remotewanthardlink (State _ _ copycowtried fastcopy _ _) = do
localwanthardlink <- wantHardLink
let linker = \src dest -> R.createLink (fromOsPath src) (fromOsPath dest) >> return True
if remotewanthardlink || localwanthardlink
@ -848,7 +848,7 @@ mkFileCopier remotewanthardlink (State _ _ copycowtried _ _) = do
where
copier src dest k p check verifyconfig = do
iv <- startVerifyKeyContentIncrementally verifyconfig k
liftIO (fileCopier copycowtried src dest p iv) >>= \case
liftIO (fileCopier copycowtried fastcopy src dest p iv) >>= \case
Copied -> ifM check
( finishVerifyKeyContentIncrementally iv
, do
@ -864,24 +864,25 @@ mkFileCopier remotewanthardlink (State _ _ copycowtried _ _) = do
- This returns False when the repository UUID is not as expected. -}
type DeferredUUIDCheck = Annex Bool
data State = State Ssh.P2PShellConnectionPool DeferredUUIDCheck CopyCoWTried (Annex (Git.Repo, GitConfig)) LocalRemoteAnnex
data State = State Ssh.P2PShellConnectionPool DeferredUUIDCheck CopyCoWTried FastCopy (Annex (Git.Repo, GitConfig)) LocalRemoteAnnex
getRepoFromState :: State -> Annex Git.Repo
getRepoFromState (State _ _ _ a _) = fst <$> a
getRepoFromState (State _ _ _ _ a _) = fst <$> a
#ifndef mingw32_HOST_OS
{- The config of the remote git repository, cached for speed. -}
getGitConfigFromState :: State -> Annex GitConfig
getGitConfigFromState (State _ _ _ a _) = snd <$> a
getGitConfigFromState (State _ _ _ _ a _) = snd <$> a
#endif
mkState :: Git.Repo -> UUID -> RemoteGitConfig -> Annex State
mkState r u gc = do
pool <- Ssh.mkP2PShellConnectionPool
copycowtried <- liftIO newCopyCoWTried
fastcopy <- getFastCopy gc
lra <- mkLocalRemoteAnnex r
(duc, getrepo) <- go
return $ State pool duc copycowtried getrepo lra
return $ State pool duc copycowtried fastcopy getrepo lra
where
go
| remoteAnnexCheckUUID gc = return