From a535eaa176c9be4e6f6dc553db0bd7fd4d207faf Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Sun, 4 Aug 2024 12:18:17 -0400 Subject: [PATCH] rename from annexobjects location on export (When possible, of course it may not be there, or it may get renamed from there for another exported file first. Or the remote may not support renames.) This will avoid redundant uploads. An example case where this is important: Proxying to an exporttree remote, a file is uploaded to it but is not yet in an exported tree. When the exported tree is pushed, the remote needs to be updated by exporting to it. In this case, the proxy doesn't have a copy of the file, so it would need to download it from annexobjects before uploading it to the final location. With this optimisation, it can just rename it. However: If a key is used twice in an exported tree, it seems a proxy will need to download and reupload anyway. Unless a copy operation is added to exporttree remotes.. --- Command/Export.hs | 21 +++++++++++++++++++-- doc/todo/git-annex_proxies.mdwn | 15 --------------- 2 files changed, 19 insertions(+), 17 deletions(-) diff --git a/Command/Export.hs b/Command/Export.hs index d6514d92ae..c0bfe5d5f7 100644 --- a/Command/Export.hs +++ b/Command/Export.hs @@ -294,9 +294,8 @@ startExport r db cvar allfilledvar ti = do performExport :: Remote -> ExportHandle -> Key -> AssociatedFile -> Sha -> ExportLocation -> MVar AllFilled -> CommandPerform performExport r db ek af contentsha loc allfilledvar = do - let storer = storeExport (exportActions r) sent <- tryNonAsync $ if not (isGitShaKey ek) - then ifM (inAnnex ek) + then tryrenameannexobject $ ifM (inAnnex ek) ( notifyTransfer Upload af $ -- alwaysUpload because the same key -- could be used for more than one export @@ -328,6 +327,24 @@ performExport r db ek af contentsha loc allfilledvar = do Left err -> do failedsend throwM err + where + storer = storeExport (exportActions r) + + tryrenameannexobject fallback + | annexObjects (Remote.config r) 
= do + case renameExport (exportActions r) of + Just renameaction -> do + locs <- loggedLocations ek + gc <- Annex.getGitConfig + let objloc = exportAnnexObjectLocation gc ek + if Remote.uuid r `elem` locs + then tryNonAsync (renameaction ek objloc loc) >>= \case + Right (Just ()) -> return True + Left _err -> fallback + Right Nothing -> fallback + else fallback + Nothing -> fallback + | otherwise = fallback cleanupExport :: Remote -> ExportHandle -> Key -> ExportLocation -> Bool -> CommandCleanup cleanupExport r db ek loc sent = do diff --git a/doc/todo/git-annex_proxies.mdwn b/doc/todo/git-annex_proxies.mdwn index 68ef14b9e8..93730e383c 100644 --- a/doc/todo/git-annex_proxies.mdwn +++ b/doc/todo/git-annex_proxies.mdwn @@ -33,21 +33,6 @@ Planned schedule of work: * Working on `exportreeplus` branch which is groundwork for proxying to exporttree=yes special remotes. -* `git-annex export` when exporting a file should rename it from the - annexobjects location when it's present there. This will avoid redundant - uploads. - - An example case where this is important: Proxying to a exporttree remote, - a file is uploaded to it but is not yet in an exported tree. When the - exported tree is pushed, the remote needs to be updated by exporting to - it. In this case, the proxy doesn't have a copy of the file, so it would - need to download it from annexobjects before uploading it to the final - location. With this optimisation, it can just rename it. - - However: If a key is used twice in an exported tree, it seems a proxy - will need to download and reupload anyway. Unless a copy operation is - added to exporttree remotes.. - ## items deferred until later for p2p protocol over http * `git-annex p2phttp` should support serving several repositories at the same