rename to annexobjects location on unexport

This avoids needing to upload the file again to get it to the
annexobjects location, which git-annex sync was doing when the file
was preferred content of the remote.

If the file is not preferred content, sync will drop it from the
annexobjects location.

If the file has been deleted from the tree, it will remain in the
annexobjects location until an unused/dropunused pass is done.
This commit is contained in:
Joey Hess 2024-08-04 11:58:07 -04:00
parent 6b63449133
commit a3d96474f2
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
4 changed files with 45 additions and 25 deletions

View file

@ -28,6 +28,7 @@ module Annex.Locations (
annexLocationsBare,
annexLocationsNonBare,
annexLocation,
exportAnnexObjectLocation,
gitAnnexDir,
gitAnnexObjectDir,
gitAnnexTmpOtherDir,
@ -122,6 +123,7 @@ import Types.UUID
import Types.GitConfig
import Types.Difference
import Types.BranchState
import Types.Export
import qualified Git
import qualified Git.Types as Git
import Git.FilePath
@ -170,6 +172,13 @@ annexLocationsBare config key =
{- Builds the path to a key's object: objectDir joined with the key's
 - path, as produced by the given hasher applied to the number of hash
 - levels configured in the GitConfig. -}
annexLocation :: GitConfig -> Key -> (HashLevels -> Hasher) -> RawFilePath
annexLocation config key hasher = objectDir P.</> keyPath key keyhasher
  where
    keyhasher = hasher (objectHashLevels config)
{- For exporttree remotes with annexobjects=true, objects are stored
 - in this location as well as in the exported tree.
 -
 - The location is ".git" joined with the key's annexLocation, using
 - the hashDirLower hasher. -}
exportAnnexObjectLocation :: GitConfig -> Key -> ExportLocation
exportAnnexObjectLocation gc k = mkExportLocation objectpath
  where
    objectpath = ".git" P.</> annexLocation gc k hashDirLower
{- Number of subdirectories from the gitAnnexObjectDir
- to the gitAnnexLocation. -}
gitAnnexLocationDepth :: GitConfig -> Int

View file

@ -152,16 +152,15 @@ changeExport r db (ExportFiltered new) = do
[oldtreesha] -> do
diffmap <- mkDiffMap oldtreesha new db
let seekdiffmap a = mapM_ a (M.toList diffmap)
-- Rename old files to temp, or delete.
let deleteoldf = \ek oldf -> commandAction $
startUnexport' r db oldf ek
let disposeoldf = \ek oldf -> commandAction $
startDispose r db oldf ek
seekdiffmap $ \case
(ek, (oldf:oldfs, _newf:_)) -> do
commandAction $
startMoveToTempName r db oldf ek
forM_ oldfs (deleteoldf ek)
forM_ oldfs (disposeoldf ek)
(ek, (oldfs, [])) ->
forM_ oldfs (deleteoldf ek)
forM_ oldfs (disposeoldf ek)
(_ek, ([], _)) -> noop
waitForAllRunningCommandActions
-- Rename from temp to new files.
@ -350,16 +349,6 @@ startUnexport r db f shas = do
ai = ActionItemTreeFile f'
si = SeekInput []
-- Starts an "unexport" action for a single key: the file at the given
-- tree path is handed to performUnexport, along with the key, to be
-- removed from the remote.
startUnexport' :: Remote -> ExportHandle -> TopFilePath -> Key -> CommandStart
startUnexport' r db f ek =
    starting msg (ActionItemTreeFile treefile) (SeekInput []) $
        performUnexport r db [ek] (mkExportLocation treefile)
  where
    msg = "unexport " ++ name r
    -- The same path is used for both the displayed action item and the
    -- export location.
    treefile = getTopFilePath f
-- Unlike a usual drop from a repository, this does not check that
-- numcopies is satisfied before removing the content. Typically an export
-- remote is untrusted, so would not count as a copy anyway.
@ -401,6 +390,23 @@ cleanupUnexport r db eks loc = do
removeEmptyDirectories r db loc eks
-- Dispose of an old exported file. When the remote is configured with
-- annexobjects and the key is not a git sha key, the file is renamed to
-- the key's annexobjects location; otherwise it is unexported.
--
-- Either way, the action is displayed as "unexport".
startDispose :: Remote -> ExportHandle -> TopFilePath -> Key -> CommandStart
startDispose r db f ek = starting ("unexport " ++ name r) ai si dispose
  where
    dispose
        | keepasobject = do
            gc <- Annex.getGitConfig
            performRename r db ek loc (exportAnnexObjectLocation gc ek)
        | otherwise = performUnexport r db [ek] loc
    -- Only keys that are not git sha keys are moved to the annexobjects
    -- location.
    keepasobject = annexObjects (Remote.config r) && not (isGitShaKey ek)
    treefile = getTopFilePath f
    loc = mkExportLocation treefile
    ai = ActionItemTreeFile treefile
    si = SeekInput []
startRecoverIncomplete :: Remote -> ExportHandle -> Git.Sha -> TopFilePath -> CommandStart
startRecoverIncomplete r db sha oldf
| sha `elem` nullShas = stop

View file

@ -408,10 +408,7 @@ adjustExportImport' isexport isimport annexobjects r rs gc = do
k loc
=<< getkeycids ciddbv k
-- For annexobjects=true, objects are stored in the remote
-- in a location under .git/annex/objects/
annexobjectlocation k = mkExportLocation $
".git" P.</> annexLocation gc k hashDirLower
annexobjectlocation k = exportAnnexObjectLocation gc k
checkpresentannexobject k =
checkPresentExport (exportActions r) k (annexobjectlocation k)

View file

@ -33,12 +33,20 @@ Planned schedule of work:
* Working on `exportreeplus` branch which is groundwork for proxying to
exporttree=yes special remotes.
* `git-annex export` when unexporting a deleted file from the tree should
rename it to the annexobjects location. This would avoid needing to
re-upload it again in the case where it's preferred content of the
remote. Currently eg, a sync will unexport the file and then re-upload
it. If it's not preferred content, sync will drop it from the
annexobjects location.
* `git-annex export` when exporting a file should rename it from the
annexobjects location when it's present there. This will avoid redundant
uploads.
An example case where this is important: Proxying to an exporttree remote,
a file is uploaded to it but is not yet in an exported tree. When the
exported tree is pushed, the remote needs to be updated by exporting to
it. In this case, the proxy doesn't have a copy of the file, so it would
need to download it from annexobjects before uploading it to the final
location. With this optimisation, it can just rename it.
However: If a key is used twice in an exported tree, it seems a proxy
will need to download and reupload anyway. Unless a copy operation is
added to exporttree remotes..
## items deferred until later for p2p protocol over http