rename to annexobjects location on unexport

This avoids needing to upload the file again to get it to the
annexobjects location, which git-annex sync was doing when the file was
preferred content of the remote.

If the file is not preferred content, sync will drop it from the
annexobjects location.

If the file has been deleted from the tree, it will remain in the
annexobjects location until an unused/dropunused pass is done.
This commit is contained in:
Joey Hess 2024-08-04 11:58:07 -04:00
parent 6b63449133
commit a3d96474f2
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
4 changed files with 45 additions and 25 deletions

View file

@ -28,6 +28,7 @@ module Annex.Locations (
annexLocationsBare, annexLocationsBare,
annexLocationsNonBare, annexLocationsNonBare,
annexLocation, annexLocation,
exportAnnexObjectLocation,
gitAnnexDir, gitAnnexDir,
gitAnnexObjectDir, gitAnnexObjectDir,
gitAnnexTmpOtherDir, gitAnnexTmpOtherDir,
@ -122,6 +123,7 @@ import Types.UUID
import Types.GitConfig import Types.GitConfig
import Types.Difference import Types.Difference
import Types.BranchState import Types.BranchState
import Types.Export
import qualified Git import qualified Git
import qualified Git.Types as Git import qualified Git.Types as Git
import Git.FilePath import Git.FilePath
@ -170,6 +172,13 @@ annexLocationsBare config key =
annexLocation :: GitConfig -> Key -> (HashLevels -> Hasher) -> RawFilePath annexLocation :: GitConfig -> Key -> (HashLevels -> Hasher) -> RawFilePath
annexLocation config key hasher = objectDir P.</> keyPath key (hasher $ objectHashLevels config) annexLocation config key hasher = objectDir P.</> keyPath key (hasher $ objectHashLevels config)
{- The location where an object is stored on an exporttree remote that
 - has annexobjects=true. Such a remote stores objects here as well as
 - in the exported tree. -}
exportAnnexObjectLocation :: GitConfig -> Key -> ExportLocation
exportAnnexObjectLocation gc k = mkExportLocation (".git" P.</> objectpath)
  where
    objectpath = annexLocation gc k hashDirLower
{- Number of subdirectories from the gitAnnexObjectDir {- Number of subdirectories from the gitAnnexObjectDir
- to the gitAnnexLocation. -} - to the gitAnnexLocation. -}
gitAnnexLocationDepth :: GitConfig -> Int gitAnnexLocationDepth :: GitConfig -> Int

View file

@ -152,16 +152,15 @@ changeExport r db (ExportFiltered new) = do
[oldtreesha] -> do [oldtreesha] -> do
diffmap <- mkDiffMap oldtreesha new db diffmap <- mkDiffMap oldtreesha new db
let seekdiffmap a = mapM_ a (M.toList diffmap) let seekdiffmap a = mapM_ a (M.toList diffmap)
-- Rename old files to temp, or delete. let disposeoldf = \ek oldf -> commandAction $
let deleteoldf = \ek oldf -> commandAction $ startDispose r db oldf ek
startUnexport' r db oldf ek
seekdiffmap $ \case seekdiffmap $ \case
(ek, (oldf:oldfs, _newf:_)) -> do (ek, (oldf:oldfs, _newf:_)) -> do
commandAction $ commandAction $
startMoveToTempName r db oldf ek startMoveToTempName r db oldf ek
forM_ oldfs (deleteoldf ek) forM_ oldfs (disposeoldf ek)
(ek, (oldfs, [])) -> (ek, (oldfs, [])) ->
forM_ oldfs (deleteoldf ek) forM_ oldfs (disposeoldf ek)
(_ek, ([], _)) -> noop (_ek, ([], _)) -> noop
waitForAllRunningCommandActions waitForAllRunningCommandActions
-- Rename from temp to new files. -- Rename from temp to new files.
@ -350,16 +349,6 @@ startUnexport r db f shas = do
ai = ActionItemTreeFile f' ai = ActionItemTreeFile f'
si = SeekInput [] si = SeekInput []
-- Starts an "unexport" action for the key ek, run via performUnexport
-- on the export location of the tree file f on the remote r.
startUnexport' :: Remote -> ExportHandle -> TopFilePath -> Key -> CommandStart
startUnexport' r db f ek = starting msg item seekinput unexport
  where
    msg = "unexport " ++ name r
    treefile = getTopFilePath f
    item = ActionItemTreeFile treefile
    seekinput = SeekInput []
    unexport = performUnexport r db [ek] (mkExportLocation treefile)
-- Unlike a usual drop from a repository, this does not check that -- Unlike a usual drop from a repository, this does not check that
-- numcopies is satisfied before removing the content. Typically an export -- numcopies is satisfied before removing the content. Typically an export
-- remote is untrusted, so would not count as a copy anyway. -- remote is untrusted, so would not count as a copy anyway.
@ -401,6 +390,23 @@ cleanupUnexport r db eks loc = do
removeEmptyDirectories r db loc eks removeEmptyDirectories r db loc eks
-- Gets rid of an old exported file. When the remote is configured with
-- annexObjects and the key is not a git sha key, the file is renamed to
-- the key's annexobjects location; otherwise it is unexported.
startDispose :: Remote -> ExportHandle -> TopFilePath -> Key -> CommandStart
startDispose r db f ek = starting ("unexport " ++ name r) item seekinput dispose
  where
    dispose
        | keepasannexobject = do
            gc <- Annex.getGitConfig
            performRename r db ek oldloc (exportAnnexObjectLocation gc ek)
        | otherwise = performUnexport r db [ek] oldloc
    keepasannexobject =
        annexObjects (Remote.config r) && not (isGitShaKey ek)
    treefile = getTopFilePath f
    oldloc = mkExportLocation treefile
    item = ActionItemTreeFile treefile
    seekinput = SeekInput []
startRecoverIncomplete :: Remote -> ExportHandle -> Git.Sha -> TopFilePath -> CommandStart startRecoverIncomplete :: Remote -> ExportHandle -> Git.Sha -> TopFilePath -> CommandStart
startRecoverIncomplete r db sha oldf startRecoverIncomplete r db sha oldf
| sha `elem` nullShas = stop | sha `elem` nullShas = stop

View file

@ -408,10 +408,7 @@ adjustExportImport' isexport isimport annexobjects r rs gc = do
k loc k loc
=<< getkeycids ciddbv k =<< getkeycids ciddbv k
-- For annexobjects=true, objects are stored in the remote annexobjectlocation k = exportAnnexObjectLocation gc k
-- in a location under .git/objects/
annexobjectlocation k = mkExportLocation $
".git" P.</> annexLocation gc k hashDirLower
checkpresentannexobject k = checkpresentannexobject k =
checkPresentExport (exportActions r) k (annexobjectlocation k) checkPresentExport (exportActions r) k (annexobjectlocation k)

View file

@ -33,12 +33,20 @@ Planned schedule of work:
* Working on `exportreeplus` branch which is groundwork for proxying to * Working on `exportreeplus` branch which is groundwork for proxying to
exporttree=yes special remotes. exporttree=yes special remotes.
* `git-annex export` when unexporting a deleted file from the tree should * `git-annex export` when exporting a file should rename it from the
rename it to the annexobjects location. This would avoid needing to annexobjects location when it's present there. This will avoid redundant
re-upload it again in the case where it's preferred content of the uploads.
remote. Currently eg, a sync will unexport the file and then re-upload
it. If it's not preferred content, sync will drop it from the An example case where this is important: Proxying to an exporttree remote,
annexobjects location. a file is uploaded to it but is not yet in an exported tree. When the
exported tree is pushed, the remote needs to be updated by exporting to
it. In this case, the proxy doesn't have a copy of the file, so it would
need to download it from annexobjects before uploading it to the final
location. With this optimisation, it can just rename it.
However: If a key is used twice in an exported tree, it seems a proxy
will need to download and reupload anyway. Unless a copy operation is
added to exporttree remotes..
## items deferred until later for p2p protocol over http ## items deferred until later for p2p protocol over http