From 7916fc98a3b581fb422d1f0b5e785ca4c48d2e38 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Wed, 23 Dec 2020 14:27:38 -0400 Subject: [PATCH] graft in imported tree to avoid gc Fix a bug that could prevent getting files from an importtree=yes remote, because the imported tree was allowed to be garbage collected. --- Annex/Import.hs | 2 +- CHANGELOG | 2 + Command/Export.hs | 2 +- Logs/Export.hs | 81 ++++++++++++++---------- doc/todo/borg_sync_tree_not_grafted.mdwn | 5 ++ 5 files changed, 58 insertions(+), 34 deletions(-) diff --git a/Annex/Import.hs b/Annex/Import.hs index 201d9e5f7e..be9b93a9fb 100644 --- a/Annex/Import.hs +++ b/Annex/Import.hs @@ -161,7 +161,7 @@ recordImportTree remote importtreeconfig importable = do updateexportlog importedtree = do oldexport <- getExport (Remote.uuid remote) - recordExport (Remote.uuid remote) $ ExportChange + recordExport (Remote.uuid remote) importedtree $ ExportChange { oldTreeish = exportedTreeishes oldexport , newTreeish = importedtree } diff --git a/CHANGELOG b/CHANGELOG index b2dc71fb1f..17bf687c2d 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -34,6 +34,8 @@ git-annex (8.20201128) UNRELEASED; urgency=medium * Windows: include= and exclude= containing '/' will also match filenames that are written using '\'. (And vice-versa, but it's better to use '/' for portability.) + * Fix a bug that could prevent getting files from an importtree=yes + remote, because the imported tree was allowed to be garbage collected. -- Joey Hess Mon, 30 Nov 2020 12:55:49 -0400 diff --git a/Command/Export.hs b/Command/Export.hs index e3e48db547..fe7830471a 100644 --- a/Command/Export.hs +++ b/Command/Export.hs @@ -192,7 +192,7 @@ changeExport r db (PreferredFiltered new) = do -- from a previous export, that are not part of this export. 
c <- Annex.getState Annex.errcounter when (c == 0) $ do - recordExport (uuid r) $ ExportChange + recordExportUnderway (uuid r) $ ExportChange { oldTreeish = exportedTreeishes old , newTreeish = new } diff --git a/Logs/Export.hs b/Logs/Export.hs index 50b2ea1378..a8d53debdf 100644 --- a/Logs/Export.hs +++ b/Logs/Export.hs @@ -1,6 +1,6 @@ {- git-annex export log (also used to log imports) - - - Copyright 2017-2019 Joey Hess + - Copyright 2017-2020 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -15,8 +15,9 @@ module Logs.Export ( getExport, exportedTreeishes, incompleteExportedTreeishes, - recordExport, recordExportBeginning, + recordExportUnderway, + recordExport, logExportExcluded, getExportExcluded, ) where @@ -85,35 +86,6 @@ getExport remoteuuid = nub . mapMaybe get . M.toList . simpleMap | exportTo ep == remoteuuid = Just exported | otherwise = Nothing --- | Record a change in what's exported to a special remote. --- --- This is called before an export begins uploading new files to the --- remote, but after it's cleaned up any files that need to be deleted --- from the old treeish. --- --- Any entries in the log for the oldTreeish will be updated to the --- newTreeish. This way, when multiple repositories are exporting to --- the same special remote, there's no conflict as long as they move --- forward in lock-step. --- --- Also, the newTreeish is grafted into the git-annex branch. This is done --- to ensure that it's available later. -recordExport :: UUID -> ExportChange -> Annex () -recordExport remoteuuid ec = do - c <- liftIO currentVectorClock - u <- getUUID - let ep = ExportParticipants { exportFrom = u, exportTo = remoteuuid } - let exported = Exported (newTreeish ec) [] - Annex.Branch.change exportLog $ - buildExportLog - . changeMapLog c ep exported - . M.mapWithKey (updateothers c u) - . 
parseExportLog - where - updateothers c u ep le@(LogEntry _ exported@(Exported { exportedTreeish = t })) - | u == exportFrom ep || remoteuuid /= exportTo ep || t `notElem` oldTreeish ec = le - | otherwise = LogEntry c (exported { exportedTreeish = newTreeish ec }) - -- | Record the beginning of an export, to allow cleaning up from -- interrupted exports. -- @@ -132,7 +104,52 @@ recordExportBeginning remoteuuid newtree = do buildExportLog . changeMapLog c ep new . parseExportLog - Annex.Branch.rememberTreeish newtree (asTopFilePath "export.tree") + recordExportTreeish newtree + +-- Graft a tree ref into the git-annex branch. This is done +-- to ensure that it's available later, when getting exported files +-- from the remote. Since that could happen in another clone of the +-- repository, the tree has to be kept available, even if it +-- doesn't end up being merged into the master branch. +recordExportTreeish :: Git.Ref -> Annex () +recordExportTreeish t = + Annex.Branch.rememberTreeish t (asTopFilePath "export.tree") + +-- | Record that an export to a special remote is under way. +-- +-- This is called before an export begins uploading new files to the +-- remote, but after it's cleaned up any files that need to be deleted +-- from the old treeish. +-- +-- Any entries in the log for the oldTreeish will be updated to the +-- newTreeish. This way, when multiple repositories are exporting to +-- the same special remote, there's no conflict as long as they move +-- forward in lock-step. +recordExportUnderway :: UUID -> ExportChange -> Annex () +recordExportUnderway remoteuuid ec = do + c <- liftIO currentVectorClock + u <- getUUID + let ep = ExportParticipants { exportFrom = u, exportTo = remoteuuid } + let exported = Exported (newTreeish ec) [] + Annex.Branch.change exportLog $ + buildExportLog + . changeMapLog c ep exported + . M.mapWithKey (updateothers c u) + . 
parseExportLog + where + updateothers c u ep le@(LogEntry _ exported@(Exported { exportedTreeish = t })) + | u == exportFrom ep || remoteuuid /= exportTo ep || t `notElem` oldTreeish ec = le + | otherwise = LogEntry c (exported { exportedTreeish = newTreeish ec }) + +-- Record information about the export to the git-annex branch. +-- +-- This is equivalent to recordExportBeginning followed by +-- recordExportUnderway, but without the ability to clean up from +-- interrupted exports. +recordExport :: UUID -> Git.Ref -> ExportChange -> Annex () +recordExport remoteuuid tree ec = do + recordExportTreeish tree + recordExportUnderway remoteuuid ec parseExportLog :: L.ByteString -> MapLog ExportParticipants Exported parseExportLog = parseMapLog exportParticipantsParser exportedParser diff --git a/doc/todo/borg_sync_tree_not_grafted.mdwn b/doc/todo/borg_sync_tree_not_grafted.mdwn index a4423706e1..ecab6def75 100644 --- a/doc/todo/borg_sync_tree_not_grafted.mdwn +++ b/doc/todo/borg_sync_tree_not_grafted.mdwn @@ -3,3 +3,8 @@ does not seem to get grafted into the git-annex branch, so would be subject to being lost to GC. Is this a general problem affecting importtree too? + +> Yes, it was. It would have only caused a problem if the user +> kept doing imports from a remote, but never exporting to it. +> Then, in a clone of the repo that was importing, they would not be able +> to get files. [[fixed|done]] --[[Joey]]