graft in imported tree to avoid gc

Fix a bug that could prevent getting files from an importtree=yes remote,
because the imported tree was allowed to be garbage collected.
This commit is contained in:
Joey Hess 2020-12-23 14:27:38 -04:00
parent c6e693b25d
commit 7916fc98a3
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
5 changed files with 58 additions and 34 deletions

View file

@ -161,7 +161,7 @@ recordImportTree remote importtreeconfig importable = do
updateexportlog importedtree = do
oldexport <- getExport (Remote.uuid remote)
recordExport (Remote.uuid remote) $ ExportChange
recordExport (Remote.uuid remote) importedtree $ ExportChange
{ oldTreeish = exportedTreeishes oldexport
, newTreeish = importedtree
}

View file

@ -34,6 +34,8 @@ git-annex (8.20201128) UNRELEASED; urgency=medium
* Windows: include= and exclude= containing '/' will also match filenames
that are written using '\'. (And vice-versa, but it's better to use '/'
for portability.)
* Fix a bug that could prevent getting files from an importtree=yes
remote, because the imported tree was allowed to be garbage collected.
-- Joey Hess <id@joeyh.name> Mon, 30 Nov 2020 12:55:49 -0400

View file

@ -192,7 +192,7 @@ changeExport r db (PreferredFiltered new) = do
-- from a previous export, that are not part of this export.
c <- Annex.getState Annex.errcounter
when (c == 0) $ do
recordExport (uuid r) $ ExportChange
recordExportUnderway (uuid r) $ ExportChange
{ oldTreeish = exportedTreeishes old
, newTreeish = new
}

View file

@ -1,6 +1,6 @@
{- git-annex export log (also used to log imports)
-
- Copyright 2017-2019 Joey Hess <id@joeyh.name>
- Copyright 2017-2020 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
@ -15,8 +15,9 @@ module Logs.Export (
getExport,
exportedTreeishes,
incompleteExportedTreeishes,
recordExport,
recordExportBeginning,
recordExportUnderway,
recordExport,
logExportExcluded,
getExportExcluded,
) where
@ -85,35 +86,6 @@ getExport remoteuuid = nub . mapMaybe get . M.toList . simpleMap
| exportTo ep == remoteuuid = Just exported
| otherwise = Nothing
-- | Record a change in what's exported to a special remote.
--
-- This is called before an export begins uploading new files to the
-- remote, but after it's cleaned up any files that need to be deleted
-- from the old treeish.
--
-- Any entries in the log for the oldTreeish will be updated to the
-- newTreeish. This way, when multiple repositories are exporting to
-- the same special remote, there's no conflict as long as they move
-- forward in lock-step.
--
-- Also, the newTreeish is grafted into the git-annex branch. This is done
-- to ensure that it's available later.
recordExport :: UUID -> ExportChange -> Annex ()
recordExport remoteuuid ec = do
c <- liftIO currentVectorClock
u <- getUUID
let ep = ExportParticipants { exportFrom = u, exportTo = remoteuuid }
let exported = Exported (newTreeish ec) []
Annex.Branch.change exportLog $
buildExportLog
. changeMapLog c ep exported
. M.mapWithKey (updateothers c u)
. parseExportLog
where
updateothers c u ep le@(LogEntry _ exported@(Exported { exportedTreeish = t }))
| u == exportFrom ep || remoteuuid /= exportTo ep || t `notElem` oldTreeish ec = le
| otherwise = LogEntry c (exported { exportedTreeish = newTreeish ec })
-- | Record the beginning of an export, to allow cleaning up from
-- interrupted exports.
--
@ -132,7 +104,52 @@ recordExportBeginning remoteuuid newtree = do
buildExportLog
. changeMapLog c ep new
. parseExportLog
Annex.Branch.rememberTreeish newtree (asTopFilePath "export.tree")
recordExportTreeish newtree
-- Graft a tree ref into the git-annex branch. This is done
-- to ensure that it's available later, when getting exported files
-- from the remote. Since that could happen in another clone of the
-- repository, the tree has to be kept available, even if it
-- doesn't end up being merged into the master branch.
recordExportTreeish :: Git.Ref -> Annex ()
recordExportTreeish treeish = Annex.Branch.rememberTreeish treeish graftpoint
  where
    -- Location within the git-annex branch under which the tree is
    -- remembered.
    graftpoint = asTopFilePath "export.tree"
-- | Record that an export to a special remote is under way.
--
-- This is called before an export begins uploading new files to the
-- remote, but after it's cleaned up any files that need to be deleted
-- from the old treeish.
--
-- Any entries in the log for the oldTreeish will be updated to the
-- newTreeish. This way, when multiple repositories are exporting to
-- the same special remote, there's no conflict as long as they move
-- forward in lock-step.
recordExportUnderway :: UUID -> ExportChange -> Annex ()
recordExportUnderway remoteuuid ec = do
    now <- liftIO currentVectorClock
    self <- getUUID
    let participants = ExportParticipants
            { exportFrom = self
            , exportTo = remoteuuid
            }
    -- The entry for this repository starts out with the new treeish and
    -- an empty second field.
    let fresh = Exported (newTreeish ec) []
    Annex.Branch.change exportLog $
        buildExportLog
            . changeMapLog now participants fresh
            . M.mapWithKey (advance now self)
            . parseExportLog
  where
    -- Entries logged by other repositories for this same remote that
    -- still point at one of the old treeishes are moved forward to the
    -- new treeish; every other entry is left untouched.
    advance now self ep le@(LogEntry _ exported@(Exported { exportedTreeish = t }))
        | self /= exportFrom ep && remoteuuid == exportTo ep && t `elem` oldTreeish ec =
            LogEntry now (exported { exportedTreeish = newTreeish ec })
        | otherwise = le
-- Record information about the export to the git-annex branch.
--
-- This is equivalent to recordExportBeginning followed by
-- recordExportUnderway, but without the ability to clean up from
-- interrupted exports.
recordExport :: UUID -> Git.Ref -> ExportChange -> Annex ()
recordExport remoteuuid tree ec =
    -- Remember the tree first, then update the export log.
    recordExportTreeish tree >> recordExportUnderway remoteuuid ec
parseExportLog :: L.ByteString -> MapLog ExportParticipants Exported
parseExportLog logcontent =
    parseMapLog exportParticipantsParser exportedParser logcontent

View file

@ -3,3 +3,8 @@ does not seem to get grafted into the git-annex branch, so
would be subject to being lost to GC.
Is this a general problem affecting importtree too?
> Yes, it was. It would have only caused a problem if the user
> kept doing imports from a remote, but never exporting to it.
> Then, in a clone of the repo that was importing, they would not be able
> to get files. [[fixed|done]] --[[Joey]]