record incomplete exports in export.log

Not yet used, but essential for resuming cleanly.

Note that, in normal operation, only one commit is made to export.log
during an export; the incomplete version only gets to the journal and
is then overwritten.

This commit was supported by the NSF-funded DataLad project.
This commit is contained in:
Joey Hess 2017-09-06 13:39:33 -04:00
parent 1ec3a9eb05
commit 0fa948b402
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
3 changed files with 60 additions and 18 deletions

View file

@ -79,9 +79,10 @@ seek o = do
inRepo (Git.Ref.tree (exportTreeish o))
old <- getExport (uuid r)
recordExportBeginning (uuid r) new
when (length old > 1) $
warning "Export conflict detected. Different trees have been exported to the same special remote. Resolving.."
db <- openDb (uuid r)
-- First, diff the old and new trees and delete all changed
@ -89,7 +90,7 @@ seek o = do
-- have the content from the new treeish.
--
-- (Also, when there was an export conflict, this resolves it.)
forM_ old $ \oldtreesha -> do
forM_ (map exportedTreeish old) $ \oldtreesha -> do
(diff, cleanup) <- inRepo $
Git.DiffTree.diffTreeRecursive oldtreesha new
seekActions $ pure $ map (startUnexport r db) diff
@ -99,7 +100,7 @@ seek o = do
-- if this export is interrupted, there are no files left over
-- from a previous export, that are not part of this export.
recordExport (uuid r) $ ExportChange
{ oldTreeish = old
{ oldTreeish = map exportedTreeish old
, newTreeish = new
}

View file

@ -14,22 +14,29 @@ import qualified Annex.Branch
import qualified Git
import qualified Git.Branch
import Git.Tree
import Git.Sha
import Git.FilePath
import Logs
import Logs.UUIDBased
import Annex.UUID
-- | Get the treeish that was exported to a special remote.
-- | What export.log records as exported to a special remote.
--
-- Eq is derived so that identical records can be collapsed with nub
-- when the log is read.
data Exported = Exported
{ exportedTreeish :: Git.Ref
-- ^ the tree that was exported
, incompleteExportedTreeish :: [Git.Ref]
-- ^ trees that have started to be exported, but whose export is
-- not yet complete
}
deriving (Eq)
-- | Get what's been exported to a special remote.
--
-- If the list contains multiple items, there was an export conflict,
-- and different trees were exported to the same special remote.
--
-- Identical records from different log entries are collapsed by nub,
-- so only records that differ show up as multiple items.
getExport :: UUID -> Annex [Git.Ref]
getExport :: UUID -> Annex [Exported]
getExport remoteuuid = nub . mapMaybe get . M.elems . simpleMap
. parseLogNew parseExportLog
<$> Annex.Branch.get exportLog
where
-- Keep only the log entries that were recorded for this remote.
get (ExportLog t u)
| u == remoteuuid = Just t
get (ExportLog exported u)
| u == remoteuuid = Just exported
| otherwise = Nothing
data ExportChange = ExportChange
@ -39,6 +46,10 @@ data ExportChange = ExportChange
-- | Record a change in what's exported to a special remote.
--
-- This is called before an export begins uploading new files to the
-- remote, but after it's cleaned up any files that need to be deleted
-- from the old treeish.
--
-- Any entries in the log for the oldTreeish will be updated to the
-- newTreeish. This way, when multiple repositories are exporting to
-- the same special remote, there's no conflict as long as they move
@ -50,27 +61,48 @@ recordExport :: UUID -> ExportChange -> Annex ()
-- Writes this repository's own log entry as a completed export of
-- newTreeish (with no incomplete trees), rewrites the entries of other
-- repositories that are for the same remote and whose exported tree is
-- one of the oldTreeish, and then grafts the new tree.
recordExport remoteuuid ec = do
c <- liftIO currentVectorClock
u <- getUUID
let val = ExportLog (newTreeish ec) remoteuuid
let val = ExportLog (Exported (newTreeish ec) []) remoteuuid
Annex.Branch.change exportLog $
showLogNew formatExportLog
. changeLog c u val
. M.mapWithKey (updateothers c u)
. parseLogNew parseExportLog
graftTreeish (newTreeish ec)
where
-- Leaves an entry untouched when it is our own, is for a different
-- remote, or records a tree that is not one of the oldTreeish.
updateothers c u theiru le@(LogEntry _ (ExportLog t remoteuuid'))
updateothers c u theiru le@(LogEntry _ (ExportLog exported@(Exported { exportedTreeish = t }) remoteuuid'))
| u == theiru || remoteuuid' /= remoteuuid || t `notElem` oldTreeish ec = le
-- NOTE(review): the rewritten entry passes theiru (the map key) as the
-- second field of ExportLog, which everywhere else holds the special
-- remote's UUID; remoteuuid' looks like what was intended -- confirm.
| otherwise = LogEntry c (ExportLog (newTreeish ec) theiru)
| otherwise = LogEntry c (ExportLog (exported { exportedTreeish = newTreeish ec }) theiru)
data ExportLog = ExportLog Git.Ref UUID
-- | Record the beginning of an export, to allow cleaning up from
-- interrupted exports.
--
-- This is called before any changes are made to the remote.
--
-- The new tree is added to the incomplete trees of this repository's
-- own export.log entry, and is grafted into the git-annex branch.
--
-- The previously recorded state is only carried over when this
-- repository's entry is for the same special remote; an entry for some
-- other remote says nothing about what this remote contains, so in that
-- case the record starts over from the empty tree.
--
-- A tree that is already listed as incomplete is not listed twice, so
-- repeatedly restarting an interrupted export of the same tree does not
-- grow the log line.
recordExportBeginning :: UUID -> Git.Ref -> Annex ()
recordExportBeginning remoteuuid newtree = do
    c <- liftIO currentVectorClock
    u <- getUUID
    old <- previous u . simpleMap . parseLogNew parseExportLog
        <$> Annex.Branch.get exportLog
    let new = old
            { incompleteExportedTreeish =
                -- newtree stays at the head; nub drops any older copy
                nub (newtree : incompleteExportedTreeish old)
            }
    Annex.Branch.change exportLog $
        showLogNew formatExportLog
            . changeLog c u (ExportLog new remoteuuid)
            . parseLogNew parseExportLog
    graftTreeish newtree
  where
    -- Nothing exported yet: the empty tree, with no incomplete exports.
    fresh = Exported emptyTree []
    -- What this repository last recorded for this particular remote.
    previous u m = case M.lookup u m of
        Just (ExportLog exported r) | r == remoteuuid -> exported
        _ -> fresh
-- | The value of one export.log entry: what was exported, and the UUID
-- of the special remote it was exported to.
data ExportLog = ExportLog Exported UUID
-- | Serialize a log value as space separated words: the exported tree,
-- then the UUID of the special remote, then any incomplete trees.
formatExportLog :: ExportLog -> String
formatExportLog (ExportLog treeish remoteuuid) =
Git.fromRef treeish ++ " " ++ fromUUID remoteuuid
formatExportLog (ExportLog exported remoteuuid) = unwords $
[ Git.fromRef (exportedTreeish exported)
, fromUUID remoteuuid
] ++ map Git.fromRef (incompleteExportedTreeish exported)
-- | Parse a log value. The first word is the exported tree, the second
-- is the UUID of the special remote, and any remaining words are trees
-- whose export is incomplete (there may be none). A value with fewer
-- than two words does not parse.
parseExportLog :: String -> Maybe ExportLog
parseExportLog s = case words s of
(t:u:[]) -> Just $ ExportLog (Git.Ref t) (toUUID u)
(et:u:it) -> Just $
ExportLog (Exported (Git.Ref et) (map Git.Ref it)) (toUUID u)
_ -> Nothing
-- To prevent git-annex branch merge conflicts, the treeish is

View file

@ -187,12 +187,21 @@ Tracks what trees have been exported to special remotes by
Each line starts with a timestamp, then the uuid of the repository
that exported to the special remote, followed by the sha1 of the tree
that was exported, and then by the uuid of the special remote. For example:
that was exported, and then by the uuid of the special remote.
1317929189.157237s e605dca6-446a-11e0-8b2a-002170d25c55 bb08b1abd207aeecccbc7060e523b011d80cb35b 26339d22-446b-11e0-9101-002170d25c55
There can also be subsequent sha1s, of trees that have started to be
exported but whose export is not yet complete. The sha1 of the exported
tree can be the empty tree (4b825dc642cb6eb9a060e54bf8d69288fbee4904)
in order to record the beginning of the first export.
For example:
1317929100.012345s e605dca6-446a-11e0-8b2a-002170d25c55 4b825dc642cb6eb9a060e54bf8d69288fbee4904 26339d22-446b-11e0-9101-002170d25c55 bb08b1abd207aeecccbc7060e523b011d80cb35b
1317929100.012345s e605dca6-446a-11e0-8b2a-002170d25c55 bb08b1abd207aeecccbc7060e523b011d80cb35b 26339d22-446b-11e0-9101-002170d25c55
1317929189.157237s e605dca6-446a-11e0-8b2a-002170d25c55 bb08b1abd207aeecccbc7060e523b011d80cb35b 26339d22-446b-11e0-9101-002170d25c55 7c7af825782b7c8706039b855c72709993542be4
1317923000.251111s e605dca6-446a-11e0-8b2a-002170d25c55 7c7af825782b7c8706039b855c72709993542be4 26339d22-446b-11e0-9101-002170d25c55
(The exported tree is also grafted into the git-annex branch, at
(The trees are also grafted into the git-annex branch, at
`export.tree`, to prevent git from garbage collecting them. However, the head
of the git-annex branch should never contain such a grafted in tree;
the grafted tree is removed in the same commit that updates `export.log`.)