change export.log format to support multiple export remotes

This breaks backwards compatibility, but only with unreleased versions of
git-annex, which I think is acceptable.

This commit was supported by the NSF-funded DataLad project.
This commit is contained in:
Joey Hess 2017-09-12 17:45:52 -04:00
parent 301c959edf
commit c8ed941a26
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
3 changed files with 70 additions and 49 deletions

View file

@ -17,7 +17,7 @@ import Git.Tree
import Git.Sha import Git.Sha
import Git.FilePath import Git.FilePath
import Logs import Logs
import Logs.UUIDBased import Logs.MapLog
import Annex.UUID import Annex.UUID
data Exported = Exported data Exported = Exported
@ -26,24 +26,30 @@ data Exported = Exported
} }
deriving (Eq, Show) deriving (Eq, Show)
-- | Get what's been exported to a special remote. data ExportParticipants = ExportParticipants
-- { exportFrom :: UUID
-- If the list contains multiple items, there was an export conflict, , exportTo :: UUID
-- and different trees were exported to the same special remote. }
getExport :: UUID -> Annex [Exported] deriving (Eq, Ord)
getExport remoteuuid = nub . mapMaybe get . M.elems . simpleMap
. parseLogNew parseExportLog
<$> Annex.Branch.get exportLog
where
get (ExportLog exported u)
| u == remoteuuid = Just exported
| otherwise = Nothing
data ExportChange = ExportChange data ExportChange = ExportChange
{ oldTreeish :: [Git.Ref] { oldTreeish :: [Git.Ref]
, newTreeish :: Git.Ref , newTreeish :: Git.Ref
} }
-- | Get what's been exported to a special remote.
--
-- If the list contains multiple items, there was an export conflict,
-- and different trees were exported to the same special remote.
getExport :: UUID -> Annex [Exported]
getExport remoteuuid = nub . mapMaybe get . M.toList . simpleMap
. parseExportLog
<$> Annex.Branch.get exportLog
where
get (ep, exported)
| exportTo ep == remoteuuid = Just exported
| otherwise = Nothing
-- | Record a change in what's exported to a special remote. -- | Record a change in what's exported to a special remote.
-- --
-- This is called before an export begins uploading new files to the -- This is called before an export begins uploading new files to the
@ -61,16 +67,17 @@ recordExport :: UUID -> ExportChange -> Annex ()
recordExport remoteuuid ec = do recordExport remoteuuid ec = do
c <- liftIO currentVectorClock c <- liftIO currentVectorClock
u <- getUUID u <- getUUID
let val = ExportLog (Exported (newTreeish ec) []) remoteuuid let ep = ExportParticipants { exportFrom = u, exportTo = remoteuuid }
let exported = Exported (newTreeish ec) []
Annex.Branch.change exportLog $ Annex.Branch.change exportLog $
showLogNew formatExportLog showExportLog
. changeLog c u val . changeMapLog c ep exported
. M.mapWithKey (updateothers c u) . M.mapWithKey (updateothers c u)
. parseLogNew parseExportLog . parseExportLog
where where
updateothers c u theiru le@(LogEntry _ (ExportLog exported@(Exported { exportedTreeish = t }) remoteuuid')) updateothers c u ep le@(LogEntry _ exported@(Exported { exportedTreeish = t }))
| u == theiru || remoteuuid' /= remoteuuid || t `notElem` oldTreeish ec = le | u == exportFrom ep || remoteuuid /= exportTo ep || t `notElem` oldTreeish ec = le
| otherwise = LogEntry c (ExportLog (exported { exportedTreeish = newTreeish ec }) theiru) | otherwise = LogEntry c (exported { exportedTreeish = newTreeish ec })
-- | Record the beginning of an export, to allow cleaning up from -- | Record the beginning of an export, to allow cleaning up from
-- interrupted exports. -- interrupted exports.
@ -80,29 +87,43 @@ recordExportBeginning :: UUID -> Git.Ref -> Annex ()
recordExportBeginning remoteuuid newtree = do recordExportBeginning remoteuuid newtree = do
c <- liftIO currentVectorClock c <- liftIO currentVectorClock
u <- getUUID u <- getUUID
ExportLog old _ <- fromMaybe (ExportLog (Exported emptyTree []) remoteuuid) let ep = ExportParticipants { exportFrom = u, exportTo = remoteuuid }
. M.lookup u . simpleMap old <- fromMaybe (Exported emptyTree [])
. parseLogNew parseExportLog . M.lookup ep . simpleMap
. parseExportLog
<$> Annex.Branch.get exportLog <$> Annex.Branch.get exportLog
let new = old { incompleteExportedTreeish = nub (newtree:incompleteExportedTreeish old) } let new = old { incompleteExportedTreeish = nub (newtree:incompleteExportedTreeish old) }
Annex.Branch.change exportLog $ Annex.Branch.change exportLog $
showLogNew formatExportLog showExportLog
. changeLog c u (ExportLog new remoteuuid) . changeMapLog c ep new
. parseLogNew parseExportLog . parseExportLog
graftTreeish newtree graftTreeish newtree
data ExportLog = ExportLog Exported UUID parseExportLog :: String -> MapLog ExportParticipants Exported
parseExportLog = parseMapLog parseExportParticipants parseExported
formatExportLog :: ExportLog -> String showExportLog :: MapLog ExportParticipants Exported -> String
formatExportLog (ExportLog exported remoteuuid) = unwords $ showExportLog = showMapLog formatExportParticipants formatExported
[ Git.fromRef (exportedTreeish exported)
, fromUUID remoteuuid
] ++ map Git.fromRef (incompleteExportedTreeish exported)
parseExportLog :: String -> Maybe ExportLog formatExportParticipants :: ExportParticipants -> String
parseExportLog s = case words s of formatExportParticipants ep =
(et:u:it) -> Just $ fromUUID (exportFrom ep) ++ ':' : fromUUID (exportTo ep)
ExportLog (Exported (Git.Ref et) (map Git.Ref it)) (toUUID u)
parseExportParticipants :: String -> Maybe ExportParticipants
parseExportParticipants s = case separate (== ':') s of
("",_) -> Nothing
(_,"") -> Nothing
(f,t) -> Just $ ExportParticipants
{ exportFrom = toUUID f
, exportTo = toUUID t
}
formatExported :: Exported -> String
formatExported exported = unwords $ map Git.fromRef $
exportedTreeish exported : incompleteExportedTreeish exported
parseExported :: String -> Maybe Exported
parseExported s = case words s of
(et:it) -> Just $ Exported (Git.Ref et) (map Git.Ref it)
_ -> Nothing _ -> Nothing
-- To prevent git-annex branch merge conflicts, the treeish is -- To prevent git-annex branch merge conflicts, the treeish is

View file

@ -186,20 +186,22 @@ Tracks what trees have been exported to special remotes by
[[git-annex-export]](1). [[git-annex-export]](1).
Each line starts with a timestamp, then the uuid of the repository Each line starts with a timestamp, then the uuid of the repository
that exported to the special remote, followed by the sha1 of the tree that exported to the special remote, followed by a colon (`:`) and
that was exported, and then by the uuid of the special remote. the uuid of the special remote. Then, separated by a spaces,
the sha1 of the tree that was exported, and optionally any number of
subsequent sha1s, of trees that have started to be exported but whose
export is not yet complete.
There can also be subsequent sha1s, of trees that have started to be In order to record the beginning of the first export, where nothing
exported but whose export is not yet complete. The sha1 of the exported has been exported yet, the sha1 of the exported tree can be
tree can be the empty tree (4b825dc642cb6eb9a060e54bf8d69288fbee4904) the empty tree (4b825dc642cb6eb9a060e54bf8d69288fbee4904).
in order to record the beginning of the first export.
For example: For example:
1317929100.012345s e605dca6-446a-11e0-8b2a-002170d25c55 4b825dc642cb6eb9a060e54bf8d69288fbee4904 26339d22-446b-11e0-9101-002170d25c55 bb08b1abd207aeecccbc7060e523b011d80cb35b 1317929100.012345s e605dca6-446a-11e0-8b2a-002170d25c55:26339d22-446b-11e0-9101-002170d25c55 4b825dc642cb6eb9a060e54bf8d69288fbee4904 bb08b1abd207aeecccbc7060e523b011d80cb35b
1317929100.012345s e605dca6-446a-11e0-8b2a-002170d25c55 bb08b1abd207aeecccbc7060e523b011d80cb35b 26339d22-446b-11e0-9101-002170d25c55 1317929100.012345s e605dca6-446a-11e0-8b2a-002170d25c55:26339d22-446b-11e0-9101-002170d25c55 bb08b1abd207aeecccbc7060e523b011d80cb35b
1317929189.157237s e605dca6-446a-11e0-8b2a-002170d25c55 bb08b1abd207aeecccbc7060e523b011d80cb35b 26339d22-446b-11e0-9101-002170d25c55 7c7af825782b7c8706039b855c72709993542be4 1317929189.157237s e605dca6-446a-11e0-8b2a-002170d25c55:26339d22-446b-11e0-9101-002170d25c55 bb08b1abd207aeecccbc7060e523b011d80cb35b 7c7af825782b7c8706039b855c72709993542be4
1317923000.251111s e605dca6-446a-11e0-8b2a-002170d25c55 7c7af825782b7c8706039b855c72709993542be4 26339d22-446b-11e0-9101-002170d25c55 1317923000.251111s e605dca6-446a-11e0-8b2a-002170d25c55:26339d22-446b-11e0-9101-002170d25c55 7c7af825782b7c8706039b855c72709993542be4
(The trees are also grafted into the git-annex branch, at (The trees are also grafted into the git-annex branch, at
`export.tree`, to prevent git from garbage collecting it. However, the head `export.tree`, to prevent git from garbage collecting it. However, the head

View file

@ -17,9 +17,7 @@ there need to be a new interface in supported remotes?
Work is in progress. Todo list: Work is in progress. Todo list:
* The export.log parsing only works right when there's one export * Compact the export.log to remove old entries.
remote. With 2, only the most recently exported to one is gotten from the
log.
* `git annex get --from export` works in the repo that exported to it, * `git annex get --from export` works in the repo that exported to it,
but in another repo, the export db won't be populated, so it won't work. but in another repo, the export db won't be populated, so it won't work.
Maybe just show a useful error message in this case? Maybe just show a useful error message in this case?