change export.log format to support multiple export remotes

This breaks backwards compatibility, but only with unreleased versions of
git-annex, which I think is acceptable.

This commit was supported by the NSF-funded DataLad project.
This commit is contained in:
Joey Hess 2017-09-12 17:45:52 -04:00
parent 301c959edf
commit c8ed941a26
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
3 changed files with 70 additions and 49 deletions

View file

@ -17,7 +17,7 @@ import Git.Tree
import Git.Sha
import Git.FilePath
import Logs
import Logs.UUIDBased
import Logs.MapLog
import Annex.UUID
data Exported = Exported
@ -26,24 +26,30 @@ data Exported = Exported
}
deriving (Eq, Show)
-- | Get what's been exported to a special remote.
--
-- If the list contains multiple items, there was an export conflict,
-- and different trees were exported to the same special remote.
getExport :: UUID -> Annex [Exported]
getExport remoteuuid = nub . mapMaybe get . M.elems . simpleMap
. parseLogNew parseExportLog
<$> Annex.Branch.get exportLog
where
get (ExportLog exported u)
| u == remoteuuid = Just exported
| otherwise = Nothing
data ExportParticipants = ExportParticipants
{ exportFrom :: UUID
, exportTo :: UUID
}
deriving (Eq, Ord)
data ExportChange = ExportChange
{ oldTreeish :: [Git.Ref]
, newTreeish :: Git.Ref
}
-- | Get what's been exported to a special remote.
--
-- If the list contains multiple items, there was an export conflict,
-- and different trees were exported to the same special remote.
getExport :: UUID -> Annex [Exported]
getExport remoteuuid = nub . mapMaybe get . M.toList . simpleMap
. parseExportLog
<$> Annex.Branch.get exportLog
where
get (ep, exported)
| exportTo ep == remoteuuid = Just exported
| otherwise = Nothing
-- | Record a change in what's exported to a special remote.
--
-- This is called before an export begins uploading new files to the
@ -61,16 +67,17 @@ recordExport :: UUID -> ExportChange -> Annex ()
recordExport remoteuuid ec = do
c <- liftIO currentVectorClock
u <- getUUID
let val = ExportLog (Exported (newTreeish ec) []) remoteuuid
let ep = ExportParticipants { exportFrom = u, exportTo = remoteuuid }
let exported = Exported (newTreeish ec) []
Annex.Branch.change exportLog $
showLogNew formatExportLog
. changeLog c u val
showExportLog
. changeMapLog c ep exported
. M.mapWithKey (updateothers c u)
. parseLogNew parseExportLog
. parseExportLog
where
updateothers c u theiru le@(LogEntry _ (ExportLog exported@(Exported { exportedTreeish = t }) remoteuuid'))
| u == theiru || remoteuuid' /= remoteuuid || t `notElem` oldTreeish ec = le
| otherwise = LogEntry c (ExportLog (exported { exportedTreeish = newTreeish ec }) theiru)
updateothers c u ep le@(LogEntry _ exported@(Exported { exportedTreeish = t }))
| u == exportFrom ep || remoteuuid /= exportTo ep || t `notElem` oldTreeish ec = le
| otherwise = LogEntry c (exported { exportedTreeish = newTreeish ec })
-- | Record the beginning of an export, to allow cleaning up from
-- interrupted exports.
@ -80,29 +87,43 @@ recordExportBeginning :: UUID -> Git.Ref -> Annex ()
recordExportBeginning remoteuuid newtree = do
c <- liftIO currentVectorClock
u <- getUUID
ExportLog old _ <- fromMaybe (ExportLog (Exported emptyTree []) remoteuuid)
. M.lookup u . simpleMap
. parseLogNew parseExportLog
let ep = ExportParticipants { exportFrom = u, exportTo = remoteuuid }
old <- fromMaybe (Exported emptyTree [])
. M.lookup ep . simpleMap
. parseExportLog
<$> Annex.Branch.get exportLog
let new = old { incompleteExportedTreeish = nub (newtree:incompleteExportedTreeish old) }
Annex.Branch.change exportLog $
showLogNew formatExportLog
. changeLog c u (ExportLog new remoteuuid)
. parseLogNew parseExportLog
showExportLog
. changeMapLog c ep new
. parseExportLog
graftTreeish newtree
data ExportLog = ExportLog Exported UUID
parseExportLog :: String -> MapLog ExportParticipants Exported
parseExportLog = parseMapLog parseExportParticipants parseExported
formatExportLog :: ExportLog -> String
formatExportLog (ExportLog exported remoteuuid) = unwords $
[ Git.fromRef (exportedTreeish exported)
, fromUUID remoteuuid
] ++ map Git.fromRef (incompleteExportedTreeish exported)
showExportLog :: MapLog ExportParticipants Exported -> String
showExportLog = showMapLog formatExportParticipants formatExported
parseExportLog :: String -> Maybe ExportLog
parseExportLog s = case words s of
(et:u:it) -> Just $
ExportLog (Exported (Git.Ref et) (map Git.Ref it)) (toUUID u)
formatExportParticipants :: ExportParticipants -> String
formatExportParticipants ep =
fromUUID (exportFrom ep) ++ ':' : fromUUID (exportTo ep)
parseExportParticipants :: String -> Maybe ExportParticipants
parseExportParticipants s = case separate (== ':') s of
("",_) -> Nothing
(_,"") -> Nothing
(f,t) -> Just $ ExportParticipants
{ exportFrom = toUUID f
, exportTo = toUUID t
}
formatExported :: Exported -> String
formatExported exported = unwords $ map Git.fromRef $
exportedTreeish exported : incompleteExportedTreeish exported
parseExported :: String -> Maybe Exported
parseExported s = case words s of
(et:it) -> Just $ Exported (Git.Ref et) (map Git.Ref it)
_ -> Nothing
-- To prevent git-annex branch merge conflicts, the treeish is

View file

@ -186,20 +186,22 @@ Tracks what trees have been exported to special remotes by
[[git-annex-export]](1).
Each line starts with a timestamp, then the uuid of the repository
that exported to the special remote, followed by the sha1 of the tree
that was exported, and then by the uuid of the special remote.
that exported to the special remote, followed by a colon (`:`) and
the uuid of the special remote. Then, separated by a spaces,
the sha1 of the tree that was exported, and optionally any number of
subsequent sha1s, of trees that have started to be exported but whose
export is not yet complete.
There can also be subsequent sha1s, of trees that have started to be
exported but whose export is not yet complete. The sha1 of the exported
tree can be the empty tree (4b825dc642cb6eb9a060e54bf8d69288fbee4904)
in order to record the beginning of the first export.
In order to record the beginning of the first export, where nothing
has been exported yet, the sha1 of the exported tree can be
the empty tree (4b825dc642cb6eb9a060e54bf8d69288fbee4904).
For example:
1317929100.012345s e605dca6-446a-11e0-8b2a-002170d25c55 4b825dc642cb6eb9a060e54bf8d69288fbee4904 26339d22-446b-11e0-9101-002170d25c55 bb08b1abd207aeecccbc7060e523b011d80cb35b
1317929100.012345s e605dca6-446a-11e0-8b2a-002170d25c55 bb08b1abd207aeecccbc7060e523b011d80cb35b 26339d22-446b-11e0-9101-002170d25c55
1317929189.157237s e605dca6-446a-11e0-8b2a-002170d25c55 bb08b1abd207aeecccbc7060e523b011d80cb35b 26339d22-446b-11e0-9101-002170d25c55 7c7af825782b7c8706039b855c72709993542be4
1317923000.251111s e605dca6-446a-11e0-8b2a-002170d25c55 7c7af825782b7c8706039b855c72709993542be4 26339d22-446b-11e0-9101-002170d25c55
1317929100.012345s e605dca6-446a-11e0-8b2a-002170d25c55:26339d22-446b-11e0-9101-002170d25c55 4b825dc642cb6eb9a060e54bf8d69288fbee4904 bb08b1abd207aeecccbc7060e523b011d80cb35b
1317929100.012345s e605dca6-446a-11e0-8b2a-002170d25c55:26339d22-446b-11e0-9101-002170d25c55 bb08b1abd207aeecccbc7060e523b011d80cb35b
1317929189.157237s e605dca6-446a-11e0-8b2a-002170d25c55:26339d22-446b-11e0-9101-002170d25c55 bb08b1abd207aeecccbc7060e523b011d80cb35b 7c7af825782b7c8706039b855c72709993542be4
1317923000.251111s e605dca6-446a-11e0-8b2a-002170d25c55:26339d22-446b-11e0-9101-002170d25c55 7c7af825782b7c8706039b855c72709993542be4
(The trees are also grafted into the git-annex branch, at
`export.tree`, to prevent git from garbage collecting it. However, the head

View file

@ -17,9 +17,7 @@ there need to be a new interface in supported remotes?
Work is in progress. Todo list:
* The export.log parsing only works right when there's one export
remote. With 2, only the most recently exported to one is gotten from the
log.
* Compact the export.log to remove old entries.
* `git annex get --from export` works in the repo that exported to it,
but in another repo, the export db won't be populated, so it won't work.
Maybe just show a useful error message in this case?