implement export.log and resolve export conflicts

Incremental export updates work now too.

This commit was sponsored by Anthony DeRobertis on Patreon.
This commit is contained in:
Joey Hess 2017-08-31 15:41:48 -04:00
parent bb08b1abd2
commit 978885247e
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
6 changed files with 126 additions and 40 deletions

View file

@ -11,14 +11,16 @@ import Command
import qualified Git import qualified Git
import qualified Git.DiffTree import qualified Git.DiffTree
import qualified Git.LsTree import qualified Git.LsTree
import qualified Git.Ref
import Git.Types import Git.Types
import Git.Sha
import Git.FilePath import Git.FilePath
import Git.Sha
import Types.Key import Types.Key
import Types.Remote import Types.Remote
import Annex.Content import Annex.Content
import Annex.CatFile import Annex.CatFile
import Logs.Location import Logs.Location
import Logs.Export
import Messages.Progress import Messages.Progress
import Utility.Tmp import Utility.Tmp
@ -67,45 +69,46 @@ exportKey sha = mk <$> catKey sha
seek :: ExportOptions -> CommandSeek seek :: ExportOptions -> CommandSeek
seek o = do seek o = do
r <- getParsed (exportRemote o) r <- getParsed (exportRemote o)
let oldtreeish = emptyTree -- XXX temporary new <- fromMaybe (error "unknown tree") <$>
inRepo (Git.Ref.sha (exportTreeish o))
old <- getExport (uuid r)
-- First, diff the old and new trees and update all changed when (length old > 1) $
-- files in the export. warning "Export conflict detected. Different trees have been exported to the same special remote. Resolving.."
(diff, cleanup) <- inRepo $
Git.DiffTree.diffTreeRecursive oldtreeish (exportTreeish o) -- First, diff the old and new trees and delete all changed
seekActions $ pure $ map (startDiff r) diff -- files in the export. Every file that remains in the export will
void $ liftIO cleanup -- have the content from the new treeish.
--
-- (Also, when there was an export conflict, this resolves it.)
forM_ old $ \oldtreesha -> do
(diff, cleanup) <- inRepo $
Git.DiffTree.diffTreeRecursive oldtreesha new
seekActions $ pure $ map (startUnexport r) diff
void $ liftIO cleanup
-- In case a previous export was incomplete, make a pass -- Waiting until now to record the export guarantees that,
-- over the whole tree and export anything that is not -- if this export is interrupted, there are no files left over
-- yet exported. -- from a previous export, that are not part of this export.
(l, cleanup') <- inRepo $ Git.LsTree.lsTree (exportTreeish o) recordExport (uuid r) $ ExportChange
seekActions $ pure $ map (start r) l { oldTreeish = old
, newTreeish = new
}
-- Export everything that is not yet exported.
(l, cleanup') <- inRepo $ Git.LsTree.lsTree new
seekActions $ pure $ map (startExport r) l
void $ liftIO cleanup' void $ liftIO cleanup'
startDiff :: Remote -> Git.DiffTree.DiffTreeItem -> CommandStart startExport :: Remote -> Git.LsTree.TreeItem -> CommandStart
startDiff r diff startExport r ti = do
| Git.DiffTree.dstsha diff == nullSha = do
showStart "unexport" f
oldk <- exportKey (Git.DiffTree.srcsha diff)
next $ performUnexport r oldk loc
| otherwise = do
showStart "export" f
k <- exportKey (Git.DiffTree.dstsha diff)
next $ performExport r k (Git.DiffTree.dstsha diff) loc
where
loc = ExportLocation $ toInternalGitPath $
getTopFilePath $ Git.DiffTree.file diff
f = getTopFilePath $ Git.DiffTree.file diff
start :: Remote -> Git.LsTree.TreeItem -> CommandStart
start r ti = do
ek <- exportKey (Git.LsTree.sha ti) ek <- exportKey (Git.LsTree.sha ti)
stopUnless (elem (uuid r) <$> loggedLocations (asKey ek)) $ stopUnless (notElem (uuid r) <$> loggedLocations (asKey ek)) $ do
showStart "export" f
next $ performExport r ek (Git.LsTree.sha ti) loc next $ performExport r ek (Git.LsTree.sha ti) loc
where where
loc = ExportLocation $ toInternalGitPath $ loc = ExportLocation $ toInternalGitPath f
getTopFilePath $ Git.LsTree.file ti f = getTopFilePath $ Git.LsTree.file ti
performExport :: Remote -> ExportKey -> Sha -> ExportLocation -> CommandPerform performExport :: Remote -> ExportKey -> Sha -> ExportLocation -> CommandPerform
performExport r ek contentsha loc = case storeExport r of performExport r ek contentsha loc = case storeExport r of
@ -137,6 +140,17 @@ cleanupExport r ek = do
logChange (asKey ek) (uuid r) InfoPresent logChange (asKey ek) (uuid r) InfoPresent
return True return True
startUnexport :: Remote -> Git.DiffTree.DiffTreeItem -> CommandStart
startUnexport r diff
| Git.DiffTree.srcsha diff /= nullSha = do
showStart "unexport" f
oldk <- exportKey (Git.DiffTree.srcsha diff)
next $ performUnexport r oldk loc
| otherwise = stop
where
loc = ExportLocation $ toInternalGitPath f
f = getTopFilePath $ Git.DiffTree.file diff
performUnexport :: Remote -> ExportKey -> ExportLocation -> CommandPerform performUnexport :: Remote -> ExportKey -> ExportLocation -> CommandPerform
performUnexport r ek loc = case removeExport r of performUnexport r ek loc = case removeExport r of
Nothing -> error "remote does not support removing exported files" Nothing -> error "remote does not support removing exported files"

View file

@ -42,6 +42,7 @@ topLevelUUIDBasedLogs =
, activityLog , activityLog
, differenceLog , differenceLog
, multicastLog , multicastLog
, exportLog
] ]
{- All the ways to get a key from a presence log file -} {- All the ways to get a key from a presence log file -}
@ -97,6 +98,9 @@ differenceLog = "difference.log"
multicastLog :: FilePath multicastLog :: FilePath
multicastLog = "multicast.log" multicastLog = "multicast.log"
exportLog :: FilePath
exportLog = "export.log"
{- The pathname of the location log file for a given key. -} {- The pathname of the location log file for a given key. -}
locationLogFile :: GitConfig -> Key -> String locationLogFile :: GitConfig -> Key -> String
locationLogFile config key = branchHashDir config key </> keyFile key ++ ".log" locationLogFile config key = branchHashDir config key </> keyFile key ++ ".log"

67
Logs/Export.hs Normal file
View file

@ -0,0 +1,67 @@
{- git-annex export log
-
- Copyright 2017 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Logs.Export where
import qualified Data.Map as M
import Annex.Common
import qualified Annex.Branch
import qualified Git
import Logs
import Logs.UUIDBased
import Annex.UUID
-- | Get the treeish that was exported to a special remote.
--
-- If the list contains multiple items, there was an export conflict,
-- and different trees were exported to the same special remote.
getExport :: UUID -> Annex [Git.Ref]
getExport remoteuuid = nub . mapMaybe get . M.elems . simpleMap
. parseLogNew parseExportLog
<$> Annex.Branch.get exportLog
where
get (ExportLog t u)
| u == remoteuuid = Just t
| otherwise = Nothing
data ExportChange = ExportChange
{ oldTreeish :: [Git.Ref]
, newTreeish :: Git.Ref
}
-- | Record a change in what's exported to a special remote.
--
-- Any entries in the log for the oldTreeish will be updated to the
-- newTreeish. This way, when multiple repositories are exporting to
-- the same special remote, there's no conflict as long as they move
-- forward in lock-step.
recordExport :: UUID -> ExportChange -> Annex ()
recordExport remoteuuid ec = do
c <- liftIO currentVectorClock
u <- getUUID
let val = ExportLog (newTreeish ec) remoteuuid
Annex.Branch.change exportLog $
showLogNew formatExportLog
. changeLog c u val
. M.mapWithKey (updateothers c u)
. parseLogNew parseExportLog
where
updateothers c u theiru le@(LogEntry _ (ExportLog t remoteuuid'))
| u == theiru || remoteuuid' /= remoteuuid || t `notElem` oldTreeish ec = le
| otherwise = LogEntry c (ExportLog (newTreeish ec) theiru)
data ExportLog = ExportLog Git.Ref UUID
formatExportLog :: ExportLog -> String
formatExportLog (ExportLog treeish remoteuuid) =
Git.fromRef treeish ++ " " ++ fromUUID remoteuuid
parseExportLog :: String -> Maybe ExportLog
parseExportLog s = case words s of
(t:u:[]) -> Just $ ExportLog (Git.Ref t) (toUUID u)
_ -> Nothing

View file

@ -185,8 +185,12 @@ content expression.
Tracks what trees have been exported to special remotes by Tracks what trees have been exported to special remotes by
[[git-annex-export]](1). [[git-annex-export]](1).
Each line starts with a timestamp, then the uuid of the special remote, Each line starts with a timestamp, then the uuid of the repository
followed by the sha1 of the tree that was exported to that special remote. that exported to the special remote, followed by the sha1 of the tree
that was exported, and then by the uuid of the special remote. For example:
1317929189.157237s e605dca6-446a-11e0-8b2a-002170d25c55 bb08b1abd207aeecccbc7060e523b011d80cb35b 26339d22-446b-11e0-9101-002170d25c55
1317923000.251111s e605dca6-446a-11e0-8b2a-002170d25c55 7c7af825782b7c8706039b855c72709993542be4 26339d22-446b-11e0-9101-002170d25c55
(The exported tree is also grafted into the git-annex branch, at (The exported tree is also grafted into the git-annex branch, at
`export.tree`, to prevent git from garbage collecting it. However, the head `export.tree`, to prevent git from garbage collecting it. However, the head

View file

@ -17,14 +17,10 @@ there need to be a new interface in supported remotes?
Work is in progress. Todo list: Work is in progress. Todo list:
* Remember the previously exported tree (in git-annex branch, see design)
and use to make next export more efficient.
* Only export to remotes that were initialized to support it. * Only export to remotes that were initialized to support it.
* Prevent using export remotes for key/value storage. * Prevent using export remotes for key/value storage.
* When exporting, update location tracking to allow getting from exports,
* Use retrieveExport when getting from export remotes. * Use retrieveExport when getting from export remotes.
* Efficient handling of renames. * Efficient handling of renames.
* Detect export conflicts (see design)
* Support export to aditional special remotes (S3 etc) * Support export to aditional special remotes (S3 etc)
* Support export to external special remotes. * Support export to external special remotes.
* If the same content is present in two different files, export * If the same content is present in two different files, export
@ -36,4 +32,4 @@ Work is in progress. Todo list:
And, once one of the files is uploaded, the location log will And, once one of the files is uploaded, the location log will
say the content is present, so the pass over the tree won't try to say the content is present, so the pass over the tree won't try to
upload the other file. upload the other file. (See design for a fix for this.)

View file

@ -849,6 +849,7 @@ Executable git-annex
Logs.Config Logs.Config
Logs.Difference Logs.Difference
Logs.Difference.Pure Logs.Difference.Pure
Logs.Export
Logs.FsckResults Logs.FsckResults
Logs.Group Logs.Group
Logs.Line Logs.Line