implement export.log and resolve export conflicts

Incremental export updates work now too.

This commit was sponsored by Anthony DeRobertis on Patreon.
This commit is contained in:
Joey Hess 2017-08-31 15:41:48 -04:00
parent bb08b1abd2
commit 978885247e
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
6 changed files with 126 additions and 40 deletions

View file

@ -11,14 +11,16 @@ import Command
import qualified Git
import qualified Git.DiffTree
import qualified Git.LsTree
import qualified Git.Ref
import Git.Types
import Git.Sha
import Git.FilePath
import Git.Sha
import Types.Key
import Types.Remote
import Annex.Content
import Annex.CatFile
import Logs.Location
import Logs.Export
import Messages.Progress
import Utility.Tmp
@ -67,45 +69,46 @@ exportKey sha = mk <$> catKey sha
seek :: ExportOptions -> CommandSeek
seek o = do
r <- getParsed (exportRemote o)
let oldtreeish = emptyTree -- XXX temporary
new <- fromMaybe (error "unknown tree") <$>
inRepo (Git.Ref.sha (exportTreeish o))
old <- getExport (uuid r)
-- First, diff the old and new trees and update all changed
-- files in the export.
(diff, cleanup) <- inRepo $
Git.DiffTree.diffTreeRecursive oldtreeish (exportTreeish o)
seekActions $ pure $ map (startDiff r) diff
void $ liftIO cleanup
when (length old > 1) $
warning "Export conflict detected. Different trees have been exported to the same special remote. Resolving.."
-- First, diff the old and new trees and delete all changed
-- files in the export. Every file that remains in the export will
-- have the content from the new treeish.
--
-- (Also, when there was an export conflict, this resolves it.)
forM_ old $ \oldtreesha -> do
(diff, cleanup) <- inRepo $
Git.DiffTree.diffTreeRecursive oldtreesha new
seekActions $ pure $ map (startUnexport r) diff
void $ liftIO cleanup
-- In case a previous export was incomplete, make a pass
-- over the whole tree and export anything that is not
-- yet exported.
(l, cleanup') <- inRepo $ Git.LsTree.lsTree (exportTreeish o)
seekActions $ pure $ map (start r) l
-- Waiting until now to record the export guarantees that,
-- if this export is interrupted, there are no files left over
-- from a previous export, that are not part of this export.
recordExport (uuid r) $ ExportChange
{ oldTreeish = old
, newTreeish = new
}
-- Export everything that is not yet exported.
(l, cleanup') <- inRepo $ Git.LsTree.lsTree new
seekActions $ pure $ map (startExport r) l
void $ liftIO cleanup'
startDiff :: Remote -> Git.DiffTree.DiffTreeItem -> CommandStart
startDiff r diff
| Git.DiffTree.dstsha diff == nullSha = do
showStart "unexport" f
oldk <- exportKey (Git.DiffTree.srcsha diff)
next $ performUnexport r oldk loc
| otherwise = do
showStart "export" f
k <- exportKey (Git.DiffTree.dstsha diff)
next $ performExport r k (Git.DiffTree.dstsha diff) loc
where
loc = ExportLocation $ toInternalGitPath $
getTopFilePath $ Git.DiffTree.file diff
f = getTopFilePath $ Git.DiffTree.file diff
start :: Remote -> Git.LsTree.TreeItem -> CommandStart
start r ti = do
startExport :: Remote -> Git.LsTree.TreeItem -> CommandStart
startExport r ti = do
ek <- exportKey (Git.LsTree.sha ti)
stopUnless (elem (uuid r) <$> loggedLocations (asKey ek)) $
stopUnless (notElem (uuid r) <$> loggedLocations (asKey ek)) $ do
showStart "export" f
next $ performExport r ek (Git.LsTree.sha ti) loc
where
loc = ExportLocation $ toInternalGitPath $
getTopFilePath $ Git.LsTree.file ti
loc = ExportLocation $ toInternalGitPath f
f = getTopFilePath $ Git.LsTree.file ti
performExport :: Remote -> ExportKey -> Sha -> ExportLocation -> CommandPerform
performExport r ek contentsha loc = case storeExport r of
@ -137,6 +140,17 @@ cleanupExport r ek = do
logChange (asKey ek) (uuid r) InfoPresent
return True
startUnexport :: Remote -> Git.DiffTree.DiffTreeItem -> CommandStart
startUnexport r diff
| Git.DiffTree.srcsha diff /= nullSha = do
showStart "unexport" f
oldk <- exportKey (Git.DiffTree.srcsha diff)
next $ performUnexport r oldk loc
| otherwise = stop
where
loc = ExportLocation $ toInternalGitPath f
f = getTopFilePath $ Git.DiffTree.file diff
performUnexport :: Remote -> ExportKey -> ExportLocation -> CommandPerform
performUnexport r ek loc = case removeExport r of
Nothing -> error "remote does not support removing exported files"

View file

@ -42,6 +42,7 @@ topLevelUUIDBasedLogs =
, activityLog
, differenceLog
, multicastLog
, exportLog
]
{- All the ways to get a key from a presence log file -}
@ -97,6 +98,9 @@ differenceLog = "difference.log"
multicastLog :: FilePath
multicastLog = "multicast.log"
exportLog :: FilePath
exportLog = "export.log"
{- The pathname of the location log file for a given key. -}
locationLogFile :: GitConfig -> Key -> String
locationLogFile config key = branchHashDir config key </> keyFile key ++ ".log"

67
Logs/Export.hs Normal file
View file

@ -0,0 +1,67 @@
{- git-annex export log
-
- Copyright 2017 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU GPL version 3 or higher.
-}
module Logs.Export where
import qualified Data.Map as M
import Annex.Common
import qualified Annex.Branch
import qualified Git
import Logs
import Logs.UUIDBased
import Annex.UUID
-- | Get the treeish that was exported to a special remote.
--
-- If the list contains multiple items, there was an export conflict,
-- and different trees were exported to the same special remote.
getExport :: UUID -> Annex [Git.Ref]
getExport remoteuuid = nub . mapMaybe get . M.elems . simpleMap
. parseLogNew parseExportLog
<$> Annex.Branch.get exportLog
where
get (ExportLog t u)
| u == remoteuuid = Just t
| otherwise = Nothing
data ExportChange = ExportChange
{ oldTreeish :: [Git.Ref]
, newTreeish :: Git.Ref
}
-- | Record a change in what's exported to a special remote.
--
-- Any entries in the log for the oldTreeish will be updated to the
-- newTreeish. This way, when multiple repositories are exporting to
-- the same special remote, there's no conflict as long as they move
-- forward in lock-step.
recordExport :: UUID -> ExportChange -> Annex ()
recordExport remoteuuid ec = do
c <- liftIO currentVectorClock
u <- getUUID
let val = ExportLog (newTreeish ec) remoteuuid
Annex.Branch.change exportLog $
showLogNew formatExportLog
. changeLog c u val
. M.mapWithKey (updateothers c u)
. parseLogNew parseExportLog
where
updateothers c u theiru le@(LogEntry _ (ExportLog t remoteuuid'))
| u == theiru || remoteuuid' /= remoteuuid || t `notElem` oldTreeish ec = le
| otherwise = LogEntry c (ExportLog (newTreeish ec) theiru)
data ExportLog = ExportLog Git.Ref UUID
formatExportLog :: ExportLog -> String
formatExportLog (ExportLog treeish remoteuuid) =
Git.fromRef treeish ++ " " ++ fromUUID remoteuuid
parseExportLog :: String -> Maybe ExportLog
parseExportLog s = case words s of
(t:u:[]) -> Just $ ExportLog (Git.Ref t) (toUUID u)
_ -> Nothing

View file

@ -185,8 +185,12 @@ content expression.
Tracks what trees have been exported to special remotes by
[[git-annex-export]](1).
Each line starts with a timestamp, then the uuid of the special remote,
followed by the sha1 of the tree that was exported to that special remote.
Each line starts with a timestamp, then the uuid of the repository
that exported to the special remote, followed by the sha1 of the tree
that was exported, and then by the uuid of the special remote. For example:
1317929189.157237s e605dca6-446a-11e0-8b2a-002170d25c55 bb08b1abd207aeecccbc7060e523b011d80cb35b 26339d22-446b-11e0-9101-002170d25c55
1317923000.251111s e605dca6-446a-11e0-8b2a-002170d25c55 7c7af825782b7c8706039b855c72709993542be4 26339d22-446b-11e0-9101-002170d25c55
(The exported tree is also grafted into the git-annex branch, at
`export.tree`, to prevent git from garbage collecting it. However, the head

View file

@ -17,14 +17,10 @@ there need to be a new interface in supported remotes?
Work is in progress. Todo list:
* Remember the previously exported tree (in git-annex branch, see design)
and use to make next export more efficient.
* Only export to remotes that were initialized to support it.
* Prevent using export remotes for key/value storage.
* When exporting, update location tracking to allow getting from exports,
* Use retrieveExport when getting from export remotes.
* Efficient handling of renames.
* Detect export conflicts (see design)
* Support export to aditional special remotes (S3 etc)
* Support export to external special remotes.
* If the same content is present in two different files, export
@ -36,4 +32,4 @@ Work is in progress. Todo list:
And, once one of the files is uploaded, the location log will
say the content is present, so the pass over the tree won't try to
upload the other file.
upload the other file. (See design for a fix for this.)

View file

@ -849,6 +849,7 @@ Executable git-annex
Logs.Config
Logs.Difference
Logs.Difference.Pure
Logs.Export
Logs.FsckResults
Logs.Group
Logs.Line