implement export.log and resolve export conflicts
Incremental export updates work now too. This commit was sponsored by Anthony DeRobertis on Patreon.
This commit is contained in:
parent
bb08b1abd2
commit
978885247e
6 changed files with 126 additions and 40 deletions
|
@ -11,14 +11,16 @@ import Command
|
|||
import qualified Git
|
||||
import qualified Git.DiffTree
|
||||
import qualified Git.LsTree
|
||||
import qualified Git.Ref
|
||||
import Git.Types
|
||||
import Git.Sha
|
||||
import Git.FilePath
|
||||
import Git.Sha
|
||||
import Types.Key
|
||||
import Types.Remote
|
||||
import Annex.Content
|
||||
import Annex.CatFile
|
||||
import Logs.Location
|
||||
import Logs.Export
|
||||
import Messages.Progress
|
||||
import Utility.Tmp
|
||||
|
||||
|
@ -67,45 +69,46 @@ exportKey sha = mk <$> catKey sha
|
|||
seek :: ExportOptions -> CommandSeek
|
||||
seek o = do
|
||||
r <- getParsed (exportRemote o)
|
||||
let oldtreeish = emptyTree -- XXX temporary
|
||||
new <- fromMaybe (error "unknown tree") <$>
|
||||
inRepo (Git.Ref.sha (exportTreeish o))
|
||||
old <- getExport (uuid r)
|
||||
|
||||
-- First, diff the old and new trees and update all changed
|
||||
-- files in the export.
|
||||
(diff, cleanup) <- inRepo $
|
||||
Git.DiffTree.diffTreeRecursive oldtreeish (exportTreeish o)
|
||||
seekActions $ pure $ map (startDiff r) diff
|
||||
void $ liftIO cleanup
|
||||
when (length old > 1) $
|
||||
warning "Export conflict detected. Different trees have been exported to the same special remote. Resolving.."
|
||||
|
||||
-- First, diff the old and new trees and delete all changed
|
||||
-- files in the export. Every file that remains in the export will
|
||||
-- have the content from the new treeish.
|
||||
--
|
||||
-- (Also, when there was an export conflict, this resolves it.)
|
||||
forM_ old $ \oldtreesha -> do
|
||||
(diff, cleanup) <- inRepo $
|
||||
Git.DiffTree.diffTreeRecursive oldtreesha new
|
||||
seekActions $ pure $ map (startUnexport r) diff
|
||||
void $ liftIO cleanup
|
||||
|
||||
-- In case a previous export was incomplete, make a pass
|
||||
-- over the whole tree and export anything that is not
|
||||
-- yet exported.
|
||||
(l, cleanup') <- inRepo $ Git.LsTree.lsTree (exportTreeish o)
|
||||
seekActions $ pure $ map (start r) l
|
||||
-- Waiting until now to record the export guarantees that,
|
||||
-- if this export is interrupted, there are no files left over
|
||||
-- from a previous export, that are not part of this export.
|
||||
recordExport (uuid r) $ ExportChange
|
||||
{ oldTreeish = old
|
||||
, newTreeish = new
|
||||
}
|
||||
|
||||
-- Export everything that is not yet exported.
|
||||
(l, cleanup') <- inRepo $ Git.LsTree.lsTree new
|
||||
seekActions $ pure $ map (startExport r) l
|
||||
void $ liftIO cleanup'
|
||||
|
||||
startDiff :: Remote -> Git.DiffTree.DiffTreeItem -> CommandStart
|
||||
startDiff r diff
|
||||
| Git.DiffTree.dstsha diff == nullSha = do
|
||||
showStart "unexport" f
|
||||
oldk <- exportKey (Git.DiffTree.srcsha diff)
|
||||
next $ performUnexport r oldk loc
|
||||
| otherwise = do
|
||||
showStart "export" f
|
||||
k <- exportKey (Git.DiffTree.dstsha diff)
|
||||
next $ performExport r k (Git.DiffTree.dstsha diff) loc
|
||||
where
|
||||
loc = ExportLocation $ toInternalGitPath $
|
||||
getTopFilePath $ Git.DiffTree.file diff
|
||||
f = getTopFilePath $ Git.DiffTree.file diff
|
||||
|
||||
start :: Remote -> Git.LsTree.TreeItem -> CommandStart
|
||||
start r ti = do
|
||||
startExport :: Remote -> Git.LsTree.TreeItem -> CommandStart
|
||||
startExport r ti = do
|
||||
ek <- exportKey (Git.LsTree.sha ti)
|
||||
stopUnless (elem (uuid r) <$> loggedLocations (asKey ek)) $
|
||||
stopUnless (notElem (uuid r) <$> loggedLocations (asKey ek)) $ do
|
||||
showStart "export" f
|
||||
next $ performExport r ek (Git.LsTree.sha ti) loc
|
||||
where
|
||||
loc = ExportLocation $ toInternalGitPath $
|
||||
getTopFilePath $ Git.LsTree.file ti
|
||||
loc = ExportLocation $ toInternalGitPath f
|
||||
f = getTopFilePath $ Git.LsTree.file ti
|
||||
|
||||
performExport :: Remote -> ExportKey -> Sha -> ExportLocation -> CommandPerform
|
||||
performExport r ek contentsha loc = case storeExport r of
|
||||
|
@ -137,6 +140,17 @@ cleanupExport r ek = do
|
|||
logChange (asKey ek) (uuid r) InfoPresent
|
||||
return True
|
||||
|
||||
startUnexport :: Remote -> Git.DiffTree.DiffTreeItem -> CommandStart
|
||||
startUnexport r diff
|
||||
| Git.DiffTree.srcsha diff /= nullSha = do
|
||||
showStart "unexport" f
|
||||
oldk <- exportKey (Git.DiffTree.srcsha diff)
|
||||
next $ performUnexport r oldk loc
|
||||
| otherwise = stop
|
||||
where
|
||||
loc = ExportLocation $ toInternalGitPath f
|
||||
f = getTopFilePath $ Git.DiffTree.file diff
|
||||
|
||||
performUnexport :: Remote -> ExportKey -> ExportLocation -> CommandPerform
|
||||
performUnexport r ek loc = case removeExport r of
|
||||
Nothing -> error "remote does not support removing exported files"
|
||||
|
|
4
Logs.hs
4
Logs.hs
|
@ -42,6 +42,7 @@ topLevelUUIDBasedLogs =
|
|||
, activityLog
|
||||
, differenceLog
|
||||
, multicastLog
|
||||
, exportLog
|
||||
]
|
||||
|
||||
{- All the ways to get a key from a presence log file -}
|
||||
|
@ -97,6 +98,9 @@ differenceLog = "difference.log"
|
|||
multicastLog :: FilePath
|
||||
multicastLog = "multicast.log"
|
||||
|
||||
exportLog :: FilePath
|
||||
exportLog = "export.log"
|
||||
|
||||
{- The pathname of the location log file for a given key. -}
|
||||
locationLogFile :: GitConfig -> Key -> String
|
||||
locationLogFile config key = branchHashDir config key </> keyFile key ++ ".log"
|
||||
|
|
67
Logs/Export.hs
Normal file
67
Logs/Export.hs
Normal file
|
@ -0,0 +1,67 @@
|
|||
{- git-annex export log
|
||||
-
|
||||
- Copyright 2017 Joey Hess <id@joeyh.name>
|
||||
-
|
||||
- Licensed under the GNU GPL version 3 or higher.
|
||||
-}
|
||||
|
||||
module Logs.Export where
|
||||
|
||||
import qualified Data.Map as M
|
||||
|
||||
import Annex.Common
|
||||
import qualified Annex.Branch
|
||||
import qualified Git
|
||||
import Logs
|
||||
import Logs.UUIDBased
|
||||
import Annex.UUID
|
||||
|
||||
-- | Get the treeish that was exported to a special remote.
|
||||
--
|
||||
-- If the list contains multiple items, there was an export conflict,
|
||||
-- and different trees were exported to the same special remote.
|
||||
getExport :: UUID -> Annex [Git.Ref]
|
||||
getExport remoteuuid = nub . mapMaybe get . M.elems . simpleMap
|
||||
. parseLogNew parseExportLog
|
||||
<$> Annex.Branch.get exportLog
|
||||
where
|
||||
get (ExportLog t u)
|
||||
| u == remoteuuid = Just t
|
||||
| otherwise = Nothing
|
||||
|
||||
data ExportChange = ExportChange
|
||||
{ oldTreeish :: [Git.Ref]
|
||||
, newTreeish :: Git.Ref
|
||||
}
|
||||
|
||||
-- | Record a change in what's exported to a special remote.
|
||||
--
|
||||
-- Any entries in the log for the oldTreeish will be updated to the
|
||||
-- newTreeish. This way, when multiple repositories are exporting to
|
||||
-- the same special remote, there's no conflict as long as they move
|
||||
-- forward in lock-step.
|
||||
recordExport :: UUID -> ExportChange -> Annex ()
|
||||
recordExport remoteuuid ec = do
|
||||
c <- liftIO currentVectorClock
|
||||
u <- getUUID
|
||||
let val = ExportLog (newTreeish ec) remoteuuid
|
||||
Annex.Branch.change exportLog $
|
||||
showLogNew formatExportLog
|
||||
. changeLog c u val
|
||||
. M.mapWithKey (updateothers c u)
|
||||
. parseLogNew parseExportLog
|
||||
where
|
||||
updateothers c u theiru le@(LogEntry _ (ExportLog t remoteuuid'))
|
||||
| u == theiru || remoteuuid' /= remoteuuid || t `notElem` oldTreeish ec = le
|
||||
| otherwise = LogEntry c (ExportLog (newTreeish ec) theiru)
|
||||
|
||||
data ExportLog = ExportLog Git.Ref UUID
|
||||
|
||||
formatExportLog :: ExportLog -> String
|
||||
formatExportLog (ExportLog treeish remoteuuid) =
|
||||
Git.fromRef treeish ++ " " ++ fromUUID remoteuuid
|
||||
|
||||
parseExportLog :: String -> Maybe ExportLog
|
||||
parseExportLog s = case words s of
|
||||
(t:u:[]) -> Just $ ExportLog (Git.Ref t) (toUUID u)
|
||||
_ -> Nothing
|
|
@ -185,8 +185,12 @@ content expression.
|
|||
Tracks what trees have been exported to special remotes by
|
||||
[[git-annex-export]](1).
|
||||
|
||||
Each line starts with a timestamp, then the uuid of the special remote,
|
||||
followed by the sha1 of the tree that was exported to that special remote.
|
||||
Each line starts with a timestamp, then the uuid of the repository
|
||||
that exported to the special remote, followed by the sha1 of the tree
|
||||
that was exported, and then by the uuid of the special remote. For example:
|
||||
|
||||
1317929189.157237s e605dca6-446a-11e0-8b2a-002170d25c55 bb08b1abd207aeecccbc7060e523b011d80cb35b 26339d22-446b-11e0-9101-002170d25c55
|
||||
1317923000.251111s e605dca6-446a-11e0-8b2a-002170d25c55 7c7af825782b7c8706039b855c72709993542be4 26339d22-446b-11e0-9101-002170d25c55
|
||||
|
||||
(The exported tree is also grafted into the git-annex branch, at
|
||||
`export.tree`, to prevent git from garbage collecting it. However, the head
|
||||
|
|
|
@ -17,14 +17,10 @@ there need to be a new interface in supported remotes?
|
|||
|
||||
Work is in progress. Todo list:
|
||||
|
||||
* Remember the previously exported tree (in git-annex branch, see design)
|
||||
and use to make next export more efficient.
|
||||
* Only export to remotes that were initialized to support it.
|
||||
* Prevent using export remotes for key/value storage.
|
||||
* When exporting, update location tracking to allow getting from exports,
|
||||
* Use retrieveExport when getting from export remotes.
|
||||
* Efficient handling of renames.
|
||||
* Detect export conflicts (see design)
|
||||
* Support export to aditional special remotes (S3 etc)
|
||||
* Support export to external special remotes.
|
||||
* If the same content is present in two different files, export
|
||||
|
@ -36,4 +32,4 @@ Work is in progress. Todo list:
|
|||
|
||||
And, once one of the files is uploaded, the location log will
|
||||
say the content is present, so the pass over the tree won't try to
|
||||
upload the other file.
|
||||
upload the other file. (See design for a fix for this.)
|
||||
|
|
|
@ -849,6 +849,7 @@ Executable git-annex
|
|||
Logs.Config
|
||||
Logs.Difference
|
||||
Logs.Difference.Pure
|
||||
Logs.Export
|
||||
Logs.FsckResults
|
||||
Logs.Group
|
||||
Logs.Line
|
||||
|
|
Loading…
Reference in a new issue