implement export.log and resolve export conflicts
Incremental export updates work now too. This commit was sponsored by Anthony DeRobertis on Patreon.
This commit is contained in:
parent
bb08b1abd2
commit
978885247e
6 changed files with 126 additions and 40 deletions
|
@ -11,14 +11,16 @@ import Command
|
||||||
import qualified Git
|
import qualified Git
|
||||||
import qualified Git.DiffTree
|
import qualified Git.DiffTree
|
||||||
import qualified Git.LsTree
|
import qualified Git.LsTree
|
||||||
|
import qualified Git.Ref
|
||||||
import Git.Types
|
import Git.Types
|
||||||
import Git.Sha
|
|
||||||
import Git.FilePath
|
import Git.FilePath
|
||||||
|
import Git.Sha
|
||||||
import Types.Key
|
import Types.Key
|
||||||
import Types.Remote
|
import Types.Remote
|
||||||
import Annex.Content
|
import Annex.Content
|
||||||
import Annex.CatFile
|
import Annex.CatFile
|
||||||
import Logs.Location
|
import Logs.Location
|
||||||
|
import Logs.Export
|
||||||
import Messages.Progress
|
import Messages.Progress
|
||||||
import Utility.Tmp
|
import Utility.Tmp
|
||||||
|
|
||||||
|
@ -67,45 +69,46 @@ exportKey sha = mk <$> catKey sha
|
||||||
seek :: ExportOptions -> CommandSeek
|
seek :: ExportOptions -> CommandSeek
|
||||||
seek o = do
|
seek o = do
|
||||||
r <- getParsed (exportRemote o)
|
r <- getParsed (exportRemote o)
|
||||||
let oldtreeish = emptyTree -- XXX temporary
|
new <- fromMaybe (error "unknown tree") <$>
|
||||||
|
inRepo (Git.Ref.sha (exportTreeish o))
|
||||||
|
old <- getExport (uuid r)
|
||||||
|
|
||||||
-- First, diff the old and new trees and update all changed
|
when (length old > 1) $
|
||||||
-- files in the export.
|
warning "Export conflict detected. Different trees have been exported to the same special remote. Resolving.."
|
||||||
|
|
||||||
|
-- First, diff the old and new trees and delete all changed
|
||||||
|
-- files in the export. Every file that remains in the export will
|
||||||
|
-- have the content from the new treeish.
|
||||||
|
--
|
||||||
|
-- (Also, when there was an export conflict, this resolves it.)
|
||||||
|
forM_ old $ \oldtreesha -> do
|
||||||
(diff, cleanup) <- inRepo $
|
(diff, cleanup) <- inRepo $
|
||||||
Git.DiffTree.diffTreeRecursive oldtreeish (exportTreeish o)
|
Git.DiffTree.diffTreeRecursive oldtreesha new
|
||||||
seekActions $ pure $ map (startDiff r) diff
|
seekActions $ pure $ map (startUnexport r) diff
|
||||||
void $ liftIO cleanup
|
void $ liftIO cleanup
|
||||||
|
|
||||||
-- In case a previous export was incomplete, make a pass
|
-- Waiting until now to record the export guarantees that,
|
||||||
-- over the whole tree and export anything that is not
|
-- if this export is interrupted, there are no files left over
|
||||||
-- yet exported.
|
-- from a previous export, that are not part of this export.
|
||||||
(l, cleanup') <- inRepo $ Git.LsTree.lsTree (exportTreeish o)
|
recordExport (uuid r) $ ExportChange
|
||||||
seekActions $ pure $ map (start r) l
|
{ oldTreeish = old
|
||||||
|
, newTreeish = new
|
||||||
|
}
|
||||||
|
|
||||||
|
-- Export everything that is not yet exported.
|
||||||
|
(l, cleanup') <- inRepo $ Git.LsTree.lsTree new
|
||||||
|
seekActions $ pure $ map (startExport r) l
|
||||||
void $ liftIO cleanup'
|
void $ liftIO cleanup'
|
||||||
|
|
||||||
startDiff :: Remote -> Git.DiffTree.DiffTreeItem -> CommandStart
|
startExport :: Remote -> Git.LsTree.TreeItem -> CommandStart
|
||||||
startDiff r diff
|
startExport r ti = do
|
||||||
| Git.DiffTree.dstsha diff == nullSha = do
|
|
||||||
showStart "unexport" f
|
|
||||||
oldk <- exportKey (Git.DiffTree.srcsha diff)
|
|
||||||
next $ performUnexport r oldk loc
|
|
||||||
| otherwise = do
|
|
||||||
showStart "export" f
|
|
||||||
k <- exportKey (Git.DiffTree.dstsha diff)
|
|
||||||
next $ performExport r k (Git.DiffTree.dstsha diff) loc
|
|
||||||
where
|
|
||||||
loc = ExportLocation $ toInternalGitPath $
|
|
||||||
getTopFilePath $ Git.DiffTree.file diff
|
|
||||||
f = getTopFilePath $ Git.DiffTree.file diff
|
|
||||||
|
|
||||||
start :: Remote -> Git.LsTree.TreeItem -> CommandStart
|
|
||||||
start r ti = do
|
|
||||||
ek <- exportKey (Git.LsTree.sha ti)
|
ek <- exportKey (Git.LsTree.sha ti)
|
||||||
stopUnless (elem (uuid r) <$> loggedLocations (asKey ek)) $
|
stopUnless (notElem (uuid r) <$> loggedLocations (asKey ek)) $ do
|
||||||
|
showStart "export" f
|
||||||
next $ performExport r ek (Git.LsTree.sha ti) loc
|
next $ performExport r ek (Git.LsTree.sha ti) loc
|
||||||
where
|
where
|
||||||
loc = ExportLocation $ toInternalGitPath $
|
loc = ExportLocation $ toInternalGitPath f
|
||||||
getTopFilePath $ Git.LsTree.file ti
|
f = getTopFilePath $ Git.LsTree.file ti
|
||||||
|
|
||||||
performExport :: Remote -> ExportKey -> Sha -> ExportLocation -> CommandPerform
|
performExport :: Remote -> ExportKey -> Sha -> ExportLocation -> CommandPerform
|
||||||
performExport r ek contentsha loc = case storeExport r of
|
performExport r ek contentsha loc = case storeExport r of
|
||||||
|
@ -137,6 +140,17 @@ cleanupExport r ek = do
|
||||||
logChange (asKey ek) (uuid r) InfoPresent
|
logChange (asKey ek) (uuid r) InfoPresent
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
startUnexport :: Remote -> Git.DiffTree.DiffTreeItem -> CommandStart
|
||||||
|
startUnexport r diff
|
||||||
|
| Git.DiffTree.srcsha diff /= nullSha = do
|
||||||
|
showStart "unexport" f
|
||||||
|
oldk <- exportKey (Git.DiffTree.srcsha diff)
|
||||||
|
next $ performUnexport r oldk loc
|
||||||
|
| otherwise = stop
|
||||||
|
where
|
||||||
|
loc = ExportLocation $ toInternalGitPath f
|
||||||
|
f = getTopFilePath $ Git.DiffTree.file diff
|
||||||
|
|
||||||
performUnexport :: Remote -> ExportKey -> ExportLocation -> CommandPerform
|
performUnexport :: Remote -> ExportKey -> ExportLocation -> CommandPerform
|
||||||
performUnexport r ek loc = case removeExport r of
|
performUnexport r ek loc = case removeExport r of
|
||||||
Nothing -> error "remote does not support removing exported files"
|
Nothing -> error "remote does not support removing exported files"
|
||||||
|
|
4
Logs.hs
4
Logs.hs
|
@ -42,6 +42,7 @@ topLevelUUIDBasedLogs =
|
||||||
, activityLog
|
, activityLog
|
||||||
, differenceLog
|
, differenceLog
|
||||||
, multicastLog
|
, multicastLog
|
||||||
|
, exportLog
|
||||||
]
|
]
|
||||||
|
|
||||||
{- All the ways to get a key from a presence log file -}
|
{- All the ways to get a key from a presence log file -}
|
||||||
|
@ -97,6 +98,9 @@ differenceLog = "difference.log"
|
||||||
multicastLog :: FilePath
|
multicastLog :: FilePath
|
||||||
multicastLog = "multicast.log"
|
multicastLog = "multicast.log"
|
||||||
|
|
||||||
|
exportLog :: FilePath
|
||||||
|
exportLog = "export.log"
|
||||||
|
|
||||||
{- The pathname of the location log file for a given key. -}
|
{- The pathname of the location log file for a given key. -}
|
||||||
locationLogFile :: GitConfig -> Key -> String
|
locationLogFile :: GitConfig -> Key -> String
|
||||||
locationLogFile config key = branchHashDir config key </> keyFile key ++ ".log"
|
locationLogFile config key = branchHashDir config key </> keyFile key ++ ".log"
|
||||||
|
|
67
Logs/Export.hs
Normal file
67
Logs/Export.hs
Normal file
|
@ -0,0 +1,67 @@
|
||||||
|
{- git-annex export log
|
||||||
|
-
|
||||||
|
- Copyright 2017 Joey Hess <id@joeyh.name>
|
||||||
|
-
|
||||||
|
- Licensed under the GNU GPL version 3 or higher.
|
||||||
|
-}
|
||||||
|
|
||||||
|
module Logs.Export where
|
||||||
|
|
||||||
|
import qualified Data.Map as M
|
||||||
|
|
||||||
|
import Annex.Common
|
||||||
|
import qualified Annex.Branch
|
||||||
|
import qualified Git
|
||||||
|
import Logs
|
||||||
|
import Logs.UUIDBased
|
||||||
|
import Annex.UUID
|
||||||
|
|
||||||
|
-- | Get the treeish that was exported to a special remote.
|
||||||
|
--
|
||||||
|
-- If the list contains multiple items, there was an export conflict,
|
||||||
|
-- and different trees were exported to the same special remote.
|
||||||
|
getExport :: UUID -> Annex [Git.Ref]
|
||||||
|
getExport remoteuuid = nub . mapMaybe get . M.elems . simpleMap
|
||||||
|
. parseLogNew parseExportLog
|
||||||
|
<$> Annex.Branch.get exportLog
|
||||||
|
where
|
||||||
|
get (ExportLog t u)
|
||||||
|
| u == remoteuuid = Just t
|
||||||
|
| otherwise = Nothing
|
||||||
|
|
||||||
|
data ExportChange = ExportChange
|
||||||
|
{ oldTreeish :: [Git.Ref]
|
||||||
|
, newTreeish :: Git.Ref
|
||||||
|
}
|
||||||
|
|
||||||
|
-- | Record a change in what's exported to a special remote.
|
||||||
|
--
|
||||||
|
-- Any entries in the log for the oldTreeish will be updated to the
|
||||||
|
-- newTreeish. This way, when multiple repositories are exporting to
|
||||||
|
-- the same special remote, there's no conflict as long as they move
|
||||||
|
-- forward in lock-step.
|
||||||
|
recordExport :: UUID -> ExportChange -> Annex ()
|
||||||
|
recordExport remoteuuid ec = do
|
||||||
|
c <- liftIO currentVectorClock
|
||||||
|
u <- getUUID
|
||||||
|
let val = ExportLog (newTreeish ec) remoteuuid
|
||||||
|
Annex.Branch.change exportLog $
|
||||||
|
showLogNew formatExportLog
|
||||||
|
. changeLog c u val
|
||||||
|
. M.mapWithKey (updateothers c u)
|
||||||
|
. parseLogNew parseExportLog
|
||||||
|
where
|
||||||
|
updateothers c u theiru le@(LogEntry _ (ExportLog t remoteuuid'))
|
||||||
|
| u == theiru || remoteuuid' /= remoteuuid || t `notElem` oldTreeish ec = le
|
||||||
|
| otherwise = LogEntry c (ExportLog (newTreeish ec) theiru)
|
||||||
|
|
||||||
|
data ExportLog = ExportLog Git.Ref UUID
|
||||||
|
|
||||||
|
formatExportLog :: ExportLog -> String
|
||||||
|
formatExportLog (ExportLog treeish remoteuuid) =
|
||||||
|
Git.fromRef treeish ++ " " ++ fromUUID remoteuuid
|
||||||
|
|
||||||
|
parseExportLog :: String -> Maybe ExportLog
|
||||||
|
parseExportLog s = case words s of
|
||||||
|
(t:u:[]) -> Just $ ExportLog (Git.Ref t) (toUUID u)
|
||||||
|
_ -> Nothing
|
|
@ -185,8 +185,12 @@ content expression.
|
||||||
Tracks what trees have been exported to special remotes by
|
Tracks what trees have been exported to special remotes by
|
||||||
[[git-annex-export]](1).
|
[[git-annex-export]](1).
|
||||||
|
|
||||||
Each line starts with a timestamp, then the uuid of the special remote,
|
Each line starts with a timestamp, then the uuid of the repository
|
||||||
followed by the sha1 of the tree that was exported to that special remote.
|
that exported to the special remote, followed by the sha1 of the tree
|
||||||
|
that was exported, and then by the uuid of the special remote. For example:
|
||||||
|
|
||||||
|
1317929189.157237s e605dca6-446a-11e0-8b2a-002170d25c55 bb08b1abd207aeecccbc7060e523b011d80cb35b 26339d22-446b-11e0-9101-002170d25c55
|
||||||
|
1317923000.251111s e605dca6-446a-11e0-8b2a-002170d25c55 7c7af825782b7c8706039b855c72709993542be4 26339d22-446b-11e0-9101-002170d25c55
|
||||||
|
|
||||||
(The exported tree is also grafted into the git-annex branch, at
|
(The exported tree is also grafted into the git-annex branch, at
|
||||||
`export.tree`, to prevent git from garbage collecting it. However, the head
|
`export.tree`, to prevent git from garbage collecting it. However, the head
|
||||||
|
|
|
@ -17,14 +17,10 @@ there need to be a new interface in supported remotes?
|
||||||
|
|
||||||
Work is in progress. Todo list:
|
Work is in progress. Todo list:
|
||||||
|
|
||||||
* Remember the previously exported tree (in git-annex branch, see design)
|
|
||||||
and use to make next export more efficient.
|
|
||||||
* Only export to remotes that were initialized to support it.
|
* Only export to remotes that were initialized to support it.
|
||||||
* Prevent using export remotes for key/value storage.
|
* Prevent using export remotes for key/value storage.
|
||||||
* When exporting, update location tracking to allow getting from exports,
|
|
||||||
* Use retrieveExport when getting from export remotes.
|
* Use retrieveExport when getting from export remotes.
|
||||||
* Efficient handling of renames.
|
* Efficient handling of renames.
|
||||||
* Detect export conflicts (see design)
|
|
||||||
* Support export to aditional special remotes (S3 etc)
|
* Support export to aditional special remotes (S3 etc)
|
||||||
* Support export to external special remotes.
|
* Support export to external special remotes.
|
||||||
* If the same content is present in two different files, export
|
* If the same content is present in two different files, export
|
||||||
|
@ -36,4 +32,4 @@ Work is in progress. Todo list:
|
||||||
|
|
||||||
And, once one of the files is uploaded, the location log will
|
And, once one of the files is uploaded, the location log will
|
||||||
say the content is present, so the pass over the tree won't try to
|
say the content is present, so the pass over the tree won't try to
|
||||||
upload the other file.
|
upload the other file. (See design for a fix for this.)
|
||||||
|
|
|
@ -849,6 +849,7 @@ Executable git-annex
|
||||||
Logs.Config
|
Logs.Config
|
||||||
Logs.Difference
|
Logs.Difference
|
||||||
Logs.Difference.Pure
|
Logs.Difference.Pure
|
||||||
|
Logs.Export
|
||||||
Logs.FsckResults
|
Logs.FsckResults
|
||||||
Logs.Group
|
Logs.Group
|
||||||
Logs.Line
|
Logs.Line
|
||||||
|
|
Loading…
Reference in a new issue