add back non-preferred files to imported tree
Prevents merging the import from deleting the non-preferred files from the branch it's merged into. adjustTree previously appended the new list of items to the old, which could result in it generating a tree with multiple files with the same name. That is not good and confuses some parts of git. Gave it a function to resolve such conflicts. That allowed dealing with the problem of what happens when the import contains some files (or subtrees) with the same name as files that were filtered out of the export. The files from the import win.
This commit is contained in:
parent
7d177b78e4
commit
97fd9da6e7
9 changed files with 185 additions and 47 deletions
|
@ -265,7 +265,12 @@ adjustCommit adj basis = do
|
|||
adjustTree :: Adjustment -> BasisBranch -> Annex Sha
|
||||
adjustTree adj (BasisBranch basis) = do
|
||||
let toadj = adjustTreeItem adj
|
||||
treesha <- Git.Tree.adjustTree toadj [] [] basis =<< Annex.gitRepo
|
||||
treesha <- Git.Tree.adjustTree
|
||||
toadj
|
||||
[]
|
||||
(\_old new -> new)
|
||||
[]
|
||||
basis =<< Annex.gitRepo
|
||||
return treesha
|
||||
|
||||
type CommitsPrevented = Git.LockFile.LockHandle
|
||||
|
@ -544,6 +549,7 @@ reverseAdjustedTree basis adj csha = do
|
|||
treesha <- Git.Tree.adjustTree
|
||||
(propchanges changes)
|
||||
adds'
|
||||
(\_old new -> new)
|
||||
(map Git.DiffTree.file removes)
|
||||
basis
|
||||
=<< Annex.gitRepo
|
||||
|
|
|
@ -103,7 +103,7 @@ buildImportCommit remote importtreeconfig importcommitconfig importable =
|
|||
go trackingcommit = do
|
||||
imported@(History finaltree _) <-
|
||||
buildImportTrees basetree subdir importable
|
||||
buildImportCommit' importcommitconfig trackingcommit imported >>= \case
|
||||
buildImportCommit' remote importcommitconfig trackingcommit imported >>= \case
|
||||
Just finalcommit -> do
|
||||
updatestate finaltree
|
||||
return (Just finalcommit)
|
||||
|
@ -160,8 +160,8 @@ buildImportCommit remote importtreeconfig importcommitconfig importable =
|
|||
Export.runExportDiffUpdater updater db oldtree finaltree
|
||||
Export.closeDb db
|
||||
|
||||
buildImportCommit' :: ImportCommitConfig -> Maybe Sha -> History Sha -> Annex (Maybe Sha)
|
||||
buildImportCommit' importcommitconfig mtrackingcommit imported@(History ti _) =
|
||||
buildImportCommit' :: Remote -> ImportCommitConfig -> Maybe Sha -> History Sha -> Annex (Maybe Sha)
|
||||
buildImportCommit' remote importcommitconfig mtrackingcommit imported@(History ti _) =
|
||||
case mtrackingcommit of
|
||||
Nothing -> Just <$> mkcommits imported
|
||||
Just trackingcommit -> do
|
||||
|
@ -176,7 +176,6 @@ buildImportCommit' importcommitconfig mtrackingcommit imported@(History ti _) =
|
|||
go _ Nothing = Just <$> mkcommits imported
|
||||
go trackingcommit (Just h)
|
||||
-- If the tracking branch head is a merge commit
|
||||
-- with a tree that matches the head of the history,
|
||||
-- and one side of the merge matches the history,
|
||||
-- nothing new needs to be committed.
|
||||
| t == ti && any (sametodepth imported) (S.toList s) = return Nothing
|
||||
|
@ -191,8 +190,9 @@ buildImportCommit' importcommitconfig mtrackingcommit imported@(History ti _) =
|
|||
Just oldimported@(History oldhc _) -> do
|
||||
let oldimportedtrees = mapHistory historyCommitTree oldimported
|
||||
mknewcommits oldhc oldimportedtrees imported
|
||||
ti' <- addBackNonPreferredContent remote ti
|
||||
Just <$> makeRemoteTrackingBranchMergeCommit'
|
||||
trackingcommit importedcommit ti
|
||||
trackingcommit importedcommit ti'
|
||||
where
|
||||
h'@(History t s) = mapHistory historyCommitTree h
|
||||
|
||||
|
@ -380,3 +380,30 @@ importKey (ContentIdentifier cid) size = stubKey
|
|||
, keyVariety = OtherKey "CID"
|
||||
, keySize = Just size
|
||||
}
|
||||
|
||||
{-- Export omits non-preferred content from the tree stored on the
|
||||
-- remote. So the import will normally have that content
|
||||
-- omitted (unless something else added files with the same names to the
|
||||
-- special remote).
|
||||
--
|
||||
-- That presents a problem: Merging the imported tree would result
|
||||
-- in deletion of the non-preferred content. To avoid that happening,
|
||||
-- this adds the non-preferred content back to the imported tree.
|
||||
--}
|
||||
addBackNonPreferredContent :: Remote -> Sha -> Annex Sha
|
||||
addBackNonPreferredContent remote importtree =
|
||||
getExportExcluded (Remote.uuid remote) >>= \case
|
||||
[] -> return importtree
|
||||
-- TODO: does this overwrite newly imported files
|
||||
-- with excluded files? CHECK
|
||||
excludedlist -> inRepo $
|
||||
adjustTree
|
||||
-- don't remove any
|
||||
(pure . Just)
|
||||
excludedlist
|
||||
-- if something was imported with the same
|
||||
-- name as a file that was previously
|
||||
-- excluded from import, use what was imported
|
||||
(\imported _excluded -> imported)
|
||||
[]
|
||||
importtree
|
||||
|
|
|
@ -50,6 +50,7 @@ module Annex.Locations (
|
|||
gitAnnexExportDbDir,
|
||||
gitAnnexExportLock,
|
||||
gitAnnexExportUpdateLock,
|
||||
gitAnnexExportExcludeLog,
|
||||
gitAnnexContentIdentifierDbDir,
|
||||
gitAnnexContentIdentifierLock,
|
||||
gitAnnexScheduleState,
|
||||
|
@ -361,6 +362,11 @@ gitAnnexExportLock u r = gitAnnexExportDbDir u r ++ ".lck"
|
|||
gitAnnexExportUpdateLock :: UUID -> Git.Repo -> FilePath
|
||||
gitAnnexExportUpdateLock u r = gitAnnexExportDbDir u r ++ ".upl"
|
||||
|
||||
{- Log file used to keep track of files that were in the tree exported to a
|
||||
- remote, but were excluded by its preferred content settings. -}
|
||||
gitAnnexExportExcludeLog :: UUID -> Git.Repo -> FilePath
|
||||
gitAnnexExportExcludeLog u r = gitAnnexDir r </> "export.ex" </> fromUUID u
|
||||
|
||||
{- Directory containing database used to record remote content ids.
|
||||
-
|
||||
- (This used to be "cid", but a problem with the database caused it to
|
||||
|
|
|
@ -442,19 +442,29 @@ removeEmptyDirectories r db loc ks
|
|||
-- expression.
|
||||
newtype PreferredFiltered t = PreferredFiltered t
|
||||
|
||||
-- | Filters the tree to files that are preferred content of the remote.
|
||||
--
|
||||
-- A log is written with files that were filtered out, so they can be added
|
||||
-- back in when importing from the remote.
|
||||
filterPreferredContent :: Remote -> Git.Ref -> Annex (PreferredFiltered Git.Ref)
|
||||
filterPreferredContent r tree = do
|
||||
filterPreferredContent r tree = logExportExcluded (uuid r) $ \logwriter -> do
|
||||
m <- preferredContentMap
|
||||
case M.lookup (uuid r) m of
|
||||
Just matcher | not (isEmpty matcher) ->
|
||||
PreferredFiltered <$> go matcher
|
||||
Just matcher | not (isEmpty matcher) -> do
|
||||
PreferredFiltered <$> go matcher logwriter
|
||||
_ -> return (PreferredFiltered tree)
|
||||
where
|
||||
go matcher = do
|
||||
go matcher logwriter = do
|
||||
g <- Annex.gitRepo
|
||||
Git.Tree.adjustTree (checkmatcher matcher) [] [] tree g
|
||||
Git.Tree.adjustTree
|
||||
(checkmatcher matcher logwriter)
|
||||
[]
|
||||
(\_old new -> new)
|
||||
[]
|
||||
tree
|
||||
g
|
||||
|
||||
checkmatcher matcher ti@(Git.Tree.TreeItem topf _ sha) =
|
||||
checkmatcher matcher logwriter ti@(Git.Tree.TreeItem topf _ sha) =
|
||||
catKey sha >>= \case
|
||||
Just k -> do
|
||||
-- Match filename relative to the
|
||||
|
@ -464,7 +474,9 @@ filterPreferredContent r tree = do
|
|||
let mi = MatchingKey k af
|
||||
ifM (checkMatcher' matcher mi mempty)
|
||||
( return (Just ti)
|
||||
, return Nothing
|
||||
, do
|
||||
() <- liftIO $ logwriter ti
|
||||
return Nothing
|
||||
)
|
||||
-- Always export non-annexed files.
|
||||
Nothing -> return (Just ti)
|
||||
|
|
|
@ -15,6 +15,7 @@ module Git.LsTree (
|
|||
lsTreeParams,
|
||||
lsTreeFiles,
|
||||
parseLsTree,
|
||||
formatLsTree,
|
||||
) where
|
||||
|
||||
import Common
|
||||
|
@ -90,3 +91,12 @@ parseLsTree l = TreeItem
|
|||
!f = drop 1 past_s
|
||||
!smode = fst $ Prelude.head $ readOct m
|
||||
!sfile = asTopFilePath $ Git.Filename.decode f
|
||||
|
||||
{- Inverse of parseLsTree -}
|
||||
formatLsTree :: TreeItem -> String
|
||||
formatLsTree ti = unwords
|
||||
[ showOct (mode ti) ""
|
||||
, typeobj ti
|
||||
, fromRef (sha ti)
|
||||
, getTopFilePath (file ti)
|
||||
]
|
||||
|
|
40
Git/Tree.hs
40
Git/Tree.hs
|
@ -15,6 +15,8 @@ module Git.Tree (
|
|||
recordTree',
|
||||
TreeItem(..),
|
||||
treeItemsToTree,
|
||||
treeItemToLsTreeItem,
|
||||
lsTreeItemToTreeItem,
|
||||
adjustTree,
|
||||
graftTree,
|
||||
graftTree',
|
||||
|
@ -127,6 +129,20 @@ data TreeItem = TreeItem TopFilePath FileMode Sha
|
|||
treeItemToTreeContent :: TreeItem -> TreeContent
|
||||
treeItemToTreeContent (TreeItem f m s) = TreeBlob f m s
|
||||
|
||||
treeItemToLsTreeItem :: TreeItem -> LsTree.TreeItem
|
||||
treeItemToLsTreeItem (TreeItem f mode sha) = LsTree.TreeItem
|
||||
{ LsTree.mode = mode
|
||||
, LsTree.typeobj = show BlobObject
|
||||
, LsTree.sha = sha
|
||||
, LsTree.file = f
|
||||
}
|
||||
|
||||
lsTreeItemToTreeItem :: LsTree.TreeItem -> TreeItem
|
||||
lsTreeItemToTreeItem ti = TreeItem
|
||||
(LsTree.file ti)
|
||||
(LsTree.mode ti)
|
||||
(LsTree.sha ti)
|
||||
|
||||
treeItemsToTree :: [TreeItem] -> Tree
|
||||
treeItemsToTree = go M.empty
|
||||
where
|
||||
|
@ -179,12 +195,16 @@ adjustTree
|
|||
-- Cannot move the item to a different tree.
|
||||
-> [TreeItem]
|
||||
-- ^ New items to add to the tree.
|
||||
-> (TreeContent -> TreeContent -> TreeContent)
|
||||
-- ^ When adding a new item to the tree and an item with the same
|
||||
-- name already exists, this function picks which to use.
|
||||
-- The first one is the item that was already in the tree.
|
||||
-> [TopFilePath]
|
||||
-- ^ Files to remove from the tree.
|
||||
-> Ref
|
||||
-> Repo
|
||||
-> m Sha
|
||||
adjustTree adjusttreeitem addtreeitems removefiles r repo =
|
||||
adjustTree adjusttreeitem addtreeitems resolveaddconflict removefiles r repo =
|
||||
withMkTreeHandle repo $ \h -> do
|
||||
(l, cleanup) <- liftIO $ lsTreeWithObjects LsTree.LsTreeRecursive r repo
|
||||
(l', _, _) <- go h False [] 1 inTopTree l
|
||||
|
@ -223,17 +243,31 @@ adjustTree adjusttreeitem addtreeitems removefiles r repo =
|
|||
adjustlist h depth ishere underhere l = do
|
||||
let (addhere, rest) = partition ishere addtreeitems
|
||||
let l' = filter (not . removed) $
|
||||
map treeItemToTreeContent addhere ++ l
|
||||
addoldnew l (map treeItemToTreeContent addhere)
|
||||
let inl i = any (\t -> beneathSubTree t i) l'
|
||||
let (Tree addunderhere) = flattenTree depth $ treeItemsToTree $
|
||||
filter (\i -> underhere i && not (inl i)) rest
|
||||
addunderhere' <- liftIO $ mapM (recordSubTree h) addunderhere
|
||||
return (addunderhere'++l')
|
||||
return (addoldnew l' addunderhere')
|
||||
|
||||
removeset = S.fromList $ map (normalise . gitPath) removefiles
|
||||
removed (TreeBlob f _ _) = S.member (normalise (gitPath f)) removeset
|
||||
removed _ = False
|
||||
|
||||
addoldnew [] new = new
|
||||
addoldnew old [] = old
|
||||
addoldnew old new = addoldnew' (M.fromList $ map (\i -> (mkk i, i)) old) new
|
||||
addoldnew' oldm (n:ns) =
|
||||
let k = mkk n
|
||||
in case M.lookup k oldm of
|
||||
Just o ->
|
||||
resolveaddconflict o n
|
||||
:
|
||||
addoldnew' (M.delete k oldm) ns
|
||||
Nothing -> n : addoldnew' oldm ns
|
||||
addoldnew' oldm [] = M.elems oldm
|
||||
mkk = normalise . gitPath
|
||||
|
||||
{- Grafts subtree into the basetree at the specified location, replacing
|
||||
- anything that the basetree already had at that location.
|
||||
-
|
||||
|
|
|
@ -15,6 +15,8 @@ module Logs.Export (
|
|||
incompleteExportedTreeishes,
|
||||
recordExport,
|
||||
recordExportBeginning,
|
||||
logExportExcluded,
|
||||
getExportExcluded,
|
||||
) where
|
||||
|
||||
import qualified Data.Map as M
|
||||
|
@ -26,6 +28,9 @@ import Git.Sha
|
|||
import Git.FilePath
|
||||
import Logs
|
||||
import Logs.MapLog
|
||||
import Logs.File
|
||||
import qualified Git.LsTree
|
||||
import qualified Git.Tree
|
||||
import Annex.UUID
|
||||
|
||||
import qualified Data.ByteString.Lazy as L
|
||||
|
@ -156,3 +161,23 @@ exportedParser = Exported <$> refparser <*> many refparser
|
|||
where
|
||||
refparser = (Git.Ref . decodeBS <$> A8.takeWhile1 (/= ' ') )
|
||||
<* ((const () <$> A8.char ' ') <|> A.endOfInput)
|
||||
|
||||
logExportExcluded :: UUID -> ((Git.Tree.TreeItem -> IO ()) -> Annex a) -> Annex a
|
||||
logExportExcluded u a = do
|
||||
logf <- fromRepo $ gitAnnexExportExcludeLog u
|
||||
withLogHandle logf $ \logh -> do
|
||||
liftIO $ hSetNewlineMode logh noNewlineTranslation
|
||||
a (writer logh)
|
||||
where
|
||||
writer logh = hPutStrLn logh
|
||||
. Git.LsTree.formatLsTree
|
||||
. Git.Tree.treeItemToLsTreeItem
|
||||
|
||||
getExportExcluded :: UUID -> Annex [Git.Tree.TreeItem]
|
||||
getExportExcluded u = do
|
||||
logf <- fromRepo $ gitAnnexExportExcludeLog u
|
||||
liftIO $ catchDefaultIO [] $
|
||||
(map parser . lines)
|
||||
<$> readFile logf
|
||||
where
|
||||
parser = Git.Tree.lsTreeItemToTreeItem . Git.LsTree.parseLsTree
|
||||
|
|
16
Logs/File.hs
16
Logs/File.hs
|
@ -5,11 +5,12 @@
|
|||
- Licensed under the GNU AGPL version 3 or higher.
|
||||
-}
|
||||
|
||||
module Logs.File (writeLogFile, appendLogFile, streamLogFile) where
|
||||
module Logs.File (writeLogFile, withLogHandle, appendLogFile, streamLogFile) where
|
||||
|
||||
import Annex.Common
|
||||
import Annex.Perms
|
||||
import Annex.LockFile
|
||||
import Annex.ReplaceFile
|
||||
import qualified Git
|
||||
import Utility.Tmp
|
||||
|
||||
|
@ -23,6 +24,19 @@ writeLogFile f c = createDirWhenNeeded f $ viaTmp writelog f c
|
|||
liftIO $ writeFile f' c'
|
||||
setAnnexFilePerm f'
|
||||
|
||||
-- | Runs the action with a handle connected to a temp file.
|
||||
-- The temp file replaces the log file once the action succeeds.
|
||||
withLogHandle :: FilePath -> (Handle -> Annex a) -> Annex a
|
||||
withLogHandle f a = do
|
||||
createAnnexDirectory (parentDir f)
|
||||
replaceFile f $ \tmp ->
|
||||
bracket (setup tmp) cleanup a
|
||||
where
|
||||
setup tmp = do
|
||||
setAnnexFilePerm tmp
|
||||
liftIO $ openFile tmp WriteMode
|
||||
cleanup h = liftIO $ hClose h
|
||||
|
||||
-- | Appends a line to a log file, first locking it to prevent
|
||||
-- concurrent writers.
|
||||
appendLogFile :: FilePath -> (Git.Repo -> FilePath) -> String -> Annex ()
|
||||
|
|
|
@ -12,7 +12,17 @@ been listened to.
|
|||
It seems doable to make `git annex export` honor whatever
|
||||
preferred content settings have been configured for the remote.
|
||||
(And `git annex sync --content` too.)
|
||||
> done
|
||||
|
||||
Logs.Export already records the tree that the user chose to export
|
||||
into the git-annex branch. Should excluded files be present in that
|
||||
tree or not? A good reason to do that is that if the preferred content
|
||||
settings change, the next export will pick up on the change, since
|
||||
the exported tree differs from the tree to be exported.
|
||||
So: Make export of a tree filter that tree through the preferred
|
||||
content of the remote, and use the new tree as the tree that really
|
||||
gets exported, recording it in the git-annex branch. But the remote
|
||||
tracking branch will point to the tree that the user chose to export.
|
||||
> done
|
||||
|
||||
Problem: A preferred content expression include=subdir/foo or
|
||||
|
@ -25,9 +35,13 @@ But that would be inconsistent behavior and could violate least surprise.
|
|||
It may be better to add a note that preferred content expressions include=
|
||||
exclude= etc match relative to the top of the exported tree when exporting
|
||||
a subtree.
|
||||
|
||||
> done
|
||||
|
||||
Problem: Each `git-annex sync --content` re-filters the exported tree.
|
||||
Unnecessary work. If there were a way to look up the original tree that
|
||||
corresponds with the filtered exported tree, that could be avoided.
|
||||
TODO
|
||||
|
||||
----
|
||||
|
||||
> `git annex import` of a tree from a special remote would also be
|
||||
|
@ -43,44 +57,34 @@ a subtree.
|
|||
|
||||
> Problem: If a tree is exported with eg, no .wav files, and then an import
|
||||
> is made from the remote, and necessarily lacks .wav files, the remote
|
||||
> tracking branch will have a tree with no .wav
|
||||
> tracking branch will be updated with a tree with no .wav
|
||||
> files. Merging that into master will delete all the .wav files.
|
||||
>
|
||||
> If the remote tracking branch has a disconnected history from master,
|
||||
> then git wouldn't delete files on
|
||||
> merge. But: This would prevent actual deletions made on the special
|
||||
> remote from happening in master too. So not a good idea.
|
||||
>
|
||||
> So it seems that, when updating the remote tracking branch for an import,
|
||||
> the files that were excluded from being exported to it need to be added
|
||||
> back in. So that tree of excluded files needs to somehow be kept track of
|
||||
> when exporting, or generated from records.
|
||||
> when exporting.
|
||||
>
|
||||
> To generated the excluded tree, would need the whole tree that was
|
||||
> exported, and the remote's preferred content expression at export time.
|
||||
> But expressions like inallgroup would also need to look at location
|
||||
> tracking info at that time. So it would need to remember the
|
||||
> head of the git-annex branch at export time and query against that
|
||||
> version of the branch for preferred content and location tracking.
|
||||
> (And use of `git-annex forget` could break it.)
|
||||
> Complication: The export might happen from one clone and then another
|
||||
> clone imports. The clones might not sync in between. Seems all that
|
||||
> the importing clone can rely on is its local state.
|
||||
>
|
||||
> Logs.Export already records the tree that the user chose to export
|
||||
> into the git-annex branch. Should excluded files be present in that
|
||||
> tree or not? If not, the diff from that tree to the remote tracking
|
||||
> branch will be the excluded files. Another good reason to do that
|
||||
> is that if the preferred content settings change, the next export
|
||||
> will pick up on the change, since the exported tree differs from the tree
|
||||
> to be exported.
|
||||
> If importing with no remote tracking branch existing yet, the import will
|
||||
> create one with a disconnected history, and so it's ok to import a tree
|
||||
> missing excluded files; merging a disconnected history won't delete
|
||||
> those files from master.
|
||||
>
|
||||
> So, plan: Make export of a tree filter that tree through the preferred
|
||||
> content of the remote, and use the new tree as the tree that really
|
||||
> gets exported, recording it in the git-annex branch. But the remote
|
||||
> tracking branch will point to the commit that the user chose to export.
|
||||
> > (done)
|
||||
> In the multiple clone case, the importing clone can't rely on information
|
||||
> from the exporting clone, but if the importing clone only ever imports
|
||||
> it's fine; if it exports it needs to take that into account for
|
||||
> subsequent imports.
|
||||
>
|
||||
> The import then just needs to diff between the last exported tree and
|
||||
> and the remote tracking branch to get the files that were excluded,
|
||||
> and add them back into the imported tree.
|
||||
> So, the only case where the excluded files
|
||||
> need to be added back is when there was a previous export done from
|
||||
> the current repo. The list of excluded files in the export can
|
||||
> be recorded locally and added back to the import.
|
||||
>
|
||||
> > done
|
||||
|
||||
---
|
||||
|
||||
|
|
Loading…
Reference in a new issue