From 97fd9da6e723b602cde277c5449f56375e458b67 Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Mon, 20 May 2019 16:37:04 -0400 Subject: [PATCH] add back non-preferred files to imported tree Prevents merging the import from deleting the non-preferred files from the branch it's merged into. adjustTree previously appended the new list of items to the old, which could result in it generating a tree with multiple files with the same name. That is not good and confuses some parts of git. Gave it a function to resolve such conflicts. That allowed dealing with the problem of what happens when the import contains some files (or subtrees) with the same name as files that were filtered out of the export. The files from the import win. --- Annex/AdjustedBranch.hs | 8 +++- Annex/Import.hs | 37 +++++++++++++-- Annex/Locations.hs | 6 +++ Command/Export.hs | 26 ++++++++--- Git/LsTree.hs | 10 ++++ Git/Tree.hs | 40 ++++++++++++++-- Logs/Export.hs | 25 ++++++++++ Logs/File.hs | 16 ++++++- doc/todo/export_preferred_content.mdwn | 64 ++++++++++++++------------ 9 files changed, 185 insertions(+), 47 deletions(-) diff --git a/Annex/AdjustedBranch.hs b/Annex/AdjustedBranch.hs index a06bc9cd6d..9cc449b0d9 100644 --- a/Annex/AdjustedBranch.hs +++ b/Annex/AdjustedBranch.hs @@ -265,7 +265,12 @@ adjustCommit adj basis = do adjustTree :: Adjustment -> BasisBranch -> Annex Sha adjustTree adj (BasisBranch basis) = do let toadj = adjustTreeItem adj - treesha <- Git.Tree.adjustTree toadj [] [] basis =<< Annex.gitRepo + treesha <- Git.Tree.adjustTree + toadj + [] + (\_old new -> new) + [] + basis =<< Annex.gitRepo return treesha type CommitsPrevented = Git.LockFile.LockHandle @@ -544,6 +549,7 @@ reverseAdjustedTree basis adj csha = do treesha <- Git.Tree.adjustTree (propchanges changes) adds' + (\_old new -> new) (map Git.DiffTree.file removes) basis =<< Annex.gitRepo diff --git a/Annex/Import.hs b/Annex/Import.hs index 006bd02999..e8a7baddbf 100644 --- a/Annex/Import.hs +++ b/Annex/Import.hs @@ -103,7 +103,7 @@ buildImportCommit remote importtreeconfig importcommitconfig importable = go trackingcommit = do imported@(History finaltree _) <- buildImportTrees basetree subdir importable - buildImportCommit' importcommitconfig trackingcommit imported >>= \case + buildImportCommit' remote importcommitconfig trackingcommit imported >>= \case Just finalcommit -> do updatestate finaltree return (Just finalcommit) @@ -160,8 +160,8 @@ buildImportCommit remote importtreeconfig importcommitconfig importable = Export.runExportDiffUpdater updater db oldtree finaltree Export.closeDb db -buildImportCommit' :: ImportCommitConfig -> Maybe Sha -> History Sha -> Annex (Maybe Sha) -buildImportCommit' importcommitconfig mtrackingcommit imported@(History ti _) = +buildImportCommit' :: Remote -> ImportCommitConfig -> Maybe Sha -> History Sha -> Annex (Maybe Sha) +buildImportCommit' remote importcommitconfig mtrackingcommit imported@(History ti _) = case mtrackingcommit of Nothing -> Just <$> mkcommits imported Just trackingcommit -> do @@ -176,7 +176,6 @@ buildImportCommit' importcommitconfig mtrackingcommit imported@(History ti _) = go _ Nothing = Just <$> mkcommits imported go trackingcommit (Just h) -- If the tracking branch head is a merge commit - -- with a tree that matches the head of the history, -- and one side of the merge matches the history, -- nothing new needs to be committed. | t == ti && any (sametodepth imported) (S.toList s) = return Nothing @@ -191,8 +190,9 @@ buildImportCommit' importcommitconfig mtrackingcommit imported@(History ti _) = Just oldimported@(History oldhc _) -> do let oldimportedtrees = mapHistory historyCommitTree oldimported mknewcommits oldhc oldimportedtrees imported + ti' <- addBackNonPreferredContent remote ti Just <$> makeRemoteTrackingBranchMergeCommit' - trackingcommit importedcommit ti + trackingcommit importedcommit ti' where h'@(History t s) = mapHistory historyCommitTree h @@ -380,3 +380,30 @@ importKey (ContentIdentifier cid) size = stubKey , keyVariety = OtherKey "CID" , keySize = Just size } + +{-- Export omits non-preferred content from the tree stored on the + -- remote. So the import will normally have that content + -- omitted (unless something else added files with the same names to the + -- special remote). + -- + -- That presents a problem: Merging the imported tree would result + -- in deletion of the non-preferred content. To avoid that happening, + -- this adds the non-preferred content back to the imported tree. + --} +addBackNonPreferredContent :: Remote -> Sha -> Annex Sha +addBackNonPreferredContent remote importtree = + getExportExcluded (Remote.uuid remote) >>= \case + [] -> return importtree + -- TODO: does this overwrite newly imported files + -- with excluded files? CHECK + excludedlist -> inRepo $ + adjustTree + -- don't remove any + (pure . Just) + excludedlist + -- if something was imported with the same + -- name as a file that was previously + -- excluded from import, use what was imported + (\imported _excluded -> imported) + [] + importtree diff --git a/Annex/Locations.hs b/Annex/Locations.hs index 295809c7e2..0dfd1387c7 100644 --- a/Annex/Locations.hs +++ b/Annex/Locations.hs @@ -50,6 +50,7 @@ module Annex.Locations ( gitAnnexExportDbDir, gitAnnexExportLock, gitAnnexExportUpdateLock, + gitAnnexExportExcludeLog, gitAnnexContentIdentifierDbDir, gitAnnexContentIdentifierLock, gitAnnexScheduleState, @@ -361,6 +362,11 @@ gitAnnexExportLock u r = gitAnnexExportDbDir u r ++ ".lck" gitAnnexExportUpdateLock :: UUID -> Git.Repo -> FilePath gitAnnexExportUpdateLock u r = gitAnnexExportDbDir u r ++ ".upl" +{- Log file used to keep track of files that were in the tree exported to a + - remote, but were excluded by its preferred content settings. -} +gitAnnexExportExcludeLog :: UUID -> Git.Repo -> FilePath +gitAnnexExportExcludeLog u r = gitAnnexDir r "export.ex" fromUUID u + {- Directory containing database used to record remote content ids. - - (This used to be "cid", but a problem with the database caused it to diff --git a/Command/Export.hs b/Command/Export.hs index 64faa9a5db..192c3157d1 100644 --- a/Command/Export.hs +++ b/Command/Export.hs @@ -442,19 +442,29 @@ removeEmptyDirectories r db loc ks -- expression. newtype PreferredFiltered t = PreferredFiltered t +-- | Filters the tree to files that are preferred content of the remote. +-- +-- A log is written with files that were filtered out, so they can be added +-- back in when importing from the remote. filterPreferredContent :: Remote -> Git.Ref -> Annex (PreferredFiltered Git.Ref) -filterPreferredContent r tree = do +filterPreferredContent r tree = logExportExcluded (uuid r) $ \logwriter -> do m <- preferredContentMap case M.lookup (uuid r) m of - Just matcher | not (isEmpty matcher) -> - PreferredFiltered <$> go matcher + Just matcher | not (isEmpty matcher) -> do + PreferredFiltered <$> go matcher logwriter _ -> return (PreferredFiltered tree) where - go matcher = do + go matcher logwriter = do g <- Annex.gitRepo - Git.Tree.adjustTree (checkmatcher matcher) [] [] tree g + Git.Tree.adjustTree + (checkmatcher matcher logwriter) + [] + (\_old new -> new) + [] + tree + g - checkmatcher matcher ti@(Git.Tree.TreeItem topf _ sha) = + checkmatcher matcher logwriter ti@(Git.Tree.TreeItem topf _ sha) = catKey sha >>= \case Just k -> do -- Match filename relative to the @@ -464,7 +474,9 @@ filterPreferredContent r tree = do let mi = MatchingKey k af ifM (checkMatcher' matcher mi mempty) ( return (Just ti) - , return Nothing + , do + () <- liftIO $ logwriter ti + return Nothing ) -- Always export non-annexed files. Nothing -> return (Just ti) diff --git a/Git/LsTree.hs b/Git/LsTree.hs index 0fdb35a36a..8ca805402b 100644 --- a/Git/LsTree.hs +++ b/Git/LsTree.hs @@ -15,6 +15,7 @@ module Git.LsTree ( lsTreeParams, lsTreeFiles, parseLsTree, + formatLsTree, ) where import Common @@ -90,3 +91,12 @@ parseLsTree l = TreeItem !f = drop 1 past_s !smode = fst $ Prelude.head $ readOct m !sfile = asTopFilePath $ Git.Filename.decode f + +{- Inverse of parseLsTree -} +formatLsTree :: TreeItem -> String +formatLsTree ti = unwords + [ showOct (mode ti) "" + , typeobj ti + , fromRef (sha ti) + , getTopFilePath (file ti) + ] diff --git a/Git/Tree.hs b/Git/Tree.hs index 39e5c56828..70a55bf21b 100644 --- a/Git/Tree.hs +++ b/Git/Tree.hs @@ -15,6 +15,8 @@ module Git.Tree ( recordTree', TreeItem(..), treeItemsToTree, + treeItemToLsTreeItem, + lsTreeItemToTreeItem, adjustTree, graftTree, graftTree', @@ -127,6 +129,20 @@ data TreeItem = TreeItem TopFilePath FileMode Sha treeItemToTreeContent :: TreeItem -> TreeContent treeItemToTreeContent (TreeItem f m s) = TreeBlob f m s +treeItemToLsTreeItem :: TreeItem -> LsTree.TreeItem +treeItemToLsTreeItem (TreeItem f mode sha) = LsTree.TreeItem + { LsTree.mode = mode + , LsTree.typeobj = show BlobObject + , LsTree.sha = sha + , LsTree.file = f + } + +lsTreeItemToTreeItem :: LsTree.TreeItem -> TreeItem +lsTreeItemToTreeItem ti = TreeItem + (LsTree.file ti) + (LsTree.mode ti) + (LsTree.sha ti) + treeItemsToTree :: [TreeItem] -> Tree treeItemsToTree = go M.empty where @@ -179,12 +195,16 @@ adjustTree -- Cannot move the item to a different tree. -> [TreeItem] -- ^ New items to add to the tree. + -> (TreeContent -> TreeContent -> TreeContent) + -- ^ When adding a new item to the tree and an item with the same + -- name already exists, this function picks which to use. + -- The first one is the item that was already in the tree. -> [TopFilePath] -- ^ Files to remove from the tree. -> Ref -> Repo -> m Sha -adjustTree adjusttreeitem addtreeitems removefiles r repo = +adjustTree adjusttreeitem addtreeitems resolveaddconflict removefiles r repo = withMkTreeHandle repo $ \h -> do (l, cleanup) <- liftIO $ lsTreeWithObjects LsTree.LsTreeRecursive r repo (l', _, _) <- go h False [] 1 inTopTree l @@ -223,17 +243,31 @@ adjustTree adjusttreeitem addtreeitems removefiles r repo = adjustlist h depth ishere underhere l = do let (addhere, rest) = partition ishere addtreeitems let l' = filter (not . removed) $ - map treeItemToTreeContent addhere ++ l + addoldnew l (map treeItemToTreeContent addhere) let inl i = any (\t -> beneathSubTree t i) l' let (Tree addunderhere) = flattenTree depth $ treeItemsToTree $ filter (\i -> underhere i && not (inl i)) rest addunderhere' <- liftIO $ mapM (recordSubTree h) addunderhere - return (addunderhere'++l') + return (addoldnew l' addunderhere') removeset = S.fromList $ map (normalise . gitPath) removefiles removed (TreeBlob f _ _) = S.member (normalise (gitPath f)) removeset removed _ = False + addoldnew [] new = new + addoldnew old [] = old + addoldnew old new = addoldnew' (M.fromList $ map (\i -> (mkk i, i)) old) new + addoldnew' oldm (n:ns) = + let k = mkk n + in case M.lookup k oldm of + Just o -> + resolveaddconflict o n + : + addoldnew' (M.delete k oldm) ns + Nothing -> n : addoldnew' oldm ns + addoldnew' oldm [] = M.elems oldm + mkk = normalise . gitPath + {- Grafts subtree into the basetree at the specified location, replacing - anything that the basetree already had at that location. - diff --git a/Logs/Export.hs b/Logs/Export.hs index 1bde4a39ba..6ab1c231c7 100644 --- a/Logs/Export.hs +++ b/Logs/Export.hs @@ -15,6 +15,8 @@ module Logs.Export ( incompleteExportedTreeishes, recordExport, recordExportBeginning, + logExportExcluded, + getExportExcluded, ) where import qualified Data.Map as M @@ -26,6 +28,9 @@ import Git.Sha import Git.FilePath import Logs import Logs.MapLog +import Logs.File +import qualified Git.LsTree +import qualified Git.Tree import Annex.UUID import qualified Data.ByteString.Lazy as L @@ -156,3 +161,23 @@ exportedParser = Exported <$> refparser <*> many refparser where refparser = (Git.Ref . decodeBS <$> A8.takeWhile1 (/= ' ') ) <* ((const () <$> A8.char ' ') <|> A.endOfInput) + +logExportExcluded :: UUID -> ((Git.Tree.TreeItem -> IO ()) -> Annex a) -> Annex a +logExportExcluded u a = do + logf <- fromRepo $ gitAnnexExportExcludeLog u + withLogHandle logf $ \logh -> do + liftIO $ hSetNewlineMode logh noNewlineTranslation + a (writer logh) + where + writer logh = hPutStrLn logh + . Git.LsTree.formatLsTree + . Git.Tree.treeItemToLsTreeItem + +getExportExcluded :: UUID -> Annex [Git.Tree.TreeItem] +getExportExcluded u = do + logf <- fromRepo $ gitAnnexExportExcludeLog u + liftIO $ catchDefaultIO [] $ + (map parser . lines) + <$> readFile logf + where + parser = Git.Tree.lsTreeItemToTreeItem . Git.LsTree.parseLsTree diff --git a/Logs/File.hs b/Logs/File.hs index 3226b23067..72f22fdd24 100644 --- a/Logs/File.hs +++ b/Logs/File.hs @@ -5,11 +5,12 @@ - Licensed under the GNU AGPL version 3 or higher. -} -module Logs.File (writeLogFile, appendLogFile, streamLogFile) where +module Logs.File (writeLogFile, withLogHandle, appendLogFile, streamLogFile) where import Annex.Common import Annex.Perms import Annex.LockFile +import Annex.ReplaceFile import qualified Git import Utility.Tmp @@ -23,6 +24,19 @@ writeLogFile f c = createDirWhenNeeded f $ viaTmp writelog f c liftIO $ writeFile f' c' setAnnexFilePerm f' +-- | Runs the action with a handle connected to a temp file. +-- The temp file replaces the log file once the action succeeds. +withLogHandle :: FilePath -> (Handle -> Annex a) -> Annex a +withLogHandle f a = do + createAnnexDirectory (parentDir f) + replaceFile f $ \tmp -> + bracket (setup tmp) cleanup a + where + setup tmp = do + setAnnexFilePerm tmp + liftIO $ openFile tmp WriteMode + cleanup h = liftIO $ hClose h + -- | Appends a line to a log file, first locking it to prevent -- concurrent writers. appendLogFile :: FilePath -> (Git.Repo -> FilePath) -> String -> Annex () diff --git a/doc/todo/export_preferred_content.mdwn b/doc/todo/export_preferred_content.mdwn index cc8f01a33d..f7fb911fdc 100644 --- a/doc/todo/export_preferred_content.mdwn +++ b/doc/todo/export_preferred_content.mdwn @@ -12,7 +12,17 @@ been listened to. It seems doable to make `git annex export` honor whatever preferred content settings have been configured for the remote. (And `git annex sync --content` too.) +> done +Logs.Export already records the tree that the user chose to export +into the git-annex branch. Should excluded files be present in that +tree or not? A good reason to do that is that if the preferred content +settings change, the next export will pick up on the change, since +the exported tree differs from the tree to be exported. +So: Make export of a tree filter that tree through the preferred +content of the remote, and use the new tree as the tree that really +gets exported, recording it in the git-annex branch. But the remote +tracking branch will point to the tree that the user chose to export. > done Problem: A preferred content expression include=subdir/foo or @@ -25,9 +35,13 @@ But that would be inconsistent behavior and could violate least surprise. It may be better to add a note that preferred content expressions include= exclude= etc match relative to the top of the exported tree when exporting a subtree. - > done +Problem: Each `git-annex sync --content` re-filters the exported tree. +Unnecessary work. If there were a way to look up the original tree that +corresponds with the filtered exported tree, that could be avoided. +TODO + ---- > `git annex import` of a tree from a special remote would also be @@ -43,44 +57,34 @@ a subtree. > Problem: If a tree is exported with eg, no .wav files, and then an import > is made from the remote, and necessarily lacks .wav files, the remote -> tracking branch will have a tree with no .wav +> tracking branch will be updated with a tree with no .wav > files. Merging that into master will delete all the .wav files. > -> If the remote tracking branch has a disconnected history from master, -> then git wouldn't delete files on -> merge. But: This would prevent actual deletions made on the special -> remote from happening in master too. So not a good idea. -> > So it seems that, when updating the remote tracking branch for an import, > the files that were excluded from being exported to it need to be added > back in. So that tree of excluded files needs to somehow be kept track of -> when exporting, or generated from records. +> when exporting. +> +> Complication: The export might happen from one clone and then another +> clone imports. The clones might not sync in between. Seems all that +> the importing clone can rely on is its local state. > -> To generated the excluded tree, would need the whole tree that was -> exported, and the remote's preferred content expression at export time. -> But expressions like inallgroup would also need to look at location -> tracking info at that time. So it would need to remember the -> head of the git-annex branch at export time and query against that -> version of the branch for preferred content and location tracking. -> (And use of `git-annex forget` could break it.) +> If importing with no remote tracking branch existing yet, the import will +> create one with a disconnected history, and so it's ok to import a tree +> missing excluded files; merging a disconnected history won't delete +> those files from master. > -> Logs.Export already records the tree that the user chose to export -> into the git-annex branch. Should excluded files be present in that -> tree or not? If not, the diff from that tree to the remote tracking -> branch will be the excluded files. Another good reason to do that -> is that if the preferred content settings change, the next export -> will pick up on the change, since the exported tree differs from the tree -> to be exported. +> In the multiple clone case, the importing clone can't rely on information +> from the exporting clone, but if the importing clone only ever imports +> it's fine; if it exports it needs to take that into account for +> subsequent imports. > -> So, plan: Make export of a tree filter that tree through the preferred -> content of the remote, and use the new tree as the tree that really -> gets exported, recording it in the git-annex branch. But the remote -> tracking branch will point to the commit that the user chose to export. -> > (done) +> So, the only case where the excluded files +> need to be added back is when there was a previous export done from +> the current repo. The list of excluded files in the export can +> be recorded locally and added back to the import. > -> The import then just needs to diff between the last exported tree and -> and the remote tracking branch to get the files that were excluded, -> and add them back into the imported tree. +> > done ---