correctly update keys db in merge conflict
This is quite a subtle edge case, see the bug report for full details. The second git diff is needed only when there's a merge conflict. It would be possible to speed it up marginally by using --diff-filter=Unmerged, but probably not enough to bother with. Sponsored-by: Graham Spencer on Patreon
This commit is contained in:
		
					parent
					
						
							
								da24034331
							
						
					
				
			
			
				commit
				
					
						0101363eb8
					
				
			
		
					 2 changed files with 87 additions and 23 deletions
				
			
		| 
						 | 
				
			
			@ -245,7 +245,8 @@ reconcileStaged qh = do
 | 
			
		|||
	go cur indexcache (Just newtree) = do
 | 
			
		||||
		oldtree <- getoldtree
 | 
			
		||||
		when (oldtree /= newtree) $ do
 | 
			
		||||
			updatetodiff (fromRef oldtree) (fromRef newtree)
 | 
			
		||||
			updatetodiff (Just (fromRef oldtree)) (fromRef newtree) procdiff
 | 
			
		||||
				>>= flushdb . fst
 | 
			
		||||
			liftIO $ writeFile indexcache $ showInodeCache cur
 | 
			
		||||
			-- Storing the tree in a ref makes sure it does not
 | 
			
		||||
			-- get garbage collected, and is available to diff
 | 
			
		||||
| 
						 | 
				
			
			@ -253,22 +254,34 @@ reconcileStaged qh = do
 | 
			
		|||
			inRepo $ update' lastindexref newtree
 | 
			
		||||
	-- git write-tree will fail if the index is locked or when there is
 | 
			
		||||
	-- a merge conflict. To get up-to-date with the current index, 
 | 
			
		||||
	-- diff --cached with the old index tree. The current index tree
 | 
			
		||||
	-- diff --staged with the old index tree. The current index tree
 | 
			
		||||
	-- is not known, so not recorded, and the inode cache is not updated,
 | 
			
		||||
	-- so the next time git-annex runs, it will diff again, even
 | 
			
		||||
	-- if the index is unchanged.
 | 
			
		||||
	--
 | 
			
		||||
	-- When there is a merge conflict, that will not see the new local
 | 
			
		||||
	-- version of the files that are conflicted. So a second diff
 | 
			
		||||
	-- is done, with --staged but no old tree.
 | 
			
		||||
	go _ _ Nothing = do
 | 
			
		||||
		oldtree <- getoldtree
 | 
			
		||||
		updatetodiff (fromRef oldtree) "--cached"
 | 
			
		||||
		(changed, conflicted) <- updatetodiff
 | 
			
		||||
			(Just (fromRef oldtree)) "--staged" procdiff
 | 
			
		||||
		changed' <- if conflicted
 | 
			
		||||
			then fst <$> updatetodiff Nothing "--staged"
 | 
			
		||||
				procmergeconflictdiff
 | 
			
		||||
			else pure False
 | 
			
		||||
		flushdb (changed || changed')
 | 
			
		||||
		
 | 
			
		||||
	updatetodiff old new = do
 | 
			
		||||
	updatetodiff old new processor = do
 | 
			
		||||
		(l, cleanup) <- inRepo $ pipeNullSplit' $ diff old new
 | 
			
		||||
		changed <- procdiff l False
 | 
			
		||||
		void $ liftIO cleanup
 | 
			
		||||
		-- Flush database changes immediately
 | 
			
		||||
		-- so other processes can see them.
 | 
			
		||||
		when changed $
 | 
			
		||||
			liftIO $ H.flushDbQueue qh
 | 
			
		||||
		processor l False False
 | 
			
		||||
			`finally` void (liftIO cleanup)
 | 
			
		||||
	
 | 
			
		||||
	-- Flush database changes immediately
 | 
			
		||||
	-- so other processes can see them.
 | 
			
		||||
	flushdb changed
 | 
			
		||||
		| changed = liftIO $ H.flushDbQueue qh
 | 
			
		||||
		| otherwise = noop
 | 
			
		||||
	
 | 
			
		||||
	-- Avoid running smudge clean filter, which would block trying to
 | 
			
		||||
	-- access the locked database. git write-tree sometimes calls it,
 | 
			
		||||
| 
						 | 
				
			
			@ -288,8 +301,8 @@ reconcileStaged qh = do
 | 
			
		|||
		-- (The -G option may make it be used otherwise.)
 | 
			
		||||
		[ Param "-c", Param "diff.external="
 | 
			
		||||
		, Param "diff"
 | 
			
		||||
		, Param old
 | 
			
		||||
		, Param new
 | 
			
		||||
		] ++ maybeToList (Param <$> old) ++
 | 
			
		||||
		[ Param new
 | 
			
		||||
		, Param "--raw"
 | 
			
		||||
		, Param "-z"
 | 
			
		||||
		, Param "--no-abbrev"
 | 
			
		||||
| 
						 | 
				
			
			@ -307,12 +320,13 @@ reconcileStaged qh = do
 | 
			
		|||
		, Param "--no-ext-diff"
 | 
			
		||||
		]
 | 
			
		||||
	
 | 
			
		||||
	procdiff (info:file:rest) changed
 | 
			
		||||
	procdiff (info:file:rest) changed conflicted
 | 
			
		||||
		| ":" `S.isPrefixOf` info = case S8.words info of
 | 
			
		||||
			(_colonsrcmode:dstmode:srcsha:dstsha:status:[]) -> do
 | 
			
		||||
				let conflicted' = status == "U"
 | 
			
		||||
				-- avoid removing associated file when
 | 
			
		||||
				-- there is a merge conflict
 | 
			
		||||
				removed <- if status /= "U" 
 | 
			
		||||
				removed <- if not conflicted'
 | 
			
		||||
					then catKey (Ref srcsha) >>= \case
 | 
			
		||||
						Just oldkey -> do
 | 
			
		||||
							liftIO $ SQL.removeAssociatedFile oldkey
 | 
			
		||||
| 
						 | 
				
			
			@ -330,9 +344,32 @@ reconcileStaged qh = do
 | 
			
		|||
							reconcilerace (asTopFilePath file) key
 | 
			
		||||
						return True
 | 
			
		||||
					Nothing -> return False
 | 
			
		||||
				procdiff rest (changed || removed || added)
 | 
			
		||||
			_ -> return changed -- parse failed
 | 
			
		||||
	procdiff _ changed = return changed
 | 
			
		||||
				procdiff rest
 | 
			
		||||
					(changed || removed || added)
 | 
			
		||||
					(conflicted || conflicted')
 | 
			
		||||
			_ -> return (changed, conflicted) -- parse failed
 | 
			
		||||
	procdiff _ changed conflicted = return (changed, conflicted)
 | 
			
		||||
	
 | 
			
		||||
	-- Processing a diff --index when there is a merge conflict.
 | 
			
		||||
	-- This diff will have the new local version of a file as the
 | 
			
		||||
	-- first sha, and a null sha as the second sha, and we only
 | 
			
		||||
	-- care about files that are in conflict.
 | 
			
		||||
	procmergeconflictdiff (info:file:rest) changed conflicted
 | 
			
		||||
		| ":" `S.isPrefixOf` info = case S8.words info of
 | 
			
		||||
			(_colonmode:_mode:sha:_sha:status:[]) -> do
 | 
			
		||||
				let conflicted' = status == "U"
 | 
			
		||||
				added <- catKey (Ref sha) >>= \case
 | 
			
		||||
					Just key -> do
 | 
			
		||||
						liftIO $ SQL.addAssociatedFile key
 | 
			
		||||
							(asTopFilePath file)
 | 
			
		||||
							(SQL.WriteHandle qh)
 | 
			
		||||
						return True
 | 
			
		||||
					Nothing -> return False
 | 
			
		||||
				procmergeconflictdiff rest
 | 
			
		||||
					(changed || added)
 | 
			
		||||
					(conflicted || conflicted')
 | 
			
		||||
			_ -> return (changed, conflicted) -- parse failed
 | 
			
		||||
	procmergeconflictdiff _ changed conflicted = return (changed, conflicted)
 | 
			
		||||
 | 
			
		||||
	reconcilerace file key = do
 | 
			
		||||
		caches <- liftIO $ SQL.getInodeCaches key (SQL.ReadHandle qh)
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
| 
						 | 
				
			
			@ -1,10 +1,10 @@
 | 
			
		|||
Found a case where the associated files in the keys db end up out-of-date.
 | 
			
		||||
Make a repo with an unlocked file, clone it to a second repo, and set up a
 | 
			
		||||
Make a repo with an locked file, clone it to a second repo, and set up a
 | 
			
		||||
conflict involving that file in both repos, using git-annex add to add the
 | 
			
		||||
conflicting version, and not running other git-annex commands after that,
 | 
			
		||||
before pulling the conflicting branch. When the associated files db
 | 
			
		||||
gets updated in the conflict situation, only 1 key has the conflicting file
 | 
			
		||||
associated with it, rather than 2 or 3.
 | 
			
		||||
conflicting version, committing, and not running other git-annex commands
 | 
			
		||||
after that, before pulling the conflicting branch. When the associated
 | 
			
		||||
files db gets updated in the conflict situation, only 1 key has the
 | 
			
		||||
conflicting file associated with it, rather than 2 or 3.
 | 
			
		||||
 | 
			
		||||
The original key before the conflict has the file associated with it, but
 | 
			
		||||
the new local key and new remote key do not.
 | 
			
		||||
| 
						 | 
				
			
			@ -21,8 +21,35 @@ git-annex updates the keys db. So, one solution to this bug will be for
 | 
			
		|||
git-annex to also update the keys db when staging locked files.
 | 
			
		||||
(Unfortunately this would make mass adds somewhat slower.)
 | 
			
		||||
 | 
			
		||||
Or, possibly, for reconcileStaged to not use git diff --index in this case,
 | 
			
		||||
Or, possibly, for reconcileStaged to not use git diff --cached in this case,
 | 
			
		||||
but git diff with -1 and -3. That lets both sides of the merge conflict be
 | 
			
		||||
accessed, and it could then add the file to both keys. As well as not
 | 
			
		||||
slowing down git-annex add, this would let it honor the preferred content
 | 
			
		||||
of the conflicting file for all 3 keys. --[[Joey]]
 | 
			
		||||
 | 
			
		||||
> On second thought, it's not really necessary that all 3 keys have the
 | 
			
		||||
> conflicted file associated with them. The original key doesn't because
 | 
			
		||||
> the user has already changed the file to use the new key. The new remote
 | 
			
		||||
> key does not really need to, and there might not even be any effect if it
 | 
			
		||||
> did. The new local key is the one that this bug is really about.
 | 
			
		||||
> 
 | 
			
		||||
> Consider that checkDrop uses catKeyFile to double-check the associated
 | 
			
		||||
> files. And that will see the file pointing to the new local key. So
 | 
			
		||||
> if the original key or new remote key are also associated with the file,
 | 
			
		||||
> it will ignore them and drop anyway. And that's ok, from the user's
 | 
			
		||||
> perspective the one it needs to retain is the one that the file in the
 | 
			
		||||
> working tree uses, which is the new local key.
 | 
			
		||||
> 
 | 
			
		||||
> > Hmm, -1 and -3 are not what's needed to get the new local key.
 | 
			
		||||
> > It's using `git diff oldtree --cached`, and the code preserves the old
 | 
			
		||||
> > key when it sees a merge conflict. Using instead
 | 
			
		||||
> > `git diff HEAD --cached` has the new key as the src sha, and nullsha as
 | 
			
		||||
> > the dst sha.
 | 
			
		||||
> >
 | 
			
		||||
> > However, the diff with the old tree is needed to incrementally
 | 
			
		||||
> > update when it's not in the middle of a merge conflict.
 | 
			
		||||
> > So what can be done is do the diff as now; when it sees a merge
 | 
			
		||||
> > conflict, run diff a second time with `HEAD --cached` to get the new
 | 
			
		||||
> > key. 
 | 
			
		||||
> > 
 | 
			
		||||
> > > [[done]] --[[Joey]]
 | 
			
		||||
| 
						 | 
				
			
			
 | 
			
		|||
		Loading…
	
	Add table
		Add a link
		
	
		Reference in a new issue