diff --git a/Annex/Import.hs b/Annex/Import.hs index b1ace3468e..9ba4caf1b1 100644 --- a/Annex/Import.hs +++ b/Annex/Import.hs @@ -21,6 +21,7 @@ module Annex.Import ( importKeys, makeImportMatcher, getImportableContents, + PostExportLogUpdate, ) where import Annex.Common @@ -74,6 +75,8 @@ import qualified Data.ByteArray.Encoding as BA #ifdef mingw32_HOST_OS import qualified System.FilePath.Posix as Posix #endif +import qualified Data.Semigroup as Sem +import Prelude {- Configures how to build an import tree. -} data ImportTreeConfig @@ -112,8 +115,9 @@ buildImportCommit -> ImportCommitConfig -> AddUnlockedMatcher -> Imported + -> PostExportLogUpdate -> Annex (Maybe Ref) -buildImportCommit remote importtreeconfig importcommitconfig addunlockedmatcher imported = +buildImportCommit remote importtreeconfig importcommitconfig addunlockedmatcher imported postexportlogupdate = case importCommitTracking importcommitconfig of Nothing -> go Nothing Just trackingcommit -> inRepo (Git.Ref.tree trackingcommit) >>= \case @@ -121,12 +125,14 @@ buildImportCommit remote importtreeconfig importcommitconfig addunlockedmatcher Just _ -> go (Just trackingcommit) where go trackingcommit = do - (importedtree, updatestate) <- recordImportTree remote importtreeconfig (Just addunlockedmatcher) imported + (importedtree, updatestate) <- recordImportTree remote importtreeconfig (Just addunlockedmatcher) imported postexportlogupdate buildImportCommit' remote importcommitconfig trackingcommit importedtree >>= \case Just finalcommit -> do updatestate return (Just finalcommit) - Nothing -> return Nothing + Nothing -> do + postExportLogUpdate postexportlogupdate + return Nothing {- Builds a tree for an import from a special remote. - @@ -138,8 +144,9 @@ recordImportTree -> ImportTreeConfig -> Maybe AddUnlockedMatcher -> Imported + -> PostExportLogUpdate -> Annex (History Sha, Annex ()) -recordImportTree remote importtreeconfig addunlockedmatcher imported = do +recordImportTree remote importtreeconfig addunlockedmatcher imported postexportlogupdate = do importedtree@(History finaltree _) <- buildImportTrees basetree subdir addunlockedmatcher imported return (importedtree, updatestate finaltree) where @@ -180,6 +187,7 @@ recordImportTree remote importtreeconfig addunlockedmatcher imported = do { oldTreeish = exportedTreeishes oldexport , newTreeish = importedtree } + postExportLogUpdate postexportlogupdate return oldexport -- importKeys takes care of updating the location log @@ -498,11 +506,26 @@ canImportKeys remote importcontent = where ia = Remote.importActions remote --- Result of an import. ImportUnfinished indicates that some file failed to --- be imported. Running again should resume where it left off. +-- Result of an import. data ImportResult t - = ImportFinished t + = ImportFinished PostExportLogUpdate t | ImportUnfinished + -- ^ ImportUnfinished indicates that some file failed to + -- be imported. Running again should resume where it left off. + +-- An action to run after the export log has been updated to reflect an +-- import. +newtype PostExportLogUpdate = PostExportLogUpdate (Annex ()) + +instance Sem.Semigroup PostExportLogUpdate where + PostExportLogUpdate a <> PostExportLogUpdate b = + PostExportLogUpdate (a >> b) + +noPostExportLogUpdate :: PostExportLogUpdate +noPostExportLogUpdate = PostExportLogUpdate (return ()) + +postExportLogUpdate :: PostExportLogUpdate -> Annex () +postExportLogUpdate (PostExportLogUpdate a) = a data Diffed t = DiffChanged t @@ -546,7 +569,10 @@ importChanges remote importtreeconfig importcontent thirdpartypopulated importab Nothing -> fullimport currcidtree Just lastimportedtree -> diffimport cidtreemap prevcidtree currcidtree lastimportedtree where - remember = recordContentIdentifierTree (Remote.uuid remote) + -- Record the content identifier tree after the export log is + -- updated for the import. + remember = PostExportLogUpdate . + recordContentIdentifierTree (Remote.uuid remote) -- In order to use a diff, the previous ContentIdentifier tree must -- not have been garbage collected. Which can happen since there @@ -567,11 +593,11 @@ importChanges remote importtreeconfig importcontent thirdpartypopulated importab ) fullimport currcidtree = - importKeys remote importtreeconfig importcontent thirdpartypopulated importablecontents >>= \case - ImportUnfinished -> return ImportUnfinished - ImportFinished r -> do - remember currcidtree - return $ ImportFinished $ ImportedFull r + importKeys remote importtreeconfig importcontent thirdpartypopulated importablecontents >>= return . \case + ImportUnfinished -> ImportUnfinished + ImportFinished a r -> + ImportFinished (a <> remember currcidtree) $ + ImportedFull r diffimport cidtreemap prevcidtree currcidtree lastimportedtree = do (diff, cleanup) <- inRepo $ Git.DiffTree.diffTreeRecursive @@ -589,17 +615,15 @@ importChanges remote importtreeconfig importcontent thirdpartypopulated importab ImportUnfinished -> do void $ liftIO cleanup return ImportUnfinished - ImportFinished (ImportableContentsComplete ic') -> - liftIO cleanup >>= \case - False -> return ImportUnfinished - True -> do - remember currcidtree - return $ ImportFinished $ - ImportedDiff lastimportedtree - (mkdiff ic' removed) + ImportFinished a (ImportableContentsComplete ic') -> + liftIO cleanup >>= return . \case + False -> ImportUnfinished + True -> ImportFinished (a <> remember currcidtree) $ + ImportedDiff lastimportedtree + (mkdiff ic' removed) -- importKeys is not passed ImportableContentsChunked -- above, so it cannot return it - ImportFinished (ImportableContentsChunked {}) -> error "internal" + ImportFinished _ (ImportableContentsChunked {}) -> error "internal" isremoval ti = Git.DiffTree.dstsha ti `elem` nullShas @@ -685,12 +709,12 @@ importKeys remote importtreeconfig importcontent thirdpartypopulated importablec ImportableContentsComplete ic -> go False largematcher cidmap importing db ic >>= return . \case Nothing -> ImportUnfinished - Just v -> ImportFinished $ ImportableContentsComplete v + Just v -> ImportFinished noPostExportLogUpdate $ ImportableContentsComplete v ImportableContentsChunked {} -> do c <- gochunked db (importableContentsChunk importablecontents) gohistory largematcher cidmap importing db (importableHistoryComplete importablecontents) >>= return . \case Nothing -> ImportUnfinished - Just h -> ImportFinished $ ImportableContentsChunked + Just h -> ImportFinished noPostExportLogUpdate $ ImportableContentsChunked { importableContentsChunk = c , importableHistoryComplete = h } diff --git a/CHANGELOG b/CHANGELOG index 821a554794..b6541fcb1a 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -23,6 +23,10 @@ git-annex (10.20250829) UNRELEASED; urgency=medium existing remote. * Fix hang that could occur when using git-annex adjust on a branch with a number of files greater than annex.queuesize. + * Fix bug that made changes to a special remote sometimes be missed when + importing a tree from it. After upgrading, any such missed changes + will be included in the next tree imported from a special remote. + Fixes reversion introduced in version 10.20230626. -- Joey Hess Fri, 29 Aug 2025 12:34:06 -0400 diff --git a/Command/Import.hs b/Command/Import.hs index 7375b807df..ba4efdeb8c 100644 --- a/Command/Import.hs +++ b/Command/Import.hs @@ -349,9 +349,9 @@ seekRemote remote branch msubdir importcontent ci addunlockedmatcher importmessa , Remote.name remote , ". Re-run command to resume import." ] - ImportFinished imported -> void $ - includeCommandAction $ - commitimport imported + ImportFinished postexportlogupdate imported -> + void $ includeCommandAction $ + commitimport imported postexportlogupdate where importmessages' | null importmessages = ["import from " ++ Remote.name remote] @@ -383,10 +383,10 @@ listContents' remote importtreeconfig ci a = , err ] -commitRemote :: Remote -> Branch -> RemoteTrackingBranch -> Maybe Sha -> ImportTreeConfig -> ImportCommitConfig -> AddUnlockedMatcher -> Imported -> CommandStart -commitRemote remote branch tb trackingcommit importtreeconfig importcommitconfig addunlockedmatcher imported = +commitRemote :: Remote -> Branch -> RemoteTrackingBranch -> Maybe Sha -> ImportTreeConfig -> ImportCommitConfig -> AddUnlockedMatcher -> Imported -> PostExportLogUpdate -> CommandStart +commitRemote remote branch tb trackingcommit importtreeconfig importcommitconfig addunlockedmatcher imported postexportlogupdate = starting "update" ai si $ do - importcommit <- buildImportCommit remote importtreeconfig importcommitconfig addunlockedmatcher imported + importcommit <- buildImportCommit remote importtreeconfig importcommitconfig addunlockedmatcher imported postexportlogupdate next $ updateremotetrackingbranch importcommit where ai = ActionItemOther (Just $ UnquotedString $ fromRef $ fromRemoteTrackingBranch tb) diff --git a/Command/Sync.hs b/Command/Sync.hs index 02326a390e..e859746f21 100644 --- a/Command/Sync.hs +++ b/Command/Sync.hs @@ -606,8 +606,8 @@ pullThirdPartyPopulated o remote Command.Import.listContents' remote ImportTree (CheckGitIgnore False) go where go (Just importable) = importChanges remote ImportTree False True importable >>= \case - ImportFinished imported -> do - (_t, updatestate) <- recordImportTree remote ImportTree Nothing imported + ImportFinished postexportlogupdate imported -> do + (_t, updatestate) <- recordImportTree remote ImportTree Nothing imported postexportlogupdate next $ do updatestate return True diff --git a/Logs/Import.hs b/Logs/Import.hs index 7d3e0eacce..d93d28eda5 100644 --- a/Logs/Import.hs +++ b/Logs/Import.hs @@ -1,6 +1,6 @@ {- git-annex import logs - - - Copyright 2023 Joey Hess + - Copyright 2023-2025 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -14,24 +14,60 @@ import Annex.Common import Git.Types import Git.Sha import Logs.File +import Logs.Export import qualified Data.ByteString.Lazy as L +import qualified Data.ByteString.Char8 as S8 +import qualified Data.Set as S {- Records the sha of a tree that contains hashes of ContentIdentifiers - - that were imported from a remote. -} + - that were imported from a remote. + - + - The sha is on the first line of the log file, and following it + - is a line with the the currently exported treeishs, and then a line with + - the incomplete exported treeishes. + -} recordContentIdentifierTree :: UUID -> Sha -> Annex () recordContentIdentifierTree u t = do l <- calcRepo' (gitAnnexImportLog u) - writeLogFile l (fromRef t) + exported <- getExport u + writeLogFile l $ unlines + [ fromRef t + , unwords $ map fromRef $ exportedTreeishes exported + , unwords $ map fromRef $ incompleteExportedTreeishes exported + ] -{- Gets the tree last recorded for a remote. -} +{- Gets the ContentIdentifier tree last recorded for a remote. + - + - This returns Nothing if no tree was recorded yet. + - + - It also returns Nothing when there have been changes to what is exported + - to the remote since the tree was recorded. That avoids a problem where + - diffing from the current Contentidentifier tree to the previous tree + - would miss changes that were made to a remote by an export, but were + - later undone manually. For example, if a file was exported to the remote, + - and then the file was manually removed from the remote, the current tree + - would not contain the file, and neither would the previous tree. + - So diffing between the trees would miss that removal. The removed + - file would then remain in the imported tree. + -} getContentIdentifierTree :: UUID -> Annex (Maybe Sha) getContentIdentifierTree u = do l <- calcRepo' (gitAnnexImportLog u) -- This is safe because the log file is written atomically. - calcLogFileUnsafe l Nothing update + ls <- calcLogFileUnsafe l [] (\v ls -> L.toStrict v : ls) + exported <- getExport u + return $ case reverse ls of + -- Subsequent lines are ignored. This leaves room for future + -- expansion of what is logged. + (a:b:c:_) -> do + t <- extractSha a + exportedtreeishs <- mapM extractSha (S8.words b) + incompleteexportedtreeishs <- mapM extractSha (S8.words c) + if same exportedtreeishs (exportedTreeishes exported) && + same incompleteexportedtreeishs (incompleteExportedTreeishes exported) + then Just t + else Nothing + _ -> Nothing where - update l Nothing = extractSha (L.toStrict l) - -- Subsequent lines are ignored. This leaves room for future - -- expansion of what is logged. - update _l (Just l) = Just l + same l1 l2 = S.fromList l1 == S.fromList l2 diff --git a/doc/bugs/annex_import_doesn__39__t_delete_files_during_updates.mdwn b/doc/bugs/annex_import_doesn__39__t_delete_files_during_updates.mdwn index 508acc612e..386e0f8080 100644 --- a/doc/bugs/annex_import_doesn__39__t_delete_files_during_updates.mdwn +++ b/doc/bugs/annex_import_doesn__39__t_delete_files_during_updates.mdwn @@ -475,3 +475,9 @@ one three $ ls ../directory/ one ``` + +> [[fixed|done]] +> +> When this has happened in a repository, after upgrading git-annex, +> the next git-annex import will be a full import, so it will notice +> whatever changes it failed to notice before. --[[Joey]]