diff --git a/Annex/Branch.hs b/Annex/Branch.hs index 717cbc0400..806eabb99a 100644 --- a/Annex/Branch.hs +++ b/Annex/Branch.hs @@ -818,12 +818,18 @@ performTransitionsLocked jl ts neednewlocalbranch transitionedrefs = do if neednewlocalbranch then do cmode <- annexCommitMode <$> Annex.getGitConfig - committedref <- inRepo $ Git.Branch.commitAlways cmode message fullname transitionedrefs - setIndexSha committedref + -- Creating a new empty branch must happen + -- atomically, so if this is interrupted, + -- it will not leave the new branch created + -- but without exports grafted in. + c <- inRepo $ Git.Branch.commitShaAlways + cmode message transitionedrefs + void $ regraftexports c else do ref <- getBranch - commitIndex jl ref message (nub $ fullname:transitionedrefs) - regraftexports + ref' <- regraftexports ref + commitIndex jl ref' message + (nub $ fullname:transitionedrefs) where message | neednewlocalbranch && null transitionedrefs = "new branch for transition " ++ tdesc @@ -872,13 +878,25 @@ performTransitionsLocked jl ts neednewlocalbranch transitionedrefs = do apply rest file content' -- Trees mentioned in export.log were grafted into the old - -- git-annex branch to make sure they remain available. Re-graft - -- the trees into the new branch. - regraftexports = do + -- git-annex branch to make sure they remain available. + -- Re-graft the trees. + regraftexports parent = do l <- exportedTreeishes . M.elems . parseExportLogMap <$> getStaged exportLog - forM_ l $ \t -> - rememberTreeishLocked t (asTopFilePath exportTreeGraftPoint) jl + c <- regraft l parent + inRepo $ Git.Branch.update' fullname c + setIndexSha c + return c + where + regraft [] c = pure c + regraft (et:ets) c = + -- Verify that the tree object exists. + catObjectDetails et >>= \case + Just _ -> + prepRememberTreeish et graftpoint c + >>= regraft ets + Nothing -> regraft ets c + graftpoint = asTopFilePath exportTreeGraftPoint checkBranchDifferences :: Git.Ref -> Annex () checkBranchDifferences ref = do @@ -935,26 +953,29 @@ getMergedRefs' = do - Returns the sha of the git commit made to the git-annex branch. -} rememberTreeish :: Git.Ref -> TopFilePath -> Annex Git.Sha -rememberTreeish treeish graftpoint = lockJournal $ - rememberTreeishLocked treeish graftpoint -rememberTreeishLocked :: Git.Ref -> TopFilePath -> JournalLocked -> Annex Git.Sha -rememberTreeishLocked treeish graftpoint jl = do +rememberTreeish treeish graftpoint = lockJournal $ \jl -> do branchref <- getBranch updateIndex jl branchref + c <- prepRememberTreeish treeish graftpoint branchref + inRepo $ Git.Branch.update' fullname c + -- The tree in c is the same as the tree in branchref, + -- and the index was updated to that above, so it's safe to + -- say that the index contains c. + setIndexSha c + return c + +{- Create a series of commits that graft a tree onto the parent commit, + - and then remove it. -} +prepRememberTreeish :: Git.Ref -> TopFilePath -> Git.Ref -> Annex Git.Sha +prepRememberTreeish treeish graftpoint parent = do origtree <- fromMaybe (giveup "unable to determine git-annex branch tree") <$> - inRepo (Git.Ref.tree branchref) + inRepo (Git.Ref.tree parent) addedt <- inRepo $ Git.Tree.graftTree treeish graftpoint origtree cmode <- annexCommitMode <$> Annex.getGitConfig c <- inRepo $ Git.Branch.commitTree cmode - ["graft"] [branchref] addedt - c' <- inRepo $ Git.Branch.commitTree cmode + ["graft"] [parent] addedt + inRepo $ Git.Branch.commitTree cmode ["graft cleanup"] [c] origtree - inRepo $ Git.Branch.update' fullname c' - -- The tree in c' is the same as the tree in branchref, - -- and the index was updated to that above, so it's safe to - -- say that the index contains c'. - setIndexSha c' - return c' {- Runs an action on the content of selected files from the branch. - This is much faster than reading the content of each file in turn, diff --git a/CHANGELOG b/CHANGELOG index 007c9273e0..d956868768 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -1,6 +1,9 @@ git-annex (10.20240532) UNRELEASED; urgency=medium * Added updateproxy command and remote.name.annex-proxy configuration. + * Fix a bug where interrupting git-annex while it is updating the + git-annex branch for an export could later lead to git fsck + complaining about missing tree objects. * Fix Windows build with Win32 2.13.4+ Thanks, Oleg Tolmatcev diff --git a/CmdLine/GitRemoteTorAnnex.hs b/CmdLine/GitRemoteTorAnnex.hs index c3aa13c3f0..476eab0231 100644 --- a/CmdLine/GitRemoteTorAnnex.hs +++ b/CmdLine/GitRemoteTorAnnex.hs @@ -58,7 +58,7 @@ connectService address port service = do <$> loadP2PRemoteAuthToken (TorAnnex address port) myuuid <- getUUID g <- Annex.gitRepo - conn <- liftIO $ connectPeer g (TorAnnex address port) + conn <- liftIO $ connectPeer (Just g) (TorAnnex address port) runst <- liftIO $ mkRunState Client r <- liftIO $ runNetProto runst conn $ auth myuuid authtoken noop >>= \case Just _theiruuid -> connect service stdin stdout diff --git a/Command/EnableTor.hs b/Command/EnableTor.hs index 2a6688f3b1..1900d5349b 100644 --- a/Command/EnableTor.hs +++ b/Command/EnableTor.hs @@ -105,11 +105,10 @@ checkHiddenService = bracket setup cleanup go check 0 _ = giveup "Still unable to connect to hidden service. It might not yet be usable by others. Please check Tor's logs for details." check _ [] = giveup "Somehow didn't get an onion address." - check n addrs@(addr:_) = do - g <- Annex.gitRepo + check n addrs@(addr:_) = -- Connect but don't bother trying to auth, -- we just want to know if the tor circuit works. - liftIO (tryNonAsync $ connectPeer g addr) >>= \case + liftIO (tryNonAsync $ connectPeer Nothing addr) >>= \case Left e -> do warning $ UnquotedString $ "Unable to connect to hidden service. It may not yet have propagated to the Tor network. (" ++ show e ++ ") Will retry.." liftIO $ threadDelaySeconds (Seconds 2) @@ -123,19 +122,18 @@ checkHiddenService = bracket setup cleanup go -- service's socket, start a listener. This is only run during the -- check, and it refuses all auth attempts. startlistener = do - r <- Annex.gitRepo u <- getUUID msock <- torSocketFile case msock of Just sockfile -> ifM (liftIO $ haslistener sockfile) ( liftIO $ async $ return () - , liftIO $ async $ runlistener sockfile u r + , liftIO $ async $ runlistener sockfile u ) Nothing -> giveup "Could not find socket file in Tor configuration!" - runlistener sockfile u r = serveUnixSocket sockfile $ \h -> do + runlistener sockfile u = serveUnixSocket sockfile $ \h -> do let conn = P2PConnection - { connRepo = r + { connRepo = Nothing , connCheckAuth = const False , connIhdl = h , connOhdl = h diff --git a/Command/P2P.hs b/Command/P2P.hs index e51a5e16e1..414ffa7610 100644 --- a/Command/P2P.hs +++ b/Command/P2P.hs @@ -291,7 +291,7 @@ data LinkResult setupLink :: RemoteName -> P2PAddressAuth -> Annex LinkResult setupLink remotename (P2PAddressAuth addr authtoken) = do g <- Annex.gitRepo - cv <- liftIO $ tryNonAsync $ connectPeer g addr + cv <- liftIO $ tryNonAsync $ connectPeer (Just g) addr case cv of Left e -> return $ ConnectionError $ "Unable to connect with peer. Please check that the peer is connected to the network, and try again. (" ++ show e ++ ")" Right conn -> do diff --git a/Command/P2PStdIO.hs b/Command/P2PStdIO.hs index c8b4a64b63..c657a85ac8 100644 --- a/Command/P2PStdIO.hs +++ b/Command/P2PStdIO.hs @@ -36,7 +36,7 @@ start theiruuid = startingCustomOutput (ActionItemOther Nothing) $ do (False, True) -> P2P.ServeAppendOnly (False, False) -> P2P.ServeReadWrite myuuid <- getUUID - conn <- stdioP2PConnection <$> Annex.gitRepo + let conn = stdioP2PConnection Nothing let server = do P2P.net $ P2P.sendMessage (P2P.AUTH_SUCCESS myuuid) P2P.serveAuthed servermode myuuid diff --git a/Git/Branch.hs b/Git/Branch.hs index 8569f5d249..9d0ba56384 100644 --- a/Git/Branch.hs +++ b/Git/Branch.hs @@ -178,13 +178,25 @@ commitCommand' runner commitmode commitquiet ps = - in any way, or output a summary. -} commit :: CommitMode -> Bool -> String -> Branch -> [Ref] -> Repo -> IO (Maybe Sha) -commit commitmode allowempty message branch parentrefs repo = do - tree <- writeTree repo - ifM (cancommit tree) - ( do - sha <- commitTree commitmode [message] parentrefs tree repo +commit commitmode allowempty message branch parentrefs repo = + commitSha commitmode allowempty message parentrefs repo >>= \case + Just sha -> do update' branch sha repo return $ Just sha + Nothing -> return Nothing + where + cancommit tree + | allowempty = return True + | otherwise = case parentrefs of + [p] -> maybe False (tree /=) <$> Git.Ref.tree p repo + _ -> return True + +{- Same as commit but without updating any branch. -} +commitSha :: CommitMode -> Bool -> String -> [Ref] -> Repo -> IO (Maybe Sha) +commitSha commitmode allowempty message parentrefs repo = do + tree <- writeTree repo + ifM (cancommit tree) + ( Just <$> commitTree commitmode [message] parentrefs tree repo , return Nothing ) where @@ -198,6 +210,10 @@ commitAlways :: CommitMode -> String -> Branch -> [Ref] -> Repo -> IO Sha commitAlways commitmode message branch parentrefs repo = fromJust <$> commit commitmode True message branch parentrefs repo +commitShaAlways :: CommitMode -> String -> [Ref] -> Repo -> IO Sha +commitShaAlways commitmode message parentrefs repo = fromJust + <$> commitSha commitmode True message parentrefs repo + -- Throws exception if the index is locked, with an error message output by -- git on stderr. writeTree :: Repo -> IO Sha diff --git a/Logs/Export.hs b/Logs/Export.hs index 9c2c23dc77..7f2242ea14 100644 --- a/Logs/Export.hs +++ b/Logs/Export.hs @@ -65,19 +65,19 @@ recordExportBeginning remoteuuid newtree = do . parseExportLogMap <$> Annex.Branch.get exportLog let new = updateIncompleteExportedTreeish old (nub (newtree:incompleteExportedTreeishes [old])) + rememberExportTreeish newtree Annex.Branch.change (Annex.Branch.RegardingUUID [remoteuuid, u]) exportLog (buildExportLog . changeMapLog c ep new . parseExportLog) - recordExportTreeish newtree -- Graft a tree ref into the git-annex branch. This is done -- to ensure that it's available later, when getting exported files -- from the remote. Since that could happen in another clone of the -- repository, the tree has to be kept available, even if it -- doesn't end up being merged into the master branch. -recordExportTreeish :: Git.Ref -> Annex () -recordExportTreeish t = void $ +rememberExportTreeish :: Git.Ref -> Annex () +rememberExportTreeish t = void $ Annex.Branch.rememberTreeish t (asTopFilePath exportTreeGraftPoint) -- | Record that an export to a special remote is under way. @@ -111,7 +111,7 @@ recordExportUnderway remoteuuid ec = do recordExport :: UUID -> Git.Ref -> ExportChange -> Annex () recordExport remoteuuid tree ec = do when (oldTreeish ec /= [tree]) $ - recordExportTreeish tree + rememberExportTreeish tree recordExportUnderway remoteuuid ec logExportExcluded :: UUID -> ((Git.Tree.TreeItem -> IO ()) -> Annex a) -> Annex a diff --git a/P2P/IO.hs b/P2P/IO.hs index b37ce058c0..ccbdf1d6ba 100644 --- a/P2P/IO.hs +++ b/P2P/IO.hs @@ -75,7 +75,7 @@ mkRunState mk = do return (mk tvar) data P2PConnection = P2PConnection - { connRepo :: Repo + { connRepo :: Maybe Repo , connCheckAuth :: (AuthToken -> Bool) , connIhdl :: Handle , connOhdl :: Handle @@ -90,7 +90,7 @@ data ClosableConnection conn | ClosedConnection -- P2PConnection using stdio. -stdioP2PConnection :: Git.Repo -> P2PConnection +stdioP2PConnection :: Maybe Git.Repo -> P2PConnection stdioP2PConnection g = P2PConnection { connRepo = g , connCheckAuth = const False @@ -100,7 +100,7 @@ stdioP2PConnection g = P2PConnection } -- Opens a connection to a peer. Does not authenticate with it. -connectPeer :: Git.Repo -> P2PAddress -> IO P2PConnection +connectPeer :: Maybe Git.Repo -> P2PAddress -> IO P2PConnection connectPeer g (TorAnnex onionaddress onionport) = do h <- setupHandle =<< connectHiddenService onionaddress onionport return $ P2PConnection @@ -154,8 +154,7 @@ setupHandle s = do -- Purposefully incomplete interpreter of Proto. -- -- This only runs Net actions. No Local actions will be run --- (those need the Annex monad) -- if the interpreter reaches any, --- it returns Nothing. +-- (those need the Annex monad). runNetProto :: RunState -> P2PConnection -> Proto a -> IO (Either ProtoFailure a) runNetProto runst conn = go where @@ -286,19 +285,21 @@ runRelay runner (RelayHandle hout) (RelayHandle hin) = go (v, _, _) = relayHelper runner v runRelayService :: P2PConnection -> RunProto IO -> Service -> IO (Either ProtoFailure ()) -runRelayService conn runner service = - withCreateProcess serviceproc' go +runRelayService conn runner service = case connRepo conn of + Just repo -> withCreateProcess (serviceproc' repo) go `catchNonAsync` (return . Left . ProtoFailureException) + Nothing -> return $ Left $ ProtoFailureMessage + "relaying to git not supported on this connection" where cmd = case service of UploadPack -> "upload-pack" ReceivePack -> "receive-pack" - serviceproc = gitCreateProcess + serviceproc repo = gitCreateProcess [ Param cmd - , File (fromRawFilePath (repoPath (connRepo conn))) - ] (connRepo conn) - serviceproc' = serviceproc + , File (fromRawFilePath (repoPath repo)) + ] repo + serviceproc' repo = (serviceproc repo) { std_out = CreatePipe , std_in = CreatePipe } diff --git a/Remote/Helper/Ssh.hs b/Remote/Helper/Ssh.hs index 7a7da481bc..d712706fd2 100644 --- a/Remote/Helper/Ssh.hs +++ b/Remote/Helper/Ssh.hs @@ -239,9 +239,9 @@ openP2PSshConnection r connpool = do Nothing -> do liftIO $ rememberunsupported return Nothing - Just (cmd, params) -> start cmd params =<< getRepo r + Just (cmd, params) -> start cmd params where - start cmd params repo = liftIO $ do + start cmd params = liftIO $ do (Just from, Just to, Nothing, pid) <- createProcess $ (proc cmd (toCommand params)) { std_in = CreatePipe @@ -249,7 +249,7 @@ openP2PSshConnection r connpool = do } pidnum <- getPid pid let conn = P2P.P2PConnection - { P2P.connRepo = repo + { P2P.connRepo = Nothing , P2P.connCheckAuth = const False , P2P.connIhdl = to , P2P.connOhdl = from diff --git a/Remote/P2P.hs b/Remote/P2P.hs index 4b47e2f489..ba9e6570e0 100644 --- a/Remote/P2P.hs +++ b/Remote/P2P.hs @@ -143,7 +143,7 @@ withConnection u addr connpool a = bracketOnError get cache go openConnection :: UUID -> P2PAddress -> Annex Connection openConnection u addr = do g <- Annex.gitRepo - v <- liftIO $ tryNonAsync $ connectPeer g addr + v <- liftIO $ tryNonAsync $ connectPeer (Just g) addr case v of Right conn -> do myuuid <- getUUID diff --git a/RemoteDaemon/Transport/Tor.hs b/RemoteDaemon/Transport/Tor.hs index 7ec4249123..0446d07d6a 100644 --- a/RemoteDaemon/Transport/Tor.hs +++ b/RemoteDaemon/Transport/Tor.hs @@ -111,7 +111,7 @@ serveClient th@(TransportHandle _ _ rd) u r q = bracket setup cleanup start -- when the allowed set is changed. allowed <- loadP2PAuthTokens let conn = P2PConnection - { connRepo = r + { connRepo = Just r , connCheckAuth = (`isAllowedAuthToken` allowed) , connIhdl = h , connOhdl = h @@ -146,7 +146,7 @@ transport (RemoteRepo r gc) url@(RemoteURI uri) th ichan ochan = Nothing -> return () Just addr -> robustConnection 1 $ do g <- liftAnnex th Annex.gitRepo - bracket (connectPeer g addr) closeConnection (go addr) + bracket (connectPeer (Just g) addr) closeConnection (go addr) where go addr conn = do myuuid <- liftAnnex th getUUID diff --git a/doc/bugs/annex_merge__breaks_git_repository__33__.mdwn b/doc/bugs/annex_merge__breaks_git_repository__33__.mdwn index c453f3b39c..d4b5486fc1 100644 --- a/doc/bugs/annex_merge__breaks_git_repository__33__.mdwn +++ b/doc/bugs/annex_merge__breaks_git_repository__33__.mdwn @@ -53,3 +53,4 @@ there are good and there are some bad days ;) [[!meta author=yoh]] [[!tag projects/openneuro]] +> [[fixed|done]] --[[Joey]] diff --git a/doc/bugs/annex_merge__breaks_git_repository__33__/comment_10_df68709f0b9cdc265bdf37056af4edcc._comment b/doc/bugs/annex_merge__breaks_git_repository__33__/comment_10_df68709f0b9cdc265bdf37056af4edcc._comment new file mode 100644 index 0000000000..e55a239721 --- /dev/null +++ b/doc/bugs/annex_merge__breaks_git_repository__33__/comment_10_df68709f0b9cdc265bdf37056af4edcc._comment @@ -0,0 +1,30 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 10""" + date="2024-06-10T14:36:37Z" + content=""" +While I don't think this affects the ds002144 repository +(because the repository with the missing tree is dead), here's what happens +if the export.log's tree is missing, master has been reset to a previous tree, +which was exported earlier, and in a clone we try to get a file that is present +in both trees from the remote: + + get foo (from d...) fatal: bad object f4815823941716de0f0fdf85e8aaba98d024d488 + + unknown export location + +Note that the "bad object" message only appears the first time run. +Afterwards it only says "unknown export location". + +Even if the tree object later somehow gets pulled in, it will keep failing, +because the exportdb at this point contains the tree sha and it won't try +to update from it again. + +To recover from this situation, the user can make a change to +the tree (eg add a file), and export. It will complain one last time about +the bad object, and then the export.log gets fixed to contain an available +tree. However, any files that were in the missing tree that do not get +overwritten by that export will remain in the remote, without git-annex +knowing about them. If the remote has importtree=yes, importing from it +is another way to recover. +"""]] diff --git a/doc/bugs/annex_merge__breaks_git_repository__33__/comment_8_678151d78d145da6d249184ac212f935._comment b/doc/bugs/annex_merge__breaks_git_repository__33__/comment_8_678151d78d145da6d249184ac212f935._comment new file mode 100644 index 0000000000..d815f62ca9 --- /dev/null +++ b/doc/bugs/annex_merge__breaks_git_repository__33__/comment_8_678151d78d145da6d249184ac212f935._comment @@ -0,0 +1,15 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 8""" + date="2024-06-07T17:59:43Z" + content=""" +Fixed performTransitionsLocked to create the new git-annex branch +atomically. + +Found another way this could happen, interrupting `git-annex export` after +it writes export.log but before it grafts the tree into the git-annex +branch. Fixed that one too. + +So hopefully this won't happen to any more repositories with these fixes. +Still leaves the question of how to recover from the problem. +"""]] diff --git a/doc/bugs/annex_merge__breaks_git_repository__33__/comment_9_67dfc2b1444bd345f911ed779cb98bcc._comment b/doc/bugs/annex_merge__breaks_git_repository__33__/comment_9_67dfc2b1444bd345f911ed779cb98bcc._comment new file mode 100644 index 0000000000..267e31599a --- /dev/null +++ b/doc/bugs/annex_merge__breaks_git_repository__33__/comment_9_67dfc2b1444bd345f911ed779cb98bcc._comment @@ -0,0 +1,16 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 9""" + date="2024-06-07T20:25:27Z" + content=""" +Note that at least in the case of ds002144, its git-annex branch does not +contain grafts of the missing trees. The grafts only get created in the +clone when dealing with a transition. + +So, it seems that to recover from the problem, at least in the case of this +repository, it will be sufficient for git-annex to avoid regrafting trees +if the object is missing. + +Done that, and so I suppose this bug can be closed. I'd be more satified if +I knew how this repository was produced though. +"""]]