Merge branch 'master' into proxy

This commit is contained in:
Joey Hess 2024-06-10 14:26:18 -04:00
commit 649b87bedd
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
16 changed files with 159 additions and 58 deletions

View file

@ -818,12 +818,18 @@ performTransitionsLocked jl ts neednewlocalbranch transitionedrefs = do
if neednewlocalbranch if neednewlocalbranch
then do then do
cmode <- annexCommitMode <$> Annex.getGitConfig cmode <- annexCommitMode <$> Annex.getGitConfig
committedref <- inRepo $ Git.Branch.commitAlways cmode message fullname transitionedrefs -- Creating a new empty branch must happen
setIndexSha committedref -- atomically, so if this is interrupted,
-- it will not leave the new branch created
-- but without exports grafted in.
c <- inRepo $ Git.Branch.commitShaAlways
cmode message transitionedrefs
void $ regraftexports c
else do else do
ref <- getBranch ref <- getBranch
commitIndex jl ref message (nub $ fullname:transitionedrefs) ref' <- regraftexports ref
regraftexports commitIndex jl ref' message
(nub $ fullname:transitionedrefs)
where where
message message
| neednewlocalbranch && null transitionedrefs = "new branch for transition " ++ tdesc | neednewlocalbranch && null transitionedrefs = "new branch for transition " ++ tdesc
@ -872,13 +878,25 @@ performTransitionsLocked jl ts neednewlocalbranch transitionedrefs = do
apply rest file content' apply rest file content'
-- Trees mentioned in export.log were grafted into the old -- Trees mentioned in export.log were grafted into the old
-- git-annex branch to make sure they remain available. Re-graft -- git-annex branch to make sure they remain available.
-- the trees into the new branch. -- Re-graft the trees.
regraftexports = do regraftexports parent = do
l <- exportedTreeishes . M.elems . parseExportLogMap l <- exportedTreeishes . M.elems . parseExportLogMap
<$> getStaged exportLog <$> getStaged exportLog
forM_ l $ \t -> c <- regraft l parent
rememberTreeishLocked t (asTopFilePath exportTreeGraftPoint) jl inRepo $ Git.Branch.update' fullname c
setIndexSha c
return c
where
regraft [] c = pure c
regraft (et:ets) c =
-- Verify that the tree object exists.
catObjectDetails et >>= \case
Just _ ->
prepRememberTreeish et graftpoint c
>>= regraft ets
Nothing -> regraft ets c
graftpoint = asTopFilePath exportTreeGraftPoint
checkBranchDifferences :: Git.Ref -> Annex () checkBranchDifferences :: Git.Ref -> Annex ()
checkBranchDifferences ref = do checkBranchDifferences ref = do
@ -935,26 +953,29 @@ getMergedRefs' = do
- Returns the sha of the git commit made to the git-annex branch. - Returns the sha of the git commit made to the git-annex branch.
-} -}
rememberTreeish :: Git.Ref -> TopFilePath -> Annex Git.Sha rememberTreeish :: Git.Ref -> TopFilePath -> Annex Git.Sha
rememberTreeish treeish graftpoint = lockJournal $ rememberTreeish treeish graftpoint = lockJournal $ \jl -> do
rememberTreeishLocked treeish graftpoint
rememberTreeishLocked :: Git.Ref -> TopFilePath -> JournalLocked -> Annex Git.Sha
rememberTreeishLocked treeish graftpoint jl = do
branchref <- getBranch branchref <- getBranch
updateIndex jl branchref updateIndex jl branchref
c <- prepRememberTreeish treeish graftpoint branchref
inRepo $ Git.Branch.update' fullname c
-- The tree in c is the same as the tree in branchref,
-- and the index was updated to that above, so it's safe to
-- say that the index contains c.
setIndexSha c
return c
{- Create a series of commits that graft a tree onto the parent commit,
- and then remove it. -}
prepRememberTreeish :: Git.Ref -> TopFilePath -> Git.Ref -> Annex Git.Sha
prepRememberTreeish treeish graftpoint parent = do
origtree <- fromMaybe (giveup "unable to determine git-annex branch tree") <$> origtree <- fromMaybe (giveup "unable to determine git-annex branch tree") <$>
inRepo (Git.Ref.tree branchref) inRepo (Git.Ref.tree parent)
addedt <- inRepo $ Git.Tree.graftTree treeish graftpoint origtree addedt <- inRepo $ Git.Tree.graftTree treeish graftpoint origtree
cmode <- annexCommitMode <$> Annex.getGitConfig cmode <- annexCommitMode <$> Annex.getGitConfig
c <- inRepo $ Git.Branch.commitTree cmode c <- inRepo $ Git.Branch.commitTree cmode
["graft"] [branchref] addedt ["graft"] [parent] addedt
c' <- inRepo $ Git.Branch.commitTree cmode inRepo $ Git.Branch.commitTree cmode
["graft cleanup"] [c] origtree ["graft cleanup"] [c] origtree
inRepo $ Git.Branch.update' fullname c'
-- The tree in c' is the same as the tree in branchref,
-- and the index was updated to that above, so it's safe to
-- say that the index contains c'.
setIndexSha c'
return c'
{- Runs an action on the content of selected files from the branch. {- Runs an action on the content of selected files from the branch.
- This is much faster than reading the content of each file in turn, - This is much faster than reading the content of each file in turn,

View file

@ -1,6 +1,9 @@
git-annex (10.20240532) UNRELEASED; urgency=medium git-annex (10.20240532) UNRELEASED; urgency=medium
* Added updateproxy command and remote.name.annex-proxy configuration. * Added updateproxy command and remote.name.annex-proxy configuration.
* Fix a bug where interrupting git-annex while it is updating the
git-annex branch for an export could later lead to git fsck
complaining about missing tree objects.
* Fix Windows build with Win32 2.13.4+ * Fix Windows build with Win32 2.13.4+
Thanks, Oleg Tolmatcev Thanks, Oleg Tolmatcev

View file

@ -58,7 +58,7 @@ connectService address port service = do
<$> loadP2PRemoteAuthToken (TorAnnex address port) <$> loadP2PRemoteAuthToken (TorAnnex address port)
myuuid <- getUUID myuuid <- getUUID
g <- Annex.gitRepo g <- Annex.gitRepo
conn <- liftIO $ connectPeer g (TorAnnex address port) conn <- liftIO $ connectPeer (Just g) (TorAnnex address port)
runst <- liftIO $ mkRunState Client runst <- liftIO $ mkRunState Client
r <- liftIO $ runNetProto runst conn $ auth myuuid authtoken noop >>= \case r <- liftIO $ runNetProto runst conn $ auth myuuid authtoken noop >>= \case
Just _theiruuid -> connect service stdin stdout Just _theiruuid -> connect service stdin stdout

View file

@ -105,11 +105,10 @@ checkHiddenService = bracket setup cleanup go
check 0 _ = giveup "Still unable to connect to hidden service. It might not yet be usable by others. Please check Tor's logs for details." check 0 _ = giveup "Still unable to connect to hidden service. It might not yet be usable by others. Please check Tor's logs for details."
check _ [] = giveup "Somehow didn't get an onion address." check _ [] = giveup "Somehow didn't get an onion address."
check n addrs@(addr:_) = do check n addrs@(addr:_) =
g <- Annex.gitRepo
-- Connect but don't bother trying to auth, -- Connect but don't bother trying to auth,
-- we just want to know if the tor circuit works. -- we just want to know if the tor circuit works.
liftIO (tryNonAsync $ connectPeer g addr) >>= \case liftIO (tryNonAsync $ connectPeer Nothing addr) >>= \case
Left e -> do Left e -> do
warning $ UnquotedString $ "Unable to connect to hidden service. It may not yet have propagated to the Tor network. (" ++ show e ++ ") Will retry.." warning $ UnquotedString $ "Unable to connect to hidden service. It may not yet have propagated to the Tor network. (" ++ show e ++ ") Will retry.."
liftIO $ threadDelaySeconds (Seconds 2) liftIO $ threadDelaySeconds (Seconds 2)
@ -123,19 +122,18 @@ checkHiddenService = bracket setup cleanup go
-- service's socket, start a listener. This is only run during the -- service's socket, start a listener. This is only run during the
-- check, and it refuses all auth attempts. -- check, and it refuses all auth attempts.
startlistener = do startlistener = do
r <- Annex.gitRepo
u <- getUUID u <- getUUID
msock <- torSocketFile msock <- torSocketFile
case msock of case msock of
Just sockfile -> ifM (liftIO $ haslistener sockfile) Just sockfile -> ifM (liftIO $ haslistener sockfile)
( liftIO $ async $ return () ( liftIO $ async $ return ()
, liftIO $ async $ runlistener sockfile u r , liftIO $ async $ runlistener sockfile u
) )
Nothing -> giveup "Could not find socket file in Tor configuration!" Nothing -> giveup "Could not find socket file in Tor configuration!"
runlistener sockfile u r = serveUnixSocket sockfile $ \h -> do runlistener sockfile u = serveUnixSocket sockfile $ \h -> do
let conn = P2PConnection let conn = P2PConnection
{ connRepo = r { connRepo = Nothing
, connCheckAuth = const False , connCheckAuth = const False
, connIhdl = h , connIhdl = h
, connOhdl = h , connOhdl = h

View file

@ -291,7 +291,7 @@ data LinkResult
setupLink :: RemoteName -> P2PAddressAuth -> Annex LinkResult setupLink :: RemoteName -> P2PAddressAuth -> Annex LinkResult
setupLink remotename (P2PAddressAuth addr authtoken) = do setupLink remotename (P2PAddressAuth addr authtoken) = do
g <- Annex.gitRepo g <- Annex.gitRepo
cv <- liftIO $ tryNonAsync $ connectPeer g addr cv <- liftIO $ tryNonAsync $ connectPeer (Just g) addr
case cv of case cv of
Left e -> return $ ConnectionError $ "Unable to connect with peer. Please check that the peer is connected to the network, and try again. (" ++ show e ++ ")" Left e -> return $ ConnectionError $ "Unable to connect with peer. Please check that the peer is connected to the network, and try again. (" ++ show e ++ ")"
Right conn -> do Right conn -> do

View file

@ -36,7 +36,7 @@ start theiruuid = startingCustomOutput (ActionItemOther Nothing) $ do
(False, True) -> P2P.ServeAppendOnly (False, True) -> P2P.ServeAppendOnly
(False, False) -> P2P.ServeReadWrite (False, False) -> P2P.ServeReadWrite
myuuid <- getUUID myuuid <- getUUID
conn <- stdioP2PConnection <$> Annex.gitRepo let conn = stdioP2PConnection Nothing
let server = do let server = do
P2P.net $ P2P.sendMessage (P2P.AUTH_SUCCESS myuuid) P2P.net $ P2P.sendMessage (P2P.AUTH_SUCCESS myuuid)
P2P.serveAuthed servermode myuuid P2P.serveAuthed servermode myuuid

View file

@ -178,13 +178,25 @@ commitCommand' runner commitmode commitquiet ps =
- in any way, or output a summary. - in any way, or output a summary.
-} -}
commit :: CommitMode -> Bool -> String -> Branch -> [Ref] -> Repo -> IO (Maybe Sha) commit :: CommitMode -> Bool -> String -> Branch -> [Ref] -> Repo -> IO (Maybe Sha)
commit commitmode allowempty message branch parentrefs repo = do commit commitmode allowempty message branch parentrefs repo =
tree <- writeTree repo commitSha commitmode allowempty message parentrefs repo >>= \case
ifM (cancommit tree) Just sha -> do
( do
sha <- commitTree commitmode [message] parentrefs tree repo
update' branch sha repo update' branch sha repo
return $ Just sha return $ Just sha
Nothing -> return Nothing
where
cancommit tree
| allowempty = return True
| otherwise = case parentrefs of
[p] -> maybe False (tree /=) <$> Git.Ref.tree p repo
_ -> return True
{- Same as commit but without updating any branch. -}
commitSha :: CommitMode -> Bool -> String -> [Ref] -> Repo -> IO (Maybe Sha)
commitSha commitmode allowempty message parentrefs repo = do
tree <- writeTree repo
ifM (cancommit tree)
( Just <$> commitTree commitmode [message] parentrefs tree repo
, return Nothing , return Nothing
) )
where where
@ -198,6 +210,10 @@ commitAlways :: CommitMode -> String -> Branch -> [Ref] -> Repo -> IO Sha
commitAlways commitmode message branch parentrefs repo = fromJust commitAlways commitmode message branch parentrefs repo = fromJust
<$> commit commitmode True message branch parentrefs repo <$> commit commitmode True message branch parentrefs repo
commitShaAlways :: CommitMode -> String -> [Ref] -> Repo -> IO Sha
commitShaAlways commitmode message parentrefs repo = fromJust
<$> commitSha commitmode True message parentrefs repo
-- Throws exception if the index is locked, with an error message output by -- Throws exception if the index is locked, with an error message output by
-- git on stderr. -- git on stderr.
writeTree :: Repo -> IO Sha writeTree :: Repo -> IO Sha

View file

@ -65,19 +65,19 @@ recordExportBeginning remoteuuid newtree = do
. parseExportLogMap . parseExportLogMap
<$> Annex.Branch.get exportLog <$> Annex.Branch.get exportLog
let new = updateIncompleteExportedTreeish old (nub (newtree:incompleteExportedTreeishes [old])) let new = updateIncompleteExportedTreeish old (nub (newtree:incompleteExportedTreeishes [old]))
rememberExportTreeish newtree
Annex.Branch.change Annex.Branch.change
(Annex.Branch.RegardingUUID [remoteuuid, u]) (Annex.Branch.RegardingUUID [remoteuuid, u])
exportLog exportLog
(buildExportLog . changeMapLog c ep new . parseExportLog) (buildExportLog . changeMapLog c ep new . parseExportLog)
recordExportTreeish newtree
-- Graft a tree ref into the git-annex branch. This is done -- Graft a tree ref into the git-annex branch. This is done
-- to ensure that it's available later, when getting exported files -- to ensure that it's available later, when getting exported files
-- from the remote. Since that could happen in another clone of the -- from the remote. Since that could happen in another clone of the
-- repository, the tree has to be kept available, even if it -- repository, the tree has to be kept available, even if it
-- doesn't end up being merged into the master branch. -- doesn't end up being merged into the master branch.
recordExportTreeish :: Git.Ref -> Annex () rememberExportTreeish :: Git.Ref -> Annex ()
recordExportTreeish t = void $ rememberExportTreeish t = void $
Annex.Branch.rememberTreeish t (asTopFilePath exportTreeGraftPoint) Annex.Branch.rememberTreeish t (asTopFilePath exportTreeGraftPoint)
-- | Record that an export to a special remote is under way. -- | Record that an export to a special remote is under way.
@ -111,7 +111,7 @@ recordExportUnderway remoteuuid ec = do
recordExport :: UUID -> Git.Ref -> ExportChange -> Annex () recordExport :: UUID -> Git.Ref -> ExportChange -> Annex ()
recordExport remoteuuid tree ec = do recordExport remoteuuid tree ec = do
when (oldTreeish ec /= [tree]) $ when (oldTreeish ec /= [tree]) $
recordExportTreeish tree rememberExportTreeish tree
recordExportUnderway remoteuuid ec recordExportUnderway remoteuuid ec
logExportExcluded :: UUID -> ((Git.Tree.TreeItem -> IO ()) -> Annex a) -> Annex a logExportExcluded :: UUID -> ((Git.Tree.TreeItem -> IO ()) -> Annex a) -> Annex a

View file

@ -75,7 +75,7 @@ mkRunState mk = do
return (mk tvar) return (mk tvar)
data P2PConnection = P2PConnection data P2PConnection = P2PConnection
{ connRepo :: Repo { connRepo :: Maybe Repo
, connCheckAuth :: (AuthToken -> Bool) , connCheckAuth :: (AuthToken -> Bool)
, connIhdl :: Handle , connIhdl :: Handle
, connOhdl :: Handle , connOhdl :: Handle
@ -90,7 +90,7 @@ data ClosableConnection conn
| ClosedConnection | ClosedConnection
-- P2PConnection using stdio. -- P2PConnection using stdio.
stdioP2PConnection :: Git.Repo -> P2PConnection stdioP2PConnection :: Maybe Git.Repo -> P2PConnection
stdioP2PConnection g = P2PConnection stdioP2PConnection g = P2PConnection
{ connRepo = g { connRepo = g
, connCheckAuth = const False , connCheckAuth = const False
@ -100,7 +100,7 @@ stdioP2PConnection g = P2PConnection
} }
-- Opens a connection to a peer. Does not authenticate with it. -- Opens a connection to a peer. Does not authenticate with it.
connectPeer :: Git.Repo -> P2PAddress -> IO P2PConnection connectPeer :: Maybe Git.Repo -> P2PAddress -> IO P2PConnection
connectPeer g (TorAnnex onionaddress onionport) = do connectPeer g (TorAnnex onionaddress onionport) = do
h <- setupHandle =<< connectHiddenService onionaddress onionport h <- setupHandle =<< connectHiddenService onionaddress onionport
return $ P2PConnection return $ P2PConnection
@ -154,8 +154,7 @@ setupHandle s = do
-- Purposefully incomplete interpreter of Proto. -- Purposefully incomplete interpreter of Proto.
-- --
-- This only runs Net actions. No Local actions will be run -- This only runs Net actions. No Local actions will be run
-- (those need the Annex monad) -- if the interpreter reaches any, -- (those need the Annex monad).
-- it returns Nothing.
runNetProto :: RunState -> P2PConnection -> Proto a -> IO (Either ProtoFailure a) runNetProto :: RunState -> P2PConnection -> Proto a -> IO (Either ProtoFailure a)
runNetProto runst conn = go runNetProto runst conn = go
where where
@ -286,19 +285,21 @@ runRelay runner (RelayHandle hout) (RelayHandle hin) =
go (v, _, _) = relayHelper runner v go (v, _, _) = relayHelper runner v
runRelayService :: P2PConnection -> RunProto IO -> Service -> IO (Either ProtoFailure ()) runRelayService :: P2PConnection -> RunProto IO -> Service -> IO (Either ProtoFailure ())
runRelayService conn runner service = runRelayService conn runner service = case connRepo conn of
withCreateProcess serviceproc' go Just repo -> withCreateProcess (serviceproc' repo) go
`catchNonAsync` (return . Left . ProtoFailureException) `catchNonAsync` (return . Left . ProtoFailureException)
Nothing -> return $ Left $ ProtoFailureMessage
"relaying to git not supported on this connection"
where where
cmd = case service of cmd = case service of
UploadPack -> "upload-pack" UploadPack -> "upload-pack"
ReceivePack -> "receive-pack" ReceivePack -> "receive-pack"
serviceproc = gitCreateProcess serviceproc repo = gitCreateProcess
[ Param cmd [ Param cmd
, File (fromRawFilePath (repoPath (connRepo conn))) , File (fromRawFilePath (repoPath repo))
] (connRepo conn) ] repo
serviceproc' = serviceproc serviceproc' repo = (serviceproc repo)
{ std_out = CreatePipe { std_out = CreatePipe
, std_in = CreatePipe , std_in = CreatePipe
} }

View file

@ -239,9 +239,9 @@ openP2PSshConnection r connpool = do
Nothing -> do Nothing -> do
liftIO $ rememberunsupported liftIO $ rememberunsupported
return Nothing return Nothing
Just (cmd, params) -> start cmd params =<< getRepo r Just (cmd, params) -> start cmd params
where where
start cmd params repo = liftIO $ do start cmd params = liftIO $ do
(Just from, Just to, Nothing, pid) <- createProcess $ (Just from, Just to, Nothing, pid) <- createProcess $
(proc cmd (toCommand params)) (proc cmd (toCommand params))
{ std_in = CreatePipe { std_in = CreatePipe
@ -249,7 +249,7 @@ openP2PSshConnection r connpool = do
} }
pidnum <- getPid pid pidnum <- getPid pid
let conn = P2P.P2PConnection let conn = P2P.P2PConnection
{ P2P.connRepo = repo { P2P.connRepo = Nothing
, P2P.connCheckAuth = const False , P2P.connCheckAuth = const False
, P2P.connIhdl = to , P2P.connIhdl = to
, P2P.connOhdl = from , P2P.connOhdl = from

View file

@ -143,7 +143,7 @@ withConnection u addr connpool a = bracketOnError get cache go
openConnection :: UUID -> P2PAddress -> Annex Connection openConnection :: UUID -> P2PAddress -> Annex Connection
openConnection u addr = do openConnection u addr = do
g <- Annex.gitRepo g <- Annex.gitRepo
v <- liftIO $ tryNonAsync $ connectPeer g addr v <- liftIO $ tryNonAsync $ connectPeer (Just g) addr
case v of case v of
Right conn -> do Right conn -> do
myuuid <- getUUID myuuid <- getUUID

View file

@ -111,7 +111,7 @@ serveClient th@(TransportHandle _ _ rd) u r q = bracket setup cleanup start
-- when the allowed set is changed. -- when the allowed set is changed.
allowed <- loadP2PAuthTokens allowed <- loadP2PAuthTokens
let conn = P2PConnection let conn = P2PConnection
{ connRepo = r { connRepo = Just r
, connCheckAuth = (`isAllowedAuthToken` allowed) , connCheckAuth = (`isAllowedAuthToken` allowed)
, connIhdl = h , connIhdl = h
, connOhdl = h , connOhdl = h
@ -146,7 +146,7 @@ transport (RemoteRepo r gc) url@(RemoteURI uri) th ichan ochan =
Nothing -> return () Nothing -> return ()
Just addr -> robustConnection 1 $ do Just addr -> robustConnection 1 $ do
g <- liftAnnex th Annex.gitRepo g <- liftAnnex th Annex.gitRepo
bracket (connectPeer g addr) closeConnection (go addr) bracket (connectPeer (Just g) addr) closeConnection (go addr)
where where
go addr conn = do go addr conn = do
myuuid <- liftAnnex th getUUID myuuid <- liftAnnex th getUUID

View file

@ -53,3 +53,4 @@ there are good and there are some bad days ;)
[[!meta author=yoh]] [[!meta author=yoh]]
[[!tag projects/openneuro]] [[!tag projects/openneuro]]
> [[fixed|done]] --[[Joey]]

View file

@ -0,0 +1,30 @@
[[!comment format=mdwn
username="joey"
subject="""comment 10"""
date="2024-06-10T14:36:37Z"
content="""
While I don't think this affects the ds002144 repository
(because the repository with the missing tree is dead), here's what happens
if the export.log's tree is missing, master has been reset to a previous tree,
which was exported earlier, and in a clone we try to get a file that is present
in both trees from the remote:
get foo (from d...) fatal: bad object f4815823941716de0f0fdf85e8aaba98d024d488
unknown export location
Note that the "bad object" message only appears the first time run.
Afterwards it only says "unknown export location".
Even if the tree object later somehow gets pulled in, it will keep failing,
because the exportdb at this point contains the tree sha and it won't try
to update from it again.
To recover from this situation, the user can make a change to
the tree (eg add a file), and export. It will complain one last time about
the bad object, and then the export.log gets fixed to contain an available
tree. However, any files that were in the missing tree that do not get
overwritten by that export will remain in the remote, without git-annex
knowing about them. If the remote has importtree=yes, importing from it
is another way to recover.
"""]]

View file

@ -0,0 +1,15 @@
[[!comment format=mdwn
username="joey"
subject="""comment 8"""
date="2024-06-07T17:59:43Z"
content="""
Fixed performTransitionsLocked to create the new git-annex branch
atomically.
Found another way this could happen, interrupting `git-annex export` after
it writes export.log but before it grafts the tree into the git-annex
branch. Fixed that one too.
So hopefully this won't happen to any more repositories with these fixes.
Still leaves the question of how to recover from the problem.
"""]]

View file

@ -0,0 +1,16 @@
[[!comment format=mdwn
username="joey"
subject="""comment 9"""
date="2024-06-07T20:25:27Z"
content="""
Note that at least in the case of ds002144, its git-annex branch does not
contain grafts of the missing trees. The grafts only get created in the
clone when dealing with a transition.
So, it seems that to recover from the problem, at least in the case of this
repository, it will be sufficient for git-annex to avoid regrafting trees
if the object is missing.
Done that, and so I suppose this bug can be closed. I'd be more satified if
I knew how this repository was produced though.
"""]]