proxying to exporttree=yes annexobjects=yes basically working

It works when using git-annex sync/push/assist, or when manually sending
all content to the proxied remote before pushing to the proxy remote.
But when the push comes before the content is sent, sending content does
not update the exported tree.
This commit is contained in:
Joey Hess 2024-08-06 14:18:30 -04:00
parent be5c86c248
commit 3289b1ad02
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
8 changed files with 164 additions and 54 deletions

View file

@ -19,12 +19,14 @@ import Remote.Helper.Ssh (openP2PShellConnection', closeP2PShellConnection)
import Annex.Concurrent
import Annex.Tmp
import Annex.Verify
import Annex.UUID
import Logs.Proxy
import Logs.Cluster
import Logs.UUID
import Logs.Location
import Utility.Tmp.Dir
import Utility.Metered
import Git.Types
import Control.Concurrent.STM
import Control.Concurrent.Async
@ -249,9 +251,9 @@ proxySpecialRemote protoversion r ihdl ohdl owaitv oclosedv = go
{- Check if this repository can proxy for a specified remote uuid,
- and if so enable proxying for it. -}
checkCanProxy :: UUID -> UUID -> Annex Bool
checkCanProxy remoteuuid ouruuid = do
ourproxies <- M.lookup ouruuid <$> getProxies
checkCanProxy' ourproxies remoteuuid >>= \case
checkCanProxy remoteuuid myuuid = do
myproxies <- M.lookup myuuid <$> getProxies
checkCanProxy' myproxies remoteuuid >>= \case
Right v -> do
Annex.changeState $ \st -> st { Annex.proxyremote = Just v }
return True
@ -267,32 +269,12 @@ checkCanProxy' (Just proxies) remoteuuid =
Just cu -> proxyforcluster cu
Nothing -> proxyfor ps
where
-- This repository may have multiple remotes that access the same
-- repository. Proxy for the lowest cost one that is configured to
-- be used as a proxy.
proxyfor ps = do
rs <- concat . Remote.byCost <$> Remote.remoteList
myclusters <- annexClusters <$> Annex.getGitConfig
let sameuuid r = Remote.uuid r == remoteuuid
let samename r p = Remote.name r == proxyRemoteName p
case headMaybe (filter (\r -> sameuuid r && proxyisconfigured rs myclusters r && any (samename r) ps) rs) of
case canProxyForRemote rs ps myclusters remoteuuid of
Nothing -> notconfigured
Just r -> return (Right (Right r))
-- Only proxy for a remote when the git configuration
-- allows it. This is important to prevent changes to
-- the git-annex branch causing unexpected proxying for remotes.
proxyisconfigured rs myclusters r
| remoteAnnexProxy (Remote.gitconfig r) = True
-- Proxy for remotes that are configured as cluster nodes.
| any (`M.member` myclusters) (fromMaybe [] $ remoteAnnexClusterNode $ Remote.gitconfig r) = True
-- Proxy for a remote when it is proxied by another remote
-- which is itself configured as a cluster gateway.
| otherwise = case remoteAnnexProxiedBy (Remote.gitconfig r) of
Just proxyuuid -> not $ null $
concatMap (remoteAnnexClusterGateway . Remote.gitconfig) $
filter (\p -> Remote.uuid p == proxyuuid) rs
Nothing -> False
proxyforcluster cu = do
clusters <- getClusters
@ -305,6 +287,57 @@ checkCanProxy' (Just proxies) remoteuuid =
"not configured to proxy for repository " ++ fromUUIDDesc desc
Nothing -> return $ Left Nothing
{- Remotes that this repository is configured to proxy for.
-
- When there are multiple remotes that access the same repository,
- this picks the lowest cost one that is configured to be used as a proxy.
-}
proxyForRemotes :: Annex [Remote]
proxyForRemotes = do
myuuid <- getUUID
(M.lookup myuuid <$> getProxies) >>= \case
Nothing -> return []
Just myproxies -> do
let myproxies' = S.toList myproxies
rs <- concat . Remote.byCost <$> Remote.remoteList
myclusters <- annexClusters <$> Annex.getGitConfig
return $ mapMaybe (canProxyForRemote rs myproxies' myclusters . Remote.uuid) rs
-- Only proxy for a remote when the git configuration allows it.
-- This is important to prevent changes to the git-annex branch
-- causing unexpected proxying for remotes.
canProxyForRemote
:: [Remote] -- ^ must be sorted by cost
-> [Proxy]
-> M.Map RemoteName ClusterUUID
-> UUID
-> (Maybe Remote)
canProxyForRemote rs myproxies myclusters remoteuuid =
headMaybe $ filter canproxy rs
where
canproxy r =
sameuuid r &&
proxyisconfigured r &&
any (isproxyfor r) myproxies
sameuuid r = Remote.uuid r == remoteuuid
isproxyfor r p =
proxyRemoteUUID p == remoteuuid &&
Remote.name r == proxyRemoteName p
proxyisconfigured r
| remoteAnnexProxy (Remote.gitconfig r) = True
-- Proxy for remotes that are configured as cluster nodes.
| any (`M.member` myclusters) (fromMaybe [] $ remoteAnnexClusterNode $ Remote.gitconfig r) = True
-- Proxy for a remote when it is proxied by another remote
-- which is itself configured as a cluster gateway.
| otherwise = case remoteAnnexProxiedBy (Remote.gitconfig r) of
Just proxyuuid -> not $ null $
concatMap (remoteAnnexClusterGateway . Remote.gitconfig) $
filter (\p -> Remote.uuid p == proxyuuid) rs
Nothing -> False
mkProxyMethods :: ProxyMethods
mkProxyMethods = ProxyMethods
{ removedContent = \u k -> logChange k u InfoMissing

View file

@ -3,8 +3,10 @@ git-annex (10.20240732) UNRELEASED; urgency=medium
* Avoid loading cluster log at startup.
* Remove debug output (to stderr) accidentially included in
last version.
* Remotes configured with exporttree=yes annexobjects=yes
* Special remotes configured with exporttree=yes annexobjects=yes
can store objects in .git/annex/objects, as well as an exported tree.
* Support proxying to special remotes configured with
exporttree=yes annexobjects=yes.
* git-remote-annex: Store objects in exportree=yes special remotes
in the same paths used by annexobjects=yes.

View file

@ -94,7 +94,10 @@ seek o = startConcurrency commandStages $ do
inRepo (Git.Ref.tree (exportTreeish o))
mtbcommitsha <- getExportCommit r (exportTreeish o)
seekExport r tree mtbcommitsha
seekExport :: Remote -> ExportFiltered Git.Ref -> Maybe (RemoteTrackingBranch, Sha) -> CommandSeek
seekExport r tree mtbcommitsha = do
db <- openDb (uuid r)
writeLockDbWhile db $ do
changeExport r db tree

View file

@ -1,6 +1,6 @@
{- git-annex command
-
- Copyright 2017 Joey Hess <id@joeyh.name>
- Copyright 2017-2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
@ -11,10 +11,21 @@ module Command.PostReceive where
import Command
import qualified Annex
import Git.Types
import Annex.UpdateInstead
import Annex.CurrentBranch
import Command.Sync (mergeLocal, prepMerge, mergeConfig, SyncOptions(..))
import Annex.Proxy
import Remote
import qualified Types.Remote as Remote
import Config
import Git.Types
import Git.Sha
import qualified Git.Ref
import Command.Export (filterExport, getExportCommit, seekExport)
import qualified Data.Set as S
import qualified Data.ByteString as B
import qualified Data.ByteString.Char8 as B8
-- This does not need to modify the git-annex branch to update the
-- work tree, but auto-initialization might change the git-annex branch.
@ -28,9 +39,55 @@ cmd = noCommit $
(withParams seek)
seek :: CmdParams -> CommandSeek
seek _ = whenM needUpdateInsteadEmulation $ do
seek _ = do
fixPostReceiveHookEnv
commandAction updateInsteadEmulation
whenM needUpdateInsteadEmulation $
commandAction updateInsteadEmulation
proxyExportTree
updateInsteadEmulation :: CommandStart
updateInsteadEmulation = do
prepMerge
let o = def { notOnlyAnnexOption = True }
mc <- mergeConfig False
mergeLocal mc o =<< getCurrentBranch
proxyExportTree :: CommandSeek
proxyExportTree = do
rbs <- catMaybes <$> (mapM canexport =<< proxyForRemotes)
unless (null rbs) $ do
pushedbranches <- liftIO $
S.fromList . map snd . parseHookInput
<$> B.hGetContents stdin
let waspushed = flip S.member pushedbranches
case filter (waspushed . Git.Ref.branchRef . fst . snd) rbs of
[] -> return ()
rbs' -> forM_ rbs' $ \((r, b), _) -> go r b
where
canexport r = case remoteAnnexTrackingBranch (Remote.gitconfig r) of
Nothing -> return Nothing
Just branch ->
ifM (isExportSupported r)
( return (Just ((r, branch), splitRemoteAnnexTrackingBranchSubdir branch))
, return Nothing
)
go r b = inRepo (Git.Ref.tree b) >>= \case
Nothing -> return ()
Just t -> do
tree <- filterExport r t
mtbcommitsha <- getExportCommit r b
seekExport r tree mtbcommitsha
parseHookInput :: B.ByteString -> [((Sha, Sha), Ref)]
parseHookInput = mapMaybe parse . B8.lines
where
parse l = case B8.words l of
(oldb:newb:refb:[]) -> do
old <- extractSha oldb
new <- extractSha newb
return ((old, new), Ref refb)
_ -> Nothing
{- When run by the post-receive hook, the cwd is the .git directory,
- and GIT_DIR=. It's not clear why git does this.
@ -50,9 +107,3 @@ fixPostReceiveHookEnv = do
}
_ -> noop
updateInsteadEmulation :: CommandStart
updateInsteadEmulation = do
prepMerge
let o = def { notOnlyAnnexOption = True }
mc <- mergeConfig False
mergeLocal mc o =<< getCurrentBranch

View file

@ -87,8 +87,6 @@ import Utility.Tuple
import Control.Concurrent.MVar
import qualified Data.Map as M
import qualified Data.ByteString as S
import Data.Char
cmd :: Command
cmd = withAnnexOptions [jobsOption, backendOption] $
@ -1154,12 +1152,3 @@ exportHasAnnexObjects = annexObjects . Remote.config
isThirdPartyPopulated :: Remote -> Bool
isThirdPartyPopulated = Remote.thirdPartyPopulated . Remote.remotetype
splitRemoteAnnexTrackingBranchSubdir :: Git.Ref -> (Git.Ref, Maybe TopFilePath)
splitRemoteAnnexTrackingBranchSubdir tb = (branch, subdir)
where
(b, p) = separate' (== (fromIntegral (ord ':'))) (Git.fromRef' tb)
branch = Git.Ref b
subdir = if S.null p
then Nothing
else Just (asTopFilePath p)

View file

@ -26,8 +26,12 @@ import Types.Availability
import Types.GitConfig
import Types.RemoteConfig
import Git.Types
import Git.FilePath
import Annex.SpecialRemote.Config
import Data.Char
import qualified Data.ByteString as S
{- Looks up a setting in git config. This is not as efficient as using the
- GitConfig type. -}
getConfig :: ConfigKey -> ConfigValue -> Annex ConfigValue
@ -99,3 +103,12 @@ pidLockFile = ifM (annexPidLock <$> Annex.getGitConfig)
#else
pidLockFile = pure Nothing
#endif
splitRemoteAnnexTrackingBranchSubdir :: Git.Ref -> (Git.Ref, Maybe TopFilePath)
splitRemoteAnnexTrackingBranchSubdir tb = (branch, subdir)
where
(b, p) = separate' (== (fromIntegral (ord ':'))) (Git.fromRef' tb)
branch = Git.Ref b
subdir = if S.null p
then Nothing
else Just (asTopFilePath p)

View file

@ -17,6 +17,10 @@ for repositories that have an adjusted branch checked
out. The hook updates the work tree when run in such a repository,
the same as running `git-annex merge` would.
When a repository is configured to proxy to a special remote with
exporttree=yes, the hook handles updating the tree exported to the special
remote.
# OPTIONS
* The [[git-annex-common-options]](1) can be used.

View file

@ -31,15 +31,30 @@ Planned schedule of work:
## work notes
* Working on `exportreeplus` branch which is groundwork for proxying to
exporttree=yes special remotes.
exporttree=yes special remotes. Need to merge it to master.
* files sent to proxied exporttree=yes remotes seem to get every line doubled?!
* `git-annex sync --content` now updates a proxied exporttree=yes special
remote! But, there are some messages like these that should be avoided:
Not updating export to origin-d to reflect changes to the tree, because export tracking is not enabled. (Set remote.origin-d.annex-tracking-branch to enable it.)
export origin-d 1
export not supported
failed
* These are only needed to support workflows other than `git-annex push`.
(Since a push sends all content to the proxied remote and then pushes
to the proxy, it happens to do things in an order where these are not
necessary.)
* `git-annex post-receive` of a proxied exporttree=yes special remote's
annex-tracking-branch should check if the special remote contains all
keys in the tree. If so, it can exporttree. If not, record
the keys that are needed. (It could always exporttree,
but better to avoid leaving it incomplete.)
* After a key is received, the proxy should check if it's the *last* key
that is needed to complete the export, and exporttree when so.
* `git-annex post-receive` of a proxied exporttree=yes special remote's
annex-tracking-branch should check if the special remote contains all
keys in the tree. If so, it can exporttree. If not, record
the keys that are needed. (It could always exporttree,
but better to avoid leaving it incomplete.)
* After a key is received, the proxy should check if it's the *last* key
that is needed to complete the export, and exporttree when so.
* Handle cases where a single key is used by multiple files in the exported
tree. Need to download from the special remote in order to export
multiple copies to it.