proxying to exporttree=yes annexobjects=yes basically working

It works when using git-annex sync/push/assist, or when manually sending
all content to the proxied remote before pushing to the proxy remote.
But when the push comes before the content is sent, sending content does
not update the exported tree.
This commit is contained in:
Joey Hess 2024-08-06 14:18:30 -04:00
parent be5c86c248
commit 3289b1ad02
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
8 changed files with 164 additions and 54 deletions

View file

@ -19,12 +19,14 @@ import Remote.Helper.Ssh (openP2PShellConnection', closeP2PShellConnection)
import Annex.Concurrent import Annex.Concurrent
import Annex.Tmp import Annex.Tmp
import Annex.Verify import Annex.Verify
import Annex.UUID
import Logs.Proxy import Logs.Proxy
import Logs.Cluster import Logs.Cluster
import Logs.UUID import Logs.UUID
import Logs.Location import Logs.Location
import Utility.Tmp.Dir import Utility.Tmp.Dir
import Utility.Metered import Utility.Metered
import Git.Types
import Control.Concurrent.STM import Control.Concurrent.STM
import Control.Concurrent.Async import Control.Concurrent.Async
@ -249,9 +251,9 @@ proxySpecialRemote protoversion r ihdl ohdl owaitv oclosedv = go
{- Check if this repository can proxy for a specified remote uuid, {- Check if this repository can proxy for a specified remote uuid,
- and if so enable proxying for it. -} - and if so enable proxying for it. -}
checkCanProxy :: UUID -> UUID -> Annex Bool checkCanProxy :: UUID -> UUID -> Annex Bool
checkCanProxy remoteuuid ouruuid = do checkCanProxy remoteuuid myuuid = do
ourproxies <- M.lookup ouruuid <$> getProxies myproxies <- M.lookup myuuid <$> getProxies
checkCanProxy' ourproxies remoteuuid >>= \case checkCanProxy' myproxies remoteuuid >>= \case
Right v -> do Right v -> do
Annex.changeState $ \st -> st { Annex.proxyremote = Just v } Annex.changeState $ \st -> st { Annex.proxyremote = Just v }
return True return True
@ -267,33 +269,13 @@ checkCanProxy' (Just proxies) remoteuuid =
Just cu -> proxyforcluster cu Just cu -> proxyforcluster cu
Nothing -> proxyfor ps Nothing -> proxyfor ps
where where
-- This repository may have multiple remotes that access the same
-- repository. Proxy for the lowest cost one that is configured to
-- be used as a proxy.
proxyfor ps = do proxyfor ps = do
rs <- concat . Remote.byCost <$> Remote.remoteList rs <- concat . Remote.byCost <$> Remote.remoteList
myclusters <- annexClusters <$> Annex.getGitConfig myclusters <- annexClusters <$> Annex.getGitConfig
let sameuuid r = Remote.uuid r == remoteuuid case canProxyForRemote rs ps myclusters remoteuuid of
let samename r p = Remote.name r == proxyRemoteName p
case headMaybe (filter (\r -> sameuuid r && proxyisconfigured rs myclusters r && any (samename r) ps) rs) of
Nothing -> notconfigured Nothing -> notconfigured
Just r -> return (Right (Right r)) Just r -> return (Right (Right r))
-- Only proxy for a remote when the git configuration
-- allows it. This is important to prevent changes to
-- the git-annex branch causing unexpected proxying for remotes.
proxyisconfigured rs myclusters r
| remoteAnnexProxy (Remote.gitconfig r) = True
-- Proxy for remotes that are configured as cluster nodes.
| any (`M.member` myclusters) (fromMaybe [] $ remoteAnnexClusterNode $ Remote.gitconfig r) = True
-- Proxy for a remote when it is proxied by another remote
-- which is itself configured as a cluster gateway.
| otherwise = case remoteAnnexProxiedBy (Remote.gitconfig r) of
Just proxyuuid -> not $ null $
concatMap (remoteAnnexClusterGateway . Remote.gitconfig) $
filter (\p -> Remote.uuid p == proxyuuid) rs
Nothing -> False
proxyforcluster cu = do proxyforcluster cu = do
clusters <- getClusters clusters <- getClusters
if M.member cu (clusterUUIDs clusters) if M.member cu (clusterUUIDs clusters)
@ -305,6 +287,57 @@ checkCanProxy' (Just proxies) remoteuuid =
"not configured to proxy for repository " ++ fromUUIDDesc desc "not configured to proxy for repository " ++ fromUUIDDesc desc
Nothing -> return $ Left Nothing Nothing -> return $ Left Nothing
{- Remotes that this repository is configured to proxy for.
-
- When there are multiple remotes that access the same repository,
- this picks the lowest cost one that is configured to be used as a proxy.
-}
proxyForRemotes :: Annex [Remote]
proxyForRemotes = do
myuuid <- getUUID
(M.lookup myuuid <$> getProxies) >>= \case
Nothing -> return []
Just myproxies -> do
let myproxies' = S.toList myproxies
rs <- concat . Remote.byCost <$> Remote.remoteList
myclusters <- annexClusters <$> Annex.getGitConfig
return $ mapMaybe (canProxyForRemote rs myproxies' myclusters . Remote.uuid) rs
-- Only proxy for a remote when the git configuration allows it.
-- This is important to prevent changes to the git-annex branch
-- causing unexpected proxying for remotes.
canProxyForRemote
:: [Remote] -- ^ must be sorted by cost
-> [Proxy]
-> M.Map RemoteName ClusterUUID
-> UUID
-> (Maybe Remote)
canProxyForRemote rs myproxies myclusters remoteuuid =
headMaybe $ filter canproxy rs
where
canproxy r =
sameuuid r &&
proxyisconfigured r &&
any (isproxyfor r) myproxies
sameuuid r = Remote.uuid r == remoteuuid
isproxyfor r p =
proxyRemoteUUID p == remoteuuid &&
Remote.name r == proxyRemoteName p
proxyisconfigured r
| remoteAnnexProxy (Remote.gitconfig r) = True
-- Proxy for remotes that are configured as cluster nodes.
| any (`M.member` myclusters) (fromMaybe [] $ remoteAnnexClusterNode $ Remote.gitconfig r) = True
-- Proxy for a remote when it is proxied by another remote
-- which is itself configured as a cluster gateway.
| otherwise = case remoteAnnexProxiedBy (Remote.gitconfig r) of
Just proxyuuid -> not $ null $
concatMap (remoteAnnexClusterGateway . Remote.gitconfig) $
filter (\p -> Remote.uuid p == proxyuuid) rs
Nothing -> False
mkProxyMethods :: ProxyMethods mkProxyMethods :: ProxyMethods
mkProxyMethods = ProxyMethods mkProxyMethods = ProxyMethods
{ removedContent = \u k -> logChange k u InfoMissing { removedContent = \u k -> logChange k u InfoMissing

View file

@ -3,8 +3,10 @@ git-annex (10.20240732) UNRELEASED; urgency=medium
* Avoid loading cluster log at startup. * Avoid loading cluster log at startup.
* Remove debug output (to stderr) accidentially included in * Remove debug output (to stderr) accidentially included in
last version. last version.
* Remotes configured with exporttree=yes annexobjects=yes * Special remotes configured with exporttree=yes annexobjects=yes
can store objects in .git/annex/objects, as well as an exported tree. can store objects in .git/annex/objects, as well as an exported tree.
* Support proxying to special remotes configured with
exporttree=yes annexobjects=yes.
* git-remote-annex: Store objects in exportree=yes special remotes * git-remote-annex: Store objects in exportree=yes special remotes
in the same paths used by annexobjects=yes. in the same paths used by annexobjects=yes.

View file

@ -94,7 +94,10 @@ seek o = startConcurrency commandStages $ do
inRepo (Git.Ref.tree (exportTreeish o)) inRepo (Git.Ref.tree (exportTreeish o))
mtbcommitsha <- getExportCommit r (exportTreeish o) mtbcommitsha <- getExportCommit r (exportTreeish o)
seekExport r tree mtbcommitsha
seekExport :: Remote -> ExportFiltered Git.Ref -> Maybe (RemoteTrackingBranch, Sha) -> CommandSeek
seekExport r tree mtbcommitsha = do
db <- openDb (uuid r) db <- openDb (uuid r)
writeLockDbWhile db $ do writeLockDbWhile db $ do
changeExport r db tree changeExport r db tree

View file

@ -1,6 +1,6 @@
{- git-annex command {- git-annex command
- -
- Copyright 2017 Joey Hess <id@joeyh.name> - Copyright 2017-2024 Joey Hess <id@joeyh.name>
- -
- Licensed under the GNU AGPL version 3 or higher. - Licensed under the GNU AGPL version 3 or higher.
-} -}
@ -11,10 +11,21 @@ module Command.PostReceive where
import Command import Command
import qualified Annex import qualified Annex
import Git.Types
import Annex.UpdateInstead import Annex.UpdateInstead
import Annex.CurrentBranch import Annex.CurrentBranch
import Command.Sync (mergeLocal, prepMerge, mergeConfig, SyncOptions(..)) import Command.Sync (mergeLocal, prepMerge, mergeConfig, SyncOptions(..))
import Annex.Proxy
import Remote
import qualified Types.Remote as Remote
import Config
import Git.Types
import Git.Sha
import qualified Git.Ref
import Command.Export (filterExport, getExportCommit, seekExport)
import qualified Data.Set as S
import qualified Data.ByteString as B
import qualified Data.ByteString.Char8 as B8
-- This does not need to modify the git-annex branch to update the -- This does not need to modify the git-annex branch to update the
-- work tree, but auto-initialization might change the git-annex branch. -- work tree, but auto-initialization might change the git-annex branch.
@ -28,9 +39,55 @@ cmd = noCommit $
(withParams seek) (withParams seek)
seek :: CmdParams -> CommandSeek seek :: CmdParams -> CommandSeek
seek _ = whenM needUpdateInsteadEmulation $ do seek _ = do
fixPostReceiveHookEnv fixPostReceiveHookEnv
whenM needUpdateInsteadEmulation $
commandAction updateInsteadEmulation commandAction updateInsteadEmulation
proxyExportTree
updateInsteadEmulation :: CommandStart
updateInsteadEmulation = do
prepMerge
let o = def { notOnlyAnnexOption = True }
mc <- mergeConfig False
mergeLocal mc o =<< getCurrentBranch
proxyExportTree :: CommandSeek
proxyExportTree = do
rbs <- catMaybes <$> (mapM canexport =<< proxyForRemotes)
unless (null rbs) $ do
pushedbranches <- liftIO $
S.fromList . map snd . parseHookInput
<$> B.hGetContents stdin
let waspushed = flip S.member pushedbranches
case filter (waspushed . Git.Ref.branchRef . fst . snd) rbs of
[] -> return ()
rbs' -> forM_ rbs' $ \((r, b), _) -> go r b
where
canexport r = case remoteAnnexTrackingBranch (Remote.gitconfig r) of
Nothing -> return Nothing
Just branch ->
ifM (isExportSupported r)
( return (Just ((r, branch), splitRemoteAnnexTrackingBranchSubdir branch))
, return Nothing
)
go r b = inRepo (Git.Ref.tree b) >>= \case
Nothing -> return ()
Just t -> do
tree <- filterExport r t
mtbcommitsha <- getExportCommit r b
seekExport r tree mtbcommitsha
parseHookInput :: B.ByteString -> [((Sha, Sha), Ref)]
parseHookInput = mapMaybe parse . B8.lines
where
parse l = case B8.words l of
(oldb:newb:refb:[]) -> do
old <- extractSha oldb
new <- extractSha newb
return ((old, new), Ref refb)
_ -> Nothing
{- When run by the post-receive hook, the cwd is the .git directory, {- When run by the post-receive hook, the cwd is the .git directory,
- and GIT_DIR=. It's not clear why git does this. - and GIT_DIR=. It's not clear why git does this.
@ -50,9 +107,3 @@ fixPostReceiveHookEnv = do
} }
_ -> noop _ -> noop
updateInsteadEmulation :: CommandStart
updateInsteadEmulation = do
prepMerge
let o = def { notOnlyAnnexOption = True }
mc <- mergeConfig False
mergeLocal mc o =<< getCurrentBranch

View file

@ -87,8 +87,6 @@ import Utility.Tuple
import Control.Concurrent.MVar import Control.Concurrent.MVar
import qualified Data.Map as M import qualified Data.Map as M
import qualified Data.ByteString as S
import Data.Char
cmd :: Command cmd :: Command
cmd = withAnnexOptions [jobsOption, backendOption] $ cmd = withAnnexOptions [jobsOption, backendOption] $
@ -1154,12 +1152,3 @@ exportHasAnnexObjects = annexObjects . Remote.config
isThirdPartyPopulated :: Remote -> Bool isThirdPartyPopulated :: Remote -> Bool
isThirdPartyPopulated = Remote.thirdPartyPopulated . Remote.remotetype isThirdPartyPopulated = Remote.thirdPartyPopulated . Remote.remotetype
splitRemoteAnnexTrackingBranchSubdir :: Git.Ref -> (Git.Ref, Maybe TopFilePath)
splitRemoteAnnexTrackingBranchSubdir tb = (branch, subdir)
where
(b, p) = separate' (== (fromIntegral (ord ':'))) (Git.fromRef' tb)
branch = Git.Ref b
subdir = if S.null p
then Nothing
else Just (asTopFilePath p)

View file

@ -26,8 +26,12 @@ import Types.Availability
import Types.GitConfig import Types.GitConfig
import Types.RemoteConfig import Types.RemoteConfig
import Git.Types import Git.Types
import Git.FilePath
import Annex.SpecialRemote.Config import Annex.SpecialRemote.Config
import Data.Char
import qualified Data.ByteString as S
{- Looks up a setting in git config. This is not as efficient as using the {- Looks up a setting in git config. This is not as efficient as using the
- GitConfig type. -} - GitConfig type. -}
getConfig :: ConfigKey -> ConfigValue -> Annex ConfigValue getConfig :: ConfigKey -> ConfigValue -> Annex ConfigValue
@ -99,3 +103,12 @@ pidLockFile = ifM (annexPidLock <$> Annex.getGitConfig)
#else #else
pidLockFile = pure Nothing pidLockFile = pure Nothing
#endif #endif
splitRemoteAnnexTrackingBranchSubdir :: Git.Ref -> (Git.Ref, Maybe TopFilePath)
splitRemoteAnnexTrackingBranchSubdir tb = (branch, subdir)
where
(b, p) = separate' (== (fromIntegral (ord ':'))) (Git.fromRef' tb)
branch = Git.Ref b
subdir = if S.null p
then Nothing
else Just (asTopFilePath p)

View file

@ -17,6 +17,10 @@ for repositories that have an adjusted branch checked
out. The hook updates the work tree when run in such a repository, out. The hook updates the work tree when run in such a repository,
the same as running `git-annex merge` would. the same as running `git-annex merge` would.
When a repository is configured to proxy to a special remote with
exporttree=yes, the hook handles updating the tree exported to the special
remote.
# OPTIONS # OPTIONS
* The [[git-annex-common-options]](1) can be used. * The [[git-annex-common-options]](1) can be used.

View file

@ -31,8 +31,22 @@ Planned schedule of work:
## work notes ## work notes
* Working on `exportreeplus` branch which is groundwork for proxying to * Working on `exportreeplus` branch which is groundwork for proxying to
exporttree=yes special remotes. exporttree=yes special remotes. Need to merge it to master.
* files sent to proxied exporttree=yes remotes seem to get every line doubled?!
* `git-annex sync --content` now updates a proxied exporttree=yes special
remote! But, there are some messages like these that should be avoided:
Not updating export to origin-d to reflect changes to the tree, because export tracking is not enabled. (Set remote.origin-d.annex-tracking-branch to enable it.)
export origin-d 1
export not supported
failed
* These are only needed to support workflows other than `git-annex push`.
(Since a push sends all content to the proxied remote and then pushes
to the proxy, it happens to do things in an order where these are not
necessary.)
* `git-annex post-receive` of a proxied exporttree=yes special remote's * `git-annex post-receive` of a proxied exporttree=yes special remote's
annex-tracking-branch should check if the special remote contains all annex-tracking-branch should check if the special remote contains all
keys in the tree. If so, it can exporttree. If not, record keys in the tree. If so, it can exporttree. If not, record
@ -40,6 +54,7 @@ Planned schedule of work:
but better to avoid leaving it incomplete.) but better to avoid leaving it incomplete.)
* After a key is received, the proxy should check if it's the *last* key * After a key is received, the proxy should check if it's the *last* key
that is needed to complete the export, and exporttree when so. that is needed to complete the export, and exporttree when so.
* Handle cases where a single key is used by multiple files in the exported * Handle cases where a single key is used by multiple files in the exported
tree. Need to download from the special remote in order to export tree. Need to download from the special remote in order to export
multiple copies to it. multiple copies to it.