annexInsteadOf config

Added config `url.<base>.annexInsteadOf` corresponding to git's
`url.<base>.pushInsteadOf`, to configure the urls to use for accessing the
git-annex repositories on a server without needing to configure
remote.name.annexUrl in each repository.

While one use case for this would be rewriting urls to use annex+http,
I decided not to add any kind of special case for that. So while
git-annex p2phttp, when serving multiple repositories, needs an url
of eg "annex+http://example.com/git-annex/ for each of them, rewriting an
url like "https://example.com/git/foo/bar" with this config set to
"https://example.com/git/" will result in eg
"annex+http://example.com/git-annex/foo/bar", which p2phttp does not
support.

That seems better dealt with in either git-annex p2phttp or a http
middleware, rather than complicating the config with a special case for
annex+http.

Anyway, there are other use cases for this that don't involve annex+http.
This commit is contained in:
Joey Hess 2024-12-03 14:01:35 -04:00
parent 0404968d10
commit dd052dcba1
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
7 changed files with 79 additions and 41 deletions

View file

@ -1,5 +1,9 @@
git-annex (10.20241203) UNRELEASED; urgency=medium
* Added config `url.<base>.annexInsteadOf` corresponding to git's
`url.<base>.pushInsteadOf`, to configure the urls to use for accessing
the git-annex repositories on a server without needing to configure
remote.name.annexUrl in each repository.
* Work around git hash-object --stdin-paths's odd stripping of carriage
return from the end of the line (some windows infection), avoiding
crashing when the repo contains a filename ending in a carriage return.

View file

@ -89,7 +89,7 @@ remoteLocationIsSshUrl _ = False
parseRemoteLocation :: String -> Bool -> Repo -> RemoteLocation
parseRemoteLocation s knownurl repo = go
where
s' = calcloc s
s' = fromMaybe s $ insteadOfUrl s ".insteadof" $ fullconfig repo
go
#ifdef mingw32_HOST_OS
| dosstyle s' = RemotePath (dospath s')
@ -98,28 +98,6 @@ parseRemoteLocation s knownurl repo = go
| urlstyle s' = RemoteUrl s'
| knownurl && s' == s = RemoteUrl s'
| otherwise = RemotePath s'
-- insteadof config can rewrite remote location
calcloc l
| null insteadofs = l
| otherwise = replacement ++ drop (S.length bestvalue) l
where
replacement = decodeBS $ S.drop (S.length prefix) $
S.take (S.length bestkey - S.length suffix) bestkey
(bestkey, bestvalue) =
case maximumBy longestvalue insteadofs of
(ConfigKey k, ConfigValue v) -> (k, v)
(ConfigKey k, NoConfigValue) -> (k, mempty)
longestvalue (_, a) (_, b) = compare b a
insteadofs = filterconfig $ \case
(ConfigKey k, ConfigValue v) ->
prefix `S.isPrefixOf` k &&
suffix `S.isSuffixOf` k &&
v `S.isPrefixOf` encodeBS l
(_, NoConfigValue) -> False
filterconfig f = filter f $
concatMap splitconfigs $ M.toList $ fullconfig repo
splitconfigs (k, vs) = map (\v -> (k, v)) (NE.toList vs)
(prefix, suffix) = ("url." , ".insteadof")
-- git supports URIs that contain unescaped characters such as
-- spaces. So to test if it's a (git) URI, escape those.
urlstyle v = isURI (escapeURIString isUnescapedInURI v)
@ -147,3 +125,26 @@ parseRemoteLocation s knownurl repo = go
dosstyle = hasDrive
dospath = fromRawFilePath . fromInternalGitPath . toRawFilePath
#endif
insteadOfUrl :: String -> S.ByteString -> RepoFullConfig -> Maybe String
insteadOfUrl u configsuffix fullcfg
| null insteadofs = Nothing
| otherwise = Just $ replacement ++ drop (S.length bestvalue) u
where
replacement = decodeBS $ S.drop (S.length configprefix) $
S.take (S.length bestkey - S.length configsuffix) bestkey
(bestkey, bestvalue) =
case maximumBy longestvalue insteadofs of
(ConfigKey k, ConfigValue v) -> (k, v)
(ConfigKey k, NoConfigValue) -> (k, mempty)
longestvalue (_, a) (_, b) = compare b a
insteadofs = filterconfig $ \case
(ConfigKey k, ConfigValue v) ->
configprefix `S.isPrefixOf` k &&
configsuffix `S.isSuffixOf` k &&
v `S.isPrefixOf` encodeBS u
(_, NoConfigValue) -> False
filterconfig f = filter f $
concatMap splitconfigs $ M.toList fullcfg
splitconfigs (k, vs) = map (\v -> (k, v)) (NE.toList vs)
configprefix = "url."

View file

@ -41,9 +41,9 @@ data RepoLocation
data Repo = Repo
{ location :: RepoLocation
, config :: M.Map ConfigKey ConfigValue
, config :: RepoConfig
-- a given git config key can actually have multiple values
, fullconfig :: M.Map ConfigKey (NE.NonEmpty ConfigValue)
, fullconfig :: RepoFullConfig
-- remoteName holds the name used for this repo in some other
-- repo's list of remotes, when this repo is such a remote
, remoteName :: Maybe RemoteName
@ -60,6 +60,10 @@ data Repo = Repo
-- when using this repository.
, repoPathSpecifiedExplicitly :: Bool
} deriving (Show, Eq, Ord)
type RepoConfig = M.Map ConfigKey ConfigValue
type RepoFullConfig = M.Map ConfigKey (NE.NonEmpty ConfigValue)
newtype ConfigKey = ConfigKey S.ByteString
deriving (Ord, Eq)

View file

@ -98,8 +98,9 @@ locationField = Accepted "location"
list :: Bool -> Annex [Git.Repo]
list autoinit = do
c <- fromRepo Git.config
rs <- mapM (tweakurl c) =<< Annex.getGitRemotes
cfg <- fromRepo Git.config
fullcfg <- fromRepo Git.fullconfig
rs <- mapM (tweakurl cfg fullcfg) =<< Annex.getGitRemotes
rs' <- mapM (configRead autoinit) (filter (not . isGitRemoteAnnex) rs)
proxies <- doQuietAction getProxies
if proxies == mempty
@ -108,17 +109,20 @@ list autoinit = do
proxied <- listProxied proxies rs'
return (proxied++rs')
where
tweakurl c r = do
tweakurl cfg fullcfg r = do
let n = fromJust $ Git.remoteName r
case getAnnexUrl r c of
Just url | not (isP2PHttpProtocolUrl url) ->
case getAnnexUrl r cfg fullcfg of
Just url | not (isP2PHttpProtocolUrl url) ->
inRepo $ \g -> Git.Construct.remoteNamed n $
Git.Construct.fromRemoteLocation url
False g
_ -> return r
getAnnexUrl :: Git.Repo -> M.Map Git.ConfigKey Git.ConfigValue -> Maybe String
getAnnexUrl r c = Git.fromConfigValue <$> M.lookup (annexUrlConfigKey r) c
getAnnexUrl :: Git.Repo -> Git.RepoConfig -> Git.RepoFullConfig -> Maybe String
getAnnexUrl r cfg fullcfg =
(Git.fromConfigValue <$> M.lookup (annexUrlConfigKey r) cfg)
<|>
annexInsteadOfUrl fullcfg (Git.repoLocation r)
annexUrlConfigKey :: Git.Repo -> Git.ConfigKey
annexUrlConfigKey r = remoteConfig r "annexurl"

View file

@ -27,6 +27,7 @@ module Types.GitConfig (
proxyInheritedFields,
MkRemoteConfigKey,
mkRemoteConfigKey,
annexInsteadOfUrl,
) where
import Common
@ -35,7 +36,7 @@ import qualified Git.Config
import qualified Git.Construct
import Git.Types
import Git.ConfigTypes
import Git.Remote (isRemoteKey, isLegalName, remoteKeyToRemoteName)
import Git.Remote (isRemoteKey, isLegalName, remoteKeyToRemoteName, insteadOfUrl)
import Git.Branch (CommitMode(..))
import Git.Quote (QuotePath(..))
import Utility.DataUnits
@ -497,16 +498,14 @@ extractRemoteGitConfig r remotename = do
, remoteAnnexClusterGateway = fromMaybe [] $
(mapMaybe (mkClusterUUID . toUUID) . words)
<$> getmaybe ClusterGatewayField
, remoteUrl = case Git.Config.getMaybe (mkRemoteConfigKey remotename (remoteGitConfigKey UrlField)) r of
Just (ConfigValue b)
| B.null b -> Nothing
| otherwise -> Just (decodeBS b)
_ -> Nothing
, remoteUrl = getremoteurl
, remoteAnnexP2PHttpUrl =
case Git.Config.getMaybe (mkRemoteConfigKey remotename (remoteGitConfigKey AnnexUrlField)) r of
Just (ConfigValue b) ->
parseP2PHttpUrl (decodeBS b)
_ -> Nothing
_ -> parseP2PHttpUrl
=<< annexInsteadOfUrl (fullconfig r)
=<< getremoteurl
, remoteAnnexShell = getmaybe ShellField
, remoteAnnexSshOptions = getoptions SshOptionsField
, remoteAnnexRsyncOptions = getoptions RsyncOptionsField
@ -544,6 +543,11 @@ extractRemoteGitConfig r remotename = do
in Git.Config.getMaybe (mkRemoteConfigKey remotename k) r
<|> Git.Config.getMaybe (mkAnnexConfigKey k) r
getoptions k = fromMaybe [] $ words <$> getmaybe k
getremoteurl = case Git.Config.getMaybe (mkRemoteConfigKey remotename (remoteGitConfigKey UrlField)) r of
Just (ConfigValue b)
| B.null b -> Nothing
| otherwise -> Just (decodeBS b)
_ -> Nothing
data RemoteGitConfigField
= CostField
@ -742,3 +746,6 @@ remoteAnnexConfigEnd key = "annex-" <> key
remoteConfig :: RemoteNameable r => r -> B.ByteString -> ConfigKey
remoteConfig r key = ConfigKey $
"remote." <> encodeBS (getRemoteName r) <> "." <> key
annexInsteadOfUrl :: RepoFullConfig -> String -> Maybe String
annexInsteadOfUrl fullcfg loc = insteadOfUrl loc ".annexinsteadof" fullcfg

View file

@ -1572,10 +1572,25 @@ Remotes are configured using these settings in `.git/config`.
When this and the `remote.<name>.url` contain the same hostname,
and this is an annex+http(s) url, and that is also a http(s) url,
git-annex assumes that the same username and password can be used
for both urls. When password cacheing is configured, this allows
for both urls. When password caching is configured, this allows
you to only be prompted once for a password when using both git and
git-annex. See gitcredentials(7) for how to set up password caching.
* `url.<base>.annexInsteadOf`
This works similarly to git's `url.<base>.pushInsteadOf`, rewriting
a remote url that starts with the value of this config to instead
start with `<base>`.
The rewritten url is used by git-annex for accessing the remote,
and works the same as `remote.<name>.annexUrl`, including supporting
annex+http urls.
Note that git-annex also supports git's `url.<base>.insteadOf`
configuration. When both are set, the remote's url is first rewritten
by insteadOf, and that rewritten url can then be further
rewritten using annexInsteadOf.
* `remote.<name>.annex-uuid`
git-annex caches UUIDs of remote repositories here.

View file

@ -10,11 +10,14 @@ The same way `remote.<name>.annexUrl` corresponds to
`remote.<name>.pushUrl`.
You would need to set 2 configs, but the separation is clear.
And you could do it once in your global git config for whatever
And you could set it once in your global git config for whatever
servers you commonly use.
Another benefit to is that the new `git-annex p2phttp` server
needs annexUrl to be configured to a different url than the git url
when using it. annexInsteadOf would let that be configured a
single time for all urls on a given git server.
Update: Implemented that. Let me know if you think it solves your problem
well enough.
"""]]