git clone support for git-remote-annex

Also support using annex:: urls that specify the whole special remote
config.

Both of these cases need a special remote to be initialized enough to
use it, which means writing to .git/config but not to the git-annex
branch. When cloning, the remote is left set up in .git/config,
so further use of it, by git-annex or git-remote-annex will work. When
using git with an annex:: url, a temporary remote is written to
.git/config, but then removed at the end.

While that's a little bit ugly, the fact is that the Remote interface
expects that it's ok to set git configs of the remote that is being
initialized. And it's nowhere near as ugly as the alternative of making
a temporary git repository and initializing the special remote in there.

Cloning from a repository that does not contain a git-annex branch and
then later running git-annex init is currently broken, although I've
gotten most of the way there to supporting it.
See cleanupInitialization FIXME.

Special shout out to git clone for running gitremote-helpers with
GIT_DIR set, but not in the git repository and with GIT_WORK_TREE not
set. Resulting in needing the fixupRepo hack.

Sponsored-by: unqueued on Patreon
This commit is contained in:
Joey Hess 2024-05-08 16:55:45 -04:00
parent df5011ec43
commit 59fc2005ec
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
4 changed files with 196 additions and 45 deletions

View file

@ -1,6 +1,6 @@
{- git-annex repository initialization {- git-annex repository initialization
- -
- Copyright 2011-2022 Joey Hess <id@joeyh.name> - Copyright 2011-2024 Joey Hess <id@joeyh.name>
- -
- Licensed under the GNU AGPL version 3 or higher. - Licensed under the GNU AGPL version 3 or higher.
-} -}
@ -12,6 +12,7 @@ module Annex.Init (
checkInitializeAllowed, checkInitializeAllowed,
ensureInitialized, ensureInitialized,
autoInitialize, autoInitialize,
autoInitialize',
isInitialized, isInitialized,
initialize, initialize,
initialize', initialize',
@ -256,10 +257,13 @@ guardSafeToUseRepo a = ifM (inRepo Git.Config.checkRepoConfigInaccessible)
- Checks repository version and handles upgrades too. - Checks repository version and handles upgrades too.
-} -}
autoInitialize :: Annex [Remote] -> Annex () autoInitialize :: Annex [Remote] -> Annex ()
autoInitialize remotelist = getInitializedVersion >>= maybe needsinit checkUpgrade autoInitialize = autoInitialize' autoInitializeAllowed
autoInitialize' :: Annex Bool -> Annex [Remote] -> Annex ()
autoInitialize' check remotelist = getInitializedVersion >>= maybe needsinit checkUpgrade
where where
needsinit = needsinit =
whenM (initializeAllowed <&&> autoInitializeAllowed) $ do whenM (initializeAllowed <&&> check) $ do
initialize Nothing Nothing initialize Nothing Nothing
autoEnableSpecialRemotes remotelist autoEnableSpecialRemotes remotelist

View file

@ -1,6 +1,6 @@
{- git-annex special remote configuration {- git-annex special remote configuration
- -
- Copyright 2019-2023 Joey Hess <id@joeyh.name> - Copyright 2019-2024 Joey Hess <id@joeyh.name>
- -
- Licensed under the GNU AGPL version 3 or higher. - Licensed under the GNU AGPL version 3 or higher.
-} -}

View file

@ -13,30 +13,48 @@ import Annex.Common
import qualified Annex import qualified Annex
import qualified Remote import qualified Remote
import qualified Git.CurrentRepo import qualified Git.CurrentRepo
import qualified Git
import qualified Git.Ref import qualified Git.Ref
import qualified Git.Branch import qualified Git.Branch
import qualified Git.Bundle import qualified Git.Bundle
import Git.Types import qualified Git.Remote
import Backend.GitRemoteAnnex import qualified Git.Remote.Remove
import qualified Annex.SpecialRemote as SpecialRemote
import qualified Annex.Branch
import qualified Types.Remote as R
import Annex.Transfer import Annex.Transfer
import Types.Remote import Backend.GitRemoteAnnex
import Config
import Types.RemoteConfig
import Types.ProposedAccepted
import Types.Key import Types.Key
import Network.URI import Types.GitConfig
import Git.Types
import Logs.Difference
import Annex.Init
import Annex.Content
import Remote.List
import Remote.List.Util
import Utility.Tmp import Utility.Tmp
import Utility.Env
import Utility.Metered import Utility.Metered
import Network.URI
import qualified Data.ByteString as B import qualified Data.ByteString as B
import qualified Data.ByteString.Char8 as B8 import qualified Data.ByteString.Char8 as B8
import qualified Data.Map.Strict as M import qualified Data.Map.Strict as M
import System.FilePath.ByteString as P
run :: [String] -> IO () run :: [String] -> IO ()
run (remotename:url:[]) = run (remotename:url:[]) =
-- git strips the "annex::" prefix of the url -- git strips the "annex::" prefix of the url
-- when running this command, so add it back -- when running this command, so add it back
let url' = "annex::" ++ url let url' = "annex::" ++ url
in case parseSpecialRemoteNameUrl remotename url' of in case parseSpecialRemoteNameUrl remotename url' of
Left e -> giveup e Left e -> giveup e
Right src -> do Right src -> do
state <- Annex.new =<< Git.CurrentRepo.get repo <- getRepo
state <- Annex.new repo
Annex.eval state (run' src) Annex.eval state (run' src)
run (_remotename:[]) = giveup "remote url not configured" run (_remotename:[]) = giveup "remote url not configured"
run _ = giveup "expected remote name and url parameters" run _ = giveup "expected remote name and url parameters"
@ -45,10 +63,10 @@ run' :: SpecialRemoteConfig -> Annex ()
run' src = run' src =
-- Prevent any usual git-annex output to stdout, because -- Prevent any usual git-annex output to stdout, because
-- the output of this command is being parsed by git. -- the output of this command is being parsed by git.
doQuietAction $ do doQuietAction $
rmt <- getSpecialRemote src withSpecialRemote src $ \rmt -> do
ls <- lines <$> liftIO getContents ls <- lines <$> liftIO getContents
go rmt ls emptyState go rmt ls emptyState
where where
go rmt (l:ls) st = go rmt (l:ls) st =
let (c, v) = splitLine l let (c, v) = splitLine l
@ -198,7 +216,9 @@ splitLine l =
data SpecialRemoteConfig data SpecialRemoteConfig
= SpecialRemoteConfig = SpecialRemoteConfig
{ specialRemoteUUID :: UUID { specialRemoteUUID :: UUID
, specialRemoteParams :: [(String, String)] , specialRemoteConfig :: RemoteConfig
, specialRemoteName :: Maybe RemoteName
, specialRemoteUrl :: String
} }
| ExistingSpecialRemote RemoteName | ExistingSpecialRemote RemoteName
deriving (Show) deriving (Show)
@ -212,48 +232,114 @@ parseSpecialRemoteNameUrl :: String -> String -> Either String SpecialRemoteConf
parseSpecialRemoteNameUrl remotename url parseSpecialRemoteNameUrl remotename url
| url == "annex::" && remotename /= url = Right $ | url == "annex::" && remotename /= url = Right $
ExistingSpecialRemote remotename ExistingSpecialRemote remotename
| otherwise = parseSpecialRemoteUrl url | "annex::" `isPrefixOf` remotename = parseSpecialRemoteUrl url Nothing
| otherwise = parseSpecialRemoteUrl url (Just remotename)
parseSpecialRemoteUrl :: String -> Either String SpecialRemoteConfig parseSpecialRemoteUrl :: String -> Maybe RemoteName -> Either String SpecialRemoteConfig
parseSpecialRemoteUrl url = case parseURI url of parseSpecialRemoteUrl url remotename = case parseURI url of
Nothing -> Left "URL parse failed" Nothing -> Left "URL parse failed"
Just u -> case uriScheme u of Just u -> case uriScheme u of
"annex:" -> case uriPath u of "annex:" -> case uriPath u of
"" -> Left "annex: URL did not include a UUID" "" -> Left "annex: URL did not include a UUID"
(':':p) -> Right $ SpecialRemoteConfig (':':p) -> Right $ SpecialRemoteConfig
{ specialRemoteUUID = toUUID p { specialRemoteUUID = toUUID p
, specialRemoteParams = parsequery u , specialRemoteConfig = parsequery u
, specialRemoteName = remotename
, specialRemoteUrl = url
} }
_ -> Left "annex: URL malformed" _ -> Left "annex: URL malformed"
_ -> Left "Not an annex: URL" _ -> Left "Not an annex: URL"
where where
parsequery u = map parsekv $ splitc '&' (drop 1 (uriQuery u)) parsequery u = M.fromList $
map parsekv $ splitc '&' (drop 1 (uriQuery u))
parsekv kv = parsekv kv =
let (k, sv) = break (== '=') kv let (k, sv) = break (== '=') kv
v = if null sv then sv else drop 1 sv v = if null sv then sv else drop 1 sv
in (unEscapeString k, unEscapeString v) in (Proposed (unEscapeString k), Proposed (unEscapeString v))
getSpecialRemote :: SpecialRemoteConfig -> Annex Remote -- Runs an action with a Remote as specified by the SpecialRemoteConfig.
getSpecialRemote (ExistingSpecialRemote remotename) = withSpecialRemote :: SpecialRemoteConfig -> (Remote -> Annex a) -> Annex a
withSpecialRemote (ExistingSpecialRemote remotename) a =
getEnabledSpecialRemoteByName remotename >>=
maybe (giveup $ "There is no special remote named " ++ remotename)
a
withSpecialRemote cfg@(SpecialRemoteConfig {}) a = case specialRemoteName cfg of
-- The name could be the name of an existing special remote,
-- if so use it as long as its UUID matches the UUID from the url.
Just remotename -> getEnabledSpecialRemoteByName remotename >>= \case
Just rmt
| R.uuid rmt == specialRemoteUUID cfg -> a rmt
| otherwise -> giveup $ "The uuid in the annex:: url does not match the uuid of the remote named " ++ remotename
-- When cloning from an annex:: url,
-- this is used to set up the origin remote.
Nothing -> (initremote remotename >>= a)
`finally` cleanupInitialization
Nothing -> inittempremote
`finally` cleanupInitialization
where
-- Initialize a new special remote with the provided configuration
-- and name.
--
-- The configuration is not stored in the git-annex branch, because
-- it's expected that the git repository stored on the special
-- remote includes its configuration, perhaps under a different
-- name, and perhaps slightly different (when the annex:: url
-- omitted some unimportant part of the configuration).
initremote remotename = do
let c = M.insert SpecialRemote.nameField (Proposed remotename)
(specialRemoteConfig cfg)
t <- either giveup return (SpecialRemote.findType c)
dummycfg <- liftIO dummyRemoteGitConfig
(c', _u) <- R.setup t R.Init (Just (specialRemoteUUID cfg))
Nothing c dummycfg
`onException` cleanupremote remotename
setConfig (remoteConfig c' "url") (specialRemoteUrl cfg)
remotesChanged
getEnabledSpecialRemoteByName remotename >>= \case
Just rmt -> case checkSpecialRemoteProblems rmt of
Nothing -> return rmt
Just problem -> do
cleanupremote remotename
giveup problem
Nothing -> do
cleanupremote remotename
giveup "Unable to find special remote after setup."
-- Temporarily initialize a special remote, and remove it after
-- the action is run.
inittempremote =
let remotename = Git.Remote.makeLegalName $
"annex-temp-" ++ fromUUID (specialRemoteUUID cfg)
in bracket
(initremote remotename)
(const $ cleanupremote remotename)
a
cleanupremote remotename = do
l <- inRepo Git.Remote.listRemotes
when (remotename `elem` l) $
inRepo $ Git.Remote.Remove.remove remotename
-- When a special remote has already been enabled, just use it.
getEnabledSpecialRemoteByName :: RemoteName -> Annex (Maybe Remote)
getEnabledSpecialRemoteByName remotename =
Remote.byNameOnly remotename >>= \case Remote.byNameOnly remotename >>= \case
Just rmt -> if thirdPartyPopulated (remotetype rmt) Nothing -> return Nothing
then giveup "Cannot use this thirdparty-populated special remote as a git remote" Just rmt ->
else return rmt maybe (return (Just rmt)) giveup
Nothing -> giveup $ "There is no special remote named " ++ remotename (checkSpecialRemoteProblems rmt)
getSpecialRemote src@(SpecialRemoteConfig {})
-- Given the configuration of a special remote, create a -- Avoid using special remotes that are thirdparty populated, because
-- Remote object to access the special remote. -- there is no way to push the git repository keys into one.
-- This needs to avoid storing the configuration in the git-annex --
-- branch (which would be redundant and also the configuration -- XXX Avoid using special remotes that are encrypted by key
-- provided may differ in some small way from the configuration -- material stored in the git repository, since that would present a
-- that is stored in the git repository inside the remote, which -- chicken and egg problem when cloning.
-- should not be changed). It also needs to avoid creating a git checkSpecialRemoteProblems :: Remote -> Maybe String
-- remote in .git/config. checkSpecialRemoteProblems rmt
| otherwise = error "TODO conjure up a new special remote out of thin air" | R.thirdPartyPopulated (R.remotetype rmt) =
-- XXX one way to do it would be to make a temporary git repo, Just "Cannot use this thirdparty-populated special remote as a git remote"
-- initremote in there, and use that for accessing the special | otherwise = Nothing
-- remote, rather than the current git repo. But can this be
-- avoided?
-- The manifest contains an ordered list of git bundle keys. -- The manifest contains an ordered list of git bundle keys.
newtype Manifest = Manifest { inManifest :: [Key] } newtype Manifest = Manifest { inManifest :: [Key] }
@ -268,12 +354,12 @@ newtype Manifest = Manifest { inManifest :: [Key] }
-- the usual Annex.Transfer.download. The content of manifests is not -- the usual Annex.Transfer.download. The content of manifests is not
-- stable, and so it needs to re-download it fresh every time. -- stable, and so it needs to re-download it fresh every time.
downloadManifest :: Remote -> Annex Manifest downloadManifest :: Remote -> Annex Manifest
downloadManifest rmt = ifM (checkPresent rmt mk) downloadManifest rmt = ifM (R.checkPresent rmt mk)
( withTmpFile "GITMANIFEST" $ \tmp tmph -> do ( withTmpFile "GITMANIFEST" $ \tmp tmph -> do
liftIO $ hClose tmph liftIO $ hClose tmph
_ <- retrieveKeyFile rmt mk _ <- R.retrieveKeyFile rmt mk
(AssociatedFile Nothing) tmp (AssociatedFile Nothing) tmp
nullMeterUpdate NoVerify nullMeterUpdate R.NoVerify
ks <- map deserializeKey' . B8.lines <$> liftIO (B.readFile tmp) ks <- map deserializeKey' . B8.lines <$> liftIO (B.readFile tmp)
Manifest <$> checkvalid [] ks Manifest <$> checkvalid [] ks
, return (Manifest []) , return (Manifest [])
@ -345,3 +431,58 @@ updateTrackingRefs rmt new = do
case M.lookup r oldmap of case M.lookup r oldmap of
Just s' | s' == s -> noop Just s' | s' == s -> noop
_ -> inRepo $ Git.Branch.update' r s _ -> inRepo $ Git.Branch.update' r s
-- git clone does not bother to set GIT_WORK_TREE when running this
-- program, and it does not run it inside the new git repo either.
-- GIT_DIR is set to the new git directory. So, have to override
-- the worktree to be the parent of the gitdir.
getRepo :: IO Repo
getRepo = getEnv "GIT_WORK_TREE" >>= \case
Just _ -> Git.CurrentRepo.get
Nothing -> fixup <$> Git.CurrentRepo.get
where
fixup r@(Repo { location = loc@(Local { worktree = Just _ }) }) =
r { location = loc { worktree = Just (P.takeDirectory (gitdir loc)) } }
fixup r = r
-- This is run after git has used this process to fetch or push from a
-- special remote that was specified using a git-annex url. If the git
-- repository was not initialized for use by git-annex already, it is still
-- not initialized at this point.
--
-- It's important that initialization not be done by this process until
-- git has fetched any git-annex branch from the special remote. That
-- git-annex branch may have Differences, and prematurely initializing the
-- local repository would then create a git-annex branch that can't merge
-- with the one from the special remote.
--
-- If there is still not a sibling git-annex branch, this deletes all annex
-- objects for git bundles from the annex objects directory, and deletes
-- the annex objects directory. That is necessary to avoid the
-- Annex.Init.objectDirNotPresent check preventing a later initialization.
-- And if the later initialization includes Differences, the git bundle
-- objects downloaded by this process would be in the wrong locations.
--
-- When there is now a sibling git-annex branch, this handles
-- initialization. When the initialized git-annex branch has Differences,
-- the git bundle objects are in the wrong place, so have to be deleted.
--
-- FIXME git-annex branch is unfortunately created during git clone from a
-- special remote. Should not be for this to work.
cleanupInitialization :: Annex ()
cleanupInitialization = ifM Annex.Branch.hasSibling
( do
autoInitialize' (pure True) remoteList
differences <- allDifferences <$> recordedDifferences
when (differences /= mempty) $
deletebundleobjects
, deletebundleobjects
)
where
deletebundleobjects = do
annexobjectdir <- fromRepo gitAnnexObjectDir
ks <- listKeys InAnnex
forM_ ks $ \k -> case fromKey keyVariety k of
GitBundleKey -> lockContentForRemoval k noop removeAnnex
_ -> noop
void $ liftIO $ tryIO $ removeDirectory (decodeBS annexobjectdir)

View file

@ -1,6 +1,6 @@
{- git remote stuff {- git remote stuff
- -
- Copyright 2012-2021 Joey Hess <id@joeyh.name> - Copyright 2012-2024 Joey Hess <id@joeyh.name>
- -
- Licensed under the GNU AGPL version 3 or higher. - Licensed under the GNU AGPL version 3 or higher.
-} -}
@ -13,6 +13,7 @@ module Git.Remote where
import Common import Common
import Git import Git
import Git.Types import Git.Types
import Git.Command
import Data.Char import Data.Char
import qualified Data.Map as M import qualified Data.Map as M
@ -23,6 +24,11 @@ import Network.URI
import Git.FilePath import Git.FilePath
#endif #endif
{- Lists all currently existing git remotes. -}
listRemotes :: Repo -> IO [RemoteName]
listRemotes repo = map decodeBS . S8.lines
<$> pipeReadStrict [Param "remote"] repo
{- Is a git config key one that specifies the url of a remote? -} {- Is a git config key one that specifies the url of a remote? -}
isRemoteUrlKey :: ConfigKey -> Bool isRemoteUrlKey :: ConfigKey -> Bool
isRemoteUrlKey = isRemoteKey "url" isRemoteUrlKey = isRemoteKey "url"