git clone support for git-remote-annex

Also support using annex:: urls that specify the whole special remote
config.

Both of these cases need a special remote to be initialized enough to
use it, which means writing to .git/config but not to the git-annex
branch. When cloning, the remote is left set up in .git/config,
so further use of it, by git-annex or git-remote-annex will work. When
using git with an annex:: url, a temporary remote is written to
.git/config, but then removed at the end.

While that's a little bit ugly, the fact is that the Remote interface
expects that it's ok to set git configs of the remote that is being
initialized. And it's nowhere near as ugly as the alternative of making
a temporary git repository and initializing the special remote in there.

Cloning from a repository that does not contain a git-annex branch and
then later running git-annex init is currently broken, although I've
gotten most of the way there to supporting it.
See cleanupInitialization FIXME.

Special shout out to git clone for running gitremote-helpers with
GIT_DIR set, but not in the git repository and with GIT_WORK_TREE not
set. Resulting in needing the fixupRepo hack.

Sponsored-by: unqueued on Patreon
This commit is contained in:
Joey Hess 2024-05-08 16:55:45 -04:00
parent df5011ec43
commit 59fc2005ec
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
4 changed files with 196 additions and 45 deletions

View file

@ -13,30 +13,48 @@ import Annex.Common
import qualified Annex
import qualified Remote
import qualified Git.CurrentRepo
import qualified Git
import qualified Git.Ref
import qualified Git.Branch
import qualified Git.Bundle
import Git.Types
import Backend.GitRemoteAnnex
import qualified Git.Remote
import qualified Git.Remote.Remove
import qualified Annex.SpecialRemote as SpecialRemote
import qualified Annex.Branch
import qualified Types.Remote as R
import Annex.Transfer
import Types.Remote
import Backend.GitRemoteAnnex
import Config
import Types.RemoteConfig
import Types.ProposedAccepted
import Types.Key
import Network.URI
import Types.GitConfig
import Git.Types
import Logs.Difference
import Annex.Init
import Annex.Content
import Remote.List
import Remote.List.Util
import Utility.Tmp
import Utility.Env
import Utility.Metered
import Network.URI
import qualified Data.ByteString as B
import qualified Data.ByteString.Char8 as B8
import qualified Data.Map.Strict as M
import System.FilePath.ByteString as P
run :: [String] -> IO ()
run (remotename:url:[]) =
run (remotename:url:[]) =
-- git strips the "annex::" prefix of the url
-- when running this command, so add it back
let url' = "annex::" ++ url
in case parseSpecialRemoteNameUrl remotename url' of
Left e -> giveup e
Right src -> do
state <- Annex.new =<< Git.CurrentRepo.get
repo <- getRepo
state <- Annex.new repo
Annex.eval state (run' src)
run (_remotename:[]) = giveup "remote url not configured"
run _ = giveup "expected remote name and url parameters"
@ -45,10 +63,10 @@ run' :: SpecialRemoteConfig -> Annex ()
run' src =
-- Prevent any usual git-annex output to stdout, because
-- the output of this command is being parsed by git.
doQuietAction $ do
rmt <- getSpecialRemote src
ls <- lines <$> liftIO getContents
go rmt ls emptyState
doQuietAction $
withSpecialRemote src $ \rmt -> do
ls <- lines <$> liftIO getContents
go rmt ls emptyState
where
go rmt (l:ls) st =
let (c, v) = splitLine l
@ -198,7 +216,9 @@ splitLine l =
data SpecialRemoteConfig
= SpecialRemoteConfig
{ specialRemoteUUID :: UUID
, specialRemoteParams :: [(String, String)]
, specialRemoteConfig :: RemoteConfig
, specialRemoteName :: Maybe RemoteName
, specialRemoteUrl :: String
}
| ExistingSpecialRemote RemoteName
deriving (Show)
@ -212,48 +232,114 @@ parseSpecialRemoteNameUrl :: String -> String -> Either String SpecialRemoteConf
parseSpecialRemoteNameUrl remotename url
| url == "annex::" && remotename /= url = Right $
ExistingSpecialRemote remotename
| otherwise = parseSpecialRemoteUrl url
| "annex::" `isPrefixOf` remotename = parseSpecialRemoteUrl url Nothing
| otherwise = parseSpecialRemoteUrl url (Just remotename)
parseSpecialRemoteUrl :: String -> Either String SpecialRemoteConfig
parseSpecialRemoteUrl url = case parseURI url of
parseSpecialRemoteUrl :: String -> Maybe RemoteName -> Either String SpecialRemoteConfig
parseSpecialRemoteUrl url remotename = case parseURI url of
Nothing -> Left "URL parse failed"
Just u -> case uriScheme u of
"annex:" -> case uriPath u of
"" -> Left "annex: URL did not include a UUID"
(':':p) -> Right $ SpecialRemoteConfig
{ specialRemoteUUID = toUUID p
, specialRemoteParams = parsequery u
, specialRemoteConfig = parsequery u
, specialRemoteName = remotename
, specialRemoteUrl = url
}
_ -> Left "annex: URL malformed"
_ -> Left "Not an annex: URL"
where
parsequery u = map parsekv $ splitc '&' (drop 1 (uriQuery u))
parsequery u = M.fromList $
map parsekv $ splitc '&' (drop 1 (uriQuery u))
parsekv kv =
let (k, sv) = break (== '=') kv
v = if null sv then sv else drop 1 sv
in (unEscapeString k, unEscapeString v)
in (Proposed (unEscapeString k), Proposed (unEscapeString v))
getSpecialRemote :: SpecialRemoteConfig -> Annex Remote
getSpecialRemote (ExistingSpecialRemote remotename) =
-- Runs an action with a Remote as specified by the SpecialRemoteConfig.
withSpecialRemote :: SpecialRemoteConfig -> (Remote -> Annex a) -> Annex a
withSpecialRemote (ExistingSpecialRemote remotename) a =
getEnabledSpecialRemoteByName remotename >>=
maybe (giveup $ "There is no special remote named " ++ remotename)
a
withSpecialRemote cfg@(SpecialRemoteConfig {}) a = case specialRemoteName cfg of
-- The name could be the name of an existing special remote,
-- if so use it as long as its UUID matches the UUID from the url.
Just remotename -> getEnabledSpecialRemoteByName remotename >>= \case
Just rmt
| R.uuid rmt == specialRemoteUUID cfg -> a rmt
| otherwise -> giveup $ "The uuid in the annex:: url does not match the uuid of the remote named " ++ remotename
-- When cloning from an annex:: url,
-- this is used to set up the origin remote.
Nothing -> (initremote remotename >>= a)
`finally` cleanupInitialization
Nothing -> inittempremote
`finally` cleanupInitialization
where
-- Initialize a new special remote with the provided configuration
-- and name.
--
-- The configuration is not stored in the git-annex branch, because
-- it's expected that the git repository stored on the special
-- remote includes its configuration, perhaps under a different
-- name, and perhaps slightly different (when the annex:: url
-- omitted some unimportant part of the configuration).
initremote remotename = do
let c = M.insert SpecialRemote.nameField (Proposed remotename)
(specialRemoteConfig cfg)
t <- either giveup return (SpecialRemote.findType c)
dummycfg <- liftIO dummyRemoteGitConfig
(c', _u) <- R.setup t R.Init (Just (specialRemoteUUID cfg))
Nothing c dummycfg
`onException` cleanupremote remotename
setConfig (remoteConfig c' "url") (specialRemoteUrl cfg)
remotesChanged
getEnabledSpecialRemoteByName remotename >>= \case
Just rmt -> case checkSpecialRemoteProblems rmt of
Nothing -> return rmt
Just problem -> do
cleanupremote remotename
giveup problem
Nothing -> do
cleanupremote remotename
giveup "Unable to find special remote after setup."
-- Temporarily initialize a special remote, and remove it after
-- the action is run.
inittempremote =
let remotename = Git.Remote.makeLegalName $
"annex-temp-" ++ fromUUID (specialRemoteUUID cfg)
in bracket
(initremote remotename)
(const $ cleanupremote remotename)
a
cleanupremote remotename = do
l <- inRepo Git.Remote.listRemotes
when (remotename `elem` l) $
inRepo $ Git.Remote.Remove.remove remotename
-- When a special remote has already been enabled, just use it.
getEnabledSpecialRemoteByName :: RemoteName -> Annex (Maybe Remote)
getEnabledSpecialRemoteByName remotename =
Remote.byNameOnly remotename >>= \case
Just rmt -> if thirdPartyPopulated (remotetype rmt)
then giveup "Cannot use this thirdparty-populated special remote as a git remote"
else return rmt
Nothing -> giveup $ "There is no special remote named " ++ remotename
getSpecialRemote src@(SpecialRemoteConfig {})
-- Given the configuration of a special remote, create a
-- Remote object to access the special remote.
-- This needs to avoid storing the configuration in the git-annex
-- branch (which would be redundant and also the configuration
-- provided may differ in some small way from the configuration
-- that is stored in the git repository inside the remote, which
-- should not be changed). It also needs to avoid creating a git
-- remote in .git/config.
| otherwise = error "TODO conjure up a new special remote out of thin air"
-- XXX one way to do it would be to make a temporary git repo,
-- initremote in there, and use that for accessing the special
-- remote, rather than the current git repo. But can this be
-- avoided?
Nothing -> return Nothing
Just rmt ->
maybe (return (Just rmt)) giveup
(checkSpecialRemoteProblems rmt)
-- Avoid using special remotes that are thirdparty populated, because
-- there is no way to push the git repository keys into one.
--
-- XXX Avoid using special remotes that are encrypted by key
-- material stored in the git repository, since that would present a
-- chicken and egg problem when cloning.
checkSpecialRemoteProblems :: Remote -> Maybe String
checkSpecialRemoteProblems rmt
| R.thirdPartyPopulated (R.remotetype rmt) =
Just "Cannot use this thirdparty-populated special remote as a git remote"
| otherwise = Nothing
-- The manifest contains an ordered list of git bundle keys.
newtype Manifest = Manifest { inManifest :: [Key] }
@ -268,12 +354,12 @@ newtype Manifest = Manifest { inManifest :: [Key] }
-- the usual Annex.Transfer.download. The content of manifests is not
-- stable, and so it needs to re-download it fresh every time.
downloadManifest :: Remote -> Annex Manifest
downloadManifest rmt = ifM (checkPresent rmt mk)
downloadManifest rmt = ifM (R.checkPresent rmt mk)
( withTmpFile "GITMANIFEST" $ \tmp tmph -> do
liftIO $ hClose tmph
_ <- retrieveKeyFile rmt mk
_ <- R.retrieveKeyFile rmt mk
(AssociatedFile Nothing) tmp
nullMeterUpdate NoVerify
nullMeterUpdate R.NoVerify
ks <- map deserializeKey' . B8.lines <$> liftIO (B.readFile tmp)
Manifest <$> checkvalid [] ks
, return (Manifest [])
@ -345,3 +431,58 @@ updateTrackingRefs rmt new = do
case M.lookup r oldmap of
Just s' | s' == s -> noop
_ -> inRepo $ Git.Branch.update' r s
-- git clone does not bother to set GIT_WORK_TREE when running this
-- program, and it does not run it inside the new git repo either.
-- GIT_DIR is set to the new git directory. So, have to override
-- the worktree to be the parent of the gitdir.
getRepo :: IO Repo
getRepo = getEnv "GIT_WORK_TREE" >>= \case
Just _ -> Git.CurrentRepo.get
Nothing -> fixup <$> Git.CurrentRepo.get
where
fixup r@(Repo { location = loc@(Local { worktree = Just _ }) }) =
r { location = loc { worktree = Just (P.takeDirectory (gitdir loc)) } }
fixup r = r
-- This is run after git has used this process to fetch or push from a
-- special remote that was specified using a git-annex url. If the git
-- repository was not initialized for use by git-annex already, it is still
-- not initialized at this point.
--
-- It's important that initialization not be done by this process until
-- git has fetched any git-annex branch from the special remote. That
-- git-annex branch may have Differences, and prematurely initializing the
-- local repository would then create a git-annex branch that can't merge
-- with the one from the special remote.
--
-- If there is still not a sibling git-annex branch, this deletes all annex
-- objects for git bundles from the annex objects directory, and deletes
-- the annex objects directory. That is necessary to avoid the
-- Annex.Init.objectDirNotPresent check preventing a later initialization.
-- And if the later initialization includes Differences, the git bundle
-- objects downloaded by this process would be in the wrong locations.
--
-- When there is now a sibling git-annex branch, this handles
-- initialization. When the initialized git-annex branch has Differences,
-- the git bundle objects are in the wrong place, so have to be deleted.
--
-- FIXME git-annex branch is unfortunately created during git clone from a
-- special remote. Should not be for this to work.
cleanupInitialization :: Annex ()
cleanupInitialization = ifM Annex.Branch.hasSibling
( do
autoInitialize' (pure True) remoteList
differences <- allDifferences <$> recordedDifferences
when (differences /= mempty) $
deletebundleobjects
, deletebundleobjects
)
where
deletebundleobjects = do
annexobjectdir <- fromRepo gitAnnexObjectDir
ks <- listKeys InAnnex
forM_ ks $ \k -> case fromKey keyVariety k of
GitBundleKey -> lockContentForRemoval k noop removeAnnex
_ -> noop
void $ liftIO $ tryIO $ removeDirectory (decodeBS annexobjectdir)