diff --git a/Annex/Init.hs b/Annex/Init.hs index 6a499e4771..b9478ae4f2 100644 --- a/Annex/Init.hs +++ b/Annex/Init.hs @@ -1,6 +1,6 @@ {- git-annex repository initialization - - - Copyright 2011-2022 Joey Hess + - Copyright 2011-2024 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -12,6 +12,7 @@ module Annex.Init ( checkInitializeAllowed, ensureInitialized, autoInitialize, + autoInitialize', isInitialized, initialize, initialize', @@ -256,10 +257,13 @@ guardSafeToUseRepo a = ifM (inRepo Git.Config.checkRepoConfigInaccessible) - Checks repository version and handles upgrades too. -} autoInitialize :: Annex [Remote] -> Annex () -autoInitialize remotelist = getInitializedVersion >>= maybe needsinit checkUpgrade +autoInitialize = autoInitialize' autoInitializeAllowed + +autoInitialize' :: Annex Bool -> Annex [Remote] -> Annex () +autoInitialize' check remotelist = getInitializedVersion >>= maybe needsinit checkUpgrade where needsinit = - whenM (initializeAllowed <&&> autoInitializeAllowed) $ do + whenM (initializeAllowed <&&> check) $ do initialize Nothing Nothing autoEnableSpecialRemotes remotelist diff --git a/Annex/SpecialRemote/Config.hs b/Annex/SpecialRemote/Config.hs index fff2c88c1d..7fbd0d4191 100644 --- a/Annex/SpecialRemote/Config.hs +++ b/Annex/SpecialRemote/Config.hs @@ -1,6 +1,6 @@ {- git-annex special remote configuration - - - Copyright 2019-2023 Joey Hess + - Copyright 2019-2024 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} diff --git a/CmdLine/GitRemoteAnnex.hs b/CmdLine/GitRemoteAnnex.hs index 000c5b5238..a2462ea770 100644 --- a/CmdLine/GitRemoteAnnex.hs +++ b/CmdLine/GitRemoteAnnex.hs @@ -13,30 +13,48 @@ import Annex.Common import qualified Annex import qualified Remote import qualified Git.CurrentRepo +import qualified Git import qualified Git.Ref import qualified Git.Branch import qualified Git.Bundle -import Git.Types -import Backend.GitRemoteAnnex +import qualified Git.Remote +import qualified Git.Remote.Remove +import qualified Annex.SpecialRemote as SpecialRemote +import qualified Annex.Branch +import qualified Types.Remote as R import Annex.Transfer -import Types.Remote +import Backend.GitRemoteAnnex +import Config +import Types.RemoteConfig +import Types.ProposedAccepted import Types.Key -import Network.URI +import Types.GitConfig +import Git.Types +import Logs.Difference +import Annex.Init +import Annex.Content +import Remote.List +import Remote.List.Util import Utility.Tmp +import Utility.Env import Utility.Metered + +import Network.URI import qualified Data.ByteString as B import qualified Data.ByteString.Char8 as B8 import qualified Data.Map.Strict as M +import System.FilePath.ByteString as P run :: [String] -> IO () -run (remotename:url:[]) = +run (remotename:url:[]) = -- git strips the "annex::" prefix of the url -- when running this command, so add it back let url' = "annex::" ++ url in case parseSpecialRemoteNameUrl remotename url' of Left e -> giveup e Right src -> do - state <- Annex.new =<< Git.CurrentRepo.get + repo <- getRepo + state <- Annex.new repo Annex.eval state (run' src) run (_remotename:[]) = giveup "remote url not configured" run _ = giveup "expected remote name and url parameters" @@ -45,10 +63,10 @@ run' :: SpecialRemoteConfig -> Annex () run' src = -- Prevent any usual git-annex output to stdout, because -- the output of this command is being parsed by git. - doQuietAction $ do - rmt <- getSpecialRemote src - ls <- lines <$> liftIO getContents - go rmt ls emptyState + doQuietAction $ + withSpecialRemote src $ \rmt -> do + ls <- lines <$> liftIO getContents + go rmt ls emptyState where go rmt (l:ls) st = let (c, v) = splitLine l @@ -198,7 +216,9 @@ splitLine l = data SpecialRemoteConfig = SpecialRemoteConfig { specialRemoteUUID :: UUID - , specialRemoteParams :: [(String, String)] + , specialRemoteConfig :: RemoteConfig + , specialRemoteName :: Maybe RemoteName + , specialRemoteUrl :: String } | ExistingSpecialRemote RemoteName deriving (Show) @@ -212,48 +232,114 @@ parseSpecialRemoteNameUrl :: String -> String -> Either String SpecialRemoteConf parseSpecialRemoteNameUrl remotename url | url == "annex::" && remotename /= url = Right $ ExistingSpecialRemote remotename - | otherwise = parseSpecialRemoteUrl url + | "annex::" `isPrefixOf` remotename = parseSpecialRemoteUrl url Nothing + | otherwise = parseSpecialRemoteUrl url (Just remotename) -parseSpecialRemoteUrl :: String -> Either String SpecialRemoteConfig -parseSpecialRemoteUrl url = case parseURI url of +parseSpecialRemoteUrl :: String -> Maybe RemoteName -> Either String SpecialRemoteConfig +parseSpecialRemoteUrl url remotename = case parseURI url of Nothing -> Left "URL parse failed" Just u -> case uriScheme u of "annex:" -> case uriPath u of "" -> Left "annex: URL did not include a UUID" (':':p) -> Right $ SpecialRemoteConfig { specialRemoteUUID = toUUID p - , specialRemoteParams = parsequery u + , specialRemoteConfig = parsequery u + , specialRemoteName = remotename + , specialRemoteUrl = url } _ -> Left "annex: URL malformed" _ -> Left "Not an annex: URL" where - parsequery u = map parsekv $ splitc '&' (drop 1 (uriQuery u)) + parsequery u = M.fromList $ + map parsekv $ splitc '&' (drop 1 (uriQuery u)) parsekv kv = let (k, sv) = break (== '=') kv v = if null sv then sv else drop 1 sv - in (unEscapeString k, unEscapeString v) + in (Proposed (unEscapeString k), Proposed (unEscapeString v)) -getSpecialRemote :: SpecialRemoteConfig -> Annex Remote -getSpecialRemote (ExistingSpecialRemote remotename) = +-- Runs an action with a Remote as specified by the SpecialRemoteConfig. +withSpecialRemote :: SpecialRemoteConfig -> (Remote -> Annex a) -> Annex a +withSpecialRemote (ExistingSpecialRemote remotename) a = + getEnabledSpecialRemoteByName remotename >>= + maybe (giveup $ "There is no special remote named " ++ remotename) + a +withSpecialRemote cfg@(SpecialRemoteConfig {}) a = case specialRemoteName cfg of + -- The name could be the name of an existing special remote, + -- if so use it as long as its UUID matches the UUID from the url. + Just remotename -> getEnabledSpecialRemoteByName remotename >>= \case + Just rmt + | R.uuid rmt == specialRemoteUUID cfg -> a rmt + | otherwise -> giveup $ "The uuid in the annex:: url does not match the uuid of the remote named " ++ remotename + -- When cloning from an annex:: url, + -- this is used to set up the origin remote. + Nothing -> (initremote remotename >>= a) + `finally` cleanupInitialization + Nothing -> inittempremote + `finally` cleanupInitialization + where + -- Initialize a new special remote with the provided configuration + -- and name. + -- + -- The configuration is not stored in the git-annex branch, because + -- it's expected that the git repository stored on the special + -- remote includes its configuration, perhaps under a different + -- name, and perhaps slightly different (when the annex:: url + -- omitted some unimportant part of the configuration). + initremote remotename = do + let c = M.insert SpecialRemote.nameField (Proposed remotename) + (specialRemoteConfig cfg) + t <- either giveup return (SpecialRemote.findType c) + dummycfg <- liftIO dummyRemoteGitConfig + (c', _u) <- R.setup t R.Init (Just (specialRemoteUUID cfg)) + Nothing c dummycfg + `onException` cleanupremote remotename + setConfig (remoteConfig c' "url") (specialRemoteUrl cfg) + remotesChanged + getEnabledSpecialRemoteByName remotename >>= \case + Just rmt -> case checkSpecialRemoteProblems rmt of + Nothing -> return rmt + Just problem -> do + cleanupremote remotename + giveup problem + Nothing -> do + cleanupremote remotename + giveup "Unable to find special remote after setup." + + -- Temporarily initialize a special remote, and remove it after + -- the action is run. + inittempremote = + let remotename = Git.Remote.makeLegalName $ + "annex-temp-" ++ fromUUID (specialRemoteUUID cfg) + in bracket + (initremote remotename) + (const $ cleanupremote remotename) + a + + cleanupremote remotename = do + l <- inRepo Git.Remote.listRemotes + when (remotename `elem` l) $ + inRepo $ Git.Remote.Remove.remove remotename + +-- When a special remote has already been enabled, just use it. +getEnabledSpecialRemoteByName :: RemoteName -> Annex (Maybe Remote) +getEnabledSpecialRemoteByName remotename = Remote.byNameOnly remotename >>= \case - Just rmt -> if thirdPartyPopulated (remotetype rmt) - then giveup "Cannot use this thirdparty-populated special remote as a git remote" - else return rmt - Nothing -> giveup $ "There is no special remote named " ++ remotename -getSpecialRemote src@(SpecialRemoteConfig {}) - -- Given the configuration of a special remote, create a - -- Remote object to access the special remote. - -- This needs to avoid storing the configuration in the git-annex - -- branch (which would be redundant and also the configuration - -- provided may differ in some small way from the configuration - -- that is stored in the git repository inside the remote, which - -- should not be changed). It also needs to avoid creating a git - -- remote in .git/config. - | otherwise = error "TODO conjure up a new special remote out of thin air" - -- XXX one way to do it would be to make a temporary git repo, - -- initremote in there, and use that for accessing the special - -- remote, rather than the current git repo. But can this be - -- avoided? + Nothing -> return Nothing + Just rmt -> + maybe (return (Just rmt)) giveup + (checkSpecialRemoteProblems rmt) + +-- Avoid using special remotes that are thirdparty populated, because +-- there is no way to push the git repository keys into one. +-- +-- XXX Avoid using special remotes that are encrypted by key +-- material stored in the git repository, since that would present a +-- chicken and egg problem when cloning. +checkSpecialRemoteProblems :: Remote -> Maybe String +checkSpecialRemoteProblems rmt + | R.thirdPartyPopulated (R.remotetype rmt) = + Just "Cannot use this thirdparty-populated special remote as a git remote" + | otherwise = Nothing -- The manifest contains an ordered list of git bundle keys. newtype Manifest = Manifest { inManifest :: [Key] } @@ -268,12 +354,12 @@ newtype Manifest = Manifest { inManifest :: [Key] } -- the usual Annex.Transfer.download. The content of manifests is not -- stable, and so it needs to re-download it fresh every time. downloadManifest :: Remote -> Annex Manifest -downloadManifest rmt = ifM (checkPresent rmt mk) +downloadManifest rmt = ifM (R.checkPresent rmt mk) ( withTmpFile "GITMANIFEST" $ \tmp tmph -> do liftIO $ hClose tmph - _ <- retrieveKeyFile rmt mk + _ <- R.retrieveKeyFile rmt mk (AssociatedFile Nothing) tmp - nullMeterUpdate NoVerify + nullMeterUpdate R.NoVerify ks <- map deserializeKey' . B8.lines <$> liftIO (B.readFile tmp) Manifest <$> checkvalid [] ks , return (Manifest []) @@ -345,3 +431,58 @@ updateTrackingRefs rmt new = do case M.lookup r oldmap of Just s' | s' == s -> noop _ -> inRepo $ Git.Branch.update' r s + +-- git clone does not bother to set GIT_WORK_TREE when running this +-- program, and it does not run it inside the new git repo either. +-- GIT_DIR is set to the new git directory. So, have to override +-- the worktree to be the parent of the gitdir. +getRepo :: IO Repo +getRepo = getEnv "GIT_WORK_TREE" >>= \case + Just _ -> Git.CurrentRepo.get + Nothing -> fixup <$> Git.CurrentRepo.get + where + fixup r@(Repo { location = loc@(Local { worktree = Just _ }) }) = + r { location = loc { worktree = Just (P.takeDirectory (gitdir loc)) } } + fixup r = r + +-- This is run after git has used this process to fetch or push from a +-- special remote that was specified using a git-annex url. If the git +-- repository was not initialized for use by git-annex already, it is still +-- not initialized at this point. +-- +-- It's important that initialization not be done by this process until +-- git has fetched any git-annex branch from the special remote. That +-- git-annex branch may have Differences, and prematurely initializing the +-- local repository would then create a git-annex branch that can't merge +-- with the one from the special remote. +-- +-- If there is still not a sibling git-annex branch, this deletes all annex +-- objects for git bundles from the annex objects directory, and deletes +-- the annex objects directory. That is necessary to avoid the +-- Annex.Init.objectDirNotPresent check preventing a later initialization. +-- And if the later initialization includes Differences, the git bundle +-- objects downloaded by this process would be in the wrong locations. +-- +-- When there is now a sibling git-annex branch, this handles +-- initialization. When the initialized git-annex branch has Differences, +-- the git bundle objects are in the wrong place, so have to be deleted. +-- +-- FIXME git-annex branch is unfortunately created during git clone from a +-- special remote. Should not be for this to work. +cleanupInitialization :: Annex () +cleanupInitialization = ifM Annex.Branch.hasSibling + ( do + autoInitialize' (pure True) remoteList + differences <- allDifferences <$> recordedDifferences + when (differences /= mempty) $ + deletebundleobjects + , deletebundleobjects + ) + where + deletebundleobjects = do + annexobjectdir <- fromRepo gitAnnexObjectDir + ks <- listKeys InAnnex + forM_ ks $ \k -> case fromKey keyVariety k of + GitBundleKey -> lockContentForRemoval k noop removeAnnex + _ -> noop + void $ liftIO $ tryIO $ removeDirectory (decodeBS annexobjectdir) diff --git a/Git/Remote.hs b/Git/Remote.hs index 9cdaad61ca..4eb6780fcc 100644 --- a/Git/Remote.hs +++ b/Git/Remote.hs @@ -1,6 +1,6 @@ {- git remote stuff - - - Copyright 2012-2021 Joey Hess + - Copyright 2012-2024 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -13,6 +13,7 @@ module Git.Remote where import Common import Git import Git.Types +import Git.Command import Data.Char import qualified Data.Map as M @@ -23,6 +24,11 @@ import Network.URI import Git.FilePath #endif +{- Lists all currently existing git remotes. -} +listRemotes :: Repo -> IO [RemoteName] +listRemotes repo = map decodeBS . S8.lines + <$> pipeReadStrict [Param "remote"] repo + {- Is a git config key one that specifies the url of a remote? -} isRemoteUrlKey :: ConfigKey -> Bool isRemoteUrlKey = isRemoteKey "url"