diff --git a/CmdLine/GitRemoteAnnex.hs b/CmdLine/GitRemoteAnnex.hs index 115ca3c90e..361feaabf3 100644 --- a/CmdLine/GitRemoteAnnex.hs +++ b/CmdLine/GitRemoteAnnex.hs @@ -11,9 +11,14 @@ module CmdLine.GitRemoteAnnex where import Annex.Common import qualified Annex -import qualified Git.CurrentRepo import qualified Remote -import Annex.UUID +import qualified Git.CurrentRepo +import qualified Git.Ref +import qualified Git.Branch +import qualified Git.Bundle +import Git.Types +import Backend.GitRemoteAnnex +import Annex.Transfer import Types.Remote import Types.Key import Network.URI @@ -21,14 +26,18 @@ import Utility.Tmp import Utility.Metered import qualified Data.ByteString as B import qualified Data.ByteString.Char8 as B8 -import qualified Data.ByteString.Short as S +import qualified Data.Map.Strict as M run :: [String] -> IO () -run (_remotename:url:[]) = case parseSpecialRemoteUrl url of - Left e -> giveup e - Right src -> do - state <- Annex.new =<< Git.CurrentRepo.get - Annex.eval state (run' src) +run (remotename:url:[]) = + -- git strips the "annex::" prefix of the url + -- when running this command, so add it back + let url' = "annex::" ++ url + in case parseSpecialRemoteNameUrl remotename url' of + Left e -> giveup e + Right src -> do + state <- Annex.new =<< Git.CurrentRepo.get + Annex.eval state (run' src) run (_remotename:[]) = giveup "remote url not configured" run _ = giveup "expected remote name and url parameters" @@ -38,20 +47,33 @@ run' src = -- the output of this command is being parsed by git. doQuietAction $ do rmt <- getSpecialRemote src - go rmt =<< lines <$> liftIO getContents + ls <- lines <$> liftIO getContents + go rmt ls emptyState where - go rmt (l:ls) = + go rmt (l:ls) st = let (c, v) = splitLine l in case c of - "capabilities" -> capabilities >> go rmt ls + "capabilities" -> capabilities >> go rmt ls st "list" -> case v of - "" -> list rmt False >> go rmt ls - "for-push" -> list rmt True >> go rmt ls + "" -> list st rmt False >>= go rmt ls + "for-push" -> list st rmt True >>= go rmt ls _ -> protocolError l - "fetch" -> fetch rmt (l:ls) >>= go rmt - "push" -> push rmt (l:ls) >>= go rmt + "fetch" -> fetch st rmt (l:ls) >>= \ls' -> go rmt ls' st + "push" -> push st rmt (l:ls) >>= \ls' -> go rmt ls' st + "" -> return () _ -> protocolError l - go _ [] = return () + go _ [] _ = return () + +data State = State + { manifestCache :: Maybe Manifest + , trackingRefs :: M.Map Ref Sha + } + +emptyState :: State +emptyState = State + { manifestCache = Nothing + , trackingRefs = mempty + } protocolError :: String -> a protocolError l = giveup $ "gitremote-helpers protocol error at " ++ show l @@ -63,30 +85,71 @@ capabilities = do liftIO $ putStrLn "" liftIO $ hFlush stdout -list :: Remote -> Bool -> Annex () -list rmt forpush = error "TODO list" +list :: State -> Remote -> Bool -> Annex State +list st rmt forpush = do + manifest <- downloadManifest rmt + l <- forM (inManifest manifest) $ \k -> do + b <- downloadGitBundle rmt k + heads <- inRepo $ Git.Bundle.listHeads b + -- Get all the objects from the bundle. This is done here + -- so that the tracking refs can be updated with what is + -- listed, and so what when a full repush is done, all + -- objects are available to be pushed. + when forpush $ + inRepo $ Git.Bundle.unbundle b + -- The bundle may contain tracking refs, or regular refs, + -- make sure we're operating on regular refs. + return $ map (\(s, r) -> (fromTrackingRef rmt r, s)) heads + + -- Later refs replace earlier refs with the same name. + let refmap = M.fromList $ concat l + let reflist = M.toList refmap + let trackingrefmap = M.mapKeys (toTrackingRef rmt) refmap + + -- When listing for a push, update the tracking refs to match what + -- was listed. This is necessary in order for a full repush to know + -- what to push. + when forpush $ + updateTrackingRefs rmt trackingrefmap + + -- Respond to git with a list of refs. + liftIO $ do + forM_ reflist $ \(ref, sha) -> + B8.putStrLn $ fromRef' sha <> " " <> fromRef' ref + -- Newline terminates list of refs. + putStrLn "" + hFlush stdout + + -- Remember the tracking refs. + return $ st + { manifestCache = Just manifest + , trackingRefs = trackingrefmap + } -- Any number of fetch commands can be sent by git, asking for specific -- things. We fetch everything new at once, so find the end of the fetch -- commands (which is supposed to be a blank line) before fetching. -fetch :: Remote -> [String] -> Annex [String] -fetch rmt (l:ls) = case splitLine l of - ("fetch", _) -> fetch rmt ls +fetch :: State -> Remote -> [String] -> Annex [String] +fetch st rmt (l:ls) = case splitLine l of + ("fetch", _) -> fetch st rmt ls ("", _) -> do - fetch' rmt + fetch' st rmt return ls _ -> do - fetch' rmt + fetch' st rmt return (l:ls) -fetch rmt [] = do - fetch' rmt +fetch st rmt [] = do + fetch' st rmt return [] -fetch' :: Remote -> Annex () -fetch' rmt = error "TODO fetch" +fetch' :: State -> Remote -> Annex () +fetch' st rmt = do + manifest <- maybe (downloadManifest rmt) pure (manifestCache st) + forM_ (inManifest manifest) $ \k -> + downloadGitBundle rmt k >>= inRepo . Git.Bundle.unbundle -push :: Remote -> [String] -> Annex [String] -push rmt ls = do +push :: State -> Remote -> [String] -> Annex [String] +push st rmt ls = do let (refspecs, ls') = collectRefSpecs ls error "TODO push refspecs" return ls' @@ -128,16 +191,27 @@ splitLine l = v = if null sv then sv else drop 1 sv in (c, v) -data SpecialRemoteConfig = SpecialRemoteConfig - { specialRemoteUUID :: UUID - , specialRemoteParams :: [(String, String)] - } +data SpecialRemoteConfig + = SpecialRemoteConfig + { specialRemoteUUID :: UUID + , specialRemoteParams :: [(String, String)] + } + | ExistingSpecialRemote RemoteName deriving (Show) -- The url for a special remote looks like --- annex::uuid?param=value¶m=value... +-- "annex::uuid?param=value¶m=value..." +-- +-- Also accept an url of "annex::", when a remote name is provided, +-- to use an already enabled special remote. +parseSpecialRemoteNameUrl :: String -> String -> Either String SpecialRemoteConfig +parseSpecialRemoteNameUrl remotename url + | url == "annex::" && remotename /= url = Right $ + ExistingSpecialRemote remotename + | otherwise = parseSpecialRemoteUrl url + parseSpecialRemoteUrl :: String -> Either String SpecialRemoteConfig -parseSpecialRemoteUrl s = case parseURI s of +parseSpecialRemoteUrl url = case parseURI url of Nothing -> Left "URL parse failed" Just u -> case uriScheme u of "annex:" -> case uriPath u of @@ -156,15 +230,13 @@ parseSpecialRemoteUrl s = case parseURI s of in (unEscapeString k, unEscapeString v) getSpecialRemote :: SpecialRemoteConfig -> Annex Remote -getSpecialRemote src - -- annex:uuid with no query string uses an existing remote - | null (specialRemoteParams src) = - Remote.byUUID (specialRemoteUUID src) >>= \case - Just rmt -> if thirdPartyPopulated (remotetype rmt) - then giveup "Cannot use this thirdparty-populated special remote as a git remote" - else return rmt - Nothing -> giveup $ "Cannot find an existing special remote with UUID " - ++ fromUUID (specialRemoteUUID src) +getSpecialRemote (ExistingSpecialRemote remotename) = + Remote.byNameOnly remotename >>= \case + Just rmt -> if thirdPartyPopulated (remotetype rmt) + then giveup "Cannot use this thirdparty-populated special remote as a git remote" + else return rmt + Nothing -> giveup $ "There is no special remote named " ++ remotename +getSpecialRemote src@(SpecialRemoteConfig {}) -- Given the configuration of a special remote, create a -- Remote object to access the special remote. -- This needs to avoid storing the configuration in the git-annex @@ -179,30 +251,18 @@ getSpecialRemote src -- remote, rather than the current git repo. But can this be -- avoided? --- A key that is used for the manifest of the git repository stored in a --- special remote with the specified uuid. -manifestKey :: UUID -> Key -manifestKey u = mkKey $ \kd -> kd - { keyName = S.toShort (fromUUID u) - , keyVariety = OtherKey "GITMANIFEST" - } - --- A key that is used for the git bundle with the specified sha256 --- that is stored in a special remote with the specified uuid. -gitbundleKey :: UUID -> B.ByteString -> Key -gitbundleKey u sha256 = mkKey $ \kd -> kd - { keyName = S.toShort (fromUUID u <> "-" <> sha256) - , keyVariety = OtherKey "GITBUNDLE" - } - -- The manifest contains an ordered list of git bundle keys. -newtype Manifest = Manifest [Key] +newtype Manifest = Manifest { inManifest :: [Key] } -- Downloads the Manifest, or if it does not exist, returns an empty -- Manifest. -- -- Throws errors if the remote cannot be accessed or the download fails, -- or if the manifest file cannot be parsed. +-- +-- This downloads the manifest to a temporary file, rather than using +-- the usual Annex.Transfer.download. The content of manifests is not +-- stable, and so it needs to re-download it fresh every time. downloadManifest :: Remote -> Annex Manifest downloadManifest rmt = ifM (checkPresent rmt mk) ( withTmpFile "GITMANIFEST" $ \tmp tmph -> do @@ -215,11 +275,69 @@ downloadManifest rmt = ifM (checkPresent rmt mk) , return (Manifest []) ) where - mk = manifestKey (Remote.uuid rmt) + mk = genManifestKey (Remote.uuid rmt) checkvalid c [] = return (reverse c) checkvalid c (Just k:ks) = case fromKey keyVariety k of - OtherKey "GITBUNDLE" -> checkvalid (k:c) ks + GitBundleKey -> checkvalid (k:c) ks _ -> giveup $ "Wrong type of key in manifest " ++ serializeKey k checkvalid _ (Nothing:_) = giveup $ "Error parsing manifest " ++ serializeKey mk + +-- Downloads a git bundle to the annex objects directory, unless +-- the object file is already present. Returns the filename of the object +-- file. +-- +-- Throws errors if the download fails, or the checksum does not verify. +-- +-- This does not update the location log to indicate that the local +-- repository contains the git bundle object. Reasons not to include: +-- 1. When this is being used in a git clone, the repository will not have +-- a UUID yet. +-- 2. It would unncessarily bloat the git-annex branch, which would then +-- lead to more things needing to be pushed to the special remote, +-- and so more things pulled from it, etc. +-- 3. Git bundle objects are not usually transferred between repositories +-- except special remotes (although the user can if they want to). +downloadGitBundle :: Remote -> Key -> Annex FilePath +downloadGitBundle rmt k = + ifM (download rmt k (AssociatedFile Nothing) stdRetry noNotification) + ( decodeBS <$> calcRepo (gitAnnexLocation k) + , giveup $ "Failed to download " ++ serializeKey k + ) + +-- Tracking refs are used to remember the refs that are currently on the +-- remote. This is different from git's remote tracking branches, since it +-- needs to track all refs on the remote, not only the refs that the user +-- chooses to fetch. +-- +-- For refs/heads/master, the tracking ref is +-- refs/namespaces/git-remote-annex/uuid/refs/heads/master, +-- using the uuid of the remote. See gitnamespaces(7). +trackingRefPrefix :: Remote -> B.ByteString +trackingRefPrefix rmt = "refs/namespaces/git-remote-annex/" + <> fromUUID (Remote.uuid rmt) <> "/" + +toTrackingRef :: Remote -> Ref -> Ref +toTrackingRef rmt (Ref r) = Ref $ trackingRefPrefix rmt <> r + +-- If the ref is not a tracking ref, it is returned as-is. +fromTrackingRef :: Remote -> Ref -> Ref +fromTrackingRef rmt = Git.Ref.removeBase (decodeBS (trackingRefPrefix rmt)) + +-- Update the tracking refs to be those in the map, and no others. +updateTrackingRefs :: Remote -> M.Map Ref Sha -> Annex () +updateTrackingRefs rmt new = do + old <- inRepo $ Git.Ref.forEachRef + [Param (decodeBS (trackingRefPrefix rmt))] + + -- Delete all tracking refs that are not in the map. + forM_ (filter (\p -> M.notMember (fst p) new) old) $ \(s, r) -> + inRepo $ Git.Ref.delete s r + + -- Update all changed tracking refs. + let oldmap = M.fromList (map (\(s, r) -> (r, s)) old) + forM_ (M.toList new) $ \(r, s) -> + case M.lookup r oldmap of + Just s' | s' == s -> noop + _ -> inRepo $ Git.Branch.update' r s