working toward git-remote-annex using a special remote

Not quite there yet.

Also, changed the format of GITBUNDLE keys to use only one '-'
after the UUID. A sha256 does not contain that character, so can just
split at the last one.

Amusingly, the sha256 will probably not actually be verified. A git
bundle contains its own checksums that git uses to verify it. And if
someone wanted to replace the content of a GITBUNDLE object, they
could just edit the manifest to use a new one whose sha256 does verify.

Sponsored-by: Nicholas Golder-Manning
This commit is contained in:
Joey Hess 2024-05-06 16:25:55 -04:00
parent f4ba6e0c1e
commit 483887591d
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
3 changed files with 115 additions and 29 deletions

View file

@ -5,38 +5,53 @@
- Licensed under the GNU AGPL version 3 or higher. - Licensed under the GNU AGPL version 3 or higher.
-} -}
{-# LANGUAGE OverloadedStrings #-}
module CmdLine.GitRemoteAnnex where module CmdLine.GitRemoteAnnex where
import Annex.Common import Annex.Common
import qualified Annex import qualified Annex
import qualified Git.CurrentRepo import qualified Git.CurrentRepo
import qualified Remote
import Annex.UUID import Annex.UUID
import Types.Remote
import Types.Key
import Network.URI import Network.URI
import Utility.Tmp
import Utility.Metered
import qualified Data.ByteString as B
import qualified Data.ByteString.Char8 as B8
import qualified Data.ByteString.Short as S
run :: [String] -> IO () run :: [String] -> IO ()
run (_remotename:url:[]) = case parseSpecialRemoteUrl url of run (_remotename:url:[]) = case parseSpecialRemoteUrl url of
Left e -> giveup e Left e -> giveup e
Right src -> do Right src -> do
state <- Annex.new =<< Git.CurrentRepo.get state <- Annex.new =<< Git.CurrentRepo.get
Annex.eval state (run' url) Annex.eval state (run' src)
run (_remotename:[]) = giveup "remote url not configured" run (_remotename:[]) = giveup "remote url not configured"
run _ = giveup "expected remote name and url parameters" run _ = giveup "expected remote name and url parameters"
run' :: String -> Annex () run' :: SpecialRemoteConfig -> Annex ()
run' url = go =<< lines <$> liftIO getContents run' src =
-- Prevent any usual git-annex output to stdout, because
-- the output of this command is being parsed by git.
doQuietAction $ do
rmt <- getSpecialRemote src
go rmt =<< lines <$> liftIO getContents
where where
go (l:ls) = go rmt (l:ls) =
let (c, v) = splitLine l let (c, v) = splitLine l
in case c of in case c of
"capabilities" -> capabilities >> go ls "capabilities" -> capabilities >> go rmt ls
"list" -> case v of "list" -> case v of
"" -> list False >> go ls "" -> list rmt False >> go rmt ls
"for-push" -> list True >> go ls "for-push" -> list rmt True >> go rmt ls
_ -> protocolError l _ -> protocolError l
"fetch" -> fetch (l:ls) >>= go "fetch" -> fetch rmt (l:ls) >>= go rmt
"push" -> push (l:ls) >>= go "push" -> push rmt (l:ls) >>= go rmt
_ -> protocolError l _ -> protocolError l
go [] = return () go _ [] = return ()
protocolError :: String -> a protocolError :: String -> a
protocolError l = giveup $ "gitremote-helpers protocol error at " ++ show l protocolError l = giveup $ "gitremote-helpers protocol error at " ++ show l
@ -48,30 +63,30 @@ capabilities = do
liftIO $ putStrLn "" liftIO $ putStrLn ""
liftIO $ hFlush stdout liftIO $ hFlush stdout
list :: Bool -> Annex () list :: Remote -> Bool -> Annex ()
list forpush = error "TODO list" list rmt forpush = error "TODO list"
-- Any number of fetch commands can be sent by git, asking for specific -- Any number of fetch commands can be sent by git, asking for specific
-- things. We fetch everything new at once, so find the end of the fetch -- things. We fetch everything new at once, so find the end of the fetch
-- commands (which is supposed to be a blank line) before fetching. -- commands (which is supposed to be a blank line) before fetching.
fetch :: [String] -> Annex [String] fetch :: Remote -> [String] -> Annex [String]
fetch (l:ls) = case splitLine l of fetch rmt (l:ls) = case splitLine l of
("fetch", _) -> fetch ls ("fetch", _) -> fetch rmt ls
("", _) -> do ("", _) -> do
fetch' fetch' rmt
return ls return ls
_ -> do _ -> do
fetch' fetch' rmt
return (l:ls) return (l:ls)
fetch [] = do fetch rmt [] = do
fetch' fetch' rmt
return [] return []
fetch' :: Annex () fetch' :: Remote -> Annex ()
fetch' = error "TODO fetch" fetch' rmt = error "TODO fetch"
push :: [String] -> Annex [String] push :: Remote -> [String] -> Annex [String]
push ls = do push rmt ls = do
let (refspecs, ls') = collectRefSpecs ls let (refspecs, ls') = collectRefSpecs ls
error "TODO push refspecs" error "TODO push refspecs"
return ls' return ls'
@ -135,7 +150,76 @@ parseSpecialRemoteUrl s = case parseURI s of
_ -> Left "Not an annex: URL" _ -> Left "Not an annex: URL"
where where
parsequery u = map parsekv $ splitc '&' (drop 1 (uriQuery u)) parsequery u = map parsekv $ splitc '&' (drop 1 (uriQuery u))
parsekv s = parsekv kv =
let (k, sv) = break (== '=') s let (k, sv) = break (== '=') kv
v = if null sv then sv else drop 1 sv v = if null sv then sv else drop 1 sv
in (unEscapeString k, unEscapeString v) in (unEscapeString k, unEscapeString v)
getSpecialRemote :: SpecialRemoteConfig -> Annex Remote
getSpecialRemote src
-- annex:uuid with no query string uses an existing remote
| null (specialRemoteParams src) =
Remote.byUUID (specialRemoteUUID src) >>= \case
Just rmt -> if thirdPartyPopulated (remotetype rmt)
then giveup "Cannot use this thirdparty-populated special remote as a git remote"
else return rmt
Nothing -> giveup $ "Cannot find an existing special remote with UUID "
++ fromUUID (specialRemoteUUID src)
-- Given the configuration of a special remote, create a
-- Remote object to access the special remote.
-- This needs to avoid storing the configuration in the git-annex
-- branch (which would be redundant and also the configuration
-- provided may differ in some small way from the configuration
-- that is stored in the git repository inside the remote, which
-- should not be changed). It also needs to avoid creating a git
-- remote in .git/config.
| otherwise = error "TODO conjure up a new special remote out of thin air"
-- XXX one way to do it would be to make a temporary git repo,
-- initremote in there, and use that for accessing the special
-- remote, rather than the current git repo. But can this be
-- avoided?
-- A key that is used for the manifest of the git repository stored in a
-- special remote with the specified uuid.
manifestKey :: UUID -> Key
manifestKey u = mkKey $ \kd -> kd
{ keyName = S.toShort (fromUUID u)
, keyVariety = OtherKey "GITMANIFEST"
}
-- A key that is used for the git bundle with the specified sha256
-- that is stored in a special remote with the specified uuid.
gitbundleKey :: UUID -> B.ByteString -> Key
gitbundleKey u sha256 = mkKey $ \kd -> kd
{ keyName = S.toShort (fromUUID u <> "-" <> sha256)
, keyVariety = OtherKey "GITBUNDLE"
}
-- The manifest contains an ordered list of git bundle keys.
newtype Manifest = Manifest [Key]
-- Downloads the Manifest, or if it does not exist, returns an empty
-- Manifest.
--
-- Throws errors if the remote cannot be accessed or the download fails,
-- or if the manifest file cannot be parsed.
downloadManifest :: Remote -> Annex Manifest
downloadManifest rmt = ifM (checkPresent rmt mk)
( withTmpFile "GITMANIFEST" $ \tmp tmph -> do
liftIO $ hClose tmph
_ <- retrieveKeyFile rmt mk
(AssociatedFile Nothing) tmp
nullMeterUpdate NoVerify
ks <- map deserializeKey' . B8.lines <$> liftIO (B.readFile tmp)
Manifest <$> checkvalid [] ks
, return (Manifest [])
)
where
mk = manifestKey (Remote.uuid rmt)
checkvalid c [] = return (reverse c)
checkvalid c (Just k:ks) = case fromKey keyVariety k of
OtherKey "GITBUNDLE" -> checkvalid (k:c) ks
_ -> giveup $ "Wrong type of key in manifest " ++ serializeKey k
checkvalid _ (Nothing:_) =
giveup $ "Error parsing manifest " ++ serializeKey mk

View file

@ -38,6 +38,7 @@ import Utility.CopyFile
import Git.FilePath import Git.FilePath
import Utility.PID import Utility.PID
import Utility.InodeCache import Utility.InodeCache
import Utility.Metered
import Annex.InodeSentinal import Annex.InodeSentinal
import qualified Database.Keys import qualified Database.Keys
import qualified Database.Fsck as FsckDb import qualified Database.Fsck as FsckDb
@ -206,8 +207,7 @@ performRemote key afile numcopies remote =
) )
, return Nothing , return Nothing
) )
getfile' tmp = Remote.retrieveKeyFile remote key (AssociatedFile Nothing) (fromRawFilePath tmp) dummymeter (RemoteVerify remote) getfile' tmp = Remote.retrieveKeyFile remote key (AssociatedFile Nothing) (fromRawFilePath tmp) nullMeterUpdate (RemoteVerify remote)
dummymeter _ = noop
getcheap tmp = case Remote.retrieveKeyFileCheap remote of getcheap tmp = case Remote.retrieveKeyFileCheap remote of
Just a -> isRight <$> tryNonAsync (a key afile (fromRawFilePath tmp)) Just a -> isRight <$> tryNonAsync (a key afile (fromRawFilePath tmp))
Nothing -> return False Nothing -> return False

View file

@ -3,11 +3,13 @@ This adds two new object types to git-annex, GITMANIFEST and a GITBUNDLE.
GITMANIFEST--$UUID is the manifest for a git repository stored in the GITMANIFEST--$UUID is the manifest for a git repository stored in the
git-annex repository with that UUID. git-annex repository with that UUID.
GITBUNDLE--$UUID--sha256 is a git bundle. GITBUNDLE--$UUID-sha256 is a git bundle.
# format of the manifest file # format of the manifest file
An ordered list of bundle keys, one per line. An ordered list of bundle keys, one per line.
(Lines end with unix `"\n"`, not `"\r\n"`.)
# fetching # fetching