working toward git-remote-annex using a special remote

Not quite there yet.

Also, changed the format of GITBUNDLE keys to use only one '-'
after the UUID. A sha256 does not contain that character, so can just
split at the last one.

Amusingly, the sha256 will probably not actually be verified. A git
bundle contains its own checksums that git uses to verify it. And if
someone wanted to replace the content of a GITBUNDLE object, they
could just edit the manifest to use a new one whose sha256 does verify.

Sponsored-by: Nicholas Golder-Manning
This commit is contained in:
Joey Hess 2024-05-06 16:25:55 -04:00
parent f4ba6e0c1e
commit 483887591d
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
3 changed files with 115 additions and 29 deletions

View file

@ -5,38 +5,53 @@
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE OverloadedStrings #-}
module CmdLine.GitRemoteAnnex where
import Annex.Common
import qualified Annex
import qualified Git.CurrentRepo
import qualified Remote
import Annex.UUID
import Types.Remote
import Types.Key
import Network.URI
import Utility.Tmp
import Utility.Metered
import qualified Data.ByteString as B
import qualified Data.ByteString.Char8 as B8
import qualified Data.ByteString.Short as S
run :: [String] -> IO ()
run (_remotename:url:[]) = case parseSpecialRemoteUrl url of
Left e -> giveup e
Right src -> do
state <- Annex.new =<< Git.CurrentRepo.get
Annex.eval state (run' url)
Annex.eval state (run' src)
run (_remotename:[]) = giveup "remote url not configured"
run _ = giveup "expected remote name and url parameters"
run' :: String -> Annex ()
run' url = go =<< lines <$> liftIO getContents
run' :: SpecialRemoteConfig -> Annex ()
run' src =
-- Prevent any usual git-annex output to stdout, because
-- the output of this command is being parsed by git.
doQuietAction $ do
rmt <- getSpecialRemote src
go rmt =<< lines <$> liftIO getContents
where
go (l:ls) =
go rmt (l:ls) =
let (c, v) = splitLine l
in case c of
"capabilities" -> capabilities >> go ls
"capabilities" -> capabilities >> go rmt ls
"list" -> case v of
"" -> list False >> go ls
"for-push" -> list True >> go ls
"" -> list rmt False >> go rmt ls
"for-push" -> list rmt True >> go rmt ls
_ -> protocolError l
"fetch" -> fetch (l:ls) >>= go
"push" -> push (l:ls) >>= go
"fetch" -> fetch rmt (l:ls) >>= go rmt
"push" -> push rmt (l:ls) >>= go rmt
_ -> protocolError l
go [] = return ()
go _ [] = return ()
protocolError :: String -> a
protocolError l = giveup $ "gitremote-helpers protocol error at " ++ show l
@ -48,30 +63,30 @@ capabilities = do
liftIO $ putStrLn ""
liftIO $ hFlush stdout
list :: Bool -> Annex ()
list forpush = error "TODO list"
list :: Remote -> Bool -> Annex ()
list rmt forpush = error "TODO list"
-- Any number of fetch commands can be sent by git, asking for specific
-- things. We fetch everything new at once, so find the end of the fetch
-- commands (which is supposed to be a blank line) before fetching.
fetch :: [String] -> Annex [String]
fetch (l:ls) = case splitLine l of
("fetch", _) -> fetch ls
fetch :: Remote -> [String] -> Annex [String]
fetch rmt (l:ls) = case splitLine l of
("fetch", _) -> fetch rmt ls
("", _) -> do
fetch'
fetch' rmt
return ls
_ -> do
fetch'
fetch' rmt
return (l:ls)
fetch [] = do
fetch'
fetch rmt [] = do
fetch' rmt
return []
fetch' :: Annex ()
fetch' = error "TODO fetch"
fetch' :: Remote -> Annex ()
fetch' rmt = error "TODO fetch"
push :: [String] -> Annex [String]
push ls = do
push :: Remote -> [String] -> Annex [String]
push rmt ls = do
let (refspecs, ls') = collectRefSpecs ls
error "TODO push refspecs"
return ls'
@ -135,7 +150,76 @@ parseSpecialRemoteUrl s = case parseURI s of
_ -> Left "Not an annex: URL"
where
parsequery u = map parsekv $ splitc '&' (drop 1 (uriQuery u))
parsekv s =
let (k, sv) = break (== '=') s
parsekv kv =
let (k, sv) = break (== '=') kv
v = if null sv then sv else drop 1 sv
in (unEscapeString k, unEscapeString v)
getSpecialRemote :: SpecialRemoteConfig -> Annex Remote
getSpecialRemote src
-- annex:uuid with no query string uses an existing remote
| null (specialRemoteParams src) =
Remote.byUUID (specialRemoteUUID src) >>= \case
Just rmt -> if thirdPartyPopulated (remotetype rmt)
then giveup "Cannot use this thirdparty-populated special remote as a git remote"
else return rmt
Nothing -> giveup $ "Cannot find an existing special remote with UUID "
++ fromUUID (specialRemoteUUID src)
-- Given the configuration of a special remote, create a
-- Remote object to access the special remote.
-- This needs to avoid storing the configuration in the git-annex
-- branch (which would be redundant and also the configuration
-- provided may differ in some small way from the configuration
-- that is stored in the git repository inside the remote, which
-- should not be changed). It also needs to avoid creating a git
-- remote in .git/config.
| otherwise = error "TODO conjure up a new special remote out of thin air"
-- XXX one way to do it would be to make a temporary git repo,
-- initremote in there, and use that for accessing the special
-- remote, rather than the current git repo. But can this be
-- avoided?
-- A key that is used for the manifest of the git repository stored in a
-- special remote with the specified uuid.
manifestKey :: UUID -> Key
manifestKey u = mkKey $ \kd -> kd
{ keyName = S.toShort (fromUUID u)
, keyVariety = OtherKey "GITMANIFEST"
}
-- A key that is used for the git bundle with the specified sha256
-- that is stored in a special remote with the specified uuid.
gitbundleKey :: UUID -> B.ByteString -> Key
gitbundleKey u sha256 = mkKey $ \kd -> kd
{ keyName = S.toShort (fromUUID u <> "-" <> sha256)
, keyVariety = OtherKey "GITBUNDLE"
}
-- The manifest contains an ordered list of git bundle keys.
newtype Manifest = Manifest [Key]
-- Downloads the Manifest, or if it does not exist, returns an empty
-- Manifest.
--
-- Throws errors if the remote cannot be accessed or the download fails,
-- or if the manifest file cannot be parsed.
downloadManifest :: Remote -> Annex Manifest
downloadManifest rmt = ifM (checkPresent rmt mk)
( withTmpFile "GITMANIFEST" $ \tmp tmph -> do
liftIO $ hClose tmph
_ <- retrieveKeyFile rmt mk
(AssociatedFile Nothing) tmp
nullMeterUpdate NoVerify
ks <- map deserializeKey' . B8.lines <$> liftIO (B.readFile tmp)
Manifest <$> checkvalid [] ks
, return (Manifest [])
)
where
mk = manifestKey (Remote.uuid rmt)
checkvalid c [] = return (reverse c)
checkvalid c (Just k:ks) = case fromKey keyVariety k of
OtherKey "GITBUNDLE" -> checkvalid (k:c) ks
_ -> giveup $ "Wrong type of key in manifest " ++ serializeKey k
checkvalid _ (Nothing:_) =
giveup $ "Error parsing manifest " ++ serializeKey mk

View file

@ -38,6 +38,7 @@ import Utility.CopyFile
import Git.FilePath
import Utility.PID
import Utility.InodeCache
import Utility.Metered
import Annex.InodeSentinal
import qualified Database.Keys
import qualified Database.Fsck as FsckDb
@ -206,8 +207,7 @@ performRemote key afile numcopies remote =
)
, return Nothing
)
getfile' tmp = Remote.retrieveKeyFile remote key (AssociatedFile Nothing) (fromRawFilePath tmp) dummymeter (RemoteVerify remote)
dummymeter _ = noop
getfile' tmp = Remote.retrieveKeyFile remote key (AssociatedFile Nothing) (fromRawFilePath tmp) nullMeterUpdate (RemoteVerify remote)
getcheap tmp = case Remote.retrieveKeyFileCheap remote of
Just a -> isRight <$> tryNonAsync (a key afile (fromRawFilePath tmp))
Nothing -> return False

View file

@ -3,11 +3,13 @@ This adds two new object types to git-annex, GITMANIFEST and a GITBUNDLE.
GITMANIFEST--$UUID is the manifest for a git repository stored in the
git-annex repository with that UUID.
GITBUNDLE--$UUID--sha256 is a git bundle.
GITBUNDLE--$UUID-sha256 is a git bundle.
# format of the manifest file
An ordered list of bundle keys, one per line.
An ordered list of bundle keys, one per line.
(Lines end with unix `"\n"`, not `"\r\n"`.)
# fetching