From c7731cdbd94521c579a418313647201e950e6c5c Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Tue, 7 May 2024 13:42:12 -0400 Subject: [PATCH] add Backend.GitRemoteAnnex Making GITBUNDLE be in the backend list allows those keys to be hashed to verify, both when git-remote-annex downloads them, and by other transfers and by git fsck. GITMANIFEST is not in the backend list, because those keys will never be stored in .git/annex/objects and can't be verified in any case. This does mean that git-annex version will include GITBUNDLE in the list of backends. Also documented these in backends.mdwn Sponsored-by: Kevin Mueller on Patreon --- Backend/GitRemoteAnnex.hs | 76 +++++++++++++++++++++++++++++ Backend/Hash.hs | 14 ++++-- Backend/Variety.hs | 2 + Types/Key.hs | 4 ++ doc/backends.mdwn | 17 +++++-- doc/internals/git-remote-annex.mdwn | 10 ++-- git-annex.cabal | 1 + 7 files changed, 110 insertions(+), 14 deletions(-) create mode 100644 Backend/GitRemoteAnnex.hs diff --git a/Backend/GitRemoteAnnex.hs b/Backend/GitRemoteAnnex.hs new file mode 100644 index 0000000000..bb825a917e --- /dev/null +++ b/Backend/GitRemoteAnnex.hs @@ -0,0 +1,76 @@ +{- Backends for git-remote-annex. + - + - GITBUNDLE keys store git bundles + - GITMANIFEST keys store ordered lists of GITBUNDLE keys + - + - Copyright 2024 Joey Hess + - + - Licensed under the GNU AGPL version 3 or higher. + -} + +{-# LANGUAGE OverloadedStrings #-} + +module Backend.GitRemoteAnnex ( + backends, + genGitBundleKey, + genManifestKey, +) where + +import Annex.Common +import Types.Key +import Types.Backend +import Utility.Hash +import Utility.Metered +import qualified Backend.Hash as Hash + +import qualified Data.ByteString.Short as S + +backends :: [Backend] +backends = [gitbundle] + +gitbundle :: Backend +gitbundle = Backend + { backendVariety = GitBundleKey + , genKey = Nothing + -- ^ Not provided because these keys can only be generated by + -- git-remote-annex. + , verifyKeyContent = Just $ Hash.checkKeyChecksum sameCheckSum hash + , verifyKeyContentIncrementally = Just (liftIO . incrementalVerifier) + , canUpgradeKey = Nothing + , fastMigrate = Nothing + , isStableKey = const True + , isCryptographicallySecure = Hash.cryptographicallySecure hash + , isCryptographicallySecureKey = const $ pure $ + Hash.cryptographicallySecure hash + } + +-- git bundle keys use the sha256 hash. +hash :: Hash.Hash +hash = Hash.SHA2Hash (HashSize 256) + +incrementalVerifier :: Key -> IO IncrementalVerifier +incrementalVerifier = + mkIncrementalVerifier sha2_256_context "checksum" . sameCheckSum + +sameCheckSum :: Key -> String -> Bool +sameCheckSum key s = s == expected + where + -- The checksum comes after a UUID. + expected = reverse $ takeWhile (/= '-') $ reverse $ + decodeBS $ S.fromShort $ fromKey keyName key + +genGitBundleKey :: UUID -> RawFilePath -> MeterUpdate -> Annex Key +genGitBundleKey remoteuuid file meterupdate = do + filesize <- liftIO $ getFileSize file + s <- Hash.hashFile hash file meterupdate + return $ mkKey $ \k -> k + { keyName = S.toShort $ fromUUID remoteuuid <> "-" <> encodeBS s + , keyVariety = GitBundleKey + , keySize = Just filesize + } + +genManifestKey :: UUID -> Key +genManifestKey u = mkKey $ \kd -> kd + { keyName = S.toShort (fromUUID u) + , keyVariety = OtherKey "GITMANIFEST" + } diff --git a/Backend/Hash.hs b/Backend/Hash.hs index 9768550adf..f2684755b7 100644 --- a/Backend/Hash.hs +++ b/Backend/Hash.hs @@ -1,6 +1,6 @@ {- git-annex hashing backends - - - Copyright 2011-2021 Joey Hess + - Copyright 2011-2024 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -12,6 +12,10 @@ module Backend.Hash ( testKeyBackend, keyHash, descChecksum, + Hash(..), + cryptographicallySecure, + hashFile, + checkKeyChecksum ) where import Annex.Common @@ -78,7 +82,7 @@ genBackend :: Hash -> Backend genBackend hash = Backend { backendVariety = hashKeyVariety hash (HasExt False) , genKey = Just (keyValue hash) - , verifyKeyContent = Just $ checkKeyChecksum hash + , verifyKeyContent = Just $ checkKeyChecksum sameCheckSum hash , verifyKeyContentIncrementally = Just $ checkKeyChecksumIncremental hash , canUpgradeKey = Just needsUpgrade , fastMigrate = Just trivialMigrate @@ -123,14 +127,14 @@ keyValueE hash source meterupdate = keyValue hash source meterupdate >>= addE source (const $ hashKeyVariety hash (HasExt True)) -checkKeyChecksum :: Hash -> Key -> RawFilePath -> Annex Bool -checkKeyChecksum hash key file = catchIOErrorType HardwareFault hwfault $ do +checkKeyChecksum :: (Key -> String -> Bool) -> Hash -> Key -> RawFilePath -> Annex Bool +checkKeyChecksum issame hash key file = catchIOErrorType HardwareFault hwfault $ do fast <- Annex.getRead Annex.fast exists <- liftIO $ R.doesPathExist file case (exists, fast) of (True, False) -> do showAction (UnquotedString descChecksum) - sameCheckSum key + issame key <$> hashFile hash file nullMeterUpdate _ -> return True where diff --git a/Backend/Variety.hs b/Backend/Variety.hs index b4da6f2a96..a48933c88a 100644 --- a/Backend/Variety.hs +++ b/Backend/Variety.hs @@ -18,12 +18,14 @@ import qualified Backend.External import qualified Backend.Hash import qualified Backend.WORM import qualified Backend.URL +import qualified Backend.GitRemoteAnnex {- Regular backends. Does not include externals or VURL. -} regularBackendList :: [Backend] regularBackendList = Backend.Hash.backends ++ Backend.WORM.backends ++ Backend.URL.backends + ++ Backend.GitRemoteAnnex.backends {- The default hashing backend. -} defaultHashBackend :: Backend diff --git a/Types/Key.hs b/Types/Key.hs index f942b4e55c..6d8956b0a0 100644 --- a/Types/Key.hs +++ b/Types/Key.hs @@ -219,6 +219,7 @@ data KeyVariety | WORMKey | URLKey | VURLKey + | GitBundleKey -- A key that is handled by some external backend. | ExternalKey S.ByteString HasExt -- Some repositories may contain keys of other varieties, @@ -253,6 +254,7 @@ hasExt (MD5Key (HasExt b)) = b hasExt WORMKey = False hasExt URLKey = False hasExt VURLKey = False +hasExt GitBundleKey = False hasExt (ExternalKey _ (HasExt b)) = b hasExt (OtherKey s) = (snd <$> S8.unsnoc s) == Just 'E' @@ -282,6 +284,7 @@ formatKeyVariety v = case v of WORMKey -> "WORM" URLKey -> "URL" VURLKey -> "VURL" + GitBundleKey -> "GITBUNDLE" ExternalKey s e -> adde e ("X" <> s) OtherKey s -> s where @@ -347,6 +350,7 @@ parseKeyVariety "MD5E" = MD5Key (HasExt True) parseKeyVariety "WORM" = WORMKey parseKeyVariety "URL" = URLKey parseKeyVariety "VURL" = VURLKey +parseKeyVariety "GITBUNDLE" = GitBundleKey parseKeyVariety b | "X" `S.isPrefixOf` b = let b' = S.tail b diff --git a/doc/backends.mdwn b/doc/backends.mdwn index da37597902..d24fe0e654 100644 --- a/doc/backends.mdwn +++ b/doc/backends.mdwn @@ -79,10 +79,6 @@ content of an annexed file remains unchanged. passing it to a shell script. These types of keys are distinct from URLs/URIs that may be attached to a key (using any backend) indicating the key's location on the web or in one of [[special_remotes]]. -* `GIT` -- This is used internally by git-annex when exporting trees - containing files stored in git, rather than git-annex. It represents a - git sha. This is never used for git-annex links, but information about - keys of this type is stored in the git-annex branch. ## external backends @@ -100,6 +96,19 @@ Like with git-annex's builtin backends, you can add "E" to the end of the name of an external backend, to get a version that includes the file extension in the key. +## internal use backends + +Keys using these backends can sometimes be visible, but they are used by +git-annex for its own purposes, and not for your annexed files. + +* `GIT` -- This is used internally by git-annex when exporting trees + containing files stored in git, rather than git-annex. It represents a + git sha. This is never used for git-annex links, but information about + keys of this type is stored in the git-annex branch. +* `GITBUNDLE` and `GITMANIFEST` -- Used by [[git-remote-annex]] to store + a git repository in a special remote. See + [[this_page|internals/git-remote-annex]] for details about these. + ## notes If you want to be able to prove that you're working with the same file diff --git a/doc/internals/git-remote-annex.mdwn b/doc/internals/git-remote-annex.mdwn index 7ec8d76515..5c9203931d 100644 --- a/doc/internals/git-remote-annex.mdwn +++ b/doc/internals/git-remote-annex.mdwn @@ -1,4 +1,4 @@ -This adds two new object types to git-annex, GITMANIFEST and a GITBUNDLE. +This adds two new key types to git-annex, GITMANIFEST and a GITBUNDLE. GITMANIFEST--$UUID is the manifest for a git repository stored in the git-annex repository with that UUID. @@ -14,7 +14,7 @@ An ordered list of bundle keys, one per line. # fetching 1. download GITMANIFEST for the uuid of the special remote -2. download each listed GITBUNDLE object that we don't have +2. download each listed GITBUNDLE key that we don't have 3. `git fetch` from each new bundle in order (note that later bundles can update refs from the versions in previous bundles) @@ -26,7 +26,7 @@ This is how pushes are usually done. 1. create git bundle of all refs that are being pushed and have changed, and objects since the previously pushed refs 2. hash to calculate GITBUNDLE key -3. upload GITBUNDLE object +3. upload GITBUNDLE key 4. download current manifest 5. append GITBUNDLE key to manifest @@ -38,8 +38,8 @@ previously pushed ref. 1. create git bundle containing all refs stored in the repository, and all objects -2. hash to calculate GITBUNDLE object name -3. upload GITBUNDLE object +2. hash to calculate GITBUNDLE key name +3. upload GITBUNDLE key 4. download old manifest 4. upload new manifest listing only the single new GITBUNDLE 5. delete all other GITBUNDLEs that were listed in the old manifest diff --git a/git-annex.cabal b/git-annex.cabal index 97ae80382f..3c6ad98118 100644 --- a/git-annex.cabal +++ b/git-annex.cabal @@ -580,6 +580,7 @@ Executable git-annex Author Backend Backend.External + Backend.GitRemoteAnnex Backend.Hash Backend.URL Backend.Utilities