add Backend.GitRemoteAnnex

Making GITBUNDLE be in the backend list allows those keys to be
hashed to verify, both when git-remote-annex downloads them, and by other
transfers and by git fsck.

GITMANIFEST is not in the backend list, because those keys will never be
stored in .git/annex/objects and can't be verified in any case.

This does mean that git-annex version will include GITBUNDLE in the list
of backends.

Also documented these in backends.mdwn

Sponsored-by: Kevin Mueller on Patreon
This commit is contained in:
Joey Hess 2024-05-07 13:42:12 -04:00
parent 483887591d
commit c7731cdbd9
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
7 changed files with 110 additions and 14 deletions

76
Backend/GitRemoteAnnex.hs Normal file
View file

@ -0,0 +1,76 @@
{- Backends for git-remote-annex.
-
- GITBUNDLE keys store git bundles
- GITMANIFEST keys store ordered lists of GITBUNDLE keys
-
- Copyright 2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE OverloadedStrings #-}
module Backend.GitRemoteAnnex (
backends,
genGitBundleKey,
genManifestKey,
) where
import Annex.Common
import Types.Key
import Types.Backend
import Utility.Hash
import Utility.Metered
import qualified Backend.Hash as Hash
import qualified Data.ByteString.Short as S
backends :: [Backend]
backends = [gitbundle]
gitbundle :: Backend
gitbundle = Backend
{ backendVariety = GitBundleKey
, genKey = Nothing
-- ^ Not provided because these keys can only be generated by
-- git-remote-annex.
, verifyKeyContent = Just $ Hash.checkKeyChecksum sameCheckSum hash
, verifyKeyContentIncrementally = Just (liftIO . incrementalVerifier)
, canUpgradeKey = Nothing
, fastMigrate = Nothing
, isStableKey = const True
, isCryptographicallySecure = Hash.cryptographicallySecure hash
, isCryptographicallySecureKey = const $ pure $
Hash.cryptographicallySecure hash
}
-- git bundle keys use the sha256 hash.
hash :: Hash.Hash
hash = Hash.SHA2Hash (HashSize 256)
incrementalVerifier :: Key -> IO IncrementalVerifier
incrementalVerifier =
mkIncrementalVerifier sha2_256_context "checksum" . sameCheckSum
sameCheckSum :: Key -> String -> Bool
sameCheckSum key s = s == expected
where
-- The checksum comes after a UUID.
expected = reverse $ takeWhile (/= '-') $ reverse $
decodeBS $ S.fromShort $ fromKey keyName key
genGitBundleKey :: UUID -> RawFilePath -> MeterUpdate -> Annex Key
genGitBundleKey remoteuuid file meterupdate = do
filesize <- liftIO $ getFileSize file
s <- Hash.hashFile hash file meterupdate
return $ mkKey $ \k -> k
{ keyName = S.toShort $ fromUUID remoteuuid <> "-" <> encodeBS s
, keyVariety = GitBundleKey
, keySize = Just filesize
}
genManifestKey :: UUID -> Key
genManifestKey u = mkKey $ \kd -> kd
{ keyName = S.toShort (fromUUID u)
, keyVariety = OtherKey "GITMANIFEST"
}

View file

@ -1,6 +1,6 @@
{- git-annex hashing backends {- git-annex hashing backends
- -
- Copyright 2011-2021 Joey Hess <id@joeyh.name> - Copyright 2011-2024 Joey Hess <id@joeyh.name>
- -
- Licensed under the GNU AGPL version 3 or higher. - Licensed under the GNU AGPL version 3 or higher.
-} -}
@ -12,6 +12,10 @@ module Backend.Hash (
testKeyBackend, testKeyBackend,
keyHash, keyHash,
descChecksum, descChecksum,
Hash(..),
cryptographicallySecure,
hashFile,
checkKeyChecksum
) where ) where
import Annex.Common import Annex.Common
@ -78,7 +82,7 @@ genBackend :: Hash -> Backend
genBackend hash = Backend genBackend hash = Backend
{ backendVariety = hashKeyVariety hash (HasExt False) { backendVariety = hashKeyVariety hash (HasExt False)
, genKey = Just (keyValue hash) , genKey = Just (keyValue hash)
, verifyKeyContent = Just $ checkKeyChecksum hash , verifyKeyContent = Just $ checkKeyChecksum sameCheckSum hash
, verifyKeyContentIncrementally = Just $ checkKeyChecksumIncremental hash , verifyKeyContentIncrementally = Just $ checkKeyChecksumIncremental hash
, canUpgradeKey = Just needsUpgrade , canUpgradeKey = Just needsUpgrade
, fastMigrate = Just trivialMigrate , fastMigrate = Just trivialMigrate
@ -123,14 +127,14 @@ keyValueE hash source meterupdate =
keyValue hash source meterupdate keyValue hash source meterupdate
>>= addE source (const $ hashKeyVariety hash (HasExt True)) >>= addE source (const $ hashKeyVariety hash (HasExt True))
checkKeyChecksum :: Hash -> Key -> RawFilePath -> Annex Bool checkKeyChecksum :: (Key -> String -> Bool) -> Hash -> Key -> RawFilePath -> Annex Bool
checkKeyChecksum hash key file = catchIOErrorType HardwareFault hwfault $ do checkKeyChecksum issame hash key file = catchIOErrorType HardwareFault hwfault $ do
fast <- Annex.getRead Annex.fast fast <- Annex.getRead Annex.fast
exists <- liftIO $ R.doesPathExist file exists <- liftIO $ R.doesPathExist file
case (exists, fast) of case (exists, fast) of
(True, False) -> do (True, False) -> do
showAction (UnquotedString descChecksum) showAction (UnquotedString descChecksum)
sameCheckSum key issame key
<$> hashFile hash file nullMeterUpdate <$> hashFile hash file nullMeterUpdate
_ -> return True _ -> return True
where where

View file

@ -18,12 +18,14 @@ import qualified Backend.External
import qualified Backend.Hash import qualified Backend.Hash
import qualified Backend.WORM import qualified Backend.WORM
import qualified Backend.URL import qualified Backend.URL
import qualified Backend.GitRemoteAnnex
{- Regular backends. Does not include externals or VURL. -} {- Regular backends. Does not include externals or VURL. -}
regularBackendList :: [Backend] regularBackendList :: [Backend]
regularBackendList = Backend.Hash.backends regularBackendList = Backend.Hash.backends
++ Backend.WORM.backends ++ Backend.WORM.backends
++ Backend.URL.backends ++ Backend.URL.backends
++ Backend.GitRemoteAnnex.backends
{- The default hashing backend. -} {- The default hashing backend. -}
defaultHashBackend :: Backend defaultHashBackend :: Backend

View file

@ -219,6 +219,7 @@ data KeyVariety
| WORMKey | WORMKey
| URLKey | URLKey
| VURLKey | VURLKey
| GitBundleKey
-- A key that is handled by some external backend. -- A key that is handled by some external backend.
| ExternalKey S.ByteString HasExt | ExternalKey S.ByteString HasExt
-- Some repositories may contain keys of other varieties, -- Some repositories may contain keys of other varieties,
@ -253,6 +254,7 @@ hasExt (MD5Key (HasExt b)) = b
hasExt WORMKey = False hasExt WORMKey = False
hasExt URLKey = False hasExt URLKey = False
hasExt VURLKey = False hasExt VURLKey = False
hasExt GitBundleKey = False
hasExt (ExternalKey _ (HasExt b)) = b hasExt (ExternalKey _ (HasExt b)) = b
hasExt (OtherKey s) = (snd <$> S8.unsnoc s) == Just 'E' hasExt (OtherKey s) = (snd <$> S8.unsnoc s) == Just 'E'
@ -282,6 +284,7 @@ formatKeyVariety v = case v of
WORMKey -> "WORM" WORMKey -> "WORM"
URLKey -> "URL" URLKey -> "URL"
VURLKey -> "VURL" VURLKey -> "VURL"
GitBundleKey -> "GITBUNDLE"
ExternalKey s e -> adde e ("X" <> s) ExternalKey s e -> adde e ("X" <> s)
OtherKey s -> s OtherKey s -> s
where where
@ -347,6 +350,7 @@ parseKeyVariety "MD5E" = MD5Key (HasExt True)
parseKeyVariety "WORM" = WORMKey parseKeyVariety "WORM" = WORMKey
parseKeyVariety "URL" = URLKey parseKeyVariety "URL" = URLKey
parseKeyVariety "VURL" = VURLKey parseKeyVariety "VURL" = VURLKey
parseKeyVariety "GITBUNDLE" = GitBundleKey
parseKeyVariety b parseKeyVariety b
| "X" `S.isPrefixOf` b = | "X" `S.isPrefixOf` b =
let b' = S.tail b let b' = S.tail b

View file

@ -79,10 +79,6 @@ content of an annexed file remains unchanged.
passing it to a shell script. These types of keys are distinct from URLs/URIs passing it to a shell script. These types of keys are distinct from URLs/URIs
that may be attached to a key (using any backend) indicating the key's location that may be attached to a key (using any backend) indicating the key's location
on the web or in one of [[special_remotes]]. on the web or in one of [[special_remotes]].
* `GIT` -- This is used internally by git-annex when exporting trees
containing files stored in git, rather than git-annex. It represents a
git sha. This is never used for git-annex links, but information about
keys of this type is stored in the git-annex branch.
## external backends ## external backends
@ -100,6 +96,19 @@ Like with git-annex's builtin backends, you can add "E" to the end of the
name of an external backend, to get a version that includes the file name of an external backend, to get a version that includes the file
extension in the key. extension in the key.
## internal use backends
Keys using these backends can sometimes be visible, but they are used by
git-annex for its own purposes, and not for your annexed files.
* `GIT` -- This is used internally by git-annex when exporting trees
containing files stored in git, rather than git-annex. It represents a
git sha. This is never used for git-annex links, but information about
keys of this type is stored in the git-annex branch.
* `GITBUNDLE` and `GITMANIFEST` -- Used by [[git-remote-annex]] to store
a git repository in a special remote. See
[[this_page|internals/git-remote-annex]] for details about these.
## notes ## notes
If you want to be able to prove that you're working with the same file If you want to be able to prove that you're working with the same file

View file

@ -1,4 +1,4 @@
This adds two new object types to git-annex, GITMANIFEST and a GITBUNDLE. This adds two new key types to git-annex, GITMANIFEST and a GITBUNDLE.
GITMANIFEST--$UUID is the manifest for a git repository stored in the GITMANIFEST--$UUID is the manifest for a git repository stored in the
git-annex repository with that UUID. git-annex repository with that UUID.
@ -14,7 +14,7 @@ An ordered list of bundle keys, one per line.
# fetching # fetching
1. download GITMANIFEST for the uuid of the special remote 1. download GITMANIFEST for the uuid of the special remote
2. download each listed GITBUNDLE object that we don't have 2. download each listed GITBUNDLE key that we don't have
3. `git fetch` from each new bundle in order 3. `git fetch` from each new bundle in order
(note that later bundles can update refs from the versions in previous (note that later bundles can update refs from the versions in previous
bundles) bundles)
@ -26,7 +26,7 @@ This is how pushes are usually done.
1. create git bundle of all refs that are being pushed and have changed, 1. create git bundle of all refs that are being pushed and have changed,
and objects since the previously pushed refs and objects since the previously pushed refs
2. hash to calculate GITBUNDLE key 2. hash to calculate GITBUNDLE key
3. upload GITBUNDLE object 3. upload GITBUNDLE key
4. download current manifest 4. download current manifest
5. append GITBUNDLE key to manifest 5. append GITBUNDLE key to manifest
@ -38,8 +38,8 @@ previously pushed ref.
1. create git bundle containing all refs stored in the repository, and all 1. create git bundle containing all refs stored in the repository, and all
objects objects
2. hash to calculate GITBUNDLE object name 2. hash to calculate GITBUNDLE key name
3. upload GITBUNDLE object 3. upload GITBUNDLE key
4. download old manifest 4. download old manifest
4. upload new manifest listing only the single new GITBUNDLE 4. upload new manifest listing only the single new GITBUNDLE
5. delete all other GITBUNDLEs that were listed in the old manifest 5. delete all other GITBUNDLEs that were listed in the old manifest

View file

@ -580,6 +580,7 @@ Executable git-annex
Author Author
Backend Backend
Backend.External Backend.External
Backend.GitRemoteAnnex
Backend.Hash Backend.Hash
Backend.URL Backend.URL
Backend.Utilities Backend.Utilities