add Backend.GitRemoteAnnex

Making GITBUNDLE be in the backend list allows those keys to be
hashed to verify, both when git-remote-annex downloads them, and by other
transfers and by git fsck.

GITMANIFEST is not in the backend list, because those keys will never be
stored in .git/annex/objects and can't be verified in any case.

This does mean that git-annex version will include GITBUNDLE in the list
of backends.

Also documented these in backends.mdwn

Sponsored-by: Kevin Mueller on Patreon
This commit is contained in:
Joey Hess 2024-05-07 13:42:12 -04:00
parent 483887591d
commit c7731cdbd9
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
7 changed files with 110 additions and 14 deletions

76
Backend/GitRemoteAnnex.hs Normal file
View file

@ -0,0 +1,76 @@
{- Backends for git-remote-annex.
-
- GITBUNDLE keys store git bundles
- GITMANIFEST keys store ordered lists of GITBUNDLE keys
-
- Copyright 2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
{-# LANGUAGE OverloadedStrings #-}
module Backend.GitRemoteAnnex (
backends,
genGitBundleKey,
genManifestKey,
) where
import Annex.Common
import Types.Key
import Types.Backend
import Utility.Hash
import Utility.Metered
import qualified Backend.Hash as Hash
import qualified Data.ByteString.Short as S
backends :: [Backend]
backends = [gitbundle]
gitbundle :: Backend
gitbundle = Backend
{ backendVariety = GitBundleKey
, genKey = Nothing
-- ^ Not provided because these keys can only be generated by
-- git-remote-annex.
, verifyKeyContent = Just $ Hash.checkKeyChecksum sameCheckSum hash
, verifyKeyContentIncrementally = Just (liftIO . incrementalVerifier)
, canUpgradeKey = Nothing
, fastMigrate = Nothing
, isStableKey = const True
, isCryptographicallySecure = Hash.cryptographicallySecure hash
, isCryptographicallySecureKey = const $ pure $
Hash.cryptographicallySecure hash
}
-- git bundle keys use the sha256 hash.
hash :: Hash.Hash
hash = Hash.SHA2Hash (HashSize 256)
incrementalVerifier :: Key -> IO IncrementalVerifier
incrementalVerifier =
mkIncrementalVerifier sha2_256_context "checksum" . sameCheckSum
sameCheckSum :: Key -> String -> Bool
sameCheckSum key s = s == expected
where
-- The checksum comes after a UUID.
expected = reverse $ takeWhile (/= '-') $ reverse $
decodeBS $ S.fromShort $ fromKey keyName key
genGitBundleKey :: UUID -> RawFilePath -> MeterUpdate -> Annex Key
genGitBundleKey remoteuuid file meterupdate = do
filesize <- liftIO $ getFileSize file
s <- Hash.hashFile hash file meterupdate
return $ mkKey $ \k -> k
{ keyName = S.toShort $ fromUUID remoteuuid <> "-" <> encodeBS s
, keyVariety = GitBundleKey
, keySize = Just filesize
}
genManifestKey :: UUID -> Key
genManifestKey u = mkKey $ \kd -> kd
{ keyName = S.toShort (fromUUID u)
, keyVariety = OtherKey "GITMANIFEST"
}

View file

@ -1,6 +1,6 @@
{- git-annex hashing backends
-
- Copyright 2011-2021 Joey Hess <id@joeyh.name>
- Copyright 2011-2024 Joey Hess <id@joeyh.name>
-
- Licensed under the GNU AGPL version 3 or higher.
-}
@ -12,6 +12,10 @@ module Backend.Hash (
testKeyBackend,
keyHash,
descChecksum,
Hash(..),
cryptographicallySecure,
hashFile,
checkKeyChecksum
) where
import Annex.Common
@ -78,7 +82,7 @@ genBackend :: Hash -> Backend
genBackend hash = Backend
{ backendVariety = hashKeyVariety hash (HasExt False)
, genKey = Just (keyValue hash)
, verifyKeyContent = Just $ checkKeyChecksum hash
, verifyKeyContent = Just $ checkKeyChecksum sameCheckSum hash
, verifyKeyContentIncrementally = Just $ checkKeyChecksumIncremental hash
, canUpgradeKey = Just needsUpgrade
, fastMigrate = Just trivialMigrate
@ -123,14 +127,14 @@ keyValueE hash source meterupdate =
keyValue hash source meterupdate
>>= addE source (const $ hashKeyVariety hash (HasExt True))
checkKeyChecksum :: Hash -> Key -> RawFilePath -> Annex Bool
checkKeyChecksum hash key file = catchIOErrorType HardwareFault hwfault $ do
checkKeyChecksum :: (Key -> String -> Bool) -> Hash -> Key -> RawFilePath -> Annex Bool
checkKeyChecksum issame hash key file = catchIOErrorType HardwareFault hwfault $ do
fast <- Annex.getRead Annex.fast
exists <- liftIO $ R.doesPathExist file
case (exists, fast) of
(True, False) -> do
showAction (UnquotedString descChecksum)
sameCheckSum key
issame key
<$> hashFile hash file nullMeterUpdate
_ -> return True
where

View file

@ -18,12 +18,14 @@ import qualified Backend.External
import qualified Backend.Hash
import qualified Backend.WORM
import qualified Backend.URL
import qualified Backend.GitRemoteAnnex
{- Regular backends. Does not include externals or VURL. -}
regularBackendList :: [Backend]
regularBackendList = Backend.Hash.backends
++ Backend.WORM.backends
++ Backend.URL.backends
++ Backend.GitRemoteAnnex.backends
{- The default hashing backend. -}
defaultHashBackend :: Backend

View file

@ -219,6 +219,7 @@ data KeyVariety
| WORMKey
| URLKey
| VURLKey
| GitBundleKey
-- A key that is handled by some external backend.
| ExternalKey S.ByteString HasExt
-- Some repositories may contain keys of other varieties,
@ -253,6 +254,7 @@ hasExt (MD5Key (HasExt b)) = b
hasExt WORMKey = False
hasExt URLKey = False
hasExt VURLKey = False
hasExt GitBundleKey = False
hasExt (ExternalKey _ (HasExt b)) = b
hasExt (OtherKey s) = (snd <$> S8.unsnoc s) == Just 'E'
@ -282,6 +284,7 @@ formatKeyVariety v = case v of
WORMKey -> "WORM"
URLKey -> "URL"
VURLKey -> "VURL"
GitBundleKey -> "GITBUNDLE"
ExternalKey s e -> adde e ("X" <> s)
OtherKey s -> s
where
@ -347,6 +350,7 @@ parseKeyVariety "MD5E" = MD5Key (HasExt True)
parseKeyVariety "WORM" = WORMKey
parseKeyVariety "URL" = URLKey
parseKeyVariety "VURL" = VURLKey
parseKeyVariety "GITBUNDLE" = GitBundleKey
parseKeyVariety b
| "X" `S.isPrefixOf` b =
let b' = S.tail b

View file

@ -79,10 +79,6 @@ content of an annexed file remains unchanged.
passing it to a shell script. These types of keys are distinct from URLs/URIs
that may be attached to a key (using any backend) indicating the key's location
on the web or in one of [[special_remotes]].
* `GIT` -- This is used internally by git-annex when exporting trees
containing files stored in git, rather than git-annex. It represents a
git sha. This is never used for git-annex links, but information about
keys of this type is stored in the git-annex branch.
## external backends
@ -100,6 +96,19 @@ Like with git-annex's builtin backends, you can add "E" to the end of the
name of an external backend, to get a version that includes the file
extension in the key.
## internal use backends
Keys using these backends can sometimes be visible, but they are used by
git-annex for its own purposes, and not for your annexed files.
* `GIT` -- This is used internally by git-annex when exporting trees
containing files stored in git, rather than git-annex. It represents a
git sha. This is never used for git-annex links, but information about
keys of this type is stored in the git-annex branch.
* `GITBUNDLE` and `GITMANIFEST` -- Used by [[git-remote-annex]] to store
a git repository in a special remote. See
[[this_page|internals/git-remote-annex]] for details about these.
## notes
If you want to be able to prove that you're working with the same file

View file

@ -1,4 +1,4 @@
This adds two new object types to git-annex, GITMANIFEST and a GITBUNDLE.
This adds two new key types to git-annex, GITMANIFEST and a GITBUNDLE.
GITMANIFEST--$UUID is the manifest for a git repository stored in the
git-annex repository with that UUID.
@ -14,7 +14,7 @@ An ordered list of bundle keys, one per line.
# fetching
1. download GITMANIFEST for the uuid of the special remote
2. download each listed GITBUNDLE object that we don't have
2. download each listed GITBUNDLE key that we don't have
3. `git fetch` from each new bundle in order
(note that later bundles can update refs from the versions in previous
bundles)
@ -26,7 +26,7 @@ This is how pushes are usually done.
1. create git bundle of all refs that are being pushed and have changed,
and objects since the previously pushed refs
2. hash to calculate GITBUNDLE key
3. upload GITBUNDLE object
3. upload GITBUNDLE key
4. download current manifest
5. append GITBUNDLE key to manifest
@ -38,8 +38,8 @@ previously pushed ref.
1. create git bundle containing all refs stored in the repository, and all
objects
2. hash to calculate GITBUNDLE object name
3. upload GITBUNDLE object
2. hash to calculate GITBUNDLE key name
3. upload GITBUNDLE key
4. download old manifest
4. upload new manifest listing only the single new GITBUNDLE
5. delete all other GITBUNDLEs that were listed in the old manifest

View file

@ -580,6 +580,7 @@ Executable git-annex
Author
Backend
Backend.External
Backend.GitRemoteAnnex
Backend.Hash
Backend.URL
Backend.Utilities