git-annex unused --from remote skips its git-remote-annex keys

This turns out to only be necessary in edge cases. Most of the
time, git-annex unused --from remote doesn't see git-remote-annex keys
at all, because it does not record a location log for them.

On the other hand, git-annex unused does find them, since it does not
rely on the location log. And that's good because they're a local cache
that the user should be able to drop.

If, however, the user ran git-annex unused and then git-annex move
--unused --to remote, the keys would have a location log for that
remote. Then git-annex unused --from remote would see them, and would
consider them unused, even when they are present on the special remote
they belong to. That risks losing data if the user drops the keys from
the special remote without expecting that doing so would delete git
branches they had pushed to it.

So, make git-annex unused --from skip git-remote-annex keys whose uuid
is the same as the remote.
This commit is contained in:
Joey Hess 2024-05-14 15:12:07 -04:00
parent 0bf72ef103
commit 24af51e66d
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
4 changed files with 37 additions and 9 deletions

View file

@ -14,6 +14,7 @@ module Backend.GitRemoteAnnex (
backends, backends,
genGitBundleKey, genGitBundleKey,
genManifestKey, genManifestKey,
isGitRemoteAnnexKey,
) where ) where
import Annex.Common import Annex.Common
@ -24,9 +25,10 @@ import Utility.Metered
import qualified Backend.Hash as Hash import qualified Backend.Hash as Hash
import qualified Data.ByteString.Short as S import qualified Data.ByteString.Short as S
import qualified Data.ByteString.Char8 as B8
backends :: [Backend] backends :: [Backend]
backends = [gitbundle] backends = [gitbundle, gitmanifest]
gitbundle :: Backend gitbundle :: Backend
gitbundle = Backend gitbundle = Backend
@ -44,6 +46,19 @@ gitbundle = Backend
Hash.cryptographicallySecure hash Hash.cryptographicallySecure hash
} }
-- Backend for git manifest keys. It supplies no key generation, content
-- verification, upgrade, or migration hooks. Its keys are reported as
-- stable, and neither the backend nor any of its keys is reported as
-- cryptographically secure.
gitmanifest :: Backend
gitmanifest = Backend
    { backendVariety = GitManifestKey
    , genKey = Nothing
    , verifyKeyContent = Nothing
    , verifyKeyContentIncrementally = Nothing
    , canUpgradeKey = Nothing
    , fastMigrate = Nothing
    , isStableKey = \_ -> True
    , isCryptographicallySecure = False
    , isCryptographicallySecureKey = \_ -> pure False
    }
-- git bundle keys use the sha256 hash. -- git bundle keys use the sha256 hash.
hash :: Hash.Hash hash :: Hash.Hash
hash = Hash.SHA2Hash (HashSize 256) hash = Hash.SHA2Hash (HashSize 256)
@ -72,5 +87,18 @@ genGitBundleKey remoteuuid file meterupdate = do
genManifestKey :: UUID -> Key genManifestKey :: UUID -> Key
genManifestKey u = mkKey $ \kd -> kd genManifestKey u = mkKey $ \kd -> kd
{ keyName = S.toShort (fromUUID u) { keyName = S.toShort (fromUUID u)
, keyVariety = OtherKey "GITMANIFEST" , keyVariety = GitManifestKey
} }
{- Reports whether the key is a git-remote-annex key (a manifest key or
 - a bundle key) belonging to the special remote that has this uuid. -}
isGitRemoteAnnexKey :: UUID -> Key -> Bool
isGitRemoteAnnexKey u k = case fromKey keyVariety k of
    GitManifestKey -> matches name
    GitBundleKey -> matches (stripchecksum name)
    _ -> False
  where
    name = S.fromShort (fromKey keyName k)
    matches b = fromUUID u == b
    -- In a bundle key's name, a checksum comes after the UUID.
    -- Drop everything after the last '-', and then that '-' itself.
    stripchecksum = B8.dropEnd 1 . B8.dropWhileEnd (/= '-')

View file

@ -1,6 +1,6 @@
{- git-annex command {- git-annex command
- -
- Copyright 2010-2016 Joey Hess <id@joeyh.name> - Copyright 2010-2024 Joey Hess <id@joeyh.name>
- -
- Licensed under the GNU AGPL version 3 or higher. - Licensed under the GNU AGPL version 3 or higher.
-} -}
@ -34,6 +34,7 @@ import Logs.View (is_branchView)
import Annex.BloomFilter import Annex.BloomFilter
import qualified Database.Keys import qualified Database.Keys
import Annex.InodeSentinal import Annex.InodeSentinal
import Backend.GitRemoteAnnex (isGitRemoteAnnexKey)
import qualified Data.Map as M import qualified Data.Map as M
import qualified Data.ByteString as S import qualified Data.ByteString as S
@ -104,7 +105,8 @@ checkRemoteUnused remotename refspec = go =<< Remote.nameToUUID remotename
_ <- check "" (remoteUnusedMsg r remotename) (remoteunused u) 0 _ <- check "" (remoteUnusedMsg r remotename) (remoteunused u) 0
next $ return True next $ return True
remoteunused u = loggedKeysFor u >>= \case remoteunused u = loggedKeysFor u >>= \case
Just ks -> excludeReferenced refspec ks Just ks -> filter (not . isGitRemoteAnnexKey u)
<$> excludeReferenced refspec ks
Nothing -> giveup "This repository is read-only." Nothing -> giveup "This repository is read-only."
check :: String -> ([(Int, Key)] -> String) -> Annex [Key] -> Int -> Annex Int check :: String -> ([(Int, Key)] -> String) -> Annex [Key] -> Int -> Annex Int

View file

@ -220,6 +220,7 @@ data KeyVariety
| URLKey | URLKey
| VURLKey | VURLKey
| GitBundleKey | GitBundleKey
| GitManifestKey
-- A key that is handled by some external backend. -- A key that is handled by some external backend.
| ExternalKey S.ByteString HasExt | ExternalKey S.ByteString HasExt
-- Some repositories may contain keys of other varieties, -- Some repositories may contain keys of other varieties,
@ -255,6 +256,7 @@ hasExt WORMKey = False
hasExt URLKey = False hasExt URLKey = False
hasExt VURLKey = False hasExt VURLKey = False
hasExt GitBundleKey = False hasExt GitBundleKey = False
hasExt GitManifestKey = False
hasExt (ExternalKey _ (HasExt b)) = b hasExt (ExternalKey _ (HasExt b)) = b
hasExt (OtherKey s) = (snd <$> S8.unsnoc s) == Just 'E' hasExt (OtherKey s) = (snd <$> S8.unsnoc s) == Just 'E'
@ -285,6 +287,7 @@ formatKeyVariety v = case v of
URLKey -> "URL" URLKey -> "URL"
VURLKey -> "VURL" VURLKey -> "VURL"
GitBundleKey -> "GITBUNDLE" GitBundleKey -> "GITBUNDLE"
GitManifestKey -> "GITMANIFEST"
ExternalKey s e -> adde e ("X" <> s) ExternalKey s e -> adde e ("X" <> s)
OtherKey s -> s OtherKey s -> s
where where

View file

@ -10,11 +10,6 @@ will be available to users who don't use datalad.
This is implemented and working. Remaining todo list for it: This is implemented and working. Remaining todo list for it:
* git-annex unused --from remote should not treat git manifest and bundle
keys as unused, since that could lead to data loss. It's fine for
git-annex unused on the local repo to treat those as unused since they're
only a local cache.
* Test pushes that delete branches. * Test pushes that delete branches.
* Test incremental pushes that don't fast-forward. * Test incremental pushes that don't fast-forward.