migrate --update fully working

Could use some more testing.

When the old key is not present, Command.ReKey.linkKey' will return
False, so this handles that case ok.

However, distributed migration may also need to deal with the old
key getting copied into the repository later. In that situation,
re-running migrate --update won't link it to the new key, and some
users may need that. They can delete .git/annex/migrate.log and
run it again, but that is not a good user interface. Possible fixes:
either have a way to re-run all distributed migrations, or record
migrations in a database and scan the db in a future run to find
migrations that still need to be done.

Sponsored-by: Kevin Mueller on Patreon
This commit is contained in:
Joey Hess 2023-12-07 17:26:12 -04:00
parent 7c7c9912c1
commit abea01d9e0
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
2 changed files with 17 additions and 11 deletions

View file

@ -18,6 +18,7 @@ import qualified Annex
import Logs.Migrate import Logs.Migrate
import Logs.MetaData import Logs.MetaData
import Logs.Web import Logs.Web
import Logs.Location
import Utility.Metered import Utility.Metered
cmd :: Command cmd :: Command
@ -144,7 +145,8 @@ perform onlyremovesize o file oldkey oldkeyrec oldbackend newbackend = go =<< ge
update :: CommandStart update :: CommandStart
update = starting "migrate" (ActionItemOther Nothing) (SeekInput []) $ do update = starting "migrate" (ActionItemOther Nothing) (SeekInput []) $ do
streamNewDistributedMigrations $ \oldkey newkey -> do streamNewDistributedMigrations $ \oldkey newkey ->
liftIO $ print ("migrate", oldkey, newkey) unlessM (inAnnex newkey) $
whenM (Command.ReKey.linkKey' oldkey newkey) $
logStatus newkey InfoPresent
next $ return True next $ return True

View file

@ -95,14 +95,7 @@ perform file oldkey newkey = do
- to avoid wasting disk space. -} - to avoid wasting disk space. -}
linkKey :: RawFilePath -> Key -> Key -> Annex Bool linkKey :: RawFilePath -> Key -> Key -> Annex Bool
linkKey file oldkey newkey = ifM (isJust <$> isAnnexLink file) linkKey file oldkey newkey = ifM (isJust <$> isAnnexLink file)
{- If the object file is already hardlinked to elsewhere, a hard ( linkKey' oldkey newkey
- link won't be made by getViaTmpFromDisk, but a copy instead.
- This avoids hard linking to content linked to an
- unlocked file, which would leave the new key unlocked
- and vulnerable to corruption. -}
( getViaTmpFromDisk RetrievalAllKeysSecure DefaultVerify newkey (AssociatedFile Nothing) $ \tmp -> unVerified $ do
oldobj <- calcRepo (gitAnnexLocation oldkey)
isJust <$> linkOrCopy' (return True) newkey oldobj tmp Nothing
, do , do
{- The file being rekeyed is itself an unlocked file; if {- The file being rekeyed is itself an unlocked file; if
- it's hard linked to the old key, that link must be broken. -} - it's hard linked to the old key, that link must be broken. -}
@ -128,6 +121,17 @@ linkKey file oldkey newkey = ifM (isJust <$> isAnnexLink file)
LinkAnnexNoop -> True LinkAnnexNoop -> True
) )
{- Populates the content of newkey from the object file of oldkey,
- by running linkOrCopy' on the old object inside getViaTmpFromDisk.
- The callback reports success when linkOrCopy' yields a Just value.
-
- If the object file is already hardlinked to elsewhere, a hard
- link won't be made by getViaTmpFromDisk, but a copy instead.
- This avoids hard linking to content linked to an
- unlocked file, which would leave the new key unlocked
- and vulnerable to corruption.
-
- NOTE(review): expected to return False when the old key's object
- file is not present; confirm against linkOrCopy'. -}
linkKey' :: Key -> Key -> Annex Bool
linkKey' oldkey newkey =
getViaTmpFromDisk RetrievalAllKeysSecure DefaultVerify newkey (AssociatedFile Nothing) $ \tmp -> unVerified $ do
oldobj <- calcRepo (gitAnnexLocation oldkey)
isJust <$> linkOrCopy' (return True) newkey oldobj tmp Nothing
cleanup :: RawFilePath -> Key -> (MigrationRecord -> Annex ()) -> CommandCleanup cleanup :: RawFilePath -> Key -> (MigrationRecord -> Annex ()) -> CommandCleanup
cleanup file newkey a = do cleanup file newkey a = do
newkeyrec <- ifM (isJust <$> isAnnexLink file) newkeyrec <- ifM (isJust <$> isAnnexLink file)