migrate --apply

And avoid migrate --update/--aply migrating when the new key was already
present in the repository, and got dropped. Luckily, the location log
allows distinguishing from the new key never having been present!

That is mostly useful for --apply because otherwise dropped files would
keep coming back until the old objects were reaped as unused. But it
seemed to make sense to also do it for --update. for consistency in edge
cases if nothing else. One case where --update can use it is when one
branch got migrated earlier, and we dropped the file, and now another
branch has migrated the same file.

Sponsored-by: Jack Hill on Patreon
This commit is contained in:
Joey Hess 2023-12-08 13:23:03 -04:00
parent 51b974d9f0
commit 4ed71b34de
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
8 changed files with 78 additions and 25 deletions

View file

@ -1,7 +1,10 @@
git-annex (10.20231130) UNRELEASED; urgency=medium
* migrate: Support distributed migrations by recording each migration,
and adding a --update option that updates the local repository.
and adding a --update option that updates the local repository
incrementally, hard linking annex objects to their new keys.
* migrate: Added --apply option that (re)applies all recorded
distributed migrations to the objects in repository.
* Make git-annex get/copy/move --from foo override configuration of
remote.foo.annex-ignore, as documented.
* Support git-annex copy/move --from-anywhere --to remote.

View file

@ -23,6 +23,7 @@ import Utility.Metered
import qualified Database.Keys
import Git.FilePath
import Annex.Link
import Annex.UUID
cmd :: Command
cmd = withAnnexOptions [backendOption, annexedMatchingOptions, jsonOptions] $
@ -33,6 +34,7 @@ cmd = withAnnexOptions [backendOption, annexedMatchingOptions, jsonOptions] $
data MigrateOptions = MigrateOptions
{ migrateThese :: CmdParams
, updateOption :: Bool
, applyOption :: Bool
, removeSize :: Bool
}
@ -41,7 +43,11 @@ optParser desc = MigrateOptions
<$> cmdParams desc
<*> switch
( long "update"
<> help "update for migrations performed elsewhere"
<> help "incrementally apply migrations performed elsewhere"
)
<*> switch
( long "apply"
<> help "(re)apply migrations performed elsewhere"
)
<*> switch
( long "remove-size"
@ -50,10 +56,11 @@ optParser desc = MigrateOptions
seek :: MigrateOptions -> CommandSeek
seek o
| updateOption o = do
| updateOption o || applyOption o = do
unless (null (migrateThese o)) $
error "Cannot combine --update with files to migrate."
streamNewDistributedMigrations $ \oldkey newkey ->
error "Cannot combine --update or --apply with files to migrate."
streamNewDistributedMigrations (not (applyOption o)) $
\oldkey newkey ->
commandAction $ update oldkey newkey
| otherwise = do
withFilesInGitAnnex ww seeker =<< workTreeItems ww (migrateThese o)
@ -149,7 +156,7 @@ perform onlyremovesize o file oldkey oldkeyrec oldbackend newbackend = go =<< ge
update :: Key -> Key -> CommandStart
update oldkey newkey =
stopUnless ((not <$> inAnnex newkey) <&&> inAnnex oldkey <&&> allowed) $ do
stopUnless (allowed <&&> available <&&> wanted) $ do
ai <- findworktreefile >>= return . \case
Just f -> ActionItemAssociatedFile (AssociatedFile (Just f)) newkey
Nothing -> ActionItemKey newkey
@ -161,11 +168,21 @@ update oldkey newkey =
, next $ return False
)
where
available = (not <$> inAnnex newkey) <&&> inAnnex oldkey
-- annex.securehashesonly will block adding keys with insecure
-- hashes, this check is only to avoid doing extra work and
-- displaying a message when it fails.
allowed = isNothing <$> checkSecureHashes newkey
-- If the new key was previous present in this repository, but got
-- dropped, assume the user still doesn't want it there.
wanted = loggedPreviousLocations newkey >>= \case
[] -> pure True
us -> do
u <- getUUID
pure (u `notElem` us)
findworktreefile = do
fs <- Database.Keys.getAssociatedFiles newkey
g <- Annex.gitRepo

View file

@ -21,6 +21,7 @@ module Logs.Location (
logStatusAfter,
logChange,
loggedLocations,
loggedPreviousLocations,
loggedLocationsHistorical,
loggedLocationsRef,
parseLoggedLocations,
@ -79,7 +80,13 @@ logChange _ NoUUID _ = noop
{- Returns a list of repository UUIDs that, according to the log, have
- the value of a key. -}
loggedLocations :: Key -> Annex [UUID]
loggedLocations = getLoggedLocations currentLogInfo
loggedLocations = getLoggedLocations presentLogInfo
{- Returns a list of repository UUIDs that the location log indicates
- used to have the vale of a key, but no longer do.
-}
loggedPreviousLocations :: Key -> Annex [UUID]
loggedPreviousLocations = getLoggedLocations notPresentLogInfo
{- Gets the location log on a particular date. -}
loggedLocationsHistorical :: RefDate -> Key -> Annex [UUID]

View file

@ -117,15 +117,18 @@ commitMigration = do
(asTopFilePath migrationTreeGraftPoint)
committedMigration commitsha
-- Streams distributed migrations from the git-annex branch that have not
-- been performed here, and runs the provided action on each old and new
-- key pair.
streamNewDistributedMigrations :: (Key -> Key -> Annex ()) -> Annex ()
streamNewDistributedMigrations a = do
-- Streams distributed migrations from the git-annex branch,
-- and runs the provided action on each old and new key pair.
--
-- With the incremental option, only scans as far as the last recorded
-- migration that this has handled before.
streamNewDistributedMigrations :: Bool -> (Key -> Key -> Annex ()) -> Annex ()
streamNewDistributedMigrations incremental a = do
void Annex.Branch.update
branchsha <- Annex.Branch.getBranch
(stoppoint, toskip) <- getPerformedMigrations
(l, cleanup) <- inRepo $ getGitLog branchsha stoppoint
(l, cleanup) <- inRepo $ getGitLog branchsha
(if incremental then stoppoint else Nothing)
[fromRawFilePath migrationTreeGraftPoint]
-- Need to follow because migrate.tree is grafted in
-- and then deleted, and normally git log stops when a file

View file

@ -17,8 +17,8 @@ module Logs.Presence (
addLog',
maybeAddLog,
readLog,
currentLog,
currentLogInfo,
presentLogInfo,
notPresentLogInfo,
historicalLogInfo,
) where
@ -68,12 +68,13 @@ genLine logstatus loginfo c old = LogLine c' logstatus loginfo
readLog :: RawFilePath -> Annex [LogLine]
readLog = parseLog <$$> Annex.Branch.get
{- Reads a log and returns only the info that is still in effect. -}
currentLogInfo :: RawFilePath -> Annex [LogInfo]
currentLogInfo file = map info <$> currentLog file
{- Reads a log and returns only the info that is still present. -}
presentLogInfo :: RawFilePath -> Annex [LogInfo]
presentLogInfo file = map info . filterPresent <$> readLog file
currentLog :: RawFilePath -> Annex [LogLine]
currentLog file = filterPresent <$> readLog file
{- Reads a log and returns only the info that is no longer present. -}
notPresentLogInfo :: RawFilePath -> Annex [LogInfo]
notPresentLogInfo file = map info . filterNotPresent <$> readLog file
{- Reads a historical version of a log and returns the info that was in
- effect at that time.

View file

@ -70,14 +70,18 @@ buildLog = mconcat . map genline
genstatus InfoMissing = charUtf8 '0'
genstatus InfoDead = charUtf8 'X'
{- Given a log, returns only the info that is are still in effect. -}
{- Given a log, returns only the info that is still present. -}
getLog :: L.ByteString -> [LogInfo]
getLog = map info . filterPresent . parseLog
{- Returns the info from LogLines that are in effect. -}
{- Returns the info from LogLines that is present. -}
filterPresent :: [LogLine] -> [LogLine]
filterPresent = filter (\l -> InfoPresent == status l) . compactLog
{- Returns the info from LogLines that is not present. -}
filterNotPresent :: [LogLine] -> [LogLine]
filterNotPresent = filter (\l -> InfoPresent /= status l) . compactLog
{- Compacts a set of logs, returning a subset that contains the current
- status. -}
compactLog :: [LogLine] -> [LogLine]

View file

@ -55,7 +55,7 @@ getUrls' key = do
where
go [] = return []
go (l:ls) = do
us <- currentLogInfo l
us <- presentLogInfo l
if null us
then go ls
else return $ map fromLogInfo us

View file

@ -27,7 +27,25 @@ this can also be used to migrate files to use the new key format.
* `--update`
This updates the local repository for migrations that were performed
elsewhere.
elsewhere. Only new migrations since the last time this was run will
be performed.
This does not modify the working tree, but only hard links
(or in some cases copies) annex objects to their new keys.
Note that older versions of git-annex did not record migrations in a
way that this can use. Migrations performed with those older versions
had to be manually run in each clone of the repository.
* `--apply`
This applies all recorded migrations to the local repository. It is the
non-incremental form of `--update`.
One situation where this can be useful is when `git-annex migrate
--update` has been run, but since then un-migrated
objects have entered the repository. Using this option ensures that
any such objects get migrated.
Note that older versions of git-annex did not record migrations in a
way that this can use. Migrations performed with those older versions