migrate --apply

And avoid migrate --update/--apply migrating when the new key was already
present in the repository, and got dropped. Luckily, the location log
allows distinguishing this from the new key never having been present!

That is mostly useful for --apply because otherwise dropped files would
keep coming back until the old objects were reaped as unused. But it
seemed to make sense to also do it for --update, for consistency in edge
cases if nothing else. One case where --update can use it is when one
branch got migrated earlier, and we dropped the file, and now another
branch has migrated the same file.

Sponsored-by: Jack Hill on Patreon
This commit is contained in:
Joey Hess 2023-12-08 13:23:03 -04:00
parent 51b974d9f0
commit 4ed71b34de
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
8 changed files with 78 additions and 25 deletions

View file

@ -1,7 +1,10 @@
git-annex (10.20231130) UNRELEASED; urgency=medium git-annex (10.20231130) UNRELEASED; urgency=medium
* migrate: Support distributed migrations by recording each migration, * migrate: Support distributed migrations by recording each migration,
and adding a --update option that updates the local repository. and adding a --update option that updates the local repository
incrementally, hard linking annex objects to their new keys.
* migrate: Added --apply option that (re)applies all recorded
distributed migrations to the objects in the repository.
* Make git-annex get/copy/move --from foo override configuration of * Make git-annex get/copy/move --from foo override configuration of
remote.foo.annex-ignore, as documented. remote.foo.annex-ignore, as documented.
* Support git-annex copy/move --from-anywhere --to remote. * Support git-annex copy/move --from-anywhere --to remote.

View file

@ -23,6 +23,7 @@ import Utility.Metered
import qualified Database.Keys import qualified Database.Keys
import Git.FilePath import Git.FilePath
import Annex.Link import Annex.Link
import Annex.UUID
cmd :: Command cmd :: Command
cmd = withAnnexOptions [backendOption, annexedMatchingOptions, jsonOptions] $ cmd = withAnnexOptions [backendOption, annexedMatchingOptions, jsonOptions] $
@ -33,6 +34,7 @@ cmd = withAnnexOptions [backendOption, annexedMatchingOptions, jsonOptions] $
data MigrateOptions = MigrateOptions data MigrateOptions = MigrateOptions
{ migrateThese :: CmdParams { migrateThese :: CmdParams
, updateOption :: Bool , updateOption :: Bool
, applyOption :: Bool
, removeSize :: Bool , removeSize :: Bool
} }
@ -41,7 +43,11 @@ optParser desc = MigrateOptions
<$> cmdParams desc <$> cmdParams desc
<*> switch <*> switch
( long "update" ( long "update"
<> help "update for migrations performed elsewhere" <> help "incrementally apply migrations performed elsewhere"
)
<*> switch
( long "apply"
<> help "(re)apply migrations performed elsewhere"
) )
<*> switch <*> switch
( long "remove-size" ( long "remove-size"
@ -50,11 +56,12 @@ optParser desc = MigrateOptions
seek :: MigrateOptions -> CommandSeek seek :: MigrateOptions -> CommandSeek
seek o seek o
| updateOption o = do | updateOption o || applyOption o = do
unless (null (migrateThese o)) $ unless (null (migrateThese o)) $
error "Cannot combine --update with files to migrate." error "Cannot combine --update or --apply with files to migrate."
streamNewDistributedMigrations $ \oldkey newkey -> streamNewDistributedMigrations (not (applyOption o)) $
commandAction $ update oldkey newkey \oldkey newkey ->
commandAction $ update oldkey newkey
| otherwise = do | otherwise = do
withFilesInGitAnnex ww seeker =<< workTreeItems ww (migrateThese o) withFilesInGitAnnex ww seeker =<< workTreeItems ww (migrateThese o)
commitMigration commitMigration
@ -149,7 +156,7 @@ perform onlyremovesize o file oldkey oldkeyrec oldbackend newbackend = go =<< ge
update :: Key -> Key -> CommandStart update :: Key -> Key -> CommandStart
update oldkey newkey = update oldkey newkey =
stopUnless ((not <$> inAnnex newkey) <&&> inAnnex oldkey <&&> allowed) $ do stopUnless (allowed <&&> available <&&> wanted) $ do
ai <- findworktreefile >>= return . \case ai <- findworktreefile >>= return . \case
Just f -> ActionItemAssociatedFile (AssociatedFile (Just f)) newkey Just f -> ActionItemAssociatedFile (AssociatedFile (Just f)) newkey
Nothing -> ActionItemKey newkey Nothing -> ActionItemKey newkey
@ -161,11 +168,21 @@ update oldkey newkey =
, next $ return False , next $ return False
) )
where where
available = (not <$> inAnnex newkey) <&&> inAnnex oldkey
-- annex.securehashesonly will block adding keys with insecure -- annex.securehashesonly will block adding keys with insecure
-- hashes, this check is only to avoid doing extra work and -- hashes, this check is only to avoid doing extra work and
-- displaying a message when it fails. -- displaying a message when it fails.
allowed = isNothing <$> checkSecureHashes newkey allowed = isNothing <$> checkSecureHashes newkey
-- If the new key was previous present in this repository, but got
-- dropped, assume the user still doesn't want it there.
wanted = loggedPreviousLocations newkey >>= \case
[] -> pure True
us -> do
u <- getUUID
pure (u `notElem` us)
findworktreefile = do findworktreefile = do
fs <- Database.Keys.getAssociatedFiles newkey fs <- Database.Keys.getAssociatedFiles newkey
g <- Annex.gitRepo g <- Annex.gitRepo

View file

@ -21,6 +21,7 @@ module Logs.Location (
logStatusAfter, logStatusAfter,
logChange, logChange,
loggedLocations, loggedLocations,
loggedPreviousLocations,
loggedLocationsHistorical, loggedLocationsHistorical,
loggedLocationsRef, loggedLocationsRef,
parseLoggedLocations, parseLoggedLocations,
@ -79,7 +80,13 @@ logChange _ NoUUID _ = noop
{- Returns a list of repository UUIDs that, according to the log, have {- Returns a list of repository UUIDs that, according to the log, have
- the value of a key. -} - the value of a key. -}
loggedLocations :: Key -> Annex [UUID] loggedLocations :: Key -> Annex [UUID]
loggedLocations = getLoggedLocations currentLogInfo loggedLocations = getLoggedLocations presentLogInfo
{- Returns a list of repository UUIDs that the location log indicates
- used to have the value of a key, but no longer do.
-}
loggedPreviousLocations :: Key -> Annex [UUID]
loggedPreviousLocations = getLoggedLocations notPresentLogInfo
{- Gets the location log on a particular date. -} {- Gets the location log on a particular date. -}
loggedLocationsHistorical :: RefDate -> Key -> Annex [UUID] loggedLocationsHistorical :: RefDate -> Key -> Annex [UUID]

View file

@ -117,15 +117,18 @@ commitMigration = do
(asTopFilePath migrationTreeGraftPoint) (asTopFilePath migrationTreeGraftPoint)
committedMigration commitsha committedMigration commitsha
-- Streams distributed migrations from the git-annex branch that have not -- Streams distributed migrations from the git-annex branch,
-- been performed here, and runs the provided action on each old and new -- and runs the provided action on each old and new key pair.
-- key pair. --
streamNewDistributedMigrations :: (Key -> Key -> Annex ()) -> Annex () -- With the incremental option, only scans as far as the last recorded
streamNewDistributedMigrations a = do -- migration that this has handled before.
streamNewDistributedMigrations :: Bool -> (Key -> Key -> Annex ()) -> Annex ()
streamNewDistributedMigrations incremental a = do
void Annex.Branch.update void Annex.Branch.update
branchsha <- Annex.Branch.getBranch branchsha <- Annex.Branch.getBranch
(stoppoint, toskip) <- getPerformedMigrations (stoppoint, toskip) <- getPerformedMigrations
(l, cleanup) <- inRepo $ getGitLog branchsha stoppoint (l, cleanup) <- inRepo $ getGitLog branchsha
(if incremental then stoppoint else Nothing)
[fromRawFilePath migrationTreeGraftPoint] [fromRawFilePath migrationTreeGraftPoint]
-- Need to follow because migrate.tree is grafted in -- Need to follow because migrate.tree is grafted in
-- and then deleted, and normally git log stops when a file -- and then deleted, and normally git log stops when a file

View file

@ -17,8 +17,8 @@ module Logs.Presence (
addLog', addLog',
maybeAddLog, maybeAddLog,
readLog, readLog,
currentLog, presentLogInfo,
currentLogInfo, notPresentLogInfo,
historicalLogInfo, historicalLogInfo,
) where ) where
@ -68,12 +68,13 @@ genLine logstatus loginfo c old = LogLine c' logstatus loginfo
readLog :: RawFilePath -> Annex [LogLine] readLog :: RawFilePath -> Annex [LogLine]
readLog = parseLog <$$> Annex.Branch.get readLog = parseLog <$$> Annex.Branch.get
{- Reads a log and returns only the info that is still in effect. -} {- Reads a log and returns only the info that is still present. -}
currentLogInfo :: RawFilePath -> Annex [LogInfo] presentLogInfo :: RawFilePath -> Annex [LogInfo]
currentLogInfo file = map info <$> currentLog file presentLogInfo file = map info . filterPresent <$> readLog file
currentLog :: RawFilePath -> Annex [LogLine] {- Reads a log and returns only the info that is no longer present. -}
currentLog file = filterPresent <$> readLog file notPresentLogInfo :: RawFilePath -> Annex [LogInfo]
notPresentLogInfo file = map info . filterNotPresent <$> readLog file
{- Reads a historical version of a log and returns the info that was in {- Reads a historical version of a log and returns the info that was in
- effect at that time. - effect at that time.

View file

@ -70,14 +70,18 @@ buildLog = mconcat . map genline
genstatus InfoMissing = charUtf8 '0' genstatus InfoMissing = charUtf8 '0'
genstatus InfoDead = charUtf8 'X' genstatus InfoDead = charUtf8 'X'
{- Given a log, returns only the info that is are still in effect. -} {- Given a log, returns only the info that is still present. -}
getLog :: L.ByteString -> [LogInfo] getLog :: L.ByteString -> [LogInfo]
getLog = map info . filterPresent . parseLog getLog = map info . filterPresent . parseLog
{- Returns the info from LogLines that are in effect. -} {- Returns the info from LogLines that is present. -}
filterPresent :: [LogLine] -> [LogLine] filterPresent :: [LogLine] -> [LogLine]
filterPresent = filter (\l -> InfoPresent == status l) . compactLog filterPresent = filter (\l -> InfoPresent == status l) . compactLog
{- Returns the info from LogLines that is not present. -}
filterNotPresent :: [LogLine] -> [LogLine]
filterNotPresent = filter (\l -> InfoPresent /= status l) . compactLog
{- Compacts a set of logs, returning a subset that contains the current {- Compacts a set of logs, returning a subset that contains the current
- status. -} - status. -}
compactLog :: [LogLine] -> [LogLine] compactLog :: [LogLine] -> [LogLine]

View file

@ -55,7 +55,7 @@ getUrls' key = do
where where
go [] = return [] go [] = return []
go (l:ls) = do go (l:ls) = do
us <- currentLogInfo l us <- presentLogInfo l
if null us if null us
then go ls then go ls
else return $ map fromLogInfo us else return $ map fromLogInfo us

View file

@ -27,7 +27,25 @@ this can also be used to migrate files to use the new key format.
* `--update` * `--update`
This updates the local repository for migrations that were performed This updates the local repository for migrations that were performed
elsewhere. elsewhere. Only new migrations since the last time this was run will
be performed.
This does not modify the working tree, but only hard links
(or in some cases copies) annex objects to their new keys.
Note that older versions of git-annex did not record migrations in a
way that this can use. Migrations performed with those older versions
had to be manually run in each clone of the repository.
* `--apply`
This applies all recorded migrations to the local repository. It is the
non-incremental form of `--update`.
One situation where this can be useful is when `git-annex migrate
--update` has been run, but since then un-migrated
objects have entered the repository. Using this option ensures that
any such objects get migrated.
Note that older versions of git-annex did not record migrations in a Note that older versions of git-annex did not record migrations in a
way that this can use. Migrations performed with those older versions way that this can use. Migrations performed with those older versions