migrate --apply
And avoid migrate --update/--aply migrating when the new key was already present in the repository, and got dropped. Luckily, the location log allows distinguishing from the new key never having been present! That is mostly useful for --apply because otherwise dropped files would keep coming back until the old objects were reaped as unused. But it seemed to make sense to also do it for --update. for consistency in edge cases if nothing else. One case where --update can use it is when one branch got migrated earlier, and we dropped the file, and now another branch has migrated the same file. Sponsored-by: Jack Hill on Patreon
This commit is contained in:
parent
51b974d9f0
commit
4ed71b34de
8 changed files with 78 additions and 25 deletions
|
@ -1,7 +1,10 @@
|
|||
git-annex (10.20231130) UNRELEASED; urgency=medium
|
||||
|
||||
* migrate: Support distributed migrations by recording each migration,
|
||||
and adding a --update option that updates the local repository.
|
||||
and adding a --update option that updates the local repository
|
||||
incrementally, hard linking annex objects to their new keys.
|
||||
* migrate: Added --apply option that (re)applies all recorded
|
||||
distributed migrations to the objects in repository.
|
||||
* Make git-annex get/copy/move --from foo override configuration of
|
||||
remote.foo.annex-ignore, as documented.
|
||||
* Support git-annex copy/move --from-anywhere --to remote.
|
||||
|
|
|
@ -23,6 +23,7 @@ import Utility.Metered
|
|||
import qualified Database.Keys
|
||||
import Git.FilePath
|
||||
import Annex.Link
|
||||
import Annex.UUID
|
||||
|
||||
cmd :: Command
|
||||
cmd = withAnnexOptions [backendOption, annexedMatchingOptions, jsonOptions] $
|
||||
|
@ -33,6 +34,7 @@ cmd = withAnnexOptions [backendOption, annexedMatchingOptions, jsonOptions] $
|
|||
data MigrateOptions = MigrateOptions
|
||||
{ migrateThese :: CmdParams
|
||||
, updateOption :: Bool
|
||||
, applyOption :: Bool
|
||||
, removeSize :: Bool
|
||||
}
|
||||
|
||||
|
@ -41,7 +43,11 @@ optParser desc = MigrateOptions
|
|||
<$> cmdParams desc
|
||||
<*> switch
|
||||
( long "update"
|
||||
<> help "update for migrations performed elsewhere"
|
||||
<> help "incrementally apply migrations performed elsewhere"
|
||||
)
|
||||
<*> switch
|
||||
( long "apply"
|
||||
<> help "(re)apply migrations performed elsewhere"
|
||||
)
|
||||
<*> switch
|
||||
( long "remove-size"
|
||||
|
@ -50,10 +56,11 @@ optParser desc = MigrateOptions
|
|||
|
||||
seek :: MigrateOptions -> CommandSeek
|
||||
seek o
|
||||
| updateOption o = do
|
||||
| updateOption o || applyOption o = do
|
||||
unless (null (migrateThese o)) $
|
||||
error "Cannot combine --update with files to migrate."
|
||||
streamNewDistributedMigrations $ \oldkey newkey ->
|
||||
error "Cannot combine --update or --apply with files to migrate."
|
||||
streamNewDistributedMigrations (not (applyOption o)) $
|
||||
\oldkey newkey ->
|
||||
commandAction $ update oldkey newkey
|
||||
| otherwise = do
|
||||
withFilesInGitAnnex ww seeker =<< workTreeItems ww (migrateThese o)
|
||||
|
@ -149,7 +156,7 @@ perform onlyremovesize o file oldkey oldkeyrec oldbackend newbackend = go =<< ge
|
|||
|
||||
update :: Key -> Key -> CommandStart
|
||||
update oldkey newkey =
|
||||
stopUnless ((not <$> inAnnex newkey) <&&> inAnnex oldkey <&&> allowed) $ do
|
||||
stopUnless (allowed <&&> available <&&> wanted) $ do
|
||||
ai <- findworktreefile >>= return . \case
|
||||
Just f -> ActionItemAssociatedFile (AssociatedFile (Just f)) newkey
|
||||
Nothing -> ActionItemKey newkey
|
||||
|
@ -161,11 +168,21 @@ update oldkey newkey =
|
|||
, next $ return False
|
||||
)
|
||||
where
|
||||
available = (not <$> inAnnex newkey) <&&> inAnnex oldkey
|
||||
|
||||
-- annex.securehashesonly will block adding keys with insecure
|
||||
-- hashes, this check is only to avoid doing extra work and
|
||||
-- displaying a message when it fails.
|
||||
allowed = isNothing <$> checkSecureHashes newkey
|
||||
|
||||
-- If the new key was previous present in this repository, but got
|
||||
-- dropped, assume the user still doesn't want it there.
|
||||
wanted = loggedPreviousLocations newkey >>= \case
|
||||
[] -> pure True
|
||||
us -> do
|
||||
u <- getUUID
|
||||
pure (u `notElem` us)
|
||||
|
||||
findworktreefile = do
|
||||
fs <- Database.Keys.getAssociatedFiles newkey
|
||||
g <- Annex.gitRepo
|
||||
|
|
|
@ -21,6 +21,7 @@ module Logs.Location (
|
|||
logStatusAfter,
|
||||
logChange,
|
||||
loggedLocations,
|
||||
loggedPreviousLocations,
|
||||
loggedLocationsHistorical,
|
||||
loggedLocationsRef,
|
||||
parseLoggedLocations,
|
||||
|
@ -79,7 +80,13 @@ logChange _ NoUUID _ = noop
|
|||
{- Returns a list of repository UUIDs that, according to the log, have
|
||||
- the value of a key. -}
|
||||
loggedLocations :: Key -> Annex [UUID]
|
||||
loggedLocations = getLoggedLocations currentLogInfo
|
||||
loggedLocations = getLoggedLocations presentLogInfo
|
||||
|
||||
{- Returns a list of repository UUIDs that the location log indicates
|
||||
- used to have the vale of a key, but no longer do.
|
||||
-}
|
||||
loggedPreviousLocations :: Key -> Annex [UUID]
|
||||
loggedPreviousLocations = getLoggedLocations notPresentLogInfo
|
||||
|
||||
{- Gets the location log on a particular date. -}
|
||||
loggedLocationsHistorical :: RefDate -> Key -> Annex [UUID]
|
||||
|
|
|
@ -117,15 +117,18 @@ commitMigration = do
|
|||
(asTopFilePath migrationTreeGraftPoint)
|
||||
committedMigration commitsha
|
||||
|
||||
-- Streams distributed migrations from the git-annex branch that have not
|
||||
-- been performed here, and runs the provided action on each old and new
|
||||
-- key pair.
|
||||
streamNewDistributedMigrations :: (Key -> Key -> Annex ()) -> Annex ()
|
||||
streamNewDistributedMigrations a = do
|
||||
-- Streams distributed migrations from the git-annex branch,
|
||||
-- and runs the provided action on each old and new key pair.
|
||||
--
|
||||
-- With the incremental option, only scans as far as the last recorded
|
||||
-- migration that this has handled before.
|
||||
streamNewDistributedMigrations :: Bool -> (Key -> Key -> Annex ()) -> Annex ()
|
||||
streamNewDistributedMigrations incremental a = do
|
||||
void Annex.Branch.update
|
||||
branchsha <- Annex.Branch.getBranch
|
||||
(stoppoint, toskip) <- getPerformedMigrations
|
||||
(l, cleanup) <- inRepo $ getGitLog branchsha stoppoint
|
||||
(l, cleanup) <- inRepo $ getGitLog branchsha
|
||||
(if incremental then stoppoint else Nothing)
|
||||
[fromRawFilePath migrationTreeGraftPoint]
|
||||
-- Need to follow because migrate.tree is grafted in
|
||||
-- and then deleted, and normally git log stops when a file
|
||||
|
|
|
@ -17,8 +17,8 @@ module Logs.Presence (
|
|||
addLog',
|
||||
maybeAddLog,
|
||||
readLog,
|
||||
currentLog,
|
||||
currentLogInfo,
|
||||
presentLogInfo,
|
||||
notPresentLogInfo,
|
||||
historicalLogInfo,
|
||||
) where
|
||||
|
||||
|
@ -68,12 +68,13 @@ genLine logstatus loginfo c old = LogLine c' logstatus loginfo
|
|||
readLog :: RawFilePath -> Annex [LogLine]
|
||||
readLog = parseLog <$$> Annex.Branch.get
|
||||
|
||||
{- Reads a log and returns only the info that is still in effect. -}
|
||||
currentLogInfo :: RawFilePath -> Annex [LogInfo]
|
||||
currentLogInfo file = map info <$> currentLog file
|
||||
{- Reads a log and returns only the info that is still present. -}
|
||||
presentLogInfo :: RawFilePath -> Annex [LogInfo]
|
||||
presentLogInfo file = map info . filterPresent <$> readLog file
|
||||
|
||||
currentLog :: RawFilePath -> Annex [LogLine]
|
||||
currentLog file = filterPresent <$> readLog file
|
||||
{- Reads a log and returns only the info that is no longer present. -}
|
||||
notPresentLogInfo :: RawFilePath -> Annex [LogInfo]
|
||||
notPresentLogInfo file = map info . filterNotPresent <$> readLog file
|
||||
|
||||
{- Reads a historical version of a log and returns the info that was in
|
||||
- effect at that time.
|
||||
|
|
|
@ -70,14 +70,18 @@ buildLog = mconcat . map genline
|
|||
genstatus InfoMissing = charUtf8 '0'
|
||||
genstatus InfoDead = charUtf8 'X'
|
||||
|
||||
{- Given a log, returns only the info that is are still in effect. -}
|
||||
{- Given a log, returns only the info that is still present. -}
|
||||
getLog :: L.ByteString -> [LogInfo]
|
||||
getLog = map info . filterPresent . parseLog
|
||||
|
||||
{- Returns the info from LogLines that are in effect. -}
|
||||
{- Returns the info from LogLines that is present. -}
|
||||
filterPresent :: [LogLine] -> [LogLine]
|
||||
filterPresent = filter (\l -> InfoPresent == status l) . compactLog
|
||||
|
||||
{- Returns the info from LogLines that is not present. -}
|
||||
filterNotPresent :: [LogLine] -> [LogLine]
|
||||
filterNotPresent = filter (\l -> InfoPresent /= status l) . compactLog
|
||||
|
||||
{- Compacts a set of logs, returning a subset that contains the current
|
||||
- status. -}
|
||||
compactLog :: [LogLine] -> [LogLine]
|
||||
|
|
|
@ -55,7 +55,7 @@ getUrls' key = do
|
|||
where
|
||||
go [] = return []
|
||||
go (l:ls) = do
|
||||
us <- currentLogInfo l
|
||||
us <- presentLogInfo l
|
||||
if null us
|
||||
then go ls
|
||||
else return $ map fromLogInfo us
|
||||
|
|
|
@ -27,7 +27,25 @@ this can also be used to migrate files to use the new key format.
|
|||
* `--update`
|
||||
|
||||
This updates the local repository for migrations that were performed
|
||||
elsewhere.
|
||||
elsewhere. Only new migrations since the last time this was run will
|
||||
be performed.
|
||||
|
||||
This does not modify the working tree, but only hard links
|
||||
(or in some cases copies) annex objects to their new keys.
|
||||
|
||||
Note that older versions of git-annex did not record migrations in a
|
||||
way that this can use. Migrations performed with those older versions
|
||||
had to be manually run in each clone of the repository.
|
||||
|
||||
* `--apply`
|
||||
|
||||
This applies all recorded migrations to the local repository. It is the
|
||||
non-incremental form of `--update`.
|
||||
|
||||
One situation where this can be useful is when `git-annex migrate
|
||||
--update` has been run, but since then un-migrated
|
||||
objects have entered the repository. Using this option ensures that
|
||||
any such objects get migrated.
|
||||
|
||||
Note that older versions of git-annex did not record migrations in a
|
||||
way that this can use. Migrations performed with those older versions
|
||||
|
|
Loading…
Reference in a new issue