From 257f01729c219730d54d44fdef7a76eec409baca Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Fri, 8 Dec 2023 14:18:18 -0400 Subject: [PATCH] distributed migration for pull and sync --content pull, sync: When operating on content, automatically hard link objects that have been migrated. Added annex.syncmigrations config that can be set to false to prevent pull and sync from migrating object content. I think that true is a good default for this config, because it avoids users having to re-download migrated content or learning about migration. But, some users will surely not like it, whether because it does take some time (especially for the first git-annex branch scan when there is a long history), or because they want to deal with it manually, or because their filesystem doesn't support hard links and they don't want it to copy objects. Sponsored-by: k0ld on Patreon --- CHANGELOG | 4 ++++ Command/Migrate.hs | 12 +++++++++--- Command/Sync.hs | 5 +++++ Types/GitConfig.hs | 2 ++ doc/git-annex-migrate.mdwn | 10 +++++++--- doc/git-annex-pull.mdwn | 12 ++++++++++-- doc/git-annex.mdwn | 6 ++++++ 7 files changed, 43 insertions(+), 8 deletions(-) diff --git a/CHANGELOG b/CHANGELOG index b9a138ae24..ba666913ad 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -5,6 +5,10 @@ git-annex (10.20231130) UNRELEASED; urgency=medium incrementally, hard linking annex objects to their new keys. * migrate: Added --apply option that (re)applies all recorded distributed migrations to the objects in repository. + * pull, sync: When operating on content, automatically hard link objects + that have been migrated. + * Added annex.syncmigrations config that can be set to false to prevent + pull and sync from migrating object content. * Make git-annex get/copy/move --from foo override configuration of remote.foo.annex-ignore, as documented. * Support git-annex copy/move --from-anywhere --to remote. 
diff --git a/Command/Migrate.hs b/Command/Migrate.hs index 9343e3d75a..344287f506 100644 --- a/Command/Migrate.hs +++ b/Command/Migrate.hs @@ -59,9 +59,7 @@ seek o | updateOption o || applyOption o = do unless (null (migrateThese o)) $ error "Cannot combine --update or --apply with files to migrate." - streamNewDistributedMigrations (not (applyOption o)) $ - \oldkey newkey -> - commandAction $ update oldkey newkey + seekDistributedMigrations (not (applyOption o)) | otherwise = do withFilesInGitAnnex ww seeker =<< workTreeItems ww (migrateThese o) commitMigration @@ -73,6 +71,14 @@ seek o , usesLocationLog = False } +seekDistributedMigrations :: Bool -> CommandSeek +seekDistributedMigrations incremental = + streamNewDistributedMigrations incremental $ \oldkey newkey -> + -- Not using commandAction because this is not necessarily + -- concurrency safe, and also is unlikely to be sped up + -- by multiple jobs. + void $ includeCommandAction $ update oldkey newkey + start :: MigrateOptions -> Maybe KeySha -> SeekInput -> RawFilePath -> Key -> CommandStart start o ksha si file key = do forced <- Annex.getRead Annex.force diff --git a/Command/Sync.hs b/Command/Sync.hs index 7feb796f91..d4bbafef27 100644 --- a/Command/Sync.hs +++ b/Command/Sync.hs @@ -58,6 +58,7 @@ import Command.Get (getKey') import qualified Command.Move import qualified Command.Export import qualified Command.Import +import qualified Command.Migrate import Annex.Drop import Annex.UUID import Logs.UUID @@ -294,6 +295,10 @@ seek' o = startConcurrency transferStages $ do content <- shouldSyncContent o + when content $ + whenM (annexSyncMigrations <$> Annex.getGitConfig) $ + Command.Migrate.seekDistributedMigrations True + forM_ (filter isImport contentremotes) $ withbranch . 
importRemote content o forM_ (filter isThirdPartyPopulated contentremotes) $ diff --git a/Types/GitConfig.hs b/Types/GitConfig.hs index 13978e8338..6ded7b6df3 100644 --- a/Types/GitConfig.hs +++ b/Types/GitConfig.hs @@ -94,6 +94,7 @@ data GitConfig = GitConfig , annexResolveMerge :: GlobalConfigurable Bool , annexSyncContent :: GlobalConfigurable (Maybe Bool) , annexSyncOnlyAnnex :: GlobalConfigurable Bool + , annexSyncMigrations :: Bool , annexDebug :: Bool , annexDebugFilter :: Maybe String , annexWebOptions :: [String] @@ -184,6 +185,7 @@ extractGitConfig configsource r = GitConfig getmaybebool (annexConfig "synccontent") , annexSyncOnlyAnnex = configurable False $ getmaybebool (annexConfig "synconlyannex") + , annexSyncMigrations = getbool (annexConfig "syncmigrations") True , annexDebug = getbool (annexConfig "debug") False , annexDebugFilter = getmaybe (annexConfig "debugfilter") , annexWebOptions = getwords (annexConfig "web-options") diff --git a/doc/git-annex-migrate.mdwn b/doc/git-annex-migrate.mdwn index e9f50d6129..e5e4fe0bec 100644 --- a/doc/git-annex-migrate.mdwn +++ b/doc/git-annex-migrate.mdwn @@ -18,9 +18,10 @@ Note that the content is also still stored using the old keys after migration. When possible, hard links are used to avoid that taking up extra disk space. Use `git annex unused` to find and remove the old keys. -Normally, nothing will be done to files already using the new backend. -However, if a backend changes the information it uses to construct a key, -this can also be used to migrate files to use the new key format. +Normally, nothing will be done to specified files that are already using +the new backend. However, if a backend changes the information it uses to +construct a key, this can also be used to migrate files to use the new key +format. # OPTIONS @@ -33,6 +34,9 @@ this can also be used to migrate files to use the new key format. 
This does not modify the working tree, but only hard links (or in some cases copies) annex objects to their new keys. + `git-annex pull` and `git-annex sync --content` automatically do this, + unless the `annex.syncmigrations` config is set to false. + Note that older versions of git-annex did not record migrations in a way that this can use. Migrations performed with those older versions had to be manually run in each clone of the repository. diff --git a/doc/git-annex-pull.mdwn b/doc/git-annex-pull.mdwn index 594bd8fa14..eabc599995 100644 --- a/doc/git-annex-pull.mdwn +++ b/doc/git-annex-pull.mdwn @@ -29,7 +29,10 @@ this command will also pull changes from the parent branch. When [[git-annex-view]](1) has been used to check out a view branch, this command will update the view branch to reflect any changes to the parent branch or metadata. - + +When [[git-annex-migrate]](1) has been used in other repositories, +this updates the content in the local repository for those migrations as well. + Normally this tries to download the content of each annexed file, from any remote that it's pulling from that has a copy. To control which files it downloads, configure the preferred @@ -70,12 +73,17 @@ See [[git-annex-preferred-content]](1). * `--no-content, `-g`, `--content` Use `--no-content` or `-g` to avoid downloading (and dropping) - the content of annexed files. + the content of annexed files, and also prevent doing any migrations of + content. If you often use `--no-content`, you can set the `annex.synccontent` configuration to false to prevent downloading content by default. The `--content` option overrides that configuration. + To prevent only migrations of content, you can set the + `annex.syncmigrations` configuration to false. + The `--content` option overrides that configuration as well. + * `--content-of=path` `-C path` Only download (and drop) annexed files in the given path. 
diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn index 750ad923f0..5f43deb1ee 100644 --- a/doc/git-annex.mdwn +++ b/doc/git-annex.mdwn @@ -1222,6 +1222,12 @@ repository, using [[git-annex-config]]. See its man page for a list.) To configure the behavior in all clones of the repository, this can be set in [[git-annex-config]](1). +* `annex.syncmigrations` + + Set to false to prevent `git-annex sync` and `git-annex pull` + from scanning for migrations and updating the local + repository for those migrations. + * `annex.viewunsetdirectory` This configures the name of a directory that is used in a view to contain