From 51b73ea1fccd83172d739e04e18c152b594bd54a Mon Sep 17 00:00:00 2001 From: Joey Hess Date: Fri, 12 Nov 2021 12:59:30 -0400 Subject: [PATCH] migrate: New --remove-size option While intended for converting URL keys added by addurl --fast to be as if added by addurl --relaxed, it can also be used to remove size from other types of keys. Although that is not likely to be useful for checksummed keys, I suppose it could be used for WORM or other non-checksum keys. Specifying the --remove-size option does not prevent other migrations from taking effect if there's a key upgrade to perform, or if the backend has changed. So --backend=URL needs to be used to prevent migrating an URL key to the default backend. Note that it's not possible to use git-annex migrate to convert from a non-URL key to an URL key, as URL keys cannot be generated, except by addurl. So while this can get the same effect as --relaxed would have when addurl --fast was used, when --fast was not used, it won't work, or if --backend=URL is not used will remove the size but not prevent checksum verification, which is not useful. Due to this complexity, I decided not to mention it in the git-annex addurl man page. Sponsored-by: Jochen Bartl on Patreon --- CHANGELOG | 1 + Command/Migrate.hs | 49 ++++++++++++++----- ..._ce952a9db35d398a9e67adcd7f927d59._comment | 17 +++++++ ..._bb458296d464c2025f96502338f34000._comment | 16 ++++++ doc/git-annex-migrate.mdwn | 12 +++++ doc/git-annex-rekey.mdwn | 5 ++ 6 files changed, 87 insertions(+), 13 deletions(-) create mode 100644 doc/forum/Dropping_checksum_from_URL_key/comment_1_ce952a9db35d398a9e67adcd7f927d59._comment create mode 100644 doc/forum/Dropping_checksum_from_URL_key/comment_2_bb458296d464c2025f96502338f34000._comment diff --git a/CHANGELOG b/CHANGELOG index 6474b20204..0b9ff1bb6f 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -13,6 +13,7 @@ git-annex (8.20211029) UNRELEASED; urgency=medium * uninit: Avoid error message when there is no git-annex branch. 
* git-lfs: Fix interoperability with gitlab's implementation of the git-lfs protocol, which requests Content-Encoding chunked. + * migrate: New --remove-size option. -- Joey Hess Mon, 01 Nov 2021 13:19:46 -0400 diff --git a/Command/Migrate.hs b/Command/Migrate.hs index 9a0d69f35a..1844f9a63b 100644 --- a/Command/Migrate.hs +++ b/Command/Migrate.hs @@ -23,20 +23,33 @@ cmd :: Command cmd = withGlobalOptions [annexedMatchingOptions] $ command "migrate" SectionUtility "switch data to different backend" - paramPaths (withParams seek) + paramPaths (seek <$$> optParser) -seek :: CmdParams -> CommandSeek -seek = withFilesInGitAnnex ww seeker <=< workTreeItems ww +data MigrateOptions = MigrateOptions + { migrateThese :: CmdParams + , removeSize :: Bool + } + +optParser :: CmdParamsDesc -> Parser MigrateOptions +optParser desc = MigrateOptions + <$> cmdParams desc + <*> switch + ( long "remove-size" + <> help "remove size field from keys" + ) + +seek :: MigrateOptions -> CommandSeek +seek o = withFilesInGitAnnex ww seeker =<< workTreeItems ww (migrateThese o) where ww = WarnUnmatchLsFiles seeker = AnnexedFileSeeker - { startAction = start + { startAction = start o , checkContentPresent = Nothing , usesLocationLog = False } -start :: SeekInput -> RawFilePath -> Key -> CommandStart -start si file key = do +start :: MigrateOptions -> SeekInput -> RawFilePath -> Key -> CommandStart +start o si file key = do forced <- Annex.getState Annex.force v <- Backend.getBackend (fromRawFilePath file) key case v of @@ -46,9 +59,14 @@ start si file key = do newbackend <- maybe defaultBackend return =<< chooseBackend file if (newbackend /= oldbackend || upgradableKey oldbackend key || forced) && exists - then starting "migrate" (mkActionItem (key, file)) si $ - perform file key oldbackend newbackend - else stop + then go False oldbackend newbackend + else if removeSize o && exists + then go True oldbackend oldbackend + else stop + where + go onlyremovesize oldbackend newbackend = + starting 
"migrate" (mkActionItem (key, file)) si $ + perform onlyremovesize o file key oldbackend newbackend {- Checks if a key is upgradable to a newer representation. - @@ -70,13 +88,14 @@ upgradableKey backend key = isNothing (fromKey keySize key) || backendupgradable - data cannot get corrupted after the fsck but before the new key is - generated. -} -perform :: RawFilePath -> Key -> Backend -> Backend -> CommandPerform -perform file oldkey oldbackend newbackend = go =<< genkey (fastMigrate oldbackend) +perform :: Bool -> MigrateOptions -> RawFilePath -> Key -> Backend -> Backend -> CommandPerform +perform onlyremovesize o file oldkey oldbackend newbackend = go =<< genkey (fastMigrate oldbackend) where go Nothing = stop go (Just (newkey, knowngoodcontent)) - | knowngoodcontent = finish newkey - | otherwise = stopUnless checkcontent $ finish newkey + | knowngoodcontent = finish (removesize newkey) + | otherwise = stopUnless checkcontent $ + finish (removesize newkey) checkcontent = Command.Fsck.checkBackend oldbackend oldkey Command.Fsck.KeyPresent afile finish newkey = ifM (Command.ReKey.linkKey file oldkey newkey) ( do @@ -89,6 +108,7 @@ perform file oldkey oldbackend newbackend = go =<< genkey (fastMigrate oldbacken next $ Command.ReKey.cleanup file newkey , giveup "failed creating link from old to new key" ) + genkey _ | onlyremovesize = return $ Just (oldkey, False) genkey Nothing = do content <- calcRepo $ gitAnnexLocation oldkey let source = KeySource @@ -101,4 +121,7 @@ perform file oldkey oldbackend newbackend = go =<< genkey (fastMigrate oldbacken genkey (Just fm) = fm oldkey newbackend afile >>= \case Just newkey -> return (Just (newkey, True)) Nothing -> genkey Nothing + removesize k + | removeSize o = alterKey k $ \kd -> kd { keySize = Nothing } + | otherwise = k afile = AssociatedFile (Just file) diff --git a/doc/forum/Dropping_checksum_from_URL_key/comment_1_ce952a9db35d398a9e67adcd7f927d59._comment 
b/doc/forum/Dropping_checksum_from_URL_key/comment_1_ce952a9db35d398a9e67adcd7f927d59._comment new file mode 100644 index 0000000000..4e09933a18 --- /dev/null +++ b/doc/forum/Dropping_checksum_from_URL_key/comment_1_ce952a9db35d398a9e67adcd7f927d59._comment @@ -0,0 +1,17 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 1""" + date="2021-11-12T16:10:01Z" + content=""" +Migrating to URL will not do anything since they already are url keys. + +This could be scripted using `git-annex examinekey` to +convert such a key into one without a size, and then using +`git-annex rekey`, which lets the new key for a file be specified. + +However, that command is a low level plumbing command, and does not copy +over the url list from the old to the new key as migrate does (nor other +metadata). So you would also have to use `git-annex addurl file url` +afterwards to add the url, and use `git-annex metadata` if you have +metadata. Very unergonomic. +"""]] diff --git a/doc/forum/Dropping_checksum_from_URL_key/comment_2_bb458296d464c2025f96502338f34000._comment b/doc/forum/Dropping_checksum_from_URL_key/comment_2_bb458296d464c2025f96502338f34000._comment new file mode 100644 index 0000000000..65576564f5 --- /dev/null +++ b/doc/forum/Dropping_checksum_from_URL_key/comment_2_bb458296d464c2025f96502338f34000._comment @@ -0,0 +1,16 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 2""" + date="2021-11-12T16:57:15Z" + content=""" +Implemented: `git-annex migrate --remove-size --backend=URL` + +Be sure to only run it on files using url keys, since it will also +remove sizes from other keys. (Or use `--inbackend=URL` with it.) + +Do note that `git-annex migrate` can only migrate files whose content +is present. If you have never downloaded those urls, and `git-annex get` +cannot download them now, because their size has changed, you +won't be able to migrate data you don't have. 
In this case, re-running +`git-annex addurl` with `--relaxed` seems like the only option. +"""]] diff --git a/doc/git-annex-migrate.mdwn b/doc/git-annex-migrate.mdwn index 79b3434807..b05991930d 100644 --- a/doc/git-annex-migrate.mdwn +++ b/doc/git-annex-migrate.mdwn @@ -39,6 +39,18 @@ it's best to run migrate in all of them. * Also the [[git-annex-common-options]](1) can be used. +* `--remove-size` + + Keys often include the size of their content, which is generally a useful + thing. In fact, this command defaults to adding missing size information + to keys. With this option, the size information is removed instead. + + One use of this option is to convert URL keys that were added + by `git-annex addurl --fast` to ones that would have been added if + that command was run with the `--relaxed` option. Eg: + + git-annex migrate --remove-size --backend=URL somefile + # SEE ALSO [[git-annex]](1) diff --git a/doc/git-annex-rekey.mdwn b/doc/git-annex-rekey.mdwn index e3171f7000..85458bdc55 100644 --- a/doc/git-annex-rekey.mdwn +++ b/doc/git-annex-rekey.mdwn @@ -13,6 +13,9 @@ both the file, and the new key to use for it. Multiple pairs of file and key can be given in a single command line. +Note that, unlike `git-annex migrate`, this does not copy over metadata, +urls, and other such information from the old to the new key. + # OPTIONS * `--force` @@ -37,6 +40,8 @@ Multiple pairs of file and key can be given in a single command line. [[git-annex]](1) +[[git-annex-migrate]](1) + # AUTHOR Joey Hess