diff --git a/CHANGELOG b/CHANGELOG index b52015d635..728dbc55f0 100644 --- a/CHANGELOG +++ b/CHANGELOG @@ -15,6 +15,10 @@ git-annex (10.20221213) UNRELEASED; urgency=medium will work. * Speed up initial scanning for annexed files when built with persistent-2.14.4.1 + * Allow initremote of additional special remotes with type=web, + in addition to the default web special remote. When --sameas=web is used, + these provide additional names for the web special remote, and may + also have their own additional configuration and cost. -- Joey Hess Mon, 12 Dec 2022 13:04:54 -0400 diff --git a/Remote/Web.hs b/Remote/Web.hs index 94e9391e33..e5b370d370 100644 --- a/Remote/Web.hs +++ b/Remote/Web.hs @@ -1,6 +1,6 @@ {- Web remote. - - - Copyright 2011-2021 Joey Hess + - Copyright 2011-2023 Joey Hess - - Licensed under the GNU AGPL version 3 or higher. -} @@ -9,6 +9,7 @@ module Remote.Web (remote, getWebUrls) where import Annex.Common import Types.Remote +import Remote.Helper.Special import Remote.Helper.ExportImport import qualified Git import qualified Git.Construct @@ -22,6 +23,7 @@ import Utility.Metered import qualified Annex.Url as Url import Annex.YoutubeDl import Annex.SpecialRemote.Config +import Types.Creds remote :: RemoteType remote = RemoteType @@ -29,26 +31,33 @@ remote = RemoteType , enumerate = list , generate = gen , configParser = mkRemoteConfigParser [] - , setup = error "not supported" + , setup = setupInstance , exportSupported = exportUnsupported , importSupported = importUnsupported , thirdPartyPopulated = False } --- There is only one web remote, and it always exists. +-- The web remote always exists. -- (If the web should cease to exist, remove this module and redistribute -- a new release to the survivors by carrier pigeon.) +-- +-- There may also be other instances of the web remote, which can be +-- limited to accessing particular urls, and have different costs. 
list :: Bool -> Annex [Git.Repo] list _autoinit = do r <- liftIO $ Git.Construct.remoteNamed "web" (pure Git.Construct.fromUnknown) - return [r] + others <- findSpecialRemotes "web" + -- List the main one last, this makes its name be used instead + -- of the other names when git-annex is referring to content on the + -- web. + return (others++[r]) gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> RemoteStateHandle -> Annex (Maybe Remote) -gen r _ rc gc rs = do +gen r u rc gc rs = do c <- parsedRemoteConfig remote rc cst <- remoteCost gc expensiveRemoteCost return $ Just Remote - { uuid = webUUID + { uuid = if u == NoUUID then webUUID else u , cost = cst , name = Git.repoDescribe r , storeKey = uploadKey @@ -77,11 +86,20 @@ gen r _ rc gc rs = do , remotetype = remote , mkUnavailable = return Nothing , getInfo = return [] - , claimUrl = Nothing -- implicitly claims all urls + -- claimingUrl makes the web special remote claim + -- urls that are not claimed by other remotes, + -- so no need to claim anything here. + , claimUrl = Nothing , checkUrl = Nothing , remoteStateHandle = rs } +setupInstance :: SetupStage -> Maybe UUID -> Maybe CredPair -> RemoteConfig -> RemoteGitConfig -> Annex (RemoteConfig, UUID) +setupInstance ss mu _ c gc = do + u <- maybe (liftIO genUUID) return mu + gitConfigSpecialRemote u c [("web", "true")] + return (c, u) + downloadKey :: Key -> AssociatedFile -> FilePath -> MeterUpdate -> VerifyConfig -> Annex Verification downloadKey key _af dest p vc = go =<< getWebUrls key where diff --git a/doc/git-annex.mdwn b/doc/git-annex.mdwn index 60811dce06..7fc83766b6 100644 --- a/doc/git-annex.mdwn +++ b/doc/git-annex.mdwn @@ -1655,6 +1655,11 @@ Remotes are configured using these settings in `.git/config`. Used to identify Amazon Glacier special remotes. Normally this is automatically set up by `git annex initremote`. +* `remote.<name>.annex-web` + + Used to identify web special remotes. 
+ Normally this is automatically set up by `git annex initremote`. + * `remote.<name>.annex-webdav` Used to identify webdav special remotes. diff --git a/doc/special_remotes/bittorrent.mdwn b/doc/special_remotes/bittorrent.mdwn index 22bcdc4aef..4e18757193 100644 --- a/doc/special_remotes/bittorrent.mdwn +++ b/doc/special_remotes/bittorrent.mdwn @@ -13,6 +13,9 @@ If git-annex is not built using the haskell torrent library to parse torrents, it also needs the `btshowmetainfo` program, from either bittornado or the original BitTorrent client. +The bittorrent special remote is always enabled, without any manual setup being +needed. Its name is "bittorrent". + ## notes Currently git-annex only supports downloading content from a torrent; @@ -27,5 +30,3 @@ git-annex takes a cautious approach and when dropping a file, won't treat this special remote as one of the required [[copies]]. It's probably a good idea to configure git-annex to fully distrust this remote, by running `git annex untrust bittorrent` - -This feature is available only from version `5.20141219`. diff --git a/doc/special_remotes/web.mdwn b/doc/special_remotes/web.mdwn index d965352344..571b8299dc 100644 --- a/doc/special_remotes/web.mdwn +++ b/doc/special_remotes/web.mdwn @@ -1,11 +1,12 @@ -git-annex can use the WWW as a special remote, associating an url with an +git-annex can use the web as a special remote, associating an url with an annexed file, and downloading the file content from the web. See [[tips/using_the_web_as_a_special_remote]] for usage examples. -## notes +The web special remote is always enabled, without any manual setup being +needed. Its name is "web". -Currently git-annex only supports downloading content from the web; -it cannot upload to it or remove content. +This special remote can only be used for downloading content, +not uploading content, or removing content from the web. This special remote uses urls on the web as the source for content. 
There are several other ways http can be used to download annexed objects, diff --git a/doc/todo/Allow_for_URLs_prioritization_WITHIN___40__web__41___remote/comment_2_1ab2634c8a7058c5aa09f1ba72be3471._comment b/doc/todo/Allow_for_URLs_prioritization_WITHIN___40__web__41___remote/comment_2_1ab2634c8a7058c5aa09f1ba72be3471._comment new file mode 100644 index 0000000000..a8680693d6 --- /dev/null +++ b/doc/todo/Allow_for_URLs_prioritization_WITHIN___40__web__41___remote/comment_2_1ab2634c8a7058c5aa09f1ba72be3471._comment @@ -0,0 +1,10 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 2""" + date="2023-01-09T18:25:56Z" + content=""" +It does happen to try the urls in the order listed in the log file. + +With that said, the order of lines in files in the git-annex branch is +not guaranteed to be preserved when, e.g., merging. +"""]] diff --git a/doc/todo/Allow_for_URLs_prioritization_WITHIN___40__web__41___remote/comment_3_6970e60b7d826beb025fda452cc1bc13._comment b/doc/todo/Allow_for_URLs_prioritization_WITHIN___40__web__41___remote/comment_3_6970e60b7d826beb025fda452cc1bc13._comment new file mode 100644 index 0000000000..8a10c800a9 --- /dev/null +++ b/doc/todo/Allow_for_URLs_prioritization_WITHIN___40__web__41___remote/comment_3_6970e60b7d826beb025fda452cc1bc13._comment @@ -0,0 +1,50 @@ +[[!comment format=mdwn + username="joey" + subject="""comment 3""" + date="2023-01-09T18:27:36Z" + content=""" +See previous discussion in the since-closed todo +[[assign_costs_per_URL_or_better_repo-wide_(regexes)]]. + +In that I suggested using --sameas, and yarikoptic thought the datalad +special remote could use that to handle urls and do its own +prioritization. I suppose that probably didn't get done in datalad. + +But I do like the idea of using --sameas. 
It avoids several problems +with providing "url-priority-N=" configs to the web special remote: + +* Two clones could have url-priority-1 set to different values, and merging + the remote.log would lose one of them. +* It may be that one group of urls is slow from repo A, but fast from repo + B. So there needs to be a local override, and we already have that + for remote costs. +* Using --sameas means that cost configs can be used, rather than + adding a separate config that's essentially for the same thing. + +So, what if it were possible to initremote versions of the web special +remote that were limited to particular urls, and skipped over any other +urls: + + git-annex initremote --sameas web s3public type=web urllimit=s3.amazonaws.com/ autoenable=true + git config remote.s3public.annex-cost 150 + + git-annex initremote --sameas web dandiapi type=web urllimit=/api.dandiarchive.org/ autoenable=true + git config remote.dandiapi.annex-cost 250 + +As well as adding the urllimit= config, that would need the web special +remote to allow initremote of other instances of it. Currently, that will +fail: + + git-annex initremote --sameas web web2 type=web autoenable=true + git-annex: not supported + CallStack (from HasCallStack): + error, called at ./Remote/Web.hs:32:19 in main:Remote.Web + failed + initremote: 1 failed + +Which is not ideal when it comes to using autoenable=true because using +a current git-annex after this gets implemented would try to autoenable +the remote, and display all that. Compare with how autoenable handles +remote types it does not know -- it silently skips them. This could be avoided +by using something other than type=web for these. +"""]]