respect urlinclude/urlexclude of other web special remotes

When a web special remote does not have urlinclude/urlexclude
configured, make it respect the configuration of other web special
remotes and avoid using urls that match the config of another.

Note that the other web special remote does not have to be enabled.
That seems ok, it would have been extra work to check for only ones that
are enabled.

The implementation does mean that the web special remote re-parses
its own config once at startup, as well as re-parsing the configs of any
other web special remotes. This should be a very small slowdown
unless there are lots of web special remotes.

Sponsored-by: Dartmouth College's DANDI project
This commit is contained in:
Joey Hess 2023-01-10 14:58:53 -04:00
parent 0fc476f16e
commit 8a305e5fa3
No known key found for this signature in database
GPG key ID: DB12DB0FF05F8F38
5 changed files with 134 additions and 31 deletions

View file

@ -26,6 +26,9 @@ import Utility.Glob
import qualified Annex.Url as Url
import Annex.YoutubeDl
import Annex.SpecialRemote.Config
import Logs.Remote
import qualified Data.Map as M
remote :: RemoteType
remote = RemoteType
@ -44,14 +47,6 @@ remote = RemoteType
, thirdPartyPopulated = False
}
urlincludeField :: RemoteConfigField
urlincludeField = Accepted "urlinclude"
urlexcludeField :: RemoteConfigField
urlexcludeField = Accepted "urlexclude"
data UrlIncludeExclude = UrlIncludeExclude (Maybe Glob) (Maybe Glob)
-- The web remote always exists.
-- (If the web should cease to exist, remove this module and redistribute
-- a new release to the survivors by carrier pigeon.)
@ -71,9 +66,7 @@ gen :: Git.Repo -> UUID -> RemoteConfig -> RemoteGitConfig -> RemoteStateHandle
gen r u rc gc rs = do
c <- parsedRemoteConfig remote rc
cst <- remoteCost gc expensiveRemoteCost
let urlincludeexclude = UrlIncludeExclude
(getglob c urlincludeField)
( getglob c urlexcludeField)
urlincludeexclude <- mkUrlIncludeExclude c
return $ Just Remote
{ uuid = if u == NoUUID then webUUID else u
, cost = cst
@ -111,10 +104,6 @@ gen r u rc gc rs = do
, checkUrl = Nothing
, remoteStateHandle = rs
}
where
getglob c f = do
glob <- getRemoteConfigValue f c
Just $ compileGlob glob CaseInsensative (GlobFilePath False)
setupInstance :: SetupStage -> Maybe UUID -> Maybe CredPair -> RemoteConfig -> RemoteGitConfig -> Annex (RemoteConfig, UUID)
setupInstance _ mu _ c _ = do
@ -180,18 +169,66 @@ checkKey' key us = firsthit us (Right False) $ \u -> do
_ -> firsthit rest r a
getWebUrls :: Key -> Annex [URLString]
getWebUrls key = getWebUrls' (UrlIncludeExclude Nothing Nothing) key
getWebUrls key = getWebUrls' alwaysInclude key
getWebUrls' :: UrlIncludeExclude -> Key -> Annex [URLString]
getWebUrls' (UrlIncludeExclude minclude mexclude) key =
getWebUrls' urlincludeexclude key =
filter supported <$> getUrls key
where
supported u = supporteddownloader u && isincluded u && notexcluded u
supported u = supporteddownloader u
&& checkUrlIncludeExclude urlincludeexclude u
supporteddownloader u = snd (getDownloader u)
`elem` [WebDownloader, YoutubeDownloader]
isincluded u = case minclude of
Nothing -> True
Just glob -> matchGlob glob u
notexcluded u = case mexclude of
Nothing -> True
Just glob -> not (matchGlob glob u)
urlincludeField :: RemoteConfigField
urlincludeField = Accepted "urlinclude"
urlexcludeField :: RemoteConfigField
urlexcludeField = Accepted "urlexclude"
data UrlIncludeExclude = UrlIncludeExclude
{ checkUrlIncludeExclude :: URLString -> Bool
}
alwaysInclude :: UrlIncludeExclude
alwaysInclude = UrlIncludeExclude { checkUrlIncludeExclude = const True }
mkUrlIncludeExclude :: ParsedRemoteConfig -> Annex UrlIncludeExclude
mkUrlIncludeExclude = go fallback
where
go b pc = case (getglob urlincludeField pc, getglob urlexcludeField pc) of
(Nothing, Nothing) -> b
(minclude, mexclude) -> mk minclude mexclude
getglob f pc = do
glob <- getRemoteConfigValue f pc
Just $ compileGlob glob CaseInsensative (GlobFilePath False)
mk minclude mexclude = pure $ UrlIncludeExclude
{ checkUrlIncludeExclude = \u -> and
[ case minclude of
Just glob -> matchGlob glob u
Nothing -> True
, case mexclude of
Nothing -> True
Just glob -> not (matchGlob glob u)
]
}
-- When nothing to include or exclude is specified, only include
-- urls that are not explicitly included by other web special remotes.
fallback = do
rcs <- M.elems . M.filter iswebremote <$> remoteConfigMap
l <- forM rcs $ \rc ->
parsedRemoteConfig remote rc
>>= go (pure neverinclude)
liftIO $ print ("fallback", l)
pure $ UrlIncludeExclude
{ checkUrlIncludeExclude = \u ->
not (any (\c -> checkUrlIncludeExclude c u) l)
}
iswebremote rc = (fromProposedAccepted <$> M.lookup typeField rc)
== Just (typename remote)
neverinclude = UrlIncludeExclude { checkUrlIncludeExclude = const False }