Update URLPattern to its latest version
This commit is contained in:
parent
81784827ff
commit
6763977316
4 changed files with 113 additions and 101 deletions
|
@ -4,8 +4,11 @@
|
|||
|
||||
#include "extensions/common/url_pattern.h"
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#include <ostream>
|
||||
|
||||
#include "base/macros.h"
|
||||
#include "base/strings/pattern.h"
|
||||
#include "base/strings/string_number_conversions.h"
|
||||
#include "base/strings/string_piece.h"
|
||||
|
@ -17,31 +20,28 @@
|
|||
#include "url/gurl.h"
|
||||
#include "url/url_util.h"
|
||||
|
||||
const char extensions::URLPattern::kAllUrlsPattern[] = "<all_urls>";
|
||||
const char URLPattern::kAllUrlsPattern[] = "<all_urls>";
|
||||
const char kExtensionScheme[] = "chrome-extension";
|
||||
|
||||
namespace {
|
||||
|
||||
// TODO(aa): What about more obscure schemes like data: and javascript: ?
|
||||
// Note: keep this array in sync with kValidSchemeMasks.
|
||||
const char* kValidSchemes[] = {
|
||||
url::kHttpScheme,
|
||||
url::kHttpsScheme,
|
||||
url::kFileScheme,
|
||||
url::kFtpScheme,
|
||||
content::kChromeUIScheme,
|
||||
kExtensionScheme,
|
||||
const char* const kValidSchemes[] = {
|
||||
url::kHttpScheme, url::kHttpsScheme,
|
||||
url::kFileScheme, url::kFtpScheme,
|
||||
content::kChromeUIScheme, kExtensionScheme,
|
||||
url::kFileSystemScheme,
|
||||
};
|
||||
|
||||
const int kValidSchemeMasks[] = {
|
||||
extensions::URLPattern::SCHEME_HTTP,
|
||||
extensions::URLPattern::SCHEME_HTTPS,
|
||||
extensions::URLPattern::SCHEME_FILE,
|
||||
extensions::URLPattern::SCHEME_FTP,
|
||||
extensions::URLPattern::SCHEME_CHROMEUI,
|
||||
extensions::URLPattern::SCHEME_EXTENSION,
|
||||
extensions::URLPattern::SCHEME_FILESYSTEM,
|
||||
URLPattern::SCHEME_HTTP,
|
||||
URLPattern::SCHEME_HTTPS,
|
||||
URLPattern::SCHEME_FILE,
|
||||
URLPattern::SCHEME_FTP,
|
||||
URLPattern::SCHEME_CHROMEUI,
|
||||
URLPattern::SCHEME_EXTENSION,
|
||||
URLPattern::SCHEME_FILESYSTEM,
|
||||
};
|
||||
|
||||
static_assert(arraysize(kValidSchemes) == arraysize(kValidSchemeMasks),
|
||||
|
@ -70,26 +70,26 @@ const char* const kParseResultMessages[] = {
|
|||
kParseErrorInvalidHost,
|
||||
};
|
||||
|
||||
static_assert(extensions::URLPattern::NUM_PARSE_RESULTS == arraysize(kParseResultMessages),
|
||||
static_assert(URLPattern::NUM_PARSE_RESULTS == arraysize(kParseResultMessages),
|
||||
"must add message for each parse result");
|
||||
|
||||
const char kPathSeparator[] = "/";
|
||||
|
||||
bool IsStandardScheme(const std::string& scheme) {
|
||||
bool IsStandardScheme(base::StringPiece scheme) {
|
||||
// "*" gets the same treatment as a standard scheme.
|
||||
if (scheme == "*")
|
||||
return true;
|
||||
|
||||
return url::IsStandard(scheme.c_str(),
|
||||
return url::IsStandard(scheme.data(),
|
||||
url::Component(0, static_cast<int>(scheme.length())));
|
||||
}
|
||||
|
||||
bool IsValidPortForScheme(const std::string& scheme, const std::string& port) {
|
||||
bool IsValidPortForScheme(base::StringPiece scheme, base::StringPiece port) {
|
||||
if (port == "*")
|
||||
return true;
|
||||
|
||||
// Only accept non-wildcard ports if the scheme uses ports.
|
||||
if (url::DefaultPortForScheme(scheme.c_str(), scheme.length()) ==
|
||||
if (url::DefaultPortForScheme(scheme.data(), scheme.length()) ==
|
||||
url::PORT_UNSPECIFIED) {
|
||||
return false;
|
||||
}
|
||||
|
@ -107,17 +107,23 @@ bool IsValidPortForScheme(const std::string& scheme, const std::string& port) {
|
|||
// the path will have only a single wildcard at the end. This makes figuring
|
||||
// out overlap much easier. It seems like there is probably a computer-sciency
|
||||
// way to solve the general case, but we don't need that yet.
|
||||
std::string StripTrailingWildcard(const std::string& path) {
|
||||
size_t wildcard_index = path.find('*');
|
||||
size_t path_last = path.size() - 1;
|
||||
return wildcard_index == path_last ? path.substr(0, path_last) : path;
|
||||
base::StringPiece StripTrailingWildcard(base::StringPiece path) {
|
||||
if (path.ends_with("*"))
|
||||
path.remove_suffix(1);
|
||||
return path;
|
||||
}
|
||||
|
||||
// Removes trailing dot from |host_piece| if any.
|
||||
base::StringPiece CanonicalizeHostForMatching(base::StringPiece host_piece) {
|
||||
if (host_piece.ends_with("."))
|
||||
host_piece.remove_suffix(1);
|
||||
return host_piece;
|
||||
}
|
||||
|
||||
} // namespace
|
||||
|
||||
namespace extensions {
|
||||
// static
|
||||
bool URLPattern::IsValidSchemeForExtensions(const std::string& scheme) {
|
||||
bool URLPattern::IsValidSchemeForExtensions(base::StringPiece scheme) {
|
||||
for (size_t i = 0; i < arraysize(kValidSchemes); ++i) {
|
||||
if (scheme == kValidSchemes[i])
|
||||
return true;
|
||||
|
@ -126,7 +132,7 @@ bool URLPattern::IsValidSchemeForExtensions(const std::string& scheme) {
|
|||
}
|
||||
|
||||
URLPattern::URLPattern()
|
||||
: valid_schemes_(SCHEME_ALL),
|
||||
: valid_schemes_(SCHEME_NONE),
|
||||
match_all_urls_(false),
|
||||
match_subdomains_(false),
|
||||
port_("*") {}
|
||||
|
@ -137,7 +143,7 @@ URLPattern::URLPattern(int valid_schemes)
|
|||
match_subdomains_(false),
|
||||
port_("*") {}
|
||||
|
||||
URLPattern::URLPattern(int valid_schemes, const std::string& pattern)
|
||||
URLPattern::URLPattern(int valid_schemes, base::StringPiece pattern)
|
||||
// Strict error checking is used, because this constructor is only
|
||||
// appropriate when we know |pattern| is valid.
|
||||
: valid_schemes_(valid_schemes),
|
||||
|
@ -149,6 +155,8 @@ URLPattern::URLPattern(int valid_schemes, const std::string& pattern)
|
|||
NOTREACHED() << "URLPattern invalid: " << pattern << " result " << result;
|
||||
}
|
||||
|
||||
URLPattern::URLPattern(const URLPattern& other) = default;
|
||||
|
||||
URLPattern::~URLPattern() {
|
||||
}
|
||||
|
||||
|
@ -168,7 +176,7 @@ std::ostream& operator<<(std::ostream& out, const URLPattern& url_pattern) {
|
|||
return out << '"' << url_pattern.GetAsString() << '"';
|
||||
}
|
||||
|
||||
URLPattern::ParseResult URLPattern::Parse(const std::string& pattern) {
|
||||
URLPattern::ParseResult URLPattern::Parse(base::StringPiece pattern) {
|
||||
spec_.clear();
|
||||
SetMatchAllURLs(false);
|
||||
SetMatchSubdomains(false);
|
||||
|
@ -185,12 +193,12 @@ URLPattern::ParseResult URLPattern::Parse(const std::string& pattern) {
|
|||
bool has_standard_scheme_separator = true;
|
||||
|
||||
// Some urls also use ':' alone as the scheme separator.
|
||||
if (scheme_end_pos == std::string::npos) {
|
||||
if (scheme_end_pos == base::StringPiece::npos) {
|
||||
scheme_end_pos = pattern.find(':');
|
||||
has_standard_scheme_separator = false;
|
||||
}
|
||||
|
||||
if (scheme_end_pos == std::string::npos)
|
||||
if (scheme_end_pos == base::StringPiece::npos)
|
||||
return PARSE_ERROR_MISSING_SCHEME_SEPARATOR;
|
||||
|
||||
if (!SetScheme(pattern.substr(0, scheme_end_pos)))
|
||||
|
@ -214,7 +222,7 @@ URLPattern::ParseResult URLPattern::Parse(const std::string& pattern) {
|
|||
path_start_pos = host_start_pos;
|
||||
} else if (scheme_ == url::kFileScheme) {
|
||||
size_t host_end_pos = pattern.find(kPathSeparator, host_start_pos);
|
||||
if (host_end_pos == std::string::npos) {
|
||||
if (host_end_pos == base::StringPiece::npos) {
|
||||
// Allow hostname omission.
|
||||
// e.g. file://* is interpreted as file:///*,
|
||||
// file://foo* is interpreted as file:///foo*.
|
||||
|
@ -231,10 +239,13 @@ URLPattern::ParseResult URLPattern::Parse(const std::string& pattern) {
|
|||
if (host_start_pos == host_end_pos)
|
||||
return PARSE_ERROR_EMPTY_HOST;
|
||||
|
||||
if (host_end_pos == std::string::npos)
|
||||
if (host_end_pos == base::StringPiece::npos)
|
||||
return PARSE_ERROR_EMPTY_PATH;
|
||||
|
||||
host_ = pattern.substr(host_start_pos, host_end_pos - host_start_pos);
|
||||
// TODO(devlin): This whole series is expensive. Luckily we don't do it
|
||||
// *too* often, but it could be optimized.
|
||||
pattern.substr(host_start_pos, host_end_pos - host_start_pos)
|
||||
.CopyToString(&host_);
|
||||
|
||||
// The first component can optionally be '*' to match all subdomains.
|
||||
std::vector<std::string> host_components = base::SplitString(
|
||||
|
@ -282,9 +293,9 @@ void URLPattern::SetValidSchemes(int valid_schemes) {
|
|||
valid_schemes_ = valid_schemes;
|
||||
}
|
||||
|
||||
void URLPattern::SetHost(const std::string& host) {
|
||||
void URLPattern::SetHost(base::StringPiece host) {
|
||||
spec_.clear();
|
||||
host_ = host;
|
||||
host.CopyToString(&host_);
|
||||
}
|
||||
|
||||
void URLPattern::SetMatchAllURLs(bool val) {
|
||||
|
@ -304,9 +315,9 @@ void URLPattern::SetMatchSubdomains(bool val) {
|
|||
match_subdomains_ = val;
|
||||
}
|
||||
|
||||
bool URLPattern::SetScheme(const std::string& scheme) {
|
||||
bool URLPattern::SetScheme(base::StringPiece scheme) {
|
||||
spec_.clear();
|
||||
scheme_ = scheme;
|
||||
scheme.CopyToString(&scheme_);
|
||||
if (scheme_ == "*") {
|
||||
valid_schemes_ &= (SCHEME_HTTP | SCHEME_HTTPS);
|
||||
} else if (!IsValidScheme(scheme_)) {
|
||||
|
@ -315,7 +326,7 @@ bool URLPattern::SetScheme(const std::string& scheme) {
|
|||
return true;
|
||||
}
|
||||
|
||||
bool URLPattern::IsValidScheme(const std::string& scheme) const {
|
||||
bool URLPattern::IsValidScheme(base::StringPiece scheme) const {
|
||||
if (valid_schemes_ == SCHEME_ALL)
|
||||
return true;
|
||||
|
||||
|
@ -327,18 +338,18 @@ bool URLPattern::IsValidScheme(const std::string& scheme) const {
|
|||
return false;
|
||||
}
|
||||
|
||||
void URLPattern::SetPath(const std::string& path) {
|
||||
void URLPattern::SetPath(base::StringPiece path) {
|
||||
spec_.clear();
|
||||
path_ = path;
|
||||
path.CopyToString(&path_);
|
||||
path_escaped_ = path_;
|
||||
base::ReplaceSubstringsAfterOffset(&path_escaped_, 0, "\\", "\\\\");
|
||||
base::ReplaceSubstringsAfterOffset(&path_escaped_, 0, "?", "\\?");
|
||||
}
|
||||
|
||||
bool URLPattern::SetPort(const std::string& port) {
|
||||
bool URLPattern::SetPort(base::StringPiece port) {
|
||||
spec_.clear();
|
||||
if (IsValidPortForScheme(scheme_, port)) {
|
||||
port_ = port;
|
||||
port.CopyToString(&port_);
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
|
@ -354,15 +365,17 @@ bool URLPattern::MatchesURL(const GURL& test) const {
|
|||
test_url = test.inner_url();
|
||||
}
|
||||
|
||||
if (!MatchesScheme(test_url->scheme()))
|
||||
if (!MatchesScheme(test_url->scheme_piece()))
|
||||
return false;
|
||||
|
||||
if (match_all_urls_)
|
||||
return true;
|
||||
|
||||
std::string path_for_request = test.PathForRequest();
|
||||
if (has_inner_url)
|
||||
path_for_request = test_url->path() + path_for_request;
|
||||
if (has_inner_url) {
|
||||
path_for_request = base::StringPrintf("%s%s", test_url->path_piece().data(),
|
||||
path_for_request.c_str());
|
||||
}
|
||||
|
||||
return MatchesSecurityOriginHelper(*test_url) &&
|
||||
MatchesPath(path_for_request);
|
||||
|
@ -387,30 +400,33 @@ bool URLPattern::MatchesSecurityOrigin(const GURL& test) const {
|
|||
return MatchesSecurityOriginHelper(*test_url);
|
||||
}
|
||||
|
||||
bool URLPattern::MatchesScheme(const std::string& test) const {
|
||||
bool URLPattern::MatchesScheme(base::StringPiece test) const {
|
||||
if (!IsValidScheme(test))
|
||||
return false;
|
||||
|
||||
return scheme_ == "*" || test == scheme_;
|
||||
}
|
||||
|
||||
bool URLPattern::MatchesHost(const std::string& host) const {
|
||||
std::string test(url::kHttpScheme);
|
||||
test += url::kStandardSchemeSeparator;
|
||||
test += host;
|
||||
test += "/";
|
||||
return MatchesHost(GURL(test));
|
||||
bool URLPattern::MatchesHost(base::StringPiece host) const {
|
||||
// TODO(devlin): This is a bit sad. Parsing urls is expensive.
|
||||
return MatchesHost(
|
||||
GURL(base::StringPrintf("%s%s%s/", url::kHttpScheme,
|
||||
url::kStandardSchemeSeparator, host.data())));
|
||||
}
|
||||
|
||||
bool URLPattern::MatchesHost(const GURL& test) const {
|
||||
const base::StringPiece test_host(
|
||||
CanonicalizeHostForMatching(test.host_piece()));
|
||||
const base::StringPiece pattern_host(CanonicalizeHostForMatching(host_));
|
||||
|
||||
// If the hosts are exactly equal, we have a match.
|
||||
if (test.host() == host_)
|
||||
if (test_host == pattern_host)
|
||||
return true;
|
||||
|
||||
// If we're matching subdomains, and we have no host in the match pattern,
|
||||
// that means that we're matching all hosts, which means we have a match no
|
||||
// matter what the test host is.
|
||||
if (match_subdomains_ && host_.empty())
|
||||
if (match_subdomains_ && pattern_host.empty())
|
||||
return true;
|
||||
|
||||
// Otherwise, we can only match if our match pattern matches subdomains.
|
||||
|
@ -423,14 +439,13 @@ bool URLPattern::MatchesHost(const GURL& test) const {
|
|||
return false;
|
||||
|
||||
// Check if the test host is a subdomain of our host.
|
||||
if (test.host().length() <= (host_.length() + 1))
|
||||
if (test_host.length() <= (pattern_host.length() + 1))
|
||||
return false;
|
||||
|
||||
if (test.host().compare(test.host().length() - host_.length(),
|
||||
host_.length(), host_) != 0)
|
||||
if (!test_host.ends_with(pattern_host))
|
||||
return false;
|
||||
|
||||
return test.host()[test.host().length() - host_.length() - 1] == '.';
|
||||
return test_host[test_host.length() - pattern_host.length() - 1] == '.';
|
||||
}
|
||||
|
||||
bool URLPattern::ImpliesAllHosts() const {
|
||||
|
@ -444,28 +459,24 @@ bool URLPattern::ImpliesAllHosts() const {
|
|||
if (!match_subdomains_)
|
||||
return false;
|
||||
|
||||
// If |host_| is a recognized TLD, this will be 0. We don't include private
|
||||
// TLDs, so that, e.g., *.appspot.com does not imply all hosts.
|
||||
size_t registry_length = net::registry_controlled_domains::GetRegistryLength(
|
||||
host_,
|
||||
net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES,
|
||||
net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES);
|
||||
// If there was more than just a TLD in the host (e.g., *.foobar.com), it
|
||||
// doesn't imply all hosts.
|
||||
if (registry_length > 0)
|
||||
// doesn't imply all hosts. We don't include private TLDs, so that, e.g.,
|
||||
// *.appspot.com does not imply all hosts.
|
||||
if (net::registry_controlled_domains::HostHasRegistryControlledDomain(
|
||||
host_, net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES,
|
||||
net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES))
|
||||
return false;
|
||||
|
||||
// At this point the host could either be just a TLD ("com") or some unknown
|
||||
// TLD-like string ("notatld"). To disambiguate between them construct a
|
||||
// fake URL, and check the registry. This returns 0 if the TLD is
|
||||
// unrecognized, or the length of the recognized TLD.
|
||||
registry_length = net::registry_controlled_domains::GetRegistryLength(
|
||||
base::StringPrintf("foo.%s", host_.c_str()),
|
||||
// fake URL, and check the registry.
|
||||
//
|
||||
// If we recognized this TLD, then this is a pattern like *.com, and it
|
||||
// should imply all hosts.
|
||||
return net::registry_controlled_domains::HostHasRegistryControlledDomain(
|
||||
"notatld." + host_,
|
||||
net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES,
|
||||
net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES);
|
||||
// If we recognized this TLD, then this is a pattern like *.com, and it
|
||||
// should imply all hosts. Otherwise, this doesn't imply all hosts.
|
||||
return registry_length > 0;
|
||||
}
|
||||
|
||||
bool URLPattern::MatchesSingleOrigin() const {
|
||||
|
@ -474,11 +485,16 @@ bool URLPattern::MatchesSingleOrigin() const {
|
|||
return !ImpliesAllHosts() && scheme_ != "*" && !match_subdomains_;
|
||||
}
|
||||
|
||||
bool URLPattern::MatchesPath(const std::string& test) const {
|
||||
bool URLPattern::MatchesPath(base::StringPiece test) const {
|
||||
// Make the behaviour of OverlapsWith consistent with MatchesURL, which is
|
||||
// need to match hosted apps on e.g. 'google.com' also run on 'google.com/'.
|
||||
if (test + "/*" == path_escaped_)
|
||||
// The below if is a no-copy way of doing (test + "/*" == path_escaped_).
|
||||
if (path_escaped_.length() == test.length() + 2 &&
|
||||
base::StartsWith(path_escaped_.c_str(), test,
|
||||
base::CompareCase::SENSITIVE) &&
|
||||
base::EndsWith(path_escaped_, "/*", base::CompareCase::SENSITIVE)) {
|
||||
return true;
|
||||
}
|
||||
|
||||
return base::MatchPattern(test, path_escaped_);
|
||||
}
|
||||
|
@ -516,7 +532,7 @@ const std::string& URLPattern::GetAsString() const {
|
|||
if (!path_.empty())
|
||||
spec += path_;
|
||||
|
||||
spec_ = spec;
|
||||
spec_ = std::move(spec);
|
||||
return spec_;
|
||||
}
|
||||
|
||||
|
@ -574,7 +590,7 @@ bool URLPattern::MatchesSecurityOriginHelper(const GURL& test) const {
|
|||
return true;
|
||||
}
|
||||
|
||||
bool URLPattern::MatchesPortPattern(const std::string& port) const {
|
||||
bool URLPattern::MatchesPortPattern(base::StringPiece port) const {
|
||||
return port_ == "*" || port_ == port;
|
||||
}
|
||||
|
||||
|
@ -615,5 +631,3 @@ const char* URLPattern::GetParseResultString(
|
|||
URLPattern::ParseResult parse_result) {
|
||||
return kParseResultMessages[parse_result];
|
||||
}
|
||||
|
||||
} // namespace extensions
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue