Update URLPattern to its latest version

This commit is contained in:
Cheng Zhao 2017-01-24 17:29:35 +09:00 committed by Kevin Sawicki
parent 81784827ff
commit 6763977316
4 changed files with 113 additions and 101 deletions

View file

@ -20,13 +20,13 @@ using content::BrowserThread;
namespace mate {
template<>
struct Converter<extensions::URLPattern> {
struct Converter<URLPattern> {
static bool FromV8(v8::Isolate* isolate, v8::Local<v8::Value> val,
extensions::URLPattern* out) {
URLPattern* out) {
std::string pattern;
if (!ConvertFromV8(isolate, val, &pattern))
return false;
return out->Parse(pattern) == extensions::URLPattern::PARSE_SUCCESS;
return out->Parse(pattern) == URLPattern::PARSE_SUCCESS;
}
};

View file

@ -19,13 +19,11 @@
#include "net/http/http_request_headers.h"
#include "net/http/http_response_headers.h"
namespace extensions {
class URLPattern;
}
namespace atom {
using URLPatterns = std::set<extensions::URLPattern>;
using URLPatterns = std::set<URLPattern>;
const char* ResourceTypeToString(content::ResourceType type);

View file

@ -4,8 +4,11 @@
#include "extensions/common/url_pattern.h"
#include <stddef.h>
#include <ostream>
#include "base/macros.h"
#include "base/strings/pattern.h"
#include "base/strings/string_number_conversions.h"
#include "base/strings/string_piece.h"
@ -17,31 +20,28 @@
#include "url/gurl.h"
#include "url/url_util.h"
const char extensions::URLPattern::kAllUrlsPattern[] = "<all_urls>";
const char URLPattern::kAllUrlsPattern[] = "<all_urls>";
const char kExtensionScheme[] = "chrome-extension";
namespace {
// TODO(aa): What about more obscure schemes like data: and javascript: ?
// Note: keep this array in sync with kValidSchemeMasks.
const char* kValidSchemes[] = {
url::kHttpScheme,
url::kHttpsScheme,
url::kFileScheme,
url::kFtpScheme,
content::kChromeUIScheme,
kExtensionScheme,
const char* const kValidSchemes[] = {
url::kHttpScheme, url::kHttpsScheme,
url::kFileScheme, url::kFtpScheme,
content::kChromeUIScheme, kExtensionScheme,
url::kFileSystemScheme,
};
const int kValidSchemeMasks[] = {
extensions::URLPattern::SCHEME_HTTP,
extensions::URLPattern::SCHEME_HTTPS,
extensions::URLPattern::SCHEME_FILE,
extensions::URLPattern::SCHEME_FTP,
extensions::URLPattern::SCHEME_CHROMEUI,
extensions::URLPattern::SCHEME_EXTENSION,
extensions::URLPattern::SCHEME_FILESYSTEM,
URLPattern::SCHEME_HTTP,
URLPattern::SCHEME_HTTPS,
URLPattern::SCHEME_FILE,
URLPattern::SCHEME_FTP,
URLPattern::SCHEME_CHROMEUI,
URLPattern::SCHEME_EXTENSION,
URLPattern::SCHEME_FILESYSTEM,
};
static_assert(arraysize(kValidSchemes) == arraysize(kValidSchemeMasks),
@ -70,26 +70,26 @@ const char* const kParseResultMessages[] = {
kParseErrorInvalidHost,
};
static_assert(extensions::URLPattern::NUM_PARSE_RESULTS == arraysize(kParseResultMessages),
static_assert(URLPattern::NUM_PARSE_RESULTS == arraysize(kParseResultMessages),
"must add message for each parse result");
const char kPathSeparator[] = "/";
bool IsStandardScheme(const std::string& scheme) {
bool IsStandardScheme(base::StringPiece scheme) {
// "*" gets the same treatment as a standard scheme.
if (scheme == "*")
return true;
return url::IsStandard(scheme.c_str(),
return url::IsStandard(scheme.data(),
url::Component(0, static_cast<int>(scheme.length())));
}
bool IsValidPortForScheme(const std::string& scheme, const std::string& port) {
bool IsValidPortForScheme(base::StringPiece scheme, base::StringPiece port) {
if (port == "*")
return true;
// Only accept non-wildcard ports if the scheme uses ports.
if (url::DefaultPortForScheme(scheme.c_str(), scheme.length()) ==
if (url::DefaultPortForScheme(scheme.data(), scheme.length()) ==
url::PORT_UNSPECIFIED) {
return false;
}
@ -107,17 +107,23 @@ bool IsValidPortForScheme(const std::string& scheme, const std::string& port) {
// the path will have only a single wildcard at the end. This makes figuring
// out overlap much easier. It seems like there is probably a computer-sciency
// way to solve the general case, but we don't need that yet.
std::string StripTrailingWildcard(const std::string& path) {
size_t wildcard_index = path.find('*');
size_t path_last = path.size() - 1;
return wildcard_index == path_last ? path.substr(0, path_last) : path;
base::StringPiece StripTrailingWildcard(base::StringPiece path) {
if (path.ends_with("*"))
path.remove_suffix(1);
return path;
}
// Removes trailing dot from |host_piece| if any.
base::StringPiece CanonicalizeHostForMatching(base::StringPiece host_piece) {
if (host_piece.ends_with("."))
host_piece.remove_suffix(1);
return host_piece;
}
} // namespace
namespace extensions {
// static
bool URLPattern::IsValidSchemeForExtensions(const std::string& scheme) {
bool URLPattern::IsValidSchemeForExtensions(base::StringPiece scheme) {
for (size_t i = 0; i < arraysize(kValidSchemes); ++i) {
if (scheme == kValidSchemes[i])
return true;
@ -126,7 +132,7 @@ bool URLPattern::IsValidSchemeForExtensions(const std::string& scheme) {
}
URLPattern::URLPattern()
: valid_schemes_(SCHEME_ALL),
: valid_schemes_(SCHEME_NONE),
match_all_urls_(false),
match_subdomains_(false),
port_("*") {}
@ -137,7 +143,7 @@ URLPattern::URLPattern(int valid_schemes)
match_subdomains_(false),
port_("*") {}
URLPattern::URLPattern(int valid_schemes, const std::string& pattern)
URLPattern::URLPattern(int valid_schemes, base::StringPiece pattern)
// Strict error checking is used, because this constructor is only
// appropriate when we know |pattern| is valid.
: valid_schemes_(valid_schemes),
@ -149,6 +155,8 @@ URLPattern::URLPattern(int valid_schemes, const std::string& pattern)
NOTREACHED() << "URLPattern invalid: " << pattern << " result " << result;
}
URLPattern::URLPattern(const URLPattern& other) = default;
URLPattern::~URLPattern() {
}
@ -168,7 +176,7 @@ std::ostream& operator<<(std::ostream& out, const URLPattern& url_pattern) {
return out << '"' << url_pattern.GetAsString() << '"';
}
URLPattern::ParseResult URLPattern::Parse(const std::string& pattern) {
URLPattern::ParseResult URLPattern::Parse(base::StringPiece pattern) {
spec_.clear();
SetMatchAllURLs(false);
SetMatchSubdomains(false);
@ -185,12 +193,12 @@ URLPattern::ParseResult URLPattern::Parse(const std::string& pattern) {
bool has_standard_scheme_separator = true;
// Some urls also use ':' alone as the scheme separator.
if (scheme_end_pos == std::string::npos) {
if (scheme_end_pos == base::StringPiece::npos) {
scheme_end_pos = pattern.find(':');
has_standard_scheme_separator = false;
}
if (scheme_end_pos == std::string::npos)
if (scheme_end_pos == base::StringPiece::npos)
return PARSE_ERROR_MISSING_SCHEME_SEPARATOR;
if (!SetScheme(pattern.substr(0, scheme_end_pos)))
@ -214,7 +222,7 @@ URLPattern::ParseResult URLPattern::Parse(const std::string& pattern) {
path_start_pos = host_start_pos;
} else if (scheme_ == url::kFileScheme) {
size_t host_end_pos = pattern.find(kPathSeparator, host_start_pos);
if (host_end_pos == std::string::npos) {
if (host_end_pos == base::StringPiece::npos) {
// Allow hostname omission.
// e.g. file://* is interpreted as file:///*,
// file://foo* is interpreted as file:///foo*.
@ -231,10 +239,13 @@ URLPattern::ParseResult URLPattern::Parse(const std::string& pattern) {
if (host_start_pos == host_end_pos)
return PARSE_ERROR_EMPTY_HOST;
if (host_end_pos == std::string::npos)
if (host_end_pos == base::StringPiece::npos)
return PARSE_ERROR_EMPTY_PATH;
host_ = pattern.substr(host_start_pos, host_end_pos - host_start_pos);
// TODO(devlin): This whole series is expensive. Luckily we don't do it
// *too* often, but it could be optimized.
pattern.substr(host_start_pos, host_end_pos - host_start_pos)
.CopyToString(&host_);
// The first component can optionally be '*' to match all subdomains.
std::vector<std::string> host_components = base::SplitString(
@ -282,9 +293,9 @@ void URLPattern::SetValidSchemes(int valid_schemes) {
valid_schemes_ = valid_schemes;
}
void URLPattern::SetHost(const std::string& host) {
void URLPattern::SetHost(base::StringPiece host) {
spec_.clear();
host_ = host;
host.CopyToString(&host_);
}
void URLPattern::SetMatchAllURLs(bool val) {
@ -304,9 +315,9 @@ void URLPattern::SetMatchSubdomains(bool val) {
match_subdomains_ = val;
}
bool URLPattern::SetScheme(const std::string& scheme) {
bool URLPattern::SetScheme(base::StringPiece scheme) {
spec_.clear();
scheme_ = scheme;
scheme.CopyToString(&scheme_);
if (scheme_ == "*") {
valid_schemes_ &= (SCHEME_HTTP | SCHEME_HTTPS);
} else if (!IsValidScheme(scheme_)) {
@ -315,7 +326,7 @@ bool URLPattern::SetScheme(const std::string& scheme) {
return true;
}
bool URLPattern::IsValidScheme(const std::string& scheme) const {
bool URLPattern::IsValidScheme(base::StringPiece scheme) const {
if (valid_schemes_ == SCHEME_ALL)
return true;
@ -327,18 +338,18 @@ bool URLPattern::IsValidScheme(const std::string& scheme) const {
return false;
}
void URLPattern::SetPath(const std::string& path) {
void URLPattern::SetPath(base::StringPiece path) {
spec_.clear();
path_ = path;
path.CopyToString(&path_);
path_escaped_ = path_;
base::ReplaceSubstringsAfterOffset(&path_escaped_, 0, "\\", "\\\\");
base::ReplaceSubstringsAfterOffset(&path_escaped_, 0, "?", "\\?");
}
bool URLPattern::SetPort(const std::string& port) {
bool URLPattern::SetPort(base::StringPiece port) {
spec_.clear();
if (IsValidPortForScheme(scheme_, port)) {
port_ = port;
port.CopyToString(&port_);
return true;
}
return false;
@ -354,15 +365,17 @@ bool URLPattern::MatchesURL(const GURL& test) const {
test_url = test.inner_url();
}
if (!MatchesScheme(test_url->scheme()))
if (!MatchesScheme(test_url->scheme_piece()))
return false;
if (match_all_urls_)
return true;
std::string path_for_request = test.PathForRequest();
if (has_inner_url)
path_for_request = test_url->path() + path_for_request;
if (has_inner_url) {
path_for_request = base::StringPrintf("%s%s", test_url->path_piece().data(),
path_for_request.c_str());
}
return MatchesSecurityOriginHelper(*test_url) &&
MatchesPath(path_for_request);
@ -387,30 +400,33 @@ bool URLPattern::MatchesSecurityOrigin(const GURL& test) const {
return MatchesSecurityOriginHelper(*test_url);
}
bool URLPattern::MatchesScheme(const std::string& test) const {
bool URLPattern::MatchesScheme(base::StringPiece test) const {
if (!IsValidScheme(test))
return false;
return scheme_ == "*" || test == scheme_;
}
bool URLPattern::MatchesHost(const std::string& host) const {
std::string test(url::kHttpScheme);
test += url::kStandardSchemeSeparator;
test += host;
test += "/";
return MatchesHost(GURL(test));
bool URLPattern::MatchesHost(base::StringPiece host) const {
// TODO(devlin): This is a bit sad. Parsing urls is expensive.
return MatchesHost(
GURL(base::StringPrintf("%s%s%s/", url::kHttpScheme,
url::kStandardSchemeSeparator, host.data())));
}
bool URLPattern::MatchesHost(const GURL& test) const {
const base::StringPiece test_host(
CanonicalizeHostForMatching(test.host_piece()));
const base::StringPiece pattern_host(CanonicalizeHostForMatching(host_));
// If the hosts are exactly equal, we have a match.
if (test.host() == host_)
if (test_host == pattern_host)
return true;
// If we're matching subdomains, and we have no host in the match pattern,
// that means that we're matching all hosts, which means we have a match no
// matter what the test host is.
if (match_subdomains_ && host_.empty())
if (match_subdomains_ && pattern_host.empty())
return true;
// Otherwise, we can only match if our match pattern matches subdomains.
@ -423,14 +439,13 @@ bool URLPattern::MatchesHost(const GURL& test) const {
return false;
// Check if the test host is a subdomain of our host.
if (test.host().length() <= (host_.length() + 1))
if (test_host.length() <= (pattern_host.length() + 1))
return false;
if (test.host().compare(test.host().length() - host_.length(),
host_.length(), host_) != 0)
if (!test_host.ends_with(pattern_host))
return false;
return test.host()[test.host().length() - host_.length() - 1] == '.';
return test_host[test_host.length() - pattern_host.length() - 1] == '.';
}
bool URLPattern::ImpliesAllHosts() const {
@ -444,28 +459,24 @@ bool URLPattern::ImpliesAllHosts() const {
if (!match_subdomains_)
return false;
// If |host_| is a recognized TLD, this will be 0. We don't include private
// TLDs, so that, e.g., *.appspot.com does not imply all hosts.
size_t registry_length = net::registry_controlled_domains::GetRegistryLength(
host_,
net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES,
net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES);
// If there was more than just a TLD in the host (e.g., *.foobar.com), it
// doesn't imply all hosts.
if (registry_length > 0)
// doesn't imply all hosts. We don't include private TLDs, so that, e.g.,
// *.appspot.com does not imply all hosts.
if (net::registry_controlled_domains::HostHasRegistryControlledDomain(
host_, net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES,
net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES))
return false;
// At this point the host could either be just a TLD ("com") or some unknown
// TLD-like string ("notatld"). To disambiguate between them construct a
// fake URL, and check the registry. This returns 0 if the TLD is
// unrecognized, or the length of the recognized TLD.
registry_length = net::registry_controlled_domains::GetRegistryLength(
base::StringPrintf("foo.%s", host_.c_str()),
// fake URL, and check the registry.
//
// If we recognized this TLD, then this is a pattern like *.com, and it
// should imply all hosts.
return net::registry_controlled_domains::HostHasRegistryControlledDomain(
"notatld." + host_,
net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES,
net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES);
// If we recognized this TLD, then this is a pattern like *.com, and it
// should imply all hosts. Otherwise, this doesn't imply all hosts.
return registry_length > 0;
}
bool URLPattern::MatchesSingleOrigin() const {
@ -474,11 +485,16 @@ bool URLPattern::MatchesSingleOrigin() const {
return !ImpliesAllHosts() && scheme_ != "*" && !match_subdomains_;
}
bool URLPattern::MatchesPath(const std::string& test) const {
bool URLPattern::MatchesPath(base::StringPiece test) const {
// Make the behaviour of OverlapsWith consistent with MatchesURL, which is
// need to match hosted apps on e.g. 'google.com' also run on 'google.com/'.
if (test + "/*" == path_escaped_)
// The below if is a no-copy way of doing (test + "/*" == path_escaped_).
if (path_escaped_.length() == test.length() + 2 &&
base::StartsWith(path_escaped_.c_str(), test,
base::CompareCase::SENSITIVE) &&
base::EndsWith(path_escaped_, "/*", base::CompareCase::SENSITIVE)) {
return true;
}
return base::MatchPattern(test, path_escaped_);
}
@ -516,7 +532,7 @@ const std::string& URLPattern::GetAsString() const {
if (!path_.empty())
spec += path_;
spec_ = spec;
spec_ = std::move(spec);
return spec_;
}
@ -574,7 +590,7 @@ bool URLPattern::MatchesSecurityOriginHelper(const GURL& test) const {
return true;
}
bool URLPattern::MatchesPortPattern(const std::string& port) const {
bool URLPattern::MatchesPortPattern(base::StringPiece port) const {
return port_ == "*" || port_ == port;
}
@ -615,5 +631,3 @@ const char* URLPattern::GetParseResultString(
URLPattern::ParseResult parse_result) {
return kParseResultMessages[parse_result];
}
} // namespace extensions

View file

@ -9,9 +9,10 @@
#include <string>
#include <vector>
#include "base/strings/string_piece.h"
class GURL;
namespace extensions {
// A pattern that can be used to match URLs. A URLPattern is a very restricted
// subset of URL syntax:
//
@ -82,15 +83,16 @@ class URLPattern {
static const char kAllUrlsPattern[];
// Returns true if the given |scheme| is considered valid for extensions.
static bool IsValidSchemeForExtensions(const std::string& scheme);
static bool IsValidSchemeForExtensions(base::StringPiece scheme);
explicit URLPattern(int valid_schemes);
// Convenience to construct a URLPattern from a string. If the string is not
// known ahead of time, use Parse() instead, which returns success or failure.
URLPattern(int valid_schemes, const std::string& pattern);
URLPattern(int valid_schemes, base::StringPiece pattern);
URLPattern();
URLPattern(const URLPattern& other);
~URLPattern();
bool operator<(const URLPattern& other) const;
@ -101,7 +103,7 @@ class URLPattern {
// URLPattern::PARSE_SUCCESS on success, or an error code otherwise. On
// failure, this instance will have some intermediate values and is in an
// invalid state.
ParseResult Parse(const std::string& pattern_str);
ParseResult Parse(base::StringPiece pattern_str);
// Gets the bitmask of valid schemes.
int valid_schemes() const { return valid_schemes_; }
@ -110,7 +112,7 @@ class URLPattern {
// Gets the host the pattern matches. This can be an empty string if the
// pattern matches all hosts (the input was <scheme>://*/<whatever>).
const std::string& host() const { return host_; }
void SetHost(const std::string& host);
void SetHost(base::StringPiece host);
// Gets whether to match subdomains of host().
bool match_subdomains() const { return match_subdomains_; }
@ -119,7 +121,7 @@ class URLPattern {
// Gets the path the pattern matches with the leading slash. This can have
// embedded asterisks which are interpreted using glob rules.
const std::string& path() const { return path_; }
void SetPath(const std::string& path);
void SetPath(base::StringPiece path);
// Returns true if this pattern matches all urls.
bool match_all_urls() const { return match_all_urls_; }
@ -128,14 +130,14 @@ class URLPattern {
// Sets the scheme for pattern matches. This can be a single '*' if the
// pattern matches all valid schemes (as defined by the valid_schemes_
// property). Returns false on failure (if the scheme is not valid).
bool SetScheme(const std::string& scheme);
bool SetScheme(base::StringPiece scheme);
// Note: You should use MatchesScheme() instead of this getter unless you
// absolutely need the exact scheme. This is exposed for testing.
const std::string& scheme() const { return scheme_; }
// Returns true if the specified scheme can be used in this URL pattern, and
// false otherwise. Uses valid_schemes_ to determine validity.
bool IsValidScheme(const std::string& scheme) const;
bool IsValidScheme(base::StringPiece scheme) const;
// Returns true if this instance matches the specified URL.
bool MatchesURL(const GURL& test) const;
@ -147,14 +149,14 @@ class URLPattern {
// Note that if test is "filesystem", this may fail whereas MatchesURL
// may succeed. MatchesURL is smart enough to look at the inner_url instead
// of the outer "filesystem:" part.
bool MatchesScheme(const std::string& test) const;
bool MatchesScheme(base::StringPiece test) const;
// Returns true if |test| matches our host.
bool MatchesHost(const std::string& test) const;
bool MatchesHost(base::StringPiece test) const;
bool MatchesHost(const GURL& test) const;
// Returns true if |test| matches our path.
bool MatchesPath(const std::string& test) const;
bool MatchesPath(base::StringPiece test) const;
// Returns true if the pattern is vague enough that it implies all hosts,
// such as *://*/*.
@ -168,7 +170,7 @@ class URLPattern {
bool MatchesSingleOrigin() const;
// Sets the port. Returns false if the port is invalid.
bool SetPort(const std::string& port);
bool SetPort(base::StringPiece port);
const std::string& port() const { return port_; }
// Returns a string representing this instance.
@ -216,7 +218,7 @@ class URLPattern {
bool MatchesSecurityOriginHelper(const GURL& test) const;
// Returns true if our port matches the |port| pattern (it may be "*").
bool MatchesPortPattern(const std::string& port) const;
bool MatchesPortPattern(base::StringPiece port) const;
// If the URLPattern contains a wildcard scheme, returns a list of
// equivalent literal schemes, otherwise returns the current scheme.
@ -259,6 +261,4 @@ std::ostream& operator<<(std::ostream& out, const URLPattern& url_pattern);
typedef std::vector<URLPattern> URLPatternList;
} // namespace extensions
#endif // EXTENSIONS_COMMON_URL_PATTERN_H_