From 29f32c5ec71fd4a5fa6903a524486ff91dba6153 Mon Sep 17 00:00:00 2001 From: Robo Date: Fri, 4 Dec 2015 03:54:26 +0530 Subject: [PATCH] support filtering event with url regex --- atom/browser/api/atom_api_web_request.cc | 29 - atom/browser/api/atom_api_web_request.h | 9 +- atom/browser/net/atom_network_delegate.cc | 159 ++++- atom/browser/net/atom_network_delegate.h | 39 +- chromium_src/extensions/common/url_pattern.cc | 619 ++++++++++++++++++ chromium_src/extensions/common/url_pattern.h | 264 ++++++++ filenames.gypi | 2 + 7 files changed, 1061 insertions(+), 60 deletions(-) create mode 100644 chromium_src/extensions/common/url_pattern.cc create mode 100644 chromium_src/extensions/common/url_pattern.h diff --git a/atom/browser/api/atom_api_web_request.cc b/atom/browser/api/atom_api_web_request.cc index ad772e4c296..c45cd900ab7 100644 --- a/atom/browser/api/atom_api_web_request.cc +++ b/atom/browser/api/atom_api_web_request.cc @@ -45,35 +45,6 @@ void WebRequest::SetListener(mate::Arguments* args) { type, filter, callback)); } -mate::ObjectTemplateBuilder WebRequest::GetObjectTemplateBuilder( - v8::Isolate* isolate) { - return mate::ObjectTemplateBuilder(isolate) - .SetMethod("onBeforeRequest", - &WebRequest::SetListener< - AtomNetworkDelegate::kOnBeforeRequest>) - .SetMethod("onBeforeSendHeaders", - &WebRequest::SetListener< - AtomNetworkDelegate::kOnBeforeSendHeaders>) - .SetMethod("onSendHeaders", - &WebRequest::SetListener< - AtomNetworkDelegate::kOnSendHeaders>) - .SetMethod("onHeadersReceived", - &WebRequest::SetListener< - AtomNetworkDelegate::kOnHeadersReceived>) - .SetMethod("onBeforeRedirect", - &WebRequest::SetListener< - AtomNetworkDelegate::kOnBeforeRedirect>) - .SetMethod("onResponseStarted", - &WebRequest::SetListener< - AtomNetworkDelegate::kOnResponseStarted>) - .SetMethod("onCompleted", - &WebRequest::SetListener< - AtomNetworkDelegate::kOnCompleted>) - .SetMethod("onErrorOccurred", - &WebRequest::SetListener< - 
AtomNetworkDelegate::kOnErrorOccurred>); -} - // static mate::Handle WebRequest::Create( v8::Isolate* isolate, diff --git a/atom/browser/api/atom_api_web_request.h b/atom/browser/api/atom_api_web_request.h index 04fb758a9a7..053bc1bec97 100644 --- a/atom/browser/api/atom_api_web_request.h +++ b/atom/browser/api/atom_api_web_request.h @@ -7,10 +7,11 @@ #include +#include "atom/browser/api/trackable_object.h" #include "atom/browser/net/atom_network_delegate.h" #include "base/callback.h" #include "native_mate/arguments.h" -#include "native_mate/wrappable.h" +#include "native_mate/handle.h" namespace atom { @@ -18,7 +19,7 @@ class AtomBrowserContext; namespace api { -class WebRequest : public mate::TrackableObject { +class WebRequest : public mate::TrackableObject { public: static mate::Handle Create(v8::Isolate* isolate, AtomBrowserContext* browser_context); @@ -34,10 +35,6 @@ class WebRequest : public mate::TrackableObject { template void SetListener(mate::Arguments* args); - // mate::Wrappable: - mate::ObjectTemplateBuilder GetObjectTemplateBuilder( - v8::Isolate* isolate) override; - private: scoped_refptr browser_context_; diff --git a/atom/browser/net/atom_network_delegate.cc b/atom/browser/net/atom_network_delegate.cc index 3b3c8f97198..c8d341c5746 100644 --- a/atom/browser/net/atom_network_delegate.cc +++ b/atom/browser/net/atom_network_delegate.cc @@ -4,12 +4,9 @@ #include "atom/browser/net/atom_network_delegate.h" -#include - #include "atom/common/native_mate_converters/net_converter.h" #include "content/public/browser/browser_thread.h" #include "content/public/browser/resource_request_info.h" -#include "net/base/net_errors.h" #include "net/url_request/url_request.h" using content::BrowserThread; @@ -18,6 +15,40 @@ namespace atom { namespace { +std::string ResourceTypeToString(content::ResourceType type) { + switch (type) { + case content::RESOURCE_TYPE_MAIN_FRAME: + return "main_frame"; + case content::RESOURCE_TYPE_SUB_FRAME: + return "sub_frame"; + case 
content::RESOURCE_TYPE_STYLESHEET: + return "stylesheet"; + case content::RESOURCE_TYPE_SCRIPT: + return "script"; + case content::RESOURCE_TYPE_IMAGE: + return "image"; + case content::RESOURCE_TYPE_OBJECT: + return "object"; + case content::RESOURCE_TYPE_XHR: + return "xmlhttprequest"; + default: + return "other"; + } +} + +bool MatchesFilterCondition( + net::URLRequest* request, + const AtomNetworkDelegate::ListenerInfo& info) { + if (!info.url_patterns.empty()) { + auto url = request->url(); + for (auto& pattern : info.url_patterns) + if (pattern.MatchesURL(url)) + return true; + } + + return false; +} + base::DictionaryValue* ExtractRequestInfo(net::URLRequest* request) { base::DictionaryValue* dict = new base::DictionaryValue(); dict->SetInteger("id", request->identifier()); @@ -27,7 +58,7 @@ base::DictionaryValue* ExtractRequestInfo(net::URLRequest* request) { auto info = content::ResourceRequestInfo::ForRequest(request); if (info) resourceType = info->GetResourceType(); - dict->SetInteger("resourceType", resourceType); + dict->SetString("resourceType", ResourceTypeToString(resourceType)); dict->SetDouble("timestamp", base::Time::Now().ToDoubleT() * 1000); return dict; @@ -61,7 +92,7 @@ void OnBeforeURLRequestResponse( const AtomNetworkDelegate::BlockingResponse& result) { if (!result.redirectURL.is_empty()) *new_url = result.redirectURL; - callback.Run(result.cancel); + callback.Run(result.Cancel()); } void OnBeforeSendHeadersResponse( @@ -70,7 +101,7 @@ void OnBeforeSendHeadersResponse( const AtomNetworkDelegate::BlockingResponse& result) { if (!result.requestHeaders.IsEmpty()) *headers = result.requestHeaders; - callback.Run(result.cancel); + callback.Run(result.Cancel()); } void OnHeadersReceivedResponse( @@ -79,7 +110,7 @@ void OnHeadersReceivedResponse( const AtomNetworkDelegate::BlockingResponse& result) { if (result.responseHeaders.get()) *override_response_headers = result.responseHeaders; - callback.Run(result.cancel); + 
callback.Run(result.Cancel()); } } // namespace @@ -100,6 +131,19 @@ void AtomNetworkDelegate::SetListenerInIO( const Listener& callback) { ListenerInfo info; info.callback = callback; + + const base::ListValue* url_list = nullptr; + if (filter->GetList("urls", &url_list)) { + for (size_t i = 0; i < url_list->GetSize(); ++i) { + std::string url; + extensions::URLPattern pattern; + if (url_list->GetString(i, &url) && + pattern.Parse(url) == extensions::URLPattern::PARSE_SUCCESS) { + info.url_patterns.insert(pattern); + } + } + } + event_listener_map_[type] = info; } @@ -110,7 +154,11 @@ int AtomNetworkDelegate::OnBeforeURLRequest( brightray::NetworkDelegate::OnBeforeURLRequest(request, callback, new_url); auto listener_info = event_listener_map_[kOnBeforeRequest]; - if (!event_listener_map_.empty() && !listener_info.callback.is_null()) { + + if (!MatchesFilterCondition(request, listener_info)) + return net::OK; + + if (!listener_info.callback.is_null()) { auto wrapped_callback = listener_info.callback; auto details = ExtractRequestInfo(request); @@ -130,7 +178,11 @@ int AtomNetworkDelegate::OnBeforeSendHeaders( const net::CompletionCallback& callback, net::HttpRequestHeaders* headers) { auto listener_info = event_listener_map_[kOnBeforeSendHeaders]; - if (!event_listener_map_.empty() && !listener_info.callback.is_null()) { + + if (!MatchesFilterCondition(request, listener_info)) + return net::OK; + + if (!listener_info.callback.is_null()) { auto wrapped_callback = listener_info.callback; auto details = ExtractRequestInfo(request); details->Set("requestHeaders", GetRequestHeadersDict(*headers)); @@ -150,7 +202,11 @@ void AtomNetworkDelegate::OnSendHeaders( net::URLRequest* request, const net::HttpRequestHeaders& headers) { auto listener_info = event_listener_map_[kOnSendHeaders]; - if (!event_listener_map_.empty() && !listener_info.callback.is_null()) { + + if (!MatchesFilterCondition(request, listener_info)) + return; + + if (!listener_info.callback.is_null()) { 
auto wrapped_callback = listener_info.callback; auto details = ExtractRequestInfo(request); details->Set("requestHeaders", GetRequestHeadersDict(headers)); @@ -168,9 +224,17 @@ int AtomNetworkDelegate::OnHeadersReceived( scoped_refptr* override_response_headers, GURL* allowed_unsafe_redirect_url) { auto listener_info = event_listener_map_[kOnHeadersReceived]; - if (!event_listener_map_.empty() && !listener_info.callback.is_null()) { + + if (!MatchesFilterCondition(request, listener_info)) + return net::OK; + + if (!listener_info.callback.is_null()) { auto wrapped_callback = listener_info.callback; auto details = ExtractRequestInfo(request); + details->SetString("statusLine", + original_response_headers->GetStatusLine()); + details->SetInteger("statusCode", + original_response_headers->response_code()); details->Set("responseHeaders", GetResponseHeadersDict(original_response_headers)); @@ -189,10 +253,19 @@ int AtomNetworkDelegate::OnHeadersReceived( void AtomNetworkDelegate::OnBeforeRedirect(net::URLRequest* request, const GURL& new_location) { auto listener_info = event_listener_map_[kOnBeforeRedirect]; - if (!event_listener_map_.empty() && !listener_info.callback.is_null()) { + + if (!MatchesFilterCondition(request, listener_info)) + return; + + if (!listener_info.callback.is_null()) { auto wrapped_callback = listener_info.callback; auto details = ExtractRequestInfo(request); details->SetString("redirectURL", new_location.spec()); + details->SetInteger("statusCode", request->GetResponseCode()); + auto ip = request->GetSocketAddress().host(); + if (!ip.empty()) + details->SetString("ip", ip); + details->SetBoolean("fromCache", request->was_cached()); BrowserThread::PostTask(BrowserThread::UI, FROM_HERE, base::Bind(base::IgnoreResult(wrapped_callback), @@ -201,12 +274,23 @@ void AtomNetworkDelegate::OnBeforeRedirect(net::URLRequest* request, } void AtomNetworkDelegate::OnResponseStarted(net::URLRequest* request) { + if (request->status().status() != 
net::URLRequestStatus::SUCCESS) + return; + auto listener_info = event_listener_map_[kOnResponseStarted]; - if (!event_listener_map_.empty() && !listener_info.callback.is_null()) { + + if (!MatchesFilterCondition(request, listener_info)) + return; + + if (!listener_info.callback.is_null()) { auto wrapped_callback = listener_info.callback; auto details = ExtractRequestInfo(request); details->Set("responseHeaders", GetResponseHeadersDict(request->response_headers())); + details->SetBoolean("fromCache", request->was_cached()); + details->SetInteger("statusCode", + request->response_headers() ? + request->response_headers()->response_code() : 200); BrowserThread::PostTask(BrowserThread::UI, FROM_HERE, base::Bind(base::IgnoreResult(wrapped_callback), @@ -215,6 +299,55 @@ void AtomNetworkDelegate::OnResponseStarted(net::URLRequest* request) { } void AtomNetworkDelegate::OnCompleted(net::URLRequest* request, bool started) { + if (request->status().status() == net::URLRequestStatus::FAILED || + request->status().status() == net::URLRequestStatus::CANCELED) { + OnErrorOccurred(request); + return; + } else { + bool is_redirect = request->response_headers() && + net::HttpResponseHeaders::IsRedirectResponseCode( + request->response_headers()->response_code()); + if (is_redirect) + return; + } + + auto listener_info = event_listener_map_[kOnCompleted]; + + if (!MatchesFilterCondition(request, listener_info)) + return; + + if (!listener_info.callback.is_null()) { + auto wrapped_callback = listener_info.callback; + auto details = ExtractRequestInfo(request); + details->Set("responseHeaders", + GetResponseHeadersDict(request->response_headers())); + details->SetBoolean("fromCache", request->was_cached()); + details->SetInteger("statusCode", + request->response_headers() ? 
+ request->response_headers()->response_code() : 200); + + BrowserThread::PostTask(BrowserThread::UI, FROM_HERE, + base::Bind(base::IgnoreResult(wrapped_callback), + details)); + } +} + +void AtomNetworkDelegate::OnErrorOccurred(net::URLRequest* request) { + auto listener_info = event_listener_map_[kOnErrorOccurred]; + + if (!MatchesFilterCondition(request, listener_info)) + return; + + if (!listener_info.callback.is_null()) { + auto wrapped_callback = listener_info.callback; + auto details = ExtractRequestInfo(request); + details->SetBoolean("fromCache", request->was_cached()); + details->SetString("error", net::ErrorToString(request->status().error())); + + BrowserThread::PostTask(BrowserThread::UI, FROM_HERE, + base::Bind(base::IgnoreResult(wrapped_callback), + details)); + } } } // namespace atom diff --git a/atom/browser/net/atom_network_delegate.h b/atom/browser/net/atom_network_delegate.h index e1cfdb21003..c48d252b2b0 100644 --- a/atom/browser/net/atom_network_delegate.h +++ b/atom/browser/net/atom_network_delegate.h @@ -6,18 +6,29 @@ #define ATOM_BROWSER_NET_ATOM_NETWORK_DELEGATE_H_ #include +#include #include #include "brightray/browser/network_delegate.h" #include "base/callback.h" #include "base/values.h" +#include "extensions/common/url_pattern.h" +#include "net/base/net_errors.h" #include "net/http/http_request_headers.h" #include "net/http/http_response_headers.h" +namespace extensions { +class URLPattern; +} + namespace atom { class AtomNetworkDelegate : public brightray::NetworkDelegate { public: + struct BlockingResponse; + using Listener = + base::Callback; + enum EventTypes { kInvalidEvent = 0, kOnBeforeRequest = 1 << 0, @@ -26,23 +37,32 @@ class AtomNetworkDelegate : public brightray::NetworkDelegate { kOnHeadersReceived = 1 << 3, kOnBeforeRedirect = 1 << 4, kOnResponseStarted = 1 << 5, - kOnErrorOccurred = 1 << 6, - kOnCompleted = 1 << 7, + kOnCompleted = 1 << 6, + kOnErrorOccurred = 1 << 7, + }; + + struct ListenerInfo { + ListenerInfo() {} + 
~ListenerInfo() {} + + std::set url_patterns; + AtomNetworkDelegate::Listener callback; }; struct BlockingResponse { BlockingResponse() {} ~BlockingResponse() {} + int Cancel() const { + return cancel ? net::ERR_BLOCKED_BY_CLIENT : net::OK; + } + bool cancel; GURL redirectURL; net::HttpRequestHeaders requestHeaders; scoped_refptr responseHeaders; }; - using Listener = - base::Callback; - AtomNetworkDelegate(); ~AtomNetworkDelegate() override; @@ -71,14 +91,9 @@ class AtomNetworkDelegate : public brightray::NetworkDelegate { void OnResponseStarted(net::URLRequest* request) override; void OnCompleted(net::URLRequest* request, bool started) override; + void OnErrorOccurred(net::URLRequest* request); + private: - struct ListenerInfo { - ListenerInfo() {} - ~ListenerInfo() {} - - AtomNetworkDelegate::Listener callback; - }; - static std::map event_listener_map_; DISALLOW_COPY_AND_ASSIGN(AtomNetworkDelegate); diff --git a/chromium_src/extensions/common/url_pattern.cc b/chromium_src/extensions/common/url_pattern.cc new file mode 100644 index 00000000000..a6e51aa675b --- /dev/null +++ b/chromium_src/extensions/common/url_pattern.cc @@ -0,0 +1,619 @@ +// Copyright (c) 2012 The Chromium Authors. All rights reserved. +// Use of this source code is governed by a BSD-style license that can be +// found in the LICENSE file. 
+ +#include "extensions/common/url_pattern.h" + +#include + +#include "base/strings/pattern.h" +#include "base/strings/string_number_conversions.h" +#include "base/strings/string_piece.h" +#include "base/strings/string_split.h" +#include "base/strings/string_util.h" +#include "base/strings/stringprintf.h" +#include "content/public/common/url_constants.h" +#include "net/base/registry_controlled_domains/registry_controlled_domain.h" +#include "url/gurl.h" +#include "url/url_util.h" + +const char extensions::URLPattern::kAllUrlsPattern[] = ""; +const char kExtensionScheme[] = "chrome-extension"; + +namespace { + +// TODO(aa): What about more obscure schemes like data: and javascript: ? +// Note: keep this array in sync with kValidSchemeMasks. +const char* kValidSchemes[] = { + url::kHttpScheme, + url::kHttpsScheme, + url::kFileScheme, + url::kFtpScheme, + content::kChromeUIScheme, + kExtensionScheme, + url::kFileSystemScheme, +}; + +const int kValidSchemeMasks[] = { + extensions::URLPattern::SCHEME_HTTP, + extensions::URLPattern::SCHEME_HTTPS, + extensions::URLPattern::SCHEME_FILE, + extensions::URLPattern::SCHEME_FTP, + extensions::URLPattern::SCHEME_CHROMEUI, + extensions::URLPattern::SCHEME_EXTENSION, + extensions::URLPattern::SCHEME_FILESYSTEM, +}; + +static_assert(arraysize(kValidSchemes) == arraysize(kValidSchemeMasks), + "must keep these arrays in sync"); + +const char kParseSuccess[] = "Success."; +const char kParseErrorMissingSchemeSeparator[] = "Missing scheme separator."; +const char kParseErrorInvalidScheme[] = "Invalid scheme."; +const char kParseErrorWrongSchemeType[] = "Wrong scheme type."; +const char kParseErrorEmptyHost[] = "Host can not be empty."; +const char kParseErrorInvalidHostWildcard[] = "Invalid host wildcard."; +const char kParseErrorEmptyPath[] = "Empty path."; +const char kParseErrorInvalidPort[] = "Invalid port."; +const char kParseErrorInvalidHost[] = "Invalid host."; + +// Message explaining each URLPattern::ParseResult. 
+const char* const kParseResultMessages[] = { + kParseSuccess, + kParseErrorMissingSchemeSeparator, + kParseErrorInvalidScheme, + kParseErrorWrongSchemeType, + kParseErrorEmptyHost, + kParseErrorInvalidHostWildcard, + kParseErrorEmptyPath, + kParseErrorInvalidPort, + kParseErrorInvalidHost, +}; + +static_assert(extensions::URLPattern::NUM_PARSE_RESULTS == arraysize(kParseResultMessages), + "must add message for each parse result"); + +const char kPathSeparator[] = "/"; + +bool IsStandardScheme(const std::string& scheme) { + // "*" gets the same treatment as a standard scheme. + if (scheme == "*") + return true; + + return url::IsStandard(scheme.c_str(), + url::Component(0, static_cast(scheme.length()))); +} + +bool IsValidPortForScheme(const std::string& scheme, const std::string& port) { + if (port == "*") + return true; + + // Only accept non-wildcard ports if the scheme uses ports. + if (url::DefaultPortForScheme(scheme.c_str(), scheme.length()) == + url::PORT_UNSPECIFIED) { + return false; + } + + int parsed_port = url::PORT_UNSPECIFIED; + if (!base::StringToInt(port, &parsed_port)) + return false; + return (parsed_port >= 0) && (parsed_port < 65536); +} + +// Returns |path| with the trailing wildcard stripped if one existed. +// +// The functions that rely on this (OverlapsWith and Contains) are only +// called for the patterns inside URLPatternSet. In those cases, we know that +// the path will have only a single wildcard at the end. This makes figuring +// out overlap much easier. It seems like there is probably a computer-sciency +// way to solve the general case, but we don't need that yet. +std::string StripTrailingWildcard(const std::string& path) { + size_t wildcard_index = path.find('*'); + size_t path_last = path.size() - 1; + return wildcard_index == path_last ? 
path.substr(0, path_last) : path; +} + +} // namespace + +namespace extensions { +// static +bool URLPattern::IsValidSchemeForExtensions(const std::string& scheme) { + for (size_t i = 0; i < arraysize(kValidSchemes); ++i) { + if (scheme == kValidSchemes[i]) + return true; + } + return false; +} + +URLPattern::URLPattern() + : valid_schemes_(SCHEME_ALL), + match_all_urls_(false), + match_subdomains_(false), + port_("*") {} + +URLPattern::URLPattern(int valid_schemes) + : valid_schemes_(valid_schemes), + match_all_urls_(false), + match_subdomains_(false), + port_("*") {} + +URLPattern::URLPattern(int valid_schemes, const std::string& pattern) + // Strict error checking is used, because this constructor is only + // appropriate when we know |pattern| is valid. + : valid_schemes_(valid_schemes), + match_all_urls_(false), + match_subdomains_(false), + port_("*") { + ParseResult result = Parse(pattern); + if (PARSE_SUCCESS != result) + NOTREACHED() << "URLPattern invalid: " << pattern << " result " << result; +} + +URLPattern::~URLPattern() { +} + +bool URLPattern::operator<(const URLPattern& other) const { + return GetAsString() < other.GetAsString(); +} + +bool URLPattern::operator>(const URLPattern& other) const { + return GetAsString() > other.GetAsString(); +} + +bool URLPattern::operator==(const URLPattern& other) const { + return GetAsString() == other.GetAsString(); +} + +std::ostream& operator<<(std::ostream& out, const URLPattern& url_pattern) { + return out << '"' << url_pattern.GetAsString() << '"'; +} + +URLPattern::ParseResult URLPattern::Parse(const std::string& pattern) { + spec_.clear(); + SetMatchAllURLs(false); + SetMatchSubdomains(false); + SetPort("*"); + + // Special case pattern to match every valid URL. + if (pattern == kAllUrlsPattern) { + SetMatchAllURLs(true); + return PARSE_SUCCESS; + } + + // Parse out the scheme. 
+ size_t scheme_end_pos = pattern.find(url::kStandardSchemeSeparator); + bool has_standard_scheme_separator = true; + + // Some urls also use ':' alone as the scheme separator. + if (scheme_end_pos == std::string::npos) { + scheme_end_pos = pattern.find(':'); + has_standard_scheme_separator = false; + } + + if (scheme_end_pos == std::string::npos) + return PARSE_ERROR_MISSING_SCHEME_SEPARATOR; + + if (!SetScheme(pattern.substr(0, scheme_end_pos))) + return PARSE_ERROR_INVALID_SCHEME; + + bool standard_scheme = IsStandardScheme(scheme_); + if (standard_scheme != has_standard_scheme_separator) + return PARSE_ERROR_WRONG_SCHEME_SEPARATOR; + + // Advance past the scheme separator. + scheme_end_pos += + (standard_scheme ? strlen(url::kStandardSchemeSeparator) : 1); + if (scheme_end_pos >= pattern.size()) + return PARSE_ERROR_EMPTY_HOST; + + // Parse out the host and path. + size_t host_start_pos = scheme_end_pos; + size_t path_start_pos = 0; + + if (!standard_scheme) { + path_start_pos = host_start_pos; + } else if (scheme_ == url::kFileScheme) { + size_t host_end_pos = pattern.find(kPathSeparator, host_start_pos); + if (host_end_pos == std::string::npos) { + // Allow hostname omission. + // e.g. file://* is interpreted as file:///*, + // file://foo* is interpreted as file:///foo*. + path_start_pos = host_start_pos - 1; + } else { + // Ignore hostname if scheme is file://. + // e.g. file://localhost/foo is equal to file:///foo. + path_start_pos = host_end_pos; + } + } else { + size_t host_end_pos = pattern.find(kPathSeparator, host_start_pos); + + // Host is required. + if (host_start_pos == host_end_pos) + return PARSE_ERROR_EMPTY_HOST; + + if (host_end_pos == std::string::npos) + return PARSE_ERROR_EMPTY_PATH; + + host_ = pattern.substr(host_start_pos, host_end_pos - host_start_pos); + + // The first component can optionally be '*' to match all subdomains. 
+ std::vector host_components = base::SplitString( + host_, ".", base::TRIM_WHITESPACE, base::SPLIT_WANT_ALL); + + // Could be empty if the host only consists of whitespace characters. + if (host_components.empty() || + (host_components.size() == 1 && host_components[0].empty())) + return PARSE_ERROR_EMPTY_HOST; + + if (host_components[0] == "*") { + match_subdomains_ = true; + host_components.erase(host_components.begin(), + host_components.begin() + 1); + } + host_ = JoinString(host_components, "."); + + path_start_pos = host_end_pos; + } + + SetPath(pattern.substr(path_start_pos)); + + size_t port_pos = host_.find(':'); + if (port_pos != std::string::npos) { + if (!SetPort(host_.substr(port_pos + 1))) + return PARSE_ERROR_INVALID_PORT; + host_ = host_.substr(0, port_pos); + } + + // No other '*' can occur in the host, though. This isn't necessary, but is + // done as a convenience to developers who might otherwise be confused and + // think '*' works as a glob in the host. + if (host_.find('*') != std::string::npos) + return PARSE_ERROR_INVALID_HOST_WILDCARD; + + // Null characters are not allowed in hosts. 
+ if (host_.find('\0') != std::string::npos) + return PARSE_ERROR_INVALID_HOST; + + return PARSE_SUCCESS; +} + +void URLPattern::SetValidSchemes(int valid_schemes) { + spec_.clear(); + valid_schemes_ = valid_schemes; +} + +void URLPattern::SetHost(const std::string& host) { + spec_.clear(); + host_ = host; +} + +void URLPattern::SetMatchAllURLs(bool val) { + spec_.clear(); + match_all_urls_ = val; + + if (val) { + match_subdomains_ = true; + scheme_ = "*"; + host_.clear(); + SetPath("/*"); + } +} + +void URLPattern::SetMatchSubdomains(bool val) { + spec_.clear(); + match_subdomains_ = val; +} + +bool URLPattern::SetScheme(const std::string& scheme) { + spec_.clear(); + scheme_ = scheme; + if (scheme_ == "*") { + valid_schemes_ &= (SCHEME_HTTP | SCHEME_HTTPS); + } else if (!IsValidScheme(scheme_)) { + return false; + } + return true; +} + +bool URLPattern::IsValidScheme(const std::string& scheme) const { + if (valid_schemes_ == SCHEME_ALL) + return true; + + for (size_t i = 0; i < arraysize(kValidSchemes); ++i) { + if (scheme == kValidSchemes[i] && (valid_schemes_ & kValidSchemeMasks[i])) + return true; + } + + return false; +} + +void URLPattern::SetPath(const std::string& path) { + spec_.clear(); + path_ = path; + path_escaped_ = path_; + base::ReplaceSubstringsAfterOffset(&path_escaped_, 0, "\\", "\\\\"); + base::ReplaceSubstringsAfterOffset(&path_escaped_, 0, "?", "\\?"); +} + +bool URLPattern::SetPort(const std::string& port) { + spec_.clear(); + if (IsValidPortForScheme(scheme_, port)) { + port_ = port; + return true; + } + return false; +} + +bool URLPattern::MatchesURL(const GURL& test) const { + const GURL* test_url = &test; + bool has_inner_url = test.inner_url() != NULL; + + if (has_inner_url) { + if (!test.SchemeIsFileSystem()) + return false; // The only nested URLs we handle are filesystem URLs. 
+ test_url = test.inner_url(); + } + + if (!MatchesScheme(test_url->scheme())) + return false; + + if (match_all_urls_) + return true; + + std::string path_for_request = test.PathForRequest(); + if (has_inner_url) + path_for_request = test_url->path() + path_for_request; + + return MatchesSecurityOriginHelper(*test_url) && + MatchesPath(path_for_request); +} + +bool URLPattern::MatchesSecurityOrigin(const GURL& test) const { + const GURL* test_url = &test; + bool has_inner_url = test.inner_url() != NULL; + + if (has_inner_url) { + if (!test.SchemeIsFileSystem()) + return false; // The only nested URLs we handle are filesystem URLs. + test_url = test.inner_url(); + } + + if (!MatchesScheme(test_url->scheme())) + return false; + + if (match_all_urls_) + return true; + + return MatchesSecurityOriginHelper(*test_url); +} + +bool URLPattern::MatchesScheme(const std::string& test) const { + if (!IsValidScheme(test)) + return false; + + return scheme_ == "*" || test == scheme_; +} + +bool URLPattern::MatchesHost(const std::string& host) const { + std::string test(url::kHttpScheme); + test += url::kStandardSchemeSeparator; + test += host; + test += "/"; + return MatchesHost(GURL(test)); +} + +bool URLPattern::MatchesHost(const GURL& test) const { + // If the hosts are exactly equal, we have a match. + if (test.host() == host_) + return true; + + // If we're matching subdomains, and we have no host in the match pattern, + // that means that we're matching all hosts, which means we have a match no + // matter what the test host is. + if (match_subdomains_ && host_.empty()) + return true; + + // Otherwise, we can only match if our match pattern matches subdomains. + if (!match_subdomains_) + return false; + + // We don't do subdomain matching against IP addresses, so we can give up now + // if the test host is an IP address. + if (test.HostIsIPAddress()) + return false; + + // Check if the test host is a subdomain of our host. 
+ if (test.host().length() <= (host_.length() + 1)) + return false; + + if (test.host().compare(test.host().length() - host_.length(), + host_.length(), host_) != 0) + return false; + + return test.host()[test.host().length() - host_.length() - 1] == '.'; +} + +bool URLPattern::ImpliesAllHosts() const { + // Check if it matches all urls or is a pattern like http://*/*. + if (match_all_urls_ || + (match_subdomains_ && host_.empty() && port_ == "*" && path_ == "/*")) { + return true; + } + + // If this doesn't even match subdomains, it can't possibly imply all hosts. + if (!match_subdomains_) + return false; + + // If |host_| is a recognized TLD, this will be 0. We don't include private + // TLDs, so that, e.g., *.appspot.com does not imply all hosts. + size_t registry_length = net::registry_controlled_domains::GetRegistryLength( + host_, + net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES, + net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES); + // If there was more than just a TLD in the host (e.g., *.foobar.com), it + // doesn't imply all hosts. + if (registry_length > 0) + return false; + + // At this point the host could either be just a TLD ("com") or some unknown + // TLD-like string ("notatld"). To disambiguate between them construct a + // fake URL, and check the registry. This returns 0 if the TLD is + // unrecognized, or the length of the recognized TLD. + registry_length = net::registry_controlled_domains::GetRegistryLength( + base::StringPrintf("foo.%s", host_.c_str()), + net::registry_controlled_domains::EXCLUDE_UNKNOWN_REGISTRIES, + net::registry_controlled_domains::EXCLUDE_PRIVATE_REGISTRIES); + // If we recognized this TLD, then this is a pattern like *.com, and it + // should imply all hosts. Otherwise, this doesn't imply all hosts. + return registry_length > 0; +} + +bool URLPattern::MatchesSingleOrigin() const { + // Strictly speaking, the port is part of the origin, but in URLPattern it + // defaults to *. 
It's not very interesting anyway, so leave it out. + return !ImpliesAllHosts() && scheme_ != "*" && !match_subdomains_; +} + +bool URLPattern::MatchesPath(const std::string& test) const { + // Make the behaviour of OverlapsWith consistent with MatchesURL, which is + // need to match hosted apps on e.g. 'google.com' also run on 'google.com/'. + if (test + "/*" == path_escaped_) + return true; + + return base::MatchPattern(test, path_escaped_); +} + +const std::string& URLPattern::GetAsString() const { + if (!spec_.empty()) + return spec_; + + if (match_all_urls_) { + spec_ = kAllUrlsPattern; + return spec_; + } + + bool standard_scheme = IsStandardScheme(scheme_); + + std::string spec = scheme_ + + (standard_scheme ? url::kStandardSchemeSeparator : ":"); + + if (scheme_ != url::kFileScheme && standard_scheme) { + if (match_subdomains_) { + spec += "*"; + if (!host_.empty()) + spec += "."; + } + + if (!host_.empty()) + spec += host_; + + if (port_ != "*") { + spec += ":"; + spec += port_; + } + } + + if (!path_.empty()) + spec += path_; + + spec_ = spec; + return spec_; +} + +bool URLPattern::OverlapsWith(const URLPattern& other) const { + if (match_all_urls() || other.match_all_urls()) + return true; + return (MatchesAnyScheme(other.GetExplicitSchemes()) || + other.MatchesAnyScheme(GetExplicitSchemes())) + && (MatchesHost(other.host()) || other.MatchesHost(host())) + && (MatchesPortPattern(other.port()) || other.MatchesPortPattern(port())) + && (MatchesPath(StripTrailingWildcard(other.path())) || + other.MatchesPath(StripTrailingWildcard(path()))); +} + +bool URLPattern::Contains(const URLPattern& other) const { + if (match_all_urls()) + return true; + return MatchesAllSchemes(other.GetExplicitSchemes()) && + MatchesHost(other.host()) && + (!other.match_subdomains_ || match_subdomains_) && + MatchesPortPattern(other.port()) && + MatchesPath(StripTrailingWildcard(other.path())); +} + +bool URLPattern::MatchesAnyScheme( + const std::vector& schemes) const { + for 
(std::vector::const_iterator i = schemes.begin(); + i != schemes.end(); ++i) { + if (MatchesScheme(*i)) + return true; + } + + return false; +} + +bool URLPattern::MatchesAllSchemes( + const std::vector& schemes) const { + for (std::vector::const_iterator i = schemes.begin(); + i != schemes.end(); ++i) { + if (!MatchesScheme(*i)) + return false; + } + + return true; +} + +bool URLPattern::MatchesSecurityOriginHelper(const GURL& test) const { + // Ignore hostname if scheme is file://. + if (scheme_ != url::kFileScheme && !MatchesHost(test)) + return false; + + if (!MatchesPortPattern(base::IntToString(test.EffectiveIntPort()))) + return false; + + return true; +} + +bool URLPattern::MatchesPortPattern(const std::string& port) const { + return port_ == "*" || port_ == port; +} + +std::vector URLPattern::GetExplicitSchemes() const { + std::vector result; + + if (scheme_ != "*" && !match_all_urls_ && IsValidScheme(scheme_)) { + result.push_back(scheme_); + return result; + } + + for (size_t i = 0; i < arraysize(kValidSchemes); ++i) { + if (MatchesScheme(kValidSchemes[i])) { + result.push_back(kValidSchemes[i]); + } + } + + return result; +} + +std::vector URLPattern::ConvertToExplicitSchemes() const { + std::vector explicit_schemes = GetExplicitSchemes(); + std::vector result; + + for (std::vector::const_iterator i = explicit_schemes.begin(); + i != explicit_schemes.end(); ++i) { + URLPattern temp = *this; + temp.SetScheme(*i); + temp.SetMatchAllURLs(false); + result.push_back(temp); + } + + return result; +} + +// static +const char* URLPattern::GetParseResultString( + URLPattern::ParseResult parse_result) { + return kParseResultMessages[parse_result]; +} + +} // namespace extensions diff --git a/chromium_src/extensions/common/url_pattern.h b/chromium_src/extensions/common/url_pattern.h new file mode 100644 index 00000000000..fa9b6495e31 --- /dev/null +++ b/chromium_src/extensions/common/url_pattern.h @@ -0,0 +1,264 @@ +// Copyright (c) 2012 The Chromium Authors. 
All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+#ifndef EXTENSIONS_COMMON_URL_PATTERN_H_
+#define EXTENSIONS_COMMON_URL_PATTERN_H_
+
+#include <functional>
+#include <iosfwd>
+#include <string>
+#include <vector>
+
+class GURL;
+
+namespace extensions {
+// A pattern that can be used to match URLs. A URLPattern is a very restricted
+// subset of URL syntax:
+//
+// <url-pattern> := <scheme>://<host><port><path> | '<all_urls>'
+// <scheme> := '*' | 'http' | 'https' | 'file' | 'ftp' | 'chrome' |
+//             'chrome-extension' | 'filesystem'
+// <host> := '*' | '*.' <anychar except '/' and '*'>+
+// <port> := [':' ('*' | <port number between 0 and 65535>)]
+// <path> := '/' <any chars>
+//
+// * Host is not used when the scheme is 'file'.
+// * The path can have embedded '*' characters which act as glob wildcards.
+// * '<all_urls>' is a special pattern that matches any URL that contains a
+//   valid scheme (as specified by valid_schemes_).
+// * The '*' scheme pattern excludes file URLs.
+//
+// Examples of valid patterns:
+// - http://*/*
+// - http://*/foo*
+// - https://*.google.com/foo*bar
+// - file://monkey*
+// - http://127.0.0.1/*
+//
+// Examples of invalid patterns:
+// - http://* -- path not specified
+// - http://*foo/bar -- * not allowed as substring of host component
+// - http://foo.*.bar/baz -- * must be first component
+// - http:/bar -- scheme separator not found
+// - foo://* -- invalid scheme
+// - chrome:// -- we don't support chrome internal URLs
+class URLPattern {
+ public:
+  // A collection of scheme bitmasks for use with valid_schemes.
+  enum SchemeMasks {
+    SCHEME_NONE = 0,
+    SCHEME_HTTP = 1 << 0,
+    SCHEME_HTTPS = 1 << 1,
+    SCHEME_FILE = 1 << 2,
+    SCHEME_FTP = 1 << 3,
+    SCHEME_CHROMEUI = 1 << 4,
+    SCHEME_EXTENSION = 1 << 5,
+    SCHEME_FILESYSTEM = 1 << 6,
+
+    // IMPORTANT!
+    // SCHEME_ALL will match every scheme, including chrome://, chrome-
+    // extension://, about:, etc. Because this has lots of security
+    // implications, third-party extensions should usually not be able to get
+    // access to URL patterns initialized this way. If there is a reason
+    // for violating this general rule, document why this is safe.
+    SCHEME_ALL = -1,
+  };
+
+  // Error codes returned from Parse().
+  enum ParseResult {
+    PARSE_SUCCESS = 0,
+    PARSE_ERROR_MISSING_SCHEME_SEPARATOR,
+    PARSE_ERROR_INVALID_SCHEME,
+    PARSE_ERROR_WRONG_SCHEME_SEPARATOR,
+    PARSE_ERROR_EMPTY_HOST,
+    PARSE_ERROR_INVALID_HOST_WILDCARD,
+    PARSE_ERROR_EMPTY_PATH,
+    PARSE_ERROR_INVALID_PORT,
+    PARSE_ERROR_INVALID_HOST,
+    NUM_PARSE_RESULTS
+  };
+
+  // The <all_urls> string pattern.
+  static const char kAllUrlsPattern[];
+
+  // Returns true if the given |scheme| is considered valid for extensions.
+  static bool IsValidSchemeForExtensions(const std::string& scheme);
+
+  explicit URLPattern(int valid_schemes);
+
+  // Convenience to construct a URLPattern from a string. If the string is not
+  // known ahead of time, use Parse() instead, which returns success or failure.
+  URLPattern(int valid_schemes, const std::string& pattern);
+
+  URLPattern();
+  ~URLPattern();
+
+  bool operator<(const URLPattern& other) const;
+  bool operator>(const URLPattern& other) const;
+  bool operator==(const URLPattern& other) const;
+
+  // Initializes this instance by parsing the provided string. Returns
+  // URLPattern::PARSE_SUCCESS on success, or an error code otherwise. On
+  // failure, this instance will have some intermediate values and is in an
+  // invalid state.
+  ParseResult Parse(const std::string& pattern_str);
+
+  // Gets the bitmask of valid schemes.
+  int valid_schemes() const { return valid_schemes_; }
+  void SetValidSchemes(int valid_schemes);
+
+  // Gets the host the pattern matches. This can be an empty string if the
+  // pattern matches all hosts (the input was <scheme>://*/<whatever>).
+  const std::string& host() const { return host_; }
+  void SetHost(const std::string& host);
+
+  // Gets whether to match subdomains of host().
+  bool match_subdomains() const { return match_subdomains_; }
+  void SetMatchSubdomains(bool val);
+
+  // Gets the path the pattern matches with the leading slash. This can have
+  // embedded asterisks which are interpreted using glob rules.
+  const std::string& path() const { return path_; }
+  void SetPath(const std::string& path);
+
+  // Returns true if this pattern matches all urls.
+  bool match_all_urls() const { return match_all_urls_; }
+  void SetMatchAllURLs(bool val);
+
+  // Sets the scheme for pattern matches. This can be a single '*' if the
+  // pattern matches all valid schemes (as defined by the valid_schemes_
+  // property). Returns false on failure (if the scheme is not valid).
+  bool SetScheme(const std::string& scheme);
+  // Note: You should use MatchesScheme() instead of this getter unless you
+  // absolutely need the exact scheme. This is exposed for testing.
+  const std::string& scheme() const { return scheme_; }
+
+  // Returns true if the specified scheme can be used in this URL pattern, and
+  // false otherwise. Uses valid_schemes_ to determine validity.
+  bool IsValidScheme(const std::string& scheme) const;
+
+  // Returns true if this instance matches the specified URL.
+  bool MatchesURL(const GURL& test) const;
+
+  // Returns true if this instance matches the specified security origin.
+  bool MatchesSecurityOrigin(const GURL& test) const;
+
+  // Returns true if |test| matches our scheme.
+  // Note that if test is "filesystem", this may fail whereas MatchesURL
+  // may succeed. MatchesURL is smart enough to look at the inner_url instead
+  // of the outer "filesystem:" part.
+  bool MatchesScheme(const std::string& test) const;
+
+  // Returns true if |test| matches our host.
+  bool MatchesHost(const std::string& test) const;
+  bool MatchesHost(const GURL& test) const;
+
+  // Returns true if |test| matches our path.
+  bool MatchesPath(const std::string& test) const;
+
+  // Returns true if the pattern is vague enough that it implies all hosts,
+  // such as *://*/*.
+  // This is an expensive method, and should be used sparingly!
+  // You should probably use URLPatternSet::ShouldWarnAllHosts(), which is
+  // cached.
+  bool ImpliesAllHosts() const;
+
+  // Returns true if the pattern only matches a single origin. The pattern may
+  // include a path.
+  bool MatchesSingleOrigin() const;
+
+  // Sets the port. Returns false if the port is invalid.
+  bool SetPort(const std::string& port);
+  const std::string& port() const { return port_; }
+
+  // Returns a string representing this instance.
+  const std::string& GetAsString() const;
+
+  // Determines whether there is a URL that would match this instance and
+  // another instance. This method is symmetrical: Calling
+  // other.OverlapsWith(this) would result in the same answer.
+  bool OverlapsWith(const URLPattern& other) const;
+
+  // Returns true if this pattern matches all possible URLs that |other| can
+  // match. For example, http://*.google.com encompasses http://www.google.com.
+  bool Contains(const URLPattern& other) const;
+
+  // Converts this URLPattern into an equivalent set of URLPatterns that don't
+  // use a wildcard in the scheme component. If this URLPattern doesn't use a
+  // wildcard scheme, then the returned set will contain one element that is
+  // equivalent to this instance.
+  std::vector<URLPattern> ConvertToExplicitSchemes() const;
+
+  static bool EffectiveHostCompare(const URLPattern& a, const URLPattern& b) {
+    if (a.match_all_urls_ && b.match_all_urls_)
+      return false;
+    return a.host_.compare(b.host_) < 0;
+  }
+
+  // Used for origin comparisons in a std::set.
+  class EffectiveHostCompareFunctor {
+   public:
+    bool operator()(const URLPattern& a, const URLPattern& b) const {
+      return EffectiveHostCompare(a, b);
+    }
+  };
+
+  // Get an error string for a ParseResult.
+  static const char* GetParseResultString(URLPattern::ParseResult parse_result);
+
+ private:
+  // Returns true if any of the |schemes| items matches our scheme.
+  bool MatchesAnyScheme(const std::vector<std::string>& schemes) const;
+
+  // Returns true if all of the |schemes| items match our scheme.
+  bool MatchesAllSchemes(const std::vector<std::string>& schemes) const;
+
+  bool MatchesSecurityOriginHelper(const GURL& test) const;
+
+  // Returns true if our port matches the |port| pattern (it may be "*").
+  bool MatchesPortPattern(const std::string& port) const;
+
+  // If the URLPattern contains a wildcard scheme, returns a list of
+  // equivalent literal schemes, otherwise returns the current scheme.
+  std::vector<std::string> GetExplicitSchemes() const;
+
+  // A bitmask containing the schemes which are considered valid for this
+  // pattern. Parse() uses this to decide whether a pattern contains a valid
+  // scheme.
+  int valid_schemes_;
+
+  // True if this is a special-case "<all_urls>" pattern.
+  bool match_all_urls_;
+
+  // The scheme for the pattern.
+  std::string scheme_;
+
+  // The host without any leading "*" components.
+  std::string host_;
+
+  // Whether we should match subdomains of the host. This is true if the first
+  // component of the pattern's host was "*".
+  bool match_subdomains_;
+
+  // The port.
+  std::string port_;
+
+  // The path to match. This is everything after the host of the URL, or
+  // everything after the scheme in the case of file:// URLs.
+  std::string path_;
+
+  // The path with "?" and "\" characters escaped for use with the
+  // MatchPattern() function.
+  std::string path_escaped_;
+
+  // A string representing this URLPattern.
+  mutable std::string spec_;
+};
+
+std::ostream& operator<<(std::ostream& out, const URLPattern& url_pattern);
+
+typedef std::vector<URLPattern> URLPatternList;
+
+}  // namespace extensions
+
+#endif  // EXTENSIONS_COMMON_URL_PATTERN_H_
diff --git a/filenames.gypi b/filenames.gypi
index 640607d4017..f1209eeeb0e 100644
--- a/filenames.gypi
+++ b/filenames.gypi
@@ -476,6 +476,8 @@
       'chromium_src/chrome/utility/utility_message_handler.h',
       'chromium_src/extensions/browser/app_window/size_constraints.cc',
       'chromium_src/extensions/browser/app_window/size_constraints.h',
+      'chromium_src/extensions/common/url_pattern.cc',
+      'chromium_src/extensions/common/url_pattern.h',
       'chromium_src/library_loaders/libspeechd_loader.cc',
       'chromium_src/library_loaders/libspeechd.h',
       'chromium_src/net/test/embedded_test_server/stream_listen_socket.cc',