diff --git a/atom/renderer/api/atom_api_spell_check_client.cc b/atom/renderer/api/atom_api_spell_check_client.cc index 3f7676595164..cc7716e75633 100644 --- a/atom/renderer/api/atom_api_spell_check_client.cc +++ b/atom/renderer/api/atom_api_spell_check_client.cc @@ -9,6 +9,7 @@ #include "atom/common/native_mate_converters/string16_converter.h" #include "base/logging.h" +#include "chrome/renderer/spellchecker/spellcheck_worditerator.h" #include "native_mate/converter.h" #include "native_mate/dictionary.h" #include "third_party/icu/source/common/unicode/uscript.h" @@ -83,16 +84,14 @@ void SpellCheckClient::RequestCheckingOfText( completionCallback->DidFinishCheckingText(results); } -void SpellCheckClient::ShowSpellingUI(bool show) { -} +void SpellCheckClient::ShowSpellingUI(bool show) {} bool SpellCheckClient::IsShowingSpellingUI() { return false; } void SpellCheckClient::UpdateSpellingUIWithMisspelledWord( - const blink::WebString& word) { -} + const blink::WebString& word) {} void SpellCheckClient::SpellCheckText( const base::string16& text, @@ -103,9 +102,9 @@ void SpellCheckClient::SpellCheckText( if (!text_iterator_.IsInitialized() && !text_iterator_.Initialize(&character_attributes_, true)) { - // We failed to initialize text_iterator_, return as spelled correctly. - VLOG(1) << "Failed to initialize SpellcheckWordIterator"; - return; + // We failed to initialize text_iterator_, return as spelled correctly. 
+ VLOG(1) << "Failed to initialize SpellcheckWordIterator"; + return; } if (!contraction_iterator_.IsInitialized() && @@ -121,7 +120,13 @@ void SpellCheckClient::SpellCheckText( base::string16 word; int word_start; int word_length; - while (text_iterator_.GetNextWord(&word, &word_start, &word_length)) { + for (auto status = + text_iterator_.GetNextWord(&word, &word_start, &word_length); + status != SpellcheckWordIterator::IS_END_OF_TEXT; + status = text_iterator_.GetNextWord(&word, &word_start, &word_length)) { + if (status == SpellcheckWordIterator::IS_SKIPPABLE) + continue; + // Found a word (or a contraction) that the spellchecker can check the // spelling of. if (SpellCheckWord(scope, word)) @@ -145,7 +150,7 @@ void SpellCheckClient::SpellCheckText( bool SpellCheckClient::SpellCheckWord( const SpellCheckScope& scope, const base::string16& word_to_check) const { - DCHECK(!scope.spell_check_.IsEmpty()); + DCHECK(!scope.spell_check_.IsEmpty()); v8::Local word = mate::ConvertToV8(isolate_, word_to_check); v8::Local result = @@ -171,7 +176,14 @@ bool SpellCheckClient::IsValidContraction(const SpellCheckScope& scope, int word_start; int word_length; - while (contraction_iterator_.GetNextWord(&word, &word_start, &word_length)) { + for (auto status = + contraction_iterator_.GetNextWord(&word, &word_start, &word_length); + status != SpellcheckWordIterator::IS_END_OF_TEXT; + status = contraction_iterator_.GetNextWord(&word, &word_start, + &word_length)) { + if (status == SpellcheckWordIterator::IS_SKIPPABLE) + continue; + if (!SpellCheckWord(scope, word)) return false; } diff --git a/chromium_src/chrome/renderer/spellchecker/spellcheck_worditerator.cc b/chromium_src/chrome/renderer/spellchecker/spellcheck_worditerator.cc index 46465f4dfd4a..af2a12c3ec25 100644 --- a/chromium_src/chrome/renderer/spellchecker/spellcheck_worditerator.cc +++ b/chromium_src/chrome/renderer/spellchecker/spellcheck_worditerator.cc @@ -7,10 +7,13 @@ #include 
"chrome/renderer/spellchecker/spellcheck_worditerator.h" #include +#include #include +#include #include "base/i18n/break_iterator.h" #include "base/logging.h" +#include "base/macros.h" #include "base/strings/stringprintf.h" #include "base/strings/utf_string_conversions.h" #include "third_party/icu/source/common/unicode/normlzr.h" @@ -21,11 +24,9 @@ // SpellcheckCharAttribute implementation: SpellcheckCharAttribute::SpellcheckCharAttribute() - : script_code_(USCRIPT_LATIN) { -} + : script_code_(USCRIPT_LATIN) {} -SpellcheckCharAttribute::~SpellcheckCharAttribute() { -} +SpellcheckCharAttribute::~SpellcheckCharAttribute() {} void SpellcheckCharAttribute::SetDefaultLanguage(const std::string& language) { CreateRuleSets(language); @@ -33,8 +34,8 @@ void SpellcheckCharAttribute::SetDefaultLanguage(const std::string& language) { base::string16 SpellcheckCharAttribute::GetRuleSet( bool allow_contraction) const { - return allow_contraction ? - ruleset_allow_contraction_ : ruleset_disallow_contraction_; + return allow_contraction ? ruleset_allow_contraction_ + : ruleset_disallow_contraction_; } void SpellcheckCharAttribute::CreateRuleSets(const std::string& language) { @@ -160,8 +161,13 @@ void SpellcheckCharAttribute::CreateRuleSets(const std::string& language) { // Treat numbers as word characters except for Arabic and Hebrew. const char* aletter_extra = " [0123456789]"; - if (script_code_ == USCRIPT_HEBREW || script_code_ == USCRIPT_ARABIC) + if (script_code_ == USCRIPT_HEBREW) aletter_extra = ""; + else if (script_code_ == USCRIPT_ARABIC) + // When "script=Arabic", it does not include tatweel, which is + // "script=Common" so add it back. Otherwise, it creates unwanted + // word breaks. + aletter_extra = " [\\u0640]"; const char kMidLetterExtra[] = ""; // For Hebrew, treat single/double quoation marks as MidLetter. 
@@ -178,19 +184,11 @@ void SpellcheckCharAttribute::CreateRuleSets(const std::string& language) { const char kDisallowContraction[] = ""; ruleset_allow_contraction_ = base::ASCIIToUTF16( - base::StringPrintf(kRuleTemplate, - aletter, - aletter_extra, - midletter_extra, - aletter_plus, - kAllowContraction)); + base::StringPrintf(kRuleTemplate, aletter, aletter_extra, midletter_extra, + aletter_plus, kAllowContraction)); ruleset_disallow_contraction_ = base::ASCIIToUTF16( - base::StringPrintf(kRuleTemplate, - aletter, - aletter_extra, - midletter_extra, - aletter_plus, - kDisallowContraction)); + base::StringPrintf(kRuleTemplate, aletter, aletter_extra, midletter_extra, + aletter_plus, kDisallowContraction)); } bool SpellcheckCharAttribute::OutputChar(UChar c, @@ -214,12 +212,11 @@ bool SpellcheckCharAttribute::OutputChar(UChar c, bool SpellcheckCharAttribute::OutputArabic(UChar c, base::string16* output) const { - // Discard characters not from Arabic alphabets. We also discard vowel marks - // of Arabic (Damma, Fatha, Kasra, etc.) to prevent our Arabic dictionary from - // marking an Arabic word including vowel marks as misspelled. (We need to - // check these vowel marks manually and filter them out since their script - // codes are USCRIPT_ARABIC.) - if (0x0621 <= c && c <= 0x064D) + // Include non-Arabic characters (which should trigger a spelling error) + // and Arabic characters excluding vowel marks and class "Lm". + // We filter the latter because, while they are "letters", they are + // optional and so don't affect the correctness of the rest of the word. + if (!(0x0600 <= c && c <= 0x06FF) || (u_isalpha(c) && c != 0x0640)) output->push_back(c); return true; } @@ -281,8 +278,8 @@ bool SpellcheckCharAttribute::OutputHebrew(UChar c, // USCRIPT_HEBREW.) // Pass through ASCII single/double quotation marks and Hebrew Geresh and // Gershayim. 
- if ((0x05D0 <= c && c <= 0x05EA) || c == 0x22 || c == 0x27 || - c == 0x05F4 || c == 0x05F3) + if ((0x05D0 <= c && c <= 0x05EA) || c == 0x22 || c == 0x27 || c == 0x05F4 || + c == 0x05F3) output->push_back(c); return true; } @@ -301,10 +298,7 @@ bool SpellcheckCharAttribute::OutputDefault(UChar c, // SpellcheckWordIterator implementation: SpellcheckWordIterator::SpellcheckWordIterator() - : text_(NULL), - attribute_(NULL), - iterator_() { -} + : text_(nullptr), attribute_(nullptr), iterator_() {} SpellcheckWordIterator::~SpellcheckWordIterator() { Reset(); @@ -357,9 +351,10 @@ bool SpellcheckWordIterator::SetText(const base::char16* text, size_t length) { return true; } -bool SpellcheckWordIterator::GetNextWord(base::string16* word_string, - int* word_start, - int* word_length) { +SpellcheckWordIterator::WordIteratorStatus SpellcheckWordIterator::GetNextWord( + base::string16* word_string, + int* word_start, + int* word_length) { DCHECK(!!text_); word_string->clear(); @@ -367,28 +362,41 @@ bool SpellcheckWordIterator::GetNextWord(base::string16* word_string, *word_length = 0; if (!text_) { - return false; + return IS_END_OF_TEXT; } - // Find a word that can be checked for spelling. Our rule sets filter out - // invalid words (e.g. numbers and characters not supported by the - // spellchecker language) so this ubrk_getRuleStatus() call returns - // UBRK_WORD_NONE when this iterator finds an invalid word. So, we skip such - // words until we can find a valid word or reach the end of the input string. + // Find a word that can be checked for spelling or a character that can be + // skipped over. Rather than moving past a skippable character this returns + // IS_SKIPPABLE and defers handling the character to the calling function. 
while (iterator_->Advance()) { const size_t start = iterator_->prev(); const size_t length = iterator_->pos() - start; - if (iterator_->IsWord()) { - if (Normalize(start, length, word_string)) { + switch (iterator_->GetWordBreakStatus()) { + case base::i18n::BreakIterator::IS_WORD_BREAK: { + if (Normalize(start, length, word_string)) { + *word_start = start; + *word_length = length; + return IS_WORD; + } + break; + } + case base::i18n::BreakIterator::IS_SKIPPABLE_WORD: { + *word_string = iterator_->GetString(); *word_start = start; *word_length = length; - return true; + return IS_SKIPPABLE; + } + // |iterator_| is RULE_BASED so the break status should never be + // IS_LINE_OR_CHAR_BREAK. + case base::i18n::BreakIterator::IS_LINE_OR_CHAR_BREAK: { + NOTREACHED(); + break; } } } // There aren't any more words in the given text. - return false; + return IS_END_OF_TEXT; } void SpellcheckWordIterator::Reset() { diff --git a/chromium_src/chrome/renderer/spellchecker/spellcheck_worditerator.h b/chromium_src/chrome/renderer/spellchecker/spellcheck_worditerator.h index 7e07d29273a1..966137a324a2 100644 --- a/chromium_src/chrome/renderer/spellchecker/spellcheck_worditerator.h +++ b/chromium_src/chrome/renderer/spellchecker/spellcheck_worditerator.h @@ -9,6 +9,8 @@ #ifndef CHROME_RENDERER_SPELLCHECKER_SPELLCHECK_WORDITERATOR_H_ #define CHROME_RENDERER_SPELLCHECKER_SPELLCHECK_WORDITERATOR_H_ +#include + #include #include @@ -19,8 +21,8 @@ namespace base { namespace i18n { class BreakIterator; -} // namespace i18n -} // namespace base +} // namespace i18n +} // namespace base // A class which encapsulates language-specific operations used by // SpellcheckWordIterator. When we set the spellchecker language, this class @@ -112,6 +114,17 @@ class SpellcheckCharAttribute { // class SpellcheckWordIterator { public: + enum WordIteratorStatus { + // The end of a sequence of text that the iterator recognizes as characters + // that can form a word. 
+ IS_WORD, + // Non-word characters that the iterator can skip past, such as punctuation, + // whitespace, and characters from another character set. + IS_SKIPPABLE, + // The end of the text that the iterator is going over. + IS_END_OF_TEXT + }; + SpellcheckWordIterator(); ~SpellcheckWordIterator(); @@ -130,19 +143,30 @@ class SpellcheckWordIterator { // without calling Initialize(). bool SetText(const base::char16* text, size_t length); - // Retrieves a word (or a contraction), stores its copy to 'word_string', and - // stores the position and the length for input word to 'word_start'. Since - // this function normalizes the output word, the length of 'word_string' may - // be different from the 'word_length'. Therefore, when we call functions that - // changes the input text, such as string16::replace(), we need to use - // 'word_start' and 'word_length' as listed in the following snippet. + // Advances |iterator_| through |text_| and gets the current status of the + // word iterator within |text_|: // - // while(iterator.GetNextWord(&word, &offset, &length)) - // text.replace(offset, length, word); + // - Returns IS_WORD if the iterator just found the end of a sequence of word + // characters and it was able to normalize the sequence. This stores the + // normalized string into |word_string| and stores the position and length + // into |word_start| and |word_length| respectively. Keep in mind that + // since this function normalizes the output word, the length of + // |word_string| may be different from the |word_length|. 
Therefore, when + // we call functions that change the input text, such as + // string16::replace(), we need to use |word_start| and |word_length| as + // listed in the following snippet: // - bool GetNextWord(base::string16* word_string, - int* word_start, - int* word_length); + // while ((status = iterator.GetNextWord(&word, &offset, &length)) != IS_END_OF_TEXT) + // if (status == IS_WORD) text.replace(offset, length, word); + // + // - Returns IS_SKIPPABLE if the iterator just found a character that the + // iterator can skip past such as punctuation, whitespace, and characters + // from another character set. This stores the character, position, and + // length into |word_string|, |word_start|, and |word_length| respectively. + // + // - Returns IS_END_OF_TEXT if the iterator has reached the end of |text_|. + SpellcheckWordIterator::WordIteratorStatus + GetNextWord(base::string16* word_string, int* word_start, int* word_length); // Releases all the resources attached to this object. void Reset();