feat(worditerator): bump up worditerator into latest
This commit is contained in:
parent
6c6fa3190f
commit
e869c27041
3 changed files with 111 additions and 67 deletions
|
@ -9,6 +9,7 @@
|
||||||
|
|
||||||
#include "atom/common/native_mate_converters/string16_converter.h"
|
#include "atom/common/native_mate_converters/string16_converter.h"
|
||||||
#include "base/logging.h"
|
#include "base/logging.h"
|
||||||
|
#include "chrome/renderer/spellchecker/spellcheck_worditerator.h"
|
||||||
#include "native_mate/converter.h"
|
#include "native_mate/converter.h"
|
||||||
#include "native_mate/dictionary.h"
|
#include "native_mate/dictionary.h"
|
||||||
#include "third_party/icu/source/common/unicode/uscript.h"
|
#include "third_party/icu/source/common/unicode/uscript.h"
|
||||||
|
@ -83,16 +84,14 @@ void SpellCheckClient::RequestCheckingOfText(
|
||||||
completionCallback->DidFinishCheckingText(results);
|
completionCallback->DidFinishCheckingText(results);
|
||||||
}
|
}
|
||||||
|
|
||||||
void SpellCheckClient::ShowSpellingUI(bool show) {
|
void SpellCheckClient::ShowSpellingUI(bool show) {}
|
||||||
}
|
|
||||||
|
|
||||||
bool SpellCheckClient::IsShowingSpellingUI() {
|
bool SpellCheckClient::IsShowingSpellingUI() {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
void SpellCheckClient::UpdateSpellingUIWithMisspelledWord(
|
void SpellCheckClient::UpdateSpellingUIWithMisspelledWord(
|
||||||
const blink::WebString& word) {
|
const blink::WebString& word) {}
|
||||||
}
|
|
||||||
|
|
||||||
void SpellCheckClient::SpellCheckText(
|
void SpellCheckClient::SpellCheckText(
|
||||||
const base::string16& text,
|
const base::string16& text,
|
||||||
|
@ -103,9 +102,9 @@ void SpellCheckClient::SpellCheckText(
|
||||||
|
|
||||||
if (!text_iterator_.IsInitialized() &&
|
if (!text_iterator_.IsInitialized() &&
|
||||||
!text_iterator_.Initialize(&character_attributes_, true)) {
|
!text_iterator_.Initialize(&character_attributes_, true)) {
|
||||||
// We failed to initialize text_iterator_, return as spelled correctly.
|
// We failed to initialize text_iterator_, return as spelled correctly.
|
||||||
VLOG(1) << "Failed to initialize SpellcheckWordIterator";
|
VLOG(1) << "Failed to initialize SpellcheckWordIterator";
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (!contraction_iterator_.IsInitialized() &&
|
if (!contraction_iterator_.IsInitialized() &&
|
||||||
|
@ -121,7 +120,13 @@ void SpellCheckClient::SpellCheckText(
|
||||||
base::string16 word;
|
base::string16 word;
|
||||||
int word_start;
|
int word_start;
|
||||||
int word_length;
|
int word_length;
|
||||||
while (text_iterator_.GetNextWord(&word, &word_start, &word_length)) {
|
for (auto status =
|
||||||
|
text_iterator_.GetNextWord(&word, &word_start, &word_length);
|
||||||
|
status != SpellcheckWordIterator::IS_END_OF_TEXT;
|
||||||
|
status = text_iterator_.GetNextWord(&word, &word_start, &word_length)) {
|
||||||
|
if (status == SpellcheckWordIterator::IS_SKIPPABLE)
|
||||||
|
continue;
|
||||||
|
|
||||||
// Found a word (or a contraction) that the spellchecker can check the
|
// Found a word (or a contraction) that the spellchecker can check the
|
||||||
// spelling of.
|
// spelling of.
|
||||||
if (SpellCheckWord(scope, word))
|
if (SpellCheckWord(scope, word))
|
||||||
|
@ -145,7 +150,7 @@ void SpellCheckClient::SpellCheckText(
|
||||||
bool SpellCheckClient::SpellCheckWord(
|
bool SpellCheckClient::SpellCheckWord(
|
||||||
const SpellCheckScope& scope,
|
const SpellCheckScope& scope,
|
||||||
const base::string16& word_to_check) const {
|
const base::string16& word_to_check) const {
|
||||||
DCHECK(!scope.spell_check_.IsEmpty());
|
DCHECK(!scope.spell_check_.IsEmpty());
|
||||||
|
|
||||||
v8::Local<v8::Value> word = mate::ConvertToV8(isolate_, word_to_check);
|
v8::Local<v8::Value> word = mate::ConvertToV8(isolate_, word_to_check);
|
||||||
v8::Local<v8::Value> result =
|
v8::Local<v8::Value> result =
|
||||||
|
@ -171,7 +176,14 @@ bool SpellCheckClient::IsValidContraction(const SpellCheckScope& scope,
|
||||||
int word_start;
|
int word_start;
|
||||||
int word_length;
|
int word_length;
|
||||||
|
|
||||||
while (contraction_iterator_.GetNextWord(&word, &word_start, &word_length)) {
|
for (auto status =
|
||||||
|
contraction_iterator_.GetNextWord(&word, &word_start, &word_length);
|
||||||
|
status != SpellcheckWordIterator::IS_END_OF_TEXT;
|
||||||
|
status = contraction_iterator_.GetNextWord(&word, &word_start,
|
||||||
|
&word_length)) {
|
||||||
|
if (status == SpellcheckWordIterator::IS_SKIPPABLE)
|
||||||
|
continue;
|
||||||
|
|
||||||
if (!SpellCheckWord(scope, word))
|
if (!SpellCheckWord(scope, word))
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
|
@ -7,10 +7,13 @@
|
||||||
#include "chrome/renderer/spellchecker/spellcheck_worditerator.h"
|
#include "chrome/renderer/spellchecker/spellcheck_worditerator.h"
|
||||||
|
|
||||||
#include <map>
|
#include <map>
|
||||||
|
#include <memory>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
#include <utility>
|
||||||
|
|
||||||
#include "base/i18n/break_iterator.h"
|
#include "base/i18n/break_iterator.h"
|
||||||
#include "base/logging.h"
|
#include "base/logging.h"
|
||||||
|
#include "base/macros.h"
|
||||||
#include "base/strings/stringprintf.h"
|
#include "base/strings/stringprintf.h"
|
||||||
#include "base/strings/utf_string_conversions.h"
|
#include "base/strings/utf_string_conversions.h"
|
||||||
#include "third_party/icu/source/common/unicode/normlzr.h"
|
#include "third_party/icu/source/common/unicode/normlzr.h"
|
||||||
|
@ -21,11 +24,9 @@
|
||||||
// SpellcheckCharAttribute implementation:
|
// SpellcheckCharAttribute implementation:
|
||||||
|
|
||||||
SpellcheckCharAttribute::SpellcheckCharAttribute()
|
SpellcheckCharAttribute::SpellcheckCharAttribute()
|
||||||
: script_code_(USCRIPT_LATIN) {
|
: script_code_(USCRIPT_LATIN) {}
|
||||||
}
|
|
||||||
|
|
||||||
SpellcheckCharAttribute::~SpellcheckCharAttribute() {
|
SpellcheckCharAttribute::~SpellcheckCharAttribute() {}
|
||||||
}
|
|
||||||
|
|
||||||
void SpellcheckCharAttribute::SetDefaultLanguage(const std::string& language) {
|
void SpellcheckCharAttribute::SetDefaultLanguage(const std::string& language) {
|
||||||
CreateRuleSets(language);
|
CreateRuleSets(language);
|
||||||
|
@ -33,8 +34,8 @@ void SpellcheckCharAttribute::SetDefaultLanguage(const std::string& language) {
|
||||||
|
|
||||||
base::string16 SpellcheckCharAttribute::GetRuleSet(
|
base::string16 SpellcheckCharAttribute::GetRuleSet(
|
||||||
bool allow_contraction) const {
|
bool allow_contraction) const {
|
||||||
return allow_contraction ?
|
return allow_contraction ? ruleset_allow_contraction_
|
||||||
ruleset_allow_contraction_ : ruleset_disallow_contraction_;
|
: ruleset_disallow_contraction_;
|
||||||
}
|
}
|
||||||
|
|
||||||
void SpellcheckCharAttribute::CreateRuleSets(const std::string& language) {
|
void SpellcheckCharAttribute::CreateRuleSets(const std::string& language) {
|
||||||
|
@ -160,8 +161,13 @@ void SpellcheckCharAttribute::CreateRuleSets(const std::string& language) {
|
||||||
|
|
||||||
// Treat numbers as word characters except for Arabic and Hebrew.
|
// Treat numbers as word characters except for Arabic and Hebrew.
|
||||||
const char* aletter_extra = " [0123456789]";
|
const char* aletter_extra = " [0123456789]";
|
||||||
if (script_code_ == USCRIPT_HEBREW || script_code_ == USCRIPT_ARABIC)
|
if (script_code_ == USCRIPT_HEBREW)
|
||||||
aletter_extra = "";
|
aletter_extra = "";
|
||||||
|
else if (script_code_ == USCRIPT_ARABIC)
|
||||||
|
// When "script=Arabic", it does not include tatweel, which is
|
||||||
|
// "script=Common" so add it back. Otherwise, it creates unwanted
|
||||||
|
// word breaks.
|
||||||
|
aletter_extra = " [\\u0640]";
|
||||||
|
|
||||||
const char kMidLetterExtra[] = "";
|
const char kMidLetterExtra[] = "";
|
||||||
// For Hebrew, treat single/double quotation marks as MidLetter.
|
// For Hebrew, treat single/double quotation marks as MidLetter.
|
||||||
|
@ -178,19 +184,11 @@ void SpellcheckCharAttribute::CreateRuleSets(const std::string& language) {
|
||||||
const char kDisallowContraction[] = "";
|
const char kDisallowContraction[] = "";
|
||||||
|
|
||||||
ruleset_allow_contraction_ = base::ASCIIToUTF16(
|
ruleset_allow_contraction_ = base::ASCIIToUTF16(
|
||||||
base::StringPrintf(kRuleTemplate,
|
base::StringPrintf(kRuleTemplate, aletter, aletter_extra, midletter_extra,
|
||||||
aletter,
|
aletter_plus, kAllowContraction));
|
||||||
aletter_extra,
|
|
||||||
midletter_extra,
|
|
||||||
aletter_plus,
|
|
||||||
kAllowContraction));
|
|
||||||
ruleset_disallow_contraction_ = base::ASCIIToUTF16(
|
ruleset_disallow_contraction_ = base::ASCIIToUTF16(
|
||||||
base::StringPrintf(kRuleTemplate,
|
base::StringPrintf(kRuleTemplate, aletter, aletter_extra, midletter_extra,
|
||||||
aletter,
|
aletter_plus, kDisallowContraction));
|
||||||
aletter_extra,
|
|
||||||
midletter_extra,
|
|
||||||
aletter_plus,
|
|
||||||
kDisallowContraction));
|
|
||||||
}
|
}
|
||||||
|
|
||||||
bool SpellcheckCharAttribute::OutputChar(UChar c,
|
bool SpellcheckCharAttribute::OutputChar(UChar c,
|
||||||
|
@ -214,12 +212,11 @@ bool SpellcheckCharAttribute::OutputChar(UChar c,
|
||||||
|
|
||||||
bool SpellcheckCharAttribute::OutputArabic(UChar c,
|
bool SpellcheckCharAttribute::OutputArabic(UChar c,
|
||||||
base::string16* output) const {
|
base::string16* output) const {
|
||||||
// Discard characters not from Arabic alphabets. We also discard vowel marks
|
// Include non-Arabic characters (which should trigger a spelling error)
|
||||||
// of Arabic (Damma, Fatha, Kasra, etc.) to prevent our Arabic dictionary from
|
// and Arabic characters excluding vowel marks and class "Lm".
|
||||||
// marking an Arabic word including vowel marks as misspelled. (We need to
|
// We filter the latter because, while they are "letters", they are
|
||||||
// check these vowel marks manually and filter them out since their script
|
// optional and so don't affect the correctness of the rest of the word.
|
||||||
// codes are USCRIPT_ARABIC.)
|
if (!(0x0600 <= c && c <= 0x06FF) || (u_isalpha(c) && c != 0x0640))
|
||||||
if (0x0621 <= c && c <= 0x064D)
|
|
||||||
output->push_back(c);
|
output->push_back(c);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
@ -281,8 +278,8 @@ bool SpellcheckCharAttribute::OutputHebrew(UChar c,
|
||||||
// USCRIPT_HEBREW.)
|
// USCRIPT_HEBREW.)
|
||||||
// Pass through ASCII single/double quotation marks and Hebrew Geresh and
|
// Pass through ASCII single/double quotation marks and Hebrew Geresh and
|
||||||
// Gershayim.
|
// Gershayim.
|
||||||
if ((0x05D0 <= c && c <= 0x05EA) || c == 0x22 || c == 0x27 ||
|
if ((0x05D0 <= c && c <= 0x05EA) || c == 0x22 || c == 0x27 || c == 0x05F4 ||
|
||||||
c == 0x05F4 || c == 0x05F3)
|
c == 0x05F3)
|
||||||
output->push_back(c);
|
output->push_back(c);
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
@ -301,10 +298,7 @@ bool SpellcheckCharAttribute::OutputDefault(UChar c,
|
||||||
// SpellcheckWordIterator implementation:
|
// SpellcheckWordIterator implementation:
|
||||||
|
|
||||||
SpellcheckWordIterator::SpellcheckWordIterator()
|
SpellcheckWordIterator::SpellcheckWordIterator()
|
||||||
: text_(NULL),
|
: text_(nullptr), attribute_(nullptr), iterator_() {}
|
||||||
attribute_(NULL),
|
|
||||||
iterator_() {
|
|
||||||
}
|
|
||||||
|
|
||||||
SpellcheckWordIterator::~SpellcheckWordIterator() {
|
SpellcheckWordIterator::~SpellcheckWordIterator() {
|
||||||
Reset();
|
Reset();
|
||||||
|
@ -357,9 +351,10 @@ bool SpellcheckWordIterator::SetText(const base::char16* text, size_t length) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
bool SpellcheckWordIterator::GetNextWord(base::string16* word_string,
|
SpellcheckWordIterator::WordIteratorStatus SpellcheckWordIterator::GetNextWord(
|
||||||
int* word_start,
|
base::string16* word_string,
|
||||||
int* word_length) {
|
int* word_start,
|
||||||
|
int* word_length) {
|
||||||
DCHECK(!!text_);
|
DCHECK(!!text_);
|
||||||
|
|
||||||
word_string->clear();
|
word_string->clear();
|
||||||
|
@ -367,28 +362,41 @@ bool SpellcheckWordIterator::GetNextWord(base::string16* word_string,
|
||||||
*word_length = 0;
|
*word_length = 0;
|
||||||
|
|
||||||
if (!text_) {
|
if (!text_) {
|
||||||
return false;
|
return IS_END_OF_TEXT;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Find a word that can be checked for spelling. Our rule sets filter out
|
// Find a word that can be checked for spelling or a character that can be
|
||||||
// invalid words (e.g. numbers and characters not supported by the
|
// skipped over. Rather than moving past a skippable character this returns
|
||||||
// spellchecker language) so this ubrk_getRuleStatus() call returns
|
// IS_SKIPPABLE and defers handling the character to the calling function.
|
||||||
// UBRK_WORD_NONE when this iterator finds an invalid word. So, we skip such
|
|
||||||
// words until we can find a valid word or reach the end of the input string.
|
|
||||||
while (iterator_->Advance()) {
|
while (iterator_->Advance()) {
|
||||||
const size_t start = iterator_->prev();
|
const size_t start = iterator_->prev();
|
||||||
const size_t length = iterator_->pos() - start;
|
const size_t length = iterator_->pos() - start;
|
||||||
if (iterator_->IsWord()) {
|
switch (iterator_->GetWordBreakStatus()) {
|
||||||
if (Normalize(start, length, word_string)) {
|
case base::i18n::BreakIterator::IS_WORD_BREAK: {
|
||||||
|
if (Normalize(start, length, word_string)) {
|
||||||
|
*word_start = start;
|
||||||
|
*word_length = length;
|
||||||
|
return IS_WORD;
|
||||||
|
}
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
case base::i18n::BreakIterator::IS_SKIPPABLE_WORD: {
|
||||||
|
*word_string = iterator_->GetString();
|
||||||
*word_start = start;
|
*word_start = start;
|
||||||
*word_length = length;
|
*word_length = length;
|
||||||
return true;
|
return IS_SKIPPABLE;
|
||||||
|
}
|
||||||
|
// |iterator_| is RULE_BASED so the break status should never be
|
||||||
|
// IS_LINE_OR_CHAR_BREAK.
|
||||||
|
case base::i18n::BreakIterator::IS_LINE_OR_CHAR_BREAK: {
|
||||||
|
NOTREACHED();
|
||||||
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// There aren't any more words in the given text.
|
// There aren't any more words in the given text.
|
||||||
return false;
|
return IS_END_OF_TEXT;
|
||||||
}
|
}
|
||||||
|
|
||||||
void SpellcheckWordIterator::Reset() {
|
void SpellcheckWordIterator::Reset() {
|
||||||
|
|
|
@ -9,6 +9,8 @@
|
||||||
#ifndef CHROME_RENDERER_SPELLCHECKER_SPELLCHECK_WORDITERATOR_H_
|
#ifndef CHROME_RENDERER_SPELLCHECKER_SPELLCHECK_WORDITERATOR_H_
|
||||||
#define CHROME_RENDERER_SPELLCHECKER_SPELLCHECK_WORDITERATOR_H_
|
#define CHROME_RENDERER_SPELLCHECKER_SPELLCHECK_WORDITERATOR_H_
|
||||||
|
|
||||||
|
#include <stddef.h>
|
||||||
|
|
||||||
#include <memory>
|
#include <memory>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
|
@ -19,8 +21,8 @@
|
||||||
namespace base {
|
namespace base {
|
||||||
namespace i18n {
|
namespace i18n {
|
||||||
class BreakIterator;
|
class BreakIterator;
|
||||||
} // namespace i18n
|
} // namespace i18n
|
||||||
} // namespace base
|
} // namespace base
|
||||||
|
|
||||||
// A class which encapsulates language-specific operations used by
|
// A class which encapsulates language-specific operations used by
|
||||||
// SpellcheckWordIterator. When we set the spellchecker language, this class
|
// SpellcheckWordIterator. When we set the spellchecker language, this class
|
||||||
|
@ -112,6 +114,17 @@ class SpellcheckCharAttribute {
|
||||||
//
|
//
|
||||||
class SpellcheckWordIterator {
|
class SpellcheckWordIterator {
|
||||||
public:
|
public:
|
||||||
|
enum WordIteratorStatus {
|
||||||
|
// The end of a sequence of text that the iterator recognizes as characters
|
||||||
|
// that can form a word.
|
||||||
|
IS_WORD,
|
||||||
|
// Non-word characters that the iterator can skip past, such as punctuation,
|
||||||
|
// whitespace, and characters from another character set.
|
||||||
|
IS_SKIPPABLE,
|
||||||
|
// The end of the text that the iterator is going over.
|
||||||
|
IS_END_OF_TEXT
|
||||||
|
};
|
||||||
|
|
||||||
SpellcheckWordIterator();
|
SpellcheckWordIterator();
|
||||||
~SpellcheckWordIterator();
|
~SpellcheckWordIterator();
|
||||||
|
|
||||||
|
@ -130,19 +143,30 @@ class SpellcheckWordIterator {
|
||||||
// without calling Initialize().
|
// without calling Initialize().
|
||||||
bool SetText(const base::char16* text, size_t length);
|
bool SetText(const base::char16* text, size_t length);
|
||||||
|
|
||||||
// Retrieves a word (or a contraction), stores its copy to 'word_string', and
|
// Advances |iterator_| through |text_| and gets the current status of the
|
||||||
// stores the position and the length for input word to 'word_start'. Since
|
// word iterator within |text_|:
|
||||||
// this function normalizes the output word, the length of 'word_string' may
|
|
||||||
// be different from the 'word_length'. Therefore, when we call functions that
|
|
||||||
// changes the input text, such as string16::replace(), we need to use
|
|
||||||
// 'word_start' and 'word_length' as listed in the following snippet.
|
|
||||||
//
|
//
|
||||||
// while(iterator.GetNextWord(&word, &offset, &length))
|
// - Returns IS_WORD if the iterator just found the end of a sequence of word
|
||||||
// text.replace(offset, length, word);
|
// characters and it was able to normalize the sequence. This stores the
|
||||||
|
// normalized string into |word_string| and stores the position and length
|
||||||
|
// into |word_start| and |word_length| respectively. Keep in mind that
|
||||||
|
// since this function normalizes the output word, the length of
|
||||||
|
// |word_string| may be different from the |word_length|. Therefore, when
|
||||||
|
// we call functions that change the input text, such as
|
||||||
|
// string16::replace(), we need to use |word_start| and |word_length| as
|
||||||
|
// listed in the following snippet:
|
||||||
//
|
//
|
||||||
bool GetNextWord(base::string16* word_string,
|
// while (iterator.GetNextWord(&word, &offset, &length) != IS_END_OF_TEXT)
|
||||||
int* word_start,
|
// text.replace(offset, length, word);
|
||||||
int* word_length);
|
//
|
||||||
|
// - Returns IS_SKIPPABLE if the iterator just found a character that the
|
||||||
|
// iterator can skip past such as punctuation, whitespace, and characters
|
||||||
|
// from another character set. This stores the character, position, and
|
||||||
|
// length into |word_string|, |word_start|, and |word_length| respectively.
|
||||||
|
//
|
||||||
|
// - Returns IS_END_OF_TEXT if the iterator has reached the end of |text_|.
|
||||||
|
SpellcheckWordIterator::WordIteratorStatus
|
||||||
|
GetNextWord(base::string16* word_string, int* word_start, int* word_length);
|
||||||
|
|
||||||
// Releases all the resources attached to this object.
|
// Releases all the resources attached to this object.
|
||||||
void Reset();
|
void Reset();
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue