2014-10-07 13:18:44 +00:00
|
|
|
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
|
|
|
|
// Use of this source code is governed by a BSD-style license that can be
|
|
|
|
// found in the LICENSE file.
|
|
|
|
|
|
|
|
#include <math.h>
|
2017-09-10 15:10:26 +00:00
|
|
|
#include <objbase.h>
|
2014-10-07 13:18:44 +00:00
|
|
|
#include <sapi.h>
|
2018-04-18 17:20:15 +00:00
|
|
|
#include <wrl/client.h>
|
2018-08-24 21:16:39 +00:00
|
|
|
#include <sphelper.h>
|
2014-10-07 13:18:44 +00:00
|
|
|
|
|
|
|
#include "base/memory/singleton.h"
|
|
|
|
#include "base/strings/string_number_conversions.h"
|
|
|
|
#include "base/strings/utf_string_conversions.h"
|
|
|
|
#include "base/values.h"
|
2018-08-24 21:16:39 +00:00
|
|
|
#include "base/win/scoped_co_mem.h"
|
2014-10-07 13:18:44 +00:00
|
|
|
#include "chrome/browser/speech/tts_controller.h"
|
|
|
|
#include "chrome/browser/speech/tts_platform.h"
|
|
|
|
|
2018-08-24 21:16:39 +00:00
|
|
|
namespace {

// Names used when reading voice metadata from an ISpObjectToken: the sub-key
// that holds the attributes, and the string values looked up inside it.
constexpr wchar_t kAttributesKey[] = L"Attributes";
constexpr wchar_t kGenderValue[] = L"Gender";
constexpr wchar_t kLanguageValue[] = L"Language";

}  // anonymous namespace.
|
|
|
|
|
2014-10-07 13:18:44 +00:00
|
|
|
class TtsPlatformImplWin : public TtsPlatformImpl {
|
|
|
|
public:
|
2018-04-18 01:57:05 +00:00
|
|
|
bool PlatformImplAvailable() override { return true; }
|
2014-10-07 13:18:44 +00:00
|
|
|
|
2018-04-18 01:57:05 +00:00
|
|
|
bool Speak(int utterance_id,
|
|
|
|
const std::string& utterance,
|
|
|
|
const std::string& lang,
|
|
|
|
const VoiceData& voice,
|
|
|
|
const UtteranceContinuousParameters& params) override;
|
2014-10-07 13:18:44 +00:00
|
|
|
|
2015-12-07 22:23:01 +00:00
|
|
|
bool StopSpeaking() override;
|
2014-10-07 13:18:44 +00:00
|
|
|
|
2015-12-07 22:23:01 +00:00
|
|
|
void Pause() override;
|
2014-10-07 13:18:44 +00:00
|
|
|
|
2015-12-07 22:23:01 +00:00
|
|
|
void Resume() override;
|
2014-10-07 13:18:44 +00:00
|
|
|
|
2015-12-07 22:23:01 +00:00
|
|
|
bool IsSpeaking() override;
|
2014-10-07 13:18:44 +00:00
|
|
|
|
2015-12-07 22:23:01 +00:00
|
|
|
void GetVoices(std::vector<VoiceData>* out_voices) override;
|
2014-10-07 13:18:44 +00:00
|
|
|
|
|
|
|
// Get the single instance of this class.
|
|
|
|
static TtsPlatformImplWin* GetInstance();
|
|
|
|
|
|
|
|
static void __stdcall SpeechEventCallback(WPARAM w_param, LPARAM l_param);
|
|
|
|
|
|
|
|
private:
|
|
|
|
TtsPlatformImplWin();
|
2015-12-07 22:23:01 +00:00
|
|
|
~TtsPlatformImplWin() override {}
|
2014-10-07 13:18:44 +00:00
|
|
|
|
|
|
|
void OnSpeechEvent();
|
2018-08-24 21:16:39 +00:00
|
|
|
void SetVoiceFromName(const std::string& name);
|
2018-04-18 17:20:15 +00:00
|
|
|
Microsoft::WRL::ComPtr<ISpVoice> speech_synthesizer_;
|
2014-10-07 13:18:44 +00:00
|
|
|
|
|
|
|
// These apply to the current utterance only.
|
|
|
|
std::wstring utterance_;
|
|
|
|
int utterance_id_;
|
|
|
|
int prefix_len_;
|
|
|
|
ULONG stream_number_;
|
|
|
|
int char_position_;
|
|
|
|
bool paused_;
|
2018-08-24 21:16:39 +00:00
|
|
|
std::string last_voice_name_;
|
2015-12-07 18:02:06 +00:00
|
|
|
friend struct base::DefaultSingletonTraits<TtsPlatformImplWin>;
|
2014-10-07 13:18:44 +00:00
|
|
|
|
|
|
|
DISALLOW_COPY_AND_ASSIGN(TtsPlatformImplWin);
|
|
|
|
};
|
|
|
|
|
|
|
|
// static
|
|
|
|
TtsPlatformImpl* TtsPlatformImpl::GetInstance() {
|
|
|
|
return TtsPlatformImplWin::GetInstance();
|
|
|
|
}
|
|
|
|
|
2018-04-18 01:57:05 +00:00
|
|
|
bool TtsPlatformImplWin::Speak(int utterance_id,
|
|
|
|
const std::string& src_utterance,
|
|
|
|
const std::string& lang,
|
|
|
|
const VoiceData& voice,
|
|
|
|
const UtteranceContinuousParameters& params) {
|
2014-10-07 13:18:44 +00:00
|
|
|
std::wstring prefix;
|
|
|
|
std::wstring suffix;
|
|
|
|
|
2017-09-10 14:21:50 +00:00
|
|
|
if (!speech_synthesizer_.Get())
|
2014-10-07 13:18:44 +00:00
|
|
|
return false;
|
2018-08-24 21:16:39 +00:00
|
|
|
SetVoiceFromName(voice.name);
|
2014-10-07 13:18:44 +00:00
|
|
|
if (params.rate >= 0.0) {
|
|
|
|
// Map our multiplicative range of 0.1x to 10.0x onto Microsoft's
|
|
|
|
// linear range of -10 to 10:
|
|
|
|
// 0.1 -> -10
|
|
|
|
// 1.0 -> 0
|
|
|
|
// 10.0 -> 10
|
2016-03-09 19:16:12 +00:00
|
|
|
speech_synthesizer_->SetRate(static_cast<int32_t>(10 * log10(params.rate)));
|
2014-10-07 13:18:44 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
if (params.pitch >= 0.0) {
|
|
|
|
// The TTS api allows a range of -10 to 10 for speech pitch.
|
|
|
|
// TODO(dtseng): cleanup if we ever use any other properties that
|
|
|
|
// require xml.
|
|
|
|
std::wstring pitch_value =
|
|
|
|
base::IntToString16(static_cast<int>(params.pitch * 10 - 10));
|
|
|
|
prefix = L"<pitch absmiddle=\"" + pitch_value + L"\">";
|
|
|
|
suffix = L"</pitch>";
|
|
|
|
}
|
|
|
|
|
|
|
|
if (params.volume >= 0.0) {
|
|
|
|
// The TTS api allows a range of 0 to 100 for speech volume.
|
2016-03-09 19:16:12 +00:00
|
|
|
speech_synthesizer_->SetVolume(static_cast<uint16_t>(params.volume * 100));
|
2014-10-07 13:18:44 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// TODO(dmazzoni): convert SSML to SAPI xml. http://crbug.com/88072
|
|
|
|
|
|
|
|
utterance_ = base::UTF8ToWide(src_utterance);
|
|
|
|
utterance_id_ = utterance_id;
|
|
|
|
char_position_ = 0;
|
|
|
|
std::wstring merged_utterance = prefix + utterance_ + suffix;
|
|
|
|
prefix_len_ = prefix.size();
|
|
|
|
|
2018-04-18 01:57:05 +00:00
|
|
|
HRESULT result = speech_synthesizer_->Speak(merged_utterance.c_str(),
|
|
|
|
SPF_ASYNC, &stream_number_);
|
2014-10-07 13:18:44 +00:00
|
|
|
return (result == S_OK);
|
|
|
|
}
|
|
|
|
|
|
|
|
bool TtsPlatformImplWin::StopSpeaking() {
|
2017-09-10 14:21:50 +00:00
|
|
|
if (speech_synthesizer_.Get()) {
|
2014-10-07 13:18:44 +00:00
|
|
|
// Clear the stream number so that any further events relating to this
|
|
|
|
// utterance are ignored.
|
|
|
|
stream_number_ = 0;
|
|
|
|
|
|
|
|
if (IsSpeaking()) {
|
|
|
|
// Stop speech by speaking the empty string with the purge flag.
|
|
|
|
speech_synthesizer_->Speak(L"", SPF_ASYNC | SPF_PURGEBEFORESPEAK, NULL);
|
|
|
|
}
|
|
|
|
if (paused_) {
|
|
|
|
speech_synthesizer_->Resume();
|
|
|
|
paused_ = false;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
|
|
|
void TtsPlatformImplWin::Pause() {
|
2017-09-10 14:21:50 +00:00
|
|
|
if (speech_synthesizer_.Get() && utterance_id_ && !paused_) {
|
2014-10-07 13:18:44 +00:00
|
|
|
speech_synthesizer_->Pause();
|
|
|
|
paused_ = true;
|
2018-04-18 01:57:05 +00:00
|
|
|
TtsController::GetInstance()->OnTtsEvent(utterance_id_, TTS_EVENT_PAUSE,
|
|
|
|
char_position_, "");
|
2014-10-07 13:18:44 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
void TtsPlatformImplWin::Resume() {
|
2017-09-10 14:21:50 +00:00
|
|
|
if (speech_synthesizer_.Get() && utterance_id_ && paused_) {
|
2014-10-07 13:18:44 +00:00
|
|
|
speech_synthesizer_->Resume();
|
|
|
|
paused_ = false;
|
2018-04-18 01:57:05 +00:00
|
|
|
TtsController::GetInstance()->OnTtsEvent(utterance_id_, TTS_EVENT_RESUME,
|
|
|
|
char_position_, "");
|
2014-10-07 13:18:44 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
bool TtsPlatformImplWin::IsSpeaking() {
|
2017-09-10 14:21:50 +00:00
|
|
|
if (speech_synthesizer_.Get()) {
|
2014-10-07 13:18:44 +00:00
|
|
|
SPVOICESTATUS status;
|
|
|
|
HRESULT result = speech_synthesizer_->GetStatus(&status, NULL);
|
|
|
|
if (result == S_OK) {
|
|
|
|
if (status.dwRunningState == 0 || // 0 == waiting to speak
|
|
|
|
status.dwRunningState == SPRS_IS_SPEAKING) {
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
2018-04-18 01:57:05 +00:00
|
|
|
void TtsPlatformImplWin::GetVoices(std::vector<VoiceData>* out_voices) {
|
2018-08-24 21:16:39 +00:00
|
|
|
Microsoft::WRL::ComPtr<IEnumSpObjectTokens> voice_tokens;
|
|
|
|
unsigned long voice_count;
|
|
|
|
if (S_OK !=
|
|
|
|
SpEnumTokens(SPCAT_VOICES, NULL, NULL, voice_tokens.GetAddressOf()))
|
|
|
|
return;
|
|
|
|
if (S_OK != voice_tokens->GetCount(&voice_count))
|
|
|
|
return;
|
|
|
|
for (unsigned i = 0; i < voice_count; i++) {
|
|
|
|
VoiceData voice;
|
|
|
|
Microsoft::WRL::ComPtr<ISpObjectToken> voice_token;
|
|
|
|
if (S_OK != voice_tokens->Next(1, voice_token.GetAddressOf(), NULL))
|
|
|
|
return;
|
|
|
|
base::win::ScopedCoMem<WCHAR> description;
|
|
|
|
if (S_OK != SpGetDescription(voice_token.Get(), &description))
|
|
|
|
continue;
|
|
|
|
voice.name = base::WideToUTF8(description.get());
|
|
|
|
Microsoft::WRL::ComPtr<ISpDataKey> attributes;
|
|
|
|
if (S_OK != voice_token->OpenKey(kAttributesKey, attributes.GetAddressOf()))
|
|
|
|
continue;
|
|
|
|
base::win::ScopedCoMem<WCHAR> gender;
|
|
|
|
if (S_OK == attributes->GetStringValue(kGenderValue, &gender)) {
|
|
|
|
if (0 == _wcsicmp(gender.get(), L"male"))
|
|
|
|
voice.gender = TTS_GENDER_MALE;
|
|
|
|
else if (0 == _wcsicmp(gender.get(), L"female"))
|
|
|
|
voice.gender = TTS_GENDER_FEMALE;
|
|
|
|
}
|
|
|
|
base::win::ScopedCoMem<WCHAR> language;
|
|
|
|
if (S_OK == attributes->GetStringValue(kLanguageValue, &language)) {
|
|
|
|
int lcid_value;
|
|
|
|
base::HexStringToInt(base::WideToUTF8(language.get()), &lcid_value);
|
|
|
|
LCID lcid = MAKELCID(lcid_value, SORT_DEFAULT);
|
|
|
|
WCHAR locale_name[LOCALE_NAME_MAX_LENGTH] = {0};
|
|
|
|
LCIDToLocaleName(lcid, locale_name, LOCALE_NAME_MAX_LENGTH, 0);
|
|
|
|
voice.lang = base::WideToUTF8(locale_name);
|
|
|
|
}
|
|
|
|
voice.native = true;
|
|
|
|
voice.events.insert(TTS_EVENT_START);
|
|
|
|
voice.events.insert(TTS_EVENT_END);
|
|
|
|
voice.events.insert(TTS_EVENT_MARKER);
|
|
|
|
voice.events.insert(TTS_EVENT_WORD);
|
|
|
|
voice.events.insert(TTS_EVENT_SENTENCE);
|
|
|
|
voice.events.insert(TTS_EVENT_PAUSE);
|
|
|
|
voice.events.insert(TTS_EVENT_RESUME);
|
|
|
|
out_voices->push_back(voice);
|
|
|
|
}
|
2014-10-07 13:18:44 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
void TtsPlatformImplWin::OnSpeechEvent() {
|
|
|
|
TtsController* controller = TtsController::GetInstance();
|
|
|
|
SPEVENT event;
|
|
|
|
while (S_OK == speech_synthesizer_->GetEvents(1, &event, NULL)) {
|
|
|
|
if (event.ulStreamNum != stream_number_)
|
|
|
|
continue;
|
|
|
|
|
|
|
|
switch (event.eEventId) {
|
2018-04-18 01:57:05 +00:00
|
|
|
case SPEI_START_INPUT_STREAM:
|
|
|
|
controller->OnTtsEvent(utterance_id_, TTS_EVENT_START, 0,
|
|
|
|
std::string());
|
|
|
|
break;
|
|
|
|
case SPEI_END_INPUT_STREAM:
|
|
|
|
char_position_ = utterance_.size();
|
|
|
|
controller->OnTtsEvent(utterance_id_, TTS_EVENT_END, char_position_,
|
|
|
|
std::string());
|
|
|
|
break;
|
|
|
|
case SPEI_TTS_BOOKMARK:
|
|
|
|
controller->OnTtsEvent(utterance_id_, TTS_EVENT_MARKER, char_position_,
|
|
|
|
std::string());
|
|
|
|
break;
|
|
|
|
case SPEI_WORD_BOUNDARY:
|
|
|
|
char_position_ = static_cast<ULONG>(event.lParam) - prefix_len_;
|
|
|
|
controller->OnTtsEvent(utterance_id_, TTS_EVENT_WORD, char_position_,
|
|
|
|
std::string());
|
|
|
|
break;
|
|
|
|
case SPEI_SENTENCE_BOUNDARY:
|
|
|
|
char_position_ = static_cast<ULONG>(event.lParam) - prefix_len_;
|
|
|
|
controller->OnTtsEvent(utterance_id_, TTS_EVENT_SENTENCE,
|
|
|
|
char_position_, std::string());
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
break;
|
2014-10-07 13:18:44 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2018-08-24 21:16:39 +00:00
|
|
|
void TtsPlatformImplWin::SetVoiceFromName(const std::string& name) {
|
|
|
|
if (name.empty() || name == last_voice_name_)
|
|
|
|
return;
|
|
|
|
last_voice_name_ = name;
|
|
|
|
Microsoft::WRL::ComPtr<IEnumSpObjectTokens> voice_tokens;
|
|
|
|
unsigned long voice_count;
|
|
|
|
if (S_OK !=
|
|
|
|
SpEnumTokens(SPCAT_VOICES, NULL, NULL, voice_tokens.GetAddressOf()))
|
|
|
|
return;
|
|
|
|
if (S_OK != voice_tokens->GetCount(&voice_count))
|
|
|
|
return;
|
|
|
|
for (unsigned i = 0; i < voice_count; i++) {
|
|
|
|
Microsoft::WRL::ComPtr<ISpObjectToken> voice_token;
|
|
|
|
if (S_OK != voice_tokens->Next(1, voice_token.GetAddressOf(), NULL))
|
|
|
|
return;
|
|
|
|
base::win::ScopedCoMem<WCHAR> description;
|
|
|
|
if (S_OK != SpGetDescription(voice_token.Get(), &description))
|
|
|
|
continue;
|
|
|
|
if (name == base::WideToUTF8(description.get())) {
|
|
|
|
speech_synthesizer_->SetVoice(voice_token.Get());
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
2014-10-07 13:18:44 +00:00
|
|
|
// Creates the SAPI voice object and, if that succeeded, subscribes to the
// stream, bookmark and boundary events that OnSpeechEvent() handles. When
// creation fails the synthesizer pointer stays null and the public methods
// check for that.
TtsPlatformImplWin::TtsPlatformImplWin()
    : utterance_id_(0),
      prefix_len_(0),
      stream_number_(0),
      char_position_(0),
      paused_(false) {
  ::CoCreateInstance(CLSID_SpVoice, nullptr, CLSCTX_ALL,
                     IID_PPV_ARGS(&speech_synthesizer_));
  if (!speech_synthesizer_.Get())
    return;

  // The same mask is used for the events of interest and the queued events.
  const ULONGLONG interesting_events =
      SPFEI(SPEI_START_INPUT_STREAM) | SPFEI(SPEI_TTS_BOOKMARK) |
      SPFEI(SPEI_WORD_BOUNDARY) | SPFEI(SPEI_SENTENCE_BOUNDARY) |
      SPFEI(SPEI_END_INPUT_STREAM);
  speech_synthesizer_->SetInterest(interesting_events, interesting_events);

  // The callback ignores its arguments, so both are passed as 0.
  speech_synthesizer_->SetNotifyCallbackFunction(
      TtsPlatformImplWin::SpeechEventCallback, 0, 0);
}
|
|
|
|
|
|
|
|
// static
|
|
|
|
TtsPlatformImplWin* TtsPlatformImplWin::GetInstance() {
|
2015-12-07 22:23:01 +00:00
|
|
|
return base::Singleton<TtsPlatformImplWin,
|
|
|
|
base::LeakySingletonTraits<TtsPlatformImplWin>>::get();
|
2014-10-07 13:18:44 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
// static
|
2018-04-18 01:57:05 +00:00
|
|
|
void TtsPlatformImplWin::SpeechEventCallback(WPARAM w_param, LPARAM l_param) {
|
2014-10-07 13:18:44 +00:00
|
|
|
GetInstance()->OnSpeechEvent();
|
2015-12-07 18:02:06 +00:00
|
|
|
}
|