electron/chromium_src/chrome/browser/speech/tts_linux.cc

358 lines
11 KiB
C++
Raw Normal View History

// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
#include <math.h>
#include <map>
#include "base/command_line.h"
#include "base/debug/leak_annotations.h"
#include "base/memory/scoped_ptr.h"
#include "base/memory/singleton.h"
#include "base/synchronization/lock.h"
#include "chrome/browser/speech/tts_platform.h"
#include "content/public/browser/browser_thread.h"
#include "content/public/common/content_switches.h"
#include "library_loaders/libspeechd.h"
using content::BrowserThread;
namespace {
const char kNotSupportedError[] =
"Native speech synthesis not supported on this platform.";
struct SPDChromeVoice {
std::string name;
std::string module;
};
} // namespace
class TtsPlatformImplLinux : public TtsPlatformImpl {
public:
bool PlatformImplAvailable() override;
bool Speak(int utterance_id,
const std::string& utterance,
const std::string& lang,
const VoiceData& voice,
const UtteranceContinuousParameters& params) override;
bool StopSpeaking() override;
void Pause() override;
void Resume() override;
bool IsSpeaking() override;
void GetVoices(std::vector<VoiceData>* out_voices) override;
void OnSpeechEvent(SPDNotificationType type);
// Get the single instance of this class.
static TtsPlatformImplLinux* GetInstance();
private:
TtsPlatformImplLinux();
~TtsPlatformImplLinux() override;
// Initiate the connection with the speech dispatcher.
void Initialize();
// Resets the connection with speech dispatcher.
void Reset();
static void NotificationCallback(size_t msg_id,
size_t client_id,
SPDNotificationType type);
static void IndexMarkCallback(size_t msg_id,
size_t client_id,
SPDNotificationType state,
char* index_mark);
static SPDNotificationType current_notification_;
base::Lock initialization_lock_;
LibSpeechdLoader libspeechd_loader_;
SPDConnection* conn_;
// These apply to the current utterance only.
std::string utterance_;
int utterance_id_;
// Map a string composed of a voicename and module to the voicename. Used to
// uniquely identify a voice across all available modules.
scoped_ptr<std::map<std::string, SPDChromeVoice> > all_native_voices_;
friend struct base::DefaultSingletonTraits<TtsPlatformImplLinux>;
DISALLOW_COPY_AND_ASSIGN(TtsPlatformImplLinux);
};
// static
SPDNotificationType TtsPlatformImplLinux::current_notification_ =
SPD_EVENT_END;
TtsPlatformImplLinux::TtsPlatformImplLinux()
: utterance_id_(0) {
const base::CommandLine& command_line =
*base::CommandLine::ForCurrentProcess();
if (!command_line.HasSwitch(switches::kEnableSpeechDispatcher))
return;
BrowserThread::PostTask(BrowserThread::FILE,
FROM_HERE,
base::Bind(&TtsPlatformImplLinux::Initialize,
base::Unretained(this)));
}
void TtsPlatformImplLinux::Initialize() {
base::AutoLock lock(initialization_lock_);
if (!libspeechd_loader_.Load("libspeechd.so.2"))
return;
{
// spd_open has memory leaks which are hard to suppress.
// http://crbug.com/317360
ANNOTATE_SCOPED_MEMORY_LEAK;
conn_ = libspeechd_loader_.spd_open(
"chrome", "extension_api", NULL, SPD_MODE_THREADED);
}
if (!conn_)
return;
// Register callbacks for all events.
conn_->callback_begin =
conn_->callback_end =
conn_->callback_cancel =
conn_->callback_pause =
conn_->callback_resume =
&NotificationCallback;
conn_->callback_im = &IndexMarkCallback;
libspeechd_loader_.spd_set_notification_on(conn_, SPD_BEGIN);
libspeechd_loader_.spd_set_notification_on(conn_, SPD_END);
libspeechd_loader_.spd_set_notification_on(conn_, SPD_CANCEL);
libspeechd_loader_.spd_set_notification_on(conn_, SPD_PAUSE);
libspeechd_loader_.spd_set_notification_on(conn_, SPD_RESUME);
}
TtsPlatformImplLinux::~TtsPlatformImplLinux() {
base::AutoLock lock(initialization_lock_);
if (conn_) {
libspeechd_loader_.spd_close(conn_);
conn_ = NULL;
}
}
void TtsPlatformImplLinux::Reset() {
base::AutoLock lock(initialization_lock_);
if (conn_)
libspeechd_loader_.spd_close(conn_);
conn_ = libspeechd_loader_.spd_open(
"chrome", "extension_api", NULL, SPD_MODE_THREADED);
}
bool TtsPlatformImplLinux::PlatformImplAvailable() {
if (!initialization_lock_.Try())
return false;
bool result = libspeechd_loader_.loaded() && (conn_ != NULL);
initialization_lock_.Release();
return result;
}
bool TtsPlatformImplLinux::Speak(
int utterance_id,
const std::string& utterance,
const std::string& lang,
const VoiceData& voice,
const UtteranceContinuousParameters& params) {
if (!PlatformImplAvailable()) {
error_ = kNotSupportedError;
return false;
}
// Speech dispatcher's speech params are around 3x at either limit.
float rate = params.rate > 3 ? 3 : params.rate;
rate = params.rate < 0.334 ? 0.334 : rate;
float pitch = params.pitch > 3 ? 3 : params.pitch;
pitch = params.pitch < 0.334 ? 0.334 : pitch;
std::map<std::string, SPDChromeVoice>::iterator it =
all_native_voices_->find(voice.name);
if (it != all_native_voices_->end()) {
libspeechd_loader_.spd_set_output_module(conn_, it->second.module.c_str());
libspeechd_loader_.spd_set_synthesis_voice(conn_, it->second.name.c_str());
}
// Map our multiplicative range to Speech Dispatcher's linear range.
// .334 = -100.
// 3 = 100.
libspeechd_loader_.spd_set_voice_rate(conn_, 100 * log10(rate) / log10(3));
libspeechd_loader_.spd_set_voice_pitch(conn_, 100 * log10(pitch) / log10(3));
// Support languages other than the default
if (!lang.empty())
libspeechd_loader_.spd_set_language(conn_, lang.c_str());
utterance_ = utterance;
utterance_id_ = utterance_id;
if (libspeechd_loader_.spd_say(conn_, SPD_TEXT, utterance.c_str()) == -1) {
Reset();
return false;
}
return true;
}
bool TtsPlatformImplLinux::StopSpeaking() {
if (!PlatformImplAvailable())
return false;
if (libspeechd_loader_.spd_stop(conn_) == -1) {
Reset();
return false;
}
return true;
}
void TtsPlatformImplLinux::Pause() {
if (!PlatformImplAvailable())
return;
libspeechd_loader_.spd_pause(conn_);
}
void TtsPlatformImplLinux::Resume() {
if (!PlatformImplAvailable())
return;
libspeechd_loader_.spd_resume(conn_);
}
bool TtsPlatformImplLinux::IsSpeaking() {
return current_notification_ == SPD_EVENT_BEGIN;
}
void TtsPlatformImplLinux::GetVoices(
std::vector<VoiceData>* out_voices) {
if (!all_native_voices_.get()) {
all_native_voices_.reset(new std::map<std::string, SPDChromeVoice>());
char** modules = libspeechd_loader_.spd_list_modules(conn_);
if (!modules)
return;
for (int i = 0; modules[i]; i++) {
char* module = modules[i];
libspeechd_loader_.spd_set_output_module(conn_, module);
SPDVoice** native_voices =
libspeechd_loader_.spd_list_synthesis_voices(conn_);
if (!native_voices) {
free(module);
continue;
}
for (int j = 0; native_voices[j]; j++) {
SPDVoice* native_voice = native_voices[j];
SPDChromeVoice native_data;
native_data.name = native_voice->name;
native_data.module = module;
std::string key;
key.append(native_data.name);
key.append(" ");
key.append(native_data.module);
all_native_voices_->insert(
std::pair<std::string, SPDChromeVoice>(key, native_data));
free(native_voices[j]);
}
free(modules[i]);
}
}
for (std::map<std::string, SPDChromeVoice>::iterator it =
all_native_voices_->begin();
it != all_native_voices_->end();
it++) {
out_voices->push_back(VoiceData());
VoiceData& voice = out_voices->back();
voice.native = true;
voice.name = it->first;
voice.events.insert(TTS_EVENT_START);
voice.events.insert(TTS_EVENT_END);
voice.events.insert(TTS_EVENT_CANCELLED);
voice.events.insert(TTS_EVENT_MARKER);
voice.events.insert(TTS_EVENT_PAUSE);
voice.events.insert(TTS_EVENT_RESUME);
}
}
void TtsPlatformImplLinux::OnSpeechEvent(SPDNotificationType type) {
TtsController* controller = TtsController::GetInstance();
switch (type) {
case SPD_EVENT_BEGIN:
controller->OnTtsEvent(utterance_id_, TTS_EVENT_START, 0, std::string());
break;
case SPD_EVENT_RESUME:
controller->OnTtsEvent(utterance_id_, TTS_EVENT_RESUME, 0, std::string());
break;
case SPD_EVENT_END:
controller->OnTtsEvent(
utterance_id_, TTS_EVENT_END, utterance_.size(), std::string());
break;
case SPD_EVENT_PAUSE:
controller->OnTtsEvent(
utterance_id_, TTS_EVENT_PAUSE, utterance_.size(), std::string());
break;
case SPD_EVENT_CANCEL:
controller->OnTtsEvent(
utterance_id_, TTS_EVENT_CANCELLED, 0, std::string());
break;
case SPD_EVENT_INDEX_MARK:
controller->OnTtsEvent(utterance_id_, TTS_EVENT_MARKER, 0, std::string());
break;
}
}
// static
void TtsPlatformImplLinux::NotificationCallback(
size_t msg_id, size_t client_id, SPDNotificationType type) {
// We run Speech Dispatcher in threaded mode, so these callbacks should always
// be in a separate thread.
if (!BrowserThread::CurrentlyOn(BrowserThread::UI)) {
current_notification_ = type;
BrowserThread::PostTask(
BrowserThread::UI,
FROM_HERE,
base::Bind(&TtsPlatformImplLinux::OnSpeechEvent,
base::Unretained(TtsPlatformImplLinux::GetInstance()),
type));
}
}
// static
void TtsPlatformImplLinux::IndexMarkCallback(size_t msg_id,
size_t client_id,
SPDNotificationType state,
char* index_mark) {
// TODO(dtseng): index_mark appears to specify an index type supplied by a
// client. Need to explore how this is used before hooking it up with existing
// word, sentence events.
// We run Speech Dispatcher in threaded mode, so these callbacks should always
// be in a separate thread.
if (!BrowserThread::CurrentlyOn(BrowserThread::UI)) {
current_notification_ = state;
BrowserThread::PostTask(BrowserThread::UI, FROM_HERE,
base::Bind(&TtsPlatformImplLinux::OnSpeechEvent,
base::Unretained(TtsPlatformImplLinux::GetInstance()),
state));
}
}
// static
TtsPlatformImplLinux* TtsPlatformImplLinux::GetInstance() {
return base::Singleton<
TtsPlatformImplLinux,
base::LeakySingletonTraits<TtsPlatformImplLinux>>::get();
}
// static
TtsPlatformImpl* TtsPlatformImpl::GetInstance() {
return TtsPlatformImplLinux::GetInstance();
}