259 lines
		
	
	
	
		
			7.4 KiB
			
		
	
	
	
		
			C++
		
	
	
	
	
	
			
		
		
	
	
			259 lines
		
	
	
	
		
			7.4 KiB
			
		
	
	
	
		
			C++
		
	
	
	
	
	
// Copyright (c) 2012 The Chromium Authors. All rights reserved.
 | 
						|
// Use of this source code is governed by a BSD-style license that can be
 | 
						|
// found in the LICENSE file.
 | 
						|
 | 
						|
#include <math.h>
 | 
						|
#include <sapi.h>
 | 
						|
 | 
						|
#include "base/memory/singleton.h"
 | 
						|
#include "base/strings/string_number_conversions.h"
 | 
						|
#include "base/strings/utf_string_conversions.h"
 | 
						|
#include "base/values.h"
 | 
						|
#include "base/win/scoped_comptr.h"
 | 
						|
#include "chrome/browser/speech/tts_controller.h"
 | 
						|
#include "chrome/browser/speech/tts_platform.h"
 | 
						|
 | 
						|
class TtsPlatformImplWin : public TtsPlatformImpl {
 | 
						|
 public:
 | 
						|
  bool PlatformImplAvailable() override {
 | 
						|
    return true;
 | 
						|
  }
 | 
						|
 | 
						|
  bool Speak(
 | 
						|
      int utterance_id,
 | 
						|
      const std::string& utterance,
 | 
						|
      const std::string& lang,
 | 
						|
      const VoiceData& voice,
 | 
						|
      const UtteranceContinuousParameters& params) override;
 | 
						|
 | 
						|
  bool StopSpeaking() override;
 | 
						|
 | 
						|
  void Pause() override;
 | 
						|
 | 
						|
  void Resume() override;
 | 
						|
 | 
						|
  bool IsSpeaking() override;
 | 
						|
 | 
						|
  void GetVoices(std::vector<VoiceData>* out_voices) override;
 | 
						|
 | 
						|
  // Get the single instance of this class.
 | 
						|
  static TtsPlatformImplWin* GetInstance();
 | 
						|
 | 
						|
  static void __stdcall SpeechEventCallback(WPARAM w_param, LPARAM l_param);
 | 
						|
 | 
						|
 private:
 | 
						|
  TtsPlatformImplWin();
 | 
						|
  ~TtsPlatformImplWin() override {}
 | 
						|
 | 
						|
  void OnSpeechEvent();
 | 
						|
 | 
						|
  base::win::ScopedComPtr<ISpVoice> speech_synthesizer_;
 | 
						|
 | 
						|
  // These apply to the current utterance only.
 | 
						|
  std::wstring utterance_;
 | 
						|
  int utterance_id_;
 | 
						|
  int prefix_len_;
 | 
						|
  ULONG stream_number_;
 | 
						|
  int char_position_;
 | 
						|
  bool paused_;
 | 
						|
 | 
						|
  friend struct base::DefaultSingletonTraits<TtsPlatformImplWin>;
 | 
						|
 | 
						|
  DISALLOW_COPY_AND_ASSIGN(TtsPlatformImplWin);
 | 
						|
};
 | 
						|
 | 
						|
// static
 | 
						|
TtsPlatformImpl* TtsPlatformImpl::GetInstance() {
 | 
						|
  return TtsPlatformImplWin::GetInstance();
 | 
						|
}
 | 
						|
 | 
						|
bool TtsPlatformImplWin::Speak(
 | 
						|
    int utterance_id,
 | 
						|
    const std::string& src_utterance,
 | 
						|
    const std::string& lang,
 | 
						|
    const VoiceData& voice,
 | 
						|
    const UtteranceContinuousParameters& params) {
 | 
						|
  std::wstring prefix;
 | 
						|
  std::wstring suffix;
 | 
						|
 | 
						|
  if (!speech_synthesizer_.get())
 | 
						|
    return false;
 | 
						|
 | 
						|
  // TODO(dmazzoni): support languages other than the default: crbug.com/88059
 | 
						|
 | 
						|
  if (params.rate >= 0.0) {
 | 
						|
    // Map our multiplicative range of 0.1x to 10.0x onto Microsoft's
 | 
						|
    // linear range of -10 to 10:
 | 
						|
    //   0.1 -> -10
 | 
						|
    //   1.0 -> 0
 | 
						|
    //  10.0 -> 10
 | 
						|
    speech_synthesizer_->SetRate(static_cast<int32_t>(10 * log10(params.rate)));
 | 
						|
  }
 | 
						|
 | 
						|
  if (params.pitch >= 0.0) {
 | 
						|
    // The TTS api allows a range of -10 to 10 for speech pitch.
 | 
						|
    // TODO(dtseng): cleanup if we ever use any other properties that
 | 
						|
    // require xml.
 | 
						|
    std::wstring pitch_value =
 | 
						|
        base::IntToString16(static_cast<int>(params.pitch * 10 - 10));
 | 
						|
    prefix = L"<pitch absmiddle=\"" + pitch_value + L"\">";
 | 
						|
    suffix = L"</pitch>";
 | 
						|
  }
 | 
						|
 | 
						|
  if (params.volume >= 0.0) {
 | 
						|
    // The TTS api allows a range of 0 to 100 for speech volume.
 | 
						|
    speech_synthesizer_->SetVolume(static_cast<uint16_t>(params.volume * 100));
 | 
						|
  }
 | 
						|
 | 
						|
  // TODO(dmazzoni): convert SSML to SAPI xml. http://crbug.com/88072
 | 
						|
 | 
						|
  utterance_ = base::UTF8ToWide(src_utterance);
 | 
						|
  utterance_id_ = utterance_id;
 | 
						|
  char_position_ = 0;
 | 
						|
  std::wstring merged_utterance = prefix + utterance_ + suffix;
 | 
						|
  prefix_len_ = prefix.size();
 | 
						|
 | 
						|
  HRESULT result = speech_synthesizer_->Speak(
 | 
						|
      merged_utterance.c_str(),
 | 
						|
      SPF_ASYNC,
 | 
						|
      &stream_number_);
 | 
						|
  return (result == S_OK);
 | 
						|
}
 | 
						|
 | 
						|
bool TtsPlatformImplWin::StopSpeaking() {
 | 
						|
  if (speech_synthesizer_.get()) {
 | 
						|
    // Clear the stream number so that any further events relating to this
 | 
						|
    // utterance are ignored.
 | 
						|
    stream_number_ = 0;
 | 
						|
 | 
						|
    if (IsSpeaking()) {
 | 
						|
      // Stop speech by speaking the empty string with the purge flag.
 | 
						|
      speech_synthesizer_->Speak(L"", SPF_ASYNC | SPF_PURGEBEFORESPEAK, NULL);
 | 
						|
    }
 | 
						|
    if (paused_) {
 | 
						|
      speech_synthesizer_->Resume();
 | 
						|
      paused_ = false;
 | 
						|
    }
 | 
						|
  }
 | 
						|
  return true;
 | 
						|
}
 | 
						|
 | 
						|
void TtsPlatformImplWin::Pause() {
 | 
						|
  if (speech_synthesizer_.get() && utterance_id_ && !paused_) {
 | 
						|
    speech_synthesizer_->Pause();
 | 
						|
    paused_ = true;
 | 
						|
    TtsController::GetInstance()->OnTtsEvent(
 | 
						|
        utterance_id_, TTS_EVENT_PAUSE, char_position_, "");
 | 
						|
  }
 | 
						|
}
 | 
						|
 | 
						|
void TtsPlatformImplWin::Resume() {
 | 
						|
  if (speech_synthesizer_.get() && utterance_id_ && paused_) {
 | 
						|
    speech_synthesizer_->Resume();
 | 
						|
    paused_ = false;
 | 
						|
    TtsController::GetInstance()->OnTtsEvent(
 | 
						|
        utterance_id_, TTS_EVENT_RESUME, char_position_, "");
 | 
						|
  }
 | 
						|
}
 | 
						|
 | 
						|
bool TtsPlatformImplWin::IsSpeaking() {
 | 
						|
  if (speech_synthesizer_.get()) {
 | 
						|
    SPVOICESTATUS status;
 | 
						|
    HRESULT result = speech_synthesizer_->GetStatus(&status, NULL);
 | 
						|
    if (result == S_OK) {
 | 
						|
      if (status.dwRunningState == 0 ||  // 0 == waiting to speak
 | 
						|
          status.dwRunningState == SPRS_IS_SPEAKING) {
 | 
						|
        return true;
 | 
						|
      }
 | 
						|
    }
 | 
						|
  }
 | 
						|
  return false;
 | 
						|
}
 | 
						|
 | 
						|
void TtsPlatformImplWin::GetVoices(
 | 
						|
    std::vector<VoiceData>* out_voices) {
 | 
						|
  // TODO: get all voices, not just default voice.
 | 
						|
  // http://crbug.com/88059
 | 
						|
  out_voices->push_back(VoiceData());
 | 
						|
  VoiceData& voice = out_voices->back();
 | 
						|
  voice.native = true;
 | 
						|
  voice.name = "native";
 | 
						|
  voice.events.insert(TTS_EVENT_START);
 | 
						|
  voice.events.insert(TTS_EVENT_END);
 | 
						|
  voice.events.insert(TTS_EVENT_MARKER);
 | 
						|
  voice.events.insert(TTS_EVENT_WORD);
 | 
						|
  voice.events.insert(TTS_EVENT_SENTENCE);
 | 
						|
  voice.events.insert(TTS_EVENT_PAUSE);
 | 
						|
  voice.events.insert(TTS_EVENT_RESUME);
 | 
						|
}
 | 
						|
 | 
						|
void TtsPlatformImplWin::OnSpeechEvent() {
 | 
						|
  TtsController* controller = TtsController::GetInstance();
 | 
						|
  SPEVENT event;
 | 
						|
  while (S_OK == speech_synthesizer_->GetEvents(1, &event, NULL)) {
 | 
						|
    if (event.ulStreamNum != stream_number_)
 | 
						|
      continue;
 | 
						|
 | 
						|
    switch (event.eEventId) {
 | 
						|
    case SPEI_START_INPUT_STREAM:
 | 
						|
      controller->OnTtsEvent(
 | 
						|
          utterance_id_, TTS_EVENT_START, 0, std::string());
 | 
						|
      break;
 | 
						|
    case SPEI_END_INPUT_STREAM:
 | 
						|
      char_position_ = utterance_.size();
 | 
						|
      controller->OnTtsEvent(
 | 
						|
          utterance_id_, TTS_EVENT_END, char_position_, std::string());
 | 
						|
      break;
 | 
						|
    case SPEI_TTS_BOOKMARK:
 | 
						|
      controller->OnTtsEvent(
 | 
						|
          utterance_id_, TTS_EVENT_MARKER, char_position_, std::string());
 | 
						|
      break;
 | 
						|
    case SPEI_WORD_BOUNDARY:
 | 
						|
      char_position_ = static_cast<ULONG>(event.lParam) - prefix_len_;
 | 
						|
      controller->OnTtsEvent(
 | 
						|
          utterance_id_, TTS_EVENT_WORD, char_position_,
 | 
						|
          std::string());
 | 
						|
      break;
 | 
						|
    case SPEI_SENTENCE_BOUNDARY:
 | 
						|
      char_position_ = static_cast<ULONG>(event.lParam) - prefix_len_;
 | 
						|
      controller->OnTtsEvent(
 | 
						|
          utterance_id_, TTS_EVENT_SENTENCE, char_position_,
 | 
						|
          std::string());
 | 
						|
      break;
 | 
						|
    default:
 | 
						|
      break;
 | 
						|
    }
 | 
						|
  }
 | 
						|
}
 | 
						|
 | 
						|
// Creates the SAPI voice and, if that succeeds, subscribes to the events
// this class translates and registers SpeechEventCallback to be notified of
// them. When creation fails |speech_synthesizer_| stays null.
TtsPlatformImplWin::TtsPlatformImplWin()
    : utterance_id_(0),
      prefix_len_(0),
      stream_number_(0),
      char_position_(0),
      paused_(false) {
  speech_synthesizer_.CreateInstance(CLSID_SpVoice);
  if (!speech_synthesizer_.get())
    return;

  // The events handled in OnSpeechEvent(). The same mask is passed for both
  // arguments of SetInterest().
  const ULONGLONG handled_events =
      SPFEI(SPEI_START_INPUT_STREAM) |
      SPFEI(SPEI_TTS_BOOKMARK) |
      SPFEI(SPEI_WORD_BOUNDARY) |
      SPFEI(SPEI_SENTENCE_BOUNDARY) |
      SPFEI(SPEI_END_INPUT_STREAM);
  speech_synthesizer_->SetInterest(handled_events, handled_events);

  speech_synthesizer_->SetNotifyCallbackFunction(
      TtsPlatformImplWin::SpeechEventCallback, 0, 0);
}
 | 
						|
 | 
						|
// static
 | 
						|
TtsPlatformImplWin* TtsPlatformImplWin::GetInstance() {
 | 
						|
  return base::Singleton<TtsPlatformImplWin,
 | 
						|
                         base::LeakySingletonTraits<TtsPlatformImplWin>>::get();
 | 
						|
}
 | 
						|
 | 
						|
// static
 | 
						|
void TtsPlatformImplWin::SpeechEventCallback(
 | 
						|
    WPARAM w_param, LPARAM l_param) {
 | 
						|
  GetInstance()->OnSpeechEvent();
 | 
						|
}
 |