// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
4
5#include <math.h>
6#include <sapi.h>
7
8#include "base/memory/singleton.h"
9#include "base/strings/string_number_conversions.h"
10#include "base/strings/utf_string_conversions.h"
11#include "base/values.h"
12#include "base/win/scoped_comptr.h"
13#include "chrome/browser/speech/tts_controller.h"
14#include "chrome/browser/speech/tts_platform.h"
15
16class TtsPlatformImplWin : public TtsPlatformImpl {
17 public:
18  virtual bool PlatformImplAvailable() {
19    return true;
20  }
21
22  virtual bool Speak(
23      int utterance_id,
24      const std::string& utterance,
25      const std::string& lang,
26      const VoiceData& voice,
27      const UtteranceContinuousParameters& params);
28
29  virtual bool StopSpeaking();
30
31  virtual void Pause();
32
33  virtual void Resume();
34
35  virtual bool IsSpeaking();
36
37  virtual void GetVoices(std::vector<VoiceData>* out_voices) OVERRIDE;
38
39  // Get the single instance of this class.
40  static TtsPlatformImplWin* GetInstance();
41
42  static void __stdcall SpeechEventCallback(WPARAM w_param, LPARAM l_param);
43
44 private:
45  TtsPlatformImplWin();
46  virtual ~TtsPlatformImplWin() {}
47
48  void OnSpeechEvent();
49
50  base::win::ScopedComPtr<ISpVoice> speech_synthesizer_;
51
52  // These apply to the current utterance only.
53  std::wstring utterance_;
54  int utterance_id_;
55  int prefix_len_;
56  ULONG stream_number_;
57  int char_position_;
58  bool paused_;
59
60  friend struct DefaultSingletonTraits<TtsPlatformImplWin>;
61
62  DISALLOW_COPY_AND_ASSIGN(TtsPlatformImplWin);
63};
64
65// static
66TtsPlatformImpl* TtsPlatformImpl::GetInstance() {
67  return TtsPlatformImplWin::GetInstance();
68}
69
70bool TtsPlatformImplWin::Speak(
71    int utterance_id,
72    const std::string& src_utterance,
73    const std::string& lang,
74    const VoiceData& voice,
75    const UtteranceContinuousParameters& params) {
76  std::wstring prefix;
77  std::wstring suffix;
78
79  if (!speech_synthesizer_.get())
80    return false;
81
82  // TODO(dmazzoni): support languages other than the default: crbug.com/88059
83
84  if (params.rate >= 0.0) {
85    // Map our multiplicative range of 0.1x to 10.0x onto Microsoft's
86    // linear range of -10 to 10:
87    //   0.1 -> -10
88    //   1.0 -> 0
89    //  10.0 -> 10
90    speech_synthesizer_->SetRate(static_cast<int32>(10 * log10(params.rate)));
91  }
92
93  if (params.pitch >= 0.0) {
94    // The TTS api allows a range of -10 to 10 for speech pitch.
95    // TODO(dtseng): cleanup if we ever use any other properties that
96    // require xml.
97    std::wstring pitch_value =
98        base::IntToString16(static_cast<int>(params.pitch * 10 - 10));
99    prefix = L"<pitch absmiddle=\"" + pitch_value + L"\">";
100    suffix = L"</pitch>";
101  }
102
103  if (params.volume >= 0.0) {
104    // The TTS api allows a range of 0 to 100 for speech volume.
105    speech_synthesizer_->SetVolume(static_cast<uint16>(params.volume * 100));
106  }
107
108  // TODO(dmazzoni): convert SSML to SAPI xml. http://crbug.com/88072
109
110  utterance_ = base::UTF8ToWide(src_utterance);
111  utterance_id_ = utterance_id;
112  char_position_ = 0;
113  std::wstring merged_utterance = prefix + utterance_ + suffix;
114  prefix_len_ = prefix.size();
115
116  HRESULT result = speech_synthesizer_->Speak(
117      merged_utterance.c_str(),
118      SPF_ASYNC,
119      &stream_number_);
120  return (result == S_OK);
121}
122
123bool TtsPlatformImplWin::StopSpeaking() {
124  if (speech_synthesizer_.get()) {
125    // Clear the stream number so that any further events relating to this
126    // utterance are ignored.
127    stream_number_ = 0;
128
129    if (IsSpeaking()) {
130      // Stop speech by speaking the empty string with the purge flag.
131      speech_synthesizer_->Speak(L"", SPF_ASYNC | SPF_PURGEBEFORESPEAK, NULL);
132    }
133    if (paused_) {
134      speech_synthesizer_->Resume();
135      paused_ = false;
136    }
137  }
138  return true;
139}
140
141void TtsPlatformImplWin::Pause() {
142  if (speech_synthesizer_.get() && utterance_id_ && !paused_) {
143    speech_synthesizer_->Pause();
144    paused_ = true;
145    TtsController::GetInstance()->OnTtsEvent(
146        utterance_id_, TTS_EVENT_PAUSE, char_position_, "");
147  }
148}
149
150void TtsPlatformImplWin::Resume() {
151  if (speech_synthesizer_.get() && utterance_id_ && paused_) {
152    speech_synthesizer_->Resume();
153    paused_ = false;
154    TtsController::GetInstance()->OnTtsEvent(
155        utterance_id_, TTS_EVENT_RESUME, char_position_, "");
156  }
157}
158
159bool TtsPlatformImplWin::IsSpeaking() {
160  if (speech_synthesizer_.get()) {
161    SPVOICESTATUS status;
162    HRESULT result = speech_synthesizer_->GetStatus(&status, NULL);
163    if (result == S_OK) {
164      if (status.dwRunningState == 0 ||  // 0 == waiting to speak
165          status.dwRunningState == SPRS_IS_SPEAKING) {
166        return true;
167      }
168    }
169  }
170  return false;
171}
172
173void TtsPlatformImplWin::GetVoices(
174    std::vector<VoiceData>* out_voices) {
175  // TODO: get all voices, not just default voice.
176  // http://crbug.com/88059
177  out_voices->push_back(VoiceData());
178  VoiceData& voice = out_voices->back();
179  voice.native = true;
180  voice.name = "native";
181  voice.events.insert(TTS_EVENT_START);
182  voice.events.insert(TTS_EVENT_END);
183  voice.events.insert(TTS_EVENT_MARKER);
184  voice.events.insert(TTS_EVENT_WORD);
185  voice.events.insert(TTS_EVENT_SENTENCE);
186  voice.events.insert(TTS_EVENT_PAUSE);
187  voice.events.insert(TTS_EVENT_RESUME);
188}
189
190void TtsPlatformImplWin::OnSpeechEvent() {
191  TtsController* controller = TtsController::GetInstance();
192  SPEVENT event;
193  while (S_OK == speech_synthesizer_->GetEvents(1, &event, NULL)) {
194    if (event.ulStreamNum != stream_number_)
195      continue;
196
197    switch (event.eEventId) {
198    case SPEI_START_INPUT_STREAM:
199      controller->OnTtsEvent(
200          utterance_id_, TTS_EVENT_START, 0, std::string());
201      break;
202    case SPEI_END_INPUT_STREAM:
203      char_position_ = utterance_.size();
204      controller->OnTtsEvent(
205          utterance_id_, TTS_EVENT_END, char_position_, std::string());
206      break;
207    case SPEI_TTS_BOOKMARK:
208      controller->OnTtsEvent(
209          utterance_id_, TTS_EVENT_MARKER, char_position_, std::string());
210      break;
211    case SPEI_WORD_BOUNDARY:
212      char_position_ = static_cast<ULONG>(event.lParam) - prefix_len_;
213      controller->OnTtsEvent(
214          utterance_id_, TTS_EVENT_WORD, char_position_,
215          std::string());
216      break;
217    case SPEI_SENTENCE_BOUNDARY:
218      char_position_ = static_cast<ULONG>(event.lParam) - prefix_len_;
219      controller->OnTtsEvent(
220          utterance_id_, TTS_EVENT_SENTENCE, char_position_,
221          std::string());
222      break;
223    }
224  }
225}
226
227TtsPlatformImplWin::TtsPlatformImplWin()
228  : utterance_id_(0),
229    prefix_len_(0),
230    stream_number_(0),
231    char_position_(0),
232    paused_(false) {
233  speech_synthesizer_.CreateInstance(CLSID_SpVoice);
234  if (speech_synthesizer_.get()) {
235    ULONGLONG event_mask =
236        SPFEI(SPEI_START_INPUT_STREAM) |
237        SPFEI(SPEI_TTS_BOOKMARK) |
238        SPFEI(SPEI_WORD_BOUNDARY) |
239        SPFEI(SPEI_SENTENCE_BOUNDARY) |
240        SPFEI(SPEI_END_INPUT_STREAM);
241    speech_synthesizer_->SetInterest(event_mask, event_mask);
242    speech_synthesizer_->SetNotifyCallbackFunction(
243        TtsPlatformImplWin::SpeechEventCallback, 0, 0);
244  }
245}
246
247// static
248TtsPlatformImplWin* TtsPlatformImplWin::GetInstance() {
249  return Singleton<TtsPlatformImplWin,
250                   LeakySingletonTraits<TtsPlatformImplWin> >::get();
251}
252
253// static
254void TtsPlatformImplWin::SpeechEventCallback(
255    WPARAM w_param, LPARAM l_param) {
256  GetInstance()->OnSpeechEvent();
257}
258