tts_controller.cc revision d57369da7c6519fef57db42085f7b42d4c8845c1
1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "chrome/browser/speech/tts_controller.h"
6
7#include <string>
8#include <vector>
9
10#include "base/float_util.h"
11#include "base/values.h"
12#include "chrome/browser/extensions/extension_system.h"
13#include "chrome/browser/profiles/profile.h"
14#include "chrome/browser/speech/extension_api/tts_engine_extension_api.h"
15#include "chrome/browser/speech/extension_api/tts_extension_api.h"
16#include "chrome/browser/speech/tts_platform.h"
17#include "chrome/common/extensions/api/speech/tts_engine_manifest_handler.h"
18#include "extensions/common/extension.h"
19
namespace {
// Sentinel passed as the char index of a TTS event when no char index is
// available.
const int kInvalidCharIndex = -1;

// Given a language/region code such as 'fr-FR', returns just the basic
// language portion, e.g. 'fr'.
//
// The code is cut at the first hyphen, so primary language subtags of any
// length are handled ('fil-PH' -> 'fil', 'zh-Hant-TW' -> 'zh'). A code with
// no hyphen, or one that starts with a hyphen (no language portion to
// return), is returned unchanged.
std::string TrimLanguageCode(const std::string& lang) {
  const std::string::size_type hyphen_pos = lang.find('-');
  if (hyphen_pos == std::string::npos || hyphen_pos == 0)
    return lang;
  return lang.substr(0, hyphen_pos);
}

}  // namespace
34
35bool IsFinalTtsEventType(TtsEventType event_type) {
36  return (event_type == TTS_EVENT_END ||
37          event_type == TTS_EVENT_INTERRUPTED ||
38          event_type == TTS_EVENT_CANCELLED ||
39          event_type == TTS_EVENT_ERROR);
40}
41
42//
43// UtteranceContinuousParameters
44//
45
46
47UtteranceContinuousParameters::UtteranceContinuousParameters()
48    : rate(-1),
49      pitch(-1),
50      volume(-1) {}
51
52
53//
54// VoiceData
55//
56
57
58VoiceData::VoiceData()
59    : gender(TTS_GENDER_NONE),
60      remote(false),
61      native(false) {}
62
63VoiceData::~VoiceData() {}
64
65
66//
67// Utterance
68//
69
70// static
71int Utterance::next_utterance_id_ = 0;
72
73Utterance::Utterance(Profile* profile)
74    : profile_(profile),
75      id_(next_utterance_id_++),
76      src_id_(-1),
77      event_delegate_(NULL),
78      gender_(TTS_GENDER_NONE),
79      can_enqueue_(false),
80      char_index_(0),
81      finished_(false) {
82  options_.reset(new DictionaryValue());
83}
84
85Utterance::~Utterance() {
86  DCHECK(finished_);
87}
88
89void Utterance::OnTtsEvent(TtsEventType event_type,
90                           int char_index,
91                           const std::string& error_message) {
92  if (char_index >= 0)
93    char_index_ = char_index;
94  if (IsFinalTtsEventType(event_type))
95    finished_ = true;
96
97  if (event_delegate_)
98    event_delegate_->OnTtsEvent(this, event_type, char_index, error_message);
99  if (finished_)
100    event_delegate_ = NULL;
101}
102
103void Utterance::Finish() {
104  finished_ = true;
105}
106
107void Utterance::set_options(const Value* options) {
108  options_.reset(options->DeepCopy());
109}
110
111//
112// TtsController
113//
114
115// static
116TtsController* TtsController::GetInstance() {
117  return Singleton<TtsController>::get();
118}
119
120TtsController::TtsController()
121    : current_utterance_(NULL),
122      paused_(false),
123      platform_impl_(NULL) {
124}
125
126TtsController::~TtsController() {
127  if (current_utterance_) {
128    current_utterance_->Finish();
129    delete current_utterance_;
130  }
131
132  // Clear any queued utterances too.
133  ClearUtteranceQueue(false);  // Don't sent events.
134}
135
136void TtsController::SpeakOrEnqueue(Utterance* utterance) {
137  // If we're paused and we get an utterance that can't be queued,
138  // flush the queue but stay in the paused state.
139  if (paused_ && !utterance->can_enqueue()) {
140    Stop();
141    paused_ = true;
142    return;
143  }
144
145  if (paused_ || (IsSpeaking() && utterance->can_enqueue())) {
146    utterance_queue_.push(utterance);
147  } else {
148    Stop();
149    SpeakNow(utterance);
150  }
151}
152
153void TtsController::SpeakNow(Utterance* utterance) {
154  // Get all available voices and try to find a matching voice.
155  std::vector<VoiceData> voices;
156  GetVoices(utterance->profile(), &voices);
157  int index = GetMatchingVoice(utterance, voices);
158
159  // Select the matching voice, but if none was found, initialize an
160  // empty VoiceData with native = true, which will give the native
161  // speech synthesizer a chance to try to synthesize the utterance
162  // anyway.
163  VoiceData voice;
164  if (index >= 0 && index < static_cast<int>(voices.size()))
165    voice = voices[index];
166  else
167    voice.native = true;
168
169  GetPlatformImpl()->WillSpeakUtteranceWithVoice(utterance, voice);
170
171  if (!voice.native) {
172#if !defined(OS_ANDROID)
173    DCHECK(!voice.extension_id.empty());
174    current_utterance_ = utterance;
175    utterance->set_extension_id(voice.extension_id);
176    ExtensionTtsEngineSpeak(utterance, voice);
177    bool sends_end_event =
178        voice.events.find(TTS_EVENT_END) != voice.events.end();
179    if (!sends_end_event) {
180      utterance->Finish();
181      delete utterance;
182      current_utterance_ = NULL;
183      SpeakNextUtterance();
184    }
185#endif
186  } else {
187    // It's possible for certain platforms to send start events immediately
188    // during |speak|.
189    current_utterance_ = utterance;
190    GetPlatformImpl()->clear_error();
191    bool success = GetPlatformImpl()->Speak(
192        utterance->id(),
193        utterance->text(),
194        utterance->lang(),
195        voice,
196        utterance->continuous_parameters());
197    if (!success)
198      current_utterance_ = NULL;
199
200    if (!success) {
201      utterance->OnTtsEvent(TTS_EVENT_ERROR, kInvalidCharIndex,
202                            GetPlatformImpl()->error());
203      delete utterance;
204      return;
205    }
206  }
207}
208
209void TtsController::Stop() {
210  paused_ = false;
211  if (current_utterance_ && !current_utterance_->extension_id().empty()) {
212#if !defined(OS_ANDROID)
213    ExtensionTtsEngineStop(current_utterance_);
214#endif
215  } else {
216    GetPlatformImpl()->clear_error();
217    GetPlatformImpl()->StopSpeaking();
218  }
219
220  if (current_utterance_)
221    current_utterance_->OnTtsEvent(TTS_EVENT_INTERRUPTED, kInvalidCharIndex,
222                                   std::string());
223  FinishCurrentUtterance();
224  ClearUtteranceQueue(true);  // Send events.
225}
226
227void TtsController::Pause() {
228  paused_ = true;
229  if (current_utterance_ && !current_utterance_->extension_id().empty()) {
230#if !defined(OS_ANDROID)
231    ExtensionTtsEnginePause(current_utterance_);
232#endif
233  } else if (current_utterance_) {
234    GetPlatformImpl()->clear_error();
235    GetPlatformImpl()->Pause();
236  }
237}
238
239void TtsController::Resume() {
240  paused_ = false;
241  if (current_utterance_ && !current_utterance_->extension_id().empty()) {
242#if !defined(OS_ANDROID)
243    ExtensionTtsEngineResume(current_utterance_);
244#endif
245  } else if (current_utterance_) {
246    GetPlatformImpl()->clear_error();
247    GetPlatformImpl()->Resume();
248  } else {
249    SpeakNextUtterance();
250  }
251}
252
253void TtsController::OnTtsEvent(int utterance_id,
254                                        TtsEventType event_type,
255                                        int char_index,
256                                        const std::string& error_message) {
257  // We may sometimes receive completion callbacks "late", after we've
258  // already finished the utterance (for example because another utterance
259  // interrupted or we got a call to Stop). This is normal and we can
260  // safely just ignore these events.
261  if (!current_utterance_ || utterance_id != current_utterance_->id()) {
262    return;
263  }
264  current_utterance_->OnTtsEvent(event_type, char_index, error_message);
265  if (current_utterance_->finished()) {
266    FinishCurrentUtterance();
267    SpeakNextUtterance();
268  }
269}
270
271void TtsController::GetVoices(Profile* profile,
272                              std::vector<VoiceData>* out_voices) {
273#if !defined(OS_ANDROID)
274  if (profile)
275    GetExtensionVoices(profile, out_voices);
276#endif
277
278  TtsPlatformImpl* platform_impl = GetPlatformImpl();
279  if (platform_impl && platform_impl->PlatformImplAvailable())
280    platform_impl->GetVoices(out_voices);
281}
282
283bool TtsController::IsSpeaking() {
284  return current_utterance_ != NULL || GetPlatformImpl()->IsSpeaking();
285}
286
287void TtsController::FinishCurrentUtterance() {
288  if (current_utterance_) {
289    if (!current_utterance_->finished())
290      current_utterance_->OnTtsEvent(TTS_EVENT_INTERRUPTED, kInvalidCharIndex,
291                                     std::string());
292    delete current_utterance_;
293    current_utterance_ = NULL;
294  }
295}
296
297void TtsController::SpeakNextUtterance() {
298  if (paused_)
299    return;
300
301  // Start speaking the next utterance in the queue.  Keep trying in case
302  // one fails but there are still more in the queue to try.
303  while (!utterance_queue_.empty() && !current_utterance_) {
304    Utterance* utterance = utterance_queue_.front();
305    utterance_queue_.pop();
306    SpeakNow(utterance);
307  }
308}
309
310void TtsController::ClearUtteranceQueue(bool send_events) {
311  while (!utterance_queue_.empty()) {
312    Utterance* utterance = utterance_queue_.front();
313    utterance_queue_.pop();
314    if (send_events)
315      utterance->OnTtsEvent(TTS_EVENT_CANCELLED, kInvalidCharIndex,
316                            std::string());
317    else
318      utterance->Finish();
319    delete utterance;
320  }
321}
322
323void TtsController::SetPlatformImpl(
324    TtsPlatformImpl* platform_impl) {
325  platform_impl_ = platform_impl;
326}
327
328int TtsController::QueueSize() {
329  return static_cast<int>(utterance_queue_.size());
330}
331
332TtsPlatformImpl* TtsController::GetPlatformImpl() {
333  if (!platform_impl_)
334    platform_impl_ = TtsPlatformImpl::GetInstance();
335  return platform_impl_;
336}
337
338int TtsController::GetMatchingVoice(
339    const Utterance* utterance, std::vector<VoiceData>& voices) {
340  // Make two passes: the first time, do strict language matching
341  // ('fr-FR' does not match 'fr-CA'). The second time, do prefix
342  // language matching ('fr-FR' matches 'fr' and 'fr-CA')
343  for (int pass = 0; pass < 2; ++pass) {
344    for (size_t i = 0; i < voices.size(); ++i) {
345      const VoiceData& voice = voices[i];
346
347      if (!utterance->extension_id().empty() &&
348          utterance->extension_id() != voice.extension_id) {
349        continue;
350      }
351
352      if (!voice.name.empty() &&
353          !utterance->voice_name().empty() &&
354          voice.name != utterance->voice_name()) {
355        continue;
356      }
357      if (!voice.lang.empty() && !utterance->lang().empty()) {
358        std::string voice_lang = voice.lang;
359        std::string utterance_lang = utterance->lang();
360        if (pass == 1) {
361          voice_lang = TrimLanguageCode(voice_lang);
362          utterance_lang = TrimLanguageCode(utterance_lang);
363        }
364        if (voice_lang != utterance_lang) {
365          continue;
366        }
367      }
368      if (voice.gender != TTS_GENDER_NONE &&
369          utterance->gender() != TTS_GENDER_NONE &&
370          voice.gender != utterance->gender()) {
371        continue;
372      }
373
374      if (utterance->required_event_types().size() > 0) {
375        bool has_all_required_event_types = true;
376        for (std::set<TtsEventType>::const_iterator iter =
377                 utterance->required_event_types().begin();
378             iter != utterance->required_event_types().end();
379             ++iter) {
380          if (voice.events.find(*iter) == voice.events.end()) {
381            has_all_required_event_types = false;
382            break;
383          }
384        }
385        if (!has_all_required_event_types)
386          continue;
387      }
388
389      return static_cast<int>(i);
390    }
391  }
392
393  return -1;
394}
395
396void TtsController::VoicesChanged() {
397  for (std::set<VoicesChangedDelegate*>::iterator iter =
398           voices_changed_delegates_.begin();
399       iter != voices_changed_delegates_.end(); ++iter) {
400    (*iter)->OnVoicesChanged();
401  }
402}
403
404void TtsController::AddVoicesChangedDelegate(VoicesChangedDelegate* delegate) {
405  voices_changed_delegates_.insert(delegate);
406}
407
408void TtsController::RemoveVoicesChangedDelegate(
409    VoicesChangedDelegate* delegate) {
410  voices_changed_delegates_.erase(delegate);
411}
412