1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "chrome/browser/speech/tts_controller.h"
6
7#include <string>
8#include <vector>
9
10#include "base/float_util.h"
11#include "base/values.h"
12#include "chrome/browser/extensions/extension_system.h"
13#include "chrome/browser/profiles/profile.h"
14#include "chrome/browser/speech/extension_api/tts_engine_extension_api.h"
15#include "chrome/browser/speech/extension_api/tts_extension_api.h"
16#include "chrome/browser/speech/tts_platform.h"
17#include "chrome/common/extensions/api/speech/tts_engine_manifest_handler.h"
18#include "chrome/common/extensions/extension.h"
19
namespace {

// Char index value used when an event has no char index to report.
const int kInvalidCharIndex = -1;

// Reduces a language/region code such as 'fr-FR' to its two-letter language
// part ('fr'). Only codes that are at least five characters long and have a
// hyphen as their third character are shortened; every other input is
// returned unchanged.
std::string TrimLanguageCode(std::string lang) {
  const bool has_region_suffix = lang.size() >= 5 && lang[2] == '-';
  if (!has_region_suffix)
    return lang;
  return lang.substr(0, 2);
}

}  // namespace
34
35bool IsFinalTtsEventType(TtsEventType event_type) {
36  return (event_type == TTS_EVENT_END ||
37          event_type == TTS_EVENT_INTERRUPTED ||
38          event_type == TTS_EVENT_CANCELLED ||
39          event_type == TTS_EVENT_ERROR);
40}
41
42//
43// UtteranceContinuousParameters
44//
45
46
47UtteranceContinuousParameters::UtteranceContinuousParameters()
48    : rate(-1),
49      pitch(-1),
50      volume(-1) {}
51
52
53//
54// VoiceData
55//
56
57
58VoiceData::VoiceData()
59    : gender(TTS_GENDER_NONE),
60      native(false) {}
61
62VoiceData::~VoiceData() {}
63
64
65//
66// Utterance
67//
68
69// static
70int Utterance::next_utterance_id_ = 0;
71
72Utterance::Utterance(Profile* profile)
73    : profile_(profile),
74      id_(next_utterance_id_++),
75      src_id_(-1),
76      event_delegate_(NULL),
77      can_enqueue_(false),
78      char_index_(0),
79      finished_(false) {
80  options_.reset(new DictionaryValue());
81}
82
83Utterance::~Utterance() {
84  DCHECK(finished_);
85}
86
87void Utterance::OnTtsEvent(TtsEventType event_type,
88                           int char_index,
89                           const std::string& error_message) {
90  if (char_index >= 0)
91    char_index_ = char_index;
92  if (IsFinalTtsEventType(event_type))
93    finished_ = true;
94
95  if (event_delegate_)
96    event_delegate_->OnTtsEvent(this, event_type, char_index, error_message);
97  if (finished_)
98    event_delegate_ = NULL;
99}
100
101void Utterance::Finish() {
102  finished_ = true;
103}
104
105void Utterance::set_options(const Value* options) {
106  options_.reset(options->DeepCopy());
107}
108
109//
110// TtsController
111//
112
113// static
114TtsController* TtsController::GetInstance() {
115  return Singleton<TtsController>::get();
116}
117
118TtsController::TtsController()
119    : current_utterance_(NULL),
120      paused_(false),
121      platform_impl_(NULL) {
122}
123
124TtsController::~TtsController() {
125  if (current_utterance_) {
126    current_utterance_->Finish();
127    delete current_utterance_;
128  }
129
130  // Clear any queued utterances too.
131  ClearUtteranceQueue(false);  // Don't sent events.
132}
133
134void TtsController::SpeakOrEnqueue(Utterance* utterance) {
135  // If we're paused and we get an utterance that can't be queued,
136  // flush the queue but stay in the paused state.
137  if (paused_ && !utterance->can_enqueue()) {
138    Stop();
139    paused_ = true;
140    return;
141  }
142
143  if (paused_ || (IsSpeaking() && utterance->can_enqueue())) {
144    utterance_queue_.push(utterance);
145  } else {
146    Stop();
147    SpeakNow(utterance);
148  }
149}
150
151void TtsController::SpeakNow(Utterance* utterance) {
152  // Get all available voices and try to find a matching voice.
153  std::vector<VoiceData> voices;
154  GetVoices(utterance->profile(), &voices);
155  int index = GetMatchingVoice(utterance, voices);
156
157  // Select the matching voice, but if none was found, initialize an
158  // empty VoiceData with native = true, which will give the native
159  // speech synthesizer a chance to try to synthesize the utterance
160  // anyway.
161  VoiceData voice;
162  if (index >= 0 && index < static_cast<int>(voices.size()))
163    voice = voices[index];
164  else
165    voice.native = true;
166
167  if (!voice.native) {
168#if !defined(OS_ANDROID)
169    DCHECK(!voice.extension_id.empty());
170    current_utterance_ = utterance;
171    utterance->set_extension_id(voice.extension_id);
172    ExtensionTtsEngineSpeak(utterance, voice);
173    bool sends_end_event =
174        voice.events.find(TTS_EVENT_END) != voice.events.end();
175    if (!sends_end_event) {
176      utterance->Finish();
177      delete utterance;
178      current_utterance_ = NULL;
179      SpeakNextUtterance();
180    }
181#endif
182  } else {
183    GetPlatformImpl()->clear_error();
184    bool success = GetPlatformImpl()->Speak(
185        utterance->id(),
186        utterance->text(),
187        utterance->lang(),
188        voice,
189        utterance->continuous_parameters());
190
191    // If the native voice wasn't able to process this speech, see if
192    // the browser has built-in TTS that isn't loaded yet.
193    if (!success &&
194        GetPlatformImpl()->LoadBuiltInTtsExtension(utterance->profile())) {
195      utterance_queue_.push(utterance);
196      return;
197    }
198
199    if (!success) {
200      utterance->OnTtsEvent(TTS_EVENT_ERROR, kInvalidCharIndex,
201                            GetPlatformImpl()->error());
202      delete utterance;
203      return;
204    }
205    current_utterance_ = utterance;
206  }
207}
208
209void TtsController::Stop() {
210  paused_ = false;
211  if (current_utterance_ && !current_utterance_->extension_id().empty()) {
212#if !defined(OS_ANDROID)
213    ExtensionTtsEngineStop(current_utterance_);
214#endif
215  } else {
216    GetPlatformImpl()->clear_error();
217    GetPlatformImpl()->StopSpeaking();
218  }
219
220  if (current_utterance_)
221    current_utterance_->OnTtsEvent(TTS_EVENT_INTERRUPTED, kInvalidCharIndex,
222                                   std::string());
223  FinishCurrentUtterance();
224  ClearUtteranceQueue(true);  // Send events.
225}
226
227void TtsController::Pause() {
228  paused_ = true;
229  if (current_utterance_ && !current_utterance_->extension_id().empty()) {
230#if !defined(OS_ANDROID)
231    ExtensionTtsEnginePause(current_utterance_);
232#endif
233  } else if (current_utterance_) {
234    GetPlatformImpl()->clear_error();
235    GetPlatformImpl()->Pause();
236  }
237}
238
239void TtsController::Resume() {
240  paused_ = false;
241  if (current_utterance_ && !current_utterance_->extension_id().empty()) {
242#if !defined(OS_ANDROID)
243    ExtensionTtsEngineResume(current_utterance_);
244#endif
245  } else if (current_utterance_) {
246    GetPlatformImpl()->clear_error();
247    GetPlatformImpl()->Resume();
248  } else {
249    SpeakNextUtterance();
250  }
251}
252
253void TtsController::OnTtsEvent(int utterance_id,
254                                        TtsEventType event_type,
255                                        int char_index,
256                                        const std::string& error_message) {
257  // We may sometimes receive completion callbacks "late", after we've
258  // already finished the utterance (for example because another utterance
259  // interrupted or we got a call to Stop). This is normal and we can
260  // safely just ignore these events.
261  if (!current_utterance_ || utterance_id != current_utterance_->id())
262    return;
263
264  current_utterance_->OnTtsEvent(event_type, char_index, error_message);
265  if (current_utterance_->finished()) {
266    FinishCurrentUtterance();
267    SpeakNextUtterance();
268  }
269}
270
271void TtsController::GetVoices(Profile* profile,
272                              std::vector<VoiceData>* out_voices) {
273#if !defined(OS_ANDROID)
274  if (profile)
275    GetExtensionVoices(profile, out_voices);
276#endif
277
278  TtsPlatformImpl* platform_impl = GetPlatformImpl();
279  if (platform_impl && platform_impl->PlatformImplAvailable())
280    platform_impl->GetVoices(out_voices);
281}
282
283bool TtsController::IsSpeaking() {
284  return current_utterance_ != NULL || GetPlatformImpl()->IsSpeaking();
285}
286
287void TtsController::FinishCurrentUtterance() {
288  if (current_utterance_) {
289    if (!current_utterance_->finished())
290      current_utterance_->OnTtsEvent(TTS_EVENT_INTERRUPTED, kInvalidCharIndex,
291                                     std::string());
292    delete current_utterance_;
293    current_utterance_ = NULL;
294  }
295}
296
297void TtsController::SpeakNextUtterance() {
298  if (paused_)
299    return;
300
301  // Start speaking the next utterance in the queue.  Keep trying in case
302  // one fails but there are still more in the queue to try.
303  while (!utterance_queue_.empty() && !current_utterance_) {
304    Utterance* utterance = utterance_queue_.front();
305    utterance_queue_.pop();
306    SpeakNow(utterance);
307  }
308}
309
310void TtsController::RetrySpeakingQueuedUtterances() {
311  if (current_utterance_ == NULL && !utterance_queue_.empty())
312    SpeakNextUtterance();
313}
314
315void TtsController::ClearUtteranceQueue(bool send_events) {
316  while (!utterance_queue_.empty()) {
317    Utterance* utterance = utterance_queue_.front();
318    utterance_queue_.pop();
319    if (send_events)
320      utterance->OnTtsEvent(TTS_EVENT_CANCELLED, kInvalidCharIndex,
321                            std::string());
322    else
323      utterance->Finish();
324    delete utterance;
325  }
326}
327
328void TtsController::SetPlatformImpl(
329    TtsPlatformImpl* platform_impl) {
330  platform_impl_ = platform_impl;
331}
332
333int TtsController::QueueSize() {
334  return static_cast<int>(utterance_queue_.size());
335}
336
337TtsPlatformImpl* TtsController::GetPlatformImpl() {
338  if (!platform_impl_)
339    platform_impl_ = TtsPlatformImpl::GetInstance();
340  return platform_impl_;
341}
342
343int TtsController::GetMatchingVoice(
344    const Utterance* utterance, std::vector<VoiceData>& voices) {
345  // Make two passes: the first time, do strict language matching
346  // ('fr-FR' does not match 'fr-CA'). The second time, do prefix
347  // language matching ('fr-FR' matches 'fr' and 'fr-CA')
348  for (int pass = 0; pass < 2; ++pass) {
349    for (size_t i = 0; i < voices.size(); ++i) {
350      const VoiceData& voice = voices[i];
351
352      if (!utterance->extension_id().empty() &&
353          utterance->extension_id() != voice.extension_id) {
354        continue;
355      }
356
357      if (!voice.name.empty() &&
358          !utterance->voice_name().empty() &&
359          voice.name != utterance->voice_name()) {
360        continue;
361      }
362      if (!voice.lang.empty() && !utterance->lang().empty()) {
363        std::string voice_lang = voice.lang;
364        std::string utterance_lang = utterance->lang();
365        if (pass == 1) {
366          voice_lang = TrimLanguageCode(voice_lang);
367          utterance_lang = TrimLanguageCode(utterance_lang);
368        }
369        if (voice_lang != utterance_lang) {
370          continue;
371        }
372      }
373      if (voice.gender != TTS_GENDER_NONE &&
374          utterance->gender() != TTS_GENDER_NONE &&
375          voice.gender != utterance->gender()) {
376        continue;
377      }
378
379      if (utterance->required_event_types().size() > 0) {
380        bool has_all_required_event_types = true;
381        for (std::set<TtsEventType>::const_iterator iter =
382                 utterance->required_event_types().begin();
383             iter != utterance->required_event_types().end();
384             ++iter) {
385          if (voice.events.find(*iter) == voice.events.end()) {
386            has_all_required_event_types = false;
387            break;
388          }
389        }
390        if (!has_all_required_event_types)
391          continue;
392      }
393
394      return static_cast<int>(i);
395    }
396  }
397
398  return -1;
399}
400
401void TtsController::VoicesChanged() {
402  for (std::set<VoicesChangedDelegate*>::iterator iter =
403           voices_changed_delegates_.begin();
404       iter != voices_changed_delegates_.end(); ++iter) {
405    (*iter)->OnVoicesChanged();
406  }
407}
408
409void TtsController::AddVoicesChangedDelegate(VoicesChangedDelegate* delegate) {
410  voices_changed_delegates_.insert(delegate);
411}
412
413void TtsController::RemoveVoicesChangedDelegate(
414    VoicesChangedDelegate* delegate) {
415  voices_changed_delegates_.erase(delegate);
416}
417
418