1// Copyright (c) 2012 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include "chrome/browser/speech/tts_controller.h" 6 7#include <string> 8#include <vector> 9 10#include "base/float_util.h" 11#include "base/values.h" 12#include "chrome/browser/extensions/extension_system.h" 13#include "chrome/browser/profiles/profile.h" 14#include "chrome/browser/speech/extension_api/tts_engine_extension_api.h" 15#include "chrome/browser/speech/extension_api/tts_extension_api.h" 16#include "chrome/browser/speech/tts_platform.h" 17#include "chrome/common/extensions/api/speech/tts_engine_manifest_handler.h" 18#include "chrome/common/extensions/extension.h" 19 20namespace { 21// A value to be used to indicate that there is no char index available. 22const int kInvalidCharIndex = -1; 23 24// Given a language/region code of the form 'fr-FR', returns just the basic 25// language portion, e.g. 'fr'. 26std::string TrimLanguageCode(std::string lang) { 27 if (lang.size() >= 5 && lang[2] == '-') 28 return lang.substr(0, 2); 29 else 30 return lang; 31} 32 33} // namespace 34 35bool IsFinalTtsEventType(TtsEventType event_type) { 36 return (event_type == TTS_EVENT_END || 37 event_type == TTS_EVENT_INTERRUPTED || 38 event_type == TTS_EVENT_CANCELLED || 39 event_type == TTS_EVENT_ERROR); 40} 41 42// 43// UtteranceContinuousParameters 44// 45 46 47UtteranceContinuousParameters::UtteranceContinuousParameters() 48 : rate(-1), 49 pitch(-1), 50 volume(-1) {} 51 52 53// 54// VoiceData 55// 56 57 58VoiceData::VoiceData() 59 : gender(TTS_GENDER_NONE), 60 native(false) {} 61 62VoiceData::~VoiceData() {} 63 64 65// 66// Utterance 67// 68 69// static 70int Utterance::next_utterance_id_ = 0; 71 72Utterance::Utterance(Profile* profile) 73 : profile_(profile), 74 id_(next_utterance_id_++), 75 src_id_(-1), 76 event_delegate_(NULL), 77 can_enqueue_(false), 78 char_index_(0), 79 finished_(false) { 80 options_.reset(new DictionaryValue()); 81} 82 83Utterance::~Utterance() { 84 DCHECK(finished_); 85} 86 87void Utterance::OnTtsEvent(TtsEventType event_type, 88 int char_index, 89 const std::string& error_message) { 90 if (char_index >= 0) 91 char_index_ = char_index; 92 if (IsFinalTtsEventType(event_type)) 93 finished_ = true; 94 95 if (event_delegate_) 96 event_delegate_->OnTtsEvent(this, event_type, char_index, error_message); 97 if (finished_) 98 event_delegate_ = NULL; 99} 100 101void Utterance::Finish() { 102 finished_ = true; 103} 104 105void Utterance::set_options(const Value* options) { 106 options_.reset(options->DeepCopy()); 107} 108 109// 110// TtsController 111// 112 113// static 114TtsController* TtsController::GetInstance() { 115 return Singleton<TtsController>::get(); 116} 117 118TtsController::TtsController() 119 : current_utterance_(NULL), 120 paused_(false), 121 platform_impl_(NULL) { 122} 123 124TtsController::~TtsController() { 125 if (current_utterance_) { 126 current_utterance_->Finish(); 127 delete current_utterance_; 128 } 129 130 // Clear any queued utterances too. 131 ClearUtteranceQueue(false); // Don't sent events. 132} 133 134void TtsController::SpeakOrEnqueue(Utterance* utterance) { 135 // If we're paused and we get an utterance that can't be queued, 136 // flush the queue but stay in the paused state. 137 if (paused_ && !utterance->can_enqueue()) { 138 Stop(); 139 paused_ = true; 140 return; 141 } 142 143 if (paused_ || (IsSpeaking() && utterance->can_enqueue())) { 144 utterance_queue_.push(utterance); 145 } else { 146 Stop(); 147 SpeakNow(utterance); 148 } 149} 150 151void TtsController::SpeakNow(Utterance* utterance) { 152 // Get all available voices and try to find a matching voice. 153 std::vector<VoiceData> voices; 154 GetVoices(utterance->profile(), &voices); 155 int index = GetMatchingVoice(utterance, voices); 156 157 // Select the matching voice, but if none was found, initialize an 158 // empty VoiceData with native = true, which will give the native 159 // speech synthesizer a chance to try to synthesize the utterance 160 // anyway. 161 VoiceData voice; 162 if (index >= 0 && index < static_cast<int>(voices.size())) 163 voice = voices[index]; 164 else 165 voice.native = true; 166 167 if (!voice.native) { 168#if !defined(OS_ANDROID) 169 DCHECK(!voice.extension_id.empty()); 170 current_utterance_ = utterance; 171 utterance->set_extension_id(voice.extension_id); 172 ExtensionTtsEngineSpeak(utterance, voice); 173 bool sends_end_event = 174 voice.events.find(TTS_EVENT_END) != voice.events.end(); 175 if (!sends_end_event) { 176 utterance->Finish(); 177 delete utterance; 178 current_utterance_ = NULL; 179 SpeakNextUtterance(); 180 } 181#endif 182 } else { 183 GetPlatformImpl()->clear_error(); 184 bool success = GetPlatformImpl()->Speak( 185 utterance->id(), 186 utterance->text(), 187 utterance->lang(), 188 voice, 189 utterance->continuous_parameters()); 190 191 // If the native voice wasn't able to process this speech, see if 192 // the browser has built-in TTS that isn't loaded yet. 193 if (!success && 194 GetPlatformImpl()->LoadBuiltInTtsExtension(utterance->profile())) { 195 utterance_queue_.push(utterance); 196 return; 197 } 198 199 if (!success) { 200 utterance->OnTtsEvent(TTS_EVENT_ERROR, kInvalidCharIndex, 201 GetPlatformImpl()->error()); 202 delete utterance; 203 return; 204 } 205 current_utterance_ = utterance; 206 } 207} 208 209void TtsController::Stop() { 210 paused_ = false; 211 if (current_utterance_ && !current_utterance_->extension_id().empty()) { 212#if !defined(OS_ANDROID) 213 ExtensionTtsEngineStop(current_utterance_); 214#endif 215 } else { 216 GetPlatformImpl()->clear_error(); 217 GetPlatformImpl()->StopSpeaking(); 218 } 219 220 if (current_utterance_) 221 current_utterance_->OnTtsEvent(TTS_EVENT_INTERRUPTED, kInvalidCharIndex, 222 std::string()); 223 FinishCurrentUtterance(); 224 ClearUtteranceQueue(true); // Send events. 225} 226 227void TtsController::Pause() { 228 paused_ = true; 229 if (current_utterance_ && !current_utterance_->extension_id().empty()) { 230#if !defined(OS_ANDROID) 231 ExtensionTtsEnginePause(current_utterance_); 232#endif 233 } else if (current_utterance_) { 234 GetPlatformImpl()->clear_error(); 235 GetPlatformImpl()->Pause(); 236 } 237} 238 239void TtsController::Resume() { 240 paused_ = false; 241 if (current_utterance_ && !current_utterance_->extension_id().empty()) { 242#if !defined(OS_ANDROID) 243 ExtensionTtsEngineResume(current_utterance_); 244#endif 245 } else if (current_utterance_) { 246 GetPlatformImpl()->clear_error(); 247 GetPlatformImpl()->Resume(); 248 } else { 249 SpeakNextUtterance(); 250 } 251} 252 253void TtsController::OnTtsEvent(int utterance_id, 254 TtsEventType event_type, 255 int char_index, 256 const std::string& error_message) { 257 // We may sometimes receive completion callbacks "late", after we've 258 // already finished the utterance (for example because another utterance 259 // interrupted or we got a call to Stop). This is normal and we can 260 // safely just ignore these events. 261 if (!current_utterance_ || utterance_id != current_utterance_->id()) 262 return; 263 264 current_utterance_->OnTtsEvent(event_type, char_index, error_message); 265 if (current_utterance_->finished()) { 266 FinishCurrentUtterance(); 267 SpeakNextUtterance(); 268 } 269} 270 271void TtsController::GetVoices(Profile* profile, 272 std::vector<VoiceData>* out_voices) { 273#if !defined(OS_ANDROID) 274 if (profile) 275 GetExtensionVoices(profile, out_voices); 276#endif 277 278 TtsPlatformImpl* platform_impl = GetPlatformImpl(); 279 if (platform_impl && platform_impl->PlatformImplAvailable()) 280 platform_impl->GetVoices(out_voices); 281} 282 283bool TtsController::IsSpeaking() { 284 return current_utterance_ != NULL || GetPlatformImpl()->IsSpeaking(); 285} 286 287void TtsController::FinishCurrentUtterance() { 288 if (current_utterance_) { 289 if (!current_utterance_->finished()) 290 current_utterance_->OnTtsEvent(TTS_EVENT_INTERRUPTED, kInvalidCharIndex, 291 std::string()); 292 delete current_utterance_; 293 current_utterance_ = NULL; 294 } 295} 296 297void TtsController::SpeakNextUtterance() { 298 if (paused_) 299 return; 300 301 // Start speaking the next utterance in the queue. Keep trying in case 302 // one fails but there are still more in the queue to try. 303 while (!utterance_queue_.empty() && !current_utterance_) { 304 Utterance* utterance = utterance_queue_.front(); 305 utterance_queue_.pop(); 306 SpeakNow(utterance); 307 } 308} 309 310void TtsController::RetrySpeakingQueuedUtterances() { 311 if (current_utterance_ == NULL && !utterance_queue_.empty()) 312 SpeakNextUtterance(); 313} 314 315void TtsController::ClearUtteranceQueue(bool send_events) { 316 while (!utterance_queue_.empty()) { 317 Utterance* utterance = utterance_queue_.front(); 318 utterance_queue_.pop(); 319 if (send_events) 320 utterance->OnTtsEvent(TTS_EVENT_CANCELLED, kInvalidCharIndex, 321 std::string()); 322 else 323 utterance->Finish(); 324 delete utterance; 325 } 326} 327 328void TtsController::SetPlatformImpl( 329 TtsPlatformImpl* platform_impl) { 330 platform_impl_ = platform_impl; 331} 332 333int TtsController::QueueSize() { 334 return static_cast<int>(utterance_queue_.size()); 335} 336 337TtsPlatformImpl* TtsController::GetPlatformImpl() { 338 if (!platform_impl_) 339 platform_impl_ = TtsPlatformImpl::GetInstance(); 340 return platform_impl_; 341} 342 343int TtsController::GetMatchingVoice( 344 const Utterance* utterance, std::vector<VoiceData>& voices) { 345 // Make two passes: the first time, do strict language matching 346 // ('fr-FR' does not match 'fr-CA'). The second time, do prefix 347 // language matching ('fr-FR' matches 'fr' and 'fr-CA') 348 for (int pass = 0; pass < 2; ++pass) { 349 for (size_t i = 0; i < voices.size(); ++i) { 350 const VoiceData& voice = voices[i]; 351 352 if (!utterance->extension_id().empty() && 353 utterance->extension_id() != voice.extension_id) { 354 continue; 355 } 356 357 if (!voice.name.empty() && 358 !utterance->voice_name().empty() && 359 voice.name != utterance->voice_name()) { 360 continue; 361 } 362 if (!voice.lang.empty() && !utterance->lang().empty()) { 363 std::string voice_lang = voice.lang; 364 std::string utterance_lang = utterance->lang(); 365 if (pass == 1) { 366 voice_lang = TrimLanguageCode(voice_lang); 367 utterance_lang = TrimLanguageCode(utterance_lang); 368 } 369 if (voice_lang != utterance_lang) { 370 continue; 371 } 372 } 373 if (voice.gender != TTS_GENDER_NONE && 374 utterance->gender() != TTS_GENDER_NONE && 375 voice.gender != utterance->gender()) { 376 continue; 377 } 378 379 if (utterance->required_event_types().size() > 0) { 380 bool has_all_required_event_types = true; 381 for (std::set<TtsEventType>::const_iterator iter = 382 utterance->required_event_types().begin(); 383 iter != utterance->required_event_types().end(); 384 ++iter) { 385 if (voice.events.find(*iter) == voice.events.end()) { 386 has_all_required_event_types = false; 387 break; 388 } 389 } 390 if (!has_all_required_event_types) 391 continue; 392 } 393 394 return static_cast<int>(i); 395 } 396 } 397 398 return -1; 399} 400 401void TtsController::VoicesChanged() { 402 for (std::set<VoicesChangedDelegate*>::iterator iter = 403 voices_changed_delegates_.begin(); 404 iter != voices_changed_delegates_.end(); ++iter) { 405 (*iter)->OnVoicesChanged(); 406 } 407} 408 409void TtsController::AddVoicesChangedDelegate(VoicesChangedDelegate* delegate) { 410 voices_changed_delegates_.insert(delegate); 411} 412 413void TtsController::RemoveVoicesChangedDelegate( 414 VoicesChangedDelegate* delegate) { 415 voices_changed_delegates_.erase(delegate); 416} 417 418