// tts_controller.cc revision d57369da7c6519fef57db42085f7b42d4c8845c1
1// Copyright (c) 2012 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include "chrome/browser/speech/tts_controller.h" 6 7#include <string> 8#include <vector> 9 10#include "base/float_util.h" 11#include "base/values.h" 12#include "chrome/browser/extensions/extension_system.h" 13#include "chrome/browser/profiles/profile.h" 14#include "chrome/browser/speech/extension_api/tts_engine_extension_api.h" 15#include "chrome/browser/speech/extension_api/tts_extension_api.h" 16#include "chrome/browser/speech/tts_platform.h" 17#include "chrome/common/extensions/api/speech/tts_engine_manifest_handler.h" 18#include "extensions/common/extension.h" 19 20namespace { 21// A value to be used to indicate that there is no char index available. 22const int kInvalidCharIndex = -1; 23 24// Given a language/region code of the form 'fr-FR', returns just the basic 25// language portion, e.g. 'fr'. 26std::string TrimLanguageCode(std::string lang) { 27 if (lang.size() >= 5 && lang[2] == '-') 28 return lang.substr(0, 2); 29 else 30 return lang; 31} 32 33} // namespace 34 35bool IsFinalTtsEventType(TtsEventType event_type) { 36 return (event_type == TTS_EVENT_END || 37 event_type == TTS_EVENT_INTERRUPTED || 38 event_type == TTS_EVENT_CANCELLED || 39 event_type == TTS_EVENT_ERROR); 40} 41 42// 43// UtteranceContinuousParameters 44// 45 46 47UtteranceContinuousParameters::UtteranceContinuousParameters() 48 : rate(-1), 49 pitch(-1), 50 volume(-1) {} 51 52 53// 54// VoiceData 55// 56 57 58VoiceData::VoiceData() 59 : gender(TTS_GENDER_NONE), 60 remote(false), 61 native(false) {} 62 63VoiceData::~VoiceData() {} 64 65 66// 67// Utterance 68// 69 70// static 71int Utterance::next_utterance_id_ = 0; 72 73Utterance::Utterance(Profile* profile) 74 : profile_(profile), 75 id_(next_utterance_id_++), 76 src_id_(-1), 77 event_delegate_(NULL), 78 gender_(TTS_GENDER_NONE), 79 can_enqueue_(false), 80 
char_index_(0), 81 finished_(false) { 82 options_.reset(new DictionaryValue()); 83} 84 85Utterance::~Utterance() { 86 DCHECK(finished_); 87} 88 89void Utterance::OnTtsEvent(TtsEventType event_type, 90 int char_index, 91 const std::string& error_message) { 92 if (char_index >= 0) 93 char_index_ = char_index; 94 if (IsFinalTtsEventType(event_type)) 95 finished_ = true; 96 97 if (event_delegate_) 98 event_delegate_->OnTtsEvent(this, event_type, char_index, error_message); 99 if (finished_) 100 event_delegate_ = NULL; 101} 102 103void Utterance::Finish() { 104 finished_ = true; 105} 106 107void Utterance::set_options(const Value* options) { 108 options_.reset(options->DeepCopy()); 109} 110 111// 112// TtsController 113// 114 115// static 116TtsController* TtsController::GetInstance() { 117 return Singleton<TtsController>::get(); 118} 119 120TtsController::TtsController() 121 : current_utterance_(NULL), 122 paused_(false), 123 platform_impl_(NULL) { 124} 125 126TtsController::~TtsController() { 127 if (current_utterance_) { 128 current_utterance_->Finish(); 129 delete current_utterance_; 130 } 131 132 // Clear any queued utterances too. 133 ClearUtteranceQueue(false); // Don't sent events. 134} 135 136void TtsController::SpeakOrEnqueue(Utterance* utterance) { 137 // If we're paused and we get an utterance that can't be queued, 138 // flush the queue but stay in the paused state. 139 if (paused_ && !utterance->can_enqueue()) { 140 Stop(); 141 paused_ = true; 142 return; 143 } 144 145 if (paused_ || (IsSpeaking() && utterance->can_enqueue())) { 146 utterance_queue_.push(utterance); 147 } else { 148 Stop(); 149 SpeakNow(utterance); 150 } 151} 152 153void TtsController::SpeakNow(Utterance* utterance) { 154 // Get all available voices and try to find a matching voice. 
155 std::vector<VoiceData> voices; 156 GetVoices(utterance->profile(), &voices); 157 int index = GetMatchingVoice(utterance, voices); 158 159 // Select the matching voice, but if none was found, initialize an 160 // empty VoiceData with native = true, which will give the native 161 // speech synthesizer a chance to try to synthesize the utterance 162 // anyway. 163 VoiceData voice; 164 if (index >= 0 && index < static_cast<int>(voices.size())) 165 voice = voices[index]; 166 else 167 voice.native = true; 168 169 GetPlatformImpl()->WillSpeakUtteranceWithVoice(utterance, voice); 170 171 if (!voice.native) { 172#if !defined(OS_ANDROID) 173 DCHECK(!voice.extension_id.empty()); 174 current_utterance_ = utterance; 175 utterance->set_extension_id(voice.extension_id); 176 ExtensionTtsEngineSpeak(utterance, voice); 177 bool sends_end_event = 178 voice.events.find(TTS_EVENT_END) != voice.events.end(); 179 if (!sends_end_event) { 180 utterance->Finish(); 181 delete utterance; 182 current_utterance_ = NULL; 183 SpeakNextUtterance(); 184 } 185#endif 186 } else { 187 // It's possible for certain platforms to send start events immediately 188 // during |speak|. 
189 current_utterance_ = utterance; 190 GetPlatformImpl()->clear_error(); 191 bool success = GetPlatformImpl()->Speak( 192 utterance->id(), 193 utterance->text(), 194 utterance->lang(), 195 voice, 196 utterance->continuous_parameters()); 197 if (!success) 198 current_utterance_ = NULL; 199 200 if (!success) { 201 utterance->OnTtsEvent(TTS_EVENT_ERROR, kInvalidCharIndex, 202 GetPlatformImpl()->error()); 203 delete utterance; 204 return; 205 } 206 } 207} 208 209void TtsController::Stop() { 210 paused_ = false; 211 if (current_utterance_ && !current_utterance_->extension_id().empty()) { 212#if !defined(OS_ANDROID) 213 ExtensionTtsEngineStop(current_utterance_); 214#endif 215 } else { 216 GetPlatformImpl()->clear_error(); 217 GetPlatformImpl()->StopSpeaking(); 218 } 219 220 if (current_utterance_) 221 current_utterance_->OnTtsEvent(TTS_EVENT_INTERRUPTED, kInvalidCharIndex, 222 std::string()); 223 FinishCurrentUtterance(); 224 ClearUtteranceQueue(true); // Send events. 225} 226 227void TtsController::Pause() { 228 paused_ = true; 229 if (current_utterance_ && !current_utterance_->extension_id().empty()) { 230#if !defined(OS_ANDROID) 231 ExtensionTtsEnginePause(current_utterance_); 232#endif 233 } else if (current_utterance_) { 234 GetPlatformImpl()->clear_error(); 235 GetPlatformImpl()->Pause(); 236 } 237} 238 239void TtsController::Resume() { 240 paused_ = false; 241 if (current_utterance_ && !current_utterance_->extension_id().empty()) { 242#if !defined(OS_ANDROID) 243 ExtensionTtsEngineResume(current_utterance_); 244#endif 245 } else if (current_utterance_) { 246 GetPlatformImpl()->clear_error(); 247 GetPlatformImpl()->Resume(); 248 } else { 249 SpeakNextUtterance(); 250 } 251} 252 253void TtsController::OnTtsEvent(int utterance_id, 254 TtsEventType event_type, 255 int char_index, 256 const std::string& error_message) { 257 // We may sometimes receive completion callbacks "late", after we've 258 // already finished the utterance (for example because another utterance 
259 // interrupted or we got a call to Stop). This is normal and we can 260 // safely just ignore these events. 261 if (!current_utterance_ || utterance_id != current_utterance_->id()) { 262 return; 263 } 264 current_utterance_->OnTtsEvent(event_type, char_index, error_message); 265 if (current_utterance_->finished()) { 266 FinishCurrentUtterance(); 267 SpeakNextUtterance(); 268 } 269} 270 271void TtsController::GetVoices(Profile* profile, 272 std::vector<VoiceData>* out_voices) { 273#if !defined(OS_ANDROID) 274 if (profile) 275 GetExtensionVoices(profile, out_voices); 276#endif 277 278 TtsPlatformImpl* platform_impl = GetPlatformImpl(); 279 if (platform_impl && platform_impl->PlatformImplAvailable()) 280 platform_impl->GetVoices(out_voices); 281} 282 283bool TtsController::IsSpeaking() { 284 return current_utterance_ != NULL || GetPlatformImpl()->IsSpeaking(); 285} 286 287void TtsController::FinishCurrentUtterance() { 288 if (current_utterance_) { 289 if (!current_utterance_->finished()) 290 current_utterance_->OnTtsEvent(TTS_EVENT_INTERRUPTED, kInvalidCharIndex, 291 std::string()); 292 delete current_utterance_; 293 current_utterance_ = NULL; 294 } 295} 296 297void TtsController::SpeakNextUtterance() { 298 if (paused_) 299 return; 300 301 // Start speaking the next utterance in the queue. Keep trying in case 302 // one fails but there are still more in the queue to try. 
303 while (!utterance_queue_.empty() && !current_utterance_) { 304 Utterance* utterance = utterance_queue_.front(); 305 utterance_queue_.pop(); 306 SpeakNow(utterance); 307 } 308} 309 310void TtsController::ClearUtteranceQueue(bool send_events) { 311 while (!utterance_queue_.empty()) { 312 Utterance* utterance = utterance_queue_.front(); 313 utterance_queue_.pop(); 314 if (send_events) 315 utterance->OnTtsEvent(TTS_EVENT_CANCELLED, kInvalidCharIndex, 316 std::string()); 317 else 318 utterance->Finish(); 319 delete utterance; 320 } 321} 322 323void TtsController::SetPlatformImpl( 324 TtsPlatformImpl* platform_impl) { 325 platform_impl_ = platform_impl; 326} 327 328int TtsController::QueueSize() { 329 return static_cast<int>(utterance_queue_.size()); 330} 331 332TtsPlatformImpl* TtsController::GetPlatformImpl() { 333 if (!platform_impl_) 334 platform_impl_ = TtsPlatformImpl::GetInstance(); 335 return platform_impl_; 336} 337 338int TtsController::GetMatchingVoice( 339 const Utterance* utterance, std::vector<VoiceData>& voices) { 340 // Make two passes: the first time, do strict language matching 341 // ('fr-FR' does not match 'fr-CA'). 
The second time, do prefix 342 // language matching ('fr-FR' matches 'fr' and 'fr-CA') 343 for (int pass = 0; pass < 2; ++pass) { 344 for (size_t i = 0; i < voices.size(); ++i) { 345 const VoiceData& voice = voices[i]; 346 347 if (!utterance->extension_id().empty() && 348 utterance->extension_id() != voice.extension_id) { 349 continue; 350 } 351 352 if (!voice.name.empty() && 353 !utterance->voice_name().empty() && 354 voice.name != utterance->voice_name()) { 355 continue; 356 } 357 if (!voice.lang.empty() && !utterance->lang().empty()) { 358 std::string voice_lang = voice.lang; 359 std::string utterance_lang = utterance->lang(); 360 if (pass == 1) { 361 voice_lang = TrimLanguageCode(voice_lang); 362 utterance_lang = TrimLanguageCode(utterance_lang); 363 } 364 if (voice_lang != utterance_lang) { 365 continue; 366 } 367 } 368 if (voice.gender != TTS_GENDER_NONE && 369 utterance->gender() != TTS_GENDER_NONE && 370 voice.gender != utterance->gender()) { 371 continue; 372 } 373 374 if (utterance->required_event_types().size() > 0) { 375 bool has_all_required_event_types = true; 376 for (std::set<TtsEventType>::const_iterator iter = 377 utterance->required_event_types().begin(); 378 iter != utterance->required_event_types().end(); 379 ++iter) { 380 if (voice.events.find(*iter) == voice.events.end()) { 381 has_all_required_event_types = false; 382 break; 383 } 384 } 385 if (!has_all_required_event_types) 386 continue; 387 } 388 389 return static_cast<int>(i); 390 } 391 } 392 393 return -1; 394} 395 396void TtsController::VoicesChanged() { 397 for (std::set<VoicesChangedDelegate*>::iterator iter = 398 voices_changed_delegates_.begin(); 399 iter != voices_changed_delegates_.end(); ++iter) { 400 (*iter)->OnVoicesChanged(); 401 } 402} 403 404void TtsController::AddVoicesChangedDelegate(VoicesChangedDelegate* delegate) { 405 voices_changed_delegates_.insert(delegate); 406} 407 408void TtsController::RemoveVoicesChangedDelegate( 409 VoicesChangedDelegate* delegate) { 410 
voices_changed_delegates_.erase(delegate); 411} 412