speech_recognizer_impl.cc revision 868fa2fe829687343ffae624259930155e16dbd8
// Copyright (c) 2013 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

#include "content/browser/speech/speech_recognizer_impl.h"

#include "base/basictypes.h"
#include "base/bind.h"
#include "base/time.h"
#include "content/browser/browser_main_loop.h"
#include "content/browser/speech/audio_buffer.h"
#include "content/browser/speech/google_one_shot_remote_engine.h"
#include "content/public/browser/browser_thread.h"
#include "content/public/browser/speech_recognition_event_listener.h"
#include "net/url_request/url_request_context_getter.h"

using media::AudioInputController;
using media::AudioManager;
using media::AudioParameters;
using media::ChannelLayout;

namespace content {
namespace {

// The following constants are related to the volume level indicator shown in
// the UI for recorded audio.
// Multiplier used when the new volume is greater than the previous level.
const float kUpSmoothingFactor = 1.0f;
// Multiplier used when the new volume is less than the previous level.
const float kDownSmoothingFactor = 0.7f;
// RMS dB value of a maximum (unclipped) sine wave for int16 samples.
const float kAudioMeterMaxDb = 90.31f;
// This value corresponds to RMS dB for int16 with 6 most-significant-bits = 0.
// Values lower than this will display as an empty level-meter.
const float kAudioMeterMinDb = 30.0f;
const float kAudioMeterDbRange = kAudioMeterMaxDb - kAudioMeterMinDb;

// Maximum level to draw to display an unclipped meter. (1.0f displays clipping.)
const float kAudioMeterRangeMaxUnclipped = 47.0f / 48.0f;

// Returns true if more than 5% of the samples are at the min or max value.
bool DetectClipping(const AudioChunk& chunk) {
  const int num_samples = chunk.NumSamples();
  const int16* samples = chunk.SamplesData16();
  const int kThreshold = num_samples / 20;
  int clipping_samples = 0;

  for (int i = 0; i < num_samples; ++i) {
    if (samples[i] <= -32767 || samples[i] >= 32767) {
      if (++clipping_samples > kThreshold)
        return true;
    }
  }
  return false;
}

void KeepAudioControllerRefcountedForDtor(scoped_refptr<AudioInputController>) {
}

}  // namespace

const int SpeechRecognizerImpl::kAudioSampleRate = 16000;
const ChannelLayout SpeechRecognizerImpl::kChannelLayout =
    media::CHANNEL_LAYOUT_MONO;
const int SpeechRecognizerImpl::kNumBitsPerAudioSample = 16;
const int SpeechRecognizerImpl::kNoSpeechTimeoutMs = 8000;
const int SpeechRecognizerImpl::kEndpointerEstimationTimeMs = 300;
media::AudioManager* SpeechRecognizerImpl::audio_manager_for_tests_ = NULL;

COMPILE_ASSERT(SpeechRecognizerImpl::kNumBitsPerAudioSample % 8 == 0,
               kNumBitsPerAudioSample_must_be_a_multiple_of_8);

SpeechRecognizerImpl::SpeechRecognizerImpl(
    SpeechRecognitionEventListener* listener,
    int session_id,
    bool is_single_shot,
    SpeechRecognitionEngine* engine)
    : SpeechRecognizer(listener, session_id),
      recognition_engine_(engine),
      endpointer_(kAudioSampleRate),
      is_dispatching_event_(false),
      is_single_shot_(is_single_shot),
      state_(STATE_IDLE) {
  DCHECK(this->listener() != NULL);
  DCHECK(recognition_engine_ != NULL);
  if (is_single_shot) {
    // In single-shot recognition, the session is automatically ended after:
    //  - 0.5 seconds of silence if time < 3 seconds;
    //  - 1 second of silence if time >= 3 seconds.
    endpointer_.set_speech_input_complete_silence_length(
        base::Time::kMicrosecondsPerSecond / 2);
    endpointer_.set_long_speech_input_complete_silence_length(
        base::Time::kMicrosecondsPerSecond);
    endpointer_.set_long_speech_length(3 * base::Time::kMicrosecondsPerSecond);
  } else {
    // In continuous recognition, the session is automatically ended after 15
    // seconds of silence.
    const int64 cont_timeout_us = base::Time::kMicrosecondsPerSecond * 15;
    endpointer_.set_speech_input_complete_silence_length(cont_timeout_us);
    endpointer_.set_long_speech_length(0);  // Use only a single timeout.
  }
  endpointer_.StartSession();
  recognition_engine_->set_delegate(this);
}

// ------- Methods that trigger Finite State Machine (FSM) events ------------

// NOTE: all external events and requests should be enqueued (PostTask), even
// if they come from the same (IO) thread, in order to preserve the causal
// relationship between events and to avoid interleaved event processing due
// to synchronous callbacks.

void SpeechRecognizerImpl::StartRecognition() {
  BrowserThread::PostTask(BrowserThread::IO, FROM_HERE,
                          base::Bind(&SpeechRecognizerImpl::DispatchEvent,
                                     this, FSMEventArgs(EVENT_START)));
}

void SpeechRecognizerImpl::AbortRecognition() {
  BrowserThread::PostTask(BrowserThread::IO, FROM_HERE,
                          base::Bind(&SpeechRecognizerImpl::DispatchEvent,
                                     this, FSMEventArgs(EVENT_ABORT)));
}

void SpeechRecognizerImpl::StopAudioCapture() {
  BrowserThread::PostTask(BrowserThread::IO, FROM_HERE,
                          base::Bind(&SpeechRecognizerImpl::DispatchEvent,
                                     this, FSMEventArgs(EVENT_STOP_CAPTURE)));
}

bool SpeechRecognizerImpl::IsActive() const {
  // Checking the FSM state from another thread (thus, while the FSM is
  // potentially concurrently evolving) is meaningless.
  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
  return state_ != STATE_IDLE && state_ != STATE_ENDED;
}

bool SpeechRecognizerImpl::IsCapturingAudio() const {
  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));  // See IsActive().
  const bool is_capturing_audio = state_ >= STATE_STARTING &&
                                  state_ <= STATE_RECOGNIZING;
  DCHECK((is_capturing_audio && (audio_controller_.get() != NULL)) ||
         (!is_capturing_audio && audio_controller_.get() == NULL));
  return is_capturing_audio;
}

const SpeechRecognitionEngine&
SpeechRecognizerImpl::recognition_engine() const {
  return *(recognition_engine_.get());
}

SpeechRecognizerImpl::~SpeechRecognizerImpl() {
  endpointer_.EndSession();
  if (audio_controller_.get()) {
    audio_controller_->Close(
        base::Bind(&KeepAudioControllerRefcountedForDtor, audio_controller_));
  }
}

// Invoked in the audio thread.
void SpeechRecognizerImpl::OnError(AudioInputController* controller) {
  FSMEventArgs event_args(EVENT_AUDIO_ERROR);
  BrowserThread::PostTask(BrowserThread::IO, FROM_HERE,
                          base::Bind(&SpeechRecognizerImpl::DispatchEvent,
                                     this, event_args));
}

void SpeechRecognizerImpl::OnData(AudioInputController* controller,
                                  const uint8* data, uint32 size) {
  if (size == 0)  // This can happen when audio capture stops; it is normal.
    return;

  FSMEventArgs event_args(EVENT_AUDIO_DATA);
  event_args.audio_data = new AudioChunk(data, static_cast<size_t>(size),
                                         kNumBitsPerAudioSample / 8);
  BrowserThread::PostTask(BrowserThread::IO, FROM_HERE,
                          base::Bind(&SpeechRecognizerImpl::DispatchEvent,
                                     this, event_args));
}

void SpeechRecognizerImpl::OnAudioClosed(AudioInputController*) {}

void SpeechRecognizerImpl::OnSpeechRecognitionEngineResults(
    const SpeechRecognitionResults& results) {
  FSMEventArgs event_args(EVENT_ENGINE_RESULT);
  event_args.engine_results = results;
  BrowserThread::PostTask(BrowserThread::IO, FROM_HERE,
                          base::Bind(&SpeechRecognizerImpl::DispatchEvent,
                                     this, event_args));
}

void SpeechRecognizerImpl::OnSpeechRecognitionEngineError(
    const SpeechRecognitionError& error) {
  FSMEventArgs event_args(EVENT_ENGINE_ERROR);
  event_args.engine_error = error;
  BrowserThread::PostTask(BrowserThread::IO, FROM_HERE,
                          base::Bind(&SpeechRecognizerImpl::DispatchEvent,
                                     this, event_args));
}

// ----------------------- Core FSM implementation ---------------------------
// TODO(primiano): After the changes in the media package (r129173), this class
// slightly violates the SpeechRecognitionEventListener interface contract. In
// particular, it is no longer true that this class can be freed after the
// OnRecognitionEnd event, since the asynchronous audio_controller_.Close()
// call can still be in progress after the end event. Currently this is not a
// problem for the browser itself, since refcounting protects us against such
// race conditions. However, we should fix this in the next CLs. For instance,
// tests currently work only because TestAudioInputController does not close
// asynchronously as the real controller does, but they will become flaky when
// TestAudioInputController is fixed.

void SpeechRecognizerImpl::DispatchEvent(const FSMEventArgs& event_args) {
  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
  DCHECK_LE(event_args.event, EVENT_MAX_VALUE);
  DCHECK_LE(state_, STATE_MAX_VALUE);

  // Event dispatching must be sequential, otherwise it would break the rules
  // and assumptions of the finite state automata model.
  DCHECK(!is_dispatching_event_);
  is_dispatching_event_ = true;

  // Guard against the delegate freeing us until we finish processing the event.
  scoped_refptr<SpeechRecognizerImpl> me(this);

  if (event_args.event == EVENT_AUDIO_DATA) {
    DCHECK(event_args.audio_data.get() != NULL);
    ProcessAudioPipeline(*event_args.audio_data.get());
  }

  // The audio pipeline must be processed before the event dispatch, otherwise
  // it would take actions according to the future state instead of the current
  // one.
  state_ = ExecuteTransitionAndGetNextState(event_args);
  is_dispatching_event_ = false;
}

SpeechRecognizerImpl::FSMState
SpeechRecognizerImpl::ExecuteTransitionAndGetNextState(
    const FSMEventArgs& event_args) {
  const FSMEvent event = event_args.event;
  switch (state_) {
    case STATE_IDLE:
      switch (event) {
        // TODO(primiano): restore UNREACHABLE_CONDITION on EVENT_ABORT and
        // EVENT_STOP_CAPTURE below once speech input extensions are fixed.
        case EVENT_ABORT:
          return AbortSilently(event_args);
        case EVENT_START:
          return StartRecording(event_args);
        case EVENT_STOP_CAPTURE:
          return AbortSilently(event_args);
        case EVENT_AUDIO_DATA:     // Corner cases related to queued messages
        case EVENT_ENGINE_RESULT:  // being dispatched late.
        case EVENT_ENGINE_ERROR:
        case EVENT_AUDIO_ERROR:
          return DoNothing(event_args);
      }
      break;
    case STATE_STARTING:
      switch (event) {
        case EVENT_ABORT:
          return AbortWithError(event_args);
        case EVENT_START:
          return NotFeasible(event_args);
        case EVENT_STOP_CAPTURE:
          return AbortSilently(event_args);
        case EVENT_AUDIO_DATA:
          return StartRecognitionEngine(event_args);
        case EVENT_ENGINE_RESULT:
          return NotFeasible(event_args);
        case EVENT_ENGINE_ERROR:
        case EVENT_AUDIO_ERROR:
          return AbortWithError(event_args);
      }
      break;
    case STATE_ESTIMATING_ENVIRONMENT:
      switch (event) {
        case EVENT_ABORT:
          return AbortWithError(event_args);
        case EVENT_START:
          return NotFeasible(event_args);
        case EVENT_STOP_CAPTURE:
          return StopCaptureAndWaitForResult(event_args);
        case EVENT_AUDIO_DATA:
          return WaitEnvironmentEstimationCompletion(event_args);
        case EVENT_ENGINE_RESULT:
          return ProcessIntermediateResult(event_args);
        case EVENT_ENGINE_ERROR:
        case EVENT_AUDIO_ERROR:
          return AbortWithError(event_args);
      }
      break;
    case STATE_WAITING_FOR_SPEECH:
      switch (event) {
        case EVENT_ABORT:
          return AbortWithError(event_args);
        case EVENT_START:
          return NotFeasible(event_args);
        case EVENT_STOP_CAPTURE:
          return StopCaptureAndWaitForResult(event_args);
        case EVENT_AUDIO_DATA:
          return DetectUserSpeechOrTimeout(event_args);
        case EVENT_ENGINE_RESULT:
          return ProcessIntermediateResult(event_args);
        case EVENT_ENGINE_ERROR:
        case EVENT_AUDIO_ERROR:
          return AbortWithError(event_args);
      }
      break;
    case STATE_RECOGNIZING:
      switch (event) {
        case EVENT_ABORT:
          return AbortWithError(event_args);
        case EVENT_START:
          return NotFeasible(event_args);
        case EVENT_STOP_CAPTURE:
          return StopCaptureAndWaitForResult(event_args);
        case EVENT_AUDIO_DATA:
          return DetectEndOfSpeech(event_args);
        case EVENT_ENGINE_RESULT:
          return ProcessIntermediateResult(event_args);
        case EVENT_ENGINE_ERROR:
        case EVENT_AUDIO_ERROR:
          return AbortWithError(event_args);
      }
      break;
    case STATE_WAITING_FINAL_RESULT:
      switch (event) {
        case EVENT_ABORT:
          return AbortWithError(event_args);
        case EVENT_START:
          return NotFeasible(event_args);
        case EVENT_STOP_CAPTURE:
        case EVENT_AUDIO_DATA:
          return DoNothing(event_args);
        case EVENT_ENGINE_RESULT:
          return ProcessFinalResult(event_args);
        case EVENT_ENGINE_ERROR:
        case EVENT_AUDIO_ERROR:
          return AbortWithError(event_args);
      }
      break;

    // TODO(primiano): remove this state when speech input extensions support
    // is removed and STATE_IDLE's EVENT_ABORT and EVENT_STOP_CAPTURE are
    // reset to NotFeasible (see the TODO above).
    case STATE_ENDED:
      return DoNothing(event_args);
  }
  return NotFeasible(event_args);
}
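
// Nominal (error-free) flow implemented by the transition table above:
//   STATE_IDLE --EVENT_START--> STATE_STARTING
//   --first EVENT_AUDIO_DATA--> STATE_ESTIMATING_ENVIRONMENT
//   --after kEndpointerEstimationTimeMs--> STATE_WAITING_FOR_SPEECH
//   --speech detected--> STATE_RECOGNIZING
//   --end of speech or StopAudioCapture()--> STATE_WAITING_FINAL_RESULT
//   --final EVENT_ENGINE_RESULT--> STATE_ENDED.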

// ----------- Contract for all the FSM evolution functions below -------------
//  - They are guaranteed to be executed on the IO thread;
//  - They are guaranteed not to be reentrant (with themselves or each other);
//  - event_args members are guaranteed to be stable during the call;
//  - The class won't be freed in the meanwhile due to callbacks;
//  - IsCapturingAudio() returns true if and only if audio_controller_ != NULL.

// TODO(primiano): the audio pipeline is currently serial. However, the
// clipper->endpointer->vumeter chain and the sr_engine could be parallelized.
// We should profile the execution to see whether it would be worth it.
void SpeechRecognizerImpl::ProcessAudioPipeline(const AudioChunk& raw_audio) {
  const bool route_to_endpointer = state_ >= STATE_ESTIMATING_ENVIRONMENT &&
                                   state_ <= STATE_RECOGNIZING;
  const bool route_to_sr_engine = route_to_endpointer;
  const bool route_to_vumeter = state_ >= STATE_WAITING_FOR_SPEECH &&
                                state_ <= STATE_RECOGNIZING;
  const bool clip_detected = DetectClipping(raw_audio);
  float rms = 0.0f;

  num_samples_recorded_ += raw_audio.NumSamples();

  if (route_to_endpointer)
    endpointer_.ProcessAudio(raw_audio, &rms);

  if (route_to_vumeter) {
    DCHECK(route_to_endpointer);  // Depends on the endpointer due to |rms|.
    UpdateSignalAndNoiseLevels(rms, clip_detected);
  }
  if (route_to_sr_engine) {
    DCHECK(recognition_engine_.get() != NULL);
    recognition_engine_->TakeAudioChunk(raw_audio);
  }
}

SpeechRecognizerImpl::FSMState
SpeechRecognizerImpl::StartRecording(const FSMEventArgs&) {
  DCHECK(recognition_engine_.get() != NULL);
  DCHECK(!IsCapturingAudio());
  AudioManager* audio_manager = (audio_manager_for_tests_ != NULL) ?
                                audio_manager_for_tests_ :
                                BrowserMainLoop::GetAudioManager();
  DCHECK(audio_manager != NULL);

  DVLOG(1) << "SpeechRecognizerImpl starting audio capture.";
  num_samples_recorded_ = 0;
  audio_level_ = 0;
  listener()->OnRecognitionStart(session_id());

  if (!audio_manager->HasAudioInputDevices()) {
    return Abort(SpeechRecognitionError(SPEECH_RECOGNITION_ERROR_AUDIO,
                                        SPEECH_AUDIO_ERROR_DETAILS_NO_MIC));
  }

  const int samples_per_packet = (kAudioSampleRate *
      recognition_engine_->GetDesiredAudioChunkDurationMs()) / 1000;
  AudioParameters params(AudioParameters::AUDIO_PCM_LINEAR, kChannelLayout,
                         kAudioSampleRate, kNumBitsPerAudioSample,
                         samples_per_packet);
  audio_controller_ = AudioInputController::Create(audio_manager, this, params);

  if (audio_controller_.get() == NULL) {
    return Abort(SpeechRecognitionError(SPEECH_RECOGNITION_ERROR_AUDIO));
  }

  // The endpointer needs to estimate the environment/background noise before
  // starting to treat the audio as user input. We wait in the
  // ESTIMATING_ENVIRONMENT state until that interval has elapsed before
  // switching to user input mode.
  endpointer_.SetEnvironmentEstimationMode();
  audio_controller_->Record();
  return STATE_STARTING;
}
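
// Capture format note: audio is recorded as 16 kHz, mono, 16-bit PCM (see the
// constants at the top of this file). The chunk duration is chosen by the
// recognition engine; for example, assuming a hypothetical 100 ms desired
// chunk duration, samples_per_packet above would be 16000 * 100 / 1000 = 1600
// samples (3200 bytes) per OnData() callback, and GetElapsedTimeMs() would
// advance by 100 ms per chunk.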

SpeechRecognizerImpl::FSMState
SpeechRecognizerImpl::StartRecognitionEngine(const FSMEventArgs& event_args) {
  // This is the first audio packet captured, so the recognition engine is
  // started and the delegate notified about the event.
  DCHECK(recognition_engine_.get() != NULL);
  recognition_engine_->StartRecognition();
  listener()->OnAudioStart(session_id());

  // This is a little hack, since TakeAudioChunk() is already called by
  // ProcessAudioPipeline(). It is the best tradeoff, unless we allow dropping
  // the first audio chunk captured after opening the audio device.
  recognition_engine_->TakeAudioChunk(*(event_args.audio_data.get()));
  return STATE_ESTIMATING_ENVIRONMENT;
}

SpeechRecognizerImpl::FSMState
SpeechRecognizerImpl::WaitEnvironmentEstimationCompletion(const FSMEventArgs&) {
  DCHECK(endpointer_.IsEstimatingEnvironment());
  if (GetElapsedTimeMs() >= kEndpointerEstimationTimeMs) {
    endpointer_.SetUserInputMode();
    listener()->OnEnvironmentEstimationComplete(session_id());
    return STATE_WAITING_FOR_SPEECH;
  } else {
    return STATE_ESTIMATING_ENVIRONMENT;
  }
}

SpeechRecognizerImpl::FSMState
SpeechRecognizerImpl::DetectUserSpeechOrTimeout(const FSMEventArgs&) {
  if (endpointer_.DidStartReceivingSpeech()) {
    listener()->OnSoundStart(session_id());
    return STATE_RECOGNIZING;
  } else if (GetElapsedTimeMs() >= kNoSpeechTimeoutMs) {
    return Abort(SpeechRecognitionError(SPEECH_RECOGNITION_ERROR_NO_SPEECH));
  }
  return STATE_WAITING_FOR_SPEECH;
}

SpeechRecognizerImpl::FSMState
SpeechRecognizerImpl::DetectEndOfSpeech(const FSMEventArgs& event_args) {
  if (endpointer_.speech_input_complete())
    return StopCaptureAndWaitForResult(event_args);
  return STATE_RECOGNIZING;
}

SpeechRecognizerImpl::FSMState
SpeechRecognizerImpl::StopCaptureAndWaitForResult(const FSMEventArgs&) {
  DCHECK(state_ >= STATE_ESTIMATING_ENVIRONMENT && state_ <= STATE_RECOGNIZING);

  DVLOG(1) << "Concluding recognition";
  CloseAudioControllerAsynchronously();
  recognition_engine_->AudioChunksEnded();

  if (state_ > STATE_WAITING_FOR_SPEECH)
    listener()->OnSoundEnd(session_id());

  listener()->OnAudioEnd(session_id());
  return STATE_WAITING_FINAL_RESULT;
}

SpeechRecognizerImpl::FSMState
SpeechRecognizerImpl::AbortSilently(const FSMEventArgs& event_args) {
  DCHECK_NE(event_args.event, EVENT_AUDIO_ERROR);
  DCHECK_NE(event_args.event, EVENT_ENGINE_ERROR);
  return Abort(SpeechRecognitionError(SPEECH_RECOGNITION_ERROR_NONE));
}

SpeechRecognizerImpl::FSMState
SpeechRecognizerImpl::AbortWithError(const FSMEventArgs& event_args) {
  if (event_args.event == EVENT_AUDIO_ERROR) {
    return Abort(SpeechRecognitionError(SPEECH_RECOGNITION_ERROR_AUDIO));
  } else if (event_args.event == EVENT_ENGINE_ERROR) {
    return Abort(event_args.engine_error);
  }
  return Abort(SpeechRecognitionError(SPEECH_RECOGNITION_ERROR_ABORTED));
}

SpeechRecognizerImpl::FSMState SpeechRecognizerImpl::Abort(
    const SpeechRecognitionError& error) {
  if (IsCapturingAudio())
    CloseAudioControllerAsynchronously();

  DVLOG(1) << "SpeechRecognizerImpl canceling recognition. ";

  // The recognition engine is initialized only after STATE_STARTING.
  if (state_ > STATE_STARTING) {
    DCHECK(recognition_engine_.get() != NULL);
    recognition_engine_->EndRecognition();
  }

  if (state_ > STATE_WAITING_FOR_SPEECH && state_ < STATE_WAITING_FINAL_RESULT)
    listener()->OnSoundEnd(session_id());

  if (state_ > STATE_STARTING && state_ < STATE_WAITING_FINAL_RESULT)
    listener()->OnAudioEnd(session_id());

  if (error.code != SPEECH_RECOGNITION_ERROR_NONE)
    listener()->OnRecognitionError(session_id(), error);

  listener()->OnRecognitionEnd(session_id());

  return STATE_ENDED;
}

SpeechRecognizerImpl::FSMState SpeechRecognizerImpl::ProcessIntermediateResult(
    const FSMEventArgs& event_args) {
  // Provisional results can occur only in continuous (non one-shot) mode.
  // If this check is reached, it means that a continuous speech recognition
  // engine is being used for a one-shot recognition.
  DCHECK_EQ(false, is_single_shot_);

  // In continuous recognition, intermediate results can occur even when we are
  // in the ESTIMATING_ENVIRONMENT or WAITING_FOR_SPEECH states (if the
  // recognition engine is "faster" than our endpointer). In these cases we
  // skip the endpointer and fast-forward to the RECOGNIZING state, while still
  // respecting the order in which events are triggered.
  if (state_ == STATE_ESTIMATING_ENVIRONMENT) {
    DCHECK(endpointer_.IsEstimatingEnvironment());
    endpointer_.SetUserInputMode();
    listener()->OnEnvironmentEstimationComplete(session_id());
  } else if (state_ == STATE_WAITING_FOR_SPEECH) {
    listener()->OnSoundStart(session_id());
  } else {
    DCHECK_EQ(STATE_RECOGNIZING, state_);
  }

  listener()->OnRecognitionResults(session_id(), event_args.engine_results);
  return STATE_RECOGNIZING;
}

SpeechRecognizerImpl::FSMState
SpeechRecognizerImpl::ProcessFinalResult(const FSMEventArgs& event_args) {
  const SpeechRecognitionResults& results = event_args.engine_results;
  SpeechRecognitionResults::const_iterator i = results.begin();
  bool provisional_results_pending = false;
  bool results_are_empty = true;
  for (; i != results.end(); ++i) {
    const SpeechRecognitionResult& result = *i;
    if (result.is_provisional) {
      provisional_results_pending = true;
      DCHECK(!is_single_shot_);
    } else if (results_are_empty) {
      results_are_empty = result.hypotheses.empty();
    }
  }

  if (provisional_results_pending) {
    listener()->OnRecognitionResults(session_id(), results);
    // We don't end the recognition if a provisional result is received in
    // STATE_WAITING_FINAL_RESULT. A definitive result will come next and will
    // end the recognition.
    return state_;
  }

  recognition_engine_->EndRecognition();

  if (!results_are_empty) {
    // We could receive an empty result (which we won't propagate further)
    // in the following (continuous) scenario:
    //  1. The caller starts pushing audio and receives some results;
    //  2. A |StopAudioCapture| is issued later;
    //  3. The final audio frames captured in the interval ]1,2] do not lead to
    //     any result (nor to any error);
    //  4. The speech recognition engine, therefore, emits an empty result to
    //     notify that the recognition ended with no error and no further
    //     results.
    listener()->OnRecognitionResults(session_id(), results);
  }

  listener()->OnRecognitionEnd(session_id());
  return STATE_ENDED;
}

SpeechRecognizerImpl::FSMState
SpeechRecognizerImpl::DoNothing(const FSMEventArgs&) const {
  return state_;  // Just keep the current state.
}

SpeechRecognizerImpl::FSMState
SpeechRecognizerImpl::NotFeasible(const FSMEventArgs& event_args) {
  NOTREACHED() << "Unfeasible event " << event_args.event
               << " in state " << state_;
  return state_;
}

void SpeechRecognizerImpl::CloseAudioControllerAsynchronously() {
  DCHECK(IsCapturingAudio());
  DVLOG(1) << "SpeechRecognizerImpl closing audio controller.";
  // Issues a Close on the audio controller, passing an empty callback. The only
  // purpose of this callback is to keep the audio controller refcounted until
  // Close has completed (on the audio thread) and to destroy it automatically
  // afterwards (upon return from OnAudioClosed).
  audio_controller_->Close(base::Bind(&SpeechRecognizerImpl::OnAudioClosed,
                                      this, audio_controller_));
  audio_controller_ = NULL;  // The controller is still refcounted by Bind.
}

int SpeechRecognizerImpl::GetElapsedTimeMs() const {
  return (num_samples_recorded_ * 1000) / kAudioSampleRate;
}
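
// Level-meter note: |rms| and NoiseLevelDb() below are RMS values expressed in
// dB, mapped linearly from [kAudioMeterMinDb, kAudioMeterMaxDb] onto
// [0, kAudioMeterRangeMaxUnclipped] and then clamped. For example, a
// hypothetical rms of 60 dB maps to (60 - 30) / (60.31 / (47 / 48)), roughly
// 0.49, before smoothing; when clipping is detected the reported level is
// forced to 1.0f regardless of the computed value.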

void SpeechRecognizerImpl::UpdateSignalAndNoiseLevels(const float& rms,
                                                      bool clip_detected) {
  // Calculate the input volume to display in the UI, smoothing towards the
  // new level.
  // TODO(primiano): Do we really need all this floating point arithmetic here?
  // Perhaps it might be quite expensive on mobile.
  float level = (rms - kAudioMeterMinDb) /
      (kAudioMeterDbRange / kAudioMeterRangeMaxUnclipped);
  level = std::min(std::max(0.0f, level), kAudioMeterRangeMaxUnclipped);
  const float smoothing_factor = (level > audio_level_) ? kUpSmoothingFactor :
                                                          kDownSmoothingFactor;
  audio_level_ += (level - audio_level_) * smoothing_factor;

  float noise_level = (endpointer_.NoiseLevelDb() - kAudioMeterMinDb) /
      (kAudioMeterDbRange / kAudioMeterRangeMaxUnclipped);
  noise_level = std::min(std::max(0.0f, noise_level),
                         kAudioMeterRangeMaxUnclipped);

  listener()->OnAudioLevelsChange(
      session_id(), clip_detected ? 1.0f : audio_level_, noise_level);
}

void SpeechRecognizerImpl::SetAudioManagerForTests(
    AudioManager* audio_manager) {
  audio_manager_for_tests_ = audio_manager;
}

SpeechRecognizerImpl::FSMEventArgs::FSMEventArgs(FSMEvent event_value)
    : event(event_value),
      audio_data(NULL),
      engine_error(SPEECH_RECOGNITION_ERROR_NONE) {
}

SpeechRecognizerImpl::FSMEventArgs::~FSMEventArgs() {
}

}  // namespace content