// speech_input_manager.cc revision 72a454cd3513ac24fbdd0e0cb9ad70b86a99b801
1// Copyright (c) 2011 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include "chrome/browser/speech/speech_input_manager.h" 6 7#include <map> 8#include <string> 9 10#include "base/command_line.h" 11#include "base/lazy_instance.h" 12#include "base/ref_counted.h" 13#include "base/synchronization/lock.h" 14#include "base/threading/thread_restrictions.h" 15#include "base/utf_string_conversions.h" 16#include "chrome/browser/browser_process.h" 17#include "chrome/browser/browser_thread.h" 18#include "chrome/browser/platform_util.h" 19#include "chrome/browser/prefs/pref_service.h" 20#include "chrome/browser/speech/speech_input_bubble_controller.h" 21#include "chrome/browser/speech/speech_recognizer.h" 22#include "chrome/browser/tab_contents/tab_util.h" 23#include "chrome/common/chrome_switches.h" 24#include "chrome/common/pref_names.h" 25#include "grit/generated_resources.h" 26#include "media/audio/audio_manager.h" 27#include "ui/base/l10n/l10n_util.h" 28 29#if defined(OS_WIN) 30#include "chrome/installer/util/wmi.h" 31#endif 32 33namespace speech_input { 34 35namespace { 36 37// Asynchronously fetches the PC and audio hardware/driver info if 38// the user has opted into UMA. This information is sent with speech input 39// requests to the server for identifying and improving quality issues with 40// specific device configurations. 41class OptionalRequestInfo 42 : public base::RefCountedThreadSafe<OptionalRequestInfo> { 43 public: 44 OptionalRequestInfo() : can_report_metrics_(false) {} 45 46 void Refresh() { 47 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); 48 // UMA opt-in can be checked only from the UI thread, so switch to that. 
49 BrowserThread::PostTask(BrowserThread::UI, FROM_HERE, 50 NewRunnableMethod(this, 51 &OptionalRequestInfo::CheckUMAAndGetHardwareInfo)); 52 } 53 54 void CheckUMAAndGetHardwareInfo() { 55 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 56 if (g_browser_process->local_state()->GetBoolean( 57 prefs::kMetricsReportingEnabled)) { 58 // Access potentially slow OS calls from the FILE thread. 59 BrowserThread::PostTask(BrowserThread::FILE, FROM_HERE, 60 NewRunnableMethod(this, &OptionalRequestInfo::GetHardwareInfo)); 61 } 62 } 63 64 void GetHardwareInfo() { 65 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE)); 66 base::AutoLock lock(lock_); 67 can_report_metrics_ = true; 68#if defined(OS_WIN) 69 value_ = UTF16ToUTF8( 70 installer::WMIComputerSystem::GetModel() + L"|" + 71 AudioManager::GetAudioManager()->GetAudioInputDeviceModel()); 72#else // defined(OS_WIN) 73 value_ = UTF16ToUTF8( 74 AudioManager::GetAudioManager()->GetAudioInputDeviceModel()); 75#endif // defined(OS_WIN) 76 } 77 78 std::string value() { 79 base::AutoLock lock(lock_); 80 return value_; 81 } 82 83 bool can_report_metrics() { 84 base::AutoLock lock(lock_); 85 return can_report_metrics_; 86 } 87 88 private: 89 base::Lock lock_; 90 std::string value_; 91 bool can_report_metrics_; 92 93 DISALLOW_COPY_AND_ASSIGN(OptionalRequestInfo); 94}; 95 96class SpeechInputManagerImpl : public SpeechInputManager, 97 public SpeechInputBubbleControllerDelegate, 98 public SpeechRecognizerDelegate { 99 public: 100 // SpeechInputManager methods. 
101 virtual void StartRecognition(SpeechInputManagerDelegate* delegate, 102 int caller_id, 103 int render_process_id, 104 int render_view_id, 105 const gfx::Rect& element_rect, 106 const std::string& language, 107 const std::string& grammar, 108 const std::string& origin_url); 109 virtual void CancelRecognition(int caller_id); 110 virtual void StopRecording(int caller_id); 111 virtual void CancelAllRequestsWithDelegate( 112 SpeechInputManagerDelegate* delegate); 113 114 // SpeechRecognizer::Delegate methods. 115 virtual void SetRecognitionResult(int caller_id, 116 bool error, 117 const SpeechInputResultArray& result); 118 virtual void DidCompleteRecording(int caller_id); 119 virtual void DidCompleteRecognition(int caller_id); 120 virtual void OnRecognizerError(int caller_id, 121 SpeechRecognizer::ErrorCode error); 122 virtual void DidCompleteEnvironmentEstimation(int caller_id); 123 virtual void SetInputVolume(int caller_id, float volume); 124 125 // SpeechInputBubbleController::Delegate methods. 126 virtual void InfoBubbleButtonClicked(int caller_id, 127 SpeechInputBubble::Button button); 128 virtual void InfoBubbleFocusChanged(int caller_id); 129 130 private: 131 struct SpeechInputRequest { 132 SpeechInputManagerDelegate* delegate; 133 scoped_refptr<SpeechRecognizer> recognizer; 134 bool is_active; // Set to true when recording or recognition is going on. 135 }; 136 137 // Private constructor to enforce singleton. 138 friend struct base::DefaultLazyInstanceTraits<SpeechInputManagerImpl>; 139 SpeechInputManagerImpl(); 140 virtual ~SpeechInputManagerImpl(); 141 142 bool HasPendingRequest(int caller_id) const; 143 SpeechInputManagerDelegate* GetDelegate(int caller_id) const; 144 145 void CancelRecognitionAndInformDelegate(int caller_id); 146 147 // Starts/restarts recognition for an existing request. 
148 void StartRecognitionForRequest(int caller_id); 149 150 typedef std::map<int, SpeechInputRequest> SpeechRecognizerMap; 151 SpeechRecognizerMap requests_; 152 int recording_caller_id_; 153 scoped_refptr<SpeechInputBubbleController> bubble_controller_; 154 scoped_refptr<OptionalRequestInfo> optional_request_info_; 155}; 156 157base::LazyInstance<SpeechInputManagerImpl> g_speech_input_manager_impl( 158 base::LINKER_INITIALIZED); 159 160} // namespace 161 162SpeechInputManager* SpeechInputManager::Get() { 163 return g_speech_input_manager_impl.Pointer(); 164} 165 166bool SpeechInputManager::IsFeatureEnabled() { 167 bool enabled = true; 168 const CommandLine& command_line = *CommandLine::ForCurrentProcess(); 169 170 if (command_line.HasSwitch(switches::kDisableSpeechInput)) { 171 enabled = false; 172#if defined(GOOGLE_CHROME_BUILD) 173 } else if (!command_line.HasSwitch(switches::kEnableSpeechInput)) { 174 // We need to evaluate whether IO is OK here. http://crbug.com/63335. 175 base::ThreadRestrictions::ScopedAllowIO allow_io; 176 // Official Chrome builds have speech input enabled by default only in the 177 // dev channel. 
178 std::string channel = platform_util::GetVersionStringModifier(); 179 enabled = (channel == "dev"); 180#endif 181 } 182 183 return enabled; 184} 185 186SpeechInputManagerImpl::SpeechInputManagerImpl() 187 : recording_caller_id_(0), 188 bubble_controller_(new SpeechInputBubbleController( 189 ALLOW_THIS_IN_INITIALIZER_LIST(this))) { 190} 191 192SpeechInputManagerImpl::~SpeechInputManagerImpl() { 193 while (requests_.begin() != requests_.end()) 194 CancelRecognition(requests_.begin()->first); 195} 196 197bool SpeechInputManagerImpl::HasPendingRequest(int caller_id) const { 198 return requests_.find(caller_id) != requests_.end(); 199} 200 201SpeechInputManagerDelegate* SpeechInputManagerImpl::GetDelegate( 202 int caller_id) const { 203 return requests_.find(caller_id)->second.delegate; 204} 205 206void SpeechInputManagerImpl::StartRecognition( 207 SpeechInputManagerDelegate* delegate, 208 int caller_id, 209 int render_process_id, 210 int render_view_id, 211 const gfx::Rect& element_rect, 212 const std::string& language, 213 const std::string& grammar, 214 const std::string& origin_url) { 215 DCHECK(!HasPendingRequest(caller_id)); 216 217 bubble_controller_->CreateBubble(caller_id, render_process_id, render_view_id, 218 element_rect); 219 220 if (!optional_request_info_.get()) { 221 optional_request_info_ = new OptionalRequestInfo(); 222 // Since hardware info is optional with speech input requests, we start an 223 // asynchronous fetch here and move on with recording audio. This first 224 // speech input request would send an empty string for hardware info and 225 // subsequent requests may have the hardware info available if the fetch 226 // completed before them. This way we don't end up stalling the user with 227 // a long wait and disk seeks when they click on a UI element and start 228 // speaking. 
229 optional_request_info_->Refresh(); 230 } 231 232 SpeechInputRequest* request = &requests_[caller_id]; 233 request->delegate = delegate; 234 request->recognizer = new SpeechRecognizer( 235 this, caller_id, language, grammar, optional_request_info_->value(), 236 optional_request_info_->can_report_metrics() ? origin_url : ""); 237 request->is_active = false; 238 239 StartRecognitionForRequest(caller_id); 240} 241 242void SpeechInputManagerImpl::StartRecognitionForRequest(int caller_id) { 243 DCHECK(HasPendingRequest(caller_id)); 244 245 // If we are currently recording audio for another caller, abort that cleanly. 246 if (recording_caller_id_) 247 CancelRecognitionAndInformDelegate(recording_caller_id_); 248 249 if (!AudioManager::GetAudioManager()->HasAudioInputDevices()) { 250 bubble_controller_->SetBubbleMessage( 251 caller_id, l10n_util::GetStringUTF16(IDS_SPEECH_INPUT_NO_MIC)); 252 } else { 253 recording_caller_id_ = caller_id; 254 requests_[caller_id].is_active = true; 255 requests_[caller_id].recognizer->StartRecording(); 256 } 257} 258 259void SpeechInputManagerImpl::CancelRecognition(int caller_id) { 260 DCHECK(HasPendingRequest(caller_id)); 261 if (requests_[caller_id].is_active) 262 requests_[caller_id].recognizer->CancelRecognition(); 263 requests_.erase(caller_id); 264 if (recording_caller_id_ == caller_id) 265 recording_caller_id_ = 0; 266 bubble_controller_->CloseBubble(caller_id); 267} 268 269void SpeechInputManagerImpl::CancelAllRequestsWithDelegate( 270 SpeechInputManagerDelegate* delegate) { 271 SpeechRecognizerMap::iterator it = requests_.begin(); 272 while (it != requests_.end()) { 273 if (it->second.delegate == delegate) { 274 CancelRecognition(it->first); 275 // This map will have very few elements so it is simpler to restart. 
276 it = requests_.begin(); 277 } else { 278 ++it; 279 } 280 } 281} 282 283void SpeechInputManagerImpl::StopRecording(int caller_id) { 284 DCHECK(HasPendingRequest(caller_id)); 285 requests_[caller_id].recognizer->StopRecording(); 286} 287 288void SpeechInputManagerImpl::SetRecognitionResult( 289 int caller_id, bool error, const SpeechInputResultArray& result) { 290 DCHECK(HasPendingRequest(caller_id)); 291 GetDelegate(caller_id)->SetRecognitionResult(caller_id, result); 292} 293 294void SpeechInputManagerImpl::DidCompleteRecording(int caller_id) { 295 DCHECK(recording_caller_id_ == caller_id); 296 DCHECK(HasPendingRequest(caller_id)); 297 recording_caller_id_ = 0; 298 GetDelegate(caller_id)->DidCompleteRecording(caller_id); 299 bubble_controller_->SetBubbleRecognizingMode(caller_id); 300} 301 302void SpeechInputManagerImpl::DidCompleteRecognition(int caller_id) { 303 GetDelegate(caller_id)->DidCompleteRecognition(caller_id); 304 requests_.erase(caller_id); 305 bubble_controller_->CloseBubble(caller_id); 306} 307 308void SpeechInputManagerImpl::OnRecognizerError( 309 int caller_id, SpeechRecognizer::ErrorCode error) { 310 if (caller_id == recording_caller_id_) 311 recording_caller_id_ = 0; 312 313 requests_[caller_id].is_active = false; 314 315 int message_id; 316 switch (error) { 317 case SpeechRecognizer::RECOGNIZER_ERROR_CAPTURE: 318 message_id = IDS_SPEECH_INPUT_ERROR; 319 break; 320 case SpeechRecognizer::RECOGNIZER_ERROR_NO_SPEECH: 321 message_id = IDS_SPEECH_INPUT_NO_SPEECH; 322 break; 323 case SpeechRecognizer::RECOGNIZER_ERROR_NO_RESULTS: 324 message_id = IDS_SPEECH_INPUT_NO_RESULTS; 325 break; 326 default: 327 NOTREACHED() << "unknown error " << error; 328 return; 329 } 330 bubble_controller_->SetBubbleMessage(caller_id, 331 l10n_util::GetStringUTF16(message_id)); 332} 333 334void SpeechInputManagerImpl::DidCompleteEnvironmentEstimation(int caller_id) { 335 DCHECK(HasPendingRequest(caller_id)); 336 DCHECK(recording_caller_id_ == caller_id); 337 338 // 
Speech recognizer has gathered enough background audio so we can ask the 339 // user to start speaking. 340 bubble_controller_->SetBubbleRecordingMode(caller_id); 341} 342 343void SpeechInputManagerImpl::SetInputVolume(int caller_id, float volume) { 344 DCHECK(HasPendingRequest(caller_id)); 345 DCHECK_EQ(recording_caller_id_, caller_id); 346 347 bubble_controller_->SetBubbleInputVolume(caller_id, volume); 348} 349 350void SpeechInputManagerImpl::CancelRecognitionAndInformDelegate(int caller_id) { 351 SpeechInputManagerDelegate* cur_delegate = GetDelegate(caller_id); 352 CancelRecognition(caller_id); 353 cur_delegate->DidCompleteRecording(caller_id); 354 cur_delegate->DidCompleteRecognition(caller_id); 355} 356 357void SpeechInputManagerImpl::InfoBubbleButtonClicked( 358 int caller_id, SpeechInputBubble::Button button) { 359 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); 360 // Ignore if the caller id was not in our active recognizers list because the 361 // user might have clicked more than once, or recognition could have been 362 // cancelled due to other reasons before the user click was processed. 363 if (!HasPendingRequest(caller_id)) 364 return; 365 366 if (button == SpeechInputBubble::BUTTON_CANCEL) { 367 CancelRecognitionAndInformDelegate(caller_id); 368 } else if (button == SpeechInputBubble::BUTTON_TRY_AGAIN) { 369 StartRecognitionForRequest(caller_id); 370 } 371} 372 373void SpeechInputManagerImpl::InfoBubbleFocusChanged(int caller_id) { 374 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); 375 // Ignore if the caller id was not in our active recognizers list because the 376 // user might have clicked more than once, or recognition could have been 377 // ended due to other reasons before the user click was processed. 378 if (HasPendingRequest(caller_id)) { 379 // If this is an ongoing recording or if we were displaying an error message 380 // to the user, abort it since user has switched focus. 
Otherwise 381 // recognition has started and keep that going so user can start speaking to 382 // another element while this gets the results in parallel. 383 if (recording_caller_id_ == caller_id || !requests_[caller_id].is_active) { 384 CancelRecognitionAndInformDelegate(caller_id); 385 } 386 } 387} 388 389} // namespace speech_input 390