speech_input_manager.cc revision 201ade2fbba22bfb27ae029f4d23fca6ded109a0
1// Copyright (c) 2010 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#include "chrome/browser/speech/speech_input_manager.h" 6 7#include "app/l10n_util.h" 8#include "base/lock.h" 9#include "base/ref_counted.h" 10#include "base/singleton.h" 11#include "base/utf_string_conversions.h" 12#include "chrome/browser/browser_process.h" 13#include "chrome/browser/browser_thread.h" 14#include "chrome/browser/prefs/pref_service.h" 15#include "chrome/browser/speech/speech_input_bubble_controller.h" 16#include "chrome/browser/speech/speech_recognizer.h" 17#include "chrome/browser/tab_contents/infobar_delegate.h" 18#include "chrome/browser/tab_contents/tab_contents.h" 19#include "chrome/browser/tab_contents/tab_util.h" 20#include "chrome/common/pref_names.h" 21#include "grit/generated_resources.h" 22#include "media/audio/audio_manager.h" 23#include <map> 24 25#if defined(OS_WIN) 26#include "chrome/installer/util/wmi.h" 27#endif 28 29namespace { 30 31// Asynchronously fetches the PC and audio hardware/driver info on windows if 32// the user has opted into UMA. This information is sent with speech input 33// requests to the server for identifying and improving quality issues with 34// specific device configurations. 35class HardwareInfo : public base::RefCountedThreadSafe<HardwareInfo> { 36 public: 37 HardwareInfo() {} 38 39#if defined(OS_WIN) 40 void Refresh() { 41 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); 42 // UMA opt-in can be checked only from the UI thread, so switch to that. 43 BrowserThread::PostTask(BrowserThread::UI, FROM_HERE, 44 NewRunnableMethod(this, &HardwareInfo::CheckUMAAndGetHardwareInfo)); 45 } 46 47 void CheckUMAAndGetHardwareInfo() { 48 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI)); 49 if (g_browser_process->local_state()->GetBoolean( 50 prefs::kMetricsReportingEnabled)) { 51 // Access potentially slow OS calls from the FILE thread. 52 BrowserThread::PostTask(BrowserThread::FILE, FROM_HERE, 53 NewRunnableMethod(this, &HardwareInfo::GetHardwareInfo)); 54 } 55 } 56 57 void GetHardwareInfo() { 58 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE)); 59 AutoLock lock(lock_); 60 value_ = UTF16ToUTF8( 61 installer::WMIComputerSystem::GetModel() + L"|" + 62 AudioManager::GetAudioManager()->GetAudioInputDeviceModel()); 63 } 64 65 std::string value() { 66 AutoLock lock(lock_); 67 return value_; 68 } 69 70 private: 71 Lock lock_; 72 std::string value_; 73 74#else // defined(OS_WIN) 75 void Refresh() {} 76 std::string value() { return std::string(); } 77#endif // defined(OS_WIN) 78 79 DISALLOW_COPY_AND_ASSIGN(HardwareInfo); 80}; 81 82} 83 84namespace speech_input { 85 86class SpeechInputManagerImpl : public SpeechInputManager, 87 public SpeechInputBubbleControllerDelegate, 88 public SpeechRecognizerDelegate { 89 public: 90 // SpeechInputManager methods. 91 virtual void StartRecognition(SpeechInputManagerDelegate* delegate, 92 int caller_id, 93 int render_process_id, 94 int render_view_id, 95 const gfx::Rect& element_rect, 96 const std::string& language, 97 const std::string& grammar); 98 virtual void CancelRecognition(int caller_id); 99 virtual void StopRecording(int caller_id); 100 101 // SpeechRecognizer::Delegate methods. 102 virtual void SetRecognitionResult(int caller_id, 103 bool error, 104 const SpeechInputResultArray& result); 105 virtual void DidCompleteRecording(int caller_id); 106 virtual void DidCompleteRecognition(int caller_id); 107 virtual void OnRecognizerError(int caller_id, 108 SpeechRecognizer::ErrorCode error); 109 virtual void DidCompleteEnvironmentEstimation(int caller_id); 110 virtual void SetInputVolume(int caller_id, float volume); 111 112 // SpeechInputBubbleController::Delegate methods. 113 virtual void InfoBubbleButtonClicked(int caller_id, 114 SpeechInputBubble::Button button); 115 virtual void InfoBubbleFocusChanged(int caller_id); 116 117 private: 118 struct SpeechInputRequest { 119 SpeechInputManagerDelegate* delegate; 120 scoped_refptr<SpeechRecognizer> recognizer; 121 bool is_active; // Set to true when recording or recognition is going on. 122 }; 123 124 // Private constructor to enforce singleton. 125 friend struct DefaultSingletonTraits<SpeechInputManagerImpl>; 126 SpeechInputManagerImpl(); 127 virtual ~SpeechInputManagerImpl(); 128 129 bool HasPendingRequest(int caller_id) const; 130 SpeechInputManagerDelegate* GetDelegate(int caller_id) const; 131 132 void CancelRecognitionAndInformDelegate(int caller_id); 133 134 // Starts/restarts recognition for an existing request. 135 void StartRecognitionForRequest(int caller_id); 136 137 SpeechInputManagerDelegate* delegate_; 138 typedef std::map<int, SpeechInputRequest> SpeechRecognizerMap; 139 SpeechRecognizerMap requests_; 140 int recording_caller_id_; 141 scoped_refptr<SpeechInputBubbleController> bubble_controller_; 142 scoped_refptr<HardwareInfo> hardware_info_; 143}; 144 145SpeechInputManager* SpeechInputManager::Get() { 146 return Singleton<SpeechInputManagerImpl>::get(); 147} 148 149SpeechInputManagerImpl::SpeechInputManagerImpl() 150 : recording_caller_id_(0), 151 bubble_controller_(new SpeechInputBubbleController( 152 ALLOW_THIS_IN_INITIALIZER_LIST(this))) { 153} 154 155SpeechInputManagerImpl::~SpeechInputManagerImpl() { 156 while (requests_.begin() != requests_.end()) 157 CancelRecognition(requests_.begin()->first); 158} 159 160bool SpeechInputManagerImpl::HasPendingRequest(int caller_id) const { 161 return requests_.find(caller_id) != requests_.end(); 162} 163 164SpeechInputManagerDelegate* SpeechInputManagerImpl::GetDelegate( 165 int caller_id) const { 166 return requests_.find(caller_id)->second.delegate; 167} 168 169void SpeechInputManagerImpl::StartRecognition( 170 SpeechInputManagerDelegate* delegate, 171 int caller_id, 172 int render_process_id, 173 int render_view_id, 174 const gfx::Rect& element_rect, 175 const std::string& language, 176 const std::string& grammar) { 177 DCHECK(!HasPendingRequest(caller_id)); 178 179 bubble_controller_->CreateBubble(caller_id, render_process_id, render_view_id, 180 element_rect); 181 182 if (!hardware_info_.get()) { 183 hardware_info_ = new HardwareInfo(); 184 // Since hardware info is optional with speech input requests, we start an 185 // asynchronous fetch here and move on with recording audio. This first 186 // speech input request would send an empty string for hardware info and 187 // subsequent requests may have the hardware info available if the fetch 188 // completed before them. This way we don't end up stalling the user with 189 // a long wait and disk seeks when they click on a UI element and start 190 // speaking. 191 hardware_info_->Refresh(); 192 } 193 194 SpeechInputRequest* request = &requests_[caller_id]; 195 request->delegate = delegate; 196 request->recognizer = new SpeechRecognizer(this, caller_id, language, 197 grammar, hardware_info_->value()); 198 request->is_active = false; 199 200 StartRecognitionForRequest(caller_id); 201} 202 203void SpeechInputManagerImpl::StartRecognitionForRequest(int caller_id) { 204 DCHECK(HasPendingRequest(caller_id)); 205 206 // If we are currently recording audio for another caller, abort that cleanly. 207 if (recording_caller_id_) 208 CancelRecognitionAndInformDelegate(recording_caller_id_); 209 210 if (!AudioManager::GetAudioManager()->HasAudioInputDevices()) { 211 bubble_controller_->SetBubbleMessage( 212 caller_id, l10n_util::GetStringUTF16(IDS_SPEECH_INPUT_NO_MIC)); 213 } else { 214 recording_caller_id_ = caller_id; 215 requests_[caller_id].is_active = true; 216 requests_[caller_id].recognizer->StartRecording(); 217 } 218} 219 220void SpeechInputManagerImpl::CancelRecognition(int caller_id) { 221 DCHECK(HasPendingRequest(caller_id)); 222 if (requests_[caller_id].is_active) 223 requests_[caller_id].recognizer->CancelRecognition(); 224 requests_.erase(caller_id); 225 if (recording_caller_id_ == caller_id) 226 recording_caller_id_ = 0; 227 bubble_controller_->CloseBubble(caller_id); 228} 229 230void SpeechInputManagerImpl::StopRecording(int caller_id) { 231 DCHECK(HasPendingRequest(caller_id)); 232 requests_[caller_id].recognizer->StopRecording(); 233} 234 235void SpeechInputManagerImpl::SetRecognitionResult( 236 int caller_id, bool error, const SpeechInputResultArray& result) { 237 DCHECK(HasPendingRequest(caller_id)); 238 GetDelegate(caller_id)->SetRecognitionResult(caller_id, result); 239} 240 241void SpeechInputManagerImpl::DidCompleteRecording(int caller_id) { 242 DCHECK(recording_caller_id_ == caller_id); 243 DCHECK(HasPendingRequest(caller_id)); 244 recording_caller_id_ = 0; 245 GetDelegate(caller_id)->DidCompleteRecording(caller_id); 246 bubble_controller_->SetBubbleRecognizingMode(caller_id); 247} 248 249void SpeechInputManagerImpl::DidCompleteRecognition(int caller_id) { 250 GetDelegate(caller_id)->DidCompleteRecognition(caller_id); 251 requests_.erase(caller_id); 252 bubble_controller_->CloseBubble(caller_id); 253} 254 255void SpeechInputManagerImpl::OnRecognizerError( 256 int caller_id, SpeechRecognizer::ErrorCode error) { 257 if (caller_id == recording_caller_id_) 258 recording_caller_id_ = 0; 259 260 requests_[caller_id].is_active = false; 261 262 int message_id; 263 switch (error) { 264 case SpeechRecognizer::RECOGNIZER_ERROR_CAPTURE: 265 message_id = IDS_SPEECH_INPUT_ERROR; 266 break; 267 case SpeechRecognizer::RECOGNIZER_ERROR_NO_SPEECH: 268 message_id = IDS_SPEECH_INPUT_NO_SPEECH; 269 break; 270 case SpeechRecognizer::RECOGNIZER_ERROR_NO_RESULTS: 271 message_id = IDS_SPEECH_INPUT_NO_RESULTS; 272 break; 273 default: 274 NOTREACHED() << "unknown error " << error; 275 return; 276 } 277 bubble_controller_->SetBubbleMessage(caller_id, 278 l10n_util::GetStringUTF16(message_id)); 279} 280 281void SpeechInputManagerImpl::DidCompleteEnvironmentEstimation(int caller_id) { 282 DCHECK(HasPendingRequest(caller_id)); 283 DCHECK(recording_caller_id_ == caller_id); 284 285 // Speech recognizer has gathered enough background audio so we can ask the 286 // user to start speaking. 287 bubble_controller_->SetBubbleRecordingMode(caller_id); 288} 289 290void SpeechInputManagerImpl::SetInputVolume(int caller_id, float volume) { 291 DCHECK(HasPendingRequest(caller_id)); 292 DCHECK_EQ(recording_caller_id_, caller_id); 293 294 bubble_controller_->SetBubbleInputVolume(caller_id, volume); 295} 296 297void SpeechInputManagerImpl::CancelRecognitionAndInformDelegate(int caller_id) { 298 SpeechInputManagerDelegate* cur_delegate = GetDelegate(caller_id); 299 CancelRecognition(caller_id); 300 cur_delegate->DidCompleteRecording(caller_id); 301 cur_delegate->DidCompleteRecognition(caller_id); 302} 303 304void SpeechInputManagerImpl::InfoBubbleButtonClicked( 305 int caller_id, SpeechInputBubble::Button button) { 306 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); 307 // Ignore if the caller id was not in our active recognizers list because the 308 // user might have clicked more than once, or recognition could have been 309 // cancelled due to other reasons before the user click was processed. 310 if (!HasPendingRequest(caller_id)) 311 return; 312 313 if (button == SpeechInputBubble::BUTTON_CANCEL) { 314 CancelRecognitionAndInformDelegate(caller_id); 315 } else if (button == SpeechInputBubble::BUTTON_TRY_AGAIN) { 316 StartRecognitionForRequest(caller_id); 317 } 318} 319 320void SpeechInputManagerImpl::InfoBubbleFocusChanged(int caller_id) { 321 DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO)); 322 // Ignore if the caller id was not in our active recognizers list because the 323 // user might have clicked more than once, or recognition could have been 324 // ended due to other reasons before the user click was processed. 325 if (HasPendingRequest(caller_id)) { 326 // If this is an ongoing recording or if we were displaying an error message 327 // to the user, abort it since user has switched focus. Otherwise 328 // recognition has started and keep that going so user can start speaking to 329 // another element while this gets the results in parallel. 330 if (recording_caller_id_ == caller_id || !requests_[caller_id].is_active) { 331 CancelRecognitionAndInformDelegate(caller_id); 332 } 333 } 334} 335 336} // namespace speech_input 337