speech_input_manager.cc revision 201ade2fbba22bfb27ae029f4d23fca6ded109a0
1// Copyright (c) 2010 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "chrome/browser/speech/speech_input_manager.h"
6
7#include "app/l10n_util.h"
8#include "base/lock.h"
9#include "base/ref_counted.h"
10#include "base/singleton.h"
11#include "base/utf_string_conversions.h"
12#include "chrome/browser/browser_process.h"
13#include "chrome/browser/browser_thread.h"
14#include "chrome/browser/prefs/pref_service.h"
15#include "chrome/browser/speech/speech_input_bubble_controller.h"
16#include "chrome/browser/speech/speech_recognizer.h"
17#include "chrome/browser/tab_contents/infobar_delegate.h"
18#include "chrome/browser/tab_contents/tab_contents.h"
19#include "chrome/browser/tab_contents/tab_util.h"
20#include "chrome/common/pref_names.h"
21#include "grit/generated_resources.h"
22#include "media/audio/audio_manager.h"
23#include <map>
24
25#if defined(OS_WIN)
26#include "chrome/installer/util/wmi.h"
27#endif
28
29namespace {
30
31// Asynchronously fetches the PC and audio hardware/driver info on windows if
32// the user has opted into UMA. This information is sent with speech input
33// requests to the server for identifying and improving quality issues with
34// specific device configurations.
35class HardwareInfo : public base::RefCountedThreadSafe<HardwareInfo> {
36 public:
37  HardwareInfo() {}
38
39#if defined(OS_WIN)
40  void Refresh() {
41    DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
42    // UMA opt-in can be checked only from the UI thread, so switch to that.
43    BrowserThread::PostTask(BrowserThread::UI, FROM_HERE,
44        NewRunnableMethod(this, &HardwareInfo::CheckUMAAndGetHardwareInfo));
45  }
46
47  void CheckUMAAndGetHardwareInfo() {
48    DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
49    if (g_browser_process->local_state()->GetBoolean(
50        prefs::kMetricsReportingEnabled)) {
51      // Access potentially slow OS calls from the FILE thread.
52      BrowserThread::PostTask(BrowserThread::FILE, FROM_HERE,
53          NewRunnableMethod(this, &HardwareInfo::GetHardwareInfo));
54    }
55  }
56
57  void GetHardwareInfo() {
58    DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE));
59    AutoLock lock(lock_);
60    value_ = UTF16ToUTF8(
61        installer::WMIComputerSystem::GetModel() + L"|" +
62        AudioManager::GetAudioManager()->GetAudioInputDeviceModel());
63  }
64
65  std::string value() {
66    AutoLock lock(lock_);
67    return value_;
68  }
69
70 private:
71  Lock lock_;
72  std::string value_;
73
74#else // defined(OS_WIN)
75  void Refresh() {}
76  std::string value() { return std::string(); }
77#endif // defined(OS_WIN)
78
79  DISALLOW_COPY_AND_ASSIGN(HardwareInfo);
80};
81
82}
83
84namespace speech_input {
85
86class SpeechInputManagerImpl : public SpeechInputManager,
87                               public SpeechInputBubbleControllerDelegate,
88                               public SpeechRecognizerDelegate {
89 public:
90  // SpeechInputManager methods.
91  virtual void StartRecognition(SpeechInputManagerDelegate* delegate,
92                                int caller_id,
93                                int render_process_id,
94                                int render_view_id,
95                                const gfx::Rect& element_rect,
96                                const std::string& language,
97                                const std::string& grammar);
98  virtual void CancelRecognition(int caller_id);
99  virtual void StopRecording(int caller_id);
100
101  // SpeechRecognizer::Delegate methods.
102  virtual void SetRecognitionResult(int caller_id,
103                                    bool error,
104                                    const SpeechInputResultArray& result);
105  virtual void DidCompleteRecording(int caller_id);
106  virtual void DidCompleteRecognition(int caller_id);
107  virtual void OnRecognizerError(int caller_id,
108                                 SpeechRecognizer::ErrorCode error);
109  virtual void DidCompleteEnvironmentEstimation(int caller_id);
110  virtual void SetInputVolume(int caller_id, float volume);
111
112  // SpeechInputBubbleController::Delegate methods.
113  virtual void InfoBubbleButtonClicked(int caller_id,
114                                       SpeechInputBubble::Button button);
115  virtual void InfoBubbleFocusChanged(int caller_id);
116
117 private:
118  struct SpeechInputRequest {
119    SpeechInputManagerDelegate* delegate;
120    scoped_refptr<SpeechRecognizer> recognizer;
121    bool is_active;  // Set to true when recording or recognition is going on.
122  };
123
124  // Private constructor to enforce singleton.
125  friend struct DefaultSingletonTraits<SpeechInputManagerImpl>;
126  SpeechInputManagerImpl();
127  virtual ~SpeechInputManagerImpl();
128
129  bool HasPendingRequest(int caller_id) const;
130  SpeechInputManagerDelegate* GetDelegate(int caller_id) const;
131
132  void CancelRecognitionAndInformDelegate(int caller_id);
133
134  // Starts/restarts recognition for an existing request.
135  void StartRecognitionForRequest(int caller_id);
136
137  SpeechInputManagerDelegate* delegate_;
138  typedef std::map<int, SpeechInputRequest> SpeechRecognizerMap;
139  SpeechRecognizerMap requests_;
140  int recording_caller_id_;
141  scoped_refptr<SpeechInputBubbleController> bubble_controller_;
142  scoped_refptr<HardwareInfo> hardware_info_;
143};
144
145SpeechInputManager* SpeechInputManager::Get() {
146  return Singleton<SpeechInputManagerImpl>::get();
147}
148
149SpeechInputManagerImpl::SpeechInputManagerImpl()
150    : recording_caller_id_(0),
151      bubble_controller_(new SpeechInputBubbleController(
152          ALLOW_THIS_IN_INITIALIZER_LIST(this))) {
153}
154
155SpeechInputManagerImpl::~SpeechInputManagerImpl() {
156  while (requests_.begin() != requests_.end())
157    CancelRecognition(requests_.begin()->first);
158}
159
160bool SpeechInputManagerImpl::HasPendingRequest(int caller_id) const {
161  return requests_.find(caller_id) != requests_.end();
162}
163
164SpeechInputManagerDelegate* SpeechInputManagerImpl::GetDelegate(
165    int caller_id) const {
166  return requests_.find(caller_id)->second.delegate;
167}
168
169void SpeechInputManagerImpl::StartRecognition(
170    SpeechInputManagerDelegate* delegate,
171    int caller_id,
172    int render_process_id,
173    int render_view_id,
174    const gfx::Rect& element_rect,
175    const std::string& language,
176    const std::string& grammar) {
177  DCHECK(!HasPendingRequest(caller_id));
178
179  bubble_controller_->CreateBubble(caller_id, render_process_id, render_view_id,
180                                   element_rect);
181
182  if (!hardware_info_.get()) {
183    hardware_info_ = new HardwareInfo();
184    // Since hardware info is optional with speech input requests, we start an
185    // asynchronous fetch here and move on with recording audio. This first
186    // speech input request would send an empty string for hardware info and
187    // subsequent requests may have the hardware info available if the fetch
188    // completed before them. This way we don't end up stalling the user with
189    // a long wait and disk seeks when they click on a UI element and start
190    // speaking.
191    hardware_info_->Refresh();
192  }
193
194  SpeechInputRequest* request = &requests_[caller_id];
195  request->delegate = delegate;
196  request->recognizer = new SpeechRecognizer(this, caller_id, language,
197                                             grammar, hardware_info_->value());
198  request->is_active = false;
199
200  StartRecognitionForRequest(caller_id);
201}
202
203void SpeechInputManagerImpl::StartRecognitionForRequest(int caller_id) {
204  DCHECK(HasPendingRequest(caller_id));
205
206  // If we are currently recording audio for another caller, abort that cleanly.
207  if (recording_caller_id_)
208    CancelRecognitionAndInformDelegate(recording_caller_id_);
209
210  if (!AudioManager::GetAudioManager()->HasAudioInputDevices()) {
211    bubble_controller_->SetBubbleMessage(
212        caller_id, l10n_util::GetStringUTF16(IDS_SPEECH_INPUT_NO_MIC));
213  } else {
214    recording_caller_id_ = caller_id;
215    requests_[caller_id].is_active = true;
216    requests_[caller_id].recognizer->StartRecording();
217  }
218}
219
220void SpeechInputManagerImpl::CancelRecognition(int caller_id) {
221  DCHECK(HasPendingRequest(caller_id));
222  if (requests_[caller_id].is_active)
223    requests_[caller_id].recognizer->CancelRecognition();
224  requests_.erase(caller_id);
225  if (recording_caller_id_ == caller_id)
226    recording_caller_id_ = 0;
227  bubble_controller_->CloseBubble(caller_id);
228}
229
230void SpeechInputManagerImpl::StopRecording(int caller_id) {
231  DCHECK(HasPendingRequest(caller_id));
232  requests_[caller_id].recognizer->StopRecording();
233}
234
235void SpeechInputManagerImpl::SetRecognitionResult(
236    int caller_id, bool error, const SpeechInputResultArray& result) {
237  DCHECK(HasPendingRequest(caller_id));
238  GetDelegate(caller_id)->SetRecognitionResult(caller_id, result);
239}
240
241void SpeechInputManagerImpl::DidCompleteRecording(int caller_id) {
242  DCHECK(recording_caller_id_ == caller_id);
243  DCHECK(HasPendingRequest(caller_id));
244  recording_caller_id_ = 0;
245  GetDelegate(caller_id)->DidCompleteRecording(caller_id);
246  bubble_controller_->SetBubbleRecognizingMode(caller_id);
247}
248
249void SpeechInputManagerImpl::DidCompleteRecognition(int caller_id) {
250  GetDelegate(caller_id)->DidCompleteRecognition(caller_id);
251  requests_.erase(caller_id);
252  bubble_controller_->CloseBubble(caller_id);
253}
254
255void SpeechInputManagerImpl::OnRecognizerError(
256    int caller_id, SpeechRecognizer::ErrorCode error) {
257  if (caller_id == recording_caller_id_)
258    recording_caller_id_ = 0;
259
260  requests_[caller_id].is_active = false;
261
262  int message_id;
263  switch (error) {
264    case SpeechRecognizer::RECOGNIZER_ERROR_CAPTURE:
265      message_id = IDS_SPEECH_INPUT_ERROR;
266      break;
267    case SpeechRecognizer::RECOGNIZER_ERROR_NO_SPEECH:
268      message_id = IDS_SPEECH_INPUT_NO_SPEECH;
269      break;
270    case SpeechRecognizer::RECOGNIZER_ERROR_NO_RESULTS:
271      message_id = IDS_SPEECH_INPUT_NO_RESULTS;
272      break;
273    default:
274      NOTREACHED() << "unknown error " << error;
275      return;
276  }
277  bubble_controller_->SetBubbleMessage(caller_id,
278                                       l10n_util::GetStringUTF16(message_id));
279}
280
281void SpeechInputManagerImpl::DidCompleteEnvironmentEstimation(int caller_id) {
282  DCHECK(HasPendingRequest(caller_id));
283  DCHECK(recording_caller_id_ == caller_id);
284
285  // Speech recognizer has gathered enough background audio so we can ask the
286  // user to start speaking.
287  bubble_controller_->SetBubbleRecordingMode(caller_id);
288}
289
290void SpeechInputManagerImpl::SetInputVolume(int caller_id, float volume) {
291  DCHECK(HasPendingRequest(caller_id));
292  DCHECK_EQ(recording_caller_id_, caller_id);
293
294  bubble_controller_->SetBubbleInputVolume(caller_id, volume);
295}
296
297void SpeechInputManagerImpl::CancelRecognitionAndInformDelegate(int caller_id) {
298  SpeechInputManagerDelegate* cur_delegate = GetDelegate(caller_id);
299  CancelRecognition(caller_id);
300  cur_delegate->DidCompleteRecording(caller_id);
301  cur_delegate->DidCompleteRecognition(caller_id);
302}
303
304void SpeechInputManagerImpl::InfoBubbleButtonClicked(
305    int caller_id, SpeechInputBubble::Button button) {
306  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
307  // Ignore if the caller id was not in our active recognizers list because the
308  // user might have clicked more than once, or recognition could have been
309  // cancelled due to other reasons before the user click was processed.
310  if (!HasPendingRequest(caller_id))
311    return;
312
313  if (button == SpeechInputBubble::BUTTON_CANCEL) {
314    CancelRecognitionAndInformDelegate(caller_id);
315  } else if (button == SpeechInputBubble::BUTTON_TRY_AGAIN) {
316    StartRecognitionForRequest(caller_id);
317  }
318}
319
320void SpeechInputManagerImpl::InfoBubbleFocusChanged(int caller_id) {
321  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
322  // Ignore if the caller id was not in our active recognizers list because the
323  // user might have clicked more than once, or recognition could have been
324  // ended due to other reasons before the user click was processed.
325  if (HasPendingRequest(caller_id)) {
326    // If this is an ongoing recording or if we were displaying an error message
327    // to the user, abort it since user has switched focus. Otherwise
328    // recognition has started and keep that going so user can start speaking to
329    // another element while this gets the results in parallel.
330    if (recording_caller_id_ == caller_id || !requests_[caller_id].is_active) {
331      CancelRecognitionAndInformDelegate(caller_id);
332    }
333  }
334}
335
336}  // namespace speech_input
337