speech_input_manager.cc revision 72a454cd3513ac24fbdd0e0cb9ad70b86a99b801
1// Copyright (c) 2011 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "chrome/browser/speech/speech_input_manager.h"
6
7#include <map>
8#include <string>
9
10#include "base/command_line.h"
11#include "base/lazy_instance.h"
12#include "base/ref_counted.h"
13#include "base/synchronization/lock.h"
14#include "base/threading/thread_restrictions.h"
15#include "base/utf_string_conversions.h"
16#include "chrome/browser/browser_process.h"
17#include "chrome/browser/browser_thread.h"
18#include "chrome/browser/platform_util.h"
19#include "chrome/browser/prefs/pref_service.h"
20#include "chrome/browser/speech/speech_input_bubble_controller.h"
21#include "chrome/browser/speech/speech_recognizer.h"
22#include "chrome/browser/tab_contents/tab_util.h"
23#include "chrome/common/chrome_switches.h"
24#include "chrome/common/pref_names.h"
25#include "grit/generated_resources.h"
26#include "media/audio/audio_manager.h"
27#include "ui/base/l10n/l10n_util.h"
28
29#if defined(OS_WIN)
30#include "chrome/installer/util/wmi.h"
31#endif
32
33namespace speech_input {
34
35namespace {
36
37// Asynchronously fetches the PC and audio hardware/driver info if
38// the user has opted into UMA. This information is sent with speech input
39// requests to the server for identifying and improving quality issues with
40// specific device configurations.
41class OptionalRequestInfo
42    : public base::RefCountedThreadSafe<OptionalRequestInfo> {
43 public:
44  OptionalRequestInfo() : can_report_metrics_(false) {}
45
46  void Refresh() {
47    DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
48    // UMA opt-in can be checked only from the UI thread, so switch to that.
49    BrowserThread::PostTask(BrowserThread::UI, FROM_HERE,
50        NewRunnableMethod(this,
51                          &OptionalRequestInfo::CheckUMAAndGetHardwareInfo));
52  }
53
54  void CheckUMAAndGetHardwareInfo() {
55    DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
56    if (g_browser_process->local_state()->GetBoolean(
57        prefs::kMetricsReportingEnabled)) {
58      // Access potentially slow OS calls from the FILE thread.
59      BrowserThread::PostTask(BrowserThread::FILE, FROM_HERE,
60          NewRunnableMethod(this, &OptionalRequestInfo::GetHardwareInfo));
61    }
62  }
63
64  void GetHardwareInfo() {
65    DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE));
66    base::AutoLock lock(lock_);
67    can_report_metrics_ = true;
68#if defined(OS_WIN)
69    value_ = UTF16ToUTF8(
70        installer::WMIComputerSystem::GetModel() + L"|" +
71        AudioManager::GetAudioManager()->GetAudioInputDeviceModel());
72#else  // defined(OS_WIN)
73    value_ = UTF16ToUTF8(
74        AudioManager::GetAudioManager()->GetAudioInputDeviceModel());
75#endif  // defined(OS_WIN)
76  }
77
78  std::string value() {
79    base::AutoLock lock(lock_);
80    return value_;
81  }
82
83  bool can_report_metrics() {
84    base::AutoLock lock(lock_);
85    return can_report_metrics_;
86  }
87
88 private:
89  base::Lock lock_;
90  std::string value_;
91  bool can_report_metrics_;
92
93  DISALLOW_COPY_AND_ASSIGN(OptionalRequestInfo);
94};
95
96class SpeechInputManagerImpl : public SpeechInputManager,
97                               public SpeechInputBubbleControllerDelegate,
98                               public SpeechRecognizerDelegate {
99 public:
100  // SpeechInputManager methods.
101  virtual void StartRecognition(SpeechInputManagerDelegate* delegate,
102                                int caller_id,
103                                int render_process_id,
104                                int render_view_id,
105                                const gfx::Rect& element_rect,
106                                const std::string& language,
107                                const std::string& grammar,
108                                const std::string& origin_url);
109  virtual void CancelRecognition(int caller_id);
110  virtual void StopRecording(int caller_id);
111  virtual void CancelAllRequestsWithDelegate(
112      SpeechInputManagerDelegate* delegate);
113
114  // SpeechRecognizer::Delegate methods.
115  virtual void SetRecognitionResult(int caller_id,
116                                    bool error,
117                                    const SpeechInputResultArray& result);
118  virtual void DidCompleteRecording(int caller_id);
119  virtual void DidCompleteRecognition(int caller_id);
120  virtual void OnRecognizerError(int caller_id,
121                                 SpeechRecognizer::ErrorCode error);
122  virtual void DidCompleteEnvironmentEstimation(int caller_id);
123  virtual void SetInputVolume(int caller_id, float volume);
124
125  // SpeechInputBubbleController::Delegate methods.
126  virtual void InfoBubbleButtonClicked(int caller_id,
127                                       SpeechInputBubble::Button button);
128  virtual void InfoBubbleFocusChanged(int caller_id);
129
130 private:
131  struct SpeechInputRequest {
132    SpeechInputManagerDelegate* delegate;
133    scoped_refptr<SpeechRecognizer> recognizer;
134    bool is_active;  // Set to true when recording or recognition is going on.
135  };
136
137  // Private constructor to enforce singleton.
138  friend struct base::DefaultLazyInstanceTraits<SpeechInputManagerImpl>;
139  SpeechInputManagerImpl();
140  virtual ~SpeechInputManagerImpl();
141
142  bool HasPendingRequest(int caller_id) const;
143  SpeechInputManagerDelegate* GetDelegate(int caller_id) const;
144
145  void CancelRecognitionAndInformDelegate(int caller_id);
146
147  // Starts/restarts recognition for an existing request.
148  void StartRecognitionForRequest(int caller_id);
149
150  typedef std::map<int, SpeechInputRequest> SpeechRecognizerMap;
151  SpeechRecognizerMap requests_;
152  int recording_caller_id_;
153  scoped_refptr<SpeechInputBubbleController> bubble_controller_;
154  scoped_refptr<OptionalRequestInfo> optional_request_info_;
155};
156
157base::LazyInstance<SpeechInputManagerImpl> g_speech_input_manager_impl(
158    base::LINKER_INITIALIZED);
159
160}  // namespace
161
162SpeechInputManager* SpeechInputManager::Get() {
163  return g_speech_input_manager_impl.Pointer();
164}
165
166bool SpeechInputManager::IsFeatureEnabled() {
167  bool enabled = true;
168  const CommandLine& command_line = *CommandLine::ForCurrentProcess();
169
170  if (command_line.HasSwitch(switches::kDisableSpeechInput)) {
171    enabled = false;
172#if defined(GOOGLE_CHROME_BUILD)
173  } else if (!command_line.HasSwitch(switches::kEnableSpeechInput)) {
174    // We need to evaluate whether IO is OK here. http://crbug.com/63335.
175    base::ThreadRestrictions::ScopedAllowIO allow_io;
176    // Official Chrome builds have speech input enabled by default only in the
177    // dev channel.
178    std::string channel = platform_util::GetVersionStringModifier();
179    enabled = (channel == "dev");
180#endif
181  }
182
183  return enabled;
184}
185
186SpeechInputManagerImpl::SpeechInputManagerImpl()
187    : recording_caller_id_(0),
188      bubble_controller_(new SpeechInputBubbleController(
189          ALLOW_THIS_IN_INITIALIZER_LIST(this))) {
190}
191
192SpeechInputManagerImpl::~SpeechInputManagerImpl() {
193  while (requests_.begin() != requests_.end())
194    CancelRecognition(requests_.begin()->first);
195}
196
197bool SpeechInputManagerImpl::HasPendingRequest(int caller_id) const {
198  return requests_.find(caller_id) != requests_.end();
199}
200
201SpeechInputManagerDelegate* SpeechInputManagerImpl::GetDelegate(
202    int caller_id) const {
203  return requests_.find(caller_id)->second.delegate;
204}
205
206void SpeechInputManagerImpl::StartRecognition(
207    SpeechInputManagerDelegate* delegate,
208    int caller_id,
209    int render_process_id,
210    int render_view_id,
211    const gfx::Rect& element_rect,
212    const std::string& language,
213    const std::string& grammar,
214    const std::string& origin_url) {
215  DCHECK(!HasPendingRequest(caller_id));
216
217  bubble_controller_->CreateBubble(caller_id, render_process_id, render_view_id,
218                                   element_rect);
219
220  if (!optional_request_info_.get()) {
221    optional_request_info_ = new OptionalRequestInfo();
222    // Since hardware info is optional with speech input requests, we start an
223    // asynchronous fetch here and move on with recording audio. This first
224    // speech input request would send an empty string for hardware info and
225    // subsequent requests may have the hardware info available if the fetch
226    // completed before them. This way we don't end up stalling the user with
227    // a long wait and disk seeks when they click on a UI element and start
228    // speaking.
229    optional_request_info_->Refresh();
230  }
231
232  SpeechInputRequest* request = &requests_[caller_id];
233  request->delegate = delegate;
234  request->recognizer = new SpeechRecognizer(
235      this, caller_id, language, grammar, optional_request_info_->value(),
236      optional_request_info_->can_report_metrics() ? origin_url : "");
237  request->is_active = false;
238
239  StartRecognitionForRequest(caller_id);
240}
241
242void SpeechInputManagerImpl::StartRecognitionForRequest(int caller_id) {
243  DCHECK(HasPendingRequest(caller_id));
244
245  // If we are currently recording audio for another caller, abort that cleanly.
246  if (recording_caller_id_)
247    CancelRecognitionAndInformDelegate(recording_caller_id_);
248
249  if (!AudioManager::GetAudioManager()->HasAudioInputDevices()) {
250    bubble_controller_->SetBubbleMessage(
251        caller_id, l10n_util::GetStringUTF16(IDS_SPEECH_INPUT_NO_MIC));
252  } else {
253    recording_caller_id_ = caller_id;
254    requests_[caller_id].is_active = true;
255    requests_[caller_id].recognizer->StartRecording();
256  }
257}
258
259void SpeechInputManagerImpl::CancelRecognition(int caller_id) {
260  DCHECK(HasPendingRequest(caller_id));
261  if (requests_[caller_id].is_active)
262    requests_[caller_id].recognizer->CancelRecognition();
263  requests_.erase(caller_id);
264  if (recording_caller_id_ == caller_id)
265    recording_caller_id_ = 0;
266  bubble_controller_->CloseBubble(caller_id);
267}
268
269void SpeechInputManagerImpl::CancelAllRequestsWithDelegate(
270    SpeechInputManagerDelegate* delegate) {
271  SpeechRecognizerMap::iterator it = requests_.begin();
272  while (it != requests_.end()) {
273    if (it->second.delegate == delegate) {
274      CancelRecognition(it->first);
275      // This map will have very few elements so it is simpler to restart.
276      it = requests_.begin();
277    } else {
278      ++it;
279    }
280  }
281}
282
283void SpeechInputManagerImpl::StopRecording(int caller_id) {
284  DCHECK(HasPendingRequest(caller_id));
285  requests_[caller_id].recognizer->StopRecording();
286}
287
288void SpeechInputManagerImpl::SetRecognitionResult(
289    int caller_id, bool error, const SpeechInputResultArray& result) {
290  DCHECK(HasPendingRequest(caller_id));
291  GetDelegate(caller_id)->SetRecognitionResult(caller_id, result);
292}
293
294void SpeechInputManagerImpl::DidCompleteRecording(int caller_id) {
295  DCHECK(recording_caller_id_ == caller_id);
296  DCHECK(HasPendingRequest(caller_id));
297  recording_caller_id_ = 0;
298  GetDelegate(caller_id)->DidCompleteRecording(caller_id);
299  bubble_controller_->SetBubbleRecognizingMode(caller_id);
300}
301
302void SpeechInputManagerImpl::DidCompleteRecognition(int caller_id) {
303  GetDelegate(caller_id)->DidCompleteRecognition(caller_id);
304  requests_.erase(caller_id);
305  bubble_controller_->CloseBubble(caller_id);
306}
307
308void SpeechInputManagerImpl::OnRecognizerError(
309    int caller_id, SpeechRecognizer::ErrorCode error) {
310  if (caller_id == recording_caller_id_)
311    recording_caller_id_ = 0;
312
313  requests_[caller_id].is_active = false;
314
315  int message_id;
316  switch (error) {
317    case SpeechRecognizer::RECOGNIZER_ERROR_CAPTURE:
318      message_id = IDS_SPEECH_INPUT_ERROR;
319      break;
320    case SpeechRecognizer::RECOGNIZER_ERROR_NO_SPEECH:
321      message_id = IDS_SPEECH_INPUT_NO_SPEECH;
322      break;
323    case SpeechRecognizer::RECOGNIZER_ERROR_NO_RESULTS:
324      message_id = IDS_SPEECH_INPUT_NO_RESULTS;
325      break;
326    default:
327      NOTREACHED() << "unknown error " << error;
328      return;
329  }
330  bubble_controller_->SetBubbleMessage(caller_id,
331                                       l10n_util::GetStringUTF16(message_id));
332}
333
334void SpeechInputManagerImpl::DidCompleteEnvironmentEstimation(int caller_id) {
335  DCHECK(HasPendingRequest(caller_id));
336  DCHECK(recording_caller_id_ == caller_id);
337
338  // Speech recognizer has gathered enough background audio so we can ask the
339  // user to start speaking.
340  bubble_controller_->SetBubbleRecordingMode(caller_id);
341}
342
343void SpeechInputManagerImpl::SetInputVolume(int caller_id, float volume) {
344  DCHECK(HasPendingRequest(caller_id));
345  DCHECK_EQ(recording_caller_id_, caller_id);
346
347  bubble_controller_->SetBubbleInputVolume(caller_id, volume);
348}
349
350void SpeechInputManagerImpl::CancelRecognitionAndInformDelegate(int caller_id) {
351  SpeechInputManagerDelegate* cur_delegate = GetDelegate(caller_id);
352  CancelRecognition(caller_id);
353  cur_delegate->DidCompleteRecording(caller_id);
354  cur_delegate->DidCompleteRecognition(caller_id);
355}
356
357void SpeechInputManagerImpl::InfoBubbleButtonClicked(
358    int caller_id, SpeechInputBubble::Button button) {
359  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
360  // Ignore if the caller id was not in our active recognizers list because the
361  // user might have clicked more than once, or recognition could have been
362  // cancelled due to other reasons before the user click was processed.
363  if (!HasPendingRequest(caller_id))
364    return;
365
366  if (button == SpeechInputBubble::BUTTON_CANCEL) {
367    CancelRecognitionAndInformDelegate(caller_id);
368  } else if (button == SpeechInputBubble::BUTTON_TRY_AGAIN) {
369    StartRecognitionForRequest(caller_id);
370  }
371}
372
373void SpeechInputManagerImpl::InfoBubbleFocusChanged(int caller_id) {
374  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
375  // Ignore if the caller id was not in our active recognizers list because the
376  // user might have clicked more than once, or recognition could have been
377  // ended due to other reasons before the user click was processed.
378  if (HasPendingRequest(caller_id)) {
379    // If this is an ongoing recording or if we were displaying an error message
380    // to the user, abort it since user has switched focus. Otherwise
381    // recognition has started and keep that going so user can start speaking to
382    // another element while this gets the results in parallel.
383    if (recording_caller_id_ == caller_id || !requests_[caller_id].is_active) {
384      CancelRecognitionAndInformDelegate(caller_id);
385    }
386  }
387}
388
389}  // namespace speech_input
390