1// Copyright (c) 2011 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "content/browser/speech/speech_input_manager.h"
6
7#include <map>
8#include <string>
9
10#include "base/lazy_instance.h"
11#include "base/memory/ref_counted.h"
12#include "base/synchronization/lock.h"
13#include "base/threading/thread_restrictions.h"
14#include "base/utf_string_conversions.h"
15#include "chrome/browser/browser_process.h"
16#include "chrome/browser/platform_util.h"
17#include "chrome/browser/prefs/pref_service.h"
18#include "chrome/browser/speech/speech_input_bubble_controller.h"
19#include "chrome/browser/tab_contents/tab_util.h"
20#include "chrome/common/chrome_switches.h"
21#include "chrome/common/pref_names.h"
22#include "content/browser/browser_thread.h"
23#include "content/browser/speech/speech_recognizer.h"
24#include "grit/generated_resources.h"
25#include "media/audio/audio_manager.h"
26#include "ui/base/l10n/l10n_util.h"
27
28#if defined(OS_WIN)
29#include "chrome/installer/util/wmi.h"
30#endif
31
32namespace speech_input {
33
34namespace {
35
36// Asynchronously fetches the PC and audio hardware/driver info if
37// the user has opted into UMA. This information is sent with speech input
38// requests to the server for identifying and improving quality issues with
39// specific device configurations.
40class OptionalRequestInfo
41    : public base::RefCountedThreadSafe<OptionalRequestInfo> {
42 public:
43  OptionalRequestInfo() : can_report_metrics_(false) {}
44
45  void Refresh() {
46    DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
47    // UMA opt-in can be checked only from the UI thread, so switch to that.
48    BrowserThread::PostTask(BrowserThread::UI, FROM_HERE,
49        NewRunnableMethod(this,
50                          &OptionalRequestInfo::CheckUMAAndGetHardwareInfo));
51  }
52
53  void CheckUMAAndGetHardwareInfo() {
54    DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
55    if (g_browser_process->local_state()->GetBoolean(
56        prefs::kMetricsReportingEnabled)) {
57      // Access potentially slow OS calls from the FILE thread.
58      BrowserThread::PostTask(BrowserThread::FILE, FROM_HERE,
59          NewRunnableMethod(this, &OptionalRequestInfo::GetHardwareInfo));
60    }
61  }
62
63  void GetHardwareInfo() {
64    DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE));
65    base::AutoLock lock(lock_);
66    can_report_metrics_ = true;
67#if defined(OS_WIN)
68    value_ = UTF16ToUTF8(
69        installer::WMIComputerSystem::GetModel() + L"|" +
70        AudioManager::GetAudioManager()->GetAudioInputDeviceModel());
71#else  // defined(OS_WIN)
72    value_ = UTF16ToUTF8(
73        AudioManager::GetAudioManager()->GetAudioInputDeviceModel());
74#endif  // defined(OS_WIN)
75  }
76
77  std::string value() {
78    base::AutoLock lock(lock_);
79    return value_;
80  }
81
82  bool can_report_metrics() {
83    base::AutoLock lock(lock_);
84    return can_report_metrics_;
85  }
86
87 private:
88  base::Lock lock_;
89  std::string value_;
90  bool can_report_metrics_;
91
92  DISALLOW_COPY_AND_ASSIGN(OptionalRequestInfo);
93};
94
95class SpeechInputManagerImpl : public SpeechInputManager,
96                               public SpeechInputBubbleControllerDelegate,
97                               public SpeechRecognizerDelegate {
98 public:
99  // SpeechInputManager methods.
100  virtual void StartRecognition(SpeechInputManagerDelegate* delegate,
101                                int caller_id,
102                                int render_process_id,
103                                int render_view_id,
104                                const gfx::Rect& element_rect,
105                                const std::string& language,
106                                const std::string& grammar,
107                                const std::string& origin_url);
108  virtual void CancelRecognition(int caller_id);
109  virtual void StopRecording(int caller_id);
110  virtual void CancelAllRequestsWithDelegate(
111      SpeechInputManagerDelegate* delegate);
112
113  // SpeechRecognizer::Delegate methods.
114  virtual void DidStartReceivingAudio(int caller_id);
115  virtual void SetRecognitionResult(int caller_id,
116                                    bool error,
117                                    const SpeechInputResultArray& result);
118  virtual void DidCompleteRecording(int caller_id);
119  virtual void DidCompleteRecognition(int caller_id);
120  virtual void OnRecognizerError(int caller_id,
121                                 SpeechRecognizer::ErrorCode error);
122  virtual void DidCompleteEnvironmentEstimation(int caller_id);
123  virtual void SetInputVolume(int caller_id, float volume, float noise_volume);
124
125  // SpeechInputBubbleController::Delegate methods.
126  virtual void InfoBubbleButtonClicked(int caller_id,
127                                       SpeechInputBubble::Button button);
128  virtual void InfoBubbleFocusChanged(int caller_id);
129
130 private:
131  struct SpeechInputRequest {
132    SpeechInputManagerDelegate* delegate;
133    scoped_refptr<SpeechRecognizer> recognizer;
134    bool is_active;  // Set to true when recording or recognition is going on.
135  };
136
137  // Private constructor to enforce singleton.
138  friend struct base::DefaultLazyInstanceTraits<SpeechInputManagerImpl>;
139  SpeechInputManagerImpl();
140  virtual ~SpeechInputManagerImpl();
141
142  bool HasPendingRequest(int caller_id) const;
143  SpeechInputManagerDelegate* GetDelegate(int caller_id) const;
144
145  void CancelRecognitionAndInformDelegate(int caller_id);
146
147  // Starts/restarts recognition for an existing request.
148  void StartRecognitionForRequest(int caller_id);
149
150  typedef std::map<int, SpeechInputRequest> SpeechRecognizerMap;
151  SpeechRecognizerMap requests_;
152  int recording_caller_id_;
153  scoped_refptr<SpeechInputBubbleController> bubble_controller_;
154  scoped_refptr<OptionalRequestInfo> optional_request_info_;
155};
156
157base::LazyInstance<SpeechInputManagerImpl> g_speech_input_manager_impl(
158    base::LINKER_INITIALIZED);
159
160}  // namespace
161
162SpeechInputManager* SpeechInputManager::Get() {
163  return g_speech_input_manager_impl.Pointer();
164}
165
166void SpeechInputManager::ShowAudioInputSettings() {
167  // Since AudioManager::ShowAudioInputSettings can potentially launch external
168  // processes, do that in the FILE thread to not block the calling threads.
169  if (!BrowserThread::CurrentlyOn(BrowserThread::FILE)) {
170    BrowserThread::PostTask(
171        BrowserThread::FILE, FROM_HERE,
172        NewRunnableFunction(&SpeechInputManager::ShowAudioInputSettings));
173    return;
174  }
175
176  DCHECK(AudioManager::GetAudioManager()->CanShowAudioInputSettings());
177  if (AudioManager::GetAudioManager()->CanShowAudioInputSettings())
178    AudioManager::GetAudioManager()->ShowAudioInputSettings();
179}
180
181SpeechInputManagerImpl::SpeechInputManagerImpl()
182    : recording_caller_id_(0),
183      bubble_controller_(new SpeechInputBubbleController(
184          ALLOW_THIS_IN_INITIALIZER_LIST(this))) {
185}
186
187SpeechInputManagerImpl::~SpeechInputManagerImpl() {
188  while (requests_.begin() != requests_.end())
189    CancelRecognition(requests_.begin()->first);
190}
191
192bool SpeechInputManagerImpl::HasPendingRequest(int caller_id) const {
193  return requests_.find(caller_id) != requests_.end();
194}
195
196SpeechInputManagerDelegate* SpeechInputManagerImpl::GetDelegate(
197    int caller_id) const {
198  return requests_.find(caller_id)->second.delegate;
199}
200
201void SpeechInputManagerImpl::StartRecognition(
202    SpeechInputManagerDelegate* delegate,
203    int caller_id,
204    int render_process_id,
205    int render_view_id,
206    const gfx::Rect& element_rect,
207    const std::string& language,
208    const std::string& grammar,
209    const std::string& origin_url) {
210  DCHECK(!HasPendingRequest(caller_id));
211
212  bubble_controller_->CreateBubble(caller_id, render_process_id, render_view_id,
213                                   element_rect);
214
215  if (!optional_request_info_.get()) {
216    optional_request_info_ = new OptionalRequestInfo();
217    // Since hardware info is optional with speech input requests, we start an
218    // asynchronous fetch here and move on with recording audio. This first
219    // speech input request would send an empty string for hardware info and
220    // subsequent requests may have the hardware info available if the fetch
221    // completed before them. This way we don't end up stalling the user with
222    // a long wait and disk seeks when they click on a UI element and start
223    // speaking.
224    optional_request_info_->Refresh();
225  }
226
227  SpeechInputRequest* request = &requests_[caller_id];
228  request->delegate = delegate;
229  request->recognizer = new SpeechRecognizer(
230      this, caller_id, language, grammar, optional_request_info_->value(),
231      optional_request_info_->can_report_metrics() ? origin_url : "");
232  request->is_active = false;
233
234  StartRecognitionForRequest(caller_id);
235}
236
237void SpeechInputManagerImpl::StartRecognitionForRequest(int caller_id) {
238  DCHECK(HasPendingRequest(caller_id));
239
240  // If we are currently recording audio for another caller, abort that cleanly.
241  if (recording_caller_id_)
242    CancelRecognitionAndInformDelegate(recording_caller_id_);
243
244  if (!AudioManager::GetAudioManager()->HasAudioInputDevices()) {
245    bubble_controller_->SetBubbleMessage(
246        caller_id, l10n_util::GetStringUTF16(IDS_SPEECH_INPUT_NO_MIC));
247  } else {
248    recording_caller_id_ = caller_id;
249    requests_[caller_id].is_active = true;
250    requests_[caller_id].recognizer->StartRecording();
251    bubble_controller_->SetBubbleWarmUpMode(caller_id);
252  }
253}
254
255void SpeechInputManagerImpl::CancelRecognition(int caller_id) {
256  DCHECK(HasPendingRequest(caller_id));
257  if (requests_[caller_id].is_active)
258    requests_[caller_id].recognizer->CancelRecognition();
259  requests_.erase(caller_id);
260  if (recording_caller_id_ == caller_id)
261    recording_caller_id_ = 0;
262  bubble_controller_->CloseBubble(caller_id);
263}
264
265void SpeechInputManagerImpl::CancelAllRequestsWithDelegate(
266    SpeechInputManagerDelegate* delegate) {
267  SpeechRecognizerMap::iterator it = requests_.begin();
268  while (it != requests_.end()) {
269    if (it->second.delegate == delegate) {
270      CancelRecognition(it->first);
271      // This map will have very few elements so it is simpler to restart.
272      it = requests_.begin();
273    } else {
274      ++it;
275    }
276  }
277}
278
279void SpeechInputManagerImpl::StopRecording(int caller_id) {
280  DCHECK(HasPendingRequest(caller_id));
281  requests_[caller_id].recognizer->StopRecording();
282}
283
284void SpeechInputManagerImpl::SetRecognitionResult(
285    int caller_id, bool error, const SpeechInputResultArray& result) {
286  DCHECK(HasPendingRequest(caller_id));
287  GetDelegate(caller_id)->SetRecognitionResult(caller_id, result);
288}
289
290void SpeechInputManagerImpl::DidCompleteRecording(int caller_id) {
291  DCHECK(recording_caller_id_ == caller_id);
292  DCHECK(HasPendingRequest(caller_id));
293  recording_caller_id_ = 0;
294  GetDelegate(caller_id)->DidCompleteRecording(caller_id);
295  bubble_controller_->SetBubbleRecognizingMode(caller_id);
296}
297
298void SpeechInputManagerImpl::DidCompleteRecognition(int caller_id) {
299  GetDelegate(caller_id)->DidCompleteRecognition(caller_id);
300  requests_.erase(caller_id);
301  bubble_controller_->CloseBubble(caller_id);
302}
303
304void SpeechInputManagerImpl::OnRecognizerError(
305    int caller_id, SpeechRecognizer::ErrorCode error) {
306  if (caller_id == recording_caller_id_)
307    recording_caller_id_ = 0;
308
309  requests_[caller_id].is_active = false;
310
311  struct ErrorMessageMapEntry {
312    SpeechRecognizer::ErrorCode error;
313    int message_id;
314  };
315  ErrorMessageMapEntry error_message_map[] = {
316    {
317      SpeechRecognizer::RECOGNIZER_ERROR_CAPTURE, IDS_SPEECH_INPUT_MIC_ERROR
318    }, {
319      SpeechRecognizer::RECOGNIZER_ERROR_NO_SPEECH, IDS_SPEECH_INPUT_NO_SPEECH
320    }, {
321      SpeechRecognizer::RECOGNIZER_ERROR_NO_RESULTS, IDS_SPEECH_INPUT_NO_RESULTS
322    }, {
323      SpeechRecognizer::RECOGNIZER_ERROR_NETWORK, IDS_SPEECH_INPUT_NET_ERROR
324    }
325  };
326  for (size_t i = 0; i < ARRAYSIZE_UNSAFE(error_message_map); ++i) {
327    if (error_message_map[i].error == error) {
328      bubble_controller_->SetBubbleMessage(
329          caller_id,
330          l10n_util::GetStringUTF16(error_message_map[i].message_id));
331      return;
332    }
333  }
334
335  NOTREACHED() << "unknown error " << error;
336}
337
338void SpeechInputManagerImpl::DidStartReceivingAudio(int caller_id) {
339  DCHECK(HasPendingRequest(caller_id));
340  DCHECK(recording_caller_id_ == caller_id);
341  bubble_controller_->SetBubbleRecordingMode(caller_id);
342}
343
344void SpeechInputManagerImpl::DidCompleteEnvironmentEstimation(int caller_id) {
345  DCHECK(HasPendingRequest(caller_id));
346  DCHECK(recording_caller_id_ == caller_id);
347}
348
349void SpeechInputManagerImpl::SetInputVolume(int caller_id, float volume,
350                                            float noise_volume) {
351  DCHECK(HasPendingRequest(caller_id));
352  DCHECK_EQ(recording_caller_id_, caller_id);
353
354  bubble_controller_->SetBubbleInputVolume(caller_id, volume, noise_volume);
355}
356
357void SpeechInputManagerImpl::CancelRecognitionAndInformDelegate(int caller_id) {
358  SpeechInputManagerDelegate* cur_delegate = GetDelegate(caller_id);
359  CancelRecognition(caller_id);
360  cur_delegate->DidCompleteRecording(caller_id);
361  cur_delegate->DidCompleteRecognition(caller_id);
362}
363
364void SpeechInputManagerImpl::InfoBubbleButtonClicked(
365    int caller_id, SpeechInputBubble::Button button) {
366  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
367  // Ignore if the caller id was not in our active recognizers list because the
368  // user might have clicked more than once, or recognition could have been
369  // cancelled due to other reasons before the user click was processed.
370  if (!HasPendingRequest(caller_id))
371    return;
372
373  if (button == SpeechInputBubble::BUTTON_CANCEL) {
374    CancelRecognitionAndInformDelegate(caller_id);
375  } else if (button == SpeechInputBubble::BUTTON_TRY_AGAIN) {
376    StartRecognitionForRequest(caller_id);
377  }
378}
379
380void SpeechInputManagerImpl::InfoBubbleFocusChanged(int caller_id) {
381  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
382  // Ignore if the caller id was not in our active recognizers list because the
383  // user might have clicked more than once, or recognition could have been
384  // ended due to other reasons before the user click was processed.
385  if (HasPendingRequest(caller_id)) {
386    // If this is an ongoing recording or if we were displaying an error message
387    // to the user, abort it since user has switched focus. Otherwise
388    // recognition has started and keep that going so user can start speaking to
389    // another element while this gets the results in parallel.
390    if (recording_caller_id_ == caller_id || !requests_[caller_id].is_active) {
391      CancelRecognitionAndInformDelegate(caller_id);
392    }
393  }
394}
395
396}  // namespace speech_input
397