1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "chrome/browser/speech/chrome_speech_recognition_manager_delegate.h"
6
7#include <set>
8#include <string>
9
10#include "base/bind.h"
11#include "base/prefs/pref_service.h"
12#include "base/strings/utf_string_conversions.h"
13#include "base/synchronization/lock.h"
14#include "base/threading/thread_restrictions.h"
15#include "chrome/browser/browser_process.h"
16#include "chrome/browser/profiles/profile_manager.h"
17#include "chrome/browser/tab_contents/tab_util.h"
18#include "chrome/common/pref_names.h"
19#include "chrome/common/url_constants.h"
20#include "content/public/browser/browser_thread.h"
21#include "content/public/browser/notification_registrar.h"
22#include "content/public/browser/notification_source.h"
23#include "content/public/browser/notification_types.h"
24#include "content/public/browser/render_process_host.h"
25#include "content/public/browser/render_view_host.h"
26#include "content/public/browser/resource_context.h"
27#include "content/public/browser/speech_recognition_manager.h"
28#include "content/public/browser/speech_recognition_session_config.h"
29#include "content/public/browser/speech_recognition_session_context.h"
30#include "content/public/browser/web_contents.h"
31#include "content/public/common/speech_recognition_error.h"
32#include "content/public/common/speech_recognition_result.h"
33#include "net/url_request/url_request_context_getter.h"
34
35#if defined(OS_WIN)
36#include "chrome/installer/util/wmi.h"
37#endif
38
39#if defined(ENABLE_EXTENSIONS)
40#include "chrome/browser/extensions/extension_service.h"
41#include "extensions/browser/view_type_utils.h"
42#endif
43
44using content::BrowserThread;
45using content::SpeechRecognitionManager;
46using content::WebContents;
47
48namespace speech {
49
50namespace {
51
52void TabClosedCallbackOnIOThread(int render_process_id, int render_view_id) {
53  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
54
55  SpeechRecognitionManager* manager = SpeechRecognitionManager::GetInstance();
56  // |manager| becomes NULL if a browser shutdown happens between the post of
57  // this task (from the UI thread) and this call (on the IO thread). In this
58  // case we just return.
59  if (!manager)
60    return;
61
62  manager->AbortAllSessionsForRenderView(render_process_id, render_view_id);
63}
64
65}  // namespace
66
67
68// Asynchronously fetches the PC and audio hardware/driver info if
69// the user has opted into UMA. This information is sent with speech input
70// requests to the server for identifying and improving quality issues with
71// specific device configurations.
72class ChromeSpeechRecognitionManagerDelegate::OptionalRequestInfo
73    : public base::RefCountedThreadSafe<OptionalRequestInfo> {
74 public:
75  OptionalRequestInfo() : can_report_metrics_(false) {
76  }
77
78  void Refresh() {
79    DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
80    // UMA opt-in can be checked only from the UI thread, so switch to that.
81    BrowserThread::PostTask(BrowserThread::UI, FROM_HERE,
82        base::Bind(&OptionalRequestInfo::CheckUMAAndGetHardwareInfo, this));
83  }
84
85  void CheckUMAAndGetHardwareInfo() {
86    DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
87    // prefs::kMetricsReportingEnabled is not registered for OS_CHROMEOS.
88#if !defined(OS_CHROMEOS)
89    if (g_browser_process->local_state()->GetBoolean(
90        prefs::kMetricsReportingEnabled)) {
91      // Access potentially slow OS calls from the FILE thread.
92      BrowserThread::PostTask(BrowserThread::FILE, FROM_HERE,
93          base::Bind(&OptionalRequestInfo::GetHardwareInfo, this));
94    }
95#endif
96  }
97
98  void GetHardwareInfo() {
99    DCHECK(BrowserThread::CurrentlyOn(BrowserThread::FILE));
100    base::AutoLock lock(lock_);
101    can_report_metrics_ = true;
102    base::string16 device_model =
103        SpeechRecognitionManager::GetInstance()->GetAudioInputDeviceModel();
104#if defined(OS_WIN)
105    value_ = base::UTF16ToUTF8(
106        installer::WMIComputerSystem::GetModel() + L"|" + device_model);
107#else  // defined(OS_WIN)
108    value_ = base::UTF16ToUTF8(device_model);
109#endif  // defined(OS_WIN)
110  }
111
112  std::string value() {
113    base::AutoLock lock(lock_);
114    return value_;
115  }
116
117  bool can_report_metrics() {
118    base::AutoLock lock(lock_);
119    return can_report_metrics_;
120  }
121
122 private:
123  friend class base::RefCountedThreadSafe<OptionalRequestInfo>;
124
125  ~OptionalRequestInfo() {}
126
127  base::Lock lock_;
128  std::string value_;
129  bool can_report_metrics_;
130
131  DISALLOW_COPY_AND_ASSIGN(OptionalRequestInfo);
132};
133
// Simple utility to get notified when a WebContents (a tab or an extension's
// background page) is closed or crashes. The callback will always be called on
// the UI thread.
// There is no restriction on the constructor, however this class must be
// destroyed on the UI thread, due to the NotificationRegistrar dependency.
139class ChromeSpeechRecognitionManagerDelegate::TabWatcher
140    : public base::RefCountedThreadSafe<TabWatcher>,
141      public content::NotificationObserver {
142 public:
143  typedef base::Callback<void(int render_process_id, int render_view_id)>
144      TabClosedCallback;
145
146  explicit TabWatcher(TabClosedCallback tab_closed_callback)
147      : tab_closed_callback_(tab_closed_callback) {
148  }
149
150  // Starts monitoring the WebContents corresponding to the given
151  // |render_process_id|, |render_view_id| pair, invoking |tab_closed_callback_|
152  // if closed/unloaded.
153  void Watch(int render_process_id, int render_view_id) {
154    if (!BrowserThread::CurrentlyOn(BrowserThread::UI)) {
155      BrowserThread::PostTask(BrowserThread::UI, FROM_HERE, base::Bind(
156          &TabWatcher::Watch, this, render_process_id, render_view_id));
157      return;
158    }
159
160    WebContents* web_contents = tab_util::GetWebContentsByID(render_process_id,
161                                                             render_view_id);
162    // Sessions initiated by speech input extension APIs will end up in a NULL
163    // WebContent here, but they are properly managed by the
164    // chrome::SpeechInputExtensionManager. However, sessions initiated within a
165    // extension using the (new) speech JS APIs, will be properly handled here.
166    // TODO(primiano) turn this line into a DCHECK once speech input extension
167    // API is deprecated.
168    if (!web_contents)
169      return;
170
171    // Avoid multiple registrations on |registrar_| for the same |web_contents|.
172    if (FindWebContents(web_contents) !=  registered_web_contents_.end()) {
173      return;
174    }
175    registered_web_contents_.push_back(
176        WebContentsInfo(web_contents, render_process_id, render_view_id));
177
178    // Lazy initialize the registrar.
179    if (!registrar_.get())
180      registrar_.reset(new content::NotificationRegistrar());
181
182    registrar_->Add(this,
183                    content::NOTIFICATION_RENDER_VIEW_HOST_CHANGED,
184                    content::Source<WebContents>(web_contents));
185    registrar_->Add(this,
186                    content::NOTIFICATION_WEB_CONTENTS_DISCONNECTED,
187                    content::Source<WebContents>(web_contents));
188  }
189
190  // content::NotificationObserver implementation.
191  virtual void Observe(int type,
192                       const content::NotificationSource& source,
193                       const content::NotificationDetails& details) OVERRIDE {
194    DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
195    DCHECK(type == content::NOTIFICATION_WEB_CONTENTS_DISCONNECTED ||
196           type == content::NOTIFICATION_RENDER_VIEW_HOST_CHANGED);
197
198    WebContents* web_contents = content::Source<WebContents>(source).ptr();
199    std::vector<WebContentsInfo>::iterator iter = FindWebContents(web_contents);
200    DCHECK(iter != registered_web_contents_.end());
201    int render_process_id = iter->render_process_id;
202    int render_view_id = iter->render_view_id;
203    registered_web_contents_.erase(iter);
204
205    registrar_->Remove(this,
206                       content::NOTIFICATION_RENDER_VIEW_HOST_CHANGED,
207                       content::Source<WebContents>(web_contents));
208    registrar_->Remove(this,
209                       content::NOTIFICATION_WEB_CONTENTS_DISCONNECTED,
210                       content::Source<WebContents>(web_contents));
211
212    tab_closed_callback_.Run(render_process_id, render_view_id);
213  }
214
215 private:
216  struct WebContentsInfo {
217    WebContentsInfo(content::WebContents* web_contents,
218                    int render_process_id,
219                    int render_view_id)
220        : web_contents(web_contents),
221          render_process_id(render_process_id),
222          render_view_id(render_view_id) {}
223
224    ~WebContentsInfo() {}
225
226    content::WebContents* web_contents;
227    int render_process_id;
228    int render_view_id;
229  };
230
231  friend class base::RefCountedThreadSafe<TabWatcher>;
232
233  virtual ~TabWatcher() {
234    // Must be destroyed on the UI thread due to |registrar_| non thread-safety.
235    DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
236  }
237
238  // Helper function to find the iterator in |registered_web_contents_| which
239  // contains |web_contents|.
240  std::vector<WebContentsInfo>::iterator FindWebContents(
241      content::WebContents* web_contents) {
242    for (std::vector<WebContentsInfo>::iterator i(
243         registered_web_contents_.begin());
244         i != registered_web_contents_.end(); ++i) {
245      if (i->web_contents == web_contents)
246        return i;
247    }
248
249    return registered_web_contents_.end();
250  }
251
252  // Lazy-initialized and used on the UI thread to handle web contents
253  // notifications (tab closing).
254  scoped_ptr<content::NotificationRegistrar> registrar_;
255
256  // Keeps track of which WebContent(s) have been registered, in order to avoid
257  // double registrations on |registrar_| and to pass the correct render
258  // process id and render view id to |tab_closed_callback_| after the process
259  // has gone away.
260  std::vector<WebContentsInfo> registered_web_contents_;
261
262  // Callback used to notify, on the thread specified by |callback_thread_| the
263  // closure of a registered tab.
264  TabClosedCallback tab_closed_callback_;
265
266  DISALLOW_COPY_AND_ASSIGN(TabWatcher);
267};
268
// Does no work at construction time. The helpers used by this class are
// created lazily on first use (see OnRecognitionStart() and
// GetDiagnosticInformation()).
ChromeSpeechRecognitionManagerDelegate
::ChromeSpeechRecognitionManagerDelegate() {
}
272
// Empty body: no explicit teardown is performed here.
ChromeSpeechRecognitionManagerDelegate
::~ChromeSpeechRecognitionManagerDelegate() {
}
276
277void ChromeSpeechRecognitionManagerDelegate::TabClosedCallback(
278    int render_process_id, int render_view_id) {
279  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
280
281  // Tell the S.R. Manager (which lives on the IO thread) to abort all the
282  // sessions for the given renderer view.
283  BrowserThread::PostTask(BrowserThread::IO, FROM_HERE, base::Bind(
284      &TabClosedCallbackOnIOThread, render_process_id, render_view_id));
285}
286
287void ChromeSpeechRecognitionManagerDelegate::OnRecognitionStart(
288    int session_id) {
289  const content::SpeechRecognitionSessionContext& context =
290      SpeechRecognitionManager::GetInstance()->GetSessionContext(session_id);
291
292  // Register callback to auto abort session on tab closure.
293  // |tab_watcher_| is lazyly istantiated on the first call.
294  if (!tab_watcher_.get()) {
295    tab_watcher_ = new TabWatcher(
296        base::Bind(&ChromeSpeechRecognitionManagerDelegate::TabClosedCallback,
297                   base::Unretained(this)));
298  }
299  tab_watcher_->Watch(context.render_process_id, context.render_view_id);
300}
301
// Empty implementation: this delegate takes no action on OnAudioStart.
void ChromeSpeechRecognitionManagerDelegate::OnAudioStart(int session_id) {
}
304
// Empty implementation: this delegate takes no action on
// OnEnvironmentEstimationComplete.
void ChromeSpeechRecognitionManagerDelegate::OnEnvironmentEstimationComplete(
    int session_id) {
}
308
// Empty implementation: this delegate takes no action on OnSoundStart.
void ChromeSpeechRecognitionManagerDelegate::OnSoundStart(int session_id) {
}
311
// Empty implementation: this delegate takes no action on OnSoundEnd.
void ChromeSpeechRecognitionManagerDelegate::OnSoundEnd(int session_id) {
}
314
// Empty implementation: this delegate takes no action on OnAudioEnd.
void ChromeSpeechRecognitionManagerDelegate::OnAudioEnd(int session_id) {
}
317
// Empty implementation: |result| is ignored by this delegate.
void ChromeSpeechRecognitionManagerDelegate::OnRecognitionResults(
    int session_id, const content::SpeechRecognitionResults& result) {
}
321
// Empty implementation: |error| is ignored by this delegate.
void ChromeSpeechRecognitionManagerDelegate::OnRecognitionError(
    int session_id, const content::SpeechRecognitionError& error) {
}
325
// Empty implementation: |volume| and |noise_volume| are ignored by this
// delegate.
void ChromeSpeechRecognitionManagerDelegate::OnAudioLevelsChange(
    int session_id, float volume, float noise_volume) {
}
329
// Empty implementation: this delegate takes no action on OnRecognitionEnd.
void ChromeSpeechRecognitionManagerDelegate::OnRecognitionEnd(int session_id) {
}
332
333void ChromeSpeechRecognitionManagerDelegate::GetDiagnosticInformation(
334    bool* can_report_metrics,
335    std::string* hardware_info) {
336  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
337  if (!optional_request_info_.get()) {
338    optional_request_info_ = new OptionalRequestInfo();
339    // Since hardware info is optional with speech input requests, we start an
340    // asynchronous fetch here and move on with recording audio. This first
341    // speech input request would send an empty string for hardware info and
342    // subsequent requests may have the hardware info available if the fetch
343    // completed before them. This way we don't end up stalling the user with
344    // a long wait and disk seeks when they click on a UI element and start
345    // speaking.
346    optional_request_info_->Refresh();
347  }
348  *can_report_metrics = optional_request_info_->can_report_metrics();
349  *hardware_info = optional_request_info_->value();
350}
351
352void ChromeSpeechRecognitionManagerDelegate::CheckRecognitionIsAllowed(
353    int session_id,
354    base::Callback<void(bool ask_user, bool is_allowed)> callback) {
355  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::IO));
356
357  const content::SpeechRecognitionSessionContext& context =
358      SpeechRecognitionManager::GetInstance()->GetSessionContext(session_id);
359
360  // Make sure that initiators (extensions/web pages) properly set the
361  // |render_process_id| field, which is needed later to retrieve the profile.
362  DCHECK_NE(context.render_process_id, 0);
363
364  int render_process_id = context.render_process_id;
365  int render_view_id = context.render_view_id;
366  if (context.embedder_render_process_id) {
367    // If this is a request originated from a guest, we need to re-route the
368    // permission check through the embedder (app).
369    render_process_id = context.embedder_render_process_id;
370    render_view_id = context.embedder_render_view_id;
371  }
372
373  // Check that the render view type is appropriate, and whether or not we
374  // need to request permission from the user.
375  BrowserThread::PostTask(BrowserThread::UI, FROM_HERE,
376                          base::Bind(&CheckRenderViewType,
377                                     callback,
378                                     render_process_id,
379                                     render_view_id));
380}
381
// This delegate acts as its own event listener, so it returns itself.
content::SpeechRecognitionEventListener*
ChromeSpeechRecognitionManagerDelegate::GetEventListener() {
  return this;
}
386
387bool ChromeSpeechRecognitionManagerDelegate::FilterProfanities(
388    int render_process_id) {
389  content::RenderProcessHost* rph =
390      content::RenderProcessHost::FromID(render_process_id);
391  if (!rph)  // Guard against race conditions on RPH lifetime.
392    return true;
393
394  return Profile::FromBrowserContext(rph->GetBrowserContext())->GetPrefs()->
395      GetBoolean(prefs::kSpeechRecognitionFilterProfanities);
396}
397
398// static.
399void ChromeSpeechRecognitionManagerDelegate::CheckRenderViewType(
400    base::Callback<void(bool ask_user, bool is_allowed)> callback,
401    int render_process_id,
402    int render_view_id) {
403  DCHECK(BrowserThread::CurrentlyOn(BrowserThread::UI));
404  const content::RenderViewHost* render_view_host =
405      content::RenderViewHost::FromID(render_process_id, render_view_id);
406
407  bool allowed = false;
408  bool check_permission = false;
409
410  if (!render_view_host) {
411    // This happens for extensions. Manifest should be checked for permission.
412    allowed = true;
413    check_permission = false;
414    BrowserThread::PostTask(BrowserThread::IO, FROM_HERE,
415                            base::Bind(callback, check_permission, allowed));
416    return;
417  }
418
419  WebContents* web_contents = WebContents::FromRenderViewHost(render_view_host);
420
421  // chrome://app-list/ uses speech recognition.
422  if (web_contents->GetCommittedWebUI() &&
423      web_contents->GetLastCommittedURL().spec() ==
424      chrome::kChromeUIAppListStartPageURL) {
425    allowed = true;
426    check_permission = false;
427  }
428
429#if defined(ENABLE_EXTENSIONS)
430  extensions::ViewType view_type = extensions::GetViewType(web_contents);
431
432  if (view_type == extensions::VIEW_TYPE_TAB_CONTENTS ||
433      view_type == extensions::VIEW_TYPE_APP_WINDOW ||
434      view_type == extensions::VIEW_TYPE_LAUNCHER_PAGE ||
435      view_type == extensions::VIEW_TYPE_VIRTUAL_KEYBOARD ||
436      view_type == extensions::VIEW_TYPE_EXTENSION_BACKGROUND_PAGE) {
437    // If it is a tab, we can check for permission. For apps, this means
438    // manifest would be checked for permission.
439    allowed = true;
440    check_permission = true;
441  }
442#else
443  // Otherwise this should be a regular tab contents.
444  allowed = true;
445  check_permission = true;
446#endif
447
448  BrowserThread::PostTask(BrowserThread::IO, FROM_HERE,
449                          base::Bind(callback, check_permission, allowed));
450}
451
452}  // namespace speech
453