// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

5#ifndef CONTENT_BROWSER_SPEECH_SPEECH_RECOGNITION_MANAGER_IMPL_H_
6#define CONTENT_BROWSER_SPEECH_SPEECH_RECOGNITION_MANAGER_IMPL_H_
7
8#include <map>
9#include <string>
10
11#include "base/basictypes.h"
12#include "base/callback.h"
13#include "base/compiler_specific.h"
14#include "base/memory/weak_ptr.h"
15#include "content/browser/renderer_host/media/media_stream_requester.h"
16#include "content/public/browser/speech_recognition_event_listener.h"
17#include "content/public/browser/speech_recognition_manager.h"
18#include "content/public/browser/speech_recognition_session_config.h"
19#include "content/public/browser/speech_recognition_session_context.h"
20#include "content/public/common/speech_recognition_error.h"
21
// Forward declaration only: this header refers to media::AudioManager solely
// through pointers, so the full definition is not needed here.
namespace media {
class AudioManager;
}  // namespace media

26namespace content {
27class BrowserMainLoop;
28class MediaStreamManager;
29class MediaStreamUIProxy;
30class SpeechRecognitionManagerDelegate;
31class SpeechRecognizer;
32
33// This is the manager for speech recognition. It is a single instance in
34// the browser process and can serve several requests. Each recognition request
35// corresponds to a session, initiated via |CreateSession|.
36//
37// In any moment, the manager has a single session known as the primary session,
38// |primary_session_id_|.
39// This is the session that is capturing audio, waiting for user permission,
40// etc. There may also be other, non-primary, sessions living in parallel that
41// are waiting for results but not recording audio.
42//
43// The SpeechRecognitionManager has the following responsibilities:
44//  - Handles requests received from various render views and makes sure only
45//    one of them accesses the audio device at any given time.
46//  - Handles the instantiation of SpeechRecognitionEngine objects when
47//    requested by SpeechRecognitionSessions.
48//  - Relays recognition results/status/error events of each session to the
49//    corresponding listener (demuxing on the base of their session_id).
50//  - Relays also recognition results/status/error events of every session to
51//    the catch-all snoop listener (optionally) provided by the delegate.
52class CONTENT_EXPORT SpeechRecognitionManagerImpl :
53    public NON_EXPORTED_BASE(SpeechRecognitionManager),
54    public SpeechRecognitionEventListener {
55 public:
56  // Returns the current SpeechRecognitionManagerImpl or NULL if the call is
57  // issued when it is not created yet or destroyed (by BrowserMainLoop).
58  static SpeechRecognitionManagerImpl* GetInstance();
59
60  // SpeechRecognitionManager implementation.
61  virtual int CreateSession(
62      const SpeechRecognitionSessionConfig& config) OVERRIDE;
63  virtual void StartSession(int session_id) OVERRIDE;
64  virtual void AbortSession(int session_id) OVERRIDE;
65  virtual void AbortAllSessionsForListener(
66        SpeechRecognitionEventListener* listener) OVERRIDE;
67  virtual void AbortAllSessionsForRenderView(int render_process_id,
68                                             int render_view_id) OVERRIDE;
69  virtual void StopAudioCaptureForSession(int session_id) OVERRIDE;
70  virtual const SpeechRecognitionSessionConfig& GetSessionConfig(
71      int session_id) const OVERRIDE;
72  virtual SpeechRecognitionSessionContext GetSessionContext(
73      int session_id) const OVERRIDE;
74  virtual int GetSession(int render_process_id,
75                         int render_view_id,
76                         int request_id) const OVERRIDE;
77  virtual bool HasAudioInputDevices() OVERRIDE;
78  virtual string16 GetAudioInputDeviceModel() OVERRIDE;
79  virtual void ShowAudioInputSettings() OVERRIDE;
80
81  // SpeechRecognitionEventListener methods.
82  virtual void OnRecognitionStart(int session_id) OVERRIDE;
83  virtual void OnAudioStart(int session_id) OVERRIDE;
84  virtual void OnEnvironmentEstimationComplete(int session_id) OVERRIDE;
85  virtual void OnSoundStart(int session_id) OVERRIDE;
86  virtual void OnSoundEnd(int session_id) OVERRIDE;
87  virtual void OnAudioEnd(int session_id) OVERRIDE;
88  virtual void OnRecognitionEnd(int session_id) OVERRIDE;
89  virtual void OnRecognitionResults(
90      int session_id, const SpeechRecognitionResults& result) OVERRIDE;
91  virtual void OnRecognitionError(
92      int session_id, const SpeechRecognitionError& error) OVERRIDE;
93  virtual void OnAudioLevelsChange(int session_id, float volume,
94                                   float noise_volume) OVERRIDE;
95
96  SpeechRecognitionManagerDelegate* delegate() const { return delegate_.get(); }
97
98 protected:
99  // BrowserMainLoop is the only one allowed to istantiate and free us.
100  friend class BrowserMainLoop;
101  // Needed for dtor.
102  friend struct base::DefaultDeleter<SpeechRecognitionManagerImpl>;
103  SpeechRecognitionManagerImpl(media::AudioManager* audio_manager,
104                               MediaStreamManager* media_stream_manager);
105  virtual ~SpeechRecognitionManagerImpl();
106
107 private:
108  // Data types for the internal Finite State Machine (FSM).
109  enum FSMState {
110    SESSION_STATE_IDLE = 0,
111    SESSION_STATE_CAPTURING_AUDIO,
112    SESSION_STATE_WAITING_FOR_RESULT,
113    SESSION_STATE_MAX_VALUE = SESSION_STATE_WAITING_FOR_RESULT
114  };
115
116  enum FSMEvent {
117    EVENT_ABORT = 0,
118    EVENT_START,
119    EVENT_STOP_CAPTURE,
120    EVENT_AUDIO_ENDED,
121    EVENT_RECOGNITION_ENDED,
122    EVENT_MAX_VALUE = EVENT_RECOGNITION_ENDED
123  };
124
125  struct Session {
126    Session();
127    ~Session();
128
129    int id;
130    bool listener_is_active;
131    SpeechRecognitionSessionConfig config;
132    SpeechRecognitionSessionContext context;
133    scoped_refptr<SpeechRecognizer> recognizer;
134    scoped_ptr<MediaStreamUIProxy> ui;
135  };
136
137  // Callback issued by the SpeechRecognitionManagerDelegate for reporting
138  // asynchronously the result of the CheckRecognitionIsAllowed call.
139  void RecognitionAllowedCallback(int session_id,
140                                  bool ask_user,
141                                  bool is_allowed);
142
143  // Callback to get back the result of a media request. |devices| is an array
144  // of devices approved to be used for the request, |devices| is empty if the
145  // users deny the request.
146  void MediaRequestPermissionCallback(int session_id,
147                                      const MediaStreamDevices& devices,
148                                      scoped_ptr<MediaStreamUIProxy> stream_ui);
149
150  // Entry point for pushing any external event into the session handling FSM.
151  void DispatchEvent(int session_id, FSMEvent event);
152
153  // Defines the behavior of the session handling FSM, selecting the appropriate
154  // transition according to the session, its current state and the event.
155  void ExecuteTransitionAndGetNextState(Session* session,
156                                        FSMState session_state,
157                                        FSMEvent event);
158
159  // Retrieves the state of the session, enquiring directly the recognizer.
160  FSMState GetSessionState(int session_id) const;
161
162  // The methods below handle transitions of the session handling FSM.
163  void SessionStart(const Session& session);
164  void SessionAbort(const Session& session);
165  void SessionStopAudioCapture(const Session& session);
166  void ResetCapturingSessionId(const Session& session);
167  void SessionDelete(Session* session);
168  void NotFeasible(const Session& session, FSMEvent event);
169
170  bool SessionExists(int session_id) const;
171  Session* GetSession(int session_id) const;
172  SpeechRecognitionEventListener* GetListener(int session_id) const;
173  SpeechRecognitionEventListener* GetDelegateListener() const;
174  int GetNextSessionID();
175
176  media::AudioManager* audio_manager_;
177  MediaStreamManager* media_stream_manager_;
178  typedef std::map<int, Session*> SessionsTable;
179  SessionsTable sessions_;
180  int primary_session_id_;
181  int last_session_id_;
182  bool is_dispatching_event_;
183  scoped_ptr<SpeechRecognitionManagerDelegate> delegate_;
184
185  // Used for posting asynchronous tasks (on the IO thread) without worrying
186  // about this class being destroyed in the meanwhile (due to browser shutdown)
187  // since tasks pending on a destroyed WeakPtr are automatically discarded.
188  base::WeakPtrFactory<SpeechRecognitionManagerImpl> weak_factory_;
189};
190
191}  // namespace content
192
193#endif  // CONTENT_BROWSER_SPEECH_SPEECH_RECOGNITION_MANAGER_IMPL_H_
194