15821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Copyright (c) 2012 The Chromium Authors. All rights reserved. 25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Use of this source code is governed by a BSD-style license that can be 35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// found in the LICENSE file. 45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 55821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifndef CONTENT_BROWSER_SPEECH_SPEECH_RECOGNITION_MANAGER_IMPL_H_ 65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#define CONTENT_BROWSER_SPEECH_SPEECH_RECOGNITION_MANAGER_IMPL_H_ 75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 85821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <map> 95821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <string> 105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/basictypes.h" 125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/callback.h" 135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/compiler_specific.h" 145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/memory/weak_ptr.h" 155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "content/browser/renderer_host/media/media_stream_requester.h" 165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "content/public/browser/speech_recognition_event_listener.h" 175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "content/public/browser/speech_recognition_manager.h" 185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "content/public/browser/speech_recognition_session_config.h" 195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "content/public/browser/speech_recognition_session_context.h" 205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "content/public/common/speech_recognition_error.h" 215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 22eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdochnamespace media { 23eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdochclass AudioManager; 24eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch} 25eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch 265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace content { 275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)class BrowserMainLoop; 28eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdochclass MediaStreamManager; 29868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles)class MediaStreamUIProxy; 305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)class SpeechRecognitionManagerDelegate; 315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)class SpeechRecognizer; 325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// This is the manager for speech recognition. It is a single instance in 345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// the browser process and can serve several requests. Each recognition request 355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// corresponds to a session, initiated via |CreateSession|. 365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// In any moment, the manager has a single session known as the primary session, 385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// |primary_session_id_|. 395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// This is the session that is capturing audio, waiting for user permission, 405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// etc. There may also be other, non-primary, sessions living in parallel that 415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// are waiting for results but not recording audio. 425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// 435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// The SpeechRecognitionManager has the following responsibilities: 445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// - Handles requests received from various render views and makes sure only 455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// one of them accesses the audio device at any given time. 465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// - Handles the instantiation of SpeechRecognitionEngine objects when 475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// requested by SpeechRecognitionSessions. 485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// - Relays recognition results/status/error events of each session to the 495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// corresponding listener (demuxing on the base of their session_id). 505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// - Relays also recognition results/status/error events of every session to 515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// the catch-all snoop listener (optionally) provided by the delegate. 525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)class CONTENT_EXPORT SpeechRecognitionManagerImpl : 535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) public NON_EXPORTED_BASE(SpeechRecognitionManager), 545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) public SpeechRecognitionEventListener { 555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) public: 565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Returns the current SpeechRecognitionManagerImpl or NULL if the call is 575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // issued when it is not created yet or destroyed (by BrowserMainLoop). 585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) static SpeechRecognitionManagerImpl* GetInstance(); 595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // SpeechRecognitionManager implementation. 615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) virtual int CreateSession( 625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const SpeechRecognitionSessionConfig& config) OVERRIDE; 635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) virtual void StartSession(int session_id) OVERRIDE; 645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) virtual void AbortSession(int session_id) OVERRIDE; 65f5859ba46034e02ada5ef522d9d9c09fbbddccd8Ben Murdoch virtual void AbortAllSessionsForRenderProcess(int render_process_id) OVERRIDE; 665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) virtual void AbortAllSessionsForRenderView(int render_process_id, 675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int render_view_id) OVERRIDE; 685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) virtual void StopAudioCaptureForSession(int session_id) OVERRIDE; 695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) virtual const SpeechRecognitionSessionConfig& GetSessionConfig( 705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int session_id) const OVERRIDE; 715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) virtual SpeechRecognitionSessionContext GetSessionContext( 725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int session_id) const OVERRIDE; 735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) virtual int GetSession(int render_process_id, 745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int render_view_id, 755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int request_id) const OVERRIDE; 765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) virtual bool HasAudioInputDevices() OVERRIDE; 77a3f6a49ab37290eeeb8db0f41ec0f1cb74a68be7Torne (Richard Coles) virtual base::string16 GetAudioInputDeviceModel() OVERRIDE; 785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) virtual void ShowAudioInputSettings() OVERRIDE; 795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // SpeechRecognitionEventListener methods. 815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) virtual void OnRecognitionStart(int session_id) OVERRIDE; 825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) virtual void OnAudioStart(int session_id) OVERRIDE; 835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) virtual void OnEnvironmentEstimationComplete(int session_id) OVERRIDE; 845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) virtual void OnSoundStart(int session_id) OVERRIDE; 855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) virtual void OnSoundEnd(int session_id) OVERRIDE; 865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) virtual void OnAudioEnd(int session_id) OVERRIDE; 875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) virtual void OnRecognitionEnd(int session_id) OVERRIDE; 882a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) virtual void OnRecognitionResults( 892a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) int session_id, const SpeechRecognitionResults& result) OVERRIDE; 905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) virtual void OnRecognitionError( 915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int session_id, const SpeechRecognitionError& error) OVERRIDE; 925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) virtual void OnAudioLevelsChange(int session_id, float volume, 935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) float noise_volume) OVERRIDE; 945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 957dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch SpeechRecognitionManagerDelegate* delegate() const { return delegate_.get(); } 967dbb3d5cf0c15f500944d211057644d6a2f37371Ben Murdoch 975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) protected: 985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // BrowserMainLoop is the only one allowed to istantiate and free us. 995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) friend class BrowserMainLoop; 1002a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) // Needed for dtor. 1012a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) friend struct base::DefaultDeleter<SpeechRecognitionManagerImpl>; 102eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch SpeechRecognitionManagerImpl(media::AudioManager* audio_manager, 103eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch MediaStreamManager* media_stream_manager); 1045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) virtual ~SpeechRecognitionManagerImpl(); 1055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) private: 1075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Data types for the internal Finite State Machine (FSM). 1085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) enum FSMState { 1095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) SESSION_STATE_IDLE = 0, 1105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) SESSION_STATE_CAPTURING_AUDIO, 1115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) SESSION_STATE_WAITING_FOR_RESULT, 1125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) SESSION_STATE_MAX_VALUE = SESSION_STATE_WAITING_FOR_RESULT 1135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) }; 1145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) enum FSMEvent { 1165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) EVENT_ABORT = 0, 1175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) EVENT_START, 1185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) EVENT_STOP_CAPTURE, 1195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) EVENT_AUDIO_ENDED, 1205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) EVENT_RECOGNITION_ENDED, 1215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) EVENT_MAX_VALUE = EVENT_RECOGNITION_ENDED 1225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) }; 1235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) struct Session { 1255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Session(); 1265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ~Session(); 1275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int id; 1294e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) bool abort_requested; 1305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool listener_is_active; 1315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) SpeechRecognitionSessionConfig config; 1325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) SpeechRecognitionSessionContext context; 1335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) scoped_refptr<SpeechRecognizer> recognizer; 134868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) scoped_ptr<MediaStreamUIProxy> ui; 1355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) }; 1365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Callback issued by the SpeechRecognitionManagerDelegate for reporting 1385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // asynchronously the result of the CheckRecognitionIsAllowed call. 1395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) void RecognitionAllowedCallback(int session_id, 1405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool ask_user, 1415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool is_allowed); 1425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 143868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) // Callback to get back the result of a media request. |devices| is an array 144868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) // of devices approved to be used for the request, |devices| is empty if the 145868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) // users deny the request. 146868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) void MediaRequestPermissionCallback(int session_id, 147868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) const MediaStreamDevices& devices, 148868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) scoped_ptr<MediaStreamUIProxy> stream_ui); 1492a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) 1505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Entry point for pushing any external event into the session handling FSM. 1515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) void DispatchEvent(int session_id, FSMEvent event); 1525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Defines the behavior of the session handling FSM, selecting the appropriate 1545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // transition according to the session, its current state and the event. 155868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) void ExecuteTransitionAndGetNextState(Session* session, 156868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) FSMState session_state, 157868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) FSMEvent event); 1585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Retrieves the state of the session, enquiring directly the recognizer. 1605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) FSMState GetSessionState(int session_id) const; 1615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // The methods below handle transitions of the session handling FSM. 1635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) void SessionStart(const Session& session); 1645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) void SessionAbort(const Session& session); 1655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) void SessionStopAudioCapture(const Session& session); 1665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) void ResetCapturingSessionId(const Session& session); 167868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) void SessionDelete(Session* session); 1685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) void NotFeasible(const Session& session, FSMEvent event); 1695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool SessionExists(int session_id) const; 171868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) Session* GetSession(int session_id) const; 1725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) SpeechRecognitionEventListener* GetListener(int session_id) const; 1735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) SpeechRecognitionEventListener* GetDelegateListener() const; 1745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int GetNextSessionID(); 1755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 176eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch media::AudioManager* audio_manager_; 177eb525c5499e34cc9c4b825d6d9e75bb07cc06aceBen Murdoch MediaStreamManager* media_stream_manager_; 178868fa2fe829687343ffae624259930155e16dbd8Torne (Richard Coles) typedef std::map<int, Session*> SessionsTable; 1795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) SessionsTable sessions_; 1805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int primary_session_id_; 1815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int last_session_id_; 1825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool is_dispatching_event_; 1835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) scoped_ptr<SpeechRecognitionManagerDelegate> delegate_; 1845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Used for posting asynchronous tasks (on the IO thread) without worrying 1865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // about this class being destroyed in the meanwhile (due to browser shutdown) 1875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // since tasks pending on a destroyed WeakPtr are automatically discarded. 1885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) base::WeakPtrFactory<SpeechRecognitionManagerImpl> weak_factory_; 1895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}; 1905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} // namespace content 1925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif // CONTENT_BROWSER_SPEECH_SPEECH_RECOGNITION_MANAGER_IMPL_H_ 194