15821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Copyright (c) 2012 The Chromium Authors. All rights reserved. 25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Use of this source code is governed by a BSD-style license that can be 35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// found in the LICENSE file. 45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 55821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifndef CONTENT_BROWSER_SPEECH_GOOGLE_STREAMING_REMOTE_ENGINE_H_ 65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#define CONTENT_BROWSER_SPEECH_GOOGLE_STREAMING_REMOTE_ENGINE_H_ 75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 85821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <string> 95821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <vector> 105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/basictypes.h" 125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/memory/ref_counted.h" 135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/memory/scoped_ptr.h" 145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/threading/non_thread_safe.h" 155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "content/browser/speech/audio_encoder.h" 165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "content/browser/speech/chunked_byte_buffer.h" 175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "content/browser/speech/speech_recognition_engine.h" 185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "content/common/content_export.h" 195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "content/public/common/speech_recognition_error.h" 205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "net/url_request/url_fetcher_delegate.h" 215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace net { 235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)class URLRequestContextGetter; 245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} 255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace content { 275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)class AudioChunk; 295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)struct SpeechRecognitionError; 305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)struct SpeechRecognitionResult; 315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Implements a SpeechRecognitionEngine supporting continuous recognition by 335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// means of interaction with Google streaming speech recognition webservice. 345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// More in details, this class establishes two HTTP(S) connections with the 355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// webservice, for each session, herein called "upstream" and "downstream". 365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Audio chunks are sent on the upstream by means of a chunked HTTP POST upload. 375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Recognition results are retrieved in a full-duplex fashion (i.e. while 385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// pushing audio on the upstream) on the downstream by means of a chunked 395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// HTTP GET request. Pairing between the two stream is handled through a 405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// randomly generated key, unique for each request, which is passed in the 415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// &pair= arg to both stream request URLs. 425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// In the case of a regular session, the upstream is closed when the audio 435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// capture ends (notified through a |AudioChunksEnded| call) and the downstream 445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// waits for a corresponding server closure (eventually some late results can 455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// come after closing the upstream). 465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Both stream are guaranteed to be closed when |EndRecognition| call is issued. 475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)class CONTENT_EXPORT GoogleStreamingRemoteEngine 485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) : public NON_EXPORTED_BASE(SpeechRecognitionEngine), 495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) public net::URLFetcherDelegate, 505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) public NON_EXPORTED_BASE(base::NonThreadSafe) { 515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) public: 524e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) // Duration of each audio packet. 534e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) static const int kAudioPacketIntervalMs; 544e180b6a0b4720a9b8e9e959a882386f690f08ffTorne (Richard Coles) 558bcbed890bc3ce4d7a057a8f32cab53fa534672eTorne (Richard Coles) // IDs passed to URLFetcher::Create(). Used for testing. 568bcbed890bc3ce4d7a057a8f32cab53fa534672eTorne (Richard Coles) static const int kUpstreamUrlFetcherIdForTesting; 578bcbed890bc3ce4d7a057a8f32cab53fa534672eTorne (Richard Coles) static const int kDownstreamUrlFetcherIdForTesting; 588bcbed890bc3ce4d7a057a8f32cab53fa534672eTorne (Richard Coles) 595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) explicit GoogleStreamingRemoteEngine(net::URLRequestContextGetter* context); 605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) virtual ~GoogleStreamingRemoteEngine(); 615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // SpeechRecognitionEngine methods. 635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) virtual void SetConfig(const SpeechRecognitionEngineConfig& config) OVERRIDE; 645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) virtual void StartRecognition() OVERRIDE; 655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) virtual void EndRecognition() OVERRIDE; 665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) virtual void TakeAudioChunk(const AudioChunk& data) OVERRIDE; 675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) virtual void AudioChunksEnded() OVERRIDE; 685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) virtual bool IsRecognitionPending() const OVERRIDE; 695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) virtual int GetDesiredAudioChunkDurationMs() const OVERRIDE; 705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // net::URLFetcherDelegate methods. 725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) virtual void OnURLFetchComplete(const net::URLFetcher* source) OVERRIDE; 735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) virtual void OnURLFetchDownloadProgress(const net::URLFetcher* source, 745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int64 current, int64 total) OVERRIDE; 755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) private: 775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Response status codes from the speech recognition webservice. 785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) static const int kWebserviceStatusNoError; 795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) static const int kWebserviceStatusErrorNoMatch; 805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Data types for the internal Finite State Machine (FSM). 825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) enum FSMState { 835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) STATE_IDLE = 0, 845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) STATE_BOTH_STREAMS_CONNECTED, 855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) STATE_WAITING_DOWNSTREAM_RESULTS, 865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) STATE_MAX_VALUE = STATE_WAITING_DOWNSTREAM_RESULTS 875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) }; 885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) enum FSMEvent { 905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) EVENT_END_RECOGNITION = 0, 915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) EVENT_START_RECOGNITION, 925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) EVENT_AUDIO_CHUNK, 935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) EVENT_AUDIO_CHUNKS_ENDED, 945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) EVENT_UPSTREAM_ERROR, 955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) EVENT_DOWNSTREAM_ERROR, 965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) EVENT_DOWNSTREAM_RESPONSE, 975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) EVENT_DOWNSTREAM_CLOSED, 985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) EVENT_MAX_VALUE = EVENT_DOWNSTREAM_CLOSED 995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) }; 1005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) struct FSMEventArgs { 1025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) explicit FSMEventArgs(FSMEvent event_value); 1035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ~FSMEventArgs(); 1045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) FSMEvent event; 1065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // In case of EVENT_AUDIO_CHUNK, holds the chunk pushed by |TakeAudioChunk|. 1085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) scoped_refptr<const AudioChunk> audio_data; 1095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // In case of EVENT_DOWNSTREAM_RESPONSE, hold the current chunk bytes. 1115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) scoped_ptr<std::vector<uint8> > response; 1125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) private: 1145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DISALLOW_COPY_AND_ASSIGN(FSMEventArgs); 1155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) }; 1165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Invoked by both upstream and downstream URLFetcher callbacks to handle 1185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // new chunk data, connection closed or errors notifications. 1195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) void DispatchHTTPResponse(const net::URLFetcher* source, 1205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool end_of_response); 1215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Entry point for pushing any new external event into the recognizer FSM. 1235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) void DispatchEvent(const FSMEventArgs& event_args); 1245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Defines the behavior of the recognizer FSM, selecting the appropriate 1265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // transition according to the current state and event. 1275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) FSMState ExecuteTransitionAndGetNextState(const FSMEventArgs& event_args); 1285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // The methods below handle transitions of the recognizer FSM. 1305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) FSMState ConnectBothStreams(const FSMEventArgs& event_args); 1315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) FSMState TransmitAudioUpstream(const FSMEventArgs& event_args); 1325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) FSMState ProcessDownstreamResponse(const FSMEventArgs& event_args); 1335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) FSMState RaiseNoMatchErrorIfGotNoResults(const FSMEventArgs& event_args); 1345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) FSMState CloseUpstreamAndWaitForResults(const FSMEventArgs& event_args); 1355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) FSMState CloseDownstream(const FSMEventArgs& event_args); 1365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) FSMState AbortSilently(const FSMEventArgs& event_args); 1375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) FSMState AbortWithError(const FSMEventArgs& event_args); 1385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) FSMState Abort(SpeechRecognitionErrorCode error); 1395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) FSMState DoNothing(const FSMEventArgs& event_args); 1405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) FSMState NotFeasible(const FSMEventArgs& event_args); 1415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) std::string GetAcceptedLanguages() const; 1435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) std::string GenerateRequestKey() const; 1445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) SpeechRecognitionEngineConfig config_; 1465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) scoped_ptr<net::URLFetcher> upstream_fetcher_; 1475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) scoped_ptr<net::URLFetcher> downstream_fetcher_; 1485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) scoped_refptr<net::URLRequestContextGetter> url_context_; 1495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) scoped_ptr<AudioEncoder> encoder_; 1505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ChunkedByteBuffer chunked_byte_buffer_; 1515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) size_t previous_response_length_; 1525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool got_last_definitive_result_; 1535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool is_dispatching_event_; 1545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) FSMState state_; 1555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) DISALLOW_COPY_AND_ASSIGN(GoogleStreamingRemoteEngine); 1575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}; 1585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} // namespace content 1605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif // CONTENT_BROWSER_SPEECH_GOOGLE_STREAMING_REMOTE_ENGINE_H_ 162