15821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Copyright (c) 2012 The Chromium Authors. All rights reserved. 25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Use of this source code is governed by a BSD-style license that can be 35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// found in the LICENSE file. 45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 55821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifndef CONTENT_BROWSER_SPEECH_SPEECH_RECOGNITION_ENGINE_H_ 65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#define CONTENT_BROWSER_SPEECH_SPEECH_RECOGNITION_ENGINE_H_ 75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 85821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <string> 95821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "base/basictypes.h" 115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "content/common/content_export.h" 125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "content/public/common/speech_recognition_grammar.h" 132a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)#include "content/public/common/speech_recognition_result.h" 145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace content { 165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)class AudioChunk; 185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)struct SpeechRecognitionError; 195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// This interface models the basic contract that a speech recognition engine, 215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// either working locally or relying on a remote web-service, must obey. 225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// The expected call sequence for exported methods is: 235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// StartRecognition Mandatory at beginning of SR. 245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// TakeAudioChunk For every audio chunk pushed. 255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// AudioChunksEnded Finalize the audio stream (omitted in case of errors). 265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// EndRecognition Mandatory at end of SR (even on errors). 275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// No delegate callbacks are allowed before StartRecognition or after 285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// EndRecognition. If a recognition was started, the caller can free the 295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// SpeechRecognitionEngine only after calling EndRecognition. 305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)class SpeechRecognitionEngine { 315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) public: 325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Interface for receiving callbacks from this object. 335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) class Delegate { 345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) public: 355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Called whenever a result is retrieved. It might be issued several times, 365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // (e.g., in the case of continuous speech recognition engine 375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // implementations). 382a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) virtual void OnSpeechRecognitionEngineResults( 392a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles) const SpeechRecognitionResults& results) = 0; 405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) virtual void OnSpeechRecognitionEngineError( 415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) const SpeechRecognitionError& error) = 0; 425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) protected: 445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) virtual ~Delegate() {} 455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) }; 465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Remote engine configuration. 485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) struct CONTENT_EXPORT Config { 495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Config(); 505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) ~Config(); 515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) std::string language; 535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) SpeechRecognitionGrammarArray grammars; 545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool filter_profanities; 555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool continuous; 565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) bool interim_results; 575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) uint32 max_hypotheses; 585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) std::string hardware_info; 595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) std::string origin_url; 605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int audio_sample_rate; 615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) int audio_num_bits_per_sample; 625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) }; 635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) virtual ~SpeechRecognitionEngine() {} 655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Set/change the recognition engine configuration. It is not allowed to call 675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // this function while a recognition is ongoing. 685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) virtual void SetConfig(const Config& config) = 0; 695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Called when the speech recognition begins, before any TakeAudioChunk call. 715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) virtual void StartRecognition() = 0; 725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // End any recognition activity and don't make any further callback. 745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Must be always called to close the corresponding StartRecognition call, 755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // even in case of errors. 765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // No further TakeAudioChunk/AudioChunksEnded calls are allowed after this. 775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) virtual void EndRecognition() = 0; 785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Push a chunk of uncompressed audio data, where the chunk length agrees with 805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // GetDesiredAudioChunkDurationMs(). 815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) virtual void TakeAudioChunk(const AudioChunk& data) = 0; 825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Notifies the engine that audio capture has completed and no more chunks 845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // will be pushed. The engine, however, can still provide further results 855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // using the audio chunks collected so far. 865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) virtual void AudioChunksEnded() = 0; 875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Checks wheter recognition of pushed audio data is pending. 895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) virtual bool IsRecognitionPending() const = 0; 905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // Retrieves the desired duration, in milliseconds, of pushed AudioChunk(s). 925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) virtual int GetDesiredAudioChunkDurationMs() const = 0; 935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) // set_delegate detached from constructor for lazy dependency injection. 955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) void set_delegate(Delegate* delegate) { delegate_ = delegate; } 965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) protected: 985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Delegate* delegate() const { return delegate_; } 995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) private: 1015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) Delegate* delegate_; 1025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}; 1035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// These typedefs are to workaround the issue with certain versions of 1055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Visual Studio where it gets confused between multiple Delegate 1065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// classes and gives a C2500 error. 1075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)typedef SpeechRecognitionEngine::Delegate SpeechRecognitionEngineDelegate; 1085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)typedef SpeechRecognitionEngine::Config SpeechRecognitionEngineConfig; 1095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)} // namespace content 1115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) 1125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif // CONTENT_BROWSER_SPEECH_SPEECH_RECOGNITION_ENGINE_H_ 113