// Copyright (c) 2012 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
55821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifndef CONTENT_BROWSER_SPEECH_SPEECH_RECOGNITION_ENGINE_H_
65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#define CONTENT_BROWSER_SPEECH_SPEECH_RECOGNITION_ENGINE_H_
75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
#include <stddef.h>

#include <string>

#include "base/basictypes.h"
#include "content/common/content_export.h"
#include "content/public/common/speech_recognition_grammar.h"
#include "content/public/common/speech_recognition_result.h"
145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)namespace content {
165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
// Forward declarations: both types are only used by reference in this header.
struct SpeechRecognitionError;
class AudioChunk;
195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// This interface models the basic contract that a speech recognition engine,
215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// either working locally or relying on a remote web-service, must obey.
225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// The expected call sequence for exported methods is:
235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// StartRecognition      Mandatory at beginning of SR.
245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//   TakeAudioChunk      For every audio chunk pushed.
255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)//   AudioChunksEnded    Finalize the audio stream (omitted in case of errors).
265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// EndRecognition        Mandatory at end of SR (even on errors).
275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// No delegate callbacks are allowed before StartRecognition or after
285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// EndRecognition. If a recognition was started, the caller can free the
295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// SpeechRecognitionEngine only after calling EndRecognition.
305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)class SpeechRecognitionEngine {
315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) public:
325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Interface for receiving callbacks from this object.
335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  class Delegate {
345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)   public:
355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // Called whenever a result is retrieved. It might be issued several times,
365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // (e.g., in the case of continuous speech recognition engine
375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    // implementations).
382a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)    virtual void OnSpeechRecognitionEngineResults(
392a99a7e74a7f215066514fe81d2bfa6639d9edddTorne (Richard Coles)        const SpeechRecognitionResults& results) = 0;
405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    virtual void OnSpeechRecognitionEngineError(
415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)        const SpeechRecognitionError& error) = 0;
425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)   protected:
445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    virtual ~Delegate() {}
455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  };
465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Remote engine configuration.
485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  struct CONTENT_EXPORT Config {
495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    Config();
505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    ~Config();
515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    std::string language;
535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    SpeechRecognitionGrammarArray grammars;
545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    bool filter_profanities;
555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    bool continuous;
565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    bool interim_results;
575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    uint32 max_hypotheses;
585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    std::string hardware_info;
595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    std::string origin_url;
605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int audio_sample_rate;
615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    int audio_num_bits_per_sample;
625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  };
635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  virtual ~SpeechRecognitionEngine() {}
655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Set/change the recognition engine configuration. It is not allowed to call
675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // this function while a recognition is ongoing.
685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  virtual void SetConfig(const Config& config) = 0;
695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Called when the speech recognition begins, before any TakeAudioChunk call.
715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  virtual void StartRecognition() = 0;
725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // End any recognition activity and don't make any further callback.
745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Must be always called to close the corresponding StartRecognition call,
755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // even in case of errors.
765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // No further TakeAudioChunk/AudioChunksEnded calls are allowed after this.
775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  virtual void EndRecognition() = 0;
785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Push a chunk of uncompressed audio data, where the chunk length agrees with
805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // GetDesiredAudioChunkDurationMs().
815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  virtual void TakeAudioChunk(const AudioChunk& data) = 0;
825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Notifies the engine that audio capture has completed and no more chunks
845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // will be pushed. The engine, however, can still provide further results
855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // using the audio chunks collected so far.
865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  virtual void AudioChunksEnded() = 0;
875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Checks wheter recognition of pushed audio data is pending.
895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  virtual bool IsRecognitionPending() const = 0;
905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Retrieves the desired duration, in milliseconds, of pushed AudioChunk(s).
925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  virtual int GetDesiredAudioChunkDurationMs() const = 0;
935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // set_delegate detached from constructor for lazy dependency injection.
955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void set_delegate(Delegate* delegate) { delegate_ = delegate; }
965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) protected:
985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Delegate* delegate() const { return delegate_; }
995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) private:
1015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  Delegate* delegate_;
1025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)};
1035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// These typedefs are to workaround the issue with certain versions of
1055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Visual Studio where it gets confused between multiple Delegate
1065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// classes and gives a C2500 error.
1075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)typedef SpeechRecognitionEngine::Delegate SpeechRecognitionEngineDelegate;
1085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)typedef SpeechRecognitionEngine::Config SpeechRecognitionEngineConfig;
1095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)}  // namespace content
1115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#endif  // CONTENT_BROWSER_SPEECH_SPEECH_RECOGNITION_ENGINE_H_
113