1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#ifndef CONTENT_BROWSER_SPEECH_SPEECH_RECOGNITION_ENGINE_H_
6#define CONTENT_BROWSER_SPEECH_SPEECH_RECOGNITION_ENGINE_H_
7
8#include <string>
9
10#include "base/basictypes.h"
11#include "content/common/content_export.h"
12#include "content/public/common/speech_recognition_grammar.h"
13#include "content/public/common/speech_recognition_result.h"
14
15namespace content {
16
// Forward declarations: this header only refers to these types by reference,
// so their complete definitions are not required here.
struct SpeechRecognitionError;
class AudioChunk;
19
20// This interface models the basic contract that a speech recognition engine,
21// either working locally or relying on a remote web-service, must obey.
22// The expected call sequence for exported methods is:
23// StartRecognition      Mandatory at beginning of SR.
24//   TakeAudioChunk      For every audio chunk pushed.
25//   AudioChunksEnded    Finalize the audio stream (omitted in case of errors).
26// EndRecognition        Mandatory at end of SR (even on errors).
27// No delegate callbacks are allowed before StartRecognition or after
28// EndRecognition. If a recognition was started, the caller can free the
29// SpeechRecognitionEngine only after calling EndRecognition.
30class SpeechRecognitionEngine {
31 public:
32  // Interface for receiving callbacks from this object.
33  class Delegate {
34   public:
35    // Called whenever a result is retrieved. It might be issued several times,
36    // (e.g., in the case of continuous speech recognition engine
37    // implementations).
38    virtual void OnSpeechRecognitionEngineResults(
39        const SpeechRecognitionResults& results) = 0;
40    virtual void OnSpeechRecognitionEngineError(
41        const SpeechRecognitionError& error) = 0;
42
43   protected:
44    virtual ~Delegate() {}
45  };
46
47  // Remote engine configuration.
48  struct CONTENT_EXPORT Config {
49    Config();
50    ~Config();
51
52    std::string language;
53    SpeechRecognitionGrammarArray grammars;
54    bool filter_profanities;
55    bool continuous;
56    bool interim_results;
57    uint32 max_hypotheses;
58    std::string hardware_info;
59    std::string origin_url;
60    int audio_sample_rate;
61    int audio_num_bits_per_sample;
62  };
63
64  virtual ~SpeechRecognitionEngine() {}
65
66  // Set/change the recognition engine configuration. It is not allowed to call
67  // this function while a recognition is ongoing.
68  virtual void SetConfig(const Config& config) = 0;
69
70  // Called when the speech recognition begins, before any TakeAudioChunk call.
71  virtual void StartRecognition() = 0;
72
73  // End any recognition activity and don't make any further callback.
74  // Must be always called to close the corresponding StartRecognition call,
75  // even in case of errors.
76  // No further TakeAudioChunk/AudioChunksEnded calls are allowed after this.
77  virtual void EndRecognition() = 0;
78
79  // Push a chunk of uncompressed audio data, where the chunk length agrees with
80  // GetDesiredAudioChunkDurationMs().
81  virtual void TakeAudioChunk(const AudioChunk& data) = 0;
82
83  // Notifies the engine that audio capture has completed and no more chunks
84  // will be pushed. The engine, however, can still provide further results
85  // using the audio chunks collected so far.
86  virtual void AudioChunksEnded() = 0;
87
88  // Checks wheter recognition of pushed audio data is pending.
89  virtual bool IsRecognitionPending() const = 0;
90
91  // Retrieves the desired duration, in milliseconds, of pushed AudioChunk(s).
92  virtual int GetDesiredAudioChunkDurationMs() const = 0;
93
94  // set_delegate detached from constructor for lazy dependency injection.
95  void set_delegate(Delegate* delegate) { delegate_ = delegate; }
96
97 protected:
98  Delegate* delegate() const { return delegate_; }
99
100 private:
101  Delegate* delegate_;
102};
103
104// These typedefs are to workaround the issue with certain versions of
105// Visual Studio where it gets confused between multiple Delegate
106// classes and gives a C2500 error.
107typedef SpeechRecognitionEngine::Delegate SpeechRecognitionEngineDelegate;
108typedef SpeechRecognitionEngine::Config SpeechRecognitionEngineConfig;
109
110}  // namespace content
111
112#endif  // CONTENT_BROWSER_SPEECH_SPEECH_RECOGNITION_ENGINE_H_
113