1// Copyright (c) 2011 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#ifndef CHROME_BROWSER_EXTENSIONS_EXTENSION_TTS_API_H_
6#define CHROME_BROWSER_EXTENSIONS_EXTENSION_TTS_API_H_
7
8#include <queue>
9#include <string>
10
11#include "base/memory/singleton.h"
12#include "base/task.h"
13#include "chrome/browser/extensions/extension_function.h"
14#include "chrome/browser/extensions/extension_tts_api_util.h"
15
16// Abstract class that defines the native platform TTS interface.
17class ExtensionTtsPlatformImpl {
18 public:
19  static ExtensionTtsPlatformImpl* GetInstance();
20
21  // Speak the given utterance with the given parameters if possible,
22  // and return true on success. Utterance will always be nonempty.
23  // If the user does not specify the other values, then locale and gender
24  // will be empty strings, and rate, pitch, and volume will be -1.0.
25  //
26  // The ExtensionTtsController will only try to speak one utterance at
27  // a time. If it wants to interrupt speech, it will always call Stop
28  // before speaking again, otherwise it will wait until IsSpeaking
29  // returns false before calling Speak again.
30  virtual bool Speak(
31      const std::string& utterance,
32      const std::string& locale,
33      const std::string& gender,
34      double rate,
35      double pitch,
36      double volume) = 0;
37
38  // Stop speaking immediately and return true on success.
39  virtual bool StopSpeaking() = 0;
40
41  // Return true if the synthesis engine is currently speaking.
42  virtual bool IsSpeaking() = 0;
43
44  virtual std::string error();
45  virtual void clear_error();
46  virtual void set_error(const std::string& error);
47
48 protected:
49  ExtensionTtsPlatformImpl() {}
50  virtual ~ExtensionTtsPlatformImpl() {}
51
52  std::string error_;
53
54  DISALLOW_COPY_AND_ASSIGN(ExtensionTtsPlatformImpl);
55};
56
57// One speech utterance.
58class Utterance {
59 public:
60  // Construct an utterance given a profile, the text to speak,
61  // the options passed to tts.speak, and a completion task to call
62  // when the utterance is done speaking.
63  Utterance(Profile* profile,
64            const std::string& text,
65            DictionaryValue* options,
66            Task* completion_task);
67  ~Utterance();
68
69  // Calls the completion task and then destroys itself.
70  void FinishAndDestroy();
71
72  void set_error(const std::string& error) { error_ = error; }
73  void set_extension_id(const std::string& extension_id) {
74    extension_id_ = extension_id;
75  }
76
77  // Accessors
78  Profile* profile() { return profile_; }
79  const std::string& extension_id() { return extension_id_; }
80  int id() { return id_; }
81  const std::string& text() { return text_; }
82  const Value* options() { return options_.get(); }
83  const std::string& voice_name() { return voice_name_; }
84  const std::string& locale() { return locale_; }
85  const std::string& gender() { return gender_; }
86  double rate() { return rate_; }
87  double pitch() { return pitch_; }
88  double volume() { return volume_; }
89  bool can_enqueue() { return can_enqueue_; }
90  const std::string& error() { return error_; }
91
92 private:
93  // The profile that initiated this utterance.
94  Profile* profile_;
95
96  // The extension ID of the extension providing TTS for this utterance, or
97  // empty if native TTS is being used.
98  std::string extension_id_;
99
100  // The unique ID of this utterance, used to associate callback functions
101  // with utterances.
102  int id_;
103
104  // The id of the next utterance, so we can associate requests with
105  // responses.
106  static int next_utterance_id_;
107
108  // The text to speak.
109  std::string text_;
110
111  // The full options arg passed to tts.speak, which may include fields
112  // other than the ones we explicitly parse, below.
113  scoped_ptr<Value> options_;
114
115  // The parsed options.
116  std::string voice_name_;
117  std::string locale_;
118  std::string gender_;
119  double rate_;
120  double pitch_;
121  double volume_;
122  bool can_enqueue_;
123
124  // The error string to pass to the completion task. Will be empty if
125  // no error occurred.
126  std::string error_;
127
128  // The method to call when this utterance has completed speaking.
129  Task* completion_task_;
130};
131
132// Singleton class that manages text-to-speech.
133class ExtensionTtsController {
134 public:
135  // Get the single instance of this class.
136  static ExtensionTtsController* GetInstance();
137
138  // Returns true if we're currently speaking an utterance.
139  bool IsSpeaking() const;
140
141  // Speak the given utterance. If the utterance's can_enqueue flag is true
142  // and another utterance is in progress, adds it to the end of the queue.
143  // Otherwise, interrupts any current utterance and speaks this one
144  // immediately.
145  void SpeakOrEnqueue(Utterance* utterance);
146
147  // Stop all utterances and flush the queue.
148  void Stop();
149
150  // Called when an extension finishes speaking an utterance.
151  void OnSpeechFinished(int request_id, const std::string& error_message);
152
153  // For unit testing.
154  void SetPlatformImpl(ExtensionTtsPlatformImpl* platform_impl);
155
156 private:
157  ExtensionTtsController();
158  virtual ~ExtensionTtsController();
159
160  // Get the platform TTS implementation (or injected mock).
161  ExtensionTtsPlatformImpl* GetPlatformImpl();
162
163  // Start speaking the given utterance. Will either take ownership of
164  // |utterance| or delete it if there's an error.
165  void SpeakNow(Utterance* utterance);
166
167  // Called periodically when speech is ongoing. Checks to see if the
168  // underlying platform speech system has finished the current utterance,
169  // and if so finishes it and pops the next utterance off the queue.
170  void CheckSpeechStatus();
171
172  // Clear the utterance queue.
173  void ClearUtteranceQueue();
174
175  // Finalize and delete the current utterance.
176  void FinishCurrentUtterance();
177
178  // Start speaking the next utterance in the queue.
179  void SpeakNextUtterance();
180
181  // Return the id string of the first extension with tts_voices in its
182  // manifest that matches the speech parameters of this utterance,
183  // or the empty string if none is found.
184  std::string GetMatchingExtensionId(Utterance* utterance);
185
186  ScopedRunnableMethodFactory<ExtensionTtsController> method_factory_;
187  friend struct DefaultSingletonTraits<ExtensionTtsController>;
188
189  // The current utterance being spoken.
190  Utterance* current_utterance_;
191
192  // A queue of utterances to speak after the current one finishes.
193  std::queue<Utterance*> utterance_queue_;
194
195  // A pointer to the platform implementation of text-to-speech, for
196  // dependency injection.
197  ExtensionTtsPlatformImpl* platform_impl_;
198
199  DISALLOW_COPY_AND_ASSIGN(ExtensionTtsController);
200};
201
202//
203// Extension API function definitions
204//
205
206class ExtensionTtsSpeakFunction : public AsyncExtensionFunction {
207 private:
208  ~ExtensionTtsSpeakFunction() {}
209  virtual bool RunImpl();
210  void SpeechFinished();
211  Utterance* utterance_;
212  DECLARE_EXTENSION_FUNCTION_NAME("experimental.tts.speak")
213};
214
215class ExtensionTtsStopSpeakingFunction : public SyncExtensionFunction {
216 private:
217  ~ExtensionTtsStopSpeakingFunction() {}
218  virtual bool RunImpl();
219  DECLARE_EXTENSION_FUNCTION_NAME("experimental.tts.stop")
220};
221
222class ExtensionTtsIsSpeakingFunction : public SyncExtensionFunction {
223 private:
224  ~ExtensionTtsIsSpeakingFunction() {}
225  virtual bool RunImpl();
226  DECLARE_EXTENSION_FUNCTION_NAME("experimental.tts.isSpeaking")
227};
228
229class ExtensionTtsSpeakCompletedFunction : public SyncExtensionFunction {
230 private:
231  ~ExtensionTtsSpeakCompletedFunction() {}
232  virtual bool RunImpl();
233  DECLARE_EXTENSION_FUNCTION_NAME("experimental.tts.speakCompleted")
234};
235
236#endif  // CHROME_BROWSER_EXTENSIONS_EXTENSION_TTS_API_H_
237