1// Copyright (c) 2012 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#ifndef CHROME_BROWSER_SPEECH_TTS_CONTROLLER_H_
6#define CHROME_BROWSER_SPEECH_TTS_CONTROLLER_H_
7
8#include <queue>
9#include <set>
10#include <string>
11#include <vector>
12
13#include "base/memory/scoped_ptr.h"
14#include "base/memory/singleton.h"
15#include "base/memory/weak_ptr.h"
16#include "url/gurl.h"
17
18class Utterance;
19class TtsPlatformImpl;
20
21namespace base {
22class Value;
23}
24
25namespace content {
26class BrowserContext;
27}
28
// Progress notifications that the TTS engine reports back for an utterance.
// The numeric values are spelled out explicitly; they are the same values
// the compiler assigns implicitly from declaration order.
enum TtsEventType {
  TTS_EVENT_START = 0,
  TTS_EVENT_END = 1,
  TTS_EVENT_WORD = 2,
  TTS_EVENT_SENTENCE = 3,
  TTS_EVENT_MARKER = 4,
  TTS_EVENT_INTERRUPTED = 5,
  TTS_EVENT_CANCELLED = 6,
  TTS_EVENT_ERROR = 7,
  TTS_EVENT_PAUSE = 8,
  TTS_EVENT_RESUME = 9
};
42
// Gender attribute of a voice, or requested for an utterance. The explicit
// values equal the ones implied by declaration order.
enum TtsGenderType {
  TTS_GENDER_NONE = 0,
  TTS_GENDER_MALE = 1,
  TTS_GENDER_FEMALE = 2
};
48
49// Returns true if this event type is one that indicates an utterance
50// is finished and can be destroyed.
51bool IsFinalTtsEventType(TtsEventType event_type);
52
// Bundle of the continuous (real-valued) speech parameters that apply to a
// single utterance. Initial values are set by the constructor, which is
// defined out of line.
struct UtteranceContinuousParameters {
  UtteranceContinuousParameters();

  double rate;    // Speaking rate.
  double pitch;   // Voice pitch.
  double volume;  // Output volume.
};
61
62// Information about one voice.
63struct VoiceData {
64  VoiceData();
65  ~VoiceData();
66
67  std::string name;
68  std::string lang;
69  TtsGenderType gender;
70  std::string extension_id;
71  std::set<TtsEventType> events;
72
73  // If true, the synthesis engine is a remote network resource.
74  // It may be higher latency and may incur bandwidth costs.
75  bool remote;
76
77  // If true, this is implemented by this platform's subclass of
78  // TtsPlatformImpl. If false, this is implemented by an extension.
79  bool native;
80  std::string native_voice_identifier;
81};
82
83// Interface that delegates TTS requests to user-installed extensions.
84class TtsEngineDelegate {
85 public:
86  virtual ~TtsEngineDelegate() {}
87
88  // Return a list of all available voices registered.
89  virtual void GetVoices(content::BrowserContext* browser_context,
90                         std::vector<VoiceData>* out_voices) = 0;
91
92  // Speak the given utterance by sending an event to the given TTS engine.
93  virtual void Speak(Utterance* utterance, const VoiceData& voice) = 0;
94
95  // Stop speaking the given utterance by sending an event to the target
96  // associated with this utterance.
97  virtual void Stop(Utterance* utterance) = 0;
98
99  // Pause in the middle of speaking this utterance.
100  virtual void Pause(Utterance* utterance) = 0;
101
102  // Resume speaking this utterance.
103  virtual void Resume(Utterance* utterance) = 0;
104
105  // Load the built-in component extension for ChromeOS.
106  virtual bool LoadBuiltInTtsExtension(
107      content::BrowserContext* browser_context) = 0;
108};
109
110// Class that wants to receive events on utterances.
111class UtteranceEventDelegate {
112 public:
113  virtual ~UtteranceEventDelegate() {}
114  virtual void OnTtsEvent(Utterance* utterance,
115                          TtsEventType event_type,
116                          int char_index,
117                          const std::string& error_message) = 0;
118};
119
// Implemented by classes that want to be told when the set of voices has
// changed.
class VoicesChangedDelegate {
 public:
  virtual ~VoicesChangedDelegate() {}

  // Notification that the set of voices has changed.
  virtual void OnVoicesChanged() = 0;
};
127
128// One speech utterance.
129class Utterance {
130 public:
131  // Construct an utterance given a profile and a completion task to call
132  // when the utterance is done speaking. Before speaking this utterance,
133  // its other parameters like text, rate, pitch, etc. should all be set.
134  explicit Utterance(content::BrowserContext* browser_context);
135  ~Utterance();
136
137  // Sends an event to the delegate. If the event type is TTS_EVENT_END
138  // or TTS_EVENT_ERROR, deletes the utterance. If |char_index| is -1,
139  // uses the last good value.
140  void OnTtsEvent(TtsEventType event_type,
141                  int char_index,
142                  const std::string& error_message);
143
144  // Finish an utterance without sending an event to the delegate.
145  void Finish();
146
147  // Getters and setters for the text to speak and other speech options.
148  void set_text(const std::string& text) { text_ = text; }
149  const std::string& text() const { return text_; }
150
151  void set_options(const base::Value* options);
152  const base::Value* options() const { return options_.get(); }
153
154  void set_src_extension_id(const std::string& src_extension_id) {
155    src_extension_id_ = src_extension_id;
156  }
157  const std::string& src_extension_id() { return src_extension_id_; }
158
159  void set_src_id(int src_id) { src_id_ = src_id; }
160  int src_id() { return src_id_; }
161
162  void set_src_url(const GURL& src_url) { src_url_ = src_url; }
163  const GURL& src_url() { return src_url_; }
164
165  void set_voice_name(const std::string& voice_name) {
166    voice_name_ = voice_name;
167  }
168  const std::string& voice_name() const { return voice_name_; }
169
170  void set_lang(const std::string& lang) {
171    lang_ = lang;
172  }
173  const std::string& lang() const { return lang_; }
174
175  void set_gender(TtsGenderType gender) {
176    gender_ = gender;
177  }
178  TtsGenderType gender() const { return gender_; }
179
180  void set_continuous_parameters(const UtteranceContinuousParameters& params) {
181    continuous_parameters_ = params;
182  }
183  const UtteranceContinuousParameters& continuous_parameters() {
184    return continuous_parameters_;
185  }
186
187  void set_can_enqueue(bool can_enqueue) { can_enqueue_ = can_enqueue; }
188  bool can_enqueue() const { return can_enqueue_; }
189
190  void set_required_event_types(const std::set<TtsEventType>& types) {
191    required_event_types_ = types;
192  }
193  const std::set<TtsEventType>& required_event_types() const {
194    return required_event_types_;
195  }
196
197  void set_desired_event_types(const std::set<TtsEventType>& types) {
198    desired_event_types_ = types;
199  }
200  const std::set<TtsEventType>& desired_event_types() const {
201    return desired_event_types_;
202  }
203
204  const std::string& extension_id() const { return extension_id_; }
205  void set_extension_id(const std::string& extension_id) {
206    extension_id_ = extension_id;
207  }
208
209  UtteranceEventDelegate* event_delegate() const {
210    return event_delegate_.get();
211  }
212  void set_event_delegate(
213      base::WeakPtr<UtteranceEventDelegate> event_delegate) {
214    event_delegate_ = event_delegate;
215  }
216
217  // Getters and setters for internal state.
218  content::BrowserContext* browser_context() const { return browser_context_; }
219  int id() const { return id_; }
220  bool finished() const { return finished_; }
221
222 private:
223  // The BrowserContext that initiated this utterance.
224  content::BrowserContext* browser_context_;
225
226  // The extension ID of the extension providing TTS for this utterance, or
227  // empty if native TTS is being used.
228  std::string extension_id_;
229
230  // The unique ID of this utterance, used to associate callback functions
231  // with utterances.
232  int id_;
233
234  // The id of the next utterance, so we can associate requests with
235  // responses.
236  static int next_utterance_id_;
237
238  // The text to speak.
239  std::string text_;
240
241  // The full options arg passed to tts.speak, which may include fields
242  // other than the ones we explicitly parse, below.
243  scoped_ptr<base::Value> options_;
244
245  // The extension ID of the extension that called speak() and should
246  // receive events.
247  std::string src_extension_id_;
248
249  // The source extension's ID of this utterance, so that it can associate
250  // events with the appropriate callback.
251  int src_id_;
252
253  // The URL of the page where the source extension called speak.
254  GURL src_url_;
255
256  // The delegate to be called when an utterance event is fired.
257  base::WeakPtr<UtteranceEventDelegate> event_delegate_;
258
259  // The parsed options.
260  std::string voice_name_;
261  std::string lang_;
262  TtsGenderType gender_;
263  UtteranceContinuousParameters continuous_parameters_;
264  bool can_enqueue_;
265  std::set<TtsEventType> required_event_types_;
266  std::set<TtsEventType> desired_event_types_;
267
268  // The index of the current char being spoken.
269  int char_index_;
270
271  // True if this utterance received an event indicating it's done.
272  bool finished_;
273};
274
275// Singleton class that manages text-to-speech for the TTS and TTS engine
276// extension APIs, maintaining a queue of pending utterances and keeping
277// track of all state.
278class TtsController {
279 public:
280  // Get the single instance of this class.
281  static TtsController* GetInstance();
282
283  // Returns true if we're currently speaking an utterance.
284  virtual bool IsSpeaking() = 0;
285
286  // Speak the given utterance. If the utterance's can_enqueue flag is true
287  // and another utterance is in progress, adds it to the end of the queue.
288  // Otherwise, interrupts any current utterance and speaks this one
289  // immediately.
290  virtual void SpeakOrEnqueue(Utterance* utterance) = 0;
291
292  // Stop all utterances and flush the queue. Implies leaving pause mode
293  // as well.
294  virtual void Stop() = 0;
295
296  // Pause the speech queue. Some engines may support pausing in the middle
297  // of an utterance.
298  virtual void Pause() = 0;
299
300  // Resume speaking.
301  virtual void Resume() = 0;
302
303  // Handle events received from the speech engine. Events are forwarded to
304  // the callback function, and in addition, completion and error events
305  // trigger finishing the current utterance and starting the next one, if
306  // any.
307  virtual void OnTtsEvent(int utterance_id,
308                          TtsEventType event_type,
309                          int char_index,
310                          const std::string& error_message) = 0;
311
312  // Return a list of all available voices, including the native voice,
313  // if supported, and all voices registered by extensions.
314  virtual void GetVoices(content::BrowserContext* browser_context,
315                         std::vector<VoiceData>* out_voices) = 0;
316
317  // Called by the extension system or platform implementation when the
318  // list of voices may have changed and should be re-queried.
319  virtual void VoicesChanged() = 0;
320
321  // Add a delegate that wants to be notified when the set of voices changes.
322  virtual void AddVoicesChangedDelegate(VoicesChangedDelegate* delegate) = 0;
323
324  // Remove delegate that wants to be notified when the set of voices changes.
325  virtual void RemoveVoicesChangedDelegate(VoicesChangedDelegate* delegate) = 0;
326
327  // Set the delegate that processes TTS requests with user-installed
328  // extensions.
329  virtual void SetTtsEngineDelegate(TtsEngineDelegate* delegate) = 0;
330
331  // Get the delegate that processes TTS requests with user-installed
332  // extensions.
333  virtual TtsEngineDelegate* GetTtsEngineDelegate() = 0;
334
335  // For unit testing.
336  virtual void SetPlatformImpl(TtsPlatformImpl* platform_impl) = 0;
337  virtual int QueueSize() = 0;
338
339 protected:
340  virtual ~TtsController() {}
341};
342
343#endif  // CHROME_BROWSER_SPEECH_TTS_CONTROLLER_H_
344