1// Copyright 2013 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5/**
6 * @fileoverview
7 * This is a component extension that implements a text-to-speech (TTS)
8 * engine powered by Google's speech synthesis API.
9 *
10 * This is an "event page", so it's not loaded when the API isn't being used,
11 * and doesn't waste resources. When a web page or web app makes a speech
12 * request and the parameters match one of the voices in this extension's
13 * manifest, it makes a request to Google's API using Chrome's private key
14 * and plays the resulting speech using HTML5 audio.
15 */
16
/**
 * The main class for this extension. Adds listeners to the
 * chrome.ttsEngine onSpeak, onStop, onPause and onResume events and
 * implements them using Google's speech synthesis API.
 * @constructor
 */
function TtsExtension() {}

TtsExtension.prototype = {
  /**
   * The url prefix of the speech server, including static query
   * parameters that don't change.
   * @type {string}
   * @const
   * @private
   */
  SPEECH_SERVER_URL_:
      'https://www.google.com/speech-api/v2/synthesize?' +
      'enc=mpeg&client=chromium',

  /**
   * The maximum utterance length, in UTF-16 code units. Both Chrome's
   * tts extension api and the web speech api specify 32k as the
   * maximum limit for an utterance.
   * @type {number}
   * @const
   * @private
   */
  MAX_UTTERANCE_LENGTH_: 32768,

  /**
   * A mapping from language and gender to voice name, hardcoded for now
   * until the speech synthesis server capabilities response provides this.
   * The key of this map is of the form '<lang>-<gender>'.
   * @type {Object.<string, string>}
   * @private
   */
  LANG_AND_GENDER_TO_VOICE_NAME_: {
    'en-gb-male': 'rjs',
    'en-gb-female': 'fis',
  },

  /**
   * The arguments passed to the onSpeak event handler for the utterance
   * that's currently being spoken. Should be null when no object is
   * pending.
   *
   * @type {?{utterance: string, options: Object, callback: Function}}
   * @private
   */
  currentUtterance_: null,

  /**
   * The HTML5 audio element we use for playing the sound served by the
   * speech server.
   * @type {HTMLAudioElement}
   * @private
   */
  audioElement_: null,

  /**
   * A mapping from voice name to language and gender, derived from the
   * manifest file.  This is used in case the speech synthesis request
   * specifies a voice name but doesn't specify a language code or gender.
   * init() replaces this placeholder with a per-instance map.
   * @type {Object.<string, {lang: string, gender: string}>}
   * @private
   */
  voiceNameToLangAndGender_: {},

  /**
   * This is the main function called to initialize this extension.
   * Initializes data structures and adds event listeners.
   */
  init: function() {
    // Build a fresh map for this instance rather than mutating the
    // object literal that every instance shares through the prototype.
    this.voiceNameToLangAndGender_ = {};

    // Get voices from manifest.
    var voices = chrome.app.getDetails().tts_engine.voices;
    for (var i = 0; i < voices.length; i++) {
      this.voiceNameToLangAndGender_[voices[i].voice_name] = {
        lang: voices[i].lang,
        gender: voices[i].gender
      };
    }

    // Initialize the audio element and event listeners on it.
    this.audioElement_ = document.createElement('audio');
    document.body.appendChild(this.audioElement_);
    this.audioElement_.addEventListener(
        'ended', this.onStop_.bind(this), false);
    this.audioElement_.addEventListener(
        'canplaythrough', this.onStart_.bind(this), false);
    // Without this listener a failed download or decode would never be
    // reported back to the ttsEngine callback.
    this.audioElement_.addEventListener(
        'error', this.onError_.bind(this), false);

    // Install event listeners for the ttsEngine API.
    chrome.ttsEngine.onSpeak.addListener(this.onSpeak_.bind(this));
    chrome.ttsEngine.onStop.addListener(this.onStop_.bind(this));
    chrome.ttsEngine.onPause.addListener(this.onPause_.bind(this));
    chrome.ttsEngine.onResume.addListener(this.onResume_.bind(this));
  },

  /**
   * Handler for the chrome.ttsEngine.onSpeak interface.
   * Gets Chrome's Google API key and then uses it to generate a request
   * url for the requested speech utterance. Sets that url as the source
   * of the HTML5 audio element.
   * @param {string} utterance The text to be spoken.
   * @param {Object} options Options to control the speech, as defined
   *     in the Chrome ttsEngine extension API.
   * @param {Function} callback Function that receives the TTS events
   *     ('start', 'end', 'error') generated for this utterance.
   * @private
   */
  onSpeak_: function(utterance, options, callback) {
    try {
      utterance = this.truncateUtterance_(utterance);

      // First, stop any pending audio.
      this.onStop_();

      var current = {
        utterance: utterance,
        options: options,
        callback: callback
      };
      this.currentUtterance_ = current;

      var lang = options.lang;
      var gender = options.gender;
      if (options.voiceName) {
        // Only own properties count, so names such as 'constructor' are
        // not mistaken for voices declared in the manifest.
        if (!Object.prototype.hasOwnProperty.call(
                this.voiceNameToLangAndGender_, options.voiceName)) {
          throw new Error('Unknown voice name: ' + options.voiceName);
        }
        var voice = this.voiceNameToLangAndGender_[options.voiceName];
        lang = voice.lang;
        gender = voice.gender;
      }

      if (!lang) {
        lang = navigator.language;
      }
      lang = lang.toLowerCase();

      // Look up the specific voice name for this language and gender.
      // If it's not in the map, it doesn't matter - the language will
      // be used directly. This is only used for languages where more
      // than one gender is actually available.
      var serverVoiceName =
          this.LANG_AND_GENDER_TO_VOICE_NAME_[lang + '-' + gender];

      chrome.systemPrivate.getApiKey((function(apiKey) {
        // The key arrives asynchronously. If this utterance was stopped
        // or replaced in the meantime, do not start loading its audio.
        if (this.currentUtterance_ !== current) {
          return;
        }

        // Exceptions thrown here are outside the enclosing try/catch
        // (this runs later), so they must be reported explicitly.
        try {
          // This begins loading the audio but does not play it.
          // When enough of the audio has loaded to begin playback,
          // the 'canplaythrough' handler will call this.onStart_,
          // which sends a start event to the ttsEngine callback and
          // then begins playing audio.
          this.audioElement_.src = this.buildRequestUrl_(
              apiKey, utterance, lang, serverVoiceName, options);
        } catch (err) {
          this.reportError_(callback, String(err));
        }
      }).bind(this));
    } catch (err) {
      this.reportError_(callback, String(err));
    }
  },

  /**
   * Limits an utterance to MAX_UTTERANCE_LENGTH_ code units without
   * leaving half of a surrogate pair at the end, because
   * encodeURIComponent throws a URIError on a lone surrogate.
   * @param {string} utterance The text to be spoken.
   * @return {string} The utterance, truncated if it was too long.
   * @private
   */
  truncateUtterance_: function(utterance) {
    var end = this.MAX_UTTERANCE_LENGTH_;
    if (utterance.length <= end) {
      return utterance;
    }

    var lastUnit = utterance.charCodeAt(end - 1);
    if (lastUnit >= 0xD800 && lastUnit <= 0xDBFF) {
      // The cut would separate a high surrogate from its low surrogate.
      end--;
    }
    return utterance.substring(0, end);
  },

  /**
   * Builds the speech server request url for one utterance. Every
   * dynamic value is percent-encoded so that it cannot add or alter
   * other query parameters.
   * @param {string} apiKey The Google API key.
   * @param {string} utterance The (already truncated) text to be spoken.
   * @param {string} lang The lower-case language code.
   * @param {string|undefined} voiceName The server voice name, if any.
   * @param {Object} options The ttsEngine speech options; rate and pitch
   *     are read from it.
   * @return {string} The complete request url.
   * @private
   */
  buildRequestUrl_: function(apiKey, utterance, lang, voiceName, options) {
    var url = this.SPEECH_SERVER_URL_ +
        '&key=' + encodeURIComponent(apiKey) +
        '&text=' + encodeURIComponent(utterance) +
        '&lang=' + encodeURIComponent(lang);

    if (voiceName) {
      url += '&name=' + encodeURIComponent(voiceName);
    }

    if (options.rate) {
      // Input rate is between 0.1 and 10.0 with a default of 1.0.
      // Output speed is between 0.0 and 1.0 with a default of 0.5.
      // NOTE(review): rates above 2.0 map to speeds above 1.0; confirm
      // how the server treats values outside its documented range.
      url += '&speed=' + (options.rate / 2.0);
    }

    // 0 is a valid pitch, so test for a number rather than truthiness.
    if (typeof options.pitch === 'number' && isFinite(options.pitch)) {
      // Input pitch is between 0.0 and 2.0 with a default of 1.0.
      // Output pitch is between 0.0 and 1.0 with a default of 0.5.
      url += '&pitch=' + (options.pitch / 2.0);
    }

    return url;
  },

  /**
   * Logs an error, clears the current utterance and sends an 'error'
   * event to the given ttsEngine callback.
   * @param {Function} callback The ttsEngine event callback.
   * @param {string} message The error message to report.
   * @private
   */
  reportError_: function(callback, message) {
    console.error(message);
    this.currentUtterance_ = null;
    callback({
      'type': 'error',
      'errorMessage': message
    });
  },

  /**
   * Handler for the chrome.ttsEngine.onStop interface.
   * Called either when the ttsEngine API requests us to stop, or when
   * we reach the end of the audio stream. Pause the audio element to
   * silence it, and send a callback to the ttsEngine API to let it know
   * that we've completed. Note that the ttsEngine API manages callback
   * messages and will automatically replace the 'end' event with a
   * more specific callback like 'interrupted' when sending it to the
   * TTS client.
   * @private
   */
  onStop_: function() {
    var current = this.currentUtterance_;
    // Clear the state first so that it is reset even if the callback
    // throws.
    this.currentUtterance_ = null;
    if (!current) {
      return;
    }

    this.audioElement_.pause();
    current.callback({
      'type': 'end',
      'charIndex': current.utterance.length
    });
  },

  /**
   * Handler for the canplaythrough event on the audio element.
   * Called when the audio element has buffered enough audio to begin
   * playback. Send the 'start' event to the ttsEngine callback and
   * then begin playing the audio element.
   * @private
   */
  onStart_: function() {
    var current = this.currentUtterance_;
    if (!current) {
      return;
    }

    if (current.options.volume !== undefined) {
      // Both APIs use the same range for volume, between 0.0 and 1.0.
      this.audioElement_.volume = current.options.volume;
    }
    this.audioElement_.play();
    current.callback({
      'type': 'start',
      'charIndex': 0
    });
  },

  /**
   * Handler for the error event on the audio element.
   * Sends an 'error' event to the ttsEngine callback of the current
   * utterance, if there is one, and clears it.
   * @private
   */
  onError_: function() {
    var current = this.currentUtterance_;
    if (!current) {
      return;
    }

    var mediaError = this.audioElement_.error;
    var message = 'Error loading or playing synthesized speech';
    if (mediaError) {
      message += ' (media error code ' + mediaError.code + ')';
    }
    this.reportError_(current.callback, message);
  },

  /**
   * Handler for the chrome.ttsEngine.onPause interface.
   * Pauses audio if we're in the middle of an utterance.
   * @private
   */
  onPause_: function() {
    if (this.currentUtterance_) {
      this.audioElement_.pause();
    }
  },

  /**
   * Handler for the chrome.ttsEngine.onResume interface.
   * Resumes audio if we're in the middle of an utterance.
   * @private
   */
  onResume_: function() {
    if (this.currentUtterance_) {
      this.audioElement_.play();
    }
  }

};
253
// Create the extension instance and install its listeners when this
// script is evaluated.
new TtsExtension().init();
255