tts_background.js revision 116680a4aac90f2aa7413d9095a592090648e557
1// Copyright 2014 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5/**
6 * @fileoverview Sends Text-To-Speech commands to Chrome's native TTS
7 * extension API.
8 *
9 */
10
11goog.provide('cvox.TtsBackground');
12
13goog.require('cvox.AbstractTts');
14goog.require('cvox.ChromeTtsBase');
15goog.require('cvox.ChromeVox');
16goog.require('cvox.MathMap');
17goog.require('goog.i18n.MessageFormat');
18
19
/**
 * A single speech request: the text to speak, the speech properties to speak
 * it with, and a numeric id that tells it apart from other utterances.
 * @constructor
 * @param {string} textString The string of text to be spoken.
 * @param {Object} properties Speech properties to use for this utterance.
 */
cvox.Utterance = function(textString, properties) {
  // Claim the next free id and advance the shared counter.
  var assignedId = cvox.Utterance.nextUtteranceId_;
  cvox.Utterance.nextUtteranceId_ = assignedId + 1;

  this.textString = textString;
  this.properties = properties;
  this.id = assignedId;
};

/**
 * The id that will be given to the next utterance constructed.
 * @type {number}
 * @private
 */
cvox.Utterance.nextUtteranceId_ = 1;
37
/**
 * Speech backend that keeps its own utterance queue and forwards one
 * utterance at a time to the chrome.tts extension API. Rate, pitch, volume,
 * voice name and punctuation echo level are restored from localStorage.
 * @constructor
 * @param {boolean=} opt_enableMath Whether to process math. Used when running
 * on forge. Defaults to true.
 * @extends {cvox.ChromeTtsBase}
 */
cvox.TtsBackground = function(opt_enableMath) {
  opt_enableMath = opt_enableMath == undefined ? true : opt_enableMath;
  goog.base(this);
  this.currentVoice = localStorage['voiceName'] || '';

  // A missing or unparsable stored value yields NaN, which is falsy, so the
  // default for that property is used instead.
  this.ttsProperties['rate'] = (parseFloat(localStorage['rate']) ||
      this.propertyDefault['rate']);
  this.ttsProperties['pitch'] = (parseFloat(localStorage['pitch']) ||
      this.propertyDefault['pitch']);
  this.ttsProperties['volume'] = (parseFloat(localStorage['volume']) ||
      this.propertyDefault['volume']);

  // Use the current locale as the speech language if not otherwise
  // specified.
  if (this.ttsProperties['lang'] == undefined) {
    this.ttsProperties['lang'] =
        chrome.i18n.getMessage('@@ui_locale').replace('_', '-');
  }

  // Type of the most recent chrome.tts event; read by isSpeaking().
  this.lastEventType = 'end';

  // setPreferredVoice_ completes asynchronously; the default voice is only
  // requested when no voice name was stored.
  this.setPreferredVoice_();
  if (!this.currentVoice) {
    this.setDefaultVoiceName_();
  }

  /**
   * The punctuation echo levels, in the order they are cycled through.
   * @type {!Array.<{name:(string),
   * msg:(string),
   * regexp:(RegExp),
   * clear:(boolean)}>}
   * @private
  */
  this.punctuationEchoes_ = [
    /**
     * Punctuation echoed for the 'none' option.
     */
    {
      name: 'none',
      msg: 'no_punctuation',
      regexp: /[-$#"()*;:<>\n\\\/+='~`@_]/g,
      clear: true
    },

    /**
     * Punctuation echoed for the 'some' option.
     */
    {
      name: 'some',
      msg: 'some_punctuation',
      regexp: /[$#"*<>\\\/\{\}+=~`%]/g,
      clear: false
    },

    /**
     * Punctuation echoed for the 'all' option.
     */
    {
      name: 'all',
      msg: 'all_punctuation',
      regexp: /[-$#"()*;:<>\n\\\/\{\}\[\]+='~`!@_.,?%]/g,
      clear: false
    }
  ];

  // The stored level is used as an index into punctuationEchoes_. A corrupt
  // or out-of-range value would later make preprocess() dereference
  // undefined, so anything invalid falls back to index 1 ('some'), the same
  // value used when nothing is stored.
  var storedPunctuationEcho =
      parseInt(localStorage[cvox.AbstractTts.PUNCTUATION_ECHO] || 1, 10);

  /** @private {number} */
  this.currentPunctuationEcho_ =
      (storedPunctuationEcho >= 0 &&
       storedPunctuationEcho < this.punctuationEchoes_.length) ?
      storedPunctuationEcho : 1;

  /**
   * A list of punctuation characters that should always be spliced into output
   * even with literal word substitutions.
   * This is important for tts prosody.
   * @type {!Array.<string>}
   * @private
  */
  this.retainPunctuation_ =
      [';', '?', '!', '\''];

  /**
   * Mapping for math elements. Null when math processing is disabled.
   * @type {cvox.MathMap}
   */
  this.mathmap = opt_enableMath ? new cvox.MathMap() : null;

  /**
   * The id of a callback returned from setTimeout.
   * @type {number|undefined}
   */
  this.timeoutId_;

  try {
    /**
     * Phonetic words keyed by character, parsed from the 'phonetic_map'
     * message. Left unset when that message cannot be parsed as JSON.
     * @type {Object.<string, string>}
     * @private
     * @const
     */
    this.PHONETIC_MAP_ = /** @type {Object.<string, string>} */(
        JSON.parse(cvox.ChromeVox.msgs.getMsg('phonetic_map')));
  } catch (e) {
    console.log('Error; unable to parse phonetic map msg.');
  }

  /**
   * Capturing tts event listeners.
   * @type {Array.<cvox.TtsCapturingEventListener>}
   * @private
   */
  this.capturingTtsEventListeners_ = [];

  /**
   * The current utterance.
   * @type {cvox.Utterance}
   * @private
   */
  this.currentUtterance_ = null;

  /**
   * The utterance queue.
   * @type {Array.<cvox.Utterance>}
   * @private
   */
  this.utteranceQueue_ = [];
};
goog.inherits(cvox.TtsBackground, cvox.ChromeTtsBase);
169
170
/**
 * Delay, in milliseconds, between a request to pronounce a letter
 * phonetically and the moment its phonetic word is actually spoken.
 * @type {number}
 * @private
 * @const
 */
cvox.TtsBackground.PHONETIC_DELAY_MS_ = 1000;

/**
 * Whitelist of option names that may be handed to chrome.tts.speak. Any
 * other utterance property is left out of the options object.
 * @type {Array.<string>}
 * @private
 * @const
 */
cvox.TtsBackground.ALLOWED_PROPERTIES_ = [
  'desiredEventTypes',
  'enqueue',
  'extensionId',
  'gender',
  'lang',
  'onEvent',
  'pitch',
  'rate',
  'requiredEventTypes',
  'voiceName',
  'volume'
];
199
/**
 * Asynchronously looks through the voices reported by chrome.tts and, if one
 * of them carries the voice name stored in localStorage, makes it the current
 * voice. When no voice matches, the current voice is left untouched.
 * @private
 */
cvox.TtsBackground.prototype.setPreferredVoice_ = function() {
  var ttsBackground = this;
  chrome.tts.getVoices(function(voices) {
    var index = 0;
    var candidate = voices[index];
    // Walk the list until the first match or the first missing entry.
    while (candidate) {
      if (candidate['voiceName'] == localStorage['voiceName']) {
        ttsBackground.currentVoice = candidate['voiceName'];
        return;
      }
      index++;
      candidate = voices[index];
    }
  });
};
217
218
/**
 * Speaks |textString|. Text with several punctuation-delimited pieces is
 * split and each piece is spoken through a separate call so speech can start
 * sooner. A single piece is preprocessed, wrapped in a cvox.Utterance and
 * handed to the utterance queue. Text that ends up with nothing speakable
 * only triggers the start/end callbacks (and stops speech in flush mode).
 * Every path returns this object.
 * @override
 */
cvox.TtsBackground.prototype.speak = function(
    textString, queueMode, properties) {
  goog.base(this, 'speak', textString, queueMode, properties);

  if (!properties) {
    properties = {};
  }
  if (queueMode === undefined) {
    queueMode = cvox.AbstractTts.QUEUE_MODE_QUEUE;
  }

  // Chunk to improve responsiveness. Use a replace/split pattern in order to
  // retain the original punctuation.
  var splitTextString = textString.replace(/([-\n\r.,!?;])(\s)/g, '$1$2|');
  splitTextString = splitTextString.split('|');
  // Since we are substituting the chunk delimiters back into the string, only
  // recurse when there are more than 2 split items. This should result in only
  // one recursive call.
  if (splitTextString.length > 2) {
    // Text that ends with punctuation followed by whitespace produces a
    // trailing empty chunk. If that chunk carried the end callback, the
    // callback would fire immediately (the nothing-to-speak path below),
    // before the earlier chunks were spoken. Drop chunks with no
    // non-whitespace content so the callbacks attach to real speech.
    var chunks = [];
    for (var c = 0; c < splitTextString.length; c++) {
      if (/\S/.test(splitTextString[c])) {
        chunks.push(splitTextString[c]);
      }
    }
    // With no speakable chunk at all, fall through so the callbacks are
    // still invoked by the nothing-to-speak path.
    if (chunks.length > 0) {
      var startCallback = properties['startCallback'];
      var endCallback = properties['endCallback'];
      for (var i = 0; i < chunks.length; i++) {
        var propertiesCopy = {};
        for (var p in properties) {
          propertiesCopy[p] = properties[p];
        }
        // Only the first chunk reports the start, only the last the end.
        propertiesCopy['startCallback'] = i == 0 ? startCallback : null;
        propertiesCopy['endCallback'] =
            i == (chunks.length - 1) ? endCallback : null;
        this.speak(chunks[i], queueMode, propertiesCopy);
        // Only the first chunk may flush; the rest are appended.
        queueMode = cvox.AbstractTts.QUEUE_MODE_QUEUE;
      }
      return this;
    }
  }

  textString = this.preprocess(textString, properties);

  // TODO(dtseng): Google TTS has bad performance when speaking numbers. This
  // pattern causes ChromeVox to read numbers as digits rather than words.
  textString = this.getNumberAsDigits_(textString);

  // TODO(dtseng): Google TTS flushes the queue when encountering strings of
  // this pattern which stops ChromeVox speech.
  if (!textString || !textString.match(/\w+/g)) {
    // We still want to callback for listeners in our content script.
    if (properties['startCallback']) {
      try {
        properties['startCallback']();
      } catch (e) {
      }
    }
    if (properties['endCallback']) {
      try {
        properties['endCallback']();
      } catch (e) {
      }
    }
    if (queueMode === cvox.AbstractTts.QUEUE_MODE_FLUSH) {
      this.stop();
    }
    return this;
  }

  var mergedProperties = this.mergeProperties(properties);

  // Only pass the voice along when it is the one currently stored; if the
  // stored name has changed, look the voice up again (asynchronously).
  if (this.currentVoice && (this.currentVoice == localStorage['voiceName'])) {
    mergedProperties['voiceName'] = this.currentVoice;
  }
  if (localStorage['voiceName'] &&
      this.currentVoice != localStorage['voiceName']) {
    this.setPreferredVoice_();
  }

  // A category flush without a category behaves like a plain flush.
  if (queueMode == cvox.AbstractTts.QUEUE_MODE_CATEGORY_FLUSH &&
      !mergedProperties['category']) {
    queueMode = cvox.AbstractTts.QUEUE_MODE_FLUSH;
  }

  var utterance = new cvox.Utterance(textString, mergedProperties);
  this.speakUsingQueue_(utterance, queueMode);
  // Return this object here too, as the chunked and empty-text paths do.
  return this;
};
301
/**
 * Enqueues |utterance|. In FLUSH and CATEGORY_FLUSH modes, the current
 * utterance and every queued utterance that shouldCancel_ selects are
 * cancelled first. The utterance is then appended to the queue and the next
 * queued item is started if nothing is currently speaking.
 * @param {cvox.Utterance} utterance The utterance to speak.
 * @param {number} queueMode The queue mode.
 * @private
 */
cvox.TtsBackground.prototype.speakUsingQueue_ = function(utterance, queueMode) {
  var flushing = queueMode == cvox.AbstractTts.QUEUE_MODE_FLUSH ||
      queueMode == cvox.AbstractTts.QUEUE_MODE_CATEGORY_FLUSH;

  if (flushing) {
    // Drop the utterance being spoken, if it qualifies.
    var speaking = this.currentUtterance_;
    if (this.shouldCancel_(speaking, utterance, queueMode)) {
      this.cancelUtterance_(speaking);
      this.currentUtterance_ = null;
    }

    // Remove qualifying entries from the queue in place, front to back. The
    // index only advances when the entry at that position is kept.
    for (var index = 0; index < this.utteranceQueue_.length;) {
      var queued = this.utteranceQueue_[index];
      if (!this.shouldCancel_(queued, utterance, queueMode)) {
        index++;
        continue;
      }
      this.cancelUtterance_(queued);
      this.utteranceQueue_.splice(index, 1);
    }
  }

  // Append the new request, then kick the queue.
  this.utteranceQueue_.push(utterance);
  this.startSpeakingNextItemInQueue_();
};
336
/**
 * Starts the utterance at the head of the queue, unless an utterance is
 * already current or the queue is empty. The utterance becomes the current
 * one, gets an 'onEvent' handler bound to its id, and is passed to
 * chrome.tts.speak with only the whitelisted, truthy properties.
 * @private
 */
cvox.TtsBackground.prototype.startSpeakingNextItemInQueue_ = function() {
  var alreadySpeaking = !!this.currentUtterance_;
  if (alreadySpeaking || this.utteranceQueue_.length == 0) {
    return;
  }

  var next = this.utteranceQueue_.shift();
  this.currentUtterance_ = next;

  // Remember the id so events can be matched to this utterance later.
  var nextId = next.id;
  var ttsBackground = this;
  next.properties['onEvent'] = function(event) {
    ttsBackground.onTtsEvent_(event, nextId);
  };

  // Copy over only the properties chrome.tts.speak is allowed to receive.
  var speakOptions = {};
  cvox.TtsBackground.ALLOWED_PROPERTIES_.forEach(function(key) {
    var value = next.properties[key];
    if (value) {
      speakOptions[key] = value;
    }
  });

  chrome.tts.speak(next.textString, speakOptions);
};
370
/**
 * Called when we get a speech event from Chrome. The event type is always
 * recorded in lastEventType. Events that don't pertain to the current
 * utterance are otherwise ignored. When speech starts or ends we notify the
 * capturing listeners and call the utterance's callbacks, and when the
 * utterance ends, is interrupted or fails we start speaking the next
 * utterance if there's another one enqueued.
 * @param {Object} event The TTS event from chrome.
 * @param {number} utteranceId The id of the associated utterance.
 * @private
 */
cvox.TtsBackground.prototype.onTtsEvent_ = function(event, utteranceId) {
  this.lastEventType = event['type'];

  // Ignore events sent on utterances other than the current one.
  if (!this.currentUtterance_ ||
      utteranceId != this.currentUtterance_.id) {
    return;
  }

  var utterance = this.currentUtterance_;

  switch (event.type) {
    case 'start':
      this.capturingTtsEventListeners_.forEach(function(listener) {
        listener.onTtsStart();
      });
      if (utterance.properties['startCallback']) {
        try {
          utterance.properties['startCallback']();
        } catch (e) {
        }
      }
      break;
    case 'end':
      this.capturingTtsEventListeners_.forEach(function(listener) {
        listener.onTtsEnd();
      });
    // Intentionally falls through.
    case 'interrupted':
      this.cancelUtterance_(utterance);
      this.currentUtterance_ = null;
      this.startSpeakingNextItemInQueue_();
      break;
    case 'error':
      this.onError_(event['errorMessage']);
      // The failed utterance is finished. Without releasing it here,
      // startSpeakingNextItemInQueue_ would see a current utterance and
      // return immediately, stalling the queue. Its end callback is also
      // invoked so that whoever waits on it is notified.
      this.cancelUtterance_(utterance);
      this.currentUtterance_ = null;
      this.startSpeakingNextItemInQueue_();
      break;
  }
};
419
/**
 * Determines if |utteranceToCancel| should be canceled (interrupted if
 * currently speaking, or removed from the queue if not), given the new
 * utterance we want to speak and the queue mode. A missing utterance or one
 * marked 'doNotInterrupt' is never canceled. In QUEUE mode nothing is
 * canceled, in FLUSH mode everything else is, and in CATEGORY_FLUSH mode only
 * utterances whose category equals the new utterance's category are. Any
 * other queue mode cancels nothing.
 *
 * @param {cvox.Utterance} utteranceToCancel The utterance in question.
 * @param {cvox.Utterance} newUtterance The new utterance we're enqueueing.
 * @param {number} queueMode The queue mode.
 * @return {boolean} True if this utterance should be canceled.
 * @private
 */
cvox.TtsBackground.prototype.shouldCancel_ =
    function(utteranceToCancel, newUtterance, queueMode) {
  if (!utteranceToCancel) {
    return false;
  }
  if (utteranceToCancel.properties['doNotInterrupt']) {
    return false;
  }
  switch (queueMode) {
    case cvox.AbstractTts.QUEUE_MODE_QUEUE:
      return false;
    case cvox.AbstractTts.QUEUE_MODE_FLUSH:
      return true;
    case cvox.AbstractTts.QUEUE_MODE_CATEGORY_FLUSH:
      return (utteranceToCancel.properties['category'] ==
          newUtterance.properties['category']);
    default:
      // Unknown modes used to fall off the end and return undefined; return
      // an explicit boolean to honor the declared return type.
      return false;
  }
};
451
/**
 * Finishes off an utterance by invoking its 'endCallback' property, when it
 * has one. Exceptions thrown by the callback are deliberately ignored. A
 * null utterance is a no-op.
 * @param {cvox.Utterance} utterance The utterance to cancel.
 * @private
 */
cvox.TtsBackground.prototype.cancelUtterance_ = function(utterance) {
  if (!utterance) {
    return;
  }
  if (!utterance.properties['endCallback']) {
    return;
  }
  try {
    utterance.properties['endCallback']();
  } catch (e) {
    // Best effort: a failing callback must not break queue handling.
  }
};
466
/**
 * Delegates the adjustment to the superclass, then stores the resulting
 * property value in localStorage under the property's name.
 * @override
 */
cvox.TtsBackground.prototype.increaseOrDecreaseProperty =
    function(propertyName, increase) {
  goog.base(this, 'increaseOrDecreaseProperty', propertyName, increase);

  var updatedValue = this.ttsProperties[propertyName];
  localStorage[propertyName] = updatedValue;
};
473
/**
 * Calls the superclass implementation, then reports speech as ongoing unless
 * the most recent TTS event recorded was 'end'.
 * @override
 */
cvox.TtsBackground.prototype.isSpeaking = function() {
  goog.base(this, 'isSpeaking');
  var lastEventWasEnd = this.lastEventType == 'end';
  return !lastEventWasEnd;
};
479
/**
 * Stops all speech: calls the superclass implementation, cancels the current
 * utterance and every queued one (invoking their end callbacks), empties the
 * queue, and finally tells chrome.tts to stop.
 * @override
 */
cvox.TtsBackground.prototype.stop = function() {
  goog.base(this, 'stop');

  var speaking = this.currentUtterance_;
  this.cancelUtterance_(speaking);
  this.currentUtterance_ = null;

  // The length is re-read on every pass, so utterances appended while the
  // callbacks run are cancelled as well.
  var pending = this.utteranceQueue_;
  var index = 0;
  while (index < pending.length) {
    this.cancelUtterance_(pending[index]);
    index++;
  }
  pending.length = 0;

  chrome.tts.stop();
};
494
/**
 * Registers a listener that is notified when the current utterance starts
 * and when it ends.
 * @override
 */
cvox.TtsBackground.prototype.addCapturingEventListener = function(listener) {
  var listeners = this.capturingTtsEventListeners_;
  listeners.push(listener);
};
499
/**
 * Handles a TTS 'error' event by forgetting the voice name stored in
 * localStorage. The error message itself is not used.
 * @param {string} errorMessage Describes the error (set by onEvent).
 * @private
 */
cvox.TtsBackground.prototype.onError_ = function(errorMessage) {
  // Reset voice related parameters.
  var storedVoiceKey = 'voiceName';
  delete localStorage[storedVoiceKey];
};
509
/**
 * Expresses the current value of an engine property as its distance from the
 * property's minimum, divided by the absolute size of the min..max range.
 * @param {string} property The property to convert.
 * @return {?number} The percentage of the property.
 */
cvox.TtsBackground.prototype.propertyToPercentage = function(property) {
  var lowerBound = this.propertyMin[property];
  var rangeSize = Math.abs(this.propertyMax[property] - lowerBound);
  var distanceFromMin = this.ttsProperties[property] - lowerBound;
  return distanceFromMin / rangeSize;
};
519
520
/**
 * Prepares text for the speech engine: optional math lookup, the
 * superclass's generic processing, punctuation expansion for the requested
 * (or current) punctuation echo level, scheduling or cancelling of phonetic
 * pronunciation, a short-description lookup for single characters, and
 * whitespace normalization.
 * @override
 */
cvox.TtsBackground.prototype.preprocess = function(text, properties) {
  properties = properties ? properties : {};

  // Perform specialized processing, such as mathematics.
  if (properties.math) {
    text = this.preprocessMath_(text, properties.math);
  }

  // Perform generic processing.
  text = goog.base(this, 'preprocess', text, properties);

  // Perform any remaining processing such as punctuation expansion.
  var requestedEcho = properties[cvox.AbstractTts.PUNCTUATION_ECHO];
  var pE = null;
  if (requestedEcho) {
    for (var i = 0; i < this.punctuationEchoes_.length; i++) {
      if (requestedEcho == this.punctuationEchoes_[i].name) {
        pE = this.punctuationEchoes_[i];
        break;
      }
    }
  }
  // No level requested, or the requested name is unknown: use the current
  // level rather than dereferencing a missing entry.
  if (!pE) {
    pE = this.punctuationEchoes_[this.currentPunctuationEcho_];
  }
  // The current index itself may be invalid (e.g. a corrupt stored value);
  // in that case skip punctuation expansion instead of throwing.
  if (pE) {
    text =
        text.replace(pE.regexp, this.createPunctuationReplace_(pE.clear));
  }

  // Try pronouncing phonetically for single characters. Cancel previous calls
  // to pronouncePhonetically_ if we fail to pronounce on this invocation or if
  // this text is math which should never be pronounced phonetically.
  if (properties.math ||
      !properties['phoneticCharacters'] ||
      !this.pronouncePhonetically_(text)) {
    this.clearTimeout_();
  }

  // Try looking up in our unicode tables for a short description.
  if (!properties.math && text.length == 1 && this.mathmap) {
    text = this.mathmap.store.lookupString(
        text.toLowerCase(),
        cvox.MathStore.createDynamicConstraint('default', 'short')) || text;
  }

  //  Remove all whitespace from the beginning and end, and collapse all
  // inner strings of whitespace to a single space.
  text = text.replace(/\s+/g, ' ').replace(/^\s+|\s+$/g, '');

  return text;
};
571
572
/**
 * Advances to the next punctuation echo level, wrapping around after the
 * last one, and stores the new level index in localStorage.
 * @return {string} The resulting punctuation level message id.
 */
cvox.TtsBackground.prototype.cyclePunctuationEcho = function() {
  var levelCount = this.punctuationEchoes_.length;
  var nextLevel = (this.currentPunctuationEcho_ + 1) % levelCount;

  this.currentPunctuationEcho_ = nextLevel;
  localStorage[cvox.AbstractTts.PUNCTUATION_ECHO] = nextLevel;

  return this.punctuationEchoes_[nextLevel].msg;
};
584
585
/**
 * Looks a math expression up in the math map, using a constraint built from
 * the 'domain' and 'style' entries of |math|. The text is returned unchanged
 * when math processing is disabled or the lookup yields nothing.
 * @param {string} text Text representing a math expression.
 * @param {Object= } math Parameter containing information how to
 *     process the math expression.
 * @return {string} The string with a spoken version of the math expression.
 * @private
 */
cvox.TtsBackground.prototype.preprocessMath_ = function(text, math) {
  if (!this.mathmap) {
    return text;
  }
  var constraint = cvox.MathStore.createDynamicConstraint(
      math['domain'], math['style']);
  var spoken = this.mathmap.store.lookupString(text, constraint);
  return spoken ? spoken : text;
};
607
608
/**
 * Rewrites every run of more than four digits as the same digits separated
 * by single spaces. Runs of four digits or fewer are left alone, so numbers
 * like 123,456 or 2011 are not "digitized" while 123456 is.
 * @param {string} text The text to process.
 * @return {string} A string with all numbers converted.
 * @private
 */
cvox.TtsBackground.prototype.getNumberAsDigits_ = function(text) {
  var spellOut = function(digits) {
    return digits.length > 4 ? digits.split('').join(' ') : digits;
  };
  return text.replace(/\d+/g, spellOut);
};
626
627
/**
 * Builds the callback given to string.replace for punctuation matches. The
 * matched character is kept if it is listed in retainPunctuation_ and
 * becomes a space otherwise. When |clear| is false, the character's
 * localized name is placed in front of it and the result is padded with
 * spaces.
 * @param {boolean} clear Whether we want to use whitespace in place of match.
 * @return {function(string): string} The replacement function.
 * @private
 */
cvox.TtsBackground.prototype.createPunctuationReplace_ = function(clear) {
  var ttsBackground = this;
  return function(match) {
    var retain = ' ';
    if (ttsBackground.retainPunctuation_.indexOf(match) != -1) {
      retain = match;
    }
    if (clear) {
      return retain;
    }
    var messageId = cvox.AbstractTts.CHARACTER_DICTIONARY[match];
    var formatter = new goog.i18n.MessageFormat(
        cvox.ChromeVox.msgs.getMsg(messageId));
    return ' ' + formatter.format({'COUNT': 1}) + retain + ' ';
  };
};
645
646
/**
 * Pronounces single letters phonetically after some timeout. The lower-cased
 * text is looked up in the phonetic map; on a hit, any previously scheduled
 * pronunciation is cancelled and the phonetic word is scheduled to be spoken
 * after PHONETIC_DELAY_MS_ milliseconds.
 * @param {string} text The text.
 * @return {boolean} True if the text resulted in speech.
 * @private
 */
cvox.TtsBackground.prototype.pronouncePhonetically_ = function(text) {
  if (!this.PHONETIC_MAP_) {
    return false;
  }
  var key = text.toLowerCase();
  // Only the map's own entries count. A plain lookup would also find members
  // inherited from Object.prototype (e.g. 'constructor' or 'toString') and
  // schedule a function to be spoken.
  if (!Object.prototype.hasOwnProperty.call(this.PHONETIC_MAP_, key)) {
    return false;
  }
  var phoneticText = this.PHONETIC_MAP_[key];
  if (!phoneticText || typeof phoneticText != 'string') {
    return false;
  }

  this.clearTimeout_();
  var self = this;
  this.timeoutId_ = setTimeout(function() {
    // NOTE(review): 1 is presumably cvox.AbstractTts.QUEUE_MODE_QUEUE --
    // confirm and replace the literal with the named constant.
    self.speak(phoneticText, 1);
  }, cvox.TtsBackground.PHONETIC_DELAY_MS_);
  return true;
};
669
670
/**
 * Cancels the pending timeout recorded in timeoutId_, if there is one, and
 * forgets its id.
 * @private
 */
cvox.TtsBackground.prototype.clearTimeout_ = function() {
  if (!goog.isDef(this.timeoutId_)) {
    return;
  }
  clearTimeout(this.timeoutId_);
  this.timeoutId_ = undefined;
};
681
/**
 * Asynchronously picks a default voice: the voices reported by chrome.tts
 * are sorted so that non-remote voices come before remote ones and, within
 * each group, voices whose language equals the UI locale come first. The
 * first voice after sorting, if any, becomes the current voice.
 * @private
 */
cvox.TtsBackground.prototype.setDefaultVoiceName_ = function() {
  var ttsBackground = this;
  chrome.tts.getVoices(function(voices) {
    var currentLocale =
        chrome.i18n.getMessage('@@ui_locale').replace('_', '-');

    // Lower rank sorts first. Remoteness outweighs the locale match.
    var rankOf = function(voice) {
      var remotePenalty = voice['remote'] ? 2 : 0;
      var localePenalty = voice['lang'] == currentLocale ? 0 : 1;
      return remotePenalty + localePenalty;
    };
    voices.sort(function(v1, v2) {
      return rankOf(v1) - rankOf(v2);
    });

    var best = voices[0];
    if (best) {
      ttsBackground.currentVoice = best.voiceName;
    }
  });
};
713