abstract_tts.js revision cedac228d2dd51db4b79ea1e72c7f249408ee061
1// Copyright 2014 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5/**
6 * @fileoverview Base class for Text-to-Speech engines that actually transform
7 * text to speech.
8 *
9 */
10
11goog.provide('cvox.AbstractTts');
12
13goog.require('cvox.TtsInterface');
14goog.require('goog.i18n.MessageFormat');
15
/**
 * Creates a new instance.
 *
 * Initializes the per-instance property tables (current values, defaults,
 * minimum, maximum and step for rate / pitch / volume). The first time an
 * instance is constructed it also compiles the two shared regular
 * expressions used by preprocess() from PRONUNCIATION_DICTIONARY and
 * SUBSTITUTION_DICTIONARY; later constructions reuse the cached regexps.
 * @constructor
 * @implements {cvox.TtsInterface}
 */
cvox.AbstractTts = function() {
  this.ttsProperties = {};

  /**
   * Default value for TTS properties.
   * Note that these as well as the subsequent properties might be different
   * on different host platforms (like Chrome, Android, etc.).
   * @type {{pitch : number,
   *         rate: number,
   *         volume: number}}
   * @protected
   */
  this.propertyDefault = {
    'rate': 0.5,
    'pitch': 0.5,
    'volume': 0.5
  };

  /**
   * Min value for TTS properties.
   * @type {{pitch : number,
   *         rate: number,
   *         volume: number}}
   * @protected
   */
  this.propertyMin = {
    'rate': 0.0,
    'pitch': 0.0,
    'volume': 0.0
  };

  /**
   * Max value for TTS properties.
   * @type {{pitch : number,
   *         rate: number,
   *         volume: number}}
   * @protected
   */
  this.propertyMax = {
    'rate': 1.0,
    'pitch': 1.0,
    'volume': 1.0
  };

  /**
   * Step value for TTS properties.
   * @type {{pitch : number,
   *         rate: number,
   *         volume: number}}
   * @protected
   */
  this.propertyStep = {
    'rate': 0.1,
    'pitch': 0.1,
    'volume': 0.1
  };

  /**
   * Builds a regexp alternation ("a|b|c") that matches the keys of a
   * dictionary literally. Regexp metacharacters in a key are escaped so that
   * a key such as '(' or '+' cannot break or change the pattern. An empty
   * dictionary yields '(?!)', which never matches; a bare '()' would match
   * the empty string everywhere and make preprocess() insert "undefined".
   * @param {Object.<string, string>} dictionary The dictionary to read.
   * @return {string} The alternation source text.
   */
  var buildAlternation = function(dictionary) {
    var escapedKeys = [];
    for (var key in dictionary) {
      escapedKeys.push(key.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'));
    }
    return escapedKeys.length ? escapedKeys.join('|') : '(?!)';
  };

  if (cvox.AbstractTts.pronunciationDictionaryRegexp_ == undefined) {
    // Create an expression that matches all words in the pronunciation
    // dictionary on word boundaries, ignoring case.
    var wordExpr =
        '\\b(' + buildAlternation(cvox.AbstractTts.PRONUNCIATION_DICTIONARY) +
        ')\\b';
    cvox.AbstractTts.pronunciationDictionaryRegexp_ =
        new RegExp(wordExpr, 'ig');
  }

  if (cvox.AbstractTts.substitutionDictionaryRegexp_ == undefined) {
    // Create an expression that matches all symbols in the substitution
    // dictionary, wherever they occur.
    var symbolExpr =
        '(' + buildAlternation(cvox.AbstractTts.SUBSTITUTION_DICTIONARY) + ')';
    cvox.AbstractTts.substitutionDictionaryRegexp_ =
        new RegExp(symbolExpr, 'ig');
  }
};
103
104
/**
 * TTS property values currently in effect for this engine, keyed by property
 * name. mergeProperties() uses them as the base of every merged result and
 * increaseOrDecreaseProperty() updates them in place.
 * @type {Object}
 * @protected
 */
cvox.AbstractTts.prototype.ttsProperties;
111
112
/**
 * Base implementation: produces no speech and simply returns this object.
 * @override
 */
cvox.AbstractTts.prototype.speak = function(textString, queueMode, properties) {
  // Nothing is spoken here; hand back the receiver unchanged.
  return this;
};
117
118
/**
 * Base implementation: always reports that nothing is being spoken.
 * @override
 */
cvox.AbstractTts.prototype.isSpeaking = function() {
  return false;
};
123
124
/**
 * Base implementation: intentionally does nothing.
 * @override
 */
cvox.AbstractTts.prototype.stop = function() {
  // No-op in the base class.
};
128
129
/**
 * Base implementation: ignores the listener.
 * @override
 */
cvox.AbstractTts.prototype.addCapturingEventListener = function(listener) {
  // No-op in the base class.
};
132
133
/**
 * Moves one TTS property a single step up or down and stores the result,
 * clamped to the [propertyMin, propertyMax] range for that property.
 *
 * If the property has no numeric current value yet, stepping starts from
 * its entry in propertyDefault instead of producing NaN. Properties that
 * have no numeric min / max / step (or no usable starting value) are left
 * untouched.
 * @override
 */
cvox.AbstractTts.prototype.increaseOrDecreaseProperty =
    function(propertyName, increase) {
      var min = this.propertyMin[propertyName];
      var max = this.propertyMax[propertyName];
      var step = this.propertyStep[propertyName];
      if (typeof min != 'number' || typeof max != 'number' ||
          typeof step != 'number') {
        // Unknown property: there is no range to step within.
        return;
      }

      var current = this.ttsProperties[propertyName];
      if (typeof current != 'number' || isNaN(current)) {
        // Not set yet (or corrupted): start from the default rather than
        // letting undefined + step store NaN.
        current = this.propertyDefault[propertyName];
      }
      if (typeof current != 'number' || isNaN(current)) {
        return;
      }

      current = increase ? current + step : current - step;
      this.ttsProperties[propertyName] = Math.max(Math.min(current, max), min);
    };
144
145
/**
 * Combines this engine's current TTS properties with per-utterance
 * overrides. The result is always a freshly created object, so callers may
 * modify it without affecting any object used elsewhere.
 *
 * Absolute volume / pitch / rate (numbers) and lang (string) in |properties|
 * replace the current values. Relative volume / pitch / rate are then added
 * to the corresponding absolute value and the sum is clamped to that
 * property's min / max.
 * @param {Object=} properties The properties to merge with the current ones.
 * @return {Object} The merged properties.
 * @protected
 */
cvox.AbstractTts.prototype.mergeProperties = function(properties) {
  var result = {};
  var name;
  if (this.ttsProperties) {
    for (name in this.ttsProperties) {
      result[name] = this.ttsProperties[name];
    }
  }

  // Without overrides the copy of the current properties is the answer.
  if (!properties) {
    return result;
  }

  var tts = cvox.AbstractTts;

  // Absolute numeric overrides, then the language override.
  var numericKeys = [tts.VOLUME, tts.PITCH, tts.RATE];
  for (var i = 0; i < numericKeys.length; i++) {
    var numericKey = numericKeys[i];
    if (typeof(properties[numericKey]) == 'number') {
      result[numericKey] = properties[numericKey];
    }
  }
  if (typeof(properties[tts.LANG]) == 'string') {
    result[tts.LANG] = properties[tts.LANG];
  }

  // Relative adjustments: [absolute key, relative key] pairs.
  var relativePairs = [
    [tts.VOLUME, tts.RELATIVE_VOLUME],
    [tts.PITCH, tts.RELATIVE_PITCH],
    [tts.RATE, tts.RELATIVE_RATE]
  ];
  for (var j = 0; j < relativePairs.length; j++) {
    var abs = relativePairs[j][0];
    var rel = relativePairs[j][1];
    if (typeof(properties[rel]) != 'number' ||
        typeof(result[abs]) != 'number') {
      continue;
    }
    var adjusted = result[abs] + properties[rel];
    var lowest = this.propertyMin[abs];
    var highest = this.propertyMax[abs];
    if (adjusted > highest) {
      adjusted = highest;
    } else if (adjusted < lowest) {
      adjusted = lowest;
    }
    result[abs] = adjusted;
  }

  return result;
};
199
200
/**
 * Method to preprocess text to be spoken properly by a speech
 * engine. In order, it:
 *
 * 1. Copies the "capital letter" personality into |properties| when the
 *    text is a single letter A-Z (only if |properties| was supplied).
 *
 * 2. Replaces every symbol found in the substitution dictionary.
 *
 * 3. Replaces a single remaining character with a description of that
 *    character (or its upper-case form if it has no description) and
 *    returns.
 *
 * 4. Replaces words found in the pronunciation dictionary, spells out
 *    "google+", and describes runs of repeated punctuation.
 *
 * 5. Converts all-caps words to lowercase if they don't look like an
 *    acronym / abbreviation; otherwise spaces out their letters.
 *
 * @param {string} text A text string to be spoken.
 * @param {Object=} properties Out parameter populated with how to speak the
 *     string.
 * @return {string} The text formatted in a way that will sound better by
 *     most speech engines.
 * @protected
 */
cvox.AbstractTts.prototype.preprocess = function(text, properties) {
  // |properties| is optional, so only populate it when the caller passed an
  // object; otherwise a lone capital letter would throw a TypeError.
  if (properties && text.length == 1 && text >= 'A' && text <= 'Z') {
    for (var prop in cvox.AbstractTts.PERSONALITY_CAPITAL) {
      properties[prop] = cvox.AbstractTts.PERSONALITY_CAPITAL[prop];
    }
  }

  // Substitute all symbols in the substitution dictionary. This is pretty
  // efficient because we use a single regexp that matches all symbols
  // simultaneously.
  text = text.replace(
      cvox.AbstractTts.substitutionDictionaryRegexp_,
      function(symbol) {
        return ' ' + cvox.AbstractTts.SUBSTITUTION_DICTIONARY[symbol] + ' ';
      });

  // Handle single characters that we want to make sure we pronounce.
  if (text.length == 1) {
    return cvox.AbstractTts.CHARACTER_DICTIONARY[text] ?
        (new goog.i18n.MessageFormat(cvox.ChromeVox.msgs.getMsg(
                cvox.AbstractTts.CHARACTER_DICTIONARY[text])))
            .format({'COUNT': 1}) :
        text.toUpperCase();
  }

  // Substitute all words in the pronunciation dictionary. This is pretty
  // efficient because we use a single regexp that matches all words
  // simultaneously, and it calls a function with each match, which we can
  // use to look up the replacement in our dictionary.
  text = text.replace(
      cvox.AbstractTts.pronunciationDictionaryRegexp_,
      function(word) {
        return cvox.AbstractTts.PRONUNCIATION_DICTIONARY[word.toLowerCase()];
      });

  // Special case for google+, where the punctuation must be pronounced.
  text = text.replace(/google\+/ig, 'google plus');

  // Expand all repeated characters.
  text = text.replace(
      cvox.AbstractTts.repetitionRegexp_, cvox.AbstractTts.repetitionReplace_);

  // If there's no lower case letters, and at least two spaces, skip spacing
  // text.
  var skipSpacing = false;
  if (!text.match(/[a-z]+/) && text.indexOf(' ') != text.lastIndexOf(' ')) {
    skipSpacing = true;
  }

  // Convert all-caps words to lowercase if they don't look like acronyms,
  // otherwise add a space before all-caps words so that all-caps words in
  // the middle of camelCase will be separated.
  text = text.replace(/[A-Z]+/g, function(word) {
    // If a word contains vowels and is more than 3 letters long, it is
    // probably a real word and not just an abbreviation. Convert it to lower
    // case and speak it normally.
    if ((word.length > 3) && word.match(/([AEIOUY])/g)) {
      return word.toLowerCase();
    } else if (!skipSpacing) {
      // Builds spaced-out camelCased/all CAPS words so they sound better when
      // spoken by TTS engines.
      return ' ' + word.split('').join(' ');
    } else {
      return word;
    }
  });

  return text;
};
285
286
/**
 * Property name for the speech rate.
 * @type {string}
 */
cvox.AbstractTts.RATE = 'rate';

/**
 * Property name for the speech pitch.
 * @type {string}
 */
cvox.AbstractTts.PITCH = 'pitch';

/**
 * Property name for the speech volume.
 * @type {string}
 */
cvox.AbstractTts.VOLUME = 'volume';

/**
 * Property name for the speech language.
 * @type {string}
 */
cvox.AbstractTts.LANG = 'lang';
295
/**
 * Property name for a rate adjustment relative to the current rate.
 * @type {string}
 */
cvox.AbstractTts.RELATIVE_RATE = 'relativeRate';

/**
 * Property name for a pitch adjustment relative to the current pitch.
 * @type {string}
 */
cvox.AbstractTts.RELATIVE_PITCH = 'relativePitch';

/**
 * Property name for a volume adjustment relative to the current volume.
 * @type {string}
 */
cvox.AbstractTts.RELATIVE_VOLUME = 'relativeVolume';
302
/**
 * Property name for the text color (for the lens display).
 * @type {string}
 */
cvox.AbstractTts.COLOR = 'color';

/**
 * Property name for the CSS font-weight (for the lens display).
 * @type {string}
 */
cvox.AbstractTts.FONT_WEIGHT = 'fontWeight';

/**
 * Property name for the punctuation-echo setting.
 * @type {string}
 */
cvox.AbstractTts.PUNCTUATION_ECHO = 'punctuationEcho';

/**
 * Property name for the pause setting.
 * @type {string}
 */
cvox.AbstractTts.PAUSE = 'pause';
313
/**
 * Speech properties used for annotations: text that ChromeVox speaks to
 * elaborate on a user interface element but that is not shown on-screen.
 * Lowers the pitch relative to the current value and turns punctuation echo
 * off.
 * @type {Object}
 */
cvox.AbstractTts.PERSONALITY_ANNOTATION = {
  'relativePitch': -0.25,
  // TODO:(rshearer) Added this color change for I/O presentation.
  'color': 'yellow',
  'punctuationEcho': 'none'
};
325
326
/**
 * Speech properties used for announcements: text that ChromeVox speaks
 * without it being tied to any user interface element. Turns punctuation
 * echo off.
 * @type {Object}
 */
cvox.AbstractTts.PERSONALITY_ANNOUNCEMENT = {
  'punctuationEcho': 'none'
};
335
/**
 * Speech properties used for alerts coming from the system, such as battery
 * level warnings. Turns punctuation echo off and sets the doNotInterrupt
 * flag (presumably honored by the concrete engine; not used in this file).
 * @type {Object}
 */
cvox.AbstractTts.PERSONALITY_SYSTEM_ALERT = {
  'punctuationEcho': 'none',
  'doNotInterrupt': true
};
345
/**
 * Speech properties used for an aside, i.e. text in parentheses. Slightly
 * lowers the pitch relative to the current value.
 * @type {Object}
 */
cvox.AbstractTts.PERSONALITY_ASIDE = {
  'relativePitch': -0.1,
  'color': '#669'
};
354
355
/**
 * Speech properties used for capital letters. preprocess() copies these
 * into its out parameter when the text is a single letter A-Z. Raises the
 * pitch relative to the current value.
 * @type {Object}
 */
cvox.AbstractTts.PERSONALITY_CAPITAL = {
  'relativePitch': 0.6
};
363
364
/**
 * Speech properties used for deleted text. Turns punctuation echo off and
 * lowers the pitch relative to the current value.
 * @type {Object}
 */
cvox.AbstractTts.PERSONALITY_DELETED = {
  'punctuationEcho': 'none',
  'relativePitch': -0.6
};
373
374
/**
 * Speech properties used for quoted text. Slightly raises the pitch
 * relative to the current value.
 * @type {Object}
 */
cvox.AbstractTts.PERSONALITY_QUOTE = {
  'relativePitch': 0.1,
  'color': '#b6b',
  'fontWeight': 'bold'
};
384
385
/**
 * Speech properties used for strong or bold text. Slightly raises the pitch
 * relative to the current value.
 * @type {Object}
 */
cvox.AbstractTts.PERSONALITY_STRONG = {
  'relativePitch': 0.1,
  'color': '#b66',
  'fontWeight': 'bold'
};
395
396
/**
 * Speech properties used for emphasized or italicized text. Slightly raises
 * the volume and slightly lowers the rate, both relative to the current
 * values.
 * @type {Object}
 */
cvox.AbstractTts.PERSONALITY_EMPHASIS = {
  'relativeVolume': 0.1,
  'relativeRate': -0.1,
  'color': '#6bb',
  'fontWeight': 'bold'
};
407
408
/**
 * Whether the TTS is being debugged.
 * @type {boolean}
 */
cvox.AbstractTts.DEBUG = true;
414
415
/**
 * Queue mode value meaning "interrupt the utterance currently being spoken".
 * @type {number}
 */
cvox.AbstractTts.QUEUE_MODE_FLUSH = 0;
421
422
/**
 * Queue mode value meaning "do not interrupt the utterance currently being
 * spoken".
 * @type {number}
 */
cvox.AbstractTts.QUEUE_MODE_QUEUE = 1;
428
429
/**
 * Character dictionary. Maps a single character to the id of the message
 * that describes it; preprocess() and repetitionReplace_() pass that id to
 * cvox.ChromeVox.msgs.getMsg() to obtain the human readable text. In
 * preprocess() the lookup only happens for single character utterances.
 * @type {Object.<string, string>}
 */
cvox.AbstractTts.CHARACTER_DICTIONARY = {
  ' ': 'space',
  '`': 'backtick',
  '~': 'tilde',
  '!': 'exclamation',
  '@': 'at',
  '#': 'pound',
  '$': 'dollar',
  '%': 'percent',
  '^': 'caret',
  '&': 'ampersand',
  '*': 'asterisk',
  '(': 'open_paren',
  ')': 'close_paren',
  '-': 'dash',
  '_': 'underscore',
  '=': 'equals',
  '+': 'plus',
  '[': 'left_bracket',
  ']': 'right_bracket',
  '{': 'left_brace',
  '}': 'right_brace',
  '|': 'pipe',
  ';': 'semicolon',
  ':': 'colon',
  ',': 'comma',
  '.': 'dot',
  '<': 'less_than',
  '>': 'greater_than',
  '/': 'slash',
  '?': 'question_mark',
  '"': 'quote',
  '\'': 'apostrophe',
  '\t': 'tab',
  '\r': 'return',
  '\n': 'new_line',
  '\\': 'backslash'
};
473
474
/**
 * Pronunciation dictionary. Keys must be lowercase because preprocess()
 * lower-cases each matched word before looking it up. Each replacement
 * should be spelled the way most TTS engines will pronounce it correctly.
 * This particular dictionary only handles letters and numbers, no symbols.
 * @type {Object.<string, string>}
 */
cvox.AbstractTts.PRONUNCIATION_DICTIONARY = {
  'admob': 'ad-mob',
  'adsense': 'ad-sense',
  'adwords': 'ad-words',
  'angularjs': 'angular j s',
  'bcc': 'B C C',
  'cc': 'C C',
  'chromevox': 'chrome vox',
  'cr48': 'C R 48',
  'ctrl': 'control',
  'doubleclick': 'double-click',
  'gmail': 'gee mail',
  'gtalk': 'gee talk',
  'http': 'H T T P',
  'https': 'H T T P S',
  'igoogle': 'eye google',
  'pagerank': 'page-rank',
  'username': 'user-name',
  'www': 'W W W',
  'youtube': 'you tube'
};
503
504
/**
 * Regexp matching every key of the pronunciation dictionary. It is created
 * by the constructor the first time an instance is built.
 * @type {RegExp}
 * @private
 */
cvox.AbstractTts.pronunciationDictionaryRegexp_;
511
512
/**
 * Substitution dictionary. These symbols or patterns are ALWAYS substituted
 * whenever they occur, so this should be reserved only for unicode characters
 * and characters that never have any different meaning in context.
 *
 * For example, do not include '$' here because $2 should be read as
 * "two dollars".
 *
 * preprocess() looks each match up by its exact text and surrounds the
 * replacement with single spaces.
 * @type {Object.<string, string>}
 */
cvox.AbstractTts.SUBSTITUTION_DICTIONARY = {
  '://': 'colon slash slash',
  '\u00bc': 'one fourth',
  '\u00bd': 'one half',
  '\u2190': 'left arrow',
  '\u2191': 'up arrow',
  '\u2192': 'right arrow',
  '\u2193': 'down arrow',
  '\u21d0': 'left double arrow',
  '\u21d1': 'up double arrow',
  // Was 'right double  arrow' (two spaces); made consistent with siblings.
  '\u21d2': 'right double arrow',
  '\u21d3': 'down double arrow',
  '\u21e6': 'left arrow',
  '\u21e7': 'up arrow',
  '\u21e8': 'right arrow',
  '\u21e9': 'down arrow',
  '\u2303': 'control',
  '\u2318': 'command',
  '\u2325': 'option',
  '\u25b2': 'up triangle',
  '\u25b3': 'up triangle',
  '\u25b4': 'up triangle',
  '\u25b5': 'up triangle',
  '\u25b6': 'right triangle',
  '\u25b7': 'right triangle',
  '\u25b8': 'right triangle',
  '\u25b9': 'right triangle',
  '\u25ba': 'right pointer',
  '\u25bb': 'right pointer',
  '\u25bc': 'down triangle',
  '\u25bd': 'down triangle',
  '\u25be': 'down triangle',
  '\u25bf': 'down triangle',
  '\u25c0': 'left triangle',
  '\u25c1': 'left triangle',
  '\u25c2': 'left triangle',
  '\u25c3': 'left triangle',
  '\u25c4': 'left pointer',
  '\u25c5': 'left pointer',
  '\uf8ff': 'apple'
};
563
564
/**
 * Regexp matching every key of the substitution dictionary. It is created
 * by the constructor the first time an instance is built.
 * @type {RegExp}
 * @private
 */
cvox.AbstractTts.substitutionDictionaryRegexp_;
571
572
/**
 * Repetition filter regexp: matches one of the listed punctuation characters
 * followed by at least two more copies of itself, i.e. a run of three or
 * more identical characters.
 * @type {RegExp}
 * @private
 */
cvox.AbstractTts.repetitionRegexp_ =
    /([-\/\\|!@#$%^&*\(\)=_+\[\]\{\}.?;'":<>])\1{2,}/g;
580
581
/**
 * Builds the spoken description of a run of one repeated character. Meant to
 * be passed as the replacement callback to string.replace.
 *
 * The message for the run's first character is looked up through
 * CHARACTER_DICTIONARY and formatted with the run length as COUNT; the
 * result is padded with one space on each side.
 * @param {string} match The matching string.
 * @return {string} The description.
 * @private
 */
cvox.AbstractTts.repetitionReplace_ = function(match) {
  var repeatCount = match.length;
  var messageId = cvox.AbstractTts.CHARACTER_DICTIONARY[match[0]];
  var formatter =
      new goog.i18n.MessageFormat(cvox.ChromeVox.msgs.getMsg(messageId));
  return ' ' + formatter.format({'COUNT': repeatCount}) + ' ';
};
595
596
/**
 * Looks up the default value of a TTS property in this instance's
 * propertyDefault table.
 * @override
 */
cvox.AbstractTts.prototype.getDefaultProperty = function(property) {
  var defaults = this.propertyDefault;
  return defaults[property];
};
603