abstract_tts.js revision 116680a4aac90f2aa7413d9095a592090648e557
1// Copyright 2014 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5/**
6 * @fileoverview Base class for Text-to-Speech engines that actually transform
7 * text to speech.
8 *
9 */
10
11goog.provide('cvox.AbstractTts');
12
13goog.require('cvox.TtsInterface');
14goog.require('goog.i18n.MessageFormat');
15
/**
 * Base class for Text-to-Speech engines. Each instance gets its own tables
 * of current, default, minimum, maximum and step values for the rate, pitch
 * and volume properties. The first time an instance is created, the shared
 * regular expressions used by preprocess() are compiled from
 * PRONUNCIATION_DICTIONARY and SUBSTITUTION_DICTIONARY.
 * @constructor
 * @implements {cvox.TtsInterface}
 */
cvox.AbstractTts = function() {
  // Current property values, keyed by property name; starts out empty.
  this.ttsProperties = {};

  /**
   * Default value for TTS properties.
   * Note that these as well as the subsequent properties might be different
   * on different host platforms (like Chrome, Android, etc.).
   * @type {{pitch : number,
   *         rate: number,
   *         volume: number}}
   * @protected
   */
  this.propertyDefault = {
    'rate': 0.5,
    'pitch': 0.5,
    'volume': 0.5
  };

  /**
   * Min value for TTS properties.
   * @type {{pitch : number,
   *         rate: number,
   *         volume: number}}
   * @protected
   */
  this.propertyMin = {
    'rate': 0.0,
    'pitch': 0.0,
    'volume': 0.0
  };

  /**
   * Max value for TTS properties.
   * @type {{pitch : number,
   *         rate: number,
   *         volume: number}}
   * @protected
   */
  this.propertyMax = {
    'rate': 1.0,
    'pitch': 1.0,
    'volume': 1.0
  };

  /**
   * Step value for TTS properties.
   * @type {{pitch : number,
   *         rate: number,
   *         volume: number}}
   * @protected
   */
  this.propertyStep = {
    'rate': 0.1,
    'pitch': 0.1,
    'volume': 0.1
  };

  /**
   * Compiles a global, case-insensitive regexp that matches any key of the
   * given dictionary literally.
   * @param {Object.<string, string>} dictionary Dictionary whose keys are
   *     matched.
   * @param {string} prefix Pattern source placed before the alternation.
   * @param {string} suffix Pattern source placed after the alternation.
   * @return {RegExp} The compiled expression.
   */
  var compileKeyRegexp = function(dictionary, prefix, suffix) {
    var alternatives = [];
    for (var key in dictionary) {
      // Escape regexp metacharacters so that a key such as '+' or '(' is
      // matched literally instead of corrupting the pattern.
      alternatives.push(key.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'));
    }
    if (alternatives.length == 0) {
      // An empty group would match the empty string at every position; use
      // a pattern that can never match instead.
      return new RegExp('(?!)', 'ig');
    }
    return new RegExp(
        prefix + '(' + alternatives.join('|') + ')' + suffix, 'ig');
  };

  if (cvox.AbstractTts.pronunciationDictionaryRegexp_ == undefined) {
    // Matches all words in the pronunciation dictionary on word boundaries,
    // ignoring case.
    cvox.AbstractTts.pronunciationDictionaryRegexp_ = compileKeyRegexp(
        cvox.AbstractTts.PRONUNCIATION_DICTIONARY, '\\b', '\\b');
  }

  if (cvox.AbstractTts.substitutionDictionaryRegexp_ == undefined) {
    // Matches all symbols in the substitution dictionary, wherever they
    // occur.
    cvox.AbstractTts.substitutionDictionaryRegexp_ = compileKeyRegexp(
        cvox.AbstractTts.SUBSTITUTION_DICTIONARY, '', '');
  }
};
103
104
/**
 * Current TTS property values for this engine, keyed by property name. The
 * constructor initializes it to an empty object; mergeProperties uses it as
 * the base set and increaseOrDecreaseProperty updates it in place.
 * @type {Object}
 * @protected
 */
cvox.AbstractTts.prototype.ttsProperties;
111
112
/**
 * Base implementation: produces no speech and simply returns this instance.
 * @override
 */
cvox.AbstractTts.prototype.speak = function(
    textString, queueMode, properties) {
  return this;
};
117
118
/**
 * Base implementation: always reports that nothing is being spoken.
 * @override
 */
cvox.AbstractTts.prototype.isSpeaking = function() {
  return false;
};
123
124
/**
 * Base implementation: does nothing.
 * @override
 */
cvox.AbstractTts.prototype.stop = function() {};
128
129
/**
 * Base implementation: ignores the listener.
 * @override
 */
cvox.AbstractTts.prototype.addCapturingEventListener = function(listener) {};
132
133
/**
 * Moves the current value of one TTS property up or down by that property's
 * step and stores the result in ttsProperties, limited to the range given
 * by propertyMin and propertyMax.
 * @override
 */
cvox.AbstractTts.prototype.increaseOrDecreaseProperty = function(
    propertyName, increase) {
  var lowerBound = this.propertyMin[propertyName];
  var upperBound = this.propertyMax[propertyName];
  var delta = this.propertyStep[propertyName];
  var value = this.ttsProperties[propertyName];

  if (increase) {
    value = value + delta;
  } else {
    value = value - delta;
  }

  // Cap at the maximum first, then raise to the minimum.
  var capped = Math.min(value, upperBound);
  this.ttsProperties[propertyName] = Math.max(capped, lowerBound);
};
144
145
/**
 * Combines the engine's current properties with the ones supplied for a
 * single utterance. The result is always a freshly created object, so
 * callers may modify it without affecting ttsProperties or the argument.
 *
 * Numeric volume, pitch and rate values and a string lang value in
 * properties replace the current ones. Numeric relativeVolume,
 * relativePitch and relativeRate values are then added to the matching
 * absolute value, which is kept within propertyMin and propertyMax. Any
 * remaining keys of properties are copied over when not already present.
 *
 * @param {Object=} properties The properties to merge with the current ones.
 * @return {Object} The merged properties.
 * @protected
 */
cvox.AbstractTts.prototype.mergeProperties = function(properties) {
  var tts = cvox.AbstractTts;
  var result = {};
  var key;
  var i;

  // Start from a shallow copy of the current properties.
  for (key in this.ttsProperties) {
    result[key] = this.ttsProperties[key];
  }

  if (!properties) {
    return result;
  }

  // Absolute overrides: numbers for volume/pitch/rate, a string for lang.
  var numericNames = [tts.VOLUME, tts.PITCH, tts.RATE];
  for (i = 0; i < numericNames.length; i++) {
    key = numericNames[i];
    if (typeof(properties[key]) == 'number') {
      result[key] = properties[key];
    }
  }
  if (typeof(properties[tts.LANG]) == 'string') {
    result[tts.LANG] = properties[tts.LANG];
  }

  // Relative adjustments, applied on top of the absolute value and kept
  // inside the allowed range.
  var relativePairs = [
    [tts.VOLUME, tts.RELATIVE_VOLUME],
    [tts.PITCH, tts.RELATIVE_PITCH],
    [tts.RATE, tts.RELATIVE_RATE]
  ];
  for (i = 0; i < relativePairs.length; i++) {
    var abs = relativePairs[i][0];
    var rel = relativePairs[i][1];
    if (typeof(properties[rel]) != 'number' ||
        typeof(result[abs]) != 'number') {
      continue;
    }
    result[abs] += properties[rel];
    var floor = this.propertyMin[abs];
    var ceiling = this.propertyMax[abs];
    if (result[abs] > ceiling) {
      result[abs] = ceiling;
    } else if (result[abs] < floor) {
      result[abs] = floor;
    }
  }

  // Carry over everything else that has not been set yet.
  for (key in properties) {
    if (!result.hasOwnProperty(key)) {
      result[key] = properties[key];
    }
  }

  return result;
};
205
206
/**
 * Preprocesses text so that it is spoken properly by a speech engine.
 *
 * Steps, in order:
 * 1. If the text is a single capital letter A-Z and properties was given,
 *    copy the PERSONALITY_CAPITAL settings into properties.
 * 2. Replace every symbol from SUBSTITUTION_DICTIONARY with its spoken
 *    form, padded with spaces.
 * 3. If the text is now a single character, return its localized
 *    description from CHARACTER_DICTIONARY, or the upper-cased character
 *    when it has no entry.
 * 4. Replace words from PRONUNCIATION_DICTIONARY, ignoring case.
 * 5. Replace "google+" with "google plus".
 * 6. Replace runs of three or more identical punctuation characters with a
 *    description of the run.
 * 7. Convert all-caps words to lowercase if they don't look like an
 *    acronym / abbreviation; otherwise space out their letters, unless the
 *    text has no lower case letters and at least two spaces.
 *
 * @param {string} text A text string to be spoken.
 * @param {Object=} properties Out parameter populated with how to speak the
 *     string.
 * @return {string} The text formatted in a way that will sound better by
 *     most speech engines.
 * @protected
 */
cvox.AbstractTts.prototype.preprocess = function(text, properties) {
  var tts = cvox.AbstractTts;

  // The properties out parameter is optional, so only populate it when the
  // caller actually supplied one.
  if (properties && text.length === 1 && text >= 'A' && text <= 'Z') {
    for (var prop in tts.PERSONALITY_CAPITAL) {
      properties[prop] = tts.PERSONALITY_CAPITAL[prop];
    }
  }

  // Substitute all symbols in the substitution dictionary. This is pretty
  // efficient because we use a single regexp that matches all symbols
  // simultaneously.
  text = text.replace(
      tts.substitutionDictionaryRegexp_,
      function(symbol) {
        return ' ' + tts.SUBSTITUTION_DICTIONARY[symbol] + ' ';
      });

  // Handle single characters that we want to make sure we pronounce.
  if (text.length === 1) {
    var messageId = tts.CHARACTER_DICTIONARY[text];
    if (!messageId) {
      return text.toUpperCase();
    }
    return (new goog.i18n.MessageFormat(cvox.ChromeVox.msgs.getMsg(messageId)))
        .format({'COUNT': 1});
  }

  // Substitute all words in the pronunciation dictionary. A single regexp
  // matches all words simultaneously and the callback looks up the
  // replacement; dictionary keys are lowercase, hence toLowerCase().
  text = text.replace(
      tts.pronunciationDictionaryRegexp_,
      function(word) {
        return tts.PRONUNCIATION_DICTIONARY[word.toLowerCase()];
      });

  // Special case for google+, where the punctuation must be pronounced.
  text = text.replace(/google\+/ig, 'google plus');

  // Expand all repeated characters.
  text = text.replace(tts.repetitionRegexp_, tts.repetitionReplace_);

  // If there's no lower case letters, and at least two spaces, skip spacing
  // text.
  var skipSpacing =
      !/[a-z]/.test(text) && text.indexOf(' ') !== text.lastIndexOf(' ');

  // Convert all-caps words to lowercase if they don't look like acronyms,
  // otherwise add a space before all-caps words so that all-caps words in
  // the middle of camelCase will be separated.
  return text.replace(/[A-Z]+/g, function(word) {
    // If a word contains vowels and is more than 3 letters long, it is
    // probably a real word and not just an abbreviation. Convert it to lower
    // case and speak it normally.
    if (word.length > 3 && /[AEIOUY]/.test(word)) {
      return word.toLowerCase();
    }
    if (skipSpacing) {
      return word;
    }
    // Builds spaced-out camelCased/all CAPS words so they sound better when
    // spoken by TTS engines.
    return ' ' + word.split('').join(' ');
  });
};
291
292
/**
 * Name of the TTS rate property.
 * @type {string}
 */
cvox.AbstractTts.RATE = 'rate';

/**
 * Name of the TTS pitch property.
 * @type {string}
 */
cvox.AbstractTts.PITCH = 'pitch';

/**
 * Name of the TTS volume property.
 * @type {string}
 */
cvox.AbstractTts.VOLUME = 'volume';

/**
 * Name of the TTS language property.
 * @type {string}
 */
cvox.AbstractTts.LANG = 'lang';

/**
 * Name of the TTS relative rate property.
 * @type {string}
 */
cvox.AbstractTts.RELATIVE_RATE = 'relativeRate';

/**
 * Name of the TTS relative pitch property.
 * @type {string}
 */
cvox.AbstractTts.RELATIVE_PITCH = 'relativePitch';

/**
 * Name of the TTS relative volume property.
 * @type {string}
 */
cvox.AbstractTts.RELATIVE_VOLUME = 'relativeVolume';

/**
 * Name of the TTS color property (for the lens display).
 * @type {string}
 */
cvox.AbstractTts.COLOR = 'color';

/**
 * Name of the TTS CSS font-weight property (for the lens display).
 * @type {string}
 */
cvox.AbstractTts.FONT_WEIGHT = 'fontWeight';

/**
 * Name of the TTS punctuation-echo property.
 * @type {string}
 */
cvox.AbstractTts.PUNCTUATION_ECHO = 'punctuationEcho';

/**
 * Name of the TTS pause property.
 * @type {string}
 */
cvox.AbstractTts.PAUSE = 'pause';
319
/**
 * Personality used for annotations: text that ChromeVox speaks to elaborate
 * on a user interface element but that is not shown on-screen.
 * @type {Object}
 */
cvox.AbstractTts.PERSONALITY_ANNOTATION = {
  'relativePitch': -0.25,
  // TODO(rshearer): Added this color change for I/O presentation.
  'color': 'yellow',
  'punctuationEcho': 'none'
};


/**
 * Personality used for announcements: text that ChromeVox speaks which is
 * not tied to any user interface element.
 * @type {Object}
 */
cvox.AbstractTts.PERSONALITY_ANNOUNCEMENT = {
  'punctuationEcho': 'none'
};

/**
 * Personality used for alerts coming from the system, such as battery level
 * warnings.
 * @type {Object}
 */
cvox.AbstractTts.PERSONALITY_SYSTEM_ALERT = {
  'punctuationEcho': 'none',
  'doNotInterrupt': true
};

/**
 * Personality used for an aside, i.e. text in parentheses.
 * @type {Object}
 */
cvox.AbstractTts.PERSONALITY_ASIDE = {
  'relativePitch': -0.1,
  'color': '#669'
};


/**
 * Personality used for capital letters.
 * @type {Object}
 */
cvox.AbstractTts.PERSONALITY_CAPITAL = {
  'relativePitch': 0.6
};


/**
 * Personality used for deleted text.
 * @type {Object}
 */
cvox.AbstractTts.PERSONALITY_DELETED = {
  'punctuationEcho': 'none',
  'relativePitch': -0.6
};


/**
 * Personality used for quoted text.
 * @type {Object}
 */
cvox.AbstractTts.PERSONALITY_QUOTE = {
  'relativePitch': 0.1,
  'color': '#b6b',
  'fontWeight': 'bold'
};


/**
 * Personality used for strong or bold text.
 * @type {Object}
 */
cvox.AbstractTts.PERSONALITY_STRONG = {
  'relativePitch': 0.1,
  'color': '#b66',
  'fontWeight': 'bold'
};


/**
 * Personality used for emphasized or italicized text.
 * @type {Object}
 */
cvox.AbstractTts.PERSONALITY_EMPHASIS = {
  'relativeVolume': 0.1,
  'relativeRate': -0.1,
  'color': '#6bb',
  'fontWeight': 'bold'
};
413
414
/**
 * Whether the TTS is being debugged. Set to true here.
 * @type {boolean}
 */
cvox.AbstractTts.DEBUG = true;
420
421
/**
 * Queue mode in which a new utterance interrupts the current one.
 * @type {number}
 */
cvox.AbstractTts.QUEUE_MODE_FLUSH = 0;


/**
 * Queue mode in which a new utterance does not interrupt the current one.
 * @type {number}
 */
cvox.AbstractTts.QUEUE_MODE_QUEUE = 1;


/**
 * Queue mode in which a new utterance flushes all utterances of the same
 * category (as set by properties['category']).
 * @type {number}
 */
cvox.AbstractTts.QUEUE_MODE_CATEGORY_FLUSH = 2;
442
443
/**
 * Character dictionary. Maps a single character to the message id of its
 * human readable name; the id is resolved through
 * cvox.ChromeVox.msgs.getMsg. preprocess uses it when the whole utterance
 * is a single character, and repetitionReplace_ uses it to describe runs of
 * a repeated character.
 * @type {Object.<string, string>}
 */
cvox.AbstractTts.CHARACTER_DICTIONARY = {
  ' ': 'space',
  '`': 'backtick',
  '~': 'tilde',
  '!': 'exclamation',
  '@': 'at',
  '#': 'pound',
  '$': 'dollar',
  '%': 'percent',
  '^': 'caret',
  '&': 'ampersand',
  '*': 'asterisk',
  '(': 'open_paren',
  ')': 'close_paren',
  '-': 'dash',
  '_': 'underscore',
  '=': 'equals',
  '+': 'plus',
  '[': 'left_bracket',
  ']': 'right_bracket',
  '{': 'left_brace',
  '}': 'right_brace',
  '|': 'pipe',
  ';': 'semicolon',
  ':': 'colon',
  ',': 'comma',
  '.': 'dot',
  '<': 'less_than',
  '>': 'greater_than',
  '/': 'slash',
  '?': 'question_mark',
  '"': 'quote',
  '\'': 'apostrophe',
  '\t': 'tab',
  '\r': 'return',
  '\n': 'new_line',
  '\\': 'backslash'
};
487
488
/**
 * Pronunciation dictionary. Maps a word to a spelling that most TTS engines
 * will pronounce correctly. Keys must be lowercase, because preprocess
 * lower-cases each matched word before looking it up. This particular
 * dictionary only handles letters and numbers, no symbols.
 * @type {Object.<string, string>}
 */
cvox.AbstractTts.PRONUNCIATION_DICTIONARY = {
  'admob': 'ad-mob',
  'adsense': 'ad-sense',
  'adwords': 'ad-words',
  'angularjs': 'angular j s',
  'bcc': 'B C C',
  'cc': 'C C',
  'chromevox': 'chrome vox',
  'cr48': 'C R 48',
  'ctrl': 'control',
  'doubleclick': 'double-click',
  'gmail': 'gee mail',
  'gtalk': 'gee talk',
  'http': 'H T T P',
  'https': 'H T T P S',
  'igoogle': 'eye google',
  'pagerank': 'page-rank',
  'username': 'user-name',
  'www': 'W W W',
  'youtube': 'you tube'
};
517
518
/**
 * Regexp matching every word of PRONUNCIATION_DICTIONARY on word
 * boundaries. Left undefined here and compiled by the constructor the first
 * time an instance is created.
 * @type {RegExp}
 * @private
 */
cvox.AbstractTts.pronunciationDictionaryRegexp_;
525
526
/**
 * Substitution dictionary. preprocess ALWAYS replaces these symbols or
 * patterns with their spoken form wherever they occur, so only list unicode
 * characters and characters whose meaning never depends on context.
 *
 * For example, do not include '$' here because $2 should be read as
 * "two dollars".
 * @type {Object.<string, string>}
 */
cvox.AbstractTts.SUBSTITUTION_DICTIONARY = {
  '://': 'colon slash slash',
  '\u00bc': 'one fourth',
  '\u00bd': 'one half',
  '\u2190': 'left arrow',
  '\u2191': 'up arrow',
  '\u2192': 'right arrow',
  '\u2193': 'down arrow',
  '\u21d0': 'left double arrow',
  '\u21d1': 'up double arrow',
  '\u21d2': 'right double  arrow',
  '\u21d3': 'down double arrow',
  '\u21e6': 'left arrow',
  '\u21e7': 'up arrow',
  '\u21e8': 'right arrow',
  '\u21e9': 'down arrow',
  '\u2303': 'control',
  '\u2318': 'command',
  '\u2325': 'option',
  '\u25b2': 'up triangle',
  '\u25b3': 'up triangle',
  '\u25b4': 'up triangle',
  '\u25b5': 'up triangle',
  '\u25b6': 'right triangle',
  '\u25b7': 'right triangle',
  '\u25b8': 'right triangle',
  '\u25b9': 'right triangle',
  '\u25ba': 'right pointer',
  '\u25bb': 'right pointer',
  '\u25bc': 'down triangle',
  '\u25bd': 'down triangle',
  '\u25be': 'down triangle',
  '\u25bf': 'down triangle',
  '\u25c0': 'left triangle',
  '\u25c1': 'left triangle',
  '\u25c2': 'left triangle',
  '\u25c3': 'left triangle',
  '\u25c4': 'left pointer',
  '\u25c5': 'left pointer',
  '\uf8ff': 'apple'
};
577
578
/**
 * Regexp matching every symbol of SUBSTITUTION_DICTIONARY. Left undefined
 * here and compiled by the constructor the first time an instance is
 * created.
 * @type {RegExp}
 * @private
 */
cvox.AbstractTts.substitutionDictionaryRegexp_;
585
586
/**
 * Repetition filter regexp. Matches a run of three or more occurrences of
 * the same punctuation character from the listed set.
 * @type {RegExp}
 * @private
 */
cvox.AbstractTts.repetitionRegexp_ =
    /([-\/\\|!@#$%^&*\(\)=_+\[\]\{\}.?;'":<>])\1{2,}/g;
594
595
/**
 * Builds the spoken description of a run of one repeated character. Meant
 * to be passed as the replacement function to string.replace. The first
 * character of the match selects the message from CHARACTER_DICTIONARY, and
 * the length of the match is supplied to it as COUNT.
 * @param {string} match The matching string.
 * @return {string} The description, padded with a space on each side.
 * @private
 */
cvox.AbstractTts.repetitionReplace_ = function(match) {
  var repeatCount = match.length;
  var messageId = cvox.AbstractTts.CHARACTER_DICTIONARY[match[0]];
  var formatter =
      new goog.i18n.MessageFormat(cvox.ChromeVox.msgs.getMsg(messageId));
  var description = formatter.format({'COUNT': repeatCount});
  return ' ' + description + ' ';
};
609
610
/**
 * Looks up the default value of a TTS property in propertyDefault.
 * @override
 */
cvox.AbstractTts.prototype.getDefaultProperty = function(property) {
  var defaults = this.propertyDefault;
  return defaults[property];
};
617