// abstract_tts.js revision 116680a4aac90f2aa7413d9095a592090648e557
// Copyright 2014 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

/**
 * @fileoverview Base class for Text-to-Speech engines that actually transform
 * text to speech.
 *
 */

goog.provide('cvox.AbstractTts');

goog.require('cvox.TtsInterface');
goog.require('goog.i18n.MessageFormat');

/**
 * Creates a new instance.
 *
 * Besides initializing the per-instance property tables, the first instance
 * to be constructed also builds the shared pronunciation and substitution
 * regular expressions from their dictionaries.
 * @constructor
 * @implements {cvox.TtsInterface}
 */
cvox.AbstractTts = function() {
  this.ttsProperties = {};

  /**
   * Default value for TTS properties.
   * Note that these as well as the subsequent properties might be different
   * on different host platforms (like Chrome, Android, etc.).
   * @type {{pitch : number,
   *         rate: number,
   *         volume: number}}
   * @protected
   */
  this.propertyDefault = {
    'rate': 0.5,
    'pitch': 0.5,
    'volume': 0.5
  };

  /**
   * Min value for TTS properties.
   * @type {{pitch : number,
   *         rate: number,
   *         volume: number}}
   * @protected
   */
  this.propertyMin = {
    'rate': 0.0,
    'pitch': 0.0,
    'volume': 0.0
  };

  /**
   * Max value for TTS properties.
   * @type {{pitch : number,
   *         rate: number,
   *         volume: number}}
   * @protected
   */
  this.propertyMax = {
    'rate': 1.0,
    'pitch': 1.0,
    'volume': 1.0
  };

  /**
   * Step value for TTS properties.
   * @type {{pitch : number,
   *         rate: number,
   *         volume: number}}
   * @protected
   */
  this.propertyStep = {
    'rate': 0.1,
    'pitch': 0.1,
    'volume': 0.1
  };

  // The dictionary regexps live on the constructor (not the instance), so
  // they are only built once, by whichever instance is created first.
  if (cvox.AbstractTts.pronunciationDictionaryRegexp_ == undefined) {
    // Matches all words in the pronunciation dictionary on word boundaries,
    // ignoring case.
    cvox.AbstractTts.pronunciationDictionaryRegexp_ =
        cvox.AbstractTts.buildDictionaryRegexp_(
            cvox.AbstractTts.PRONUNCIATION_DICTIONARY, true);
  }

  if (cvox.AbstractTts.substitutionDictionaryRegexp_ == undefined) {
    // Matches all symbols in the substitution dictionary, anywhere in the
    // text.
    cvox.AbstractTts.substitutionDictionaryRegexp_ =
        cvox.AbstractTts.buildDictionaryRegexp_(
            cvox.AbstractTts.SUBSTITUTION_DICTIONARY, false);
  }
};


/**
 * Default TTS properties for this TTS engine.
 * @type {Object}
 * @protected
 */
cvox.AbstractTts.prototype.ttsProperties;


/** @override */
cvox.AbstractTts.prototype.speak = function(textString, queueMode, properties) {
  // The base class produces no speech; it only returns itself.
  return this;
};


/** @override */
cvox.AbstractTts.prototype.isSpeaking = function() {
  return false;
};


/** @override */
cvox.AbstractTts.prototype.stop = function() {
};


/** @override */
cvox.AbstractTts.prototype.addCapturingEventListener = function(listener) { };


/**
 * Moves the named TTS property one step up or down and clamps the result to
 * the property's [min, max] range.
 *
 * If the property has no numeric value yet, stepping starts from its default
 * value. Properties without a configured step are left untouched, so that NaN
 * is never stored in ttsProperties.
 * @override
 */
cvox.AbstractTts.prototype.increaseOrDecreaseProperty =
    function(propertyName, increase) {
  var min = this.propertyMin[propertyName];
  var max = this.propertyMax[propertyName];
  var step = this.propertyStep[propertyName];
  if (typeof step != 'number') {
    // Unknown property: there is no step to apply.
    return;
  }
  var current = this.ttsProperties[propertyName];
  if (typeof current != 'number' || isNaN(current)) {
    // Nothing usable stored yet; start from the engine default.
    current = this.propertyDefault[propertyName];
  }
  current = increase ? current + step : current - step;
  this.ttsProperties[propertyName] = Math.max(Math.min(current, max), min);
};


/**
 * Merges the given properties with the default ones. Always returns a
 * new object, so that you can safely modify the result of mergeProperties
 * without worrying that you're modifying an object used elsewhere.
 *
 * Absolute volume, pitch, rate (numbers) and lang (string) in
 * {@code properties} override the values from ttsProperties. Relative volume,
 * pitch and rate are then added to the corresponding absolute value and the
 * sum is clamped to the property's [min, max] range. Any other key of
 * {@code properties} is copied unless the merged object already has it.
 * @param {Object=} properties The properties to merge with the current ones.
 * @return {Object} The merged properties.
 * @protected
 */
cvox.AbstractTts.prototype.mergeProperties = function(properties) {
  var mergedProperties = {};
  var p;
  if (this.ttsProperties) {
    for (p in this.ttsProperties) {
      mergedProperties[p] = this.ttsProperties[p];
    }
  }
  if (!properties) {
    return mergedProperties;
  }

  var tts = cvox.AbstractTts;
  if (typeof(properties[tts.VOLUME]) == 'number') {
    mergedProperties[tts.VOLUME] = properties[tts.VOLUME];
  }
  if (typeof(properties[tts.PITCH]) == 'number') {
    mergedProperties[tts.PITCH] = properties[tts.PITCH];
  }
  if (typeof(properties[tts.RATE]) == 'number') {
    mergedProperties[tts.RATE] = properties[tts.RATE];
  }
  if (typeof(properties[tts.LANG]) == 'string') {
    mergedProperties[tts.LANG] = properties[tts.LANG];
  }

  var context = this;
  // Adds the relative value |rel| onto the absolute value |abs| and clamps
  // the result; does nothing unless both are numbers.
  var mergeRelativeProperty = function(abs, rel) {
    if (typeof(properties[rel]) == 'number' &&
        typeof(mergedProperties[abs]) == 'number') {
      mergedProperties[abs] += properties[rel];
      var min = context.propertyMin[abs];
      var max = context.propertyMax[abs];
      if (mergedProperties[abs] > max) {
        mergedProperties[abs] = max;
      } else if (mergedProperties[abs] < min) {
        mergedProperties[abs] = min;
      }
    }
  };

  mergeRelativeProperty(tts.VOLUME, tts.RELATIVE_VOLUME);
  mergeRelativeProperty(tts.PITCH, tts.RELATIVE_PITCH);
  mergeRelativeProperty(tts.RATE, tts.RELATIVE_RATE);

  // Copy over everything that was not handled above.
  for (p in properties) {
    if (!mergedProperties.hasOwnProperty(p)) {
      mergedProperties[p] = properties[p];
    }
  }

  return mergedProperties;
};


/**
 * Method to preprocess text to be spoken properly by a speech
 * engine.
 *
 * 1. Replace any single character with a description of that character.
 *
 * 2. Convert all-caps words to lowercase if they don't look like an
 *    acronym / abbreviation.
 *
 * In addition, symbols from the substitution dictionary and words from the
 * pronunciation dictionary are replaced, and runs of three or more identical
 * punctuation characters are replaced by a description with a count.
 *
 * @param {string} text A text string to be spoken.
 * @param {Object=} properties Out parameter populated with how to speak the
 *     string. When omitted, no speech properties are recorded.
 * @return {string} The text formatted in a way that will sound better by
 *     most speech engines.
 * @protected
 */
cvox.AbstractTts.prototype.preprocess = function(text, properties) {
  // A single capital letter is spoken with the "capital" personality. The out
  // parameter is optional, so only populate it when the caller supplied one.
  if (properties && text.length == 1 && text >= 'A' && text <= 'Z') {
    for (var prop in cvox.AbstractTts.PERSONALITY_CAPITAL) {
      properties[prop] = cvox.AbstractTts.PERSONALITY_CAPITAL[prop];
    }
  }

  // Substitute all symbols in the substitution dictionary. This is pretty
  // efficient because we use a single regexp that matches all symbols
  // simultaneously.
  text = text.replace(
      cvox.AbstractTts.substitutionDictionaryRegexp_,
      function(symbol) {
        return ' ' + cvox.AbstractTts.SUBSTITUTION_DICTIONARY[symbol] + ' ';
      });

  // Handle single characters that we want to make sure we pronounce.
  if (text.length == 1) {
    return cvox.AbstractTts.CHARACTER_DICTIONARY[text] ?
        (new goog.i18n.MessageFormat(cvox.ChromeVox.msgs.getMsg(
                cvox.AbstractTts.CHARACTER_DICTIONARY[text])))
            .format({'COUNT': 1}) :
        text.toUpperCase();
  }

  // Substitute all words in the pronunciation dictionary. This is pretty
  // efficient because we use a single regexp that matches all words
  // simultaneously, and it calls a function with each match, which we can
  // use to look up the replacement in our dictionary.
  text = text.replace(
      cvox.AbstractTts.pronunciationDictionaryRegexp_,
      function(word) {
        return cvox.AbstractTts.PRONUNCIATION_DICTIONARY[word.toLowerCase()];
      });

  // Special case for google+, where the punctuation must be pronounced.
  text = text.replace(/google\+/ig, 'google plus');

  // Expand all repeated characters.
  text = text.replace(
      cvox.AbstractTts.repetitionRegexp_, cvox.AbstractTts.repetitionReplace_);

  // If there's no lower case letters, and at least two spaces, skip spacing
  // text.
  var skipSpacing = false;
  if (!text.match(/[a-z]+/) && text.indexOf(' ') != text.lastIndexOf(' ')) {
    skipSpacing = true;
  }

  // Convert all-caps words to lowercase if they don't look like acronyms,
  // otherwise add a space before all-caps words so that all-caps words in
  // the middle of camelCase will be separated.
  text = text.replace(/[A-Z]+/g, function(word) {
    // If a word contains vowels and is more than 3 letters long, it is
    // probably a real word and not just an abbreviation. Convert it to lower
    // case and speak it normally.
    if ((word.length > 3) && word.match(/([AEIOUY])/g)) {
      return word.toLowerCase();
    } else if (!skipSpacing) {
      // Builds spaced-out camelCased/all CAPS words so they sound better when
      // spoken by TTS engines.
      return ' ' + word.split('').join(' ');
    } else {
      return word;
    }
  });

  return text;
};


/** TTS rate property. @type {string} */
cvox.AbstractTts.RATE = 'rate';
/** TTS pitch property. @type {string} */
cvox.AbstractTts.PITCH = 'pitch';
/** TTS volume property. @type {string} */
cvox.AbstractTts.VOLUME = 'volume';
/** TTS language property. @type {string} */
cvox.AbstractTts.LANG = 'lang';

/** TTS relative rate property. @type {string} */
cvox.AbstractTts.RELATIVE_RATE = 'relativeRate';
/** TTS relative pitch property. @type {string} */
cvox.AbstractTts.RELATIVE_PITCH = 'relativePitch';
/** TTS relative volume property. @type {string} */
cvox.AbstractTts.RELATIVE_VOLUME = 'relativeVolume';

/** TTS color property (for the lens display). @type {string} */
cvox.AbstractTts.COLOR = 'color';
/** TTS CSS font-weight property (for the lens display). @type {string} */
cvox.AbstractTts.FONT_WEIGHT = 'fontWeight';

/** TTS punctuation-echo property. @type {string} */
cvox.AbstractTts.PUNCTUATION_ECHO = 'punctuationEcho';

/** TTS pause property. @type {string} */
cvox.AbstractTts.PAUSE = 'pause';

/**
 * TTS personality for annotations - text spoken by ChromeVox that
 * elaborates on a user interface element but isn't displayed on-screen.
 * @type {Object}
 */
cvox.AbstractTts.PERSONALITY_ANNOTATION = {
  'relativePitch': -0.25,
  // TODO:(rshearer) Added this color change for I/O presentation.
  'color': 'yellow',
  'punctuationEcho': 'none'
};


/**
 * TTS personality for announcements - text spoken by ChromeVox that
 * isn't tied to any user interface elements.
 * @type {Object}
 */
cvox.AbstractTts.PERSONALITY_ANNOUNCEMENT = {
  'punctuationEcho': 'none'
};

/**
 * TTS personality for alerts from the system, such as battery level
 * warnings.
 * @type {Object}
 */
cvox.AbstractTts.PERSONALITY_SYSTEM_ALERT = {
  'punctuationEcho': 'none',
  'doNotInterrupt': true
};

/**
 * TTS personality for an aside - text in parentheses.
 * @type {Object}
 */
cvox.AbstractTts.PERSONALITY_ASIDE = {
  'relativePitch': -0.1,
  'color': '#669'
};


/**
 * TTS personality for capital letters.
 * @type {Object}
 */
cvox.AbstractTts.PERSONALITY_CAPITAL = {
  'relativePitch': 0.6
};


/**
 * TTS personality for deleted text.
 * @type {Object}
 */
cvox.AbstractTts.PERSONALITY_DELETED = {
  'punctuationEcho': 'none',
  'relativePitch': -0.6
};


/**
 * TTS personality for quoted text.
 * @type {Object}
 */
cvox.AbstractTts.PERSONALITY_QUOTE = {
  'relativePitch': 0.1,
  'color': '#b6b',
  'fontWeight': 'bold'
};


/**
 * TTS personality for strong or bold text.
 * @type {Object}
 */
cvox.AbstractTts.PERSONALITY_STRONG = {
  'relativePitch': 0.1,
  'color': '#b66',
  'fontWeight': 'bold'
};


/**
 * TTS personality for emphasis or italicized text.
 * @type {Object}
 */
cvox.AbstractTts.PERSONALITY_EMPHASIS = {
  'relativeVolume': 0.1,
  'relativeRate': -0.1,
  'color': '#6bb',
  'fontWeight': 'bold'
};


/**
 * Flag indicating if the TTS is being debugged.
 * @type {boolean}
 */
cvox.AbstractTts.DEBUG = true;


/**
 * Speech queue mode that interrupts the current utterance.
 * @type {number}
 */
cvox.AbstractTts.QUEUE_MODE_FLUSH = 0;


/**
 * Speech queue mode that does not interrupt the current utterance.
 * @type {number}
 */
cvox.AbstractTts.QUEUE_MODE_QUEUE = 1;


/**
 * Speech queue mode that flushes all utterances of the same category
 * (as set by properties['category']).
 * @type {number}
 */
cvox.AbstractTts.QUEUE_MODE_CATEGORY_FLUSH = 2;


/**
 * Character dictionary. These symbols are replaced with their human readable
 * equivalents. This replacement only occurs for single character utterances.
 * The values are message ids that are looked up through
 * cvox.ChromeVox.msgs.getMsg before being spoken.
 * @type {Object.<string, string>}
 */
cvox.AbstractTts.CHARACTER_DICTIONARY = {
  ' ': 'space',
  '`': 'backtick',
  '~': 'tilde',
  '!': 'exclamation',
  '@': 'at',
  '#': 'pound',
  '$': 'dollar',
  '%': 'percent',
  '^': 'caret',
  '&': 'ampersand',
  '*': 'asterisk',
  '(': 'open_paren',
  ')': 'close_paren',
  '-': 'dash',
  '_': 'underscore',
  '=': 'equals',
  '+': 'plus',
  '[': 'left_bracket',
  ']': 'right_bracket',
  '{': 'left_brace',
  '}': 'right_brace',
  '|': 'pipe',
  ';': 'semicolon',
  ':': 'colon',
  ',': 'comma',
  '.': 'dot',
  '<': 'less_than',
  '>': 'greater_than',
  '/': 'slash',
  '?': 'question_mark',
  '"': 'quote',
  '\'': 'apostrophe',
  '\t': 'tab',
  '\r': 'return',
  '\n': 'new_line',
  '\\': 'backslash'
};


/**
 * Pronunciation dictionary. Each key must be lowercase, its replacement
 * should be spelled out the way most TTS engines will pronounce it
 * correctly. This particular dictionary only handles letters and numbers,
 * no symbols.
 * @type {Object.<string, string>}
 */
cvox.AbstractTts.PRONUNCIATION_DICTIONARY = {
  'admob': 'ad-mob',
  'adsense': 'ad-sense',
  'adwords': 'ad-words',
  'angularjs': 'angular j s',
  'bcc': 'B C C',
  'cc': 'C C',
  'chromevox': 'chrome vox',
  'cr48': 'C R 48',
  'ctrl': 'control',
  'doubleclick': 'double-click',
  'gmail': 'gee mail',
  'gtalk': 'gee talk',
  'http': 'H T T P',
  'https': 'H T T P S',
  'igoogle': 'eye google',
  'pagerank': 'page-rank',
  'username': 'user-name',
  'www': 'W W W',
  'youtube': 'you tube'
};


/**
 * Pronunciation dictionary regexp.
 * @type {RegExp}
 * @private
 */
cvox.AbstractTts.pronunciationDictionaryRegexp_;


/**
 * Substitution dictionary. These symbols or patterns are ALWAYS substituted
 * whenever they occur, so this should be reserved only for unicode characters
 * and characters that never have any different meaning in context.
 *
 * For example, do not include '$' here because $2 should be read as
 * "two dollars".
 * @type {Object.<string, string>}
 */
cvox.AbstractTts.SUBSTITUTION_DICTIONARY = {
  '://': 'colon slash slash',
  '\u00bc': 'one fourth',
  '\u00bd': 'one half',
  '\u2190': 'left arrow',
  '\u2191': 'up arrow',
  '\u2192': 'right arrow',
  '\u2193': 'down arrow',
  '\u21d0': 'left double arrow',
  '\u21d1': 'up double arrow',
  '\u21d2': 'right double arrow',
  '\u21d3': 'down double arrow',
  '\u21e6': 'left arrow',
  '\u21e7': 'up arrow',
  '\u21e8': 'right arrow',
  '\u21e9': 'down arrow',
  '\u2303': 'control',
  '\u2318': 'command',
  '\u2325': 'option',
  '\u25b2': 'up triangle',
  '\u25b3': 'up triangle',
  '\u25b4': 'up triangle',
  '\u25b5': 'up triangle',
  '\u25b6': 'right triangle',
  '\u25b7': 'right triangle',
  '\u25b8': 'right triangle',
  '\u25b9': 'right triangle',
  '\u25ba': 'right pointer',
  '\u25bb': 'right pointer',
  '\u25bc': 'down triangle',
  '\u25bd': 'down triangle',
  '\u25be': 'down triangle',
  '\u25bf': 'down triangle',
  '\u25c0': 'left triangle',
  '\u25c1': 'left triangle',
  '\u25c2': 'left triangle',
  '\u25c3': 'left triangle',
  '\u25c4': 'left pointer',
  '\u25c5': 'left pointer',
  '\uf8ff': 'apple'
};


/**
 * Substitution dictionary regexp.
 * @type {RegExp}
 * @private
 */
cvox.AbstractTts.substitutionDictionaryRegexp_;


/**
 * Builds a global, case-insensitive regular expression with a single
 * capturing group that matches any key of the given dictionary. Regexp
 * metacharacters in the keys are escaped so that every key is matched
 * literally.
 * @param {Object.<string, string>} dictionary The dictionary whose keys
 *     should be matched.
 * @param {boolean} wholeWords Whether matches must start and end on a word
 *     boundary.
 * @return {RegExp} The regular expression matching all keys.
 * @private
 */
cvox.AbstractTts.buildDictionaryRegexp_ = function(dictionary, wholeWords) {
  var alternatives = [];
  for (var key in dictionary) {
    alternatives.push(key.replace(/[.*+?^${}()|[\]\\]/g, '\\$&'));
  }
  var expr = '(' + alternatives.join('|') + ')';
  if (wholeWords) {
    expr = '\\b' + expr + '\\b';
  }
  return new RegExp(expr, 'ig');
};


/**
 * Repetition filter regexp. Matches a punctuation character that is repeated
 * three or more times in a row.
 * @type {RegExp}
 * @private
 */
cvox.AbstractTts.repetitionRegexp_ =
    /([-\/\\|!@#$%^&*\(\)=_+\[\]\{\}.?;'":<>])\1{2,}/g;


/**
 * Constructs a description of a repeated character. Use as a param to
 * string.replace.
 * @param {string} match The matching string.
 * @return {string} The description.
 * @private
 */
cvox.AbstractTts.repetitionReplace_ = function(match) {
  // The match consists of one character repeated, so its length is the count.
  var count = match.length;
  return ' ' + (new goog.i18n.MessageFormat(cvox.ChromeVox.msgs.getMsg(
      cvox.AbstractTts.CHARACTER_DICTIONARY[match[0]])))
          .format({'COUNT': count}) + ' ';
};


/**
 * @override
 */
cvox.AbstractTts.prototype.getDefaultProperty = function(property) {
  return this.propertyDefault[property];
};