// abstract_tts.js revision cedac228d2dd51db4b79ea1e72c7f249408ee061
// Copyright 2014 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

/**
 * @fileoverview Base class for Text-to-Speech engines that actually transform
 * text to speech.
 *
 */

goog.provide('cvox.AbstractTts');

goog.require('cvox.TtsInterface');
goog.require('goog.i18n.MessageFormat');

/**
 * Creates a new instance.
 *
 * Sets up an empty {@code ttsProperties} object plus the default, min, max
 * and step tables for the 'rate', 'pitch' and 'volume' properties. The first
 * time any instance is constructed, the shared pronunciation and substitution
 * regexps are also built from their dictionaries and cached on the
 * constructor.
 * @constructor
 * @implements {cvox.TtsInterface}
 */
cvox.AbstractTts = function() {
  this.ttsProperties = {};

  /**
   * Default value for TTS properties.
   * Note that these as well as the subsequent properties might be different
   * on different host platforms (like Chrome, Android, etc.).
   * @type {{pitch : number,
   *         rate: number,
   *         volume: number}}
   * @protected
   */
  this.propertyDefault = {
    'rate': 0.5,
    'pitch': 0.5,
    'volume': 0.5
  };

  /**
   * Min value for TTS properties.
   * @type {{pitch : number,
   *         rate: number,
   *         volume: number}}
   * @protected
   */
  this.propertyMin = {
    'rate': 0.0,
    'pitch': 0.0,
    'volume': 0.0
  };

  /**
   * Max value for TTS properties.
   * @type {{pitch : number,
   *         rate: number,
   *         volume: number}}
   * @protected
   */
  this.propertyMax = {
    'rate': 1.0,
    'pitch': 1.0,
    'volume': 1.0
  };

  /**
   * Step value for TTS properties.
   * @type {{pitch : number,
   *         rate: number,
   *         volume: number}}
   * @protected
   */
  this.propertyStep = {
    'rate': 0.1,
    'pitch': 0.1,
    'volume': 0.1
  };

  var tts = cvox.AbstractTts;

  if (!tts.pronunciationDictionaryRegexp_) {
    // Create an expression that matches all words in the pronunciation
    // dictionary on word boundaries, ignoring case. Keys are escaped so that
    // a key containing a regexp metacharacter is still matched literally.
    var words =
        Object.keys(tts.PRONUNCIATION_DICTIONARY).map(tts.escapeRegExp_);
    tts.pronunciationDictionaryRegexp_ =
        new RegExp('\\b(' + words.join('|') + ')\\b', 'ig');
  }

  if (!tts.substitutionDictionaryRegexp_) {
    // Create an expression that matches all symbols in the substitution
    // dictionary (no word-boundary requirement). Keys are escaped as above.
    var symbols =
        Object.keys(tts.SUBSTITUTION_DICTIONARY).map(tts.escapeRegExp_);
    tts.substitutionDictionaryRegexp_ =
        new RegExp('(' + symbols.join('|') + ')', 'ig');
  }
};


/**
 * Default TTS properties for this TTS engine.
 * @type {Object}
 * @protected
 */
cvox.AbstractTts.prototype.ttsProperties;


/**
 * Base implementation: does nothing and returns this instance.
 * @override
 */
cvox.AbstractTts.prototype.speak = function(textString, queueMode, properties) {
  return this;
};


/**
 * Base implementation: always reports that nothing is being spoken.
 * @override
 */
cvox.AbstractTts.prototype.isSpeaking = function() {
  return false;
};


/**
 * Base implementation: does nothing.
 * @override
 */
cvox.AbstractTts.prototype.stop = function() {
};


/**
 * Base implementation: ignores the listener.
 * @override
 */
cvox.AbstractTts.prototype.addCapturingEventListener = function(listener) { };


/**
 * Moves the named property in {@code ttsProperties} one step up or down and
 * clamps the result to the [propertyMin, propertyMax] range for that
 * property.
 *
 * If the property currently holds no numeric value, the value from
 * {@code propertyDefault} is used as the starting point. If there is no
 * numeric starting point or no numeric step for the property, nothing is
 * changed (rather than storing NaN).
 * @override
 */
cvox.AbstractTts.prototype.increaseOrDecreaseProperty =
    function(propertyName, increase) {
  var min = this.propertyMin[propertyName];
  var max = this.propertyMax[propertyName];
  var step = this.propertyStep[propertyName];
  var current = this.ttsProperties[propertyName];
  if (typeof current != 'number' || isNaN(current)) {
    current = this.propertyDefault[propertyName];
  }
  if (typeof current != 'number' || typeof step != 'number') {
    return;
  }
  current = increase ? current + step : current - step;
  this.ttsProperties[propertyName] = Math.max(Math.min(current, max), min);
};


/**
 * Merges the given properties with the default ones. Always returns a
 * new object, so that you can safely modify the result of mergeProperties
 * without worrying that you're modifying an object used elsewhere.
 *
 * Only numeric volume/pitch/rate values and a string lang value are copied
 * from {@code properties}. Numeric relativeVolume/relativePitch/relativeRate
 * values are then added to the corresponding merged absolute value, and the
 * sum is clamped to that property's min/max.
 * @param {Object=} properties The properties to merge with the current ones.
 * @return {Object} The merged properties.
 * @protected
 */
cvox.AbstractTts.prototype.mergeProperties = function(properties) {
  var mergedProperties = {};
  var p;
  if (this.ttsProperties) {
    for (p in this.ttsProperties) {
      mergedProperties[p] = this.ttsProperties[p];
    }
  }
  if (properties) {
    var tts = cvox.AbstractTts;
    if (typeof(properties[tts.VOLUME]) == 'number') {
      mergedProperties[tts.VOLUME] = properties[tts.VOLUME];
    }
    if (typeof(properties[tts.PITCH]) == 'number') {
      mergedProperties[tts.PITCH] = properties[tts.PITCH];
    }
    if (typeof(properties[tts.RATE]) == 'number') {
      mergedProperties[tts.RATE] = properties[tts.RATE];
    }
    if (typeof(properties[tts.LANG]) == 'string') {
      mergedProperties[tts.LANG] = properties[tts.LANG];
    }

    var context = this;
    // Applies a relative adjustment to an absolute property; skipped unless
    // both the adjustment and the current absolute value are numbers.
    var mergeRelativeProperty = function(abs, rel) {
      if (typeof(properties[rel]) == 'number' &&
          typeof(mergedProperties[abs]) == 'number') {
        mergedProperties[abs] += properties[rel];
        var min = context.propertyMin[abs];
        var max = context.propertyMax[abs];
        if (mergedProperties[abs] > max) {
          mergedProperties[abs] = max;
        } else if (mergedProperties[abs] < min) {
          mergedProperties[abs] = min;
        }
      }
    };

    mergeRelativeProperty(tts.VOLUME, tts.RELATIVE_VOLUME);
    mergeRelativeProperty(tts.PITCH, tts.RELATIVE_PITCH);
    mergeRelativeProperty(tts.RATE, tts.RELATIVE_RATE);
  }

  return mergedProperties;
};


/**
 * Method to preprocess text to be spoken properly by a speech
 * engine. The steps, in order:
 *
 * 1. If the text is a single capital letter A-Z and a properties object was
 *    supplied, copy PERSONALITY_CAPITAL into it.
 *
 * 2. Replace every symbol from SUBSTITUTION_DICTIONARY with its description,
 *    padded with spaces.
 *
 * 3. If the text is (still) a single character, return its localized
 *    description from CHARACTER_DICTIONARY, or the upper-cased character if
 *    it has no entry.
 *
 * 4. Replace words from PRONUNCIATION_DICTIONARY, expand "google+", and
 *    describe runs of three or more repeated punctuation characters.
 *
 * 5. Convert all-caps words to lowercase if they don't look like an
 *    acronym / abbreviation; otherwise space their letters out, unless the
 *    text has no lowercase letters and contains at least two spaces.
 *
 * @param {string} text A text string to be spoken.
 * @param {Object=} properties Out parameter populated with how to speak the
 *     string. May be omitted, in which case nothing is populated.
 * @return {string} The text formatted in a way that will sound better by
 *     most speech engines.
 * @protected
 */
cvox.AbstractTts.prototype.preprocess = function(text, properties) {
  var tts = cvox.AbstractTts;

  // properties is optional, so only populate it when the caller passed one.
  if (properties && text.length == 1 && text >= 'A' && text <= 'Z') {
    for (var prop in tts.PERSONALITY_CAPITAL) {
      properties[prop] = tts.PERSONALITY_CAPITAL[prop];
    }
  }

  // Substitute all symbols in the substitution dictionary. This is pretty
  // efficient because we use a single regexp that matches all symbols
  // simultaneously.
  text = text.replace(
      tts.substitutionDictionaryRegexp_,
      function(symbol) {
        return ' ' + tts.SUBSTITUTION_DICTIONARY[symbol] + ' ';
      });

  // Handle single characters that we want to make sure we pronounce.
  if (text.length == 1) {
    var msgId = tts.CHARACTER_DICTIONARY[text];
    return msgId ?
        (new goog.i18n.MessageFormat(cvox.ChromeVox.msgs.getMsg(msgId)))
            .format({'COUNT': 1}) :
        text.toUpperCase();
  }

  // Substitute all words in the pronunciation dictionary. This is pretty
  // efficient because we use a single regexp that matches all words
  // simultaneously, and it calls a function with each match, which we can
  // use to look up the replacement in our dictionary.
  text = text.replace(
      tts.pronunciationDictionaryRegexp_,
      function(word) {
        return tts.PRONUNCIATION_DICTIONARY[word.toLowerCase()];
      });

  // Special case for google+, where the punctuation must be pronounced.
  text = text.replace(/google\+/ig, 'google plus');

  // Expand all repeated characters.
  text = text.replace(tts.repetitionRegexp_, tts.repetitionReplace_);

  // If there's no lower case letters, and at least two spaces, skip spacing
  // text.
  var skipSpacing =
      !/[a-z]/.test(text) && text.indexOf(' ') != text.lastIndexOf(' ');

  // Convert all-caps words to lowercase if they don't look like acronyms,
  // otherwise add a space before all-caps words so that all-caps words in
  // the middle of camelCase will be separated.
  text = text.replace(/[A-Z]+/g, function(word) {
    // If a word contains vowels and is more than 3 letters long, it is
    // probably a real word and not just an abbreviation. Convert it to lower
    // case and speak it normally.
    if ((word.length > 3) && /[AEIOUY]/.test(word)) {
      return word.toLowerCase();
    }
    if (!skipSpacing) {
      // Builds spaced-out camelCased/all CAPS words so they sound better when
      // spoken by TTS engines.
      return ' ' + word.split('').join(' ');
    }
    return word;
  });

  return text;
};


/** TTS rate property. @type {string} */
cvox.AbstractTts.RATE = 'rate';
/** TTS pitch property. @type {string} */
cvox.AbstractTts.PITCH = 'pitch';
/** TTS volume property. @type {string} */
cvox.AbstractTts.VOLUME = 'volume';
/** TTS language property. @type {string} */
cvox.AbstractTts.LANG = 'lang';

/** TTS relative rate property. @type {string} */
cvox.AbstractTts.RELATIVE_RATE = 'relativeRate';
/** TTS relative pitch property. @type {string} */
cvox.AbstractTts.RELATIVE_PITCH = 'relativePitch';
/** TTS relative volume property. @type {string} */
cvox.AbstractTts.RELATIVE_VOLUME = 'relativeVolume';

/** TTS color property (for the lens display). @type {string} */
cvox.AbstractTts.COLOR = 'color';
/** TTS CSS font-weight property (for the lens display). @type {string} */
cvox.AbstractTts.FONT_WEIGHT = 'fontWeight';

/** TTS punctuation-echo property. @type {string} */
cvox.AbstractTts.PUNCTUATION_ECHO = 'punctuationEcho';

/** TTS pause property. @type {string} */
cvox.AbstractTts.PAUSE = 'pause';

/**
 * TTS personality for annotations - text spoken by ChromeVox that
 * elaborates on a user interface element but isn't displayed on-screen.
 * @type {Object}
 */
cvox.AbstractTts.PERSONALITY_ANNOTATION = {
  'relativePitch': -0.25,
  // TODO:(rshearer) Added this color change for I/O presentation.
  'color': 'yellow',
  'punctuationEcho': 'none'
};


/**
 * TTS personality for announcements - text spoken by ChromeVox that
 * isn't tied to any user interface elements.
 * @type {Object}
 */
cvox.AbstractTts.PERSONALITY_ANNOUNCEMENT = {
  'punctuationEcho': 'none'
};

/**
 * TTS personality for alerts from the system, such as battery level
 * warnings.
 * @type {Object}
 */
cvox.AbstractTts.PERSONALITY_SYSTEM_ALERT = {
  'punctuationEcho': 'none',
  'doNotInterrupt': true
};

/**
 * TTS personality for an aside - text in parentheses.
 * @type {Object}
 */
cvox.AbstractTts.PERSONALITY_ASIDE = {
  'relativePitch': -0.1,
  'color': '#669'
};


/**
 * TTS personality for capital letters.
 * @type {Object}
 */
cvox.AbstractTts.PERSONALITY_CAPITAL = {
  'relativePitch': 0.6
};


/**
 * TTS personality for deleted text.
 * @type {Object}
 */
cvox.AbstractTts.PERSONALITY_DELETED = {
  'punctuationEcho': 'none',
  'relativePitch': -0.6
};


/**
 * TTS personality for quoted text.
 * @type {Object}
 */
cvox.AbstractTts.PERSONALITY_QUOTE = {
  'relativePitch': 0.1,
  'color': '#b6b',
  'fontWeight': 'bold'
};


/**
 * TTS personality for strong or bold text.
 * @type {Object}
 */
cvox.AbstractTts.PERSONALITY_STRONG = {
  'relativePitch': 0.1,
  'color': '#b66',
  'fontWeight': 'bold'
};


/**
 * TTS personality for emphasis or italicized text.
 * @type {Object}
 */
cvox.AbstractTts.PERSONALITY_EMPHASIS = {
  'relativeVolume': 0.1,
  'relativeRate': -0.1,
  'color': '#6bb',
  'fontWeight': 'bold'
};


/**
 * Flag indicating if the TTS is being debugged.
 * @type {boolean}
 */
cvox.AbstractTts.DEBUG = true;


/**
 * Speech queue mode that interrupts the current utterance.
 * @type {number}
 */
cvox.AbstractTts.QUEUE_MODE_FLUSH = 0;


/**
 * Speech queue mode that does not interrupt the current utterance.
 * @type {number}
 */
cvox.AbstractTts.QUEUE_MODE_QUEUE = 1;


/**
 * Character dictionary. Maps a symbol to the message id of its human readable
 * equivalent. This replacement occurs for single character utterances and
 * when describing runs of repeated characters.
 * @type {Object.<string, string>}
 */
cvox.AbstractTts.CHARACTER_DICTIONARY = {
  ' ': 'space',
  '`': 'backtick',
  '~': 'tilde',
  '!': 'exclamation',
  '@': 'at',
  '#': 'pound',
  '$': 'dollar',
  '%': 'percent',
  '^': 'caret',
  '&': 'ampersand',
  '*': 'asterisk',
  '(': 'open_paren',
  ')': 'close_paren',
  '-': 'dash',
  '_': 'underscore',
  '=': 'equals',
  '+': 'plus',
  '[': 'left_bracket',
  ']': 'right_bracket',
  '{': 'left_brace',
  '}': 'right_brace',
  '|': 'pipe',
  ';': 'semicolon',
  ':': 'colon',
  ',': 'comma',
  '.': 'dot',
  '<': 'less_than',
  '>': 'greater_than',
  '/': 'slash',
  '?': 'question_mark',
  '"': 'quote',
  '\'': 'apostrophe',
  '\t': 'tab',
  '\r': 'return',
  '\n': 'new_line',
  '\\': 'backslash'
};


/**
 * Pronunciation dictionary. Each key must be lowercase, its replacement
 * should be spelled out the way most TTS engines will pronounce it
 * correctly. This particular dictionary only handles letters and numbers,
 * no symbols.
 * @type {Object.<string, string>}
 */
cvox.AbstractTts.PRONUNCIATION_DICTIONARY = {
  'admob': 'ad-mob',
  'adsense': 'ad-sense',
  'adwords': 'ad-words',
  'angularjs': 'angular j s',
  'bcc': 'B C C',
  'cc': 'C C',
  'chromevox': 'chrome vox',
  'cr48': 'C R 48',
  'ctrl': 'control',
  'doubleclick': 'double-click',
  'gmail': 'gee mail',
  'gtalk': 'gee talk',
  'http': 'H T T P',
  'https': 'H T T P S',
  'igoogle': 'eye google',
  'pagerank': 'page-rank',
  'username': 'user-name',
  'www': 'W W W',
  'youtube': 'you tube'
};


/**
 * Pronunciation dictionary regexp. Built lazily by the constructor.
 * @type {RegExp}
 * @private
 */
cvox.AbstractTts.pronunciationDictionaryRegexp_;


/**
 * Substitution dictionary. These symbols or patterns are ALWAYS substituted
 * whenever they occur, so this should be reserved only for unicode characters
 * and characters that never have any different meaning in context.
 *
 * For example, do not include '$' here because $2 should be read as
 * "two dollars".
 * @type {Object.<string, string>}
 */
cvox.AbstractTts.SUBSTITUTION_DICTIONARY = {
  '://': 'colon slash slash',
  '\u00bc': 'one fourth',
  '\u00bd': 'one half',
  '\u2190': 'left arrow',
  '\u2191': 'up arrow',
  '\u2192': 'right arrow',
  '\u2193': 'down arrow',
  '\u21d0': 'left double arrow',
  '\u21d1': 'up double arrow',
  '\u21d2': 'right double arrow',
  '\u21d3': 'down double arrow',
  '\u21e6': 'left arrow',
  '\u21e7': 'up arrow',
  '\u21e8': 'right arrow',
  '\u21e9': 'down arrow',
  '\u2303': 'control',
  '\u2318': 'command',
  '\u2325': 'option',
  '\u25b2': 'up triangle',
  '\u25b3': 'up triangle',
  '\u25b4': 'up triangle',
  '\u25b5': 'up triangle',
  '\u25b6': 'right triangle',
  '\u25b7': 'right triangle',
  '\u25b8': 'right triangle',
  '\u25b9': 'right triangle',
  '\u25ba': 'right pointer',
  '\u25bb': 'right pointer',
  '\u25bc': 'down triangle',
  '\u25bd': 'down triangle',
  '\u25be': 'down triangle',
  '\u25bf': 'down triangle',
  '\u25c0': 'left triangle',
  '\u25c1': 'left triangle',
  '\u25c2': 'left triangle',
  '\u25c3': 'left triangle',
  '\u25c4': 'left pointer',
  '\u25c5': 'left pointer',
  '\uf8ff': 'apple'
};


/**
 * Substitution dictionary regexp. Built lazily by the constructor.
 * @type {RegExp}
 * @private
 */
cvox.AbstractTts.substitutionDictionaryRegexp_;


/**
 * Escapes regular expression metacharacters in a string so that it can be
 * embedded in a RegExp source and matched literally.
 * @param {string} str The literal text.
 * @return {string} The text with each metacharacter backslash-escaped.
 * @private
 */
cvox.AbstractTts.escapeRegExp_ = function(str) {
  return str.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
};


/**
 * repetition filter regexp. Matches a punctuation character from the listed
 * set that is repeated three or more times in a row.
 * @type {RegExp}
 * @private
 */
cvox.AbstractTts.repetitionRegexp_ =
    /([-\/\\|!@#$%^&*\(\)=_+\[\]\{\}.?;'":<>])\1{2,}/g;


/**
 * Constructs a description of a repeated character. Use as a param to
 * string.replace. The description is the character's CHARACTER_DICTIONARY
 * message formatted with COUNT set to the length of the run, padded with a
 * space on each side.
 * @param {string} match The matching string.
 * @return {string} The description.
 * @private
 */
cvox.AbstractTts.repetitionReplace_ = function(match) {
  var count = match.length;
  var msgId = cvox.AbstractTts.CHARACTER_DICTIONARY[match.charAt(0)];
  return ' ' + (new goog.i18n.MessageFormat(cvox.ChromeVox.msgs.getMsg(msgId)))
      .format({'COUNT': count}) + ' ';
};


/**
 * Returns the entry of {@code propertyDefault} for the given property name.
 * @override
 */
cvox.AbstractTts.prototype.getDefaultProperty = function(property) {
  return this.propertyDefault[property];
};