// tts_background.js revision 116680a4aac90f2aa7413d9095a592090648e557
// Copyright 2014 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.

/**
 * @fileoverview Sends Text-To-Speech commands to Chrome's native TTS
 * extension API.
 *
 * Speech requests are wrapped in cvox.Utterance objects and kept in a queue
 * owned by cvox.TtsBackground; only one utterance at a time is handed to
 * chrome.tts.speak, and the next one is started when a final TTS event
 * arrives for the current one.
 */

goog.provide('cvox.TtsBackground');

goog.require('cvox.AbstractTts');
goog.require('cvox.ChromeTtsBase');
goog.require('cvox.ChromeVox');
goog.require('cvox.MathMap');
goog.require('goog.i18n.MessageFormat');


/**
 * A single queued speech request. Each instance receives a unique,
 * monotonically increasing id that is used to match TTS events to it.
 * @constructor
 * @param {string} textString The string of text to be spoken.
 * @param {Object} properties Speech properties to use for this utterance.
 */
cvox.Utterance = function(textString, properties) {
  this.textString = textString;
  this.properties = properties;
  this.id = cvox.Utterance.nextUtteranceId_++;
};

/**
 * The next utterance id to use.
 * @type {number}
 * @private
 */
cvox.Utterance.nextUtteranceId_ = 1;

/**
 * @constructor
 * @param {boolean=} opt_enableMath Whether to process math. Used when running
 * on forge. Defaults to true.
 * @extends {cvox.ChromeTtsBase}
 */
cvox.TtsBackground = function(opt_enableMath) {
  opt_enableMath = opt_enableMath == undefined ? true : opt_enableMath;
  goog.base(this);
  this.currentVoice = localStorage['voiceName'] || '';

  // A stored value that does not parse (or parses to 0) falls back to the
  // default for that property.
  this.ttsProperties['rate'] = (parseFloat(localStorage['rate']) ||
      this.propertyDefault['rate']);
  this.ttsProperties['pitch'] = (parseFloat(localStorage['pitch']) ||
      this.propertyDefault['pitch']);
  this.ttsProperties['volume'] = (parseFloat(localStorage['volume']) ||
      this.propertyDefault['volume']);

  // Use the current locale as the speech language if not otherwise
  // specified.
  if (this.ttsProperties['lang'] == undefined) {
    this.ttsProperties['lang'] =
        chrome.i18n.getMessage('@@ui_locale').replace('_', '-');
  }

  this.lastEventType = 'end';

  this.setPreferredVoice_();
  if (!this.currentVoice) {
    this.setDefaultVoiceName_();
  }

  /** @private {number} */
  this.currentPunctuationEcho_ =
      parseInt(localStorage[cvox.AbstractTts.PUNCTUATION_ECHO] || 1, 10);

  /**
   * @type {!Array.<{name:(string),
   * msg:(string),
   * regexp:(RegExp),
   * clear:(boolean)}>}
   * @private
   */
  this.punctuationEchoes_ = [
    /**
     * Punctuation echoed for the 'none' option.
     */
    {
      name: 'none',
      msg: 'no_punctuation',
      regexp: /[-$#"()*;:<>\n\\\/+='~`@_]/g,
      clear: true
    },

    /**
     * Punctuation echoed for the 'some' option.
     */
    {
      name: 'some',
      msg: 'some_punctuation',
      regexp: /[$#"*<>\\\/\{\}+=~`%]/g,
      clear: false
    },

    /**
     * Punctuation echoed for the 'all' option.
     */
    {
      name: 'all',
      msg: 'all_punctuation',
      regexp: /[-$#"()*;:<>\n\\\/\{\}\[\]+='~`!@_.,?%]/g,
      clear: false
    }
  ];

  // A corrupt or out-of-range stored level (including NaN from parseInt)
  // would make preprocess() index past the table and throw; fall back to the
  // same level that is used when nothing is stored.
  if (!(this.currentPunctuationEcho_ >= 0 &&
        this.currentPunctuationEcho_ < this.punctuationEchoes_.length)) {
    this.currentPunctuationEcho_ = 1;
  }

  /**
   * A list of punctuation characters that should always be spliced into output
   * even with literal word substitutions.
   * This is important for tts prosody.
   * @type {!Array.<string>}
   * @private
   */
  this.retainPunctuation_ =
      [';', '?', '!', '\''];

  /**
   * Mapping for math elements.
   * @type {cvox.MathMap}
   */
  this.mathmap = opt_enableMath ? new cvox.MathMap() : null;

  /**
   * The id of a callback returned from setTimeout.
   * @type {number|undefined}
   */
  this.timeoutId_;

  try {
    /**
     * @type {Object.<string, string>}
     * @private
     * @const
     */
    this.PHONETIC_MAP_ = /** @type {Object.<string, string>} */(
        JSON.parse(cvox.ChromeVox.msgs.getMsg('phonetic_map')));
  } catch (e) {
    // PHONETIC_MAP_ stays undefined; pronouncePhonetically_ checks for that.
    console.log('Error; unable to parse phonetic map msg.');
  }

  /**
   * Capturing tts event listeners.
   * @type {Array.<cvox.TtsCapturingEventListener>}
   * @private
   */
  this.capturingTtsEventListeners_ = [];

  /**
   * The current utterance.
   * @type {cvox.Utterance}
   * @private
   */
  this.currentUtterance_ = null;

  /**
   * The utterance queue.
   * @type {Array.<cvox.Utterance>}
   * @private
   */
  this.utteranceQueue_ = [];
};
goog.inherits(cvox.TtsBackground, cvox.ChromeTtsBase);


/**
 * The amount of time to wait before speaking a phonetic word for a
 * letter.
 * @type {number}
 * @private
 * @const
 */
cvox.TtsBackground.PHONETIC_DELAY_MS_ = 1000;

/**
 * The list of properties allowed to be passed to the chrome.tts.speak API.
 * Anything outside this list will be stripped.
 * @type {Array.<string>}
 * @private
 * @const
 */
cvox.TtsBackground.ALLOWED_PROPERTIES_ = [
  'desiredEventTypes',
  'enqueue',
  'extensionId',
  'gender',
  'lang',
  'onEvent',
  'pitch',
  'rate',
  'requiredEventTypes',
  'voiceName',
  'volume'];

/**
 * Sets the current voice to the one that the user selected on the options page
 * if that voice exists. The lookup is asynchronous; currentVoice is only
 * updated once chrome.tts.getVoices reports a matching voice.
 * @private
 */
cvox.TtsBackground.prototype.setPreferredVoice_ = function() {
  var self = this;
  chrome.tts.getVoices(
      function(voices) {
        for (var i = 0, v; v = voices[i]; i++) {
          if (v['voiceName'] == localStorage['voiceName']) {
            self.currentVoice = v['voiceName'];
            return;
          }
        }
      });
};


/**
 * Speaks the given text. Long text is split into chunks at punctuation
 * followed by whitespace; each chunk is preprocessed and enqueued as its own
 * utterance, with startCallback attached to the first chunk and endCallback to
 * the last. Text that contains nothing speakable after preprocessing is not
 * sent to the engine; its callbacks are invoked immediately instead.
 * @override
 */
cvox.TtsBackground.prototype.speak = function(
    textString, queueMode, properties) {
  goog.base(this, 'speak', textString, queueMode, properties);

  if (!properties) {
    properties = {};
  }
  if (queueMode === undefined) {
    queueMode = cvox.AbstractTts.QUEUE_MODE_QUEUE;
  }

  // Chunk to improve responsiveness. Use a replace/split pattern in order to
  // retain the original punctuation.
  var splitTextString = textString.replace(/([-\n\r.,!?;])(\s)/g, '$1$2|');
  splitTextString = splitTextString.split('|');
  // Since we are substituting the chunk delimiters back into the string, only
  // recurse when there are more than 2 split items. This should result in only
  // one recursive call.
  if (splitTextString.length > 2) {
    // Drop blank chunks (e.g. the empty trailing piece produced when the text
    // ends in a delimiter followed by whitespace). A blank chunk is never
    // spoken, so attaching endCallback to it would fire the callback
    // immediately instead of when the real speech finishes.
    var chunks = [];
    for (var c = 0; c < splitTextString.length; c++) {
      if (/\S/.test(splitTextString[c])) {
        chunks.push(splitTextString[c]);
      }
    }
    // If every chunk was blank, fall through and let the single-utterance
    // path below deal with the unspeakable text.
    if (chunks.length > 0) {
      var startCallback = properties['startCallback'];
      var endCallback = properties['endCallback'];
      for (var i = 0; i < chunks.length; i++) {
        var propertiesCopy = {};
        for (var p in properties) {
          propertiesCopy[p] = properties[p];
        }
        propertiesCopy['startCallback'] = i == 0 ? startCallback : null;
        propertiesCopy['endCallback'] =
            i == (chunks.length - 1) ? endCallback : null;
        this.speak(chunks[i], queueMode, propertiesCopy);
        // Only the first chunk may flush; the rest must queue behind it.
        queueMode = cvox.AbstractTts.QUEUE_MODE_QUEUE;
      }
      return this;
    }
  }

  textString = this.preprocess(textString, properties);

  // TODO(dtseng): Google TTS has bad performance when speaking numbers. This
  // pattern causes ChromeVox to read numbers as digits rather than words.
  textString = this.getNumberAsDigits_(textString);

  // TODO(dtseng): Google TTS flushes the queue when encountering strings of
  // this pattern which stops ChromeVox speech.
  if (!textString || !textString.match(/\w+/g)) {
    // We still want to callback for listeners in our content script.
    if (properties['startCallback']) {
      try {
        properties['startCallback']();
      } catch (e) {
      }
    }
    if (properties['endCallback']) {
      try {
        properties['endCallback']();
      } catch (e) {
      }
    }
    if (queueMode === cvox.AbstractTts.QUEUE_MODE_FLUSH) {
      this.stop();
    }
    return this;
  }

  var mergedProperties = this.mergeProperties(properties);

  if (this.currentVoice && (this.currentVoice == localStorage['voiceName'])) {
    mergedProperties['voiceName'] = this.currentVoice;
  }
  // The stored preference changed since the last lookup; refresh it
  // (asynchronously) so later utterances pick it up.
  if (localStorage['voiceName'] &&
      this.currentVoice != localStorage['voiceName']) {
    this.setPreferredVoice_();
  }

  // A category flush without a category degrades to a plain flush.
  if (queueMode == cvox.AbstractTts.QUEUE_MODE_CATEGORY_FLUSH &&
      !mergedProperties['category']) {
    queueMode = cvox.AbstractTts.QUEUE_MODE_FLUSH;
  }

  var utterance = new cvox.Utterance(textString, mergedProperties);
  this.speakUsingQueue_(utterance, queueMode);
  // Return this on every path, matching the early returns above.
  return this;
};

/**
 * Use the speech queue to handle the given speech request.
 * @param {cvox.Utterance} utterance The utterance to speak.
 * @param {number} queueMode The queue mode.
 * @private
 */
cvox.TtsBackground.prototype.speakUsingQueue_ = function(utterance, queueMode) {
  // First, take care of removing the current utterance and flushing
  // anything from the queue we need to. If we remove the current utterance,
  // make a note that we're going to stop speech.
  if (queueMode == cvox.AbstractTts.QUEUE_MODE_FLUSH ||
      queueMode == cvox.AbstractTts.QUEUE_MODE_CATEGORY_FLUSH) {
    if (this.shouldCancel_(this.currentUtterance_, utterance, queueMode)) {
      this.cancelUtterance_(this.currentUtterance_);
      this.currentUtterance_ = null;
    }
    var i = 0;
    while (i < this.utteranceQueue_.length) {
      if (this.shouldCancel_(
              this.utteranceQueue_[i], utterance, queueMode)) {
        this.cancelUtterance_(this.utteranceQueue_[i]);
        // Removing shifts the remaining items down, so do not advance i.
        this.utteranceQueue_.splice(i, 1);
      } else {
        i++;
      }
    }
  }

  // Next, add the new utterance to the queue.
  this.utteranceQueue_.push(utterance);

  // Now start speaking the next item in the queue.
  this.startSpeakingNextItemInQueue_();
};

/**
 * If nothing is speaking, pop the first item off the speech queue and
 * start speaking it. This is called when a speech request is made and
 * when the current utterance finishes speaking.
 * @private
 */
cvox.TtsBackground.prototype.startSpeakingNextItemInQueue_ = function() {
  if (this.currentUtterance_) {
    return;
  }

  if (this.utteranceQueue_.length == 0) {
    return;
  }

  this.currentUtterance_ = this.utteranceQueue_.shift();
  var utteranceId = this.currentUtterance_.id;

  // Capture the id so that events arriving late for an utterance that has
  // already been replaced can be recognized and ignored.
  this.currentUtterance_.properties['onEvent'] = goog.bind(function(event) {
    this.onTtsEvent_(event, utteranceId);
  }, this);

  var validatedProperties = {};
  for (var i = 0; i < cvox.TtsBackground.ALLOWED_PROPERTIES_.length; i++) {
    var p = cvox.TtsBackground.ALLOWED_PROPERTIES_[i];
    var value = this.currentUtterance_.properties[p];
    // Forward 0 and false as well as truthy values: chrome.tts accepts 0 for
    // 'volume' and 'pitch', and a plain truthiness test would silently replace
    // them with the engine defaults. Unset, empty-string and NaN values are
    // still stripped.
    if (value || value === 0 || value === false) {
      validatedProperties[p] = value;
    }
  }

  chrome.tts.speak(this.currentUtterance_.textString,
                   validatedProperties);
};

/**
 * Called when we get a speech event from Chrome. We ignore any event
 * that doesn't pertain to the current utterance, but when speech starts
 * or ends we optionally call callback functions, and start speaking the
 * next utterance if there's another one enqueued.
 * @param {Object} event The TTS event from chrome.
 * @param {number} utteranceId The id of the associated utterance.
 * @private
 */
cvox.TtsBackground.prototype.onTtsEvent_ = function(event, utteranceId) {
  // Recorded for every event, including ones for stale utterances.
  this.lastEventType = event['type'];

  // Ignore events sent on utterances other than the current one.
  if (!this.currentUtterance_ ||
      utteranceId != this.currentUtterance_.id) {
    return;
  }

  var utterance = this.currentUtterance_;

  switch (event['type']) {
    case 'start':
      this.capturingTtsEventListeners_.forEach(function(listener) {
        listener.onTtsStart();
      });
      if (utterance.properties['startCallback']) {
        try {
          utterance.properties['startCallback']();
        } catch (e) {
        }
      }
      break;
    case 'end':
      this.capturingTtsEventListeners_.forEach(function(listener) {
        listener.onTtsEnd();
      });
      // Intentionally falls through.
    case 'interrupted':
    case 'cancelled':
      // 'cancelled' is a final event just like 'interrupted'; without
      // handling it the utterance would stay current forever and block the
      // queue.
      this.cancelUtterance_(utterance);
      this.currentUtterance_ = null;
      this.startSpeakingNextItemInQueue_();
      break;
    case 'error':
      this.onError_(event['errorMessage']);
      // The failed utterance must be released before moving on; otherwise
      // startSpeakingNextItemInQueue_ sees it as still speaking, returns
      // immediately, and the queue stalls. Its endCallback is invoked so that
      // callers waiting on completion are not left hanging.
      this.cancelUtterance_(utterance);
      this.currentUtterance_ = null;
      this.startSpeakingNextItemInQueue_();
      break;
  }
};

/**
 * Determines if |utteranceToCancel| should be canceled (interrupted if
 * currently speaking, or removed from the queue if not), given the new
 * utterance we want to speak and the queue mode. If the queue mode is
 * QUEUE or FLUSH, the logic is straightforward. If the queue mode is
 * CATEGORY_FLUSH, we only flush utterances with the same category.
 * Utterances marked 'doNotInterrupt' are never canceled.
 *
 * @param {cvox.Utterance} utteranceToCancel The utterance in question.
 * @param {cvox.Utterance} newUtterance The new utterance we're enqueueing.
 * @param {number} queueMode The queue mode.
 * @return {boolean} True if this utterance should be canceled.
 * @private
 */
cvox.TtsBackground.prototype.shouldCancel_ =
    function(utteranceToCancel, newUtterance, queueMode) {
  if (!utteranceToCancel) {
    return false;
  }
  if (utteranceToCancel.properties['doNotInterrupt']) {
    return false;
  }
  switch (queueMode) {
    case cvox.AbstractTts.QUEUE_MODE_QUEUE:
      return false;
    case cvox.AbstractTts.QUEUE_MODE_FLUSH:
      return true;
    case cvox.AbstractTts.QUEUE_MODE_CATEGORY_FLUSH:
      return (utteranceToCancel.properties['category'] ==
          newUtterance.properties['category']);
    default:
      // Unknown queue modes never cancel anything; always return a boolean.
      return false;
  }
};

/**
 * Do any cleanup necessary to cancel an utterance, like calling its
 * callback function if any. Exceptions thrown by the callback are ignored so
 * that a misbehaving listener cannot break queue processing.
 * @param {cvox.Utterance} utterance The utterance to cancel.
 * @private
 */
cvox.TtsBackground.prototype.cancelUtterance_ = function(utterance) {
  if (utterance && utterance.properties['endCallback']) {
    try {
      utterance.properties['endCallback']();
    } catch (e) {
    }
  }
};

/**
 * Adjusts the property via the base class and persists the new value to
 * localStorage.
 * @override
 */
cvox.TtsBackground.prototype.increaseOrDecreaseProperty =
    function(propertyName, increase) {
  goog.base(this, 'increaseOrDecreaseProperty', propertyName, increase);
  localStorage[propertyName] = this.ttsProperties[propertyName];
};

/**
 * Reports whether an utterance is currently in flight. This is derived from
 * the queue state rather than from the last event type, because the last
 * event stays 'interrupted' or 'error' indefinitely after speech has been
 * stopped, which would otherwise be reported as still speaking.
 * @override
 */
cvox.TtsBackground.prototype.isSpeaking = function() {
  goog.base(this, 'isSpeaking');
  return !!this.currentUtterance_;
};

/**
 * Cancels the current utterance and everything queued (invoking their end
 * callbacks), then stops the engine.
 * @override
 */
cvox.TtsBackground.prototype.stop = function() {
  goog.base(this, 'stop');

  this.cancelUtterance_(this.currentUtterance_);
  this.currentUtterance_ = null;

  for (var i = 0; i < this.utteranceQueue_.length; i++) {
    this.cancelUtterance_(this.utteranceQueue_[i]);
  }
  this.utteranceQueue_.length = 0;

  chrome.tts.stop();
};

/** @override */
cvox.TtsBackground.prototype.addCapturingEventListener = function(listener) {
  this.capturingTtsEventListeners_.push(listener);
};

/**
 * An error handler invoked when the TTS engine reports an 'error' event.
 * @param {string} errorMessage Describes the error (set by onEvent).
 * @private
 */
cvox.TtsBackground.prototype.onError_ = function(errorMessage) {
  // Reset voice related parameters.
  delete localStorage['voiceName'];
};

/**
 * Converts an engine property value to a percentage from 0.00 to 1.00.
 * @param {string} property The property to convert.
 * @return {?number} The percentage of the property.
 */
cvox.TtsBackground.prototype.propertyToPercentage = function(property) {
  return (this.ttsProperties[property] - this.propertyMin[property]) /
         Math.abs(this.propertyMax[property] - this.propertyMin[property]);
};


/**
 * Prepares text for the speech engine: optional math lookup, the base class's
 * generic processing, punctuation echo, phonetic pronunciation scheduling for
 * single characters, a short-description lookup for single characters, and
 * finally whitespace normalization.
 * @override
 */
cvox.TtsBackground.prototype.preprocess = function(text, properties) {
  properties = properties ? properties : {};

  // Perform specialized processing, such as mathematics.
  if (properties.math) {
    text = this.preprocessMath_(text, properties.math);
  }

  // Perform generic processing.
  text = goog.base(this, 'preprocess', text, properties);

  // Perform any remaining processing such as punctuation expansion.
  var pE = null;
  var requestedEcho = properties[cvox.AbstractTts.PUNCTUATION_ECHO];
  if (requestedEcho) {
    // The loop leaves pE undefined when no level has the requested name.
    for (var i = 0; pE = this.punctuationEchoes_[i]; i++) {
      if (requestedEcho == pE.name) {
        break;
      }
    }
  }
  if (!pE) {
    // Either no level was requested or the requested name is unknown; use the
    // user's current level instead of dereferencing an undefined entry.
    pE = this.punctuationEchoes_[this.currentPunctuationEcho_];
  }
  text =
      text.replace(pE.regexp, this.createPunctuationReplace_(pE.clear));

  // Try pronouncing phonetically for single characters. Cancel previous calls
  // to pronouncePhonetically_ if we fail to pronounce on this invocation or if
  // this text is math which should never be pronounced phonetically.
  if (properties.math ||
      !properties['phoneticCharacters'] ||
      !this.pronouncePhonetically_(text)) {
    this.clearTimeout_();
  }

  // Try looking up in our unicode tables for a short description.
  if (!properties.math && text.length == 1 && this.mathmap) {
    text = this.mathmap.store.lookupString(
        text.toLowerCase(),
        cvox.MathStore.createDynamicConstraint('default', 'short')) || text;
  }

  // Remove all whitespace from the beginning and end, and collapse all
  // inner strings of whitespace to a single space.
  text = text.replace(/\s+/g, ' ').replace(/^\s+|\s+$/g, '');

  return text;
};


/**
 * Method that cycles among the available punctuation echo levels and persists
 * the new level to localStorage.
 * @return {string} The resulting punctuation level message id.
 */
cvox.TtsBackground.prototype.cyclePunctuationEcho = function() {
  this.currentPunctuationEcho_ =
      (this.currentPunctuationEcho_ + 1) % this.punctuationEchoes_.length;
  localStorage[cvox.AbstractTts.PUNCTUATION_ECHO] =
      this.currentPunctuationEcho_;
  return this.punctuationEchoes_[this.currentPunctuationEcho_].msg;
};


/**
 * Process a math expression into a string suitable for a speech engine.
 * @param {string} text Text representing a math expression.
 * @param {Object=} math Parameter containing information how to
 * process the math expression.
 * @return {string} The string with a spoken version of the math expression,
 * or the original text if math is disabled or no mapping is found.
 * @private
 */
cvox.TtsBackground.prototype.preprocessMath_ = function(text, math) {
  if (!this.mathmap) {
    return text;
  }
  var dynamicCstr = cvox.MathStore.createDynamicConstraint(
      math['domain'], math['style']);
  var result = this.mathmap.store.lookupString(text, dynamicCstr);
  return result ? result : text;
};


/**
 * Converts a number into space-separated digits.
 * For numbers containing 4 or fewer digits, we return the original number.
 * This ensures that numbers like 123,456 or 2011 are not "digitized" while
 * 123456 is.
 * @param {string} text The text to process.
 * @return {string} A string with all numbers converted.
 * @private
 */
cvox.TtsBackground.prototype.getNumberAsDigits_ = function(text) {
  return text.replace(/\d+/g, function(num) {
    if (num.length <= 4) {
      return num;
    }
    return num.split('').join(' ');
  });
};


/**
 * Constructs a function for string.replace that handles description of a
 * punctuation character.
 * @param {boolean} clear Whether we want to use whitespace in place of match.
 * @return {function(string): string} The replacement function.
 * @private
 */
cvox.TtsBackground.prototype.createPunctuationReplace_ = function(clear) {
  return goog.bind(function(match) {
    // Characters in retainPunctuation_ are kept in the output; everything
    // else is replaced by a space.
    var retain = this.retainPunctuation_.indexOf(match) != -1 ?
        match : ' ';
    return clear ? retain :
        ' ' + (new goog.i18n.MessageFormat(cvox.ChromeVox.msgs.getMsg(
                cvox.AbstractTts.CHARACTER_DICTIONARY[match])))
            .format({'COUNT': 1}) + retain + ' ';
  }, this);
};


/**
 * Pronounces single letters phonetically after some timeout.
 * @param {string} text The text.
 * @return {boolean} True if the text resulted in speech.
 * @private
 */
cvox.TtsBackground.prototype.pronouncePhonetically_ = function(text) {
  if (!this.PHONETIC_MAP_) {
    return false;
  }
  var key = text.toLowerCase();
  // Only consult the map's own entries: a plain lookup of text such as
  // "constructor" would return an inherited Object.prototype member and
  // schedule a non-string for speech.
  if (!Object.prototype.hasOwnProperty.call(this.PHONETIC_MAP_, key)) {
    return false;
  }
  var phonetic = this.PHONETIC_MAP_[key];
  if (phonetic && typeof phonetic == 'string') {
    this.clearTimeout_();
    var self = this;
    this.timeoutId_ = setTimeout(function() {
      // NOTE(review): 1 is presumably cvox.AbstractTts.QUEUE_MODE_QUEUE;
      // confirm and replace the literal with the named constant.
      self.speak(phonetic, 1);
    }, cvox.TtsBackground.PHONETIC_DELAY_MS_);
    return true;
  }
  return false;
};


/**
 * Clears the last timeout set via setTimeout.
 * @private
 */
cvox.TtsBackground.prototype.clearTimeout_ = function() {
  if (goog.isDef(this.timeoutId_)) {
    clearTimeout(this.timeoutId_);
    this.timeoutId_ = undefined;
  }
};

/**
 * Sets the name of a voice appropriate for the current locale preferring
 * non-remote voices. Voices are ordered first by locality (non-remote before
 * remote) and then by whether their language matches the UI locale; the first
 * voice after sorting becomes the current voice.
 * @private
 */
cvox.TtsBackground.prototype.setDefaultVoiceName_ = function() {
  chrome.tts.getVoices(
      goog.bind(function(voices) {
        var currentLocale =
            chrome.i18n.getMessage('@@ui_locale').replace('_', '-');
        voices.sort(function(v1, v2) {
          if (v1['remote'] && !v2['remote']) {
            return 1;
          }
          if (!v1['remote'] && v2['remote']) {
            return -1;
          }
          if (v1['lang'] == currentLocale && v2['lang'] != currentLocale) {
            return -1;
          }
          if (v1['lang'] != currentLocale && v2['lang'] == currentLocale) {
            return 1;
          }
          return 0;
        });
        if (voices[0]) {
          var voiceName = voices[0].voiceName;
          this.currentVoice = voiceName;
        }
      }, this));
};