1// Copyright 2006 The Closure Library Authors. All Rights Reserved. 2// 3// Licensed under the Apache License, Version 2.0 (the "License"); 4// you may not use this file except in compliance with the License. 5// You may obtain a copy of the License at 6// 7// http://www.apache.org/licenses/LICENSE-2.0 8// 9// Unless required by applicable law or agreed to in writing, software 10// distributed under the License is distributed on an "AS-IS" BASIS, 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12// See the License for the specific language governing permissions and 13// limitations under the License. 14 15/** 16 * @fileoverview Utilities for string manipulation. 17 */ 18 19 20/** 21 * Namespace for string utilities 22 */ 23goog.provide('goog.string'); 24goog.provide('goog.string.Unicode'); 25 26 27/** 28 * Common Unicode string characters. 29 * @enum {string} 30 */ 31goog.string.Unicode = { 32 NBSP: '\xa0' 33}; 34 35 36/** 37 * Fast prefix-checker. 38 * @param {string} str The string to check. 39 * @param {string} prefix A string to look for at the start of {@code str}. 40 * @return {boolean} True if {@code str} begins with {@code prefix}. 41 */ 42goog.string.startsWith = function(str, prefix) { 43 return str.lastIndexOf(prefix, 0) == 0; 44}; 45 46 47/** 48 * Fast suffix-checker. 49 * @param {string} str The string to check. 50 * @param {string} suffix A string to look for at the end of {@code str}. 51 * @return {boolean} True if {@code str} ends with {@code suffix}. 52 */ 53goog.string.endsWith = function(str, suffix) { 54 var l = str.length - suffix.length; 55 return l >= 0 && str.indexOf(suffix, l) == l; 56}; 57 58 59/** 60 * Case-insensitive prefix-checker. 61 * @param {string} str The string to check. 62 * @param {string} prefix A string to look for at the end of {@code str}. 63 * @return {boolean} True if {@code str} begins with {@code prefix} (ignoring 64 * case). 65 */ 66goog.string.caseInsensitiveStartsWith = function(str, prefix) { 67 return goog.string.caseInsensitiveCompare( 68 prefix, str.substr(0, prefix.length)) == 0; 69}; 70 71 72/** 73 * Case-insensitive suffix-checker. 74 * @param {string} str The string to check. 75 * @param {string} suffix A string to look for at the end of {@code str}. 76 * @return {boolean} True if {@code str} ends with {@code suffix} (ignoring 77 * case). 78 */ 79goog.string.caseInsensitiveEndsWith = function(str, suffix) { 80 return goog.string.caseInsensitiveCompare( 81 suffix, str.substr(str.length - suffix.length, suffix.length)) == 0; 82}; 83 84 85/** 86 * Does simple python-style string substitution. 87 * subs("foo%s hot%s", "bar", "dog") becomes "foobar hotdog". 88 * @param {string} str The string containing the pattern. 89 * @param {...*} var_args The items to substitute into the pattern. 90 * @return {string} A copy of {@code str} in which each occurrence of 91 * {@code %s} has been replaced an argument from {@code var_args}. 92 */ 93goog.string.subs = function(str, var_args) { 94 // This appears to be slow, but testing shows it compares more or less 95 // equivalent to the regex.exec method. 96 for (var i = 1; i < arguments.length; i++) { 97 // We cast to String in case an argument is a Function. Replacing $&, for 98 // example, with $$$& stops the replace from subsituting the whole match 99 // into the resultant string. $$$& in the first replace becomes $$& in the 100 // second, which leaves $& in the resultant string. Also: 101 // $$, $`, $', $n $nn 102 var replacement = String(arguments[i]).replace(/\$/g, '$$$$'); 103 str = str.replace(/\%s/, replacement); 104 } 105 return str; 106}; 107 108 109/** 110 * Converts multiple whitespace chars (spaces, non-breaking-spaces, new lines 111 * and tabs) to a single space, and strips leading and trailing whitespace. 112 * @param {string} str Input string. 113 * @return {string} A copy of {@code str} with collapsed whitespace. 114 */ 115goog.string.collapseWhitespace = function(str) { 116 // Since IE doesn't include non-breaking-space (0xa0) in their \s character 117 // class (as required by section 7.2 of the ECMAScript spec), we explicitly 118 // include it in the regexp to enforce consistent cross-browser behavior. 119 return str.replace(/[\s\xa0]+/g, ' ').replace(/^\s+|\s+$/g, ''); 120}; 121 122 123/** 124 * Checks if a string is empty or contains only whitespaces. 125 * @param {string} str The string to check. 126 * @return {boolean} True if {@code str} is empty or whitespace only. 127 */ 128goog.string.isEmpty = function(str) { 129 // testing length == 0 first is actually slower in all browsers (about the 130 // same in Opera). 131 // Since IE doesn't include non-breaking-space (0xa0) in their \s character 132 // class (as required by section 7.2 of the ECMAScript spec), we explicitly 133 // include it in the regexp to enforce consistent cross-browser behavior. 134 return /^[\s\xa0]*$/.test(str); 135}; 136 137 138/** 139 * Checks if a string is null, empty or contains only whitespaces. 140 * @param {*} str The string to check. 141 * @return {boolean} True if{@code str} is null, empty, or whitespace only. 142 */ 143goog.string.isEmptySafe = function(str) { 144 return goog.string.isEmpty(goog.string.makeSafe(str)); 145}; 146 147 148/** 149 * Checks if a string is all breaking whitespace. 150 * @param {string} str The string to check. 151 * @return {boolean} Whether the string is all breaking whitespace. 152 */ 153goog.string.isBreakingWhitespace = function(str) { 154 return !/[^\t\n\r ]/.test(str); 155}; 156 157 158/** 159 * Checks if a string contains all letters. 160 * @param {string} str string to check. 161 * @return {boolean} True if {@code str} consists entirely of letters. 162 */ 163goog.string.isAlpha = function(str) { 164 return !/[^a-zA-Z]/.test(str); 165}; 166 167 168/** 169 * Checks if a string contains only numbers. 170 * @param {*} str string to check. If not a string, it will be 171 * casted to one. 172 * @return {boolean} True if {@code str} is numeric. 173 */ 174goog.string.isNumeric = function(str) { 175 return !/[^0-9]/.test(str); 176}; 177 178 179/** 180 * Checks if a string contains only numbers or letters. 181 * @param {string} str string to check. 182 * @return {boolean} True if {@code str} is alphanumeric. 183 */ 184goog.string.isAlphaNumeric = function(str) { 185 return !/[^a-zA-Z0-9]/.test(str); 186}; 187 188 189/** 190 * Checks if a character is a space character. 191 * @param {string} ch Character to check. 192 * @return {boolean} True if {code ch} is a space. 193 */ 194goog.string.isSpace = function(ch) { 195 return ch == ' '; 196}; 197 198 199/** 200 * Checks if a character is a valid unicode character. 201 * @param {string} ch Character to check. 202 * @return {boolean} True if {code ch} is a valid unicode character. 203 */ 204goog.string.isUnicodeChar = function(ch) { 205 return ch.length == 1 && ch >= ' ' && ch <= '~' || 206 ch >= '\u0080' && ch <= '\uFFFD'; 207}; 208 209 210/** 211 * Takes a string and replaces newlines with a space. Multiple lines are 212 * replaced with a single space. 213 * @param {string} str The string from which to strip newlines. 214 * @return {string} A copy of {@code str} stripped of newlines. 215 */ 216goog.string.stripNewlines = function(str) { 217 return str.replace(/(\r\n|\r|\n)+/g, ' '); 218}; 219 220 221/** 222 * Replaces Windows and Mac new lines with unix style: \r or \r\n with \n. 223 * @param {string} str The string to in which to canonicalize newlines. 224 * @return {string} {@code str} A copy of {@code} with canonicalized newlines. 225 */ 226goog.string.canonicalizeNewlines = function(str) { 227 return str.replace(/(\r\n|\r|\n)/g, '\n'); 228}; 229 230 231/** 232 * Normalizes whitespace in a string, replacing all whitespace chars with 233 * a space. 234 * @param {string} str The string in which to normalize whitespace. 235 * @return {string} A copy of {@code str} with all whitespace normalized. 236 */ 237goog.string.normalizeWhitespace = function(str) { 238 return str.replace(/\xa0|\s/g, ' '); 239}; 240 241 242/** 243 * Normalizes spaces in a string, replacing all consecutive spaces and tabs 244 * with a single space. Replaces non-breaking space with a space. 245 * @param {string} str The string in which to normalize spaces. 246 * @return {string} A copy of {@code str} with all consecutive spaces and tabs 247 * replaced with a single space. 248 */ 249goog.string.normalizeSpaces = function(str) { 250 return str.replace(/\xa0|[ \t]+/g, ' '); 251}; 252 253 254/** 255 * Removes the breaking spaces from the left and right of the string and 256 * collapses the sequences of breaking spaces in the middle into single spaces. 257 * The original and the result strings render the same way in HTML. 258 * @param {string} str A string in which to collapse spaces. 259 * @return {string} Copy of the string with normalized breaking spaces. 260 */ 261goog.string.collapseBreakingSpaces = function(str) { 262 return str.replace(/[\t\r\n ]+/g, ' ').replace( 263 /^[\t\r\n ]+|[\t\r\n ]+$/g, ''); 264}; 265 266 267/** 268 * Trims white spaces to the left and right of a string. 269 * @param {string} str The string to trim. 270 * @return {string} A trimmed copy of {@code str}. 271 */ 272goog.string.trim = function(str) { 273 // Since IE doesn't include non-breaking-space (0xa0) in their \s character 274 // class (as required by section 7.2 of the ECMAScript spec), we explicitly 275 // include it in the regexp to enforce consistent cross-browser behavior. 276 return str.replace(/^[\s\xa0]+|[\s\xa0]+$/g, ''); 277}; 278 279 280/** 281 * Trims whitespaces at the left end of a string. 282 * @param {string} str The string to left trim. 283 * @return {string} A trimmed copy of {@code str}. 284 */ 285goog.string.trimLeft = function(str) { 286 // Since IE doesn't include non-breaking-space (0xa0) in their \s character 287 // class (as required by section 7.2 of the ECMAScript spec), we explicitly 288 // include it in the regexp to enforce consistent cross-browser behavior. 289 return str.replace(/^[\s\xa0]+/, ''); 290}; 291 292 293/** 294 * Trims whitespaces at the right end of a string. 295 * @param {string} str The string to right trim. 296 * @return {string} A trimmed copy of {@code str}. 297 */ 298goog.string.trimRight = function(str) { 299 // Since IE doesn't include non-breaking-space (0xa0) in their \s character 300 // class (as required by section 7.2 of the ECMAScript spec), we explicitly 301 // include it in the regexp to enforce consistent cross-browser behavior. 302 return str.replace(/[\s\xa0]+$/, ''); 303}; 304 305 306/** 307 * A string comparator that ignores case. 308 * -1 = str1 less than str2 309 * 0 = str1 equals str2 310 * 1 = str1 greater than str2 311 * 312 * @param {string} str1 The string to compare. 313 * @param {string} str2 The string to compare {@code str1} to. 314 * @return {number} The comparator result, as described above. 315 */ 316goog.string.caseInsensitiveCompare = function(str1, str2) { 317 var test1 = String(str1).toLowerCase(); 318 var test2 = String(str2).toLowerCase(); 319 320 if (test1 < test2) { 321 return -1; 322 } else if (test1 == test2) { 323 return 0; 324 } else { 325 return 1; 326 } 327}; 328 329 330/** 331 * Regular expression used for splitting a string into substrings of fractional 332 * numbers, integers, and non-numeric characters. 333 * @type {RegExp} 334 * @private 335 */ 336goog.string.numerateCompareRegExp_ = /(\.\d+)|(\d+)|(\D+)/g; 337 338 339/** 340 * String comparison function that handles numbers in a way humans might expect. 341 * Using this function, the string "File 2.jpg" sorts before "File 10.jpg". The 342 * comparison is mostly case-insensitive, though strings that are identical 343 * except for case are sorted with the upper-case strings before lower-case. 344 * 345 * This comparison function is significantly slower (about 500x) than either 346 * the default or the case-insensitive compare. It should not be used in 347 * time-critical code, but should be fast enough to sort several hundred short 348 * strings (like filenames) with a reasonable delay. 349 * 350 * @param {string} str1 The string to compare in a numerically sensitive way. 351 * @param {string} str2 The string to compare {@code str1} to. 352 * @return {number} less than 0 if str1 < str2, 0 if str1 == str2, greater than 353 * 0 if str1 > str2. 354 */ 355goog.string.numerateCompare = function(str1, str2) { 356 if (str1 == str2) { 357 return 0; 358 } 359 if (!str1) { 360 return -1; 361 } 362 if (!str2) { 363 return 1; 364 } 365 366 // Using match to split the entire string ahead of time turns out to be faster 367 // for most inputs than using RegExp.exec or iterating over each character. 368 var tokens1 = str1.toLowerCase().match(goog.string.numerateCompareRegExp_); 369 var tokens2 = str2.toLowerCase().match(goog.string.numerateCompareRegExp_); 370 371 var count = Math.min(tokens1.length, tokens2.length); 372 373 for (var i = 0; i < count; i++) { 374 var a = tokens1[i]; 375 var b = tokens2[i]; 376 377 // Compare pairs of tokens, returning if one token sorts before the other. 378 if (a != b) { 379 380 // Only if both tokens are integers is a special comparison required. 381 // Decimal numbers are sorted as strings (e.g., '.09' < '.1'). 382 var num1 = parseInt(a, 10); 383 if (!isNaN(num1)) { 384 var num2 = parseInt(b, 10); 385 if (!isNaN(num2) && num1 - num2) { 386 return num1 - num2; 387 } 388 } 389 return a < b ? -1 : 1; 390 } 391 } 392 393 // If one string is a substring of the other, the shorter string sorts first. 394 if (tokens1.length != tokens2.length) { 395 return tokens1.length - tokens2.length; 396 } 397 398 // The two strings must be equivalent except for case (perfect equality is 399 // tested at the head of the function.) Revert to default ASCII-betical string 400 // comparison to stablize the sort. 401 return str1 < str2 ? -1 : 1; 402}; 403 404 405/** 406 * Regular expression used for determining if a string needs to be encoded. 407 * @type {RegExp} 408 * @private 409 */ 410goog.string.encodeUriRegExp_ = /^[a-zA-Z0-9\-_.!~*'()]*$/; 411 412 413/** 414 * URL-encodes a string 415 * @param {*} str The string to url-encode. 416 * @return {string} An encoded copy of {@code str} that is safe for urls. 417 * Note that '#', ':', and other characters used to delimit portions 418 * of URLs *will* be encoded. 419 */ 420goog.string.urlEncode = function(str) { 421 str = String(str); 422 // Checking if the search matches before calling encodeURIComponent avoids an 423 // extra allocation in IE6. This adds about 10us time in FF and a similiar 424 // over head in IE6 for lower working set apps, but for large working set 425 // apps like Gmail, it saves about 70us per call. 426 if (!goog.string.encodeUriRegExp_.test(str)) { 427 return encodeURIComponent(str); 428 } 429 return str; 430}; 431 432 433/** 434 * URL-decodes the string. We need to specially handle '+'s because 435 * the javascript library doesn't convert them to spaces. 436 * @param {string} str The string to url decode. 437 * @return {string} The decoded {@code str}. 438 */ 439goog.string.urlDecode = function(str) { 440 return decodeURIComponent(str.replace(/\+/g, ' ')); 441}; 442 443 444/** 445 * Converts \n to <br>s or <br />s. 446 * @param {string} str The string in which to convert newlines. 447 * @param {boolean=} opt_xml Whether to use XML compatible tags. 448 * @return {string} A copy of {@code str} with converted newlines. 449 */ 450goog.string.newLineToBr = function(str, opt_xml) { 451 return str.replace(/(\r\n|\r|\n)/g, opt_xml ? '<br />' : '<br>'); 452}; 453 454 455/** 456 * Escape double quote '"' characters in addition to '&', '<', and '>' so that a 457 * string can be included in an HTML tag attribute value within double quotes. 458 * 459 * It should be noted that > doesn't need to be escaped for the HTML or XML to 460 * be valid, but it has been decided to escape it for consistency with other 461 * implementations. 462 * 463 * NOTE(user): 464 * HtmlEscape is often called during the generation of large blocks of HTML. 465 * Using statics for the regular expressions and strings is an optimization 466 * that can more than half the amount of time IE spends in this function for 467 * large apps, since strings and regexes both contribute to GC allocations. 468 * 469 * Testing for the presence of a character before escaping increases the number 470 * of function calls, but actually provides a speed increase for the average 471 * case -- since the average case often doesn't require the escaping of all 4 472 * characters and indexOf() is much cheaper than replace(). 473 * The worst case does suffer slightly from the additional calls, therefore the 474 * opt_isLikelyToContainHtmlChars option has been included for situations 475 * where all 4 HTML entities are very likely to be present and need escaping. 476 * 477 * Some benchmarks (times tended to fluctuate +-0.05ms): 478 * FireFox IE6 479 * (no chars / average (mix of cases) / all 4 chars) 480 * no checks 0.13 / 0.22 / 0.22 0.23 / 0.53 / 0.80 481 * indexOf 0.08 / 0.17 / 0.26 0.22 / 0.54 / 0.84 482 * indexOf + re test 0.07 / 0.17 / 0.28 0.19 / 0.50 / 0.85 483 * 484 * An additional advantage of checking if replace actually needs to be called 485 * is a reduction in the number of object allocations, so as the size of the 486 * application grows the difference between the various methods would increase. 487 * 488 * @param {string} str string to be escaped. 489 * @param {boolean=} opt_isLikelyToContainHtmlChars Don't perform a check to see 490 * if the character needs replacing - use this option if you expect each of 491 * the characters to appear often. Leave false if you expect few html 492 * characters to occur in your strings, such as if you are escaping HTML. 493 * @return {string} An escaped copy of {@code str}. 494 */ 495goog.string.htmlEscape = function(str, opt_isLikelyToContainHtmlChars) { 496 497 if (opt_isLikelyToContainHtmlChars) { 498 return str.replace(goog.string.amperRe_, '&') 499 .replace(goog.string.ltRe_, '<') 500 .replace(goog.string.gtRe_, '>') 501 .replace(goog.string.quotRe_, '"'); 502 503 } else { 504 // quick test helps in the case when there are no chars to replace, in 505 // worst case this makes barely a difference to the time taken 506 if (!goog.string.allRe_.test(str)) return str; 507 508 // str.indexOf is faster than regex.test in this case 509 if (str.indexOf('&') != -1) { 510 str = str.replace(goog.string.amperRe_, '&'); 511 } 512 if (str.indexOf('<') != -1) { 513 str = str.replace(goog.string.ltRe_, '<'); 514 } 515 if (str.indexOf('>') != -1) { 516 str = str.replace(goog.string.gtRe_, '>'); 517 } 518 if (str.indexOf('"') != -1) { 519 str = str.replace(goog.string.quotRe_, '"'); 520 } 521 return str; 522 } 523}; 524 525 526/** 527 * Regular expression that matches an ampersand, for use in escaping. 528 * @type {RegExp} 529 * @private 530 */ 531goog.string.amperRe_ = /&/g; 532 533 534/** 535 * Regular expression that matches a less than sign, for use in escaping. 536 * @type {RegExp} 537 * @private 538 */ 539goog.string.ltRe_ = /</g; 540 541 542/** 543 * Regular expression that matches a greater than sign, for use in escaping. 544 * @type {RegExp} 545 * @private 546 */ 547goog.string.gtRe_ = />/g; 548 549 550/** 551 * Regular expression that matches a double quote, for use in escaping. 552 * @type {RegExp} 553 * @private 554 */ 555goog.string.quotRe_ = /\"/g; 556 557 558/** 559 * Regular expression that matches any character that needs to be escaped. 560 * @type {RegExp} 561 * @private 562 */ 563goog.string.allRe_ = /[&<>\"]/; 564 565 566/** 567 * Unescapes an HTML string. 568 * 569 * @param {string} str The string to unescape. 570 * @return {string} An unescaped copy of {@code str}. 571 */ 572goog.string.unescapeEntities = function(str) { 573 if (goog.string.contains(str, '&')) { 574 // We are careful not to use a DOM if we do not have one. We use the [] 575 // notation so that the JSCompiler will not complain about these objects and 576 // fields in the case where we have no DOM. 577 if ('document' in goog.global) { 578 return goog.string.unescapeEntitiesUsingDom_(str); 579 } else { 580 // Fall back on pure XML entities 581 return goog.string.unescapePureXmlEntities_(str); 582 } 583 } 584 return str; 585}; 586 587 588/** 589 * Unescapes an HTML string using a DOM to resolve non-XML, non-numeric 590 * entities. This function is XSS-safe and whitespace-preserving. 591 * @private 592 * @param {string} str The string to unescape. 593 * @return {string} The unescaped {@code str} string. 594 */ 595goog.string.unescapeEntitiesUsingDom_ = function(str) { 596 var seen = {'&': '&', '<': '<', '>': '>', '"': '"'}; 597 var div = document.createElement('div'); 598 // Match as many valid entity characters as possible. If the actual entity 599 // happens to be shorter, it will still work as innerHTML will return the 600 // trailing characters unchanged. Since the entity characters do not include 601 // open angle bracket, there is no chance of XSS from the innerHTML use. 602 // Since no whitespace is passed to innerHTML, whitespace is preserved. 603 return str.replace(goog.string.HTML_ENTITY_PATTERN_, function(s, entity) { 604 // Check for cached entity. 605 var value = seen[s]; 606 if (value) { 607 return value; 608 } 609 // Check for numeric entity. 610 if (entity.charAt(0) == '#') { 611 // Prefix with 0 so that hex entities (e.g. ) parse as hex numbers. 612 var n = Number('0' + entity.substr(1)); 613 if (!isNaN(n)) { 614 value = String.fromCharCode(n); 615 } 616 } 617 // Fall back to innerHTML otherwise. 618 if (!value) { 619 // Append a non-entity character to avoid a bug in Webkit that parses 620 // an invalid entity at the end of innerHTML text as the empty string. 621 div.innerHTML = s + ' '; 622 // Then remove the trailing character from the result. 623 value = div.firstChild.nodeValue.slice(0, -1); 624 } 625 // Cache and return. 626 return seen[s] = value; 627 }); 628}; 629 630 631/** 632 * Unescapes XML entities. 633 * @private 634 * @param {string} str The string to unescape. 635 * @return {string} An unescaped copy of {@code str}. 636 */ 637goog.string.unescapePureXmlEntities_ = function(str) { 638 return str.replace(/&([^;]+);/g, function(s, entity) { 639 switch (entity) { 640 case 'amp': 641 return '&'; 642 case 'lt': 643 return '<'; 644 case 'gt': 645 return '>'; 646 case 'quot': 647 return '"'; 648 default: 649 if (entity.charAt(0) == '#') { 650 // Prefix with 0 so that hex entities (e.g. ) parse as hex. 651 var n = Number('0' + entity.substr(1)); 652 if (!isNaN(n)) { 653 return String.fromCharCode(n); 654 } 655 } 656 // For invalid entities we just return the entity 657 return s; 658 } 659 }); 660}; 661 662 663/** 664 * Regular expression that matches an HTML entity. 665 * See also HTML5: Tokenization / Tokenizing character references. 666 * @private 667 * @type {!RegExp} 668 */ 669goog.string.HTML_ENTITY_PATTERN_ = /&([^;\s<&]+);?/g; 670 671 672/** 673 * Do escaping of whitespace to preserve spatial formatting. We use character 674 * entity #160 to make it safer for xml. 675 * @param {string} str The string in which to escape whitespace. 676 * @param {boolean=} opt_xml Whether to use XML compatible tags. 677 * @return {string} An escaped copy of {@code str}. 678 */ 679goog.string.whitespaceEscape = function(str, opt_xml) { 680 return goog.string.newLineToBr(str.replace(/ /g, '  '), opt_xml); 681}; 682 683 684/** 685 * Strip quote characters around a string. The second argument is a string of 686 * characters to treat as quotes. This can be a single character or a string of 687 * multiple character and in that case each of those are treated as possible 688 * quote characters. For example: 689 * 690 * <pre> 691 * goog.string.stripQuotes('"abc"', '"`') --> 'abc' 692 * goog.string.stripQuotes('`abc`', '"`') --> 'abc' 693 * </pre> 694 * 695 * @param {string} str The string to strip. 696 * @param {string} quoteChars The quote characters to strip. 697 * @return {string} A copy of {@code str} without the quotes. 698 */ 699goog.string.stripQuotes = function(str, quoteChars) { 700 var length = quoteChars.length; 701 for (var i = 0; i < length; i++) { 702 var quoteChar = length == 1 ? quoteChars : quoteChars.charAt(i); 703 if (str.charAt(0) == quoteChar && str.charAt(str.length - 1) == quoteChar) { 704 return str.substring(1, str.length - 1); 705 } 706 } 707 return str; 708}; 709 710 711/** 712 * Truncates a string to a certain length and adds '...' if necessary. The 713 * length also accounts for the ellipsis, so a maximum length of 10 and a string 714 * 'Hello World!' produces 'Hello W...'. 715 * @param {string} str The string to truncate. 716 * @param {number} chars Max number of characters. 717 * @param {boolean=} opt_protectEscapedCharacters Whether to protect escaped 718 * characters from being cut off in the middle. 719 * @return {string} The truncated {@code str} string. 720 */ 721goog.string.truncate = function(str, chars, opt_protectEscapedCharacters) { 722 if (opt_protectEscapedCharacters) { 723 str = goog.string.unescapeEntities(str); 724 } 725 726 if (str.length > chars) { 727 str = str.substring(0, chars - 3) + '...'; 728 } 729 730 if (opt_protectEscapedCharacters) { 731 str = goog.string.htmlEscape(str); 732 } 733 734 return str; 735}; 736 737 738/** 739 * Truncate a string in the middle, adding "..." if necessary, 740 * and favoring the beginning of the string. 741 * @param {string} str The string to truncate the middle of. 742 * @param {number} chars Max number of characters. 743 * @param {boolean=} opt_protectEscapedCharacters Whether to protect escaped 744 * characters from being cutoff in the middle. 745 * @param {number=} opt_trailingChars Optional number of trailing characters to 746 * leave at the end of the string, instead of truncating as close to the 747 * middle as possible. 748 * @return {string} A truncated copy of {@code str}. 749 */ 750goog.string.truncateMiddle = function(str, chars, 751 opt_protectEscapedCharacters, opt_trailingChars) { 752 if (opt_protectEscapedCharacters) { 753 str = goog.string.unescapeEntities(str); 754 } 755 756 if (opt_trailingChars && str.length > chars) { 757 if (opt_trailingChars > chars) { 758 opt_trailingChars = chars; 759 } 760 var endPoint = str.length - opt_trailingChars; 761 var startPoint = chars - opt_trailingChars; 762 str = str.substring(0, startPoint) + '...' + str.substring(endPoint); 763 } else if (str.length > chars) { 764 // Favor the beginning of the string: 765 var half = Math.floor(chars / 2); 766 var endPos = str.length - half; 767 half += chars % 2; 768 str = str.substring(0, half) + '...' + str.substring(endPos); 769 } 770 771 if (opt_protectEscapedCharacters) { 772 str = goog.string.htmlEscape(str); 773 } 774 775 return str; 776}; 777 778 779/** 780 * Special chars that need to be escaped for goog.string.quote. 781 * @private 782 * @type {Object} 783 */ 784goog.string.specialEscapeChars_ = { 785 '\0': '\\0', 786 '\b': '\\b', 787 '\f': '\\f', 788 '\n': '\\n', 789 '\r': '\\r', 790 '\t': '\\t', 791 '\x0B': '\\x0B', // '\v' is not supported in JScript 792 '"': '\\"', 793 '\\': '\\\\' 794}; 795 796 797/** 798 * Character mappings used internally for goog.string.escapeChar. 799 * @private 800 * @type {Object} 801 */ 802goog.string.jsEscapeCache_ = { 803 '\'': '\\\'' 804}; 805 806 807/** 808 * Encloses a string in double quotes and escapes characters so that the 809 * string is a valid JS string. 810 * @param {string} s The string to quote. 811 * @return {string} A copy of {@code s} surrounded by double quotes. 812 */ 813goog.string.quote = function(s) { 814 s = String(s); 815 if (s.quote) { 816 return s.quote(); 817 } else { 818 var sb = ['"']; 819 for (var i = 0; i < s.length; i++) { 820 var ch = s.charAt(i); 821 var cc = ch.charCodeAt(0); 822 sb[i + 1] = goog.string.specialEscapeChars_[ch] || 823 ((cc > 31 && cc < 127) ? ch : goog.string.escapeChar(ch)); 824 } 825 sb.push('"'); 826 return sb.join(''); 827 } 828}; 829 830 831/** 832 * Takes a string and returns the escaped string for that character. 833 * @param {string} str The string to escape. 834 * @return {string} An escaped string representing {@code str}. 835 */ 836goog.string.escapeString = function(str) { 837 var sb = []; 838 for (var i = 0; i < str.length; i++) { 839 sb[i] = goog.string.escapeChar(str.charAt(i)); 840 } 841 return sb.join(''); 842}; 843 844 845/** 846 * Takes a character and returns the escaped string for that character. For 847 * example escapeChar(String.fromCharCode(15)) -> "\\x0E". 848 * @param {string} c The character to escape. 849 * @return {string} An escaped string representing {@code c}. 850 */ 851goog.string.escapeChar = function(c) { 852 if (c in goog.string.jsEscapeCache_) { 853 return goog.string.jsEscapeCache_[c]; 854 } 855 856 if (c in goog.string.specialEscapeChars_) { 857 return goog.string.jsEscapeCache_[c] = goog.string.specialEscapeChars_[c]; 858 } 859 860 var rv = c; 861 var cc = c.charCodeAt(0); 862 if (cc > 31 && cc < 127) { 863 rv = c; 864 } else { 865 // tab is 9 but handled above 866 if (cc < 256) { 867 rv = '\\x'; 868 if (cc < 16 || cc > 256) { 869 rv += '0'; 870 } 871 } else { 872 rv = '\\u'; 873 if (cc < 4096) { // \u1000 874 rv += '0'; 875 } 876 } 877 rv += cc.toString(16).toUpperCase(); 878 } 879 880 return goog.string.jsEscapeCache_[c] = rv; 881}; 882 883 884/** 885 * Takes a string and creates a map (Object) in which the keys are the 886 * characters in the string. The value for the key is set to true. You can 887 * then use goog.object.map or goog.array.map to change the values. 888 * @param {string} s The string to build the map from. 889 * @return {Object} The map of characters used. 890 */ 891// TODO(arv): It seems like we should have a generic goog.array.toMap. But do 892// we want a dependency on goog.array in goog.string? 893goog.string.toMap = function(s) { 894 var rv = {}; 895 for (var i = 0; i < s.length; i++) { 896 rv[s.charAt(i)] = true; 897 } 898 return rv; 899}; 900 901 902/** 903 * Checks whether a string contains a given character. 904 * @param {string} s The string to test. 905 * @param {string} ss The substring to test for. 906 * @return {boolean} True if {@code s} contains {@code ss}. 907 */ 908goog.string.contains = function(s, ss) { 909 return s.indexOf(ss) != -1; 910}; 911 912 913/** 914 * Removes a substring of a specified length at a specific 915 * index in a string. 916 * @param {string} s The base string from which to remove. 917 * @param {number} index The index at which to remove the substring. 918 * @param {number} stringLength The length of the substring to remove. 919 * @return {string} A copy of {@code s} with the substring removed or the full 920 * string if nothing is removed or the input is invalid. 921 */ 922goog.string.removeAt = function(s, index, stringLength) { 923 var resultStr = s; 924 // If the index is greater or equal to 0 then remove substring 925 if (index >= 0 && index < s.length && stringLength > 0) { 926 resultStr = s.substr(0, index) + 927 s.substr(index + stringLength, s.length - index - stringLength); 928 } 929 return resultStr; 930}; 931 932 933/** 934 * Removes the first occurrence of a substring from a string. 935 * @param {string} s The base string from which to remove. 936 * @param {string} ss The string to remove. 937 * @return {string} A copy of {@code s} with {@code ss} removed or the full 938 * string if nothing is removed. 939 */ 940goog.string.remove = function(s, ss) { 941 var re = new RegExp(goog.string.regExpEscape(ss), ''); 942 return s.replace(re, ''); 943}; 944 945 946/** 947 * Removes all occurrences of a substring from a string. 948 * @param {string} s The base string from which to remove. 949 * @param {string} ss The string to remove. 950 * @return {string} A copy of {@code s} with {@code ss} removed or the full 951 * string if nothing is removed. 952 */ 953goog.string.removeAll = function(s, ss) { 954 var re = new RegExp(goog.string.regExpEscape(ss), 'g'); 955 return s.replace(re, ''); 956}; 957 958 959/** 960 * Escapes characters in the string that are not safe to use in a RegExp. 961 * @param {*} s The string to escape. If not a string, it will be casted 962 * to one. 963 * @return {string} A RegExp safe, escaped copy of {@code s}. 964 */ 965goog.string.regExpEscape = function(s) { 966 return String(s).replace(/([-()\[\]{}+?*.$\^|,:#<!\\])/g, '\\$1'). 967 replace(/\x08/g, '\\x08'); 968}; 969 970 971/** 972 * Repeats a string n times. 973 * @param {string} string The string to repeat. 974 * @param {number} length The number of times to repeat. 975 * @return {string} A string containing {@code length} repetitions of 976 * {@code string}. 977 */ 978goog.string.repeat = function(string, length) { 979 return new Array(length + 1).join(string); 980}; 981 982 983/** 984 * Pads number to given length and optionally rounds it to a given precision. 985 * For example: 986 * <pre>padNumber(1.25, 2, 3) -> '01.250' 987 * padNumber(1.25, 2) -> '01.25' 988 * padNumber(1.25, 2, 1) -> '01.3' 989 * padNumber(1.25, 0) -> '1.25'</pre> 990 * 991 * @param {number} num The number to pad. 992 * @param {number} length The desired length. 993 * @param {number=} opt_precision The desired precision. 994 * @return {string} {@code num} as a string with the given options. 995 */ 996goog.string.padNumber = function(num, length, opt_precision) { 997 var s = goog.isDef(opt_precision) ? num.toFixed(opt_precision) : String(num); 998 var index = s.indexOf('.'); 999 if (index == -1) { 1000 index = s.length; 1001 } 1002 return goog.string.repeat('0', Math.max(0, length - index)) + s; 1003}; 1004 1005 1006/** 1007 * Returns a string representation of the given object, with 1008 * null and undefined being returned as the empty string. 1009 * 1010 * @param {*} obj The object to convert. 1011 * @return {string} A string representation of the {@code obj}. 1012 */ 1013goog.string.makeSafe = function(obj) { 1014 return obj == null ? '' : String(obj); 1015}; 1016 1017 1018/** 1019 * Concatenates string expressions. This is useful 1020 * since some browsers are very inefficient when it comes to using plus to 1021 * concat strings. Be careful when using null and undefined here since 1022 * these will not be included in the result. If you need to represent these 1023 * be sure to cast the argument to a String first. 1024 * For example: 1025 * <pre>buildString('a', 'b', 'c', 'd') -> 'abcd' 1026 * buildString(null, undefined) -> '' 1027 * </pre> 1028 * @param {...*} var_args A list of strings to concatenate. If not a string, 1029 * it will be casted to one. 1030 * @return {string} The concatenation of {@code var_args}. 1031 */ 1032goog.string.buildString = function(var_args) { 1033 return Array.prototype.join.call(arguments, ''); 1034}; 1035 1036 1037/** 1038 * Returns a string with at least 64-bits of randomness. 1039 * 1040 * Doesn't trust Javascript's random function entirely. Uses a combination of 1041 * random and current timestamp, and then encodes the string in base-36 to 1042 * make it shorter. 1043 * 1044 * @return {string} A random string, e.g. sn1s7vb4gcic. 1045 */ 1046goog.string.getRandomString = function() { 1047 var x = 2147483648; 1048 return Math.floor(Math.random() * x).toString(36) + 1049 Math.abs(Math.floor(Math.random() * x) ^ goog.now()).toString(36); 1050}; 1051 1052 1053/** 1054 * Compares two version numbers. 1055 * 1056 * @param {string|number} version1 Version of first item. 1057 * @param {string|number} version2 Version of second item. 1058 * 1059 * @return {number} 1 if {@code version1} is higher. 1060 * 0 if arguments are equal. 1061 * -1 if {@code version2} is higher. 1062 */ 1063goog.string.compareVersions = function(version1, version2) { 1064 var order = 0; 1065 // Trim leading and trailing whitespace and split the versions into 1066 // subversions. 1067 var v1Subs = goog.string.trim(String(version1)).split('.'); 1068 var v2Subs = goog.string.trim(String(version2)).split('.'); 1069 var subCount = Math.max(v1Subs.length, v2Subs.length); 1070 1071 // Iterate over the subversions, as long as they appear to be equivalent. 1072 for (var subIdx = 0; order == 0 && subIdx < subCount; subIdx++) { 1073 var v1Sub = v1Subs[subIdx] || ''; 1074 var v2Sub = v2Subs[subIdx] || ''; 1075 1076 // Split the subversions into pairs of numbers and qualifiers (like 'b'). 1077 // Two different RegExp objects are needed because they are both using 1078 // the 'g' flag. 1079 var v1CompParser = new RegExp('(\\d*)(\\D*)', 'g'); 1080 var v2CompParser = new RegExp('(\\d*)(\\D*)', 'g'); 1081 do { 1082 var v1Comp = v1CompParser.exec(v1Sub) || ['', '', '']; 1083 var v2Comp = v2CompParser.exec(v2Sub) || ['', '', '']; 1084 // Break if there are no more matches. 1085 if (v1Comp[0].length == 0 && v2Comp[0].length == 0) { 1086 break; 1087 } 1088 1089 // Parse the numeric part of the subversion. A missing number is 1090 // equivalent to 0. 1091 var v1CompNum = v1Comp[1].length == 0 ? 0 : parseInt(v1Comp[1], 10); 1092 var v2CompNum = v2Comp[1].length == 0 ? 0 : parseInt(v2Comp[1], 10); 1093 1094 // Compare the subversion components. The number has the highest 1095 // precedence. Next, if the numbers are equal, a subversion without any 1096 // qualifier is always higher than a subversion with any qualifier. Next, 1097 // the qualifiers are compared as strings. 1098 order = goog.string.compareElements_(v1CompNum, v2CompNum) || 1099 goog.string.compareElements_(v1Comp[2].length == 0, 1100 v2Comp[2].length == 0) || 1101 goog.string.compareElements_(v1Comp[2], v2Comp[2]); 1102 // Stop as soon as an inequality is discovered. 1103 } while (order == 0); 1104 } 1105 1106 return order; 1107}; 1108 1109 1110/** 1111 * Compares elements of a version number. 1112 * 1113 * @param {string|number|boolean} left An element from a version number. 1114 * @param {string|number|boolean} right An element from a version number. 1115 * 1116 * @return {number} 1 if {@code left} is higher. 1117 * 0 if arguments are equal. 1118 * -1 if {@code right} is higher. 1119 * @private 1120 */ 1121goog.string.compareElements_ = function(left, right) { 1122 if (left < right) { 1123 return -1; 1124 } else if (left > right) { 1125 return 1; 1126 } 1127 return 0; 1128}; 1129 1130 1131/** 1132 * Maximum value of #goog.string.hashCode, exclusive. 2^32. 1133 * @type {number} 1134 * @private 1135 */ 1136goog.string.HASHCODE_MAX_ = 0x100000000; 1137 1138 1139/** 1140 * String hash function similar to java.lang.String.hashCode(). 1141 * The hash code for a string is computed as 1142 * s[0] * 31 ^ (n - 1) + s[1] * 31 ^ (n - 2) + ... + s[n - 1], 1143 * where s[i] is the ith character of the string and n is the length of 1144 * the string. We mod the result to make it between 0 (inclusive) and 2^32 1145 * (exclusive). 1146 * @param {string} str A string. 1147 * @return {number} Hash value for {@code str}, between 0 (inclusive) and 2^32 1148 * (exclusive). The empty string returns 0. 1149 */ 1150goog.string.hashCode = function(str) { 1151 var result = 0; 1152 for (var i = 0; i < str.length; ++i) { 1153 result = 31 * result + str.charCodeAt(i); 1154 // Normalize to 4 byte range, 0 ... 2^32. 1155 result %= goog.string.HASHCODE_MAX_; 1156 } 1157 return result; 1158}; 1159 1160 1161/** 1162 * The most recent unique ID. |0 is equivalent to Math.floor in this case. 1163 * @type {number} 1164 * @private 1165 */ 1166goog.string.uniqueStringCounter_ = Math.random() * 0x80000000 | 0; 1167 1168 1169/** 1170 * Generates and returns a string which is unique in the current document. 1171 * This is useful, for example, to create unique IDs for DOM elements. 1172 * @return {string} A unique id. 1173 */ 1174goog.string.createUniqueString = function() { 1175 return 'goog_' + goog.string.uniqueStringCounter_++; 1176}; 1177 1178 1179/** 1180 * Converts the supplied string to a number, which may be Ininity or NaN. 1181 * This function strips whitespace: (toNumber(' 123') === 123) 1182 * This function accepts scientific notation: (toNumber('1e1') === 10) 1183 * 1184 * This is better than Javascript's built-in conversions because, sadly: 1185 * (Number(' ') === 0) and (parseFloat('123a') === 123) 1186 * 1187 * @param {string} str The string to convert. 1188 * @return {number} The number the supplied string represents, or NaN. 1189 */ 1190goog.string.toNumber = function(str) { 1191 var num = Number(str); 1192 if (num == 0 && goog.string.isEmpty(str)) { 1193 return NaN; 1194 } 1195 return num; 1196}; 1197 1198 1199/** 1200 * A memoized cache for goog.string.toCamelCase. 1201 * @type {Object.<string>} 1202 * @private 1203 */ 1204goog.string.toCamelCaseCache_ = {}; 1205 1206 1207/** 1208 * Converts a string from selector-case to camelCase (e.g. from 1209 * "multi-part-string" to "multiPartString"), useful for converting 1210 * CSS selectors and HTML dataset keys to their equivalent JS properties. 1211 * @param {string} str The string in selector-case form. 1212 * @return {string} The string in camelCase form. 1213 */ 1214goog.string.toCamelCase = function(str) { 1215 return goog.string.toCamelCaseCache_[str] || 1216 (goog.string.toCamelCaseCache_[str] = 1217 String(str).replace(/\-([a-z])/g, function(all, match) { 1218 return match.toUpperCase(); 1219 })); 1220}; 1221 1222 1223/** 1224 * A memoized cache for goog.string.toSelectorCase. 1225 * @type {Object.<string>} 1226 * @private 1227 */ 1228goog.string.toSelectorCaseCache_ = {}; 1229 1230 1231/** 1232 * Converts a string from camelCase to selector-case (e.g. from 1233 * "multiPartString" to "multi-part-string"), useful for converting JS 1234 * style and dataset properties to equivalent CSS selectors and HTML keys. 1235 * @param {string} str The string in camelCase form. 1236 * @return {string} The string in selector-case form. 1237 */ 1238goog.string.toSelectorCase = function(str) { 1239 return goog.string.toSelectorCaseCache_[str] || 1240 (goog.string.toSelectorCaseCache_[str] = 1241 String(str).replace(/([A-Z])/g, '-$1').toLowerCase()); 1242}; 1243