1// Copyright 2006 The Closure Library Authors. All Rights Reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7//      http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS-IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15/**
16 * @fileoverview Utilities for string manipulation.
17 */
18
19
20/**
21 * Namespace for string utilities
22 */
23goog.provide('goog.string');
24goog.provide('goog.string.Unicode');
25
26
/**
 * Common Unicode string characters, exposed as named constants so callers do
 * not have to embed escape sequences.
 * NBSP is the non-breaking space (character code 0xA0).
 * @enum {string}
 */
goog.string.Unicode = {
  NBSP: '\xa0'
};
34
35
/**
 * Tests whether a string begins with a given prefix.
 * Searching backwards from index 0 means only the very start of {@code str}
 * is examined, so the rest of the string is never scanned.
 * @param {string} str The string to check.
 * @param {string} prefix A string to look for at the start of {@code str}.
 * @return {boolean} True if {@code str} begins with {@code prefix}.
 */
goog.string.startsWith = function(str, prefix) {
  var foundAt = str.lastIndexOf(prefix, 0);
  return foundAt === 0;
};
45
46
/**
 * Tests whether a string ends with a given suffix.
 * @param {string} str The string to check.
 * @param {string} suffix A string to look for at the end of {@code str}.
 * @return {boolean} True if {@code str} ends with {@code suffix}.
 */
goog.string.endsWith = function(str, suffix) {
  // Position at which the suffix would have to start.
  var offset = str.length - suffix.length;
  if (!(offset >= 0)) {
    // The suffix is longer than the string itself.
    return false;
  }
  return str.indexOf(suffix, offset) === offset;
};
57
58
/**
 * Tests whether a string begins with a given prefix, ignoring case.
 * @param {string} str The string to check.
 * @param {string} prefix A string to look for at the start of {@code str}.
 * @return {boolean} True if {@code str} begins with {@code prefix} (ignoring
 *     case).
 */
goog.string.caseInsensitiveStartsWith = function(str, prefix) {
  // Only the leading characters that could match the prefix are compared.
  var head = str.substr(0, prefix.length);
  return goog.string.caseInsensitiveCompare(prefix, head) == 0;
};
70
71
/**
 * Tests whether a string ends with a given suffix, ignoring case.
 * @param {string} str The string to check.
 * @param {string} suffix A string to look for at the end of {@code str}.
 * @return {boolean} True if {@code str} ends with {@code suffix} (ignoring
 *     case).
 */
goog.string.caseInsensitiveEndsWith = function(str, suffix) {
  // Only the trailing characters that could match the suffix are compared.
  var suffixLength = suffix.length;
  var tail = str.substr(str.length - suffixLength, suffixLength);
  return goog.string.caseInsensitiveCompare(suffix, tail) == 0;
};
83
84
/**
 * Does simple python-style string substitution.
 * subs("foo%s hot%s", "bar", "dog") becomes "foobar hotdog".
 *
 * Each {@code %s} in the pattern is replaced, in order, by the string form of
 * the corresponding argument. Placeholders without a matching argument are
 * left in place; surplus arguments are ignored. Substituted values are
 * inserted verbatim: a {@code %s} or {@code $} sequence inside a value is
 * never re-interpreted as a placeholder or as a replacement pattern.
 *
 * @param {string} str The string containing the pattern.
 * @param {...*} var_args The items to substitute into the pattern.
 * @return {string} A copy of {@code str} in which each occurrence of
 *     {@code %s} has been replaced by an argument from {@code var_args}.
 */
goog.string.subs = function(str, var_args) {
  // Splitting the pattern up front (rather than calling replace() once per
  // argument) guarantees that a substituted value containing '%s' cannot be
  // consumed by a later argument.
  var pieces = str.split('%s');
  var result = pieces[0];
  var i = 1;
  // arguments[0] is the pattern itself, so arguments[i] lines up with the gap
  // between pieces[i - 1] and pieces[i].
  for (; i < pieces.length && i < arguments.length; i++) {
    // Cast to String in case an argument is a Function or other object.
    result += String(arguments[i]) + pieces[i];
  }
  // Restore placeholders for which no argument was supplied.
  for (; i < pieces.length; i++) {
    result += '%s' + pieces[i];
  }
  return result;
};
107
108
/**
 * Collapses every run of whitespace (spaces, non-breaking spaces, new lines
 * and tabs) into a single space and removes leading and trailing whitespace.
 * @param {string} str Input string.
 * @return {string} A copy of {@code str} with collapsed whitespace.
 */
goog.string.collapseWhitespace = function(str) {
  // IE's \s character class omits the non-breaking space (0xa0), contrary to
  // section 7.2 of the ECMAScript spec, so it is listed explicitly to get the
  // same result in every browser.
  var collapsed = str.replace(/[\s\xa0]+/g, ' ');
  // After collapsing, at most one space remains at either end.
  return collapsed.replace(/^\s+|\s+$/g, '');
};
121
122
/**
 * Checks whether a string is empty or consists solely of whitespace.
 * @param {string} str The string to check.
 * @return {boolean} True if {@code str} is empty or whitespace only.
 */
goog.string.isEmpty = function(str) {
  // Checking length == 0 first is actually slower in all browsers (about the
  // same in Opera), so the regular expression is applied unconditionally.
  // IE's \s character class omits the non-breaking space (0xa0), contrary to
  // section 7.2 of the ECMAScript spec, so it is listed explicitly to get the
  // same result in every browser.
  var blankPattern = /^[\s\xa0]*$/;
  return blankPattern.test(str);
};
136
137
/**
 * Checks whether a value is null, empty or whitespace only. The value is
 * first passed through {@code goog.string.makeSafe} and the result is then
 * tested with {@code goog.string.isEmpty}.
 * @param {*} str The string to check.
 * @return {boolean} True if {@code str} is null, empty, or whitespace only.
 */
goog.string.isEmptySafe = function(str) {
  var safeStr = goog.string.makeSafe(str);
  return goog.string.isEmpty(safeStr);
};
146
147
/**
 * Checks whether a string consists only of breaking whitespace: tabs, line
 * feeds, carriage returns and plain spaces. The empty string qualifies.
 * @param {string} str The string to check.
 * @return {boolean} Whether the string is all breaking whitespace.
 */
goog.string.isBreakingWhitespace = function(str) {
  var hasOtherChar = /[^\t\n\r ]/.test(str);
  return !hasOtherChar;
};
156
157
/**
 * Checks whether a string is made up exclusively of the letters a-z and A-Z.
 * The empty string qualifies.
 * @param {string} str string to check.
 * @return {boolean} True if {@code str} consists entirely of letters.
 */
goog.string.isAlpha = function(str) {
  var hasNonLetter = /[^a-zA-Z]/.test(str);
  return !hasNonLetter;
};
166
167
/**
 * Checks whether a string is made up exclusively of the digits 0-9. The empty
 * string qualifies.
 * @param {*} str string to check. If not a string, it will be
 *     casted to one.
 * @return {boolean} True if {@code str} is numeric.
 */
goog.string.isNumeric = function(str) {
  var hasNonDigit = /[^0-9]/.test(str);
  return !hasNonDigit;
};
177
178
/**
 * Checks whether a string is made up exclusively of the letters a-z, A-Z and
 * the digits 0-9. The empty string qualifies.
 * @param {string} str string to check.
 * @return {boolean} True if {@code str} is alphanumeric.
 */
goog.string.isAlphaNumeric = function(str) {
  var hasOtherChar = /[^a-zA-Z0-9]/.test(str);
  return !hasOtherChar;
};
187
188
/**
 * Checks whether a character is the plain space character (' ').
 * @param {string} ch Character to check.
 * @return {boolean} True if {@code ch} is a space.
 */
goog.string.isSpace = function(ch) {
  return ' ' == ch;
};
197
198
/**
 * Checks if a character is a valid unicode character: either printable ASCII
 * (' ' through '~') or in the range U+0080 through U+FFFD.
 * Anything that is not exactly one character long is rejected.
 * @param {string} ch Character to check.
 * @return {boolean} True if {@code ch} is a valid unicode character.
 */
goog.string.isUnicodeChar = function(ch) {
  // The length check must guard both ranges. Because && binds tighter than
  // ||, leaving it attached to the ASCII range alone lets multi-character
  // strings that start at or above U+0080 slip through.
  if (ch.length != 1) {
    return false;
  }
  return (ch >= ' ' && ch <= '~') || (ch >= '\u0080' && ch <= '\uFFFD');
};
208
209
/**
 * Replaces newlines in a string with a space. A run of consecutive newlines
 * (in any mix of \r\n, \r and \n) becomes a single space.
 * @param {string} str The string from which to strip newlines.
 * @return {string} A copy of {@code str} stripped of newlines.
 */
goog.string.stripNewlines = function(str) {
  var newlineRuns = /(\r\n|\r|\n)+/g;
  return str.replace(newlineRuns, ' ');
};
219
220
/**
 * Converts Windows (\r\n) and old Mac (\r) line endings to the unix style
 * (\n).
 * @param {string} str The string in which to canonicalize newlines.
 * @return {string} A copy of {@code str} with canonicalized newlines.
 */
goog.string.canonicalizeNewlines = function(str) {
  var anyNewline = /(\r\n|\r|\n)/g;
  return str.replace(anyNewline, '\n');
};
229
230
/**
 * Normalizes whitespace in a string by replacing every individual whitespace
 * character, including the non-breaking space, with a plain space. Runs are
 * not collapsed.
 * @param {string} str The string in which to normalize whitespace.
 * @return {string} A copy of {@code str} with all whitespace normalized.
 */
goog.string.normalizeWhitespace = function(str) {
  var anyWhitespaceChar = /\xa0|\s/g;
  return str.replace(anyWhitespaceChar, ' ');
};
240
241
/**
 * Normalizes spaces in a string: each run of consecutive spaces and tabs is
 * replaced with a single space, and each non-breaking space is replaced with
 * a space.
 * @param {string} str The string in which to normalize spaces.
 * @return {string} A copy of {@code str} with all consecutive spaces and tabs
 *    replaced with a single space.
 */
goog.string.normalizeSpaces = function(str) {
  var spaceRuns = /\xa0|[ \t]+/g;
  return str.replace(spaceRuns, ' ');
};
252
253
/**
 * Strips breaking spaces (tab, carriage return, line feed, space) from both
 * ends of a string and collapses each run of them in the middle into a single
 * space. The original and the result strings render the same way in HTML.
 * @param {string} str A string in which to collapse spaces.
 * @return {string} Copy of the string with normalized breaking spaces.
 */
goog.string.collapseBreakingSpaces = function(str) {
  var collapsed = str.replace(/[\t\r\n ]+/g, ' ');
  return collapsed.replace(/^[\t\r\n ]+|[\t\r\n ]+$/g, '');
};
265
266
/**
 * Removes whitespace from both ends of a string.
 * @param {string} str The string to trim.
 * @return {string} A trimmed copy of {@code str}.
 */
goog.string.trim = function(str) {
  // IE's \s character class omits the non-breaking space (0xa0), contrary to
  // section 7.2 of the ECMAScript spec, so it is listed explicitly to get the
  // same result in every browser.
  var edgeWhitespace = /^[\s\xa0]+|[\s\xa0]+$/g;
  return str.replace(edgeWhitespace, '');
};
278
279
/**
 * Removes whitespace from the beginning of a string.
 * @param {string} str The string to left trim.
 * @return {string} A trimmed copy of {@code str}.
 */
goog.string.trimLeft = function(str) {
  // IE's \s character class omits the non-breaking space (0xa0), contrary to
  // section 7.2 of the ECMAScript spec, so it is listed explicitly to get the
  // same result in every browser.
  var leadingWhitespace = /^[\s\xa0]+/;
  return str.replace(leadingWhitespace, '');
};
291
292
/**
 * Removes whitespace from the end of a string.
 * @param {string} str The string to right trim.
 * @return {string} A trimmed copy of {@code str}.
 */
goog.string.trimRight = function(str) {
  // IE's \s character class omits the non-breaking space (0xa0), contrary to
  // section 7.2 of the ECMAScript spec, so it is listed explicitly to get the
  // same result in every browser.
  var trailingWhitespace = /[\s\xa0]+$/;
  return str.replace(trailingWhitespace, '');
};
304
305
/**
 * Compares two strings while ignoring case. Both arguments are converted to
 * strings and lower-cased before being compared.
 * -1 = str1 less than str2
 *  0 = str1 equals str2
 *  1 = str1 greater than str2
 *
 * @param {string} str1 The string to compare.
 * @param {string} str2 The string to compare {@code str1} to.
 * @return {number} The comparator result, as described above.
 */
goog.string.caseInsensitiveCompare = function(str1, str2) {
  var lower1 = String(str1).toLowerCase();
  var lower2 = String(str2).toLowerCase();

  if (lower1 == lower2) {
    return 0;
  }
  return lower1 < lower2 ? -1 : 1;
};
328
329
/**
 * Regular expression used for splitting a string into substrings of fractional
 * numbers, integers, and non-numeric characters.
 * The three alternatives match, in order: a dot followed by digits, a run of
 * digits, and a run of non-digit characters. The global flag lets
 * String.prototype.match return every token at once.
 * @type {RegExp}
 * @private
 */
goog.string.numerateCompareRegExp_ = /(\.\d+)|(\d+)|(\D+)/g;
337
338
/**
 * Compares two strings the way a person would order them when they contain
 * numbers: "File 2.jpg" sorts before "File 10.jpg". The comparison is mostly
 * case-insensitive, though strings that are identical except for case are
 * sorted with the upper-case strings before lower-case.
 *
 * This comparison function is significantly slower (about 500x) than either
 * the default or the case-insensitive compare. It should not be used in
 * time-critical code, but should be fast enough to sort several hundred short
 * strings (like filenames) with a reasonable delay.
 *
 * @param {string} str1 The string to compare in a numerically sensitive way.
 * @param {string} str2 The string to compare {@code str1} to.
 * @return {number} less than 0 if str1 < str2, 0 if str1 == str2, greater than
 *     0 if str1 > str2.
 */
goog.string.numerateCompare = function(str1, str2) {
  if (str1 == str2) {
    return 0;
  }
  // An empty (falsy) string sorts before any non-empty one.
  if (!str1) {
    return -1;
  }
  if (!str2) {
    return 1;
  }

  // Tokenizing both strings up front with match() turns out to be faster for
  // most inputs than using RegExp.exec or walking character by character.
  var tokenPattern = goog.string.numerateCompareRegExp_;
  var parts1 = str1.toLowerCase().match(tokenPattern);
  var parts2 = str2.toLowerCase().match(tokenPattern);

  var shared = Math.min(parts1.length, parts2.length);

  for (var idx = 0; idx < shared; idx++) {
    var left = parts1[idx];
    var right = parts2[idx];

    if (left == right) {
      continue;
    }

    // The first differing pair of tokens decides the result. Only when both
    // tokens are integers is a numeric comparison needed; decimal fractions
    // are compared as strings (e.g., '.09' < '.1').
    var leftNum = parseInt(left, 10);
    if (!isNaN(leftNum)) {
      var rightNum = parseInt(right, 10);
      var delta = leftNum - rightNum;
      if (!isNaN(rightNum) && delta) {
        return delta;
      }
    }
    return left < right ? -1 : 1;
  }

  // All shared tokens matched, so one token list is a prefix of the other;
  // the shorter one sorts first.
  var lengthDelta = parts1.length - parts2.length;
  if (lengthDelta) {
    return lengthDelta;
  }

  // The two strings differ only in case (exact equality was ruled out at the
  // top). Fall back to the default ASCII-betical comparison to stabilize the
  // sort.
  return str1 < str2 ? -1 : 1;
};
403
404
/**
 * Regular expression used for determining if a string needs to be encoded.
 * It matches strings made up entirely of ASCII letters, digits and the
 * characters - _ . ! ~ * ' ( ), including the empty string. urlEncode returns
 * such strings unchanged.
 * @type {RegExp}
 * @private
 */
goog.string.encodeUriRegExp_ = /^[a-zA-Z0-9\-_.!~*'()]*$/;
411
412
/**
 * URL-encodes a string using encodeURIComponent.
 * @param {*} str The string to url-encode. Non-string values are converted to
 *     strings first.
 * @return {string} An encoded copy of {@code str} that is safe for urls.
 *     Note that '#', ':', and other characters used to delimit portions
 *     of URLs *will* be encoded.
 */
goog.string.urlEncode = function(str) {
  var text = String(str);
  // Skipping encodeURIComponent when nothing needs encoding avoids an extra
  // allocation in IE6. The check costs about 10us in FF and a similar amount
  // in IE6 for apps with a small working set, but for large working set apps
  // like Gmail it saves about 70us per call.
  var needsNoEncoding = goog.string.encodeUriRegExp_.test(text);
  return needsNoEncoding ? text : encodeURIComponent(text);
};
431
432
/**
 * URL-decodes a string. '+' characters are turned into spaces before
 * decoding because decodeURIComponent does not do that itself.
 * @param {string} str The string to url decode.
 * @return {string} The decoded {@code str}.
 */
goog.string.urlDecode = function(str) {
  var withSpaces = str.replace(/\+/g, ' ');
  return decodeURIComponent(withSpaces);
};
442
443
/**
 * Replaces each newline (\r\n, \r or \n) with an HTML line break: <br>, or
 * <br /> when XML compatible output is requested.
 * @param {string} str The string in which to convert newlines.
 * @param {boolean=} opt_xml Whether to use XML compatible tags.
 * @return {string} A copy of {@code str} with converted newlines.
 */
goog.string.newLineToBr = function(str, opt_xml) {
  var lineBreakTag = opt_xml ? '<br />' : '<br>';
  return str.replace(/(\r\n|\r|\n)/g, lineBreakTag);
};
453
454
/**
 * Escapes '&', '<', '>' and the double quote '"' so that a string can be
 * placed in HTML text or inside a double-quoted HTML tag attribute value.
 * Single quotes are not escaped.
 *
 * Strictly speaking > does not need escaping for the HTML or XML to be valid;
 * it is escaped for consistency with other implementations.
 *
 * Performance notes:
 * htmlEscape is often called while generating large blocks of HTML. Keeping
 * the regular expressions and strings as statics can more than halve the time
 * IE spends in this function for large apps, since strings and regexes both
 * contribute to GC allocations.
 *
 * Testing for the presence of a character before escaping it adds function
 * calls but is faster in the average case, because most strings do not need
 * all 4 characters escaped and indexOf() is much cheaper than replace(). The
 * worst case suffers slightly from the extra calls, which is why
 * opt_isLikelyToContainHtmlChars exists for inputs where all 4 characters are
 * very likely to be present.
 *
 * Some benchmarks (times tended to fluctuate +-0.05ms):
 *                                     FireFox                     IE6
 * (no chars / average (mix of cases) / all 4 chars)
 * no checks                     0.13 / 0.22 / 0.22         0.23 / 0.53 / 0.80
 * indexOf                       0.08 / 0.17 / 0.26         0.22 / 0.54 / 0.84
 * indexOf + re test             0.07 / 0.17 / 0.28         0.19 / 0.50 / 0.85
 *
 * Skipping unnecessary replace() calls also reduces object allocations, so
 * the gap between the methods widens as an application grows.
 *
 * @param {string} str string to be escaped.
 * @param {boolean=} opt_isLikelyToContainHtmlChars Don't perform a check to see
 *     if the character needs replacing - use this option if you expect each of
 *     the characters to appear often. Leave false if you expect few html
 *     characters to occur in your strings, such as if you are escaping HTML.
 * @return {string} An escaped copy of {@code str}.
 */
goog.string.htmlEscape = function(str, opt_isLikelyToContainHtmlChars) {
  if (!opt_isLikelyToContainHtmlChars) {
    // A single quick test covers the common case of nothing to escape; in the
    // worst case it barely affects the time taken.
    if (!goog.string.allRe_.test(str)) {
      return str;
    }

    // str.indexOf is faster than regex.test for the per-character checks.
    // '&' must be handled first so the entities produced by the later
    // replacements are not escaped a second time.
    var escaped = str;
    if (escaped.indexOf('&') != -1) {
      escaped = escaped.replace(goog.string.amperRe_, '&amp;');
    }
    if (escaped.indexOf('<') != -1) {
      escaped = escaped.replace(goog.string.ltRe_, '&lt;');
    }
    if (escaped.indexOf('>') != -1) {
      escaped = escaped.replace(goog.string.gtRe_, '&gt;');
    }
    if (escaped.indexOf('"') != -1) {
      escaped = escaped.replace(goog.string.quotRe_, '&quot;');
    }
    return escaped;
  }

  // The caller expects all four characters to be present: replace
  // unconditionally, without the presence checks.
  var result = str.replace(goog.string.amperRe_, '&amp;');
  result = result.replace(goog.string.ltRe_, '&lt;');
  result = result.replace(goog.string.gtRe_, '&gt;');
  return result.replace(goog.string.quotRe_, '&quot;');
};
524
525
/**
 * Regular expression that matches every ampersand, for use in escaping.
 * Kept as a static so htmlEscape does not create a new RegExp on each call.
 * @type {RegExp}
 * @private
 */
goog.string.amperRe_ = /&/g;
532
533
/**
 * Regular expression that matches every less than sign, for use in escaping.
 * Kept as a static so htmlEscape does not create a new RegExp on each call.
 * @type {RegExp}
 * @private
 */
goog.string.ltRe_ = /</g;
540
541
/**
 * Regular expression that matches every greater than sign, for use in
 * escaping.
 * Kept as a static so htmlEscape does not create a new RegExp on each call.
 * @type {RegExp}
 * @private
 */
goog.string.gtRe_ = />/g;
548
549
/**
 * Regular expression that matches every double quote, for use in escaping.
 * Kept as a static so htmlEscape does not create a new RegExp on each call.
 * @type {RegExp}
 * @private
 */
goog.string.quotRe_ = /\"/g;
556
557
/**
 * Regular expression that matches any character that needs to be escaped
 * (one of & < > "). It is not global, so test() carries no state between
 * calls; htmlEscape uses it as a quick check for whether any work is needed.
 * @type {RegExp}
 * @private
 */
goog.string.allRe_ = /[&<>\"]/;
564
565
/**
 * Unescapes an HTML string. Strings without an ampersand are returned
 * unchanged. Otherwise entities are resolved with the DOM when a document is
 * available, and with the pure XML entity decoder when it is not.
 *
 * @param {string} str The string to unescape.
 * @return {string} An unescaped copy of {@code str}.
 */
goog.string.unescapeEntities = function(str) {
  if (!goog.string.contains(str, '&')) {
    // No ampersand means there is nothing to unescape.
    return str;
  }
  // Be careful not to touch the DOM when there is none: check the global
  // object for a document before taking the DOM-based path.
  var hasDom = 'document' in goog.global;
  return hasDom ?
      goog.string.unescapeEntitiesUsingDom_(str) :
      goog.string.unescapePureXmlEntities_(str);
};
586
587
/**
 * Unescapes an HTML string using a DOM to resolve non-XML, non-numeric
 * entities. This function is XSS-safe and whitespace-preserving.
 *
 * Numeric entities for code points up to U+FFFF are decoded directly. Named
 * entities and numeric entities above U+FFFF are resolved by the browser's
 * HTML parser via innerHTML.
 * @private
 * @param {string} str The string to unescape.
 * @return {string} The unescaped {@code str} string.
 */
goog.string.unescapeEntitiesUsingDom_ = function(str) {
  // Per-call cache of resolved entities, seeded with the XML entities.
  var seen = {'&amp;': '&', '&lt;': '<', '&gt;': '>', '&quot;': '"'};
  var div = document.createElement('div');
  // Match as many valid entity characters as possible. If the actual entity
  // happens to be shorter, it will still work as innerHTML will return the
  // trailing characters unchanged. Since the entity characters do not include
  // open angle bracket, there is no chance of XSS from the innerHTML use.
  // Since no whitespace is passed to innerHTML, whitespace is preserved.
  return str.replace(goog.string.HTML_ENTITY_PATTERN_, function(s, entity) {
    // Check for cached entity.
    var value = seen[s];
    if (value) {
      return value;
    }
    // Check for numeric entity.
    if (entity.charAt(0) == '#') {
      // Prefix with 0 so that hex entities (e.g. &#x10) parse as hex numbers.
      var n = Number('0' + entity.substr(1));
      // String.fromCharCode truncates its argument to 16 bits, so code points
      // above U+FFFF would decode to the wrong character. Leave those to the
      // innerHTML fallback below, which yields the correct surrogate pair.
      if (!isNaN(n) && n <= 0xFFFF) {
        value = String.fromCharCode(n);
      }
    }
    // Fall back to innerHTML otherwise.
    if (!value) {
      // Append a non-entity character to avoid a bug in Webkit that parses
      // an invalid entity at the end of innerHTML text as the empty string.
      div.innerHTML = s + ' ';
      // Then remove the trailing character from the result.
      value = div.firstChild.nodeValue.slice(0, -1);
    }
    // Cache and return.
    return seen[s] = value;
  });
};
629
630
/**
 * Unescapes XML entities without using a DOM: the four named entities
 * (amp, lt, gt, quot) and decimal or hexadecimal numeric entities. Anything
 * else, including numeric values beyond U+10FFFF, is left untouched.
 * @private
 * @param {string} str The string to unescape.
 * @return {string} An unescaped copy of {@code str}.
 */
goog.string.unescapePureXmlEntities_ = function(str) {
  return str.replace(/&([^;]+);/g, function(s, entity) {
    switch (entity) {
      case 'amp':
        return '&';
      case 'lt':
        return '<';
      case 'gt':
        return '>';
      case 'quot':
        return '"';
      default:
        if (entity.charAt(0) == '#') {
          // Prefix with 0 so that hex entities (e.g. &#x10) parse as hex.
          var n = Number('0' + entity.substr(1));
          if (!isNaN(n)) {
            if (n <= 0xFFFF) {
              return String.fromCharCode(n);
            }
            // String.fromCharCode truncates its arguments to 16 bits, so a
            // code point above U+FFFF has to be emitted as a UTF-16
            // surrogate pair.
            if (n <= 0x10FFFF) {
              var offset = n - 0x10000;
              return String.fromCharCode(
                  0xD800 + (offset >> 10), 0xDC00 + (offset & 0x3FF));
            }
          }
        }
        // For invalid entities we just return the entity
        return s;
    }
  });
};
661
662
/**
 * Regular expression that matches an HTML entity.
 * See also HTML5: Tokenization / Tokenizing character references.
 * A match is an ampersand followed by one or more characters that are not
 * ';', whitespace, '<' or '&', with an optional terminating ';'. The single
 * capture group holds the text between the ampersand and the semicolon.
 * @private
 * @type {!RegExp}
 */
goog.string.HTML_ENTITY_PATTERN_ = /&([^;\s<&]+);?/g;
670
671
/**
 * Escapes whitespace so that spatial formatting survives HTML rendering:
 * every pair of consecutive spaces becomes a space followed by a
 * non-breaking space entity, and newlines become line break tags. The
 * numeric entity #160 is used to make it safer for xml.
 * @param {string} str The string in which to escape whitespace.
 * @param {boolean=} opt_xml Whether to use XML compatible tags.
 * @return {string} An escaped copy of {@code str}.
 */
goog.string.whitespaceEscape = function(str, opt_xml) {
  var withHardSpaces = str.replace(/  /g, ' &#160;');
  return goog.string.newLineToBr(withHardSpaces, opt_xml);
};
682
683
/**
 * Removes a matching pair of quote characters from around a string. The
 * second argument lists the characters to treat as quotes; it may be a single
 * character or several, each of which is tried in turn. The quotes are only
 * stripped when the same character appears at both ends. For example:
 *
 * <pre>
 * goog.string.stripQuotes('"abc"', '"`') --> 'abc'
 * goog.string.stripQuotes('`abc`', '"`') --> 'abc'
 * </pre>
 *
 * @param {string} str The string to strip.
 * @param {string} quoteChars The quote characters to strip.
 * @return {string} A copy of {@code str} without the quotes.
 */
goog.string.stripQuotes = function(str, quoteChars) {
  for (var idx = 0, total = quoteChars.length; idx < total; idx++) {
    // With a single quote character there is no need to index into it.
    var quote = total == 1 ? quoteChars : quoteChars.charAt(idx);
    var lastPos = str.length - 1;
    if (str.charAt(0) != quote || str.charAt(lastPos) != quote) {
      continue;
    }
    return str.substring(1, lastPos);
  }
  return str;
};
709
710
/**
 * Shortens a string to a maximum length, appending '...' when anything was
 * cut. The ellipsis counts towards the length, so a maximum length of 10 and
 * the string 'Hello World!' produces 'Hello W...'.
 * @param {string} str The string to truncate.
 * @param {number} chars Max number of characters.
 * @param {boolean=} opt_protectEscapedCharacters Whether to protect escaped
 *     characters from being cut off in the middle. When set, the string is
 *     unescaped before truncation and HTML-escaped again afterwards.
 * @return {string} The truncated {@code str} string.
 */
goog.string.truncate = function(str, chars, opt_protectEscapedCharacters) {
  var text = opt_protectEscapedCharacters ?
      goog.string.unescapeEntities(str) : str;

  if (text.length > chars) {
    // Reserve three characters for the ellipsis.
    text = text.substring(0, chars - 3) + '...';
  }

  return opt_protectEscapedCharacters ? goog.string.htmlEscape(text) : text;
};
736
737
/**
 * Shortens a string by cutting out its middle and inserting "..." in the gap,
 * favoring the beginning of the string. Unlike goog.string.truncate, the
 * ellipsis is added on top of the {@code chars} characters that are kept.
 * @param {string} str The string to truncate the middle of.
 * @param {number} chars Max number of characters.
 * @param {boolean=} opt_protectEscapedCharacters Whether to protect escaped
 *     characters from being cutoff in the middle. When set, the string is
 *     unescaped before truncation and HTML-escaped again afterwards.
 * @param {number=} opt_trailingChars Optional number of trailing characters to
 *     leave at the end of the string, instead of truncating as close to the
 *     middle as possible.
 * @return {string} A truncated copy of {@code str}.
 */
goog.string.truncateMiddle = function(str, chars,
    opt_protectEscapedCharacters, opt_trailingChars) {
  var text = opt_protectEscapedCharacters ?
      goog.string.unescapeEntities(str) : str;

  if (text.length > chars) {
    var headLength;
    var tailStart;
    if (opt_trailingChars) {
      // The caller fixed the size of the tail; it can never exceed the total.
      var tailLength = opt_trailingChars;
      if (tailLength > chars) {
        tailLength = chars;
      }
      tailStart = text.length - tailLength;
      headLength = chars - tailLength;
    } else {
      // Split evenly, giving the extra character to the beginning of the
      // string when chars is odd.
      var half = Math.floor(chars / 2);
      tailStart = text.length - half;
      headLength = half + chars % 2;
    }
    text = text.substring(0, headLength) + '...' + text.substring(tailStart);
  }

  return opt_protectEscapedCharacters ? goog.string.htmlEscape(text) : text;
};
777
778
/**
 * Special chars that need to be escaped for goog.string.quote.
 * Maps each character to the escape sequence that represents it inside a
 * double-quoted JavaScript string literal. Also consulted by
 * goog.string.escapeChar.
 * @private
 * @type {Object}
 */
goog.string.specialEscapeChars_ = {
  '\0': '\\0',
  '\b': '\\b',
  '\f': '\\f',
  '\n': '\\n',
  '\r': '\\r',
  '\t': '\\t',
  '\x0B': '\\x0B', // '\v' is not supported in JScript
  '"': '\\"',
  '\\': '\\\\'
};
795
796
/**
 * Character mappings used internally for goog.string.escapeChar.
 * Acts as a cache from a character to its escaped form; escapeChar adds an
 * entry for every character it processes. It is seeded with the single quote,
 * which escapeChar would otherwise return unescaped.
 * @private
 * @type {Object}
 */
goog.string.jsEscapeCache_ = {
  '\'': '\\\''
};
805
806
/**
 * Wraps a string in double quotes, escaping characters as needed so that the
 * result is a valid JS string literal. A native {@code quote} method on the
 * string is used when one exists.
 * @param {string} s The string to quote.
 * @return {string} A copy of {@code s} surrounded by double quotes.
 */
goog.string.quote = function(s) {
  s = String(s);
  if (s.quote) {
    return s.quote();
  }

  var pieces = ['"'];
  for (var i = 0; i < s.length; i++) {
    var ch = s.charAt(i);
    var code = ch.charCodeAt(0);
    var special = goog.string.specialEscapeChars_[ch];
    if (special) {
      pieces.push(special);
    } else if (code > 31 && code < 127) {
      // Printable ASCII is emitted as-is.
      pieces.push(ch);
    } else {
      pieces.push(goog.string.escapeChar(ch));
    }
  }
  pieces.push('"');
  return pieces.join('');
};
829
830
/**
 * Escapes a string by passing each of its characters through
 * goog.string.escapeChar and joining the results.
 * @param {string} str The string to escape.
 * @return {string} An escaped string representing {@code str}.
 */
goog.string.escapeString = function(str) {
  var escapedChars = [];
  for (var i = 0, len = str.length; i < len; i++) {
    escapedChars.push(goog.string.escapeChar(str.charAt(i)));
  }
  return escapedChars.join('');
};
843
844
/**
 * Takes a character and returns the escaped string for that character. For
 * example escapeChar(String.fromCharCode(15)) -> "\\x0F".
 * Printable ASCII is returned unchanged, other characters below 256 become
 * \xHH and the rest become \uHHHH. Results are cached in
 * goog.string.jsEscapeCache_.
 * @param {string} c The character to escape.
 * @return {string} An escaped string representing {@code c}.
 */
goog.string.escapeChar = function(c) {
  var cache = goog.string.jsEscapeCache_;
  if (c in cache) {
    return cache[c];
  }

  var specials = goog.string.specialEscapeChars_;
  if (c in specials) {
    return cache[c] = specials[c];
  }

  var code = c.charCodeAt(0);
  var escaped;
  if (code > 31 && code < 127) {
    escaped = c;
  } else {
    // Pick the escape prefix, zero-padded so the hex digits that follow fill
    // two places (\x) or four places (\u). Tab is 9 but was handled above.
    var prefix;
    if (code < 256) {
      prefix = code < 16 ? '\\x0' : '\\x';
    } else {
      prefix = code < 4096 ? '\\u0' : '\\u'; // 4096 is \u1000
    }
    escaped = prefix + code.toString(16).toUpperCase();
  }

  return cache[c] = escaped;
};
882
883
/**
 * Builds a map (Object) whose keys are the characters of a string, each
 * mapped to true. You can then use goog.object.map or goog.array.map to
 * change the values.
 * @param {string} s The string to build the map from.
 * @return {Object} The map of characters used.
 */
// TODO(arv): It seems like we should have a generic goog.array.toMap. But do
//            we want a dependency on goog.array in goog.string?
goog.string.toMap = function(s) {
  var charMap = {};
  for (var pos = 0, len = s.length; pos < len; pos++) {
    var ch = s.charAt(pos);
    charMap[ch] = true;
  }
  return charMap;
};
900
901
/**
 * Checks whether a string contains a given substring.
 * @param {string} s The string to test.
 * @param {string} ss The substring to test for.
 * @return {boolean} True if {@code s} contains {@code ss}.
 */
goog.string.contains = function(s, ss) {
  var position = s.indexOf(ss);
  return position != -1;
};
911
912
/**
 * Removes a substring of a specified length at a specific
 * index in a string.
 * @param {string} s The base string from which to remove.
 * @param {number} index The index at which to remove the substring.
 * @param {number} stringLength The length of the substring to remove.
 * @return {string} A copy of {@code s} with the substring removed or the full
 *     string if nothing is removed or the input is invalid.
 */
goog.string.removeAt = function(s, index, stringLength) {
  // Nothing is removed unless the index lies inside the string and the length
  // to remove is positive.
  var isValidRange = index >= 0 && index < s.length && stringLength > 0;
  if (!isValidRange) {
    return s;
  }
  var head = s.substr(0, index);
  var tail = s.substr(index + stringLength, s.length - index - stringLength);
  return head + tail;
};
931
932
/**
 *  Removes the first occurrence of a substring from a string. The substring
 *  is matched literally; no character in it has a special meaning.
 *  @param {string} s The base string from which to remove.
 *  @param {string} ss The string to remove. If not a string, it is converted
 *      to one before searching.
 *  @return {string} A copy of {@code s} with the first {@code ss} removed, or
 *      the full string if {@code ss} does not occur.
 */
goog.string.remove = function(s, ss) {
  // With a string pattern, replace() substitutes only the first literal
  // occurrence, so there is no need to build and escape a RegExp per call.
  return s.replace(String(ss), '');
};
944
945
/**
 *  Removes every occurrence of a substring from a string.
 *  @param {string} s The base string from which to remove.
 *  @param {string} ss The string to remove.
 *  @return {string} A copy of {@code s} with all occurrences of {@code ss}
 *      removed, or the full string if nothing is removed.
 */
goog.string.removeAll = function(s, ss) {
  // Escape first so that {@code ss} is matched literally by the global
  // pattern.
  var literalPattern = goog.string.regExpEscape(ss);
  return s.replace(new RegExp(literalPattern, 'g'), '');
};
957
958
/**
 * Produces a copy of the input in which characters that are not safe to use
 * in a RegExp are escaped.
 * @param {*} s The value to escape. If not a string, it will be converted to
 *     one.
 * @return {string} A RegExp safe, escaped copy of {@code s}.
 */
goog.string.regExpEscape = function(s) {
  var text = String(s);
  // Prefix each listed punctuation character with a backslash.
  var escaped = text.replace(/([-()\[\]{}+?*.$\^|,:#<!\\])/g, '\\$1');
  // Spell the backspace character out as the escape sequence \x08.
  return escaped.replace(/\x08/g, '\\x08');
};
969
970
/**
 * Repeats a string n times.
 * @param {string} string The string to repeat.
 * @param {number} length The number of repetitions.
 * @return {string} {@code string} concatenated with itself {@code length}
 *     times.
 */
goog.string.repeat = function(string, length) {
  // An array with length + 1 empty slots has exactly {@code length} gaps for
  // the separator to fill.
  var slots = new Array(length + 1);
  return slots.join(string);
};
981
982
/**
 * Pads the integer part of a number with leading zeros up to a given number
 * of digits and optionally rounds it to a given precision. A minus sign is
 * kept in front of the padding and does not count towards {@code length}.
 * NaN and infinite numbers are returned unpadded.
 * For example:
 * <pre>padNumber(1.25, 2, 3) -> '01.250'
 * padNumber(1.25, 2) -> '01.25'
 * padNumber(1.25, 2, 1) -> '01.3'
 * padNumber(1.25, 0) -> '1.25'
 * padNumber(-1.25, 2) -> '-01.25'
 * padNumber(NaN, 3) -> 'NaN'</pre>
 *
 * @param {number} num The number to pad.
 * @param {number} length The desired minimum number of digits before the
 *     decimal point.
 * @param {number=} opt_precision The desired precision.
 * @return {string} {@code num} as a string with the given options.
 */
goog.string.padNumber = function(num, length, opt_precision) {
  // NaN and +/-Infinity have no digits that could be padded.
  if (typeof num == 'number' && !isFinite(num)) {
    return String(num);
  }
  var s = goog.isDef(opt_precision) ? num.toFixed(opt_precision) : String(num);
  // Detach the sign so the zeros end up between it and the digits
  // ('-05' rather than '0-5').
  var sign = '';
  if (s.charAt(0) == '-') {
    sign = '-';
    s = s.substring(1);
  }
  // Number of characters in front of the decimal point.
  var index = s.indexOf('.');
  if (index == -1) {
    index = s.length;
  }
  return sign + goog.string.repeat('0', Math.max(0, length - index)) + s;
};
1004
1005
/**
 * Converts a value to a string, mapping null and undefined to the empty
 * string instead of 'null' and 'undefined'.
 *
 * @param {*} obj The value to convert.
 * @return {string} A string representation of {@code obj}.
 */
goog.string.makeSafe = function(obj) {
  // Loose comparison on purpose: it matches both null and undefined.
  if (obj == null) {
    return '';
  }
  return String(obj);
};
1016
1017
/**
 * Concatenates all arguments into one string. This is useful since some
 * browsers are very inefficient when plus is used to concatenate strings.
 * Be careful with null and undefined: they contribute nothing to the result.
 * If they need to show up, cast the argument to a String first.
 * For example:
 * <pre>buildString('a', 'b', 'c', 'd') -> 'abcd'
 * buildString(null, undefined) -> ''
 * </pre>
 * @param {...*} var_args The values to concatenate. Values that are not
 *     strings are converted to strings.
 * @return {string} The concatenation of {@code var_args}.
 */
goog.string.buildString = function(var_args) {
  // Borrow Array's join so the arguments object can be joined directly.
  var join = Array.prototype.join;
  return join.call(arguments, '');
};
1035
1036
/**
 * Returns a short pseudo-random string.
 *
 * Two integers in the range [0, 2^31) are drawn from Math.random(). Because
 * Javascript's random function is not trusted entirely, the second integer is
 * XOR-ed with the current timestamp. Both values are encoded in base-36 to
 * keep the string short and then concatenated. Math.random() is not a
 * cryptographically secure source.
 *
 * @return {string} A random string, e.g. sn1s7vb4gcic.
 */
goog.string.getRandomString = function() {
  var range = 2147483648;  // 2^31
  var randomPart = Math.floor(Math.random() * range);
  // The XOR works on 32-bit signed integers and may produce a negative
  // value, hence the Math.abs below.
  var timedPart = Math.floor(Math.random() * range) ^ goog.now();
  return randomPart.toString(36) + Math.abs(timedPart).toString(36);
};
1051
1052
/**
 * Compares two version numbers.
 *
 * Both versions are converted to strings, trimmed and split at '.' into
 * subversions, which are compared from left to right until a difference is
 * found. A subversion that is missing on one side is treated as empty.
 *
 * @param {string|number} version1 Version of first item.
 * @param {string|number} version2 Version of second item.
 *
 * @return {number}  1 if {@code version1} is higher.
 *                   0 if arguments are equal.
 *                  -1 if {@code version2} is higher.
 */
goog.string.compareVersions = function(version1, version2) {
  var parts1 = goog.string.trim(String(version1)).split('.');
  var parts2 = goog.string.trim(String(version2)).split('.');
  var partCount = Math.max(parts1.length, parts2.length);
  var result = 0;

  // Walk the subversions until one pair differs.
  for (var i = 0; i < partCount && result == 0; i++) {
    var part1 = parts1[i] || '';
    var part2 = parts2[i] || '';

    // Each subversion is read as a sequence of (number, qualifier) pairs,
    // e.g. '10b2' -> ('10', 'b'), ('2', ''). The parsers use the 'g' flag and
    // therefore keep their own position, so each side needs its own object.
    var parser1 = new RegExp('(\\d*)(\\D*)', 'g');
    var parser2 = new RegExp('(\\d*)(\\D*)', 'g');

    while (true) {
      var pair1 = parser1.exec(part1) || ['', '', ''];
      var pair2 = parser2.exec(part2) || ['', '', ''];

      // Both subversions are used up.
      if (pair1[0].length == 0 && pair2[0].length == 0) {
        break;
      }

      // A missing number counts as 0.
      var number1 = pair1[1].length == 0 ? 0 : parseInt(pair1[1], 10);
      var number2 = pair2[1].length == 0 ? 0 : parseInt(pair2[1], 10);

      // The numbers have the highest precedence.
      result = goog.string.compareElements_(number1, number2);
      if (result == 0) {
        // With equal numbers, a pair without qualifier is higher than a pair
        // with any qualifier.
        result = goog.string.compareElements_(pair1[2].length == 0,
            pair2[2].length == 0);
      }
      if (result == 0) {
        // Finally the qualifiers are compared as strings.
        result = goog.string.compareElements_(pair1[2], pair2[2]);
      }

      // Stop as soon as an inequality is discovered.
      if (result != 0) {
        break;
      }
    }
  }

  return result;
};
1108
1109
/**
 * Three-way comparison of two elements of a version number, using the
 * built-in {@code <} and {@code >} operators.
 *
 * @param {string|number|boolean} left An element from a version number.
 * @param {string|number|boolean} right An element from a version number.
 *
 * @return {number}  1 if {@code left} is higher.
 *                   0 if arguments are equal.
 *                  -1 if {@code right} is higher.
 * @private
 */
goog.string.compareElements_ = function(left, right) {
  if (left < right) {
    return -1;
  }
  return left > right ? 1 : 0;
};
1129
1130
/**
 * Exclusive upper bound of the values returned by goog.string.hashCode:
 * 2^32 (0x100000000).
 * @type {number}
 * @private
 */
goog.string.HASHCODE_MAX_ = Math.pow(2, 32);
1137
1138
/**
 * String hash function similar to java.lang.String.hashCode().
 * The hash code for a string is computed as
 * s[0] * 31 ^ (n - 1) + s[1] * 31 ^ (n - 2) + ... + s[n - 1],
 * where s[i] is the character code of the ith character and n is the length
 * of the string. The running value is reduced modulo 2^32 after every
 * character, so the result lies between 0 (inclusive) and 2^32 (exclusive).
 * @param {string} str A string.
 * @return {number} Hash value for {@code str}, between 0 (inclusive) and 2^32
 *  (exclusive). The empty string returns 0.
 */
goog.string.hashCode = function(str) {
  var hash = 0;
  for (var pos = 0; pos < str.length; pos++) {
    // Multiply-and-add step, normalized to the 4 byte range 0 ... 2^32.
    hash = (31 * hash + str.charCodeAt(pos)) % goog.string.HASHCODE_MAX_;
  }
  return hash;
};
1159
1160
/**
 * Counter behind goog.string.createUniqueString. It starts at a random
 * integer between 0 (inclusive) and 2^31 (exclusive).
 * @type {number}
 * @private
 */
goog.string.uniqueStringCounter_ = Math.floor(Math.random() * 0x80000000);
1167
1168
/**
 * Generates and returns a string which is unique in the current document.
 * This is useful, for example, to create unique IDs for DOM elements.
 * Each call consumes one value of the internal counter.
 * @return {string} A unique id of the form 'goog_' followed by a number.
 */
goog.string.createUniqueString = function() {
  var id = goog.string.uniqueStringCounter_;
  goog.string.uniqueStringCounter_ = id + 1;
  return 'goog_' + id;
};
1177
1178
/**
 * Converts the supplied string to a number, which may be Infinity or NaN.
 * This function strips whitespace: (toNumber(' 123') === 123)
 * This function accepts scientific notation: (toNumber('1e1') === 10)
 *
 * This is better than Javascript's built-in conversions because, sadly:
 *     (Number(' ') === 0) and (parseFloat('123a') === 123)
 *
 * @param {string} str The string to convert.
 * @return {number} The number the supplied string represents, or NaN.
 */
goog.string.toNumber = function(str) {
  var parsed = Number(str);
  // Number() turns an empty string into 0; report that case as NaN instead.
  var isEmptyZero = parsed == 0 && goog.string.isEmpty(str);
  return isEmptyZero ? NaN : parsed;
};
1197
1198
/**
 * A memoized cache for goog.string.toCamelCase, mapping input strings to
 * their converted form. It is a plain object and therefore also exposes the
 * properties inherited from Object.prototype; readers must only trust its own
 * properties.
 * @type {Object.<string>}
 * @private
 */
goog.string.toCamelCaseCache_ = {};


/**
 * Converts a string from selector-case to camelCase (e.g. from
 * "multi-part-string" to "multiPartString"), useful for converting
 * CSS selectors and HTML dataset keys to their equivalent JS properties.
 * Results are memoized per input string.
 * @param {string} str The string in selector-case form.
 * @return {string} The string in camelCase form.
 */
goog.string.toCamelCase = function(str) {
  var cache = goog.string.toCamelCaseCache_;
  // Only own entries are cached results. A bare cache[str] lookup would hand
  // back inherited members such as Object.prototype.constructor for inputs
  // like 'constructor' or 'toString'.
  if (Object.prototype.hasOwnProperty.call(cache, str)) {
    return cache[str];
  }
  // Replace every '-x' (x being a lowercase ASCII letter) with 'X'.
  var camelCased = String(str).replace(/\-([a-z])/g, function(all, letter) {
    return letter.toUpperCase();
  });
  cache[str] = camelCased;
  return camelCased;
};
1221
1222
/**
 * A memoized cache for goog.string.toSelectorCase, mapping input strings to
 * their converted form. It is a plain object and therefore also exposes the
 * properties inherited from Object.prototype; readers must only trust its own
 * properties.
 * @type {Object.<string>}
 * @private
 */
goog.string.toSelectorCaseCache_ = {};


/**
 * Converts a string from camelCase to selector-case (e.g. from
 * "multiPartString" to "multi-part-string"), useful for converting JS
 * style and dataset properties to equivalent CSS selectors and HTML keys.
 * Results are memoized per input string.
 * @param {string} str The string in camelCase form.
 * @return {string} The string in selector-case form.
 */
goog.string.toSelectorCase = function(str) {
  var cache = goog.string.toSelectorCaseCache_;
  // Only own entries are cached results. A bare cache[str] lookup would hand
  // back inherited members such as Object.prototype.constructor for inputs
  // like 'constructor' or 'valueOf'.
  if (Object.prototype.hasOwnProperty.call(cache, str)) {
    return cache[str];
  }
  // Put a dash in front of every uppercase ASCII letter, then lowercase the
  // whole string.
  var selectorCased = String(str).replace(/([A-Z])/g, '-$1').toLowerCase();
  cache[str] = selectorCased;
  return selectorCased;
};
1243