string_util.cc revision f2477e01787aa58f445919b809d89e252beef54f
1// Copyright 2013 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "base/strings/string_util.h"
6
7#include <ctype.h>
8#include <errno.h>
9#include <math.h>
10#include <stdarg.h>
11#include <stdio.h>
12#include <stdlib.h>
13#include <string.h>
14#include <time.h>
15#include <wchar.h>
16#include <wctype.h>
17
18#include <algorithm>
19#include <vector>
20
21#include "base/basictypes.h"
22#include "base/logging.h"
23#include "base/memory/singleton.h"
24#include "base/strings/utf_string_conversion_utils.h"
25#include "base/strings/utf_string_conversions.h"
26#include "base/third_party/icu/icu_utf.h"
27#include "build/build_config.h"
28
29namespace {
30
31// Force the singleton used by Empty[W]String[16] to be a unique type. This
32// prevents other code that might accidentally use Singleton<string> from
33// getting our internal one.
34struct EmptyStrings {
35  EmptyStrings() {}
36  const std::string s;
37  const std::wstring ws;
38  const string16 s16;
39
40  static EmptyStrings* GetInstance() {
41    return Singleton<EmptyStrings>::get();
42  }
43};
44
45// Used by ReplaceStringPlaceholders to track the position in the string of
46// replaced parameters.
47struct ReplacementOffset {
48  ReplacementOffset(uintptr_t parameter, size_t offset)
49      : parameter(parameter),
50        offset(offset) {}
51
52  // Index of the parameter.
53  uintptr_t parameter;
54
55  // Starting position in the string.
56  size_t offset;
57};
58
59static bool CompareParameter(const ReplacementOffset& elem1,
60                             const ReplacementOffset& elem2) {
61  return elem1.parameter < elem2.parameter;
62}
63
64}  // namespace
65
66namespace base {
67
68bool IsWprintfFormatPortable(const wchar_t* format) {
69  for (const wchar_t* position = format; *position != '\0'; ++position) {
70    if (*position == '%') {
71      bool in_specification = true;
72      bool modifier_l = false;
73      while (in_specification) {
74        // Eat up characters until reaching a known specifier.
75        if (*++position == '\0') {
76          // The format string ended in the middle of a specification.  Call
77          // it portable because no unportable specifications were found.  The
78          // string is equally broken on all platforms.
79          return true;
80        }
81
82        if (*position == 'l') {
83          // 'l' is the only thing that can save the 's' and 'c' specifiers.
84          modifier_l = true;
85        } else if (((*position == 's' || *position == 'c') && !modifier_l) ||
86                   *position == 'S' || *position == 'C' || *position == 'F' ||
87                   *position == 'D' || *position == 'O' || *position == 'U') {
88          // Not portable.
89          return false;
90        }
91
92        if (wcschr(L"diouxXeEfgGaAcspn%", *position)) {
93          // Portable, keep scanning the rest of the format string.
94          in_specification = false;
95        }
96      }
97    }
98  }
99
100  return true;
101}
102
103}  // namespace base
104
105
106const std::string& EmptyString() {
107  return EmptyStrings::GetInstance()->s;
108}
109
110const std::wstring& EmptyWString() {
111  return EmptyStrings::GetInstance()->ws;
112}
113
114const string16& EmptyString16() {
115  return EmptyStrings::GetInstance()->s16;
116}
117
118template<typename STR>
119bool ReplaceCharsT(const STR& input,
120                   const typename STR::value_type replace_chars[],
121                   const STR& replace_with,
122                   STR* output) {
123  bool removed = false;
124  size_t replace_length = replace_with.length();
125
126  *output = input;
127
128  size_t found = output->find_first_of(replace_chars);
129  while (found != STR::npos) {
130    removed = true;
131    output->replace(found, 1, replace_with);
132    found = output->find_first_of(replace_chars, found + replace_length);
133  }
134
135  return removed;
136}
137
138bool ReplaceChars(const string16& input,
139                  const char16 replace_chars[],
140                  const string16& replace_with,
141                  string16* output) {
142  return ReplaceCharsT(input, replace_chars, replace_with, output);
143}
144
145bool ReplaceChars(const std::string& input,
146                  const char replace_chars[],
147                  const std::string& replace_with,
148                  std::string* output) {
149  return ReplaceCharsT(input, replace_chars, replace_with, output);
150}
151
152bool RemoveChars(const string16& input,
153                 const char16 remove_chars[],
154                 string16* output) {
155  return ReplaceChars(input, remove_chars, string16(), output);
156}
157
158bool RemoveChars(const std::string& input,
159                 const char remove_chars[],
160                 std::string* output) {
161  return ReplaceChars(input, remove_chars, std::string(), output);
162}
163
164template<typename STR>
165TrimPositions TrimStringT(const STR& input,
166                          const typename STR::value_type trim_chars[],
167                          TrimPositions positions,
168                          STR* output) {
169  // Find the edges of leading/trailing whitespace as desired.
170  const typename STR::size_type last_char = input.length() - 1;
171  const typename STR::size_type first_good_char = (positions & TRIM_LEADING) ?
172      input.find_first_not_of(trim_chars) : 0;
173  const typename STR::size_type last_good_char = (positions & TRIM_TRAILING) ?
174      input.find_last_not_of(trim_chars) : last_char;
175
176  // When the string was all whitespace, report that we stripped off whitespace
177  // from whichever position the caller was interested in.  For empty input, we
178  // stripped no whitespace, but we still need to clear |output|.
179  if (input.empty() ||
180      (first_good_char == STR::npos) || (last_good_char == STR::npos)) {
181    bool input_was_empty = input.empty();  // in case output == &input
182    output->clear();
183    return input_was_empty ? TRIM_NONE : positions;
184  }
185
186  // Trim the whitespace.
187  *output =
188      input.substr(first_good_char, last_good_char - first_good_char + 1);
189
190  // Return where we trimmed from.
191  return static_cast<TrimPositions>(
192      ((first_good_char == 0) ? TRIM_NONE : TRIM_LEADING) |
193      ((last_good_char == last_char) ? TRIM_NONE : TRIM_TRAILING));
194}
195
196bool TrimString(const string16& input,
197                const char16 trim_chars[],
198                string16* output) {
199  return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE;
200}
201
202bool TrimString(const std::string& input,
203                const char trim_chars[],
204                std::string* output) {
205  return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE;
206}
207
208void TruncateUTF8ToByteSize(const std::string& input,
209                            const size_t byte_size,
210                            std::string* output) {
211  DCHECK(output);
212  if (byte_size > input.length()) {
213    *output = input;
214    return;
215  }
216  DCHECK_LE(byte_size, static_cast<uint32>(kint32max));
217  // Note: This cast is necessary because CBU8_NEXT uses int32s.
218  int32 truncation_length = static_cast<int32>(byte_size);
219  int32 char_index = truncation_length - 1;
220  const char* data = input.data();
221
222  // Using CBU8, we will move backwards from the truncation point
223  // to the beginning of the string looking for a valid UTF8
224  // character.  Once a full UTF8 character is found, we will
225  // truncate the string to the end of that character.
226  while (char_index >= 0) {
227    int32 prev = char_index;
228    uint32 code_point = 0;
229    CBU8_NEXT(data, char_index, truncation_length, code_point);
230    if (!base::IsValidCharacter(code_point) ||
231        !base::IsValidCodepoint(code_point)) {
232      char_index = prev - 1;
233    } else {
234      break;
235    }
236  }
237
238  if (char_index >= 0 )
239    *output = input.substr(0, char_index);
240  else
241    output->clear();
242}
243
244TrimPositions TrimWhitespace(const string16& input,
245                             TrimPositions positions,
246                             string16* output) {
247  return TrimStringT(input, kWhitespaceUTF16, positions, output);
248}
249
250TrimPositions TrimWhitespaceASCII(const std::string& input,
251                                  TrimPositions positions,
252                                  std::string* output) {
253  return TrimStringT(input, kWhitespaceASCII, positions, output);
254}
255
256// This function is only for backward-compatibility.
257// To be removed when all callers are updated.
258TrimPositions TrimWhitespace(const std::string& input,
259                             TrimPositions positions,
260                             std::string* output) {
261  return TrimWhitespaceASCII(input, positions, output);
262}
263
264template<typename STR>
265STR CollapseWhitespaceT(const STR& text,
266                        bool trim_sequences_with_line_breaks) {
267  STR result;
268  result.resize(text.size());
269
270  // Set flags to pretend we're already in a trimmed whitespace sequence, so we
271  // will trim any leading whitespace.
272  bool in_whitespace = true;
273  bool already_trimmed = true;
274
275  int chars_written = 0;
276  for (typename STR::const_iterator i(text.begin()); i != text.end(); ++i) {
277    if (IsWhitespace(*i)) {
278      if (!in_whitespace) {
279        // Reduce all whitespace sequences to a single space.
280        in_whitespace = true;
281        result[chars_written++] = L' ';
282      }
283      if (trim_sequences_with_line_breaks && !already_trimmed &&
284          ((*i == '\n') || (*i == '\r'))) {
285        // Whitespace sequences containing CR or LF are eliminated entirely.
286        already_trimmed = true;
287        --chars_written;
288      }
289    } else {
290      // Non-whitespace chracters are copied straight across.
291      in_whitespace = false;
292      already_trimmed = false;
293      result[chars_written++] = *i;
294    }
295  }
296
297  if (in_whitespace && !already_trimmed) {
298    // Any trailing whitespace is eliminated.
299    --chars_written;
300  }
301
302  result.resize(chars_written);
303  return result;
304}
305
306string16 CollapseWhitespace(const string16& text,
307                            bool trim_sequences_with_line_breaks) {
308  return CollapseWhitespaceT(text, trim_sequences_with_line_breaks);
309}
310
311std::string CollapseWhitespaceASCII(const std::string& text,
312                                    bool trim_sequences_with_line_breaks) {
313  return CollapseWhitespaceT(text, trim_sequences_with_line_breaks);
314}
315
316bool ContainsOnlyWhitespaceASCII(const std::string& str) {
317  for (std::string::const_iterator i(str.begin()); i != str.end(); ++i) {
318    if (!IsAsciiWhitespace(*i))
319      return false;
320  }
321  return true;
322}
323
324bool ContainsOnlyWhitespace(const string16& str) {
325  return str.find_first_not_of(kWhitespaceUTF16) == string16::npos;
326}
327
328template<typename STR>
329static bool ContainsOnlyCharsT(const STR& input, const STR& characters) {
330  for (typename STR::const_iterator iter = input.begin();
331       iter != input.end(); ++iter) {
332    if (characters.find(*iter) == STR::npos)
333      return false;
334  }
335  return true;
336}
337
338bool ContainsOnlyChars(const string16& input, const string16& characters) {
339  return ContainsOnlyCharsT(input, characters);
340}
341
342bool ContainsOnlyChars(const std::string& input,
343                       const std::string& characters) {
344  return ContainsOnlyCharsT(input, characters);
345}
346
347#if !defined(WCHAR_T_IS_UTF16)
348bool IsStringASCII(const std::wstring& str);
349#endif
350
351std::string WideToASCII(const std::wstring& wide) {
352  DCHECK(IsStringASCII(wide)) << wide;
353  return std::string(wide.begin(), wide.end());
354}
355
356std::string UTF16ToASCII(const string16& utf16) {
357  DCHECK(IsStringASCII(utf16)) << utf16;
358  return std::string(utf16.begin(), utf16.end());
359}
360
361template<class STR>
362static bool DoIsStringASCII(const STR& str) {
363  for (size_t i = 0; i < str.length(); i++) {
364    typename ToUnsigned<typename STR::value_type>::Unsigned c = str[i];
365    if (c > 0x7F)
366      return false;
367  }
368  return true;
369}
370
371#if !defined(WCHAR_T_IS_UTF16)
372bool IsStringASCII(const std::wstring& str) {
373  return DoIsStringASCII(str);
374}
375#endif
376
377bool IsStringASCII(const string16& str) {
378  return DoIsStringASCII(str);
379}
380
381bool IsStringASCII(const base::StringPiece& str) {
382  return DoIsStringASCII(str);
383}
384
385bool IsStringUTF8(const std::string& str) {
386  const char *src = str.data();
387  int32 src_len = static_cast<int32>(str.length());
388  int32 char_index = 0;
389
390  while (char_index < src_len) {
391    int32 code_point;
392    CBU8_NEXT(src, char_index, src_len, code_point);
393    if (!base::IsValidCharacter(code_point))
394      return false;
395  }
396  return true;
397}
398
399template<typename Iter>
400static inline bool DoLowerCaseEqualsASCII(Iter a_begin,
401                                          Iter a_end,
402                                          const char* b) {
403  for (Iter it = a_begin; it != a_end; ++it, ++b) {
404    if (!*b || base::ToLowerASCII(*it) != *b)
405      return false;
406  }
407  return *b == 0;
408}
409
410// Front-ends for LowerCaseEqualsASCII.
411bool LowerCaseEqualsASCII(const std::string& a, const char* b) {
412  return DoLowerCaseEqualsASCII(a.begin(), a.end(), b);
413}
414
415bool LowerCaseEqualsASCII(const string16& a, const char* b) {
416  return DoLowerCaseEqualsASCII(a.begin(), a.end(), b);
417}
418
419bool LowerCaseEqualsASCII(std::string::const_iterator a_begin,
420                          std::string::const_iterator a_end,
421                          const char* b) {
422  return DoLowerCaseEqualsASCII(a_begin, a_end, b);
423}
424
425bool LowerCaseEqualsASCII(string16::const_iterator a_begin,
426                          string16::const_iterator a_end,
427                          const char* b) {
428  return DoLowerCaseEqualsASCII(a_begin, a_end, b);
429}
430
431// TODO(port): Resolve wchar_t/iterator issues that require OS_ANDROID here.
432#if !defined(OS_ANDROID)
433bool LowerCaseEqualsASCII(const char* a_begin,
434                          const char* a_end,
435                          const char* b) {
436  return DoLowerCaseEqualsASCII(a_begin, a_end, b);
437}
438
439bool LowerCaseEqualsASCII(const char16* a_begin,
440                          const char16* a_end,
441                          const char* b) {
442  return DoLowerCaseEqualsASCII(a_begin, a_end, b);
443}
444
445#endif  // !defined(OS_ANDROID)
446
447bool EqualsASCII(const string16& a, const base::StringPiece& b) {
448  if (a.length() != b.length())
449    return false;
450  return std::equal(b.begin(), b.end(), a.begin());
451}
452
453bool StartsWithASCII(const std::string& str,
454                     const std::string& search,
455                     bool case_sensitive) {
456  if (case_sensitive)
457    return str.compare(0, search.length(), search) == 0;
458  else
459    return base::strncasecmp(str.c_str(), search.c_str(), search.length()) == 0;
460}
461
462template <typename STR>
463bool StartsWithT(const STR& str, const STR& search, bool case_sensitive) {
464  if (case_sensitive) {
465    return str.compare(0, search.length(), search) == 0;
466  } else {
467    if (search.size() > str.size())
468      return false;
469    return std::equal(search.begin(), search.end(), str.begin(),
470                      base::CaseInsensitiveCompare<typename STR::value_type>());
471  }
472}
473
474bool StartsWith(const string16& str, const string16& search,
475                bool case_sensitive) {
476  return StartsWithT(str, search, case_sensitive);
477}
478
479template <typename STR>
480bool EndsWithT(const STR& str, const STR& search, bool case_sensitive) {
481  typename STR::size_type str_length = str.length();
482  typename STR::size_type search_length = search.length();
483  if (search_length > str_length)
484    return false;
485  if (case_sensitive) {
486    return str.compare(str_length - search_length, search_length, search) == 0;
487  } else {
488    return std::equal(search.begin(), search.end(),
489                      str.begin() + (str_length - search_length),
490                      base::CaseInsensitiveCompare<typename STR::value_type>());
491  }
492}
493
494bool EndsWith(const std::string& str, const std::string& search,
495              bool case_sensitive) {
496  return EndsWithT(str, search, case_sensitive);
497}
498
499bool EndsWith(const string16& str, const string16& search,
500              bool case_sensitive) {
501  return EndsWithT(str, search, case_sensitive);
502}
503
504static const char* const kByteStringsUnlocalized[] = {
505  " B",
506  " kB",
507  " MB",
508  " GB",
509  " TB",
510  " PB"
511};
512
513string16 FormatBytesUnlocalized(int64 bytes) {
514  double unit_amount = static_cast<double>(bytes);
515  size_t dimension = 0;
516  const int kKilo = 1024;
517  while (unit_amount >= kKilo &&
518         dimension < arraysize(kByteStringsUnlocalized) - 1) {
519    unit_amount /= kKilo;
520    dimension++;
521  }
522
523  char buf[64];
524  if (bytes != 0 && dimension > 0 && unit_amount < 100) {
525    base::snprintf(buf, arraysize(buf), "%.1lf%s", unit_amount,
526                   kByteStringsUnlocalized[dimension]);
527  } else {
528    base::snprintf(buf, arraysize(buf), "%.0lf%s", unit_amount,
529                   kByteStringsUnlocalized[dimension]);
530  }
531
532  return ASCIIToUTF16(buf);
533}
534
535template<class StringType>
536void DoReplaceSubstringsAfterOffset(StringType* str,
537                                    typename StringType::size_type start_offset,
538                                    const StringType& find_this,
539                                    const StringType& replace_with,
540                                    bool replace_all) {
541  if ((start_offset == StringType::npos) || (start_offset >= str->length()))
542    return;
543
544  DCHECK(!find_this.empty());
545  for (typename StringType::size_type offs(str->find(find_this, start_offset));
546      offs != StringType::npos; offs = str->find(find_this, offs)) {
547    str->replace(offs, find_this.length(), replace_with);
548    offs += replace_with.length();
549
550    if (!replace_all)
551      break;
552  }
553}
554
555void ReplaceFirstSubstringAfterOffset(string16* str,
556                                      string16::size_type start_offset,
557                                      const string16& find_this,
558                                      const string16& replace_with) {
559  DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,
560                                 false);  // replace first instance
561}
562
563void ReplaceFirstSubstringAfterOffset(std::string* str,
564                                      std::string::size_type start_offset,
565                                      const std::string& find_this,
566                                      const std::string& replace_with) {
567  DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,
568                                 false);  // replace first instance
569}
570
571void ReplaceSubstringsAfterOffset(string16* str,
572                                  string16::size_type start_offset,
573                                  const string16& find_this,
574                                  const string16& replace_with) {
575  DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,
576                                 true);  // replace all instances
577}
578
579void ReplaceSubstringsAfterOffset(std::string* str,
580                                  std::string::size_type start_offset,
581                                  const std::string& find_this,
582                                  const std::string& replace_with) {
583  DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,
584                                 true);  // replace all instances
585}
586
587
588template<typename STR>
589static size_t TokenizeT(const STR& str,
590                        const STR& delimiters,
591                        std::vector<STR>* tokens) {
592  tokens->clear();
593
594  typename STR::size_type start = str.find_first_not_of(delimiters);
595  while (start != STR::npos) {
596    typename STR::size_type end = str.find_first_of(delimiters, start + 1);
597    if (end == STR::npos) {
598      tokens->push_back(str.substr(start));
599      break;
600    } else {
601      tokens->push_back(str.substr(start, end - start));
602      start = str.find_first_not_of(delimiters, end + 1);
603    }
604  }
605
606  return tokens->size();
607}
608
609size_t Tokenize(const string16& str,
610                const string16& delimiters,
611                std::vector<string16>* tokens) {
612  return TokenizeT(str, delimiters, tokens);
613}
614
615size_t Tokenize(const std::string& str,
616                const std::string& delimiters,
617                std::vector<std::string>* tokens) {
618  return TokenizeT(str, delimiters, tokens);
619}
620
621size_t Tokenize(const base::StringPiece& str,
622                const base::StringPiece& delimiters,
623                std::vector<base::StringPiece>* tokens) {
624  return TokenizeT(str, delimiters, tokens);
625}
626
627template<typename STR>
628static STR JoinStringT(const std::vector<STR>& parts, const STR& sep) {
629  if (parts.empty())
630    return STR();
631
632  STR result(parts[0]);
633  typename std::vector<STR>::const_iterator iter = parts.begin();
634  ++iter;
635
636  for (; iter != parts.end(); ++iter) {
637    result += sep;
638    result += *iter;
639  }
640
641  return result;
642}
643
644std::string JoinString(const std::vector<std::string>& parts, char sep) {
645  return JoinStringT(parts, std::string(1, sep));
646}
647
648string16 JoinString(const std::vector<string16>& parts, char16 sep) {
649  return JoinStringT(parts, string16(1, sep));
650}
651
652std::string JoinString(const std::vector<std::string>& parts,
653                       const std::string& separator) {
654  return JoinStringT(parts, separator);
655}
656
657string16 JoinString(const std::vector<string16>& parts,
658                    const string16& separator) {
659  return JoinStringT(parts, separator);
660}
661
662template<class FormatStringType, class OutStringType>
663OutStringType DoReplaceStringPlaceholders(const FormatStringType& format_string,
664    const std::vector<OutStringType>& subst, std::vector<size_t>* offsets) {
665  size_t substitutions = subst.size();
666
667  size_t sub_length = 0;
668  for (typename std::vector<OutStringType>::const_iterator iter = subst.begin();
669       iter != subst.end(); ++iter) {
670    sub_length += iter->length();
671  }
672
673  OutStringType formatted;
674  formatted.reserve(format_string.length() + sub_length);
675
676  std::vector<ReplacementOffset> r_offsets;
677  for (typename FormatStringType::const_iterator i = format_string.begin();
678       i != format_string.end(); ++i) {
679    if ('$' == *i) {
680      if (i + 1 != format_string.end()) {
681        ++i;
682        DCHECK('$' == *i || '1' <= *i) << "Invalid placeholder: " << *i;
683        if ('$' == *i) {
684          while (i != format_string.end() && '$' == *i) {
685            formatted.push_back('$');
686            ++i;
687          }
688          --i;
689        } else {
690          uintptr_t index = 0;
691          while (i != format_string.end() && '0' <= *i && *i <= '9') {
692            index *= 10;
693            index += *i - '0';
694            ++i;
695          }
696          --i;
697          index -= 1;
698          if (offsets) {
699            ReplacementOffset r_offset(index,
700                static_cast<int>(formatted.size()));
701            r_offsets.insert(std::lower_bound(r_offsets.begin(),
702                                              r_offsets.end(),
703                                              r_offset,
704                                              &CompareParameter),
705                             r_offset);
706          }
707          if (index < substitutions)
708            formatted.append(subst.at(index));
709        }
710      }
711    } else {
712      formatted.push_back(*i);
713    }
714  }
715  if (offsets) {
716    for (std::vector<ReplacementOffset>::const_iterator i = r_offsets.begin();
717         i != r_offsets.end(); ++i) {
718      offsets->push_back(i->offset);
719    }
720  }
721  return formatted;
722}
723
724string16 ReplaceStringPlaceholders(const string16& format_string,
725                                   const std::vector<string16>& subst,
726                                   std::vector<size_t>* offsets) {
727  return DoReplaceStringPlaceholders(format_string, subst, offsets);
728}
729
730std::string ReplaceStringPlaceholders(const base::StringPiece& format_string,
731                                      const std::vector<std::string>& subst,
732                                      std::vector<size_t>* offsets) {
733  return DoReplaceStringPlaceholders(format_string, subst, offsets);
734}
735
736string16 ReplaceStringPlaceholders(const string16& format_string,
737                                   const string16& a,
738                                   size_t* offset) {
739  std::vector<size_t> offsets;
740  std::vector<string16> subst;
741  subst.push_back(a);
742  string16 result = ReplaceStringPlaceholders(format_string, subst, &offsets);
743
744  DCHECK_EQ(1U, offsets.size());
745  if (offset)
746    *offset = offsets[0];
747  return result;
748}
749
750static bool IsWildcard(base_icu::UChar32 character) {
751  return character == '*' || character == '?';
752}
753
754// Move the strings pointers to the point where they start to differ.
755template <typename CHAR, typename NEXT>
756static void EatSameChars(const CHAR** pattern, const CHAR* pattern_end,
757                         const CHAR** string, const CHAR* string_end,
758                         NEXT next) {
759  const CHAR* escape = NULL;
760  while (*pattern != pattern_end && *string != string_end) {
761    if (!escape && IsWildcard(**pattern)) {
762      // We don't want to match wildcard here, except if it's escaped.
763      return;
764    }
765
766    // Check if the escapement char is found. If so, skip it and move to the
767    // next character.
768    if (!escape && **pattern == '\\') {
769      escape = *pattern;
770      next(pattern, pattern_end);
771      continue;
772    }
773
774    // Check if the chars match, if so, increment the ptrs.
775    const CHAR* pattern_next = *pattern;
776    const CHAR* string_next = *string;
777    base_icu::UChar32 pattern_char = next(&pattern_next, pattern_end);
778    if (pattern_char == next(&string_next, string_end) &&
779        pattern_char != (base_icu::UChar32) CBU_SENTINEL) {
780      *pattern = pattern_next;
781      *string = string_next;
782    } else {
783      // Uh ho, it did not match, we are done. If the last char was an
784      // escapement, that means that it was an error to advance the ptr here,
785      // let's put it back where it was. This also mean that the MatchPattern
786      // function will return false because if we can't match an escape char
787      // here, then no one will.
788      if (escape) {
789        *pattern = escape;
790      }
791      return;
792    }
793
794    escape = NULL;
795  }
796}
797
798template <typename CHAR, typename NEXT>
799static void EatWildcard(const CHAR** pattern, const CHAR* end, NEXT next) {
800  while (*pattern != end) {
801    if (!IsWildcard(**pattern))
802      return;
803    next(pattern, end);
804  }
805}
806
807template <typename CHAR, typename NEXT>
808static bool MatchPatternT(const CHAR* eval, const CHAR* eval_end,
809                          const CHAR* pattern, const CHAR* pattern_end,
810                          int depth,
811                          NEXT next) {
812  const int kMaxDepth = 16;
813  if (depth > kMaxDepth)
814    return false;
815
816  // Eat all the matching chars.
817  EatSameChars(&pattern, pattern_end, &eval, eval_end, next);
818
819  // If the string is empty, then the pattern must be empty too, or contains
820  // only wildcards.
821  if (eval == eval_end) {
822    EatWildcard(&pattern, pattern_end, next);
823    return pattern == pattern_end;
824  }
825
826  // Pattern is empty but not string, this is not a match.
827  if (pattern == pattern_end)
828    return false;
829
830  // If this is a question mark, then we need to compare the rest with
831  // the current string or the string with one character eaten.
832  const CHAR* next_pattern = pattern;
833  next(&next_pattern, pattern_end);
834  if (pattern[0] == '?') {
835    if (MatchPatternT(eval, eval_end, next_pattern, pattern_end,
836                      depth + 1, next))
837      return true;
838    const CHAR* next_eval = eval;
839    next(&next_eval, eval_end);
840    if (MatchPatternT(next_eval, eval_end, next_pattern, pattern_end,
841                      depth + 1, next))
842      return true;
843  }
844
845  // This is a *, try to match all the possible substrings with the remainder
846  // of the pattern.
847  if (pattern[0] == '*') {
848    // Collapse duplicate wild cards (********** into *) so that the
849    // method does not recurse unnecessarily. http://crbug.com/52839
850    EatWildcard(&next_pattern, pattern_end, next);
851
852    while (eval != eval_end) {
853      if (MatchPatternT(eval, eval_end, next_pattern, pattern_end,
854                        depth + 1, next))
855        return true;
856      eval++;
857    }
858
859    // We reached the end of the string, let see if the pattern contains only
860    // wildcards.
861    if (eval == eval_end) {
862      EatWildcard(&pattern, pattern_end, next);
863      if (pattern != pattern_end)
864        return false;
865      return true;
866    }
867  }
868
869  return false;
870}
871
872struct NextCharUTF8 {
873  base_icu::UChar32 operator()(const char** p, const char* end) {
874    base_icu::UChar32 c;
875    int offset = 0;
876    CBU8_NEXT(*p, offset, end - *p, c);
877    *p += offset;
878    return c;
879  }
880};
881
882struct NextCharUTF16 {
883  base_icu::UChar32 operator()(const char16** p, const char16* end) {
884    base_icu::UChar32 c;
885    int offset = 0;
886    CBU16_NEXT(*p, offset, end - *p, c);
887    *p += offset;
888    return c;
889  }
890};
891
892bool MatchPattern(const base::StringPiece& eval,
893                  const base::StringPiece& pattern) {
894  return MatchPatternT(eval.data(), eval.data() + eval.size(),
895                       pattern.data(), pattern.data() + pattern.size(),
896                       0, NextCharUTF8());
897}
898
899bool MatchPattern(const string16& eval, const string16& pattern) {
900  return MatchPatternT(eval.c_str(), eval.c_str() + eval.size(),
901                       pattern.c_str(), pattern.c_str() + pattern.size(),
902                       0, NextCharUTF16());
903}
904
905// The following code is compatible with the OpenBSD lcpy interface.  See:
906//   http://www.gratisoft.us/todd/papers/strlcpy.html
907//   ftp://ftp.openbsd.org/pub/OpenBSD/src/lib/libc/string/{wcs,str}lcpy.c
908
909namespace {
910
911template <typename CHAR>
912size_t lcpyT(CHAR* dst, const CHAR* src, size_t dst_size) {
913  for (size_t i = 0; i < dst_size; ++i) {
914    if ((dst[i] = src[i]) == 0)  // We hit and copied the terminating NULL.
915      return i;
916  }
917
918  // We were left off at dst_size.  We over copied 1 byte.  Null terminate.
919  if (dst_size != 0)
920    dst[dst_size - 1] = 0;
921
922  // Count the rest of the |src|, and return it's length in characters.
923  while (src[dst_size]) ++dst_size;
924  return dst_size;
925}
926
927}  // namespace
928
929size_t base::strlcpy(char* dst, const char* src, size_t dst_size) {
930  return lcpyT<char>(dst, src, dst_size);
931}
932size_t base::wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size) {
933  return lcpyT<wchar_t>(dst, src, dst_size);
934}
935