1// Copyright 2013 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "base/strings/string_util.h"
6
7#include <ctype.h>
8#include <errno.h>
9#include <math.h>
10#include <stdarg.h>
11#include <stdio.h>
12#include <stdlib.h>
13#include <string.h>
14#include <time.h>
15#include <wchar.h>
16#include <wctype.h>
17
18#include <algorithm>
19#include <vector>
20
21#include "base/basictypes.h"
22#include "base/logging.h"
23#include "base/memory/singleton.h"
24#include "base/strings/utf_string_conversion_utils.h"
25#include "base/strings/utf_string_conversions.h"
26#include "base/third_party/icu/icu_utf.h"
27#include "build/build_config.h"
28
29// Remove when this entire file is in the base namespace.
30using base::char16;
31using base::string16;
32
33namespace {
34
35// Force the singleton used by EmptyString[16] to be a unique type. This
36// prevents other code that might accidentally use Singleton<string> from
37// getting our internal one.
38struct EmptyStrings {
39  EmptyStrings() {}
40  const std::string s;
41  const string16 s16;
42
43  static EmptyStrings* GetInstance() {
44    return Singleton<EmptyStrings>::get();
45  }
46};
47
48// Used by ReplaceStringPlaceholders to track the position in the string of
49// replaced parameters.
50struct ReplacementOffset {
51  ReplacementOffset(uintptr_t parameter, size_t offset)
52      : parameter(parameter),
53        offset(offset) {}
54
55  // Index of the parameter.
56  uintptr_t parameter;
57
58  // Starting position in the string.
59  size_t offset;
60};
61
62static bool CompareParameter(const ReplacementOffset& elem1,
63                             const ReplacementOffset& elem2) {
64  return elem1.parameter < elem2.parameter;
65}
66
67}  // namespace
68
69namespace base {
70
71bool IsWprintfFormatPortable(const wchar_t* format) {
72  for (const wchar_t* position = format; *position != '\0'; ++position) {
73    if (*position == '%') {
74      bool in_specification = true;
75      bool modifier_l = false;
76      while (in_specification) {
77        // Eat up characters until reaching a known specifier.
78        if (*++position == '\0') {
79          // The format string ended in the middle of a specification.  Call
80          // it portable because no unportable specifications were found.  The
81          // string is equally broken on all platforms.
82          return true;
83        }
84
85        if (*position == 'l') {
86          // 'l' is the only thing that can save the 's' and 'c' specifiers.
87          modifier_l = true;
88        } else if (((*position == 's' || *position == 'c') && !modifier_l) ||
89                   *position == 'S' || *position == 'C' || *position == 'F' ||
90                   *position == 'D' || *position == 'O' || *position == 'U') {
91          // Not portable.
92          return false;
93        }
94
95        if (wcschr(L"diouxXeEfgGaAcspn%", *position)) {
96          // Portable, keep scanning the rest of the format string.
97          in_specification = false;
98        }
99      }
100    }
101  }
102
103  return true;
104}
105
106const std::string& EmptyString() {
107  return EmptyStrings::GetInstance()->s;
108}
109
110const string16& EmptyString16() {
111  return EmptyStrings::GetInstance()->s16;
112}
113
114template<typename STR>
115bool ReplaceCharsT(const STR& input,
116                   const STR& replace_chars,
117                   const STR& replace_with,
118                   STR* output) {
119  bool removed = false;
120  size_t replace_length = replace_with.length();
121
122  *output = input;
123
124  size_t found = output->find_first_of(replace_chars);
125  while (found != STR::npos) {
126    removed = true;
127    output->replace(found, 1, replace_with);
128    found = output->find_first_of(replace_chars, found + replace_length);
129  }
130
131  return removed;
132}
133
134bool ReplaceChars(const string16& input,
135                  const base::StringPiece16& replace_chars,
136                  const string16& replace_with,
137                  string16* output) {
138  return ReplaceCharsT(input, replace_chars.as_string(), replace_with, output);
139}
140
141bool ReplaceChars(const std::string& input,
142                  const base::StringPiece& replace_chars,
143                  const std::string& replace_with,
144                  std::string* output) {
145  return ReplaceCharsT(input, replace_chars.as_string(), replace_with, output);
146}
147
148bool RemoveChars(const string16& input,
149                 const base::StringPiece16& remove_chars,
150                 string16* output) {
151  return ReplaceChars(input, remove_chars.as_string(), string16(), output);
152}
153
154bool RemoveChars(const std::string& input,
155                 const base::StringPiece& remove_chars,
156                 std::string* output) {
157  return ReplaceChars(input, remove_chars.as_string(), std::string(), output);
158}
159
160template<typename STR>
161TrimPositions TrimStringT(const STR& input,
162                          const STR& trim_chars,
163                          TrimPositions positions,
164                          STR* output) {
165  // Find the edges of leading/trailing whitespace as desired.
166  const size_t last_char = input.length() - 1;
167  const size_t first_good_char = (positions & TRIM_LEADING) ?
168      input.find_first_not_of(trim_chars) : 0;
169  const size_t last_good_char = (positions & TRIM_TRAILING) ?
170      input.find_last_not_of(trim_chars) : last_char;
171
172  // When the string was all whitespace, report that we stripped off whitespace
173  // from whichever position the caller was interested in.  For empty input, we
174  // stripped no whitespace, but we still need to clear |output|.
175  if (input.empty() ||
176      (first_good_char == STR::npos) || (last_good_char == STR::npos)) {
177    bool input_was_empty = input.empty();  // in case output == &input
178    output->clear();
179    return input_was_empty ? TRIM_NONE : positions;
180  }
181
182  // Trim the whitespace.
183  *output =
184      input.substr(first_good_char, last_good_char - first_good_char + 1);
185
186  // Return where we trimmed from.
187  return static_cast<TrimPositions>(
188      ((first_good_char == 0) ? TRIM_NONE : TRIM_LEADING) |
189      ((last_good_char == last_char) ? TRIM_NONE : TRIM_TRAILING));
190}
191
192bool TrimString(const string16& input,
193                const base::StringPiece16& trim_chars,
194                string16* output) {
195  return TrimStringT(input, trim_chars.as_string(), TRIM_ALL, output) !=
196      TRIM_NONE;
197}
198
199bool TrimString(const std::string& input,
200                const base::StringPiece& trim_chars,
201                std::string* output) {
202  return TrimStringT(input, trim_chars.as_string(), TRIM_ALL, output) !=
203      TRIM_NONE;
204}
205
206void TruncateUTF8ToByteSize(const std::string& input,
207                            const size_t byte_size,
208                            std::string* output) {
209  DCHECK(output);
210  if (byte_size > input.length()) {
211    *output = input;
212    return;
213  }
214  DCHECK_LE(byte_size, static_cast<uint32>(kint32max));
215  // Note: This cast is necessary because CBU8_NEXT uses int32s.
216  int32 truncation_length = static_cast<int32>(byte_size);
217  int32 char_index = truncation_length - 1;
218  const char* data = input.data();
219
220  // Using CBU8, we will move backwards from the truncation point
221  // to the beginning of the string looking for a valid UTF8
222  // character.  Once a full UTF8 character is found, we will
223  // truncate the string to the end of that character.
224  while (char_index >= 0) {
225    int32 prev = char_index;
226    base_icu::UChar32 code_point = 0;
227    CBU8_NEXT(data, char_index, truncation_length, code_point);
228    if (!IsValidCharacter(code_point) ||
229        !IsValidCodepoint(code_point)) {
230      char_index = prev - 1;
231    } else {
232      break;
233    }
234  }
235
236  if (char_index >= 0 )
237    *output = input.substr(0, char_index);
238  else
239    output->clear();
240}
241
242TrimPositions TrimWhitespace(const string16& input,
243                             TrimPositions positions,
244                             string16* output) {
245  return TrimStringT(input, base::string16(kWhitespaceUTF16), positions,
246                     output);
247}
248
249TrimPositions TrimWhitespaceASCII(const std::string& input,
250                                  TrimPositions positions,
251                                  std::string* output) {
252  return TrimStringT(input, std::string(kWhitespaceASCII), positions, output);
253}
254
255// This function is only for backward-compatibility.
256// To be removed when all callers are updated.
257TrimPositions TrimWhitespace(const std::string& input,
258                             TrimPositions positions,
259                             std::string* output) {
260  return TrimWhitespaceASCII(input, positions, output);
261}
262
263template<typename STR>
264STR CollapseWhitespaceT(const STR& text,
265                        bool trim_sequences_with_line_breaks) {
266  STR result;
267  result.resize(text.size());
268
269  // Set flags to pretend we're already in a trimmed whitespace sequence, so we
270  // will trim any leading whitespace.
271  bool in_whitespace = true;
272  bool already_trimmed = true;
273
274  int chars_written = 0;
275  for (typename STR::const_iterator i(text.begin()); i != text.end(); ++i) {
276    if (IsWhitespace(*i)) {
277      if (!in_whitespace) {
278        // Reduce all whitespace sequences to a single space.
279        in_whitespace = true;
280        result[chars_written++] = L' ';
281      }
282      if (trim_sequences_with_line_breaks && !already_trimmed &&
283          ((*i == '\n') || (*i == '\r'))) {
284        // Whitespace sequences containing CR or LF are eliminated entirely.
285        already_trimmed = true;
286        --chars_written;
287      }
288    } else {
289      // Non-whitespace chracters are copied straight across.
290      in_whitespace = false;
291      already_trimmed = false;
292      result[chars_written++] = *i;
293    }
294  }
295
296  if (in_whitespace && !already_trimmed) {
297    // Any trailing whitespace is eliminated.
298    --chars_written;
299  }
300
301  result.resize(chars_written);
302  return result;
303}
304
305string16 CollapseWhitespace(const string16& text,
306                            bool trim_sequences_with_line_breaks) {
307  return CollapseWhitespaceT(text, trim_sequences_with_line_breaks);
308}
309
310std::string CollapseWhitespaceASCII(const std::string& text,
311                                    bool trim_sequences_with_line_breaks) {
312  return CollapseWhitespaceT(text, trim_sequences_with_line_breaks);
313}
314
315bool ContainsOnlyChars(const StringPiece& input,
316                       const StringPiece& characters) {
317  return input.find_first_not_of(characters) == StringPiece::npos;
318}
319
320bool ContainsOnlyChars(const StringPiece16& input,
321                       const StringPiece16& characters) {
322  return input.find_first_not_of(characters) == StringPiece16::npos;
323}
324
325template<class STR>
326static bool DoIsStringASCII(const STR& str) {
327  for (size_t i = 0; i < str.length(); i++) {
328    typename ToUnsigned<typename STR::value_type>::Unsigned c = str[i];
329    if (c > 0x7F)
330      return false;
331  }
332  return true;
333}
334
335bool IsStringASCII(const StringPiece& str) {
336  return DoIsStringASCII(str);
337}
338
339bool IsStringASCII(const string16& str) {
340  return DoIsStringASCII(str);
341}
342
343bool IsStringUTF8(const std::string& str) {
344  const char *src = str.data();
345  int32 src_len = static_cast<int32>(str.length());
346  int32 char_index = 0;
347
348  while (char_index < src_len) {
349    int32 code_point;
350    CBU8_NEXT(src, char_index, src_len, code_point);
351    if (!IsValidCharacter(code_point))
352      return false;
353  }
354  return true;
355}
356
357}  // namespace base
358
359template<typename Iter>
360static inline bool DoLowerCaseEqualsASCII(Iter a_begin,
361                                          Iter a_end,
362                                          const char* b) {
363  for (Iter it = a_begin; it != a_end; ++it, ++b) {
364    if (!*b || base::ToLowerASCII(*it) != *b)
365      return false;
366  }
367  return *b == 0;
368}
369
370// Front-ends for LowerCaseEqualsASCII.
371bool LowerCaseEqualsASCII(const std::string& a, const char* b) {
372  return DoLowerCaseEqualsASCII(a.begin(), a.end(), b);
373}
374
375bool LowerCaseEqualsASCII(const string16& a, const char* b) {
376  return DoLowerCaseEqualsASCII(a.begin(), a.end(), b);
377}
378
379bool LowerCaseEqualsASCII(std::string::const_iterator a_begin,
380                          std::string::const_iterator a_end,
381                          const char* b) {
382  return DoLowerCaseEqualsASCII(a_begin, a_end, b);
383}
384
385bool LowerCaseEqualsASCII(string16::const_iterator a_begin,
386                          string16::const_iterator a_end,
387                          const char* b) {
388  return DoLowerCaseEqualsASCII(a_begin, a_end, b);
389}
390
391// TODO(port): Resolve wchar_t/iterator issues that require OS_ANDROID here.
392#if !defined(OS_ANDROID)
393bool LowerCaseEqualsASCII(const char* a_begin,
394                          const char* a_end,
395                          const char* b) {
396  return DoLowerCaseEqualsASCII(a_begin, a_end, b);
397}
398
399bool LowerCaseEqualsASCII(const char16* a_begin,
400                          const char16* a_end,
401                          const char* b) {
402  return DoLowerCaseEqualsASCII(a_begin, a_end, b);
403}
404
405#endif  // !defined(OS_ANDROID)
406
407bool EqualsASCII(const string16& a, const base::StringPiece& b) {
408  if (a.length() != b.length())
409    return false;
410  return std::equal(b.begin(), b.end(), a.begin());
411}
412
413bool StartsWithASCII(const std::string& str,
414                     const std::string& search,
415                     bool case_sensitive) {
416  if (case_sensitive)
417    return str.compare(0, search.length(), search) == 0;
418  else
419    return base::strncasecmp(str.c_str(), search.c_str(), search.length()) == 0;
420}
421
422template <typename STR>
423bool StartsWithT(const STR& str, const STR& search, bool case_sensitive) {
424  if (case_sensitive) {
425    return str.compare(0, search.length(), search) == 0;
426  } else {
427    if (search.size() > str.size())
428      return false;
429    return std::equal(search.begin(), search.end(), str.begin(),
430                      base::CaseInsensitiveCompare<typename STR::value_type>());
431  }
432}
433
434bool StartsWith(const string16& str, const string16& search,
435                bool case_sensitive) {
436  return StartsWithT(str, search, case_sensitive);
437}
438
439template <typename STR>
440bool EndsWithT(const STR& str, const STR& search, bool case_sensitive) {
441  size_t str_length = str.length();
442  size_t search_length = search.length();
443  if (search_length > str_length)
444    return false;
445  if (case_sensitive)
446    return str.compare(str_length - search_length, search_length, search) == 0;
447  return std::equal(search.begin(), search.end(),
448                    str.begin() + (str_length - search_length),
449                    base::CaseInsensitiveCompare<typename STR::value_type>());
450}
451
452bool EndsWith(const std::string& str, const std::string& search,
453              bool case_sensitive) {
454  return EndsWithT(str, search, case_sensitive);
455}
456
457bool EndsWith(const string16& str, const string16& search,
458              bool case_sensitive) {
459  return EndsWithT(str, search, case_sensitive);
460}
461
462static const char* const kByteStringsUnlocalized[] = {
463  " B",
464  " kB",
465  " MB",
466  " GB",
467  " TB",
468  " PB"
469};
470
471string16 FormatBytesUnlocalized(int64 bytes) {
472  double unit_amount = static_cast<double>(bytes);
473  size_t dimension = 0;
474  const int kKilo = 1024;
475  while (unit_amount >= kKilo &&
476         dimension < arraysize(kByteStringsUnlocalized) - 1) {
477    unit_amount /= kKilo;
478    dimension++;
479  }
480
481  char buf[64];
482  if (bytes != 0 && dimension > 0 && unit_amount < 100) {
483    base::snprintf(buf, arraysize(buf), "%.1lf%s", unit_amount,
484                   kByteStringsUnlocalized[dimension]);
485  } else {
486    base::snprintf(buf, arraysize(buf), "%.0lf%s", unit_amount,
487                   kByteStringsUnlocalized[dimension]);
488  }
489
490  return base::ASCIIToUTF16(buf);
491}
492
493template<class StringType>
494void DoReplaceSubstringsAfterOffset(StringType* str,
495                                    size_t start_offset,
496                                    const StringType& find_this,
497                                    const StringType& replace_with,
498                                    bool replace_all) {
499  if ((start_offset == StringType::npos) || (start_offset >= str->length()))
500    return;
501
502  DCHECK(!find_this.empty());
503  for (size_t offs(str->find(find_this, start_offset));
504      offs != StringType::npos; offs = str->find(find_this, offs)) {
505    str->replace(offs, find_this.length(), replace_with);
506    offs += replace_with.length();
507
508    if (!replace_all)
509      break;
510  }
511}
512
513void ReplaceFirstSubstringAfterOffset(string16* str,
514                                      size_t start_offset,
515                                      const string16& find_this,
516                                      const string16& replace_with) {
517  DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,
518                                 false);  // replace first instance
519}
520
521void ReplaceFirstSubstringAfterOffset(std::string* str,
522                                      size_t start_offset,
523                                      const std::string& find_this,
524                                      const std::string& replace_with) {
525  DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,
526                                 false);  // replace first instance
527}
528
529void ReplaceSubstringsAfterOffset(string16* str,
530                                  size_t start_offset,
531                                  const string16& find_this,
532                                  const string16& replace_with) {
533  DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,
534                                 true);  // replace all instances
535}
536
537void ReplaceSubstringsAfterOffset(std::string* str,
538                                  size_t start_offset,
539                                  const std::string& find_this,
540                                  const std::string& replace_with) {
541  DoReplaceSubstringsAfterOffset(str, start_offset, find_this, replace_with,
542                                 true);  // replace all instances
543}
544
545
546template<typename STR>
547static size_t TokenizeT(const STR& str,
548                        const STR& delimiters,
549                        std::vector<STR>* tokens) {
550  tokens->clear();
551
552  size_t start = str.find_first_not_of(delimiters);
553  while (start != STR::npos) {
554    size_t end = str.find_first_of(delimiters, start + 1);
555    if (end == STR::npos) {
556      tokens->push_back(str.substr(start));
557      break;
558    } else {
559      tokens->push_back(str.substr(start, end - start));
560      start = str.find_first_not_of(delimiters, end + 1);
561    }
562  }
563
564  return tokens->size();
565}
566
567size_t Tokenize(const string16& str,
568                const string16& delimiters,
569                std::vector<string16>* tokens) {
570  return TokenizeT(str, delimiters, tokens);
571}
572
573size_t Tokenize(const std::string& str,
574                const std::string& delimiters,
575                std::vector<std::string>* tokens) {
576  return TokenizeT(str, delimiters, tokens);
577}
578
579size_t Tokenize(const base::StringPiece& str,
580                const base::StringPiece& delimiters,
581                std::vector<base::StringPiece>* tokens) {
582  return TokenizeT(str, delimiters, tokens);
583}
584
585template<typename STR>
586static STR JoinStringT(const std::vector<STR>& parts, const STR& sep) {
587  if (parts.empty())
588    return STR();
589
590  STR result(parts[0]);
591  typename std::vector<STR>::const_iterator iter = parts.begin();
592  ++iter;
593
594  for (; iter != parts.end(); ++iter) {
595    result += sep;
596    result += *iter;
597  }
598
599  return result;
600}
601
602std::string JoinString(const std::vector<std::string>& parts, char sep) {
603  return JoinStringT(parts, std::string(1, sep));
604}
605
606string16 JoinString(const std::vector<string16>& parts, char16 sep) {
607  return JoinStringT(parts, string16(1, sep));
608}
609
610std::string JoinString(const std::vector<std::string>& parts,
611                       const std::string& separator) {
612  return JoinStringT(parts, separator);
613}
614
615string16 JoinString(const std::vector<string16>& parts,
616                    const string16& separator) {
617  return JoinStringT(parts, separator);
618}
619
620template<class FormatStringType, class OutStringType>
621OutStringType DoReplaceStringPlaceholders(const FormatStringType& format_string,
622    const std::vector<OutStringType>& subst, std::vector<size_t>* offsets) {
623  size_t substitutions = subst.size();
624
625  size_t sub_length = 0;
626  for (typename std::vector<OutStringType>::const_iterator iter = subst.begin();
627       iter != subst.end(); ++iter) {
628    sub_length += iter->length();
629  }
630
631  OutStringType formatted;
632  formatted.reserve(format_string.length() + sub_length);
633
634  std::vector<ReplacementOffset> r_offsets;
635  for (typename FormatStringType::const_iterator i = format_string.begin();
636       i != format_string.end(); ++i) {
637    if ('$' == *i) {
638      if (i + 1 != format_string.end()) {
639        ++i;
640        DCHECK('$' == *i || '1' <= *i) << "Invalid placeholder: " << *i;
641        if ('$' == *i) {
642          while (i != format_string.end() && '$' == *i) {
643            formatted.push_back('$');
644            ++i;
645          }
646          --i;
647        } else {
648          uintptr_t index = 0;
649          while (i != format_string.end() && '0' <= *i && *i <= '9') {
650            index *= 10;
651            index += *i - '0';
652            ++i;
653          }
654          --i;
655          index -= 1;
656          if (offsets) {
657            ReplacementOffset r_offset(index,
658                static_cast<int>(formatted.size()));
659            r_offsets.insert(std::lower_bound(r_offsets.begin(),
660                                              r_offsets.end(),
661                                              r_offset,
662                                              &CompareParameter),
663                             r_offset);
664          }
665          if (index < substitutions)
666            formatted.append(subst.at(index));
667        }
668      }
669    } else {
670      formatted.push_back(*i);
671    }
672  }
673  if (offsets) {
674    for (std::vector<ReplacementOffset>::const_iterator i = r_offsets.begin();
675         i != r_offsets.end(); ++i) {
676      offsets->push_back(i->offset);
677    }
678  }
679  return formatted;
680}
681
682string16 ReplaceStringPlaceholders(const string16& format_string,
683                                   const std::vector<string16>& subst,
684                                   std::vector<size_t>* offsets) {
685  return DoReplaceStringPlaceholders(format_string, subst, offsets);
686}
687
688std::string ReplaceStringPlaceholders(const base::StringPiece& format_string,
689                                      const std::vector<std::string>& subst,
690                                      std::vector<size_t>* offsets) {
691  return DoReplaceStringPlaceholders(format_string, subst, offsets);
692}
693
694string16 ReplaceStringPlaceholders(const string16& format_string,
695                                   const string16& a,
696                                   size_t* offset) {
697  std::vector<size_t> offsets;
698  std::vector<string16> subst;
699  subst.push_back(a);
700  string16 result = ReplaceStringPlaceholders(format_string, subst, &offsets);
701
702  DCHECK_EQ(1U, offsets.size());
703  if (offset)
704    *offset = offsets[0];
705  return result;
706}
707
708static bool IsWildcard(base_icu::UChar32 character) {
709  return character == '*' || character == '?';
710}
711
712// Move the strings pointers to the point where they start to differ.
713template <typename CHAR, typename NEXT>
714static void EatSameChars(const CHAR** pattern, const CHAR* pattern_end,
715                         const CHAR** string, const CHAR* string_end,
716                         NEXT next) {
717  const CHAR* escape = NULL;
718  while (*pattern != pattern_end && *string != string_end) {
719    if (!escape && IsWildcard(**pattern)) {
720      // We don't want to match wildcard here, except if it's escaped.
721      return;
722    }
723
724    // Check if the escapement char is found. If so, skip it and move to the
725    // next character.
726    if (!escape && **pattern == '\\') {
727      escape = *pattern;
728      next(pattern, pattern_end);
729      continue;
730    }
731
732    // Check if the chars match, if so, increment the ptrs.
733    const CHAR* pattern_next = *pattern;
734    const CHAR* string_next = *string;
735    base_icu::UChar32 pattern_char = next(&pattern_next, pattern_end);
736    if (pattern_char == next(&string_next, string_end) &&
737        pattern_char != CBU_SENTINEL) {
738      *pattern = pattern_next;
739      *string = string_next;
740    } else {
741      // Uh oh, it did not match, we are done. If the last char was an
742      // escapement, that means that it was an error to advance the ptr here,
743      // let's put it back where it was. This also mean that the MatchPattern
744      // function will return false because if we can't match an escape char
745      // here, then no one will.
746      if (escape) {
747        *pattern = escape;
748      }
749      return;
750    }
751
752    escape = NULL;
753  }
754}
755
756template <typename CHAR, typename NEXT>
757static void EatWildcard(const CHAR** pattern, const CHAR* end, NEXT next) {
758  while (*pattern != end) {
759    if (!IsWildcard(**pattern))
760      return;
761    next(pattern, end);
762  }
763}
764
765template <typename CHAR, typename NEXT>
766static bool MatchPatternT(const CHAR* eval, const CHAR* eval_end,
767                          const CHAR* pattern, const CHAR* pattern_end,
768                          int depth,
769                          NEXT next) {
770  const int kMaxDepth = 16;
771  if (depth > kMaxDepth)
772    return false;
773
774  // Eat all the matching chars.
775  EatSameChars(&pattern, pattern_end, &eval, eval_end, next);
776
777  // If the string is empty, then the pattern must be empty too, or contains
778  // only wildcards.
779  if (eval == eval_end) {
780    EatWildcard(&pattern, pattern_end, next);
781    return pattern == pattern_end;
782  }
783
784  // Pattern is empty but not string, this is not a match.
785  if (pattern == pattern_end)
786    return false;
787
788  // If this is a question mark, then we need to compare the rest with
789  // the current string or the string with one character eaten.
790  const CHAR* next_pattern = pattern;
791  next(&next_pattern, pattern_end);
792  if (pattern[0] == '?') {
793    if (MatchPatternT(eval, eval_end, next_pattern, pattern_end,
794                      depth + 1, next))
795      return true;
796    const CHAR* next_eval = eval;
797    next(&next_eval, eval_end);
798    if (MatchPatternT(next_eval, eval_end, next_pattern, pattern_end,
799                      depth + 1, next))
800      return true;
801  }
802
803  // This is a *, try to match all the possible substrings with the remainder
804  // of the pattern.
805  if (pattern[0] == '*') {
806    // Collapse duplicate wild cards (********** into *) so that the
807    // method does not recurse unnecessarily. http://crbug.com/52839
808    EatWildcard(&next_pattern, pattern_end, next);
809
810    while (eval != eval_end) {
811      if (MatchPatternT(eval, eval_end, next_pattern, pattern_end,
812                        depth + 1, next))
813        return true;
814      eval++;
815    }
816
817    // We reached the end of the string, let see if the pattern contains only
818    // wildcards.
819    if (eval == eval_end) {
820      EatWildcard(&pattern, pattern_end, next);
821      if (pattern != pattern_end)
822        return false;
823      return true;
824    }
825  }
826
827  return false;
828}
829
830struct NextCharUTF8 {
831  base_icu::UChar32 operator()(const char** p, const char* end) {
832    base_icu::UChar32 c;
833    int offset = 0;
834    CBU8_NEXT(*p, offset, end - *p, c);
835    *p += offset;
836    return c;
837  }
838};
839
840struct NextCharUTF16 {
841  base_icu::UChar32 operator()(const char16** p, const char16* end) {
842    base_icu::UChar32 c;
843    int offset = 0;
844    CBU16_NEXT(*p, offset, end - *p, c);
845    *p += offset;
846    return c;
847  }
848};
849
850bool MatchPattern(const base::StringPiece& eval,
851                  const base::StringPiece& pattern) {
852  return MatchPatternT(eval.data(), eval.data() + eval.size(),
853                       pattern.data(), pattern.data() + pattern.size(),
854                       0, NextCharUTF8());
855}
856
857bool MatchPattern(const string16& eval, const string16& pattern) {
858  return MatchPatternT(eval.c_str(), eval.c_str() + eval.size(),
859                       pattern.c_str(), pattern.c_str() + pattern.size(),
860                       0, NextCharUTF16());
861}
862
863// The following code is compatible with the OpenBSD lcpy interface.  See:
864//   http://www.gratisoft.us/todd/papers/strlcpy.html
865//   ftp://ftp.openbsd.org/pub/OpenBSD/src/lib/libc/string/{wcs,str}lcpy.c
866
867namespace {
868
869template <typename CHAR>
870size_t lcpyT(CHAR* dst, const CHAR* src, size_t dst_size) {
871  for (size_t i = 0; i < dst_size; ++i) {
872    if ((dst[i] = src[i]) == 0)  // We hit and copied the terminating NULL.
873      return i;
874  }
875
876  // We were left off at dst_size.  We over copied 1 byte.  Null terminate.
877  if (dst_size != 0)
878    dst[dst_size - 1] = 0;
879
880  // Count the rest of the |src|, and return it's length in characters.
881  while (src[dst_size]) ++dst_size;
882  return dst_size;
883}
884
885}  // namespace
886
887size_t base::strlcpy(char* dst, const char* src, size_t dst_size) {
888  return lcpyT<char>(dst, src, dst_size);
889}
890size_t base::wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size) {
891  return lcpyT<wchar_t>(dst, src, dst_size);
892}
893