1// Copyright 2013 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#include "base/strings/string_util.h"
6
7#include <ctype.h>
8#include <errno.h>
9#include <math.h>
10#include <stdarg.h>
11#include <stdint.h>
12#include <stdio.h>
13#include <stdlib.h>
14#include <string.h>
15#include <time.h>
16#include <wchar.h>
17#include <wctype.h>
18
19#include <algorithm>
20#include <limits>
21#include <vector>
22
23#include "base/logging.h"
24#include "base/macros.h"
25#include "base/memory/singleton.h"
26#include "base/strings/string_split.h"
27#include "base/strings/utf_string_conversion_utils.h"
28#include "base/strings/utf_string_conversions.h"
29#include "base/third_party/icu/icu_utf.h"
30#include "build/build_config.h"
31
32namespace base {
33
34namespace {
35
36// Force the singleton used by EmptyString[16] to be a unique type. This
37// prevents other code that might accidentally use Singleton<string> from
38// getting our internal one.
39struct EmptyStrings {
40  EmptyStrings() {}
41  const std::string s;
42  const string16 s16;
43
44  static EmptyStrings* GetInstance() {
45    return Singleton<EmptyStrings>::get();
46  }
47};
48
49// Used by ReplaceStringPlaceholders to track the position in the string of
50// replaced parameters.
51struct ReplacementOffset {
52  ReplacementOffset(uintptr_t parameter, size_t offset)
53      : parameter(parameter),
54        offset(offset) {}
55
56  // Index of the parameter.
57  uintptr_t parameter;
58
59  // Starting position in the string.
60  size_t offset;
61};
62
63static bool CompareParameter(const ReplacementOffset& elem1,
64                             const ReplacementOffset& elem2) {
65  return elem1.parameter < elem2.parameter;
66}
67
68// Assuming that a pointer is the size of a "machine word", then
69// uintptr_t is an integer type that is also a machine word.
70typedef uintptr_t MachineWord;
71const uintptr_t kMachineWordAlignmentMask = sizeof(MachineWord) - 1;
72
73inline bool IsAlignedToMachineWord(const void* pointer) {
74  return !(reinterpret_cast<MachineWord>(pointer) & kMachineWordAlignmentMask);
75}
76
77template<typename T> inline T* AlignToMachineWord(T* pointer) {
78  return reinterpret_cast<T*>(reinterpret_cast<MachineWord>(pointer) &
79                              ~kMachineWordAlignmentMask);
80}
81
82template<size_t size, typename CharacterType> struct NonASCIIMask;
83template<> struct NonASCIIMask<4, char16> {
84    static inline uint32_t value() { return 0xFF80FF80U; }
85};
86template<> struct NonASCIIMask<4, char> {
87    static inline uint32_t value() { return 0x80808080U; }
88};
89template<> struct NonASCIIMask<8, char16> {
90    static inline uint64_t value() { return 0xFF80FF80FF80FF80ULL; }
91};
92template<> struct NonASCIIMask<8, char> {
93    static inline uint64_t value() { return 0x8080808080808080ULL; }
94};
95#if defined(WCHAR_T_IS_UTF32)
96template<> struct NonASCIIMask<4, wchar_t> {
97    static inline uint32_t value() { return 0xFFFFFF80U; }
98};
99template<> struct NonASCIIMask<8, wchar_t> {
100    static inline uint64_t value() { return 0xFFFFFF80FFFFFF80ULL; }
101};
102#endif  // WCHAR_T_IS_UTF32
103
104}  // namespace
105
106bool IsWprintfFormatPortable(const wchar_t* format) {
107  for (const wchar_t* position = format; *position != '\0'; ++position) {
108    if (*position == '%') {
109      bool in_specification = true;
110      bool modifier_l = false;
111      while (in_specification) {
112        // Eat up characters until reaching a known specifier.
113        if (*++position == '\0') {
114          // The format string ended in the middle of a specification.  Call
115          // it portable because no unportable specifications were found.  The
116          // string is equally broken on all platforms.
117          return true;
118        }
119
120        if (*position == 'l') {
121          // 'l' is the only thing that can save the 's' and 'c' specifiers.
122          modifier_l = true;
123        } else if (((*position == 's' || *position == 'c') && !modifier_l) ||
124                   *position == 'S' || *position == 'C' || *position == 'F' ||
125                   *position == 'D' || *position == 'O' || *position == 'U') {
126          // Not portable.
127          return false;
128        }
129
130        if (wcschr(L"diouxXeEfgGaAcspn%", *position)) {
131          // Portable, keep scanning the rest of the format string.
132          in_specification = false;
133        }
134      }
135    }
136  }
137
138  return true;
139}
140
141namespace {
142
143template<typename StringType>
144StringType ToLowerASCIIImpl(BasicStringPiece<StringType> str) {
145  StringType ret;
146  ret.reserve(str.size());
147  for (size_t i = 0; i < str.size(); i++)
148    ret.push_back(ToLowerASCII(str[i]));
149  return ret;
150}
151
152template<typename StringType>
153StringType ToUpperASCIIImpl(BasicStringPiece<StringType> str) {
154  StringType ret;
155  ret.reserve(str.size());
156  for (size_t i = 0; i < str.size(); i++)
157    ret.push_back(ToUpperASCII(str[i]));
158  return ret;
159}
160
161}  // namespace
162
163std::string ToLowerASCII(StringPiece str) {
164  return ToLowerASCIIImpl<std::string>(str);
165}
166
167string16 ToLowerASCII(StringPiece16 str) {
168  return ToLowerASCIIImpl<string16>(str);
169}
170
171std::string ToUpperASCII(StringPiece str) {
172  return ToUpperASCIIImpl<std::string>(str);
173}
174
175string16 ToUpperASCII(StringPiece16 str) {
176  return ToUpperASCIIImpl<string16>(str);
177}
178
179template<class StringType>
180int CompareCaseInsensitiveASCIIT(BasicStringPiece<StringType> a,
181                                 BasicStringPiece<StringType> b) {
182  // Find the first characters that aren't equal and compare them.  If the end
183  // of one of the strings is found before a nonequal character, the lengths
184  // of the strings are compared.
185  size_t i = 0;
186  while (i < a.length() && i < b.length()) {
187    typename StringType::value_type lower_a = ToLowerASCII(a[i]);
188    typename StringType::value_type lower_b = ToLowerASCII(b[i]);
189    if (lower_a < lower_b)
190      return -1;
191    if (lower_a > lower_b)
192      return 1;
193    i++;
194  }
195
196  // End of one string hit before finding a different character. Expect the
197  // common case to be "strings equal" at this point so check that first.
198  if (a.length() == b.length())
199    return 0;
200
201  if (a.length() < b.length())
202    return -1;
203  return 1;
204}
205
206int CompareCaseInsensitiveASCII(StringPiece a, StringPiece b) {
207  return CompareCaseInsensitiveASCIIT<std::string>(a, b);
208}
209
210int CompareCaseInsensitiveASCII(StringPiece16 a, StringPiece16 b) {
211  return CompareCaseInsensitiveASCIIT<string16>(a, b);
212}
213
214bool EqualsCaseInsensitiveASCII(StringPiece a, StringPiece b) {
215  if (a.length() != b.length())
216    return false;
217  return CompareCaseInsensitiveASCIIT<std::string>(a, b) == 0;
218}
219
220bool EqualsCaseInsensitiveASCII(StringPiece16 a, StringPiece16 b) {
221  if (a.length() != b.length())
222    return false;
223  return CompareCaseInsensitiveASCIIT<string16>(a, b) == 0;
224}
225
226const std::string& EmptyString() {
227  return EmptyStrings::GetInstance()->s;
228}
229
230const string16& EmptyString16() {
231  return EmptyStrings::GetInstance()->s16;
232}
233
234template<typename STR>
235bool ReplaceCharsT(const STR& input,
236                   const STR& replace_chars,
237                   const STR& replace_with,
238                   STR* output) {
239  bool removed = false;
240  size_t replace_length = replace_with.length();
241
242  *output = input;
243
244  size_t found = output->find_first_of(replace_chars);
245  while (found != STR::npos) {
246    removed = true;
247    output->replace(found, 1, replace_with);
248    found = output->find_first_of(replace_chars, found + replace_length);
249  }
250
251  return removed;
252}
253
254bool ReplaceChars(const string16& input,
255                  const StringPiece16& replace_chars,
256                  const string16& replace_with,
257                  string16* output) {
258  return ReplaceCharsT(input, replace_chars.as_string(), replace_with, output);
259}
260
261bool ReplaceChars(const std::string& input,
262                  const StringPiece& replace_chars,
263                  const std::string& replace_with,
264                  std::string* output) {
265  return ReplaceCharsT(input, replace_chars.as_string(), replace_with, output);
266}
267
268bool RemoveChars(const string16& input,
269                 const StringPiece16& remove_chars,
270                 string16* output) {
271  return ReplaceChars(input, remove_chars.as_string(), string16(), output);
272}
273
274bool RemoveChars(const std::string& input,
275                 const StringPiece& remove_chars,
276                 std::string* output) {
277  return ReplaceChars(input, remove_chars.as_string(), std::string(), output);
278}
279
280template<typename Str>
281TrimPositions TrimStringT(const Str& input,
282                          BasicStringPiece<Str> trim_chars,
283                          TrimPositions positions,
284                          Str* output) {
285  // Find the edges of leading/trailing whitespace as desired. Need to use
286  // a StringPiece version of input to be able to call find* on it with the
287  // StringPiece version of trim_chars (normally the trim_chars will be a
288  // constant so avoid making a copy).
289  BasicStringPiece<Str> input_piece(input);
290  const size_t last_char = input.length() - 1;
291  const size_t first_good_char = (positions & TRIM_LEADING) ?
292      input_piece.find_first_not_of(trim_chars) : 0;
293  const size_t last_good_char = (positions & TRIM_TRAILING) ?
294      input_piece.find_last_not_of(trim_chars) : last_char;
295
296  // When the string was all trimmed, report that we stripped off characters
297  // from whichever position the caller was interested in. For empty input, we
298  // stripped no characters, but we still need to clear |output|.
299  if (input.empty() ||
300      (first_good_char == Str::npos) || (last_good_char == Str::npos)) {
301    bool input_was_empty = input.empty();  // in case output == &input
302    output->clear();
303    return input_was_empty ? TRIM_NONE : positions;
304  }
305
306  // Trim.
307  *output =
308      input.substr(first_good_char, last_good_char - first_good_char + 1);
309
310  // Return where we trimmed from.
311  return static_cast<TrimPositions>(
312      ((first_good_char == 0) ? TRIM_NONE : TRIM_LEADING) |
313      ((last_good_char == last_char) ? TRIM_NONE : TRIM_TRAILING));
314}
315
316bool TrimString(const string16& input,
317                StringPiece16 trim_chars,
318                string16* output) {
319  return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE;
320}
321
322bool TrimString(const std::string& input,
323                StringPiece trim_chars,
324                std::string* output) {
325  return TrimStringT(input, trim_chars, TRIM_ALL, output) != TRIM_NONE;
326}
327
328template<typename Str>
329BasicStringPiece<Str> TrimStringPieceT(BasicStringPiece<Str> input,
330                                       BasicStringPiece<Str> trim_chars,
331                                       TrimPositions positions) {
332  size_t begin = (positions & TRIM_LEADING) ?
333      input.find_first_not_of(trim_chars) : 0;
334  size_t end = (positions & TRIM_TRAILING) ?
335      input.find_last_not_of(trim_chars) + 1 : input.size();
336  return input.substr(begin, end - begin);
337}
338
339StringPiece16 TrimString(StringPiece16 input,
340                         const StringPiece16& trim_chars,
341                         TrimPositions positions) {
342  return TrimStringPieceT(input, trim_chars, positions);
343}
344
345StringPiece TrimString(StringPiece input,
346                       const StringPiece& trim_chars,
347                       TrimPositions positions) {
348  return TrimStringPieceT(input, trim_chars, positions);
349}
350
351void TruncateUTF8ToByteSize(const std::string& input,
352                            const size_t byte_size,
353                            std::string* output) {
354  DCHECK(output);
355  if (byte_size > input.length()) {
356    *output = input;
357    return;
358  }
359  DCHECK_LE(byte_size,
360            static_cast<uint32_t>(std::numeric_limits<int32_t>::max()));
361  // Note: This cast is necessary because CBU8_NEXT uses int32_ts.
362  int32_t truncation_length = static_cast<int32_t>(byte_size);
363  int32_t char_index = truncation_length - 1;
364  const char* data = input.data();
365
366  // Using CBU8, we will move backwards from the truncation point
367  // to the beginning of the string looking for a valid UTF8
368  // character.  Once a full UTF8 character is found, we will
369  // truncate the string to the end of that character.
370  while (char_index >= 0) {
371    int32_t prev = char_index;
372    base_icu::UChar32 code_point = 0;
373    CBU8_NEXT(data, char_index, truncation_length, code_point);
374    if (!IsValidCharacter(code_point) ||
375        !IsValidCodepoint(code_point)) {
376      char_index = prev - 1;
377    } else {
378      break;
379    }
380  }
381
382  if (char_index >= 0 )
383    *output = input.substr(0, char_index);
384  else
385    output->clear();
386}
387
388TrimPositions TrimWhitespace(const string16& input,
389                             TrimPositions positions,
390                             string16* output) {
391  return TrimStringT(input, StringPiece16(kWhitespaceUTF16), positions, output);
392}
393
394StringPiece16 TrimWhitespace(StringPiece16 input,
395                             TrimPositions positions) {
396  return TrimStringPieceT(input, StringPiece16(kWhitespaceUTF16), positions);
397}
398
399TrimPositions TrimWhitespaceASCII(const std::string& input,
400                                  TrimPositions positions,
401                                  std::string* output) {
402  return TrimStringT(input, StringPiece(kWhitespaceASCII), positions, output);
403}
404
405StringPiece TrimWhitespaceASCII(StringPiece input, TrimPositions positions) {
406  return TrimStringPieceT(input, StringPiece(kWhitespaceASCII), positions);
407}
408
409template<typename STR>
410STR CollapseWhitespaceT(const STR& text,
411                        bool trim_sequences_with_line_breaks) {
412  STR result;
413  result.resize(text.size());
414
415  // Set flags to pretend we're already in a trimmed whitespace sequence, so we
416  // will trim any leading whitespace.
417  bool in_whitespace = true;
418  bool already_trimmed = true;
419
420  int chars_written = 0;
421  for (typename STR::const_iterator i(text.begin()); i != text.end(); ++i) {
422    if (IsUnicodeWhitespace(*i)) {
423      if (!in_whitespace) {
424        // Reduce all whitespace sequences to a single space.
425        in_whitespace = true;
426        result[chars_written++] = L' ';
427      }
428      if (trim_sequences_with_line_breaks && !already_trimmed &&
429          ((*i == '\n') || (*i == '\r'))) {
430        // Whitespace sequences containing CR or LF are eliminated entirely.
431        already_trimmed = true;
432        --chars_written;
433      }
434    } else {
435      // Non-whitespace chracters are copied straight across.
436      in_whitespace = false;
437      already_trimmed = false;
438      result[chars_written++] = *i;
439    }
440  }
441
442  if (in_whitespace && !already_trimmed) {
443    // Any trailing whitespace is eliminated.
444    --chars_written;
445  }
446
447  result.resize(chars_written);
448  return result;
449}
450
451string16 CollapseWhitespace(const string16& text,
452                            bool trim_sequences_with_line_breaks) {
453  return CollapseWhitespaceT(text, trim_sequences_with_line_breaks);
454}
455
456std::string CollapseWhitespaceASCII(const std::string& text,
457                                    bool trim_sequences_with_line_breaks) {
458  return CollapseWhitespaceT(text, trim_sequences_with_line_breaks);
459}
460
461bool ContainsOnlyChars(const StringPiece& input,
462                       const StringPiece& characters) {
463  return input.find_first_not_of(characters) == StringPiece::npos;
464}
465
466bool ContainsOnlyChars(const StringPiece16& input,
467                       const StringPiece16& characters) {
468  return input.find_first_not_of(characters) == StringPiece16::npos;
469}
470
471template <class Char>
472inline bool DoIsStringASCII(const Char* characters, size_t length) {
473  MachineWord all_char_bits = 0;
474  const Char* end = characters + length;
475
476  // Prologue: align the input.
477  while (!IsAlignedToMachineWord(characters) && characters != end) {
478    all_char_bits |= *characters;
479    ++characters;
480  }
481
482  // Compare the values of CPU word size.
483  const Char* word_end = AlignToMachineWord(end);
484  const size_t loop_increment = sizeof(MachineWord) / sizeof(Char);
485  while (characters < word_end) {
486    all_char_bits |= *(reinterpret_cast<const MachineWord*>(characters));
487    characters += loop_increment;
488  }
489
490  // Process the remaining bytes.
491  while (characters != end) {
492    all_char_bits |= *characters;
493    ++characters;
494  }
495
496  MachineWord non_ascii_bit_mask =
497      NonASCIIMask<sizeof(MachineWord), Char>::value();
498  return !(all_char_bits & non_ascii_bit_mask);
499}
500
501bool IsStringASCII(const StringPiece& str) {
502  return DoIsStringASCII(str.data(), str.length());
503}
504
505bool IsStringASCII(const StringPiece16& str) {
506  return DoIsStringASCII(str.data(), str.length());
507}
508
509bool IsStringASCII(const string16& str) {
510  return DoIsStringASCII(str.data(), str.length());
511}
512
513#if defined(WCHAR_T_IS_UTF32)
514bool IsStringASCII(const std::wstring& str) {
515  return DoIsStringASCII(str.data(), str.length());
516}
517#endif
518
519bool IsStringUTF8(const StringPiece& str) {
520  const char *src = str.data();
521  int32_t src_len = static_cast<int32_t>(str.length());
522  int32_t char_index = 0;
523
524  while (char_index < src_len) {
525    int32_t code_point;
526    CBU8_NEXT(src, char_index, src_len, code_point);
527    if (!IsValidCharacter(code_point))
528      return false;
529  }
530  return true;
531}
532
533// Implementation note: Normally this function will be called with a hardcoded
534// constant for the lowercase_ascii parameter. Constructing a StringPiece from
535// a C constant requires running strlen, so the result will be two passes
536// through the buffers, one to file the length of lowercase_ascii, and one to
537// compare each letter.
538//
539// This function could have taken a const char* to avoid this and only do one
540// pass through the string. But the strlen is faster than the case-insensitive
541// compares and lets us early-exit in the case that the strings are different
542// lengths (will often be the case for non-matches). So whether one approach or
543// the other will be faster depends on the case.
544//
545// The hardcoded strings are typically very short so it doesn't matter, and the
546// string piece gives additional flexibility for the caller (doesn't have to be
547// null terminated) so we choose the StringPiece route.
548template<typename Str>
549static inline bool DoLowerCaseEqualsASCII(BasicStringPiece<Str> str,
550                                          StringPiece lowercase_ascii) {
551  if (str.size() != lowercase_ascii.size())
552    return false;
553  for (size_t i = 0; i < str.size(); i++) {
554    if (ToLowerASCII(str[i]) != lowercase_ascii[i])
555      return false;
556  }
557  return true;
558}
559
560bool LowerCaseEqualsASCII(StringPiece str, StringPiece lowercase_ascii) {
561  return DoLowerCaseEqualsASCII<std::string>(str, lowercase_ascii);
562}
563
564bool LowerCaseEqualsASCII(StringPiece16 str, StringPiece lowercase_ascii) {
565  return DoLowerCaseEqualsASCII<string16>(str, lowercase_ascii);
566}
567
568bool EqualsASCII(StringPiece16 str, StringPiece ascii) {
569  if (str.length() != ascii.length())
570    return false;
571  return std::equal(ascii.begin(), ascii.end(), str.begin());
572}
573
574template<typename Str>
575bool StartsWithT(BasicStringPiece<Str> str,
576                 BasicStringPiece<Str> search_for,
577                 CompareCase case_sensitivity) {
578  if (search_for.size() > str.size())
579    return false;
580
581  BasicStringPiece<Str> source = str.substr(0, search_for.size());
582
583  switch (case_sensitivity) {
584    case CompareCase::SENSITIVE:
585      return source == search_for;
586
587    case CompareCase::INSENSITIVE_ASCII:
588      return std::equal(
589          search_for.begin(), search_for.end(),
590          source.begin(),
591          CaseInsensitiveCompareASCII<typename Str::value_type>());
592
593    default:
594      NOTREACHED();
595      return false;
596  }
597}
598
599bool StartsWith(StringPiece str,
600                StringPiece search_for,
601                CompareCase case_sensitivity) {
602  return StartsWithT<std::string>(str, search_for, case_sensitivity);
603}
604
605bool StartsWith(StringPiece16 str,
606                StringPiece16 search_for,
607                CompareCase case_sensitivity) {
608  return StartsWithT<string16>(str, search_for, case_sensitivity);
609}
610
611template <typename Str>
612bool EndsWithT(BasicStringPiece<Str> str,
613               BasicStringPiece<Str> search_for,
614               CompareCase case_sensitivity) {
615  if (search_for.size() > str.size())
616    return false;
617
618  BasicStringPiece<Str> source = str.substr(str.size() - search_for.size(),
619                                            search_for.size());
620
621  switch (case_sensitivity) {
622    case CompareCase::SENSITIVE:
623      return source == search_for;
624
625    case CompareCase::INSENSITIVE_ASCII:
626      return std::equal(
627          source.begin(), source.end(),
628          search_for.begin(),
629          CaseInsensitiveCompareASCII<typename Str::value_type>());
630
631    default:
632      NOTREACHED();
633      return false;
634  }
635}
636
637bool EndsWith(StringPiece str,
638              StringPiece search_for,
639              CompareCase case_sensitivity) {
640  return EndsWithT<std::string>(str, search_for, case_sensitivity);
641}
642
643bool EndsWith(StringPiece16 str,
644              StringPiece16 search_for,
645              CompareCase case_sensitivity) {
646  return EndsWithT<string16>(str, search_for, case_sensitivity);
647}
648
649char HexDigitToInt(wchar_t c) {
650  DCHECK(IsHexDigit(c));
651  if (c >= '0' && c <= '9')
652    return static_cast<char>(c - '0');
653  if (c >= 'A' && c <= 'F')
654    return static_cast<char>(c - 'A' + 10);
655  if (c >= 'a' && c <= 'f')
656    return static_cast<char>(c - 'a' + 10);
657  return 0;
658}
659
660bool IsUnicodeWhitespace(wchar_t c) {
661  // kWhitespaceWide is a NULL-terminated string
662  for (const wchar_t* cur = kWhitespaceWide; *cur; ++cur) {
663    if (*cur == c)
664      return true;
665  }
666  return false;
667}
668
669static const char* const kByteStringsUnlocalized[] = {
670  " B",
671  " kB",
672  " MB",
673  " GB",
674  " TB",
675  " PB"
676};
677
678string16 FormatBytesUnlocalized(int64_t bytes) {
679  double unit_amount = static_cast<double>(bytes);
680  size_t dimension = 0;
681  const int kKilo = 1024;
682  while (unit_amount >= kKilo &&
683         dimension < arraysize(kByteStringsUnlocalized) - 1) {
684    unit_amount /= kKilo;
685    dimension++;
686  }
687
688  char buf[64];
689  if (bytes != 0 && dimension > 0 && unit_amount < 100) {
690    base::snprintf(buf, arraysize(buf), "%.1lf%s", unit_amount,
691                   kByteStringsUnlocalized[dimension]);
692  } else {
693    base::snprintf(buf, arraysize(buf), "%.0lf%s", unit_amount,
694                   kByteStringsUnlocalized[dimension]);
695  }
696
697  return ASCIIToUTF16(buf);
698}
699
700// Runs in O(n) time in the length of |str|.
701template<class StringType>
702void DoReplaceSubstringsAfterOffset(StringType* str,
703                                    size_t offset,
704                                    BasicStringPiece<StringType> find_this,
705                                    BasicStringPiece<StringType> replace_with,
706                                    bool replace_all) {
707  DCHECK(!find_this.empty());
708
709  // If the find string doesn't appear, there's nothing to do.
710  offset = str->find(find_this.data(), offset, find_this.size());
711  if (offset == StringType::npos)
712    return;
713
714  // If we're only replacing one instance, there's no need to do anything
715  // complicated.
716  size_t find_length = find_this.length();
717  if (!replace_all) {
718    str->replace(offset, find_length, replace_with.data(), replace_with.size());
719    return;
720  }
721
722  // If the find and replace strings are the same length, we can simply use
723  // replace() on each instance, and finish the entire operation in O(n) time.
724  size_t replace_length = replace_with.length();
725  if (find_length == replace_length) {
726    do {
727      str->replace(offset, find_length,
728                   replace_with.data(), replace_with.size());
729      offset = str->find(find_this.data(), offset + replace_length,
730                         find_this.size());
731    } while (offset != StringType::npos);
732    return;
733  }
734
735  // Since the find and replace strings aren't the same length, a loop like the
736  // one above would be O(n^2) in the worst case, as replace() will shift the
737  // entire remaining string each time.  We need to be more clever to keep
738  // things O(n).
739  //
740  // If we're shortening the string, we can alternate replacements with shifting
741  // forward the intervening characters using memmove().
742  size_t str_length = str->length();
743  if (find_length > replace_length) {
744    size_t write_offset = offset;
745    do {
746      if (replace_length) {
747        str->replace(write_offset, replace_length,
748                     replace_with.data(), replace_with.size());
749        write_offset += replace_length;
750      }
751      size_t read_offset = offset + find_length;
752      offset = std::min(
753          str->find(find_this.data(), read_offset, find_this.size()),
754          str_length);
755      size_t length = offset - read_offset;
756      if (length) {
757        memmove(&(*str)[write_offset], &(*str)[read_offset],
758                length * sizeof(typename StringType::value_type));
759        write_offset += length;
760      }
761    } while (offset < str_length);
762    str->resize(write_offset);
763    return;
764  }
765
766  // We're lengthening the string.  We can use alternating replacements and
767  // memmove() calls like above, but we need to precalculate the final string
768  // length and then expand from back-to-front to avoid overwriting the string
769  // as we're reading it, needing to shift, or having to copy to a second string
770  // temporarily.
771  size_t first_match = offset;
772
773  // First, calculate the final length and resize the string.
774  size_t final_length = str_length;
775  size_t expansion = replace_length - find_length;
776  size_t current_match;
777  do {
778    final_length += expansion;
779    // Minor optimization: save this offset into |current_match|, so that on
780    // exit from the loop, |current_match| will point at the last instance of
781    // the find string, and we won't need to find() it again immediately.
782    current_match = offset;
783    offset = str->find(find_this.data(), offset + find_length,
784                       find_this.size());
785  } while (offset != StringType::npos);
786  str->resize(final_length);
787
788  // Now do the replacement loop, working backwards through the string.
789  for (size_t prev_match = str_length, write_offset = final_length; ;
790       current_match = str->rfind(find_this.data(), current_match - 1,
791                                  find_this.size())) {
792    size_t read_offset = current_match + find_length;
793    size_t length = prev_match - read_offset;
794    if (length) {
795      write_offset -= length;
796      memmove(&(*str)[write_offset], &(*str)[read_offset],
797              length * sizeof(typename StringType::value_type));
798    }
799    write_offset -= replace_length;
800    str->replace(write_offset, replace_length,
801                 replace_with.data(), replace_with.size());
802    if (current_match == first_match)
803      return;
804    prev_match = current_match;
805  }
806}
807
808void ReplaceFirstSubstringAfterOffset(string16* str,
809                                      size_t start_offset,
810                                      StringPiece16 find_this,
811                                      StringPiece16 replace_with) {
812  DoReplaceSubstringsAfterOffset<string16>(
813      str, start_offset, find_this, replace_with, false);  // Replace first.
814}
815
816void ReplaceFirstSubstringAfterOffset(std::string* str,
817                                      size_t start_offset,
818                                      StringPiece find_this,
819                                      StringPiece replace_with) {
820  DoReplaceSubstringsAfterOffset<std::string>(
821      str, start_offset, find_this, replace_with, false);  // Replace first.
822}
823
824void ReplaceSubstringsAfterOffset(string16* str,
825                                  size_t start_offset,
826                                  StringPiece16 find_this,
827                                  StringPiece16 replace_with) {
828  DoReplaceSubstringsAfterOffset<string16>(
829      str, start_offset, find_this, replace_with, true);  // Replace all.
830}
831
832void ReplaceSubstringsAfterOffset(std::string* str,
833                                  size_t start_offset,
834                                  StringPiece find_this,
835                                  StringPiece replace_with) {
836  DoReplaceSubstringsAfterOffset<std::string>(
837      str, start_offset, find_this, replace_with, true);  // Replace all.
838}
839
840template <class string_type>
841inline typename string_type::value_type* WriteIntoT(string_type* str,
842                                                    size_t length_with_null) {
843  DCHECK_GT(length_with_null, 1u);
844  str->reserve(length_with_null);
845  str->resize(length_with_null - 1);
846  return &((*str)[0]);
847}
848
849char* WriteInto(std::string* str, size_t length_with_null) {
850  return WriteIntoT(str, length_with_null);
851}
852
853char16* WriteInto(string16* str, size_t length_with_null) {
854  return WriteIntoT(str, length_with_null);
855}
856
857template<typename STR>
858static STR JoinStringT(const std::vector<STR>& parts,
859                       BasicStringPiece<STR> sep) {
860  if (parts.empty())
861    return STR();
862
863  STR result(parts[0]);
864  auto iter = parts.begin();
865  ++iter;
866
867  for (; iter != parts.end(); ++iter) {
868    sep.AppendToString(&result);
869    result += *iter;
870  }
871
872  return result;
873}
874
875std::string JoinString(const std::vector<std::string>& parts,
876                       StringPiece separator) {
877  return JoinStringT(parts, separator);
878}
879
880string16 JoinString(const std::vector<string16>& parts,
881                    StringPiece16 separator) {
882  return JoinStringT(parts, separator);
883}
884
885template<class FormatStringType, class OutStringType>
886OutStringType DoReplaceStringPlaceholders(
887    const FormatStringType& format_string,
888    const std::vector<OutStringType>& subst,
889    std::vector<size_t>* offsets) {
890  size_t substitutions = subst.size();
891
892  size_t sub_length = 0;
893  for (const auto& cur : subst)
894    sub_length += cur.length();
895
896  OutStringType formatted;
897  formatted.reserve(format_string.length() + sub_length);
898
899  std::vector<ReplacementOffset> r_offsets;
900  for (auto i = format_string.begin(); i != format_string.end(); ++i) {
901    if ('$' == *i) {
902      if (i + 1 != format_string.end()) {
903        ++i;
904        DCHECK('$' == *i || '1' <= *i) << "Invalid placeholder: " << *i;
905        if ('$' == *i) {
906          while (i != format_string.end() && '$' == *i) {
907            formatted.push_back('$');
908            ++i;
909          }
910          --i;
911        } else {
912          uintptr_t index = 0;
913          while (i != format_string.end() && '0' <= *i && *i <= '9') {
914            index *= 10;
915            index += *i - '0';
916            ++i;
917          }
918          --i;
919          index -= 1;
920          if (offsets) {
921            ReplacementOffset r_offset(index,
922                static_cast<int>(formatted.size()));
923            r_offsets.insert(std::lower_bound(r_offsets.begin(),
924                                              r_offsets.end(),
925                                              r_offset,
926                                              &CompareParameter),
927                             r_offset);
928          }
929          if (index < substitutions)
930            formatted.append(subst.at(index));
931        }
932      }
933    } else {
934      formatted.push_back(*i);
935    }
936  }
937  if (offsets) {
938    for (const auto& cur : r_offsets)
939      offsets->push_back(cur.offset);
940  }
941  return formatted;
942}
943
944string16 ReplaceStringPlaceholders(const string16& format_string,
945                                   const std::vector<string16>& subst,
946                                   std::vector<size_t>* offsets) {
947  return DoReplaceStringPlaceholders(format_string, subst, offsets);
948}
949
950std::string ReplaceStringPlaceholders(const StringPiece& format_string,
951                                      const std::vector<std::string>& subst,
952                                      std::vector<size_t>* offsets) {
953  return DoReplaceStringPlaceholders(format_string, subst, offsets);
954}
955
956string16 ReplaceStringPlaceholders(const string16& format_string,
957                                   const string16& a,
958                                   size_t* offset) {
959  std::vector<size_t> offsets;
960  std::vector<string16> subst;
961  subst.push_back(a);
962  string16 result = ReplaceStringPlaceholders(format_string, subst, &offsets);
963
964  DCHECK_EQ(1U, offsets.size());
965  if (offset)
966    *offset = offsets[0];
967  return result;
968}
969
970// The following code is compatible with the OpenBSD lcpy interface.  See:
971//   http://www.gratisoft.us/todd/papers/strlcpy.html
972//   ftp://ftp.openbsd.org/pub/OpenBSD/src/lib/libc/string/{wcs,str}lcpy.c
973
974namespace {
975
976template <typename CHAR>
977size_t lcpyT(CHAR* dst, const CHAR* src, size_t dst_size) {
978  for (size_t i = 0; i < dst_size; ++i) {
979    if ((dst[i] = src[i]) == 0)  // We hit and copied the terminating NULL.
980      return i;
981  }
982
983  // We were left off at dst_size.  We over copied 1 byte.  Null terminate.
984  if (dst_size != 0)
985    dst[dst_size - 1] = 0;
986
987  // Count the rest of the |src|, and return it's length in characters.
988  while (src[dst_size]) ++dst_size;
989  return dst_size;
990}
991
992}  // namespace
993
994size_t strlcpy(char* dst, const char* src, size_t dst_size) {
995  return lcpyT<char>(dst, src, dst_size);
996}
997size_t wcslcpy(wchar_t* dst, const wchar_t* src, size_t dst_size) {
998  return lcpyT<wchar_t>(dst, src, dst_size);
999}
1000
1001}  // namespace base
1002