1// Copyright 2013 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4//
5// This file defines utility functions for working with strings.
6
7#ifndef BASE_STRINGS_STRING_UTIL_H_
8#define BASE_STRINGS_STRING_UTIL_H_
9
10#include <ctype.h>
11#include <stdarg.h>   // va_list
12#include <stddef.h>
13#include <stdint.h>
14
15#include <string>
16#include <vector>
17
18#include "base/compiler_specific.h"
19#include "base/strings/string_piece.h"  // For implicit conversions.
20#include "build/build_config.h"
21
22// On Android, bionic's stdio.h defines an snprintf macro when being built with
23// clang. Undefine it here so it won't collide with base::snprintf().
24#undef snprintf
25
26namespace base {
27
28// C standard-library functions that aren't cross-platform are provided as
29// "base::...", and their prototypes are listed below. These functions are
30// then implemented as inline calls to the platform-specific equivalents in the
31// platform-specific headers.
32
// Wrapper for vsnprintf that always null-terminates and always returns the
// number of characters that would be in an untruncated formatted
// string, even when truncation occurs.
//
// |buffer| is the destination and |size| its capacity; |format| and
// |arguments| are the printf-style format string and its already-started
// va_list.
// NOTE(review): PRINTF_FORMAT(3, 0) presumably enables compiler checking of
// |format| (argument 3) with no variadic arguments to check -- confirm
// against base/compiler_specific.h.
int vsnprintf(char* buffer, size_t size, const char* format, va_list arguments)
    PRINTF_FORMAT(3, 0);
38
39// Some of these implementations need to be inlined.
40
41// We separate the declaration from the implementation of this inline
42// function just so the PRINTF_FORMAT works.
43inline int snprintf(char* buffer,
44                    size_t size,
45                    _Printf_format_string_ const char* format,
46                    ...) PRINTF_FORMAT(3, 4);
47inline int snprintf(char* buffer,
48                    size_t size,
49                    _Printf_format_string_ const char* format,
50                    ...) {
51  va_list arguments;
52  va_start(arguments, format);
53  int result = vsnprintf(buffer, size, format, arguments);
54  va_end(arguments);
55  return result;
56}
57
// BSD-style safe and consistent string copy function.
// Copies |src| to |dst|, where |dst_size| is the total allocated size of |dst|.
// Copies at most |dst_size|-1 characters, and always NUL-terminates |dst|, as
// long as |dst_size| is not 0.
//
// Returns the length of |src| in characters. If the return value is
// >= |dst_size|, then the output was truncated.
// NOTE: All sizes are in number of characters, NOT in bytes.
size_t strlcpy(char* dst, const char* src, size_t dst_size);
65
// Lowercases a single ASCII character. The standard library's tolower is
// locale sensitive, so it is deliberately not used here. Any character
// outside 'A'..'Z' is handed back unchanged.
inline char ToLowerASCII(char c) {
  if (c < 'A' || c > 'Z')
    return c;
  return static_cast<char>(c + ('a' - 'A'));
}
71
// Uppercases a single ASCII character. The standard library's toupper is
// locale sensitive, so it is deliberately not used here. Any character
// outside 'a'..'z' is handed back unchanged.
inline char ToUpperASCII(char c) {
  if (c < 'a' || c > 'z')
    return c;
  return static_cast<char>(c + ('A' - 'a'));
}
// Converts the given string to its ASCII-lowercase equivalent. The result is
// returned as a new std::string.
std::string ToLowerASCII(StringPiece str);
// Converts the given string to its ASCII-uppercase equivalent. The result is
// returned as a new std::string.
std::string ToUpperASCII(StringPiece str);
81
// Binary predicate that treats two characters as equal when their
// ToLowerASCII() forms match. Intended for STL algorithms such as
// std::search.
//
// A full Unicode version of this functor is not possible to write: case
// mappings might change the number of characters, depend on context
// (combining accents), and require handling UTF-16. If you need proper
// Unicode support, use base::i18n::ToLower/FoldCase and then just use a
// normal operator== on the result.
template <typename Char>
struct CaseInsensitiveCompareASCII {
  bool operator()(Char x, Char y) const {
    const auto folded_x = ToLowerASCII(x);
    const auto folded_y = ToLowerASCII(y);
    return folded_x == folded_y;
  }
};
96
// Like strcasecmp for case-insensitive ASCII characters only. Returns:
//   -1  (a < b)
//    0  (a == b)
//    1  (a > b)
// (unlike strcasecmp, which can return values greater than 1 or less than
// -1). For full Unicode support, use base::i18n::ToLower or
// base::i18n::FoldCase and then just call the normal string operators on the
// result.
int CompareCaseInsensitiveASCII(StringPiece a, StringPiece b);
105
// Equality for ASCII case-insensitive comparisons. For full Unicode support,
// use base::i18n::ToLower or base::i18n::FoldCase and then compare with either
// == or !=.
bool EqualsCaseInsensitiveASCII(StringPiece a, StringPiece b);
110
// Contains the set of characters representing whitespace in the ASCII
// encoding. Null-terminated. The whitespace characters are those defined by
// HTML5, and don't include control characters.
// Only declared here; the array is defined elsewhere.
extern const char kWhitespaceASCII[];
115
// Replaces characters in |replace_chars| from anywhere in |input| with
// |replace_with|.  Each character in |replace_chars| will be replaced with
// the |replace_with| string.  The result is written to |output|.
// Returns true if any characters were replaced.
// |replace_chars| must be null-terminated.
// NOTE(review): |replace_chars| is a StringPiece, which carries its own
// length; confirm whether the null-termination requirement still applies.
// NOTE: Safe to use the same variable for both |input| and |output|.
bool ReplaceChars(const std::string& input,
                  const StringPiece& replace_chars,
                  const std::string& replace_with,
                  std::string* output);
125
// Bit flags naming the ends of a string. Used both as an argument and as a
// return value by the trimming functions declared in this header.
enum TrimPositions {
  TRIM_NONE = 0x0,      // No bits set.
  TRIM_LEADING = 0x1,   // Bit 0.
  TRIM_TRAILING = 0x2,  // Bit 1.
  TRIM_ALL = TRIM_LEADING | TRIM_TRAILING,  // Both bits set.
};
132
// Removes characters in |trim_chars| from the beginning and end of |input|,
// storing the result in |output|.
// The 8-bit version only works on 8-bit characters, not UTF-8.
//
// It is safe to use the same variable for both |input| and |output| (this is
// the normal usage to trim in-place).
//
// NOTE(review): the meaning of the bool result is not stated in this header;
// presumably it reports whether anything was trimmed -- confirm against the
// implementation.
bool TrimString(const std::string& input,
                StringPiece trim_chars,
                std::string* output);
141
// StringPiece version of the above. The returned piece refers to the original
// buffer rather than to a copy.
// |positions| presumably selects which end(s) of |input| are trimmed (see
// TrimPositions) -- confirm against the implementation.
StringPiece TrimString(StringPiece input,
                       const StringPiece& trim_chars,
                       TrimPositions positions);
147
// Trims any whitespace from either end of the input string.
//
// The StringPiece versions return a substring referencing the input buffer.
// The ASCII versions look only for ASCII whitespace.
//
// The std::string versions return where whitespace was found, expressed as a
// TrimPositions value, and write the trimmed text to |output|.
// NOTE: Safe to use the same variable for both input and output.
TrimPositions TrimWhitespaceASCII(const std::string& input,
                                  TrimPositions positions,
                                  std::string* output);
158
// Returns true if the specified string matches the criteria. How can a wide
// string be 8-bit or UTF8? It contains only characters that are < 256 (in the
// first case) or characters that use only 8-bits and whose 8-bit
// representation looks like a UTF-8 string (the second case).
// NOTE(review): no wide-string overload is declared in this header as shown;
// the remark above appears to describe overloads that live elsewhere.
//
// Note that IsStringUTF8 checks not only if the input is structurally
// valid but also if it doesn't contain any non-character codepoint
// (e.g. U+FFFE). It's done on purpose because all the existing callers want
// to have the maximum 'discriminating' power from other encodings. If
// there's a use case for just checking the structural validity, we have to
// add a new function for that.
//
// IsStringASCII assumes the input is likely all ASCII, and does not leave early
// if it is not the case.
bool IsStringUTF8(const StringPiece& str);
bool IsStringASCII(const StringPiece& str);
175
176}  // namespace base
177
178#if defined(OS_WIN)
179#include "base/strings/string_util_win.h"
180#elif defined(OS_POSIX)
181#include "base/strings/string_util_posix.h"
182#else
183#error Define string operations appropriately for your platform
184#endif
185
186#endif  // BASE_STRINGS_STRING_UTIL_H_
187