1// Protocol Buffers - Google's data interchange format 2// Copyright 2008 Google Inc. All rights reserved. 3// http://code.google.com/p/protobuf/ 4// 5// Redistribution and use in source and binary forms, with or without 6// modification, are permitted provided that the following conditions are 7// met: 8// 9// * Redistributions of source code must retain the above copyright 10// notice, this list of conditions and the following disclaimer. 11// * Redistributions in binary form must reproduce the above 12// copyright notice, this list of conditions and the following disclaimer 13// in the documentation and/or other materials provided with the 14// distribution. 15// * Neither the name of Google Inc. nor the names of its 16// contributors may be used to endorse or promote products derived from 17// this software without specific prior written permission. 18// 19// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 23// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 25// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
30 31// from google3/strings/strutil.h 32 33#ifndef GOOGLE_PROTOBUF_STUBS_STRUTIL_H__ 34#define GOOGLE_PROTOBUF_STUBS_STRUTIL_H__ 35 36#include <stdlib.h> 37#include <vector> 38#include <google/protobuf/stubs/common.h> 39 40namespace google { 41namespace protobuf { 42 43#ifdef _MSC_VER 44#define strtoll _strtoi64 45#define strtoull _strtoui64 46#elif defined(__DECCXX) && defined(__osf__) 47// HP C++ on Tru64 does not have strtoll, but strtol is already 64-bit. 48#define strtoll strtol 49#define strtoull strtoul 50#endif 51 52// ---------------------------------------------------------------------- 53// ascii_isalnum() 54// Check if an ASCII character is alphanumeric. We can't use ctype's 55// isalnum() because it is affected by locale. This function is applied 56// to identifiers in the protocol buffer language, not to natural-language 57// strings, so locale should not be taken into account. 58// ascii_isdigit() 59// Like above, but only accepts digits. 60// ---------------------------------------------------------------------- 61 62inline bool ascii_isalnum(char c) { 63 return ('a' <= c && c <= 'z') || 64 ('A' <= c && c <= 'Z') || 65 ('0' <= c && c <= '9'); 66} 67 68inline bool ascii_isdigit(char c) { 69 return ('0' <= c && c <= '9'); 70} 71 72// ---------------------------------------------------------------------- 73// HasPrefixString() 74// Check if a string begins with a given prefix. 75// StripPrefixString() 76// Given a string and a putative prefix, returns the string minus the 77// prefix string if the prefix matches, otherwise the original 78// string. 
79// ---------------------------------------------------------------------- 80inline bool HasPrefixString(const string& str, 81 const string& prefix) { 82 return str.size() >= prefix.size() && 83 str.compare(0, prefix.size(), prefix) == 0; 84} 85 86inline string StripPrefixString(const string& str, const string& prefix) { 87 if (HasPrefixString(str, prefix)) { 88 return str.substr(prefix.size()); 89 } else { 90 return str; 91 } 92} 93 94// ---------------------------------------------------------------------- 95// HasSuffixString() 96// Return true if str ends in suffix. 97// StripSuffixString() 98// Given a string and a putative suffix, returns the string minus the 99// suffix string if the suffix matches, otherwise the original 100// string. 101// ---------------------------------------------------------------------- 102inline bool HasSuffixString(const string& str, 103 const string& suffix) { 104 return str.size() >= suffix.size() && 105 str.compare(str.size() - suffix.size(), suffix.size(), suffix) == 0; 106} 107 108inline string StripSuffixString(const string& str, const string& suffix) { 109 if (HasSuffixString(str, suffix)) { 110 return str.substr(0, str.size() - suffix.size()); 111 } else { 112 return str; 113 } 114} 115 116// ---------------------------------------------------------------------- 117// StripString 118// Replaces any occurrence of the character 'remove' (or the characters 119// in 'remove') with the character 'replacewith'. 120// Good for keeping html characters or protocol characters (\t) out 121// of places where they might cause a problem. 122// ---------------------------------------------------------------------- 123LIBPROTOBUF_EXPORT void StripString(string* s, const char* remove, 124 char replacewith); 125 126// ---------------------------------------------------------------------- 127// LowerString() 128// UpperString() 129// Convert the characters in "s" to lowercase or uppercase. 
ASCII-only: 130// these functions intentionally ignore locale because they are applied to 131// identifiers used in the Protocol Buffer language, not to natural-language 132// strings. 133// ---------------------------------------------------------------------- 134 135inline void LowerString(string * s) { 136 string::iterator end = s->end(); 137 for (string::iterator i = s->begin(); i != end; ++i) { 138 // tolower() changes based on locale. We don't want this! 139 if ('A' <= *i && *i <= 'Z') *i += 'a' - 'A'; 140 } 141} 142 143inline void UpperString(string * s) { 144 string::iterator end = s->end(); 145 for (string::iterator i = s->begin(); i != end; ++i) { 146 // toupper() changes based on locale. We don't want this! 147 if ('a' <= *i && *i <= 'z') *i += 'A' - 'a'; 148 } 149} 150 151// ---------------------------------------------------------------------- 152// StringReplace() 153// Give me a string and two patterns "old" and "new", and I replace 154// the first instance of "old" in the string with "new", if it 155// exists. RETURN a new string, regardless of whether the replacement 156// happened or not. 157// ---------------------------------------------------------------------- 158 159LIBPROTOBUF_EXPORT string StringReplace(const string& s, const string& oldsub, 160 const string& newsub, bool replace_all); 161 162// ---------------------------------------------------------------------- 163// SplitStringUsing() 164// Split a string using a character delimiter. Append the components 165// to 'result'. If there are consecutive delimiters, this function skips 166// over all of them. 
167// ---------------------------------------------------------------------- 168LIBPROTOBUF_EXPORT void SplitStringUsing(const string& full, const char* delim, 169 vector<string>* res); 170 171// ---------------------------------------------------------------------- 172// JoinStrings() 173// These methods concatenate a vector of strings into a C++ string, using 174// the C-string "delim" as a separator between components. There are two 175// flavors of the function, one flavor returns the concatenated string, 176// another takes a pointer to the target string. In the latter case the 177// target string is cleared and overwritten. 178// ---------------------------------------------------------------------- 179LIBPROTOBUF_EXPORT void JoinStrings(const vector<string>& components, 180 const char* delim, string* result); 181 182inline string JoinStrings(const vector<string>& components, 183 const char* delim) { 184 string result; 185 JoinStrings(components, delim, &result); 186 return result; 187} 188 189// ---------------------------------------------------------------------- 190// UnescapeCEscapeSequences() 191// Copies "source" to "dest", rewriting C-style escape sequences 192// -- '\n', '\r', '\\', '\ooo', etc -- to their ASCII 193// equivalents. "dest" must be sufficiently large to hold all 194// the characters in the rewritten string (i.e. at least as large 195// as strlen(source) + 1 should be safe, since the replacements 196// are always shorter than the original escaped sequences). It's 197// safe for source and dest to be the same. RETURNS the length 198// of dest. 199// 200// It allows hex sequences \xhh, or generally \xhhhhh with an 201// arbitrary number of hex digits, but all of them together must 202// specify a value of a single byte (e.g. \x0045 is equivalent 203// to \x45, and \x1234 is erroneous). 
//
//    It also allows escape sequences of the form \uhhhh (exactly four
//    hex digits, upper or lower case) or \Uhhhhhhhh (exactly eight
//    hex digits, upper or lower case) to specify a Unicode code
//    point. The dest array will contain the UTF8-encoded version of
//    that code-point (e.g., if source contains \u2019, then dest will
//    contain the three bytes 0xE2, 0x80, and 0x99). For the inverse
//    transformation, use UniLib::UTF8EscapeString
//    (util/utf8/unilib.h), not CEscapeString.
//
//    Errors: In the first form of the call, errors are reported with
//    LOG(ERROR). The same is true for the second form of the call if
//    the pointer to the string vector is NULL; otherwise, error
//    messages are stored in the vector. In either case, the effect on
//    the dest array is not defined, but the rest of the source will be
//    processed.
// ----------------------------------------------------------------------

// First form: no error vector.
LIBPROTOBUF_EXPORT int UnescapeCEscapeSequences(const char* source, char* dest);
// Second form: takes an 'errors' vector pointer, which may be NULL.
LIBPROTOBUF_EXPORT int UnescapeCEscapeSequences(const char* source, char* dest,
                                                vector<string> *errors);

// ----------------------------------------------------------------------
// UnescapeCEscapeString()
//    This does the same thing as UnescapeCEscapeSequences, but creates
//    a new string. The caller does not need to worry about allocating
//    a dest buffer. This should be used for non performance critical
//    tasks such as printing debug messages. It is safe for src and dest
//    to be the same.
//
//    The second call stores its errors in a supplied string vector.
//    If the string vector pointer is NULL, it reports the errors with LOG().
//
//    In the first and second calls, the length of dest is returned. In the
//    third call, the new string is returned.
// ----------------------------------------------------------------------

// First call: writes into *dest and returns an int.
LIBPROTOBUF_EXPORT int UnescapeCEscapeString(const string& src, string* dest);
// Second call: same, plus an 'errors' vector pointer.
LIBPROTOBUF_EXPORT int UnescapeCEscapeString(const string& src, string* dest,
                                             vector<string> *errors);
// Third call: returns the new string by value.
LIBPROTOBUF_EXPORT string UnescapeCEscapeString(const string& src);

// ----------------------------------------------------------------------
// CEscapeString()
//    Copies 'src' to 'dest', escaping dangerous characters using
//    C-style escape sequences. This is very useful for preparing query
//    flags. 'src' and 'dest' should not overlap.
//    Returns the number of bytes written to 'dest' (not including the \0)
//    or -1 if there was insufficient space.
//
//    Currently only \n, \r, \t, ", ', \ and !isprint() chars are escaped.
// ----------------------------------------------------------------------
LIBPROTOBUF_EXPORT int CEscapeString(const char* src, int src_len,
                                     char* dest, int dest_len);

// ----------------------------------------------------------------------
// CEscape()
//    More convenient form of CEscapeString: returns result as a "string".
//    This version is slower than CEscapeString() because it does more
//    allocation.  However, it is much more convenient to use in
//    non-speed-critical code like logging messages etc.
// ----------------------------------------------------------------------
LIBPROTOBUF_EXPORT string CEscape(const string& src);

namespace strings {
// Like CEscape() but does not escape bytes with the upper bit set.
LIBPROTOBUF_EXPORT string Utf8SafeCEscape(const string& src);

// Like CEscape() but uses hex (\x) escapes instead of octals.
273LIBPROTOBUF_EXPORT string CHexEscape(const string& src); 274} // namespace strings 275 276// ---------------------------------------------------------------------- 277// strto32() 278// strtou32() 279// strto64() 280// strtou64() 281// Architecture-neutral plug compatible replacements for strtol() and 282// strtoul(). Long's have different lengths on ILP-32 and LP-64 283// platforms, so using these is safer, from the point of view of 284// overflow behavior, than using the standard libc functions. 285// ---------------------------------------------------------------------- 286LIBPROTOBUF_EXPORT int32 strto32_adaptor(const char *nptr, char **endptr, 287 int base); 288LIBPROTOBUF_EXPORT uint32 strtou32_adaptor(const char *nptr, char **endptr, 289 int base); 290 291inline int32 strto32(const char *nptr, char **endptr, int base) { 292 if (sizeof(int32) == sizeof(long)) 293 return strtol(nptr, endptr, base); 294 else 295 return strto32_adaptor(nptr, endptr, base); 296} 297 298inline uint32 strtou32(const char *nptr, char **endptr, int base) { 299 if (sizeof(uint32) == sizeof(unsigned long)) 300 return strtoul(nptr, endptr, base); 301 else 302 return strtou32_adaptor(nptr, endptr, base); 303} 304 305// For now, long long is 64-bit on all the platforms we care about, so these 306// functions can simply pass the call to strto[u]ll. 
307inline int64 strto64(const char *nptr, char **endptr, int base) { 308 GOOGLE_COMPILE_ASSERT(sizeof(int64) == sizeof(long long), 309 sizeof_int64_is_not_sizeof_long_long); 310 return strtoll(nptr, endptr, base); 311} 312 313inline uint64 strtou64(const char *nptr, char **endptr, int base) { 314 GOOGLE_COMPILE_ASSERT(sizeof(uint64) == sizeof(unsigned long long), 315 sizeof_uint64_is_not_sizeof_long_long); 316 return strtoull(nptr, endptr, base); 317} 318 319// ---------------------------------------------------------------------- 320// FastIntToBuffer() 321// FastHexToBuffer() 322// FastHex64ToBuffer() 323// FastHex32ToBuffer() 324// FastTimeToBuffer() 325// These are intended for speed. FastIntToBuffer() assumes the 326// integer is non-negative. FastHexToBuffer() puts output in 327// hex rather than decimal. FastTimeToBuffer() puts the output 328// into RFC822 format. 329// 330// FastHex64ToBuffer() puts a 64-bit unsigned value in hex-format, 331// padded to exactly 16 bytes (plus one byte for '\0') 332// 333// FastHex32ToBuffer() puts a 32-bit unsigned value in hex-format, 334// padded to exactly 8 bytes (plus one byte for '\0') 335// 336// All functions take the output buffer as an arg. 337// They all return a pointer to the beginning of the output, 338// which may not be the beginning of the input buffer. 339// ---------------------------------------------------------------------- 340 341// Suggested buffer size for FastToBuffer functions. Also works with 342// DoubleToBuffer() and FloatToBuffer(). 
343static const int kFastToBufferSize = 32; 344 345LIBPROTOBUF_EXPORT char* FastInt32ToBuffer(int32 i, char* buffer); 346LIBPROTOBUF_EXPORT char* FastInt64ToBuffer(int64 i, char* buffer); 347char* FastUInt32ToBuffer(uint32 i, char* buffer); // inline below 348char* FastUInt64ToBuffer(uint64 i, char* buffer); // inline below 349LIBPROTOBUF_EXPORT char* FastHexToBuffer(int i, char* buffer); 350LIBPROTOBUF_EXPORT char* FastHex64ToBuffer(uint64 i, char* buffer); 351LIBPROTOBUF_EXPORT char* FastHex32ToBuffer(uint32 i, char* buffer); 352 353// at least 22 bytes long 354inline char* FastIntToBuffer(int i, char* buffer) { 355 return (sizeof(i) == 4 ? 356 FastInt32ToBuffer(i, buffer) : FastInt64ToBuffer(i, buffer)); 357} 358inline char* FastUIntToBuffer(unsigned int i, char* buffer) { 359 return (sizeof(i) == 4 ? 360 FastUInt32ToBuffer(i, buffer) : FastUInt64ToBuffer(i, buffer)); 361} 362inline char* FastLongToBuffer(long i, char* buffer) { 363 return (sizeof(i) == 4 ? 364 FastInt32ToBuffer(i, buffer) : FastInt64ToBuffer(i, buffer)); 365} 366inline char* FastULongToBuffer(unsigned long i, char* buffer) { 367 return (sizeof(i) == 4 ? 368 FastUInt32ToBuffer(i, buffer) : FastUInt64ToBuffer(i, buffer)); 369} 370 371// ---------------------------------------------------------------------- 372// FastInt32ToBufferLeft() 373// FastUInt32ToBufferLeft() 374// FastInt64ToBufferLeft() 375// FastUInt64ToBufferLeft() 376// 377// Like the Fast*ToBuffer() functions above, these are intended for speed. 378// Unlike the Fast*ToBuffer() functions, however, these functions write 379// their output to the beginning of the buffer (hence the name, as the 380// output is left-aligned). The caller is responsible for ensuring that 381// the buffer has enough space to hold the output. 382// 383// Returns a pointer to the end of the string (i.e. the null character 384// terminating the string). 
385// ---------------------------------------------------------------------- 386 387LIBPROTOBUF_EXPORT char* FastInt32ToBufferLeft(int32 i, char* buffer); 388LIBPROTOBUF_EXPORT char* FastUInt32ToBufferLeft(uint32 i, char* buffer); 389LIBPROTOBUF_EXPORT char* FastInt64ToBufferLeft(int64 i, char* buffer); 390LIBPROTOBUF_EXPORT char* FastUInt64ToBufferLeft(uint64 i, char* buffer); 391 392// Just define these in terms of the above. 393inline char* FastUInt32ToBuffer(uint32 i, char* buffer) { 394 FastUInt32ToBufferLeft(i, buffer); 395 return buffer; 396} 397inline char* FastUInt64ToBuffer(uint64 i, char* buffer) { 398 FastUInt64ToBufferLeft(i, buffer); 399 return buffer; 400} 401 402// ---------------------------------------------------------------------- 403// SimpleItoa() 404// Description: converts an integer to a string. 405// 406// Return value: string 407// ---------------------------------------------------------------------- 408LIBPROTOBUF_EXPORT string SimpleItoa(int i); 409LIBPROTOBUF_EXPORT string SimpleItoa(unsigned int i); 410LIBPROTOBUF_EXPORT string SimpleItoa(long i); 411LIBPROTOBUF_EXPORT string SimpleItoa(unsigned long i); 412LIBPROTOBUF_EXPORT string SimpleItoa(long long i); 413LIBPROTOBUF_EXPORT string SimpleItoa(unsigned long long i); 414 415// ---------------------------------------------------------------------- 416// SimpleDtoa() 417// SimpleFtoa() 418// DoubleToBuffer() 419// FloatToBuffer() 420// Description: converts a double or float to a string which, if 421// passed to NoLocaleStrtod(), will produce the exact same original double 422// (except in case of NaN; all NaNs are considered the same value). 423// We try to keep the string short but it's not guaranteed to be as 424// short as possible. 425// 426// DoubleToBuffer() and FloatToBuffer() write the text to the given 427// buffer and return it. The buffer must be at least 428// kDoubleToBufferSize bytes for doubles and kFloatToBufferSize 429// bytes for floats. 
//    kFastToBufferSize is also guaranteed to be large
//    enough to hold either.
//
//    Return value (SimpleDtoa() and SimpleFtoa()): string
// ----------------------------------------------------------------------
LIBPROTOBUF_EXPORT string SimpleDtoa(double value);
LIBPROTOBUF_EXPORT string SimpleFtoa(float value);

LIBPROTOBUF_EXPORT char* DoubleToBuffer(double i, char* buffer);
LIBPROTOBUF_EXPORT char* FloatToBuffer(float i, char* buffer);

// In practice, doubles should never need more than 24 bytes and floats
// should never need more than 14 (including null terminators), but we
// overestimate to be safe.
static const int kDoubleToBufferSize = 32;
static const int kFloatToBufferSize = 24;

// ----------------------------------------------------------------------
// NoLocaleStrtod()
//    Exactly like strtod(), except it always behaves as if in the "C"
//    locale (i.e. decimal points must be '.'s).
// ----------------------------------------------------------------------

LIBPROTOBUF_EXPORT double NoLocaleStrtod(const char* text, char** endptr);

}  // namespace protobuf
}  // namespace google

#endif  // GOOGLE_PROTOBUF_STUBS_STRUTIL_H__