// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc.  All rights reserved.
// https://developers.google.com/protocol-buffers/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30 31// from google3/strings/strutil.h 32 33#ifndef GOOGLE_PROTOBUF_STUBS_STRUTIL_H__ 34#define GOOGLE_PROTOBUF_STUBS_STRUTIL_H__ 35 36#include <stdlib.h> 37#include <vector> 38#include <google/protobuf/stubs/common.h> 39 40namespace google { 41namespace protobuf { 42 43#ifdef _MSC_VER 44#define strtoll _strtoi64 45#define strtoull _strtoui64 46#elif defined(__DECCXX) && defined(__osf__) 47// HP C++ on Tru64 does not have strtoll, but strtol is already 64-bit. 48#define strtoll strtol 49#define strtoull strtoul 50#endif 51 52// ---------------------------------------------------------------------- 53// ascii_isalnum() 54// Check if an ASCII character is alphanumeric. We can't use ctype's 55// isalnum() because it is affected by locale. This function is applied 56// to identifiers in the protocol buffer language, not to natural-language 57// strings, so locale should not be taken into account. 58// ascii_isdigit() 59// Like above, but only accepts digits. 60// ---------------------------------------------------------------------- 61 62inline bool ascii_isalnum(char c) { 63 return ('a' <= c && c <= 'z') || 64 ('A' <= c && c <= 'Z') || 65 ('0' <= c && c <= '9'); 66} 67 68inline bool ascii_isdigit(char c) { 69 return ('0' <= c && c <= '9'); 70} 71 72// ---------------------------------------------------------------------- 73// HasPrefixString() 74// Check if a string begins with a given prefix. 75// StripPrefixString() 76// Given a string and a putative prefix, returns the string minus the 77// prefix string if the prefix matches, otherwise the original 78// string. 
79// ---------------------------------------------------------------------- 80inline bool HasPrefixString(const string& str, 81 const string& prefix) { 82 return str.size() >= prefix.size() && 83 str.compare(0, prefix.size(), prefix) == 0; 84} 85 86inline string StripPrefixString(const string& str, const string& prefix) { 87 if (HasPrefixString(str, prefix)) { 88 return str.substr(prefix.size()); 89 } else { 90 return str; 91 } 92} 93 94// ---------------------------------------------------------------------- 95// HasSuffixString() 96// Return true if str ends in suffix. 97// StripSuffixString() 98// Given a string and a putative suffix, returns the string minus the 99// suffix string if the suffix matches, otherwise the original 100// string. 101// ---------------------------------------------------------------------- 102inline bool HasSuffixString(const string& str, 103 const string& suffix) { 104 return str.size() >= suffix.size() && 105 str.compare(str.size() - suffix.size(), suffix.size(), suffix) == 0; 106} 107 108inline string StripSuffixString(const string& str, const string& suffix) { 109 if (HasSuffixString(str, suffix)) { 110 return str.substr(0, str.size() - suffix.size()); 111 } else { 112 return str; 113 } 114} 115 116// ---------------------------------------------------------------------- 117// StripString 118// Replaces any occurrence of the character 'remove' (or the characters 119// in 'remove') with the character 'replacewith'. 120// Good for keeping html characters or protocol characters (\t) out 121// of places where they might cause a problem. 122// ---------------------------------------------------------------------- 123LIBPROTOBUF_EXPORT void StripString(string* s, const char* remove, 124 char replacewith); 125 126// ---------------------------------------------------------------------- 127// LowerString() 128// UpperString() 129// ToUpper() 130// Convert the characters in "s" to lowercase or uppercase. 
ASCII-only: 131// these functions intentionally ignore locale because they are applied to 132// identifiers used in the Protocol Buffer language, not to natural-language 133// strings. 134// ---------------------------------------------------------------------- 135 136inline void LowerString(string * s) { 137 string::iterator end = s->end(); 138 for (string::iterator i = s->begin(); i != end; ++i) { 139 // tolower() changes based on locale. We don't want this! 140 if ('A' <= *i && *i <= 'Z') *i += 'a' - 'A'; 141 } 142} 143 144inline void UpperString(string * s) { 145 string::iterator end = s->end(); 146 for (string::iterator i = s->begin(); i != end; ++i) { 147 // toupper() changes based on locale. We don't want this! 148 if ('a' <= *i && *i <= 'z') *i += 'A' - 'a'; 149 } 150} 151 152inline string ToUpper(const string& s) { 153 string out = s; 154 UpperString(&out); 155 return out; 156} 157 158// ---------------------------------------------------------------------- 159// StringReplace() 160// Give me a string and two patterns "old" and "new", and I replace 161// the first instance of "old" in the string with "new", if it 162// exists. RETURN a new string, regardless of whether the replacement 163// happened or not. 164// ---------------------------------------------------------------------- 165 166LIBPROTOBUF_EXPORT string StringReplace(const string& s, const string& oldsub, 167 const string& newsub, bool replace_all); 168 169// ---------------------------------------------------------------------- 170// SplitStringUsing() 171// Split a string using a character delimiter. Append the components 172// to 'result'. If there are consecutive delimiters, this function skips 173// over all of them. 
174// ---------------------------------------------------------------------- 175LIBPROTOBUF_EXPORT void SplitStringUsing(const string& full, const char* delim, 176 vector<string>* res); 177 178// Split a string using one or more byte delimiters, presented 179// as a nul-terminated c string. Append the components to 'result'. 180// If there are consecutive delimiters, this function will return 181// corresponding empty strings. If you want to drop the empty 182// strings, try SplitStringUsing(). 183// 184// If "full" is the empty string, yields an empty string as the only value. 185// ---------------------------------------------------------------------- 186LIBPROTOBUF_EXPORT void SplitStringAllowEmpty(const string& full, 187 const char* delim, 188 vector<string>* result); 189 190// ---------------------------------------------------------------------- 191// Split() 192// Split a string using a character delimiter. 193// ---------------------------------------------------------------------- 194inline vector<string> Split( 195 const string& full, const char* delim, bool skip_empty = true) { 196 vector<string> result; 197 if (skip_empty) { 198 SplitStringUsing(full, delim, &result); 199 } else { 200 SplitStringAllowEmpty(full, delim, &result); 201 } 202 return result; 203} 204 205// ---------------------------------------------------------------------- 206// JoinStrings() 207// These methods concatenate a vector of strings into a C++ string, using 208// the C-string "delim" as a separator between components. There are two 209// flavors of the function, one flavor returns the concatenated string, 210// another takes a pointer to the target string. In the latter case the 211// target string is cleared and overwritten. 
212// ---------------------------------------------------------------------- 213LIBPROTOBUF_EXPORT void JoinStrings(const vector<string>& components, 214 const char* delim, string* result); 215 216inline string JoinStrings(const vector<string>& components, 217 const char* delim) { 218 string result; 219 JoinStrings(components, delim, &result); 220 return result; 221} 222 223// ---------------------------------------------------------------------- 224// UnescapeCEscapeSequences() 225// Copies "source" to "dest", rewriting C-style escape sequences 226// -- '\n', '\r', '\\', '\ooo', etc -- to their ASCII 227// equivalents. "dest" must be sufficiently large to hold all 228// the characters in the rewritten string (i.e. at least as large 229// as strlen(source) + 1 should be safe, since the replacements 230// are always shorter than the original escaped sequences). It's 231// safe for source and dest to be the same. RETURNS the length 232// of dest. 233// 234// It allows hex sequences \xhh, or generally \xhhhhh with an 235// arbitrary number of hex digits, but all of them together must 236// specify a value of a single byte (e.g. \x0045 is equivalent 237// to \x45, and \x1234 is erroneous). 238// 239// It also allows escape sequences of the form \uhhhh (exactly four 240// hex digits, upper or lower case) or \Uhhhhhhhh (exactly eight 241// hex digits, upper or lower case) to specify a Unicode code 242// point. The dest array will contain the UTF8-encoded version of 243// that code-point (e.g., if source contains \u2019, then dest will 244// contain the three bytes 0xE2, 0x80, and 0x99). 245// 246// Errors: In the first form of the call, errors are reported with 247// LOG(ERROR). The same is true for the second form of the call if 248// the pointer to the string vector is NULL; otherwise, error 249// messages are stored in the vector. In either case, the effect on 250// the dest array is not defined, but rest of the source will be 251// processed. 
252// ---------------------------------------------------------------------- 253 254LIBPROTOBUF_EXPORT int UnescapeCEscapeSequences(const char* source, char* dest); 255LIBPROTOBUF_EXPORT int UnescapeCEscapeSequences(const char* source, char* dest, 256 vector<string> *errors); 257 258// ---------------------------------------------------------------------- 259// UnescapeCEscapeString() 260// This does the same thing as UnescapeCEscapeSequences, but creates 261// a new string. The caller does not need to worry about allocating 262// a dest buffer. This should be used for non performance critical 263// tasks such as printing debug messages. It is safe for src and dest 264// to be the same. 265// 266// The second call stores its errors in a supplied string vector. 267// If the string vector pointer is NULL, it reports the errors with LOG(). 268// 269// In the first and second calls, the length of dest is returned. In the 270// the third call, the new string is returned. 271// ---------------------------------------------------------------------- 272 273LIBPROTOBUF_EXPORT int UnescapeCEscapeString(const string& src, string* dest); 274LIBPROTOBUF_EXPORT int UnescapeCEscapeString(const string& src, string* dest, 275 vector<string> *errors); 276LIBPROTOBUF_EXPORT string UnescapeCEscapeString(const string& src); 277 278// ---------------------------------------------------------------------- 279// CEscapeString() 280// Copies 'src' to 'dest', escaping dangerous characters using 281// C-style escape sequences. This is very useful for preparing query 282// flags. 'src' and 'dest' should not overlap. 283// Returns the number of bytes written to 'dest' (not including the \0) 284// or -1 if there was insufficient space. 285// 286// Currently only \n, \r, \t, ", ', \ and !isprint() chars are escaped. 
287// ---------------------------------------------------------------------- 288LIBPROTOBUF_EXPORT int CEscapeString(const char* src, int src_len, 289 char* dest, int dest_len); 290 291// ---------------------------------------------------------------------- 292// CEscape() 293// More convenient form of CEscapeString: returns result as a "string". 294// This version is slower than CEscapeString() because it does more 295// allocation. However, it is much more convenient to use in 296// non-speed-critical code like logging messages etc. 297// ---------------------------------------------------------------------- 298LIBPROTOBUF_EXPORT string CEscape(const string& src); 299 300namespace strings { 301// Like CEscape() but does not escape bytes with the upper bit set. 302LIBPROTOBUF_EXPORT string Utf8SafeCEscape(const string& src); 303 304// Like CEscape() but uses hex (\x) escapes instead of octals. 305LIBPROTOBUF_EXPORT string CHexEscape(const string& src); 306} // namespace strings 307 308// ---------------------------------------------------------------------- 309// strto32() 310// strtou32() 311// strto64() 312// strtou64() 313// Architecture-neutral plug compatible replacements for strtol() and 314// strtoul(). Long's have different lengths on ILP-32 and LP-64 315// platforms, so using these is safer, from the point of view of 316// overflow behavior, than using the standard libc functions. 
317// ---------------------------------------------------------------------- 318LIBPROTOBUF_EXPORT int32 strto32_adaptor(const char *nptr, char **endptr, 319 int base); 320LIBPROTOBUF_EXPORT uint32 strtou32_adaptor(const char *nptr, char **endptr, 321 int base); 322 323inline int32 strto32(const char *nptr, char **endptr, int base) { 324 if (sizeof(int32) == sizeof(long)) 325 return strtol(nptr, endptr, base); 326 else 327 return strto32_adaptor(nptr, endptr, base); 328} 329 330inline uint32 strtou32(const char *nptr, char **endptr, int base) { 331 if (sizeof(uint32) == sizeof(unsigned long)) 332 return strtoul(nptr, endptr, base); 333 else 334 return strtou32_adaptor(nptr, endptr, base); 335} 336 337// For now, long long is 64-bit on all the platforms we care about, so these 338// functions can simply pass the call to strto[u]ll. 339inline int64 strto64(const char *nptr, char **endptr, int base) { 340 GOOGLE_COMPILE_ASSERT(sizeof(int64) == sizeof(long long), 341 sizeof_int64_is_not_sizeof_long_long); 342 return strtoll(nptr, endptr, base); 343} 344 345inline uint64 strtou64(const char *nptr, char **endptr, int base) { 346 GOOGLE_COMPILE_ASSERT(sizeof(uint64) == sizeof(unsigned long long), 347 sizeof_uint64_is_not_sizeof_long_long); 348 return strtoull(nptr, endptr, base); 349} 350 351// ---------------------------------------------------------------------- 352// safe_strto32() 353// ---------------------------------------------------------------------- 354LIBPROTOBUF_EXPORT bool safe_int(string text, int32* value_p); 355 356inline bool safe_strto32(string text, int32* value) { 357 return safe_int(text, value); 358} 359 360// ---------------------------------------------------------------------- 361// FastIntToBuffer() 362// FastHexToBuffer() 363// FastHex64ToBuffer() 364// FastHex32ToBuffer() 365// FastTimeToBuffer() 366// These are intended for speed. FastIntToBuffer() assumes the 367// integer is non-negative. 
FastHexToBuffer() puts output in 368// hex rather than decimal. FastTimeToBuffer() puts the output 369// into RFC822 format. 370// 371// FastHex64ToBuffer() puts a 64-bit unsigned value in hex-format, 372// padded to exactly 16 bytes (plus one byte for '\0') 373// 374// FastHex32ToBuffer() puts a 32-bit unsigned value in hex-format, 375// padded to exactly 8 bytes (plus one byte for '\0') 376// 377// All functions take the output buffer as an arg. 378// They all return a pointer to the beginning of the output, 379// which may not be the beginning of the input buffer. 380// ---------------------------------------------------------------------- 381 382// Suggested buffer size for FastToBuffer functions. Also works with 383// DoubleToBuffer() and FloatToBuffer(). 384static const int kFastToBufferSize = 32; 385 386LIBPROTOBUF_EXPORT char* FastInt32ToBuffer(int32 i, char* buffer); 387LIBPROTOBUF_EXPORT char* FastInt64ToBuffer(int64 i, char* buffer); 388char* FastUInt32ToBuffer(uint32 i, char* buffer); // inline below 389char* FastUInt64ToBuffer(uint64 i, char* buffer); // inline below 390LIBPROTOBUF_EXPORT char* FastHexToBuffer(int i, char* buffer); 391LIBPROTOBUF_EXPORT char* FastHex64ToBuffer(uint64 i, char* buffer); 392LIBPROTOBUF_EXPORT char* FastHex32ToBuffer(uint32 i, char* buffer); 393 394// at least 22 bytes long 395inline char* FastIntToBuffer(int i, char* buffer) { 396 return (sizeof(i) == 4 ? 397 FastInt32ToBuffer(i, buffer) : FastInt64ToBuffer(i, buffer)); 398} 399inline char* FastUIntToBuffer(unsigned int i, char* buffer) { 400 return (sizeof(i) == 4 ? 401 FastUInt32ToBuffer(i, buffer) : FastUInt64ToBuffer(i, buffer)); 402} 403inline char* FastLongToBuffer(long i, char* buffer) { 404 return (sizeof(i) == 4 ? 405 FastInt32ToBuffer(i, buffer) : FastInt64ToBuffer(i, buffer)); 406} 407inline char* FastULongToBuffer(unsigned long i, char* buffer) { 408 return (sizeof(i) == 4 ? 
409 FastUInt32ToBuffer(i, buffer) : FastUInt64ToBuffer(i, buffer)); 410} 411 412// ---------------------------------------------------------------------- 413// FastInt32ToBufferLeft() 414// FastUInt32ToBufferLeft() 415// FastInt64ToBufferLeft() 416// FastUInt64ToBufferLeft() 417// 418// Like the Fast*ToBuffer() functions above, these are intended for speed. 419// Unlike the Fast*ToBuffer() functions, however, these functions write 420// their output to the beginning of the buffer (hence the name, as the 421// output is left-aligned). The caller is responsible for ensuring that 422// the buffer has enough space to hold the output. 423// 424// Returns a pointer to the end of the string (i.e. the null character 425// terminating the string). 426// ---------------------------------------------------------------------- 427 428LIBPROTOBUF_EXPORT char* FastInt32ToBufferLeft(int32 i, char* buffer); 429LIBPROTOBUF_EXPORT char* FastUInt32ToBufferLeft(uint32 i, char* buffer); 430LIBPROTOBUF_EXPORT char* FastInt64ToBufferLeft(int64 i, char* buffer); 431LIBPROTOBUF_EXPORT char* FastUInt64ToBufferLeft(uint64 i, char* buffer); 432 433// Just define these in terms of the above. 434inline char* FastUInt32ToBuffer(uint32 i, char* buffer) { 435 FastUInt32ToBufferLeft(i, buffer); 436 return buffer; 437} 438inline char* FastUInt64ToBuffer(uint64 i, char* buffer) { 439 FastUInt64ToBufferLeft(i, buffer); 440 return buffer; 441} 442 443// ---------------------------------------------------------------------- 444// SimpleItoa() 445// Description: converts an integer to a string. 
446// 447// Return value: string 448// ---------------------------------------------------------------------- 449LIBPROTOBUF_EXPORT string SimpleItoa(int i); 450LIBPROTOBUF_EXPORT string SimpleItoa(unsigned int i); 451LIBPROTOBUF_EXPORT string SimpleItoa(long i); 452LIBPROTOBUF_EXPORT string SimpleItoa(unsigned long i); 453LIBPROTOBUF_EXPORT string SimpleItoa(long long i); 454LIBPROTOBUF_EXPORT string SimpleItoa(unsigned long long i); 455 456// ---------------------------------------------------------------------- 457// SimpleDtoa() 458// SimpleFtoa() 459// DoubleToBuffer() 460// FloatToBuffer() 461// Description: converts a double or float to a string which, if 462// passed to NoLocaleStrtod(), will produce the exact same original double 463// (except in case of NaN; all NaNs are considered the same value). 464// We try to keep the string short but it's not guaranteed to be as 465// short as possible. 466// 467// DoubleToBuffer() and FloatToBuffer() write the text to the given 468// buffer and return it. The buffer must be at least 469// kDoubleToBufferSize bytes for doubles and kFloatToBufferSize 470// bytes for floats. kFastToBufferSize is also guaranteed to be large 471// enough to hold either. 472// 473// Return value: string 474// ---------------------------------------------------------------------- 475LIBPROTOBUF_EXPORT string SimpleDtoa(double value); 476LIBPROTOBUF_EXPORT string SimpleFtoa(float value); 477 478LIBPROTOBUF_EXPORT char* DoubleToBuffer(double i, char* buffer); 479LIBPROTOBUF_EXPORT char* FloatToBuffer(float i, char* buffer); 480 481// In practice, doubles should never need more than 24 bytes and floats 482// should never need more than 14 (including null terminators), but we 483// overestimate to be safe. 
484static const int kDoubleToBufferSize = 32; 485static const int kFloatToBufferSize = 24; 486 487// ---------------------------------------------------------------------- 488// ToString() are internal help methods used in StrCat() and Join() 489// ---------------------------------------------------------------------- 490namespace internal { 491inline string ToString(int i) { 492 return SimpleItoa(i); 493} 494 495inline string ToString(string a) { 496 return a; 497} 498} // namespace internal 499 500// ---------------------------------------------------------------------- 501// StrCat() 502// These methods join some strings together. 503// ---------------------------------------------------------------------- 504template <typename T1, typename T2, typename T3, typename T4, typename T5> 505string StrCat( 506 const T1& a, const T2& b, const T3& c, const T4& d, const T5& e) { 507 return internal::ToString(a) + internal::ToString(b) + 508 internal::ToString(c) + internal::ToString(d) + internal::ToString(e); 509} 510 511template <typename T1, typename T2, typename T3, typename T4> 512string StrCat( 513 const T1& a, const T2& b, const T3& c, const T4& d) { 514 return internal::ToString(a) + internal::ToString(b) + 515 internal::ToString(c) + internal::ToString(d); 516} 517 518template <typename T1, typename T2, typename T3> 519string StrCat(const T1& a, const T2& b, const T3& c) { 520 return internal::ToString(a) + internal::ToString(b) + 521 internal::ToString(c); 522} 523 524template <typename T1, typename T2> 525string StrCat(const T1& a, const T2& b) { 526 return internal::ToString(a) + internal::ToString(b); 527} 528 529// ---------------------------------------------------------------------- 530// Join() 531// These methods concatenate a range of components into a C++ string, using 532// the C-string "delim" as a separator between components. 
533// ---------------------------------------------------------------------- 534template <typename Iterator> 535void Join(Iterator start, Iterator end, 536 const char* delim, string* result) { 537 for (Iterator it = start; it != end; ++it) { 538 if (it != start) { 539 result->append(delim); 540 } 541 result->append(internal::ToString(*it)); 542 } 543} 544 545template <typename Range> 546string Join(const Range& components, 547 const char* delim) { 548 string result; 549 Join(components.begin(), components.end(), delim, &result); 550 return result; 551} 552 553// ---------------------------------------------------------------------- 554// ToHex() 555// Return a lower-case hex string representation of the given integer. 556// ---------------------------------------------------------------------- 557LIBPROTOBUF_EXPORT string ToHex(uint64 num); 558 559} // namespace protobuf 560} // namespace google 561 562#endif // GOOGLE_PROTOBUF_STUBS_STRUTIL_H__ 563