// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc. All rights reserved.
// https://developers.google.com/protocol-buffers/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// A StringPiece points to part or all of a string, Cord, double-quoted string
// literal, or other string-like object. A StringPiece does *not* own the
// string to which it points. A StringPiece is not null-terminated.
//
// You can use StringPiece as a function or method parameter.
// A StringPiece
// parameter can receive a double-quoted string literal argument, a "const
// char*" argument, a string argument, or a StringPiece argument with no data
// copying. Systematic use of StringPiece for arguments reduces data
// copies and strlen() calls.
//
// Prefer passing StringPieces by value:
//   void MyFunction(StringPiece arg);
// If circumstances require, you may also pass by const reference:
//   void MyFunction(const StringPiece& arg);  // not preferred
// Both of these have the same lifetime semantics. Passing by value
// generates slightly smaller code. For more discussion, see the thread
// go/stringpiecebyvalue on c-users.
//
// StringPiece is also suitable for local variables if you know that
// the lifetime of the underlying object is longer than the lifetime
// of your StringPiece variable.
//
// Beware of binding a StringPiece to a temporary:
//   StringPiece sp = obj.MethodReturningString();  // BAD: lifetime problem
//
// This code is okay:
//   string str = obj.MethodReturningString();  // str owns its contents
//   StringPiece sp(str);  // GOOD, because str outlives sp
//
// StringPiece is sometimes a poor choice for a return value and usually a poor
// choice for a data member. If you do use a StringPiece this way, it is your
// responsibility to ensure that the object pointed to by the StringPiece
// outlives the StringPiece.
//
// A StringPiece may represent just part of a string; thus the name "Piece".
// For example, when splitting a string, vector<StringPiece> is a natural data
// type for the output. For another example, a Cord is a non-contiguous,
// potentially very long string-like object. The Cord class has an interface
// that iteratively provides StringPiece objects that point to the
// successive pieces of a Cord object.
//
// A StringPiece is not null-terminated.
// If you write code that scans a
// StringPiece, you must check its length before reading any characters.
// Common idioms that work on null-terminated strings do not work on
// StringPiece objects.
//
// There are several ways to create a null StringPiece:
//   StringPiece()
//   StringPiece(NULL)
//   StringPiece(NULL, 0)
// For all of the above, sp.data() == NULL, sp.length() == 0,
// and sp.empty() == true. Also, if you create a StringPiece with
// a non-NULL pointer then sp.data() != NULL. Once created,
// sp.data() will stay either NULL or not-NULL, except if you call
// sp.clear() or sp.set().
//
// Thus, you can use StringPiece(NULL) to signal an out-of-band value
// that is different from other StringPiece values. This is similar
// to the way that const char* p1 = NULL; is different from
// const char* p2 = "";.
//
// There are many ways to create an empty StringPiece:
//   StringPiece()
//   StringPiece(NULL)
//   StringPiece(NULL, 0)
//   StringPiece("")
//   StringPiece("", 0)
//   StringPiece("abcdef", 0)
//   StringPiece("abcdef"+6, 0)
// For all of the above, sp.length() will be 0 and sp.empty() will be true.
// For some empty StringPiece values, sp.data() will be NULL.
// For some empty StringPiece values, sp.data() will not be NULL.
//
// Be careful not to confuse: null StringPiece and empty StringPiece.
// The set of empty StringPieces properly includes the set of null StringPieces.
// That is, every null StringPiece is an empty StringPiece,
// but some non-null StringPieces are empty StringPieces too.
//
// All empty StringPiece values compare equal to each other.
// Even a null StringPiece compares equal to a non-null empty StringPiece:
//   StringPiece() == StringPiece("", 0)
//   StringPiece(NULL) == StringPiece("abc", 0)
//   StringPiece(NULL, 0) == StringPiece("abcdef"+6, 0)
//
// Look carefully at this example:
//   StringPiece("") == NULL
// True or false?
TRUE, because StringPiece::operator== converts 118// the right-hand side from NULL to StringPiece(NULL), 119// and then compares two zero-length spans of characters. 120// However, we are working to make this example produce a compile error. 121// 122// Suppose you want to write: 123// bool TestWhat?(StringPiece sp) { return sp == NULL; } // BAD 124// Do not do that. Write one of these instead: 125// bool TestNull(StringPiece sp) { return sp.data() == NULL; } 126// bool TestEmpty(StringPiece sp) { return sp.empty(); } 127// The intent of TestWhat? is unclear. Did you mean TestNull or TestEmpty? 128// Right now, TestWhat? behaves likes TestEmpty. 129// We are working to make TestWhat? produce a compile error. 130// TestNull is good to test for an out-of-band signal. 131// TestEmpty is good to test for an empty StringPiece. 132// 133// Caveats (again): 134// (1) The lifetime of the pointed-to string (or piece of a string) 135// must be longer than the lifetime of the StringPiece. 136// (2) There may or may not be a '\0' character after the end of 137// StringPiece data. 138// (3) A null StringPiece is empty. 139// An empty StringPiece may or may not be a null StringPiece. 140 141#ifndef GOOGLE_PROTOBUF_STUBS_STRINGPIECE_H_ 142#define GOOGLE_PROTOBUF_STUBS_STRINGPIECE_H_ 143 144#include <assert.h> 145#include <stddef.h> 146#include <string.h> 147#include <iosfwd> 148#include <limits> 149#include <string> 150 151#include <google/protobuf/stubs/common.h> 152#include <google/protobuf/stubs/hash.h> 153 154namespace google { 155namespace protobuf { 156// StringPiece has *two* size types. 
157// StringPiece::size_type 158// is unsigned 159// is 32 bits in LP32, 64 bits in LP64, 64 bits in LLP64 160// no future changes intended 161// stringpiece_ssize_type 162// is signed 163// is 32 bits in LP32, 64 bits in LP64, 64 bits in LLP64 164// future changes intended: http://go/64BitStringPiece 165// 166typedef string::difference_type stringpiece_ssize_type; 167 168// STRINGPIECE_CHECK_SIZE protects us from 32-bit overflows. 169// TODO(mec): delete this after stringpiece_ssize_type goes 64 bit. 170#if !defined(NDEBUG) 171#define STRINGPIECE_CHECK_SIZE 1 172#elif defined(_FORTIFY_SOURCE) && _FORTIFY_SOURCE > 0 173#define STRINGPIECE_CHECK_SIZE 1 174#else 175#define STRINGPIECE_CHECK_SIZE 0 176#endif 177 178class LIBPROTOBUF_EXPORT StringPiece { 179 private: 180 const char* ptr_; 181 stringpiece_ssize_type length_; 182 183 // Prevent overflow in debug mode or fortified mode. 184 // sizeof(stringpiece_ssize_type) may be smaller than sizeof(size_t). 185 static stringpiece_ssize_type CheckedSsizeTFromSizeT(size_t size) { 186#if STRINGPIECE_CHECK_SIZE > 0 187#ifdef max 188#undef max 189#endif 190 if (size > static_cast<size_t>( 191 std::numeric_limits<stringpiece_ssize_type>::max())) { 192 // Some people grep for this message in logs 193 // so take care if you ever change it. 194 LogFatalSizeTooBig(size, "size_t to int conversion"); 195 } 196#endif 197 return static_cast<stringpiece_ssize_type>(size); 198 } 199 200 // Out-of-line error path. 201 static void LogFatalSizeTooBig(size_t size, const char* details); 202 203 public: 204 // We provide non-explicit singleton constructors so users can pass 205 // in a "const char*" or a "string" wherever a "StringPiece" is 206 // expected. 
207 // 208 // Style guide exception granted: 209 // http://goto/style-guide-exception-20978288 210 StringPiece() : ptr_(NULL), length_(0) {} 211 212 StringPiece(const char* str) // NOLINT(runtime/explicit) 213 : ptr_(str), length_(0) { 214 if (str != NULL) { 215 length_ = CheckedSsizeTFromSizeT(strlen(str)); 216 } 217 } 218 219 template <class Allocator> 220 StringPiece( // NOLINT(runtime/explicit) 221 const std::basic_string<char, std::char_traits<char>, Allocator>& str) 222 : ptr_(str.data()), length_(0) { 223 length_ = CheckedSsizeTFromSizeT(str.size()); 224 } 225#if defined(HAS_GLOBAL_STRING) 226 template <class Allocator> 227 StringPiece( // NOLINT(runtime/explicit) 228 const basic_string<char, std::char_traits<char>, Allocator>& str) 229 : ptr_(str.data()), length_(0) { 230 length_ = CheckedSsizeTFromSizeT(str.size()); 231 } 232#endif 233 234 StringPiece(const char* offset, stringpiece_ssize_type len) 235 : ptr_(offset), length_(len) { 236 assert(len >= 0); 237 } 238 239 // Substring of another StringPiece. 240 // pos must be non-negative and <= x.length(). 241 StringPiece(StringPiece x, stringpiece_ssize_type pos); 242 // Substring of another StringPiece. 243 // pos must be non-negative and <= x.length(). 244 // len must be non-negative and will be pinned to at most x.length() - pos. 245 StringPiece(StringPiece x, 246 stringpiece_ssize_type pos, 247 stringpiece_ssize_type len); 248 249 // data() may return a pointer to a buffer with embedded NULs, and the 250 // returned buffer may or may not be null terminated. Therefore it is 251 // typically a mistake to pass data() to a routine that expects a NUL 252 // terminated string. 
253 const char* data() const { return ptr_; } 254 stringpiece_ssize_type size() const { return length_; } 255 stringpiece_ssize_type length() const { return length_; } 256 bool empty() const { return length_ == 0; } 257 258 void clear() { 259 ptr_ = NULL; 260 length_ = 0; 261 } 262 263 void set(const char* data, stringpiece_ssize_type len) { 264 assert(len >= 0); 265 ptr_ = data; 266 length_ = len; 267 } 268 269 void set(const char* str) { 270 ptr_ = str; 271 if (str != NULL) 272 length_ = CheckedSsizeTFromSizeT(strlen(str)); 273 else 274 length_ = 0; 275 } 276 277 void set(const void* data, stringpiece_ssize_type len) { 278 ptr_ = reinterpret_cast<const char*>(data); 279 length_ = len; 280 } 281 282 char operator[](stringpiece_ssize_type i) const { 283 assert(0 <= i); 284 assert(i < length_); 285 return ptr_[i]; 286 } 287 288 void remove_prefix(stringpiece_ssize_type n) { 289 assert(length_ >= n); 290 ptr_ += n; 291 length_ -= n; 292 } 293 294 void remove_suffix(stringpiece_ssize_type n) { 295 assert(length_ >= n); 296 length_ -= n; 297 } 298 299 // returns {-1, 0, 1} 300 int compare(StringPiece x) const { 301 const stringpiece_ssize_type min_size = 302 length_ < x.length_ ? length_ : x.length_; 303 int r = memcmp(ptr_, x.ptr_, min_size); 304 if (r < 0) return -1; 305 if (r > 0) return 1; 306 if (length_ < x.length_) return -1; 307 if (length_ > x.length_) return 1; 308 return 0; 309 } 310 311 string as_string() const { 312 return ToString(); 313 } 314 // We also define ToString() here, since many other string-like 315 // interfaces name the routine that converts to a C++ string 316 // "ToString", and it's confusing to have the method that does that 317 // for a StringPiece be called "as_string()". We also leave the 318 // "as_string()" method defined here for existing code. 
319 string ToString() const { 320 if (ptr_ == NULL) return string(); 321 return string(data(), size()); 322 } 323 324 operator string() const { 325 return ToString(); 326 } 327 328 void CopyToString(string* target) const; 329 void AppendToString(string* target) const; 330 331 bool starts_with(StringPiece x) const { 332 return (length_ >= x.length_) && (memcmp(ptr_, x.ptr_, x.length_) == 0); 333 } 334 335 bool ends_with(StringPiece x) const { 336 return ((length_ >= x.length_) && 337 (memcmp(ptr_ + (length_-x.length_), x.ptr_, x.length_) == 0)); 338 } 339 340 // Checks whether StringPiece starts with x and if so advances the beginning 341 // of it to past the match. It's basically a shortcut for starts_with 342 // followed by remove_prefix. 343 bool Consume(StringPiece x); 344 // Like above but for the end of the string. 345 bool ConsumeFromEnd(StringPiece x); 346 347 // standard STL container boilerplate 348 typedef char value_type; 349 typedef const char* pointer; 350 typedef const char& reference; 351 typedef const char& const_reference; 352 typedef size_t size_type; 353 typedef ptrdiff_t difference_type; 354 static const size_type npos; 355 typedef const char* const_iterator; 356 typedef const char* iterator; 357 typedef std::reverse_iterator<const_iterator> const_reverse_iterator; 358 typedef std::reverse_iterator<iterator> reverse_iterator; 359 iterator begin() const { return ptr_; } 360 iterator end() const { return ptr_ + length_; } 361 const_reverse_iterator rbegin() const { 362 return const_reverse_iterator(ptr_ + length_); 363 } 364 const_reverse_iterator rend() const { 365 return const_reverse_iterator(ptr_); 366 } 367 stringpiece_ssize_type max_size() const { return length_; } 368 stringpiece_ssize_type capacity() const { return length_; } 369 370 // cpplint.py emits a false positive [build/include_what_you_use] 371 stringpiece_ssize_type copy(char* buf, size_type n, size_type pos = 0) const; // NOLINT 372 373 bool contains(StringPiece s) const; 374 375 
stringpiece_ssize_type find(StringPiece s, size_type pos = 0) const; 376 stringpiece_ssize_type find(char c, size_type pos = 0) const; 377 stringpiece_ssize_type rfind(StringPiece s, size_type pos = npos) const; 378 stringpiece_ssize_type rfind(char c, size_type pos = npos) const; 379 380 stringpiece_ssize_type find_first_of(StringPiece s, size_type pos = 0) const; 381 stringpiece_ssize_type find_first_of(char c, size_type pos = 0) const { 382 return find(c, pos); 383 } 384 stringpiece_ssize_type find_first_not_of(StringPiece s, 385 size_type pos = 0) const; 386 stringpiece_ssize_type find_first_not_of(char c, size_type pos = 0) const; 387 stringpiece_ssize_type find_last_of(StringPiece s, 388 size_type pos = npos) const; 389 stringpiece_ssize_type find_last_of(char c, size_type pos = npos) const { 390 return rfind(c, pos); 391 } 392 stringpiece_ssize_type find_last_not_of(StringPiece s, 393 size_type pos = npos) const; 394 stringpiece_ssize_type find_last_not_of(char c, size_type pos = npos) const; 395 396 StringPiece substr(size_type pos, size_type n = npos) const; 397}; 398 399// This large function is defined inline so that in a fairly common case where 400// one of the arguments is a literal, the compiler can elide a lot of the 401// following comparisons. 402inline bool operator==(StringPiece x, StringPiece y) { 403 stringpiece_ssize_type len = x.size(); 404 if (len != y.size()) { 405 return false; 406 } 407 408 return x.data() == y.data() || len <= 0 || 409 memcmp(x.data(), y.data(), len) == 0; 410} 411 412inline bool operator!=(StringPiece x, StringPiece y) { 413 return !(x == y); 414} 415 416inline bool operator<(StringPiece x, StringPiece y) { 417 const stringpiece_ssize_type min_size = 418 x.size() < y.size() ? 
x.size() : y.size(); 419 const int r = memcmp(x.data(), y.data(), min_size); 420 return (r < 0) || (r == 0 && x.size() < y.size()); 421} 422 423inline bool operator>(StringPiece x, StringPiece y) { 424 return y < x; 425} 426 427inline bool operator<=(StringPiece x, StringPiece y) { 428 return !(x > y); 429} 430 431inline bool operator>=(StringPiece x, StringPiece y) { 432 return !(x < y); 433} 434 435// allow StringPiece to be logged 436extern std::ostream& operator<<(std::ostream& o, StringPiece piece); 437 438namespace internal { 439// StringPiece is not a POD and can not be used in an union (pre C++11). We 440// need a POD version of it. 441struct StringPiecePod { 442 // Create from a StringPiece. 443 static StringPiecePod CreateFromStringPiece(StringPiece str) { 444 StringPiecePod pod; 445 pod.data_ = str.data(); 446 pod.size_ = str.size(); 447 return pod; 448 } 449 450 // Cast to StringPiece. 451 operator StringPiece() const { return StringPiece(data_, size_); } 452 453 bool operator==(const char* value) const { 454 return StringPiece(data_, size_) == StringPiece(value); 455 } 456 457 char operator[](stringpiece_ssize_type i) const { 458 assert(0 <= i); 459 assert(i < size_); 460 return data_[i]; 461 } 462 463 const char* data() const { return data_; } 464 465 stringpiece_ssize_type size() const { 466 return size_; 467 } 468 469 std::string ToString() const { return std::string(data_, size_); } 470 private: 471 const char* data_; 472 stringpiece_ssize_type size_; 473}; 474 475} // namespace internal 476} // namespace protobuf 477} // namespace google 478 479GOOGLE_PROTOBUF_HASH_NAMESPACE_DECLARATION_START 480template<> struct hash<StringPiece> { 481 size_t operator()(const StringPiece& s) const { 482 size_t result = 0; 483 for (const char *str = s.data(), *end = str + s.size(); str < end; str++) { 484 result = 5 * result + *str; 485 } 486 return result; 487 } 488}; 489GOOGLE_PROTOBUF_HASH_NAMESPACE_DECLARATION_END 490 491#endif // STRINGS_STRINGPIECE_H_ 492