1// Protocol Buffers - Google's data interchange format
2// Copyright 2008 Google Inc.  All rights reserved.
3// https://developers.google.com/protocol-buffers/
4//
5// Redistribution and use in source and binary forms, with or without
6// modification, are permitted provided that the following conditions are
7// met:
8//
9//     * Redistributions of source code must retain the above copyright
10// notice, this list of conditions and the following disclaimer.
11//     * Redistributions in binary form must reproduce the above
12// copyright notice, this list of conditions and the following disclaimer
13// in the documentation and/or other materials provided with the
14// distribution.
15//     * Neither the name of Google Inc. nor the names of its
16// contributors may be used to endorse or promote products derived from
17// this software without specific prior written permission.
18//
19// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
22// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
23// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
24// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
25// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
26// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
27// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
28// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
29// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
30
31// A StringPiece points to part or all of a string, Cord, double-quoted string
32// literal, or other string-like object.  A StringPiece does *not* own the
33// string to which it points.  A StringPiece is not null-terminated.
34//
35// You can use StringPiece as a function or method parameter.  A StringPiece
36// parameter can receive a double-quoted string literal argument, a "const
37// char*" argument, a string argument, or a StringPiece argument with no data
38// copying.  Systematic use of StringPiece for arguments reduces data
39// copies and strlen() calls.
40//
41// Prefer passing StringPieces by value:
42//   void MyFunction(StringPiece arg);
43// If circumstances require, you may also pass by const reference:
44//   void MyFunction(const StringPiece& arg);  // not preferred
45// Both of these have the same lifetime semantics.  Passing by value
46// generates slightly smaller code.  For more discussion, see the thread
47// go/stringpiecebyvalue on c-users.
48//
49// StringPiece is also suitable for local variables if you know that
50// the lifetime of the underlying object is longer than the lifetime
51// of your StringPiece variable.
52//
53// Beware of binding a StringPiece to a temporary:
54//   StringPiece sp = obj.MethodReturningString();  // BAD: lifetime problem
55//
56// This code is okay:
57//   string str = obj.MethodReturningString();  // str owns its contents
58//   StringPiece sp(str);  // GOOD, because str outlives sp
59//
60// StringPiece is sometimes a poor choice for a return value and usually a poor
61// choice for a data member.  If you do use a StringPiece this way, it is your
62// responsibility to ensure that the object pointed to by the StringPiece
63// outlives the StringPiece.
64//
65// A StringPiece may represent just part of a string; thus the name "Piece".
66// For example, when splitting a string, vector<StringPiece> is a natural data
67// type for the output.  For another example, a Cord is a non-contiguous,
68// potentially very long string-like object.  The Cord class has an interface
69// that iteratively provides StringPiece objects that point to the
70// successive pieces of a Cord object.
71//
72// A StringPiece is not null-terminated.  If you write code that scans a
73// StringPiece, you must check its length before reading any characters.
74// Common idioms that work on null-terminated strings do not work on
75// StringPiece objects.
76//
77// There are several ways to create a null StringPiece:
78//   StringPiece()
79//   StringPiece(NULL)
80//   StringPiece(NULL, 0)
81// For all of the above, sp.data() == NULL, sp.length() == 0,
82// and sp.empty() == true.  Also, if you create a StringPiece with
83// a non-NULL pointer then sp.data() != NULL.  Once created,
84// sp.data() will stay either NULL or not-NULL, except if you call
85// sp.clear() or sp.set().
86//
87// Thus, you can use StringPiece(NULL) to signal an out-of-band value
88// that is different from other StringPiece values.  This is similar
89// to the way that const char* p1 = NULL; is different from
90// const char* p2 = "";.
91//
92// There are many ways to create an empty StringPiece:
93//   StringPiece()
94//   StringPiece(NULL)
95//   StringPiece(NULL, 0)
96//   StringPiece("")
97//   StringPiece("", 0)
98//   StringPiece("abcdef", 0)
99//   StringPiece("abcdef"+6, 0)
100// For all of the above, sp.length() will be 0 and sp.empty() will be true.
101// For some empty StringPiece values, sp.data() will be NULL.
102// For some empty StringPiece values, sp.data() will not be NULL.
103//
104// Be careful not to confuse: null StringPiece and empty StringPiece.
105// The set of empty StringPieces properly includes the set of null StringPieces.
106// That is, every null StringPiece is an empty StringPiece,
// but some non-null StringPieces are empty StringPieces too.
108//
109// All empty StringPiece values compare equal to each other.
// Even a null StringPiece compares equal to a non-null empty StringPiece:
111//  StringPiece() == StringPiece("", 0)
112//  StringPiece(NULL) == StringPiece("abc", 0)
113//  StringPiece(NULL, 0) == StringPiece("abcdef"+6, 0)
114//
115// Look carefully at this example:
116//   StringPiece("") == NULL
117// True or false?  TRUE, because StringPiece::operator== converts
118// the right-hand side from NULL to StringPiece(NULL),
119// and then compares two zero-length spans of characters.
120// However, we are working to make this example produce a compile error.
121//
122// Suppose you want to write:
123//   bool TestWhat?(StringPiece sp) { return sp == NULL; }  // BAD
124// Do not do that.  Write one of these instead:
125//   bool TestNull(StringPiece sp) { return sp.data() == NULL; }
126//   bool TestEmpty(StringPiece sp) { return sp.empty(); }
127// The intent of TestWhat? is unclear.  Did you mean TestNull or TestEmpty?
// Right now, TestWhat? behaves like TestEmpty.
129// We are working to make TestWhat? produce a compile error.
130// TestNull is good to test for an out-of-band signal.
131// TestEmpty is good to test for an empty StringPiece.
132//
133// Caveats (again):
134// (1) The lifetime of the pointed-to string (or piece of a string)
135//     must be longer than the lifetime of the StringPiece.
136// (2) There may or may not be a '\0' character after the end of
137//     StringPiece data.
138// (3) A null StringPiece is empty.
139//     An empty StringPiece may or may not be a null StringPiece.
140
141#ifndef GOOGLE_PROTOBUF_STUBS_STRINGPIECE_H_
142#define GOOGLE_PROTOBUF_STUBS_STRINGPIECE_H_
143
144#include <assert.h>
145#include <stddef.h>
146#include <string.h>
147#include <iosfwd>
148#include <limits>
149#include <string>
150
151#include <google/protobuf/stubs/common.h>
152#include <google/protobuf/stubs/hash.h>
153
154namespace google {
155namespace protobuf {
// StringPiece exposes *two* size types:
//
//   StringPiece::size_type
//     - unsigned
//     - 32 bits in LP32, 64 bits in LP64, 64 bits in LLP64
//     - no future changes intended
//
//   stringpiece_ssize_type
//     - signed
//     - 32 bits in LP32, 64 bits in LP64, 64 bits in LLP64
//     - future changes intended: http://go/64BitStringPiece
//
typedef string::difference_type stringpiece_ssize_type;

// STRINGPIECE_CHECK_SIZE protects us from 32-bit overflows.  It is 1 when
// NDEBUG is not defined or when _FORTIFY_SOURCE is defined to a positive
// value, and 0 otherwise.
// TODO(mec): delete this after stringpiece_ssize_type goes 64 bit.
#if !defined(NDEBUG) || (defined(_FORTIFY_SOURCE) && _FORTIFY_SOURCE > 0)
#define STRINGPIECE_CHECK_SIZE 1
#else
#define STRINGPIECE_CHECK_SIZE 0
#endif
177
178class LIBPROTOBUF_EXPORT StringPiece {
179 private:
180  const char* ptr_;
181  stringpiece_ssize_type length_;
182
183  // Prevent overflow in debug mode or fortified mode.
184  // sizeof(stringpiece_ssize_type) may be smaller than sizeof(size_t).
185  static stringpiece_ssize_type CheckedSsizeTFromSizeT(size_t size) {
186#if STRINGPIECE_CHECK_SIZE > 0
187#ifdef max
188#undef max
189#endif
190    if (size > static_cast<size_t>(
191        std::numeric_limits<stringpiece_ssize_type>::max())) {
192      // Some people grep for this message in logs
193      // so take care if you ever change it.
194      LogFatalSizeTooBig(size, "size_t to int conversion");
195    }
196#endif
197    return static_cast<stringpiece_ssize_type>(size);
198  }
199
200  // Out-of-line error path.
201  static void LogFatalSizeTooBig(size_t size, const char* details);
202
203 public:
204  // We provide non-explicit singleton constructors so users can pass
205  // in a "const char*" or a "string" wherever a "StringPiece" is
206  // expected.
207  //
208  // Style guide exception granted:
209  // http://goto/style-guide-exception-20978288
210  StringPiece() : ptr_(NULL), length_(0) {}
211
212  StringPiece(const char* str)  // NOLINT(runtime/explicit)
213      : ptr_(str), length_(0) {
214    if (str != NULL) {
215      length_ = CheckedSsizeTFromSizeT(strlen(str));
216    }
217  }
218
219  template <class Allocator>
220  StringPiece(  // NOLINT(runtime/explicit)
221      const std::basic_string<char, std::char_traits<char>, Allocator>& str)
222      : ptr_(str.data()), length_(0) {
223    length_ = CheckedSsizeTFromSizeT(str.size());
224  }
225#if defined(HAS_GLOBAL_STRING)
226  template <class Allocator>
227  StringPiece(  // NOLINT(runtime/explicit)
228      const basic_string<char, std::char_traits<char>, Allocator>& str)
229      : ptr_(str.data()), length_(0) {
230    length_ = CheckedSsizeTFromSizeT(str.size());
231  }
232#endif
233
234  StringPiece(const char* offset, stringpiece_ssize_type len)
235      : ptr_(offset), length_(len) {
236    assert(len >= 0);
237  }
238
239  // Substring of another StringPiece.
240  // pos must be non-negative and <= x.length().
241  StringPiece(StringPiece x, stringpiece_ssize_type pos);
242  // Substring of another StringPiece.
243  // pos must be non-negative and <= x.length().
244  // len must be non-negative and will be pinned to at most x.length() - pos.
245  StringPiece(StringPiece x,
246              stringpiece_ssize_type pos,
247              stringpiece_ssize_type len);
248
249  // data() may return a pointer to a buffer with embedded NULs, and the
250  // returned buffer may or may not be null terminated.  Therefore it is
251  // typically a mistake to pass data() to a routine that expects a NUL
252  // terminated string.
253  const char* data() const { return ptr_; }
254  stringpiece_ssize_type size() const { return length_; }
255  stringpiece_ssize_type length() const { return length_; }
256  bool empty() const { return length_ == 0; }
257
258  void clear() {
259    ptr_ = NULL;
260    length_ = 0;
261  }
262
263  void set(const char* data, stringpiece_ssize_type len) {
264    assert(len >= 0);
265    ptr_ = data;
266    length_ = len;
267  }
268
269  void set(const char* str) {
270    ptr_ = str;
271    if (str != NULL)
272      length_ = CheckedSsizeTFromSizeT(strlen(str));
273    else
274      length_ = 0;
275  }
276
277  void set(const void* data, stringpiece_ssize_type len) {
278    ptr_ = reinterpret_cast<const char*>(data);
279    length_ = len;
280  }
281
282  char operator[](stringpiece_ssize_type i) const {
283    assert(0 <= i);
284    assert(i < length_);
285    return ptr_[i];
286  }
287
288  void remove_prefix(stringpiece_ssize_type n) {
289    assert(length_ >= n);
290    ptr_ += n;
291    length_ -= n;
292  }
293
294  void remove_suffix(stringpiece_ssize_type n) {
295    assert(length_ >= n);
296    length_ -= n;
297  }
298
299  // returns {-1, 0, 1}
300  int compare(StringPiece x) const {
301    const stringpiece_ssize_type min_size =
302        length_ < x.length_ ? length_ : x.length_;
303    int r = memcmp(ptr_, x.ptr_, min_size);
304    if (r < 0) return -1;
305    if (r > 0) return 1;
306    if (length_ < x.length_) return -1;
307    if (length_ > x.length_) return 1;
308    return 0;
309  }
310
311  string as_string() const {
312    return ToString();
313  }
314  // We also define ToString() here, since many other string-like
315  // interfaces name the routine that converts to a C++ string
316  // "ToString", and it's confusing to have the method that does that
317  // for a StringPiece be called "as_string()".  We also leave the
318  // "as_string()" method defined here for existing code.
319  string ToString() const {
320    if (ptr_ == NULL) return string();
321    return string(data(), size());
322  }
323
324  operator string() const {
325    return ToString();
326  }
327
328  void CopyToString(string* target) const;
329  void AppendToString(string* target) const;
330
331  bool starts_with(StringPiece x) const {
332    return (length_ >= x.length_) && (memcmp(ptr_, x.ptr_, x.length_) == 0);
333  }
334
335  bool ends_with(StringPiece x) const {
336    return ((length_ >= x.length_) &&
337            (memcmp(ptr_ + (length_-x.length_), x.ptr_, x.length_) == 0));
338  }
339
340  // Checks whether StringPiece starts with x and if so advances the beginning
341  // of it to past the match.  It's basically a shortcut for starts_with
342  // followed by remove_prefix.
343  bool Consume(StringPiece x);
344  // Like above but for the end of the string.
345  bool ConsumeFromEnd(StringPiece x);
346
347  // standard STL container boilerplate
348  typedef char value_type;
349  typedef const char* pointer;
350  typedef const char& reference;
351  typedef const char& const_reference;
352  typedef size_t size_type;
353  typedef ptrdiff_t difference_type;
354  static const size_type npos;
355  typedef const char* const_iterator;
356  typedef const char* iterator;
357  typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
358  typedef std::reverse_iterator<iterator> reverse_iterator;
359  iterator begin() const { return ptr_; }
360  iterator end() const { return ptr_ + length_; }
361  const_reverse_iterator rbegin() const {
362    return const_reverse_iterator(ptr_ + length_);
363  }
364  const_reverse_iterator rend() const {
365    return const_reverse_iterator(ptr_);
366  }
367  stringpiece_ssize_type max_size() const { return length_; }
368  stringpiece_ssize_type capacity() const { return length_; }
369
370  // cpplint.py emits a false positive [build/include_what_you_use]
371  stringpiece_ssize_type copy(char* buf, size_type n, size_type pos = 0) const;  // NOLINT
372
373  bool contains(StringPiece s) const;
374
375  stringpiece_ssize_type find(StringPiece s, size_type pos = 0) const;
376  stringpiece_ssize_type find(char c, size_type pos = 0) const;
377  stringpiece_ssize_type rfind(StringPiece s, size_type pos = npos) const;
378  stringpiece_ssize_type rfind(char c, size_type pos = npos) const;
379
380  stringpiece_ssize_type find_first_of(StringPiece s, size_type pos = 0) const;
381  stringpiece_ssize_type find_first_of(char c, size_type pos = 0) const {
382    return find(c, pos);
383  }
384  stringpiece_ssize_type find_first_not_of(StringPiece s,
385                                           size_type pos = 0) const;
386  stringpiece_ssize_type find_first_not_of(char c, size_type pos = 0) const;
387  stringpiece_ssize_type find_last_of(StringPiece s,
388                                      size_type pos = npos) const;
389  stringpiece_ssize_type find_last_of(char c, size_type pos = npos) const {
390    return rfind(c, pos);
391  }
392  stringpiece_ssize_type find_last_not_of(StringPiece s,
393                                          size_type pos = npos) const;
394  stringpiece_ssize_type find_last_not_of(char c, size_type pos = npos) const;
395
396  StringPiece substr(size_type pos, size_type n = npos) const;
397};
398
399// This large function is defined inline so that in a fairly common case where
400// one of the arguments is a literal, the compiler can elide a lot of the
401// following comparisons.
402inline bool operator==(StringPiece x, StringPiece y) {
403  stringpiece_ssize_type len = x.size();
404  if (len != y.size()) {
405    return false;
406  }
407
408  return x.data() == y.data() || len <= 0 ||
409      memcmp(x.data(), y.data(), len) == 0;
410}
411
412inline bool operator!=(StringPiece x, StringPiece y) {
413  return !(x == y);
414}
415
416inline bool operator<(StringPiece x, StringPiece y) {
417  const stringpiece_ssize_type min_size =
418      x.size() < y.size() ? x.size() : y.size();
419  const int r = memcmp(x.data(), y.data(), min_size);
420  return (r < 0) || (r == 0 && x.size() < y.size());
421}
422
423inline bool operator>(StringPiece x, StringPiece y) {
424  return y < x;
425}
426
427inline bool operator<=(StringPiece x, StringPiece y) {
428  return !(x > y);
429}
430
431inline bool operator>=(StringPiece x, StringPiece y) {
432  return !(x < y);
433}
434
435// allow StringPiece to be logged
436extern std::ostream& operator<<(std::ostream& o, StringPiece piece);
437
438namespace internal {
439// StringPiece is not a POD and can not be used in an union (pre C++11). We
440// need a POD version of it.
441struct StringPiecePod {
442  // Create from a StringPiece.
443  static StringPiecePod CreateFromStringPiece(StringPiece str) {
444    StringPiecePod pod;
445    pod.data_ = str.data();
446    pod.size_ = str.size();
447    return pod;
448  }
449
450  // Cast to StringPiece.
451  operator StringPiece() const { return StringPiece(data_, size_); }
452
453  bool operator==(const char* value) const {
454    return StringPiece(data_, size_) == StringPiece(value);
455  }
456
457  char operator[](stringpiece_ssize_type i) const {
458    assert(0 <= i);
459    assert(i < size_);
460    return data_[i];
461  }
462
463  const char* data() const { return data_; }
464
465  stringpiece_ssize_type size() const {
466    return size_;
467  }
468
469  std::string ToString() const { return std::string(data_, size_); }
470 private:
471  const char* data_;
472  stringpiece_ssize_type size_;
473};
474
475}  // namespace internal
476}  // namespace protobuf
477}  // namespace google
478
479GOOGLE_PROTOBUF_HASH_NAMESPACE_DECLARATION_START
480template<> struct hash<StringPiece> {
481  size_t operator()(const StringPiece& s) const {
482    size_t result = 0;
483    for (const char *str = s.data(), *end = str + s.size(); str < end; str++) {
484      result = 5 * result + *str;
485    }
486    return result;
487  }
488};
489GOOGLE_PROTOBUF_HASH_NAMESPACE_DECLARATION_END
490
#endif  // GOOGLE_PROTOBUF_STUBS_STRINGPIECE_H_
492