stringpiece.h revision 2ee91b4af4353b9e6a9d591c32fedfc58fd4ef35
// Copyright 2001-2010 The RE2 Authors.  All Rights Reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
42ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
// A string-like object that points to a sized piece of memory.
//
// Functions or methods may use const StringPiece& parameters to accept either
// a "const char*" or a "string" value that will be implicitly converted to
// a StringPiece.  The implicit conversion means that it is often appropriate
// to include this .h file in other files rather than forward-declaring
// StringPiece as would be appropriate for most other Google classes.
//
// Systematic usage of StringPiece is encouraged as it will reduce unnecessary
// conversions from "const char*" to "string" and back again.
//
//
// Arghh!  I wish C++ literals were "string".
182ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
192ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson#ifndef STRINGS_STRINGPIECE_H__
202ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson#define STRINGS_STRINGPIECE_H__
212ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
#include <string.h>
#include <algorithm>
#include <cstddef>
#include <iosfwd>
#include <iterator>
#include <string>
262ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
272ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonnamespace re2 {
282ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
// A view onto a run of bytes, stored as a start pointer plus a length.
// The class holds only those two fields and has no destructor, so it never
// frees the bytes it points at; whoever supplied them must keep them alive
// for as long as the StringPiece is used.
class StringPiece {
 private:
  const char*   ptr_;     // Start of the viewed bytes; NULL when default-built
                          // or after clear().
  int           length_;  // Number of bytes in the view.

 public:
  // We provide non-explicit singleton constructors so users can pass
  // in a "const char*" or a "string" wherever a "StringPiece" is
  // expected.

  // Builds an empty piece: NULL data pointer, zero length.
  StringPiece() : ptr_(NULL), length_(0) { }
  // Views a NUL-terminated C string; a NULL argument yields length 0.
  StringPiece(const char* str)
    : ptr_(str), length_((str == NULL) ? 0 : static_cast<int>(strlen(str))) { }
  // Views the bytes currently held by "str" (embedded NULs included).
  StringPiece(const std::string& str)
    : ptr_(str.data()), length_(static_cast<int>(str.size())) { }
  // Views exactly "len" bytes starting at "offset"; no validation is done.
  StringPiece(const char* offset, int len) : ptr_(offset), length_(len) { }

  // data() may return a pointer to a buffer with embedded NULs, and the
  // returned buffer may or may not be null terminated.  Therefore it is
  // typically a mistake to pass data() to a routine that expects a NUL
  // terminated string.
  const char* data() const { return ptr_; }
  int size() const { return length_; }
  int length() const { return length_; }
  bool empty() const { return length_ == 0; }

  // Resets to the same state as a default-constructed piece.
  void clear() { ptr_ = NULL; length_ = 0; }
  // Re-points the view at "len" bytes starting at "data".
  void set(const char* data, int len) { ptr_ = data; length_ = len; }
  // Re-points the view at a NUL-terminated C string; NULL gives length 0.
  void set(const char* str) {
    ptr_ = str;
    length_ = (str == NULL) ? 0 : static_cast<int>(strlen(str));
  }
  // Same as set(const char*, int) but accepts untyped memory.
  void set(const void* data, int len) {
    ptr_ = static_cast<const char*>(data);
    length_ = len;
  }

  // Returns the byte at index "i".  The index is not range-checked.
  char operator[](int i) const { return ptr_[i]; }

  // Drops the first "n" bytes from the view.  "n" is not range-checked.
  void remove_prefix(int n) {
    ptr_ += n;
    length_ -= n;
  }

  // Drops the last "n" bytes from the view.  "n" is not range-checked.
  void remove_suffix(int n) {
    length_ -= n;
  }

  // Three-way bytewise comparison.  Returns a negative value, zero, or a
  // positive value when "this" sorts before, equal to, or after "x".  When
  // one piece is a prefix of the other, the shorter one sorts first.
  int compare(const StringPiece& x) const {
    const int common = std::min(length_, x.length_);
    // memcmp() requires valid pointers even for a zero count, and an empty
    // piece may hold NULL, so only call it when there are bytes to compare.
    int r = 0;
    if (common > 0)
      r = memcmp(ptr_, x.ptr_, static_cast<size_t>(common));
    if (r == 0) {
      if (length_ < x.length_) r = -1;
      else if (length_ > x.length_) r = +1;
    }
    return r;
  }

  // Returns a std::string holding a copy of the viewed bytes.
  std::string as_string() const {
    return std::string(data(), size());
  }
  // We also define ToString() here, since many other string-like
  // interfaces name the routine that converts to a C++ string
  // "ToString", and it's confusing to have the method that does that
  // for a StringPiece be called "as_string()".  We also leave the
  // "as_string()" method defined here for existing code.
  std::string ToString() const {
    return std::string(data(), size());
  }

  // Declared here, defined out of line.  Judging by the names these replace
  // or extend *target with the viewed bytes -- confirm against the .cc file.
  void CopyToString(std::string* target) const;
  void AppendToString(std::string* target) const;

  // Does "this" start with "x"
  bool starts_with(const StringPiece& x) const {
    if (length_ < x.length_) return false;
    // An empty "x" may carry a NULL pointer, which must not reach memcmp().
    if (x.length_ == 0) return true;
    return memcmp(ptr_, x.ptr_, static_cast<size_t>(x.length_)) == 0;
  }

  // Does "this" end with "x"
  bool ends_with(const StringPiece& x) const {
    if (length_ < x.length_) return false;
    // An empty "x" may carry a NULL pointer, which must not reach memcmp().
    if (x.length_ == 0) return true;
    return memcmp(ptr_ + (length_ - x.length_), x.ptr_,
                  static_cast<size_t>(x.length_)) == 0;
  }

  // standard STL container boilerplate
  typedef char value_type;
  typedef const char* pointer;
  typedef const char& reference;
  typedef const char& const_reference;
  typedef size_t size_type;
  typedef ptrdiff_t difference_type;
  static const size_type npos;  // Declared only; the value is set elsewhere.
  typedef const char* const_iterator;
  typedef const char* iterator;
  typedef std::reverse_iterator<const_iterator> const_reverse_iterator;
  typedef std::reverse_iterator<iterator> reverse_iterator;
  iterator begin() const { return ptr_; }
  iterator end() const { return ptr_ + length_; }
  const_reverse_iterator rbegin() const {
    return const_reverse_iterator(ptr_ + length_);
  }
  const_reverse_iterator rend() const {
    return const_reverse_iterator(ptr_);
  }
  // STLS says return size_type, but Google says return int
  int max_size() const { return length_; }
  int capacity() const { return length_; }

  // The members below are declared here and defined out of line; their
  // exact return conventions are not visible from this header.
  int copy(char* buf, size_type n, size_type pos = 0) const;

  int find(const StringPiece& s, size_type pos = 0) const;
  int find(char c, size_type pos = 0) const;
  int rfind(const StringPiece& s, size_type pos = npos) const;
  int rfind(char c, size_type pos = npos) const;

  StringPiece substr(size_type pos, size_type n = npos) const;

  // Backs operator==; defined out of line.
  static bool _equal(const StringPiece&, const StringPiece&);
};
1502ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
1512ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsoninline bool operator==(const StringPiece& x, const StringPiece& y) {
1522ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  return StringPiece::_equal(x, y);
1532ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson}
1542ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
1552ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsoninline bool operator!=(const StringPiece& x, const StringPiece& y) {
1562ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  return !(x == y);
1572ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson}
1582ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
1592ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsoninline bool operator<(const StringPiece& x, const StringPiece& y) {
1602ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  const int r = memcmp(x.data(), y.data(),
1612ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson                       std::min(x.size(), y.size()));
1622ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  return ((r < 0) || ((r == 0) && (x.size() < y.size())));
1632ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson}
1642ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
1652ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsoninline bool operator>(const StringPiece& x, const StringPiece& y) {
1662ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  return y < x;
1672ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson}
1682ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
1692ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsoninline bool operator<=(const StringPiece& x, const StringPiece& y) {
1702ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  return !(x > y);
1712ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson}
1722ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
1732ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsoninline bool operator>=(const StringPiece& x, const StringPiece& y) {
1742ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson  return !(x < y);
1752ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson}
1762ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
1772ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson}  // namespace re2
1782ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
1792ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson// allow StringPiece to be logged
1802ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodsonextern std::ostream& operator<<(std::ostream& o, const re2::StringPiece& piece);
1812ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson
1822ee91b4af4353b9e6a9d591c32fedfc58fd4ef35Ian Hodson#endif  // STRINGS_STRINGPIECE_H__
183