ustring.h revision 9066cfe9886ac131c34d59ed0e2d287b0e3c0087
1// This file is part of the ustl library, an STL implementation. 2// 3// Copyright (C) 2005 by Mike Sharov <msharov@users.sourceforge.net> 4// This file is free software, distributed under the MIT License. 5// 6// ustring.h 7// 8 9#ifndef USTRING_H_1249CB7A098A9010763AAC6D37B133CF 10#define USTRING_H_1249CB7A098A9010763AAC6D37B133CF 11 12#include "memblock.h" 13#include "utf8.h" 14#include <stdarg.h> // for va_list, va_start, and va_end (in string::format) 15 16namespace ustl { 17 18/// \class string ustring.h ustl.h 19/// \ingroup Sequences 20/// 21/// \brief STL basic_string<char> equivalent. 22/// 23/// An STL container for text string manipulation. 24/// Differences from C++ standard: 25/// - string is a class, not a template. Wide characters are assumed to be 26/// encoded with utf8 at all times except when rendering or editing, 27/// where you would use a utf8 iterator. 28/// - format member function - you can, of course use an \ref ostringstream, 29/// which also have format functions, but most of the time this way 30/// is more convenient. Because uSTL does not implement locales, 31/// format is the only way to create localized strings. 32/// - const char* cast operator. It is much clearer to use this than having 33/// to type .c_str() every time. 34/// - length returns the number of _characters_, not bytes. 35/// This function is O(N), so use wisely. 36/// 37class string : public memblock { 38public: 39 typedef char value_type; 40 typedef value_type* pointer; 41 typedef const value_type* const_pointer; 42 typedef wchar_t wvalue_type; 43 typedef wvalue_type* wpointer; 44 typedef const wvalue_type* const_wpointer; 45 typedef pointer iterator; 46 typedef const_pointer const_iterator; 47 typedef value_type& reference; 48 typedef value_type const_reference; 49 typedef ::ustl::reverse_iterator<iterator> reverse_iterator; 50 typedef ::ustl::reverse_iterator<const_iterator> const_reverse_iterator; 51 typedef utf8in_iterator<const_iterator> utf8_iterator; 52public: 53 static const uoff_t npos = static_cast<uoff_t>(-1); ///< Value that means the end of string. 54 static const value_type c_Terminator = 0; ///< String terminator 55 static const size_type size_Terminator = sizeof(c_Terminator); ///< Most systems terminate strings with '\\0' 56 static const char empty_string [size_Terminator]; ///< An empty string. 57public: 58 string (void); 59 string (const string& s); 60 inline string (const string& s, uoff_t o, size_type n); 61 inline explicit string (const cmemlink& l); 62 string (const_pointer s); 63 inline string (const_pointer s, size_type len); 64 inline string (const_pointer s1, const_pointer s2); 65 explicit string (size_type n, value_type c = c_Terminator); 66 inline pointer data (void) { return (string::pointer (memblock::data())); } 67 inline const_pointer c_str (void) const { return (string::const_pointer (memblock::cdata())); } 68 inline size_type max_size (void) const { size_type s (memblock::max_size()); return (s - !!s); } 69 inline size_type capacity (void) const { size_type c (memblock::capacity()); return (c - !!c); } 70 void resize (size_type n); 71 inline void clear (void) { resize (0); } 72 inline const_iterator begin (void) const { return (const_iterator (memblock::begin())); } 73 inline iterator begin (void) { return (iterator (memblock::begin())); } 74 inline const_iterator end (void) const { return (const_iterator (memblock::end())); } 75 inline iterator end (void) { return (iterator (memblock::end())); } 76 inline const_reverse_iterator rbegin (void) const { return (const_reverse_iterator (end())); } 77 inline reverse_iterator rbegin (void) { return (reverse_iterator (end())); } 78 inline const_reverse_iterator rend (void) const { return (const_reverse_iterator (begin())); } 79 inline reverse_iterator rend (void) { return (reverse_iterator (begin())); } 80 inline utf8_iterator utf8_begin (void) const { return (utf8_iterator (begin())); } 81 inline utf8_iterator utf8_end (void) const { return (utf8_iterator (end())); } 82 inline const_reference at (uoff_t pos) const { assert (pos <= size() && begin()); return (begin()[pos]); } 83 inline reference at (uoff_t pos) { assert (pos <= size() && begin()); return (begin()[pos]); } 84 inline const_iterator iat (uoff_t pos) const { return (begin() + min (pos, size())); } 85 inline iterator iat (uoff_t pos) { return (begin() + min (pos, size())); } 86 inline size_type length (void) const { return (distance (utf8_begin(), utf8_end())); } 87 inline void append (const_iterator i1, const_iterator i2) { append (i1, distance (i1, i2)); } 88 void append (const_pointer s, size_type len); 89 void append (const_pointer s); 90 void append (size_type n, const_reference c); 91 inline void append (size_type n, wvalue_type c) { insert (size(), c, n); } 92 inline void append (const_wpointer s1, const_wpointer s2) { insert (size(), s1, s2); } 93 inline void append (const_wpointer s) { const_wpointer se (s); for (;se&&*se;++se); append (s, se); } 94 inline void append (const string& s) { append (s.begin(), s.end()); } 95 inline void append (const string& s, uoff_t o, size_type n) { append (s.iat(o), s.iat(o+n)); } 96 inline void assign (const_iterator i1, const_iterator i2) { assign (i1, distance (i1, i2)); } 97 void assign (const_pointer s, size_type len); 98 void assign (const_pointer s); 99 inline void assign (const_wpointer s1, const_wpointer s2) { clear(); append (s1, s2); } 100 inline void assign (const_wpointer s1) { clear(); append (s1); } 101 inline void assign (const string& s) { assign (s.begin(), s.end()); } 102 inline void assign (const string& s, uoff_t o, size_type n) { assign (s.iat(o), s.iat(o+n)); } 103 size_type copyto (pointer p, size_type n, const_iterator start = NULL) const; 104 inline int compare (const string& s) const { return (compare (begin(), end(), s.begin(), s.end())); } 105 inline int compare (const_pointer s) const { return (compare (begin(), end(), s, s + strlen(s))); } 106 static int compare (const_iterator first1, const_iterator last1, const_iterator first2, const_iterator last2); 107 inline operator const value_type* (void) const; 108 inline operator value_type* (void); 109 inline const string& operator= (const string& s) { assign (s.begin(), s.end()); return (*this); } 110 inline const string& operator= (const_reference c) { assign (&c, 1); return (*this); } 111 inline const string& operator= (const_pointer s) { assign (s); return (*this); } 112 inline const string& operator= (const_wpointer s) { assign (s); return (*this); } 113 inline const string& operator+= (const string& s) { append (s.begin(), s.size()); return (*this); } 114 inline const string& operator+= (const_reference c) { append (1, c); return (*this); } 115 inline const string& operator+= (const_pointer s) { append (s); return (*this); } 116 inline const string& operator+= (wvalue_type c) { append (1, c); return (*this); } 117 inline const string& operator+= (const_wpointer s) { append (s); return (*this); } 118 inline string operator+ (const string& s) const; 119 inline bool operator== (const string& s) const { return (memblock::operator== (s)); } 120 bool operator== (const_pointer s) const; 121 inline bool operator== (const_reference c) const { return (size() == 1 && c == at(0)); } 122 inline bool operator!= (const string& s) const { return (!operator== (s)); } 123 inline bool operator!= (const_pointer s) const { return (!operator== (s)); } 124 inline bool operator!= (const_reference c) const { return (!operator== (c)); } 125 inline bool operator< (const string& s) const { return (0 > compare (s)); } 126 inline bool operator< (const_pointer s) const { return (0 > compare (s)); } 127 inline bool operator< (const_reference c) const { return (0 > compare (begin(), end(), &c, &c + 1)); } 128 inline bool operator> (const_pointer s) const { return (0 < compare (s)); } 129 void insert (const uoff_t ip, wvalue_type c, size_type n = 1); 130 void insert (const uoff_t ip, const_wpointer first, const_wpointer last, const size_type n = 1); 131 iterator insert (iterator start, const_reference c, size_type n = 1); 132 iterator insert (iterator start, const_pointer s, size_type n = 1); 133 iterator insert (iterator start, const_pointer first, const_iterator last, size_type n = 1); 134 inline void insert (uoff_t ip, const_pointer s, size_type nlen) { insert (iat(ip), s, s + nlen); } 135 inline void insert (uoff_t ip, size_type n, value_type c) { insert (iat(ip), c, n); } 136 inline void insert (uoff_t ip, const string& s, uoff_t sp, size_type slen) { insert (iat(ip), s.iat(sp), s.iat(sp + slen)); } 137 iterator erase (iterator start, size_type size = 1); 138 void erase (uoff_t start, size_type size = 1); 139 inline iterator erase (iterator first, const_iterator last) { return (erase (first, size_type(distance(first,last)))); } 140 OVERLOAD_POINTER_AND_SIZE_T_V2(erase, iterator) 141 inline void push_back (const_reference c) { append (1, c); } 142 inline void push_back (wvalue_type c) { append (1, c); } 143 inline void pop_back (void) { resize (size() - 1); } 144 void replace (iterator first, iterator last, const_pointer s); 145 void replace (iterator first, iterator last, const_pointer i1, const_pointer i2, size_type n = 1); 146 inline void replace (iterator first, iterator last, const string& s) { replace (first, last, s.begin(), s.end()); } 147 inline void replace (iterator first, iterator last, const_pointer s, size_type slen) { replace (first, last, s, s + slen); } 148 inline void replace (iterator first, iterator last, size_type n, value_type c) { replace (first, last, &c, &c + 1, n); } 149 inline void replace (uoff_t rp, size_type n, const string& s) { replace (iat(rp), iat(rp + n), s); } 150 inline void replace (uoff_t rp, size_type n, const string& s, uoff_t sp, size_type slen) { replace (iat(rp), iat(rp + n), s.iat(sp), s.iat(sp + slen)); } 151 inline void replace (uoff_t rp, size_type n, const_pointer s, size_type slen) { replace (iat(rp), iat(rp + n), s, s + slen); } 152 inline void replace (uoff_t rp, size_type n, const_pointer s) { replace (iat(rp), iat(rp + n), string(s)); } 153 inline void replace (uoff_t rp, size_type n, size_type count, value_type c) { replace (iat(rp), iat(rp + n), count, c); } 154 inline string substr (uoff_t o, size_type n) const { return (string (*this, o, n)); } 155 uoff_t find (const_reference c, uoff_t pos = 0) const; 156 uoff_t find (const string& s, uoff_t pos = 0) const; 157 uoff_t rfind (const_reference c, uoff_t pos = npos) const; 158 uoff_t rfind (const string& s, uoff_t pos = npos) const; 159 uoff_t find_first_of (const string& s, uoff_t pos = 0) const; 160 uoff_t find_first_not_of (const string& s, uoff_t pos = 0) const; 161 uoff_t find_last_of (const string& s, uoff_t pos = npos) const; 162 uoff_t find_last_not_of (const string& s, uoff_t pos = npos) const; 163 int vformat (const char* fmt, va_list args); 164 int format (const char* fmt, ...) __attribute__((__format__(__printf__, 2, 3))); 165 void read (istream&); 166 void write (ostream& os) const; 167 size_t stream_size (void) const; 168 static hashvalue_t hash (const char* f1, const char* l1); 169private: 170 DLL_LOCAL iterator utf8_iat (uoff_t i); 171protected: 172 inline virtual size_type minimumFreeCapacity (void) const { return (size_Terminator); } 173}; 174 175//---------------------------------------------------------------------- 176 177/// Assigns itself the value of string \p s 178inline string::string (const cmemlink& s) 179: memblock () 180{ 181 assign (const_iterator (s.begin()), s.size()); 182} 183 184/// Assigns itself a [o,o+n) substring of \p s. 185inline string::string (const string& s, uoff_t o, size_type n) 186: memblock() 187{ 188 assign (s, o, n); 189} 190 191/// Copies the value of \p s of length \p len into itself. 192inline string::string (const_pointer s, size_type len) 193: memblock () 194{ 195 assign (s, len); 196} 197 198/// Copies into itself the string data between \p s1 and \p s2 199inline string::string (const_pointer s1, const_pointer s2) 200: memblock () 201{ 202 assert (s1 <= s2 && "Negative ranges result in memory allocation errors."); 203 assign (s1, s2); 204} 205 206/// Returns the pointer to the first character. 207inline string::operator const string::value_type* (void) const 208{ 209 assert ((!end() || *end() == c_Terminator) && "This string is linked to data that is not 0-terminated. This may cause serious security problems. Please assign the data instead of linking."); 210 return (begin()); 211} 212 213/// Returns the pointer to the first character. 214inline string::operator string::value_type* (void) 215{ 216 assert ((end() && *end() == c_Terminator) && "This string is linked to data that is not 0-terminated. This may cause serious security problems. Please assign the data instead of linking."); 217 return (begin()); 218} 219 220/// Concatenates itself with \p s 221inline string string::operator+ (const string& s) const 222{ 223 string result (*this); 224 result += s; 225 return (result); 226} 227 228//---------------------------------------------------------------------- 229// Operators needed to avoid comparing pointer to pointer 230 231#define PTR_STRING_CMP(op, impl) \ 232inline bool op (const char* s1, const string& s2) { return impl; } 233PTR_STRING_CMP (operator==, (s2 == s1)) 234PTR_STRING_CMP (operator!=, (s2 != s1)) 235PTR_STRING_CMP (operator<, (s2 > s1)) 236PTR_STRING_CMP (operator<=, (s2 >= s1)) 237PTR_STRING_CMP (operator>, (s2 < s1)) 238PTR_STRING_CMP (operator>=, (s2 <= s1)) 239#undef PTR_STRING_CMP 240 241//---------------------------------------------------------------------- 242 243template <typename T> 244inline hashvalue_t hash_value (const T& v) 245{ return (string::hash (v.begin(), v.end())); } 246 247template <> 248inline hashvalue_t hash_value (const string::const_pointer& v) 249{ return (string::hash (v, v + strlen(v))); } 250 251template <> 252inline hashvalue_t hash_value (const string::pointer& v) 253{ return (string::hash (v, v + strlen(v))); } 254 255//---------------------------------------------------------------------- 256 257} // namespace ustl 258 259// Specialization for stream alignment 260ALIGNOF (ustl::string, alignof (string::value_type())) 261 262#endif 263 264