ustring.cpp revision 9066cfe9886ac131c34d59ed0e2d287b0e3c0087
1// This file is part of the ustl library, an STL implementation. 2// 3// Copyright (C) 2005 by Mike Sharov <msharov@users.sourceforge.net> 4// This file is free software, distributed under the MIT License. 5// 6// ustring.cpp 7// 8// STL basic_string equivalent functionality. 9// 10 11#include "uassert.h" 12#include "ustring.h" 13#include "mistream.h" 14#include "mostream.h" 15#include "ualgo.h" 16#include <stdio.h> // for vsnprintf (in string::format) 17 18#include "uassert.h" 19 20namespace ustl { 21 22//---------------------------------------------------------------------- 23 24const uoff_t string::npos; 25const string::size_type string::size_Terminator; 26const string::value_type string::c_Terminator; 27const char string::empty_string[string::size_Terminator] = ""; 28 29typedef utf8in_iterator<string::const_iterator> utf8icstring_iterator; 30typedef utf8in_iterator<string::iterator> utf8istring_iterator; 31typedef utf8out_iterator<string::iterator> utf8ostring_iterator; 32 33//---------------------------------------------------------------------- 34 35/// Creates an empty string. 36string::string (void) 37: memblock () 38{ 39 link (empty_string, 0U); 40} 41 42/// Assigns itself the value of string \p s 43string::string (const string& s) 44: memblock() 45{ 46 if (s.is_linked()) 47 link (s.c_str(), s.size()); 48 else 49 assign (s); 50} 51 52/// Links to \p s 53string::string (const_pointer s) 54: memblock () 55{ 56 if (!s) 57 s = empty_string; 58 link (s, strlen(s)); 59} 60 61/// Creates a string of length \p n filled with character \p c. 62string::string (size_type n, value_type c) 63: memblock () 64{ 65 resize (n); 66 fill_n (begin(), n, c); 67} 68 69/// Resize the string to \p n characters. New space contents is undefined. 70void string::resize (size_type n) 71{ 72 memblock::resize (n); 73 at(n) = c_Terminator; 74} 75 76/// Assigns itself the value of string \p s 77void string::assign (const_pointer s) 78{ 79 if (!s) 80 s = empty_string; 81 assign (s, strlen (s)); 82} 83 84/// Assigns itself the value of string \p s of length \p len. 85void string::assign (const_pointer s, size_type len) 86{ 87 while (len && s[len - 1] == c_Terminator) 88 -- len; 89 resize (len); 90 copy (s, len); 91} 92 93/// Appends to itself the value of string \p s of length \p len. 94void string::append (const_pointer s) 95{ 96 if (!s) 97 s = empty_string; 98 append (s, strlen (s)); 99} 100 101/// Appends to itself the value of string \p s of length \p len. 102void string::append (const_pointer s, size_type len) 103{ 104 while (len && s[len - 1] == c_Terminator) 105 -- len; 106 resize (size() + len); 107 copy_n (s, len, end() - len); 108} 109 110/// Appends to itself \p n characters of value \p c. 111void string::append (size_type n, value_type c) 112{ 113 resize (size() + n); 114 fill_n (end() - n, n, c); 115} 116 117/// Copies into itself at offset \p start, the value of string \p p of length \p n. 118string::size_type string::copyto (pointer p, size_type n, const_iterator start) const 119{ 120 assert (p && n); 121 if (!start) 122 start = begin(); 123 const size_type btc = min(n - size_Terminator, size()); 124 copy_n (start, btc, p); 125 p[btc] = c_Terminator; 126 return (btc + size_Terminator); 127} 128 129/// Returns comparison value regarding string \p s. 130/// The return value is: 131/// \li 1 if this string is greater (by value, not length) than string \p s 132/// \li 0 if this string is equal to string \p s 133/// \li -1 if this string is less than string \p s 134/// 135/*static*/ int string::compare (const_iterator first1, const_iterator last1, const_iterator first2, const_iterator last2) 136{ 137 assert (first1 <= last1 && (first2 <= last2 || !last2) && "Negative ranges result in memory allocation errors."); 138 const size_type len1 = distance (first1, last1), len2 = distance (first2, last2); 139 const int rvbylen = sign (int(len1 - len2)); 140 int rv = memcmp (first1, first2, min (len1, len2)); 141 return (rv ? rv : rvbylen); 142} 143 144/// Returns true if this string is equal to string \p s. 145bool string::operator== (const_pointer s) const 146{ 147 if (!s) 148 s = empty_string; 149 return (size() == strlen(s) && 0 == memcmp (c_str(), s, size())); 150} 151 152/// Returns the beginning of character \p i. 153string::iterator string::utf8_iat (uoff_t i) 154{ 155 utf8istring_iterator cfinder (begin()); 156 cfinder += i; 157 return (cfinder.base()); 158} 159 160/// Inserts wide character \p c at \p ip \p n times as a UTF-8 string. 161/// 162/// \p ip is a character position, not a byte position, and must fall in 163/// the 0 through length() range. 164/// The first argument is not an iterator because it is rather difficult 165/// to get one. You'd have to use ((utf8begin() + n).base()) as the first 166/// argument, which is rather ugly. Besides, then this insert would be 167/// ambiguous with the regular character insert. 168/// 169void string::insert (const uoff_t ip, wchar_t c, size_type n) 170{ 171 iterator ipp (utf8_iat (ip)); 172 ipp = iterator (memblock::insert (memblock::iterator(ipp), n * Utf8Bytes(c))); 173 fill_n (utf8out (ipp), n, c); 174 *end() = c_Terminator; 175} 176 177/// Inserts sequence of wide characters at \p ip. 178void string::insert (const uoff_t ip, const wchar_t* first, const wchar_t* last, const size_type n) 179{ 180 iterator ipp (utf8_iat (ip)); 181 size_type nti = distance (first, last), bti = 0; 182 for (uoff_t i = 0; i < nti; ++ i) 183 bti += Utf8Bytes(first[i]); 184 ipp = iterator (memblock::insert (memblock::iterator(ipp), n * bti)); 185 utf8ostring_iterator uout (utf8out (ipp)); 186 for (uoff_t j = 0; j < n; ++ j) 187 for (uoff_t k = 0; k < nti; ++ k, ++ uout) 188 *uout = first[k]; 189 *end() = c_Terminator; 190} 191 192/// Inserts character \p c into this string at \p start. 193string::iterator string::insert (iterator start, const_reference c, size_type n) 194{ 195 start = iterator (memblock::insert (memblock::iterator(start), n)); 196 fill_n (start, n, c); 197 *end() = c_Terminator; 198 return (start); 199} 200 201/// Inserts \p count instances of string \p s at offset \p start. 202string::iterator string::insert (iterator start, const_pointer s, size_type n) 203{ 204 if (!s) 205 s = empty_string; 206 return (insert (start, s, s + strlen(s), n)); 207} 208 209/// Inserts [first,last] \p n times. 210string::iterator string::insert (iterator start, const_pointer first, const_pointer last, size_type n) 211{ 212 assert (first <= last); 213 assert (begin() <= start && end() >= start); 214 assert ((first < begin() || first >= end() || size() + abs_distance(first,last) < capacity()) && "Insertion of self with autoresize is not supported"); 215 start = iterator (memblock::insert (memblock::iterator(start), distance(first, last) * n)); 216 fill (memblock::iterator(start), first, distance(first, last), n); 217 *end() = c_Terminator; 218 return (start); 219} 220 221/// Erases \p size bytes at \p start. 222string::iterator string::erase (iterator ep, size_type n) 223{ 224 string::iterator rv = memblock::erase (memblock::iterator(ep), n); 225 *end() = c_Terminator; 226 return (rv); 227} 228 229/// Erases \p size characters at \p start. 230/// \p start is a character position, not a byte position, and must be 231/// in the 0..length() range. 232/// 233void string::erase (uoff_t ep, size_type n) 234{ 235 iterator first (utf8_iat(ep)); 236 size_t nbytes (utf8_iat(ep + n) - first); 237 memblock::erase (first, nbytes); 238 *end() = c_Terminator; 239} 240 241/// Replaces range [\p start, \p start + \p len] with string \p s. 242void string::replace (iterator first, iterator last, const_pointer s) 243{ 244 if (!s) 245 s = empty_string; 246 replace (first, last, s, s + strlen(s)); 247} 248 249/// Replaces range [\p start, \p start + \p len] with \p count instances of string \p s. 250void string::replace (iterator first, iterator last, const_pointer i1, const_pointer i2, size_type n) 251{ 252 assert (first <= last); 253 assert (n || distance(first, last)); 254 assert (first >= begin() && first <= end() && last >= first && last <= end()); 255 assert ((i1 < begin() || i1 >= end() || abs_distance(i1,i2) * n + size() < capacity()) && "Replacement by self can not autoresize"); 256 const size_type bte = distance(first, last), bti = distance(i1, i2) * n; 257 if (bti < bte) 258 first = iterator (memblock::erase (memblock::iterator(first), bte - bti)); 259 else if (bte < bti) 260 first = iterator (memblock::insert (memblock::iterator(first), bti - bte)); 261 fill (memblock::iterator(first), i1, distance(i1, i2), n); 262 *end() = c_Terminator; 263} 264 265/// Returns the offset of the first occurence of \p c after \p pos. 266uoff_t string::find (const_reference c, uoff_t pos) const 267{ 268 const_iterator found = ::ustl::find (iat(pos), end(), c); 269 return (found < end() ? distance(begin(),found) : npos); 270} 271 272/// Returns the offset of the first occurence of substring \p s of length \p n after \p pos. 273uoff_t string::find (const string& s, uoff_t pos) const 274{ 275 if (s.empty() || s.size() > size() - pos) 276 return (npos); 277 const uoff_t endi = s.size() - 1; 278 const_reference endchar = s[endi]; 279 uoff_t lastPos = endi; 280 while (lastPos-- && s[lastPos] != endchar); 281 const size_type skip = endi - lastPos; 282 const_iterator i = iat(pos) + endi; 283 for (; i < end() && (i = ::ustl::find (i, end(), endchar)) < end(); i += skip) 284 if (memcmp (i - endi, s.c_str(), s.size()) == 0) 285 return (distance (begin(), i) - endi); 286 return (npos); 287} 288 289/// Returns the offset of the last occurence of character \p c before \p pos. 290uoff_t string::rfind (const_reference c, uoff_t pos) const 291{ 292 for (int i = min(pos,size()-1); i >= 0; --i) 293 if (at(i) == c) 294 return (i); 295 return (npos); 296} 297 298/// Returns the offset of the last occurence of substring \p s of size \p n before \p pos. 299uoff_t string::rfind (const string& s, uoff_t pos) const 300{ 301 const_iterator d = iat(pos) - 1; 302 const_iterator sp = begin() + s.size() - 1; 303 const_iterator m = s.end() - 1; 304 for (uoff_t i = 0; d > sp && i < s.size(); -- d) 305 for (i = 0; i < s.size(); ++ i) 306 if (m[-i] != d[-i]) 307 break; 308 return (d > sp ? distance (begin(), d + 2 - s.size()) : npos); 309} 310 311/// Returns the offset of the first occurence of one of characters in \p s of size \p n after \p pos. 312uoff_t string::find_first_of (const string& s, uoff_t pos) const 313{ 314 for (uoff_t i = min(pos,size()); i < size(); ++ i) 315 if (s.find (at(i)) != npos) 316 return (i); 317 return (npos); 318} 319 320/// Returns the offset of the first occurence of one of characters not in \p s of size \p n after \p pos. 321uoff_t string::find_first_not_of (const string& s, uoff_t pos) const 322{ 323 for (uoff_t i = min(pos,size()); i < size(); ++ i) 324 if (s.find (at(i)) == npos) 325 return (i); 326 return (npos); 327} 328 329/// Returns the offset of the last occurence of one of characters in \p s of size \p n before \p pos. 330uoff_t string::find_last_of (const string& s, uoff_t pos) const 331{ 332 for (int i = min(pos,size()-1); i >= 0; -- i) 333 if (s.find (at(i)) != npos) 334 return (i); 335 return (npos); 336} 337 338/// Returns the offset of the last occurence of one of characters not in \p s of size \p n before \p pos. 339uoff_t string::find_last_not_of (const string& s, uoff_t pos) const 340{ 341 for (int i = min(pos,size()-1); i >= 0; -- i) 342 if (s.find (at(i)) == npos) 343 return (i); 344 return (npos); 345} 346 347/// Equivalent to a vsprintf on the string. 348int string::vformat (const char* fmt, va_list args) 349{ 350#if HAVE_VA_COPY 351 va_list args2; 352#else 353 #define args2 args 354 #undef __va_copy 355 #define __va_copy(x,y) 356#endif 357 size_t rv = size(); 358 do { 359 reserve (rv); 360 __va_copy (args2, args); 361 rv = vsnprintf (data(), memblock::capacity(), fmt, args2); 362 rv = min (rv, memblock::capacity()); 363 } while (rv > capacity()); 364 resize (min (rv, capacity())); 365 return (rv); 366} 367 368/// Equivalent to a sprintf on the string. 369int string::format (const char* fmt, ...) 370{ 371 va_list args; 372 va_start (args, fmt); 373 const int rv = vformat (fmt, args); 374 va_end (args); 375 return (rv); 376} 377 378/// Returns the number of bytes required to write this object to a stream. 379size_t string::stream_size (void) const 380{ 381 return (Utf8Bytes(size()) + size()); 382} 383 384/// Reads the object from stream \p os 385void string::read (istream& is) 386{ 387 char szbuf [8]; 388 is >> szbuf[0]; 389 size_t szsz (Utf8SequenceBytes (szbuf[0]) - 1), n = 0; 390 is.verify_remaining ("read", "ustl::string", szsz); 391 is.read (szbuf + 1, szsz); 392 n = *utf8in(szbuf); 393 is.verify_remaining ("read", "ustl::string", n); 394 resize (n); 395 is.read (data(), size()); 396} 397 398/// Writes the object to stream \p os 399void string::write (ostream& os) const 400{ 401 const written_size_type sz (size()); 402 assert (sz == size() && "No support for writing strings larger than 4G"); 403 404 char szbuf [8]; 405 utf8out_iterator<char*> szout (szbuf); 406 *szout = sz; 407 size_t szsz = distance (szbuf, szout.base()); 408 409 os.verify_remaining ("write", "ustl::string", szsz + sz); 410 os.write (szbuf, szsz); 411 os.write (cdata(), sz); 412} 413 414/// Returns a hash value for [first, last) 415/*static*/ hashvalue_t string::hash (const char* first, const char* last) 416{ 417 hashvalue_t h = 0; 418 // This has the bits flowing into each other from both sides of the number 419 for (; first < last; ++ first) 420 h = *first + ((h << 7) | (h >> BitsInType(hashvalue_t) - 7)); 421 return (h); 422} 423 424} // namespace ustl 425 426