// UString.h revision d227fc870c7a697500a3c900c31baf05fb9a8524
1/* 2 * Copyright (C) 1999-2000 Harri Porten (porten@kde.org) 3 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved. 4 * Copyright (C) 2009 Google Inc. All rights reserved. 5 * 6 * This library is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Library General Public 8 * License as published by the Free Software Foundation; either 9 * version 2 of the License, or (at your option) any later version. 10 * 11 * This library is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Library General Public License for more details. 15 * 16 * You should have received a copy of the GNU Library General Public License 17 * along with this library; see the file COPYING.LIB. If not, write to 18 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 19 * Boston, MA 02110-1301, USA. 20 * 21 */ 22 23#ifndef UString_h 24#define UString_h 25 26#include "Collector.h" 27#include <stdint.h> 28#include <string.h> 29#include <wtf/Assertions.h> 30#include <wtf/CrossThreadRefCounted.h> 31#include <wtf/OwnFastMallocPtr.h> 32#include <wtf/PassRefPtr.h> 33#include <wtf/PtrAndFlags.h> 34#include <wtf/RefPtr.h> 35#include <wtf/Vector.h> 36#include <wtf/unicode/Unicode.h> 37 38namespace JSC { 39 40 using WTF::PlacementNewAdoptType; 41 using WTF::PlacementNewAdopt; 42 43 class IdentifierTable; 44 45 class CString { 46 public: 47 CString() 48 : m_length(0) 49 , m_data(0) 50 { 51 } 52 53 CString(const char*); 54 CString(const char*, size_t); 55 CString(const CString&); 56 57 ~CString(); 58 59 static CString adopt(char*, size_t); // buffer should be allocated with new[]. 
60 61 CString& append(const CString&); 62 CString& operator=(const char* c); 63 CString& operator=(const CString&); 64 CString& operator+=(const CString& c) { return append(c); } 65 66 size_t size() const { return m_length; } 67 const char* c_str() const { return m_data; } 68 69 private: 70 size_t m_length; 71 char* m_data; 72 }; 73 74 typedef Vector<char, 32> CStringBuffer; 75 76 class UString { 77 friend class JIT; 78 79 public: 80 typedef CrossThreadRefCounted<OwnFastMallocPtr<UChar> > SharedUChar; 81 struct BaseString; 82 struct Rep : Noncopyable { 83 friend class JIT; 84 85 static PassRefPtr<Rep> create(UChar* buffer, int length) 86 { 87 return adoptRef(new BaseString(buffer, length)); 88 } 89 90 static PassRefPtr<Rep> createEmptyBuffer(size_t size) 91 { 92 // Guard against integer overflow 93 if (size < (std::numeric_limits<size_t>::max() / sizeof(UChar))) { 94 void* buf = 0; 95 if (tryFastMalloc(size * sizeof(UChar)).getValue(buf)) 96 return adoptRef(new BaseString(static_cast<UChar*>(buf), 0, size)); 97 } 98 return adoptRef(new BaseString(0, 0, 0)); 99 } 100 101 static PassRefPtr<Rep> createCopying(const UChar*, int); 102 static PassRefPtr<Rep> create(PassRefPtr<Rep> base, int offset, int length); 103 104 // Constructs a string from a UTF-8 string, using strict conversion (see comments in UTF8.h). 105 // Returns UString::Rep::null for null input or conversion failure. 106 static PassRefPtr<Rep> createFromUTF8(const char*); 107 108 // Uses SharedUChar to have joint ownership over the UChar*. 
109 static PassRefPtr<Rep> create(UChar*, int, PassRefPtr<SharedUChar>); 110 111 SharedUChar* sharedBuffer(); 112 void destroy(); 113 114 bool baseIsSelf() const { return m_identifierTableAndFlags.isFlagSet(BaseStringFlag); } 115 UChar* data() const; 116 int size() const { return len; } 117 118 unsigned hash() const { if (_hash == 0) _hash = computeHash(data(), len); return _hash; } 119 unsigned computedHash() const { ASSERT(_hash); return _hash; } // fast path for Identifiers 120 121 static unsigned computeHash(const UChar*, int length); 122 static unsigned computeHash(const char*, int length); 123 static unsigned computeHash(const char* s) { return computeHash(s, strlen(s)); } 124 125 IdentifierTable* identifierTable() const { return m_identifierTableAndFlags.get(); } 126 void setIdentifierTable(IdentifierTable* table) { ASSERT(!isStatic()); m_identifierTableAndFlags.set(table); } 127 128 bool isStatic() const { return m_identifierTableAndFlags.isFlagSet(StaticFlag); } 129 void setStatic(bool); 130 void setBaseString(PassRefPtr<BaseString>); 131 BaseString* baseString(); 132 const BaseString* baseString() const; 133 134 Rep* ref() { ++rc; return this; } 135 ALWAYS_INLINE void deref() { if (--rc == 0) destroy(); } 136 137 void checkConsistency() const; 138 enum UStringFlags { 139 StaticFlag, 140 BaseStringFlag 141 }; 142 143 // unshared data 144 int offset; 145 int len; 146 int rc; // For null and empty static strings, this field does not reflect a correct count, because ref/deref are not thread-safe. A special case in destroy() guarantees that these do not get deleted. 147 mutable unsigned _hash; 148 PtrAndFlags<IdentifierTable, UStringFlags> m_identifierTableAndFlags; 149 150 static BaseString& null() { return *nullBaseString; } 151 static BaseString& empty() { return *emptyBaseString; } 152 153 bool reserveCapacity(int capacity); 154 155 protected: 156 // Constructor for use by BaseString subclass; they use the union with m_baseString for another purpose. 
157 Rep(int length) 158 : offset(0) 159 , len(length) 160 , rc(1) 161 , _hash(0) 162 , m_baseString(0) 163 { 164 } 165 166 Rep(PassRefPtr<BaseString> base, int offsetInBase, int length) 167 : offset(offsetInBase) 168 , len(length) 169 , rc(1) 170 , _hash(0) 171 , m_baseString(base.releaseRef()) 172 { 173 checkConsistency(); 174 } 175 176 union { 177 // If !baseIsSelf() 178 BaseString* m_baseString; 179 // If baseIsSelf() 180 SharedUChar* m_sharedBuffer; 181 }; 182 183 private: 184 // For SmallStringStorage which allocates an array and does initialization manually. 185 Rep() { } 186 187 friend class SmallStringsStorage; 188 friend void initializeUString(); 189 JS_EXPORTDATA static BaseString* nullBaseString; 190 JS_EXPORTDATA static BaseString* emptyBaseString; 191 }; 192 193 194 struct BaseString : public Rep { 195 bool isShared() { return rc != 1 || isBufferReadOnly(); } 196 void setSharedBuffer(PassRefPtr<SharedUChar>); 197 198 bool isBufferReadOnly() 199 { 200 if (!m_sharedBuffer) 201 return false; 202 return slowIsBufferReadOnly(); 203 } 204 205 // potentially shared data. 
206 UChar* buf; 207 int preCapacity; 208 int usedPreCapacity; 209 int capacity; 210 int usedCapacity; 211 212 size_t reportedCost; 213 214 private: 215 BaseString(UChar* buffer, int length, int additionalCapacity = 0) 216 : Rep(length) 217 , buf(buffer) 218 , preCapacity(0) 219 , usedPreCapacity(0) 220 , capacity(length + additionalCapacity) 221 , usedCapacity(length) 222 , reportedCost(0) 223 { 224 m_identifierTableAndFlags.setFlag(BaseStringFlag); 225 checkConsistency(); 226 } 227 228 SharedUChar* sharedBuffer(); 229 bool slowIsBufferReadOnly(); 230 231 friend struct Rep; 232 friend class SmallStringsStorage; 233 friend void initializeUString(); 234 }; 235 236 public: 237 UString(); 238 UString(const char*); 239 UString(const UChar*, int length); 240 UString(UChar*, int length, bool copy); 241 242 UString(const UString& s) 243 : m_rep(s.m_rep) 244 { 245 } 246 247 UString(const Vector<UChar>& buffer); 248 249 ~UString() 250 { 251 } 252 253 // Special constructor for cases where we overwrite an object in place. 
254 UString(PlacementNewAdoptType) 255 : m_rep(PlacementNewAdopt) 256 { 257 } 258 259 static UString from(int); 260 static UString from(long long); 261 static UString from(unsigned int); 262 static UString from(long); 263 static UString from(double); 264 265 struct Range { 266 public: 267 Range(int pos, int len) 268 : position(pos) 269 , length(len) 270 { 271 } 272 273 Range() 274 { 275 } 276 277 int position; 278 int length; 279 }; 280 281 UString spliceSubstringsWithSeparators(const Range* substringRanges, int rangeCount, const UString* separators, int separatorCount) const; 282 283 UString replaceRange(int rangeStart, int RangeEnd, const UString& replacement) const; 284 285 UString& append(const UString&); 286 UString& append(const char*); 287 UString& append(UChar); 288 UString& append(char c) { return append(static_cast<UChar>(static_cast<unsigned char>(c))); } 289 UString& append(const UChar*, int size); 290 UString& appendNumeric(int); 291 UString& appendNumeric(double); 292 293 bool getCString(CStringBuffer&) const; 294 295 // NOTE: This method should only be used for *debugging* purposes as it 296 // is neither Unicode safe nor free from side effects nor thread-safe. 297 char* ascii() const; 298 299 /** 300 * Convert the string to UTF-8, assuming it is UTF-16 encoded. 301 * In non-strict mode, this function is tolerant of badly formed UTF-16, it 302 * can create UTF-8 strings that are invalid because they have characters in 303 * the range U+D800-U+DDFF, U+FFFE, or U+FFFF, but the UTF-8 string is 304 * guaranteed to be otherwise valid. 305 * In strict mode, error is returned as null CString. 
306 */ 307 CString UTF8String(bool strict = false) const; 308 309 UString& operator=(const char*c); 310 311 UString& operator+=(const UString& s) { return append(s); } 312 UString& operator+=(const char* s) { return append(s); } 313 314 const UChar* data() const { return m_rep->data(); } 315 316 bool isNull() const { return (m_rep == &Rep::null()); } 317 bool isEmpty() const { return (!m_rep->len); } 318 319 bool is8Bit() const; 320 321 int size() const { return m_rep->size(); } 322 323 UChar operator[](int pos) const; 324 325 double toDouble(bool tolerateTrailingJunk, bool tolerateEmptyString) const; 326 double toDouble(bool tolerateTrailingJunk) const; 327 double toDouble() const; 328 329 uint32_t toUInt32(bool* ok = 0) const; 330 uint32_t toUInt32(bool* ok, bool tolerateEmptyString) const; 331 uint32_t toStrictUInt32(bool* ok = 0) const; 332 333 unsigned toArrayIndex(bool* ok = 0) const; 334 335 int find(const UString& f, int pos = 0) const; 336 int find(UChar, int pos = 0) const; 337 int rfind(const UString& f, int pos) const; 338 int rfind(UChar, int pos) const; 339 340 UString substr(int pos = 0, int len = -1) const; 341 342 static const UString& null() { return *nullUString; } 343 344 Rep* rep() const { return m_rep.get(); } 345 static Rep* nullRep(); 346 347 UString(PassRefPtr<Rep> r) 348 : m_rep(r) 349 { 350 ASSERT(m_rep); 351 } 352 353 size_t cost() const; 354 355 // Attempt to grow this string such that it can grow to a total length of 'capacity' 356 // without reallocation. This may fail a number of reasons - if the BasicString is 357 // shared and another string is using part of the capacity beyond our end point, if 358 // the realloc fails, or if this string is empty and has no storage. 359 // 360 // This method returns a boolean indicating success. 
361 bool reserveCapacity(int capacity) 362 { 363 return m_rep->reserveCapacity(capacity); 364 } 365 366 private: 367 void expandCapacity(int requiredLength); 368 void expandPreCapacity(int requiredPreCap); 369 void makeNull(); 370 371 RefPtr<Rep> m_rep; 372 static UString* nullUString; 373 374 friend void initializeUString(); 375 friend bool operator==(const UString&, const UString&); 376 friend PassRefPtr<Rep> concatenate(Rep*, Rep*); // returns 0 if out of memory 377 }; 378 PassRefPtr<UString::Rep> concatenate(UString::Rep*, UString::Rep*); 379 PassRefPtr<UString::Rep> concatenate(UString::Rep*, int); 380 PassRefPtr<UString::Rep> concatenate(UString::Rep*, double); 381 382 inline bool operator==(const UString& s1, const UString& s2) 383 { 384 int size = s1.size(); 385 switch (size) { 386 case 0: 387 return !s2.size(); 388 case 1: 389 return s2.size() == 1 && s1.data()[0] == s2.data()[0]; 390 case 2: { 391 if (s2.size() != 2) 392 return false; 393 const UChar* d1 = s1.data(); 394 const UChar* d2 = s2.data(); 395 return (d1[0] == d2[0]) & (d1[1] == d2[1]); 396 } 397 default: 398 return s2.size() == size && memcmp(s1.data(), s2.data(), size * sizeof(UChar)) == 0; 399 } 400 } 401 402 403 inline bool operator!=(const UString& s1, const UString& s2) 404 { 405 return !JSC::operator==(s1, s2); 406 } 407 408 bool operator<(const UString& s1, const UString& s2); 409 bool operator>(const UString& s1, const UString& s2); 410 411 bool operator==(const UString& s1, const char* s2); 412 413 inline bool operator!=(const UString& s1, const char* s2) 414 { 415 return !JSC::operator==(s1, s2); 416 } 417 418 inline bool operator==(const char *s1, const UString& s2) 419 { 420 return operator==(s2, s1); 421 } 422 423 inline bool operator!=(const char *s1, const UString& s2) 424 { 425 return !JSC::operator==(s1, s2); 426 } 427 428 bool operator==(const CString&, const CString&); 429 430 inline UString operator+(const UString& s1, const UString& s2) 431 { 432 RefPtr<UString::Rep> result 
= concatenate(s1.rep(), s2.rep()); 433 return UString(result ? result.release() : UString::nullRep()); 434 } 435 436 int compare(const UString&, const UString&); 437 438 bool equal(const UString::Rep*, const UString::Rep*); 439 440 inline PassRefPtr<UString::Rep> UString::Rep::create(PassRefPtr<UString::Rep> rep, int offset, int length) 441 { 442 ASSERT(rep); 443 rep->checkConsistency(); 444 445 int repOffset = rep->offset; 446 447 PassRefPtr<BaseString> base = rep->baseString(); 448 449 ASSERT(-(offset + repOffset) <= base->usedPreCapacity); 450 ASSERT(offset + repOffset + length <= base->usedCapacity); 451 452 // Steal the single reference this Rep was created with. 453 return adoptRef(new Rep(base, repOffset + offset, length)); 454 } 455 456 inline UChar* UString::Rep::data() const 457 { 458 const BaseString* base = baseString(); 459 return base->buf + base->preCapacity + offset; 460 } 461 462 inline void UString::Rep::setStatic(bool v) 463 { 464 ASSERT(!identifierTable()); 465 if (v) 466 m_identifierTableAndFlags.setFlag(StaticFlag); 467 else 468 m_identifierTableAndFlags.clearFlag(StaticFlag); 469 } 470 471 inline void UString::Rep::setBaseString(PassRefPtr<BaseString> base) 472 { 473 ASSERT(base != this); 474 ASSERT(!baseIsSelf()); 475 m_baseString = base.releaseRef(); 476 } 477 478 inline UString::BaseString* UString::Rep::baseString() 479 { 480 return !baseIsSelf() ? m_baseString : reinterpret_cast<BaseString*>(this) ; 481 } 482 483 inline const UString::BaseString* UString::Rep::baseString() const 484 { 485 return const_cast<Rep*>(this)->baseString(); 486 } 487 488#ifdef NDEBUG 489 inline void UString::Rep::checkConsistency() const 490 { 491 } 492#endif 493 494 inline UString::UString() 495 : m_rep(&Rep::null()) 496 { 497 } 498 499 // Rule from ECMA 15.2 about what an array index is. 500 // Must exactly match string form of an unsigned integer, and be less than 2^32 - 1. 
501 inline unsigned UString::toArrayIndex(bool* ok) const 502 { 503 unsigned i = toStrictUInt32(ok); 504 if (ok && i >= 0xFFFFFFFFU) 505 *ok = false; 506 return i; 507 } 508 509 // We'd rather not do shared substring append for small strings, since 510 // this runs too much risk of a tiny initial string holding down a 511 // huge buffer. 512 // FIXME: this should be size_t but that would cause warnings until we 513 // fix UString sizes to be size_t instead of int 514 static const int minShareSize = Heap::minExtraCostSize / sizeof(UChar); 515 516 inline size_t UString::cost() const 517 { 518 BaseString* base = m_rep->baseString(); 519 size_t capacity = (base->capacity + base->preCapacity) * sizeof(UChar); 520 size_t reportedCost = base->reportedCost; 521 ASSERT(capacity >= reportedCost); 522 523 size_t capacityDelta = capacity - reportedCost; 524 525 if (capacityDelta < static_cast<size_t>(minShareSize)) 526 return 0; 527 528 base->reportedCost = capacity; 529 530 return capacityDelta; 531 } 532 533 struct IdentifierRepHash : PtrHash<RefPtr<JSC::UString::Rep> > { 534 static unsigned hash(const RefPtr<JSC::UString::Rep>& key) { return key->computedHash(); } 535 static unsigned hash(JSC::UString::Rep* key) { return key->computedHash(); } 536 }; 537 538 void initializeUString(); 539} // namespace JSC 540 541namespace WTF { 542 543 template<typename T> struct DefaultHash; 544 template<typename T> struct StrHash; 545 546 template<> struct StrHash<JSC::UString::Rep*> { 547 static unsigned hash(const JSC::UString::Rep* key) { return key->hash(); } 548 static bool equal(const JSC::UString::Rep* a, const JSC::UString::Rep* b) { return JSC::equal(a, b); } 549 static const bool safeToCompareToEmptyOrDeleted = false; 550 }; 551 552 template<> struct StrHash<RefPtr<JSC::UString::Rep> > : public StrHash<JSC::UString::Rep*> { 553 using StrHash<JSC::UString::Rep*>::hash; 554 static unsigned hash(const RefPtr<JSC::UString::Rep>& key) { return key->hash(); } 555 using 
StrHash<JSC::UString::Rep*>::equal; 556 static bool equal(const RefPtr<JSC::UString::Rep>& a, const RefPtr<JSC::UString::Rep>& b) { return JSC::equal(a.get(), b.get()); } 557 static bool equal(const JSC::UString::Rep* a, const RefPtr<JSC::UString::Rep>& b) { return JSC::equal(a, b.get()); } 558 static bool equal(const RefPtr<JSC::UString::Rep>& a, const JSC::UString::Rep* b) { return JSC::equal(a.get(), b); } 559 560 static const bool safeToCompareToEmptyOrDeleted = false; 561 }; 562 563 template<> struct DefaultHash<JSC::UString::Rep*> { 564 typedef StrHash<JSC::UString::Rep*> Hash; 565 }; 566 567 template<> struct DefaultHash<RefPtr<JSC::UString::Rep> > { 568 typedef StrHash<RefPtr<JSC::UString::Rep> > Hash; 569 570 }; 571 572} // namespace WTF 573 574#endif 575