// UString.h revision d0825bca7fe65beaee391d30da42e937db621564
1/* 2 * Copyright (C) 1999-2000 Harri Porten (porten@kde.org) 3 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved. 4 * Copyright (C) 2009 Google Inc. All rights reserved. 5 * 6 * This library is free software; you can redistribute it and/or 7 * modify it under the terms of the GNU Library General Public 8 * License as published by the Free Software Foundation; either 9 * version 2 of the License, or (at your option) any later version. 10 * 11 * This library is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 14 * Library General Public License for more details. 15 * 16 * You should have received a copy of the GNU Library General Public License 17 * along with this library; see the file COPYING.LIB. If not, write to 18 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, 19 * Boston, MA 02110-1301, USA. 20 * 21 */ 22 23#ifndef UString_h 24#define UString_h 25 26#include "Collector.h" 27#include "UStringImpl.h" 28#include <stdint.h> 29#include <string.h> 30#include <wtf/Assertions.h> 31#include <wtf/CrossThreadRefCounted.h> 32#include <wtf/OwnFastMallocPtr.h> 33#include <wtf/PassRefPtr.h> 34#include <wtf/PtrAndFlags.h> 35#include <wtf/RefPtr.h> 36#include <wtf/Vector.h> 37#include <wtf/unicode/Unicode.h> 38 39namespace JSC { 40 41 using WTF::PlacementNewAdoptType; 42 using WTF::PlacementNewAdopt; 43 44 class CString { 45 public: 46 CString() 47 : m_length(0) 48 , m_data(0) 49 { 50 } 51 52 CString(const char*); 53 CString(const char*, size_t); 54 CString(const CString&); 55 56 ~CString(); 57 58 static CString adopt(char*, size_t); // buffer should be allocated with new[]. 
59 60 CString& append(const CString&); 61 CString& operator=(const char* c); 62 CString& operator=(const CString&); 63 CString& operator+=(const CString& c) { return append(c); } 64 65 size_t size() const { return m_length; } 66 const char* c_str() const { return m_data; } 67 68 private: 69 size_t m_length; 70 char* m_data; 71 }; 72 73 bool operator==(const CString&, const CString&); 74 75 typedef Vector<char, 32> CStringBuffer; 76 77 class UString { 78 friend class JIT; 79 80 public: 81 typedef UStringImpl Rep; 82 83 public: 84 // UString constructors passed char*s assume ISO Latin-1 encoding; for UTF8 use 'createFromUTF8', below. 85 UString(); 86 UString(const char*); // Constructor for null-terminated string. 87 UString(const char*, int length); 88 UString(const UChar*, int length); 89 UString(const Vector<UChar>& buffer); 90 91 UString(const UString& s) 92 : m_rep(s.m_rep) 93 { 94 } 95 96 // Special constructor for cases where we overwrite an object in place. 97 UString(PlacementNewAdoptType) 98 : m_rep(PlacementNewAdopt) 99 { 100 } 101 102 ~UString() 103 { 104 } 105 106 template<size_t inlineCapacity> 107 static PassRefPtr<UStringImpl> adopt(Vector<UChar, inlineCapacity>& vector) 108 { 109 return Rep::adopt(vector); 110 } 111 112 static UString createFromUTF8(const char*); 113 114 static UString from(int); 115 static UString from(long long); 116 static UString from(unsigned int); 117 static UString from(long); 118 static UString from(double); 119 120 struct Range { 121 public: 122 Range(int pos, int len) 123 : position(pos) 124 , length(len) 125 { 126 } 127 128 Range() 129 { 130 } 131 132 int position; 133 int length; 134 }; 135 136 UString spliceSubstringsWithSeparators(const Range* substringRanges, int rangeCount, const UString* separators, int separatorCount) const; 137 138 UString replaceRange(int rangeStart, int RangeEnd, const UString& replacement) const; 139 140 bool getCString(CStringBuffer&) const; 141 142 // NOTE: This method should only be used for 
*debugging* purposes as it 143 // is neither Unicode safe nor free from side effects nor thread-safe. 144 char* ascii() const; 145 146 /** 147 * Convert the string to UTF-8, assuming it is UTF-16 encoded. 148 * In non-strict mode, this function is tolerant of badly formed UTF-16, it 149 * can create UTF-8 strings that are invalid because they have characters in 150 * the range U+D800-U+DDFF, U+FFFE, or U+FFFF, but the UTF-8 string is 151 * guaranteed to be otherwise valid. 152 * In strict mode, error is returned as null CString. 153 */ 154 CString UTF8String(bool strict = false) const; 155 156 UString& operator=(const char*c); 157 158 const UChar* data() const { return m_rep->data(); } 159 160 bool isNull() const { return m_rep == &Rep::null(); } 161 bool isEmpty() const { return !m_rep->size(); } 162 163 bool is8Bit() const; 164 165 int size() const { return m_rep->size(); } 166 167 UChar operator[](int pos) const; 168 169 double toDouble(bool tolerateTrailingJunk, bool tolerateEmptyString) const; 170 double toDouble(bool tolerateTrailingJunk) const; 171 double toDouble() const; 172 173 uint32_t toUInt32(bool* ok = 0) const; 174 uint32_t toUInt32(bool* ok, bool tolerateEmptyString) const; 175 uint32_t toStrictUInt32(bool* ok = 0) const; 176 177 unsigned toArrayIndex(bool* ok = 0) const; 178 179 int find(const UString& f, int pos = 0) const; 180 int find(UChar, int pos = 0) const; 181 int rfind(const UString& f, int pos) const; 182 int rfind(UChar, int pos) const; 183 184 UString substr(int pos = 0, int len = -1) const; 185 186 static const UString& null() { return *nullUString; } 187 188 Rep* rep() const { return m_rep.get(); } 189 static Rep* nullRep(); 190 191 UString(PassRefPtr<Rep> r) 192 : m_rep(r) 193 { 194 ASSERT(m_rep); 195 } 196 197 size_t cost() const { return m_rep->cost(); } 198 199 private: 200 void makeNull(); 201 202 RefPtr<Rep> m_rep; 203 static UString* nullUString; 204 205 friend void initializeUString(); 206 friend bool operator==(const 
UString&, const UString&); 207 }; 208 209 ALWAYS_INLINE bool operator==(const UString& s1, const UString& s2) 210 { 211 int size = s1.size(); 212 switch (size) { 213 case 0: 214 return !s2.size(); 215 case 1: 216 return s2.size() == 1 && s1.data()[0] == s2.data()[0]; 217 case 2: { 218 if (s2.size() != 2) 219 return false; 220 const UChar* d1 = s1.data(); 221 const UChar* d2 = s2.data(); 222 return (d1[0] == d2[0]) & (d1[1] == d2[1]); 223 } 224 default: 225 return s2.size() == size && memcmp(s1.data(), s2.data(), size * sizeof(UChar)) == 0; 226 } 227 } 228 229 230 inline bool operator!=(const UString& s1, const UString& s2) 231 { 232 return !JSC::operator==(s1, s2); 233 } 234 235 bool operator<(const UString& s1, const UString& s2); 236 bool operator>(const UString& s1, const UString& s2); 237 238 bool operator==(const UString& s1, const char* s2); 239 240 inline bool operator!=(const UString& s1, const char* s2) 241 { 242 return !JSC::operator==(s1, s2); 243 } 244 245 inline bool operator==(const char *s1, const UString& s2) 246 { 247 return operator==(s2, s1); 248 } 249 250 inline bool operator!=(const char *s1, const UString& s2) 251 { 252 return !JSC::operator==(s1, s2); 253 } 254 255 int compare(const UString&, const UString&); 256 257 inline UString::UString() 258 : m_rep(&Rep::null()) 259 { 260 } 261 262 // Rule from ECMA 15.2 about what an array index is. 263 // Must exactly match string form of an unsigned integer, and be less than 2^32 - 1. 264 inline unsigned UString::toArrayIndex(bool* ok) const 265 { 266 unsigned i = toStrictUInt32(ok); 267 if (ok && i >= 0xFFFFFFFFU) 268 *ok = false; 269 return i; 270 } 271 272 // We'd rather not do shared substring append for small strings, since 273 // this runs too much risk of a tiny initial string holding down a 274 // huge buffer. 
275 // FIXME: this should be size_t but that would cause warnings until we 276 // fix UString sizes to be size_t instead of int 277 static const int minShareSize = Heap::minExtraCost / sizeof(UChar); 278 279 struct IdentifierRepHash : PtrHash<RefPtr<JSC::UString::Rep> > { 280 static unsigned hash(const RefPtr<JSC::UString::Rep>& key) { return key->existingHash(); } 281 static unsigned hash(JSC::UString::Rep* key) { return key->existingHash(); } 282 }; 283 284 void initializeUString(); 285 286 template<typename StringType> 287 class StringTypeAdapter { 288 }; 289 290 template<> 291 class StringTypeAdapter<char*> { 292 public: 293 StringTypeAdapter<char*>(char* buffer) 294 : m_buffer((unsigned char*)buffer) 295 , m_length(strlen(buffer)) 296 { 297 } 298 299 unsigned length() { return m_length; } 300 301 void writeTo(UChar* destination) 302 { 303 for (unsigned i = 0; i < m_length; ++i) 304 destination[i] = m_buffer[i]; 305 } 306 307 private: 308 const unsigned char* m_buffer; 309 unsigned m_length; 310 }; 311 312 template<> 313 class StringTypeAdapter<const char*> { 314 public: 315 StringTypeAdapter<const char*>(const char* buffer) 316 : m_buffer((unsigned char*)buffer) 317 , m_length(strlen(buffer)) 318 { 319 } 320 321 unsigned length() { return m_length; } 322 323 void writeTo(UChar* destination) 324 { 325 for (unsigned i = 0; i < m_length; ++i) 326 destination[i] = m_buffer[i]; 327 } 328 329 private: 330 const unsigned char* m_buffer; 331 unsigned m_length; 332 }; 333 334 template<> 335 class StringTypeAdapter<UString> { 336 public: 337 StringTypeAdapter<UString>(UString& string) 338 : m_data(string.data()) 339 , m_length(string.size()) 340 { 341 } 342 343 unsigned length() { return m_length; } 344 345 void writeTo(UChar* destination) 346 { 347 for (unsigned i = 0; i < m_length; ++i) 348 destination[i] = m_data[i]; 349 } 350 351 private: 352 const UChar* m_data; 353 unsigned m_length; 354 }; 355 356 template<typename StringType1, typename StringType2> 357 UString 
makeString(StringType1 string1, StringType2 string2) 358 { 359 StringTypeAdapter<StringType1> adapter1(string1); 360 StringTypeAdapter<StringType2> adapter2(string2); 361 362 UChar* buffer; 363 unsigned length = adapter1.length() + adapter2.length(); 364 PassRefPtr<UStringImpl> resultImpl = UStringImpl::tryCreateUninitialized(length, buffer); 365 if (!resultImpl) 366 return UString(); 367 368 UChar* result = buffer; 369 adapter1.writeTo(result); 370 result += adapter1.length(); 371 adapter2.writeTo(result); 372 373 return resultImpl; 374 } 375 376 template<typename StringType1, typename StringType2, typename StringType3> 377 UString makeString(StringType1 string1, StringType2 string2, StringType3 string3) 378 { 379 StringTypeAdapter<StringType1> adapter1(string1); 380 StringTypeAdapter<StringType2> adapter2(string2); 381 StringTypeAdapter<StringType3> adapter3(string3); 382 383 UChar* buffer; 384 unsigned length = adapter1.length() + adapter2.length() + adapter3.length(); 385 PassRefPtr<UStringImpl> resultImpl = UStringImpl::tryCreateUninitialized(length, buffer); 386 if (!resultImpl) 387 return UString(); 388 389 UChar* result = buffer; 390 adapter1.writeTo(result); 391 result += adapter1.length(); 392 adapter2.writeTo(result); 393 result += adapter2.length(); 394 adapter3.writeTo(result); 395 396 return resultImpl; 397 } 398 399 template<typename StringType1, typename StringType2, typename StringType3, typename StringType4> 400 UString makeString(StringType1 string1, StringType2 string2, StringType3 string3, StringType4 string4) 401 { 402 StringTypeAdapter<StringType1> adapter1(string1); 403 StringTypeAdapter<StringType2> adapter2(string2); 404 StringTypeAdapter<StringType3> adapter3(string3); 405 StringTypeAdapter<StringType4> adapter4(string4); 406 407 UChar* buffer; 408 unsigned length = adapter1.length() + adapter2.length() + adapter3.length() + adapter4.length(); 409 PassRefPtr<UStringImpl> resultImpl = UStringImpl::tryCreateUninitialized(length, buffer); 
410 if (!resultImpl) 411 return UString(); 412 413 UChar* result = buffer; 414 adapter1.writeTo(result); 415 result += adapter1.length(); 416 adapter2.writeTo(result); 417 result += adapter2.length(); 418 adapter3.writeTo(result); 419 result += adapter3.length(); 420 adapter4.writeTo(result); 421 422 return resultImpl; 423 } 424 425 template<typename StringType1, typename StringType2, typename StringType3, typename StringType4, typename StringType5> 426 UString makeString(StringType1 string1, StringType2 string2, StringType3 string3, StringType4 string4, StringType5 string5) 427 { 428 StringTypeAdapter<StringType1> adapter1(string1); 429 StringTypeAdapter<StringType2> adapter2(string2); 430 StringTypeAdapter<StringType3> adapter3(string3); 431 StringTypeAdapter<StringType4> adapter4(string4); 432 StringTypeAdapter<StringType5> adapter5(string5); 433 434 UChar* buffer; 435 unsigned length = adapter1.length() + adapter2.length() + adapter3.length() + adapter4.length() + adapter5.length(); 436 PassRefPtr<UStringImpl> resultImpl = UStringImpl::tryCreateUninitialized(length, buffer); 437 if (!resultImpl) 438 return UString(); 439 440 UChar* result = buffer; 441 adapter1.writeTo(result); 442 result += adapter1.length(); 443 adapter2.writeTo(result); 444 result += adapter2.length(); 445 adapter3.writeTo(result); 446 result += adapter3.length(); 447 adapter4.writeTo(result); 448 result += adapter4.length(); 449 adapter5.writeTo(result); 450 451 return resultImpl; 452 } 453 454 template<typename StringType1, typename StringType2, typename StringType3, typename StringType4, typename StringType5, typename StringType6> 455 UString makeString(StringType1 string1, StringType2 string2, StringType3 string3, StringType4 string4, StringType5 string5, StringType6 string6) 456 { 457 StringTypeAdapter<StringType1> adapter1(string1); 458 StringTypeAdapter<StringType2> adapter2(string2); 459 StringTypeAdapter<StringType3> adapter3(string3); 460 StringTypeAdapter<StringType4> 
adapter4(string4); 461 StringTypeAdapter<StringType5> adapter5(string5); 462 StringTypeAdapter<StringType6> adapter6(string6); 463 464 UChar* buffer; 465 unsigned length = adapter1.length() + adapter2.length() + adapter3.length() + adapter4.length() + adapter5.length() + adapter6.length(); 466 PassRefPtr<UStringImpl> resultImpl = UStringImpl::tryCreateUninitialized(length, buffer); 467 if (!resultImpl) 468 return UString(); 469 470 UChar* result = buffer; 471 adapter1.writeTo(result); 472 result += adapter1.length(); 473 adapter2.writeTo(result); 474 result += adapter2.length(); 475 adapter3.writeTo(result); 476 result += adapter3.length(); 477 adapter4.writeTo(result); 478 result += adapter4.length(); 479 adapter5.writeTo(result); 480 result += adapter5.length(); 481 adapter6.writeTo(result); 482 483 return resultImpl; 484 } 485 486 template<typename StringType1, typename StringType2, typename StringType3, typename StringType4, typename StringType5, typename StringType6, typename StringType7> 487 UString makeString(StringType1 string1, StringType2 string2, StringType3 string3, StringType4 string4, StringType5 string5, StringType6 string6, StringType7 string7) 488 { 489 StringTypeAdapter<StringType1> adapter1(string1); 490 StringTypeAdapter<StringType2> adapter2(string2); 491 StringTypeAdapter<StringType3> adapter3(string3); 492 StringTypeAdapter<StringType4> adapter4(string4); 493 StringTypeAdapter<StringType5> adapter5(string5); 494 StringTypeAdapter<StringType6> adapter6(string6); 495 StringTypeAdapter<StringType7> adapter7(string7); 496 497 UChar* buffer; 498 unsigned length = adapter1.length() + adapter2.length() + adapter3.length() + adapter4.length() + adapter5.length() + adapter6.length() + adapter7.length(); 499 PassRefPtr<UStringImpl> resultImpl = UStringImpl::tryCreateUninitialized(length, buffer); 500 if (!resultImpl) 501 return UString(); 502 503 UChar* result = buffer; 504 adapter1.writeTo(result); 505 result += adapter1.length(); 506 
adapter2.writeTo(result); 507 result += adapter2.length(); 508 adapter3.writeTo(result); 509 result += adapter3.length(); 510 adapter4.writeTo(result); 511 result += adapter4.length(); 512 adapter5.writeTo(result); 513 result += adapter5.length(); 514 adapter6.writeTo(result); 515 result += adapter6.length(); 516 adapter7.writeTo(result); 517 518 return resultImpl; 519 } 520 521 template<typename StringType1, typename StringType2, typename StringType3, typename StringType4, typename StringType5, typename StringType6, typename StringType7, typename StringType8> 522 UString makeString(StringType1 string1, StringType2 string2, StringType3 string3, StringType4 string4, StringType5 string5, StringType6 string6, StringType7 string7, StringType8 string8) 523 { 524 StringTypeAdapter<StringType1> adapter1(string1); 525 StringTypeAdapter<StringType2> adapter2(string2); 526 StringTypeAdapter<StringType3> adapter3(string3); 527 StringTypeAdapter<StringType4> adapter4(string4); 528 StringTypeAdapter<StringType5> adapter5(string5); 529 StringTypeAdapter<StringType6> adapter6(string6); 530 StringTypeAdapter<StringType7> adapter7(string7); 531 StringTypeAdapter<StringType8> adapter8(string8); 532 533 UChar* buffer; 534 unsigned length = adapter1.length() + adapter2.length() + adapter3.length() + adapter4.length() + adapter5.length() + adapter6.length() + adapter7.length() + adapter8.length(); 535 PassRefPtr<UStringImpl> resultImpl = UStringImpl::tryCreateUninitialized(length, buffer); 536 if (!resultImpl) 537 return UString(); 538 539 UChar* result = buffer; 540 adapter1.writeTo(result); 541 result += adapter1.length(); 542 adapter2.writeTo(result); 543 result += adapter2.length(); 544 adapter3.writeTo(result); 545 result += adapter3.length(); 546 adapter4.writeTo(result); 547 result += adapter4.length(); 548 adapter5.writeTo(result); 549 result += adapter5.length(); 550 adapter6.writeTo(result); 551 result += adapter6.length(); 552 adapter7.writeTo(result); 553 result += 
adapter7.length(); 554 adapter8.writeTo(result); 555 556 return resultImpl; 557 } 558 559} // namespace JSC 560 561namespace WTF { 562 563 template<typename T> struct DefaultHash; 564 template<typename T> struct StrHash; 565 566 template<> struct StrHash<JSC::UString::Rep*> { 567 static unsigned hash(const JSC::UString::Rep* key) { return key->hash(); } 568 static bool equal(const JSC::UString::Rep* a, const JSC::UString::Rep* b) { return JSC::equal(a, b); } 569 static const bool safeToCompareToEmptyOrDeleted = false; 570 }; 571 572 template<> struct StrHash<RefPtr<JSC::UString::Rep> > : public StrHash<JSC::UString::Rep*> { 573 using StrHash<JSC::UString::Rep*>::hash; 574 static unsigned hash(const RefPtr<JSC::UString::Rep>& key) { return key->hash(); } 575 using StrHash<JSC::UString::Rep*>::equal; 576 static bool equal(const RefPtr<JSC::UString::Rep>& a, const RefPtr<JSC::UString::Rep>& b) { return JSC::equal(a.get(), b.get()); } 577 static bool equal(const JSC::UString::Rep* a, const RefPtr<JSC::UString::Rep>& b) { return JSC::equal(a, b.get()); } 578 static bool equal(const RefPtr<JSC::UString::Rep>& a, const JSC::UString::Rep* b) { return JSC::equal(a.get(), b); } 579 580 static const bool safeToCompareToEmptyOrDeleted = false; 581 }; 582 583 template<> struct DefaultHash<JSC::UString::Rep*> { 584 typedef StrHash<JSC::UString::Rep*> Hash; 585 }; 586 587 template<> struct DefaultHash<RefPtr<JSC::UString::Rep> > { 588 typedef StrHash<RefPtr<JSC::UString::Rep> > Hash; 589 590 }; 591 592} // namespace WTF 593 594#endif 595