1/*
2 *  Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
3 *  Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
4 *  Copyright (C) 2007 Cameron Zwarich (cwzwarich@uwaterloo.ca)
5 *  Copyright (C) 2009 Google Inc. All rights reserved.
6 *
7 *  This library is free software; you can redistribute it and/or
8 *  modify it under the terms of the GNU Library General Public
9 *  License as published by the Free Software Foundation; either
10 *  version 2 of the License, or (at your option) any later version.
11 *
12 *  This library is distributed in the hope that it will be useful,
13 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
14 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
15 *  Library General Public License for more details.
16 *
17 *  You should have received a copy of the GNU Library General Public License
18 *  along with this library; see the file COPYING.LIB.  If not, write to
19 *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
20 *  Boston, MA 02110-1301, USA.
21 *
22 */
23
24#include "config.h"
25#include "UString.h"
26
27#include "JSGlobalObjectFunctions.h"
28#include "Collector.h"
29#include "dtoa.h"
30#include "Identifier.h"
31#include "Operations.h"
32#include <ctype.h>
33#include <limits.h>
34#include <limits>
35#include <math.h>
36#include <stdio.h>
37#include <stdlib.h>
38#include <string.h>
39#include <wtf/ASCIICType.h>
40#include <wtf/Assertions.h>
41#include <wtf/MathExtras.h>
42#include <wtf/StringExtras.h>
43#include <wtf/Vector.h>
44#include <wtf/unicode/UTF8.h>
45#include <wtf/StringExtras.h>
46
47#if HAVE(STRINGS_H)
48#include <strings.h>
49#endif
50
51using namespace WTF;
52using namespace WTF::Unicode;
53using namespace std;
54
55namespace JSC {
56
// Floating-point constants defined outside this file. UString::toDouble()
// returns NaN for unparsable input and uses Inf when recognising "Infinity".
extern const double NaN;
extern const double Inf;
59
60CString::CString(const char* c)
61    : m_length(strlen(c))
62    , m_data(new char[m_length + 1])
63{
64    memcpy(m_data, c, m_length + 1);
65}
66
67CString::CString(const char* c, size_t length)
68    : m_length(length)
69    , m_data(new char[length + 1])
70{
71    memcpy(m_data, c, m_length);
72    m_data[m_length] = 0;
73}
74
75CString::CString(const CString& b)
76{
77    m_length = b.m_length;
78    if (b.m_data) {
79        m_data = new char[m_length + 1];
80        memcpy(m_data, b.m_data, m_length + 1);
81    } else
82        m_data = 0;
83}
84
// Releases the character buffer owned by this string. delete[] on a null
// pointer is a no-op, so a CString without a buffer needs no special case.
CString::~CString()
{
    delete [] m_data;
}
89
90CString CString::adopt(char* c, size_t length)
91{
92    CString s;
93    s.m_data = c;
94    s.m_length = length;
95    return s;
96}
97
98CString& CString::append(const CString& t)
99{
100    char* n;
101    n = new char[m_length + t.m_length + 1];
102    if (m_length)
103        memcpy(n, m_data, m_length);
104    if (t.m_length)
105        memcpy(n + m_length, t.m_data, t.m_length);
106    m_length += t.m_length;
107    n[m_length] = 0;
108
109    delete [] m_data;
110    m_data = n;
111
112    return *this;
113}
114
115CString& CString::operator=(const char* c)
116{
117    if (m_data)
118        delete [] m_data;
119    m_length = strlen(c);
120    m_data = new char[m_length + 1];
121    memcpy(m_data, c, m_length + 1);
122
123    return *this;
124}
125
126CString& CString::operator=(const CString& str)
127{
128    if (this == &str)
129        return *this;
130
131    if (m_data)
132        delete [] m_data;
133    m_length = str.m_length;
134    if (str.m_data) {
135        m_data = new char[m_length + 1];
136        memcpy(m_data, str.m_data, m_length + 1);
137    } else
138        m_data = 0;
139
140    return *this;
141}
142
143bool operator==(const CString& c1, const CString& c2)
144{
145    size_t len = c1.size();
146    return len == c2.size() && (len == 0 || memcmp(c1.c_str(), c2.c_str(), len) == 0);
147}
148
// These static strings are immutable, except for rc, whose initial value is chosen to
// reduce the possibility of it becoming zero due to ref/deref not being thread-safe.
// Character storage whose address is handed to the shared empty string when
// initializeUString() creates it.
static UChar sharedEmptyChar;
// Shared empty-string implementation; allocated by initializeUString().
UStringImpl* UStringImpl::s_empty;

// Shared null representation and null UString; both allocated by initializeUString().
UString::Rep* UString::s_nullRep;
UString* UString::s_nullUString;
156
// Allocates the process-wide shared string objects declared above. Nothing in
// this function frees them or guards against being called twice.
void initializeUString()
{
    // Empty string: points at sharedEmptyChar, with 0 as the second argument
    // (presumably the length — confirm against the UStringImpl constructor).
    UStringImpl::s_empty = new UStringImpl(&sharedEmptyChar, 0, UStringImpl::ConstructStaticString);

    // Null string: no character data at all.
    UString::s_nullRep = new UStringImpl(0, 0, UStringImpl::ConstructStaticString);
    // NOTE(review): the default UString constructor may rely on s_nullRep already
    // being set, so keep this line after the assignment above — confirm in UString.h.
    UString::s_nullUString = new UString;
}
164
// Constructs a UString from a C string by delegating to Rep::create(c).
// NOTE(review): handling of a null |c| is decided by Rep::create — confirm there.
UString::UString(const char* c)
    : m_rep(Rep::create(c))
{
}
169
// Constructs a UString from |c| and an explicit |length| by delegating to
// Rep::create(c, length).
UString::UString(const char* c, int length)
    : m_rep(Rep::create(c, length))
{
}
174
175UString::UString(const UChar* c, int length)
176{
177    if (length == 0)
178        m_rep = &Rep::empty();
179    else
180        m_rep = Rep::create(c, length);
181}
182
183UString UString::from(int i)
184{
185    UChar buf[1 + sizeof(i) * 3];
186    UChar* end = buf + sizeof(buf) / sizeof(UChar);
187    UChar* p = end;
188
189    if (i == 0)
190        *--p = '0';
191    else if (i == INT_MIN) {
192        char minBuf[1 + sizeof(i) * 3];
193        sprintf(minBuf, "%d", INT_MIN);
194        return UString(minBuf);
195    } else {
196        bool negative = false;
197        if (i < 0) {
198            negative = true;
199            i = -i;
200        }
201        while (i) {
202            *--p = static_cast<unsigned short>((i % 10) + '0');
203            i /= 10;
204        }
205        if (negative)
206            *--p = '-';
207    }
208
209    return UString(p, static_cast<int>(end - p));
210}
211
212UString UString::from(long long i)
213{
214    UChar buf[1 + sizeof(i) * 3];
215    UChar* end = buf + sizeof(buf) / sizeof(UChar);
216    UChar* p = end;
217
218    if (i == 0)
219        *--p = '0';
220    else if (i == std::numeric_limits<long long>::min()) {
221        char minBuf[1 + sizeof(i) * 3];
222#if OS(WINDOWS)
223        snprintf(minBuf, sizeof(minBuf) - 1, "%I64d", std::numeric_limits<long long>::min());
224#else
225        snprintf(minBuf, sizeof(minBuf) - 1, "%lld", std::numeric_limits<long long>::min());
226#endif
227        return UString(minBuf);
228    } else {
229        bool negative = false;
230        if (i < 0) {
231            negative = true;
232            i = -i;
233        }
234        while (i) {
235            *--p = static_cast<unsigned short>((i % 10) + '0');
236            i /= 10;
237        }
238        if (negative)
239            *--p = '-';
240    }
241
242    return UString(p, static_cast<int>(end - p));
243}
244
245UString UString::from(unsigned int u)
246{
247    UChar buf[sizeof(u) * 3];
248    UChar* end = buf + sizeof(buf) / sizeof(UChar);
249    UChar* p = end;
250
251    if (u == 0)
252        *--p = '0';
253    else {
254        while (u) {
255            *--p = static_cast<unsigned short>((u % 10) + '0');
256            u /= 10;
257        }
258    }
259
260    return UString(p, static_cast<int>(end - p));
261}
262
263UString UString::from(long l)
264{
265    UChar buf[1 + sizeof(l) * 3];
266    UChar* end = buf + sizeof(buf) / sizeof(UChar);
267    UChar* p = end;
268
269    if (l == 0)
270        *--p = '0';
271    else if (l == LONG_MIN) {
272        char minBuf[1 + sizeof(l) * 3];
273        sprintf(minBuf, "%ld", LONG_MIN);
274        return UString(minBuf);
275    } else {
276        bool negative = false;
277        if (l < 0) {
278            negative = true;
279            l = -l;
280        }
281        while (l) {
282            *--p = static_cast<unsigned short>((l % 10) + '0');
283            l /= 10;
284        }
285        if (negative)
286            *--p = '-';
287    }
288
289    return UString(p, static_cast<int>(end - p));
290}
291
// Returns the string form of |d| produced by doubleToStringInJavaScriptFormat().
// NOTE(review): DtoaBuffer and the helper are declared outside this file
// (presumably dtoa.h); this assumes the helper fills |buffer| and stores the
// number of characters written in |length| — confirm there.
UString UString::from(double d)
{
    DtoaBuffer buffer;
    unsigned length;
    doubleToStringInJavaScriptFormat(d, buffer, &length);
    return UString(buffer, length);
}
299
300bool UString::getCString(CStringBuffer& buffer) const
301{
302    int length = size();
303    int neededSize = length + 1;
304    buffer.resize(neededSize);
305    char* buf = buffer.data();
306
307    UChar ored = 0;
308    const UChar* p = data();
309    char* q = buf;
310    const UChar* limit = p + length;
311    while (p != limit) {
312        UChar c = p[0];
313        ored |= c;
314        *q = static_cast<char>(c);
315        ++p;
316        ++q;
317    }
318    *q = '\0';
319
320    return !(ored & 0xFF00);
321}
322
323char* UString::ascii() const
324{
325    static char* asciiBuffer = 0;
326
327    int length = size();
328    int neededSize = length + 1;
329    delete[] asciiBuffer;
330    asciiBuffer = new char[neededSize];
331
332    const UChar* p = data();
333    char* q = asciiBuffer;
334    const UChar* limit = p + length;
335    while (p != limit) {
336        *q = static_cast<char>(p[0]);
337        ++p;
338        ++q;
339    }
340    *q = '\0';
341
342    return asciiBuffer;
343}
344
345bool UString::is8Bit() const
346{
347    const UChar* u = data();
348    const UChar* limit = u + size();
349    while (u < limit) {
350        if (u[0] > 0xFF)
351            return false;
352        ++u;
353    }
354
355    return true;
356}
357
358UChar UString::operator[](int pos) const
359{
360    if (pos >= size())
361        return '\0';
362    return data()[pos];
363}
364
// Parses the string as a number and returns it, or NaN when it cannot be parsed.
//
// Accepted forms, after optional leading ASCII white space: a "0x"/"0X" hex
// literal, anything WTF::strtod() accepts, or an optionally signed "Infinity".
// Trailing ASCII white space is always allowed.
//
// tolerateTrailingJunk: when false, any other character after the number turns
//     the result into NaN; when true, the number parsed so far is returned.
// tolerateEmptyString: when true, an empty or all-white-space string yields 0
//     instead of NaN.
//
// A string containing a code unit above 0xFF is rejected (NaN) up front; see
// the FIXME below.
double UString::toDouble(bool tolerateTrailingJunk, bool tolerateEmptyString) const
{
    // Fast path for single-character strings: one digit, one space, or not a number.
    if (size() == 1) {
        UChar c = data()[0];
        if (isASCIIDigit(c))
            return c - '0';
        if (isASCIISpace(c) && tolerateEmptyString)
            return 0;
        return NaN;
    }

    // FIXME: If tolerateTrailingJunk is true, then we want to tolerate non-8-bit junk
    // after the number, so this is too strict a check.
    // getCString() returns false when some code unit does not fit in 8 bits.
    CStringBuffer s;
    if (!getCString(s))
        return NaN;
    const char* c = s.data();

    // skip leading white space
    while (isASCIISpace(*c))
        c++;

    // empty string ?
    if (*c == '\0')
        return tolerateEmptyString ? 0.0 : NaN;

    double d;

    // hex number ?
    if (*c == '0' && (*(c + 1) == 'x' || *(c + 1) == 'X')) {
        const char* firstDigitPosition = c + 2;
        // Step onto the 'x'; the pre-increment in the loop condition then moves to
        // the first hex digit.
        c++;
        d = 0.0;
        while (*(++c)) {
            if (*c >= '0' && *c <= '9')
                d = d * 16.0 + *c - '0';
            else if ((*c >= 'A' && *c <= 'F') || (*c >= 'a' && *c <= 'f'))
                // Masking with 0xdf clears bit 0x20, folding 'a'-'f' onto 'A'-'F'.
                d = d * 16.0 + (*c & 0xdf) - 'A' + 10.0;
            else
                break;
        }

        // NOTE(review): mantissaOverflowLowerBound and parseIntOverflow are declared
        // outside this file; presumably this re-parses the digits when the running
        // sum is too large to have been accumulated exactly — confirm.
        if (d >= mantissaOverflowLowerBound)
            d = parseIntOverflow(firstDigitPosition, c - firstDigitPosition, 16);
    } else {
        // regular number ?
        char* end;
        d = WTF::strtod(c, &end);
        // Accept the strtod result when it is finite and either non-zero or at
        // least one character was consumed.
        if ((d != 0.0 || end != c) && d != Inf && d != -Inf) {
            c = end;
        } else {
            double sign = 1.0;

            if (*c == '+')
                c++;
            else if (*c == '-') {
                sign = -1.0;
                c++;
            }

            // We used strtod() to do the conversion. However, strtod() handles
            // infinite values slightly differently than JavaScript in that it
            // converts the string "inf" with any capitalization to infinity,
            // whereas the ECMA spec requires that it be converted to NaN.

            // The && chain short-circuits at the first mismatch, so it never reads
            // past the terminating NUL.
            if (c[0] == 'I' && c[1] == 'n' && c[2] == 'f' && c[3] == 'i' && c[4] == 'n' && c[5] == 'i' && c[6] == 't' && c[7] == 'y') {
                d = sign * Inf;
                c += 8;
            } else if ((d == Inf || d == -Inf) && *c != 'I' && *c != 'i')
                // strtod produced an infinity from text that does not start with
                // 'I'/'i' (so not an "inf" spelling); keep that value.
                c = end;
            else
                return NaN;
        }
    }

    // allow trailing white space
    while (isASCIISpace(*c))
        c++;
    // don't allow anything after - unless tolerant=true
    if (!tolerateTrailingJunk && *c != '\0')
        d = NaN;

    return d;
}
449
// Convenience overload: parses with the given trailing-junk policy and treats
// an empty or all-white-space string as 0.
double UString::toDouble(bool tolerateTrailingJunk) const
{
    return toDouble(tolerateTrailingJunk, true);
}
454
// Convenience overload: trailing junk is not tolerated (it yields NaN), and an
// empty or all-white-space string is treated as 0.
double UString::toDouble() const
{
    return toDouble(false, true);
}
459
460uint32_t UString::toUInt32(bool* ok) const
461{
462    double d = toDouble();
463    bool b = true;
464
465    if (d != static_cast<uint32_t>(d)) {
466        b = false;
467        d = 0;
468    }
469
470    if (ok)
471        *ok = b;
472
473    return static_cast<uint32_t>(d);
474}
475
476uint32_t UString::toUInt32(bool* ok, bool tolerateEmptyString) const
477{
478    double d = toDouble(false, tolerateEmptyString);
479    bool b = true;
480
481    if (d != static_cast<uint32_t>(d)) {
482        b = false;
483        d = 0;
484    }
485
486    if (ok)
487        *ok = b;
488
489    return static_cast<uint32_t>(d);
490}
491
492uint32_t UString::toStrictUInt32(bool* ok) const
493{
494    if (ok)
495        *ok = false;
496
497    // Empty string is not OK.
498    int len = m_rep->size();
499    if (len == 0)
500        return 0;
501    const UChar* p = m_rep->data();
502    unsigned short c = p[0];
503
504    // If the first digit is 0, only 0 itself is OK.
505    if (c == '0') {
506        if (len == 1 && ok)
507            *ok = true;
508        return 0;
509    }
510
511    // Convert to UInt32, checking for overflow.
512    uint32_t i = 0;
513    while (1) {
514        // Process character, turning it into a digit.
515        if (c < '0' || c > '9')
516            return 0;
517        const unsigned d = c - '0';
518
519        // Multiply by 10, checking for overflow out of 32 bits.
520        if (i > 0xFFFFFFFFU / 10)
521            return 0;
522        i *= 10;
523
524        // Add in the digit, checking for overflow out of 32 bits.
525        const unsigned max = 0xFFFFFFFFU - d;
526        if (i > max)
527            return 0;
528        i += d;
529
530        // Handle end of string.
531        if (--len == 0) {
532            if (ok)
533                *ok = true;
534            return i;
535        }
536
537        // Get next character.
538        c = *(++p);
539    }
540}
541
542int UString::find(const UString& f, int pos) const
543{
544    int fsz = f.size();
545
546    if (pos < 0)
547        pos = 0;
548
549    if (fsz == 1) {
550        UChar ch = f[0];
551        const UChar* end = data() + size();
552        for (const UChar* c = data() + pos; c < end; c++) {
553            if (*c == ch)
554                return static_cast<int>(c - data());
555        }
556        return -1;
557    }
558
559    int sz = size();
560    if (sz < fsz)
561        return -1;
562    if (fsz == 0)
563        return pos;
564    const UChar* end = data() + sz - fsz;
565    int fsizeminusone = (fsz - 1) * sizeof(UChar);
566    const UChar* fdata = f.data();
567    unsigned short fchar = fdata[0];
568    ++fdata;
569    for (const UChar* c = data() + pos; c <= end; c++) {
570        if (c[0] == fchar && !memcmp(c + 1, fdata, fsizeminusone))
571            return static_cast<int>(c - data());
572    }
573
574    return -1;
575}
576
577int UString::find(UChar ch, int pos) const
578{
579    if (pos < 0)
580        pos = 0;
581    const UChar* end = data() + size();
582    for (const UChar* c = data() + pos; c < end; c++) {
583        if (*c == ch)
584            return static_cast<int>(c - data());
585    }
586
587    return -1;
588}
589
590int UString::rfind(const UString& f, int pos) const
591{
592    int sz = size();
593    int fsz = f.size();
594    if (sz < fsz)
595        return -1;
596    if (pos < 0)
597        pos = 0;
598    if (pos > sz - fsz)
599        pos = sz - fsz;
600    if (fsz == 0)
601        return pos;
602    int fsizeminusone = (fsz - 1) * sizeof(UChar);
603    const UChar* fdata = f.data();
604    for (const UChar* c = data() + pos; c >= data(); c--) {
605        if (*c == *fdata && !memcmp(c + 1, fdata + 1, fsizeminusone))
606            return static_cast<int>(c - data());
607    }
608
609    return -1;
610}
611
612int UString::rfind(UChar ch, int pos) const
613{
614    if (isEmpty())
615        return -1;
616    if (pos + 1 >= size())
617        pos = size() - 1;
618    for (const UChar* c = data() + pos; c >= data(); c--) {
619        if (*c == ch)
620            return static_cast<int>(c - data());
621    }
622
623    return -1;
624}
625
626UString UString::substr(int pos, int len) const
627{
628    int s = size();
629
630    if (pos < 0)
631        pos = 0;
632    else if (pos >= s)
633        pos = s;
634    if (len < 0)
635        len = s;
636    if (pos + len >= s)
637        len = s - pos;
638
639    if (pos == 0 && len == s)
640        return *this;
641
642    return UString(Rep::create(m_rep, pos, len));
643}
644
645bool operator==(const UString& s1, const char *s2)
646{
647    if (s2 == 0)
648        return s1.isEmpty();
649
650    const UChar* u = s1.data();
651    const UChar* uend = u + s1.size();
652    while (u != uend && *s2) {
653        if (u[0] != (unsigned char)*s2)
654            return false;
655        s2++;
656        u++;
657    }
658
659    return u == uend && *s2 == 0;
660}
661
662bool operator<(const UString& s1, const UString& s2)
663{
664    const int l1 = s1.size();
665    const int l2 = s2.size();
666    const int lmin = l1 < l2 ? l1 : l2;
667    const UChar* c1 = s1.data();
668    const UChar* c2 = s2.data();
669    int l = 0;
670    while (l < lmin && *c1 == *c2) {
671        c1++;
672        c2++;
673        l++;
674    }
675    if (l < lmin)
676        return (c1[0] < c2[0]);
677
678    return (l1 < l2);
679}
680
681bool operator>(const UString& s1, const UString& s2)
682{
683    const int l1 = s1.size();
684    const int l2 = s2.size();
685    const int lmin = l1 < l2 ? l1 : l2;
686    const UChar* c1 = s1.data();
687    const UChar* c2 = s2.data();
688    int l = 0;
689    while (l < lmin && *c1 == *c2) {
690        c1++;
691        c2++;
692        l++;
693    }
694    if (l < lmin)
695        return (c1[0] > c2[0]);
696
697    return (l1 > l2);
698}
699
700int compare(const UString& s1, const UString& s2)
701{
702    const int l1 = s1.size();
703    const int l2 = s2.size();
704    const int lmin = l1 < l2 ? l1 : l2;
705    const UChar* c1 = s1.data();
706    const UChar* c2 = s2.data();
707    int l = 0;
708    while (l < lmin && *c1 == *c2) {
709        c1++;
710        c2++;
711        l++;
712    }
713
714    if (l < lmin)
715        return (c1[0] > c2[0]) ? 1 : -1;
716
717    if (l1 == l2)
718        return 0;
719
720    return (l1 > l2) ? 1 : -1;
721}
722
723bool equal(const UString::Rep* r, const UString::Rep* b)
724{
725    int length = r->size();
726    if (length != b->size())
727        return false;
728    const UChar* d = r->data();
729    const UChar* s = b->data();
730    for (int i = 0; i != length; ++i) {
731        if (d[i] != s[i])
732            return false;
733    }
734    return true;
735}
736
737CString UString::UTF8String(bool strict) const
738{
739    // Allocate a buffer big enough to hold all the characters.
740    const int length = size();
741    Vector<char, 1024> buffer(length * 3);
742
743    // Convert to runs of 8-bit characters.
744    char* p = buffer.data();
745    const UChar* d = reinterpret_cast<const UChar*>(&data()[0]);
746    ConversionResult result = convertUTF16ToUTF8(&d, d + length, &p, p + buffer.size(), strict);
747    if (result != conversionOK)
748        return CString();
749
750    return CString(buffer.data(), p - buffer.data());
751}
752
753} // namespace JSC
754