UString.h revision d0825bca7fe65beaee391d30da42e937db621564
1/*
2 *  Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
3 *  Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
4 *  Copyright (C) 2009 Google Inc. All rights reserved.
5 *
6 *  This library is free software; you can redistribute it and/or
7 *  modify it under the terms of the GNU Library General Public
8 *  License as published by the Free Software Foundation; either
9 *  version 2 of the License, or (at your option) any later version.
10 *
11 *  This library is distributed in the hope that it will be useful,
12 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
13 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 *  Library General Public License for more details.
15 *
16 *  You should have received a copy of the GNU Library General Public License
17 *  along with this library; see the file COPYING.LIB.  If not, write to
18 *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19 *  Boston, MA 02110-1301, USA.
20 *
21 */
22
23#ifndef UString_h
24#define UString_h
25
26#include "Collector.h"
27#include "UStringImpl.h"
28#include <stdint.h>
29#include <string.h>
30#include <wtf/Assertions.h>
31#include <wtf/CrossThreadRefCounted.h>
32#include <wtf/OwnFastMallocPtr.h>
33#include <wtf/PassRefPtr.h>
34#include <wtf/PtrAndFlags.h>
35#include <wtf/RefPtr.h>
36#include <wtf/Vector.h>
37#include <wtf/unicode/Unicode.h>
38
39namespace JSC {
40
41    using WTF::PlacementNewAdoptType;
42    using WTF::PlacementNewAdopt;
43
44    class CString {
45    public:
46        CString()
47            : m_length(0)
48            , m_data(0)
49        {
50        }
51
52        CString(const char*);
53        CString(const char*, size_t);
54        CString(const CString&);
55
56        ~CString();
57
58        static CString adopt(char*, size_t); // buffer should be allocated with new[].
59
60        CString& append(const CString&);
61        CString& operator=(const char* c);
62        CString& operator=(const CString&);
63        CString& operator+=(const CString& c) { return append(c); }
64
65        size_t size() const { return m_length; }
66        const char* c_str() const { return m_data; }
67
68    private:
69        size_t m_length;
70        char* m_data;
71    };
72
73    bool operator==(const CString&, const CString&);
74
75    typedef Vector<char, 32> CStringBuffer;
76
77    class UString {
78        friend class JIT;
79
80    public:
81        typedef UStringImpl Rep;
82
83    public:
84        // UString constructors passed char*s assume ISO Latin-1 encoding; for UTF8 use 'createFromUTF8', below.
85        UString();
86        UString(const char*); // Constructor for null-terminated string.
87        UString(const char*, int length);
88        UString(const UChar*, int length);
89        UString(const Vector<UChar>& buffer);
90
91        UString(const UString& s)
92            : m_rep(s.m_rep)
93        {
94        }
95
96        // Special constructor for cases where we overwrite an object in place.
97        UString(PlacementNewAdoptType)
98            : m_rep(PlacementNewAdopt)
99        {
100        }
101
102        ~UString()
103        {
104        }
105
106        template<size_t inlineCapacity>
107        static PassRefPtr<UStringImpl> adopt(Vector<UChar, inlineCapacity>& vector)
108        {
109            return Rep::adopt(vector);
110        }
111
112        static UString createFromUTF8(const char*);
113
114        static UString from(int);
115        static UString from(long long);
116        static UString from(unsigned int);
117        static UString from(long);
118        static UString from(double);
119
120        struct Range {
121        public:
122            Range(int pos, int len)
123                : position(pos)
124                , length(len)
125            {
126            }
127
128            Range()
129            {
130            }
131
132            int position;
133            int length;
134        };
135
136        UString spliceSubstringsWithSeparators(const Range* substringRanges, int rangeCount, const UString* separators, int separatorCount) const;
137
138        UString replaceRange(int rangeStart, int RangeEnd, const UString& replacement) const;
139
140        bool getCString(CStringBuffer&) const;
141
142        // NOTE: This method should only be used for *debugging* purposes as it
143        // is neither Unicode safe nor free from side effects nor thread-safe.
144        char* ascii() const;
145
146        /**
147         * Convert the string to UTF-8, assuming it is UTF-16 encoded.
148         * In non-strict mode, this function is tolerant of badly formed UTF-16, it
149         * can create UTF-8 strings that are invalid because they have characters in
150         * the range U+D800-U+DDFF, U+FFFE, or U+FFFF, but the UTF-8 string is
151         * guaranteed to be otherwise valid.
152         * In strict mode, error is returned as null CString.
153         */
154        CString UTF8String(bool strict = false) const;
155
156        UString& operator=(const char*c);
157
158        const UChar* data() const { return m_rep->data(); }
159
160        bool isNull() const { return m_rep == &Rep::null(); }
161        bool isEmpty() const { return !m_rep->size(); }
162
163        bool is8Bit() const;
164
165        int size() const { return m_rep->size(); }
166
167        UChar operator[](int pos) const;
168
169        double toDouble(bool tolerateTrailingJunk, bool tolerateEmptyString) const;
170        double toDouble(bool tolerateTrailingJunk) const;
171        double toDouble() const;
172
173        uint32_t toUInt32(bool* ok = 0) const;
174        uint32_t toUInt32(bool* ok, bool tolerateEmptyString) const;
175        uint32_t toStrictUInt32(bool* ok = 0) const;
176
177        unsigned toArrayIndex(bool* ok = 0) const;
178
179        int find(const UString& f, int pos = 0) const;
180        int find(UChar, int pos = 0) const;
181        int rfind(const UString& f, int pos) const;
182        int rfind(UChar, int pos) const;
183
184        UString substr(int pos = 0, int len = -1) const;
185
186        static const UString& null() { return *nullUString; }
187
188        Rep* rep() const { return m_rep.get(); }
189        static Rep* nullRep();
190
191        UString(PassRefPtr<Rep> r)
192            : m_rep(r)
193        {
194            ASSERT(m_rep);
195        }
196
197        size_t cost() const { return m_rep->cost(); }
198
199    private:
200        void makeNull();
201
202        RefPtr<Rep> m_rep;
203        static UString* nullUString;
204
205        friend void initializeUString();
206        friend bool operator==(const UString&, const UString&);
207    };
208
209    ALWAYS_INLINE bool operator==(const UString& s1, const UString& s2)
210    {
211        int size = s1.size();
212        switch (size) {
213        case 0:
214            return !s2.size();
215        case 1:
216            return s2.size() == 1 && s1.data()[0] == s2.data()[0];
217        case 2: {
218            if (s2.size() != 2)
219                return false;
220            const UChar* d1 = s1.data();
221            const UChar* d2 = s2.data();
222            return (d1[0] == d2[0]) & (d1[1] == d2[1]);
223        }
224        default:
225            return s2.size() == size && memcmp(s1.data(), s2.data(), size * sizeof(UChar)) == 0;
226        }
227    }
228
229
230    inline bool operator!=(const UString& s1, const UString& s2)
231    {
232        return !JSC::operator==(s1, s2);
233    }
234
235    bool operator<(const UString& s1, const UString& s2);
236    bool operator>(const UString& s1, const UString& s2);
237
238    bool operator==(const UString& s1, const char* s2);
239
240    inline bool operator!=(const UString& s1, const char* s2)
241    {
242        return !JSC::operator==(s1, s2);
243    }
244
245    inline bool operator==(const char *s1, const UString& s2)
246    {
247        return operator==(s2, s1);
248    }
249
250    inline bool operator!=(const char *s1, const UString& s2)
251    {
252        return !JSC::operator==(s1, s2);
253    }
254
255    int compare(const UString&, const UString&);
256
257    inline UString::UString()
258        : m_rep(&Rep::null())
259    {
260    }
261
262    // Rule from ECMA 15.2 about what an array index is.
263    // Must exactly match string form of an unsigned integer, and be less than 2^32 - 1.
264    inline unsigned UString::toArrayIndex(bool* ok) const
265    {
266        unsigned i = toStrictUInt32(ok);
267        if (ok && i >= 0xFFFFFFFFU)
268            *ok = false;
269        return i;
270    }
271
272    // We'd rather not do shared substring append for small strings, since
273    // this runs too much risk of a tiny initial string holding down a
274    // huge buffer.
275    // FIXME: this should be size_t but that would cause warnings until we
276    // fix UString sizes to be size_t instead of int
277    static const int minShareSize = Heap::minExtraCost / sizeof(UChar);
278
279    struct IdentifierRepHash : PtrHash<RefPtr<JSC::UString::Rep> > {
280        static unsigned hash(const RefPtr<JSC::UString::Rep>& key) { return key->existingHash(); }
281        static unsigned hash(JSC::UString::Rep* key) { return key->existingHash(); }
282    };
283
284    void initializeUString();
285
286    template<typename StringType>
287    class StringTypeAdapter {
288    };
289
290    template<>
291    class StringTypeAdapter<char*> {
292    public:
293        StringTypeAdapter<char*>(char* buffer)
294            : m_buffer((unsigned char*)buffer)
295            , m_length(strlen(buffer))
296        {
297        }
298
299        unsigned length() { return m_length; }
300
301        void writeTo(UChar* destination)
302        {
303            for (unsigned i = 0; i < m_length; ++i)
304                destination[i] = m_buffer[i];
305        }
306
307    private:
308        const unsigned char* m_buffer;
309        unsigned m_length;
310    };
311
312    template<>
313    class StringTypeAdapter<const char*> {
314    public:
315        StringTypeAdapter<const char*>(const char* buffer)
316            : m_buffer((unsigned char*)buffer)
317            , m_length(strlen(buffer))
318        {
319        }
320
321        unsigned length() { return m_length; }
322
323        void writeTo(UChar* destination)
324        {
325            for (unsigned i = 0; i < m_length; ++i)
326                destination[i] = m_buffer[i];
327        }
328
329    private:
330        const unsigned char* m_buffer;
331        unsigned m_length;
332    };
333
334    template<>
335    class StringTypeAdapter<UString> {
336    public:
337        StringTypeAdapter<UString>(UString& string)
338            : m_data(string.data())
339            , m_length(string.size())
340        {
341        }
342
343        unsigned length() { return m_length; }
344
345        void writeTo(UChar* destination)
346        {
347            for (unsigned i = 0; i < m_length; ++i)
348                destination[i] = m_data[i];
349        }
350
351    private:
352        const UChar* m_data;
353        unsigned m_length;
354    };
355
356    template<typename StringType1, typename StringType2>
357    UString makeString(StringType1 string1, StringType2 string2)
358    {
359        StringTypeAdapter<StringType1> adapter1(string1);
360        StringTypeAdapter<StringType2> adapter2(string2);
361
362        UChar* buffer;
363        unsigned length = adapter1.length() + adapter2.length();
364        PassRefPtr<UStringImpl> resultImpl = UStringImpl::tryCreateUninitialized(length, buffer);
365        if (!resultImpl)
366            return UString();
367
368        UChar* result = buffer;
369        adapter1.writeTo(result);
370        result += adapter1.length();
371        adapter2.writeTo(result);
372
373        return resultImpl;
374    }
375
376    template<typename StringType1, typename StringType2, typename StringType3>
377    UString makeString(StringType1 string1, StringType2 string2, StringType3 string3)
378    {
379        StringTypeAdapter<StringType1> adapter1(string1);
380        StringTypeAdapter<StringType2> adapter2(string2);
381        StringTypeAdapter<StringType3> adapter3(string3);
382
383        UChar* buffer;
384        unsigned length = adapter1.length() + adapter2.length() + adapter3.length();
385        PassRefPtr<UStringImpl> resultImpl = UStringImpl::tryCreateUninitialized(length, buffer);
386        if (!resultImpl)
387            return UString();
388
389        UChar* result = buffer;
390        adapter1.writeTo(result);
391        result += adapter1.length();
392        adapter2.writeTo(result);
393        result += adapter2.length();
394        adapter3.writeTo(result);
395
396        return resultImpl;
397    }
398
399    template<typename StringType1, typename StringType2, typename StringType3, typename StringType4>
400    UString makeString(StringType1 string1, StringType2 string2, StringType3 string3, StringType4 string4)
401    {
402        StringTypeAdapter<StringType1> adapter1(string1);
403        StringTypeAdapter<StringType2> adapter2(string2);
404        StringTypeAdapter<StringType3> adapter3(string3);
405        StringTypeAdapter<StringType4> adapter4(string4);
406
407        UChar* buffer;
408        unsigned length = adapter1.length() + adapter2.length() + adapter3.length() + adapter4.length();
409        PassRefPtr<UStringImpl> resultImpl = UStringImpl::tryCreateUninitialized(length, buffer);
410        if (!resultImpl)
411            return UString();
412
413        UChar* result = buffer;
414        adapter1.writeTo(result);
415        result += adapter1.length();
416        adapter2.writeTo(result);
417        result += adapter2.length();
418        adapter3.writeTo(result);
419        result += adapter3.length();
420        adapter4.writeTo(result);
421
422        return resultImpl;
423    }
424
425    template<typename StringType1, typename StringType2, typename StringType3, typename StringType4, typename StringType5>
426    UString makeString(StringType1 string1, StringType2 string2, StringType3 string3, StringType4 string4, StringType5 string5)
427    {
428        StringTypeAdapter<StringType1> adapter1(string1);
429        StringTypeAdapter<StringType2> adapter2(string2);
430        StringTypeAdapter<StringType3> adapter3(string3);
431        StringTypeAdapter<StringType4> adapter4(string4);
432        StringTypeAdapter<StringType5> adapter5(string5);
433
434        UChar* buffer;
435        unsigned length = adapter1.length() + adapter2.length() + adapter3.length() + adapter4.length() + adapter5.length();
436        PassRefPtr<UStringImpl> resultImpl = UStringImpl::tryCreateUninitialized(length, buffer);
437        if (!resultImpl)
438            return UString();
439
440        UChar* result = buffer;
441        adapter1.writeTo(result);
442        result += adapter1.length();
443        adapter2.writeTo(result);
444        result += adapter2.length();
445        adapter3.writeTo(result);
446        result += adapter3.length();
447        adapter4.writeTo(result);
448        result += adapter4.length();
449        adapter5.writeTo(result);
450
451        return resultImpl;
452    }
453
454    template<typename StringType1, typename StringType2, typename StringType3, typename StringType4, typename StringType5, typename StringType6>
455    UString makeString(StringType1 string1, StringType2 string2, StringType3 string3, StringType4 string4, StringType5 string5, StringType6 string6)
456    {
457        StringTypeAdapter<StringType1> adapter1(string1);
458        StringTypeAdapter<StringType2> adapter2(string2);
459        StringTypeAdapter<StringType3> adapter3(string3);
460        StringTypeAdapter<StringType4> adapter4(string4);
461        StringTypeAdapter<StringType5> adapter5(string5);
462        StringTypeAdapter<StringType6> adapter6(string6);
463
464        UChar* buffer;
465        unsigned length = adapter1.length() + adapter2.length() + adapter3.length() + adapter4.length() + adapter5.length() + adapter6.length();
466        PassRefPtr<UStringImpl> resultImpl = UStringImpl::tryCreateUninitialized(length, buffer);
467        if (!resultImpl)
468            return UString();
469
470        UChar* result = buffer;
471        adapter1.writeTo(result);
472        result += adapter1.length();
473        adapter2.writeTo(result);
474        result += adapter2.length();
475        adapter3.writeTo(result);
476        result += adapter3.length();
477        adapter4.writeTo(result);
478        result += adapter4.length();
479        adapter5.writeTo(result);
480        result += adapter5.length();
481        adapter6.writeTo(result);
482
483        return resultImpl;
484    }
485
486    template<typename StringType1, typename StringType2, typename StringType3, typename StringType4, typename StringType5, typename StringType6, typename StringType7>
487    UString makeString(StringType1 string1, StringType2 string2, StringType3 string3, StringType4 string4, StringType5 string5, StringType6 string6, StringType7 string7)
488    {
489        StringTypeAdapter<StringType1> adapter1(string1);
490        StringTypeAdapter<StringType2> adapter2(string2);
491        StringTypeAdapter<StringType3> adapter3(string3);
492        StringTypeAdapter<StringType4> adapter4(string4);
493        StringTypeAdapter<StringType5> adapter5(string5);
494        StringTypeAdapter<StringType6> adapter6(string6);
495        StringTypeAdapter<StringType7> adapter7(string7);
496
497        UChar* buffer;
498        unsigned length = adapter1.length() + adapter2.length() + adapter3.length() + adapter4.length() + adapter5.length() + adapter6.length() + adapter7.length();
499        PassRefPtr<UStringImpl> resultImpl = UStringImpl::tryCreateUninitialized(length, buffer);
500        if (!resultImpl)
501            return UString();
502
503        UChar* result = buffer;
504        adapter1.writeTo(result);
505        result += adapter1.length();
506        adapter2.writeTo(result);
507        result += adapter2.length();
508        adapter3.writeTo(result);
509        result += adapter3.length();
510        adapter4.writeTo(result);
511        result += adapter4.length();
512        adapter5.writeTo(result);
513        result += adapter5.length();
514        adapter6.writeTo(result);
515        result += adapter6.length();
516        adapter7.writeTo(result);
517
518        return resultImpl;
519    }
520
521    template<typename StringType1, typename StringType2, typename StringType3, typename StringType4, typename StringType5, typename StringType6, typename StringType7, typename StringType8>
522    UString makeString(StringType1 string1, StringType2 string2, StringType3 string3, StringType4 string4, StringType5 string5, StringType6 string6, StringType7 string7, StringType8 string8)
523    {
524        StringTypeAdapter<StringType1> adapter1(string1);
525        StringTypeAdapter<StringType2> adapter2(string2);
526        StringTypeAdapter<StringType3> adapter3(string3);
527        StringTypeAdapter<StringType4> adapter4(string4);
528        StringTypeAdapter<StringType5> adapter5(string5);
529        StringTypeAdapter<StringType6> adapter6(string6);
530        StringTypeAdapter<StringType7> adapter7(string7);
531        StringTypeAdapter<StringType8> adapter8(string8);
532
533        UChar* buffer;
534        unsigned length = adapter1.length() + adapter2.length() + adapter3.length() + adapter4.length() + adapter5.length() + adapter6.length() + adapter7.length() + adapter8.length();
535        PassRefPtr<UStringImpl> resultImpl = UStringImpl::tryCreateUninitialized(length, buffer);
536        if (!resultImpl)
537            return UString();
538
539        UChar* result = buffer;
540        adapter1.writeTo(result);
541        result += adapter1.length();
542        adapter2.writeTo(result);
543        result += adapter2.length();
544        adapter3.writeTo(result);
545        result += adapter3.length();
546        adapter4.writeTo(result);
547        result += adapter4.length();
548        adapter5.writeTo(result);
549        result += adapter5.length();
550        adapter6.writeTo(result);
551        result += adapter6.length();
552        adapter7.writeTo(result);
553        result += adapter7.length();
554        adapter8.writeTo(result);
555
556        return resultImpl;
557    }
558
559} // namespace JSC
560
561namespace WTF {
562
563    template<typename T> struct DefaultHash;
564    template<typename T> struct StrHash;
565
566    template<> struct StrHash<JSC::UString::Rep*> {
567        static unsigned hash(const JSC::UString::Rep* key) { return key->hash(); }
568        static bool equal(const JSC::UString::Rep* a, const JSC::UString::Rep* b) { return JSC::equal(a, b); }
569        static const bool safeToCompareToEmptyOrDeleted = false;
570    };
571
572    template<> struct StrHash<RefPtr<JSC::UString::Rep> > : public StrHash<JSC::UString::Rep*> {
573        using StrHash<JSC::UString::Rep*>::hash;
574        static unsigned hash(const RefPtr<JSC::UString::Rep>& key) { return key->hash(); }
575        using StrHash<JSC::UString::Rep*>::equal;
576        static bool equal(const RefPtr<JSC::UString::Rep>& a, const RefPtr<JSC::UString::Rep>& b) { return JSC::equal(a.get(), b.get()); }
577        static bool equal(const JSC::UString::Rep* a, const RefPtr<JSC::UString::Rep>& b) { return JSC::equal(a, b.get()); }
578        static bool equal(const RefPtr<JSC::UString::Rep>& a, const JSC::UString::Rep* b) { return JSC::equal(a.get(), b); }
579
580        static const bool safeToCompareToEmptyOrDeleted = false;
581    };
582
583    template<> struct DefaultHash<JSC::UString::Rep*> {
584        typedef StrHash<JSC::UString::Rep*> Hash;
585    };
586
587    template<> struct DefaultHash<RefPtr<JSC::UString::Rep> > {
588        typedef StrHash<RefPtr<JSC::UString::Rep> > Hash;
589
590    };
591
592} // namespace WTF
593
594#endif
595