UString.h revision d227fc870c7a697500a3c900c31baf05fb9a8524
1/*
2 *  Copyright (C) 1999-2000 Harri Porten (porten@kde.org)
3 *  Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
4 *  Copyright (C) 2009 Google Inc. All rights reserved.
5 *
6 *  This library is free software; you can redistribute it and/or
7 *  modify it under the terms of the GNU Library General Public
8 *  License as published by the Free Software Foundation; either
9 *  version 2 of the License, or (at your option) any later version.
10 *
11 *  This library is distributed in the hope that it will be useful,
12 *  but WITHOUT ANY WARRANTY; without even the implied warranty of
13 *  MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 *  Library General Public License for more details.
15 *
16 *  You should have received a copy of the GNU Library General Public License
17 *  along with this library; see the file COPYING.LIB.  If not, write to
18 *  the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19 *  Boston, MA 02110-1301, USA.
20 *
21 */
22
23#ifndef UString_h
24#define UString_h
25
26#include "Collector.h"
27#include <stdint.h>
28#include <string.h>
29#include <wtf/Assertions.h>
30#include <wtf/CrossThreadRefCounted.h>
31#include <wtf/OwnFastMallocPtr.h>
32#include <wtf/PassRefPtr.h>
33#include <wtf/PtrAndFlags.h>
34#include <wtf/RefPtr.h>
35#include <wtf/Vector.h>
36#include <wtf/unicode/Unicode.h>
37
38namespace JSC {
39
40    using WTF::PlacementNewAdoptType;
41    using WTF::PlacementNewAdopt;
42
43    class IdentifierTable;
44
45    class CString {
46    public:
47        CString()
48            : m_length(0)
49            , m_data(0)
50        {
51        }
52
53        CString(const char*);
54        CString(const char*, size_t);
55        CString(const CString&);
56
57        ~CString();
58
59        static CString adopt(char*, size_t); // buffer should be allocated with new[].
60
61        CString& append(const CString&);
62        CString& operator=(const char* c);
63        CString& operator=(const CString&);
64        CString& operator+=(const CString& c) { return append(c); }
65
66        size_t size() const { return m_length; }
67        const char* c_str() const { return m_data; }
68
69    private:
70        size_t m_length;
71        char* m_data;
72    };
73
74    typedef Vector<char, 32> CStringBuffer;
75
76    class UString {
77        friend class JIT;
78
79    public:
80        typedef CrossThreadRefCounted<OwnFastMallocPtr<UChar> > SharedUChar;
81        struct BaseString;
82        struct Rep : Noncopyable {
83            friend class JIT;
84
85            static PassRefPtr<Rep> create(UChar* buffer, int length)
86            {
87                return adoptRef(new BaseString(buffer, length));
88            }
89
90            static PassRefPtr<Rep> createEmptyBuffer(size_t size)
91            {
92                // Guard against integer overflow
93                if (size < (std::numeric_limits<size_t>::max() / sizeof(UChar))) {
94                    void* buf = 0;
95                    if (tryFastMalloc(size * sizeof(UChar)).getValue(buf))
96                        return adoptRef(new BaseString(static_cast<UChar*>(buf), 0, size));
97                }
98                return adoptRef(new BaseString(0, 0, 0));
99            }
100
101            static PassRefPtr<Rep> createCopying(const UChar*, int);
102            static PassRefPtr<Rep> create(PassRefPtr<Rep> base, int offset, int length);
103
104            // Constructs a string from a UTF-8 string, using strict conversion (see comments in UTF8.h).
105            // Returns UString::Rep::null for null input or conversion failure.
106            static PassRefPtr<Rep> createFromUTF8(const char*);
107
108            // Uses SharedUChar to have joint ownership over the UChar*.
109            static PassRefPtr<Rep> create(UChar*, int, PassRefPtr<SharedUChar>);
110
111            SharedUChar* sharedBuffer();
112            void destroy();
113
114            bool baseIsSelf() const { return m_identifierTableAndFlags.isFlagSet(BaseStringFlag); }
115            UChar* data() const;
116            int size() const { return len; }
117
118            unsigned hash() const { if (_hash == 0) _hash = computeHash(data(), len); return _hash; }
119            unsigned computedHash() const { ASSERT(_hash); return _hash; } // fast path for Identifiers
120
121            static unsigned computeHash(const UChar*, int length);
122            static unsigned computeHash(const char*, int length);
123            static unsigned computeHash(const char* s) { return computeHash(s, strlen(s)); }
124
125            IdentifierTable* identifierTable() const { return m_identifierTableAndFlags.get(); }
126            void setIdentifierTable(IdentifierTable* table) { ASSERT(!isStatic()); m_identifierTableAndFlags.set(table); }
127
128            bool isStatic() const { return m_identifierTableAndFlags.isFlagSet(StaticFlag); }
129            void setStatic(bool);
130            void setBaseString(PassRefPtr<BaseString>);
131            BaseString* baseString();
132            const BaseString* baseString() const;
133
134            Rep* ref() { ++rc; return this; }
135            ALWAYS_INLINE void deref() { if (--rc == 0) destroy(); }
136
137            void checkConsistency() const;
138            enum UStringFlags {
139                StaticFlag,
140                BaseStringFlag
141            };
142
143            // unshared data
144            int offset;
145            int len;
146            int rc; // For null and empty static strings, this field does not reflect a correct count, because ref/deref are not thread-safe. A special case in destroy() guarantees that these do not get deleted.
147            mutable unsigned _hash;
148            PtrAndFlags<IdentifierTable, UStringFlags> m_identifierTableAndFlags;
149
150            static BaseString& null() { return *nullBaseString; }
151            static BaseString& empty() { return *emptyBaseString; }
152
153            bool reserveCapacity(int capacity);
154
155        protected:
156            // Constructor for use by BaseString subclass; they use the union with m_baseString for another purpose.
157            Rep(int length)
158                : offset(0)
159                , len(length)
160                , rc(1)
161                , _hash(0)
162                , m_baseString(0)
163            {
164            }
165
166            Rep(PassRefPtr<BaseString> base, int offsetInBase, int length)
167                : offset(offsetInBase)
168                , len(length)
169                , rc(1)
170                , _hash(0)
171                , m_baseString(base.releaseRef())
172            {
173                checkConsistency();
174            }
175
176            union {
177                // If !baseIsSelf()
178                BaseString* m_baseString;
179                // If baseIsSelf()
180                SharedUChar* m_sharedBuffer;
181            };
182
183        private:
184            // For SmallStringStorage which allocates an array and does initialization manually.
185            Rep() { }
186
187            friend class SmallStringsStorage;
188            friend void initializeUString();
189            JS_EXPORTDATA static BaseString* nullBaseString;
190            JS_EXPORTDATA static BaseString* emptyBaseString;
191        };
192
193
194        struct BaseString : public Rep {
195            bool isShared() { return rc != 1 || isBufferReadOnly(); }
196            void setSharedBuffer(PassRefPtr<SharedUChar>);
197
198            bool isBufferReadOnly()
199            {
200                if (!m_sharedBuffer)
201                    return false;
202                return slowIsBufferReadOnly();
203            }
204
205            // potentially shared data.
206            UChar* buf;
207            int preCapacity;
208            int usedPreCapacity;
209            int capacity;
210            int usedCapacity;
211
212            size_t reportedCost;
213
214        private:
215            BaseString(UChar* buffer, int length, int additionalCapacity = 0)
216                : Rep(length)
217                , buf(buffer)
218                , preCapacity(0)
219                , usedPreCapacity(0)
220                , capacity(length + additionalCapacity)
221                , usedCapacity(length)
222                , reportedCost(0)
223            {
224                m_identifierTableAndFlags.setFlag(BaseStringFlag);
225                checkConsistency();
226            }
227
228            SharedUChar* sharedBuffer();
229            bool slowIsBufferReadOnly();
230
231            friend struct Rep;
232            friend class SmallStringsStorage;
233            friend void initializeUString();
234        };
235
236    public:
237        UString();
238        UString(const char*);
239        UString(const UChar*, int length);
240        UString(UChar*, int length, bool copy);
241
242        UString(const UString& s)
243            : m_rep(s.m_rep)
244        {
245        }
246
247        UString(const Vector<UChar>& buffer);
248
249        ~UString()
250        {
251        }
252
253        // Special constructor for cases where we overwrite an object in place.
254        UString(PlacementNewAdoptType)
255            : m_rep(PlacementNewAdopt)
256        {
257        }
258
259        static UString from(int);
260        static UString from(long long);
261        static UString from(unsigned int);
262        static UString from(long);
263        static UString from(double);
264
265        struct Range {
266        public:
267            Range(int pos, int len)
268                : position(pos)
269                , length(len)
270            {
271            }
272
273            Range()
274            {
275            }
276
277            int position;
278            int length;
279        };
280
281        UString spliceSubstringsWithSeparators(const Range* substringRanges, int rangeCount, const UString* separators, int separatorCount) const;
282
283        UString replaceRange(int rangeStart, int RangeEnd, const UString& replacement) const;
284
285        UString& append(const UString&);
286        UString& append(const char*);
287        UString& append(UChar);
288        UString& append(char c) { return append(static_cast<UChar>(static_cast<unsigned char>(c))); }
289        UString& append(const UChar*, int size);
290        UString& appendNumeric(int);
291        UString& appendNumeric(double);
292
293        bool getCString(CStringBuffer&) const;
294
295        // NOTE: This method should only be used for *debugging* purposes as it
296        // is neither Unicode safe nor free from side effects nor thread-safe.
297        char* ascii() const;
298
299        /**
300         * Convert the string to UTF-8, assuming it is UTF-16 encoded.
301         * In non-strict mode, this function is tolerant of badly formed UTF-16, it
302         * can create UTF-8 strings that are invalid because they have characters in
303         * the range U+D800-U+DDFF, U+FFFE, or U+FFFF, but the UTF-8 string is
304         * guaranteed to be otherwise valid.
305         * In strict mode, error is returned as null CString.
306         */
307        CString UTF8String(bool strict = false) const;
308
309        UString& operator=(const char*c);
310
311        UString& operator+=(const UString& s) { return append(s); }
312        UString& operator+=(const char* s) { return append(s); }
313
314        const UChar* data() const { return m_rep->data(); }
315
316        bool isNull() const { return (m_rep == &Rep::null()); }
317        bool isEmpty() const { return (!m_rep->len); }
318
319        bool is8Bit() const;
320
321        int size() const { return m_rep->size(); }
322
323        UChar operator[](int pos) const;
324
325        double toDouble(bool tolerateTrailingJunk, bool tolerateEmptyString) const;
326        double toDouble(bool tolerateTrailingJunk) const;
327        double toDouble() const;
328
329        uint32_t toUInt32(bool* ok = 0) const;
330        uint32_t toUInt32(bool* ok, bool tolerateEmptyString) const;
331        uint32_t toStrictUInt32(bool* ok = 0) const;
332
333        unsigned toArrayIndex(bool* ok = 0) const;
334
335        int find(const UString& f, int pos = 0) const;
336        int find(UChar, int pos = 0) const;
337        int rfind(const UString& f, int pos) const;
338        int rfind(UChar, int pos) const;
339
340        UString substr(int pos = 0, int len = -1) const;
341
342        static const UString& null() { return *nullUString; }
343
344        Rep* rep() const { return m_rep.get(); }
345        static Rep* nullRep();
346
347        UString(PassRefPtr<Rep> r)
348            : m_rep(r)
349        {
350            ASSERT(m_rep);
351        }
352
353        size_t cost() const;
354
355        // Attempt to grow this string such that it can grow to a total length of 'capacity'
356        // without reallocation.  This may fail a number of reasons - if the BasicString is
357        // shared and another string is using part of the capacity beyond our end point, if
358        // the realloc fails, or if this string is empty and has no storage.
359        //
360        // This method returns a boolean indicating success.
361        bool reserveCapacity(int capacity)
362        {
363            return m_rep->reserveCapacity(capacity);
364        }
365
366    private:
367        void expandCapacity(int requiredLength);
368        void expandPreCapacity(int requiredPreCap);
369        void makeNull();
370
371        RefPtr<Rep> m_rep;
372        static UString* nullUString;
373
374        friend void initializeUString();
375        friend bool operator==(const UString&, const UString&);
376        friend PassRefPtr<Rep> concatenate(Rep*, Rep*); // returns 0 if out of memory
377    };
378    PassRefPtr<UString::Rep> concatenate(UString::Rep*, UString::Rep*);
379    PassRefPtr<UString::Rep> concatenate(UString::Rep*, int);
380    PassRefPtr<UString::Rep> concatenate(UString::Rep*, double);
381
382    inline bool operator==(const UString& s1, const UString& s2)
383    {
384        int size = s1.size();
385        switch (size) {
386        case 0:
387            return !s2.size();
388        case 1:
389            return s2.size() == 1 && s1.data()[0] == s2.data()[0];
390        case 2: {
391            if (s2.size() != 2)
392                return false;
393            const UChar* d1 = s1.data();
394            const UChar* d2 = s2.data();
395            return (d1[0] == d2[0]) & (d1[1] == d2[1]);
396        }
397        default:
398            return s2.size() == size && memcmp(s1.data(), s2.data(), size * sizeof(UChar)) == 0;
399        }
400    }
401
402
403    inline bool operator!=(const UString& s1, const UString& s2)
404    {
405        return !JSC::operator==(s1, s2);
406    }
407
408    bool operator<(const UString& s1, const UString& s2);
409    bool operator>(const UString& s1, const UString& s2);
410
411    bool operator==(const UString& s1, const char* s2);
412
413    inline bool operator!=(const UString& s1, const char* s2)
414    {
415        return !JSC::operator==(s1, s2);
416    }
417
418    inline bool operator==(const char *s1, const UString& s2)
419    {
420        return operator==(s2, s1);
421    }
422
423    inline bool operator!=(const char *s1, const UString& s2)
424    {
425        return !JSC::operator==(s1, s2);
426    }
427
428    bool operator==(const CString&, const CString&);
429
430    inline UString operator+(const UString& s1, const UString& s2)
431    {
432        RefPtr<UString::Rep> result = concatenate(s1.rep(), s2.rep());
433        return UString(result ? result.release() : UString::nullRep());
434    }
435
436    int compare(const UString&, const UString&);
437
438    bool equal(const UString::Rep*, const UString::Rep*);
439
440    inline PassRefPtr<UString::Rep> UString::Rep::create(PassRefPtr<UString::Rep> rep, int offset, int length)
441    {
442        ASSERT(rep);
443        rep->checkConsistency();
444
445        int repOffset = rep->offset;
446
447        PassRefPtr<BaseString> base = rep->baseString();
448
449        ASSERT(-(offset + repOffset) <= base->usedPreCapacity);
450        ASSERT(offset + repOffset + length <= base->usedCapacity);
451
452        // Steal the single reference this Rep was created with.
453        return adoptRef(new Rep(base, repOffset + offset, length));
454    }
455
456    inline UChar* UString::Rep::data() const
457    {
458        const BaseString* base = baseString();
459        return base->buf + base->preCapacity + offset;
460    }
461
462    inline void UString::Rep::setStatic(bool v)
463    {
464        ASSERT(!identifierTable());
465        if (v)
466            m_identifierTableAndFlags.setFlag(StaticFlag);
467        else
468            m_identifierTableAndFlags.clearFlag(StaticFlag);
469    }
470
471    inline void UString::Rep::setBaseString(PassRefPtr<BaseString> base)
472    {
473        ASSERT(base != this);
474        ASSERT(!baseIsSelf());
475        m_baseString = base.releaseRef();
476    }
477
478    inline UString::BaseString* UString::Rep::baseString()
479    {
480        return !baseIsSelf() ? m_baseString : reinterpret_cast<BaseString*>(this) ;
481    }
482
483    inline const UString::BaseString* UString::Rep::baseString() const
484    {
485        return const_cast<Rep*>(this)->baseString();
486    }
487
#ifdef NDEBUG
    // With NDEBUG defined, the consistency check is an empty inline function.
    inline void UString::Rep::checkConsistency() const { }
#endif
493
494    inline UString::UString()
495        : m_rep(&Rep::null())
496    {
497    }
498
499    // Rule from ECMA 15.2 about what an array index is.
500    // Must exactly match string form of an unsigned integer, and be less than 2^32 - 1.
501    inline unsigned UString::toArrayIndex(bool* ok) const
502    {
503        unsigned i = toStrictUInt32(ok);
504        if (ok && i >= 0xFFFFFFFFU)
505            *ok = false;
506        return i;
507    }
508
509    // We'd rather not do shared substring append for small strings, since
510    // this runs too much risk of a tiny initial string holding down a
511    // huge buffer.
512    // FIXME: this should be size_t but that would cause warnings until we
513    // fix UString sizes to be size_t instead of int
514    static const int minShareSize = Heap::minExtraCostSize / sizeof(UChar);
515
516    inline size_t UString::cost() const
517    {
518        BaseString* base = m_rep->baseString();
519        size_t capacity = (base->capacity + base->preCapacity) * sizeof(UChar);
520        size_t reportedCost = base->reportedCost;
521        ASSERT(capacity >= reportedCost);
522
523        size_t capacityDelta = capacity - reportedCost;
524
525        if (capacityDelta < static_cast<size_t>(minShareSize))
526            return 0;
527
528        base->reportedCost = capacity;
529
530        return capacityDelta;
531    }
532
533    struct IdentifierRepHash : PtrHash<RefPtr<JSC::UString::Rep> > {
534        static unsigned hash(const RefPtr<JSC::UString::Rep>& key) { return key->computedHash(); }
535        static unsigned hash(JSC::UString::Rep* key) { return key->computedHash(); }
536    };
537
538    void initializeUString();
539} // namespace JSC
540
541namespace WTF {
542
543    template<typename T> struct DefaultHash;
544    template<typename T> struct StrHash;
545
546    template<> struct StrHash<JSC::UString::Rep*> {
547        static unsigned hash(const JSC::UString::Rep* key) { return key->hash(); }
548        static bool equal(const JSC::UString::Rep* a, const JSC::UString::Rep* b) { return JSC::equal(a, b); }
549        static const bool safeToCompareToEmptyOrDeleted = false;
550    };
551
552    template<> struct StrHash<RefPtr<JSC::UString::Rep> > : public StrHash<JSC::UString::Rep*> {
553        using StrHash<JSC::UString::Rep*>::hash;
554        static unsigned hash(const RefPtr<JSC::UString::Rep>& key) { return key->hash(); }
555        using StrHash<JSC::UString::Rep*>::equal;
556        static bool equal(const RefPtr<JSC::UString::Rep>& a, const RefPtr<JSC::UString::Rep>& b) { return JSC::equal(a.get(), b.get()); }
557        static bool equal(const JSC::UString::Rep* a, const RefPtr<JSC::UString::Rep>& b) { return JSC::equal(a, b.get()); }
558        static bool equal(const RefPtr<JSC::UString::Rep>& a, const JSC::UString::Rep* b) { return JSC::equal(a.get(), b); }
559
560        static const bool safeToCompareToEmptyOrDeleted = false;
561    };
562
563    template<> struct DefaultHash<JSC::UString::Rep*> {
564        typedef StrHash<JSC::UString::Rep*> Hash;
565    };
566
567    template<> struct DefaultHash<RefPtr<JSC::UString::Rep> > {
568        typedef StrHash<RefPtr<JSC::UString::Rep> > Hash;
569
570    };
571
572} // namespace WTF
573
574#endif
575