1/*
2 * Copyright (C) 2009 Apple Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions
6 * are met:
7 * 1. Redistributions of source code must retain the above copyright
8 *    notice, this list of conditions and the following disclaimer.
9 * 2. Redistributions in binary form must reproduce the above copyright
10 *    notice, this list of conditions and the following disclaimer in the
11 *    documentation and/or other materials provided with the distribution.
12 *
13 * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
14 * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
15 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
16 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL APPLE INC. OR
17 * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
18 * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
19 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
20 * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
21 * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
22 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
23 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
24 */
25
26#ifndef UStringImpl_h
27#define UStringImpl_h
28
29#include <limits>
30#include <wtf/CrossThreadRefCounted.h>
31#include <wtf/OwnFastMallocPtr.h>
32#include <wtf/PossiblyNull.h>
33#include <wtf/StringHashFunctions.h>
34#include <wtf/Vector.h>
35#include <wtf/unicode/Unicode.h>
36
37namespace JSC {
38
39class IdentifierTable;
40
// Alias for the buffer type that UStringImpl uses when a character buffer is
// shared between strings (see UStringImpl::create(PassRefPtr<SharedUChar>, ...)
// and UStringImpl::sharedBuffer()).
typedef CrossThreadRefCounted<OwnFastMallocPtr<UChar> > SharedUChar;
42
43class UntypedPtrAndBitfield {
44public:
45    UntypedPtrAndBitfield() {}
46
47    UntypedPtrAndBitfield(void* ptrValue, uintptr_t bitValue)
48        : m_value(reinterpret_cast<uintptr_t>(ptrValue) | bitValue)
49#ifndef NDEBUG
50        , m_leaksPtr(ptrValue)
51#endif
52    {
53        ASSERT(ptrValue == asPtr<void*>());
54        ASSERT((*this & ~s_alignmentMask) == bitValue);
55    }
56
57    template<typename T>
58    T asPtr() const { return reinterpret_cast<T>(m_value & s_alignmentMask); }
59
60    UntypedPtrAndBitfield& operator&=(uintptr_t bits)
61    {
62        m_value &= bits | s_alignmentMask;
63        return *this;
64    }
65
66    UntypedPtrAndBitfield& operator|=(uintptr_t bits)
67    {
68        m_value |= bits & ~s_alignmentMask;
69        return *this;
70    }
71
72    uintptr_t operator&(uintptr_t mask) const
73    {
74        return m_value & mask & ~s_alignmentMask;
75    }
76
77private:
78    static const uintptr_t s_alignmentMask = ~static_cast<uintptr_t>(0x7);
79    uintptr_t m_value;
80#ifndef NDEBUG
81        void* m_leaksPtr; // Only used to allow tools like leaks on OSX to detect that the memory is referenced.
82#endif
83};
84
85class UStringImpl : Noncopyable {
86public:
87    template<size_t inlineCapacity>
88    static PassRefPtr<UStringImpl> adopt(Vector<UChar, inlineCapacity>& vector)
89    {
90        if (unsigned length = vector.size()) {
91            ASSERT(vector.data());
92            return adoptRef(new UStringImpl(vector.releaseBuffer(), length, BufferOwned));
93        }
94        return &empty();
95    }
96
97    static PassRefPtr<UStringImpl> create(const char* c);
98    static PassRefPtr<UStringImpl> create(const char* c, int length);
99    static PassRefPtr<UStringImpl> create(const UChar* buffer, int length);
100
101    static PassRefPtr<UStringImpl> create(PassRefPtr<UStringImpl> rep, int offset, int length)
102    {
103        ASSERT(rep);
104        rep->checkConsistency();
105        return adoptRef(new UStringImpl(rep->m_data + offset, length, rep->bufferOwnerString()));
106    }
107
108    static PassRefPtr<UStringImpl> create(PassRefPtr<SharedUChar> sharedBuffer, UChar* buffer, int length)
109    {
110        return adoptRef(new UStringImpl(buffer, length, sharedBuffer));
111    }
112
113    static PassRefPtr<UStringImpl> createUninitialized(unsigned length, UChar*& output)
114    {
115        if (!length) {
116            output = 0;
117            return &empty();
118        }
119
120        if (length > ((std::numeric_limits<size_t>::max() - sizeof(UStringImpl)) / sizeof(UChar)))
121            CRASH();
122        UStringImpl* resultImpl = static_cast<UStringImpl*>(fastMalloc(sizeof(UChar) * length + sizeof(UStringImpl)));
123        output = reinterpret_cast<UChar*>(resultImpl + 1);
124        return adoptRef(new(resultImpl) UStringImpl(output, length, BufferInternal));
125    }
126
127    static PassRefPtr<UStringImpl> tryCreateUninitialized(unsigned length, UChar*& output)
128    {
129        if (!length) {
130            output = 0;
131            return &empty();
132        }
133
134        if (length > ((std::numeric_limits<size_t>::max() - sizeof(UStringImpl)) / sizeof(UChar)))
135            return 0;
136        UStringImpl* resultImpl;
137        if (!tryFastMalloc(sizeof(UChar) * length + sizeof(UStringImpl)).getValue(resultImpl))
138            return 0;
139        output = reinterpret_cast<UChar*>(resultImpl + 1);
140        return adoptRef(new(resultImpl) UStringImpl(output, length, BufferInternal));
141    }
142
143    SharedUChar* sharedBuffer();
144    UChar* data() const { return m_data; }
145    int size() const { return m_length; }
146    size_t cost()
147    {
148        // For substrings, return the cost of the base string.
149        if (bufferOwnership() == BufferSubstring)
150            return m_dataBuffer.asPtr<UStringImpl*>()->cost();
151
152        if (m_dataBuffer & s_reportedCostBit)
153            return 0;
154        m_dataBuffer |= s_reportedCostBit;
155        return m_length;
156    }
157    unsigned hash() const { if (!m_hash) m_hash = computeHash(data(), m_length); return m_hash; }
158    unsigned existingHash() const { ASSERT(m_hash); return m_hash; } // fast path for Identifiers
159    void setHash(unsigned hash) { ASSERT(hash == computeHash(data(), m_length)); m_hash = hash; } // fast path for Identifiers
160    bool isIdentifier() const { return m_isIdentifier; }
161    void setIsIdentifier(bool isIdentifier) { m_isIdentifier = isIdentifier; }
162
163    UStringImpl* ref() { m_refCount += s_refCountIncrement; return this; }
164    ALWAYS_INLINE void deref() { if (!(m_refCount -= s_refCountIncrement)) delete this; }
165
166    static void copyChars(UChar* destination, const UChar* source, unsigned numCharacters)
167    {
168        if (numCharacters <= s_copyCharsInlineCutOff) {
169            for (unsigned i = 0; i < numCharacters; ++i)
170                destination[i] = source[i];
171        } else
172            memcpy(destination, source, numCharacters * sizeof(UChar));
173    }
174
175    static unsigned computeHash(const UChar* s, int length) { ASSERT(length >= 0); return WTF::stringHash(s, length); }
176    static unsigned computeHash(const char* s, int length) { ASSERT(length >= 0); return WTF::stringHash(s, length); }
177    static unsigned computeHash(const char* s) { return WTF::stringHash(s); }
178
179    static UStringImpl& empty() { return *s_empty; }
180
181    ALWAYS_INLINE void checkConsistency() const
182    {
183        // There is no recursion of substrings.
184        ASSERT(bufferOwnerString()->bufferOwnership() != BufferSubstring);
185        // Static strings cannot be put in identifier tables, because they are globally shared.
186        ASSERT(!isStatic() || !isIdentifier());
187    }
188
189private:
190    enum BufferOwnership {
191        BufferInternal,
192        BufferOwned,
193        BufferSubstring,
194        BufferShared,
195    };
196
197    // For SmallStringStorage, which allocates an array and uses an in-place new.
198    UStringImpl() { }
199
200    // Used to construct normal strings with an internal or external buffer.
201    UStringImpl(UChar* data, int length, BufferOwnership ownership)
202        : m_data(data)
203        , m_length(length)
204        , m_refCount(s_refCountIncrement)
205        , m_hash(0)
206        , m_isIdentifier(false)
207        , m_dataBuffer(0, ownership)
208    {
209        ASSERT((ownership == BufferInternal) || (ownership == BufferOwned));
210        checkConsistency();
211    }
212
213    // Used to construct static strings, which have an special refCount that can never hit zero.
214    // This means that the static string will never be destroyed, which is important because
215    // static strings will be shared across threads & ref-counted in a non-threadsafe manner.
216    enum StaticStringConstructType { ConstructStaticString };
217    UStringImpl(UChar* data, int length, StaticStringConstructType)
218        : m_data(data)
219        , m_length(length)
220        , m_refCount(s_staticRefCountInitialValue)
221        , m_hash(0)
222        , m_isIdentifier(false)
223        , m_dataBuffer(0, BufferOwned)
224    {
225        checkConsistency();
226    }
227
228    // Used to create new strings that are a substring of an existing string.
229    UStringImpl(UChar* data, int length, PassRefPtr<UStringImpl> base)
230        : m_data(data)
231        , m_length(length)
232        , m_refCount(s_refCountIncrement)
233        , m_hash(0)
234        , m_isIdentifier(false)
235        , m_dataBuffer(base.releaseRef(), BufferSubstring)
236    {
237        // Do use static strings as a base for substrings; UntypedPtrAndBitfield assumes
238        // that all pointers will be at least 8-byte aligned, we cannot guarantee that of
239        // UStringImpls that are not heap allocated.
240        ASSERT(m_dataBuffer.asPtr<UStringImpl*>()->size());
241        ASSERT(!m_dataBuffer.asPtr<UStringImpl*>()->isStatic());
242        checkConsistency();
243    }
244
245    // Used to construct new strings sharing an existing shared buffer.
246    UStringImpl(UChar* data, int length, PassRefPtr<SharedUChar> sharedBuffer)
247        : m_data(data)
248        , m_length(length)
249        , m_refCount(s_refCountIncrement)
250        , m_hash(0)
251        , m_isIdentifier(false)
252        , m_dataBuffer(sharedBuffer.releaseRef(), BufferShared)
253    {
254        checkConsistency();
255    }
256
257    using Noncopyable::operator new;
258    void* operator new(size_t, void* inPlace) { return inPlace; }
259
260    ~UStringImpl();
261
262    // This number must be at least 2 to avoid sharing empty, null as well as 1 character strings from SmallStrings.
263    static const int s_minLengthToShare = 10;
264    static const unsigned s_copyCharsInlineCutOff = 20;
265    static const uintptr_t s_bufferOwnershipMask = 3;
266    static const uintptr_t s_reportedCostBit = 4;
267    // We initialize and increment/decrement the refCount for all normal (non-static) strings by the value 2.
268    // We initialize static strings with an odd number (specifically, 1), such that the refCount cannot reach zero.
269    static const int s_refCountIncrement = 2;
270    static const int s_staticRefCountInitialValue = 1;
271
272    UStringImpl* bufferOwnerString() { return (bufferOwnership() == BufferSubstring) ? m_dataBuffer.asPtr<UStringImpl*>() :  this; }
273    const UStringImpl* bufferOwnerString() const { return (bufferOwnership() == BufferSubstring) ? m_dataBuffer.asPtr<UStringImpl*>() :  this; }
274    SharedUChar* baseSharedBuffer();
275    unsigned bufferOwnership() const { return m_dataBuffer & s_bufferOwnershipMask; }
276    bool isStatic() const { return m_refCount & 1; }
277
278    // unshared data
279    UChar* m_data;
280    int m_length;
281    unsigned m_refCount;
282    mutable unsigned m_hash : 31;
283    mutable unsigned m_isIdentifier : 1;
284    UntypedPtrAndBitfield m_dataBuffer;
285
286    JS_EXPORTDATA static UStringImpl* s_empty;
287
288    friend class JIT;
289    friend class SmallStringsStorage;
290    friend void initializeUString();
291};
292
// Declared here; the definition is not part of this header.
// NOTE(review): presumably compares the two strings by character content -
// confirm against the definition, including how null pointers are handled.
bool equal(const UStringImpl*, const UStringImpl*);
294
295}
296
297#endif
298