1/*
2 * Copyright (C) 1999 Lars Knoll (knoll@kde.org)
3 * Copyright (C) 2005, 2006, 2007, 2008, 2009, 2010 Apple Inc. All rights reserved.
4 * Copyright (C) 2009 Google Inc. All rights reserved.
5 *
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Library General Public
8 * License as published by the Free Software Foundation; either
9 * version 2 of the License, or (at your option) any later version.
10 *
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
14 * Library General Public License for more details.
15 *
16 * You should have received a copy of the GNU Library General Public License
17 * along with this library; see the file COPYING.LIB.  If not, write to
18 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
19 * Boston, MA 02110-1301, USA.
20 *
21 */
22
23#ifndef StringImpl_h
24#define StringImpl_h
25
26#include <limits.h>
27#include <wtf/ASCIICType.h>
28#include <wtf/CrossThreadRefCounted.h>
29#include <wtf/Forward.h>
30#include <wtf/OwnFastMallocPtr.h>
31#include <wtf/StdLibExtras.h>
32#include <wtf/StringHasher.h>
33#include <wtf/Vector.h>
34#include <wtf/text/StringImplBase.h>
35#include <wtf/unicode/Unicode.h>
36
// When building with CoreFoundation, declare CFStringRef locally so that
// createCFString() can be declared below; no CF header is included by this file.
#if USE(CF)
typedef const struct __CFString * CFStringRef;
#endif

// Forward-declare NSString for Objective-C(++) translation units only; it is
// needed by the NSString* conversion operator declared on StringImpl.
#ifdef __OBJC__
@class NSString;
#endif
44
// FIXME: This is a temporary layering violation while string code is being moved to WTF.
// The file moves land in one patch; follow-up patches will change the namespaces.
// These JSC translator types are only forward-declared here so that StringImpl
// can name them in its friend declarations.
namespace JSC {
struct IdentifierCStringTranslator;
struct IdentifierUCharBufferTranslator;
}
51
52namespace WTF {
53
// Translator helpers that StringImpl declares as friends; they are defined elsewhere.
struct CStringTranslator;
struct HashAndCharactersTranslator;
struct HashAndUTF8CharactersTranslator;
struct UCharBufferTranslator;

// Two-valued selector naming a case-sensitive or case-insensitive mode.
// It is exported by the using-declarations at the end of this header but is
// not consumed by any declaration in this file.
enum TextCaseSensitivity { TextCaseSensitive, TextCaseInsensitive };

// A const UChar buffer held through OwnFastMallocPtr.
typedef OwnFastMallocPtr<const UChar> SharableUChar;
// SharableUChar wrapped in CrossThreadRefCounted; StringImpl stores a pointer to
// one of these for strings constructed with BufferShared ownership.
typedef CrossThreadRefCounted<SharableUChar> SharedUChar;
// Predicate over a single UChar, accepted by StringImpl::removeCharacters() and
// one of the StringImpl::find() overloads.
typedef bool (*CharacterMatchFunctionPtr)(UChar);
64
65class StringImpl : public StringImplBase {
66    friend struct JSC::IdentifierCStringTranslator;
67    friend struct JSC::IdentifierUCharBufferTranslator;
68    friend struct WTF::CStringTranslator;
69    friend struct WTF::HashAndCharactersTranslator;
70    friend struct WTF::HashAndUTF8CharactersTranslator;
71    friend struct WTF::UCharBufferTranslator;
72    friend class AtomicStringImpl;
73private:
74    // Used to construct static strings, which have an special refCount that can never hit zero.
75    // This means that the static string will never be destroyed, which is important because
76    // static strings will be shared across threads & ref-counted in a non-threadsafe manner.
77    StringImpl(const UChar* characters, unsigned length, StaticStringConstructType)
78        : StringImplBase(length, ConstructStaticString)
79        , m_data(characters)
80        , m_buffer(0)
81        , m_hash(0)
82    {
83        // Ensure that the hash is computed so that AtomicStringHash can call existingHash()
84        // with impunity. The empty string is special because it is never entered into
85        // AtomicString's HashKey, but still needs to compare correctly.
86        hash();
87    }
88
89    // Create a normal string with internal storage (BufferInternal)
90    StringImpl(unsigned length)
91        : StringImplBase(length, BufferInternal)
92        , m_data(reinterpret_cast<const UChar*>(this + 1))
93        , m_buffer(0)
94        , m_hash(0)
95    {
96        ASSERT(m_data);
97        ASSERT(m_length);
98    }
99
100    // Create a StringImpl adopting ownership of the provided buffer (BufferOwned)
101    StringImpl(const UChar* characters, unsigned length)
102        : StringImplBase(length, BufferOwned)
103        , m_data(characters)
104        , m_buffer(0)
105        , m_hash(0)
106    {
107        ASSERT(m_data);
108        ASSERT(m_length);
109    }
110
111    // Used to create new strings that are a substring of an existing StringImpl (BufferSubstring)
112    StringImpl(const UChar* characters, unsigned length, PassRefPtr<StringImpl> base)
113        : StringImplBase(length, BufferSubstring)
114        , m_data(characters)
115        , m_substringBuffer(base.leakRef())
116        , m_hash(0)
117    {
118        ASSERT(m_data);
119        ASSERT(m_length);
120        ASSERT(m_substringBuffer->bufferOwnership() != BufferSubstring);
121    }
122
123    // Used to construct new strings sharing an existing SharedUChar (BufferShared)
124    StringImpl(const UChar* characters, unsigned length, PassRefPtr<SharedUChar> sharedBuffer)
125        : StringImplBase(length, BufferShared)
126        , m_data(characters)
127        , m_sharedBuffer(sharedBuffer.leakRef())
128        , m_hash(0)
129    {
130        ASSERT(m_data);
131        ASSERT(m_length);
132    }
133
134    // For use only by AtomicString's XXXTranslator helpers.
135    void setHash(unsigned hash)
136    {
137        ASSERT(!isStatic());
138        ASSERT(!m_hash);
139        ASSERT(hash == StringHasher::computeHash(m_data, m_length));
140        m_hash = hash;
141    }
142
143public:
144    ~StringImpl();
145
146    static PassRefPtr<StringImpl> create(const UChar*, unsigned length);
147    static PassRefPtr<StringImpl> create(const char*, unsigned length);
148    static PassRefPtr<StringImpl> create(const char*);
149    static PassRefPtr<StringImpl> create(const UChar*, unsigned length, PassRefPtr<SharedUChar> sharedBuffer);
150    static ALWAYS_INLINE PassRefPtr<StringImpl> create(PassRefPtr<StringImpl> rep, unsigned offset, unsigned length)
151    {
152        ASSERT(rep);
153        ASSERT(length <= rep->length());
154
155        if (!length)
156            return empty();
157
158        StringImpl* ownerRep = (rep->bufferOwnership() == BufferSubstring) ? rep->m_substringBuffer : rep.get();
159        return adoptRef(new StringImpl(rep->m_data + offset, length, ownerRep));
160    }
161
162    static PassRefPtr<StringImpl> createUninitialized(unsigned length, UChar*& data);
163    static ALWAYS_INLINE PassRefPtr<StringImpl> tryCreateUninitialized(unsigned length, UChar*& output)
164    {
165        if (!length) {
166            output = 0;
167            return empty();
168        }
169
170        if (length > ((std::numeric_limits<unsigned>::max() - sizeof(StringImpl)) / sizeof(UChar))) {
171            output = 0;
172            return 0;
173        }
174        StringImpl* resultImpl;
175        if (!tryFastMalloc(sizeof(UChar) * length + sizeof(StringImpl)).getValue(resultImpl)) {
176            output = 0;
177            return 0;
178        }
179        output = reinterpret_cast<UChar*>(resultImpl + 1);
180        return adoptRef(new(resultImpl) StringImpl(length));
181    }
182
183    static unsigned dataOffset() { return OBJECT_OFFSETOF(StringImpl, m_data); }
184    static PassRefPtr<StringImpl> createWithTerminatingNullCharacter(const StringImpl&);
185    static PassRefPtr<StringImpl> createStrippingNullCharacters(const UChar*, unsigned length);
186
187    template<size_t inlineCapacity>
188    static PassRefPtr<StringImpl> adopt(Vector<UChar, inlineCapacity>& vector)
189    {
190        if (size_t size = vector.size()) {
191            ASSERT(vector.data());
192            if (size > std::numeric_limits<unsigned>::max())
193                CRASH();
194            return adoptRef(new StringImpl(vector.releaseBuffer(), size));
195        }
196        return empty();
197    }
198    static PassRefPtr<StringImpl> adopt(StringBuffer&);
199
200    SharedUChar* sharedBuffer();
201    const UChar* characters() const { return m_data; }
202
203    size_t cost()
204    {
205        // For substrings, return the cost of the base string.
206        if (bufferOwnership() == BufferSubstring)
207            return m_substringBuffer->cost();
208
209        if (m_refCountAndFlags & s_refCountFlagShouldReportedCost) {
210            m_refCountAndFlags &= ~s_refCountFlagShouldReportedCost;
211            return m_length;
212        }
213        return 0;
214    }
215
216    bool isIdentifier() const { return m_refCountAndFlags & s_refCountFlagIsIdentifier; }
217    void setIsIdentifier(bool isIdentifier)
218    {
219        ASSERT(!isStatic());
220        if (isIdentifier)
221            m_refCountAndFlags |= s_refCountFlagIsIdentifier;
222        else
223            m_refCountAndFlags &= ~s_refCountFlagIsIdentifier;
224    }
225
226    bool hasTerminatingNullCharacter() const { return m_refCountAndFlags & s_refCountFlagHasTerminatingNullCharacter; }
227
228    bool isAtomic() const { return m_refCountAndFlags & s_refCountFlagIsAtomic; }
229    void setIsAtomic(bool isIdentifier)
230    {
231        ASSERT(!isStatic());
232        if (isIdentifier)
233            m_refCountAndFlags |= s_refCountFlagIsAtomic;
234        else
235            m_refCountAndFlags &= ~s_refCountFlagIsAtomic;
236    }
237
238    unsigned hash() const { if (!m_hash) m_hash = StringHasher::computeHash(m_data, m_length); return m_hash; }
239    unsigned existingHash() const { ASSERT(m_hash); return m_hash; }
240
241    ALWAYS_INLINE void deref() { m_refCountAndFlags -= s_refCountIncrement; if (!(m_refCountAndFlags & (s_refCountMask | s_refCountFlagStatic))) delete this; }
242    ALWAYS_INLINE bool hasOneRef() const { return (m_refCountAndFlags & (s_refCountMask | s_refCountFlagStatic)) == s_refCountIncrement; }
243
244    static StringImpl* empty();
245
246    static void copyChars(UChar* destination, const UChar* source, unsigned numCharacters)
247    {
248        if (numCharacters <= s_copyCharsInlineCutOff) {
249            for (unsigned i = 0; i < numCharacters; ++i)
250                destination[i] = source[i];
251        } else
252            memcpy(destination, source, numCharacters * sizeof(UChar));
253    }
254
255    // Returns a StringImpl suitable for use on another thread.
256    PassRefPtr<StringImpl> crossThreadString();
257    // Makes a deep copy. Helpful only if you need to use a String on another thread
258    // (use crossThreadString if the method call doesn't need to be threadsafe).
259    // Since StringImpl objects are immutable, there's no other reason to make a copy.
260    PassRefPtr<StringImpl> threadsafeCopy() const;
261
262    PassRefPtr<StringImpl> substring(unsigned pos, unsigned len = UINT_MAX);
263
264    UChar operator[](unsigned i) { ASSERT(i < m_length); return m_data[i]; }
265    UChar32 characterStartingAt(unsigned);
266
267    bool containsOnlyWhitespace();
268
269    int toIntStrict(bool* ok = 0, int base = 10);
270    unsigned toUIntStrict(bool* ok = 0, int base = 10);
271    int64_t toInt64Strict(bool* ok = 0, int base = 10);
272    uint64_t toUInt64Strict(bool* ok = 0, int base = 10);
273    intptr_t toIntPtrStrict(bool* ok = 0, int base = 10);
274
275    int toInt(bool* ok = 0); // ignores trailing garbage
276    unsigned toUInt(bool* ok = 0); // ignores trailing garbage
277    int64_t toInt64(bool* ok = 0); // ignores trailing garbage
278    uint64_t toUInt64(bool* ok = 0); // ignores trailing garbage
279    intptr_t toIntPtr(bool* ok = 0); // ignores trailing garbage
280
281    double toDouble(bool* ok = 0, bool* didReadNumber = 0);
282    float toFloat(bool* ok = 0, bool* didReadNumber = 0);
283
284    PassRefPtr<StringImpl> lower();
285    PassRefPtr<StringImpl> upper();
286
287    enum LastCharacterBehavior { ObscureLastCharacter, DisplayLastCharacter };
288
289    PassRefPtr<StringImpl> secure(UChar, LastCharacterBehavior = ObscureLastCharacter);
290    PassRefPtr<StringImpl> foldCase();
291
292    PassRefPtr<StringImpl> stripWhiteSpace();
293    PassRefPtr<StringImpl> simplifyWhiteSpace();
294
295    PassRefPtr<StringImpl> removeCharacters(CharacterMatchFunctionPtr);
296
297    size_t find(UChar, unsigned index = 0);
298    size_t find(CharacterMatchFunctionPtr, unsigned index = 0);
299    size_t find(const char*, unsigned index = 0);
300    size_t find(StringImpl*, unsigned index = 0);
301    size_t findIgnoringCase(const char*, unsigned index = 0);
302    size_t findIgnoringCase(StringImpl*, unsigned index = 0);
303
304    size_t reverseFind(UChar, unsigned index = UINT_MAX);
305    size_t reverseFind(StringImpl*, unsigned index = UINT_MAX);
306    size_t reverseFindIgnoringCase(StringImpl*, unsigned index = UINT_MAX);
307
308    bool startsWith(StringImpl* str, bool caseSensitive = true) { return (caseSensitive ? reverseFind(str, 0) : reverseFindIgnoringCase(str, 0)) == 0; }
309    bool endsWith(StringImpl*, bool caseSensitive = true);
310
311    PassRefPtr<StringImpl> replace(UChar, UChar);
312    PassRefPtr<StringImpl> replace(UChar, StringImpl*);
313    PassRefPtr<StringImpl> replace(StringImpl*, StringImpl*);
314    PassRefPtr<StringImpl> replace(unsigned index, unsigned len, StringImpl*);
315
316    WTF::Unicode::Direction defaultWritingDirection(bool* hasStrongDirectionality = 0);
317
318#if USE(CF)
319    CFStringRef createCFString();
320#endif
321#ifdef __OBJC__
322    operator NSString*();
323#endif
324
325private:
326    // This number must be at least 2 to avoid sharing empty, null as well as 1 character strings from SmallStrings.
327    static const unsigned s_copyCharsInlineCutOff = 20;
328
329    static PassRefPtr<StringImpl> createStrippingNullCharactersSlowCase(const UChar*, unsigned length);
330
331    BufferOwnership bufferOwnership() const { return static_cast<BufferOwnership>(m_refCountAndFlags & s_refCountMaskBufferOwnership); }
332    bool isStatic() const { return m_refCountAndFlags & s_refCountFlagStatic; }
333    const UChar* m_data;
334    union {
335        void* m_buffer;
336        StringImpl* m_substringBuffer;
337        SharedUChar* m_sharedBuffer;
338    };
339    mutable unsigned m_hash;
340};
341
342bool equal(const StringImpl*, const StringImpl*);
343bool equal(const StringImpl*, const char*);
344inline bool equal(const char* a, StringImpl* b) { return equal(b, a); }
345
346bool equalIgnoringCase(StringImpl*, StringImpl*);
347bool equalIgnoringCase(StringImpl*, const char*);
348inline bool equalIgnoringCase(const char* a, StringImpl* b) { return equalIgnoringCase(b, a); }
349bool equalIgnoringCase(const UChar* a, const char* b, unsigned length);
350inline bool equalIgnoringCase(const char* a, const UChar* b, unsigned length) { return equalIgnoringCase(b, a, length); }
351
352bool equalIgnoringNullity(StringImpl*, StringImpl*);
353
354template<size_t inlineCapacity>
355bool equalIgnoringNullity(const Vector<UChar, inlineCapacity>& a, StringImpl* b)
356{
357    if (!b)
358        return !a.size();
359    if (a.size() != b->length())
360        return false;
361    return !memcmp(a.data(), b->characters(), b->length());
362}
363
// Compares two strings and returns an int; declared here, defined elsewhere.
// NOTE(review): presumably a three-way (negative / zero / positive) ordering by
// code point value, going by the name - confirm against the definition.
int codePointCompare(const StringImpl*, const StringImpl*);
365
366static inline bool isSpaceOrNewline(UChar c)
367{
368    // Use isASCIISpace() for basic Latin-1.
369    // This will include newlines, which aren't included in Unicode DirWS.
370    return c <= 0x7F ? WTF::isASCIISpace(c) : WTF::Unicode::direction(c) == WTF::Unicode::WhiteSpaceNeutral;
371}
372
373// This is a hot function because it's used when parsing HTML.
374inline PassRefPtr<StringImpl> StringImpl::createStrippingNullCharacters(const UChar* characters, unsigned length)
375{
376    ASSERT(characters);
377    ASSERT(length);
378
379    // Optimize for the case where there are no Null characters by quickly
380    // searching for nulls, and then using StringImpl::create, which will
381    // memcpy the whole buffer.  This is faster than assigning character by
382    // character during the loop.
383
384    // Fast case.
385    int foundNull = 0;
386    for (unsigned i = 0; !foundNull && i < length; i++) {
387        int c = characters[i]; // more efficient than using UChar here (at least on Intel Mac OS)
388        foundNull |= !c;
389    }
390    if (!foundNull)
391        return StringImpl::create(characters, length);
392
393    return StringImpl::createStrippingNullCharactersSlowCase(characters, length);
394}
395
// StringHash is only forward-declared here; its definition lives elsewhere.
struct StringHash;

// StringHash is the default hash for StringImpl* and RefPtr<StringImpl>.
// The primary DefaultHash template is declared but not defined in this header;
// the two explicit specializations below select StringHash via their Hash typedef.
template<typename T> struct DefaultHash;
template<> struct DefaultHash<StringImpl*> {
    typedef StringHash Hash;
};
template<> struct DefaultHash<RefPtr<StringImpl> > {
    typedef StringHash Hash;
};
406
407}
408
409using WTF::StringImpl;
410using WTF::equal;
411using WTF::TextCaseSensitivity;
412using WTF::TextCaseSensitive;
413using WTF::TextCaseInsensitive;
414
415#endif
416