1/*
2 * (C) 1999 Lars Knoll (knoll@kde.org)
3 * Copyright (C) 2004, 2005, 2006, 2007, 2008, 2009, 2010 Apple Inc. All rights reserved.
4 *
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Library General Public
7 * License as published by the Free Software Foundation; either
8 * version 2 of the License, or (at your option) any later version.
9 *
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
13 * Library General Public License for more details.
14 *
15 * You should have received a copy of the GNU Library General Public License
16 * along with this library; see the file COPYING.LIB.  If not, write to
17 * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
18 * Boston, MA 02110-1301, USA.
19 *
20 */
21
22#ifndef WTFString_h
23#define WTFString_h
24
25// This file would be called String.h, but that conflicts with <string.h>
26// on systems without case-sensitive file systems.
27
28#include "StringImpl.h"
29
30#ifdef __OBJC__
31#include <objc/objc.h>
32#endif
33
34#if USE(CF)
35typedef const struct __CFString * CFStringRef;
36#endif
37
38#if PLATFORM(QT)
39QT_BEGIN_NAMESPACE
40class QString;
41QT_END_NAMESPACE
42#include <QDataStream>
43#endif
44
45#if PLATFORM(WX)
46class wxString;
47#endif
48
49#if PLATFORM(HAIKU)
50class BString;
51#endif
52
53#if PLATFORM(BREWMP)
54// AECHAR is defined in AEEStdDef.h, but don't include it here to avoid conflicts.
55#ifndef _AECHAR_DEFINED
56typedef uint16             AECHAR;
57#define _AECHAR_DEFINED
58#endif
59#endif
60
61namespace WTF {
62
63class CString;
64struct StringHash;
65
66// Declarations of string operations
67
68bool charactersAreAllASCII(const UChar*, size_t);
69bool charactersAreAllLatin1(const UChar*, size_t);
70int charactersToIntStrict(const UChar*, size_t, bool* ok = 0, int base = 10);
71unsigned charactersToUIntStrict(const UChar*, size_t, bool* ok = 0, int base = 10);
72int64_t charactersToInt64Strict(const UChar*, size_t, bool* ok = 0, int base = 10);
73uint64_t charactersToUInt64Strict(const UChar*, size_t, bool* ok = 0, int base = 10);
74intptr_t charactersToIntPtrStrict(const UChar*, size_t, bool* ok = 0, int base = 10);
75
76int charactersToInt(const UChar*, size_t, bool* ok = 0); // ignores trailing garbage
77unsigned charactersToUInt(const UChar*, size_t, bool* ok = 0); // ignores trailing garbage
78int64_t charactersToInt64(const UChar*, size_t, bool* ok = 0); // ignores trailing garbage
79uint64_t charactersToUInt64(const UChar*, size_t, bool* ok = 0); // ignores trailing garbage
80intptr_t charactersToIntPtr(const UChar*, size_t, bool* ok = 0); // ignores trailing garbage
81
82double charactersToDouble(const UChar*, size_t, bool* ok = 0, bool* didReadNumber = 0);
83float charactersToFloat(const UChar*, size_t, bool* ok = 0, bool* didReadNumber = 0);
84
85template<bool isSpecialCharacter(UChar)> bool isAllSpecialCharacters(const UChar*, size_t);
86
87class String {
88public:
89    // Construct a null string, distinguishable from an empty string.
90    String() { }
91
92    // Construct a string with UTF-16 data.
93    String(const UChar* characters, unsigned length);
94
95    // Construct a string by copying the contents of a vector.  To avoid
96    // copying, consider using String::adopt instead.
97    template<size_t inlineCapacity>
98    explicit String(const Vector<UChar, inlineCapacity>&);
99
100    // Construct a string with UTF-16 data, from a null-terminated source.
101    String(const UChar*);
102
103    // Construct a string with latin1 data.
104    String(const char* characters, unsigned length);
105
106    // Construct a string with latin1 data, from a null-terminated source.
107    String(const char* characters);
108
109    // Construct a string referencing an existing StringImpl.
110    String(StringImpl* impl) : m_impl(impl) { }
111    String(PassRefPtr<StringImpl> impl) : m_impl(impl) { }
112    String(RefPtr<StringImpl> impl) : m_impl(impl) { }
113
114    // Inline the destructor.
115    ALWAYS_INLINE ~String() { }
116
117    void swap(String& o) { m_impl.swap(o.m_impl); }
118
119    static String adopt(StringBuffer& buffer) { return StringImpl::adopt(buffer); }
120    template<size_t inlineCapacity>
121    static String adopt(Vector<UChar, inlineCapacity>& vector) { return StringImpl::adopt(vector); }
122
123    bool isNull() const { return !m_impl; }
124    bool isEmpty() const { return !m_impl || !m_impl->length(); }
125
126    StringImpl* impl() const { return m_impl.get(); }
127
128    unsigned length() const
129    {
130        if (!m_impl)
131            return 0;
132        return m_impl->length();
133    }
134
135    const UChar* characters() const
136    {
137        if (!m_impl)
138            return 0;
139        return m_impl->characters();
140    }
141
142    CString ascii() const;
143    CString latin1() const;
144    CString utf8(bool strict = false) const;
145
146    UChar operator[](unsigned index) const
147    {
148        if (!m_impl || index >= m_impl->length())
149            return 0;
150        return m_impl->characters()[index];
151    }
152
153    static String number(short);
154    static String number(unsigned short);
155    static String number(int);
156    static String number(unsigned);
157    static String number(long);
158    static String number(unsigned long);
159    static String number(long long);
160    static String number(unsigned long long);
161    static String number(double);
162
163    // Find a single character or string, also with match function & latin1 forms.
164    size_t find(UChar c, unsigned start = 0) const
165        { return m_impl ? m_impl->find(c, start) : notFound; }
166    size_t find(const String& str, unsigned start = 0) const
167        { return m_impl ? m_impl->find(str.impl(), start) : notFound; }
168    size_t find(CharacterMatchFunctionPtr matchFunction, unsigned start = 0) const
169        { return m_impl ? m_impl->find(matchFunction, start) : notFound; }
170    size_t find(const char* str, unsigned start = 0) const
171        { return m_impl ? m_impl->find(str, start) : notFound; }
172
173    // Find the last instance of a single character or string.
174    size_t reverseFind(UChar c, unsigned start = UINT_MAX) const
175        { return m_impl ? m_impl->reverseFind(c, start) : notFound; }
176    size_t reverseFind(const String& str, unsigned start = UINT_MAX) const
177        { return m_impl ? m_impl->reverseFind(str.impl(), start) : notFound; }
178
179    // Case insensitive string matching.
180    size_t findIgnoringCase(const char* str, unsigned start = 0) const
181        { return m_impl ? m_impl->findIgnoringCase(str, start) : notFound; }
182    size_t findIgnoringCase(const String& str, unsigned start = 0) const
183        { return m_impl ? m_impl->findIgnoringCase(str.impl(), start) : notFound; }
184    size_t reverseFindIgnoringCase(const String& str, unsigned start = UINT_MAX) const
185        { return m_impl ? m_impl->reverseFindIgnoringCase(str.impl(), start) : notFound; }
186
187    // Wrappers for find & reverseFind adding dynamic sensitivity check.
188    size_t find(const char* str, unsigned start, bool caseSensitive) const
189        { return caseSensitive ? find(str, start) : findIgnoringCase(str, start); }
190    size_t find(const String& str, unsigned start, bool caseSensitive) const
191        { return caseSensitive ? find(str, start) : findIgnoringCase(str, start); }
192    size_t reverseFind(const String& str, unsigned start, bool caseSensitive) const
193        { return caseSensitive ? reverseFind(str, start) : reverseFindIgnoringCase(str, start); }
194
195    const UChar* charactersWithNullTermination();
196
197    UChar32 characterStartingAt(unsigned) const; // Ditto.
198
199    bool contains(UChar c) const { return find(c) != notFound; }
200    bool contains(const char* str, bool caseSensitive = true) const { return find(str, 0, caseSensitive) != notFound; }
201    bool contains(const String& str, bool caseSensitive = true) const { return find(str, 0, caseSensitive) != notFound; }
202
203    bool startsWith(const String& s, bool caseSensitive = true) const
204        { return m_impl ? m_impl->startsWith(s.impl(), caseSensitive) : s.isEmpty(); }
205    bool endsWith(const String& s, bool caseSensitive = true) const
206        { return m_impl ? m_impl->endsWith(s.impl(), caseSensitive) : s.isEmpty(); }
207
208    void append(const String&);
209    void append(char);
210    void append(UChar);
211    void append(const UChar*, unsigned length);
212    void insert(const String&, unsigned pos);
213    void insert(const UChar*, unsigned length, unsigned pos);
214
215    String& replace(UChar a, UChar b) { if (m_impl) m_impl = m_impl->replace(a, b); return *this; }
216    String& replace(UChar a, const String& b) { if (m_impl) m_impl = m_impl->replace(a, b.impl()); return *this; }
217    String& replace(const String& a, const String& b) { if (m_impl) m_impl = m_impl->replace(a.impl(), b.impl()); return *this; }
218    String& replace(unsigned index, unsigned len, const String& b) { if (m_impl) m_impl = m_impl->replace(index, len, b.impl()); return *this; }
219
220    void makeLower() { if (m_impl) m_impl = m_impl->lower(); }
221    void makeUpper() { if (m_impl) m_impl = m_impl->upper(); }
222    void makeSecure(UChar aChar) { if (m_impl) m_impl = m_impl->secure(aChar); }
223
224    void truncate(unsigned len);
225    void remove(unsigned pos, int len = 1);
226
227    String substring(unsigned pos, unsigned len = UINT_MAX) const;
228    String substringSharingImpl(unsigned pos, unsigned len = UINT_MAX) const;
229    String left(unsigned len) const { return substring(0, len); }
230    String right(unsigned len) const { return substring(length() - len, len); }
231
232    // Returns a lowercase/uppercase version of the string
233    String lower() const;
234    String upper() const;
235
236    String stripWhiteSpace() const;
237    String simplifyWhiteSpace() const;
238
239    String removeCharacters(CharacterMatchFunctionPtr) const;
240    template<bool isSpecialCharacter(UChar)> bool isAllSpecialCharacters() const;
241
242    // Return the string with case folded for case insensitive comparison.
243    String foldCase() const;
244
245#if !PLATFORM(QT)
246    static String format(const char *, ...) WTF_ATTRIBUTE_PRINTF(1, 2);
247#else
248    static String format(const char *, ...);
249#endif
250
251    // Returns an uninitialized string. The characters needs to be written
252    // into the buffer returned in data before the returned string is used.
253    // Failure to do this will have unpredictable results.
254    static String createUninitialized(unsigned length, UChar*& data) { return StringImpl::createUninitialized(length, data); }
255
256    void split(const String& separator, Vector<String>& result) const;
257    void split(const String& separator, bool allowEmptyEntries, Vector<String>& result) const;
258    void split(UChar separator, Vector<String>& result) const;
259    void split(UChar separator, bool allowEmptyEntries, Vector<String>& result) const;
260
261    int toIntStrict(bool* ok = 0, int base = 10) const;
262    unsigned toUIntStrict(bool* ok = 0, int base = 10) const;
263    int64_t toInt64Strict(bool* ok = 0, int base = 10) const;
264    uint64_t toUInt64Strict(bool* ok = 0, int base = 10) const;
265    intptr_t toIntPtrStrict(bool* ok = 0, int base = 10) const;
266
267    int toInt(bool* ok = 0) const;
268    unsigned toUInt(bool* ok = 0) const;
269    int64_t toInt64(bool* ok = 0) const;
270    uint64_t toUInt64(bool* ok = 0) const;
271    intptr_t toIntPtr(bool* ok = 0) const;
272    double toDouble(bool* ok = 0, bool* didReadNumber = 0) const;
273    float toFloat(bool* ok = 0, bool* didReadNumber = 0) const;
274
275    bool percentage(int& percentage) const;
276
277    // Returns a StringImpl suitable for use on another thread.
278    String crossThreadString() const;
279    // Makes a deep copy. Helpful only if you need to use a String on another thread
280    // (use crossThreadString if the method call doesn't need to be threadsafe).
281    // Since the underlying StringImpl objects are immutable, there's no other reason
282    // to ever prefer copy() over plain old assignment.
283    String threadsafeCopy() const;
284
285    // Prevent Strings from being implicitly convertable to bool as it will be ambiguous on any platform that
286    // allows implicit conversion to another pointer type (e.g., Mac allows implicit conversion to NSString*).
287    typedef struct ImplicitConversionFromWTFStringToBoolDisallowedA* (String::*UnspecifiedBoolTypeA);
288    typedef struct ImplicitConversionFromWTFStringToBoolDisallowedB* (String::*UnspecifiedBoolTypeB);
289    operator UnspecifiedBoolTypeA() const;
290    operator UnspecifiedBoolTypeB() const;
291
292#if USE(CF)
293    String(CFStringRef);
294    CFStringRef createCFString() const;
295#endif
296
297#ifdef __OBJC__
298    String(NSString*);
299
300    // This conversion maps NULL to "", which loses the meaning of NULL, but we
301    // need this mapping because AppKit crashes when passed nil NSStrings.
302    operator NSString*() const { if (!m_impl) return @""; return *m_impl; }
303#endif
304
305#if PLATFORM(QT)
306    String(const QString&);
307    String(const QStringRef&);
308    operator QString() const;
309#endif
310
311#if PLATFORM(WX)
312    String(const wxString&);
313    operator wxString() const;
314#endif
315
316#if PLATFORM(HAIKU)
317    String(const BString&);
318    operator BString() const;
319#endif
320
321#if PLATFORM(BREWMP)
322    String(const AECHAR*);
323#endif
324
325    // String::fromUTF8 will return a null string if
326    // the input data contains invalid UTF-8 sequences.
327    static String fromUTF8(const char*, size_t);
328    static String fromUTF8(const char*);
329
330    // Tries to convert the passed in string to UTF-8, but will fall back to Latin-1 if the string is not valid UTF-8.
331    static String fromUTF8WithLatin1Fallback(const char*, size_t);
332
333    // Determines the writing direction using the Unicode Bidi Algorithm rules P2 and P3.
334    WTF::Unicode::Direction defaultWritingDirection(bool* hasStrongDirectionality = 0) const
335    {
336        if (m_impl)
337            return m_impl->defaultWritingDirection(hasStrongDirectionality);
338        if (hasStrongDirectionality)
339            *hasStrongDirectionality = false;
340        return WTF::Unicode::LeftToRight;
341    }
342
343    bool containsOnlyASCII() const { return charactersAreAllASCII(characters(), length()); }
344    bool containsOnlyLatin1() const { return charactersAreAllLatin1(characters(), length()); }
345
346    // Hash table deleted values, which are only constructed and never copied or destroyed.
347    String(WTF::HashTableDeletedValueType) : m_impl(WTF::HashTableDeletedValue) { }
348    bool isHashTableDeletedValue() const { return m_impl.isHashTableDeletedValue(); }
349
350private:
351    RefPtr<StringImpl> m_impl;
352};
353
354#if PLATFORM(QT)
355QDataStream& operator<<(QDataStream& stream, const String& str);
356QDataStream& operator>>(QDataStream& stream, String& str);
357#endif
358
359String operator+(const String&, const String&);
360String operator+(const String&, const char*);
361String operator+(const char*, const String&);
362
363inline String& operator+=(String& a, const String& b) { a.append(b); return a; }
364
365inline bool operator==(const String& a, const String& b) { return equal(a.impl(), b.impl()); }
366inline bool operator==(const String& a, const char* b) { return equal(a.impl(), b); }
367inline bool operator==(const char* a, const String& b) { return equal(a, b.impl()); }
368
369inline bool operator!=(const String& a, const String& b) { return !equal(a.impl(), b.impl()); }
370inline bool operator!=(const String& a, const char* b) { return !equal(a.impl(), b); }
371inline bool operator!=(const char* a, const String& b) { return !equal(a, b.impl()); }
372
373inline bool equalIgnoringCase(const String& a, const String& b) { return equalIgnoringCase(a.impl(), b.impl()); }
374inline bool equalIgnoringCase(const String& a, const char* b) { return equalIgnoringCase(a.impl(), b); }
375inline bool equalIgnoringCase(const char* a, const String& b) { return equalIgnoringCase(a, b.impl()); }
376
377inline bool equalPossiblyIgnoringCase(const String& a, const String& b, bool ignoreCase)
378{
379    return ignoreCase ? equalIgnoringCase(a, b) : (a == b);
380}
381
382inline bool equalIgnoringNullity(const String& a, const String& b) { return equalIgnoringNullity(a.impl(), b.impl()); }
383
384template<size_t inlineCapacity>
385inline bool equalIgnoringNullity(const Vector<UChar, inlineCapacity>& a, const String& b) { return equalIgnoringNullity(a, b.impl()); }
386
387inline bool operator!(const String& str) { return str.isNull(); }
388
389inline void swap(String& a, String& b) { a.swap(b); }
390
391// Definitions of string operations
392
393template<size_t inlineCapacity>
394String::String(const Vector<UChar, inlineCapacity>& vector)
395    : m_impl(vector.size() ? StringImpl::create(vector.data(), vector.size()) : 0)
396{
397}
398
399#ifdef __OBJC__
400// This is for situations in WebKit where the long standing behavior has been
401// "nil if empty", so we try to maintain longstanding behavior for the sake of
402// entrenched clients
403inline NSString* nsStringNilIfEmpty(const String& str) {  return str.isEmpty() ? nil : (NSString*)str; }
404#endif
405
// Returns true when none of the first |length| code units has a bit set
// outside the 7-bit ASCII range. A zero-length range yields true.
inline bool charactersAreAllASCII(const UChar* characters, size_t length)
{
    // Bits 7 and above must all be clear for a code unit to be ASCII.
    const int nonASCIIBits = 0xFF80;

    // OR every code unit together so the range test happens once, after the
    // loop, instead of once per character.
    UChar combinedBits = 0;
    size_t position = 0;
    while (position < length) {
        combinedBits |= characters[position];
        ++position;
    }
    return (combinedBits & nonASCIIBits) == 0;
}
413
// Returns true when every one of the first |length| code units fits in the
// low 8 bits (i.e. is at most 0xFF). A zero-length range yields true.
inline bool charactersAreAllLatin1(const UChar* characters, size_t length)
{
    // Any bit in the upper byte puts a code unit outside Latin-1.
    const int nonLatin1Bits = 0xFF00;

    // Accumulate all code units with OR and test the combined value once.
    UChar combinedBits = 0;
    size_t position = 0;
    while (position < length) {
        combinedBits |= characters[position];
        ++position;
    }
    return (combinedBits & nonLatin1Bits) == 0;
}
421
422int codePointCompare(const String&, const String&);
423
424inline size_t find(const UChar* characters, unsigned length, UChar matchCharacter, unsigned index = 0)
425{
426    while (index < length) {
427        if (characters[index] == matchCharacter)
428            return index;
429        ++index;
430    }
431    return notFound;
432}
433
434inline size_t find(const UChar* characters, unsigned length, CharacterMatchFunctionPtr matchFunction, unsigned index = 0)
435{
436    while (index < length) {
437        if (matchFunction(characters[index]))
438            return index;
439        ++index;
440    }
441    return notFound;
442}
443
444inline size_t reverseFind(const UChar* characters, unsigned length, UChar matchCharacter, unsigned index = UINT_MAX)
445{
446    if (!length)
447        return notFound;
448    if (index >= length)
449        index = length - 1;
450    while (characters[index] != matchCharacter) {
451        if (!index--)
452            return notFound;
453    }
454    return index;
455}
456
457inline void append(Vector<UChar>& vector, const String& string)
458{
459    vector.append(string.characters(), string.length());
460}
461
462inline void appendNumber(Vector<UChar>& vector, unsigned char number)
463{
464    int numberLength = number > 99 ? 3 : (number > 9 ? 2 : 1);
465    size_t vectorSize = vector.size();
466    vector.grow(vectorSize + numberLength);
467
468    switch (numberLength) {
469    case 3:
470        vector[vectorSize + 2] = number % 10 + '0';
471        number /= 10;
472
473    case 2:
474        vector[vectorSize + 1] = number % 10 + '0';
475        number /= 10;
476
477    case 1:
478        vector[vectorSize] = number % 10 + '0';
479    }
480}
481
482template<bool isSpecialCharacter(UChar)> inline bool isAllSpecialCharacters(const UChar* characters, size_t length)
483{
484    for (size_t i = 0; i < length; ++i) {
485        if (!isSpecialCharacter(characters[i]))
486            return false;
487    }
488    return true;
489}
490
491template<bool isSpecialCharacter(UChar)> inline bool String::isAllSpecialCharacters() const
492{
493    return WTF::isAllSpecialCharacters<isSpecialCharacter>(characters(), length());
494}
495
496// StringHash is the default hash for String
497template<typename T> struct DefaultHash;
498template<> struct DefaultHash<String> {
499    typedef StringHash Hash;
500};
501
502template <> struct VectorTraits<String> : SimpleClassVectorTraits { };
503
504}
505
506using WTF::CString;
507using WTF::String;
508using WTF::append;
509using WTF::appendNumber;
510using WTF::charactersAreAllASCII;
511using WTF::charactersAreAllLatin1;
512using WTF::charactersToIntStrict;
513using WTF::charactersToUIntStrict;
514using WTF::charactersToInt64Strict;
515using WTF::charactersToUInt64Strict;
516using WTF::charactersToIntPtrStrict;
517using WTF::charactersToInt;
518using WTF::charactersToUInt;
519using WTF::charactersToInt64;
520using WTF::charactersToUInt64;
521using WTF::charactersToIntPtr;
522using WTF::charactersToDouble;
523using WTF::charactersToFloat;
524using WTF::equal;
525using WTF::equalIgnoringCase;
526using WTF::find;
527using WTF::isAllSpecialCharacters;
528using WTF::isSpaceOrNewline;
529using WTF::reverseFind;
530
531#endif
532