168513a70bcd92384395513322f1b801e7bf9c729Steve Block/*
268513a70bcd92384395513322f1b801e7bf9c729Steve Block * Copyright (C) 2010 Apple Inc. All rights reserved.
368513a70bcd92384395513322f1b801e7bf9c729Steve Block *
468513a70bcd92384395513322f1b801e7bf9c729Steve Block * Redistribution and use in source and binary forms, with or without
568513a70bcd92384395513322f1b801e7bf9c729Steve Block * modification, are permitted provided that the following conditions
668513a70bcd92384395513322f1b801e7bf9c729Steve Block * are met:
768513a70bcd92384395513322f1b801e7bf9c729Steve Block * 1.  Redistributions of source code must retain the above copyright
868513a70bcd92384395513322f1b801e7bf9c729Steve Block *     notice, this list of conditions and the following disclaimer.
968513a70bcd92384395513322f1b801e7bf9c729Steve Block * 2.  Redistributions in binary form must reproduce the above copyright
1068513a70bcd92384395513322f1b801e7bf9c729Steve Block *     notice, this list of conditions and the following disclaimer in the
1168513a70bcd92384395513322f1b801e7bf9c729Steve Block *     documentation and/or other materials provided with the distribution.
1268513a70bcd92384395513322f1b801e7bf9c729Steve Block *
1368513a70bcd92384395513322f1b801e7bf9c729Steve Block * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' AND ANY
1468513a70bcd92384395513322f1b801e7bf9c729Steve Block * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
1568513a70bcd92384395513322f1b801e7bf9c729Steve Block * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
1668513a70bcd92384395513322f1b801e7bf9c729Steve Block * DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS BE LIABLE FOR ANY
1768513a70bcd92384395513322f1b801e7bf9c729Steve Block * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
1868513a70bcd92384395513322f1b801e7bf9c729Steve Block * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
1968513a70bcd92384395513322f1b801e7bf9c729Steve Block * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
2068513a70bcd92384395513322f1b801e7bf9c729Steve Block * ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
2168513a70bcd92384395513322f1b801e7bf9c729Steve Block * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
2268513a70bcd92384395513322f1b801e7bf9c729Steve Block * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
2368513a70bcd92384395513322f1b801e7bf9c729Steve Block */
2468513a70bcd92384395513322f1b801e7bf9c729Steve Block
2568513a70bcd92384395513322f1b801e7bf9c729Steve Block#include "config.h"
2668513a70bcd92384395513322f1b801e7bf9c729Steve Block#include "HTMLParserIdioms.h"
2768513a70bcd92384395513322f1b801e7bf9c729Steve Block
282fc2651226baac27029e38c9d6ef883fa32084dbSteve Block#include <limits>
2968513a70bcd92384395513322f1b801e7bf9c729Steve Block#include <wtf/MathExtras.h>
3068513a70bcd92384395513322f1b801e7bf9c729Steve Block#include <wtf/dtoa.h>
3168513a70bcd92384395513322f1b801e7bf9c729Steve Block#include <wtf/text/AtomicString.h>
3268513a70bcd92384395513322f1b801e7bf9c729Steve Block
3368513a70bcd92384395513322f1b801e7bf9c729Steve Blocknamespace WebCore {
3468513a70bcd92384395513322f1b801e7bf9c729Steve Block
3568513a70bcd92384395513322f1b801e7bf9c729Steve BlockString stripLeadingAndTrailingHTMLSpaces(const String& string)
3668513a70bcd92384395513322f1b801e7bf9c729Steve Block{
3768513a70bcd92384395513322f1b801e7bf9c729Steve Block    const UChar* characters = string.characters();
3868513a70bcd92384395513322f1b801e7bf9c729Steve Block    unsigned length = string.length();
3968513a70bcd92384395513322f1b801e7bf9c729Steve Block
4068513a70bcd92384395513322f1b801e7bf9c729Steve Block    unsigned numLeadingSpaces;
4168513a70bcd92384395513322f1b801e7bf9c729Steve Block    for (numLeadingSpaces = 0; numLeadingSpaces < length; ++numLeadingSpaces) {
4268513a70bcd92384395513322f1b801e7bf9c729Steve Block        if (isNotHTMLSpace(characters[numLeadingSpaces]))
4368513a70bcd92384395513322f1b801e7bf9c729Steve Block            break;
4468513a70bcd92384395513322f1b801e7bf9c729Steve Block    }
4568513a70bcd92384395513322f1b801e7bf9c729Steve Block
4668513a70bcd92384395513322f1b801e7bf9c729Steve Block    if (numLeadingSpaces == length)
47a94275402997c11dd2e778633dacf4b7e630a35dBen Murdoch        return string.isNull() ? string : emptyAtom.string();
4868513a70bcd92384395513322f1b801e7bf9c729Steve Block
4968513a70bcd92384395513322f1b801e7bf9c729Steve Block    unsigned numTrailingSpaces;
5068513a70bcd92384395513322f1b801e7bf9c729Steve Block    for (numTrailingSpaces = 0; numTrailingSpaces < length; ++numTrailingSpaces) {
5168513a70bcd92384395513322f1b801e7bf9c729Steve Block        if (isNotHTMLSpace(characters[length - numTrailingSpaces - 1]))
5268513a70bcd92384395513322f1b801e7bf9c729Steve Block            break;
5368513a70bcd92384395513322f1b801e7bf9c729Steve Block    }
5468513a70bcd92384395513322f1b801e7bf9c729Steve Block
5568513a70bcd92384395513322f1b801e7bf9c729Steve Block    ASSERT(numLeadingSpaces + numTrailingSpaces < length);
5668513a70bcd92384395513322f1b801e7bf9c729Steve Block
57a94275402997c11dd2e778633dacf4b7e630a35dBen Murdoch    return string.substring(numLeadingSpaces, length - (numLeadingSpaces + numTrailingSpaces));
5868513a70bcd92384395513322f1b801e7bf9c729Steve Block}
5968513a70bcd92384395513322f1b801e7bf9c729Steve Block
6068513a70bcd92384395513322f1b801e7bf9c729Steve BlockString serializeForNumberType(double number)
6168513a70bcd92384395513322f1b801e7bf9c729Steve Block{
6268513a70bcd92384395513322f1b801e7bf9c729Steve Block    // According to HTML5, "the best representation of the number n as a floating
6368513a70bcd92384395513322f1b801e7bf9c729Steve Block    // point number" is a string produced by applying ToString() to n.
6468513a70bcd92384395513322f1b801e7bf9c729Steve Block    NumberToStringBuffer buffer;
6568513a70bcd92384395513322f1b801e7bf9c729Steve Block    unsigned length = numberToString(number, buffer);
6668513a70bcd92384395513322f1b801e7bf9c729Steve Block    return String(buffer, length);
6768513a70bcd92384395513322f1b801e7bf9c729Steve Block}
6868513a70bcd92384395513322f1b801e7bf9c729Steve Block
6968513a70bcd92384395513322f1b801e7bf9c729Steve Blockbool parseToDoubleForNumberType(const String& string, double* result)
7068513a70bcd92384395513322f1b801e7bf9c729Steve Block{
7168513a70bcd92384395513322f1b801e7bf9c729Steve Block    // See HTML5 2.4.4.3 `Real numbers.'
7268513a70bcd92384395513322f1b801e7bf9c729Steve Block
7368513a70bcd92384395513322f1b801e7bf9c729Steve Block    // String::toDouble() accepts leading + and whitespace characters, which are not valid here.
7468513a70bcd92384395513322f1b801e7bf9c729Steve Block    UChar firstCharacter = string[0];
7568513a70bcd92384395513322f1b801e7bf9c729Steve Block    if (firstCharacter != '-' && !isASCIIDigit(firstCharacter))
7668513a70bcd92384395513322f1b801e7bf9c729Steve Block        return false;
7768513a70bcd92384395513322f1b801e7bf9c729Steve Block
7868513a70bcd92384395513322f1b801e7bf9c729Steve Block    bool valid = false;
7968513a70bcd92384395513322f1b801e7bf9c729Steve Block    double value = string.toDouble(&valid);
8068513a70bcd92384395513322f1b801e7bf9c729Steve Block    if (!valid)
8168513a70bcd92384395513322f1b801e7bf9c729Steve Block        return false;
8268513a70bcd92384395513322f1b801e7bf9c729Steve Block
8368513a70bcd92384395513322f1b801e7bf9c729Steve Block    // NaN and infinity are considered valid by String::toDouble, but not valid here.
8468513a70bcd92384395513322f1b801e7bf9c729Steve Block    if (!isfinite(value))
8568513a70bcd92384395513322f1b801e7bf9c729Steve Block        return false;
8668513a70bcd92384395513322f1b801e7bf9c729Steve Block
87e14391e94c850b8bd03680c23b38978db68687a8John Reck    // Numbers are considered finite IEEE 754 single-precision floating point values.
88e14391e94c850b8bd03680c23b38978db68687a8John Reck    // See HTML5 2.4.4.3 `Real numbers.'
892fc2651226baac27029e38c9d6ef883fa32084dbSteve Block    if (-std::numeric_limits<float>::max() > value || value > std::numeric_limits<float>::max())
90e14391e94c850b8bd03680c23b38978db68687a8John Reck        return false;
91e14391e94c850b8bd03680c23b38978db68687a8John Reck
9268513a70bcd92384395513322f1b801e7bf9c729Steve Block    if (result) {
9368513a70bcd92384395513322f1b801e7bf9c729Steve Block        // The following expression converts -0 to +0.
9468513a70bcd92384395513322f1b801e7bf9c729Steve Block        *result = value ? value : 0;
9568513a70bcd92384395513322f1b801e7bf9c729Steve Block    }
9668513a70bcd92384395513322f1b801e7bf9c729Steve Block
9768513a70bcd92384395513322f1b801e7bf9c729Steve Block    return true;
9868513a70bcd92384395513322f1b801e7bf9c729Steve Block}
9968513a70bcd92384395513322f1b801e7bf9c729Steve Block
1006b70adc33054f8aee8c54d0f460458a9df11b8a5Russell Brennerbool parseToDoubleForNumberTypeWithDecimalPlaces(const String& string, double *result, unsigned *decimalPlaces)
1016b70adc33054f8aee8c54d0f460458a9df11b8a5Russell Brenner{
1026b70adc33054f8aee8c54d0f460458a9df11b8a5Russell Brenner    if (decimalPlaces)
1036b70adc33054f8aee8c54d0f460458a9df11b8a5Russell Brenner        *decimalPlaces = 0;
1046b70adc33054f8aee8c54d0f460458a9df11b8a5Russell Brenner
1056b70adc33054f8aee8c54d0f460458a9df11b8a5Russell Brenner    if (!parseToDoubleForNumberType(string, result))
1066b70adc33054f8aee8c54d0f460458a9df11b8a5Russell Brenner        return false;
1076b70adc33054f8aee8c54d0f460458a9df11b8a5Russell Brenner
1086b70adc33054f8aee8c54d0f460458a9df11b8a5Russell Brenner    if (!decimalPlaces)
1096b70adc33054f8aee8c54d0f460458a9df11b8a5Russell Brenner        return true;
1106b70adc33054f8aee8c54d0f460458a9df11b8a5Russell Brenner
1116b70adc33054f8aee8c54d0f460458a9df11b8a5Russell Brenner    size_t dotIndex = string.find('.');
1126b70adc33054f8aee8c54d0f460458a9df11b8a5Russell Brenner    size_t eIndex = string.find('e');
1136b70adc33054f8aee8c54d0f460458a9df11b8a5Russell Brenner    if (eIndex == notFound)
1146b70adc33054f8aee8c54d0f460458a9df11b8a5Russell Brenner        eIndex = string.find('E');
1156b70adc33054f8aee8c54d0f460458a9df11b8a5Russell Brenner
1166b70adc33054f8aee8c54d0f460458a9df11b8a5Russell Brenner    unsigned baseDecimalPlaces = 0;
1176b70adc33054f8aee8c54d0f460458a9df11b8a5Russell Brenner    if (dotIndex != notFound) {
1186b70adc33054f8aee8c54d0f460458a9df11b8a5Russell Brenner        if (eIndex == notFound)
1196b70adc33054f8aee8c54d0f460458a9df11b8a5Russell Brenner            baseDecimalPlaces = string.length() - dotIndex - 1;
1206b70adc33054f8aee8c54d0f460458a9df11b8a5Russell Brenner        else
1216b70adc33054f8aee8c54d0f460458a9df11b8a5Russell Brenner            baseDecimalPlaces = eIndex - dotIndex - 1;
1226b70adc33054f8aee8c54d0f460458a9df11b8a5Russell Brenner    }
1236b70adc33054f8aee8c54d0f460458a9df11b8a5Russell Brenner
1246b70adc33054f8aee8c54d0f460458a9df11b8a5Russell Brenner    int exponent = 0;
1256b70adc33054f8aee8c54d0f460458a9df11b8a5Russell Brenner    if (eIndex != notFound) {
1266b70adc33054f8aee8c54d0f460458a9df11b8a5Russell Brenner        unsigned cursor = eIndex + 1, cursorSaved;
1276b70adc33054f8aee8c54d0f460458a9df11b8a5Russell Brenner        int digit, exponentSign;
1286b70adc33054f8aee8c54d0f460458a9df11b8a5Russell Brenner        int32_t exponent32;
1296b70adc33054f8aee8c54d0f460458a9df11b8a5Russell Brenner        size_t length = string.length();
1306b70adc33054f8aee8c54d0f460458a9df11b8a5Russell Brenner
1316b70adc33054f8aee8c54d0f460458a9df11b8a5Russell Brenner        // Not using String.toInt() in order to perform the same computation as dtoa() does.
1326b70adc33054f8aee8c54d0f460458a9df11b8a5Russell Brenner        exponentSign = 0;
1336b70adc33054f8aee8c54d0f460458a9df11b8a5Russell Brenner        switch (digit = string[cursor]) {
1346b70adc33054f8aee8c54d0f460458a9df11b8a5Russell Brenner        case '-':
1356b70adc33054f8aee8c54d0f460458a9df11b8a5Russell Brenner            exponentSign = 1;
1366b70adc33054f8aee8c54d0f460458a9df11b8a5Russell Brenner        case '+':
1376b70adc33054f8aee8c54d0f460458a9df11b8a5Russell Brenner            digit = string[++cursor];
1386b70adc33054f8aee8c54d0f460458a9df11b8a5Russell Brenner        }
1396b70adc33054f8aee8c54d0f460458a9df11b8a5Russell Brenner        if (digit >= '0' && digit <= '9') {
1406b70adc33054f8aee8c54d0f460458a9df11b8a5Russell Brenner            while (cursor < length && digit == '0')
1416b70adc33054f8aee8c54d0f460458a9df11b8a5Russell Brenner                digit = string[++cursor];
1426b70adc33054f8aee8c54d0f460458a9df11b8a5Russell Brenner            if (digit > '0' && digit <= '9') {
1436b70adc33054f8aee8c54d0f460458a9df11b8a5Russell Brenner                exponent32 = digit - '0';
1446b70adc33054f8aee8c54d0f460458a9df11b8a5Russell Brenner                cursorSaved = cursor;
1456b70adc33054f8aee8c54d0f460458a9df11b8a5Russell Brenner                while (cursor < length && (digit = string[++cursor]) >= '0' && digit <= '9')
1466b70adc33054f8aee8c54d0f460458a9df11b8a5Russell Brenner                    exponent32 = (10 * exponent32) + digit - '0';
1476b70adc33054f8aee8c54d0f460458a9df11b8a5Russell Brenner                if (cursor - cursorSaved > 8 || exponent32 > 19999)
1486b70adc33054f8aee8c54d0f460458a9df11b8a5Russell Brenner                    /* Avoid confusion from exponents
1496b70adc33054f8aee8c54d0f460458a9df11b8a5Russell Brenner                     * so large that e might overflow.
1506b70adc33054f8aee8c54d0f460458a9df11b8a5Russell Brenner                     */
1516b70adc33054f8aee8c54d0f460458a9df11b8a5Russell Brenner                    exponent = 19999; /* safe for 16 bit ints */
1526b70adc33054f8aee8c54d0f460458a9df11b8a5Russell Brenner                else
1536b70adc33054f8aee8c54d0f460458a9df11b8a5Russell Brenner                    exponent = static_cast<int>(exponent32);
1546b70adc33054f8aee8c54d0f460458a9df11b8a5Russell Brenner                if (exponentSign)
1556b70adc33054f8aee8c54d0f460458a9df11b8a5Russell Brenner                    exponent = -exponent;
1566b70adc33054f8aee8c54d0f460458a9df11b8a5Russell Brenner            } else
1576b70adc33054f8aee8c54d0f460458a9df11b8a5Russell Brenner                exponent = 0;
1586b70adc33054f8aee8c54d0f460458a9df11b8a5Russell Brenner        }
1596b70adc33054f8aee8c54d0f460458a9df11b8a5Russell Brenner    }
1606b70adc33054f8aee8c54d0f460458a9df11b8a5Russell Brenner
1616b70adc33054f8aee8c54d0f460458a9df11b8a5Russell Brenner    int intDecimalPlaces = baseDecimalPlaces - exponent;
1626b70adc33054f8aee8c54d0f460458a9df11b8a5Russell Brenner    if (intDecimalPlaces < 0)
1636b70adc33054f8aee8c54d0f460458a9df11b8a5Russell Brenner        *decimalPlaces = 0;
1646b70adc33054f8aee8c54d0f460458a9df11b8a5Russell Brenner    else if (intDecimalPlaces > 19999)
1656b70adc33054f8aee8c54d0f460458a9df11b8a5Russell Brenner        *decimalPlaces = 19999;
1666b70adc33054f8aee8c54d0f460458a9df11b8a5Russell Brenner    else
1676b70adc33054f8aee8c54d0f460458a9df11b8a5Russell Brenner        *decimalPlaces = static_cast<unsigned>(intDecimalPlaces);
1686b70adc33054f8aee8c54d0f460458a9df11b8a5Russell Brenner
1696b70adc33054f8aee8c54d0f460458a9df11b8a5Russell Brenner    return true;
1706b70adc33054f8aee8c54d0f460458a9df11b8a5Russell Brenner}
1716b70adc33054f8aee8c54d0f460458a9df11b8a5Russell Brenner
17268513a70bcd92384395513322f1b801e7bf9c729Steve Block// http://www.whatwg.org/specs/web-apps/current-work/#rules-for-parsing-integers
17368513a70bcd92384395513322f1b801e7bf9c729Steve Blockbool parseHTMLInteger(const String& input, int& value)
17468513a70bcd92384395513322f1b801e7bf9c729Steve Block{
17568513a70bcd92384395513322f1b801e7bf9c729Steve Block    // Step 1
17668513a70bcd92384395513322f1b801e7bf9c729Steve Block    // Step 2
17768513a70bcd92384395513322f1b801e7bf9c729Steve Block    const UChar* position = input.characters();
17868513a70bcd92384395513322f1b801e7bf9c729Steve Block    const UChar* end = position + input.length();
17968513a70bcd92384395513322f1b801e7bf9c729Steve Block
18068513a70bcd92384395513322f1b801e7bf9c729Steve Block    // Step 3
18168513a70bcd92384395513322f1b801e7bf9c729Steve Block    int sign = 1;
18268513a70bcd92384395513322f1b801e7bf9c729Steve Block
18368513a70bcd92384395513322f1b801e7bf9c729Steve Block    // Step 4
18468513a70bcd92384395513322f1b801e7bf9c729Steve Block    while (position < end) {
18568513a70bcd92384395513322f1b801e7bf9c729Steve Block        if (!isHTMLSpace(*position))
18668513a70bcd92384395513322f1b801e7bf9c729Steve Block            break;
18768513a70bcd92384395513322f1b801e7bf9c729Steve Block        ++position;
18868513a70bcd92384395513322f1b801e7bf9c729Steve Block    }
18968513a70bcd92384395513322f1b801e7bf9c729Steve Block
19068513a70bcd92384395513322f1b801e7bf9c729Steve Block    // Step 5
19168513a70bcd92384395513322f1b801e7bf9c729Steve Block    if (position == end)
19268513a70bcd92384395513322f1b801e7bf9c729Steve Block        return false;
19368513a70bcd92384395513322f1b801e7bf9c729Steve Block    ASSERT(position < end);
19468513a70bcd92384395513322f1b801e7bf9c729Steve Block
19568513a70bcd92384395513322f1b801e7bf9c729Steve Block    // Step 6
19668513a70bcd92384395513322f1b801e7bf9c729Steve Block    if (*position == '-') {
19768513a70bcd92384395513322f1b801e7bf9c729Steve Block        sign = -1;
19868513a70bcd92384395513322f1b801e7bf9c729Steve Block        ++position;
19968513a70bcd92384395513322f1b801e7bf9c729Steve Block    } else if (*position == '+')
20068513a70bcd92384395513322f1b801e7bf9c729Steve Block        ++position;
20168513a70bcd92384395513322f1b801e7bf9c729Steve Block    if (position == end)
20268513a70bcd92384395513322f1b801e7bf9c729Steve Block        return false;
20368513a70bcd92384395513322f1b801e7bf9c729Steve Block    ASSERT(position < end);
20468513a70bcd92384395513322f1b801e7bf9c729Steve Block
20568513a70bcd92384395513322f1b801e7bf9c729Steve Block    // Step 7
20668513a70bcd92384395513322f1b801e7bf9c729Steve Block    if (!isASCIIDigit(*position))
20768513a70bcd92384395513322f1b801e7bf9c729Steve Block        return false;
20868513a70bcd92384395513322f1b801e7bf9c729Steve Block
20968513a70bcd92384395513322f1b801e7bf9c729Steve Block    // Step 8
21068513a70bcd92384395513322f1b801e7bf9c729Steve Block    Vector<UChar, 16> digits;
21168513a70bcd92384395513322f1b801e7bf9c729Steve Block    while (position < end) {
21268513a70bcd92384395513322f1b801e7bf9c729Steve Block        if (!isASCIIDigit(*position))
21368513a70bcd92384395513322f1b801e7bf9c729Steve Block            break;
21468513a70bcd92384395513322f1b801e7bf9c729Steve Block        digits.append(*position++);
21568513a70bcd92384395513322f1b801e7bf9c729Steve Block    }
21668513a70bcd92384395513322f1b801e7bf9c729Steve Block
21768513a70bcd92384395513322f1b801e7bf9c729Steve Block    // Step 9
21868513a70bcd92384395513322f1b801e7bf9c729Steve Block    value = sign * charactersToIntStrict(digits.data(), digits.size());
21968513a70bcd92384395513322f1b801e7bf9c729Steve Block    return true;
22068513a70bcd92384395513322f1b801e7bf9c729Steve Block}
22168513a70bcd92384395513322f1b801e7bf9c729Steve Block
22268513a70bcd92384395513322f1b801e7bf9c729Steve Block}
223