conversions-inl.h revision 85b71799222b55eb5dd74ea26efe0c64ab655c8c
1// Copyright 2011 the V8 project authors. All rights reserved.
2// Redistribution and use in source and binary forms, with or without
3// modification, are permitted provided that the following conditions are
4// met:
5//
6//     * Redistributions of source code must retain the above copyright
7//       notice, this list of conditions and the following disclaimer.
8//     * Redistributions in binary form must reproduce the above
9//       copyright notice, this list of conditions and the following
10//       disclaimer in the documentation and/or other materials provided
11//       with the distribution.
12//     * Neither the name of Google Inc. nor the names of its
13//       contributors may be used to endorse or promote products derived
14//       from this software without specific prior written permission.
15//
16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
17// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
18// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
19// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
20// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
21// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
22// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
26// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27
28#ifndef V8_CONVERSIONS_INL_H_
29#define V8_CONVERSIONS_INL_H_
30
31#include <limits.h>        // Required for INT_MAX etc.
32#include <math.h>
33#include <float.h>         // Required for DBL_MAX and on Win32 for finite()
34#include <stdarg.h>
35#include "globals.h"       // Required for V8_INFINITY
36
37// ----------------------------------------------------------------------------
38// Extra POSIX/ANSI functions for Win32/MSVC.
39
40#include "conversions.h"
41#include "double.h"
42#include "platform.h"
43#include "scanner.h"
44#include "strtod.h"
45
46namespace v8 {
47namespace internal {
48
49static inline double JunkStringValue() {
50  return std::numeric_limits<double>::quiet_NaN();
51}
52
53
54// The fast double-to-unsigned-int conversion routine does not guarantee
55// rounding towards zero, or any reasonable value if the argument is larger
56// than what fits in an unsigned 32-bit integer.
57static inline unsigned int FastD2UI(double x) {
58  // There is no unsigned version of lrint, so there is no fast path
59  // in this function as there is in FastD2I. Using lrint doesn't work
60  // for values of 2^31 and above.
61
62  // Convert "small enough" doubles to uint32_t by fixing the 32
63  // least significant non-fractional bits in the low 32 bits of the
64  // double, and reading them from there.
65  const double k2Pow52 = 4503599627370496.0;
66  bool negative = x < 0;
67  if (negative) {
68    x = -x;
69  }
70  if (x < k2Pow52) {
71    x += k2Pow52;
72    uint32_t result;
73    Address mantissa_ptr = reinterpret_cast<Address>(&x);
74    // Copy least significant 32 bits of mantissa.
75    memcpy(&result, mantissa_ptr, sizeof(result));
76    return negative ? ~result + 1 : result;
77  }
78  // Large number (outside uint32 range), Infinity or NaN.
79  return 0x80000000u;  // Return integer indefinite.
80}
81
82
83static inline double DoubleToInteger(double x) {
84  if (isnan(x)) return 0;
85  if (!isfinite(x) || x == 0) return x;
86  return (x >= 0) ? floor(x) : ceil(x);
87}
88
89
90int32_t DoubleToInt32(double x) {
91  int32_t i = FastD2I(x);
92  if (FastI2D(i) == x) return i;
93  Double d(x);
94  int exponent = d.Exponent();
95  if (exponent < 0) {
96    if (exponent <= -Double::kSignificandSize) return 0;
97    return d.Sign() * static_cast<int32_t>(d.Significand() >> -exponent);
98  } else {
99    if (exponent > 31) return 0;
100    return d.Sign() * static_cast<int32_t>(d.Significand() << exponent);
101  }
102}
103
104
105template <class Iterator, class EndMark>
106static bool SubStringEquals(Iterator* current,
107                            EndMark end,
108                            const char* substring) {
109  ASSERT(**current == *substring);
110  for (substring++; *substring != '\0'; substring++) {
111    ++*current;
112    if (*current == end || **current != *substring) return false;
113  }
114  ++*current;
115  return true;
116}
117
118
119// Returns true if a nonspace character has been found and false if the
120// end was been reached before finding a nonspace character.
121template <class Iterator, class EndMark>
122static inline bool AdvanceToNonspace(UnicodeCache* unicode_cache,
123                                     Iterator* current,
124                                     EndMark end) {
125  while (*current != end) {
126    if (!unicode_cache->IsWhiteSpace(**current)) return true;
127    ++*current;
128  }
129  return false;
130}
131
132
133// Parsing integers with radix 2, 4, 8, 16, 32. Assumes current != end.
134template <int radix_log_2, class Iterator, class EndMark>
135static double InternalStringToIntDouble(UnicodeCache* unicode_cache,
136                                        Iterator current,
137                                        EndMark end,
138                                        bool negative,
139                                        bool allow_trailing_junk) {
140  ASSERT(current != end);
141
142  // Skip leading 0s.
143  while (*current == '0') {
144    ++current;
145    if (current == end) return SignedZero(negative);
146  }
147
148  int64_t number = 0;
149  int exponent = 0;
150  const int radix = (1 << radix_log_2);
151
152  do {
153    int digit;
154    if (*current >= '0' && *current <= '9' && *current < '0' + radix) {
155      digit = static_cast<char>(*current) - '0';
156    } else if (radix > 10 && *current >= 'a' && *current < 'a' + radix - 10) {
157      digit = static_cast<char>(*current) - 'a' + 10;
158    } else if (radix > 10 && *current >= 'A' && *current < 'A' + radix - 10) {
159      digit = static_cast<char>(*current) - 'A' + 10;
160    } else {
161      if (allow_trailing_junk ||
162          !AdvanceToNonspace(unicode_cache, &current, end)) {
163        break;
164      } else {
165        return JunkStringValue();
166      }
167    }
168
169    number = number * radix + digit;
170    int overflow = static_cast<int>(number >> 53);
171    if (overflow != 0) {
172      // Overflow occurred. Need to determine which direction to round the
173      // result.
174      int overflow_bits_count = 1;
175      while (overflow > 1) {
176        overflow_bits_count++;
177        overflow >>= 1;
178      }
179
180      int dropped_bits_mask = ((1 << overflow_bits_count) - 1);
181      int dropped_bits = static_cast<int>(number) & dropped_bits_mask;
182      number >>= overflow_bits_count;
183      exponent = overflow_bits_count;
184
185      bool zero_tail = true;
186      while (true) {
187        ++current;
188        if (current == end || !isDigit(*current, radix)) break;
189        zero_tail = zero_tail && *current == '0';
190        exponent += radix_log_2;
191      }
192
193      if (!allow_trailing_junk &&
194          AdvanceToNonspace(unicode_cache, &current, end)) {
195        return JunkStringValue();
196      }
197
198      int middle_value = (1 << (overflow_bits_count - 1));
199      if (dropped_bits > middle_value) {
200        number++;  // Rounding up.
201      } else if (dropped_bits == middle_value) {
202        // Rounding to even to consistency with decimals: half-way case rounds
203        // up if significant part is odd and down otherwise.
204        if ((number & 1) != 0 || !zero_tail) {
205          number++;  // Rounding up.
206        }
207      }
208
209      // Rounding up may cause overflow.
210      if ((number & ((int64_t)1 << 53)) != 0) {
211        exponent++;
212        number >>= 1;
213      }
214      break;
215    }
216    ++current;
217  } while (current != end);
218
219  ASSERT(number < ((int64_t)1 << 53));
220  ASSERT(static_cast<int64_t>(static_cast<double>(number)) == number);
221
222  if (exponent == 0) {
223    if (negative) {
224      if (number == 0) return -0.0;
225      number = -number;
226    }
227    return static_cast<double>(number);
228  }
229
230  ASSERT(number != 0);
231  // The double could be constructed faster from number (mantissa), exponent
232  // and sign. Assuming it's a rare case more simple code is used.
233  return static_cast<double>(negative ? -number : number) * pow(2.0, exponent);
234}
235
236
237template <class Iterator, class EndMark>
238static double InternalStringToInt(UnicodeCache* unicode_cache,
239                                  Iterator current,
240                                  EndMark end,
241                                  int radix) {
242  const bool allow_trailing_junk = true;
243  const double empty_string_val = JunkStringValue();
244
245  if (!AdvanceToNonspace(unicode_cache, &current, end)) {
246    return empty_string_val;
247  }
248
249  bool negative = false;
250  bool leading_zero = false;
251
252  if (*current == '+') {
253    // Ignore leading sign; skip following spaces.
254    ++current;
255    if (current == end) {
256      return JunkStringValue();
257    }
258  } else if (*current == '-') {
259    ++current;
260    if (current == end) {
261      return JunkStringValue();
262    }
263    negative = true;
264  }
265
266  if (radix == 0) {
267    // Radix detection.
268    if (*current == '0') {
269      ++current;
270      if (current == end) return SignedZero(negative);
271      if (*current == 'x' || *current == 'X') {
272        radix = 16;
273        ++current;
274        if (current == end) return JunkStringValue();
275      } else {
276        radix = 8;
277        leading_zero = true;
278      }
279    } else {
280      radix = 10;
281    }
282  } else if (radix == 16) {
283    if (*current == '0') {
284      // Allow "0x" prefix.
285      ++current;
286      if (current == end) return SignedZero(negative);
287      if (*current == 'x' || *current == 'X') {
288        ++current;
289        if (current == end) return JunkStringValue();
290      } else {
291        leading_zero = true;
292      }
293    }
294  }
295
296  if (radix < 2 || radix > 36) return JunkStringValue();
297
298  // Skip leading zeros.
299  while (*current == '0') {
300    leading_zero = true;
301    ++current;
302    if (current == end) return SignedZero(negative);
303  }
304
305  if (!leading_zero && !isDigit(*current, radix)) {
306    return JunkStringValue();
307  }
308
309  if (IsPowerOf2(radix)) {
310    switch (radix) {
311      case 2:
312        return InternalStringToIntDouble<1>(
313            unicode_cache, current, end, negative, allow_trailing_junk);
314      case 4:
315        return InternalStringToIntDouble<2>(
316            unicode_cache, current, end, negative, allow_trailing_junk);
317      case 8:
318        return InternalStringToIntDouble<3>(
319            unicode_cache, current, end, negative, allow_trailing_junk);
320
321      case 16:
322        return InternalStringToIntDouble<4>(
323            unicode_cache, current, end, negative, allow_trailing_junk);
324
325      case 32:
326        return InternalStringToIntDouble<5>(
327            unicode_cache, current, end, negative, allow_trailing_junk);
328      default:
329        UNREACHABLE();
330    }
331  }
332
333  if (radix == 10) {
334    // Parsing with strtod.
335    const int kMaxSignificantDigits = 309;  // Doubles are less than 1.8e308.
336    // The buffer may contain up to kMaxSignificantDigits + 1 digits and a zero
337    // end.
338    const int kBufferSize = kMaxSignificantDigits + 2;
339    char buffer[kBufferSize];
340    int buffer_pos = 0;
341    while (*current >= '0' && *current <= '9') {
342      if (buffer_pos <= kMaxSignificantDigits) {
343        // If the number has more than kMaxSignificantDigits it will be parsed
344        // as infinity.
345        ASSERT(buffer_pos < kBufferSize);
346        buffer[buffer_pos++] = static_cast<char>(*current);
347      }
348      ++current;
349      if (current == end) break;
350    }
351
352    if (!allow_trailing_junk &&
353        AdvanceToNonspace(unicode_cache, &current, end)) {
354      return JunkStringValue();
355    }
356
357    ASSERT(buffer_pos < kBufferSize);
358    buffer[buffer_pos] = '\0';
359    Vector<const char> buffer_vector(buffer, buffer_pos);
360    return negative ? -Strtod(buffer_vector, 0) : Strtod(buffer_vector, 0);
361  }
362
363  // The following code causes accumulating rounding error for numbers greater
364  // than ~2^56. It's explicitly allowed in the spec: "if R is not 2, 4, 8, 10,
365  // 16, or 32, then mathInt may be an implementation-dependent approximation to
366  // the mathematical integer value" (15.1.2.2).
367
368  int lim_0 = '0' + (radix < 10 ? radix : 10);
369  int lim_a = 'a' + (radix - 10);
370  int lim_A = 'A' + (radix - 10);
371
372  // NOTE: The code for computing the value may seem a bit complex at
373  // first glance. It is structured to use 32-bit multiply-and-add
374  // loops as long as possible to avoid loosing precision.
375
376  double v = 0.0;
377  bool done = false;
378  do {
379    // Parse the longest part of the string starting at index j
380    // possible while keeping the multiplier, and thus the part
381    // itself, within 32 bits.
382    unsigned int part = 0, multiplier = 1;
383    while (true) {
384      int d;
385      if (*current >= '0' && *current < lim_0) {
386        d = *current - '0';
387      } else if (*current >= 'a' && *current < lim_a) {
388        d = *current - 'a' + 10;
389      } else if (*current >= 'A' && *current < lim_A) {
390        d = *current - 'A' + 10;
391      } else {
392        done = true;
393        break;
394      }
395
396      // Update the value of the part as long as the multiplier fits
397      // in 32 bits. When we can't guarantee that the next iteration
398      // will not overflow the multiplier, we stop parsing the part
399      // by leaving the loop.
400      const unsigned int kMaximumMultiplier = 0xffffffffU / 36;
401      uint32_t m = multiplier * radix;
402      if (m > kMaximumMultiplier) break;
403      part = part * radix + d;
404      multiplier = m;
405      ASSERT(multiplier > part);
406
407      ++current;
408      if (current == end) {
409        done = true;
410        break;
411      }
412    }
413
414    // Update the value and skip the part in the string.
415    v = v * multiplier + part;
416  } while (!done);
417
418  if (!allow_trailing_junk &&
419      AdvanceToNonspace(unicode_cache, &current, end)) {
420    return JunkStringValue();
421  }
422
423  return negative ? -v : v;
424}
425
426
427// Converts a string to a double value. Assumes the Iterator supports
428// the following operations:
429// 1. current == end (other ops are not allowed), current != end.
430// 2. *current - gets the current character in the sequence.
431// 3. ++current (advances the position).
432template <class Iterator, class EndMark>
433static double InternalStringToDouble(UnicodeCache* unicode_cache,
434                                     Iterator current,
435                                     EndMark end,
436                                     int flags,
437                                     double empty_string_val) {
438  // To make sure that iterator dereferencing is valid the following
439  // convention is used:
440  // 1. Each '++current' statement is followed by check for equality to 'end'.
441  // 2. If AdvanceToNonspace returned false then current == end.
442  // 3. If 'current' becomes be equal to 'end' the function returns or goes to
443  // 'parsing_done'.
444  // 4. 'current' is not dereferenced after the 'parsing_done' label.
445  // 5. Code before 'parsing_done' may rely on 'current != end'.
446  if (!AdvanceToNonspace(unicode_cache, &current, end)) {
447    return empty_string_val;
448  }
449
450  const bool allow_trailing_junk = (flags & ALLOW_TRAILING_JUNK) != 0;
451
452  // The longest form of simplified number is: "-<significant digits>'.1eXXX\0".
453  const int kBufferSize = kMaxSignificantDigits + 10;
454  char buffer[kBufferSize];  // NOLINT: size is known at compile time.
455  int buffer_pos = 0;
456
457  // Exponent will be adjusted if insignificant digits of the integer part
458  // or insignificant leading zeros of the fractional part are dropped.
459  int exponent = 0;
460  int significant_digits = 0;
461  int insignificant_digits = 0;
462  bool nonzero_digit_dropped = false;
463
464  bool negative = false;
465
466  if (*current == '+') {
467    // Ignore leading sign.
468    ++current;
469    if (current == end) return JunkStringValue();
470  } else if (*current == '-') {
471    ++current;
472    if (current == end) return JunkStringValue();
473    negative = true;
474  }
475
476  static const char kInfinitySymbol[] = "Infinity";
477  if (*current == kInfinitySymbol[0]) {
478    if (!SubStringEquals(&current, end, kInfinitySymbol)) {
479      return JunkStringValue();
480    }
481
482    if (!allow_trailing_junk &&
483        AdvanceToNonspace(unicode_cache, &current, end)) {
484      return JunkStringValue();
485    }
486
487    ASSERT(buffer_pos == 0);
488    return negative ? -V8_INFINITY : V8_INFINITY;
489  }
490
491  bool leading_zero = false;
492  if (*current == '0') {
493    ++current;
494    if (current == end) return SignedZero(negative);
495
496    leading_zero = true;
497
498    // It could be hexadecimal value.
499    if ((flags & ALLOW_HEX) && (*current == 'x' || *current == 'X')) {
500      ++current;
501      if (current == end || !isDigit(*current, 16)) {
502        return JunkStringValue();  // "0x".
503      }
504
505      return InternalStringToIntDouble<4>(unicode_cache,
506                                          current,
507                                          end,
508                                          negative,
509                                          allow_trailing_junk);
510    }
511
512    // Ignore leading zeros in the integer part.
513    while (*current == '0') {
514      ++current;
515      if (current == end) return SignedZero(negative);
516    }
517  }
518
519  bool octal = leading_zero && (flags & ALLOW_OCTALS) != 0;
520
521  // Copy significant digits of the integer part (if any) to the buffer.
522  while (*current >= '0' && *current <= '9') {
523    if (significant_digits < kMaxSignificantDigits) {
524      ASSERT(buffer_pos < kBufferSize);
525      buffer[buffer_pos++] = static_cast<char>(*current);
526      significant_digits++;
527      // Will later check if it's an octal in the buffer.
528    } else {
529      insignificant_digits++;  // Move the digit into the exponential part.
530      nonzero_digit_dropped = nonzero_digit_dropped || *current != '0';
531    }
532    octal = octal && *current < '8';
533    ++current;
534    if (current == end) goto parsing_done;
535  }
536
537  if (significant_digits == 0) {
538    octal = false;
539  }
540
541  if (*current == '.') {
542    if (octal && !allow_trailing_junk) return JunkStringValue();
543    if (octal) goto parsing_done;
544
545    ++current;
546    if (current == end) {
547      if (significant_digits == 0 && !leading_zero) {
548        return JunkStringValue();
549      } else {
550        goto parsing_done;
551      }
552    }
553
554    if (significant_digits == 0) {
555      // octal = false;
556      // Integer part consists of 0 or is absent. Significant digits start after
557      // leading zeros (if any).
558      while (*current == '0') {
559        ++current;
560        if (current == end) return SignedZero(negative);
561        exponent--;  // Move this 0 into the exponent.
562      }
563    }
564
565    // There is a fractional part.  We don't emit a '.', but adjust the exponent
566    // instead.
567    while (*current >= '0' && *current <= '9') {
568      if (significant_digits < kMaxSignificantDigits) {
569        ASSERT(buffer_pos < kBufferSize);
570        buffer[buffer_pos++] = static_cast<char>(*current);
571        significant_digits++;
572        exponent--;
573      } else {
574        // Ignore insignificant digits in the fractional part.
575        nonzero_digit_dropped = nonzero_digit_dropped || *current != '0';
576      }
577      ++current;
578      if (current == end) goto parsing_done;
579    }
580  }
581
582  if (!leading_zero && exponent == 0 && significant_digits == 0) {
583    // If leading_zeros is true then the string contains zeros.
584    // If exponent < 0 then string was [+-]\.0*...
585    // If significant_digits != 0 the string is not equal to 0.
586    // Otherwise there are no digits in the string.
587    return JunkStringValue();
588  }
589
590  // Parse exponential part.
591  if (*current == 'e' || *current == 'E') {
592    if (octal) return JunkStringValue();
593    ++current;
594    if (current == end) {
595      if (allow_trailing_junk) {
596        goto parsing_done;
597      } else {
598        return JunkStringValue();
599      }
600    }
601    char sign = '+';
602    if (*current == '+' || *current == '-') {
603      sign = static_cast<char>(*current);
604      ++current;
605      if (current == end) {
606        if (allow_trailing_junk) {
607          goto parsing_done;
608        } else {
609          return JunkStringValue();
610        }
611      }
612    }
613
614    if (current == end || *current < '0' || *current > '9') {
615      if (allow_trailing_junk) {
616        goto parsing_done;
617      } else {
618        return JunkStringValue();
619      }
620    }
621
622    const int max_exponent = INT_MAX / 2;
623    ASSERT(-max_exponent / 2 <= exponent && exponent <= max_exponent / 2);
624    int num = 0;
625    do {
626      // Check overflow.
627      int digit = *current - '0';
628      if (num >= max_exponent / 10
629          && !(num == max_exponent / 10 && digit <= max_exponent % 10)) {
630        num = max_exponent;
631      } else {
632        num = num * 10 + digit;
633      }
634      ++current;
635    } while (current != end && *current >= '0' && *current <= '9');
636
637    exponent += (sign == '-' ? -num : num);
638  }
639
640  if (!allow_trailing_junk &&
641      AdvanceToNonspace(unicode_cache, &current, end)) {
642    return JunkStringValue();
643  }
644
645  parsing_done:
646  exponent += insignificant_digits;
647
648  if (octal) {
649    return InternalStringToIntDouble<3>(unicode_cache,
650                                        buffer,
651                                        buffer + buffer_pos,
652                                        negative,
653                                        allow_trailing_junk);
654  }
655
656  if (nonzero_digit_dropped) {
657    buffer[buffer_pos++] = '1';
658    exponent--;
659  }
660
661  ASSERT(buffer_pos < kBufferSize);
662  buffer[buffer_pos] = '\0';
663
664  double converted = Strtod(Vector<const char>(buffer, buffer_pos), exponent);
665  return negative ? -converted : converted;
666}
667
668} }  // namespace v8::internal
669
670#endif  // V8_CONVERSIONS_INL_H_
671