conversions-inl.h revision 85b71799222b55eb5dd74ea26efe0c64ab655c8c
1// Copyright 2011 the V8 project authors. All rights reserved. 2// Redistribution and use in source and binary forms, with or without 3// modification, are permitted provided that the following conditions are 4// met: 5// 6// * Redistributions of source code must retain the above copyright 7// notice, this list of conditions and the following disclaimer. 8// * Redistributions in binary form must reproduce the above 9// copyright notice, this list of conditions and the following 10// disclaimer in the documentation and/or other materials provided 11// with the distribution. 12// * Neither the name of Google Inc. nor the names of its 13// contributors may be used to endorse or promote products derived 14// from this software without specific prior written permission. 15// 16// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 17// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 18// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 19// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 20// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 21// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 22// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 26// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 28#ifndef V8_CONVERSIONS_INL_H_ 29#define V8_CONVERSIONS_INL_H_ 30 31#include <limits.h> // Required for INT_MAX etc. 32#include <math.h> 33#include <float.h> // Required for DBL_MAX and on Win32 for finite() 34#include <stdarg.h> 35#include "globals.h" // Required for V8_INFINITY 36 37// ---------------------------------------------------------------------------- 38// Extra POSIX/ANSI functions for Win32/MSVC. 39 40#include "conversions.h" 41#include "double.h" 42#include "platform.h" 43#include "scanner.h" 44#include "strtod.h" 45 46namespace v8 { 47namespace internal { 48 49static inline double JunkStringValue() { 50 return std::numeric_limits<double>::quiet_NaN(); 51} 52 53 54// The fast double-to-unsigned-int conversion routine does not guarantee 55// rounding towards zero, or any reasonable value if the argument is larger 56// than what fits in an unsigned 32-bit integer. 57static inline unsigned int FastD2UI(double x) { 58 // There is no unsigned version of lrint, so there is no fast path 59 // in this function as there is in FastD2I. Using lrint doesn't work 60 // for values of 2^31 and above. 61 62 // Convert "small enough" doubles to uint32_t by fixing the 32 63 // least significant non-fractional bits in the low 32 bits of the 64 // double, and reading them from there. 65 const double k2Pow52 = 4503599627370496.0; 66 bool negative = x < 0; 67 if (negative) { 68 x = -x; 69 } 70 if (x < k2Pow52) { 71 x += k2Pow52; 72 uint32_t result; 73 Address mantissa_ptr = reinterpret_cast<Address>(&x); 74 // Copy least significant 32 bits of mantissa. 75 memcpy(&result, mantissa_ptr, sizeof(result)); 76 return negative ? ~result + 1 : result; 77 } 78 // Large number (outside uint32 range), Infinity or NaN. 79 return 0x80000000u; // Return integer indefinite. 80} 81 82 83static inline double DoubleToInteger(double x) { 84 if (isnan(x)) return 0; 85 if (!isfinite(x) || x == 0) return x; 86 return (x >= 0) ? floor(x) : ceil(x); 87} 88 89 90int32_t DoubleToInt32(double x) { 91 int32_t i = FastD2I(x); 92 if (FastI2D(i) == x) return i; 93 Double d(x); 94 int exponent = d.Exponent(); 95 if (exponent < 0) { 96 if (exponent <= -Double::kSignificandSize) return 0; 97 return d.Sign() * static_cast<int32_t>(d.Significand() >> -exponent); 98 } else { 99 if (exponent > 31) return 0; 100 return d.Sign() * static_cast<int32_t>(d.Significand() << exponent); 101 } 102} 103 104 105template <class Iterator, class EndMark> 106static bool SubStringEquals(Iterator* current, 107 EndMark end, 108 const char* substring) { 109 ASSERT(**current == *substring); 110 for (substring++; *substring != '\0'; substring++) { 111 ++*current; 112 if (*current == end || **current != *substring) return false; 113 } 114 ++*current; 115 return true; 116} 117 118 119// Returns true if a nonspace character has been found and false if the 120// end was been reached before finding a nonspace character. 121template <class Iterator, class EndMark> 122static inline bool AdvanceToNonspace(UnicodeCache* unicode_cache, 123 Iterator* current, 124 EndMark end) { 125 while (*current != end) { 126 if (!unicode_cache->IsWhiteSpace(**current)) return true; 127 ++*current; 128 } 129 return false; 130} 131 132 133// Parsing integers with radix 2, 4, 8, 16, 32. Assumes current != end. 134template <int radix_log_2, class Iterator, class EndMark> 135static double InternalStringToIntDouble(UnicodeCache* unicode_cache, 136 Iterator current, 137 EndMark end, 138 bool negative, 139 bool allow_trailing_junk) { 140 ASSERT(current != end); 141 142 // Skip leading 0s. 143 while (*current == '0') { 144 ++current; 145 if (current == end) return SignedZero(negative); 146 } 147 148 int64_t number = 0; 149 int exponent = 0; 150 const int radix = (1 << radix_log_2); 151 152 do { 153 int digit; 154 if (*current >= '0' && *current <= '9' && *current < '0' + radix) { 155 digit = static_cast<char>(*current) - '0'; 156 } else if (radix > 10 && *current >= 'a' && *current < 'a' + radix - 10) { 157 digit = static_cast<char>(*current) - 'a' + 10; 158 } else if (radix > 10 && *current >= 'A' && *current < 'A' + radix - 10) { 159 digit = static_cast<char>(*current) - 'A' + 10; 160 } else { 161 if (allow_trailing_junk || 162 !AdvanceToNonspace(unicode_cache, ¤t, end)) { 163 break; 164 } else { 165 return JunkStringValue(); 166 } 167 } 168 169 number = number * radix + digit; 170 int overflow = static_cast<int>(number >> 53); 171 if (overflow != 0) { 172 // Overflow occurred. Need to determine which direction to round the 173 // result. 174 int overflow_bits_count = 1; 175 while (overflow > 1) { 176 overflow_bits_count++; 177 overflow >>= 1; 178 } 179 180 int dropped_bits_mask = ((1 << overflow_bits_count) - 1); 181 int dropped_bits = static_cast<int>(number) & dropped_bits_mask; 182 number >>= overflow_bits_count; 183 exponent = overflow_bits_count; 184 185 bool zero_tail = true; 186 while (true) { 187 ++current; 188 if (current == end || !isDigit(*current, radix)) break; 189 zero_tail = zero_tail && *current == '0'; 190 exponent += radix_log_2; 191 } 192 193 if (!allow_trailing_junk && 194 AdvanceToNonspace(unicode_cache, ¤t, end)) { 195 return JunkStringValue(); 196 } 197 198 int middle_value = (1 << (overflow_bits_count - 1)); 199 if (dropped_bits > middle_value) { 200 number++; // Rounding up. 201 } else if (dropped_bits == middle_value) { 202 // Rounding to even to consistency with decimals: half-way case rounds 203 // up if significant part is odd and down otherwise. 204 if ((number & 1) != 0 || !zero_tail) { 205 number++; // Rounding up. 206 } 207 } 208 209 // Rounding up may cause overflow. 210 if ((number & ((int64_t)1 << 53)) != 0) { 211 exponent++; 212 number >>= 1; 213 } 214 break; 215 } 216 ++current; 217 } while (current != end); 218 219 ASSERT(number < ((int64_t)1 << 53)); 220 ASSERT(static_cast<int64_t>(static_cast<double>(number)) == number); 221 222 if (exponent == 0) { 223 if (negative) { 224 if (number == 0) return -0.0; 225 number = -number; 226 } 227 return static_cast<double>(number); 228 } 229 230 ASSERT(number != 0); 231 // The double could be constructed faster from number (mantissa), exponent 232 // and sign. Assuming it's a rare case more simple code is used. 233 return static_cast<double>(negative ? -number : number) * pow(2.0, exponent); 234} 235 236 237template <class Iterator, class EndMark> 238static double InternalStringToInt(UnicodeCache* unicode_cache, 239 Iterator current, 240 EndMark end, 241 int radix) { 242 const bool allow_trailing_junk = true; 243 const double empty_string_val = JunkStringValue(); 244 245 if (!AdvanceToNonspace(unicode_cache, ¤t, end)) { 246 return empty_string_val; 247 } 248 249 bool negative = false; 250 bool leading_zero = false; 251 252 if (*current == '+') { 253 // Ignore leading sign; skip following spaces. 254 ++current; 255 if (current == end) { 256 return JunkStringValue(); 257 } 258 } else if (*current == '-') { 259 ++current; 260 if (current == end) { 261 return JunkStringValue(); 262 } 263 negative = true; 264 } 265 266 if (radix == 0) { 267 // Radix detection. 268 if (*current == '0') { 269 ++current; 270 if (current == end) return SignedZero(negative); 271 if (*current == 'x' || *current == 'X') { 272 radix = 16; 273 ++current; 274 if (current == end) return JunkStringValue(); 275 } else { 276 radix = 8; 277 leading_zero = true; 278 } 279 } else { 280 radix = 10; 281 } 282 } else if (radix == 16) { 283 if (*current == '0') { 284 // Allow "0x" prefix. 285 ++current; 286 if (current == end) return SignedZero(negative); 287 if (*current == 'x' || *current == 'X') { 288 ++current; 289 if (current == end) return JunkStringValue(); 290 } else { 291 leading_zero = true; 292 } 293 } 294 } 295 296 if (radix < 2 || radix > 36) return JunkStringValue(); 297 298 // Skip leading zeros. 299 while (*current == '0') { 300 leading_zero = true; 301 ++current; 302 if (current == end) return SignedZero(negative); 303 } 304 305 if (!leading_zero && !isDigit(*current, radix)) { 306 return JunkStringValue(); 307 } 308 309 if (IsPowerOf2(radix)) { 310 switch (radix) { 311 case 2: 312 return InternalStringToIntDouble<1>( 313 unicode_cache, current, end, negative, allow_trailing_junk); 314 case 4: 315 return InternalStringToIntDouble<2>( 316 unicode_cache, current, end, negative, allow_trailing_junk); 317 case 8: 318 return InternalStringToIntDouble<3>( 319 unicode_cache, current, end, negative, allow_trailing_junk); 320 321 case 16: 322 return InternalStringToIntDouble<4>( 323 unicode_cache, current, end, negative, allow_trailing_junk); 324 325 case 32: 326 return InternalStringToIntDouble<5>( 327 unicode_cache, current, end, negative, allow_trailing_junk); 328 default: 329 UNREACHABLE(); 330 } 331 } 332 333 if (radix == 10) { 334 // Parsing with strtod. 335 const int kMaxSignificantDigits = 309; // Doubles are less than 1.8e308. 336 // The buffer may contain up to kMaxSignificantDigits + 1 digits and a zero 337 // end. 338 const int kBufferSize = kMaxSignificantDigits + 2; 339 char buffer[kBufferSize]; 340 int buffer_pos = 0; 341 while (*current >= '0' && *current <= '9') { 342 if (buffer_pos <= kMaxSignificantDigits) { 343 // If the number has more than kMaxSignificantDigits it will be parsed 344 // as infinity. 345 ASSERT(buffer_pos < kBufferSize); 346 buffer[buffer_pos++] = static_cast<char>(*current); 347 } 348 ++current; 349 if (current == end) break; 350 } 351 352 if (!allow_trailing_junk && 353 AdvanceToNonspace(unicode_cache, ¤t, end)) { 354 return JunkStringValue(); 355 } 356 357 ASSERT(buffer_pos < kBufferSize); 358 buffer[buffer_pos] = '\0'; 359 Vector<const char> buffer_vector(buffer, buffer_pos); 360 return negative ? -Strtod(buffer_vector, 0) : Strtod(buffer_vector, 0); 361 } 362 363 // The following code causes accumulating rounding error for numbers greater 364 // than ~2^56. It's explicitly allowed in the spec: "if R is not 2, 4, 8, 10, 365 // 16, or 32, then mathInt may be an implementation-dependent approximation to 366 // the mathematical integer value" (15.1.2.2). 367 368 int lim_0 = '0' + (radix < 10 ? radix : 10); 369 int lim_a = 'a' + (radix - 10); 370 int lim_A = 'A' + (radix - 10); 371 372 // NOTE: The code for computing the value may seem a bit complex at 373 // first glance. It is structured to use 32-bit multiply-and-add 374 // loops as long as possible to avoid loosing precision. 375 376 double v = 0.0; 377 bool done = false; 378 do { 379 // Parse the longest part of the string starting at index j 380 // possible while keeping the multiplier, and thus the part 381 // itself, within 32 bits. 382 unsigned int part = 0, multiplier = 1; 383 while (true) { 384 int d; 385 if (*current >= '0' && *current < lim_0) { 386 d = *current - '0'; 387 } else if (*current >= 'a' && *current < lim_a) { 388 d = *current - 'a' + 10; 389 } else if (*current >= 'A' && *current < lim_A) { 390 d = *current - 'A' + 10; 391 } else { 392 done = true; 393 break; 394 } 395 396 // Update the value of the part as long as the multiplier fits 397 // in 32 bits. When we can't guarantee that the next iteration 398 // will not overflow the multiplier, we stop parsing the part 399 // by leaving the loop. 400 const unsigned int kMaximumMultiplier = 0xffffffffU / 36; 401 uint32_t m = multiplier * radix; 402 if (m > kMaximumMultiplier) break; 403 part = part * radix + d; 404 multiplier = m; 405 ASSERT(multiplier > part); 406 407 ++current; 408 if (current == end) { 409 done = true; 410 break; 411 } 412 } 413 414 // Update the value and skip the part in the string. 415 v = v * multiplier + part; 416 } while (!done); 417 418 if (!allow_trailing_junk && 419 AdvanceToNonspace(unicode_cache, ¤t, end)) { 420 return JunkStringValue(); 421 } 422 423 return negative ? -v : v; 424} 425 426 427// Converts a string to a double value. Assumes the Iterator supports 428// the following operations: 429// 1. current == end (other ops are not allowed), current != end. 430// 2. *current - gets the current character in the sequence. 431// 3. ++current (advances the position). 432template <class Iterator, class EndMark> 433static double InternalStringToDouble(UnicodeCache* unicode_cache, 434 Iterator current, 435 EndMark end, 436 int flags, 437 double empty_string_val) { 438 // To make sure that iterator dereferencing is valid the following 439 // convention is used: 440 // 1. Each '++current' statement is followed by check for equality to 'end'. 441 // 2. If AdvanceToNonspace returned false then current == end. 442 // 3. If 'current' becomes be equal to 'end' the function returns or goes to 443 // 'parsing_done'. 444 // 4. 'current' is not dereferenced after the 'parsing_done' label. 445 // 5. Code before 'parsing_done' may rely on 'current != end'. 446 if (!AdvanceToNonspace(unicode_cache, ¤t, end)) { 447 return empty_string_val; 448 } 449 450 const bool allow_trailing_junk = (flags & ALLOW_TRAILING_JUNK) != 0; 451 452 // The longest form of simplified number is: "-<significant digits>'.1eXXX\0". 453 const int kBufferSize = kMaxSignificantDigits + 10; 454 char buffer[kBufferSize]; // NOLINT: size is known at compile time. 455 int buffer_pos = 0; 456 457 // Exponent will be adjusted if insignificant digits of the integer part 458 // or insignificant leading zeros of the fractional part are dropped. 459 int exponent = 0; 460 int significant_digits = 0; 461 int insignificant_digits = 0; 462 bool nonzero_digit_dropped = false; 463 464 bool negative = false; 465 466 if (*current == '+') { 467 // Ignore leading sign. 468 ++current; 469 if (current == end) return JunkStringValue(); 470 } else if (*current == '-') { 471 ++current; 472 if (current == end) return JunkStringValue(); 473 negative = true; 474 } 475 476 static const char kInfinitySymbol[] = "Infinity"; 477 if (*current == kInfinitySymbol[0]) { 478 if (!SubStringEquals(¤t, end, kInfinitySymbol)) { 479 return JunkStringValue(); 480 } 481 482 if (!allow_trailing_junk && 483 AdvanceToNonspace(unicode_cache, ¤t, end)) { 484 return JunkStringValue(); 485 } 486 487 ASSERT(buffer_pos == 0); 488 return negative ? -V8_INFINITY : V8_INFINITY; 489 } 490 491 bool leading_zero = false; 492 if (*current == '0') { 493 ++current; 494 if (current == end) return SignedZero(negative); 495 496 leading_zero = true; 497 498 // It could be hexadecimal value. 499 if ((flags & ALLOW_HEX) && (*current == 'x' || *current == 'X')) { 500 ++current; 501 if (current == end || !isDigit(*current, 16)) { 502 return JunkStringValue(); // "0x". 503 } 504 505 return InternalStringToIntDouble<4>(unicode_cache, 506 current, 507 end, 508 negative, 509 allow_trailing_junk); 510 } 511 512 // Ignore leading zeros in the integer part. 513 while (*current == '0') { 514 ++current; 515 if (current == end) return SignedZero(negative); 516 } 517 } 518 519 bool octal = leading_zero && (flags & ALLOW_OCTALS) != 0; 520 521 // Copy significant digits of the integer part (if any) to the buffer. 522 while (*current >= '0' && *current <= '9') { 523 if (significant_digits < kMaxSignificantDigits) { 524 ASSERT(buffer_pos < kBufferSize); 525 buffer[buffer_pos++] = static_cast<char>(*current); 526 significant_digits++; 527 // Will later check if it's an octal in the buffer. 528 } else { 529 insignificant_digits++; // Move the digit into the exponential part. 530 nonzero_digit_dropped = nonzero_digit_dropped || *current != '0'; 531 } 532 octal = octal && *current < '8'; 533 ++current; 534 if (current == end) goto parsing_done; 535 } 536 537 if (significant_digits == 0) { 538 octal = false; 539 } 540 541 if (*current == '.') { 542 if (octal && !allow_trailing_junk) return JunkStringValue(); 543 if (octal) goto parsing_done; 544 545 ++current; 546 if (current == end) { 547 if (significant_digits == 0 && !leading_zero) { 548 return JunkStringValue(); 549 } else { 550 goto parsing_done; 551 } 552 } 553 554 if (significant_digits == 0) { 555 // octal = false; 556 // Integer part consists of 0 or is absent. Significant digits start after 557 // leading zeros (if any). 558 while (*current == '0') { 559 ++current; 560 if (current == end) return SignedZero(negative); 561 exponent--; // Move this 0 into the exponent. 562 } 563 } 564 565 // There is a fractional part. We don't emit a '.', but adjust the exponent 566 // instead. 567 while (*current >= '0' && *current <= '9') { 568 if (significant_digits < kMaxSignificantDigits) { 569 ASSERT(buffer_pos < kBufferSize); 570 buffer[buffer_pos++] = static_cast<char>(*current); 571 significant_digits++; 572 exponent--; 573 } else { 574 // Ignore insignificant digits in the fractional part. 575 nonzero_digit_dropped = nonzero_digit_dropped || *current != '0'; 576 } 577 ++current; 578 if (current == end) goto parsing_done; 579 } 580 } 581 582 if (!leading_zero && exponent == 0 && significant_digits == 0) { 583 // If leading_zeros is true then the string contains zeros. 584 // If exponent < 0 then string was [+-]\.0*... 585 // If significant_digits != 0 the string is not equal to 0. 586 // Otherwise there are no digits in the string. 587 return JunkStringValue(); 588 } 589 590 // Parse exponential part. 591 if (*current == 'e' || *current == 'E') { 592 if (octal) return JunkStringValue(); 593 ++current; 594 if (current == end) { 595 if (allow_trailing_junk) { 596 goto parsing_done; 597 } else { 598 return JunkStringValue(); 599 } 600 } 601 char sign = '+'; 602 if (*current == '+' || *current == '-') { 603 sign = static_cast<char>(*current); 604 ++current; 605 if (current == end) { 606 if (allow_trailing_junk) { 607 goto parsing_done; 608 } else { 609 return JunkStringValue(); 610 } 611 } 612 } 613 614 if (current == end || *current < '0' || *current > '9') { 615 if (allow_trailing_junk) { 616 goto parsing_done; 617 } else { 618 return JunkStringValue(); 619 } 620 } 621 622 const int max_exponent = INT_MAX / 2; 623 ASSERT(-max_exponent / 2 <= exponent && exponent <= max_exponent / 2); 624 int num = 0; 625 do { 626 // Check overflow. 627 int digit = *current - '0'; 628 if (num >= max_exponent / 10 629 && !(num == max_exponent / 10 && digit <= max_exponent % 10)) { 630 num = max_exponent; 631 } else { 632 num = num * 10 + digit; 633 } 634 ++current; 635 } while (current != end && *current >= '0' && *current <= '9'); 636 637 exponent += (sign == '-' ? -num : num); 638 } 639 640 if (!allow_trailing_junk && 641 AdvanceToNonspace(unicode_cache, ¤t, end)) { 642 return JunkStringValue(); 643 } 644 645 parsing_done: 646 exponent += insignificant_digits; 647 648 if (octal) { 649 return InternalStringToIntDouble<3>(unicode_cache, 650 buffer, 651 buffer + buffer_pos, 652 negative, 653 allow_trailing_junk); 654 } 655 656 if (nonzero_digit_dropped) { 657 buffer[buffer_pos++] = '1'; 658 exponent--; 659 } 660 661 ASSERT(buffer_pos < kBufferSize); 662 buffer[buffer_pos] = '\0'; 663 664 double converted = Strtod(Vector<const char>(buffer, buffer_pos), exponent); 665 return negative ? -converted : converted; 666} 667 668} } // namespace v8::internal 669 670#endif // V8_CONVERSIONS_INL_H_ 671