1/* -*- Mode: C; c-file-style: "python" -*- */
2
3#include <Python.h>
4#include <locale.h>
5
/* Prefix test used when recognising "nan" and "inf".  Each character of s is
   passed through Py_TOLOWER before being compared with the corresponding
   character of the pattern t, so t should be given in lower case.  Returns 1
   when all of t was matched at the start of s, 0 otherwise. */

static int
case_insensitive_match(const char *s, const char *t)
{
    for (; *t != '\0'; s++, t++) {
        /* A mismatch (including s ending early) fails immediately. */
        if (Py_TOLOWER(*s) != *t) {
            return 0;
        }
    }
    /* The whole pattern was consumed. */
    return 1;
}
18
19/* _Py_parse_inf_or_nan: Attempt to parse a string of the form "nan", "inf" or
20   "infinity", with an optional leading sign of "+" or "-".  On success,
21   return the NaN or Infinity as a double and set *endptr to point just beyond
22   the successfully parsed portion of the string.  On failure, return -1.0 and
23   set *endptr to point to the start of the string. */
24
25#ifndef PY_NO_SHORT_FLOAT_REPR
26
/* Variant compiled when PY_NO_SHORT_FLOAT_REPR is not defined: the special
   values are produced by _Py_dg_infinity() and _Py_dg_stdnan(). */
double
_Py_parse_inf_or_nan(const char *p, char **endptr)
{
    const char *cursor = p;
    int is_negative = 0;

    /* Consume at most one leading sign character. */
    switch (*cursor) {
    case '-':
        is_negative = 1;
        cursor++;
        break;
    case '+':
        cursor++;
        break;
    default:
        break;
    }

    if (case_insensitive_match(cursor, "inf")) {
        /* Accept both the short form "inf" and the long form "infinity". */
        cursor += 3;
        if (case_insensitive_match(cursor, "inity")) {
            cursor += 5;
        }
        *endptr = (char *)cursor;
        return _Py_dg_infinity(is_negative);
    }

    if (case_insensitive_match(cursor, "nan")) {
        *endptr = (char *)(cursor + 3);
        return _Py_dg_stdnan(is_negative);
    }

    /* Nothing recognised: consume no input (not even the sign) and return
       the failure value. */
    *endptr = (char *)p;
    return -1.0;
}
59
60#else
61
62double
63_Py_parse_inf_or_nan(const char *p, char **endptr)
64{
65    double retval;
66    const char *s;
67    int negate = 0;
68
69    s = p;
70    if (*s == '-') {
71        negate = 1;
72        s++;
73    }
74    else if (*s == '+') {
75        s++;
76    }
77    if (case_insensitive_match(s, "inf")) {
78        s += 3;
79        if (case_insensitive_match(s, "inity"))
80            s += 5;
81        retval = negate ? -Py_HUGE_VAL : Py_HUGE_VAL;
82    }
83#ifdef Py_NAN
84    else if (case_insensitive_match(s, "nan")) {
85        s += 3;
86        retval = negate ? -Py_NAN : Py_NAN;
87    }
88#endif
89    else {
90        s = p;
91        retval = -1.0;
92    }
93    *endptr = (char *)s;
94    return retval;
95}
96
97#endif
98
/**
 * _PyOS_ascii_strtod:
 * @nptr:    the string to convert to a numeric value.
 * @endptr:  must not be NULL (both implementations below dereference it
 *           unconditionally); on return it points to the character after
 *           the last character used in the conversion.
 *
 * Converts a string to a double value.
 * This function behaves like the standard strtod() function
 * does in the C locale. It does this without actually
 * changing the current locale, since that would not be
 * thread-safe.
 *
 * This function is typically used when reading configuration
 * files or other non-user input that should be locale independent.
 * To handle input from the user you should normally use the
 * locale-sensitive system strtod() function.
 *
 * If the correct value would cause overflow, plus or minus HUGE_VAL
 * is returned (according to the sign of the value), and ERANGE is
 * stored in errno. If the correct value would cause underflow,
 * zero is returned and ERANGE is stored in errno.
 * If memory allocation fails, ENOMEM is stored in errno.
 *
 * This function resets errno before calling strtod() so that
 * you can reliably detect overflow and underflow.
 *
 * Return value: the converted double value.
 **/
127
128#ifndef PY_NO_SHORT_FLOAT_REPR
129
130static double
131_PyOS_ascii_strtod(const char *nptr, char **endptr)
132{
133    double result;
134    _Py_SET_53BIT_PRECISION_HEADER;
135
136    assert(nptr != NULL);
137    /* Set errno to zero, so that we can distinguish zero results
138       and underflows */
139    errno = 0;
140
141    _Py_SET_53BIT_PRECISION_START;
142    result = _Py_dg_strtod(nptr, endptr);
143    _Py_SET_53BIT_PRECISION_END;
144
145    if (*endptr == nptr)
146        /* string might represent an inf or nan */
147        result = _Py_parse_inf_or_nan(nptr, endptr);
148
149    return result;
150
151}
152
153#else
154
/*
   Variant compiled when PY_NO_SHORT_FLOAT_REPR is defined.

   Use system strtod;  since strtod is locale aware, we may
   have to first fix the decimal separator.

   Note that unlike _Py_dg_strtod, the system strtod may not always give
   correctly rounded results.

   nptr must be non-NULL and endptr must be a valid pointer.  On return
   *endptr points just past the text that was converted, or at nptr when
   nothing could be converted.  On failure -1.0 is returned and errno is set
   to EINVAL (input not accepted) or ENOMEM (the temporary copy could not be
   allocated).
*/

static double
_PyOS_ascii_strtod(const char *nptr, char **endptr)
{
    char *fail_pos;
    double val;
    struct lconv *locale_data;
    const char *decimal_point;
    size_t decimal_point_len;
    const char *p, *decimal_point_pos;
    const char *end = NULL; /* Silence gcc */
    const char *digits_pos = NULL;
    int negate = 0;

    assert(nptr != NULL);

    fail_pos = NULL;

    locale_data = localeconv();
    decimal_point = locale_data->decimal_point;
    decimal_point_len = strlen(decimal_point);

    assert(decimal_point_len != 0);

    decimal_point_pos = NULL;

    /* Parse infinities and nans.  If this consumes nothing, val is left
       holding the -1.0 failure value. */
    val = _Py_parse_inf_or_nan(nptr, endptr);
    if (*endptr != nptr)
        return val;

    /* Set errno to zero, so that we can distinguish zero results
       and underflows */
    errno = 0;

    /* We process the optional sign manually, then pass the remainder to
       the system strtod.  This ensures that the result of an underflow
       has the correct sign. (bug #1725)  */
    p = nptr;
    /* Process leading sign, if present */
    if (*p == '-') {
        negate = 1;
        p++;
    }
    else if (*p == '+') {
        p++;
    }

    /* Some platform strtods accept hex floats; Python shouldn't (at the
       moment), so we check explicitly for strings starting with '0x'. */
    if (*p == '0' && (*(p+1) == 'x' || *(p+1) == 'X'))
        goto invalid_string;

    /* Check that what's left begins with a digit or decimal point */
    if (!Py_ISDIGIT(*p) && *p != '.')
        goto invalid_string;

    digits_pos = p;
    if (decimal_point[0] != '.' ||
        decimal_point[1] != 0)
    {
        /* Look for a '.' in the input; if present, it'll need to be
           swapped for the current locale's decimal point before we
           call strtod.  On the other hand, if we find the current
           locale's decimal point then the input is invalid. */
        while (Py_ISDIGIT(*p))
            p++;

        if (*p == '.')
        {
            decimal_point_pos = p++;

            /* locate end of number */
            while (Py_ISDIGIT(*p))
                p++;

            if (*p == 'e' || *p == 'E')
                p++;
            if (*p == '+' || *p == '-')
                p++;
            while (Py_ISDIGIT(*p))
                p++;
            end = p;
        }
        else if (strncmp(p, decimal_point, decimal_point_len) == 0)
            /* Python bug #1417699 */
            goto invalid_string;
        /* For the other cases, we need not convert the decimal
           point */
    }

    if (decimal_point_pos) {
        char *copy, *c;
        /* Create a copy of the input, with the '.' converted to the
           locale-specific decimal point */
        copy = (char *)PyMem_MALLOC(end - digits_pos +
                                    1 + decimal_point_len);
        if (copy == NULL) {
            *endptr = (char *)nptr;
            errno = ENOMEM;
            /* val is still the -1.0 stored by the failed inf/nan parse */
            return val;
        }

        /* Layout of copy: digits before the point, the locale's decimal
           point, everything after the point, terminating nul. */
        c = copy;
        memcpy(c, digits_pos, decimal_point_pos - digits_pos);
        c += decimal_point_pos - digits_pos;
        memcpy(c, decimal_point, decimal_point_len);
        c += decimal_point_len;
        memcpy(c, decimal_point_pos + 1,
               end - (decimal_point_pos + 1));
        c += end - (decimal_point_pos + 1);
        *c = 0;

        val = strtod(copy, &fail_pos);

        if (fail_pos)
        {
            /* fail_pos points into copy; translate it back into the
               caller's string.  The decision must be made on offsets:
               copy and the input are unrelated objects, so comparing
               the raw pointers is undefined and would depend on where
               the allocator happened to place copy. */
            if (fail_pos - copy > decimal_point_pos - digits_pos)
                /* strtod stopped after the decimal point, which is
                   decimal_point_len bytes in copy but one byte in the
                   input */
                fail_pos = (char *)digits_pos +
                    (fail_pos - copy) -
                    (decimal_point_len - 1);
            else
                fail_pos = (char *)digits_pos +
                    (fail_pos - copy);
        }

        PyMem_FREE(copy);

    }
    else {
        val = strtod(digits_pos, &fail_pos);
    }

    /* strtod consumed nothing: the input is not a number */
    if (fail_pos == digits_pos)
        goto invalid_string;

    if (negate && fail_pos != nptr)
        val = -val;
    *endptr = fail_pos;

    return val;

  invalid_string:
    *endptr = (char*)nptr;
    errno = EINVAL;
    return -1.0;
}
309
310#endif
311
312/* PyOS_string_to_double converts a null-terminated byte string s (interpreted
313   as a string of ASCII characters) to a float.  The string should not have
314   leading or trailing whitespace.  The conversion is independent of the
315   current locale.
316
317   If endptr is NULL, try to convert the whole string.  Raise ValueError and
318   return -1.0 if the string is not a valid representation of a floating-point
319   number.
320
321   If endptr is non-NULL, try to convert as much of the string as possible.
322   If no initial segment of the string is the valid representation of a
323   floating-point number then *endptr is set to point to the beginning of the
324   string, -1.0 is returned and again ValueError is raised.
325
326   On overflow (e.g., when trying to convert '1e500' on an IEEE 754 machine),
327   if overflow_exception is NULL then +-Py_HUGE_VAL is returned, and no Python
328   exception is raised.  Otherwise, overflow_exception should point to
329   a Python exception, this exception will be raised, -1.0 will be returned,
330   and *endptr will point just past the end of the converted value.
331
332   If any other failure occurs (for example lack of memory), -1.0 is returned
333   and the appropriate Python exception will have been set.
334*/
335
336double
337PyOS_string_to_double(const char *s,
338                      char **endptr,
339                      PyObject *overflow_exception)
340{
341    double x, result=-1.0;
342    char *fail_pos;
343
344    errno = 0;
345    PyFPE_START_PROTECT("PyOS_string_to_double", return -1.0)
346    x = _PyOS_ascii_strtod(s, &fail_pos);
347    PyFPE_END_PROTECT(x)
348
349    if (errno == ENOMEM) {
350        PyErr_NoMemory();
351        fail_pos = (char *)s;
352    }
353    else if (!endptr && (fail_pos == s || *fail_pos != '\0'))
354        PyErr_Format(PyExc_ValueError,
355                      "could not convert string to float: "
356                      "%.200s", s);
357    else if (fail_pos == s)
358        PyErr_Format(PyExc_ValueError,
359                      "could not convert string to float: "
360                      "%.200s", s);
361    else if (errno == ERANGE && fabs(x) >= 1.0 && overflow_exception)
362        PyErr_Format(overflow_exception,
363                      "value too large to convert to float: "
364                      "%.200s", s);
365    else
366        result = x;
367
368    if (endptr != NULL)
369        *endptr = fail_pos;
370    return result;
371}
372
373/* Remove underscores that follow the underscore placement rule from
374   the string and then call the `innerfunc` function on the result.
375   It should return a new object or NULL on exception.
376
377   `what` is used for the error message emitted when underscores are detected
378   that don't follow the rule. `arg` is an opaque pointer passed to the inner
379   function.
380
381   This is used to implement underscore-agnostic conversion for floats
382   and complex numbers.
383*/
384PyObject *
385_Py_string_to_number_with_underscores(
386    const char *s, Py_ssize_t orig_len, const char *what, PyObject *obj, void *arg,
387    PyObject *(*innerfunc)(const char *, Py_ssize_t, void *))
388{
389    char prev;
390    const char *p, *last;
391    char *dup, *end;
392    PyObject *result;
393
394    if (strchr(s, '_') == NULL) {
395        return innerfunc(s, orig_len, arg);
396    }
397
398    dup = PyMem_Malloc(orig_len + 1);
399    end = dup;
400    prev = '\0';
401    last = s + orig_len;
402    for (p = s; *p; p++) {
403        if (*p == '_') {
404            /* Underscores are only allowed after digits. */
405            if (!(prev >= '0' && prev <= '9')) {
406                goto error;
407            }
408        }
409        else {
410            *end++ = *p;
411            /* Underscores are only allowed before digits. */
412            if (prev == '_' && !(*p >= '0' && *p <= '9')) {
413                goto error;
414            }
415        }
416        prev = *p;
417    }
418    /* Underscores are not allowed at the end. */
419    if (prev == '_') {
420        goto error;
421    }
422    /* No embedded NULs allowed. */
423    if (p != last) {
424        goto error;
425    }
426    *end = '\0';
427    result = innerfunc(dup, end - dup, arg);
428    PyMem_Free(dup);
429    return result;
430
431  error:
432    PyMem_Free(dup);
433    PyErr_Format(PyExc_ValueError,
434		 "could not convert string to %s: "
435		 "%R", what, obj);
436    return NULL;
437}
438
439#ifdef PY_NO_SHORT_FLOAT_REPR
440
441/* Given a string that may have a decimal point in the current
442   locale, change it back to a dot.  Since the string cannot get
443   longer, no need for a maximum buffer size parameter. */
444Py_LOCAL_INLINE(void)
445change_decimal_from_locale_to_dot(char* buffer)
446{
447    struct lconv *locale_data = localeconv();
448    const char *decimal_point = locale_data->decimal_point;
449
450    if (decimal_point[0] != '.' || decimal_point[1] != 0) {
451        size_t decimal_point_len = strlen(decimal_point);
452
453        if (*buffer == '+' || *buffer == '-')
454            buffer++;
455        while (Py_ISDIGIT(*buffer))
456            buffer++;
457        if (strncmp(buffer, decimal_point, decimal_point_len) == 0) {
458            *buffer = '.';
459            buffer++;
460            if (decimal_point_len > 1) {
461                /* buffer needs to get smaller */
462                size_t rest_len = strlen(buffer +
463                                     (decimal_point_len - 1));
464                memmove(buffer,
465                    buffer + (decimal_point_len - 1),
466                    rest_len);
467                buffer[rest_len] = 0;
468            }
469        }
470    }
471}
472
473
474/* From the C99 standard, section 7.19.6:
475The exponent always contains at least two digits, and only as many more digits
476as necessary to represent the exponent.
477*/
478#define MIN_EXPONENT_DIGITS 2
479
480/* Ensure that any exponent, if present, is at least MIN_EXPONENT_DIGITS
481   in length. */
482Py_LOCAL_INLINE(void)
483ensure_minimum_exponent_length(char* buffer, size_t buf_size)
484{
485    char *p = strpbrk(buffer, "eE");
486    if (p && (*(p + 1) == '-' || *(p + 1) == '+')) {
487        char *start = p + 2;
488        int exponent_digit_cnt = 0;
489        int leading_zero_cnt = 0;
490        int in_leading_zeros = 1;
491        int significant_digit_cnt;
492
493        /* Skip over the exponent and the sign. */
494        p += 2;
495
496        /* Find the end of the exponent, keeping track of leading
497           zeros. */
498        while (*p && Py_ISDIGIT(*p)) {
499            if (in_leading_zeros && *p == '0')
500                ++leading_zero_cnt;
501            if (*p != '0')
502                in_leading_zeros = 0;
503            ++p;
504            ++exponent_digit_cnt;
505        }
506
507        significant_digit_cnt = exponent_digit_cnt - leading_zero_cnt;
508        if (exponent_digit_cnt == MIN_EXPONENT_DIGITS) {
509            /* If there are 2 exactly digits, we're done,
510               regardless of what they contain */
511        }
512        else if (exponent_digit_cnt > MIN_EXPONENT_DIGITS) {
513            int extra_zeros_cnt;
514
515            /* There are more than 2 digits in the exponent.  See
516               if we can delete some of the leading zeros */
517            if (significant_digit_cnt < MIN_EXPONENT_DIGITS)
518                significant_digit_cnt = MIN_EXPONENT_DIGITS;
519            extra_zeros_cnt = exponent_digit_cnt -
520                significant_digit_cnt;
521
522            /* Delete extra_zeros_cnt worth of characters from the
523               front of the exponent */
524            assert(extra_zeros_cnt >= 0);
525
526            /* Add one to significant_digit_cnt to copy the
527               trailing 0 byte, thus setting the length */
528            memmove(start,
529                start + extra_zeros_cnt,
530                significant_digit_cnt + 1);
531        }
532        else {
533            /* If there are fewer than 2 digits, add zeros
534               until there are 2, if there's enough room */
535            int zeros = MIN_EXPONENT_DIGITS - exponent_digit_cnt;
536            if (start + zeros + exponent_digit_cnt + 1
537                  < buffer + buf_size) {
538                memmove(start + zeros, start,
539                    exponent_digit_cnt + 1);
540                memset(start, '0', zeros);
541            }
542        }
543    }
544}
545
546/* Remove trailing zeros after the decimal point from a numeric string; also
547   remove the decimal point if all digits following it are zero.  The numeric
548   string must end in '\0', and should not have any leading or trailing
549   whitespace.  Assumes that the decimal point is '.'. */
550Py_LOCAL_INLINE(void)
551remove_trailing_zeros(char *buffer)
552{
553    char *old_fraction_end, *new_fraction_end, *end, *p;
554
555    p = buffer;
556    if (*p == '-' || *p == '+')
557        /* Skip leading sign, if present */
558        ++p;
559    while (Py_ISDIGIT(*p))
560        ++p;
561
562    /* if there's no decimal point there's nothing to do */
563    if (*p++ != '.')
564        return;
565
566    /* scan any digits after the point */
567    while (Py_ISDIGIT(*p))
568        ++p;
569    old_fraction_end = p;
570
571    /* scan up to ending '\0' */
572    while (*p != '\0')
573        p++;
574    /* +1 to make sure that we move the null byte as well */
575    end = p+1;
576
577    /* scan back from fraction_end, looking for removable zeros */
578    p = old_fraction_end;
579    while (*(p-1) == '0')
580        --p;
581    /* and remove point if we've got that far */
582    if (*(p-1) == '.')
583        --p;
584    new_fraction_end = p;
585
586    memmove(new_fraction_end, old_fraction_end, end-old_fraction_end);
587}
588
589/* Ensure that buffer has a decimal point in it.  The decimal point will not
590   be in the current locale, it will always be '.'. Don't add a decimal point
591   if an exponent is present.  Also, convert to exponential notation where
592   adding a '.0' would produce too many significant digits (see issue 5864).
593
594   Returns a pointer to the fixed buffer, or NULL on failure.
595*/
596Py_LOCAL_INLINE(char *)
597ensure_decimal_point(char* buffer, size_t buf_size, int precision)
598{
599    int digit_count, insert_count = 0, convert_to_exp = 0;
600    char *chars_to_insert, *digits_start;
601
602    /* search for the first non-digit character */
603    char *p = buffer;
604    if (*p == '-' || *p == '+')
605        /* Skip leading sign, if present.  I think this could only
606           ever be '-', but it can't hurt to check for both. */
607        ++p;
608    digits_start = p;
609    while (*p && Py_ISDIGIT(*p))
610        ++p;
611    digit_count = Py_SAFE_DOWNCAST(p - digits_start, Py_ssize_t, int);
612
613    if (*p == '.') {
614        if (Py_ISDIGIT(*(p+1))) {
615            /* Nothing to do, we already have a decimal
616               point and a digit after it */
617        }
618        else {
619            /* We have a decimal point, but no following
620               digit.  Insert a zero after the decimal. */
621            /* can't ever get here via PyOS_double_to_string */
622            assert(precision == -1);
623            ++p;
624            chars_to_insert = "0";
625            insert_count = 1;
626        }
627    }
628    else if (!(*p == 'e' || *p == 'E')) {
629        /* Don't add ".0" if we have an exponent. */
630        if (digit_count == precision) {
631            /* issue 5864: don't add a trailing .0 in the case
632               where the '%g'-formatted result already has as many
633               significant digits as were requested.  Switch to
634               exponential notation instead. */
635            convert_to_exp = 1;
636            /* no exponent, no point, and we shouldn't land here
637               for infs and nans, so we must be at the end of the
638               string. */
639            assert(*p == '\0');
640        }
641        else {
642            assert(precision == -1 || digit_count < precision);
643            chars_to_insert = ".0";
644            insert_count = 2;
645        }
646    }
647    if (insert_count) {
648        size_t buf_len = strlen(buffer);
649        if (buf_len + insert_count + 1 >= buf_size) {
650            /* If there is not enough room in the buffer
651               for the additional text, just skip it.  It's
652               not worth generating an error over. */
653        }
654        else {
655            memmove(p + insert_count, p,
656                buffer + strlen(buffer) - p + 1);
657            memcpy(p, chars_to_insert, insert_count);
658        }
659    }
660    if (convert_to_exp) {
661        int written;
662        size_t buf_avail;
663        p = digits_start;
664        /* insert decimal point */
665        assert(digit_count >= 1);
666        memmove(p+2, p+1, digit_count); /* safe, but overwrites nul */
667        p[1] = '.';
668        p += digit_count+1;
669        assert(p <= buf_size+buffer);
670        buf_avail = buf_size+buffer-p;
671        if (buf_avail == 0)
672            return NULL;
673        /* Add exponent.  It's okay to use lower case 'e': we only
674           arrive here as a result of using the empty format code or
675           repr/str builtins and those never want an upper case 'E' */
676        written = PyOS_snprintf(p, buf_avail, "e%+.02d", digit_count-1);
677        if (!(0 <= written &&
678              written < Py_SAFE_DOWNCAST(buf_avail, size_t, int)))
679            /* output truncated, or something else bad happened */
680            return NULL;
681        remove_trailing_zeros(buffer);
682    }
683    return buffer;
684}
685
/* see FORMATBUFLEN in unicodeobject.c */
#define FLOAT_FORMATBUFLEN 120

/**
 * _PyOS_ascii_formatd:
 * @buffer: A buffer to place the resulting string in
 * @buf_size: The length of the buffer.
 * @format: The printf()-style format to use for converting.  It must
 *          start with '%' and end with the conversion specifier.
 * @d: The double to convert
 * @precision: The precision that was requested.  It is not used for the
 *             formatting itself (that comes from @format); it is only
 *             consulted by the 'Z' fixup.
 *
 * Converts a double to a string, using the '.' as
 * decimal point. To format the number you pass in
 * a printf()-style format string. Allowed conversion
 * specifiers are 'e', 'E', 'f', 'F', 'g', 'G', and 'Z'.
 *
 * 'Z' is the same as 'g', except it always has a decimal and
 *     at least one digit after the decimal.
 *
 * Return value: The pointer to the buffer with the converted string.
 * On failure returns NULL but does not set any Python exception.
 * A format that is rejected (including the empty string) returns NULL
 * before anything is written to @buffer.
 **/
static char *
_PyOS_ascii_formatd(char       *buffer,
                   size_t      buf_size,
                   const char *format,
                   double      d,
                   int         precision)
{
    char format_char;
    size_t format_len;

    /* Issue 2264: code 'Z' requires copying the format.  'Z' is 'g', but
       also with at least one character past the decimal. */
    char tmp_format[FLOAT_FORMATBUFLEN];

    /* Check the leading '%' before anything else.  This also rejects the
       empty string, which guarantees format_len >= 1 so that the
       format[format_len - 1] read below stays inside the string. */
    if (format[0] != '%')
        return NULL;

    format_len = strlen(format);

    /* The last character in the format string must be the format char */
    format_char = format[format_len - 1];

    /* Reject formats that contain, after the first character, a single
       quote, a lowercase l, or a percent. */
    if (strpbrk(format + 1, "'l%"))
        return NULL;

    /* Also curious about this function is that it accepts format strings
       like "%xg", which are invalid for floats.  In general, the
       interface to this function is not very good, but changing it is
       difficult because it's a public API. */

    if (!(format_char == 'e' || format_char == 'E' ||
          format_char == 'f' || format_char == 'F' ||
          format_char == 'g' || format_char == 'G' ||
          format_char == 'Z'))
        return NULL;

    /* Map 'Z' format_char to 'g', by copying the format string and
       replacing the final char with a 'g' */
    if (format_char == 'Z') {
        if (format_len + 1 >= sizeof(tmp_format)) {
            /* The format won't fit in our copy.  Error out.  In
               practice, this will never happen and will be
               detected by returning NULL */
            return NULL;
        }
        strcpy(tmp_format, format);
        tmp_format[format_len - 1] = 'g';
        format = tmp_format;
    }


    /* Have PyOS_snprintf do the hard work */
    PyOS_snprintf(buffer, buf_size, format, d);

    /* Do various fixups on the return string */

    /* Get the current locale, and find the decimal point string.
       Convert that string back to a dot. */
    change_decimal_from_locale_to_dot(buffer);

    /* If an exponent exists, ensure that the exponent is at least
       MIN_EXPONENT_DIGITS digits, providing the buffer is large enough
       for the extra zeros.  Also, if there are more than
       MIN_EXPONENT_DIGITS, remove as many zeros as possible until we get
       back to MIN_EXPONENT_DIGITS */
    ensure_minimum_exponent_length(buffer, buf_size);

    /* If format_char is 'Z', make sure we have at least one character
       after the decimal point (and make sure we have a decimal point);
       also switch to exponential notation in some edge cases where the
       extra character would produce more significant digits than we
       really want.  ensure_decimal_point may return NULL, which is
       passed on to the caller. */
    if (format_char == 'Z')
        buffer = ensure_decimal_point(buffer, buf_size, precision);

    return buffer;
}
788
789/* The fallback code to use if _Py_dg_dtoa is not available. */
790
791PyAPI_FUNC(char *) PyOS_double_to_string(double val,
792                                         char format_code,
793                                         int precision,
794                                         int flags,
795                                         int *type)
796{
797    char format[32];
798    Py_ssize_t bufsize;
799    char *buf;
800    int t, exp;
801    int upper = 0;
802
803    /* Validate format_code, and map upper and lower case */
804    switch (format_code) {
805    case 'e':          /* exponent */
806    case 'f':          /* fixed */
807    case 'g':          /* general */
808        break;
809    case 'E':
810        upper = 1;
811        format_code = 'e';
812        break;
813    case 'F':
814        upper = 1;
815        format_code = 'f';
816        break;
817    case 'G':
818        upper = 1;
819        format_code = 'g';
820        break;
821    case 'r':          /* repr format */
822        /* Supplied precision is unused, must be 0. */
823        if (precision != 0) {
824            PyErr_BadInternalCall();
825            return NULL;
826        }
827        /* The repr() precision (17 significant decimal digits) is the
828           minimal number that is guaranteed to have enough precision
829           so that if the number is read back in the exact same binary
830           value is recreated.  This is true for IEEE floating point
831           by design, and also happens to work for all other modern
832           hardware. */
833        precision = 17;
834        format_code = 'g';
835        break;
836    default:
837        PyErr_BadInternalCall();
838        return NULL;
839    }
840
841    /* Here's a quick-and-dirty calculation to figure out how big a buffer
842       we need.  In general, for a finite float we need:
843
844         1 byte for each digit of the decimal significand, and
845
846         1 for a possible sign
847         1 for a possible decimal point
848         2 for a possible [eE][+-]
849         1 for each digit of the exponent;  if we allow 19 digits
850           total then we're safe up to exponents of 2**63.
851         1 for the trailing nul byte
852
853       This gives a total of 24 + the number of digits in the significand,
854       and the number of digits in the significand is:
855
856         for 'g' format: at most precision, except possibly
857           when precision == 0, when it's 1.
858         for 'e' format: precision+1
859         for 'f' format: precision digits after the point, at least 1
860           before.  To figure out how many digits appear before the point
861           we have to examine the size of the number.  If fabs(val) < 1.0
862           then there will be only one digit before the point.  If
863           fabs(val) >= 1.0, then there are at most
864
865         1+floor(log10(ceiling(fabs(val))))
866
867           digits before the point (where the 'ceiling' allows for the
868           possibility that the rounding rounds the integer part of val
869           up).  A safe upper bound for the above quantity is
870           1+floor(exp/3), where exp is the unique integer such that 0.5
871           <= fabs(val)/2**exp < 1.0.  This exp can be obtained from
872           frexp.
873
874       So we allow room for precision+1 digits for all formats, plus an
875       extra floor(exp/3) digits for 'f' format.
876
877    */
878
879    if (Py_IS_NAN(val) || Py_IS_INFINITY(val))
880        /* 3 for 'inf'/'nan', 1 for sign, 1 for '\0' */
881        bufsize = 5;
882    else {
883        bufsize = 25 + precision;
884        if (format_code == 'f' && fabs(val) >= 1.0) {
885            frexp(val, &exp);
886            bufsize += exp/3;
887        }
888    }
889
890    buf = PyMem_Malloc(bufsize);
891    if (buf == NULL) {
892        PyErr_NoMemory();
893        return NULL;
894    }
895
896    /* Handle nan and inf. */
897    if (Py_IS_NAN(val)) {
898        strcpy(buf, "nan");
899        t = Py_DTST_NAN;
900    } else if (Py_IS_INFINITY(val)) {
901        if (copysign(1., val) == 1.)
902            strcpy(buf, "inf");
903        else
904            strcpy(buf, "-inf");
905        t = Py_DTST_INFINITE;
906    } else {
907        t = Py_DTST_FINITE;
908        if (flags & Py_DTSF_ADD_DOT_0)
909            format_code = 'Z';
910
911        PyOS_snprintf(format, sizeof(format), "%%%s.%i%c",
912                      (flags & Py_DTSF_ALT ? "#" : ""), precision,
913                      format_code);
914        _PyOS_ascii_formatd(buf, bufsize, format, val, precision);
915    }
916
917    /* Add sign when requested.  It's convenient (esp. when formatting
918     complex numbers) to include a sign even for inf and nan. */
919    if (flags & Py_DTSF_SIGN && buf[0] != '-') {
920        size_t len = strlen(buf);
921        /* the bufsize calculations above should ensure that we've got
922           space to add a sign */
923        assert((size_t)bufsize >= len+2);
924        memmove(buf+1, buf, len+1);
925        buf[0] = '+';
926    }
927    if (upper) {
928        /* Convert to upper case. */
929        char *p1;
930        for (p1 = buf; *p1; p1++)
931            *p1 = Py_TOUPPER(*p1);
932    }
933
934    if (type)
935        *type = t;
936    return buf;
937}
938
939#else
940
941/* _Py_dg_dtoa is available. */
942
/* The textual fragments emitted for infinities, nans and the exponent
   marker are taken from a table rather than upper-cased at run time, which
   avoids needing a locale-independent upper-casing routine.  The constants
   below are the indices into either table. */
enum {
    OFS_INF = 0,
    OFS_NAN = 1,
    OFS_E = 2
};

/* Lower-case fragments.  The formatting code relies on the exact lengths of
   these strings, so they must not be changed. */
static const char * const lc_float_strings[] = {
    "inf", "nan", "e"
};

/* Upper-case counterparts, in the same order and with the same lengths. */
static const char * const uc_float_strings[] = {
    "INF", "NAN", "E"
};
960
961
962/* Convert a double d to a string, and return a PyMem_Malloc'd block of
963   memory contain the resulting string.
964
965   Arguments:
966     d is the double to be converted
967     format_code is one of 'e', 'f', 'g', 'r'.  'e', 'f' and 'g'
968       correspond to '%e', '%f' and '%g';  'r' corresponds to repr.
969     mode is one of '0', '2' or '3', and is completely determined by
970       format_code: 'e' and 'g' use mode 2; 'f' mode 3, 'r' mode 0.
971     precision is the desired precision
972     always_add_sign is nonzero if a '+' sign should be included for positive
973       numbers
974     add_dot_0_if_integer is nonzero if integers in non-exponential form
975       should have ".0" added.  Only applies to format codes 'r' and 'g'.
976     use_alt_formatting is nonzero if alternative formatting should be
977       used.  Only applies to format codes 'e', 'f' and 'g'.  For code 'g',
978       at most one of use_alt_formatting and add_dot_0_if_integer should
979       be nonzero.
980     type, if non-NULL, will be set to one of these constants to identify
981       the type of the 'd' argument:
982     Py_DTST_FINITE
983     Py_DTST_INFINITE
984     Py_DTST_NAN
985
986   Returns a PyMem_Malloc'd block of memory containing the resulting string,
987    or NULL on error. If NULL is returned, the Python error has been set.
988 */
989
990static char *
991format_float_short(double d, char format_code,
992                   int mode, int precision,
993                   int always_add_sign, int add_dot_0_if_integer,
994                   int use_alt_formatting, const char * const *float_strings,
995                   int *type)
996{
997    char *buf = NULL;
998    char *p = NULL;
999    Py_ssize_t bufsize = 0;
1000    char *digits, *digits_end;
1001    int decpt_as_int, sign, exp_len, exp = 0, use_exp = 0;
1002    Py_ssize_t decpt, digits_len, vdigits_start, vdigits_end;
1003    _Py_SET_53BIT_PRECISION_HEADER;
1004
1005    /* _Py_dg_dtoa returns a digit string (no decimal point or exponent).
1006       Must be matched by a call to _Py_dg_freedtoa. */
1007    _Py_SET_53BIT_PRECISION_START;
1008    digits = _Py_dg_dtoa(d, mode, precision, &decpt_as_int, &sign,
1009                         &digits_end);
1010    _Py_SET_53BIT_PRECISION_END;
1011
1012    decpt = (Py_ssize_t)decpt_as_int;
1013    if (digits == NULL) {
1014        /* The only failure mode is no memory. */
1015        PyErr_NoMemory();
1016        goto exit;
1017    }
1018    assert(digits_end != NULL && digits_end >= digits);
1019    digits_len = digits_end - digits;
1020
1021    if (digits_len && !Py_ISDIGIT(digits[0])) {
1022        /* Infinities and nans here; adapt Gay's output,
1023           so convert Infinity to inf and NaN to nan, and
1024           ignore sign of nan. Then return. */
1025
1026        /* ignore the actual sign of a nan */
1027        if (digits[0] == 'n' || digits[0] == 'N')
1028            sign = 0;
1029
1030        /* We only need 5 bytes to hold the result "+inf\0" . */
1031        bufsize = 5; /* Used later in an assert. */
1032        buf = (char *)PyMem_Malloc(bufsize);
1033        if (buf == NULL) {
1034            PyErr_NoMemory();
1035            goto exit;
1036        }
1037        p = buf;
1038
1039        if (sign == 1) {
1040            *p++ = '-';
1041        }
1042        else if (always_add_sign) {
1043            *p++ = '+';
1044        }
1045        if (digits[0] == 'i' || digits[0] == 'I') {
1046            strncpy(p, float_strings[OFS_INF], 3);
1047            p += 3;
1048
1049            if (type)
1050                *type = Py_DTST_INFINITE;
1051        }
1052        else if (digits[0] == 'n' || digits[0] == 'N') {
1053            strncpy(p, float_strings[OFS_NAN], 3);
1054            p += 3;
1055
1056            if (type)
1057                *type = Py_DTST_NAN;
1058        }
1059        else {
1060            /* shouldn't get here: Gay's code should always return
1061               something starting with a digit, an 'I',  or 'N' */
1062            strncpy(p, "ERR", 3);
1063            /* p += 3; */
1064            assert(0);
1065        }
1066        goto exit;
1067    }
1068
1069    /* The result must be finite (not inf or nan). */
1070    if (type)
1071        *type = Py_DTST_FINITE;
1072
1073
1074    /* We got digits back, format them.  We may need to pad 'digits'
1075       either on the left or right (or both) with extra zeros, so in
1076       general the resulting string has the form
1077
1078         [<sign>]<zeros><digits><zeros>[<exponent>]
1079
1080       where either of the <zeros> pieces could be empty, and there's a
1081       decimal point that could appear either in <digits> or in the
1082       leading or trailing <zeros>.
1083
1084       Imagine an infinite 'virtual' string vdigits, consisting of the
1085       string 'digits' (starting at index 0) padded on both the left and
1086       right with infinite strings of zeros.  We want to output a slice
1087
1088         vdigits[vdigits_start : vdigits_end]
1089
1090       of this virtual string.  Thus if vdigits_start < 0 then we'll end
1091       up producing some leading zeros; if vdigits_end > digits_len there
1092       will be trailing zeros in the output.  The next section of code
1093       determines whether to use an exponent or not, figures out the
1094       position 'decpt' of the decimal point, and computes 'vdigits_start'
1095       and 'vdigits_end'. */
1096    vdigits_end = digits_len;
1097    switch (format_code) {
1098    case 'e':
1099        use_exp = 1;
1100        vdigits_end = precision;
1101        break;
1102    case 'f':
1103        vdigits_end = decpt + precision;
1104        break;
1105    case 'g':
1106        if (decpt <= -4 || decpt >
1107            (add_dot_0_if_integer ? precision-1 : precision))
1108            use_exp = 1;
1109        if (use_alt_formatting)
1110            vdigits_end = precision;
1111        break;
1112    case 'r':
1113        /* convert to exponential format at 1e16.  We used to convert
1114           at 1e17, but that gives odd-looking results for some values
1115           when a 16-digit 'shortest' repr is padded with bogus zeros.
1116           For example, repr(2e16+8) would give 20000000000000010.0;
1117           the true value is 20000000000000008.0. */
1118        if (decpt <= -4 || decpt > 16)
1119            use_exp = 1;
1120        break;
1121    default:
1122        PyErr_BadInternalCall();
1123        goto exit;
1124    }
1125
1126    /* if using an exponent, reset decimal point position to 1 and adjust
1127       exponent accordingly.*/
1128    if (use_exp) {
1129        exp = (int)decpt - 1;
1130        decpt = 1;
1131    }
1132    /* ensure vdigits_start < decpt <= vdigits_end, or vdigits_start <
1133       decpt < vdigits_end if add_dot_0_if_integer and no exponent */
1134    vdigits_start = decpt <= 0 ? decpt-1 : 0;
1135    if (!use_exp && add_dot_0_if_integer)
1136        vdigits_end = vdigits_end > decpt ? vdigits_end : decpt + 1;
1137    else
1138        vdigits_end = vdigits_end > decpt ? vdigits_end : decpt;
1139
1140    /* double check inequalities */
1141    assert(vdigits_start <= 0 &&
1142           0 <= digits_len &&
1143           digits_len <= vdigits_end);
1144    /* decimal point should be in (vdigits_start, vdigits_end] */
1145    assert(vdigits_start < decpt && decpt <= vdigits_end);
1146
1147    /* Compute an upper bound how much memory we need. This might be a few
1148       chars too long, but no big deal. */
1149    bufsize =
1150        /* sign, decimal point and trailing 0 byte */
1151        3 +
1152
1153        /* total digit count (including zero padding on both sides) */
1154        (vdigits_end - vdigits_start) +
1155
1156        /* exponent "e+100", max 3 numerical digits */
1157        (use_exp ? 5 : 0);
1158
1159    /* Now allocate the memory and initialize p to point to the start of
1160       it. */
1161    buf = (char *)PyMem_Malloc(bufsize);
1162    if (buf == NULL) {
1163        PyErr_NoMemory();
1164        goto exit;
1165    }
1166    p = buf;
1167
1168    /* Add a negative sign if negative, and a plus sign if non-negative
1169       and always_add_sign is true. */
1170    if (sign == 1)
1171        *p++ = '-';
1172    else if (always_add_sign)
1173        *p++ = '+';
1174
1175    /* note that exactly one of the three 'if' conditions is true,
1176       so we include exactly one decimal point */
1177    /* Zero padding on left of digit string */
1178    if (decpt <= 0) {
1179        memset(p, '0', decpt-vdigits_start);
1180        p += decpt - vdigits_start;
1181        *p++ = '.';
1182        memset(p, '0', 0-decpt);
1183        p += 0-decpt;
1184    }
1185    else {
1186        memset(p, '0', 0-vdigits_start);
1187        p += 0 - vdigits_start;
1188    }
1189
1190    /* Digits, with included decimal point */
1191    if (0 < decpt && decpt <= digits_len) {
1192        strncpy(p, digits, decpt-0);
1193        p += decpt-0;
1194        *p++ = '.';
1195        strncpy(p, digits+decpt, digits_len-decpt);
1196        p += digits_len-decpt;
1197    }
1198    else {
1199        strncpy(p, digits, digits_len);
1200        p += digits_len;
1201    }
1202
1203    /* And zeros on the right */
1204    if (digits_len < decpt) {
1205        memset(p, '0', decpt-digits_len);
1206        p += decpt-digits_len;
1207        *p++ = '.';
1208        memset(p, '0', vdigits_end-decpt);
1209        p += vdigits_end-decpt;
1210    }
1211    else {
1212        memset(p, '0', vdigits_end-digits_len);
1213        p += vdigits_end-digits_len;
1214    }
1215
1216    /* Delete a trailing decimal pt unless using alternative formatting. */
1217    if (p[-1] == '.' && !use_alt_formatting)
1218        p--;
1219
1220    /* Now that we've done zero padding, add an exponent if needed. */
1221    if (use_exp) {
1222        *p++ = float_strings[OFS_E][0];
1223        exp_len = sprintf(p, "%+.02d", exp);
1224        p += exp_len;
1225    }
1226  exit:
1227    if (buf) {
1228        *p = '\0';
1229        /* It's too late if this fails, as we've already stepped on
1230           memory that isn't ours. But it's an okay debugging test. */
1231        assert(p-buf < bufsize);
1232    }
1233    if (digits)
1234        _Py_dg_freedtoa(digits);
1235
1236    return buf;
1237}
1238
1239
1240PyAPI_FUNC(char *) PyOS_double_to_string(double val,
1241                                         char format_code,
1242                                         int precision,
1243                                         int flags,
1244                                         int *type)
1245{
1246    const char * const *float_strings = lc_float_strings;
1247    int mode;
1248
1249    /* Validate format_code, and map upper and lower case. Compute the
1250       mode and make any adjustments as needed. */
1251    switch (format_code) {
1252    /* exponent */
1253    case 'E':
1254        float_strings = uc_float_strings;
1255        format_code = 'e';
1256        /* Fall through. */
1257    case 'e':
1258        mode = 2;
1259        precision++;
1260        break;
1261
1262    /* fixed */
1263    case 'F':
1264        float_strings = uc_float_strings;
1265        format_code = 'f';
1266        /* Fall through. */
1267    case 'f':
1268        mode = 3;
1269        break;
1270
1271    /* general */
1272    case 'G':
1273        float_strings = uc_float_strings;
1274        format_code = 'g';
1275        /* Fall through. */
1276    case 'g':
1277        mode = 2;
1278        /* precision 0 makes no sense for 'g' format; interpret as 1 */
1279        if (precision == 0)
1280            precision = 1;
1281        break;
1282
1283    /* repr format */
1284    case 'r':
1285        mode = 0;
1286        /* Supplied precision is unused, must be 0. */
1287        if (precision != 0) {
1288            PyErr_BadInternalCall();
1289            return NULL;
1290        }
1291        break;
1292
1293    default:
1294        PyErr_BadInternalCall();
1295        return NULL;
1296    }
1297
1298    return format_float_short(val, format_code, mode, precision,
1299                              flags & Py_DTSF_SIGN,
1300                              flags & Py_DTSF_ADD_DOT_0,
1301                              flags & Py_DTSF_ALT,
1302                              float_strings, type);
1303}
1304#endif /* ifdef PY_NO_SHORT_FLOAT_REPR */
1305