1/*
2    string_format.h -- implementation of string.format().
3
4    It uses the Objects/stringlib conventions, so that it can be
5    compiled for both unicode and string objects.
6*/
7
8
/* Python 2.6 compatibility: when building for an interpreter older than
   3.0, the name PyLong_FromSsize_t is mapped onto _PyLong_FromSsize_t. */
#if PY_VERSION_HEX < 0x03000000
#define PyLong_FromSsize_t _PyLong_FromSsize_t
#endif

/* Growth policy for the output string buffer.  Each reallocation adds
   some slack beyond what was asked for: the slack starts at
   INITIAL_SIZE_INCREMENT characters and is multiplied by SIZE_MULTIPLIER
   after every reallocation until it reaches MAX_SIZE_INCREMENT. */
#define INITIAL_SIZE_INCREMENT 100
#define SIZE_MULTIPLIER 2
#define MAX_SIZE_INCREMENT  3200
18
19
20/************************************************************************/
21/***********   Global data structures and forward declarations  *********/
22/************************************************************************/
23
24/*
25   A SubString consists of the characters between two string or
26   unicode pointers.
27*/
28typedef struct {
29    STRINGLIB_CHAR *ptr;
30    STRINGLIB_CHAR *end;
31} SubString;
32
33
/* Which field-numbering mode the format string has committed to. */
typedef enum {
    ANS_INIT = 0,       /* no numeric or omitted field name seen so far */
    ANS_AUTO = 1,       /* fields are numbered automatically ("{}") */
    ANS_MANUAL = 2      /* fields carry explicit numbers ("{0}") */
} AutoNumberState;

/* Auto-numbering bookkeeping: the current mode, plus the number that
   will be handed to the next automatically numbered field. */
typedef struct {
    AutoNumberState an_state;
    int an_field_number;
} AutoNumber;
45
46
47/* forward declaration for recursion */
48static PyObject *
49build_string(SubString *input, PyObject *args, PyObject *kwargs,
50             int recursion_depth, AutoNumber *auto_number);
51
52
53
54/************************************************************************/
55/**************************  Utility  functions  ************************/
56/************************************************************************/
57
58static void
59AutoNumber_Init(AutoNumber *auto_number)
60{
61    auto_number->an_state = ANS_INIT;
62    auto_number->an_field_number = 0;
63}
64
65/* fill in a SubString from a pointer and length */
66Py_LOCAL_INLINE(void)
67SubString_init(SubString *str, STRINGLIB_CHAR *p, Py_ssize_t len)
68{
69    str->ptr = p;
70    if (p == NULL)
71        str->end = NULL;
72    else
73        str->end = str->ptr + len;
74}
75
76/* return a new string.  if str->ptr is NULL, return None */
77Py_LOCAL_INLINE(PyObject *)
78SubString_new_object(SubString *str)
79{
80    if (str->ptr == NULL) {
81        Py_INCREF(Py_None);
82        return Py_None;
83    }
84    return STRINGLIB_NEW(str->ptr, str->end - str->ptr);
85}
86
87/* return a new string.  if str->ptr is NULL, return None */
88Py_LOCAL_INLINE(PyObject *)
89SubString_new_object_or_empty(SubString *str)
90{
91    if (str->ptr == NULL) {
92        return STRINGLIB_NEW(NULL, 0);
93    }
94    return STRINGLIB_NEW(str->ptr, str->end - str->ptr);
95}
96
97/* Return 1 if an error has been detected switching between automatic
98   field numbering and manual field specification, else return 0. Set
99   ValueError on error. */
100static int
101autonumber_state_error(AutoNumberState state, int field_name_is_empty)
102{
103    if (state == ANS_MANUAL) {
104        if (field_name_is_empty) {
105            PyErr_SetString(PyExc_ValueError, "cannot switch from "
106                            "manual field specification to "
107                            "automatic field numbering");
108            return 1;
109        }
110    }
111    else {
112        if (!field_name_is_empty) {
113            PyErr_SetString(PyExc_ValueError, "cannot switch from "
114                            "automatic field numbering to "
115                            "manual field specification");
116            return 1;
117        }
118    }
119    return 0;
120}
121
122
123/************************************************************************/
124/***********    Output string management functions       ****************/
125/************************************************************************/
126
127typedef struct {
128    STRINGLIB_CHAR *ptr;
129    STRINGLIB_CHAR *end;
130    PyObject *obj;
131    Py_ssize_t size_increment;
132} OutputString;
133
134/* initialize an OutputString object, reserving size characters */
135static int
136output_initialize(OutputString *output, Py_ssize_t size)
137{
138    output->obj = STRINGLIB_NEW(NULL, size);
139    if (output->obj == NULL)
140        return 0;
141
142    output->ptr = STRINGLIB_STR(output->obj);
143    output->end = STRINGLIB_LEN(output->obj) + output->ptr;
144    output->size_increment = INITIAL_SIZE_INCREMENT;
145
146    return 1;
147}
148
149/*
150    output_extend reallocates the output string buffer.
151    It returns a status:  0 for a failed reallocation,
152    1 for success.
153*/
154
155static int
156output_extend(OutputString *output, Py_ssize_t count)
157{
158    STRINGLIB_CHAR *startptr = STRINGLIB_STR(output->obj);
159    Py_ssize_t curlen = output->ptr - startptr;
160    Py_ssize_t maxlen = curlen + count + output->size_increment;
161
162    if (STRINGLIB_RESIZE(&output->obj, maxlen) < 0)
163        return 0;
164    startptr = STRINGLIB_STR(output->obj);
165    output->ptr = startptr + curlen;
166    output->end = startptr + maxlen;
167    if (output->size_increment < MAX_SIZE_INCREMENT)
168        output->size_increment *= SIZE_MULTIPLIER;
169    return 1;
170}
171
172/*
173    output_data dumps characters into our output string
174    buffer.
175
176    In some cases, it has to reallocate the string.
177
178    It returns a status:  0 for a failed reallocation,
179    1 for success.
180*/
181static int
182output_data(OutputString *output, const STRINGLIB_CHAR *s, Py_ssize_t count)
183{
184    if ((count > output->end - output->ptr) && !output_extend(output, count))
185        return 0;
186    memcpy(output->ptr, s, count * sizeof(STRINGLIB_CHAR));
187    output->ptr += count;
188    return 1;
189}
190
191/************************************************************************/
192/***********  Format string parsing -- integers and identifiers *********/
193/************************************************************************/
194
195static Py_ssize_t
196get_integer(const SubString *str)
197{
198    Py_ssize_t accumulator = 0;
199    Py_ssize_t digitval;
200    STRINGLIB_CHAR *p;
201
202    /* empty string is an error */
203    if (str->ptr >= str->end)
204        return -1;
205
206    for (p = str->ptr; p < str->end; p++) {
207        digitval = STRINGLIB_TODECIMAL(*p);
208        if (digitval < 0)
209            return -1;
210        /*
211           Detect possible overflow before it happens:
212
213              accumulator * 10 + digitval > PY_SSIZE_T_MAX if and only if
214              accumulator > (PY_SSIZE_T_MAX - digitval) / 10.
215        */
216        if (accumulator > (PY_SSIZE_T_MAX - digitval) / 10) {
217            PyErr_Format(PyExc_ValueError,
218                         "Too many decimal digits in format string");
219            return -1;
220        }
221        accumulator = accumulator * 10 + digitval;
222    }
223    return accumulator;
224}
225
226/************************************************************************/
227/******** Functions to get field objects and specification strings ******/
228/************************************************************************/
229
230/* do the equivalent of obj.name */
231static PyObject *
232getattr(PyObject *obj, SubString *name)
233{
234    PyObject *newobj;
235    PyObject *str = SubString_new_object(name);
236    if (str == NULL)
237        return NULL;
238    newobj = PyObject_GetAttr(obj, str);
239    Py_DECREF(str);
240    return newobj;
241}
242
243/* do the equivalent of obj[idx], where obj is a sequence */
244static PyObject *
245getitem_sequence(PyObject *obj, Py_ssize_t idx)
246{
247    return PySequence_GetItem(obj, idx);
248}
249
250/* do the equivalent of obj[idx], where obj is not a sequence */
251static PyObject *
252getitem_idx(PyObject *obj, Py_ssize_t idx)
253{
254    PyObject *newobj;
255    PyObject *idx_obj = PyLong_FromSsize_t(idx);
256    if (idx_obj == NULL)
257        return NULL;
258    newobj = PyObject_GetItem(obj, idx_obj);
259    Py_DECREF(idx_obj);
260    return newobj;
261}
262
263/* do the equivalent of obj[name] */
264static PyObject *
265getitem_str(PyObject *obj, SubString *name)
266{
267    PyObject *newobj;
268    PyObject *str = SubString_new_object(name);
269    if (str == NULL)
270        return NULL;
271    newobj = PyObject_GetItem(obj, str);
272    Py_DECREF(str);
273    return newobj;
274}
275
276typedef struct {
277    /* the entire string we're parsing.  we assume that someone else
278       is managing its lifetime, and that it will exist for the
279       lifetime of the iterator.  can be empty */
280    SubString str;
281
282    /* pointer to where we are inside field_name */
283    STRINGLIB_CHAR *ptr;
284} FieldNameIterator;
285
286
287static int
288FieldNameIterator_init(FieldNameIterator *self, STRINGLIB_CHAR *ptr,
289                       Py_ssize_t len)
290{
291    SubString_init(&self->str, ptr, len);
292    self->ptr = self->str.ptr;
293    return 1;
294}
295
296static int
297_FieldNameIterator_attr(FieldNameIterator *self, SubString *name)
298{
299    STRINGLIB_CHAR c;
300
301    name->ptr = self->ptr;
302
303    /* return everything until '.' or '[' */
304    while (self->ptr < self->str.end) {
305        switch (c = *self->ptr++) {
306        case '[':
307        case '.':
308            /* backup so that we this character will be seen next time */
309            self->ptr--;
310            break;
311        default:
312            continue;
313        }
314        break;
315    }
316    /* end of string is okay */
317    name->end = self->ptr;
318    return 1;
319}
320
321static int
322_FieldNameIterator_item(FieldNameIterator *self, SubString *name)
323{
324    int bracket_seen = 0;
325    STRINGLIB_CHAR c;
326
327    name->ptr = self->ptr;
328
329    /* return everything until ']' */
330    while (self->ptr < self->str.end) {
331        switch (c = *self->ptr++) {
332        case ']':
333            bracket_seen = 1;
334            break;
335        default:
336            continue;
337        }
338        break;
339    }
340    /* make sure we ended with a ']' */
341    if (!bracket_seen) {
342        PyErr_SetString(PyExc_ValueError, "Missing ']' in format string");
343        return 0;
344    }
345
346    /* end of string is okay */
347    /* don't include the ']' */
348    name->end = self->ptr-1;
349    return 1;
350}
351
352/* returns 0 on error, 1 on non-error termination, and 2 if it returns a value */
353static int
354FieldNameIterator_next(FieldNameIterator *self, int *is_attribute,
355                       Py_ssize_t *name_idx, SubString *name)
356{
357    /* check at end of input */
358    if (self->ptr >= self->str.end)
359        return 1;
360
361    switch (*self->ptr++) {
362    case '.':
363        *is_attribute = 1;
364        if (_FieldNameIterator_attr(self, name) == 0)
365            return 0;
366        *name_idx = -1;
367        break;
368    case '[':
369        *is_attribute = 0;
370        if (_FieldNameIterator_item(self, name) == 0)
371            return 0;
372        *name_idx = get_integer(name);
373        if (*name_idx == -1 && PyErr_Occurred())
374            return 0;
375        break;
376    default:
377        /* Invalid character follows ']' */
378        PyErr_SetString(PyExc_ValueError, "Only '.' or '[' may "
379                        "follow ']' in format field specifier");
380        return 0;
381    }
382
383    /* empty string is an error */
384    if (name->ptr == name->end) {
385        PyErr_SetString(PyExc_ValueError, "Empty attribute in format string");
386        return 0;
387    }
388
389    return 2;
390}
391
392
393/* input: field_name
394   output: 'first' points to the part before the first '[' or '.'
395           'first_idx' is -1 if 'first' is not an integer, otherwise
396                       it's the value of first converted to an integer
397           'rest' is an iterator to return the rest
398*/
399static int
400field_name_split(STRINGLIB_CHAR *ptr, Py_ssize_t len, SubString *first,
401                 Py_ssize_t *first_idx, FieldNameIterator *rest,
402                 AutoNumber *auto_number)
403{
404    STRINGLIB_CHAR c;
405    STRINGLIB_CHAR *p = ptr;
406    STRINGLIB_CHAR *end = ptr + len;
407    int field_name_is_empty;
408    int using_numeric_index;
409
410    /* find the part up until the first '.' or '[' */
411    while (p < end) {
412        switch (c = *p++) {
413        case '[':
414        case '.':
415            /* backup so that we this character is available to the
416               "rest" iterator */
417            p--;
418            break;
419        default:
420            continue;
421        }
422        break;
423    }
424
425    /* set up the return values */
426    SubString_init(first, ptr, p - ptr);
427    FieldNameIterator_init(rest, p, end - p);
428
429    /* see if "first" is an integer, in which case it's used as an index */
430    *first_idx = get_integer(first);
431    if (*first_idx == -1 && PyErr_Occurred())
432        return 0;
433
434    field_name_is_empty = first->ptr >= first->end;
435
436    /* If the field name is omitted or if we have a numeric index
437       specified, then we're doing numeric indexing into args. */
438    using_numeric_index = field_name_is_empty || *first_idx != -1;
439
440    /* We always get here exactly one time for each field we're
441       processing. And we get here in field order (counting by left
442       braces). So this is the perfect place to handle automatic field
443       numbering if the field name is omitted. */
444
445    /* Check if we need to do the auto-numbering. It's not needed if
446       we're called from string.Format routines, because it's handled
447       in that class by itself. */
448    if (auto_number) {
449        /* Initialize our auto numbering state if this is the first
450           time we're either auto-numbering or manually numbering. */
451        if (auto_number->an_state == ANS_INIT && using_numeric_index)
452            auto_number->an_state = field_name_is_empty ?
453                ANS_AUTO : ANS_MANUAL;
454
455        /* Make sure our state is consistent with what we're doing
456           this time through. Only check if we're using a numeric
457           index. */
458        if (using_numeric_index)
459            if (autonumber_state_error(auto_number->an_state,
460                                       field_name_is_empty))
461                return 0;
462        /* Zero length field means we want to do auto-numbering of the
463           fields. */
464        if (field_name_is_empty)
465            *first_idx = (auto_number->an_field_number)++;
466    }
467
468    return 1;
469}
470
471
472/*
473    get_field_object returns the object inside {}, before the
474    format_spec.  It handles getindex and getattr lookups and consumes
475    the entire input string.
476*/
477static PyObject *
478get_field_object(SubString *input, PyObject *args, PyObject *kwargs,
479                 AutoNumber *auto_number)
480{
481    PyObject *obj = NULL;
482    int ok;
483    int is_attribute;
484    SubString name;
485    SubString first;
486    Py_ssize_t index;
487    FieldNameIterator rest;
488
489    if (!field_name_split(input->ptr, input->end - input->ptr, &first,
490                          &index, &rest, auto_number)) {
491        goto error;
492    }
493
494    if (index == -1) {
495        /* look up in kwargs */
496        PyObject *key = SubString_new_object(&first);
497        if (key == NULL)
498            goto error;
499        if ((kwargs == NULL) || (obj = PyDict_GetItem(kwargs, key)) == NULL) {
500            PyErr_SetObject(PyExc_KeyError, key);
501            Py_DECREF(key);
502            goto error;
503        }
504        Py_DECREF(key);
505        Py_INCREF(obj);
506    }
507    else {
508        /* look up in args */
509        obj = PySequence_GetItem(args, index);
510        if (obj == NULL)
511            goto error;
512    }
513
514    /* iterate over the rest of the field_name */
515    while ((ok = FieldNameIterator_next(&rest, &is_attribute, &index,
516                                        &name)) == 2) {
517        PyObject *tmp;
518
519        if (is_attribute)
520            /* getattr lookup "." */
521            tmp = getattr(obj, &name);
522        else
523            /* getitem lookup "[]" */
524            if (index == -1)
525                tmp = getitem_str(obj, &name);
526            else
527                if (PySequence_Check(obj))
528                    tmp = getitem_sequence(obj, index);
529                else
530                    /* not a sequence */
531                    tmp = getitem_idx(obj, index);
532        if (tmp == NULL)
533            goto error;
534
535        /* assign to obj */
536        Py_DECREF(obj);
537        obj = tmp;
538    }
539    /* end of iterator, this is the non-error case */
540    if (ok == 1)
541        return obj;
542error:
543    Py_XDECREF(obj);
544    return NULL;
545}
546
547/************************************************************************/
548/*****************  Field rendering functions  **************************/
549/************************************************************************/
550
551/*
552    render_field() is the main function in this section.  It takes the
553    field object and field specification string generated by
554    get_field_and_spec, and renders the field into the output string.
555
556    render_field calls fieldobj.__format__(format_spec) method, and
557    appends to the output.
558*/
559static int
560render_field(PyObject *fieldobj, SubString *format_spec, OutputString *output)
561{
562    int ok = 0;
563    PyObject *result = NULL;
564    PyObject *format_spec_object = NULL;
565    PyObject *(*formatter)(PyObject *, STRINGLIB_CHAR *, Py_ssize_t) = NULL;
566    STRINGLIB_CHAR* format_spec_start = format_spec->ptr ?
567            format_spec->ptr : NULL;
568    Py_ssize_t format_spec_len = format_spec->ptr ?
569            format_spec->end - format_spec->ptr : 0;
570
571    /* If we know the type exactly, skip the lookup of __format__ and just
572       call the formatter directly. */
573#if STRINGLIB_IS_UNICODE
574    if (PyUnicode_CheckExact(fieldobj))
575        formatter = _PyUnicode_FormatAdvanced;
576    /* Unfortunately, there's a problem with checking for int, long,
577       and float here.  If we're being included as unicode, their
578       formatters expect string format_spec args.  For now, just skip
579       this optimization for unicode.  This could be fixed, but it's a
580       hassle. */
581#else
582    if (PyString_CheckExact(fieldobj))
583        formatter = _PyBytes_FormatAdvanced;
584    else if (PyInt_CheckExact(fieldobj))
585        formatter =_PyInt_FormatAdvanced;
586    else if (PyLong_CheckExact(fieldobj))
587        formatter =_PyLong_FormatAdvanced;
588    else if (PyFloat_CheckExact(fieldobj))
589        formatter = _PyFloat_FormatAdvanced;
590#endif
591
592    if (formatter) {
593        /* we know exactly which formatter will be called when __format__ is
594           looked up, so call it directly, instead. */
595        result = formatter(fieldobj, format_spec_start, format_spec_len);
596    }
597    else {
598        /* We need to create an object out of the pointers we have, because
599           __format__ takes a string/unicode object for format_spec. */
600        format_spec_object = STRINGLIB_NEW(format_spec_start,
601                                           format_spec_len);
602        if (format_spec_object == NULL)
603            goto done;
604
605        result = PyObject_Format(fieldobj, format_spec_object);
606    }
607    if (result == NULL)
608        goto done;
609
610#if PY_VERSION_HEX >= 0x03000000
611    assert(PyUnicode_Check(result));
612#else
613    assert(PyString_Check(result) || PyUnicode_Check(result));
614
615    /* Convert result to our type.  We could be str, and result could
616       be unicode */
617    {
618        PyObject *tmp = STRINGLIB_TOSTR(result);
619        if (tmp == NULL)
620            goto done;
621        Py_DECREF(result);
622        result = tmp;
623    }
624#endif
625
626    ok = output_data(output,
627                     STRINGLIB_STR(result), STRINGLIB_LEN(result));
628done:
629    Py_XDECREF(format_spec_object);
630    Py_XDECREF(result);
631    return ok;
632}
633
634static int
635parse_field(SubString *str, SubString *field_name, SubString *format_spec,
636            STRINGLIB_CHAR *conversion)
637{
638    /* Note this function works if the field name is zero length,
639       which is good.  Zero length field names are handled later, in
640       field_name_split. */
641
642    STRINGLIB_CHAR c = 0;
643
644    /* initialize these, as they may be empty */
645    *conversion = '\0';
646    SubString_init(format_spec, NULL, 0);
647
648    /* Search for the field name.  it's terminated by the end of
649       the string, or a ':' or '!' */
650    field_name->ptr = str->ptr;
651    while (str->ptr < str->end) {
652        switch (c = *(str->ptr++)) {
653        case ':':
654        case '!':
655            break;
656        default:
657            continue;
658        }
659        break;
660    }
661
662    if (c == '!' || c == ':') {
663        /* we have a format specifier and/or a conversion */
664        /* don't include the last character */
665        field_name->end = str->ptr-1;
666
667        /* the format specifier is the rest of the string */
668        format_spec->ptr = str->ptr;
669        format_spec->end = str->end;
670
671        /* see if there's a conversion specifier */
672        if (c == '!') {
673            /* there must be another character present */
674            if (format_spec->ptr >= format_spec->end) {
675                PyErr_SetString(PyExc_ValueError,
676                                "end of format while looking for conversion "
677                                "specifier");
678                return 0;
679            }
680            *conversion = *(format_spec->ptr++);
681
682            /* if there is another character, it must be a colon */
683            if (format_spec->ptr < format_spec->end) {
684                c = *(format_spec->ptr++);
685                if (c != ':') {
686                    PyErr_SetString(PyExc_ValueError,
687                                    "expected ':' after format specifier");
688                    return 0;
689                }
690            }
691        }
692    }
693    else
694        /* end of string, there's no format_spec or conversion */
695        field_name->end = str->ptr;
696
697    return 1;
698}
699
700/************************************************************************/
701/******* Output string allocation and escape-to-markup processing  ******/
702/************************************************************************/
703
704/* MarkupIterator breaks the string into pieces of either literal
705   text, or things inside {} that need to be marked up.  it is
706   designed to make it easy to wrap a Python iterator around it, for
707   use with the Formatter class */
708
709typedef struct {
710    SubString str;
711} MarkupIterator;
712
713static int
714MarkupIterator_init(MarkupIterator *self, STRINGLIB_CHAR *ptr, Py_ssize_t len)
715{
716    SubString_init(&self->str, ptr, len);
717    return 1;
718}
719
720/* returns 0 on error, 1 on non-error termination, and 2 if it got a
721   string (or something to be expanded) */
722static int
723MarkupIterator_next(MarkupIterator *self, SubString *literal,
724                    int *field_present, SubString *field_name,
725                    SubString *format_spec, STRINGLIB_CHAR *conversion,
726                    int *format_spec_needs_expanding)
727{
728    int at_end;
729    STRINGLIB_CHAR c = 0;
730    STRINGLIB_CHAR *start;
731    int count;
732    Py_ssize_t len;
733    int markup_follows = 0;
734
735    /* initialize all of the output variables */
736    SubString_init(literal, NULL, 0);
737    SubString_init(field_name, NULL, 0);
738    SubString_init(format_spec, NULL, 0);
739    *conversion = '\0';
740    *format_spec_needs_expanding = 0;
741    *field_present = 0;
742
743    /* No more input, end of iterator.  This is the normal exit
744       path. */
745    if (self->str.ptr >= self->str.end)
746        return 1;
747
748    start = self->str.ptr;
749
750    /* First read any literal text. Read until the end of string, an
751       escaped '{' or '}', or an unescaped '{'.  In order to never
752       allocate memory and so I can just pass pointers around, if
753       there's an escaped '{' or '}' then we'll return the literal
754       including the brace, but no format object.  The next time
755       through, we'll return the rest of the literal, skipping past
756       the second consecutive brace. */
757    while (self->str.ptr < self->str.end) {
758        switch (c = *(self->str.ptr++)) {
759        case '{':
760        case '}':
761            markup_follows = 1;
762            break;
763        default:
764            continue;
765        }
766        break;
767    }
768
769    at_end = self->str.ptr >= self->str.end;
770    len = self->str.ptr - start;
771
772    if ((c == '}') && (at_end || (c != *self->str.ptr))) {
773        PyErr_SetString(PyExc_ValueError, "Single '}' encountered "
774                        "in format string");
775        return 0;
776    }
777    if (at_end && c == '{') {
778        PyErr_SetString(PyExc_ValueError, "Single '{' encountered "
779                        "in format string");
780        return 0;
781    }
782    if (!at_end) {
783        if (c == *self->str.ptr) {
784            /* escaped } or {, skip it in the input.  there is no
785               markup object following us, just this literal text */
786            self->str.ptr++;
787            markup_follows = 0;
788        }
789        else
790            len--;
791    }
792
793    /* record the literal text */
794    literal->ptr = start;
795    literal->end = start + len;
796
797    if (!markup_follows)
798        return 2;
799
800    /* this is markup, find the end of the string by counting nested
801       braces.  note that this prohibits escaped braces, so that
802       format_specs cannot have braces in them. */
803    *field_present = 1;
804    count = 1;
805
806    start = self->str.ptr;
807
808    /* we know we can't have a zero length string, so don't worry
809       about that case */
810    while (self->str.ptr < self->str.end) {
811        switch (c = *(self->str.ptr++)) {
812        case '{':
813            /* the format spec needs to be recursively expanded.
814               this is an optimization, and not strictly needed */
815            *format_spec_needs_expanding = 1;
816            count++;
817            break;
818        case '}':
819            count--;
820            if (count <= 0) {
821                /* we're done.  parse and get out */
822                SubString s;
823
824                SubString_init(&s, start, self->str.ptr - 1 - start);
825                if (parse_field(&s, field_name, format_spec, conversion) == 0)
826                    return 0;
827
828                /* success */
829                return 2;
830            }
831            break;
832        }
833    }
834
835    /* end of string while searching for matching '}' */
836    PyErr_SetString(PyExc_ValueError, "unmatched '{' in format");
837    return 0;
838}
839
840
841/* do the !r or !s conversion on obj */
842static PyObject *
843do_conversion(PyObject *obj, STRINGLIB_CHAR conversion)
844{
845    /* XXX in pre-3.0, do we need to convert this to unicode, since it
846       might have returned a string? */
847    switch (conversion) {
848    case 'r':
849        return PyObject_Repr(obj);
850    case 's':
851        return STRINGLIB_TOSTR(obj);
852    default:
853        if (conversion > 32 && conversion < 127) {
854                /* It's the ASCII subrange; casting to char is safe
855                   (assuming the execution character set is an ASCII
856                   superset). */
857                PyErr_Format(PyExc_ValueError,
858                     "Unknown conversion specifier %c",
859                     (char)conversion);
860        } else
861                PyErr_Format(PyExc_ValueError,
862                     "Unknown conversion specifier \\x%x",
863                     (unsigned int)conversion);
864        return NULL;
865    }
866}
867
868/* given:
869
870   {field_name!conversion:format_spec}
871
872   compute the result and write it to output.
873   format_spec_needs_expanding is an optimization.  if it's false,
874   just output the string directly, otherwise recursively expand the
875   format_spec string.
876
877   field_name is allowed to be zero length, in which case we
878   are doing auto field numbering.
879*/
880
881static int
882output_markup(SubString *field_name, SubString *format_spec,
883              int format_spec_needs_expanding, STRINGLIB_CHAR conversion,
884              OutputString *output, PyObject *args, PyObject *kwargs,
885              int recursion_depth, AutoNumber *auto_number)
886{
887    PyObject *tmp = NULL;
888    PyObject *fieldobj = NULL;
889    SubString expanded_format_spec;
890    SubString *actual_format_spec;
891    int result = 0;
892
893    /* convert field_name to an object */
894    fieldobj = get_field_object(field_name, args, kwargs, auto_number);
895    if (fieldobj == NULL)
896        goto done;
897
898    if (conversion != '\0') {
899        tmp = do_conversion(fieldobj, conversion);
900        if (tmp == NULL)
901            goto done;
902
903        /* do the assignment, transferring ownership: fieldobj = tmp */
904        Py_DECREF(fieldobj);
905        fieldobj = tmp;
906        tmp = NULL;
907    }
908
909    /* if needed, recurively compute the format_spec */
910    if (format_spec_needs_expanding) {
911        tmp = build_string(format_spec, args, kwargs, recursion_depth-1,
912                           auto_number);
913        if (tmp == NULL)
914            goto done;
915
916        /* note that in the case we're expanding the format string,
917           tmp must be kept around until after the call to
918           render_field. */
919        SubString_init(&expanded_format_spec,
920                       STRINGLIB_STR(tmp), STRINGLIB_LEN(tmp));
921        actual_format_spec = &expanded_format_spec;
922    }
923    else
924        actual_format_spec = format_spec;
925
926    if (render_field(fieldobj, actual_format_spec, output) == 0)
927        goto done;
928
929    result = 1;
930
931done:
932    Py_XDECREF(fieldobj);
933    Py_XDECREF(tmp);
934
935    return result;
936}
937
938/*
939    do_markup is the top-level loop for the format() method.  It
940    searches through the format string for escapes to markup codes, and
941    calls other functions to move non-markup text to the output,
942    and to perform the markup to the output.
943*/
944static int
945do_markup(SubString *input, PyObject *args, PyObject *kwargs,
946          OutputString *output, int recursion_depth, AutoNumber *auto_number)
947{
948    MarkupIterator iter;
949    int format_spec_needs_expanding;
950    int result;
951    int field_present;
952    SubString literal;
953    SubString field_name;
954    SubString format_spec;
955    STRINGLIB_CHAR conversion;
956
957    MarkupIterator_init(&iter, input->ptr, input->end - input->ptr);
958    while ((result = MarkupIterator_next(&iter, &literal, &field_present,
959                                         &field_name, &format_spec,
960                                         &conversion,
961                                         &format_spec_needs_expanding)) == 2) {
962        if (!output_data(output, literal.ptr, literal.end - literal.ptr))
963            return 0;
964        if (field_present)
965            if (!output_markup(&field_name, &format_spec,
966                               format_spec_needs_expanding, conversion, output,
967                               args, kwargs, recursion_depth, auto_number))
968                return 0;
969    }
970    return result;
971}
972
973
974/*
975    build_string allocates the output string and then
976    calls do_markup to do the heavy lifting.
977*/
978static PyObject *
979build_string(SubString *input, PyObject *args, PyObject *kwargs,
980             int recursion_depth, AutoNumber *auto_number)
981{
982    OutputString output;
983    PyObject *result = NULL;
984    Py_ssize_t count;
985
986    output.obj = NULL; /* needed so cleanup code always works */
987
988    /* check the recursion level */
989    if (recursion_depth <= 0) {
990        PyErr_SetString(PyExc_ValueError,
991                        "Max string recursion exceeded");
992        goto done;
993    }
994
995    /* initial size is the length of the format string, plus the size
996       increment.  seems like a reasonable default */
997    if (!output_initialize(&output,
998                           input->end - input->ptr +
999                           INITIAL_SIZE_INCREMENT))
1000        goto done;
1001
1002    if (!do_markup(input, args, kwargs, &output, recursion_depth,
1003                   auto_number)) {
1004        goto done;
1005    }
1006
1007    count = output.ptr - STRINGLIB_STR(output.obj);
1008    if (STRINGLIB_RESIZE(&output.obj, count) < 0) {
1009        goto done;
1010    }
1011
1012    /* transfer ownership to result */
1013    result = output.obj;
1014    output.obj = NULL;
1015
1016done:
1017    Py_XDECREF(output.obj);
1018    return result;
1019}
1020
1021/************************************************************************/
1022/*********** main routine ***********************************************/
1023/************************************************************************/
1024
1025/* this is the main entry point */
1026static PyObject *
1027do_string_format(PyObject *self, PyObject *args, PyObject *kwargs)
1028{
1029    SubString input;
1030
1031    /* PEP 3101 says only 2 levels, so that
1032       "{0:{1}}".format('abc', 's')            # works
1033       "{0:{1:{2}}}".format('abc', 's', '')    # fails
1034    */
1035    int recursion_depth = 2;
1036
1037    AutoNumber auto_number;
1038
1039    AutoNumber_Init(&auto_number);
1040    SubString_init(&input, STRINGLIB_STR(self), STRINGLIB_LEN(self));
1041    return build_string(&input, args, kwargs, recursion_depth, &auto_number);
1042}
1043
1044
1045
1046/************************************************************************/
1047/*********** formatteriterator ******************************************/
1048/************************************************************************/
1049
/* This is used to implement string.Formatter.vparse().  It exists so
   Formatter can share code with the built in unicode.format() method.
   It's really just a wrapper around MarkupIterator that is callable
   from Python. */

typedef struct {
    PyObject_HEAD

    /* the string being parsed.  formatter_parser takes a reference to
       it when the iterator is created and formatteriter_dealloc
       releases it */
    STRINGLIB_OBJECT *str;

    /* parsing state, initialized by formatter_parser over the
       characters of str */
    MarkupIterator it_markup;
} formatteriterobject;
1062
1063static void
1064formatteriter_dealloc(formatteriterobject *it)
1065{
1066    Py_XDECREF(it->str);
1067    PyObject_FREE(it);
1068}
1069
1070/* returns a tuple:
1071   (literal, field_name, format_spec, conversion)
1072
1073   literal is any literal text to output.  might be zero length
1074   field_name is the string before the ':'.  might be None
1075   format_spec is the string after the ':'.  mibht be None
1076   conversion is either None, or the string after the '!'
1077*/
1078static PyObject *
1079formatteriter_next(formatteriterobject *it)
1080{
1081    SubString literal;
1082    SubString field_name;
1083    SubString format_spec;
1084    STRINGLIB_CHAR conversion;
1085    int format_spec_needs_expanding;
1086    int field_present;
1087    int result = MarkupIterator_next(&it->it_markup, &literal, &field_present,
1088                                     &field_name, &format_spec, &conversion,
1089                                     &format_spec_needs_expanding);
1090
1091    /* all of the SubString objects point into it->str, so no
1092       memory management needs to be done on them */
1093    assert(0 <= result && result <= 2);
1094    if (result == 0 || result == 1)
1095        /* if 0, error has already been set, if 1, iterator is empty */
1096        return NULL;
1097    else {
1098        PyObject *literal_str = NULL;
1099        PyObject *field_name_str = NULL;
1100        PyObject *format_spec_str = NULL;
1101        PyObject *conversion_str = NULL;
1102        PyObject *tuple = NULL;
1103
1104        literal_str = SubString_new_object(&literal);
1105        if (literal_str == NULL)
1106            goto done;
1107
1108        field_name_str = SubString_new_object(&field_name);
1109        if (field_name_str == NULL)
1110            goto done;
1111
1112        /* if field_name is non-zero length, return a string for
1113           format_spec (even if zero length), else return None */
1114        format_spec_str = (field_present ?
1115                           SubString_new_object_or_empty :
1116                           SubString_new_object)(&format_spec);
1117        if (format_spec_str == NULL)
1118            goto done;
1119
1120        /* if the conversion is not specified, return a None,
1121           otherwise create a one length string with the conversion
1122           character */
1123        if (conversion == '\0') {
1124            conversion_str = Py_None;
1125            Py_INCREF(conversion_str);
1126        }
1127        else
1128            conversion_str = STRINGLIB_NEW(&conversion, 1);
1129        if (conversion_str == NULL)
1130            goto done;
1131
1132        tuple = PyTuple_Pack(4, literal_str, field_name_str, format_spec_str,
1133                             conversion_str);
1134    done:
1135        Py_XDECREF(literal_str);
1136        Py_XDECREF(field_name_str);
1137        Py_XDECREF(format_spec_str);
1138        Py_XDECREF(conversion_str);
1139        return tuple;
1140    }
1141}
1142
1143static PyMethodDef formatteriter_methods[] = {
1144    {NULL,              NULL}           /* sentinel */
1145};
1146
1147static PyTypeObject PyFormatterIter_Type = {
1148    PyVarObject_HEAD_INIT(&PyType_Type, 0)
1149    "formatteriterator",                /* tp_name */
1150    sizeof(formatteriterobject),        /* tp_basicsize */
1151    0,                                  /* tp_itemsize */
1152    /* methods */
1153    (destructor)formatteriter_dealloc,  /* tp_dealloc */
1154    0,                                  /* tp_print */
1155    0,                                  /* tp_getattr */
1156    0,                                  /* tp_setattr */
1157    0,                                  /* tp_compare */
1158    0,                                  /* tp_repr */
1159    0,                                  /* tp_as_number */
1160    0,                                  /* tp_as_sequence */
1161    0,                                  /* tp_as_mapping */
1162    0,                                  /* tp_hash */
1163    0,                                  /* tp_call */
1164    0,                                  /* tp_str */
1165    PyObject_GenericGetAttr,            /* tp_getattro */
1166    0,                                  /* tp_setattro */
1167    0,                                  /* tp_as_buffer */
1168    Py_TPFLAGS_DEFAULT,                 /* tp_flags */
1169    0,                                  /* tp_doc */
1170    0,                                  /* tp_traverse */
1171    0,                                  /* tp_clear */
1172    0,                                  /* tp_richcompare */
1173    0,                                  /* tp_weaklistoffset */
1174    PyObject_SelfIter,                  /* tp_iter */
1175    (iternextfunc)formatteriter_next,   /* tp_iternext */
1176    formatteriter_methods,              /* tp_methods */
1177    0,
1178};
1179
1180/* unicode_formatter_parser is used to implement
1181   string.Formatter.vformat.  it parses a string and returns tuples
1182   describing the parsed elements.  It's a wrapper around
1183   stringlib/string_format.h's MarkupIterator */
1184static PyObject *
1185formatter_parser(STRINGLIB_OBJECT *self)
1186{
1187    formatteriterobject *it;
1188
1189    it = PyObject_New(formatteriterobject, &PyFormatterIter_Type);
1190    if (it == NULL)
1191        return NULL;
1192
1193    /* take ownership, give the object to the iterator */
1194    Py_INCREF(self);
1195    it->str = self;
1196
1197    /* initialize the contained MarkupIterator */
1198    MarkupIterator_init(&it->it_markup,
1199                        STRINGLIB_STR(self),
1200                        STRINGLIB_LEN(self));
1201
1202    return (PyObject *)it;
1203}
1204
1205
1206/************************************************************************/
1207/*********** fieldnameiterator ******************************************/
1208/************************************************************************/
1209
1210
/* This is used to implement string.Formatter.vparse().  It parses the
   field name into attribute and item values.  It's a Python-callable
   wrapper around FieldNameIterator */

typedef struct {
    PyObject_HEAD

    /* the field name being parsed.  formatter_field_name_split takes a
       reference to it when the iterator is created and
       fieldnameiter_dealloc releases it */
    STRINGLIB_OBJECT *str;

    /* parsing state, filled in by field_name_split when the iterator
       is created */
    FieldNameIterator it_field;
} fieldnameiterobject;
1222
1223static void
1224fieldnameiter_dealloc(fieldnameiterobject *it)
1225{
1226    Py_XDECREF(it->str);
1227    PyObject_FREE(it);
1228}
1229
1230/* returns a tuple:
1231   (is_attr, value)
1232   is_attr is true if we used attribute syntax (e.g., '.foo')
1233              false if we used index syntax (e.g., '[foo]')
1234   value is an integer or string
1235*/
1236static PyObject *
1237fieldnameiter_next(fieldnameiterobject *it)
1238{
1239    int result;
1240    int is_attr;
1241    Py_ssize_t idx;
1242    SubString name;
1243
1244    result = FieldNameIterator_next(&it->it_field, &is_attr,
1245                                    &idx, &name);
1246    if (result == 0 || result == 1)
1247        /* if 0, error has already been set, if 1, iterator is empty */
1248        return NULL;
1249    else {
1250        PyObject* result = NULL;
1251        PyObject* is_attr_obj = NULL;
1252        PyObject* obj = NULL;
1253
1254        is_attr_obj = PyBool_FromLong(is_attr);
1255        if (is_attr_obj == NULL)
1256            goto done;
1257
1258        /* either an integer or a string */
1259        if (idx != -1)
1260            obj = PyLong_FromSsize_t(idx);
1261        else
1262            obj = SubString_new_object(&name);
1263        if (obj == NULL)
1264            goto done;
1265
1266        /* return a tuple of values */
1267        result = PyTuple_Pack(2, is_attr_obj, obj);
1268
1269    done:
1270        Py_XDECREF(is_attr_obj);
1271        Py_XDECREF(obj);
1272        return result;
1273    }
1274}
1275
1276static PyMethodDef fieldnameiter_methods[] = {
1277    {NULL,              NULL}           /* sentinel */
1278};
1279
1280static PyTypeObject PyFieldNameIter_Type = {
1281    PyVarObject_HEAD_INIT(&PyType_Type, 0)
1282    "fieldnameiterator",                /* tp_name */
1283    sizeof(fieldnameiterobject),        /* tp_basicsize */
1284    0,                                  /* tp_itemsize */
1285    /* methods */
1286    (destructor)fieldnameiter_dealloc,  /* tp_dealloc */
1287    0,                                  /* tp_print */
1288    0,                                  /* tp_getattr */
1289    0,                                  /* tp_setattr */
1290    0,                                  /* tp_compare */
1291    0,                                  /* tp_repr */
1292    0,                                  /* tp_as_number */
1293    0,                                  /* tp_as_sequence */
1294    0,                                  /* tp_as_mapping */
1295    0,                                  /* tp_hash */
1296    0,                                  /* tp_call */
1297    0,                                  /* tp_str */
1298    PyObject_GenericGetAttr,            /* tp_getattro */
1299    0,                                  /* tp_setattro */
1300    0,                                  /* tp_as_buffer */
1301    Py_TPFLAGS_DEFAULT,                 /* tp_flags */
1302    0,                                  /* tp_doc */
1303    0,                                  /* tp_traverse */
1304    0,                                  /* tp_clear */
1305    0,                                  /* tp_richcompare */
1306    0,                                  /* tp_weaklistoffset */
1307    PyObject_SelfIter,                  /* tp_iter */
1308    (iternextfunc)fieldnameiter_next,   /* tp_iternext */
1309    fieldnameiter_methods,              /* tp_methods */
1310    0};
1311
1312/* unicode_formatter_field_name_split is used to implement
1313   string.Formatter.vformat.  it takes an PEP 3101 "field name", and
1314   returns a tuple of (first, rest): "first", the part before the
1315   first '.' or '['; and "rest", an iterator for the rest of the field
1316   name.  it's a wrapper around stringlib/string_format.h's
1317   field_name_split.  The iterator it returns is a
1318   FieldNameIterator */
1319static PyObject *
1320formatter_field_name_split(STRINGLIB_OBJECT *self)
1321{
1322    SubString first;
1323    Py_ssize_t first_idx;
1324    fieldnameiterobject *it;
1325
1326    PyObject *first_obj = NULL;
1327    PyObject *result = NULL;
1328
1329    it = PyObject_New(fieldnameiterobject, &PyFieldNameIter_Type);
1330    if (it == NULL)
1331        return NULL;
1332
1333    /* take ownership, give the object to the iterator.  this is
1334       just to keep the field_name alive */
1335    Py_INCREF(self);
1336    it->str = self;
1337
1338    /* Pass in auto_number = NULL. We'll return an empty string for
1339       first_obj in that case. */
1340    if (!field_name_split(STRINGLIB_STR(self),
1341                          STRINGLIB_LEN(self),
1342                          &first, &first_idx, &it->it_field, NULL))
1343        goto done;
1344
1345    /* first becomes an integer, if possible; else a string */
1346    if (first_idx != -1)
1347        first_obj = PyLong_FromSsize_t(first_idx);
1348    else
1349        /* convert "first" into a string object */
1350        first_obj = SubString_new_object(&first);
1351    if (first_obj == NULL)
1352        goto done;
1353
1354    /* return a tuple of values */
1355    result = PyTuple_Pack(2, first_obj, it);
1356
1357done:
1358    Py_XDECREF(it);
1359    Py_XDECREF(first_obj);
1360    return result;
1361}
1362