1/*
2    string_format.h -- implementation of string.format().
3
4    It uses the Objects/stringlib conventions, so that it can be
5    compiled for both unicode and string objects.
6*/
7
8
/* Defines for Python 2.6 compatibility: when PY_VERSION_HEX is below
   3.0, PyLong_FromSsize_t (used further down to turn a Py_ssize_t index
   into an object) is mapped onto the underscore-prefixed
   _PyLong_FromSsize_t. */
#if PY_VERSION_HEX < 0x03000000
#define PyLong_FromSsize_t _PyLong_FromSsize_t
#endif
13
/* Defines for more efficiently reallocating the string buffer.
   INITIAL_SIZE_INCREMENT is the starting value of the extra slack that is
   added whenever the output buffer has to grow.  After each successful
   grow the slack is multiplied by SIZE_MULTIPLIER, for as long as it is
   still below MAX_SIZE_INCREMENT. */
#define INITIAL_SIZE_INCREMENT 100
#define SIZE_MULTIPLIER 2
#define MAX_SIZE_INCREMENT  3200
18
19
20/************************************************************************/
21/***********   Global data structures and forward declarations  *********/
22/************************************************************************/
23
/*
   A SubString consists of the characters between two string or
   unicode pointers.  A SubString whose ptr is NULL stands for "no
   string at all", which is distinct from an empty one (ptr == end).
*/
typedef struct {
    STRINGLIB_CHAR *ptr;    /* first character, or NULL */
    STRINGLIB_CHAR *end;    /* one past the last character */
} SubString;
32
33
/* Keep track if we're auto-numbering fields.  The mode is picked by the
   first field that indexes into the positional arguments. */
typedef enum {
    ANS_INIT = 0,     /* no numbering mode has been chosen yet */
    ANS_AUTO = 1,     /* fields have empty names and are numbered for us */
    ANS_MANUAL = 2    /* fields carry explicit numbers */
} AutoNumberState;
39
/* Keeps track of our auto-numbering state, and which number field we're on */
typedef struct {
    AutoNumberState an_state;   /* numbering mode chosen so far */
    int an_field_number;        /* number given to the next field whose
                                   name is empty; incremented each time
                                   one is handed out */
} AutoNumber;
45
46
/* forward declaration for recursion: output_markup() calls build_string()
   to expand a format_spec that itself contains replacement fields, and
   build_string() in turn reaches output_markup() through do_markup(). */
static PyObject *
build_string(SubString *input, PyObject *args, PyObject *kwargs,
             int recursion_depth, AutoNumber *auto_number);
51
52
53
54/************************************************************************/
55/**************************  Utility  functions  ************************/
56/************************************************************************/
57
58static void
59AutoNumber_Init(AutoNumber *auto_number)
60{
61    auto_number->an_state = ANS_INIT;
62    auto_number->an_field_number = 0;
63}
64
65/* fill in a SubString from a pointer and length */
66Py_LOCAL_INLINE(void)
67SubString_init(SubString *str, STRINGLIB_CHAR *p, Py_ssize_t len)
68{
69    str->ptr = p;
70    if (p == NULL)
71        str->end = NULL;
72    else
73        str->end = str->ptr + len;
74}
75
76/* return a new string.  if str->ptr is NULL, return None */
77Py_LOCAL_INLINE(PyObject *)
78SubString_new_object(SubString *str)
79{
80    if (str->ptr == NULL) {
81        Py_INCREF(Py_None);
82        return Py_None;
83    }
84    return STRINGLIB_NEW(str->ptr, str->end - str->ptr);
85}
86
87/* return a new string.  if str->ptr is NULL, return None */
88Py_LOCAL_INLINE(PyObject *)
89SubString_new_object_or_empty(SubString *str)
90{
91    if (str->ptr == NULL) {
92        return STRINGLIB_NEW(NULL, 0);
93    }
94    return STRINGLIB_NEW(str->ptr, str->end - str->ptr);
95}
96
97/* Return 1 if an error has been detected switching between automatic
98   field numbering and manual field specification, else return 0. Set
99   ValueError on error. */
100static int
101autonumber_state_error(AutoNumberState state, int field_name_is_empty)
102{
103    if (state == ANS_MANUAL) {
104        if (field_name_is_empty) {
105            PyErr_SetString(PyExc_ValueError, "cannot switch from "
106                            "manual field specification to "
107                            "automatic field numbering");
108            return 1;
109        }
110    }
111    else {
112        if (!field_name_is_empty) {
113            PyErr_SetString(PyExc_ValueError, "cannot switch from "
114                            "automatic field numbering to "
115                            "manual field specification");
116            return 1;
117        }
118    }
119    return 0;
120}
121
122
123/************************************************************************/
124/***********    Output string management functions       ****************/
125/************************************************************************/
126
/* A growable output buffer that lives inside a string/unicode object. */
typedef struct {
    STRINGLIB_CHAR *ptr;        /* where the next character will be written */
    STRINGLIB_CHAR *end;        /* one past the end of obj's current buffer */
    PyObject *obj;              /* the object whose buffer ptr/end point into */
    Py_ssize_t size_increment;  /* slack added beyond what is requested the
                                   next time the buffer has to grow */
} OutputString;
133
134/* initialize an OutputString object, reserving size characters */
135static int
136output_initialize(OutputString *output, Py_ssize_t size)
137{
138    output->obj = STRINGLIB_NEW(NULL, size);
139    if (output->obj == NULL)
140        return 0;
141
142    output->ptr = STRINGLIB_STR(output->obj);
143    output->end = STRINGLIB_LEN(output->obj) + output->ptr;
144    output->size_increment = INITIAL_SIZE_INCREMENT;
145
146    return 1;
147}
148
149/*
150    output_extend reallocates the output string buffer.
151    It returns a status:  0 for a failed reallocation,
152    1 for success.
153*/
154
155static int
156output_extend(OutputString *output, Py_ssize_t count)
157{
158    STRINGLIB_CHAR *startptr = STRINGLIB_STR(output->obj);
159    Py_ssize_t curlen = output->ptr - startptr;
160    Py_ssize_t maxlen = curlen + count + output->size_increment;
161
162    if (STRINGLIB_RESIZE(&output->obj, maxlen) < 0)
163        return 0;
164    startptr = STRINGLIB_STR(output->obj);
165    output->ptr = startptr + curlen;
166    output->end = startptr + maxlen;
167    if (output->size_increment < MAX_SIZE_INCREMENT)
168        output->size_increment *= SIZE_MULTIPLIER;
169    return 1;
170}
171
172/*
173    output_data dumps characters into our output string
174    buffer.
175
176    In some cases, it has to reallocate the string.
177
178    It returns a status:  0 for a failed reallocation,
179    1 for success.
180*/
181static int
182output_data(OutputString *output, const STRINGLIB_CHAR *s, Py_ssize_t count)
183{
184    if ((count > output->end - output->ptr) && !output_extend(output, count))
185        return 0;
186    memcpy(output->ptr, s, count * sizeof(STRINGLIB_CHAR));
187    output->ptr += count;
188    return 1;
189}
190
191/************************************************************************/
192/***********  Format string parsing -- integers and identifiers *********/
193/************************************************************************/
194
195static Py_ssize_t
196get_integer(const SubString *str)
197{
198    Py_ssize_t accumulator = 0;
199    Py_ssize_t digitval;
200    Py_ssize_t oldaccumulator;
201    STRINGLIB_CHAR *p;
202
203    /* empty string is an error */
204    if (str->ptr >= str->end)
205        return -1;
206
207    for (p = str->ptr; p < str->end; p++) {
208        digitval = STRINGLIB_TODECIMAL(*p);
209        if (digitval < 0)
210            return -1;
211        /*
212           This trick was copied from old Unicode format code.  It's cute,
213           but would really suck on an old machine with a slow divide
214           implementation.  Fortunately, in the normal case we do not
215           expect too many digits.
216        */
217        oldaccumulator = accumulator;
218        accumulator *= 10;
219        if ((accumulator+10)/10 != oldaccumulator+1) {
220            PyErr_Format(PyExc_ValueError,
221                         "Too many decimal digits in format string");
222            return -1;
223        }
224        accumulator += digitval;
225    }
226    return accumulator;
227}
228
229/************************************************************************/
230/******** Functions to get field objects and specification strings ******/
231/************************************************************************/
232
233/* do the equivalent of obj.name */
234static PyObject *
235getattr(PyObject *obj, SubString *name)
236{
237    PyObject *newobj;
238    PyObject *str = SubString_new_object(name);
239    if (str == NULL)
240        return NULL;
241    newobj = PyObject_GetAttr(obj, str);
242    Py_DECREF(str);
243    return newobj;
244}
245
246/* do the equivalent of obj[idx], where obj is a sequence */
247static PyObject *
248getitem_sequence(PyObject *obj, Py_ssize_t idx)
249{
250    return PySequence_GetItem(obj, idx);
251}
252
253/* do the equivalent of obj[idx], where obj is not a sequence */
254static PyObject *
255getitem_idx(PyObject *obj, Py_ssize_t idx)
256{
257    PyObject *newobj;
258    PyObject *idx_obj = PyLong_FromSsize_t(idx);
259    if (idx_obj == NULL)
260        return NULL;
261    newobj = PyObject_GetItem(obj, idx_obj);
262    Py_DECREF(idx_obj);
263    return newobj;
264}
265
266/* do the equivalent of obj[name] */
267static PyObject *
268getitem_str(PyObject *obj, SubString *name)
269{
270    PyObject *newobj;
271    PyObject *str = SubString_new_object(name);
272    if (str == NULL)
273        return NULL;
274    newobj = PyObject_GetItem(obj, str);
275    Py_DECREF(str);
276    return newobj;
277}
278
/* Iterates over the ".attr" and "[item]" parts that follow the first
   component of a field name. */
typedef struct {
    /* the entire string we're parsing.  we assume that someone else
       is managing its lifetime, and that it will exist for the
       lifetime of the iterator.  can be empty */
    SubString str;

    /* pointer to where we are inside field_name; everything before it
       has already been returned */
    STRINGLIB_CHAR *ptr;
} FieldNameIterator;
288
289
290static int
291FieldNameIterator_init(FieldNameIterator *self, STRINGLIB_CHAR *ptr,
292                       Py_ssize_t len)
293{
294    SubString_init(&self->str, ptr, len);
295    self->ptr = self->str.ptr;
296    return 1;
297}
298
299static int
300_FieldNameIterator_attr(FieldNameIterator *self, SubString *name)
301{
302    STRINGLIB_CHAR c;
303
304    name->ptr = self->ptr;
305
306    /* return everything until '.' or '[' */
307    while (self->ptr < self->str.end) {
308        switch (c = *self->ptr++) {
309        case '[':
310        case '.':
311            /* backup so that we this character will be seen next time */
312            self->ptr--;
313            break;
314        default:
315            continue;
316        }
317        break;
318    }
319    /* end of string is okay */
320    name->end = self->ptr;
321    return 1;
322}
323
324static int
325_FieldNameIterator_item(FieldNameIterator *self, SubString *name)
326{
327    int bracket_seen = 0;
328    STRINGLIB_CHAR c;
329
330    name->ptr = self->ptr;
331
332    /* return everything until ']' */
333    while (self->ptr < self->str.end) {
334        switch (c = *self->ptr++) {
335        case ']':
336            bracket_seen = 1;
337            break;
338        default:
339            continue;
340        }
341        break;
342    }
343    /* make sure we ended with a ']' */
344    if (!bracket_seen) {
345        PyErr_SetString(PyExc_ValueError, "Missing ']' in format string");
346        return 0;
347    }
348
349    /* end of string is okay */
350    /* don't include the ']' */
351    name->end = self->ptr-1;
352    return 1;
353}
354
355/* returns 0 on error, 1 on non-error termination, and 2 if it returns a value */
356static int
357FieldNameIterator_next(FieldNameIterator *self, int *is_attribute,
358                       Py_ssize_t *name_idx, SubString *name)
359{
360    /* check at end of input */
361    if (self->ptr >= self->str.end)
362        return 1;
363
364    switch (*self->ptr++) {
365    case '.':
366        *is_attribute = 1;
367        if (_FieldNameIterator_attr(self, name) == 0)
368            return 0;
369        *name_idx = -1;
370        break;
371    case '[':
372        *is_attribute = 0;
373        if (_FieldNameIterator_item(self, name) == 0)
374            return 0;
375        *name_idx = get_integer(name);
376        if (*name_idx == -1 && PyErr_Occurred())
377            return 0;
378        break;
379    default:
380        /* Invalid character follows ']' */
381        PyErr_SetString(PyExc_ValueError, "Only '.' or '[' may "
382                        "follow ']' in format field specifier");
383        return 0;
384    }
385
386    /* empty string is an error */
387    if (name->ptr == name->end) {
388        PyErr_SetString(PyExc_ValueError, "Empty attribute in format string");
389        return 0;
390    }
391
392    return 2;
393}
394
395
396/* input: field_name
397   output: 'first' points to the part before the first '[' or '.'
398           'first_idx' is -1 if 'first' is not an integer, otherwise
399                       it's the value of first converted to an integer
400           'rest' is an iterator to return the rest
401*/
402static int
403field_name_split(STRINGLIB_CHAR *ptr, Py_ssize_t len, SubString *first,
404                 Py_ssize_t *first_idx, FieldNameIterator *rest,
405                 AutoNumber *auto_number)
406{
407    STRINGLIB_CHAR c;
408    STRINGLIB_CHAR *p = ptr;
409    STRINGLIB_CHAR *end = ptr + len;
410    int field_name_is_empty;
411    int using_numeric_index;
412
413    /* find the part up until the first '.' or '[' */
414    while (p < end) {
415        switch (c = *p++) {
416        case '[':
417        case '.':
418            /* backup so that we this character is available to the
419               "rest" iterator */
420            p--;
421            break;
422        default:
423            continue;
424        }
425        break;
426    }
427
428    /* set up the return values */
429    SubString_init(first, ptr, p - ptr);
430    FieldNameIterator_init(rest, p, end - p);
431
432    /* see if "first" is an integer, in which case it's used as an index */
433    *first_idx = get_integer(first);
434    if (*first_idx == -1 && PyErr_Occurred())
435        return 0;
436
437    field_name_is_empty = first->ptr >= first->end;
438
439    /* If the field name is omitted or if we have a numeric index
440       specified, then we're doing numeric indexing into args. */
441    using_numeric_index = field_name_is_empty || *first_idx != -1;
442
443    /* We always get here exactly one time for each field we're
444       processing. And we get here in field order (counting by left
445       braces). So this is the perfect place to handle automatic field
446       numbering if the field name is omitted. */
447
448    /* Check if we need to do the auto-numbering. It's not needed if
449       we're called from string.Format routines, because it's handled
450       in that class by itself. */
451    if (auto_number) {
452        /* Initialize our auto numbering state if this is the first
453           time we're either auto-numbering or manually numbering. */
454        if (auto_number->an_state == ANS_INIT && using_numeric_index)
455            auto_number->an_state = field_name_is_empty ?
456                ANS_AUTO : ANS_MANUAL;
457
458        /* Make sure our state is consistent with what we're doing
459           this time through. Only check if we're using a numeric
460           index. */
461        if (using_numeric_index)
462            if (autonumber_state_error(auto_number->an_state,
463                                       field_name_is_empty))
464                return 0;
465        /* Zero length field means we want to do auto-numbering of the
466           fields. */
467        if (field_name_is_empty)
468            *first_idx = (auto_number->an_field_number)++;
469    }
470
471    return 1;
472}
473
474
475/*
476    get_field_object returns the object inside {}, before the
477    format_spec.  It handles getindex and getattr lookups and consumes
478    the entire input string.
479*/
480static PyObject *
481get_field_object(SubString *input, PyObject *args, PyObject *kwargs,
482                 AutoNumber *auto_number)
483{
484    PyObject *obj = NULL;
485    int ok;
486    int is_attribute;
487    SubString name;
488    SubString first;
489    Py_ssize_t index;
490    FieldNameIterator rest;
491
492    if (!field_name_split(input->ptr, input->end - input->ptr, &first,
493                          &index, &rest, auto_number)) {
494        goto error;
495    }
496
497    if (index == -1) {
498        /* look up in kwargs */
499        PyObject *key = SubString_new_object(&first);
500        if (key == NULL)
501            goto error;
502        if ((kwargs == NULL) || (obj = PyDict_GetItem(kwargs, key)) == NULL) {
503            PyErr_SetObject(PyExc_KeyError, key);
504            Py_DECREF(key);
505            goto error;
506        }
507        Py_DECREF(key);
508        Py_INCREF(obj);
509    }
510    else {
511        /* look up in args */
512        obj = PySequence_GetItem(args, index);
513        if (obj == NULL)
514            goto error;
515    }
516
517    /* iterate over the rest of the field_name */
518    while ((ok = FieldNameIterator_next(&rest, &is_attribute, &index,
519                                        &name)) == 2) {
520        PyObject *tmp;
521
522        if (is_attribute)
523            /* getattr lookup "." */
524            tmp = getattr(obj, &name);
525        else
526            /* getitem lookup "[]" */
527            if (index == -1)
528                tmp = getitem_str(obj, &name);
529            else
530                if (PySequence_Check(obj))
531                    tmp = getitem_sequence(obj, index);
532                else
533                    /* not a sequence */
534                    tmp = getitem_idx(obj, index);
535        if (tmp == NULL)
536            goto error;
537
538        /* assign to obj */
539        Py_DECREF(obj);
540        obj = tmp;
541    }
542    /* end of iterator, this is the non-error case */
543    if (ok == 1)
544        return obj;
545error:
546    Py_XDECREF(obj);
547    return NULL;
548}
549
550/************************************************************************/
551/*****************  Field rendering functions  **************************/
552/************************************************************************/
553
554/*
555    render_field() is the main function in this section.  It takes the
556    field object and field specification string generated by
557    get_field_and_spec, and renders the field into the output string.
558
559    render_field calls fieldobj.__format__(format_spec) method, and
560    appends to the output.
561*/
562static int
563render_field(PyObject *fieldobj, SubString *format_spec, OutputString *output)
564{
565    int ok = 0;
566    PyObject *result = NULL;
567    PyObject *format_spec_object = NULL;
568    PyObject *(*formatter)(PyObject *, STRINGLIB_CHAR *, Py_ssize_t) = NULL;
569    STRINGLIB_CHAR* format_spec_start = format_spec->ptr ?
570            format_spec->ptr : NULL;
571    Py_ssize_t format_spec_len = format_spec->ptr ?
572            format_spec->end - format_spec->ptr : 0;
573
574    /* If we know the type exactly, skip the lookup of __format__ and just
575       call the formatter directly. */
576#if STRINGLIB_IS_UNICODE
577    if (PyUnicode_CheckExact(fieldobj))
578        formatter = _PyUnicode_FormatAdvanced;
579    /* Unfortunately, there's a problem with checking for int, long,
580       and float here.  If we're being included as unicode, their
581       formatters expect string format_spec args.  For now, just skip
582       this optimization for unicode.  This could be fixed, but it's a
583       hassle. */
584#else
585    if (PyString_CheckExact(fieldobj))
586        formatter = _PyBytes_FormatAdvanced;
587    else if (PyInt_CheckExact(fieldobj))
588        formatter =_PyInt_FormatAdvanced;
589    else if (PyLong_CheckExact(fieldobj))
590        formatter =_PyLong_FormatAdvanced;
591    else if (PyFloat_CheckExact(fieldobj))
592        formatter = _PyFloat_FormatAdvanced;
593#endif
594
595    if (formatter) {
596        /* we know exactly which formatter will be called when __format__ is
597           looked up, so call it directly, instead. */
598        result = formatter(fieldobj, format_spec_start, format_spec_len);
599    }
600    else {
601        /* We need to create an object out of the pointers we have, because
602           __format__ takes a string/unicode object for format_spec. */
603        format_spec_object = STRINGLIB_NEW(format_spec_start,
604                                           format_spec_len);
605        if (format_spec_object == NULL)
606            goto done;
607
608        result = PyObject_Format(fieldobj, format_spec_object);
609    }
610    if (result == NULL)
611        goto done;
612
613#if PY_VERSION_HEX >= 0x03000000
614    assert(PyUnicode_Check(result));
615#else
616    assert(PyString_Check(result) || PyUnicode_Check(result));
617
618    /* Convert result to our type.  We could be str, and result could
619       be unicode */
620    {
621        PyObject *tmp = STRINGLIB_TOSTR(result);
622        if (tmp == NULL)
623            goto done;
624        Py_DECREF(result);
625        result = tmp;
626    }
627#endif
628
629    ok = output_data(output,
630                     STRINGLIB_STR(result), STRINGLIB_LEN(result));
631done:
632    Py_XDECREF(format_spec_object);
633    Py_XDECREF(result);
634    return ok;
635}
636
637static int
638parse_field(SubString *str, SubString *field_name, SubString *format_spec,
639            STRINGLIB_CHAR *conversion)
640{
641    /* Note this function works if the field name is zero length,
642       which is good.  Zero length field names are handled later, in
643       field_name_split. */
644
645    STRINGLIB_CHAR c = 0;
646
647    /* initialize these, as they may be empty */
648    *conversion = '\0';
649    SubString_init(format_spec, NULL, 0);
650
651    /* Search for the field name.  it's terminated by the end of
652       the string, or a ':' or '!' */
653    field_name->ptr = str->ptr;
654    while (str->ptr < str->end) {
655        switch (c = *(str->ptr++)) {
656        case ':':
657        case '!':
658            break;
659        default:
660            continue;
661        }
662        break;
663    }
664
665    if (c == '!' || c == ':') {
666        /* we have a format specifier and/or a conversion */
667        /* don't include the last character */
668        field_name->end = str->ptr-1;
669
670        /* the format specifier is the rest of the string */
671        format_spec->ptr = str->ptr;
672        format_spec->end = str->end;
673
674        /* see if there's a conversion specifier */
675        if (c == '!') {
676            /* there must be another character present */
677            if (format_spec->ptr >= format_spec->end) {
678                PyErr_SetString(PyExc_ValueError,
679                                "end of format while looking for conversion "
680                                "specifier");
681                return 0;
682            }
683            *conversion = *(format_spec->ptr++);
684
685            /* if there is another character, it must be a colon */
686            if (format_spec->ptr < format_spec->end) {
687                c = *(format_spec->ptr++);
688                if (c != ':') {
689                    PyErr_SetString(PyExc_ValueError,
690                                    "expected ':' after format specifier");
691                    return 0;
692                }
693            }
694        }
695    }
696    else
697        /* end of string, there's no format_spec or conversion */
698        field_name->end = str->ptr;
699
700    return 1;
701}
702
703/************************************************************************/
704/******* Output string allocation and escape-to-markup processing  ******/
705/************************************************************************/
706
/* MarkupIterator breaks the string into pieces of either literal
   text, or things inside {} that need to be marked up.  it is
   designed to make it easy to wrap a Python iterator around it, for
   use with the Formatter class */

typedef struct {
    /* the input that has not been returned yet; str.ptr moves forward
       as pieces are handed out */
    SubString str;
} MarkupIterator;
715
716static int
717MarkupIterator_init(MarkupIterator *self, STRINGLIB_CHAR *ptr, Py_ssize_t len)
718{
719    SubString_init(&self->str, ptr, len);
720    return 1;
721}
722
/* Pull the next piece out of the format string.

   All output parameters are reset at the start of every call.  On a
   return of 2, 'literal' holds the literal text that was read (it may
   be empty).  If *field_present is set, a replacement field followed
   that text: 'field_name', 'format_spec' and 'conversion' describe it
   as split up by parse_field(), and *format_spec_needs_expanding is
   set when a '{' was seen inside the field.

   returns 0 on error, 1 on non-error termination, and 2 if it got a
   string (or something to be expanded) */
static int
MarkupIterator_next(MarkupIterator *self, SubString *literal,
                    int *field_present, SubString *field_name,
                    SubString *format_spec, STRINGLIB_CHAR *conversion,
                    int *format_spec_needs_expanding)
{
    int at_end;
    STRINGLIB_CHAR c = 0;
    STRINGLIB_CHAR *start;
    int count;
    Py_ssize_t len;
    int markup_follows = 0;

    /* initialize all of the output variables */
    SubString_init(literal, NULL, 0);
    SubString_init(field_name, NULL, 0);
    SubString_init(format_spec, NULL, 0);
    *conversion = '\0';
    *format_spec_needs_expanding = 0;
    *field_present = 0;

    /* No more input, end of iterator.  This is the normal exit
       path. */
    if (self->str.ptr >= self->str.end)
        return 1;

    start = self->str.ptr;

    /* First read any literal text. Read until the end of string, an
       escaped '{' or '}', or an unescaped '{'.  In order to never
       allocate memory and so I can just pass pointers around, if
       there's an escaped '{' or '}' then we'll return the literal
       including the brace, but no format object.  The next time
       through, we'll return the rest of the literal, skipping past
       the second consecutive brace. */
    while (self->str.ptr < self->str.end) {
        switch (c = *(self->str.ptr++)) {
        case '{':
        case '}':
            markup_follows = 1;
            break;
        default:
            continue;
        }
        break;
    }

    /* c is now the last character read; it is a brace only if the loop
       above stopped on one.  len counts that character as well. */
    at_end = self->str.ptr >= self->str.end;
    len = self->str.ptr - start;

    /* a '}' in literal text is only legal when doubled */
    if ((c == '}') && (at_end || (c != *self->str.ptr))) {
        PyErr_SetString(PyExc_ValueError, "Single '}' encountered "
                        "in format string");
        return 0;
    }
    /* a '{' as the very last character can neither be an escape nor
       open a field */
    if (at_end && c == '{') {
        PyErr_SetString(PyExc_ValueError, "Single '{' encountered "
                        "in format string");
        return 0;
    }
    if (!at_end) {
        if (c == *self->str.ptr) {
            /* escaped } or {, skip it in the input.  there is no
               markup object following us, just this literal text */
            self->str.ptr++;
            markup_follows = 0;
        }
        else
            /* an unescaped '{': it opens a field and is not part of
               the literal */
            len--;
    }

    /* record the literal text */
    literal->ptr = start;
    literal->end = start + len;

    if (!markup_follows)
        return 2;

    /* this is markup, find the end of the string by counting nested
       braces.  note that this prohibits escaped braces, so that
       format_specs cannot have braces in them. */
    *field_present = 1;
    count = 1;

    start = self->str.ptr;

    /* we know we can't have a zero length string, so don't worry
       about that case */
    while (self->str.ptr < self->str.end) {
        switch (c = *(self->str.ptr++)) {
        case '{':
            /* the format spec needs to be recursively expanded.
               this is an optimization, and not strictly needed */
            *format_spec_needs_expanding = 1;
            count++;
            break;
        case '}':
            count--;
            if (count <= 0) {
                /* we're done.  parse and get out */
                SubString s;

                /* s covers the field's contents, without the closing
                   '}' that was just consumed */
                SubString_init(&s, start, self->str.ptr - 1 - start);
                if (parse_field(&s, field_name, format_spec, conversion) == 0)
                    return 0;

                /* success */
                return 2;
            }
            break;
        }
    }

    /* end of string while searching for matching '}' */
    PyErr_SetString(PyExc_ValueError, "unmatched '{' in format");
    return 0;
}
842
843
844/* do the !r or !s conversion on obj */
845static PyObject *
846do_conversion(PyObject *obj, STRINGLIB_CHAR conversion)
847{
848    /* XXX in pre-3.0, do we need to convert this to unicode, since it
849       might have returned a string? */
850    switch (conversion) {
851    case 'r':
852        return PyObject_Repr(obj);
853    case 's':
854        return STRINGLIB_TOSTR(obj);
855    default:
856        if (conversion > 32 && conversion < 127) {
857                /* It's the ASCII subrange; casting to char is safe
858                   (assuming the execution character set is an ASCII
859                   superset). */
860                PyErr_Format(PyExc_ValueError,
861                     "Unknown conversion specifier %c",
862                     (char)conversion);
863        } else
864                PyErr_Format(PyExc_ValueError,
865                     "Unknown conversion specifier \\x%x",
866                     (unsigned int)conversion);
867        return NULL;
868    }
869}
870
871/* given:
872
873   {field_name!conversion:format_spec}
874
875   compute the result and write it to output.
876   format_spec_needs_expanding is an optimization.  if it's false,
877   just output the string directly, otherwise recursively expand the
878   format_spec string.
879
880   field_name is allowed to be zero length, in which case we
881   are doing auto field numbering.
882*/
883
884static int
885output_markup(SubString *field_name, SubString *format_spec,
886              int format_spec_needs_expanding, STRINGLIB_CHAR conversion,
887              OutputString *output, PyObject *args, PyObject *kwargs,
888              int recursion_depth, AutoNumber *auto_number)
889{
890    PyObject *tmp = NULL;
891    PyObject *fieldobj = NULL;
892    SubString expanded_format_spec;
893    SubString *actual_format_spec;
894    int result = 0;
895
896    /* convert field_name to an object */
897    fieldobj = get_field_object(field_name, args, kwargs, auto_number);
898    if (fieldobj == NULL)
899        goto done;
900
901    if (conversion != '\0') {
902        tmp = do_conversion(fieldobj, conversion);
903        if (tmp == NULL)
904            goto done;
905
906        /* do the assignment, transferring ownership: fieldobj = tmp */
907        Py_DECREF(fieldobj);
908        fieldobj = tmp;
909        tmp = NULL;
910    }
911
912    /* if needed, recurively compute the format_spec */
913    if (format_spec_needs_expanding) {
914        tmp = build_string(format_spec, args, kwargs, recursion_depth-1,
915                           auto_number);
916        if (tmp == NULL)
917            goto done;
918
919        /* note that in the case we're expanding the format string,
920           tmp must be kept around until after the call to
921           render_field. */
922        SubString_init(&expanded_format_spec,
923                       STRINGLIB_STR(tmp), STRINGLIB_LEN(tmp));
924        actual_format_spec = &expanded_format_spec;
925    }
926    else
927        actual_format_spec = format_spec;
928
929    if (render_field(fieldobj, actual_format_spec, output) == 0)
930        goto done;
931
932    result = 1;
933
934done:
935    Py_XDECREF(fieldobj);
936    Py_XDECREF(tmp);
937
938    return result;
939}
940
941/*
942    do_markup is the top-level loop for the format() method.  It
943    searches through the format string for escapes to markup codes, and
944    calls other functions to move non-markup text to the output,
945    and to perform the markup to the output.
946*/
947static int
948do_markup(SubString *input, PyObject *args, PyObject *kwargs,
949          OutputString *output, int recursion_depth, AutoNumber *auto_number)
950{
951    MarkupIterator iter;
952    int format_spec_needs_expanding;
953    int result;
954    int field_present;
955    SubString literal;
956    SubString field_name;
957    SubString format_spec;
958    STRINGLIB_CHAR conversion;
959
960    MarkupIterator_init(&iter, input->ptr, input->end - input->ptr);
961    while ((result = MarkupIterator_next(&iter, &literal, &field_present,
962                                         &field_name, &format_spec,
963                                         &conversion,
964                                         &format_spec_needs_expanding)) == 2) {
965        if (!output_data(output, literal.ptr, literal.end - literal.ptr))
966            return 0;
967        if (field_present)
968            if (!output_markup(&field_name, &format_spec,
969                               format_spec_needs_expanding, conversion, output,
970                               args, kwargs, recursion_depth, auto_number))
971                return 0;
972    }
973    return result;
974}
975
976
977/*
978    build_string allocates the output string and then
979    calls do_markup to do the heavy lifting.
980*/
981static PyObject *
982build_string(SubString *input, PyObject *args, PyObject *kwargs,
983             int recursion_depth, AutoNumber *auto_number)
984{
985    OutputString output;
986    PyObject *result = NULL;
987    Py_ssize_t count;
988
989    output.obj = NULL; /* needed so cleanup code always works */
990
991    /* check the recursion level */
992    if (recursion_depth <= 0) {
993        PyErr_SetString(PyExc_ValueError,
994                        "Max string recursion exceeded");
995        goto done;
996    }
997
998    /* initial size is the length of the format string, plus the size
999       increment.  seems like a reasonable default */
1000    if (!output_initialize(&output,
1001                           input->end - input->ptr +
1002                           INITIAL_SIZE_INCREMENT))
1003        goto done;
1004
1005    if (!do_markup(input, args, kwargs, &output, recursion_depth,
1006                   auto_number)) {
1007        goto done;
1008    }
1009
1010    count = output.ptr - STRINGLIB_STR(output.obj);
1011    if (STRINGLIB_RESIZE(&output.obj, count) < 0) {
1012        goto done;
1013    }
1014
1015    /* transfer ownership to result */
1016    result = output.obj;
1017    output.obj = NULL;
1018
1019done:
1020    Py_XDECREF(output.obj);
1021    return result;
1022}
1023
1024/************************************************************************/
1025/*********** main routine ***********************************************/
1026/************************************************************************/
1027
1028/* this is the main entry point */
1029static PyObject *
1030do_string_format(PyObject *self, PyObject *args, PyObject *kwargs)
1031{
1032    SubString input;
1033
1034    /* PEP 3101 says only 2 levels, so that
1035       "{0:{1}}".format('abc', 's')            # works
1036       "{0:{1:{2}}}".format('abc', 's', '')    # fails
1037    */
1038    int recursion_depth = 2;
1039
1040    AutoNumber auto_number;
1041
1042    AutoNumber_Init(&auto_number);
1043    SubString_init(&input, STRINGLIB_STR(self), STRINGLIB_LEN(self));
1044    return build_string(&input, args, kwargs, recursion_depth, &auto_number);
1045}
1046
1047
1048
1049/************************************************************************/
1050/*********** formatteriterator ******************************************/
1051/************************************************************************/
1052
1053/* This is used to implement string.Formatter.vparse().  It exists so
1054   Formatter can share code with the built in unicode.format() method.
1055   It's really just a wrapper around MarkupIterator that is callable
1056   from Python. */
1057
1058typedef struct {
1059    PyObject_HEAD
1060
1061    STRINGLIB_OBJECT *str;
1062
1063    MarkupIterator it_markup;
1064} formatteriterobject;
1065
1066static void
1067formatteriter_dealloc(formatteriterobject *it)
1068{
1069    Py_XDECREF(it->str);
1070    PyObject_FREE(it);
1071}
1072
1073/* returns a tuple:
1074   (literal, field_name, format_spec, conversion)
1075
1076   literal is any literal text to output.  might be zero length
1077   field_name is the string before the ':'.  might be None
1078   format_spec is the string after the ':'.  mibht be None
1079   conversion is either None, or the string after the '!'
1080*/
1081static PyObject *
1082formatteriter_next(formatteriterobject *it)
1083{
1084    SubString literal;
1085    SubString field_name;
1086    SubString format_spec;
1087    STRINGLIB_CHAR conversion;
1088    int format_spec_needs_expanding;
1089    int field_present;
1090    int result = MarkupIterator_next(&it->it_markup, &literal, &field_present,
1091                                     &field_name, &format_spec, &conversion,
1092                                     &format_spec_needs_expanding);
1093
1094    /* all of the SubString objects point into it->str, so no
1095       memory management needs to be done on them */
1096    assert(0 <= result && result <= 2);
1097    if (result == 0 || result == 1)
1098        /* if 0, error has already been set, if 1, iterator is empty */
1099        return NULL;
1100    else {
1101        PyObject *literal_str = NULL;
1102        PyObject *field_name_str = NULL;
1103        PyObject *format_spec_str = NULL;
1104        PyObject *conversion_str = NULL;
1105        PyObject *tuple = NULL;
1106
1107        literal_str = SubString_new_object(&literal);
1108        if (literal_str == NULL)
1109            goto done;
1110
1111        field_name_str = SubString_new_object(&field_name);
1112        if (field_name_str == NULL)
1113            goto done;
1114
1115        /* if field_name is non-zero length, return a string for
1116           format_spec (even if zero length), else return None */
1117        format_spec_str = (field_present ?
1118                           SubString_new_object_or_empty :
1119                           SubString_new_object)(&format_spec);
1120        if (format_spec_str == NULL)
1121            goto done;
1122
1123        /* if the conversion is not specified, return a None,
1124           otherwise create a one length string with the conversion
1125           character */
1126        if (conversion == '\0') {
1127            conversion_str = Py_None;
1128            Py_INCREF(conversion_str);
1129        }
1130        else
1131            conversion_str = STRINGLIB_NEW(&conversion, 1);
1132        if (conversion_str == NULL)
1133            goto done;
1134
1135        tuple = PyTuple_Pack(4, literal_str, field_name_str, format_spec_str,
1136                             conversion_str);
1137    done:
1138        Py_XDECREF(literal_str);
1139        Py_XDECREF(field_name_str);
1140        Py_XDECREF(format_spec_str);
1141        Py_XDECREF(conversion_str);
1142        return tuple;
1143    }
1144}
1145
1146static PyMethodDef formatteriter_methods[] = {
1147    {NULL,              NULL}           /* sentinel */
1148};
1149
1150static PyTypeObject PyFormatterIter_Type = {
1151    PyVarObject_HEAD_INIT(&PyType_Type, 0)
1152    "formatteriterator",                /* tp_name */
1153    sizeof(formatteriterobject),        /* tp_basicsize */
1154    0,                                  /* tp_itemsize */
1155    /* methods */
1156    (destructor)formatteriter_dealloc,  /* tp_dealloc */
1157    0,                                  /* tp_print */
1158    0,                                  /* tp_getattr */
1159    0,                                  /* tp_setattr */
1160    0,                                  /* tp_compare */
1161    0,                                  /* tp_repr */
1162    0,                                  /* tp_as_number */
1163    0,                                  /* tp_as_sequence */
1164    0,                                  /* tp_as_mapping */
1165    0,                                  /* tp_hash */
1166    0,                                  /* tp_call */
1167    0,                                  /* tp_str */
1168    PyObject_GenericGetAttr,            /* tp_getattro */
1169    0,                                  /* tp_setattro */
1170    0,                                  /* tp_as_buffer */
1171    Py_TPFLAGS_DEFAULT,                 /* tp_flags */
1172    0,                                  /* tp_doc */
1173    0,                                  /* tp_traverse */
1174    0,                                  /* tp_clear */
1175    0,                                  /* tp_richcompare */
1176    0,                                  /* tp_weaklistoffset */
1177    PyObject_SelfIter,                  /* tp_iter */
1178    (iternextfunc)formatteriter_next,   /* tp_iternext */
1179    formatteriter_methods,              /* tp_methods */
1180    0,
1181};
1182
1183/* unicode_formatter_parser is used to implement
1184   string.Formatter.vformat.  it parses a string and returns tuples
1185   describing the parsed elements.  It's a wrapper around
1186   stringlib/string_format.h's MarkupIterator */
1187static PyObject *
1188formatter_parser(STRINGLIB_OBJECT *self)
1189{
1190    formatteriterobject *it;
1191
1192    it = PyObject_New(formatteriterobject, &PyFormatterIter_Type);
1193    if (it == NULL)
1194        return NULL;
1195
1196    /* take ownership, give the object to the iterator */
1197    Py_INCREF(self);
1198    it->str = self;
1199
1200    /* initialize the contained MarkupIterator */
1201    MarkupIterator_init(&it->it_markup,
1202                        STRINGLIB_STR(self),
1203                        STRINGLIB_LEN(self));
1204
1205    return (PyObject *)it;
1206}
1207
1208
1209/************************************************************************/
1210/*********** fieldnameiterator ******************************************/
1211/************************************************************************/
1212
1213
1214/* This is used to implement string.Formatter.vparse().  It parses the
1215   field name into attribute and item values.  It's a Python-callable
1216   wrapper around FieldNameIterator */
1217
1218typedef struct {
1219    PyObject_HEAD
1220
1221    STRINGLIB_OBJECT *str;
1222
1223    FieldNameIterator it_field;
1224} fieldnameiterobject;
1225
1226static void
1227fieldnameiter_dealloc(fieldnameiterobject *it)
1228{
1229    Py_XDECREF(it->str);
1230    PyObject_FREE(it);
1231}
1232
1233/* returns a tuple:
1234   (is_attr, value)
1235   is_attr is true if we used attribute syntax (e.g., '.foo')
1236              false if we used index syntax (e.g., '[foo]')
1237   value is an integer or string
1238*/
1239static PyObject *
1240fieldnameiter_next(fieldnameiterobject *it)
1241{
1242    int result;
1243    int is_attr;
1244    Py_ssize_t idx;
1245    SubString name;
1246
1247    result = FieldNameIterator_next(&it->it_field, &is_attr,
1248                                    &idx, &name);
1249    if (result == 0 || result == 1)
1250        /* if 0, error has already been set, if 1, iterator is empty */
1251        return NULL;
1252    else {
1253        PyObject* result = NULL;
1254        PyObject* is_attr_obj = NULL;
1255        PyObject* obj = NULL;
1256
1257        is_attr_obj = PyBool_FromLong(is_attr);
1258        if (is_attr_obj == NULL)
1259            goto done;
1260
1261        /* either an integer or a string */
1262        if (idx != -1)
1263            obj = PyLong_FromSsize_t(idx);
1264        else
1265            obj = SubString_new_object(&name);
1266        if (obj == NULL)
1267            goto done;
1268
1269        /* return a tuple of values */
1270        result = PyTuple_Pack(2, is_attr_obj, obj);
1271
1272    done:
1273        Py_XDECREF(is_attr_obj);
1274        Py_XDECREF(obj);
1275        return result;
1276    }
1277}
1278
1279static PyMethodDef fieldnameiter_methods[] = {
1280    {NULL,              NULL}           /* sentinel */
1281};
1282
1283static PyTypeObject PyFieldNameIter_Type = {
1284    PyVarObject_HEAD_INIT(&PyType_Type, 0)
1285    "fieldnameiterator",                /* tp_name */
1286    sizeof(fieldnameiterobject),        /* tp_basicsize */
1287    0,                                  /* tp_itemsize */
1288    /* methods */
1289    (destructor)fieldnameiter_dealloc,  /* tp_dealloc */
1290    0,                                  /* tp_print */
1291    0,                                  /* tp_getattr */
1292    0,                                  /* tp_setattr */
1293    0,                                  /* tp_compare */
1294    0,                                  /* tp_repr */
1295    0,                                  /* tp_as_number */
1296    0,                                  /* tp_as_sequence */
1297    0,                                  /* tp_as_mapping */
1298    0,                                  /* tp_hash */
1299    0,                                  /* tp_call */
1300    0,                                  /* tp_str */
1301    PyObject_GenericGetAttr,            /* tp_getattro */
1302    0,                                  /* tp_setattro */
1303    0,                                  /* tp_as_buffer */
1304    Py_TPFLAGS_DEFAULT,                 /* tp_flags */
1305    0,                                  /* tp_doc */
1306    0,                                  /* tp_traverse */
1307    0,                                  /* tp_clear */
1308    0,                                  /* tp_richcompare */
1309    0,                                  /* tp_weaklistoffset */
1310    PyObject_SelfIter,                  /* tp_iter */
1311    (iternextfunc)fieldnameiter_next,   /* tp_iternext */
1312    fieldnameiter_methods,              /* tp_methods */
1313    0};
1314
1315/* unicode_formatter_field_name_split is used to implement
1316   string.Formatter.vformat.  it takes an PEP 3101 "field name", and
1317   returns a tuple of (first, rest): "first", the part before the
1318   first '.' or '['; and "rest", an iterator for the rest of the field
1319   name.  it's a wrapper around stringlib/string_format.h's
1320   field_name_split.  The iterator it returns is a
1321   FieldNameIterator */
1322static PyObject *
1323formatter_field_name_split(STRINGLIB_OBJECT *self)
1324{
1325    SubString first;
1326    Py_ssize_t first_idx;
1327    fieldnameiterobject *it;
1328
1329    PyObject *first_obj = NULL;
1330    PyObject *result = NULL;
1331
1332    it = PyObject_New(fieldnameiterobject, &PyFieldNameIter_Type);
1333    if (it == NULL)
1334        return NULL;
1335
1336    /* take ownership, give the object to the iterator.  this is
1337       just to keep the field_name alive */
1338    Py_INCREF(self);
1339    it->str = self;
1340
1341    /* Pass in auto_number = NULL. We'll return an empty string for
1342       first_obj in that case. */
1343    if (!field_name_split(STRINGLIB_STR(self),
1344                          STRINGLIB_LEN(self),
1345                          &first, &first_idx, &it->it_field, NULL))
1346        goto done;
1347
1348    /* first becomes an integer, if possible; else a string */
1349    if (first_idx != -1)
1350        first_obj = PyLong_FromSsize_t(first_idx);
1351    else
1352        /* convert "first" into a string object */
1353        first_obj = SubString_new_object(&first);
1354    if (first_obj == NULL)
1355        goto done;
1356
1357    /* return a tuple of values */
1358    result = PyTuple_Pack(2, first_obj, it);
1359
1360done:
1361    Py_XDECREF(it);
1362    Py_XDECREF(first_obj);
1363    return result;
1364}
1365