1/* String (str/bytes) object implementation */
2
3#define PY_SSIZE_T_CLEAN
4
5#include "Python.h"
6#include <ctype.h>
7#include <stddef.h>
8
9#ifdef COUNT_ALLOCS
10Py_ssize_t null_strings, one_strings;
11#endif
12
13static PyStringObject *characters[UCHAR_MAX + 1];
14static PyStringObject *nullstring;
15
16/* This dictionary holds all interned strings.  Note that references to
17   strings in this dictionary are *not* counted in the string's ob_refcnt.
18   When the interned string reaches a refcnt of 0 the string deallocation
19   function will delete the reference from this dictionary.
20
   Another way to look at this is to say that the actual reference
22   count of a string is:  s->ob_refcnt + (s->ob_sstate?2:0)
23*/
24static PyObject *interned;
25
26/* PyStringObject_SIZE gives the basic size of a string; any memory allocation
27   for a string of length n should request PyStringObject_SIZE + n bytes.
28
29   Using PyStringObject_SIZE instead of sizeof(PyStringObject) saves
30   3 bytes per string allocation on a typical system.
31*/
32#define PyStringObject_SIZE (offsetof(PyStringObject, ob_sval) + 1)
33
34/*
35   For PyString_FromString(), the parameter `str' points to a null-terminated
36   string containing exactly `size' bytes.
37
   For PyString_FromStringAndSize(), the parameter `str' is
39   either NULL or else points to a string containing at least `size' bytes.
40   For PyString_FromStringAndSize(), the string in the `str' parameter does
41   not have to be null-terminated.  (Therefore it is safe to construct a
42   substring by calling `PyString_FromStringAndSize(origstring, substrlen)'.)
43   If `str' is NULL then PyString_FromStringAndSize() will allocate `size+1'
44   bytes (setting the last byte to the null terminating character) and you can
45   fill in the data yourself.  If `str' is non-NULL then the resulting
46   PyString object must be treated as immutable and you must not fill in nor
47   alter the data yourself, since the strings may be shared.
48
49   The PyObject member `op->ob_size', which denotes the number of "extra
50   items" in a variable-size object, will contain the number of bytes
51   allocated for string data, not counting the null terminating character.
52   It is therefore equal to the `size' parameter (for
53   PyString_FromStringAndSize()) or the length of the string in the `str'
54   parameter (for PyString_FromString()).
55*/
56PyObject *
57PyString_FromStringAndSize(const char *str, Py_ssize_t size)
58{
59    register PyStringObject *op;
60    if (size < 0) {
61        PyErr_SetString(PyExc_SystemError,
62            "Negative size passed to PyString_FromStringAndSize");
63        return NULL;
64    }
65    if (size == 0 && (op = nullstring) != NULL) {
66#ifdef COUNT_ALLOCS
67        null_strings++;
68#endif
69        Py_INCREF(op);
70        return (PyObject *)op;
71    }
72    if (size == 1 && str != NULL &&
73        (op = characters[*str & UCHAR_MAX]) != NULL)
74    {
75#ifdef COUNT_ALLOCS
76        one_strings++;
77#endif
78        Py_INCREF(op);
79        return (PyObject *)op;
80    }
81
82    if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
83        PyErr_SetString(PyExc_OverflowError, "string is too large");
84        return NULL;
85    }
86
87    /* Inline PyObject_NewVar */
88    op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
89    if (op == NULL)
90        return PyErr_NoMemory();
91    PyObject_INIT_VAR(op, &PyString_Type, size);
92    op->ob_shash = -1;
93    op->ob_sstate = SSTATE_NOT_INTERNED;
94    if (str != NULL)
95        Py_MEMCPY(op->ob_sval, str, size);
96    op->ob_sval[size] = '\0';
97    /* share short strings */
98    if (size == 0) {
99        PyObject *t = (PyObject *)op;
100        PyString_InternInPlace(&t);
101        op = (PyStringObject *)t;
102        nullstring = op;
103        Py_INCREF(op);
104    } else if (size == 1 && str != NULL) {
105        PyObject *t = (PyObject *)op;
106        PyString_InternInPlace(&t);
107        op = (PyStringObject *)t;
108        characters[*str & UCHAR_MAX] = op;
109        Py_INCREF(op);
110    }
111    return (PyObject *) op;
112}
113
114PyObject *
115PyString_FromString(const char *str)
116{
117    register size_t size;
118    register PyStringObject *op;
119
120    assert(str != NULL);
121    size = strlen(str);
122    if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
123        PyErr_SetString(PyExc_OverflowError,
124            "string is too long for a Python string");
125        return NULL;
126    }
127    if (size == 0 && (op = nullstring) != NULL) {
128#ifdef COUNT_ALLOCS
129        null_strings++;
130#endif
131        Py_INCREF(op);
132        return (PyObject *)op;
133    }
134    if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
135#ifdef COUNT_ALLOCS
136        one_strings++;
137#endif
138        Py_INCREF(op);
139        return (PyObject *)op;
140    }
141
142    /* Inline PyObject_NewVar */
143    op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
144    if (op == NULL)
145        return PyErr_NoMemory();
146    PyObject_INIT_VAR(op, &PyString_Type, size);
147    op->ob_shash = -1;
148    op->ob_sstate = SSTATE_NOT_INTERNED;
149    Py_MEMCPY(op->ob_sval, str, size+1);
150    /* share short strings */
151    if (size == 0) {
152        PyObject *t = (PyObject *)op;
153        PyString_InternInPlace(&t);
154        op = (PyStringObject *)t;
155        nullstring = op;
156        Py_INCREF(op);
157    } else if (size == 1) {
158        PyObject *t = (PyObject *)op;
159        PyString_InternInPlace(&t);
160        op = (PyStringObject *)t;
161        characters[*str & UCHAR_MAX] = op;
162        Py_INCREF(op);
163    }
164    return (PyObject *) op;
165}
166
167PyObject *
168PyString_FromFormatV(const char *format, va_list vargs)
169{
170    va_list count;
171    Py_ssize_t n = 0;
172    const char* f;
173    char *s;
174    PyObject* string;
175
176#ifdef VA_LIST_IS_ARRAY
177    Py_MEMCPY(count, vargs, sizeof(va_list));
178#else
179#ifdef  __va_copy
180    __va_copy(count, vargs);
181#else
182    count = vargs;
183#endif
184#endif
185    /* step 1: figure out how large a buffer we need */
186    for (f = format; *f; f++) {
187        if (*f == '%') {
188#ifdef HAVE_LONG_LONG
189            int longlongflag = 0;
190#endif
191            const char* p = f;
192            while (*++f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
193                ;
194
195            /* skip the 'l' or 'z' in {%ld, %zd, %lu, %zu} since
196             * they don't affect the amount of space we reserve.
197             */
198            if (*f == 'l') {
199                if (f[1] == 'd' || f[1] == 'u') {
200                    ++f;
201                }
202#ifdef HAVE_LONG_LONG
203                else if (f[1] == 'l' &&
204                         (f[2] == 'd' || f[2] == 'u')) {
205                    longlongflag = 1;
206                    f += 2;
207                }
208#endif
209            }
210            else if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
211                ++f;
212            }
213
214            switch (*f) {
215            case 'c':
216                (void)va_arg(count, int);
217                /* fall through... */
218            case '%':
219                n++;
220                break;
221            case 'd': case 'u': case 'i': case 'x':
222                (void) va_arg(count, int);
223#ifdef HAVE_LONG_LONG
224                /* Need at most
225                   ceil(log10(256)*SIZEOF_LONG_LONG) digits,
226                   plus 1 for the sign.  53/22 is an upper
227                   bound for log10(256). */
228                if (longlongflag)
229                    n += 2 + (SIZEOF_LONG_LONG*53-1) / 22;
230                else
231#endif
232                    /* 20 bytes is enough to hold a 64-bit
233                       integer.  Decimal takes the most
234                       space.  This isn't enough for
235                       octal. */
236                    n += 20;
237
238                break;
239            case 's':
240                s = va_arg(count, char*);
241                n += strlen(s);
242                break;
243            case 'p':
244                (void) va_arg(count, int);
245                /* maximum 64-bit pointer representation:
246                 * 0xffffffffffffffff
247                 * so 19 characters is enough.
248                 * XXX I count 18 -- what's the extra for?
249                 */
250                n += 19;
251                break;
252            default:
253                /* if we stumble upon an unknown
254                   formatting code, copy the rest of
255                   the format string to the output
256                   string. (we cannot just skip the
257                   code, since there's no way to know
258                   what's in the argument list) */
259                n += strlen(p);
260                goto expand;
261            }
262        } else
263            n++;
264    }
265 expand:
266    /* step 2: fill the buffer */
267    /* Since we've analyzed how much space we need for the worst case,
268       use sprintf directly instead of the slower PyOS_snprintf. */
269    string = PyString_FromStringAndSize(NULL, n);
270    if (!string)
271        return NULL;
272
273    s = PyString_AsString(string);
274
275    for (f = format; *f; f++) {
276        if (*f == '%') {
277            const char* p = f++;
278            Py_ssize_t i;
279            int longflag = 0;
280#ifdef HAVE_LONG_LONG
281            int longlongflag = 0;
282#endif
283            int size_tflag = 0;
284            /* parse the width.precision part (we're only
285               interested in the precision value, if any) */
286            n = 0;
287            while (isdigit(Py_CHARMASK(*f)))
288                n = (n*10) + *f++ - '0';
289            if (*f == '.') {
290                f++;
291                n = 0;
292                while (isdigit(Py_CHARMASK(*f)))
293                    n = (n*10) + *f++ - '0';
294            }
295            while (*f && *f != '%' && !isalpha(Py_CHARMASK(*f)))
296                f++;
297            /* Handle %ld, %lu, %lld and %llu. */
298            if (*f == 'l') {
299                if (f[1] == 'd' || f[1] == 'u') {
300                    longflag = 1;
301                    ++f;
302                }
303#ifdef HAVE_LONG_LONG
304                else if (f[1] == 'l' &&
305                         (f[2] == 'd' || f[2] == 'u')) {
306                    longlongflag = 1;
307                    f += 2;
308                }
309#endif
310            }
311            /* handle the size_t flag. */
312            else if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
313                size_tflag = 1;
314                ++f;
315            }
316
317            switch (*f) {
318            case 'c':
319                *s++ = va_arg(vargs, int);
320                break;
321            case 'd':
322                if (longflag)
323                    sprintf(s, "%ld", va_arg(vargs, long));
324#ifdef HAVE_LONG_LONG
325                else if (longlongflag)
326                    sprintf(s, "%" PY_FORMAT_LONG_LONG "d",
327                        va_arg(vargs, PY_LONG_LONG));
328#endif
329                else if (size_tflag)
330                    sprintf(s, "%" PY_FORMAT_SIZE_T "d",
331                        va_arg(vargs, Py_ssize_t));
332                else
333                    sprintf(s, "%d", va_arg(vargs, int));
334                s += strlen(s);
335                break;
336            case 'u':
337                if (longflag)
338                    sprintf(s, "%lu",
339                        va_arg(vargs, unsigned long));
340#ifdef HAVE_LONG_LONG
341                else if (longlongflag)
342                    sprintf(s, "%" PY_FORMAT_LONG_LONG "u",
343                        va_arg(vargs, PY_LONG_LONG));
344#endif
345                else if (size_tflag)
346                    sprintf(s, "%" PY_FORMAT_SIZE_T "u",
347                        va_arg(vargs, size_t));
348                else
349                    sprintf(s, "%u",
350                        va_arg(vargs, unsigned int));
351                s += strlen(s);
352                break;
353            case 'i':
354                sprintf(s, "%i", va_arg(vargs, int));
355                s += strlen(s);
356                break;
357            case 'x':
358                sprintf(s, "%x", va_arg(vargs, int));
359                s += strlen(s);
360                break;
361            case 's':
362                p = va_arg(vargs, char*);
363                i = strlen(p);
364                if (n > 0 && i > n)
365                    i = n;
366                Py_MEMCPY(s, p, i);
367                s += i;
368                break;
369            case 'p':
370                sprintf(s, "%p", va_arg(vargs, void*));
371                /* %p is ill-defined:  ensure leading 0x. */
372                if (s[1] == 'X')
373                    s[1] = 'x';
374                else if (s[1] != 'x') {
375                    memmove(s+2, s, strlen(s)+1);
376                    s[0] = '0';
377                    s[1] = 'x';
378                }
379                s += strlen(s);
380                break;
381            case '%':
382                *s++ = '%';
383                break;
384            default:
385                strcpy(s, p);
386                s += strlen(s);
387                goto end;
388            }
389        } else
390            *s++ = *f;
391    }
392
393 end:
394    if (_PyString_Resize(&string, s - PyString_AS_STRING(string)))
395        return NULL;
396    return string;
397}
398
399PyObject *
400PyString_FromFormat(const char *format, ...)
401{
402    PyObject* ret;
403    va_list vargs;
404
405#ifdef HAVE_STDARG_PROTOTYPES
406    va_start(vargs, format);
407#else
408    va_start(vargs);
409#endif
410    ret = PyString_FromFormatV(format, vargs);
411    va_end(vargs);
412    return ret;
413}
414
415
416PyObject *PyString_Decode(const char *s,
417                          Py_ssize_t size,
418                          const char *encoding,
419                          const char *errors)
420{
421    PyObject *v, *str;
422
423    str = PyString_FromStringAndSize(s, size);
424    if (str == NULL)
425        return NULL;
426    v = PyString_AsDecodedString(str, encoding, errors);
427    Py_DECREF(str);
428    return v;
429}
430
431PyObject *PyString_AsDecodedObject(PyObject *str,
432                                   const char *encoding,
433                                   const char *errors)
434{
435    PyObject *v;
436
437    if (!PyString_Check(str)) {
438        PyErr_BadArgument();
439        goto onError;
440    }
441
442    if (encoding == NULL) {
443#ifdef Py_USING_UNICODE
444        encoding = PyUnicode_GetDefaultEncoding();
445#else
446        PyErr_SetString(PyExc_ValueError, "no encoding specified");
447        goto onError;
448#endif
449    }
450
451    /* Decode via the codec registry */
452    v = PyCodec_Decode(str, encoding, errors);
453    if (v == NULL)
454        goto onError;
455
456    return v;
457
458 onError:
459    return NULL;
460}
461
462PyObject *PyString_AsDecodedString(PyObject *str,
463                                   const char *encoding,
464                                   const char *errors)
465{
466    PyObject *v;
467
468    v = PyString_AsDecodedObject(str, encoding, errors);
469    if (v == NULL)
470        goto onError;
471
472#ifdef Py_USING_UNICODE
473    /* Convert Unicode to a string using the default encoding */
474    if (PyUnicode_Check(v)) {
475        PyObject *temp = v;
476        v = PyUnicode_AsEncodedString(v, NULL, NULL);
477        Py_DECREF(temp);
478        if (v == NULL)
479            goto onError;
480    }
481#endif
482    if (!PyString_Check(v)) {
483        PyErr_Format(PyExc_TypeError,
484                     "decoder did not return a string object (type=%.400s)",
485                     Py_TYPE(v)->tp_name);
486        Py_DECREF(v);
487        goto onError;
488    }
489
490    return v;
491
492 onError:
493    return NULL;
494}
495
496PyObject *PyString_Encode(const char *s,
497                          Py_ssize_t size,
498                          const char *encoding,
499                          const char *errors)
500{
501    PyObject *v, *str;
502
503    str = PyString_FromStringAndSize(s, size);
504    if (str == NULL)
505        return NULL;
506    v = PyString_AsEncodedString(str, encoding, errors);
507    Py_DECREF(str);
508    return v;
509}
510
511PyObject *PyString_AsEncodedObject(PyObject *str,
512                                   const char *encoding,
513                                   const char *errors)
514{
515    PyObject *v;
516
517    if (!PyString_Check(str)) {
518        PyErr_BadArgument();
519        goto onError;
520    }
521
522    if (encoding == NULL) {
523#ifdef Py_USING_UNICODE
524        encoding = PyUnicode_GetDefaultEncoding();
525#else
526        PyErr_SetString(PyExc_ValueError, "no encoding specified");
527        goto onError;
528#endif
529    }
530
531    /* Encode via the codec registry */
532    v = PyCodec_Encode(str, encoding, errors);
533    if (v == NULL)
534        goto onError;
535
536    return v;
537
538 onError:
539    return NULL;
540}
541
542PyObject *PyString_AsEncodedString(PyObject *str,
543                                   const char *encoding,
544                                   const char *errors)
545{
546    PyObject *v;
547
548    v = PyString_AsEncodedObject(str, encoding, errors);
549    if (v == NULL)
550        goto onError;
551
552#ifdef Py_USING_UNICODE
553    /* Convert Unicode to a string using the default encoding */
554    if (PyUnicode_Check(v)) {
555        PyObject *temp = v;
556        v = PyUnicode_AsEncodedString(v, NULL, NULL);
557        Py_DECREF(temp);
558        if (v == NULL)
559            goto onError;
560    }
561#endif
562    if (!PyString_Check(v)) {
563        PyErr_Format(PyExc_TypeError,
564                     "encoder did not return a string object (type=%.400s)",
565                     Py_TYPE(v)->tp_name);
566        Py_DECREF(v);
567        goto onError;
568    }
569
570    return v;
571
572 onError:
573    return NULL;
574}
575
576static void
577string_dealloc(PyObject *op)
578{
579    switch (PyString_CHECK_INTERNED(op)) {
580        case SSTATE_NOT_INTERNED:
581            break;
582
583        case SSTATE_INTERNED_MORTAL:
584            /* revive dead object temporarily for DelItem */
585            Py_REFCNT(op) = 3;
586            if (PyDict_DelItem(interned, op) != 0)
587                Py_FatalError(
588                    "deletion of interned string failed");
589            break;
590
591        case SSTATE_INTERNED_IMMORTAL:
592            Py_FatalError("Immortal interned string died.");
593
594        default:
595            Py_FatalError("Inconsistent interned string state.");
596    }
597    Py_TYPE(op)->tp_free(op);
598}
599
600/* Unescape a backslash-escaped string. If unicode is non-zero,
601   the string is a u-literal. If recode_encoding is non-zero,
602   the string is UTF-8 encoded and should be re-encoded in the
603   specified encoding.  */
604
605PyObject *PyString_DecodeEscape(const char *s,
606                                Py_ssize_t len,
607                                const char *errors,
608                                Py_ssize_t unicode,
609                                const char *recode_encoding)
610{
611    int c;
612    char *p, *buf;
613    const char *end;
614    PyObject *v;
615    Py_ssize_t newlen = recode_encoding ? 4*len:len;
616    v = PyString_FromStringAndSize((char *)NULL, newlen);
617    if (v == NULL)
618        return NULL;
619    p = buf = PyString_AsString(v);
620    end = s + len;
621    while (s < end) {
622        if (*s != '\\') {
623          non_esc:
624#ifdef Py_USING_UNICODE
625            if (recode_encoding && (*s & 0x80)) {
626                PyObject *u, *w;
627                char *r;
628                const char* t;
629                Py_ssize_t rn;
630                t = s;
631                /* Decode non-ASCII bytes as UTF-8. */
632                while (t < end && (*t & 0x80)) t++;
633                u = PyUnicode_DecodeUTF8(s, t - s, errors);
634                if(!u) goto failed;
635
636                /* Recode them in target encoding. */
637                w = PyUnicode_AsEncodedString(
638                    u, recode_encoding, errors);
639                Py_DECREF(u);
640                if (!w)                 goto failed;
641
642                /* Append bytes to output buffer. */
643                assert(PyString_Check(w));
644                r = PyString_AS_STRING(w);
645                rn = PyString_GET_SIZE(w);
646                Py_MEMCPY(p, r, rn);
647                p += rn;
648                Py_DECREF(w);
649                s = t;
650            } else {
651                *p++ = *s++;
652            }
653#else
654            *p++ = *s++;
655#endif
656            continue;
657        }
658        s++;
659        if (s==end) {
660            PyErr_SetString(PyExc_ValueError,
661                            "Trailing \\ in string");
662            goto failed;
663        }
664        switch (*s++) {
665        /* XXX This assumes ASCII! */
666        case '\n': break;
667        case '\\': *p++ = '\\'; break;
668        case '\'': *p++ = '\''; break;
669        case '\"': *p++ = '\"'; break;
670        case 'b': *p++ = '\b'; break;
671        case 'f': *p++ = '\014'; break; /* FF */
672        case 't': *p++ = '\t'; break;
673        case 'n': *p++ = '\n'; break;
674        case 'r': *p++ = '\r'; break;
675        case 'v': *p++ = '\013'; break; /* VT */
676        case 'a': *p++ = '\007'; break; /* BEL, not classic C */
677        case '0': case '1': case '2': case '3':
678        case '4': case '5': case '6': case '7':
679            c = s[-1] - '0';
680            if (s < end && '0' <= *s && *s <= '7') {
681                c = (c<<3) + *s++ - '0';
682                if (s < end && '0' <= *s && *s <= '7')
683                    c = (c<<3) + *s++ - '0';
684            }
685            *p++ = c;
686            break;
687        case 'x':
688            if (s+1 < end &&
689                isxdigit(Py_CHARMASK(s[0])) &&
690                isxdigit(Py_CHARMASK(s[1])))
691            {
692                unsigned int x = 0;
693                c = Py_CHARMASK(*s);
694                s++;
695                if (isdigit(c))
696                    x = c - '0';
697                else if (islower(c))
698                    x = 10 + c - 'a';
699                else
700                    x = 10 + c - 'A';
701                x = x << 4;
702                c = Py_CHARMASK(*s);
703                s++;
704                if (isdigit(c))
705                    x += c - '0';
706                else if (islower(c))
707                    x += 10 + c - 'a';
708                else
709                    x += 10 + c - 'A';
710                *p++ = x;
711                break;
712            }
713            if (!errors || strcmp(errors, "strict") == 0) {
714                PyErr_SetString(PyExc_ValueError,
715                                "invalid \\x escape");
716                goto failed;
717            }
718            if (strcmp(errors, "replace") == 0) {
719                *p++ = '?';
720            } else if (strcmp(errors, "ignore") == 0)
721                /* do nothing */;
722            else {
723                PyErr_Format(PyExc_ValueError,
724                             "decoding error; "
725                             "unknown error handling code: %.400s",
726                             errors);
727                goto failed;
728            }
729            /* skip \x */
730            if (s < end && isxdigit(Py_CHARMASK(s[0])))
731                s++; /* and a hexdigit */
732            break;
733#ifndef Py_USING_UNICODE
734        case 'u':
735        case 'U':
736        case 'N':
737            if (unicode) {
738                PyErr_SetString(PyExc_ValueError,
739                          "Unicode escapes not legal "
740                          "when Unicode disabled");
741                goto failed;
742            }
743#endif
744        default:
745            *p++ = '\\';
746            s--;
747            goto non_esc; /* an arbitrary number of unescaped
748                             UTF-8 bytes may follow. */
749        }
750    }
751    if (p-buf < newlen)
752        _PyString_Resize(&v, p - buf); /* v is cleared on error */
753    return v;
754  failed:
755    Py_DECREF(v);
756    return NULL;
757}
758
759/* -------------------------------------------------------------------- */
760/* object api */
761
762static Py_ssize_t
763string_getsize(register PyObject *op)
764{
765    char *s;
766    Py_ssize_t len;
767    if (PyString_AsStringAndSize(op, &s, &len))
768        return -1;
769    return len;
770}
771
772static /*const*/ char *
773string_getbuffer(register PyObject *op)
774{
775    char *s;
776    Py_ssize_t len;
777    if (PyString_AsStringAndSize(op, &s, &len))
778        return NULL;
779    return s;
780}
781
782Py_ssize_t
783PyString_Size(register PyObject *op)
784{
785    if (!PyString_Check(op))
786        return string_getsize(op);
787    return Py_SIZE(op);
788}
789
790/*const*/ char *
791PyString_AsString(register PyObject *op)
792{
793    if (!PyString_Check(op))
794        return string_getbuffer(op);
795    return ((PyStringObject *)op) -> ob_sval;
796}
797
798int
799PyString_AsStringAndSize(register PyObject *obj,
800                         register char **s,
801                         register Py_ssize_t *len)
802{
803    if (s == NULL) {
804        PyErr_BadInternalCall();
805        return -1;
806    }
807
808    if (!PyString_Check(obj)) {
809#ifdef Py_USING_UNICODE
810        if (PyUnicode_Check(obj)) {
811            obj = _PyUnicode_AsDefaultEncodedString(obj, NULL);
812            if (obj == NULL)
813                return -1;
814        }
815        else
816#endif
817        {
818            PyErr_Format(PyExc_TypeError,
819                         "expected string or Unicode object, "
820                         "%.200s found", Py_TYPE(obj)->tp_name);
821            return -1;
822        }
823    }
824
825    *s = PyString_AS_STRING(obj);
826    if (len != NULL)
827        *len = PyString_GET_SIZE(obj);
828    else if (strlen(*s) != (size_t)PyString_GET_SIZE(obj)) {
829        PyErr_SetString(PyExc_TypeError,
830                        "expected string without null bytes");
831        return -1;
832    }
833    return 0;
834}
835
836/* -------------------------------------------------------------------- */
837/* Methods */
838
839#include "stringlib/stringdefs.h"
840#include "stringlib/fastsearch.h"
841
842#include "stringlib/count.h"
843#include "stringlib/find.h"
844#include "stringlib/partition.h"
845#include "stringlib/split.h"
846
847#define _Py_InsertThousandsGrouping _PyString_InsertThousandsGrouping
848#include "stringlib/localeutil.h"
849
850
851
852static int
853string_print(PyStringObject *op, FILE *fp, int flags)
854{
855    Py_ssize_t i, str_len;
856    char c;
857    int quote;
858
859    /* XXX Ought to check for interrupts when writing long strings */
860    if (! PyString_CheckExact(op)) {
861        int ret;
862        /* A str subclass may have its own __str__ method. */
863        op = (PyStringObject *) PyObject_Str((PyObject *)op);
864        if (op == NULL)
865            return -1;
866        ret = string_print(op, fp, flags);
867        Py_DECREF(op);
868        return ret;
869    }
870    if (flags & Py_PRINT_RAW) {
871        char *data = op->ob_sval;
872        Py_ssize_t size = Py_SIZE(op);
873        Py_BEGIN_ALLOW_THREADS
874        while (size > INT_MAX) {
875            /* Very long strings cannot be written atomically.
876             * But don't write exactly INT_MAX bytes at a time
877             * to avoid memory aligment issues.
878             */
879            const int chunk_size = INT_MAX & ~0x3FFF;
880            fwrite(data, 1, chunk_size, fp);
881            data += chunk_size;
882            size -= chunk_size;
883        }
884#ifdef __VMS
885        if (size) fwrite(data, (size_t)size, 1, fp);
886#else
887        fwrite(data, 1, (size_t)size, fp);
888#endif
889        Py_END_ALLOW_THREADS
890        return 0;
891    }
892
893    /* figure out which quote to use; single is preferred */
894    quote = '\'';
895    if (memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
896        !memchr(op->ob_sval, '"', Py_SIZE(op)))
897        quote = '"';
898
899    str_len = Py_SIZE(op);
900    Py_BEGIN_ALLOW_THREADS
901    fputc(quote, fp);
902    for (i = 0; i < str_len; i++) {
903        /* Since strings are immutable and the caller should have a
904        reference, accessing the interal buffer should not be an issue
905        with the GIL released. */
906        c = op->ob_sval[i];
907        if (c == quote || c == '\\')
908            fprintf(fp, "\\%c", c);
909        else if (c == '\t')
910            fprintf(fp, "\\t");
911        else if (c == '\n')
912            fprintf(fp, "\\n");
913        else if (c == '\r')
914            fprintf(fp, "\\r");
915        else if (c < ' ' || c >= 0x7f)
916            fprintf(fp, "\\x%02x", c & 0xff);
917        else
918            fputc(c, fp);
919    }
920    fputc(quote, fp);
921    Py_END_ALLOW_THREADS
922    return 0;
923}
924
925PyObject *
926PyString_Repr(PyObject *obj, int smartquotes)
927{
928    register PyStringObject* op = (PyStringObject*) obj;
929    size_t newsize;
930    PyObject *v;
931    if (Py_SIZE(op) > (PY_SSIZE_T_MAX - 2)/4) {
932        PyErr_SetString(PyExc_OverflowError,
933            "string is too large to make repr");
934        return NULL;
935    }
936    newsize = 2 + 4*Py_SIZE(op);
937    v = PyString_FromStringAndSize((char *)NULL, newsize);
938    if (v == NULL) {
939        return NULL;
940    }
941    else {
942        register Py_ssize_t i;
943        register char c;
944        register char *p;
945        int quote;
946
947        /* figure out which quote to use; single is preferred */
948        quote = '\'';
949        if (smartquotes &&
950            memchr(op->ob_sval, '\'', Py_SIZE(op)) &&
951            !memchr(op->ob_sval, '"', Py_SIZE(op)))
952            quote = '"';
953
954        p = PyString_AS_STRING(v);
955        *p++ = quote;
956        for (i = 0; i < Py_SIZE(op); i++) {
957            /* There's at least enough room for a hex escape
958               and a closing quote. */
959            assert(newsize - (p - PyString_AS_STRING(v)) >= 5);
960            c = op->ob_sval[i];
961            if (c == quote || c == '\\')
962                *p++ = '\\', *p++ = c;
963            else if (c == '\t')
964                *p++ = '\\', *p++ = 't';
965            else if (c == '\n')
966                *p++ = '\\', *p++ = 'n';
967            else if (c == '\r')
968                *p++ = '\\', *p++ = 'r';
969            else if (c < ' ' || c >= 0x7f) {
970                /* For performance, we don't want to call
971                   PyOS_snprintf here (extra layers of
972                   function call). */
973                sprintf(p, "\\x%02x", c & 0xff);
974                p += 4;
975            }
976            else
977                *p++ = c;
978        }
979        assert(newsize - (p - PyString_AS_STRING(v)) >= 1);
980        *p++ = quote;
981        *p = '\0';
982        if (_PyString_Resize(&v, (p - PyString_AS_STRING(v))))
983            return NULL;
984        return v;
985    }
986}
987
988static PyObject *
989string_repr(PyObject *op)
990{
991    return PyString_Repr(op, 1);
992}
993
994static PyObject *
995string_str(PyObject *s)
996{
997    assert(PyString_Check(s));
998    if (PyString_CheckExact(s)) {
999        Py_INCREF(s);
1000        return s;
1001    }
1002    else {
1003        /* Subtype -- return genuine string with the same value. */
1004        PyStringObject *t = (PyStringObject *) s;
1005        return PyString_FromStringAndSize(t->ob_sval, Py_SIZE(t));
1006    }
1007}
1008
1009static Py_ssize_t
1010string_length(PyStringObject *a)
1011{
1012    return Py_SIZE(a);
1013}
1014
1015static PyObject *
1016string_concat(register PyStringObject *a, register PyObject *bb)
1017{
1018    register Py_ssize_t size;
1019    register PyStringObject *op;
1020    if (!PyString_Check(bb)) {
1021#ifdef Py_USING_UNICODE
1022        if (PyUnicode_Check(bb))
1023            return PyUnicode_Concat((PyObject *)a, bb);
1024#endif
1025        if (PyByteArray_Check(bb))
1026            return PyByteArray_Concat((PyObject *)a, bb);
1027        PyErr_Format(PyExc_TypeError,
1028                     "cannot concatenate 'str' and '%.200s' objects",
1029                     Py_TYPE(bb)->tp_name);
1030        return NULL;
1031    }
1032#define b ((PyStringObject *)bb)
1033    /* Optimize cases with empty left or right operand */
1034    if ((Py_SIZE(a) == 0 || Py_SIZE(b) == 0) &&
1035        PyString_CheckExact(a) && PyString_CheckExact(b)) {
1036        if (Py_SIZE(a) == 0) {
1037            Py_INCREF(bb);
1038            return bb;
1039        }
1040        Py_INCREF(a);
1041        return (PyObject *)a;
1042    }
1043    size = Py_SIZE(a) + Py_SIZE(b);
1044    /* Check that string sizes are not negative, to prevent an
1045       overflow in cases where we are passed incorrectly-created
1046       strings with negative lengths (due to a bug in other code).
1047    */
1048    if (Py_SIZE(a) < 0 || Py_SIZE(b) < 0 ||
1049        Py_SIZE(a) > PY_SSIZE_T_MAX - Py_SIZE(b)) {
1050        PyErr_SetString(PyExc_OverflowError,
1051                        "strings are too large to concat");
1052        return NULL;
1053    }
1054
1055    /* Inline PyObject_NewVar */
1056    if (size > PY_SSIZE_T_MAX - PyStringObject_SIZE) {
1057        PyErr_SetString(PyExc_OverflowError,
1058                        "strings are too large to concat");
1059        return NULL;
1060    }
1061    op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + size);
1062    if (op == NULL)
1063        return PyErr_NoMemory();
1064    PyObject_INIT_VAR(op, &PyString_Type, size);
1065    op->ob_shash = -1;
1066    op->ob_sstate = SSTATE_NOT_INTERNED;
1067    Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1068    Py_MEMCPY(op->ob_sval + Py_SIZE(a), b->ob_sval, Py_SIZE(b));
1069    op->ob_sval[size] = '\0';
1070    return (PyObject *) op;
1071#undef b
1072}
1073
1074static PyObject *
1075string_repeat(register PyStringObject *a, register Py_ssize_t n)
1076{
1077    register Py_ssize_t i;
1078    register Py_ssize_t j;
1079    register Py_ssize_t size;
1080    register PyStringObject *op;
1081    size_t nbytes;
1082    if (n < 0)
1083        n = 0;
1084    /* watch out for overflows:  the size can overflow int,
1085     * and the # of bytes needed can overflow size_t
1086     */
1087    size = Py_SIZE(a) * n;
1088    if (n && size / n != Py_SIZE(a)) {
1089        PyErr_SetString(PyExc_OverflowError,
1090            "repeated string is too long");
1091        return NULL;
1092    }
1093    if (size == Py_SIZE(a) && PyString_CheckExact(a)) {
1094        Py_INCREF(a);
1095        return (PyObject *)a;
1096    }
1097    nbytes = (size_t)size;
1098    if (nbytes + PyStringObject_SIZE <= nbytes) {
1099        PyErr_SetString(PyExc_OverflowError,
1100            "repeated string is too long");
1101        return NULL;
1102    }
1103    op = (PyStringObject *)PyObject_MALLOC(PyStringObject_SIZE + nbytes);
1104    if (op == NULL)
1105        return PyErr_NoMemory();
1106    PyObject_INIT_VAR(op, &PyString_Type, size);
1107    op->ob_shash = -1;
1108    op->ob_sstate = SSTATE_NOT_INTERNED;
1109    op->ob_sval[size] = '\0';
1110    if (Py_SIZE(a) == 1 && n > 0) {
1111        memset(op->ob_sval, a->ob_sval[0] , n);
1112        return (PyObject *) op;
1113    }
1114    i = 0;
1115    if (i < size) {
1116        Py_MEMCPY(op->ob_sval, a->ob_sval, Py_SIZE(a));
1117        i = Py_SIZE(a);
1118    }
1119    while (i < size) {
1120        j = (i <= size-i)  ?  i  :  size-i;
1121        Py_MEMCPY(op->ob_sval+i, op->ob_sval, j);
1122        i += j;
1123    }
1124    return (PyObject *) op;
1125}
1126
1127/* String slice a[i:j] consists of characters a[i] ... a[j-1] */
1128
1129static PyObject *
1130string_slice(register PyStringObject *a, register Py_ssize_t i,
1131             register Py_ssize_t j)
1132     /* j -- may be negative! */
1133{
1134    if (i < 0)
1135        i = 0;
1136    if (j < 0)
1137        j = 0; /* Avoid signed/unsigned bug in next line */
1138    if (j > Py_SIZE(a))
1139        j = Py_SIZE(a);
1140    if (i == 0 && j == Py_SIZE(a) && PyString_CheckExact(a)) {
1141        /* It's the same as a */
1142        Py_INCREF(a);
1143        return (PyObject *)a;
1144    }
1145    if (j < i)
1146        j = i;
1147    return PyString_FromStringAndSize(a->ob_sval + i, j-i);
1148}
1149
1150static int
1151string_contains(PyObject *str_obj, PyObject *sub_obj)
1152{
1153    if (!PyString_CheckExact(sub_obj)) {
1154#ifdef Py_USING_UNICODE
1155        if (PyUnicode_Check(sub_obj))
1156            return PyUnicode_Contains(str_obj, sub_obj);
1157#endif
1158        if (!PyString_Check(sub_obj)) {
1159            PyErr_Format(PyExc_TypeError,
1160                "'in <string>' requires string as left operand, "
1161                "not %.200s", Py_TYPE(sub_obj)->tp_name);
1162            return -1;
1163        }
1164    }
1165
1166    return stringlib_contains_obj(str_obj, sub_obj);
1167}
1168
1169static PyObject *
1170string_item(PyStringObject *a, register Py_ssize_t i)
1171{
1172    char pchar;
1173    PyObject *v;
1174    if (i < 0 || i >= Py_SIZE(a)) {
1175        PyErr_SetString(PyExc_IndexError, "string index out of range");
1176        return NULL;
1177    }
1178    pchar = a->ob_sval[i];
1179    v = (PyObject *)characters[pchar & UCHAR_MAX];
1180    if (v == NULL)
1181        v = PyString_FromStringAndSize(&pchar, 1);
1182    else {
1183#ifdef COUNT_ALLOCS
1184        one_strings++;
1185#endif
1186        Py_INCREF(v);
1187    }
1188    return v;
1189}
1190
1191static PyObject*
1192string_richcompare(PyStringObject *a, PyStringObject *b, int op)
1193{
1194    int c;
1195    Py_ssize_t len_a, len_b;
1196    Py_ssize_t min_len;
1197    PyObject *result;
1198
1199    /* Make sure both arguments are strings. */
1200    if (!(PyString_Check(a) && PyString_Check(b))) {
1201        result = Py_NotImplemented;
1202        goto out;
1203    }
1204    if (a == b) {
1205        switch (op) {
1206        case Py_EQ:case Py_LE:case Py_GE:
1207            result = Py_True;
1208            goto out;
1209        case Py_NE:case Py_LT:case Py_GT:
1210            result = Py_False;
1211            goto out;
1212        }
1213    }
1214    if (op == Py_EQ) {
1215        /* Supporting Py_NE here as well does not save
1216           much time, since Py_NE is rarely used.  */
1217        if (Py_SIZE(a) == Py_SIZE(b)
1218            && (a->ob_sval[0] == b->ob_sval[0]
1219            && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0)) {
1220            result = Py_True;
1221        } else {
1222            result = Py_False;
1223        }
1224        goto out;
1225    }
1226    len_a = Py_SIZE(a); len_b = Py_SIZE(b);
1227    min_len = (len_a < len_b) ? len_a : len_b;
1228    if (min_len > 0) {
1229        c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1230        if (c==0)
1231            c = memcmp(a->ob_sval, b->ob_sval, min_len);
1232    } else
1233        c = 0;
1234    if (c == 0)
1235        c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1236    switch (op) {
1237    case Py_LT: c = c <  0; break;
1238    case Py_LE: c = c <= 0; break;
1239    case Py_EQ: assert(0);  break; /* unreachable */
1240    case Py_NE: c = c != 0; break;
1241    case Py_GT: c = c >  0; break;
1242    case Py_GE: c = c >= 0; break;
1243    default:
1244        result = Py_NotImplemented;
1245        goto out;
1246    }
1247    result = c ? Py_True : Py_False;
1248  out:
1249    Py_INCREF(result);
1250    return result;
1251}
1252
1253int
1254_PyString_Eq(PyObject *o1, PyObject *o2)
1255{
1256    PyStringObject *a = (PyStringObject*) o1;
1257    PyStringObject *b = (PyStringObject*) o2;
1258    return Py_SIZE(a) == Py_SIZE(b)
1259      && memcmp(a->ob_sval, b->ob_sval, Py_SIZE(a)) == 0;
1260}
1261
1262static long
1263string_hash(PyStringObject *a)
1264{
1265    register Py_ssize_t len;
1266    register unsigned char *p;
1267    register long x;
1268
1269#ifdef Py_DEBUG
1270    assert(_Py_HashSecret_Initialized);
1271#endif
1272    if (a->ob_shash != -1)
1273        return a->ob_shash;
1274    len = Py_SIZE(a);
1275    /*
1276      We make the hash of the empty string be 0, rather than using
1277      (prefix ^ suffix), since this slightly obfuscates the hash secret
1278    */
1279    if (len == 0) {
1280        a->ob_shash = 0;
1281        return 0;
1282    }
1283    p = (unsigned char *) a->ob_sval;
1284    x = _Py_HashSecret.prefix;
1285    x ^= *p << 7;
1286    while (--len >= 0)
1287        x = (1000003*x) ^ *p++;
1288    x ^= Py_SIZE(a);
1289    x ^= _Py_HashSecret.suffix;
1290    if (x == -1)
1291        x = -2;
1292    a->ob_shash = x;
1293    return x;
1294}
1295
1296static PyObject*
1297string_subscript(PyStringObject* self, PyObject* item)
1298{
1299    if (PyIndex_Check(item)) {
1300        Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1301        if (i == -1 && PyErr_Occurred())
1302            return NULL;
1303        if (i < 0)
1304            i += PyString_GET_SIZE(self);
1305        return string_item(self, i);
1306    }
1307    else if (PySlice_Check(item)) {
1308        Py_ssize_t start, stop, step, slicelength, cur, i;
1309        char* source_buf;
1310        char* result_buf;
1311        PyObject* result;
1312
1313        if (PySlice_GetIndicesEx((PySliceObject*)item,
1314                         PyString_GET_SIZE(self),
1315                         &start, &stop, &step, &slicelength) < 0) {
1316            return NULL;
1317        }
1318
1319        if (slicelength <= 0) {
1320            return PyString_FromStringAndSize("", 0);
1321        }
1322        else if (start == 0 && step == 1 &&
1323                 slicelength == PyString_GET_SIZE(self) &&
1324                 PyString_CheckExact(self)) {
1325            Py_INCREF(self);
1326            return (PyObject *)self;
1327        }
1328        else if (step == 1) {
1329            return PyString_FromStringAndSize(
1330                PyString_AS_STRING(self) + start,
1331                slicelength);
1332        }
1333        else {
1334            source_buf = PyString_AsString((PyObject*)self);
1335            result_buf = (char *)PyMem_Malloc(slicelength);
1336            if (result_buf == NULL)
1337                return PyErr_NoMemory();
1338
1339            for (cur = start, i = 0; i < slicelength;
1340                 cur += step, i++) {
1341                result_buf[i] = source_buf[cur];
1342            }
1343
1344            result = PyString_FromStringAndSize(result_buf,
1345                                                slicelength);
1346            PyMem_Free(result_buf);
1347            return result;
1348        }
1349    }
1350    else {
1351        PyErr_Format(PyExc_TypeError,
1352                     "string indices must be integers, not %.200s",
1353                     Py_TYPE(item)->tp_name);
1354        return NULL;
1355    }
1356}
1357
1358static Py_ssize_t
1359string_buffer_getreadbuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
1360{
1361    if ( index != 0 ) {
1362        PyErr_SetString(PyExc_SystemError,
1363                        "accessing non-existent string segment");
1364        return -1;
1365    }
1366    *ptr = (void *)self->ob_sval;
1367    return Py_SIZE(self);
1368}
1369
1370static Py_ssize_t
1371string_buffer_getwritebuf(PyStringObject *self, Py_ssize_t index, const void **ptr)
1372{
1373    PyErr_SetString(PyExc_TypeError,
1374                    "Cannot use string as modifiable buffer");
1375    return -1;
1376}
1377
1378static Py_ssize_t
1379string_buffer_getsegcount(PyStringObject *self, Py_ssize_t *lenp)
1380{
1381    if ( lenp )
1382        *lenp = Py_SIZE(self);
1383    return 1;
1384}
1385
1386static Py_ssize_t
1387string_buffer_getcharbuf(PyStringObject *self, Py_ssize_t index, const char **ptr)
1388{
1389    if ( index != 0 ) {
1390        PyErr_SetString(PyExc_SystemError,
1391                        "accessing non-existent string segment");
1392        return -1;
1393    }
1394    *ptr = self->ob_sval;
1395    return Py_SIZE(self);
1396}
1397
1398static int
1399string_buffer_getbuffer(PyStringObject *self, Py_buffer *view, int flags)
1400{
1401    return PyBuffer_FillInfo(view, (PyObject*)self,
1402                             (void *)self->ob_sval, Py_SIZE(self),
1403                             1, flags);
1404}
1405
/* Sequence-protocol slot table for strings.  Length, concatenation,
   repetition, item access, slicing and containment are wired to the
   functions above; the two assignment slots are left as 0, so no handler
   is installed for item or slice assignment. */
static PySequenceMethods string_as_sequence = {
    (lenfunc)string_length, /*sq_length*/
    (binaryfunc)string_concat, /*sq_concat*/
    (ssizeargfunc)string_repeat, /*sq_repeat*/
    (ssizeargfunc)string_item, /*sq_item*/
    (ssizessizeargfunc)string_slice, /*sq_slice*/
    0,                  /*sq_ass_item*/
    0,                  /*sq_ass_slice*/
    (objobjproc)string_contains /*sq_contains*/
};
1416
/* Mapping-protocol slot table for strings: length and subscripting
   (integer index or slice object) are provided; the third slot is left
   as 0, so no subscript-assignment handler is installed. */
static PyMappingMethods string_as_mapping = {
    (lenfunc)string_length, /*mp_length*/
    (binaryfunc)string_subscript, /*mp_subscript*/
    0, /*mp_ass_subscript*/
};
1422
/* Buffer-protocol slot table for strings.  The first four entries are
   the read / write / segment-count / char-buffer accessors, followed by
   the Py_buffer filler.  The final slot is left as 0. */
static PyBufferProcs string_as_buffer = {
    (readbufferproc)string_buffer_getreadbuf, /*bf_getreadbuffer*/
    (writebufferproc)string_buffer_getwritebuf, /*bf_getwritebuffer*/
    (segcountproc)string_buffer_getsegcount, /*bf_getsegcount*/
    (charbufferproc)string_buffer_getcharbuf, /*bf_getcharbuffer*/
    (getbufferproc)string_buffer_getbuffer, /*bf_getbuffer*/
    0, /* XXX bf_releasebuffer: no release handler installed */
};
1431
1432
1433
/* Selectors for which end(s) of the string a strip operation trims.
   They double as indexes into stripformat[]. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2

/* Arrays indexed by above */
/* Each entry is an argument-parsing format string: one optional object
   argument ("|O"), then ':' and the method name. */
static const char *stripformat[] = {"|O:lstrip", "|O:rstrip", "|O:strip"};

/* Skip the 3-character "|O:" prefix to obtain just the method name. */
#define STRIPNAME(i) (stripformat[i]+3)
1442
1443PyDoc_STRVAR(split__doc__,
1444"S.split([sep [,maxsplit]]) -> list of strings\n\
1445\n\
1446Return a list of the words in the string S, using sep as the\n\
1447delimiter string.  If maxsplit is given, at most maxsplit\n\
1448splits are done. If sep is not specified or is None, any\n\
1449whitespace string is a separator and empty strings are removed\n\
1450from the result.");
1451
1452static PyObject *
1453string_split(PyStringObject *self, PyObject *args)
1454{
1455    Py_ssize_t len = PyString_GET_SIZE(self), n;
1456    Py_ssize_t maxsplit = -1;
1457    const char *s = PyString_AS_STRING(self), *sub;
1458    PyObject *subobj = Py_None;
1459
1460    if (!PyArg_ParseTuple(args, "|On:split", &subobj, &maxsplit))
1461        return NULL;
1462    if (maxsplit < 0)
1463        maxsplit = PY_SSIZE_T_MAX;
1464    if (subobj == Py_None)
1465        return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
1466    if (PyString_Check(subobj)) {
1467        sub = PyString_AS_STRING(subobj);
1468        n = PyString_GET_SIZE(subobj);
1469    }
1470#ifdef Py_USING_UNICODE
1471    else if (PyUnicode_Check(subobj))
1472        return PyUnicode_Split((PyObject *)self, subobj, maxsplit);
1473#endif
1474    else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1475        return NULL;
1476
1477    return stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1478}
1479
1480PyDoc_STRVAR(partition__doc__,
1481"S.partition(sep) -> (head, sep, tail)\n\
1482\n\
1483Search for the separator sep in S, and return the part before it,\n\
1484the separator itself, and the part after it.  If the separator is not\n\
1485found, return S and two empty strings.");
1486
1487static PyObject *
1488string_partition(PyStringObject *self, PyObject *sep_obj)
1489{
1490    const char *sep;
1491    Py_ssize_t sep_len;
1492
1493    if (PyString_Check(sep_obj)) {
1494        sep = PyString_AS_STRING(sep_obj);
1495        sep_len = PyString_GET_SIZE(sep_obj);
1496    }
1497#ifdef Py_USING_UNICODE
1498    else if (PyUnicode_Check(sep_obj))
1499        return PyUnicode_Partition((PyObject *) self, sep_obj);
1500#endif
1501    else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1502        return NULL;
1503
1504    return stringlib_partition(
1505        (PyObject*) self,
1506        PyString_AS_STRING(self), PyString_GET_SIZE(self),
1507        sep_obj, sep, sep_len
1508        );
1509}
1510
1511PyDoc_STRVAR(rpartition__doc__,
1512"S.rpartition(sep) -> (head, sep, tail)\n\
1513\n\
1514Search for the separator sep in S, starting at the end of S, and return\n\
1515the part before it, the separator itself, and the part after it.  If the\n\
1516separator is not found, return two empty strings and S.");
1517
1518static PyObject *
1519string_rpartition(PyStringObject *self, PyObject *sep_obj)
1520{
1521    const char *sep;
1522    Py_ssize_t sep_len;
1523
1524    if (PyString_Check(sep_obj)) {
1525        sep = PyString_AS_STRING(sep_obj);
1526        sep_len = PyString_GET_SIZE(sep_obj);
1527    }
1528#ifdef Py_USING_UNICODE
1529    else if (PyUnicode_Check(sep_obj))
1530        return PyUnicode_RPartition((PyObject *) self, sep_obj);
1531#endif
1532    else if (PyObject_AsCharBuffer(sep_obj, &sep, &sep_len))
1533        return NULL;
1534
1535    return stringlib_rpartition(
1536        (PyObject*) self,
1537        PyString_AS_STRING(self), PyString_GET_SIZE(self),
1538        sep_obj, sep, sep_len
1539        );
1540}
1541
1542PyDoc_STRVAR(rsplit__doc__,
1543"S.rsplit([sep [,maxsplit]]) -> list of strings\n\
1544\n\
1545Return a list of the words in the string S, using sep as the\n\
1546delimiter string, starting at the end of the string and working\n\
1547to the front.  If maxsplit is given, at most maxsplit splits are\n\
1548done. If sep is not specified or is None, any whitespace string\n\
1549is a separator.");
1550
1551static PyObject *
1552string_rsplit(PyStringObject *self, PyObject *args)
1553{
1554    Py_ssize_t len = PyString_GET_SIZE(self), n;
1555    Py_ssize_t maxsplit = -1;
1556    const char *s = PyString_AS_STRING(self), *sub;
1557    PyObject *subobj = Py_None;
1558
1559    if (!PyArg_ParseTuple(args, "|On:rsplit", &subobj, &maxsplit))
1560        return NULL;
1561    if (maxsplit < 0)
1562        maxsplit = PY_SSIZE_T_MAX;
1563    if (subobj == Py_None)
1564        return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
1565    if (PyString_Check(subobj)) {
1566        sub = PyString_AS_STRING(subobj);
1567        n = PyString_GET_SIZE(subobj);
1568    }
1569#ifdef Py_USING_UNICODE
1570    else if (PyUnicode_Check(subobj))
1571        return PyUnicode_RSplit((PyObject *)self, subobj, maxsplit);
1572#endif
1573    else if (PyObject_AsCharBuffer(subobj, &sub, &n))
1574        return NULL;
1575
1576    return stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1577}
1578
1579
1580PyDoc_STRVAR(join__doc__,
1581"S.join(iterable) -> string\n\
1582\n\
1583Return a string which is the concatenation of the strings in the\n\
1584iterable.  The separator between elements is S.");
1585
1586static PyObject *
1587string_join(PyStringObject *self, PyObject *orig)
1588{
1589    char *sep = PyString_AS_STRING(self);
1590    const Py_ssize_t seplen = PyString_GET_SIZE(self);
1591    PyObject *res = NULL;
1592    char *p;
1593    Py_ssize_t seqlen = 0;
1594    size_t sz = 0;
1595    Py_ssize_t i;
1596    PyObject *seq, *item;
1597
1598    seq = PySequence_Fast(orig, "can only join an iterable");
1599    if (seq == NULL) {
1600        return NULL;
1601    }
1602
1603    seqlen = PySequence_Size(seq);
1604    if (seqlen == 0) {
1605        Py_DECREF(seq);
1606        return PyString_FromString("");
1607    }
1608    if (seqlen == 1) {
1609        item = PySequence_Fast_GET_ITEM(seq, 0);
1610        if (PyString_CheckExact(item) || PyUnicode_CheckExact(item)) {
1611            Py_INCREF(item);
1612            Py_DECREF(seq);
1613            return item;
1614        }
1615    }
1616
1617    /* There are at least two things to join, or else we have a subclass
1618     * of the builtin types in the sequence.
1619     * Do a pre-pass to figure out the total amount of space we'll
1620     * need (sz), see whether any argument is absurd, and defer to
1621     * the Unicode join if appropriate.
1622     */
1623    for (i = 0; i < seqlen; i++) {
1624        const size_t old_sz = sz;
1625        item = PySequence_Fast_GET_ITEM(seq, i);
1626        if (!PyString_Check(item)){
1627#ifdef Py_USING_UNICODE
1628            if (PyUnicode_Check(item)) {
1629                /* Defer to Unicode join.
1630                 * CAUTION:  There's no gurantee that the
1631                 * original sequence can be iterated over
1632                 * again, so we must pass seq here.
1633                 */
1634                PyObject *result;
1635                result = PyUnicode_Join((PyObject *)self, seq);
1636                Py_DECREF(seq);
1637                return result;
1638            }
1639#endif
1640            PyErr_Format(PyExc_TypeError,
1641                         "sequence item %zd: expected string,"
1642                         " %.80s found",
1643                         i, Py_TYPE(item)->tp_name);
1644            Py_DECREF(seq);
1645            return NULL;
1646        }
1647        sz += PyString_GET_SIZE(item);
1648        if (i != 0)
1649            sz += seplen;
1650        if (sz < old_sz || sz > PY_SSIZE_T_MAX) {
1651            PyErr_SetString(PyExc_OverflowError,
1652                "join() result is too long for a Python string");
1653            Py_DECREF(seq);
1654            return NULL;
1655        }
1656    }
1657
1658    /* Allocate result space. */
1659    res = PyString_FromStringAndSize((char*)NULL, sz);
1660    if (res == NULL) {
1661        Py_DECREF(seq);
1662        return NULL;
1663    }
1664
1665    /* Catenate everything. */
1666    p = PyString_AS_STRING(res);
1667    for (i = 0; i < seqlen; ++i) {
1668        size_t n;
1669        item = PySequence_Fast_GET_ITEM(seq, i);
1670        n = PyString_GET_SIZE(item);
1671        Py_MEMCPY(p, PyString_AS_STRING(item), n);
1672        p += n;
1673        if (i < seqlen - 1) {
1674            Py_MEMCPY(p, sep, seplen);
1675            p += seplen;
1676        }
1677    }
1678
1679    Py_DECREF(seq);
1680    return res;
1681}
1682
1683PyObject *
1684_PyString_Join(PyObject *sep, PyObject *x)
1685{
1686    assert(sep != NULL && PyString_Check(sep));
1687    assert(x != NULL);
1688    return string_join((PyStringObject *)sep, x);
1689}
1690
/* helper macro to fixup start/end slice values */
/* `end` is clamped to at most `len`; a negative `end` or `start` is
   offset by `len` and, if still negative, set to 0.  `start` is not
   clamped against `len`.  The macro assigns to `start` and `end` in place
   and evaluates its arguments more than once, so pass plain lvalues.
   NOTE: it expands to two bare `if` statements with no do/while wrapper,
   so it must not be used as the unbraced body of an if/else. */
#define ADJUST_INDICES(start, end, len)         \
    if (end > len)                          \
        end = len;                          \
    else if (end < 0) {                     \
        end += len;                         \
        if (end < 0)                        \
        end = 0;                        \
    }                                       \
    if (start < 0) {                        \
        start += len;                       \
        if (start < 0)                      \
        start = 0;                      \
    }
1705
1706Py_LOCAL_INLINE(Py_ssize_t)
1707string_find_internal(PyStringObject *self, PyObject *args, int dir)
1708{
1709    PyObject *subobj;
1710    const char *sub;
1711    Py_ssize_t sub_len;
1712    Py_ssize_t start=0, end=PY_SSIZE_T_MAX;
1713
1714    if (!stringlib_parse_args_finds("find/rfind/index/rindex",
1715                                    args, &subobj, &start, &end))
1716        return -2;
1717
1718    if (PyString_Check(subobj)) {
1719        sub = PyString_AS_STRING(subobj);
1720        sub_len = PyString_GET_SIZE(subobj);
1721    }
1722#ifdef Py_USING_UNICODE
1723    else if (PyUnicode_Check(subobj))
1724        return PyUnicode_Find(
1725            (PyObject *)self, subobj, start, end, dir);
1726#endif
1727    else if (PyObject_AsCharBuffer(subobj, &sub, &sub_len))
1728        /* XXX - the "expected a character buffer object" is pretty
1729           confusing for a non-expert.  remap to something else ? */
1730        return -2;
1731
1732    if (dir > 0)
1733        return stringlib_find_slice(
1734            PyString_AS_STRING(self), PyString_GET_SIZE(self),
1735            sub, sub_len, start, end);
1736    else
1737        return stringlib_rfind_slice(
1738            PyString_AS_STRING(self), PyString_GET_SIZE(self),
1739            sub, sub_len, start, end);
1740}
1741
1742
1743PyDoc_STRVAR(find__doc__,
1744"S.find(sub [,start [,end]]) -> int\n\
1745\n\
1746Return the lowest index in S where substring sub is found,\n\
1747such that sub is contained within S[start:end].  Optional\n\
1748arguments start and end are interpreted as in slice notation.\n\
1749\n\
1750Return -1 on failure.");
1751
1752static PyObject *
1753string_find(PyStringObject *self, PyObject *args)
1754{
1755    Py_ssize_t result = string_find_internal(self, args, +1);
1756    if (result == -2)
1757        return NULL;
1758    return PyInt_FromSsize_t(result);
1759}
1760
1761
1762PyDoc_STRVAR(index__doc__,
1763"S.index(sub [,start [,end]]) -> int\n\
1764\n\
1765Like S.find() but raise ValueError when the substring is not found.");
1766
1767static PyObject *
1768string_index(PyStringObject *self, PyObject *args)
1769{
1770    Py_ssize_t result = string_find_internal(self, args, +1);
1771    if (result == -2)
1772        return NULL;
1773    if (result == -1) {
1774        PyErr_SetString(PyExc_ValueError,
1775                        "substring not found");
1776        return NULL;
1777    }
1778    return PyInt_FromSsize_t(result);
1779}
1780
1781
1782PyDoc_STRVAR(rfind__doc__,
1783"S.rfind(sub [,start [,end]]) -> int\n\
1784\n\
1785Return the highest index in S where substring sub is found,\n\
1786such that sub is contained within S[start:end].  Optional\n\
1787arguments start and end are interpreted as in slice notation.\n\
1788\n\
1789Return -1 on failure.");
1790
1791static PyObject *
1792string_rfind(PyStringObject *self, PyObject *args)
1793{
1794    Py_ssize_t result = string_find_internal(self, args, -1);
1795    if (result == -2)
1796        return NULL;
1797    return PyInt_FromSsize_t(result);
1798}
1799
1800
1801PyDoc_STRVAR(rindex__doc__,
1802"S.rindex(sub [,start [,end]]) -> int\n\
1803\n\
1804Like S.rfind() but raise ValueError when the substring is not found.");
1805
1806static PyObject *
1807string_rindex(PyStringObject *self, PyObject *args)
1808{
1809    Py_ssize_t result = string_find_internal(self, args, -1);
1810    if (result == -2)
1811        return NULL;
1812    if (result == -1) {
1813        PyErr_SetString(PyExc_ValueError,
1814                        "substring not found");
1815        return NULL;
1816    }
1817    return PyInt_FromSsize_t(result);
1818}
1819
1820
1821Py_LOCAL_INLINE(PyObject *)
1822do_xstrip(PyStringObject *self, int striptype, PyObject *sepobj)
1823{
1824    char *s = PyString_AS_STRING(self);
1825    Py_ssize_t len = PyString_GET_SIZE(self);
1826    char *sep = PyString_AS_STRING(sepobj);
1827    Py_ssize_t seplen = PyString_GET_SIZE(sepobj);
1828    Py_ssize_t i, j;
1829
1830    i = 0;
1831    if (striptype != RIGHTSTRIP) {
1832        while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1833            i++;
1834        }
1835    }
1836
1837    j = len;
1838    if (striptype != LEFTSTRIP) {
1839        do {
1840            j--;
1841        } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1842        j++;
1843    }
1844
1845    if (i == 0 && j == len && PyString_CheckExact(self)) {
1846        Py_INCREF(self);
1847        return (PyObject*)self;
1848    }
1849    else
1850        return PyString_FromStringAndSize(s+i, j-i);
1851}
1852
1853
1854Py_LOCAL_INLINE(PyObject *)
1855do_strip(PyStringObject *self, int striptype)
1856{
1857    char *s = PyString_AS_STRING(self);
1858    Py_ssize_t len = PyString_GET_SIZE(self), i, j;
1859
1860    i = 0;
1861    if (striptype != RIGHTSTRIP) {
1862        while (i < len && isspace(Py_CHARMASK(s[i]))) {
1863            i++;
1864        }
1865    }
1866
1867    j = len;
1868    if (striptype != LEFTSTRIP) {
1869        do {
1870            j--;
1871        } while (j >= i && isspace(Py_CHARMASK(s[j])));
1872        j++;
1873    }
1874
1875    if (i == 0 && j == len && PyString_CheckExact(self)) {
1876        Py_INCREF(self);
1877        return (PyObject*)self;
1878    }
1879    else
1880        return PyString_FromStringAndSize(s+i, j-i);
1881}
1882
1883
1884Py_LOCAL_INLINE(PyObject *)
1885do_argstrip(PyStringObject *self, int striptype, PyObject *args)
1886{
1887    PyObject *sep = NULL;
1888
1889    if (!PyArg_ParseTuple(args, (char *)stripformat[striptype], &sep))
1890        return NULL;
1891
1892    if (sep != NULL && sep != Py_None) {
1893        if (PyString_Check(sep))
1894            return do_xstrip(self, striptype, sep);
1895#ifdef Py_USING_UNICODE
1896        else if (PyUnicode_Check(sep)) {
1897            PyObject *uniself = PyUnicode_FromObject((PyObject *)self);
1898            PyObject *res;
1899            if (uniself==NULL)
1900                return NULL;
1901            res = _PyUnicode_XStrip((PyUnicodeObject *)uniself,
1902                striptype, sep);
1903            Py_DECREF(uniself);
1904            return res;
1905        }
1906#endif
1907        PyErr_Format(PyExc_TypeError,
1908#ifdef Py_USING_UNICODE
1909                     "%s arg must be None, str or unicode",
1910#else
1911                     "%s arg must be None or str",
1912#endif
1913                     STRIPNAME(striptype));
1914        return NULL;
1915    }
1916
1917    return do_strip(self, striptype);
1918}
1919
1920
1921PyDoc_STRVAR(strip__doc__,
1922"S.strip([chars]) -> string or unicode\n\
1923\n\
1924Return a copy of the string S with leading and trailing\n\
1925whitespace removed.\n\
1926If chars is given and not None, remove characters in chars instead.\n\
1927If chars is unicode, S will be converted to unicode before stripping");
1928
1929static PyObject *
1930string_strip(PyStringObject *self, PyObject *args)
1931{
1932    if (PyTuple_GET_SIZE(args) == 0)
1933        return do_strip(self, BOTHSTRIP); /* Common case */
1934    else
1935        return do_argstrip(self, BOTHSTRIP, args);
1936}
1937
1938
1939PyDoc_STRVAR(lstrip__doc__,
1940"S.lstrip([chars]) -> string or unicode\n\
1941\n\
1942Return a copy of the string S with leading whitespace removed.\n\
1943If chars is given and not None, remove characters in chars instead.\n\
1944If chars is unicode, S will be converted to unicode before stripping");
1945
1946static PyObject *
1947string_lstrip(PyStringObject *self, PyObject *args)
1948{
1949    if (PyTuple_GET_SIZE(args) == 0)
1950        return do_strip(self, LEFTSTRIP); /* Common case */
1951    else
1952        return do_argstrip(self, LEFTSTRIP, args);
1953}
1954
1955
1956PyDoc_STRVAR(rstrip__doc__,
1957"S.rstrip([chars]) -> string or unicode\n\
1958\n\
1959Return a copy of the string S with trailing whitespace removed.\n\
1960If chars is given and not None, remove characters in chars instead.\n\
1961If chars is unicode, S will be converted to unicode before stripping");
1962
1963static PyObject *
1964string_rstrip(PyStringObject *self, PyObject *args)
1965{
1966    if (PyTuple_GET_SIZE(args) == 0)
1967        return do_strip(self, RIGHTSTRIP); /* Common case */
1968    else
1969        return do_argstrip(self, RIGHTSTRIP, args);
1970}
1971
1972
1973PyDoc_STRVAR(lower__doc__,
1974"S.lower() -> string\n\
1975\n\
1976Return a copy of the string S converted to lowercase.");
1977
1978/* _tolower and _toupper are defined by SUSv2, but they're not ISO C */
1979#ifndef _tolower
1980#define _tolower tolower
1981#endif
1982
1983static PyObject *
1984string_lower(PyStringObject *self)
1985{
1986    char *s;
1987    Py_ssize_t i, n = PyString_GET_SIZE(self);
1988    PyObject *newobj;
1989
1990    newobj = PyString_FromStringAndSize(NULL, n);
1991    if (!newobj)
1992        return NULL;
1993
1994    s = PyString_AS_STRING(newobj);
1995
1996    Py_MEMCPY(s, PyString_AS_STRING(self), n);
1997
1998    for (i = 0; i < n; i++) {
1999        int c = Py_CHARMASK(s[i]);
2000        if (isupper(c))
2001            s[i] = _tolower(c);
2002    }
2003
2004    return newobj;
2005}
2006
2007PyDoc_STRVAR(upper__doc__,
2008"S.upper() -> string\n\
2009\n\
2010Return a copy of the string S converted to uppercase.");
2011
2012#ifndef _toupper
2013#define _toupper toupper
2014#endif
2015
2016static PyObject *
2017string_upper(PyStringObject *self)
2018{
2019    char *s;
2020    Py_ssize_t i, n = PyString_GET_SIZE(self);
2021    PyObject *newobj;
2022
2023    newobj = PyString_FromStringAndSize(NULL, n);
2024    if (!newobj)
2025        return NULL;
2026
2027    s = PyString_AS_STRING(newobj);
2028
2029    Py_MEMCPY(s, PyString_AS_STRING(self), n);
2030
2031    for (i = 0; i < n; i++) {
2032        int c = Py_CHARMASK(s[i]);
2033        if (islower(c))
2034            s[i] = _toupper(c);
2035    }
2036
2037    return newobj;
2038}
2039
2040PyDoc_STRVAR(title__doc__,
2041"S.title() -> string\n\
2042\n\
2043Return a titlecased version of S, i.e. words start with uppercase\n\
2044characters, all remaining cased characters have lowercase.");
2045
2046static PyObject*
2047string_title(PyStringObject *self)
2048{
2049    char *s = PyString_AS_STRING(self), *s_new;
2050    Py_ssize_t i, n = PyString_GET_SIZE(self);
2051    int previous_is_cased = 0;
2052    PyObject *newobj;
2053
2054    newobj = PyString_FromStringAndSize(NULL, n);
2055    if (newobj == NULL)
2056        return NULL;
2057    s_new = PyString_AsString(newobj);
2058    for (i = 0; i < n; i++) {
2059        int c = Py_CHARMASK(*s++);
2060        if (islower(c)) {
2061            if (!previous_is_cased)
2062                c = toupper(c);
2063            previous_is_cased = 1;
2064        } else if (isupper(c)) {
2065            if (previous_is_cased)
2066                c = tolower(c);
2067            previous_is_cased = 1;
2068        } else
2069            previous_is_cased = 0;
2070        *s_new++ = c;
2071    }
2072    return newobj;
2073}
2074
2075PyDoc_STRVAR(capitalize__doc__,
2076"S.capitalize() -> string\n\
2077\n\
2078Return a copy of the string S with only its first character\n\
2079capitalized.");
2080
2081static PyObject *
2082string_capitalize(PyStringObject *self)
2083{
2084    char *s = PyString_AS_STRING(self), *s_new;
2085    Py_ssize_t i, n = PyString_GET_SIZE(self);
2086    PyObject *newobj;
2087
2088    newobj = PyString_FromStringAndSize(NULL, n);
2089    if (newobj == NULL)
2090        return NULL;
2091    s_new = PyString_AsString(newobj);
2092    if (0 < n) {
2093        int c = Py_CHARMASK(*s++);
2094        if (islower(c))
2095            *s_new = toupper(c);
2096        else
2097            *s_new = c;
2098        s_new++;
2099    }
2100    for (i = 1; i < n; i++) {
2101        int c = Py_CHARMASK(*s++);
2102        if (isupper(c))
2103            *s_new = tolower(c);
2104        else
2105            *s_new = c;
2106        s_new++;
2107    }
2108    return newobj;
2109}
2110
2111
2112PyDoc_STRVAR(count__doc__,
2113"S.count(sub[, start[, end]]) -> int\n\
2114\n\
2115Return the number of non-overlapping occurrences of substring sub in\n\
2116string S[start:end].  Optional arguments start and end are interpreted\n\
2117as in slice notation.");
2118
2119static PyObject *
2120string_count(PyStringObject *self, PyObject *args)
2121{
2122    PyObject *sub_obj;
2123    const char *str = PyString_AS_STRING(self), *sub;
2124    Py_ssize_t sub_len;
2125    Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
2126
2127    if (!stringlib_parse_args_finds("count", args, &sub_obj, &start, &end))
2128        return NULL;
2129
2130    if (PyString_Check(sub_obj)) {
2131        sub = PyString_AS_STRING(sub_obj);
2132        sub_len = PyString_GET_SIZE(sub_obj);
2133    }
2134#ifdef Py_USING_UNICODE
2135    else if (PyUnicode_Check(sub_obj)) {
2136        Py_ssize_t count;
2137        count = PyUnicode_Count((PyObject *)self, sub_obj, start, end);
2138        if (count == -1)
2139            return NULL;
2140        else
2141            return PyInt_FromSsize_t(count);
2142    }
2143#endif
2144    else if (PyObject_AsCharBuffer(sub_obj, &sub, &sub_len))
2145        return NULL;
2146
2147    ADJUST_INDICES(start, end, PyString_GET_SIZE(self));
2148
2149    return PyInt_FromSsize_t(
2150        stringlib_count(str + start, end - start, sub, sub_len, PY_SSIZE_T_MAX)
2151        );
2152}
2153
2154PyDoc_STRVAR(swapcase__doc__,
2155"S.swapcase() -> string\n\
2156\n\
2157Return a copy of the string S with uppercase characters\n\
2158converted to lowercase and vice versa.");
2159
2160static PyObject *
2161string_swapcase(PyStringObject *self)
2162{
2163    char *s = PyString_AS_STRING(self), *s_new;
2164    Py_ssize_t i, n = PyString_GET_SIZE(self);
2165    PyObject *newobj;
2166
2167    newobj = PyString_FromStringAndSize(NULL, n);
2168    if (newobj == NULL)
2169        return NULL;
2170    s_new = PyString_AsString(newobj);
2171    for (i = 0; i < n; i++) {
2172        int c = Py_CHARMASK(*s++);
2173        if (islower(c)) {
2174            *s_new = toupper(c);
2175        }
2176        else if (isupper(c)) {
2177            *s_new = tolower(c);
2178        }
2179        else
2180            *s_new = c;
2181        s_new++;
2182    }
2183    return newobj;
2184}
2185
2186
2187PyDoc_STRVAR(translate__doc__,
2188"S.translate(table [,deletechars]) -> string\n\
2189\n\
2190Return a copy of the string S, where all characters occurring\n\
2191in the optional argument deletechars are removed, and the\n\
2192remaining characters have been mapped through the given\n\
2193translation table, which must be a string of length 256 or None.\n\
2194If the table argument is None, no translation is applied and\n\
2195the operation simply removes the characters in deletechars.");
2196
2197static PyObject *
2198string_translate(PyStringObject *self, PyObject *args)
2199{
2200    register char *input, *output;
2201    const char *table;
2202    register Py_ssize_t i, c, changed = 0;
2203    PyObject *input_obj = (PyObject*)self;
2204    const char *output_start, *del_table=NULL;
2205    Py_ssize_t inlen, tablen, dellen = 0;
2206    PyObject *result;
2207    int trans_table[256];
2208    PyObject *tableobj, *delobj = NULL;
2209
2210    if (!PyArg_UnpackTuple(args, "translate", 1, 2,
2211                          &tableobj, &delobj))
2212        return NULL;
2213
2214    if (PyString_Check(tableobj)) {
2215        table = PyString_AS_STRING(tableobj);
2216        tablen = PyString_GET_SIZE(tableobj);
2217    }
2218    else if (tableobj == Py_None) {
2219        table = NULL;
2220        tablen = 256;
2221    }
2222#ifdef Py_USING_UNICODE
2223    else if (PyUnicode_Check(tableobj)) {
2224        /* Unicode .translate() does not support the deletechars
2225           parameter; instead a mapping to None will cause characters
2226           to be deleted. */
2227        if (delobj != NULL) {
2228            PyErr_SetString(PyExc_TypeError,
2229            "deletions are implemented differently for unicode");
2230            return NULL;
2231        }
2232        return PyUnicode_Translate((PyObject *)self, tableobj, NULL);
2233    }
2234#endif
2235    else if (PyObject_AsCharBuffer(tableobj, &table, &tablen))
2236        return NULL;
2237
2238    if (tablen != 256) {
2239        PyErr_SetString(PyExc_ValueError,
2240          "translation table must be 256 characters long");
2241        return NULL;
2242    }
2243
2244    if (delobj != NULL) {
2245        if (PyString_Check(delobj)) {
2246            del_table = PyString_AS_STRING(delobj);
2247            dellen = PyString_GET_SIZE(delobj);
2248        }
2249#ifdef Py_USING_UNICODE
2250        else if (PyUnicode_Check(delobj)) {
2251            PyErr_SetString(PyExc_TypeError,
2252            "deletions are implemented differently for unicode");
2253            return NULL;
2254        }
2255#endif
2256        else if (PyObject_AsCharBuffer(delobj, &del_table, &dellen))
2257            return NULL;
2258    }
2259    else {
2260        del_table = NULL;
2261        dellen = 0;
2262    }
2263
2264    inlen = PyString_GET_SIZE(input_obj);
2265    result = PyString_FromStringAndSize((char *)NULL, inlen);
2266    if (result == NULL)
2267        return NULL;
2268    output_start = output = PyString_AsString(result);
2269    input = PyString_AS_STRING(input_obj);
2270
2271    if (dellen == 0 && table != NULL) {
2272        /* If no deletions are required, use faster code */
2273        for (i = inlen; --i >= 0; ) {
2274            c = Py_CHARMASK(*input++);
2275            if (Py_CHARMASK((*output++ = table[c])) != c)
2276                changed = 1;
2277        }
2278        if (changed || !PyString_CheckExact(input_obj))
2279            return result;
2280        Py_DECREF(result);
2281        Py_INCREF(input_obj);
2282        return input_obj;
2283    }
2284
2285    if (table == NULL) {
2286        for (i = 0; i < 256; i++)
2287            trans_table[i] = Py_CHARMASK(i);
2288    } else {
2289        for (i = 0; i < 256; i++)
2290            trans_table[i] = Py_CHARMASK(table[i]);
2291    }
2292
2293    for (i = 0; i < dellen; i++)
2294        trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
2295
2296    for (i = inlen; --i >= 0; ) {
2297        c = Py_CHARMASK(*input++);
2298        if (trans_table[c] != -1)
2299            if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2300                continue;
2301        changed = 1;
2302    }
2303    if (!changed && PyString_CheckExact(input_obj)) {
2304        Py_DECREF(result);
2305        Py_INCREF(input_obj);
2306        return input_obj;
2307    }
2308    /* Fix the size of the resulting string */
2309    if (inlen > 0 && _PyString_Resize(&result, output - output_start))
2310        return NULL;
2311    return result;
2312}
2313
2314
2315/* find and count characters and substrings */
2316
/* findchar(target, target_len, c): thin wrapper around memchr() that
   searches the first target_len bytes of target for the byte c and
   yields the result cast to char * (NULL when memchr() finds nothing). */
#define findchar(target, target_len, c)                         \
  ((char *)memchr((const void *)(target), c, target_len))
2319
2320/* String ops must return a string.  */
2321/* If the object is subclass of string, create a copy */
2322Py_LOCAL(PyStringObject *)
2323return_self(PyStringObject *self)
2324{
2325    if (PyString_CheckExact(self)) {
2326        Py_INCREF(self);
2327        return self;
2328    }
2329    return (PyStringObject *)PyString_FromStringAndSize(
2330        PyString_AS_STRING(self),
2331        PyString_GET_SIZE(self));
2332}
2333
2334Py_LOCAL_INLINE(Py_ssize_t)
2335countchar(const char *target, Py_ssize_t target_len, char c, Py_ssize_t maxcount)
2336{
2337    Py_ssize_t count=0;
2338    const char *start=target;
2339    const char *end=target+target_len;
2340
2341    while ( (start=findchar(start, end-start, c)) != NULL ) {
2342        count++;
2343        if (count >= maxcount)
2344            break;
2345        start += 1;
2346    }
2347    return count;
2348}
2349
2350
2351/* Algorithms for different cases of string replacement */
2352
2353/* len(self)>=1, from="", len(to)>=1, maxcount>=1 */
2354Py_LOCAL(PyStringObject *)
2355replace_interleave(PyStringObject *self,
2356                   const char *to_s, Py_ssize_t to_len,
2357                   Py_ssize_t maxcount)
2358{
2359    char *self_s, *result_s;
2360    Py_ssize_t self_len, result_len;
2361    Py_ssize_t count, i, product;
2362    PyStringObject *result;
2363
2364    self_len = PyString_GET_SIZE(self);
2365
2366    /* 1 at the end plus 1 after every character */
2367    count = self_len+1;
2368    if (maxcount < count)
2369        count = maxcount;
2370
2371    /* Check for overflow */
2372    /*   result_len = count * to_len + self_len; */
2373    product = count * to_len;
2374    if (product / to_len != count) {
2375        PyErr_SetString(PyExc_OverflowError,
2376                        "replace string is too long");
2377        return NULL;
2378    }
2379    result_len = product + self_len;
2380    if (result_len < 0) {
2381        PyErr_SetString(PyExc_OverflowError,
2382                        "replace string is too long");
2383        return NULL;
2384    }
2385
2386    if (! (result = (PyStringObject *)
2387                     PyString_FromStringAndSize(NULL, result_len)) )
2388        return NULL;
2389
2390    self_s = PyString_AS_STRING(self);
2391    result_s = PyString_AS_STRING(result);
2392
2393    /* TODO: special case single character, which doesn't need memcpy */
2394
2395    /* Lay the first one down (guaranteed this will occur) */
2396    Py_MEMCPY(result_s, to_s, to_len);
2397    result_s += to_len;
2398    count -= 1;
2399
2400    for (i=0; i<count; i++) {
2401        *result_s++ = *self_s++;
2402        Py_MEMCPY(result_s, to_s, to_len);
2403        result_s += to_len;
2404    }
2405
2406    /* Copy the rest of the original string */
2407    Py_MEMCPY(result_s, self_s, self_len-i);
2408
2409    return result;
2410}
2411
2412/* Special case for deleting a single character */
2413/* len(self)>=1, len(from)==1, to="", maxcount>=1 */
2414Py_LOCAL(PyStringObject *)
2415replace_delete_single_character(PyStringObject *self,
2416                                char from_c, Py_ssize_t maxcount)
2417{
2418    char *self_s, *result_s;
2419    char *start, *next, *end;
2420    Py_ssize_t self_len, result_len;
2421    Py_ssize_t count;
2422    PyStringObject *result;
2423
2424    self_len = PyString_GET_SIZE(self);
2425    self_s = PyString_AS_STRING(self);
2426
2427    count = countchar(self_s, self_len, from_c, maxcount);
2428    if (count == 0) {
2429        return return_self(self);
2430    }
2431
2432    result_len = self_len - count;  /* from_len == 1 */
2433    assert(result_len>=0);
2434
2435    if ( (result = (PyStringObject *)
2436                    PyString_FromStringAndSize(NULL, result_len)) == NULL)
2437        return NULL;
2438    result_s = PyString_AS_STRING(result);
2439
2440    start = self_s;
2441    end = self_s + self_len;
2442    while (count-- > 0) {
2443        next = findchar(start, end-start, from_c);
2444        if (next == NULL)
2445            break;
2446        Py_MEMCPY(result_s, start, next-start);
2447        result_s += (next-start);
2448        start = next+1;
2449    }
2450    Py_MEMCPY(result_s, start, end-start);
2451
2452    return result;
2453}
2454
2455/* len(self)>=1, len(from)>=2, to="", maxcount>=1 */
2456
2457Py_LOCAL(PyStringObject *)
2458replace_delete_substring(PyStringObject *self,
2459                         const char *from_s, Py_ssize_t from_len,
2460                         Py_ssize_t maxcount) {
2461    char *self_s, *result_s;
2462    char *start, *next, *end;
2463    Py_ssize_t self_len, result_len;
2464    Py_ssize_t count, offset;
2465    PyStringObject *result;
2466
2467    self_len = PyString_GET_SIZE(self);
2468    self_s = PyString_AS_STRING(self);
2469
2470    count = stringlib_count(self_s, self_len,
2471                            from_s, from_len,
2472                            maxcount);
2473
2474    if (count == 0) {
2475        /* no matches */
2476        return return_self(self);
2477    }
2478
2479    result_len = self_len - (count * from_len);
2480    assert (result_len>=0);
2481
2482    if ( (result = (PyStringObject *)
2483          PyString_FromStringAndSize(NULL, result_len)) == NULL )
2484        return NULL;
2485
2486    result_s = PyString_AS_STRING(result);
2487
2488    start = self_s;
2489    end = self_s + self_len;
2490    while (count-- > 0) {
2491        offset = stringlib_find(start, end-start,
2492                                from_s, from_len,
2493                                0);
2494        if (offset == -1)
2495            break;
2496        next = start + offset;
2497
2498        Py_MEMCPY(result_s, start, next-start);
2499
2500        result_s += (next-start);
2501        start = next+from_len;
2502    }
2503    Py_MEMCPY(result_s, start, end-start);
2504    return result;
2505}
2506
2507/* len(self)>=1, len(from)==len(to)==1, maxcount>=1 */
2508Py_LOCAL(PyStringObject *)
2509replace_single_character_in_place(PyStringObject *self,
2510                                  char from_c, char to_c,
2511                                  Py_ssize_t maxcount)
2512{
2513    char *self_s, *result_s, *start, *end, *next;
2514    Py_ssize_t self_len;
2515    PyStringObject *result;
2516
2517    /* The result string will be the same size */
2518    self_s = PyString_AS_STRING(self);
2519    self_len = PyString_GET_SIZE(self);
2520
2521    next = findchar(self_s, self_len, from_c);
2522
2523    if (next == NULL) {
2524        /* No matches; return the original string */
2525        return return_self(self);
2526    }
2527
2528    /* Need to make a new string */
2529    result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2530    if (result == NULL)
2531        return NULL;
2532    result_s = PyString_AS_STRING(result);
2533    Py_MEMCPY(result_s, self_s, self_len);
2534
2535    /* change everything in-place, starting with this one */
2536    start =  result_s + (next-self_s);
2537    *start = to_c;
2538    start++;
2539    end = result_s + self_len;
2540
2541    while (--maxcount > 0) {
2542        next = findchar(start, end-start, from_c);
2543        if (next == NULL)
2544            break;
2545        *next = to_c;
2546        start = next+1;
2547    }
2548
2549    return result;
2550}
2551
2552/* len(self)>=1, len(from)==len(to)>=2, maxcount>=1 */
2553Py_LOCAL(PyStringObject *)
2554replace_substring_in_place(PyStringObject *self,
2555                           const char *from_s, Py_ssize_t from_len,
2556                           const char *to_s, Py_ssize_t to_len,
2557                           Py_ssize_t maxcount)
2558{
2559    char *result_s, *start, *end;
2560    char *self_s;
2561    Py_ssize_t self_len, offset;
2562    PyStringObject *result;
2563
2564    /* The result string will be the same size */
2565
2566    self_s = PyString_AS_STRING(self);
2567    self_len = PyString_GET_SIZE(self);
2568
2569    offset = stringlib_find(self_s, self_len,
2570                            from_s, from_len,
2571                            0);
2572    if (offset == -1) {
2573        /* No matches; return the original string */
2574        return return_self(self);
2575    }
2576
2577    /* Need to make a new string */
2578    result = (PyStringObject *) PyString_FromStringAndSize(NULL, self_len);
2579    if (result == NULL)
2580        return NULL;
2581    result_s = PyString_AS_STRING(result);
2582    Py_MEMCPY(result_s, self_s, self_len);
2583
2584    /* change everything in-place, starting with this one */
2585    start =  result_s + offset;
2586    Py_MEMCPY(start, to_s, from_len);
2587    start += from_len;
2588    end = result_s + self_len;
2589
2590    while ( --maxcount > 0) {
2591        offset = stringlib_find(start, end-start,
2592                                from_s, from_len,
2593                                0);
2594        if (offset==-1)
2595            break;
2596        Py_MEMCPY(start+offset, to_s, from_len);
2597        start += offset+from_len;
2598    }
2599
2600    return result;
2601}
2602
2603/* len(self)>=1, len(from)==1, len(to)>=2, maxcount>=1 */
2604Py_LOCAL(PyStringObject *)
2605replace_single_character(PyStringObject *self,
2606                         char from_c,
2607                         const char *to_s, Py_ssize_t to_len,
2608                         Py_ssize_t maxcount)
2609{
2610    char *self_s, *result_s;
2611    char *start, *next, *end;
2612    Py_ssize_t self_len, result_len;
2613    Py_ssize_t count, product;
2614    PyStringObject *result;
2615
2616    self_s = PyString_AS_STRING(self);
2617    self_len = PyString_GET_SIZE(self);
2618
2619    count = countchar(self_s, self_len, from_c, maxcount);
2620    if (count == 0) {
2621        /* no matches, return unchanged */
2622        return return_self(self);
2623    }
2624
2625    /* use the difference between current and new, hence the "-1" */
2626    /*   result_len = self_len + count * (to_len-1)  */
2627    product = count * (to_len-1);
2628    if (product / (to_len-1) != count) {
2629        PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2630        return NULL;
2631    }
2632    result_len = self_len + product;
2633    if (result_len < 0) {
2634        PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2635        return NULL;
2636    }
2637
2638    if ( (result = (PyStringObject *)
2639          PyString_FromStringAndSize(NULL, result_len)) == NULL)
2640        return NULL;
2641    result_s = PyString_AS_STRING(result);
2642
2643    start = self_s;
2644    end = self_s + self_len;
2645    while (count-- > 0) {
2646        next = findchar(start, end-start, from_c);
2647        if (next == NULL)
2648            break;
2649
2650        if (next == start) {
2651            /* replace with the 'to' */
2652            Py_MEMCPY(result_s, to_s, to_len);
2653            result_s += to_len;
2654            start += 1;
2655        } else {
2656            /* copy the unchanged old then the 'to' */
2657            Py_MEMCPY(result_s, start, next-start);
2658            result_s += (next-start);
2659            Py_MEMCPY(result_s, to_s, to_len);
2660            result_s += to_len;
2661            start = next+1;
2662        }
2663    }
2664    /* Copy the remainder of the remaining string */
2665    Py_MEMCPY(result_s, start, end-start);
2666
2667    return result;
2668}
2669
2670/* len(self)>=1, len(from)>=2, len(to)>=2, maxcount>=1 */
2671Py_LOCAL(PyStringObject *)
2672replace_substring(PyStringObject *self,
2673                  const char *from_s, Py_ssize_t from_len,
2674                  const char *to_s, Py_ssize_t to_len,
2675                  Py_ssize_t maxcount) {
2676    char *self_s, *result_s;
2677    char *start, *next, *end;
2678    Py_ssize_t self_len, result_len;
2679    Py_ssize_t count, offset, product;
2680    PyStringObject *result;
2681
2682    self_s = PyString_AS_STRING(self);
2683    self_len = PyString_GET_SIZE(self);
2684
2685    count = stringlib_count(self_s, self_len,
2686                            from_s, from_len,
2687                            maxcount);
2688
2689    if (count == 0) {
2690        /* no matches, return unchanged */
2691        return return_self(self);
2692    }
2693
2694    /* Check for overflow */
2695    /*    result_len = self_len + count * (to_len-from_len) */
2696    product = count * (to_len-from_len);
2697    if (product / (to_len-from_len) != count) {
2698        PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2699        return NULL;
2700    }
2701    result_len = self_len + product;
2702    if (result_len < 0) {
2703        PyErr_SetString(PyExc_OverflowError, "replace string is too long");
2704        return NULL;
2705    }
2706
2707    if ( (result = (PyStringObject *)
2708          PyString_FromStringAndSize(NULL, result_len)) == NULL)
2709        return NULL;
2710    result_s = PyString_AS_STRING(result);
2711
2712    start = self_s;
2713    end = self_s + self_len;
2714    while (count-- > 0) {
2715        offset = stringlib_find(start, end-start,
2716                                from_s, from_len,
2717                                0);
2718        if (offset == -1)
2719            break;
2720        next = start+offset;
2721        if (next == start) {
2722            /* replace with the 'to' */
2723            Py_MEMCPY(result_s, to_s, to_len);
2724            result_s += to_len;
2725            start += from_len;
2726        } else {
2727            /* copy the unchanged old then the 'to' */
2728            Py_MEMCPY(result_s, start, next-start);
2729            result_s += (next-start);
2730            Py_MEMCPY(result_s, to_s, to_len);
2731            result_s += to_len;
2732            start = next+from_len;
2733        }
2734    }
2735    /* Copy the remainder of the remaining string */
2736    Py_MEMCPY(result_s, start, end-start);
2737
2738    return result;
2739}
2740
2741
2742Py_LOCAL(PyStringObject *)
2743replace(PyStringObject *self,
2744    const char *from_s, Py_ssize_t from_len,
2745    const char *to_s, Py_ssize_t to_len,
2746    Py_ssize_t maxcount)
2747{
2748    if (maxcount < 0) {
2749        maxcount = PY_SSIZE_T_MAX;
2750    } else if (maxcount == 0 || PyString_GET_SIZE(self) == 0) {
2751        /* nothing to do; return the original string */
2752        return return_self(self);
2753    }
2754
2755    if (maxcount == 0 ||
2756        (from_len == 0 && to_len == 0)) {
2757        /* nothing to do; return the original string */
2758        return return_self(self);
2759    }
2760
2761    /* Handle zero-length special cases */
2762
2763    if (from_len == 0) {
2764        /* insert the 'to' string everywhere.   */
2765        /*    >>> "Python".replace("", ".")     */
2766        /*    '.P.y.t.h.o.n.'                   */
2767        return replace_interleave(self, to_s, to_len, maxcount);
2768    }
2769
2770    /* Except for "".replace("", "A") == "A" there is no way beyond this */
2771    /* point for an empty self string to generate a non-empty string */
2772    /* Special case so the remaining code always gets a non-empty string */
2773    if (PyString_GET_SIZE(self) == 0) {
2774        return return_self(self);
2775    }
2776
2777    if (to_len == 0) {
2778        /* delete all occurances of 'from' string */
2779        if (from_len == 1) {
2780            return replace_delete_single_character(
2781                self, from_s[0], maxcount);
2782        } else {
2783            return replace_delete_substring(self, from_s, from_len, maxcount);
2784        }
2785    }
2786
2787    /* Handle special case where both strings have the same length */
2788
2789    if (from_len == to_len) {
2790        if (from_len == 1) {
2791            return replace_single_character_in_place(
2792                self,
2793                from_s[0],
2794                to_s[0],
2795                maxcount);
2796        } else {
2797            return replace_substring_in_place(
2798                self, from_s, from_len, to_s, to_len, maxcount);
2799        }
2800    }
2801
2802    /* Otherwise use the more generic algorithms */
2803    if (from_len == 1) {
2804        return replace_single_character(self, from_s[0],
2805                                        to_s, to_len, maxcount);
2806    } else {
2807        /* len('from')>=2, len('to')>=1 */
2808        return replace_substring(self, from_s, from_len, to_s, to_len, maxcount);
2809    }
2810}
2811
2812PyDoc_STRVAR(replace__doc__,
2813"S.replace(old, new[, count]) -> string\n\
2814\n\
2815Return a copy of string S with all occurrences of substring\n\
2816old replaced by new.  If the optional argument count is\n\
2817given, only the first count occurrences are replaced.");
2818
2819static PyObject *
2820string_replace(PyStringObject *self, PyObject *args)
2821{
2822    Py_ssize_t count = -1;
2823    PyObject *from, *to;
2824    const char *from_s, *to_s;
2825    Py_ssize_t from_len, to_len;
2826
2827    if (!PyArg_ParseTuple(args, "OO|n:replace", &from, &to, &count))
2828        return NULL;
2829
2830    if (PyString_Check(from)) {
2831        from_s = PyString_AS_STRING(from);
2832        from_len = PyString_GET_SIZE(from);
2833    }
2834#ifdef Py_USING_UNICODE
2835    if (PyUnicode_Check(from))
2836        return PyUnicode_Replace((PyObject *)self,
2837                                 from, to, count);
2838#endif
2839    else if (PyObject_AsCharBuffer(from, &from_s, &from_len))
2840        return NULL;
2841
2842    if (PyString_Check(to)) {
2843        to_s = PyString_AS_STRING(to);
2844        to_len = PyString_GET_SIZE(to);
2845    }
2846#ifdef Py_USING_UNICODE
2847    else if (PyUnicode_Check(to))
2848        return PyUnicode_Replace((PyObject *)self,
2849                                 from, to, count);
2850#endif
2851    else if (PyObject_AsCharBuffer(to, &to_s, &to_len))
2852        return NULL;
2853
2854    return (PyObject *)replace((PyStringObject *) self,
2855                               from_s, from_len,
2856                               to_s, to_len, count);
2857}
2858
2859/** End DALKE **/
2860
2861/* Matches the end (direction >= 0) or start (direction < 0) of self
2862 * against substr, using the start and end arguments. Returns
2863 * -1 on error, 0 if not found and 1 if found.
2864 */
2865Py_LOCAL(int)
2866_string_tailmatch(PyStringObject *self, PyObject *substr, Py_ssize_t start,
2867                  Py_ssize_t end, int direction)
2868{
2869    Py_ssize_t len = PyString_GET_SIZE(self);
2870    Py_ssize_t slen;
2871    const char* sub;
2872    const char* str;
2873
2874    if (PyString_Check(substr)) {
2875        sub = PyString_AS_STRING(substr);
2876        slen = PyString_GET_SIZE(substr);
2877    }
2878#ifdef Py_USING_UNICODE
2879    else if (PyUnicode_Check(substr))
2880        return PyUnicode_Tailmatch((PyObject *)self,
2881                                   substr, start, end, direction);
2882#endif
2883    else if (PyObject_AsCharBuffer(substr, &sub, &slen))
2884        return -1;
2885    str = PyString_AS_STRING(self);
2886
2887    ADJUST_INDICES(start, end, len);
2888
2889    if (direction < 0) {
2890        /* startswith */
2891        if (start+slen > len)
2892            return 0;
2893    } else {
2894        /* endswith */
2895        if (end-start < slen || start > len)
2896            return 0;
2897
2898        if (end-slen > start)
2899            start = end - slen;
2900    }
2901    if (end-start >= slen)
2902        return ! memcmp(str+start, sub, slen);
2903    return 0;
2904}
2905
2906
2907PyDoc_STRVAR(startswith__doc__,
2908"S.startswith(prefix[, start[, end]]) -> bool\n\
2909\n\
2910Return True if S starts with the specified prefix, False otherwise.\n\
2911With optional start, test S beginning at that position.\n\
2912With optional end, stop comparing S at that position.\n\
2913prefix can also be a tuple of strings to try.");
2914
2915static PyObject *
2916string_startswith(PyStringObject *self, PyObject *args)
2917{
2918    Py_ssize_t start = 0;
2919    Py_ssize_t end = PY_SSIZE_T_MAX;
2920    PyObject *subobj;
2921    int result;
2922
2923    if (!stringlib_parse_args_finds("startswith", args, &subobj, &start, &end))
2924        return NULL;
2925    if (PyTuple_Check(subobj)) {
2926        Py_ssize_t i;
2927        for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2928            result = _string_tailmatch(self,
2929                            PyTuple_GET_ITEM(subobj, i),
2930                            start, end, -1);
2931            if (result == -1)
2932                return NULL;
2933            else if (result) {
2934                Py_RETURN_TRUE;
2935            }
2936        }
2937        Py_RETURN_FALSE;
2938    }
2939    result = _string_tailmatch(self, subobj, start, end, -1);
2940    if (result == -1) {
2941        if (PyErr_ExceptionMatches(PyExc_TypeError))
2942            PyErr_Format(PyExc_TypeError, "startswith first arg must be str, "
2943                         "unicode, or tuple, not %s", Py_TYPE(subobj)->tp_name);
2944        return NULL;
2945    }
2946    else
2947        return PyBool_FromLong(result);
2948}
2949
2950
2951PyDoc_STRVAR(endswith__doc__,
2952"S.endswith(suffix[, start[, end]]) -> bool\n\
2953\n\
2954Return True if S ends with the specified suffix, False otherwise.\n\
2955With optional start, test S beginning at that position.\n\
2956With optional end, stop comparing S at that position.\n\
2957suffix can also be a tuple of strings to try.");
2958
2959static PyObject *
2960string_endswith(PyStringObject *self, PyObject *args)
2961{
2962    Py_ssize_t start = 0;
2963    Py_ssize_t end = PY_SSIZE_T_MAX;
2964    PyObject *subobj;
2965    int result;
2966
2967    if (!stringlib_parse_args_finds("endswith", args, &subobj, &start, &end))
2968        return NULL;
2969    if (PyTuple_Check(subobj)) {
2970        Py_ssize_t i;
2971        for (i = 0; i < PyTuple_GET_SIZE(subobj); i++) {
2972            result = _string_tailmatch(self,
2973                            PyTuple_GET_ITEM(subobj, i),
2974                            start, end, +1);
2975            if (result == -1)
2976                return NULL;
2977            else if (result) {
2978                Py_RETURN_TRUE;
2979            }
2980        }
2981        Py_RETURN_FALSE;
2982    }
2983    result = _string_tailmatch(self, subobj, start, end, +1);
2984    if (result == -1) {
2985        if (PyErr_ExceptionMatches(PyExc_TypeError))
2986            PyErr_Format(PyExc_TypeError, "endswith first arg must be str, "
2987                         "unicode, or tuple, not %s", Py_TYPE(subobj)->tp_name);
2988        return NULL;
2989    }
2990    else
2991        return PyBool_FromLong(result);
2992}
2993
2994
2995PyDoc_STRVAR(encode__doc__,
2996"S.encode([encoding[,errors]]) -> object\n\
2997\n\
2998Encodes S using the codec registered for encoding. encoding defaults\n\
2999to the default encoding. errors may be given to set a different error\n\
3000handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3001a UnicodeEncodeError. Other possible values are 'ignore', 'replace' and\n\
3002'xmlcharrefreplace' as well as any other name registered with\n\
3003codecs.register_error that is able to handle UnicodeEncodeErrors.");
3004
3005static PyObject *
3006string_encode(PyStringObject *self, PyObject *args, PyObject *kwargs)
3007{
3008    static char *kwlist[] = {"encoding", "errors", 0};
3009    char *encoding = NULL;
3010    char *errors = NULL;
3011    PyObject *v;
3012
3013    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:encode",
3014                                     kwlist, &encoding, &errors))
3015        return NULL;
3016    v = PyString_AsEncodedObject((PyObject *)self, encoding, errors);
3017    if (v == NULL)
3018        goto onError;
3019    if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3020        PyErr_Format(PyExc_TypeError,
3021                     "encoder did not return a string/unicode object "
3022                     "(type=%.400s)",
3023                     Py_TYPE(v)->tp_name);
3024        Py_DECREF(v);
3025        return NULL;
3026    }
3027    return v;
3028
3029 onError:
3030    return NULL;
3031}
3032
3033
3034PyDoc_STRVAR(decode__doc__,
3035"S.decode([encoding[,errors]]) -> object\n\
3036\n\
3037Decodes S using the codec registered for encoding. encoding defaults\n\
3038to the default encoding. errors may be given to set a different error\n\
3039handling scheme. Default is 'strict' meaning that encoding errors raise\n\
3040a UnicodeDecodeError. Other possible values are 'ignore' and 'replace'\n\
3041as well as any other name registered with codecs.register_error that is\n\
3042able to handle UnicodeDecodeErrors.");
3043
3044static PyObject *
3045string_decode(PyStringObject *self, PyObject *args, PyObject *kwargs)
3046{
3047    static char *kwlist[] = {"encoding", "errors", 0};
3048    char *encoding = NULL;
3049    char *errors = NULL;
3050    PyObject *v;
3051
3052    if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|ss:decode",
3053                                     kwlist, &encoding, &errors))
3054        return NULL;
3055    v = PyString_AsDecodedObject((PyObject *)self, encoding, errors);
3056    if (v == NULL)
3057        goto onError;
3058    if (!PyString_Check(v) && !PyUnicode_Check(v)) {
3059        PyErr_Format(PyExc_TypeError,
3060                     "decoder did not return a string/unicode object "
3061                     "(type=%.400s)",
3062                     Py_TYPE(v)->tp_name);
3063        Py_DECREF(v);
3064        return NULL;
3065    }
3066    return v;
3067
3068 onError:
3069    return NULL;
3070}
3071
3072
3073PyDoc_STRVAR(expandtabs__doc__,
3074"S.expandtabs([tabsize]) -> string\n\
3075\n\
3076Return a copy of S where all tab characters are expanded using spaces.\n\
3077If tabsize is not given, a tab size of 8 characters is assumed.");
3078
3079static PyObject*
3080string_expandtabs(PyStringObject *self, PyObject *args)
3081{
3082    const char *e, *p, *qe;
3083    char *q;
3084    Py_ssize_t i, j, incr;
3085    PyObject *u;
3086    int tabsize = 8;
3087
3088    if (!PyArg_ParseTuple(args, "|i:expandtabs", &tabsize))
3089        return NULL;
3090
3091    /* First pass: determine size of output string */
3092    i = 0; /* chars up to and including most recent \n or \r */
3093    j = 0; /* chars since most recent \n or \r (use in tab calculations) */
3094    e = PyString_AS_STRING(self) + PyString_GET_SIZE(self); /* end of input */
3095    for (p = PyString_AS_STRING(self); p < e; p++) {
3096        if (*p == '\t') {
3097            if (tabsize > 0) {
3098                incr = tabsize - (j % tabsize);
3099                if (j > PY_SSIZE_T_MAX - incr)
3100                    goto overflow1;
3101                j += incr;
3102            }
3103        }
3104        else {
3105            if (j > PY_SSIZE_T_MAX - 1)
3106                goto overflow1;
3107            j++;
3108            if (*p == '\n' || *p == '\r') {
3109                if (i > PY_SSIZE_T_MAX - j)
3110                    goto overflow1;
3111                i += j;
3112                j = 0;
3113            }
3114        }
3115    }
3116
3117    if (i > PY_SSIZE_T_MAX - j)
3118        goto overflow1;
3119
3120    /* Second pass: create output string and fill it */
3121    u = PyString_FromStringAndSize(NULL, i + j);
3122    if (!u)
3123        return NULL;
3124
3125    j = 0; /* same as in first pass */
3126    q = PyString_AS_STRING(u); /* next output char */
3127    qe = PyString_AS_STRING(u) + PyString_GET_SIZE(u); /* end of output */
3128
3129    for (p = PyString_AS_STRING(self); p < e; p++) {
3130        if (*p == '\t') {
3131            if (tabsize > 0) {
3132                i = tabsize - (j % tabsize);
3133                j += i;
3134                while (i--) {
3135                    if (q >= qe)
3136                        goto overflow2;
3137                    *q++ = ' ';
3138                }
3139            }
3140        }
3141        else {
3142            if (q >= qe)
3143                goto overflow2;
3144            *q++ = *p;
3145            j++;
3146            if (*p == '\n' || *p == '\r')
3147                j = 0;
3148        }
3149    }
3150
3151    return u;
3152
3153  overflow2:
3154    Py_DECREF(u);
3155  overflow1:
3156    PyErr_SetString(PyExc_OverflowError, "new string is too long");
3157    return NULL;
3158}
3159
3160Py_LOCAL_INLINE(PyObject *)
3161pad(PyStringObject *self, Py_ssize_t left, Py_ssize_t right, char fill)
3162{
3163    PyObject *u;
3164
3165    if (left < 0)
3166        left = 0;
3167    if (right < 0)
3168        right = 0;
3169
3170    if (left == 0 && right == 0 && PyString_CheckExact(self)) {
3171        Py_INCREF(self);
3172        return (PyObject *)self;
3173    }
3174
3175    u = PyString_FromStringAndSize(NULL,
3176                                   left + PyString_GET_SIZE(self) + right);
3177    if (u) {
3178        if (left)
3179            memset(PyString_AS_STRING(u), fill, left);
3180        Py_MEMCPY(PyString_AS_STRING(u) + left,
3181               PyString_AS_STRING(self),
3182               PyString_GET_SIZE(self));
3183        if (right)
3184            memset(PyString_AS_STRING(u) + left + PyString_GET_SIZE(self),
3185               fill, right);
3186    }
3187
3188    return u;
3189}
3190
3191PyDoc_STRVAR(ljust__doc__,
3192"S.ljust(width[, fillchar]) -> string\n"
3193"\n"
3194"Return S left-justified in a string of length width. Padding is\n"
3195"done using the specified fill character (default is a space).");
3196
3197static PyObject *
3198string_ljust(PyStringObject *self, PyObject *args)
3199{
3200    Py_ssize_t width;
3201    char fillchar = ' ';
3202
3203    if (!PyArg_ParseTuple(args, "n|c:ljust", &width, &fillchar))
3204        return NULL;
3205
3206    if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
3207        Py_INCREF(self);
3208        return (PyObject*) self;
3209    }
3210
3211    return pad(self, 0, width - PyString_GET_SIZE(self), fillchar);
3212}
3213
3214
3215PyDoc_STRVAR(rjust__doc__,
3216"S.rjust(width[, fillchar]) -> string\n"
3217"\n"
3218"Return S right-justified in a string of length width. Padding is\n"
3219"done using the specified fill character (default is a space)");
3220
3221static PyObject *
3222string_rjust(PyStringObject *self, PyObject *args)
3223{
3224    Py_ssize_t width;
3225    char fillchar = ' ';
3226
3227    if (!PyArg_ParseTuple(args, "n|c:rjust", &width, &fillchar))
3228        return NULL;
3229
3230    if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
3231        Py_INCREF(self);
3232        return (PyObject*) self;
3233    }
3234
3235    return pad(self, width - PyString_GET_SIZE(self), 0, fillchar);
3236}
3237
3238
3239PyDoc_STRVAR(center__doc__,
3240"S.center(width[, fillchar]) -> string\n"
3241"\n"
3242"Return S centered in a string of length width. Padding is\n"
3243"done using the specified fill character (default is a space)");
3244
3245static PyObject *
3246string_center(PyStringObject *self, PyObject *args)
3247{
3248    Py_ssize_t marg, left;
3249    Py_ssize_t width;
3250    char fillchar = ' ';
3251
3252    if (!PyArg_ParseTuple(args, "n|c:center", &width, &fillchar))
3253        return NULL;
3254
3255    if (PyString_GET_SIZE(self) >= width && PyString_CheckExact(self)) {
3256        Py_INCREF(self);
3257        return (PyObject*) self;
3258    }
3259
3260    marg = width - PyString_GET_SIZE(self);
3261    left = marg / 2 + (marg & width & 1);
3262
3263    return pad(self, left, marg - left, fillchar);
3264}
3265
3266PyDoc_STRVAR(zfill__doc__,
3267"S.zfill(width) -> string\n"
3268"\n"
3269"Pad a numeric string S with zeros on the left, to fill a field\n"
3270"of the specified width.  The string S is never truncated.");
3271
3272static PyObject *
3273string_zfill(PyStringObject *self, PyObject *args)
3274{
3275    Py_ssize_t fill;
3276    PyObject *s;
3277    char *p;
3278    Py_ssize_t width;
3279
3280    if (!PyArg_ParseTuple(args, "n:zfill", &width))
3281        return NULL;
3282
3283    if (PyString_GET_SIZE(self) >= width) {
3284        if (PyString_CheckExact(self)) {
3285            Py_INCREF(self);
3286            return (PyObject*) self;
3287        }
3288        else
3289            return PyString_FromStringAndSize(
3290            PyString_AS_STRING(self),
3291            PyString_GET_SIZE(self)
3292            );
3293    }
3294
3295    fill = width - PyString_GET_SIZE(self);
3296
3297    s = pad(self, fill, 0, '0');
3298
3299    if (s == NULL)
3300        return NULL;
3301
3302    p = PyString_AS_STRING(s);
3303    if (p[fill] == '+' || p[fill] == '-') {
3304        /* move sign to beginning of string */
3305        p[0] = p[fill];
3306        p[fill] = '0';
3307    }
3308
3309    return (PyObject*) s;
3310}
3311
3312PyDoc_STRVAR(isspace__doc__,
3313"S.isspace() -> bool\n\
3314\n\
3315Return True if all characters in S are whitespace\n\
3316and there is at least one character in S, False otherwise.");
3317
3318static PyObject*
3319string_isspace(PyStringObject *self)
3320{
3321    register const unsigned char *p
3322        = (unsigned char *) PyString_AS_STRING(self);
3323    register const unsigned char *e;
3324
3325    /* Shortcut for single character strings */
3326    if (PyString_GET_SIZE(self) == 1 &&
3327        isspace(*p))
3328        return PyBool_FromLong(1);
3329
3330    /* Special case for empty strings */
3331    if (PyString_GET_SIZE(self) == 0)
3332        return PyBool_FromLong(0);
3333
3334    e = p + PyString_GET_SIZE(self);
3335    for (; p < e; p++) {
3336        if (!isspace(*p))
3337            return PyBool_FromLong(0);
3338    }
3339    return PyBool_FromLong(1);
3340}
3341
3342
3343PyDoc_STRVAR(isalpha__doc__,
3344"S.isalpha() -> bool\n\
3345\n\
3346Return True if all characters in S are alphabetic\n\
3347and there is at least one character in S, False otherwise.");
3348
3349static PyObject*
3350string_isalpha(PyStringObject *self)
3351{
3352    register const unsigned char *p
3353        = (unsigned char *) PyString_AS_STRING(self);
3354    register const unsigned char *e;
3355
3356    /* Shortcut for single character strings */
3357    if (PyString_GET_SIZE(self) == 1 &&
3358        isalpha(*p))
3359        return PyBool_FromLong(1);
3360
3361    /* Special case for empty strings */
3362    if (PyString_GET_SIZE(self) == 0)
3363        return PyBool_FromLong(0);
3364
3365    e = p + PyString_GET_SIZE(self);
3366    for (; p < e; p++) {
3367        if (!isalpha(*p))
3368            return PyBool_FromLong(0);
3369    }
3370    return PyBool_FromLong(1);
3371}
3372
3373
3374PyDoc_STRVAR(isalnum__doc__,
3375"S.isalnum() -> bool\n\
3376\n\
3377Return True if all characters in S are alphanumeric\n\
3378and there is at least one character in S, False otherwise.");
3379
3380static PyObject*
3381string_isalnum(PyStringObject *self)
3382{
3383    register const unsigned char *p
3384        = (unsigned char *) PyString_AS_STRING(self);
3385    register const unsigned char *e;
3386
3387    /* Shortcut for single character strings */
3388    if (PyString_GET_SIZE(self) == 1 &&
3389        isalnum(*p))
3390        return PyBool_FromLong(1);
3391
3392    /* Special case for empty strings */
3393    if (PyString_GET_SIZE(self) == 0)
3394        return PyBool_FromLong(0);
3395
3396    e = p + PyString_GET_SIZE(self);
3397    for (; p < e; p++) {
3398        if (!isalnum(*p))
3399            return PyBool_FromLong(0);
3400    }
3401    return PyBool_FromLong(1);
3402}
3403
3404
3405PyDoc_STRVAR(isdigit__doc__,
3406"S.isdigit() -> bool\n\
3407\n\
3408Return True if all characters in S are digits\n\
3409and there is at least one character in S, False otherwise.");
3410
3411static PyObject*
3412string_isdigit(PyStringObject *self)
3413{
3414    register const unsigned char *p
3415        = (unsigned char *) PyString_AS_STRING(self);
3416    register const unsigned char *e;
3417
3418    /* Shortcut for single character strings */
3419    if (PyString_GET_SIZE(self) == 1 &&
3420        isdigit(*p))
3421        return PyBool_FromLong(1);
3422
3423    /* Special case for empty strings */
3424    if (PyString_GET_SIZE(self) == 0)
3425        return PyBool_FromLong(0);
3426
3427    e = p + PyString_GET_SIZE(self);
3428    for (; p < e; p++) {
3429        if (!isdigit(*p))
3430            return PyBool_FromLong(0);
3431    }
3432    return PyBool_FromLong(1);
3433}
3434
3435
3436PyDoc_STRVAR(islower__doc__,
3437"S.islower() -> bool\n\
3438\n\
3439Return True if all cased characters in S are lowercase and there is\n\
3440at least one cased character in S, False otherwise.");
3441
3442static PyObject*
3443string_islower(PyStringObject *self)
3444{
3445    register const unsigned char *p
3446        = (unsigned char *) PyString_AS_STRING(self);
3447    register const unsigned char *e;
3448    int cased;
3449
3450    /* Shortcut for single character strings */
3451    if (PyString_GET_SIZE(self) == 1)
3452        return PyBool_FromLong(islower(*p) != 0);
3453
3454    /* Special case for empty strings */
3455    if (PyString_GET_SIZE(self) == 0)
3456        return PyBool_FromLong(0);
3457
3458    e = p + PyString_GET_SIZE(self);
3459    cased = 0;
3460    for (; p < e; p++) {
3461        if (isupper(*p))
3462            return PyBool_FromLong(0);
3463        else if (!cased && islower(*p))
3464            cased = 1;
3465    }
3466    return PyBool_FromLong(cased);
3467}
3468
3469
3470PyDoc_STRVAR(isupper__doc__,
3471"S.isupper() -> bool\n\
3472\n\
3473Return True if all cased characters in S are uppercase and there is\n\
3474at least one cased character in S, False otherwise.");
3475
3476static PyObject*
3477string_isupper(PyStringObject *self)
3478{
3479    register const unsigned char *p
3480        = (unsigned char *) PyString_AS_STRING(self);
3481    register const unsigned char *e;
3482    int cased;
3483
3484    /* Shortcut for single character strings */
3485    if (PyString_GET_SIZE(self) == 1)
3486        return PyBool_FromLong(isupper(*p) != 0);
3487
3488    /* Special case for empty strings */
3489    if (PyString_GET_SIZE(self) == 0)
3490        return PyBool_FromLong(0);
3491
3492    e = p + PyString_GET_SIZE(self);
3493    cased = 0;
3494    for (; p < e; p++) {
3495        if (islower(*p))
3496            return PyBool_FromLong(0);
3497        else if (!cased && isupper(*p))
3498            cased = 1;
3499    }
3500    return PyBool_FromLong(cased);
3501}
3502
3503
3504PyDoc_STRVAR(istitle__doc__,
3505"S.istitle() -> bool\n\
3506\n\
3507Return True if S is a titlecased string and there is at least one\n\
3508character in S, i.e. uppercase characters may only follow uncased\n\
3509characters and lowercase characters only cased ones. Return False\n\
3510otherwise.");
3511
3512static PyObject*
3513string_istitle(PyStringObject *self, PyObject *uncased)
3514{
3515    register const unsigned char *p
3516        = (unsigned char *) PyString_AS_STRING(self);
3517    register const unsigned char *e;
3518    int cased, previous_is_cased;
3519
3520    /* Shortcut for single character strings */
3521    if (PyString_GET_SIZE(self) == 1)
3522        return PyBool_FromLong(isupper(*p) != 0);
3523
3524    /* Special case for empty strings */
3525    if (PyString_GET_SIZE(self) == 0)
3526        return PyBool_FromLong(0);
3527
3528    e = p + PyString_GET_SIZE(self);
3529    cased = 0;
3530    previous_is_cased = 0;
3531    for (; p < e; p++) {
3532        register const unsigned char ch = *p;
3533
3534        if (isupper(ch)) {
3535            if (previous_is_cased)
3536                return PyBool_FromLong(0);
3537            previous_is_cased = 1;
3538            cased = 1;
3539        }
3540        else if (islower(ch)) {
3541            if (!previous_is_cased)
3542                return PyBool_FromLong(0);
3543            previous_is_cased = 1;
3544            cased = 1;
3545        }
3546        else
3547            previous_is_cased = 0;
3548    }
3549    return PyBool_FromLong(cased);
3550}
3551
3552
3553PyDoc_STRVAR(splitlines__doc__,
3554"S.splitlines(keepends=False) -> list of strings\n\
3555\n\
3556Return a list of the lines in S, breaking at line boundaries.\n\
3557Line breaks are not included in the resulting list unless keepends\n\
3558is given and true.");
3559
3560static PyObject*
3561string_splitlines(PyStringObject *self, PyObject *args)
3562{
3563    int keepends = 0;
3564
3565    if (!PyArg_ParseTuple(args, "|i:splitlines", &keepends))
3566        return NULL;
3567
3568    return stringlib_splitlines(
3569        (PyObject*) self, PyString_AS_STRING(self), PyString_GET_SIZE(self),
3570        keepends
3571    );
3572}
3573
3574PyDoc_STRVAR(sizeof__doc__,
3575"S.__sizeof__() -> size of S in memory, in bytes");
3576
3577static PyObject *
3578string_sizeof(PyStringObject *v)
3579{
3580    Py_ssize_t res;
3581    res = PyStringObject_SIZE + PyString_GET_SIZE(v) * Py_TYPE(v)->tp_itemsize;
3582    return PyInt_FromSsize_t(res);
3583}
3584
3585static PyObject *
3586string_getnewargs(PyStringObject *v)
3587{
3588    return Py_BuildValue("(s#)", v->ob_sval, Py_SIZE(v));
3589}
3590
3591
3592#include "stringlib/string_format.h"
3593
3594PyDoc_STRVAR(format__doc__,
3595"S.format(*args, **kwargs) -> string\n\
3596\n\
3597Return a formatted version of S, using substitutions from args and kwargs.\n\
3598The substitutions are identified by braces ('{' and '}').");
3599
3600static PyObject *
3601string__format__(PyObject* self, PyObject* args)
3602{
3603    PyObject *format_spec;
3604    PyObject *result = NULL;
3605    PyObject *tmp = NULL;
3606
3607    /* If 2.x, convert format_spec to the same type as value */
3608    /* This is to allow things like u''.format('') */
3609    if (!PyArg_ParseTuple(args, "O:__format__", &format_spec))
3610        goto done;
3611    if (!(PyString_Check(format_spec) || PyUnicode_Check(format_spec))) {
3612        PyErr_Format(PyExc_TypeError, "__format__ arg must be str "
3613                     "or unicode, not %s", Py_TYPE(format_spec)->tp_name);
3614        goto done;
3615    }
3616    tmp = PyObject_Str(format_spec);
3617    if (tmp == NULL)
3618        goto done;
3619    format_spec = tmp;
3620
3621    result = _PyBytes_FormatAdvanced(self,
3622                                     PyString_AS_STRING(format_spec),
3623                                     PyString_GET_SIZE(format_spec));
3624done:
3625    Py_XDECREF(tmp);
3626    return result;
3627}
3628
3629PyDoc_STRVAR(p_format__doc__,
3630"S.__format__(format_spec) -> string\n\
3631\n\
3632Return a formatted version of S as described by format_spec.");
3633
3634
/* Method table for the str type (installed as tp_methods of PyString_Type).
   Each entry gives the Python-level name, the C implementation, its
   calling-convention flags and, where one is supplied, its docstring.
   The table ends with an all-NULL sentinel entry. */
static PyMethodDef
string_methods[] = {
    /* Counterparts of the obsolete stropmodule functions; except
       string.maketrans(). */
    {"join", (PyCFunction)string_join, METH_O, join__doc__},
    {"split", (PyCFunction)string_split, METH_VARARGS, split__doc__},
    {"rsplit", (PyCFunction)string_rsplit, METH_VARARGS, rsplit__doc__},
    {"lower", (PyCFunction)string_lower, METH_NOARGS, lower__doc__},
    {"upper", (PyCFunction)string_upper, METH_NOARGS, upper__doc__},
    {"islower", (PyCFunction)string_islower, METH_NOARGS, islower__doc__},
    {"isupper", (PyCFunction)string_isupper, METH_NOARGS, isupper__doc__},
    {"isspace", (PyCFunction)string_isspace, METH_NOARGS, isspace__doc__},
    {"isdigit", (PyCFunction)string_isdigit, METH_NOARGS, isdigit__doc__},
    {"istitle", (PyCFunction)string_istitle, METH_NOARGS, istitle__doc__},
    {"isalpha", (PyCFunction)string_isalpha, METH_NOARGS, isalpha__doc__},
    {"isalnum", (PyCFunction)string_isalnum, METH_NOARGS, isalnum__doc__},
    {"capitalize", (PyCFunction)string_capitalize, METH_NOARGS,
     capitalize__doc__},
    {"count", (PyCFunction)string_count, METH_VARARGS, count__doc__},
    {"endswith", (PyCFunction)string_endswith, METH_VARARGS,
     endswith__doc__},
    {"partition", (PyCFunction)string_partition, METH_O, partition__doc__},
    {"find", (PyCFunction)string_find, METH_VARARGS, find__doc__},
    {"index", (PyCFunction)string_index, METH_VARARGS, index__doc__},
    {"lstrip", (PyCFunction)string_lstrip, METH_VARARGS, lstrip__doc__},
    {"replace", (PyCFunction)string_replace, METH_VARARGS, replace__doc__},
    {"rfind", (PyCFunction)string_rfind, METH_VARARGS, rfind__doc__},
    {"rindex", (PyCFunction)string_rindex, METH_VARARGS, rindex__doc__},
    {"rstrip", (PyCFunction)string_rstrip, METH_VARARGS, rstrip__doc__},
    {"rpartition", (PyCFunction)string_rpartition, METH_O,
     rpartition__doc__},
    {"startswith", (PyCFunction)string_startswith, METH_VARARGS,
     startswith__doc__},
    {"strip", (PyCFunction)string_strip, METH_VARARGS, strip__doc__},
    {"swapcase", (PyCFunction)string_swapcase, METH_NOARGS,
     swapcase__doc__},
    {"translate", (PyCFunction)string_translate, METH_VARARGS,
     translate__doc__},
    {"title", (PyCFunction)string_title, METH_NOARGS, title__doc__},
    {"ljust", (PyCFunction)string_ljust, METH_VARARGS, ljust__doc__},
    {"rjust", (PyCFunction)string_rjust, METH_VARARGS, rjust__doc__},
    {"center", (PyCFunction)string_center, METH_VARARGS, center__doc__},
    {"zfill", (PyCFunction)string_zfill, METH_VARARGS, zfill__doc__},
    /* New-style formatting support. */
    {"format", (PyCFunction) do_string_format, METH_VARARGS | METH_KEYWORDS, format__doc__},
    {"__format__", (PyCFunction) string__format__, METH_VARARGS, p_format__doc__},
    /* Underscore-prefixed helpers registered without a docstring. */
    {"_formatter_field_name_split", (PyCFunction) formatter_field_name_split, METH_NOARGS},
    {"_formatter_parser", (PyCFunction) formatter_parser, METH_NOARGS},
    {"encode", (PyCFunction)string_encode, METH_VARARGS | METH_KEYWORDS, encode__doc__},
    {"decode", (PyCFunction)string_decode, METH_VARARGS | METH_KEYWORDS, decode__doc__},
    {"expandtabs", (PyCFunction)string_expandtabs, METH_VARARGS,
     expandtabs__doc__},
    {"splitlines", (PyCFunction)string_splitlines, METH_VARARGS,
     splitlines__doc__},
    {"__sizeof__", (PyCFunction)string_sizeof, METH_NOARGS,
     sizeof__doc__},
    {"__getnewargs__",          (PyCFunction)string_getnewargs, METH_NOARGS},
    {NULL,     NULL}                         /* sentinel */
};
3693
3694static PyObject *
3695str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
3696
3697static PyObject *
3698string_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3699{
3700    PyObject *x = NULL;
3701    static char *kwlist[] = {"object", 0};
3702
3703    if (type != &PyString_Type)
3704        return str_subtype_new(type, args, kwds);
3705    if (!PyArg_ParseTupleAndKeywords(args, kwds, "|O:str", kwlist, &x))
3706        return NULL;
3707    if (x == NULL)
3708        return PyString_FromString("");
3709    return PyObject_Str(x);
3710}
3711
3712static PyObject *
3713str_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3714{
3715    PyObject *tmp, *pnew;
3716    Py_ssize_t n;
3717
3718    assert(PyType_IsSubtype(type, &PyString_Type));
3719    tmp = string_new(&PyString_Type, args, kwds);
3720    if (tmp == NULL)
3721        return NULL;
3722    assert(PyString_CheckExact(tmp));
3723    n = PyString_GET_SIZE(tmp);
3724    pnew = type->tp_alloc(type, n);
3725    if (pnew != NULL) {
3726        Py_MEMCPY(PyString_AS_STRING(pnew), PyString_AS_STRING(tmp), n+1);
3727        ((PyStringObject *)pnew)->ob_shash =
3728            ((PyStringObject *)tmp)->ob_shash;
3729        ((PyStringObject *)pnew)->ob_sstate = SSTATE_NOT_INTERNED;
3730    }
3731    Py_DECREF(tmp);
3732    return pnew;
3733}
3734
3735static PyObject *
3736basestring_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
3737{
3738    PyErr_SetString(PyExc_TypeError,
3739                    "The basestring type cannot be instantiated");
3740    return NULL;
3741}
3742
3743static PyObject *
3744string_mod(PyObject *v, PyObject *w)
3745{
3746    if (!PyString_Check(v)) {
3747        Py_INCREF(Py_NotImplemented);
3748        return Py_NotImplemented;
3749    }
3750    return PyString_Format(v, w);
3751}
3752
/* Docstring installed as tp_doc of PyBaseString_Type. */
PyDoc_STRVAR(basestring_doc,
"Type basestring cannot be instantiated; it is the base for str and unicode.");
3755
/* Number protocol table for str.  Only the remainder slot is filled in,
   so that `%` performs string formatting; the slots not listed after it
   are left zero-initialized. */
static PyNumberMethods string_as_number = {
    0,                          /*nb_add*/
    0,                          /*nb_subtract*/
    0,                          /*nb_multiply*/
    0,                          /*nb_divide*/
    string_mod,                 /*nb_remainder*/
};
3763
3764
/* Type object for basestring.  It derives from object, allows subclassing
   (Py_TPFLAGS_BASETYPE), and its tp_new always raises TypeError, so it
   serves only as a base type. */
PyTypeObject PyBaseString_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "basestring",                               /* tp_name */
    0,                                          /* tp_basicsize */
    0,                                          /* tp_itemsize */
    0,                                          /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_compare */
    0,                                          /* tp_repr */
    0,                                          /* tp_as_number */
    0,                                          /* tp_as_sequence */
    0,                                          /* tp_as_mapping */
    0,                                          /* tp_hash */
    0,                                          /* tp_call */
    0,                                          /* tp_str */
    0,                                          /* tp_getattro */
    0,                                          /* tp_setattro */
    0,                                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE, /* tp_flags */
    basestring_doc,                             /* tp_doc */
    0,                                          /* tp_traverse */
    0,                                          /* tp_clear */
    0,                                          /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    0,                                          /* tp_iter */
    0,                                          /* tp_iternext */
    0,                                          /* tp_methods */
    0,                                          /* tp_members */
    0,                                          /* tp_getset */
    &PyBaseObject_Type,                         /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    0,                                          /* tp_init */
    0,                                          /* tp_alloc */
    basestring_new,                             /* tp_new */
    0,                                          /* tp_free */
};
3806
/* Docstring installed as tp_doc of PyString_Type. */
PyDoc_STRVAR(string_doc,
"str(object='') -> string\n\
\n\
Return a nice string representation of the object.\n\
If the argument is a string, the return value is the same object.");

/* Type object for str.  It derives from basestring, is variable-sized
   (basic size PyStringObject_SIZE plus one char per item), and wires up
   the number, sequence, mapping and buffer protocol tables together with
   the method table defined in this file. */
PyTypeObject PyString_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "str",                                      /* tp_name */
    PyStringObject_SIZE,                        /* tp_basicsize */
    sizeof(char),                               /* tp_itemsize */
    string_dealloc,                             /* tp_dealloc */
    (printfunc)string_print,                    /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_compare */
    string_repr,                                /* tp_repr */
    &string_as_number,                          /* tp_as_number */
    &string_as_sequence,                        /* tp_as_sequence */
    &string_as_mapping,                         /* tp_as_mapping */
    (hashfunc)string_hash,                      /* tp_hash */
    0,                                          /* tp_call */
    string_str,                                 /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    0,                                          /* tp_setattro */
    &string_as_buffer,                          /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_CHECKTYPES |
        Py_TPFLAGS_BASETYPE | Py_TPFLAGS_STRING_SUBCLASS |
        Py_TPFLAGS_HAVE_NEWBUFFER,              /* tp_flags */
    string_doc,                                 /* tp_doc */
    0,                                          /* tp_traverse */
    0,                                          /* tp_clear */
    (richcmpfunc)string_richcompare,            /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    0,                                          /* tp_iter */
    0,                                          /* tp_iternext */
    string_methods,                             /* tp_methods */
    0,                                          /* tp_members */
    0,                                          /* tp_getset */
    &PyBaseString_Type,                         /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    0,                                          /* tp_init */
    0,                                          /* tp_alloc */
    string_new,                                 /* tp_new */
    PyObject_Del,                               /* tp_free */
};
3856
3857void
3858PyString_Concat(register PyObject **pv, register PyObject *w)
3859{
3860    register PyObject *v;
3861    if (*pv == NULL)
3862        return;
3863    if (w == NULL || !PyString_Check(*pv)) {
3864        Py_CLEAR(*pv);
3865        return;
3866    }
3867    v = string_concat((PyStringObject *) *pv, w);
3868    Py_DECREF(*pv);
3869    *pv = v;
3870}
3871
3872void
3873PyString_ConcatAndDel(register PyObject **pv, register PyObject *w)
3874{
3875    PyString_Concat(pv, w);
3876    Py_XDECREF(w);
3877}
3878
3879
3880/* The following function breaks the notion that strings are immutable:
3881   it changes the size of a string.  We get away with this only if there
3882   is only one module referencing the object.  You can also think of it
3883   as creating a new string object and destroying the old one, only
3884   more efficiently.  In any case, don't use this if the string may
3885   already be known to some other part of the code...
3886   Note that if there's not enough memory to resize the string, the original
3887   string object at *pv is deallocated, *pv is set to NULL, an "out of
3888   memory" exception is set, and -1 is returned.  Else (on success) 0 is
3889   returned, and the value in *pv may or may not be the same as on input.
3890   As always, an extra byte is allocated for a trailing \0 byte (newsize
3891   does *not* include that), and a trailing \0 byte is stored.
3892*/
3893
3894int
3895_PyString_Resize(PyObject **pv, Py_ssize_t newsize)
3896{
3897    register PyObject *v;
3898    register PyStringObject *sv;
3899    v = *pv;
3900    if (!PyString_Check(v) || Py_REFCNT(v) != 1 || newsize < 0 ||
3901        PyString_CHECK_INTERNED(v)) {
3902        *pv = 0;
3903        Py_DECREF(v);
3904        PyErr_BadInternalCall();
3905        return -1;
3906    }
3907    /* XXX UNREF/NEWREF interface should be more symmetrical */
3908    _Py_DEC_REFTOTAL;
3909    _Py_ForgetReference(v);
3910    *pv = (PyObject *)
3911        PyObject_REALLOC((char *)v, PyStringObject_SIZE + newsize);
3912    if (*pv == NULL) {
3913        PyObject_Del(v);
3914        PyErr_NoMemory();
3915        return -1;
3916    }
3917    _Py_NewReference(*pv);
3918    sv = (PyStringObject *) *pv;
3919    Py_SIZE(sv) = newsize;
3920    sv->ob_sval[newsize] = '\0';
3921    sv->ob_shash = -1;          /* invalidate cached hash value */
3922    return 0;
3923}
3924
3925/* Helpers for formatstring */
3926
3927Py_LOCAL_INLINE(PyObject *)
3928getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
3929{
3930    Py_ssize_t argidx = *p_argidx;
3931    if (argidx < arglen) {
3932        (*p_argidx)++;
3933        if (arglen < 0)
3934            return args;
3935        else
3936            return PyTuple_GetItem(args, argidx);
3937    }
3938    PyErr_SetString(PyExc_TypeError,
3939                    "not enough arguments for format string");
3940    return NULL;
3941}
3942
/* Format codes
 *
 * Bit flags collected by PyString_Format() from the flag characters that
 * directly follow the '%' of a conversion specifier:
 *
 * F_LJUST      '-'     pad on the right instead of the left (also set
 *                      when a '*' width argument is negative)
 * F_SIGN       '+'     emit '+' in front of non-negative numbers
 * F_BLANK      ' '     emit ' ' in front of non-negative numbers
 *                      (only consulted when F_SIGN is not set)
 * F_ALT        '#'     alternate form (e.g. keep the 0x/0X prefix)
 * F_ZERO       '0'     use '0' as the fill character for numeric
 *                      conversions
 */
#define F_LJUST (1<<0)
#define F_SIGN  (1<<1)
#define F_BLANK (1<<2)
#define F_ALT   (1<<3)
#define F_ZERO  (1<<4)
3955
3956/* Returns a new reference to a PyString object, or NULL on failure. */
3957
3958static PyObject *
3959formatfloat(PyObject *v, int flags, int prec, int type)
3960{
3961    char *p;
3962    PyObject *result;
3963    double x;
3964
3965    x = PyFloat_AsDouble(v);
3966    if (x == -1.0 && PyErr_Occurred()) {
3967        PyErr_Format(PyExc_TypeError, "float argument required, "
3968                     "not %.200s", Py_TYPE(v)->tp_name);
3969        return NULL;
3970    }
3971
3972    if (prec < 0)
3973        prec = 6;
3974
3975    p = PyOS_double_to_string(x, type, prec,
3976                              (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
3977
3978    if (p == NULL)
3979        return NULL;
3980    result = PyString_FromStringAndSize(p, strlen(p));
3981    PyMem_Free(p);
3982    return result;
3983}
3984
3985/* _PyString_FormatLong emulates the format codes d, u, o, x and X, and
3986 * the F_ALT flag, for Python's long (unbounded) ints.  It's not used for
3987 * Python's regular ints.
3988 * Return value:  a new PyString*, or NULL if error.
3989 *  .  *pbuf is set to point into it,
3990 *     *plen set to the # of chars following that.
3991 *     Caller must decref it when done using pbuf.
3992 *     The string starting at *pbuf is of the form
3993 *         "-"? ("0x" | "0X")? digit+
3994 *     "0x"/"0X" are present only for x and X conversions, with F_ALT
3995 *         set in flags.  The case of hex digits will be correct,
3996 *     There will be at least prec digits, zero-filled on the left if
3997 *         necessary to get that many.
3998 * val          object to be converted
3999 * flags        bitmask of format flags; only F_ALT is looked at
4000 * prec         minimum number of digits; 0-fill on left if needed
4001 * type         a character in [duoxX]; u acts the same as d
4002 *
4003 * CAUTION:  o, x and X conversions on regular ints can never
4004 * produce a '-' sign, but can for Python's unbounded ints.
4005 */
4006PyObject*
4007_PyString_FormatLong(PyObject *val, int flags, int prec, int type,
4008                     char **pbuf, int *plen)
4009{
4010    PyObject *result = NULL;
4011    char *buf;
4012    Py_ssize_t i;
4013    int sign;           /* 1 if '-', else 0 */
4014    int len;            /* number of characters */
4015    Py_ssize_t llen;
4016    int numdigits;      /* len == numnondigits + numdigits */
4017    int numnondigits = 0;
4018
4019    switch (type) {
4020    case 'd':
4021    case 'u':
4022        result = Py_TYPE(val)->tp_str(val);
4023        break;
4024    case 'o':
4025        result = Py_TYPE(val)->tp_as_number->nb_oct(val);
4026        break;
4027    case 'x':
4028    case 'X':
4029        numnondigits = 2;
4030        result = Py_TYPE(val)->tp_as_number->nb_hex(val);
4031        break;
4032    default:
4033        assert(!"'type' not in [duoxX]");
4034    }
4035    if (!result)
4036        return NULL;
4037
4038    buf = PyString_AsString(result);
4039    if (!buf) {
4040        Py_DECREF(result);
4041        return NULL;
4042    }
4043
4044    /* To modify the string in-place, there can only be one reference. */
4045    if (Py_REFCNT(result) != 1) {
4046        PyErr_BadInternalCall();
4047        return NULL;
4048    }
4049    llen = PyString_Size(result);
4050    if (llen > INT_MAX) {
4051        PyErr_SetString(PyExc_ValueError, "string too large in _PyString_FormatLong");
4052        return NULL;
4053    }
4054    len = (int)llen;
4055    if (buf[len-1] == 'L') {
4056        --len;
4057        buf[len] = '\0';
4058    }
4059    sign = buf[0] == '-';
4060    numnondigits += sign;
4061    numdigits = len - numnondigits;
4062    assert(numdigits > 0);
4063
4064    /* Get rid of base marker unless F_ALT */
4065    if ((flags & F_ALT) == 0) {
4066        /* Need to skip 0x, 0X or 0. */
4067        int skipped = 0;
4068        switch (type) {
4069        case 'o':
4070            assert(buf[sign] == '0');
4071            /* If 0 is only digit, leave it alone. */
4072            if (numdigits > 1) {
4073                skipped = 1;
4074                --numdigits;
4075            }
4076            break;
4077        case 'x':
4078        case 'X':
4079            assert(buf[sign] == '0');
4080            assert(buf[sign + 1] == 'x');
4081            skipped = 2;
4082            numnondigits -= 2;
4083            break;
4084        }
4085        if (skipped) {
4086            buf += skipped;
4087            len -= skipped;
4088            if (sign)
4089                buf[0] = '-';
4090        }
4091        assert(len == numnondigits + numdigits);
4092        assert(numdigits > 0);
4093    }
4094
4095    /* Fill with leading zeroes to meet minimum width. */
4096    if (prec > numdigits) {
4097        PyObject *r1 = PyString_FromStringAndSize(NULL,
4098                                numnondigits + prec);
4099        char *b1;
4100        if (!r1) {
4101            Py_DECREF(result);
4102            return NULL;
4103        }
4104        b1 = PyString_AS_STRING(r1);
4105        for (i = 0; i < numnondigits; ++i)
4106            *b1++ = *buf++;
4107        for (i = 0; i < prec - numdigits; i++)
4108            *b1++ = '0';
4109        for (i = 0; i < numdigits; i++)
4110            *b1++ = *buf++;
4111        *b1 = '\0';
4112        Py_DECREF(result);
4113        result = r1;
4114        buf = PyString_AS_STRING(result);
4115        len = numnondigits + prec;
4116    }
4117
4118    /* Fix up case for hex conversions. */
4119    if (type == 'X') {
4120        /* Need to convert all lower case letters to upper case.
4121           and need to convert 0x to 0X (and -0x to -0X). */
4122        for (i = 0; i < len; i++)
4123            if (buf[i] >= 'a' && buf[i] <= 'x')
4124                buf[i] -= 'a'-'A';
4125    }
4126    *pbuf = buf;
4127    *plen = len;
4128    return result;
4129}
4130
4131Py_LOCAL_INLINE(int)
4132formatint(char *buf, size_t buflen, int flags,
4133          int prec, int type, PyObject *v)
4134{
4135    /* fmt = '%#.' + `prec` + 'l' + `type`
4136       worst case length = 3 + 19 (worst len of INT_MAX on 64-bit machine)
4137       + 1 + 1 = 24 */
4138    char fmt[64];       /* plenty big enough! */
4139    char *sign;
4140    long x;
4141
4142    x = PyInt_AsLong(v);
4143    if (x == -1 && PyErr_Occurred()) {
4144        PyErr_Format(PyExc_TypeError, "int argument required, not %.200s",
4145                     Py_TYPE(v)->tp_name);
4146        return -1;
4147    }
4148    if (x < 0 && type == 'u') {
4149        type = 'd';
4150    }
4151    if (x < 0 && (type == 'x' || type == 'X' || type == 'o'))
4152        sign = "-";
4153    else
4154        sign = "";
4155    if (prec < 0)
4156        prec = 1;
4157
4158    if ((flags & F_ALT) &&
4159        (type == 'x' || type == 'X')) {
4160        /* When converting under %#x or %#X, there are a number
4161         * of issues that cause pain:
4162         * - when 0 is being converted, the C standard leaves off
4163         *   the '0x' or '0X', which is inconsistent with other
4164         *   %#x/%#X conversions and inconsistent with Python's
4165         *   hex() function
4166         * - there are platforms that violate the standard and
4167         *   convert 0 with the '0x' or '0X'
4168         *   (Metrowerks, Compaq Tru64)
4169         * - there are platforms that give '0x' when converting
4170         *   under %#X, but convert 0 in accordance with the
4171         *   standard (OS/2 EMX)
4172         *
4173         * We can achieve the desired consistency by inserting our
4174         * own '0x' or '0X' prefix, and substituting %x/%X in place
4175         * of %#x/%#X.
4176         *
4177         * Note that this is the same approach as used in
4178         * formatint() in unicodeobject.c
4179         */
4180        PyOS_snprintf(fmt, sizeof(fmt), "%s0%c%%.%dl%c",
4181                      sign, type, prec, type);
4182    }
4183    else {
4184        PyOS_snprintf(fmt, sizeof(fmt), "%s%%%s.%dl%c",
4185                      sign, (flags&F_ALT) ? "#" : "",
4186                      prec, type);
4187    }
4188
4189    /* buf = '+'/'-'/'' + '0'/'0x'/'' + '[0-9]'*max(prec, len(x in octal))
4190     * worst case buf = '-0x' + [0-9]*prec, where prec >= 11
4191     */
4192    if (buflen <= 14 || buflen <= (size_t)3 + (size_t)prec) {
4193        PyErr_SetString(PyExc_OverflowError,
4194            "formatted integer is too long (precision too large?)");
4195        return -1;
4196    }
4197    if (sign[0])
4198        PyOS_snprintf(buf, buflen, fmt, -x);
4199    else
4200        PyOS_snprintf(buf, buflen, fmt, x);
4201    return (int)strlen(buf);
4202}
4203
4204Py_LOCAL_INLINE(int)
4205formatchar(char *buf, size_t buflen, PyObject *v)
4206{
4207    /* presume that the buffer is at least 2 characters long */
4208    if (PyString_Check(v)) {
4209        if (!PyArg_Parse(v, "c;%c requires int or char", &buf[0]))
4210            return -1;
4211    }
4212    else {
4213        if (!PyArg_Parse(v, "b;%c requires int or char", &buf[0]))
4214            return -1;
4215    }
4216    buf[1] = '\0';
4217    return 1;
4218}
4219
4220/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...)
4221
4222   FORMATBUFLEN is the length of the buffer in which the ints &
4223   chars are formatted. XXX This is a magic number. Each formatting
4224   routine does bounds checking to ensure no overflow, but a better
4225   solution may be to malloc a buffer of appropriate size for each
4226   format. For now, the current solution is sufficient.
4227*/
4228#define FORMATBUFLEN (size_t)120
4229
4230PyObject *
4231PyString_Format(PyObject *format, PyObject *args)
4232{
4233    char *fmt, *res;
4234    Py_ssize_t arglen, argidx;
4235    Py_ssize_t reslen, rescnt, fmtcnt;
4236    int args_owned = 0;
4237    PyObject *result, *orig_args;
4238#ifdef Py_USING_UNICODE
4239    PyObject *v, *w;
4240#endif
4241    PyObject *dict = NULL;
4242    if (format == NULL || !PyString_Check(format) || args == NULL) {
4243        PyErr_BadInternalCall();
4244        return NULL;
4245    }
4246    orig_args = args;
4247    fmt = PyString_AS_STRING(format);
4248    fmtcnt = PyString_GET_SIZE(format);
4249    reslen = rescnt = fmtcnt + 100;
4250    result = PyString_FromStringAndSize((char *)NULL, reslen);
4251    if (result == NULL)
4252        return NULL;
4253    res = PyString_AsString(result);
4254    if (PyTuple_Check(args)) {
4255        arglen = PyTuple_GET_SIZE(args);
4256        argidx = 0;
4257    }
4258    else {
4259        arglen = -1;
4260        argidx = -2;
4261    }
4262    if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
4263        !PyTuple_Check(args) && !PyObject_TypeCheck(args, &PyBaseString_Type))
4264        dict = args;
4265    while (--fmtcnt >= 0) {
4266        if (*fmt != '%') {
4267            if (--rescnt < 0) {
4268                rescnt = fmtcnt + 100;
4269                reslen += rescnt;
4270                if (_PyString_Resize(&result, reslen))
4271                    return NULL;
4272                res = PyString_AS_STRING(result)
4273                    + reslen - rescnt;
4274                --rescnt;
4275            }
4276            *res++ = *fmt++;
4277        }
4278        else {
4279            /* Got a format specifier */
4280            int flags = 0;
4281            Py_ssize_t width = -1;
4282            int prec = -1;
4283            int c = '\0';
4284            int fill;
4285            int isnumok;
4286            PyObject *v = NULL;
4287            PyObject *temp = NULL;
4288            char *pbuf;
4289            int sign;
4290            Py_ssize_t len;
4291            char formatbuf[FORMATBUFLEN];
4292                 /* For format{int,char}() */
4293#ifdef Py_USING_UNICODE
4294            char *fmt_start = fmt;
4295            Py_ssize_t argidx_start = argidx;
4296#endif
4297
4298            fmt++;
4299            if (*fmt == '(') {
4300                char *keystart;
4301                Py_ssize_t keylen;
4302                PyObject *key;
4303                int pcount = 1;
4304
4305                if (dict == NULL) {
4306                    PyErr_SetString(PyExc_TypeError,
4307                             "format requires a mapping");
4308                    goto error;
4309                }
4310                ++fmt;
4311                --fmtcnt;
4312                keystart = fmt;
4313                /* Skip over balanced parentheses */
4314                while (pcount > 0 && --fmtcnt >= 0) {
4315                    if (*fmt == ')')
4316                        --pcount;
4317                    else if (*fmt == '(')
4318                        ++pcount;
4319                    fmt++;
4320                }
4321                keylen = fmt - keystart - 1;
4322                if (fmtcnt < 0 || pcount > 0) {
4323                    PyErr_SetString(PyExc_ValueError,
4324                               "incomplete format key");
4325                    goto error;
4326                }
4327                key = PyString_FromStringAndSize(keystart,
4328                                                 keylen);
4329                if (key == NULL)
4330                    goto error;
4331                if (args_owned) {
4332                    Py_DECREF(args);
4333                    args_owned = 0;
4334                }
4335                args = PyObject_GetItem(dict, key);
4336                Py_DECREF(key);
4337                if (args == NULL) {
4338                    goto error;
4339                }
4340                args_owned = 1;
4341                arglen = -1;
4342                argidx = -2;
4343            }
4344            while (--fmtcnt >= 0) {
4345                switch (c = *fmt++) {
4346                case '-': flags |= F_LJUST; continue;
4347                case '+': flags |= F_SIGN; continue;
4348                case ' ': flags |= F_BLANK; continue;
4349                case '#': flags |= F_ALT; continue;
4350                case '0': flags |= F_ZERO; continue;
4351                }
4352                break;
4353            }
4354            if (c == '*') {
4355                v = getnextarg(args, arglen, &argidx);
4356                if (v == NULL)
4357                    goto error;
4358                if (!PyInt_Check(v)) {
4359                    PyErr_SetString(PyExc_TypeError,
4360                                    "* wants int");
4361                    goto error;
4362                }
4363                width = PyInt_AsSsize_t(v);
4364                if (width == -1 && PyErr_Occurred())
4365                    goto error;
4366                if (width < 0) {
4367                    flags |= F_LJUST;
4368                    width = -width;
4369                }
4370                if (--fmtcnt >= 0)
4371                    c = *fmt++;
4372            }
4373            else if (c >= 0 && isdigit(c)) {
4374                width = c - '0';
4375                while (--fmtcnt >= 0) {
4376                    c = Py_CHARMASK(*fmt++);
4377                    if (!isdigit(c))
4378                        break;
4379                    if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
4380                        PyErr_SetString(
4381                            PyExc_ValueError,
4382                            "width too big");
4383                        goto error;
4384                    }
4385                    width = width*10 + (c - '0');
4386                }
4387            }
4388            if (c == '.') {
4389                prec = 0;
4390                if (--fmtcnt >= 0)
4391                    c = *fmt++;
4392                if (c == '*') {
4393                    v = getnextarg(args, arglen, &argidx);
4394                    if (v == NULL)
4395                        goto error;
4396                    if (!PyInt_Check(v)) {
4397                        PyErr_SetString(
4398                            PyExc_TypeError,
4399                            "* wants int");
4400                        goto error;
4401                    }
4402                    prec = _PyInt_AsInt(v);
4403                    if (prec == -1 && PyErr_Occurred())
4404                        goto error;
4405                    if (prec < 0)
4406                        prec = 0;
4407                    if (--fmtcnt >= 0)
4408                        c = *fmt++;
4409                }
4410                else if (c >= 0 && isdigit(c)) {
4411                    prec = c - '0';
4412                    while (--fmtcnt >= 0) {
4413                        c = Py_CHARMASK(*fmt++);
4414                        if (!isdigit(c))
4415                            break;
4416                        if (prec > (INT_MAX - ((int)c - '0')) / 10) {
4417                            PyErr_SetString(
4418                                PyExc_ValueError,
4419                                "prec too big");
4420                            goto error;
4421                        }
4422                        prec = prec*10 + (c - '0');
4423                    }
4424                }
4425            } /* prec */
4426            if (fmtcnt >= 0) {
4427                if (c == 'h' || c == 'l' || c == 'L') {
4428                    if (--fmtcnt >= 0)
4429                        c = *fmt++;
4430                }
4431            }
4432            if (fmtcnt < 0) {
4433                PyErr_SetString(PyExc_ValueError,
4434                                "incomplete format");
4435                goto error;
4436            }
4437            if (c != '%') {
4438                v = getnextarg(args, arglen, &argidx);
4439                if (v == NULL)
4440                    goto error;
4441            }
4442            sign = 0;
4443            fill = ' ';
4444            switch (c) {
4445            case '%':
4446                pbuf = "%";
4447                len = 1;
4448                break;
4449            case 's':
4450#ifdef Py_USING_UNICODE
4451                if (PyUnicode_Check(v)) {
4452                    fmt = fmt_start;
4453                    argidx = argidx_start;
4454                    goto unicode;
4455                }
4456#endif
4457                temp = _PyObject_Str(v);
4458#ifdef Py_USING_UNICODE
4459                if (temp != NULL && PyUnicode_Check(temp)) {
4460                    Py_DECREF(temp);
4461                    fmt = fmt_start;
4462                    argidx = argidx_start;
4463                    goto unicode;
4464                }
4465#endif
4466                /* Fall through */
4467            case 'r':
4468                if (c == 'r')
4469                    temp = PyObject_Repr(v);
4470                if (temp == NULL)
4471                    goto error;
4472                if (!PyString_Check(temp)) {
4473                    PyErr_SetString(PyExc_TypeError,
4474                      "%s argument has non-string str()");
4475                    Py_DECREF(temp);
4476                    goto error;
4477                }
4478                pbuf = PyString_AS_STRING(temp);
4479                len = PyString_GET_SIZE(temp);
4480                if (prec >= 0 && len > prec)
4481                    len = prec;
4482                break;
4483            case 'i':
4484            case 'd':
4485            case 'u':
4486            case 'o':
4487            case 'x':
4488            case 'X':
4489                if (c == 'i')
4490                    c = 'd';
4491                isnumok = 0;
4492                if (PyNumber_Check(v)) {
4493                    PyObject *iobj=NULL;
4494
4495                    if (PyInt_Check(v) || (PyLong_Check(v))) {
4496                        iobj = v;
4497                        Py_INCREF(iobj);
4498                    }
4499                    else {
4500                        iobj = PyNumber_Int(v);
4501                        if (iobj==NULL) {
4502                            PyErr_Clear();
4503                            iobj = PyNumber_Long(v);
4504                        }
4505                    }
4506                    if (iobj!=NULL) {
4507                        if (PyInt_Check(iobj)) {
4508                            isnumok = 1;
4509                            pbuf = formatbuf;
4510                            len = formatint(pbuf,
4511                                            sizeof(formatbuf),
4512                                            flags, prec, c, iobj);
4513                            Py_DECREF(iobj);
4514                            if (len < 0)
4515                                goto error;
4516                            sign = 1;
4517                        }
4518                        else if (PyLong_Check(iobj)) {
4519                            int ilen;
4520
4521                            isnumok = 1;
4522                            temp = _PyString_FormatLong(iobj, flags,
4523                                prec, c, &pbuf, &ilen);
4524                            Py_DECREF(iobj);
4525                            len = ilen;
4526                            if (!temp)
4527                                goto error;
4528                            sign = 1;
4529                        }
4530                        else {
4531                            Py_DECREF(iobj);
4532                        }
4533                    }
4534                }
4535                if (!isnumok) {
4536                    PyErr_Format(PyExc_TypeError,
4537                        "%%%c format: a number is required, "
4538                        "not %.200s", c, Py_TYPE(v)->tp_name);
4539                    goto error;
4540                }
4541                if (flags & F_ZERO)
4542                    fill = '0';
4543                break;
4544            case 'e':
4545            case 'E':
4546            case 'f':
4547            case 'F':
4548            case 'g':
4549            case 'G':
4550                temp = formatfloat(v, flags, prec, c);
4551                if (temp == NULL)
4552                    goto error;
4553                pbuf = PyString_AS_STRING(temp);
4554                len = PyString_GET_SIZE(temp);
4555                sign = 1;
4556                if (flags & F_ZERO)
4557                    fill = '0';
4558                break;
4559            case 'c':
4560#ifdef Py_USING_UNICODE
4561                if (PyUnicode_Check(v)) {
4562                    fmt = fmt_start;
4563                    argidx = argidx_start;
4564                    goto unicode;
4565                }
4566#endif
4567                pbuf = formatbuf;
4568                len = formatchar(pbuf, sizeof(formatbuf), v);
4569                if (len < 0)
4570                    goto error;
4571                break;
4572            default:
4573                PyErr_Format(PyExc_ValueError,
4574                  "unsupported format character '%c' (0x%x) "
4575                  "at index %zd",
4576                  c, c,
4577                  (Py_ssize_t)(fmt - 1 -
4578                               PyString_AsString(format)));
4579                goto error;
4580            }
4581            if (sign) {
4582                if (*pbuf == '-' || *pbuf == '+') {
4583                    sign = *pbuf++;
4584                    len--;
4585                }
4586                else if (flags & F_SIGN)
4587                    sign = '+';
4588                else if (flags & F_BLANK)
4589                    sign = ' ';
4590                else
4591                    sign = 0;
4592            }
4593            if (width < len)
4594                width = len;
4595            if (rescnt - (sign != 0) < width) {
4596                reslen -= rescnt;
4597                rescnt = width + fmtcnt + 100;
4598                reslen += rescnt;
4599                if (reslen < 0) {
4600                    Py_DECREF(result);
4601                    Py_XDECREF(temp);
4602                    return PyErr_NoMemory();
4603                }
4604                if (_PyString_Resize(&result, reslen)) {
4605                    Py_XDECREF(temp);
4606                    return NULL;
4607                }
4608                res = PyString_AS_STRING(result)
4609                    + reslen - rescnt;
4610            }
4611            if (sign) {
4612                if (fill != ' ')
4613                    *res++ = sign;
4614                rescnt--;
4615                if (width > len)
4616                    width--;
4617            }
4618            if ((flags & F_ALT) && (c == 'x' || c == 'X')) {
4619                assert(pbuf[0] == '0');
4620                assert(pbuf[1] == c);
4621                if (fill != ' ') {
4622                    *res++ = *pbuf++;
4623                    *res++ = *pbuf++;
4624                }
4625                rescnt -= 2;
4626                width -= 2;
4627                if (width < 0)
4628                    width = 0;
4629                len -= 2;
4630            }
4631            if (width > len && !(flags & F_LJUST)) {
4632                do {
4633                    --rescnt;
4634                    *res++ = fill;
4635                } while (--width > len);
4636            }
4637            if (fill == ' ') {
4638                if (sign)
4639                    *res++ = sign;
4640                if ((flags & F_ALT) &&
4641                    (c == 'x' || c == 'X')) {
4642                    assert(pbuf[0] == '0');
4643                    assert(pbuf[1] == c);
4644                    *res++ = *pbuf++;
4645                    *res++ = *pbuf++;
4646                }
4647            }
4648            Py_MEMCPY(res, pbuf, len);
4649            res += len;
4650            rescnt -= len;
4651            while (--width >= len) {
4652                --rescnt;
4653                *res++ = ' ';
4654            }
4655            if (dict && (argidx < arglen) && c != '%') {
4656                PyErr_SetString(PyExc_TypeError,
4657                           "not all arguments converted during string formatting");
4658                Py_XDECREF(temp);
4659                goto error;
4660            }
4661            Py_XDECREF(temp);
4662        } /* '%' */
4663    } /* until end */
4664    if (argidx < arglen && !dict) {
4665        PyErr_SetString(PyExc_TypeError,
4666                        "not all arguments converted during string formatting");
4667        goto error;
4668    }
4669    if (args_owned) {
4670        Py_DECREF(args);
4671    }
4672    if (_PyString_Resize(&result, reslen - rescnt))
4673        return NULL;
4674    return result;
4675
4676#ifdef Py_USING_UNICODE
4677 unicode:
4678    if (args_owned) {
4679        Py_DECREF(args);
4680        args_owned = 0;
4681    }
4682    /* Fiddle args right (remove the first argidx arguments) */
4683    if (PyTuple_Check(orig_args) && argidx > 0) {
4684        PyObject *v;
4685        Py_ssize_t n = PyTuple_GET_SIZE(orig_args) - argidx;
4686        v = PyTuple_New(n);
4687        if (v == NULL)
4688            goto error;
4689        while (--n >= 0) {
4690            PyObject *w = PyTuple_GET_ITEM(orig_args, n + argidx);
4691            Py_INCREF(w);
4692            PyTuple_SET_ITEM(v, n, w);
4693        }
4694        args = v;
4695    } else {
4696        Py_INCREF(orig_args);
4697        args = orig_args;
4698    }
4699    args_owned = 1;
4700    /* Take what we have of the result and let the Unicode formatting
4701       function format the rest of the input. */
4702    rescnt = res - PyString_AS_STRING(result);
4703    if (_PyString_Resize(&result, rescnt))
4704        goto error;
4705    fmtcnt = PyString_GET_SIZE(format) - \
4706             (fmt - PyString_AS_STRING(format));
4707    format = PyUnicode_Decode(fmt, fmtcnt, NULL, NULL);
4708    if (format == NULL)
4709        goto error;
4710    v = PyUnicode_Format(format, args);
4711    Py_DECREF(format);
4712    if (v == NULL)
4713        goto error;
4714    /* Paste what we have (result) to what the Unicode formatting
4715       function returned (v) and return the result (or error) */
4716    w = PyUnicode_Concat(result, v);
4717    Py_DECREF(result);
4718    Py_DECREF(v);
4719    Py_DECREF(args);
4720    return w;
4721#endif /* Py_USING_UNICODE */
4722
4723 error:
4724    Py_DECREF(result);
4725    if (args_owned) {
4726        Py_DECREF(args);
4727    }
4728    return NULL;
4729}
4730
4731void
4732PyString_InternInPlace(PyObject **p)
4733{
4734    register PyStringObject *s = (PyStringObject *)(*p);
4735    PyObject *t;
4736    if (s == NULL || !PyString_Check(s))
4737        Py_FatalError("PyString_InternInPlace: strings only please!");
4738    /* If it's a string subclass, we don't really know what putting
4739       it in the interned dict might do. */
4740    if (!PyString_CheckExact(s))
4741        return;
4742    if (PyString_CHECK_INTERNED(s))
4743        return;
4744    if (interned == NULL) {
4745        interned = PyDict_New();
4746        if (interned == NULL) {
4747            PyErr_Clear(); /* Don't leave an exception */
4748            return;
4749        }
4750    }
4751    t = PyDict_GetItem(interned, (PyObject *)s);
4752    if (t) {
4753        Py_INCREF(t);
4754        Py_DECREF(*p);
4755        *p = t;
4756        return;
4757    }
4758
4759    if (PyDict_SetItem(interned, (PyObject *)s, (PyObject *)s) < 0) {
4760        PyErr_Clear();
4761        return;
4762    }
4763    /* The two references in interned are not counted by refcnt.
4764       The string deallocator will take care of this */
4765    Py_REFCNT(s) -= 2;
4766    PyString_CHECK_INTERNED(s) = SSTATE_INTERNED_MORTAL;
4767}
4768
4769void
4770PyString_InternImmortal(PyObject **p)
4771{
4772    PyString_InternInPlace(p);
4773    if (PyString_CHECK_INTERNED(*p) != SSTATE_INTERNED_IMMORTAL) {
4774        PyString_CHECK_INTERNED(*p) = SSTATE_INTERNED_IMMORTAL;
4775        Py_INCREF(*p);
4776    }
4777}
4778
4779
4780PyObject *
4781PyString_InternFromString(const char *cp)
4782{
4783    PyObject *s = PyString_FromString(cp);
4784    if (s == NULL)
4785        return NULL;
4786    PyString_InternInPlace(&s);
4787    return s;
4788}
4789
4790void
4791PyString_Fini(void)
4792{
4793    int i;
4794    for (i = 0; i < UCHAR_MAX + 1; i++)
4795        Py_CLEAR(characters[i]);
4796    Py_CLEAR(nullstring);
4797}
4798
4799void _Py_ReleaseInternedStrings(void)
4800{
4801    PyObject *keys;
4802    PyStringObject *s;
4803    Py_ssize_t i, n;
4804    Py_ssize_t immortal_size = 0, mortal_size = 0;
4805
4806    if (interned == NULL || !PyDict_Check(interned))
4807        return;
4808    keys = PyDict_Keys(interned);
4809    if (keys == NULL || !PyList_Check(keys)) {
4810        PyErr_Clear();
4811        return;
4812    }
4813
4814    /* Since _Py_ReleaseInternedStrings() is intended to help a leak
4815       detector, interned strings are not forcibly deallocated; rather, we
4816       give them their stolen references back, and then clear and DECREF
4817       the interned dict. */
4818
4819    n = PyList_GET_SIZE(keys);
4820    fprintf(stderr, "releasing %" PY_FORMAT_SIZE_T "d interned strings\n",
4821        n);
4822    for (i = 0; i < n; i++) {
4823        s = (PyStringObject *) PyList_GET_ITEM(keys, i);
4824        switch (s->ob_sstate) {
4825        case SSTATE_NOT_INTERNED:
4826            /* XXX Shouldn't happen */
4827            break;
4828        case SSTATE_INTERNED_IMMORTAL:
4829            Py_REFCNT(s) += 1;
4830            immortal_size += Py_SIZE(s);
4831            break;
4832        case SSTATE_INTERNED_MORTAL:
4833            Py_REFCNT(s) += 2;
4834            mortal_size += Py_SIZE(s);
4835            break;
4836        default:
4837            Py_FatalError("Inconsistent interned string state.");
4838        }
4839        s->ob_sstate = SSTATE_NOT_INTERNED;
4840    }
4841    fprintf(stderr, "total size of all interned strings: "
4842                    "%" PY_FORMAT_SIZE_T "d/%" PY_FORMAT_SIZE_T "d "
4843                    "mortal/immortal\n", mortal_size, immortal_size);
4844    Py_DECREF(keys);
4845    PyDict_Clear(interned);
4846    Py_CLEAR(interned);
4847}
4848