1/* bytes object implementation */
2
3#define PY_SSIZE_T_CLEAN
4
5#include "Python.h"
6
7#include "bytes_methods.h"
8#include "pystrhex.h"
9#include <stddef.h>
10
11/*[clinic input]
12class bytes "PyBytesObject *" "&PyBytes_Type"
13[clinic start generated code]*/
14/*[clinic end generated code: output=da39a3ee5e6b4b0d input=7a238f965d64892b]*/
15
16#include "clinic/bytesobject.c.h"
17
#ifdef COUNT_ALLOCS
/* Allocation statistics: how many times the cached empty bytes object
   (null_strings) or a cached one-byte bytes object (one_strings) was handed
   out instead of allocating a new object. */
Py_ssize_t null_strings, one_strings;
#endif

/* Cache of one-byte bytes objects, indexed by the byte value.  An entry is
   NULL until a one-byte bytes object with that value has been created. */
static PyBytesObject *characters[UCHAR_MAX + 1];
/* Cached empty bytes object; NULL until the first empty bytes object has
   been created. */
static PyBytesObject *nullstring;
24
25/* PyBytesObject_SIZE gives the basic size of a string; any memory allocation
26   for a string of length n should request PyBytesObject_SIZE + n bytes.
27
28   Using PyBytesObject_SIZE instead of sizeof(PyBytesObject) saves
29   3 bytes per string allocation on a typical system.
30*/
31#define PyBytesObject_SIZE (offsetof(PyBytesObject, ob_sval) + 1)
32
33/* Forward declaration */
34Py_LOCAL_INLINE(Py_ssize_t) _PyBytesWriter_GetSize(_PyBytesWriter *writer,
35                                                   char *str);
36
37/*
38   For PyBytes_FromString(), the parameter `str' points to a null-terminated
39   string containing exactly `size' bytes.
40
41   For PyBytes_FromStringAndSize(), the parameter `str' is
42   either NULL or else points to a string containing at least `size' bytes.
43   For PyBytes_FromStringAndSize(), the string in the `str' parameter does
44   not have to be null-terminated.  (Therefore it is safe to construct a
45   substring by calling `PyBytes_FromStringAndSize(origstring, substrlen)'.)
46   If `str' is NULL then PyBytes_FromStringAndSize() will allocate `size+1'
47   bytes (setting the last byte to the null terminating character) and you can
48   fill in the data yourself.  If `str' is non-NULL then the resulting
49   PyBytes object must be treated as immutable and you must not fill in nor
50   alter the data yourself, since the strings may be shared.
51
52   The PyObject member `op->ob_size', which denotes the number of "extra
53   items" in a variable-size object, will contain the number of bytes
54   allocated for string data, not counting the null terminating character.
55   It is therefore equal to the `size' parameter (for
56   PyBytes_FromStringAndSize()) or the length of the string in the `str'
57   parameter (for PyBytes_FromString()).
58*/
59static PyObject *
60_PyBytes_FromSize(Py_ssize_t size, int use_calloc)
61{
62    PyBytesObject *op;
63    assert(size >= 0);
64
65    if (size == 0 && (op = nullstring) != NULL) {
66#ifdef COUNT_ALLOCS
67        null_strings++;
68#endif
69        Py_INCREF(op);
70        return (PyObject *)op;
71    }
72
73    if ((size_t)size > (size_t)PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
74        PyErr_SetString(PyExc_OverflowError,
75                        "byte string is too large");
76        return NULL;
77    }
78
79    /* Inline PyObject_NewVar */
80    if (use_calloc)
81        op = (PyBytesObject *)PyObject_Calloc(1, PyBytesObject_SIZE + size);
82    else
83        op = (PyBytesObject *)PyObject_Malloc(PyBytesObject_SIZE + size);
84    if (op == NULL)
85        return PyErr_NoMemory();
86    (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
87    op->ob_shash = -1;
88    if (!use_calloc)
89        op->ob_sval[size] = '\0';
90    /* empty byte string singleton */
91    if (size == 0) {
92        nullstring = op;
93        Py_INCREF(op);
94    }
95    return (PyObject *) op;
96}
97
98PyObject *
99PyBytes_FromStringAndSize(const char *str, Py_ssize_t size)
100{
101    PyBytesObject *op;
102    if (size < 0) {
103        PyErr_SetString(PyExc_SystemError,
104            "Negative size passed to PyBytes_FromStringAndSize");
105        return NULL;
106    }
107    if (size == 1 && str != NULL &&
108        (op = characters[*str & UCHAR_MAX]) != NULL)
109    {
110#ifdef COUNT_ALLOCS
111        one_strings++;
112#endif
113        Py_INCREF(op);
114        return (PyObject *)op;
115    }
116
117    op = (PyBytesObject *)_PyBytes_FromSize(size, 0);
118    if (op == NULL)
119        return NULL;
120    if (str == NULL)
121        return (PyObject *) op;
122
123    memcpy(op->ob_sval, str, size);
124    /* share short strings */
125    if (size == 1) {
126        characters[*str & UCHAR_MAX] = op;
127        Py_INCREF(op);
128    }
129    return (PyObject *) op;
130}
131
132PyObject *
133PyBytes_FromString(const char *str)
134{
135    size_t size;
136    PyBytesObject *op;
137
138    assert(str != NULL);
139    size = strlen(str);
140    if (size > PY_SSIZE_T_MAX - PyBytesObject_SIZE) {
141        PyErr_SetString(PyExc_OverflowError,
142            "byte string is too long");
143        return NULL;
144    }
145    if (size == 0 && (op = nullstring) != NULL) {
146#ifdef COUNT_ALLOCS
147        null_strings++;
148#endif
149        Py_INCREF(op);
150        return (PyObject *)op;
151    }
152    if (size == 1 && (op = characters[*str & UCHAR_MAX]) != NULL) {
153#ifdef COUNT_ALLOCS
154        one_strings++;
155#endif
156        Py_INCREF(op);
157        return (PyObject *)op;
158    }
159
160    /* Inline PyObject_NewVar */
161    op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + size);
162    if (op == NULL)
163        return PyErr_NoMemory();
164    (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
165    op->ob_shash = -1;
166    memcpy(op->ob_sval, str, size+1);
167    /* share short strings */
168    if (size == 0) {
169        nullstring = op;
170        Py_INCREF(op);
171    } else if (size == 1) {
172        characters[*str & UCHAR_MAX] = op;
173        Py_INCREF(op);
174    }
175    return (PyObject *) op;
176}
177
178PyObject *
179PyBytes_FromFormatV(const char *format, va_list vargs)
180{
181    char *s;
182    const char *f;
183    const char *p;
184    Py_ssize_t prec;
185    int longflag;
186    int size_tflag;
187    /* Longest 64-bit formatted numbers:
188       - "18446744073709551615\0" (21 bytes)
189       - "-9223372036854775808\0" (21 bytes)
190       Decimal takes the most space (it isn't enough for octal.)
191
192       Longest 64-bit pointer representation:
193       "0xffffffffffffffff\0" (19 bytes). */
194    char buffer[21];
195    _PyBytesWriter writer;
196
197    _PyBytesWriter_Init(&writer);
198
199    s = _PyBytesWriter_Alloc(&writer, strlen(format));
200    if (s == NULL)
201        return NULL;
202    writer.overallocate = 1;
203
204#define WRITE_BYTES(str) \
205    do { \
206        s = _PyBytesWriter_WriteBytes(&writer, s, (str), strlen(str)); \
207        if (s == NULL) \
208            goto error; \
209    } while (0)
210
211    for (f = format; *f; f++) {
212        if (*f != '%') {
213            *s++ = *f;
214            continue;
215        }
216
217        p = f++;
218
219        /* ignore the width (ex: 10 in "%10s") */
220        while (Py_ISDIGIT(*f))
221            f++;
222
223        /* parse the precision (ex: 10 in "%.10s") */
224        prec = 0;
225        if (*f == '.') {
226            f++;
227            for (; Py_ISDIGIT(*f); f++) {
228                prec = (prec * 10) + (*f - '0');
229            }
230        }
231
232        while (*f && *f != '%' && !Py_ISALPHA(*f))
233            f++;
234
235        /* handle the long flag ('l'), but only for %ld and %lu.
236           others can be added when necessary. */
237        longflag = 0;
238        if (*f == 'l' && (f[1] == 'd' || f[1] == 'u')) {
239            longflag = 1;
240            ++f;
241        }
242
243        /* handle the size_t flag ('z'). */
244        size_tflag = 0;
245        if (*f == 'z' && (f[1] == 'd' || f[1] == 'u')) {
246            size_tflag = 1;
247            ++f;
248        }
249
250        /* subtract bytes preallocated for the format string
251           (ex: 2 for "%s") */
252        writer.min_size -= (f - p + 1);
253
254        switch (*f) {
255        case 'c':
256        {
257            int c = va_arg(vargs, int);
258            if (c < 0 || c > 255) {
259                PyErr_SetString(PyExc_OverflowError,
260                                "PyBytes_FromFormatV(): %c format "
261                                "expects an integer in range [0; 255]");
262                goto error;
263            }
264            writer.min_size++;
265            *s++ = (unsigned char)c;
266            break;
267        }
268
269        case 'd':
270            if (longflag)
271                sprintf(buffer, "%ld", va_arg(vargs, long));
272            else if (size_tflag)
273                sprintf(buffer, "%" PY_FORMAT_SIZE_T "d",
274                    va_arg(vargs, Py_ssize_t));
275            else
276                sprintf(buffer, "%d", va_arg(vargs, int));
277            assert(strlen(buffer) < sizeof(buffer));
278            WRITE_BYTES(buffer);
279            break;
280
281        case 'u':
282            if (longflag)
283                sprintf(buffer, "%lu",
284                    va_arg(vargs, unsigned long));
285            else if (size_tflag)
286                sprintf(buffer, "%" PY_FORMAT_SIZE_T "u",
287                    va_arg(vargs, size_t));
288            else
289                sprintf(buffer, "%u",
290                    va_arg(vargs, unsigned int));
291            assert(strlen(buffer) < sizeof(buffer));
292            WRITE_BYTES(buffer);
293            break;
294
295        case 'i':
296            sprintf(buffer, "%i", va_arg(vargs, int));
297            assert(strlen(buffer) < sizeof(buffer));
298            WRITE_BYTES(buffer);
299            break;
300
301        case 'x':
302            sprintf(buffer, "%x", va_arg(vargs, int));
303            assert(strlen(buffer) < sizeof(buffer));
304            WRITE_BYTES(buffer);
305            break;
306
307        case 's':
308        {
309            Py_ssize_t i;
310
311            p = va_arg(vargs, const char*);
312            i = strlen(p);
313            if (prec > 0 && i > prec)
314                i = prec;
315            s = _PyBytesWriter_WriteBytes(&writer, s, p, i);
316            if (s == NULL)
317                goto error;
318            break;
319        }
320
321        case 'p':
322            sprintf(buffer, "%p", va_arg(vargs, void*));
323            assert(strlen(buffer) < sizeof(buffer));
324            /* %p is ill-defined:  ensure leading 0x. */
325            if (buffer[1] == 'X')
326                buffer[1] = 'x';
327            else if (buffer[1] != 'x') {
328                memmove(buffer+2, buffer, strlen(buffer)+1);
329                buffer[0] = '0';
330                buffer[1] = 'x';
331            }
332            WRITE_BYTES(buffer);
333            break;
334
335        case '%':
336            writer.min_size++;
337            *s++ = '%';
338            break;
339
340        default:
341            if (*f == 0) {
342                /* fix min_size if we reached the end of the format string */
343                writer.min_size++;
344            }
345
346            /* invalid format string: copy unformatted string and exit */
347            WRITE_BYTES(p);
348            return _PyBytesWriter_Finish(&writer, s);
349        }
350    }
351
352#undef WRITE_BYTES
353
354    return _PyBytesWriter_Finish(&writer, s);
355
356 error:
357    _PyBytesWriter_Dealloc(&writer);
358    return NULL;
359}
360
361PyObject *
362PyBytes_FromFormat(const char *format, ...)
363{
364    PyObject* ret;
365    va_list vargs;
366
367#ifdef HAVE_STDARG_PROTOTYPES
368    va_start(vargs, format);
369#else
370    va_start(vargs);
371#endif
372    ret = PyBytes_FromFormatV(format, vargs);
373    va_end(vargs);
374    return ret;
375}
376
377/* Helpers for formatstring */
378
379Py_LOCAL_INLINE(PyObject *)
380getnextarg(PyObject *args, Py_ssize_t arglen, Py_ssize_t *p_argidx)
381{
382    Py_ssize_t argidx = *p_argidx;
383    if (argidx < arglen) {
384        (*p_argidx)++;
385        if (arglen < 0)
386            return args;
387        else
388            return PyTuple_GetItem(args, argidx);
389    }
390    PyErr_SetString(PyExc_TypeError,
391                    "not enough arguments for format string");
392    return NULL;
393}
394
395/* Format codes
396 * F_LJUST      '-'
397 * F_SIGN       '+'
398 * F_BLANK      ' '
399 * F_ALT        '#'
400 * F_ZERO       '0'
401 */
402#define F_LJUST (1<<0)
403#define F_SIGN  (1<<1)
404#define F_BLANK (1<<2)
405#define F_ALT   (1<<3)
406#define F_ZERO  (1<<4)
407
408/* Returns a new reference to a PyBytes object, or NULL on failure. */
409
410static char*
411formatfloat(PyObject *v, int flags, int prec, int type,
412            PyObject **p_result, _PyBytesWriter *writer, char *str)
413{
414    char *p;
415    PyObject *result;
416    double x;
417    size_t len;
418
419    x = PyFloat_AsDouble(v);
420    if (x == -1.0 && PyErr_Occurred()) {
421        PyErr_Format(PyExc_TypeError, "float argument required, "
422                     "not %.200s", Py_TYPE(v)->tp_name);
423        return NULL;
424    }
425
426    if (prec < 0)
427        prec = 6;
428
429    p = PyOS_double_to_string(x, type, prec,
430                              (flags & F_ALT) ? Py_DTSF_ALT : 0, NULL);
431
432    if (p == NULL)
433        return NULL;
434
435    len = strlen(p);
436    if (writer != NULL) {
437        str = _PyBytesWriter_Prepare(writer, str, len);
438        if (str == NULL)
439            return NULL;
440        memcpy(str, p, len);
441        PyMem_Free(p);
442        str += len;
443        return str;
444    }
445
446    result = PyBytes_FromStringAndSize(p, len);
447    PyMem_Free(p);
448    *p_result = result;
449    return str;
450}
451
452static PyObject *
453formatlong(PyObject *v, int flags, int prec, int type)
454{
455    PyObject *result, *iobj;
456    if (type == 'i')
457        type = 'd';
458    if (PyLong_Check(v))
459        return _PyUnicode_FormatLong(v, flags & F_ALT, prec, type);
460    if (PyNumber_Check(v)) {
461        /* make sure number is a type of integer for o, x, and X */
462        if (type == 'o' || type == 'x' || type == 'X')
463            iobj = PyNumber_Index(v);
464        else
465            iobj = PyNumber_Long(v);
466        if (iobj == NULL) {
467            if (!PyErr_ExceptionMatches(PyExc_TypeError))
468                return NULL;
469        }
470        else if (!PyLong_Check(iobj))
471            Py_CLEAR(iobj);
472        if (iobj != NULL) {
473            result = _PyUnicode_FormatLong(iobj, flags & F_ALT, prec, type);
474            Py_DECREF(iobj);
475            return result;
476        }
477    }
478    PyErr_Format(PyExc_TypeError,
479        "%%%c format: %s is required, not %.200s", type,
480        (type == 'o' || type == 'x' || type == 'X') ? "an integer"
481                                                    : "a number",
482        Py_TYPE(v)->tp_name);
483    return NULL;
484}
485
486static int
487byte_converter(PyObject *arg, char *p)
488{
489    if (PyBytes_Check(arg) && PyBytes_GET_SIZE(arg) == 1) {
490        *p = PyBytes_AS_STRING(arg)[0];
491        return 1;
492    }
493    else if (PyByteArray_Check(arg) && PyByteArray_GET_SIZE(arg) == 1) {
494        *p = PyByteArray_AS_STRING(arg)[0];
495        return 1;
496    }
497    else {
498        PyObject *iobj;
499        long ival;
500        int overflow;
501        /* make sure number is a type of integer */
502        if (PyLong_Check(arg)) {
503            ival = PyLong_AsLongAndOverflow(arg, &overflow);
504        }
505        else {
506            iobj = PyNumber_Index(arg);
507            if (iobj == NULL) {
508                if (!PyErr_ExceptionMatches(PyExc_TypeError))
509                    return 0;
510                goto onError;
511            }
512            ival = PyLong_AsLongAndOverflow(iobj, &overflow);
513            Py_DECREF(iobj);
514        }
515        if (!overflow && ival == -1 && PyErr_Occurred())
516            goto onError;
517        if (overflow || !(0 <= ival && ival <= 255)) {
518            PyErr_SetString(PyExc_OverflowError,
519                            "%c arg not in range(256)");
520            return 0;
521        }
522        *p = (char)ival;
523        return 1;
524    }
525  onError:
526    PyErr_SetString(PyExc_TypeError,
527        "%c requires an integer in range(256) or a single byte");
528    return 0;
529}
530
531static PyObject *
532format_obj(PyObject *v, const char **pbuf, Py_ssize_t *plen)
533{
534    PyObject *func, *result;
535    _Py_IDENTIFIER(__bytes__);
536    /* is it a bytes object? */
537    if (PyBytes_Check(v)) {
538        *pbuf = PyBytes_AS_STRING(v);
539        *plen = PyBytes_GET_SIZE(v);
540        Py_INCREF(v);
541        return v;
542    }
543    if (PyByteArray_Check(v)) {
544        *pbuf = PyByteArray_AS_STRING(v);
545        *plen = PyByteArray_GET_SIZE(v);
546        Py_INCREF(v);
547        return v;
548    }
549    /* does it support __bytes__? */
550    func = _PyObject_LookupSpecial(v, &PyId___bytes__);
551    if (func != NULL) {
552        result = PyObject_CallFunctionObjArgs(func, NULL);
553        Py_DECREF(func);
554        if (result == NULL)
555            return NULL;
556        if (!PyBytes_Check(result)) {
557            PyErr_Format(PyExc_TypeError,
558                         "__bytes__ returned non-bytes (type %.200s)",
559                         Py_TYPE(result)->tp_name);
560            Py_DECREF(result);
561            return NULL;
562        }
563        *pbuf = PyBytes_AS_STRING(result);
564        *plen = PyBytes_GET_SIZE(result);
565        return result;
566    }
567    PyErr_Format(PyExc_TypeError,
568                 "%%b requires bytes, or an object that implements __bytes__, not '%.100s'",
569                 Py_TYPE(v)->tp_name);
570    return NULL;
571}
572
573/* fmt%(v1,v2,...) is roughly equivalent to sprintf(fmt, v1, v2, ...) */
574
575PyObject *
576_PyBytes_FormatEx(const char *format, Py_ssize_t format_len,
577                  PyObject *args, int use_bytearray)
578{
579    const char *fmt;
580    char *res;
581    Py_ssize_t arglen, argidx;
582    Py_ssize_t fmtcnt;
583    int args_owned = 0;
584    PyObject *dict = NULL;
585    _PyBytesWriter writer;
586
587    if (args == NULL) {
588        PyErr_BadInternalCall();
589        return NULL;
590    }
591    fmt = format;
592    fmtcnt = format_len;
593
594    _PyBytesWriter_Init(&writer);
595    writer.use_bytearray = use_bytearray;
596
597    res = _PyBytesWriter_Alloc(&writer, fmtcnt);
598    if (res == NULL)
599        return NULL;
600    if (!use_bytearray)
601        writer.overallocate = 1;
602
603    if (PyTuple_Check(args)) {
604        arglen = PyTuple_GET_SIZE(args);
605        argidx = 0;
606    }
607    else {
608        arglen = -1;
609        argidx = -2;
610    }
611    if (Py_TYPE(args)->tp_as_mapping && Py_TYPE(args)->tp_as_mapping->mp_subscript &&
612        !PyTuple_Check(args) && !PyBytes_Check(args) && !PyUnicode_Check(args) &&
613        !PyByteArray_Check(args)) {
614            dict = args;
615    }
616
617    while (--fmtcnt >= 0) {
618        if (*fmt != '%') {
619            Py_ssize_t len;
620            char *pos;
621
622            pos = strchr(fmt + 1, '%');
623            if (pos != NULL)
624                len = pos - fmt;
625            else
626                len = format_len - (fmt - format);
627            assert(len != 0);
628
629            memcpy(res, fmt, len);
630            res += len;
631            fmt += len;
632            fmtcnt -= (len - 1);
633        }
634        else {
635            /* Got a format specifier */
636            int flags = 0;
637            Py_ssize_t width = -1;
638            int prec = -1;
639            int c = '\0';
640            int fill;
641            PyObject *v = NULL;
642            PyObject *temp = NULL;
643            const char *pbuf = NULL;
644            int sign;
645            Py_ssize_t len = 0;
646            char onechar; /* For byte_converter() */
647            Py_ssize_t alloc;
648#ifdef Py_DEBUG
649            char *before;
650#endif
651
652            fmt++;
653            if (*fmt == '(') {
654                const char *keystart;
655                Py_ssize_t keylen;
656                PyObject *key;
657                int pcount = 1;
658
659                if (dict == NULL) {
660                    PyErr_SetString(PyExc_TypeError,
661                             "format requires a mapping");
662                    goto error;
663                }
664                ++fmt;
665                --fmtcnt;
666                keystart = fmt;
667                /* Skip over balanced parentheses */
668                while (pcount > 0 && --fmtcnt >= 0) {
669                    if (*fmt == ')')
670                        --pcount;
671                    else if (*fmt == '(')
672                        ++pcount;
673                    fmt++;
674                }
675                keylen = fmt - keystart - 1;
676                if (fmtcnt < 0 || pcount > 0) {
677                    PyErr_SetString(PyExc_ValueError,
678                               "incomplete format key");
679                    goto error;
680                }
681                key = PyBytes_FromStringAndSize(keystart,
682                                                 keylen);
683                if (key == NULL)
684                    goto error;
685                if (args_owned) {
686                    Py_DECREF(args);
687                    args_owned = 0;
688                }
689                args = PyObject_GetItem(dict, key);
690                Py_DECREF(key);
691                if (args == NULL) {
692                    goto error;
693                }
694                args_owned = 1;
695                arglen = -1;
696                argidx = -2;
697            }
698
699            /* Parse flags. Example: "%+i" => flags=F_SIGN. */
700            while (--fmtcnt >= 0) {
701                switch (c = *fmt++) {
702                case '-': flags |= F_LJUST; continue;
703                case '+': flags |= F_SIGN; continue;
704                case ' ': flags |= F_BLANK; continue;
705                case '#': flags |= F_ALT; continue;
706                case '0': flags |= F_ZERO; continue;
707                }
708                break;
709            }
710
711            /* Parse width. Example: "%10s" => width=10 */
712            if (c == '*') {
713                v = getnextarg(args, arglen, &argidx);
714                if (v == NULL)
715                    goto error;
716                if (!PyLong_Check(v)) {
717                    PyErr_SetString(PyExc_TypeError,
718                                    "* wants int");
719                    goto error;
720                }
721                width = PyLong_AsSsize_t(v);
722                if (width == -1 && PyErr_Occurred())
723                    goto error;
724                if (width < 0) {
725                    flags |= F_LJUST;
726                    width = -width;
727                }
728                if (--fmtcnt >= 0)
729                    c = *fmt++;
730            }
731            else if (c >= 0 && isdigit(c)) {
732                width = c - '0';
733                while (--fmtcnt >= 0) {
734                    c = Py_CHARMASK(*fmt++);
735                    if (!isdigit(c))
736                        break;
737                    if (width > (PY_SSIZE_T_MAX - ((int)c - '0')) / 10) {
738                        PyErr_SetString(
739                            PyExc_ValueError,
740                            "width too big");
741                        goto error;
742                    }
743                    width = width*10 + (c - '0');
744                }
745            }
746
747            /* Parse precision. Example: "%.3f" => prec=3 */
748            if (c == '.') {
749                prec = 0;
750                if (--fmtcnt >= 0)
751                    c = *fmt++;
752                if (c == '*') {
753                    v = getnextarg(args, arglen, &argidx);
754                    if (v == NULL)
755                        goto error;
756                    if (!PyLong_Check(v)) {
757                        PyErr_SetString(
758                            PyExc_TypeError,
759                            "* wants int");
760                        goto error;
761                    }
762                    prec = _PyLong_AsInt(v);
763                    if (prec == -1 && PyErr_Occurred())
764                        goto error;
765                    if (prec < 0)
766                        prec = 0;
767                    if (--fmtcnt >= 0)
768                        c = *fmt++;
769                }
770                else if (c >= 0 && isdigit(c)) {
771                    prec = c - '0';
772                    while (--fmtcnt >= 0) {
773                        c = Py_CHARMASK(*fmt++);
774                        if (!isdigit(c))
775                            break;
776                        if (prec > (INT_MAX - ((int)c - '0')) / 10) {
777                            PyErr_SetString(
778                                PyExc_ValueError,
779                                "prec too big");
780                            goto error;
781                        }
782                        prec = prec*10 + (c - '0');
783                    }
784                }
785            } /* prec */
786            if (fmtcnt >= 0) {
787                if (c == 'h' || c == 'l' || c == 'L') {
788                    if (--fmtcnt >= 0)
789                        c = *fmt++;
790                }
791            }
792            if (fmtcnt < 0) {
793                PyErr_SetString(PyExc_ValueError,
794                                "incomplete format");
795                goto error;
796            }
797            if (c != '%') {
798                v = getnextarg(args, arglen, &argidx);
799                if (v == NULL)
800                    goto error;
801            }
802
803            if (fmtcnt < 0) {
804                /* last writer: disable writer overallocation */
805                writer.overallocate = 0;
806            }
807
808            sign = 0;
809            fill = ' ';
810            switch (c) {
811            case '%':
812                *res++ = '%';
813                continue;
814
815            case 'r':
816                // %r is only for 2/3 code; 3 only code should use %a
817            case 'a':
818                temp = PyObject_ASCII(v);
819                if (temp == NULL)
820                    goto error;
821                assert(PyUnicode_IS_ASCII(temp));
822                pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
823                len = PyUnicode_GET_LENGTH(temp);
824                if (prec >= 0 && len > prec)
825                    len = prec;
826                break;
827
828            case 's':
829                // %s is only for 2/3 code; 3 only code should use %b
830            case 'b':
831                temp = format_obj(v, &pbuf, &len);
832                if (temp == NULL)
833                    goto error;
834                if (prec >= 0 && len > prec)
835                    len = prec;
836                break;
837
838            case 'i':
839            case 'd':
840            case 'u':
841            case 'o':
842            case 'x':
843            case 'X':
844                if (PyLong_CheckExact(v)
845                    && width == -1 && prec == -1
846                    && !(flags & (F_SIGN | F_BLANK))
847                    && c != 'X')
848                {
849                    /* Fast path */
850                    int alternate = flags & F_ALT;
851                    int base;
852
853                    switch(c)
854                    {
855                        default:
856                            assert(0 && "'type' not in [diuoxX]");
857                        case 'd':
858                        case 'i':
859                        case 'u':
860                            base = 10;
861                            break;
862                        case 'o':
863                            base = 8;
864                            break;
865                        case 'x':
866                        case 'X':
867                            base = 16;
868                            break;
869                    }
870
871                    /* Fast path */
872                    writer.min_size -= 2; /* size preallocated for "%d" */
873                    res = _PyLong_FormatBytesWriter(&writer, res,
874                                                    v, base, alternate);
875                    if (res == NULL)
876                        goto error;
877                    continue;
878                }
879
880                temp = formatlong(v, flags, prec, c);
881                if (!temp)
882                    goto error;
883                assert(PyUnicode_IS_ASCII(temp));
884                pbuf = (const char *)PyUnicode_1BYTE_DATA(temp);
885                len = PyUnicode_GET_LENGTH(temp);
886                sign = 1;
887                if (flags & F_ZERO)
888                    fill = '0';
889                break;
890
891            case 'e':
892            case 'E':
893            case 'f':
894            case 'F':
895            case 'g':
896            case 'G':
897                if (width == -1 && prec == -1
898                    && !(flags & (F_SIGN | F_BLANK)))
899                {
900                    /* Fast path */
901                    writer.min_size -= 2; /* size preallocated for "%f" */
902                    res = formatfloat(v, flags, prec, c, NULL, &writer, res);
903                    if (res == NULL)
904                        goto error;
905                    continue;
906                }
907
908                if (!formatfloat(v, flags, prec, c, &temp, NULL, res))
909                    goto error;
910                pbuf = PyBytes_AS_STRING(temp);
911                len = PyBytes_GET_SIZE(temp);
912                sign = 1;
913                if (flags & F_ZERO)
914                    fill = '0';
915                break;
916
917            case 'c':
918                pbuf = &onechar;
919                len = byte_converter(v, &onechar);
920                if (!len)
921                    goto error;
922                if (width == -1) {
923                    /* Fast path */
924                    *res++ = onechar;
925                    continue;
926                }
927                break;
928
929            default:
930                PyErr_Format(PyExc_ValueError,
931                  "unsupported format character '%c' (0x%x) "
932                  "at index %zd",
933                  c, c,
934                  (Py_ssize_t)(fmt - 1 - format));
935                goto error;
936            }
937
938            if (sign) {
939                if (*pbuf == '-' || *pbuf == '+') {
940                    sign = *pbuf++;
941                    len--;
942                }
943                else if (flags & F_SIGN)
944                    sign = '+';
945                else if (flags & F_BLANK)
946                    sign = ' ';
947                else
948                    sign = 0;
949            }
950            if (width < len)
951                width = len;
952
953            alloc = width;
954            if (sign != 0 && len == width)
955                alloc++;
956            /* 2: size preallocated for %s */
957            if (alloc > 2) {
958                res = _PyBytesWriter_Prepare(&writer, res, alloc - 2);
959                if (res == NULL)
960                    goto error;
961            }
962#ifdef Py_DEBUG
963            before = res;
964#endif
965
966            /* Write the sign if needed */
967            if (sign) {
968                if (fill != ' ')
969                    *res++ = sign;
970                if (width > len)
971                    width--;
972            }
973
974            /* Write the numeric prefix for "x", "X" and "o" formats
975               if the alternate form is used.
976               For example, write "0x" for the "%#x" format. */
977            if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
978                assert(pbuf[0] == '0');
979                assert(pbuf[1] == c);
980                if (fill != ' ') {
981                    *res++ = *pbuf++;
982                    *res++ = *pbuf++;
983                }
984                width -= 2;
985                if (width < 0)
986                    width = 0;
987                len -= 2;
988            }
989
990            /* Pad left with the fill character if needed */
991            if (width > len && !(flags & F_LJUST)) {
992                memset(res, fill, width - len);
993                res += (width - len);
994                width = len;
995            }
996
997            /* If padding with spaces: write sign if needed and/or numeric
998               prefix if the alternate form is used */
999            if (fill == ' ') {
1000                if (sign)
1001                    *res++ = sign;
1002                if ((flags & F_ALT) && (c == 'o' || c == 'x' || c == 'X')) {
1003                    assert(pbuf[0] == '0');
1004                    assert(pbuf[1] == c);
1005                    *res++ = *pbuf++;
1006                    *res++ = *pbuf++;
1007                }
1008            }
1009
1010            /* Copy bytes */
1011            memcpy(res, pbuf, len);
1012            res += len;
1013
1014            /* Pad right with the fill character if needed */
1015            if (width > len) {
1016                memset(res, ' ', width - len);
1017                res += (width - len);
1018            }
1019
1020            if (dict && (argidx < arglen) && c != '%') {
1021                PyErr_SetString(PyExc_TypeError,
1022                           "not all arguments converted during bytes formatting");
1023                Py_XDECREF(temp);
1024                goto error;
1025            }
1026            Py_XDECREF(temp);
1027
1028#ifdef Py_DEBUG
1029            /* check that we computed the exact size for this write */
1030            assert((res - before) == alloc);
1031#endif
1032        } /* '%' */
1033
1034        /* If overallocation was disabled, ensure that it was the last
1035           write. Otherwise, we missed an optimization */
1036        assert(writer.overallocate || fmtcnt < 0 || use_bytearray);
1037    } /* until end */
1038
1039    if (argidx < arglen && !dict) {
1040        PyErr_SetString(PyExc_TypeError,
1041                        "not all arguments converted during bytes formatting");
1042        goto error;
1043    }
1044
1045    if (args_owned) {
1046        Py_DECREF(args);
1047    }
1048    return _PyBytesWriter_Finish(&writer, res);
1049
1050 error:
1051    _PyBytesWriter_Dealloc(&writer);
1052    if (args_owned) {
1053        Py_DECREF(args);
1054    }
1055    return NULL;
1056}
1057
1058/* =-= */
1059
1060static void
1061bytes_dealloc(PyObject *op)
1062{
1063    Py_TYPE(op)->tp_free(op);
1064}
1065
/* Unescape a backslash-escaped string (see _PyBytes_DecodeEscape() below).
   If unicode is non-zero, the string is a u-literal; note that the
   implementation below does not currently consult this flag.
   If recode_encoding is non-zero, the string is UTF-8 encoded and its
   non-ASCII bytes are re-encoded in the specified encoding.  */
1070
1071static char *
1072_PyBytes_DecodeEscapeRecode(const char **s, const char *end,
1073                            const char *errors, const char *recode_encoding,
1074                            _PyBytesWriter *writer, char *p)
1075{
1076    PyObject *u, *w;
1077    const char* t;
1078
1079    t = *s;
1080    /* Decode non-ASCII bytes as UTF-8. */
1081    while (t < end && (*t & 0x80))
1082        t++;
1083    u = PyUnicode_DecodeUTF8(*s, t - *s, errors);
1084    if (u == NULL)
1085        return NULL;
1086
1087    /* Recode them in target encoding. */
1088    w = PyUnicode_AsEncodedString(u, recode_encoding, errors);
1089    Py_DECREF(u);
1090    if  (w == NULL)
1091        return NULL;
1092    assert(PyBytes_Check(w));
1093
1094    /* Append bytes to output buffer. */
1095    writer->min_size--;   /* subtract 1 preallocated byte */
1096    p = _PyBytesWriter_WriteBytes(writer, p,
1097                                  PyBytes_AS_STRING(w),
1098                                  PyBytes_GET_SIZE(w));
1099    Py_DECREF(w);
1100    if (p == NULL)
1101        return NULL;
1102
1103    *s = t;
1104    return p;
1105}
1106
1107PyObject *_PyBytes_DecodeEscape(const char *s,
1108                                Py_ssize_t len,
1109                                const char *errors,
1110                                Py_ssize_t unicode,
1111                                const char *recode_encoding,
1112                                const char **first_invalid_escape)
1113{
1114    int c;
1115    char *p;
1116    const char *end;
1117    _PyBytesWriter writer;
1118
1119    _PyBytesWriter_Init(&writer);
1120
1121    p = _PyBytesWriter_Alloc(&writer, len);
1122    if (p == NULL)
1123        return NULL;
1124    writer.overallocate = 1;
1125
1126    *first_invalid_escape = NULL;
1127
1128    end = s + len;
1129    while (s < end) {
1130        if (*s != '\\') {
1131          non_esc:
1132            if (!(recode_encoding && (*s & 0x80))) {
1133                *p++ = *s++;
1134            }
1135            else {
1136                /* non-ASCII character and need to recode */
1137                p = _PyBytes_DecodeEscapeRecode(&s, end,
1138                                                errors, recode_encoding,
1139                                                &writer, p);
1140                if (p == NULL)
1141                    goto failed;
1142            }
1143            continue;
1144        }
1145
1146        s++;
1147        if (s == end) {
1148            PyErr_SetString(PyExc_ValueError,
1149                            "Trailing \\ in string");
1150            goto failed;
1151        }
1152
1153        switch (*s++) {
1154        /* XXX This assumes ASCII! */
1155        case '\n': break;
1156        case '\\': *p++ = '\\'; break;
1157        case '\'': *p++ = '\''; break;
1158        case '\"': *p++ = '\"'; break;
1159        case 'b': *p++ = '\b'; break;
1160        case 'f': *p++ = '\014'; break; /* FF */
1161        case 't': *p++ = '\t'; break;
1162        case 'n': *p++ = '\n'; break;
1163        case 'r': *p++ = '\r'; break;
1164        case 'v': *p++ = '\013'; break; /* VT */
1165        case 'a': *p++ = '\007'; break; /* BEL, not classic C */
1166        case '0': case '1': case '2': case '3':
1167        case '4': case '5': case '6': case '7':
1168            c = s[-1] - '0';
1169            if (s < end && '0' <= *s && *s <= '7') {
1170                c = (c<<3) + *s++ - '0';
1171                if (s < end && '0' <= *s && *s <= '7')
1172                    c = (c<<3) + *s++ - '0';
1173            }
1174            *p++ = c;
1175            break;
1176        case 'x':
1177            if (s+1 < end) {
1178                int digit1, digit2;
1179                digit1 = _PyLong_DigitValue[Py_CHARMASK(s[0])];
1180                digit2 = _PyLong_DigitValue[Py_CHARMASK(s[1])];
1181                if (digit1 < 16 && digit2 < 16) {
1182                    *p++ = (unsigned char)((digit1 << 4) + digit2);
1183                    s += 2;
1184                    break;
1185                }
1186            }
1187            /* invalid hexadecimal digits */
1188
1189            if (!errors || strcmp(errors, "strict") == 0) {
1190                PyErr_Format(PyExc_ValueError,
1191                             "invalid \\x escape at position %d",
1192                             s - 2 - (end - len));
1193                goto failed;
1194            }
1195            if (strcmp(errors, "replace") == 0) {
1196                *p++ = '?';
1197            } else if (strcmp(errors, "ignore") == 0)
1198                /* do nothing */;
1199            else {
1200                PyErr_Format(PyExc_ValueError,
1201                             "decoding error; unknown "
1202                             "error handling code: %.400s",
1203                             errors);
1204                goto failed;
1205            }
1206            /* skip \x */
1207            if (s < end && Py_ISXDIGIT(s[0]))
1208                s++; /* and a hexdigit */
1209            break;
1210
1211        default:
1212            if (*first_invalid_escape == NULL) {
1213                *first_invalid_escape = s-1; /* Back up one char, since we've
1214                                                already incremented s. */
1215            }
1216            *p++ = '\\';
1217            s--;
1218            goto non_esc; /* an arbitrary number of unescaped
1219                             UTF-8 bytes may follow. */
1220        }
1221    }
1222
1223    return _PyBytesWriter_Finish(&writer, p);
1224
1225  failed:
1226    _PyBytesWriter_Dealloc(&writer);
1227    return NULL;
1228}
1229
1230PyObject *PyBytes_DecodeEscape(const char *s,
1231                                Py_ssize_t len,
1232                                const char *errors,
1233                                Py_ssize_t unicode,
1234                                const char *recode_encoding)
1235{
1236    const char* first_invalid_escape;
1237    PyObject *result = _PyBytes_DecodeEscape(s, len, errors, unicode,
1238                                             recode_encoding,
1239                                             &first_invalid_escape);
1240    if (result == NULL)
1241        return NULL;
1242    if (first_invalid_escape != NULL) {
1243        if (PyErr_WarnFormat(PyExc_DeprecationWarning, 1,
1244                             "invalid escape sequence '\\%c'",
1245                             *first_invalid_escape) < 0) {
1246            Py_DECREF(result);
1247            return NULL;
1248        }
1249    }
1250    return result;
1251
1252}
1253/* -------------------------------------------------------------------- */
1254/* object api */
1255
1256Py_ssize_t
1257PyBytes_Size(PyObject *op)
1258{
1259    if (!PyBytes_Check(op)) {
1260        PyErr_Format(PyExc_TypeError,
1261             "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1262        return -1;
1263    }
1264    return Py_SIZE(op);
1265}
1266
1267char *
1268PyBytes_AsString(PyObject *op)
1269{
1270    if (!PyBytes_Check(op)) {
1271        PyErr_Format(PyExc_TypeError,
1272             "expected bytes, %.200s found", Py_TYPE(op)->tp_name);
1273        return NULL;
1274    }
1275    return ((PyBytesObject *)op)->ob_sval;
1276}
1277
1278int
1279PyBytes_AsStringAndSize(PyObject *obj,
1280                         char **s,
1281                         Py_ssize_t *len)
1282{
1283    if (s == NULL) {
1284        PyErr_BadInternalCall();
1285        return -1;
1286    }
1287
1288    if (!PyBytes_Check(obj)) {
1289        PyErr_Format(PyExc_TypeError,
1290             "expected bytes, %.200s found", Py_TYPE(obj)->tp_name);
1291        return -1;
1292    }
1293
1294    *s = PyBytes_AS_STRING(obj);
1295    if (len != NULL)
1296        *len = PyBytes_GET_SIZE(obj);
1297    else if (strlen(*s) != (size_t)PyBytes_GET_SIZE(obj)) {
1298        PyErr_SetString(PyExc_ValueError,
1299                        "embedded null byte");
1300        return -1;
1301    }
1302    return 0;
1303}
1304
1305/* -------------------------------------------------------------------- */
1306/* Methods */
1307
1308#include "stringlib/stringdefs.h"
1309
1310#include "stringlib/fastsearch.h"
1311#include "stringlib/count.h"
1312#include "stringlib/find.h"
1313#include "stringlib/join.h"
1314#include "stringlib/partition.h"
1315#include "stringlib/split.h"
1316#include "stringlib/ctype.h"
1317
1318#include "stringlib/transmogrify.h"
1319
1320PyObject *
1321PyBytes_Repr(PyObject *obj, int smartquotes)
1322{
1323    PyBytesObject* op = (PyBytesObject*) obj;
1324    Py_ssize_t i, length = Py_SIZE(op);
1325    Py_ssize_t newsize, squotes, dquotes;
1326    PyObject *v;
1327    unsigned char quote, *s, *p;
1328
1329    /* Compute size of output string */
1330    squotes = dquotes = 0;
1331    newsize = 3; /* b'' */
1332    s = (unsigned char*)op->ob_sval;
1333    for (i = 0; i < length; i++) {
1334        Py_ssize_t incr = 1;
1335        switch(s[i]) {
1336        case '\'': squotes++; break;
1337        case '"':  dquotes++; break;
1338        case '\\': case '\t': case '\n': case '\r':
1339            incr = 2; break; /* \C */
1340        default:
1341            if (s[i] < ' ' || s[i] >= 0x7f)
1342                incr = 4; /* \xHH */
1343        }
1344        if (newsize > PY_SSIZE_T_MAX - incr)
1345            goto overflow;
1346        newsize += incr;
1347    }
1348    quote = '\'';
1349    if (smartquotes && squotes && !dquotes)
1350        quote = '"';
1351    if (squotes && quote == '\'') {
1352        if (newsize > PY_SSIZE_T_MAX - squotes)
1353            goto overflow;
1354        newsize += squotes;
1355    }
1356
1357    v = PyUnicode_New(newsize, 127);
1358    if (v == NULL) {
1359        return NULL;
1360    }
1361    p = PyUnicode_1BYTE_DATA(v);
1362
1363    *p++ = 'b', *p++ = quote;
1364    for (i = 0; i < length; i++) {
1365        unsigned char c = op->ob_sval[i];
1366        if (c == quote || c == '\\')
1367            *p++ = '\\', *p++ = c;
1368        else if (c == '\t')
1369            *p++ = '\\', *p++ = 't';
1370        else if (c == '\n')
1371            *p++ = '\\', *p++ = 'n';
1372        else if (c == '\r')
1373            *p++ = '\\', *p++ = 'r';
1374        else if (c < ' ' || c >= 0x7f) {
1375            *p++ = '\\';
1376            *p++ = 'x';
1377            *p++ = Py_hexdigits[(c & 0xf0) >> 4];
1378            *p++ = Py_hexdigits[c & 0xf];
1379        }
1380        else
1381            *p++ = c;
1382    }
1383    *p++ = quote;
1384    assert(_PyUnicode_CheckConsistency(v, 1));
1385    return v;
1386
1387  overflow:
1388    PyErr_SetString(PyExc_OverflowError,
1389                    "bytes object is too large to make repr");
1390    return NULL;
1391}
1392
1393static PyObject *
1394bytes_repr(PyObject *op)
1395{
1396    return PyBytes_Repr(op, 1);
1397}
1398
1399static PyObject *
1400bytes_str(PyObject *op)
1401{
1402    if (Py_BytesWarningFlag) {
1403        if (PyErr_WarnEx(PyExc_BytesWarning,
1404                         "str() on a bytes instance", 1))
1405            return NULL;
1406    }
1407    return bytes_repr(op);
1408}
1409
1410static Py_ssize_t
1411bytes_length(PyBytesObject *a)
1412{
1413    return Py_SIZE(a);
1414}
1415
1416/* This is also used by PyBytes_Concat() */
1417static PyObject *
1418bytes_concat(PyObject *a, PyObject *b)
1419{
1420    Py_buffer va, vb;
1421    PyObject *result = NULL;
1422
1423    va.len = -1;
1424    vb.len = -1;
1425    if (PyObject_GetBuffer(a, &va, PyBUF_SIMPLE) != 0 ||
1426        PyObject_GetBuffer(b, &vb, PyBUF_SIMPLE) != 0) {
1427        PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
1428                     Py_TYPE(a)->tp_name, Py_TYPE(b)->tp_name);
1429        goto done;
1430    }
1431
1432    /* Optimize end cases */
1433    if (va.len == 0 && PyBytes_CheckExact(b)) {
1434        result = b;
1435        Py_INCREF(result);
1436        goto done;
1437    }
1438    if (vb.len == 0 && PyBytes_CheckExact(a)) {
1439        result = a;
1440        Py_INCREF(result);
1441        goto done;
1442    }
1443
1444    if (va.len > PY_SSIZE_T_MAX - vb.len) {
1445        PyErr_NoMemory();
1446        goto done;
1447    }
1448
1449    result = PyBytes_FromStringAndSize(NULL, va.len + vb.len);
1450    if (result != NULL) {
1451        memcpy(PyBytes_AS_STRING(result), va.buf, va.len);
1452        memcpy(PyBytes_AS_STRING(result) + va.len, vb.buf, vb.len);
1453    }
1454
1455  done:
1456    if (va.len != -1)
1457        PyBuffer_Release(&va);
1458    if (vb.len != -1)
1459        PyBuffer_Release(&vb);
1460    return result;
1461}
1462
1463static PyObject *
1464bytes_repeat(PyBytesObject *a, Py_ssize_t n)
1465{
1466    Py_ssize_t i;
1467    Py_ssize_t j;
1468    Py_ssize_t size;
1469    PyBytesObject *op;
1470    size_t nbytes;
1471    if (n < 0)
1472        n = 0;
1473    /* watch out for overflows:  the size can overflow int,
1474     * and the # of bytes needed can overflow size_t
1475     */
1476    if (n > 0 && Py_SIZE(a) > PY_SSIZE_T_MAX / n) {
1477        PyErr_SetString(PyExc_OverflowError,
1478            "repeated bytes are too long");
1479        return NULL;
1480    }
1481    size = Py_SIZE(a) * n;
1482    if (size == Py_SIZE(a) && PyBytes_CheckExact(a)) {
1483        Py_INCREF(a);
1484        return (PyObject *)a;
1485    }
1486    nbytes = (size_t)size;
1487    if (nbytes + PyBytesObject_SIZE <= nbytes) {
1488        PyErr_SetString(PyExc_OverflowError,
1489            "repeated bytes are too long");
1490        return NULL;
1491    }
1492    op = (PyBytesObject *)PyObject_MALLOC(PyBytesObject_SIZE + nbytes);
1493    if (op == NULL)
1494        return PyErr_NoMemory();
1495    (void)PyObject_INIT_VAR(op, &PyBytes_Type, size);
1496    op->ob_shash = -1;
1497    op->ob_sval[size] = '\0';
1498    if (Py_SIZE(a) == 1 && n > 0) {
1499        memset(op->ob_sval, a->ob_sval[0] , n);
1500        return (PyObject *) op;
1501    }
1502    i = 0;
1503    if (i < size) {
1504        memcpy(op->ob_sval, a->ob_sval, Py_SIZE(a));
1505        i = Py_SIZE(a);
1506    }
1507    while (i < size) {
1508        j = (i <= size-i)  ?  i  :  size-i;
1509        memcpy(op->ob_sval+i, op->ob_sval, j);
1510        i += j;
1511    }
1512    return (PyObject *) op;
1513}
1514
1515static int
1516bytes_contains(PyObject *self, PyObject *arg)
1517{
1518    return _Py_bytes_contains(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), arg);
1519}
1520
1521static PyObject *
1522bytes_item(PyBytesObject *a, Py_ssize_t i)
1523{
1524    if (i < 0 || i >= Py_SIZE(a)) {
1525        PyErr_SetString(PyExc_IndexError, "index out of range");
1526        return NULL;
1527    }
1528    return PyLong_FromLong((unsigned char)a->ob_sval[i]);
1529}
1530
1531static int
1532bytes_compare_eq(PyBytesObject *a, PyBytesObject *b)
1533{
1534    int cmp;
1535    Py_ssize_t len;
1536
1537    len = Py_SIZE(a);
1538    if (Py_SIZE(b) != len)
1539        return 0;
1540
1541    if (a->ob_sval[0] != b->ob_sval[0])
1542        return 0;
1543
1544    cmp = memcmp(a->ob_sval, b->ob_sval, len);
1545    return (cmp == 0);
1546}
1547
1548static PyObject*
1549bytes_richcompare(PyBytesObject *a, PyBytesObject *b, int op)
1550{
1551    int c;
1552    Py_ssize_t len_a, len_b;
1553    Py_ssize_t min_len;
1554    PyObject *result;
1555    int rc;
1556
1557    /* Make sure both arguments are strings. */
1558    if (!(PyBytes_Check(a) && PyBytes_Check(b))) {
1559        if (Py_BytesWarningFlag && (op == Py_EQ || op == Py_NE)) {
1560            rc = PyObject_IsInstance((PyObject*)a,
1561                                     (PyObject*)&PyUnicode_Type);
1562            if (!rc)
1563                rc = PyObject_IsInstance((PyObject*)b,
1564                                         (PyObject*)&PyUnicode_Type);
1565            if (rc < 0)
1566                return NULL;
1567            if (rc) {
1568                if (PyErr_WarnEx(PyExc_BytesWarning,
1569                                 "Comparison between bytes and string", 1))
1570                    return NULL;
1571            }
1572            else {
1573                rc = PyObject_IsInstance((PyObject*)a,
1574                                         (PyObject*)&PyLong_Type);
1575                if (!rc)
1576                    rc = PyObject_IsInstance((PyObject*)b,
1577                                             (PyObject*)&PyLong_Type);
1578                if (rc < 0)
1579                    return NULL;
1580                if (rc) {
1581                    if (PyErr_WarnEx(PyExc_BytesWarning,
1582                                     "Comparison between bytes and int", 1))
1583                        return NULL;
1584                }
1585            }
1586        }
1587        result = Py_NotImplemented;
1588    }
1589    else if (a == b) {
1590        switch (op) {
1591        case Py_EQ:
1592        case Py_LE:
1593        case Py_GE:
1594            /* a string is equal to itself */
1595            result = Py_True;
1596            break;
1597        case Py_NE:
1598        case Py_LT:
1599        case Py_GT:
1600            result = Py_False;
1601            break;
1602        default:
1603            PyErr_BadArgument();
1604            return NULL;
1605        }
1606    }
1607    else if (op == Py_EQ || op == Py_NE) {
1608        int eq = bytes_compare_eq(a, b);
1609        eq ^= (op == Py_NE);
1610        result = eq ? Py_True : Py_False;
1611    }
1612    else {
1613        len_a = Py_SIZE(a);
1614        len_b = Py_SIZE(b);
1615        min_len = Py_MIN(len_a, len_b);
1616        if (min_len > 0) {
1617            c = Py_CHARMASK(*a->ob_sval) - Py_CHARMASK(*b->ob_sval);
1618            if (c == 0)
1619                c = memcmp(a->ob_sval, b->ob_sval, min_len);
1620        }
1621        else
1622            c = 0;
1623        if (c == 0)
1624            c = (len_a < len_b) ? -1 : (len_a > len_b) ? 1 : 0;
1625        switch (op) {
1626        case Py_LT: c = c <  0; break;
1627        case Py_LE: c = c <= 0; break;
1628        case Py_GT: c = c >  0; break;
1629        case Py_GE: c = c >= 0; break;
1630        default:
1631            PyErr_BadArgument();
1632            return NULL;
1633        }
1634        result = c ? Py_True : Py_False;
1635    }
1636
1637    Py_INCREF(result);
1638    return result;
1639}
1640
1641static Py_hash_t
1642bytes_hash(PyBytesObject *a)
1643{
1644    if (a->ob_shash == -1) {
1645        /* Can't fail */
1646        a->ob_shash = _Py_HashBytes(a->ob_sval, Py_SIZE(a));
1647    }
1648    return a->ob_shash;
1649}
1650
1651static PyObject*
1652bytes_subscript(PyBytesObject* self, PyObject* item)
1653{
1654    if (PyIndex_Check(item)) {
1655        Py_ssize_t i = PyNumber_AsSsize_t(item, PyExc_IndexError);
1656        if (i == -1 && PyErr_Occurred())
1657            return NULL;
1658        if (i < 0)
1659            i += PyBytes_GET_SIZE(self);
1660        if (i < 0 || i >= PyBytes_GET_SIZE(self)) {
1661            PyErr_SetString(PyExc_IndexError,
1662                            "index out of range");
1663            return NULL;
1664        }
1665        return PyLong_FromLong((unsigned char)self->ob_sval[i]);
1666    }
1667    else if (PySlice_Check(item)) {
1668        Py_ssize_t start, stop, step, slicelength, cur, i;
1669        char* source_buf;
1670        char* result_buf;
1671        PyObject* result;
1672
1673        if (PySlice_GetIndicesEx(item,
1674                         PyBytes_GET_SIZE(self),
1675                         &start, &stop, &step, &slicelength) < 0) {
1676            return NULL;
1677        }
1678
1679        if (slicelength <= 0) {
1680            return PyBytes_FromStringAndSize("", 0);
1681        }
1682        else if (start == 0 && step == 1 &&
1683                 slicelength == PyBytes_GET_SIZE(self) &&
1684                 PyBytes_CheckExact(self)) {
1685            Py_INCREF(self);
1686            return (PyObject *)self;
1687        }
1688        else if (step == 1) {
1689            return PyBytes_FromStringAndSize(
1690                PyBytes_AS_STRING(self) + start,
1691                slicelength);
1692        }
1693        else {
1694            source_buf = PyBytes_AS_STRING(self);
1695            result = PyBytes_FromStringAndSize(NULL, slicelength);
1696            if (result == NULL)
1697                return NULL;
1698
1699            result_buf = PyBytes_AS_STRING(result);
1700            for (cur = start, i = 0; i < slicelength;
1701                 cur += step, i++) {
1702                result_buf[i] = source_buf[cur];
1703            }
1704
1705            return result;
1706        }
1707    }
1708    else {
1709        PyErr_Format(PyExc_TypeError,
1710                     "byte indices must be integers or slices, not %.200s",
1711                     Py_TYPE(item)->tp_name);
1712        return NULL;
1713    }
1714}
1715
1716static int
1717bytes_buffer_getbuffer(PyBytesObject *self, Py_buffer *view, int flags)
1718{
1719    return PyBuffer_FillInfo(view, (PyObject*)self, (void *)self->ob_sval, Py_SIZE(self),
1720                             1, flags);
1721}
1722
/* Sequence protocol table for bytes.  The initializers are positional, so
   their order must match the field order of PySequenceMethods; entries
   set to 0 are left unimplemented here. */
static PySequenceMethods bytes_as_sequence = {
    (lenfunc)bytes_length, /*sq_length*/
    (binaryfunc)bytes_concat, /*sq_concat*/
    (ssizeargfunc)bytes_repeat, /*sq_repeat*/
    (ssizeargfunc)bytes_item, /*sq_item*/
    0,                  /*sq_slice*/
    0,                  /*sq_ass_item*/
    0,                  /*sq_ass_slice*/
    (objobjproc)bytes_contains /*sq_contains*/
};
1733
/* Mapping protocol table for bytes: length and subscription (index or
   slice, see bytes_subscript).  The third entry is left empty, so no
   subscript-assignment handler is installed here. */
static PyMappingMethods bytes_as_mapping = {
    (lenfunc)bytes_length,       /* length */
    (binaryfunc)bytes_subscript, /* self[item] */
    0,                           /* no handler for the third slot */
};
1739
/* Buffer protocol table for bytes: only the "get buffer" handler is
   provided; the second entry is NULL. */
static PyBufferProcs bytes_as_buffer = {
    (getbufferproc)bytes_buffer_getbuffer, /* fills a Py_buffer view */
    NULL,                                  /* no second handler */
};
1744
1745
/* Strip modes, passed as the `striptype' argument of do_xstrip(),
   do_strip() and do_argstrip().  Those helpers strip the leading end
   unless the mode is RIGHTSTRIP, and the trailing end unless the mode is
   LEFTSTRIP, so BOTHSTRIP strips both ends. */
#define LEFTSTRIP 0
#define RIGHTSTRIP 1
#define BOTHSTRIP 2
1749
1750/*[clinic input]
1751bytes.split
1752
1753    sep: object = None
1754        The delimiter according which to split the bytes.
1755        None (the default value) means split on ASCII whitespace characters
1756        (space, tab, return, newline, formfeed, vertical tab).
1757    maxsplit: Py_ssize_t = -1
1758        Maximum number of splits to do.
1759        -1 (the default value) means no limit.
1760
1761Return a list of the sections in the bytes, using sep as the delimiter.
1762[clinic start generated code]*/
1763
1764static PyObject *
1765bytes_split_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1766/*[clinic end generated code: output=52126b5844c1d8ef input=8b809b39074abbfa]*/
1767{
1768    Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1769    const char *s = PyBytes_AS_STRING(self), *sub;
1770    Py_buffer vsub;
1771    PyObject *list;
1772
1773    if (maxsplit < 0)
1774        maxsplit = PY_SSIZE_T_MAX;
1775    if (sep == Py_None)
1776        return stringlib_split_whitespace((PyObject*) self, s, len, maxsplit);
1777    if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
1778        return NULL;
1779    sub = vsub.buf;
1780    n = vsub.len;
1781
1782    list = stringlib_split((PyObject*) self, s, len, sub, n, maxsplit);
1783    PyBuffer_Release(&vsub);
1784    return list;
1785}
1786
1787/*[clinic input]
1788bytes.partition
1789
1790    sep: Py_buffer
1791    /
1792
1793Partition the bytes into three parts using the given separator.
1794
1795This will search for the separator sep in the bytes. If the separator is found,
1796returns a 3-tuple containing the part before the separator, the separator
1797itself, and the part after it.
1798
1799If the separator is not found, returns a 3-tuple containing the original bytes
1800object and two empty bytes objects.
1801[clinic start generated code]*/
1802
1803static PyObject *
1804bytes_partition_impl(PyBytesObject *self, Py_buffer *sep)
1805/*[clinic end generated code: output=f532b392a17ff695 input=61cca95519406099]*/
1806{
1807    return stringlib_partition(
1808        (PyObject*) self,
1809        PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1810        sep->obj, (const char *)sep->buf, sep->len
1811        );
1812}
1813
1814/*[clinic input]
1815bytes.rpartition
1816
1817    sep: Py_buffer
1818    /
1819
1820Partition the bytes into three parts using the given separator.
1821
1822This will search for the separator sep in the bytes, starting and the end. If
1823the separator is found, returns a 3-tuple containing the part before the
1824separator, the separator itself, and the part after it.
1825
1826If the separator is not found, returns a 3-tuple containing two empty bytes
1827objects and the original bytes object.
1828[clinic start generated code]*/
1829
1830static PyObject *
1831bytes_rpartition_impl(PyBytesObject *self, Py_buffer *sep)
1832/*[clinic end generated code: output=191b114cbb028e50 input=67f689e63a62d478]*/
1833{
1834    return stringlib_rpartition(
1835        (PyObject*) self,
1836        PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
1837        sep->obj, (const char *)sep->buf, sep->len
1838        );
1839}
1840
1841/*[clinic input]
1842bytes.rsplit = bytes.split
1843
1844Return a list of the sections in the bytes, using sep as the delimiter.
1845
1846Splitting is done starting at the end of the bytes and working to the front.
1847[clinic start generated code]*/
1848
1849static PyObject *
1850bytes_rsplit_impl(PyBytesObject *self, PyObject *sep, Py_ssize_t maxsplit)
1851/*[clinic end generated code: output=ba698d9ea01e1c8f input=0f86c9f28f7d7b7b]*/
1852{
1853    Py_ssize_t len = PyBytes_GET_SIZE(self), n;
1854    const char *s = PyBytes_AS_STRING(self), *sub;
1855    Py_buffer vsub;
1856    PyObject *list;
1857
1858    if (maxsplit < 0)
1859        maxsplit = PY_SSIZE_T_MAX;
1860    if (sep == Py_None)
1861        return stringlib_rsplit_whitespace((PyObject*) self, s, len, maxsplit);
1862    if (PyObject_GetBuffer(sep, &vsub, PyBUF_SIMPLE) != 0)
1863        return NULL;
1864    sub = vsub.buf;
1865    n = vsub.len;
1866
1867    list = stringlib_rsplit((PyObject*) self, s, len, sub, n, maxsplit);
1868    PyBuffer_Release(&vsub);
1869    return list;
1870}
1871
1872
1873/*[clinic input]
1874bytes.join
1875
1876    iterable_of_bytes: object
1877    /
1878
1879Concatenate any number of bytes objects.
1880
1881The bytes whose method is called is inserted in between each pair.
1882
1883The result is returned as a new bytes object.
1884
1885Example: b'.'.join([b'ab', b'pq', b'rs']) -> b'ab.pq.rs'.
1886[clinic start generated code]*/
1887
1888static PyObject *
1889bytes_join(PyBytesObject *self, PyObject *iterable_of_bytes)
1890/*[clinic end generated code: output=a046f379f626f6f8 input=7fe377b95bd549d2]*/
1891{
1892    return stringlib_bytes_join((PyObject*)self, iterable_of_bytes);
1893}
1894
1895PyObject *
1896_PyBytes_Join(PyObject *sep, PyObject *x)
1897{
1898    assert(sep != NULL && PyBytes_Check(sep));
1899    assert(x != NULL);
1900    return bytes_join((PyBytesObject*)sep, x);
1901}
1902
1903static PyObject *
1904bytes_find(PyBytesObject *self, PyObject *args)
1905{
1906    return _Py_bytes_find(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1907}
1908
1909static PyObject *
1910bytes_index(PyBytesObject *self, PyObject *args)
1911{
1912    return _Py_bytes_index(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1913}
1914
1915
1916static PyObject *
1917bytes_rfind(PyBytesObject *self, PyObject *args)
1918{
1919    return _Py_bytes_rfind(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1920}
1921
1922
1923static PyObject *
1924bytes_rindex(PyBytesObject *self, PyObject *args)
1925{
1926    return _Py_bytes_rindex(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
1927}
1928
1929
1930Py_LOCAL_INLINE(PyObject *)
1931do_xstrip(PyBytesObject *self, int striptype, PyObject *sepobj)
1932{
1933    Py_buffer vsep;
1934    char *s = PyBytes_AS_STRING(self);
1935    Py_ssize_t len = PyBytes_GET_SIZE(self);
1936    char *sep;
1937    Py_ssize_t seplen;
1938    Py_ssize_t i, j;
1939
1940    if (PyObject_GetBuffer(sepobj, &vsep, PyBUF_SIMPLE) != 0)
1941        return NULL;
1942    sep = vsep.buf;
1943    seplen = vsep.len;
1944
1945    i = 0;
1946    if (striptype != RIGHTSTRIP) {
1947        while (i < len && memchr(sep, Py_CHARMASK(s[i]), seplen)) {
1948            i++;
1949        }
1950    }
1951
1952    j = len;
1953    if (striptype != LEFTSTRIP) {
1954        do {
1955            j--;
1956        } while (j >= i && memchr(sep, Py_CHARMASK(s[j]), seplen));
1957        j++;
1958    }
1959
1960    PyBuffer_Release(&vsep);
1961
1962    if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1963        Py_INCREF(self);
1964        return (PyObject*)self;
1965    }
1966    else
1967        return PyBytes_FromStringAndSize(s+i, j-i);
1968}
1969
1970
1971Py_LOCAL_INLINE(PyObject *)
1972do_strip(PyBytesObject *self, int striptype)
1973{
1974    char *s = PyBytes_AS_STRING(self);
1975    Py_ssize_t len = PyBytes_GET_SIZE(self), i, j;
1976
1977    i = 0;
1978    if (striptype != RIGHTSTRIP) {
1979        while (i < len && Py_ISSPACE(s[i])) {
1980            i++;
1981        }
1982    }
1983
1984    j = len;
1985    if (striptype != LEFTSTRIP) {
1986        do {
1987            j--;
1988        } while (j >= i && Py_ISSPACE(s[j]));
1989        j++;
1990    }
1991
1992    if (i == 0 && j == len && PyBytes_CheckExact(self)) {
1993        Py_INCREF(self);
1994        return (PyObject*)self;
1995    }
1996    else
1997        return PyBytes_FromStringAndSize(s+i, j-i);
1998}
1999
2000
2001Py_LOCAL_INLINE(PyObject *)
2002do_argstrip(PyBytesObject *self, int striptype, PyObject *bytes)
2003{
2004    if (bytes != NULL && bytes != Py_None) {
2005        return do_xstrip(self, striptype, bytes);
2006    }
2007    return do_strip(self, striptype);
2008}
2009
2010/*[clinic input]
2011bytes.strip
2012
2013    bytes: object = None
2014    /
2015
2016Strip leading and trailing bytes contained in the argument.
2017
2018If the argument is omitted or None, strip leading and trailing ASCII whitespace.
2019[clinic start generated code]*/
2020
static PyObject *
bytes_strip_impl(PyBytesObject *self, PyObject *bytes)
/*[clinic end generated code: output=c7c228d3bd104a1b input=8a354640e4e0b3ef]*/
{
    /* Both ends are stripped; do_argstrip() picks the whitespace or the
       explicit-set variant depending on whether bytes is NULL/None. */
    return do_argstrip(self, BOTHSTRIP, bytes);
}
2027
2028/*[clinic input]
2029bytes.lstrip
2030
2031    bytes: object = None
2032    /
2033
2034Strip leading bytes contained in the argument.
2035
2036If the argument is omitted or None, strip leading  ASCII whitespace.
2037[clinic start generated code]*/
2038
static PyObject *
bytes_lstrip_impl(PyBytesObject *self, PyObject *bytes)
/*[clinic end generated code: output=28602e586f524e82 input=9baff4398c3f6857]*/
{
    /* Only the left end is stripped; do_argstrip() picks the whitespace or
       the explicit-set variant depending on whether bytes is NULL/None. */
    return do_argstrip(self, LEFTSTRIP, bytes);
}
2045
2046/*[clinic input]
2047bytes.rstrip
2048
2049    bytes: object = None
2050    /
2051
2052Strip trailing bytes contained in the argument.
2053
2054If the argument is omitted or None, strip trailing ASCII whitespace.
2055[clinic start generated code]*/
2056
static PyObject *
bytes_rstrip_impl(PyBytesObject *self, PyObject *bytes)
/*[clinic end generated code: output=547e3815c95447da input=b78af445c727e32b]*/
{
    /* Only the right end is stripped; do_argstrip() picks the whitespace or
       the explicit-set variant depending on whether bytes is NULL/None. */
    return do_argstrip(self, RIGHTSTRIP, bytes);
}
2063
2064
/* Implementation of bytes.count (registered in bytes_methods with
   METH_VARARGS).  Passes this object's data pointer and length, plus the
   still-unparsed args, to _Py_bytes_count() and returns its result. */
static PyObject *
bytes_count(PyBytesObject *self, PyObject *args)
{
    return _Py_bytes_count(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
}
2070
2071
2072/*[clinic input]
2073bytes.translate
2074
2075    table: object
2076        Translation table, which must be a bytes object of length 256.
2077    /
2078    delete as deletechars: object(c_default="NULL") = b''
2079
2080Return a copy with each character mapped by the given translation table.
2081
2082All characters occurring in the optional argument delete are removed.
2083The remaining characters are mapped through the given translation table.
2084[clinic start generated code]*/
2085
2086static PyObject *
2087bytes_translate_impl(PyBytesObject *self, PyObject *table,
2088                     PyObject *deletechars)
2089/*[clinic end generated code: output=43be3437f1956211 input=0ecdf159f654233c]*/
2090{
2091    char *input, *output;
2092    Py_buffer table_view = {NULL, NULL};
2093    Py_buffer del_table_view = {NULL, NULL};
2094    const char *table_chars;
2095    Py_ssize_t i, c, changed = 0;
2096    PyObject *input_obj = (PyObject*)self;
2097    const char *output_start, *del_table_chars=NULL;
2098    Py_ssize_t inlen, tablen, dellen = 0;
2099    PyObject *result;
2100    int trans_table[256];
2101
2102    if (PyBytes_Check(table)) {
2103        table_chars = PyBytes_AS_STRING(table);
2104        tablen = PyBytes_GET_SIZE(table);
2105    }
2106    else if (table == Py_None) {
2107        table_chars = NULL;
2108        tablen = 256;
2109    }
2110    else {
2111        if (PyObject_GetBuffer(table, &table_view, PyBUF_SIMPLE) != 0)
2112            return NULL;
2113        table_chars = table_view.buf;
2114        tablen = table_view.len;
2115    }
2116
2117    if (tablen != 256) {
2118        PyErr_SetString(PyExc_ValueError,
2119          "translation table must be 256 characters long");
2120        PyBuffer_Release(&table_view);
2121        return NULL;
2122    }
2123
2124    if (deletechars != NULL) {
2125        if (PyBytes_Check(deletechars)) {
2126            del_table_chars = PyBytes_AS_STRING(deletechars);
2127            dellen = PyBytes_GET_SIZE(deletechars);
2128        }
2129        else {
2130            if (PyObject_GetBuffer(deletechars, &del_table_view, PyBUF_SIMPLE) != 0) {
2131                PyBuffer_Release(&table_view);
2132                return NULL;
2133            }
2134            del_table_chars = del_table_view.buf;
2135            dellen = del_table_view.len;
2136        }
2137    }
2138    else {
2139        del_table_chars = NULL;
2140        dellen = 0;
2141    }
2142
2143    inlen = PyBytes_GET_SIZE(input_obj);
2144    result = PyBytes_FromStringAndSize((char *)NULL, inlen);
2145    if (result == NULL) {
2146        PyBuffer_Release(&del_table_view);
2147        PyBuffer_Release(&table_view);
2148        return NULL;
2149    }
2150    output_start = output = PyBytes_AS_STRING(result);
2151    input = PyBytes_AS_STRING(input_obj);
2152
2153    if (dellen == 0 && table_chars != NULL) {
2154        /* If no deletions are required, use faster code */
2155        for (i = inlen; --i >= 0; ) {
2156            c = Py_CHARMASK(*input++);
2157            if (Py_CHARMASK((*output++ = table_chars[c])) != c)
2158                changed = 1;
2159        }
2160        if (!changed && PyBytes_CheckExact(input_obj)) {
2161            Py_INCREF(input_obj);
2162            Py_DECREF(result);
2163            result = input_obj;
2164        }
2165        PyBuffer_Release(&del_table_view);
2166        PyBuffer_Release(&table_view);
2167        return result;
2168    }
2169
2170    if (table_chars == NULL) {
2171        for (i = 0; i < 256; i++)
2172            trans_table[i] = Py_CHARMASK(i);
2173    } else {
2174        for (i = 0; i < 256; i++)
2175            trans_table[i] = Py_CHARMASK(table_chars[i]);
2176    }
2177    PyBuffer_Release(&table_view);
2178
2179    for (i = 0; i < dellen; i++)
2180        trans_table[(int) Py_CHARMASK(del_table_chars[i])] = -1;
2181    PyBuffer_Release(&del_table_view);
2182
2183    for (i = inlen; --i >= 0; ) {
2184        c = Py_CHARMASK(*input++);
2185        if (trans_table[c] != -1)
2186            if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
2187                continue;
2188        changed = 1;
2189    }
2190    if (!changed && PyBytes_CheckExact(input_obj)) {
2191        Py_DECREF(result);
2192        Py_INCREF(input_obj);
2193        return input_obj;
2194    }
2195    /* Fix the size of the resulting string */
2196    if (inlen > 0)
2197        _PyBytes_Resize(&result, output - output_start);
2198    return result;
2199}
2200
2201
2202/*[clinic input]
2203
2204@staticmethod
2205bytes.maketrans
2206
2207    frm: Py_buffer
2208    to: Py_buffer
2209    /
2210
2211Return a translation table useable for the bytes or bytearray translate method.
2212
2213The returned table will be one where each byte in frm is mapped to the byte at
2214the same position in to.
2215
2216The bytes objects frm and to must be of the same length.
2217[clinic start generated code]*/
2218
static PyObject *
bytes_maketrans_impl(Py_buffer *frm, Py_buffer *to)
/*[clinic end generated code: output=a36f6399d4b77f6f input=de7a8fc5632bb8f1]*/
{
    /* All of the work is done by _Py_bytes_maketrans(); both buffers are
       forwarded unchanged. */
    return _Py_bytes_maketrans(frm, to);
}
2225
2226
2227/*[clinic input]
2228bytes.replace
2229
2230    old: Py_buffer
2231    new: Py_buffer
2232    count: Py_ssize_t = -1
2233        Maximum number of occurrences to replace.
2234        -1 (the default value) means replace all occurrences.
2235    /
2236
2237Return a copy with all occurrences of substring old replaced by new.
2238
2239If the optional argument count is given, only the first count occurrences are
2240replaced.
2241[clinic start generated code]*/
2242
static PyObject *
bytes_replace_impl(PyBytesObject *self, Py_buffer *old, Py_buffer *new,
                   Py_ssize_t count)
/*[clinic end generated code: output=994fa588b6b9c104 input=b2fbbf0bf04de8e5]*/
{
    /* Hand the data pointer and length of both buffers, plus count (whose
       clinic default is -1), to stringlib_replace(). */
    return stringlib_replace((PyObject *)self,
                             (const char *)old->buf, old->len,
                             (const char *)new->buf, new->len, count);
}
2252
2253/** End DALKE **/
2254
2255
/* Implementation of bytes.startswith (registered in bytes_methods with
   METH_VARARGS).  Passes this object's data pointer and length, plus the
   still-unparsed args, to _Py_bytes_startswith() and returns its result. */
static PyObject *
bytes_startswith(PyBytesObject *self, PyObject *args)
{
    return _Py_bytes_startswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
}
2261
/* Implementation of bytes.endswith (registered in bytes_methods with
   METH_VARARGS).  Passes this object's data pointer and length, plus the
   still-unparsed args, to _Py_bytes_endswith() and returns its result. */
static PyObject *
bytes_endswith(PyBytesObject *self, PyObject *args)
{
    return _Py_bytes_endswith(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self), args);
}
2267
2268
2269/*[clinic input]
2270bytes.decode
2271
2272    encoding: str(c_default="NULL") = 'utf-8'
2273        The encoding with which to decode the bytes.
2274    errors: str(c_default="NULL") = 'strict'
2275        The error handling scheme to use for the handling of decoding errors.
2276        The default is 'strict' meaning that decoding errors raise a
2277        UnicodeDecodeError. Other possible values are 'ignore' and 'replace'
2278        as well as any other name registered with codecs.register_error that
2279        can handle UnicodeDecodeErrors.
2280
2281Decode the bytes using the codec registered for encoding.
2282[clinic start generated code]*/
2283
static PyObject *
bytes_decode_impl(PyBytesObject *self, const char *encoding,
                  const char *errors)
/*[clinic end generated code: output=5649a53dde27b314 input=958174769d2a40ca]*/
{
    /* encoding and errors are forwarded as received; the clinic C default
       for both is NULL, so an omitted argument reaches
       PyUnicode_FromEncodedObject() as NULL. */
    return PyUnicode_FromEncodedObject((PyObject*)self, encoding, errors);
}
2291
2292
2293/*[clinic input]
2294bytes.splitlines
2295
2296    keepends: int(c_default="0") = False
2297
2298Return a list of the lines in the bytes, breaking at line boundaries.
2299
2300Line breaks are not included in the resulting list unless keepends is given and
2301true.
2302[clinic start generated code]*/
2303
static PyObject *
bytes_splitlines_impl(PyBytesObject *self, int keepends)
/*[clinic end generated code: output=3484149a5d880ffb input=7f4aac67144f9944]*/
{
    /* stringlib_splitlines() receives the object itself as well as its raw
       data pointer and length; keepends is passed through unchanged. */
    return stringlib_splitlines(
        (PyObject*) self, PyBytes_AS_STRING(self),
        PyBytes_GET_SIZE(self), keepends
        );
}
2313
2314/*[clinic input]
2315@classmethod
2316bytes.fromhex
2317
2318    string: unicode
2319    /
2320
2321Create a bytes object from a string of hexadecimal numbers.
2322
2323Spaces between two numbers are accepted.
2324Example: bytes.fromhex('B9 01EF') -> b'\\xb9\\x01\\xef'.
2325[clinic start generated code]*/
2326
2327static PyObject *
2328bytes_fromhex_impl(PyTypeObject *type, PyObject *string)
2329/*[clinic end generated code: output=0973acc63661bb2e input=bf4d1c361670acd3]*/
2330{
2331    PyObject *result = _PyBytes_FromHex(string, 0);
2332    if (type != &PyBytes_Type && result != NULL) {
2333        Py_SETREF(result, PyObject_CallFunctionObjArgs((PyObject *)type,
2334                                                       result, NULL));
2335    }
2336    return result;
2337}
2338
2339PyObject*
2340_PyBytes_FromHex(PyObject *string, int use_bytearray)
2341{
2342    char *buf;
2343    Py_ssize_t hexlen, invalid_char;
2344    unsigned int top, bot;
2345    Py_UCS1 *str, *end;
2346    _PyBytesWriter writer;
2347
2348    _PyBytesWriter_Init(&writer);
2349    writer.use_bytearray = use_bytearray;
2350
2351    assert(PyUnicode_Check(string));
2352    if (PyUnicode_READY(string))
2353        return NULL;
2354    hexlen = PyUnicode_GET_LENGTH(string);
2355
2356    if (!PyUnicode_IS_ASCII(string)) {
2357        void *data = PyUnicode_DATA(string);
2358        unsigned int kind = PyUnicode_KIND(string);
2359        Py_ssize_t i;
2360
2361        /* search for the first non-ASCII character */
2362        for (i = 0; i < hexlen; i++) {
2363            if (PyUnicode_READ(kind, data, i) >= 128)
2364                break;
2365        }
2366        invalid_char = i;
2367        goto error;
2368    }
2369
2370    assert(PyUnicode_KIND(string) == PyUnicode_1BYTE_KIND);
2371    str = PyUnicode_1BYTE_DATA(string);
2372
2373    /* This overestimates if there are spaces */
2374    buf = _PyBytesWriter_Alloc(&writer, hexlen / 2);
2375    if (buf == NULL)
2376        return NULL;
2377
2378    end = str + hexlen;
2379    while (str < end) {
2380        /* skip over spaces in the input */
2381        if (*str == ' ') {
2382            do {
2383                str++;
2384            } while (*str == ' ');
2385            if (str >= end)
2386                break;
2387        }
2388
2389        top = _PyLong_DigitValue[*str];
2390        if (top >= 16) {
2391            invalid_char = str - PyUnicode_1BYTE_DATA(string);
2392            goto error;
2393        }
2394        str++;
2395
2396        bot = _PyLong_DigitValue[*str];
2397        if (bot >= 16) {
2398            invalid_char = str - PyUnicode_1BYTE_DATA(string);
2399            goto error;
2400        }
2401        str++;
2402
2403        *buf++ = (unsigned char)((top << 4) + bot);
2404    }
2405
2406    return _PyBytesWriter_Finish(&writer, buf);
2407
2408  error:
2409    PyErr_Format(PyExc_ValueError,
2410                 "non-hexadecimal number found in "
2411                 "fromhex() arg at position %zd", invalid_char);
2412    _PyBytesWriter_Dealloc(&writer);
2413    return NULL;
2414}
2415
2416PyDoc_STRVAR(hex__doc__,
2417"B.hex() -> string\n\
2418\n\
2419Create a string of hexadecimal numbers from a bytes object.\n\
2420Example: b'\\xb9\\x01\\xef'.hex() -> 'b901ef'.");
2421
2422static PyObject *
2423bytes_hex(PyBytesObject *self)
2424{
2425    char* argbuf = PyBytes_AS_STRING(self);
2426    Py_ssize_t arglen = PyBytes_GET_SIZE(self);
2427    return _Py_strhex(argbuf, arglen);
2428}
2429
/* Implementation of __getnewargs__ (METH_NOARGS): build a one-element tuple
   from this object's data and size using the "(y#)" format.  The length is
   passed as Py_ssize_t because the file defines PY_SSIZE_T_CLEAN. */
static PyObject *
bytes_getnewargs(PyBytesObject *v)
{
    return Py_BuildValue("(y#)", v->ob_sval, Py_SIZE(v));
}
2435
2436
/* Method table of the bytes type (installed as tp_methods in PyBytes_Type).
   Entries are either written out by hand or supplied by the BYTES_*_METHODDEF
   macros that accompany the Argument Clinic generated code.  The table ends
   with a {NULL, NULL} sentinel. */
static PyMethodDef
bytes_methods[] = {
    {"__getnewargs__",          (PyCFunction)bytes_getnewargs,  METH_NOARGS},
    {"capitalize", (PyCFunction)stringlib_capitalize, METH_NOARGS,
     _Py_capitalize__doc__},
    {"center", (PyCFunction)stringlib_center, METH_VARARGS,
     _Py_center__doc__},
    {"count", (PyCFunction)bytes_count, METH_VARARGS,
     _Py_count__doc__},
    BYTES_DECODE_METHODDEF
    {"endswith", (PyCFunction)bytes_endswith, METH_VARARGS,
     _Py_endswith__doc__},
    {"expandtabs", (PyCFunction)stringlib_expandtabs, METH_VARARGS | METH_KEYWORDS,
     _Py_expandtabs__doc__},
    {"find", (PyCFunction)bytes_find, METH_VARARGS,
     _Py_find__doc__},
    BYTES_FROMHEX_METHODDEF
    {"hex", (PyCFunction)bytes_hex, METH_NOARGS, hex__doc__},
    {"index", (PyCFunction)bytes_index, METH_VARARGS, _Py_index__doc__},
    {"isalnum", (PyCFunction)stringlib_isalnum, METH_NOARGS,
     _Py_isalnum__doc__},
    {"isalpha", (PyCFunction)stringlib_isalpha, METH_NOARGS,
     _Py_isalpha__doc__},
    {"isdigit", (PyCFunction)stringlib_isdigit, METH_NOARGS,
     _Py_isdigit__doc__},
    {"islower", (PyCFunction)stringlib_islower, METH_NOARGS,
     _Py_islower__doc__},
    {"isspace", (PyCFunction)stringlib_isspace, METH_NOARGS,
     _Py_isspace__doc__},
    {"istitle", (PyCFunction)stringlib_istitle, METH_NOARGS,
     _Py_istitle__doc__},
    {"isupper", (PyCFunction)stringlib_isupper, METH_NOARGS,
     _Py_isupper__doc__},
    BYTES_JOIN_METHODDEF
    {"ljust", (PyCFunction)stringlib_ljust, METH_VARARGS, _Py_ljust__doc__},
    {"lower", (PyCFunction)stringlib_lower, METH_NOARGS, _Py_lower__doc__},
    BYTES_LSTRIP_METHODDEF
    BYTES_MAKETRANS_METHODDEF
    BYTES_PARTITION_METHODDEF
    BYTES_REPLACE_METHODDEF
    {"rfind", (PyCFunction)bytes_rfind, METH_VARARGS, _Py_rfind__doc__},
    {"rindex", (PyCFunction)bytes_rindex, METH_VARARGS, _Py_rindex__doc__},
    {"rjust", (PyCFunction)stringlib_rjust, METH_VARARGS, _Py_rjust__doc__},
    BYTES_RPARTITION_METHODDEF
    BYTES_RSPLIT_METHODDEF
    BYTES_RSTRIP_METHODDEF
    BYTES_SPLIT_METHODDEF
    BYTES_SPLITLINES_METHODDEF
    {"startswith", (PyCFunction)bytes_startswith, METH_VARARGS,
     _Py_startswith__doc__},
    BYTES_STRIP_METHODDEF
    {"swapcase", (PyCFunction)stringlib_swapcase, METH_NOARGS,
     _Py_swapcase__doc__},
    {"title", (PyCFunction)stringlib_title, METH_NOARGS, _Py_title__doc__},
    BYTES_TRANSLATE_METHODDEF
    {"upper", (PyCFunction)stringlib_upper, METH_NOARGS, _Py_upper__doc__},
    {"zfill", (PyCFunction)stringlib_zfill, METH_VARARGS, _Py_zfill__doc__},
    {NULL,     NULL}                         /* sentinel */
};
2496
2497static PyObject *
2498bytes_mod(PyObject *self, PyObject *arg)
2499{
2500    if (!PyBytes_Check(self)) {
2501        Py_RETURN_NOTIMPLEMENTED;
2502    }
2503    return _PyBytes_FormatEx(PyBytes_AS_STRING(self), PyBytes_GET_SIZE(self),
2504                             arg, 0);
2505}
2506
/* Number protocol of the bytes type (installed as tp_as_number in
   PyBytes_Type).  Only nb_remainder is provided; the slots not listed in
   this initializer are zero-initialized. */
static PyNumberMethods bytes_as_number = {
    0,              /*nb_add*/
    0,              /*nb_subtract*/
    0,              /*nb_multiply*/
    bytes_mod,      /*nb_remainder*/
};
2513
2514static PyObject *
2515bytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds);
2516
2517static PyObject *
2518bytes_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2519{
2520    PyObject *x = NULL;
2521    const char *encoding = NULL;
2522    const char *errors = NULL;
2523    PyObject *new = NULL;
2524    PyObject *func;
2525    Py_ssize_t size;
2526    static char *kwlist[] = {"source", "encoding", "errors", 0};
2527    _Py_IDENTIFIER(__bytes__);
2528
2529    if (type != &PyBytes_Type)
2530        return bytes_subtype_new(type, args, kwds);
2531    if (!PyArg_ParseTupleAndKeywords(args, kwds, "|Oss:bytes", kwlist, &x,
2532                                     &encoding, &errors))
2533        return NULL;
2534    if (x == NULL) {
2535        if (encoding != NULL || errors != NULL) {
2536            PyErr_SetString(PyExc_TypeError,
2537                            "encoding or errors without sequence "
2538                            "argument");
2539            return NULL;
2540        }
2541        return PyBytes_FromStringAndSize(NULL, 0);
2542    }
2543
2544    if (encoding != NULL) {
2545        /* Encode via the codec registry */
2546        if (!PyUnicode_Check(x)) {
2547            PyErr_SetString(PyExc_TypeError,
2548                            "encoding without a string argument");
2549            return NULL;
2550        }
2551        new = PyUnicode_AsEncodedString(x, encoding, errors);
2552        if (new == NULL)
2553            return NULL;
2554        assert(PyBytes_Check(new));
2555        return new;
2556    }
2557
2558    if (errors != NULL) {
2559        PyErr_SetString(PyExc_TypeError,
2560                        PyUnicode_Check(x) ?
2561                        "string argument without an encoding" :
2562                        "errors without a string argument");
2563        return NULL;
2564    }
2565
2566    /* We'd like to call PyObject_Bytes here, but we need to check for an
2567       integer argument before deferring to PyBytes_FromObject, something
2568       PyObject_Bytes doesn't do. */
2569    func = _PyObject_LookupSpecial(x, &PyId___bytes__);
2570    if (func != NULL) {
2571        new = PyObject_CallFunctionObjArgs(func, NULL);
2572        Py_DECREF(func);
2573        if (new == NULL)
2574            return NULL;
2575        if (!PyBytes_Check(new)) {
2576            PyErr_Format(PyExc_TypeError,
2577                         "__bytes__ returned non-bytes (type %.200s)",
2578                         Py_TYPE(new)->tp_name);
2579            Py_DECREF(new);
2580            return NULL;
2581        }
2582        return new;
2583    }
2584    else if (PyErr_Occurred())
2585        return NULL;
2586
2587    if (PyUnicode_Check(x)) {
2588        PyErr_SetString(PyExc_TypeError,
2589                        "string argument without an encoding");
2590        return NULL;
2591    }
2592    /* Is it an integer? */
2593    if (PyIndex_Check(x)) {
2594        size = PyNumber_AsSsize_t(x, PyExc_OverflowError);
2595        if (size == -1 && PyErr_Occurred()) {
2596            if (PyErr_ExceptionMatches(PyExc_OverflowError))
2597                return NULL;
2598            PyErr_Clear();  /* fall through */
2599        }
2600        else {
2601            if (size < 0) {
2602                PyErr_SetString(PyExc_ValueError, "negative count");
2603                return NULL;
2604            }
2605            new = _PyBytes_FromSize(size, 1);
2606            if (new == NULL)
2607                return NULL;
2608            return new;
2609        }
2610    }
2611
2612    return PyBytes_FromObject(x);
2613}
2614
2615static PyObject*
2616_PyBytes_FromBuffer(PyObject *x)
2617{
2618    PyObject *new;
2619    Py_buffer view;
2620
2621    if (PyObject_GetBuffer(x, &view, PyBUF_FULL_RO) < 0)
2622        return NULL;
2623
2624    new = PyBytes_FromStringAndSize(NULL, view.len);
2625    if (!new)
2626        goto fail;
2627    if (PyBuffer_ToContiguous(((PyBytesObject *)new)->ob_sval,
2628                &view, view.len, 'C') < 0)
2629        goto fail;
2630    PyBuffer_Release(&view);
2631    return new;
2632
2633fail:
2634    Py_XDECREF(new);
2635    PyBuffer_Release(&view);
2636    return NULL;
2637}
2638
/* Function body shared by the list and tuple converters.

   x         the sequence object (evaluated several times);
   GET_ITEM  accessor macro returning a borrowed reference to item i.

   Each item is converted with PyNumber_AsSsize_t() and must lie in
   range(0, 256).  The macro contains `return` statements and an `error`
   label, so it must be the whole body of a function returning PyObject *.

   Converting an item may run arbitrary Python code (__index__) that can
   mutate the sequence.  Therefore:
     - the item is kept alive with a strong reference during the call;
     - the loop is bounded by the size the output was allocated for, so a
       sequence that grows cannot overflow the buffer;
     - the loop also stops at the current size, and the result is then
       shrunk to the bytes actually written, so a sequence that shrinks
       cannot expose uninitialized memory. */
#define _PyBytes_FROM_LIST_BODY(x, GET_ITEM)                                \
    do {                                                                    \
        PyObject *bytes;                                                    \
        Py_ssize_t i;                                                       \
        Py_ssize_t size = Py_SIZE(x);                                       \
        Py_ssize_t value;                                                   \
        char *str;                                                          \
        PyObject *item;                                                     \
                                                                            \
        bytes = PyBytes_FromStringAndSize(NULL, size);                      \
        if (bytes == NULL)                                                  \
            return NULL;                                                    \
        str = ((PyBytesObject *)bytes)->ob_sval;                            \
                                                                            \
        for (i = 0; i < size && i < Py_SIZE(x); i++) {                      \
            item = GET_ITEM((x), i);                                        \
            Py_INCREF(item);                                                \
            value = PyNumber_AsSsize_t(item, NULL);                         \
            Py_DECREF(item);                                                \
            if (value == -1 && PyErr_Occurred())                            \
                goto error;                                                 \
                                                                            \
            if (value < 0 || value >= 256) {                                \
                PyErr_SetString(PyExc_ValueError,                           \
                                "bytes must be in range(0, 256)");          \
                goto error;                                                 \
            }                                                               \
            *str++ = (char) value;                                          \
        }                                                                   \
        /* The sequence shrank while it was being read: trim the tail. */   \
        /* On failure _PyBytes_Resize() has already released `bytes`.  */   \
        if (i < size && _PyBytes_Resize(&bytes, i) < 0)                     \
            return NULL;                                                    \
        return bytes;                                                       \
                                                                            \
    error:                                                                  \
        Py_DECREF(bytes);                                                   \
        return NULL;                                                        \
    } while (0)
2671
/* Build a bytes object from the items of a list.  The whole body, including
   the return statements, comes from the _PyBytes_FROM_LIST_BODY macro
   instantiated with PyList_GET_ITEM as the item accessor. */
static PyObject*
_PyBytes_FromList(PyObject *x)
{
    _PyBytes_FROM_LIST_BODY(x, PyList_GET_ITEM);
}
2677
/* Build a bytes object from the items of a tuple.  The whole body, including
   the return statements, comes from the _PyBytes_FROM_LIST_BODY macro
   instantiated with PyTuple_GET_ITEM as the item accessor. */
static PyObject*
_PyBytes_FromTuple(PyObject *x)
{
    _PyBytes_FROM_LIST_BODY(x, PyTuple_GET_ITEM);
}
2683
2684static PyObject *
2685_PyBytes_FromIterator(PyObject *it, PyObject *x)
2686{
2687    char *str;
2688    Py_ssize_t i, size;
2689    _PyBytesWriter writer;
2690
2691    /* For iterator version, create a string object and resize as needed */
2692    size = PyObject_LengthHint(x, 64);
2693    if (size == -1 && PyErr_Occurred())
2694        return NULL;
2695
2696    _PyBytesWriter_Init(&writer);
2697    str = _PyBytesWriter_Alloc(&writer, size);
2698    if (str == NULL)
2699        return NULL;
2700    writer.overallocate = 1;
2701    size = writer.allocated;
2702
2703    /* Run the iterator to exhaustion */
2704    for (i = 0; ; i++) {
2705        PyObject *item;
2706        Py_ssize_t value;
2707
2708        /* Get the next item */
2709        item = PyIter_Next(it);
2710        if (item == NULL) {
2711            if (PyErr_Occurred())
2712                goto error;
2713            break;
2714        }
2715
2716        /* Interpret it as an int (__index__) */
2717        value = PyNumber_AsSsize_t(item, NULL);
2718        Py_DECREF(item);
2719        if (value == -1 && PyErr_Occurred())
2720            goto error;
2721
2722        /* Range check */
2723        if (value < 0 || value >= 256) {
2724            PyErr_SetString(PyExc_ValueError,
2725                            "bytes must be in range(0, 256)");
2726            goto error;
2727        }
2728
2729        /* Append the byte */
2730        if (i >= size) {
2731            str = _PyBytesWriter_Resize(&writer, str, size+1);
2732            if (str == NULL)
2733                return NULL;
2734            size = writer.allocated;
2735        }
2736        *str++ = (char) value;
2737    }
2738
2739    return _PyBytesWriter_Finish(&writer, str);
2740
2741  error:
2742    _PyBytesWriter_Dealloc(&writer);
2743    return NULL;
2744}
2745
2746PyObject *
2747PyBytes_FromObject(PyObject *x)
2748{
2749    PyObject *it, *result;
2750
2751    if (x == NULL) {
2752        PyErr_BadInternalCall();
2753        return NULL;
2754    }
2755
2756    if (PyBytes_CheckExact(x)) {
2757        Py_INCREF(x);
2758        return x;
2759    }
2760
2761    /* Use the modern buffer interface */
2762    if (PyObject_CheckBuffer(x))
2763        return _PyBytes_FromBuffer(x);
2764
2765    if (PyList_CheckExact(x))
2766        return _PyBytes_FromList(x);
2767
2768    if (PyTuple_CheckExact(x))
2769        return _PyBytes_FromTuple(x);
2770
2771    if (!PyUnicode_Check(x)) {
2772        it = PyObject_GetIter(x);
2773        if (it != NULL) {
2774            result = _PyBytes_FromIterator(it, x);
2775            Py_DECREF(it);
2776            return result;
2777        }
2778    }
2779
2780    PyErr_Format(PyExc_TypeError,
2781                 "cannot convert '%.200s' object to bytes",
2782                 x->ob_type->tp_name);
2783    return NULL;
2784}
2785
2786static PyObject *
2787bytes_subtype_new(PyTypeObject *type, PyObject *args, PyObject *kwds)
2788{
2789    PyObject *tmp, *pnew;
2790    Py_ssize_t n;
2791
2792    assert(PyType_IsSubtype(type, &PyBytes_Type));
2793    tmp = bytes_new(&PyBytes_Type, args, kwds);
2794    if (tmp == NULL)
2795        return NULL;
2796    assert(PyBytes_Check(tmp));
2797    n = PyBytes_GET_SIZE(tmp);
2798    pnew = type->tp_alloc(type, n);
2799    if (pnew != NULL) {
2800        memcpy(PyBytes_AS_STRING(pnew),
2801                  PyBytes_AS_STRING(tmp), n+1);
2802        ((PyBytesObject *)pnew)->ob_shash =
2803            ((PyBytesObject *)tmp)->ob_shash;
2804    }
2805    Py_DECREF(tmp);
2806    return pnew;
2807}
2808
/* Docstring of the bytes type (installed as tp_doc in PyBytes_Type). */
PyDoc_STRVAR(bytes_doc,
"bytes(iterable_of_ints) -> bytes\n\
bytes(string, encoding[, errors]) -> bytes\n\
bytes(bytes_or_buffer) -> immutable copy of bytes_or_buffer\n\
bytes(int) -> bytes object of size given by the parameter initialized with null bytes\n\
bytes() -> empty bytes object\n\
\n\
Construct an immutable array of bytes from:\n\
  - an iterable yielding integers in range(256)\n\
  - a text string encoded using the specified encoding\n\
  - any object implementing the buffer API.\n\
  - an integer");
2821
2822static PyObject *bytes_iter(PyObject *seq);
2823
/* Type object of bytes.  It is a variable-size type: the basic size is
   PyBytesObject_SIZE and each item is one char.  The type can be subclassed
   (Py_TPFLAGS_BASETYPE) and is flagged Py_TPFLAGS_BYTES_SUBCLASS.  Instances
   are created by bytes_new; tp_alloc and tp_init are left as 0 here. */
PyTypeObject PyBytes_Type = {
    PyVarObject_HEAD_INIT(&PyType_Type, 0)
    "bytes",                                    /* tp_name */
    PyBytesObject_SIZE,                         /* tp_basicsize */
    sizeof(char),                               /* tp_itemsize */
    bytes_dealloc,                      /* tp_dealloc */
    0,                                          /* tp_print */
    0,                                          /* tp_getattr */
    0,                                          /* tp_setattr */
    0,                                          /* tp_reserved */
    (reprfunc)bytes_repr,                       /* tp_repr */
    &bytes_as_number,                           /* tp_as_number */
    &bytes_as_sequence,                         /* tp_as_sequence */
    &bytes_as_mapping,                          /* tp_as_mapping */
    (hashfunc)bytes_hash,                       /* tp_hash */
    0,                                          /* tp_call */
    bytes_str,                                  /* tp_str */
    PyObject_GenericGetAttr,                    /* tp_getattro */
    0,                                          /* tp_setattro */
    &bytes_as_buffer,                           /* tp_as_buffer */
    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_BASETYPE |
        Py_TPFLAGS_BYTES_SUBCLASS,              /* tp_flags */
    bytes_doc,                                  /* tp_doc */
    0,                                          /* tp_traverse */
    0,                                          /* tp_clear */
    (richcmpfunc)bytes_richcompare,             /* tp_richcompare */
    0,                                          /* tp_weaklistoffset */
    bytes_iter,                                 /* tp_iter */
    0,                                          /* tp_iternext */
    bytes_methods,                              /* tp_methods */
    0,                                          /* tp_members */
    0,                                          /* tp_getset */
    &PyBaseObject_Type,                         /* tp_base */
    0,                                          /* tp_dict */
    0,                                          /* tp_descr_get */
    0,                                          /* tp_descr_set */
    0,                                          /* tp_dictoffset */
    0,                                          /* tp_init */
    0,                                          /* tp_alloc */
    bytes_new,                                  /* tp_new */
    PyObject_Del,                               /* tp_free */
};
2866
2867void
2868PyBytes_Concat(PyObject **pv, PyObject *w)
2869{
2870    assert(pv != NULL);
2871    if (*pv == NULL)
2872        return;
2873    if (w == NULL) {
2874        Py_CLEAR(*pv);
2875        return;
2876    }
2877
2878    if (Py_REFCNT(*pv) == 1 && PyBytes_CheckExact(*pv)) {
2879        /* Only one reference, so we can resize in place */
2880        Py_ssize_t oldsize;
2881        Py_buffer wb;
2882
2883        wb.len = -1;
2884        if (PyObject_GetBuffer(w, &wb, PyBUF_SIMPLE) != 0) {
2885            PyErr_Format(PyExc_TypeError, "can't concat %.100s to %.100s",
2886                         Py_TYPE(w)->tp_name, Py_TYPE(*pv)->tp_name);
2887            Py_CLEAR(*pv);
2888            return;
2889        }
2890
2891        oldsize = PyBytes_GET_SIZE(*pv);
2892        if (oldsize > PY_SSIZE_T_MAX - wb.len) {
2893            PyErr_NoMemory();
2894            goto error;
2895        }
2896        if (_PyBytes_Resize(pv, oldsize + wb.len) < 0)
2897            goto error;
2898
2899        memcpy(PyBytes_AS_STRING(*pv) + oldsize, wb.buf, wb.len);
2900        PyBuffer_Release(&wb);
2901        return;
2902
2903      error:
2904        PyBuffer_Release(&wb);
2905        Py_CLEAR(*pv);
2906        return;
2907    }
2908
2909    else {
2910        /* Multiple references, need to create new object */
2911        PyObject *v;
2912        v = bytes_concat(*pv, w);
2913        Py_SETREF(*pv, v);
2914    }
2915}
2916
2917void
2918PyBytes_ConcatAndDel(PyObject **pv, PyObject *w)
2919{
2920    PyBytes_Concat(pv, w);
2921    Py_XDECREF(w);
2922}
2923
2924
2925/* The following function breaks the notion that bytes are immutable:
2926   it changes the size of a bytes object.  We get away with this only if there
2927   is only one module referencing the object.  You can also think of it
2928   as creating a new bytes object and destroying the old one, only
2929   more efficiently.  In any case, don't use this if the bytes object may
2930   already be known to some other part of the code...
2931   Note that if there's not enough memory to resize the bytes object, the
2932   original bytes object at *pv is deallocated, *pv is set to NULL, an "out of
2933   memory" exception is set, and -1 is returned.  Else (on success) 0 is
2934   returned, and the value in *pv may or may not be the same as on input.
2935   As always, an extra byte is allocated for a trailing \0 byte (newsize
2936   does *not* include that), and a trailing \0 byte is stored.
2937*/
2938
2939int
2940_PyBytes_Resize(PyObject **pv, Py_ssize_t newsize)
2941{
2942    PyObject *v;
2943    PyBytesObject *sv;
2944    v = *pv;
2945    if (!PyBytes_Check(v) || newsize < 0) {
2946        goto error;
2947    }
2948    if (Py_SIZE(v) == newsize) {
2949        /* return early if newsize equals to v->ob_size */
2950        return 0;
2951    }
2952    if (Py_REFCNT(v) != 1) {
2953        goto error;
2954    }
2955    /* XXX UNREF/NEWREF interface should be more symmetrical */
2956    _Py_DEC_REFTOTAL;
2957    _Py_ForgetReference(v);
2958    *pv = (PyObject *)
2959        PyObject_REALLOC(v, PyBytesObject_SIZE + newsize);
2960    if (*pv == NULL) {
2961        PyObject_Del(v);
2962        PyErr_NoMemory();
2963        return -1;
2964    }
2965    _Py_NewReference(*pv);
2966    sv = (PyBytesObject *) *pv;
2967    Py_SIZE(sv) = newsize;
2968    sv->ob_sval[newsize] = '\0';
2969    sv->ob_shash = -1;          /* invalidate cached hash value */
2970    return 0;
2971error:
2972    *pv = 0;
2973    Py_DECREF(v);
2974    PyErr_BadInternalCall();
2975    return -1;
2976}
2977
2978void
2979PyBytes_Fini(void)
2980{
2981    int i;
2982    for (i = 0; i < UCHAR_MAX + 1; i++)
2983        Py_CLEAR(characters[i]);
2984    Py_CLEAR(nullstring);
2985}
2986
2987/*********************** Bytes Iterator ****************************/
2988
2989typedef struct {
2990    PyObject_HEAD
2991    Py_ssize_t it_index;
2992    PyBytesObject *it_seq; /* Set to NULL when iterator is exhausted */
2993} striterobject;
2994
2995static void
2996striter_dealloc(striterobject *it)
2997{
2998    _PyObject_GC_UNTRACK(it);
2999    Py_XDECREF(it->it_seq);
3000    PyObject_GC_Del(it);
3001}
3002
3003static int
3004striter_traverse(striterobject *it, visitproc visit, void *arg)
3005{
3006    Py_VISIT(it->it_seq);
3007    return 0;
3008}
3009
3010static PyObject *
3011striter_next(striterobject *it)
3012{
3013    PyBytesObject *seq;
3014    PyObject *item;
3015
3016    assert(it != NULL);
3017    seq = it->it_seq;
3018    if (seq == NULL)
3019        return NULL;
3020    assert(PyBytes_Check(seq));
3021
3022    if (it->it_index < PyBytes_GET_SIZE(seq)) {
3023        item = PyLong_FromLong(
3024            (unsigned char)seq->ob_sval[it->it_index]);
3025        if (item != NULL)
3026            ++it->it_index;
3027        return item;
3028    }
3029
3030    it->it_seq = NULL;
3031    Py_DECREF(seq);
3032    return NULL;
3033}
3034
3035static PyObject *
3036striter_len(striterobject *it)
3037{
3038    Py_ssize_t len = 0;
3039    if (it->it_seq)
3040        len = PyBytes_GET_SIZE(it->it_seq) - it->it_index;
3041    return PyLong_FromSsize_t(len);
3042}
3043
3044PyDoc_STRVAR(length_hint_doc,
3045             "Private method returning an estimate of len(list(it)).");
3046
3047static PyObject *
3048striter_reduce(striterobject *it)
3049{
3050    if (it->it_seq != NULL) {
3051        return Py_BuildValue("N(O)n", _PyObject_GetBuiltin("iter"),
3052                             it->it_seq, it->it_index);
3053    } else {
3054        PyObject *u = PyUnicode_FromUnicode(NULL, 0);
3055        if (u == NULL)
3056            return NULL;
3057        return Py_BuildValue("N(N)", _PyObject_GetBuiltin("iter"), u);
3058    }
3059}
3060
3061PyDoc_STRVAR(reduce_doc, "Return state information for pickling.");
3062
3063static PyObject *
3064striter_setstate(striterobject *it, PyObject *state)
3065{
3066    Py_ssize_t index = PyLong_AsSsize_t(state);
3067    if (index == -1 && PyErr_Occurred())
3068        return NULL;
3069    if (it->it_seq != NULL) {
3070        if (index < 0)
3071            index = 0;
3072        else if (index > PyBytes_GET_SIZE(it->it_seq))
3073            index = PyBytes_GET_SIZE(it->it_seq); /* iterator exhausted */
3074        it->it_index = index;
3075    }
3076    Py_RETURN_NONE;
3077}
3078
3079PyDoc_STRVAR(setstate_doc, "Set state information for unpickling.");
3080
3081static PyMethodDef striter_methods[] = {
3082    {"__length_hint__", (PyCFunction)striter_len, METH_NOARGS,
3083     length_hint_doc},
3084    {"__reduce__",      (PyCFunction)striter_reduce, METH_NOARGS,
3085     reduce_doc},
3086    {"__setstate__",    (PyCFunction)striter_setstate, METH_O,
3087     setstate_doc},
3088    {NULL,              NULL}           /* sentinel */
3089};
3090
3091PyTypeObject PyBytesIter_Type = {
3092    PyVarObject_HEAD_INIT(&PyType_Type, 0)
3093    "bytes_iterator",                           /* tp_name */
3094    sizeof(striterobject),                      /* tp_basicsize */
3095    0,                                          /* tp_itemsize */
3096    /* methods */
3097    (destructor)striter_dealloc,                /* tp_dealloc */
3098    0,                                          /* tp_print */
3099    0,                                          /* tp_getattr */
3100    0,                                          /* tp_setattr */
3101    0,                                          /* tp_reserved */
3102    0,                                          /* tp_repr */
3103    0,                                          /* tp_as_number */
3104    0,                                          /* tp_as_sequence */
3105    0,                                          /* tp_as_mapping */
3106    0,                                          /* tp_hash */
3107    0,                                          /* tp_call */
3108    0,                                          /* tp_str */
3109    PyObject_GenericGetAttr,                    /* tp_getattro */
3110    0,                                          /* tp_setattro */
3111    0,                                          /* tp_as_buffer */
3112    Py_TPFLAGS_DEFAULT | Py_TPFLAGS_HAVE_GC,/* tp_flags */
3113    0,                                          /* tp_doc */
3114    (traverseproc)striter_traverse,     /* tp_traverse */
3115    0,                                          /* tp_clear */
3116    0,                                          /* tp_richcompare */
3117    0,                                          /* tp_weaklistoffset */
3118    PyObject_SelfIter,                          /* tp_iter */
3119    (iternextfunc)striter_next,                 /* tp_iternext */
3120    striter_methods,                            /* tp_methods */
3121    0,
3122};
3123
3124static PyObject *
3125bytes_iter(PyObject *seq)
3126{
3127    striterobject *it;
3128
3129    if (!PyBytes_Check(seq)) {
3130        PyErr_BadInternalCall();
3131        return NULL;
3132    }
3133    it = PyObject_GC_New(striterobject, &PyBytesIter_Type);
3134    if (it == NULL)
3135        return NULL;
3136    it->it_index = 0;
3137    Py_INCREF(seq);
3138    it->it_seq = (PyBytesObject *)seq;
3139    _PyObject_GC_TRACK(it);
3140    return (PyObject *)it;
3141}
3142
3143
3144/* _PyBytesWriter API */
3145
/* The writer overallocates by (size / OVERALLOCATE_FACTOR) extra bytes. */
#ifdef MS_WINDOWS
   /* On Windows, overallocating by 50% (size / 2) is the best factor */
#  define OVERALLOCATE_FACTOR 2
#else
   /* On Linux, overallocating by 25% (size / 4) is the best factor */
#  define OVERALLOCATE_FACTOR 4
#endif
3153
3154void
3155_PyBytesWriter_Init(_PyBytesWriter *writer)
3156{
3157    /* Set all attributes before small_buffer to 0 */
3158    memset(writer, 0, offsetof(_PyBytesWriter, small_buffer));
3159#ifdef Py_DEBUG
3160    memset(writer->small_buffer, 0xCB, sizeof(writer->small_buffer));
3161#endif
3162}
3163
3164void
3165_PyBytesWriter_Dealloc(_PyBytesWriter *writer)
3166{
3167    Py_CLEAR(writer->buffer);
3168}
3169
3170Py_LOCAL_INLINE(char*)
3171_PyBytesWriter_AsString(_PyBytesWriter *writer)
3172{
3173    if (writer->use_small_buffer) {
3174        assert(writer->buffer == NULL);
3175        return writer->small_buffer;
3176    }
3177    else if (writer->use_bytearray) {
3178        assert(writer->buffer != NULL);
3179        return PyByteArray_AS_STRING(writer->buffer);
3180    }
3181    else {
3182        assert(writer->buffer != NULL);
3183        return PyBytes_AS_STRING(writer->buffer);
3184    }
3185}
3186
3187Py_LOCAL_INLINE(Py_ssize_t)
3188_PyBytesWriter_GetSize(_PyBytesWriter *writer, char *str)
3189{
3190    char *start = _PyBytesWriter_AsString(writer);
3191    assert(str != NULL);
3192    assert(str >= start);
3193    assert(str - start <= writer->allocated);
3194    return str - start;
3195}
3196
3197Py_LOCAL_INLINE(void)
3198_PyBytesWriter_CheckConsistency(_PyBytesWriter *writer, char *str)
3199{
3200#ifdef Py_DEBUG
3201    char *start, *end;
3202
3203    if (writer->use_small_buffer) {
3204        assert(writer->buffer == NULL);
3205    }
3206    else {
3207        assert(writer->buffer != NULL);
3208        if (writer->use_bytearray)
3209            assert(PyByteArray_CheckExact(writer->buffer));
3210        else
3211            assert(PyBytes_CheckExact(writer->buffer));
3212        assert(Py_REFCNT(writer->buffer) == 1);
3213    }
3214
3215    if (writer->use_bytearray) {
3216        /* bytearray has its own overallocation algorithm,
3217           writer overallocation must be disabled */
3218        assert(!writer->overallocate);
3219    }
3220
3221    assert(0 <= writer->allocated);
3222    assert(0 <= writer->min_size && writer->min_size <= writer->allocated);
3223    /* the last byte must always be null */
3224    start = _PyBytesWriter_AsString(writer);
3225    assert(start[writer->allocated] == 0);
3226
3227    end = start + writer->allocated;
3228    assert(str != NULL);
3229    assert(start <= str && str <= end);
3230#endif
3231}
3232
3233void*
3234_PyBytesWriter_Resize(_PyBytesWriter *writer, void *str, Py_ssize_t size)
3235{
3236    Py_ssize_t allocated, pos;
3237
3238    _PyBytesWriter_CheckConsistency(writer, str);
3239    assert(writer->allocated < size);
3240
3241    allocated = size;
3242    if (writer->overallocate
3243        && allocated <= (PY_SSIZE_T_MAX - allocated / OVERALLOCATE_FACTOR)) {
3244        /* overallocate to limit the number of realloc() */
3245        allocated += allocated / OVERALLOCATE_FACTOR;
3246    }
3247
3248    pos = _PyBytesWriter_GetSize(writer, str);
3249    if (!writer->use_small_buffer) {
3250        if (writer->use_bytearray) {
3251            if (PyByteArray_Resize(writer->buffer, allocated))
3252                goto error;
3253            /* writer->allocated can be smaller than writer->buffer->ob_alloc,
3254               but we cannot use ob_alloc because bytes may need to be moved
3255               to use the whole buffer. bytearray uses an internal optimization
3256               to avoid moving or copying bytes when bytes are removed at the
3257               beginning (ex: del bytearray[:1]). */
3258        }
3259        else {
3260            if (_PyBytes_Resize(&writer->buffer, allocated))
3261                goto error;
3262        }
3263    }
3264    else {
3265        /* convert from stack buffer to bytes object buffer */
3266        assert(writer->buffer == NULL);
3267
3268        if (writer->use_bytearray)
3269            writer->buffer = PyByteArray_FromStringAndSize(NULL, allocated);
3270        else
3271            writer->buffer = PyBytes_FromStringAndSize(NULL, allocated);
3272        if (writer->buffer == NULL)
3273            goto error;
3274
3275        if (pos != 0) {
3276            char *dest;
3277            if (writer->use_bytearray)
3278                dest = PyByteArray_AS_STRING(writer->buffer);
3279            else
3280                dest = PyBytes_AS_STRING(writer->buffer);
3281            memcpy(dest,
3282                      writer->small_buffer,
3283                      pos);
3284        }
3285
3286        writer->use_small_buffer = 0;
3287#ifdef Py_DEBUG
3288        memset(writer->small_buffer, 0xDB, sizeof(writer->small_buffer));
3289#endif
3290    }
3291    writer->allocated = allocated;
3292
3293    str = _PyBytesWriter_AsString(writer) + pos;
3294    _PyBytesWriter_CheckConsistency(writer, str);
3295    return str;
3296
3297error:
3298    _PyBytesWriter_Dealloc(writer);
3299    return NULL;
3300}
3301
3302void*
3303_PyBytesWriter_Prepare(_PyBytesWriter *writer, void *str, Py_ssize_t size)
3304{
3305    Py_ssize_t new_min_size;
3306
3307    _PyBytesWriter_CheckConsistency(writer, str);
3308    assert(size >= 0);
3309
3310    if (size == 0) {
3311        /* nothing to do */
3312        return str;
3313    }
3314
3315    if (writer->min_size > PY_SSIZE_T_MAX - size) {
3316        PyErr_NoMemory();
3317        _PyBytesWriter_Dealloc(writer);
3318        return NULL;
3319    }
3320    new_min_size = writer->min_size + size;
3321
3322    if (new_min_size > writer->allocated)
3323        str = _PyBytesWriter_Resize(writer, str, new_min_size);
3324
3325    writer->min_size = new_min_size;
3326    return str;
3327}
3328
3329/* Allocate the buffer to write size bytes.
3330   Return the pointer to the beginning of buffer data.
3331   Raise an exception and return NULL on error. */
3332void*
3333_PyBytesWriter_Alloc(_PyBytesWriter *writer, Py_ssize_t size)
3334{
3335    /* ensure that _PyBytesWriter_Alloc() is only called once */
3336    assert(writer->min_size == 0 && writer->buffer == NULL);
3337    assert(size >= 0);
3338
3339    writer->use_small_buffer = 1;
3340#ifdef Py_DEBUG
3341    writer->allocated = sizeof(writer->small_buffer) - 1;
3342    /* In debug mode, don't use the full small buffer because it is less
3343       efficient than bytes and bytearray objects to detect buffer underflow
3344       and buffer overflow. Use 10 bytes of the small buffer to test also
3345       code using the smaller buffer in debug mode.
3346
3347       Don't modify the _PyBytesWriter structure (use a shorter small buffer)
3348       in debug mode to also be able to detect stack overflow when running
3349       tests in debug mode. The _PyBytesWriter is large (more than 512 bytes),
3350       if Py_EnterRecursiveCall() is not used in deep C callback, we may hit a
3351       stack overflow. */
3352    writer->allocated = Py_MIN(writer->allocated, 10);
3353    /* _PyBytesWriter_CheckConsistency() requires the last byte to be 0,
3354       to detect buffer overflow */
3355    writer->small_buffer[writer->allocated] = 0;
3356#else
3357    writer->allocated = sizeof(writer->small_buffer);
3358#endif
3359    return _PyBytesWriter_Prepare(writer, writer->small_buffer, size);
3360}
3361
3362PyObject *
3363_PyBytesWriter_Finish(_PyBytesWriter *writer, void *str)
3364{
3365    Py_ssize_t size;
3366    PyObject *result;
3367
3368    _PyBytesWriter_CheckConsistency(writer, str);
3369
3370    size = _PyBytesWriter_GetSize(writer, str);
3371    if (size == 0 && !writer->use_bytearray) {
3372        Py_CLEAR(writer->buffer);
3373        /* Get the empty byte string singleton */
3374        result = PyBytes_FromStringAndSize(NULL, 0);
3375    }
3376    else if (writer->use_small_buffer) {
3377        if (writer->use_bytearray) {
3378            result = PyByteArray_FromStringAndSize(writer->small_buffer, size);
3379        }
3380        else {
3381            result = PyBytes_FromStringAndSize(writer->small_buffer, size);
3382        }
3383    }
3384    else {
3385        result = writer->buffer;
3386        writer->buffer = NULL;
3387
3388        if (size != writer->allocated) {
3389            if (writer->use_bytearray) {
3390                if (PyByteArray_Resize(result, size)) {
3391                    Py_DECREF(result);
3392                    return NULL;
3393                }
3394            }
3395            else {
3396                if (_PyBytes_Resize(&result, size)) {
3397                    assert(result == NULL);
3398                    return NULL;
3399                }
3400            }
3401        }
3402    }
3403    return result;
3404}
3405
3406void*
3407_PyBytesWriter_WriteBytes(_PyBytesWriter *writer, void *ptr,
3408                          const void *bytes, Py_ssize_t size)
3409{
3410    char *str = (char *)ptr;
3411
3412    str = _PyBytesWriter_Prepare(writer, str, size);
3413    if (str == NULL)
3414        return NULL;
3415
3416    memcpy(str, bytes, size);
3417    str += size;
3418
3419    return str;
3420}
3421