1d0825bca7fe65beaee391d30da42e937db621564Steve Block#include "Python.h"
/*
 * Compatibility shim: when PY_VERSION_HEX is below 0x02050000 and nothing has
 * defined PY_SSIZE_T_MIN yet, provide an int-based Py_ssize_t together with
 * its limits so the rest of the file can use the type unconditionally.
 */
#if !defined(PY_SSIZE_T_MIN) && PY_VERSION_HEX < 0x02050000
typedef int Py_ssize_t;
#define PY_SSIZE_T_MIN INT_MIN
#define PY_SSIZE_T_MAX INT_MAX
#endif
7d0825bca7fe65beaee391d30da42e937db621564Steve Block
8d0825bca7fe65beaee391d30da42e937db621564Steve Blockstatic Py_ssize_t
9d0825bca7fe65beaee391d30da42e937db621564Steve Blockascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars);
10d0825bca7fe65beaee391d30da42e937db621564Steve Blockstatic PyObject *
11d0825bca7fe65beaee391d30da42e937db621564Steve Blockascii_escape_unicode(PyObject *pystr);
12d0825bca7fe65beaee391d30da42e937db621564Steve Blockstatic PyObject *
13d0825bca7fe65beaee391d30da42e937db621564Steve Blockascii_escape_str(PyObject *pystr);
14d0825bca7fe65beaee391d30da42e937db621564Steve Blockstatic PyObject *
15d0825bca7fe65beaee391d30da42e937db621564Steve Blockpy_encode_basestring_ascii(PyObject* self __attribute__((__unused__)), PyObject *pystr);
16d0825bca7fe65beaee391d30da42e937db621564Steve Blockvoid init_speedups(void);
17d0825bca7fe65beaee391d30da42e937db621564Steve Block
/*
 * S_CHAR(c) is true when c can be copied to the output unescaped: a printable
 * ASCII character (' ' through '~') other than backslash, forward slash and
 * double quote.  The argument is parenthesized so that expression arguments
 * (e.g. `a | b`) are compared as a whole; note that it is still evaluated more
 * than once, so it must not have side effects.
 */
#define S_CHAR(c) ((c) >= ' ' && (c) <= '~' && (c) != '\\' && (c) != '/' && (c) != '"')

/*
 * Worst-case number of output bytes produced for one input character:
 * MIN_EXPANSION covers a single six-byte "\uXXXX" escape; when
 * Py_UNICODE_WIDE is defined a character may need two such escapes (a
 * surrogate pair), so MAX_EXPANSION doubles.
 */
#define MIN_EXPANSION 6
#ifdef Py_UNICODE_WIDE
#define MAX_EXPANSION (2 * MIN_EXPANSION)
#else
#define MAX_EXPANSION MIN_EXPANSION
#endif
26d0825bca7fe65beaee391d30da42e937db621564Steve Block
27d0825bca7fe65beaee391d30da42e937db621564Steve Blockstatic Py_ssize_t
28d0825bca7fe65beaee391d30da42e937db621564Steve Blockascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars) {
29d0825bca7fe65beaee391d30da42e937db621564Steve Block    Py_UNICODE x;
30d0825bca7fe65beaee391d30da42e937db621564Steve Block    output[chars++] = '\\';
31d0825bca7fe65beaee391d30da42e937db621564Steve Block    switch (c) {
32d0825bca7fe65beaee391d30da42e937db621564Steve Block        case '/': output[chars++] = (char)c; break;
33d0825bca7fe65beaee391d30da42e937db621564Steve Block        case '\\': output[chars++] = (char)c; break;
34d0825bca7fe65beaee391d30da42e937db621564Steve Block        case '"': output[chars++] = (char)c; break;
35d0825bca7fe65beaee391d30da42e937db621564Steve Block        case '\b': output[chars++] = 'b'; break;
36d0825bca7fe65beaee391d30da42e937db621564Steve Block        case '\f': output[chars++] = 'f'; break;
37d0825bca7fe65beaee391d30da42e937db621564Steve Block        case '\n': output[chars++] = 'n'; break;
38d0825bca7fe65beaee391d30da42e937db621564Steve Block        case '\r': output[chars++] = 'r'; break;
39d0825bca7fe65beaee391d30da42e937db621564Steve Block        case '\t': output[chars++] = 't'; break;
40d0825bca7fe65beaee391d30da42e937db621564Steve Block        default:
41d0825bca7fe65beaee391d30da42e937db621564Steve Block#ifdef Py_UNICODE_WIDE
42d0825bca7fe65beaee391d30da42e937db621564Steve Block            if (c >= 0x10000) {
43d0825bca7fe65beaee391d30da42e937db621564Steve Block                /* UTF-16 surrogate pair */
44d0825bca7fe65beaee391d30da42e937db621564Steve Block                Py_UNICODE v = c - 0x10000;
45d0825bca7fe65beaee391d30da42e937db621564Steve Block                c = 0xd800 | ((v >> 10) & 0x3ff);
46d0825bca7fe65beaee391d30da42e937db621564Steve Block                output[chars++] = 'u';
47d0825bca7fe65beaee391d30da42e937db621564Steve Block                x = (c & 0xf000) >> 12;
48d0825bca7fe65beaee391d30da42e937db621564Steve Block                output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
49d0825bca7fe65beaee391d30da42e937db621564Steve Block                x = (c & 0x0f00) >> 8;
50d0825bca7fe65beaee391d30da42e937db621564Steve Block                output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
51d0825bca7fe65beaee391d30da42e937db621564Steve Block                x = (c & 0x00f0) >> 4;
52d0825bca7fe65beaee391d30da42e937db621564Steve Block                output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
53d0825bca7fe65beaee391d30da42e937db621564Steve Block                x = (c & 0x000f);
54d0825bca7fe65beaee391d30da42e937db621564Steve Block                output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
55d0825bca7fe65beaee391d30da42e937db621564Steve Block                c = 0xdc00 | (v & 0x3ff);
56d0825bca7fe65beaee391d30da42e937db621564Steve Block                output[chars++] = '\\';
57d0825bca7fe65beaee391d30da42e937db621564Steve Block            }
58d0825bca7fe65beaee391d30da42e937db621564Steve Block#endif
59d0825bca7fe65beaee391d30da42e937db621564Steve Block            output[chars++] = 'u';
60d0825bca7fe65beaee391d30da42e937db621564Steve Block            x = (c & 0xf000) >> 12;
61d0825bca7fe65beaee391d30da42e937db621564Steve Block            output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
62d0825bca7fe65beaee391d30da42e937db621564Steve Block            x = (c & 0x0f00) >> 8;
63d0825bca7fe65beaee391d30da42e937db621564Steve Block            output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
64d0825bca7fe65beaee391d30da42e937db621564Steve Block            x = (c & 0x00f0) >> 4;
65d0825bca7fe65beaee391d30da42e937db621564Steve Block            output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
66d0825bca7fe65beaee391d30da42e937db621564Steve Block            x = (c & 0x000f);
67d0825bca7fe65beaee391d30da42e937db621564Steve Block            output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
68d0825bca7fe65beaee391d30da42e937db621564Steve Block    }
69d0825bca7fe65beaee391d30da42e937db621564Steve Block    return chars;
70d0825bca7fe65beaee391d30da42e937db621564Steve Block}
71d0825bca7fe65beaee391d30da42e937db621564Steve Block
72d0825bca7fe65beaee391d30da42e937db621564Steve Blockstatic PyObject *
73d0825bca7fe65beaee391d30da42e937db621564Steve Blockascii_escape_unicode(PyObject *pystr) {
74d0825bca7fe65beaee391d30da42e937db621564Steve Block    Py_ssize_t i;
75d0825bca7fe65beaee391d30da42e937db621564Steve Block    Py_ssize_t input_chars;
76d0825bca7fe65beaee391d30da42e937db621564Steve Block    Py_ssize_t output_size;
77d0825bca7fe65beaee391d30da42e937db621564Steve Block    Py_ssize_t chars;
78d0825bca7fe65beaee391d30da42e937db621564Steve Block    PyObject *rval;
79d0825bca7fe65beaee391d30da42e937db621564Steve Block    char *output;
80d0825bca7fe65beaee391d30da42e937db621564Steve Block    Py_UNICODE *input_unicode;
81d0825bca7fe65beaee391d30da42e937db621564Steve Block
82d0825bca7fe65beaee391d30da42e937db621564Steve Block    input_chars = PyUnicode_GET_SIZE(pystr);
83d0825bca7fe65beaee391d30da42e937db621564Steve Block    input_unicode = PyUnicode_AS_UNICODE(pystr);
84d0825bca7fe65beaee391d30da42e937db621564Steve Block    /* One char input can be up to 6 chars output, estimate 4 of these */
85d0825bca7fe65beaee391d30da42e937db621564Steve Block    output_size = 2 + (MIN_EXPANSION * 4) + input_chars;
86d0825bca7fe65beaee391d30da42e937db621564Steve Block    rval = PyString_FromStringAndSize(NULL, output_size);
87d0825bca7fe65beaee391d30da42e937db621564Steve Block    if (rval == NULL) {
88d0825bca7fe65beaee391d30da42e937db621564Steve Block        return NULL;
89d0825bca7fe65beaee391d30da42e937db621564Steve Block    }
90d0825bca7fe65beaee391d30da42e937db621564Steve Block    output = PyString_AS_STRING(rval);
91d0825bca7fe65beaee391d30da42e937db621564Steve Block    chars = 0;
92d0825bca7fe65beaee391d30da42e937db621564Steve Block    output[chars++] = '"';
93d0825bca7fe65beaee391d30da42e937db621564Steve Block    for (i = 0; i < input_chars; i++) {
94d0825bca7fe65beaee391d30da42e937db621564Steve Block        Py_UNICODE c = input_unicode[i];
95d0825bca7fe65beaee391d30da42e937db621564Steve Block        if (S_CHAR(c)) {
96d0825bca7fe65beaee391d30da42e937db621564Steve Block            output[chars++] = (char)c;
97d0825bca7fe65beaee391d30da42e937db621564Steve Block        } else {
98d0825bca7fe65beaee391d30da42e937db621564Steve Block            chars = ascii_escape_char(c, output, chars);
99d0825bca7fe65beaee391d30da42e937db621564Steve Block        }
100d0825bca7fe65beaee391d30da42e937db621564Steve Block        if (output_size - chars < (1 + MAX_EXPANSION)) {
101d0825bca7fe65beaee391d30da42e937db621564Steve Block            /* There's more than four, so let's resize by a lot */
102d0825bca7fe65beaee391d30da42e937db621564Steve Block            output_size *= 2;
103d0825bca7fe65beaee391d30da42e937db621564Steve Block            /* This is an upper bound */
104d0825bca7fe65beaee391d30da42e937db621564Steve Block            if (output_size > 2 + (input_chars * MAX_EXPANSION)) {
105d0825bca7fe65beaee391d30da42e937db621564Steve Block                output_size = 2 + (input_chars * MAX_EXPANSION);
106d0825bca7fe65beaee391d30da42e937db621564Steve Block            }
107d0825bca7fe65beaee391d30da42e937db621564Steve Block            if (_PyString_Resize(&rval, output_size) == -1) {
108d0825bca7fe65beaee391d30da42e937db621564Steve Block                return NULL;
109d0825bca7fe65beaee391d30da42e937db621564Steve Block            }
110d0825bca7fe65beaee391d30da42e937db621564Steve Block            output = PyString_AS_STRING(rval);
111d0825bca7fe65beaee391d30da42e937db621564Steve Block        }
112d0825bca7fe65beaee391d30da42e937db621564Steve Block    }
113d0825bca7fe65beaee391d30da42e937db621564Steve Block    output[chars++] = '"';
114d0825bca7fe65beaee391d30da42e937db621564Steve Block    if (_PyString_Resize(&rval, chars) == -1) {
115d0825bca7fe65beaee391d30da42e937db621564Steve Block        return NULL;
116d0825bca7fe65beaee391d30da42e937db621564Steve Block    }
117d0825bca7fe65beaee391d30da42e937db621564Steve Block    return rval;
118d0825bca7fe65beaee391d30da42e937db621564Steve Block}
119d0825bca7fe65beaee391d30da42e937db621564Steve Block
120d0825bca7fe65beaee391d30da42e937db621564Steve Blockstatic PyObject *
121d0825bca7fe65beaee391d30da42e937db621564Steve Blockascii_escape_str(PyObject *pystr) {
122d0825bca7fe65beaee391d30da42e937db621564Steve Block    Py_ssize_t i;
123d0825bca7fe65beaee391d30da42e937db621564Steve Block    Py_ssize_t input_chars;
124d0825bca7fe65beaee391d30da42e937db621564Steve Block    Py_ssize_t output_size;
125d0825bca7fe65beaee391d30da42e937db621564Steve Block    Py_ssize_t chars;
126d0825bca7fe65beaee391d30da42e937db621564Steve Block    PyObject *rval;
127d0825bca7fe65beaee391d30da42e937db621564Steve Block    char *output;
128d0825bca7fe65beaee391d30da42e937db621564Steve Block    char *input_str;
129d0825bca7fe65beaee391d30da42e937db621564Steve Block
130d0825bca7fe65beaee391d30da42e937db621564Steve Block    input_chars = PyString_GET_SIZE(pystr);
131d0825bca7fe65beaee391d30da42e937db621564Steve Block    input_str = PyString_AS_STRING(pystr);
132d0825bca7fe65beaee391d30da42e937db621564Steve Block    /* One char input can be up to 6 chars output, estimate 4 of these */
133d0825bca7fe65beaee391d30da42e937db621564Steve Block    output_size = 2 + (MIN_EXPANSION * 4) + input_chars;
134d0825bca7fe65beaee391d30da42e937db621564Steve Block    rval = PyString_FromStringAndSize(NULL, output_size);
135d0825bca7fe65beaee391d30da42e937db621564Steve Block    if (rval == NULL) {
136d0825bca7fe65beaee391d30da42e937db621564Steve Block        return NULL;
137d0825bca7fe65beaee391d30da42e937db621564Steve Block    }
138d0825bca7fe65beaee391d30da42e937db621564Steve Block    output = PyString_AS_STRING(rval);
139d0825bca7fe65beaee391d30da42e937db621564Steve Block    chars = 0;
140d0825bca7fe65beaee391d30da42e937db621564Steve Block    output[chars++] = '"';
141d0825bca7fe65beaee391d30da42e937db621564Steve Block    for (i = 0; i < input_chars; i++) {
142d0825bca7fe65beaee391d30da42e937db621564Steve Block        Py_UNICODE c = (Py_UNICODE)input_str[i];
143d0825bca7fe65beaee391d30da42e937db621564Steve Block        if (S_CHAR(c)) {
144d0825bca7fe65beaee391d30da42e937db621564Steve Block            output[chars++] = (char)c;
145d0825bca7fe65beaee391d30da42e937db621564Steve Block        } else if (c > 0x7F) {
146d0825bca7fe65beaee391d30da42e937db621564Steve Block            /* We hit a non-ASCII character, bail to unicode mode */
147d0825bca7fe65beaee391d30da42e937db621564Steve Block            PyObject *uni;
148d0825bca7fe65beaee391d30da42e937db621564Steve Block            Py_DECREF(rval);
149d0825bca7fe65beaee391d30da42e937db621564Steve Block            uni = PyUnicode_DecodeUTF8(input_str, input_chars, "strict");
150d0825bca7fe65beaee391d30da42e937db621564Steve Block            if (uni == NULL) {
151d0825bca7fe65beaee391d30da42e937db621564Steve Block                return NULL;
152d0825bca7fe65beaee391d30da42e937db621564Steve Block            }
153d0825bca7fe65beaee391d30da42e937db621564Steve Block            rval = ascii_escape_unicode(uni);
154d0825bca7fe65beaee391d30da42e937db621564Steve Block            Py_DECREF(uni);
155d0825bca7fe65beaee391d30da42e937db621564Steve Block            return rval;
156d0825bca7fe65beaee391d30da42e937db621564Steve Block        } else {
157d0825bca7fe65beaee391d30da42e937db621564Steve Block            chars = ascii_escape_char(c, output, chars);
158d0825bca7fe65beaee391d30da42e937db621564Steve Block        }
159d0825bca7fe65beaee391d30da42e937db621564Steve Block        /* An ASCII char can't possibly expand to a surrogate! */
160d0825bca7fe65beaee391d30da42e937db621564Steve Block        if (output_size - chars < (1 + MIN_EXPANSION)) {
161d0825bca7fe65beaee391d30da42e937db621564Steve Block            /* There's more than four, so let's resize by a lot */
162d0825bca7fe65beaee391d30da42e937db621564Steve Block            output_size *= 2;
163d0825bca7fe65beaee391d30da42e937db621564Steve Block            if (output_size > 2 + (input_chars * MIN_EXPANSION)) {
164d0825bca7fe65beaee391d30da42e937db621564Steve Block                output_size = 2 + (input_chars * MIN_EXPANSION);
165d0825bca7fe65beaee391d30da42e937db621564Steve Block            }
166d0825bca7fe65beaee391d30da42e937db621564Steve Block            if (_PyString_Resize(&rval, output_size) == -1) {
167d0825bca7fe65beaee391d30da42e937db621564Steve Block                return NULL;
168d0825bca7fe65beaee391d30da42e937db621564Steve Block            }
169d0825bca7fe65beaee391d30da42e937db621564Steve Block            output = PyString_AS_STRING(rval);
170d0825bca7fe65beaee391d30da42e937db621564Steve Block        }
171d0825bca7fe65beaee391d30da42e937db621564Steve Block    }
172d0825bca7fe65beaee391d30da42e937db621564Steve Block    output[chars++] = '"';
173d0825bca7fe65beaee391d30da42e937db621564Steve Block    if (_PyString_Resize(&rval, chars) == -1) {
174d0825bca7fe65beaee391d30da42e937db621564Steve Block        return NULL;
175d0825bca7fe65beaee391d30da42e937db621564Steve Block    }
176d0825bca7fe65beaee391d30da42e937db621564Steve Block    return rval;
177d0825bca7fe65beaee391d30da42e937db621564Steve Block}
178d0825bca7fe65beaee391d30da42e937db621564Steve Block
179d0825bca7fe65beaee391d30da42e937db621564Steve BlockPyDoc_STRVAR(pydoc_encode_basestring_ascii,
180d0825bca7fe65beaee391d30da42e937db621564Steve Block    "encode_basestring_ascii(basestring) -> str\n"
181d0825bca7fe65beaee391d30da42e937db621564Steve Block    "\n"
182d0825bca7fe65beaee391d30da42e937db621564Steve Block    "..."
183d0825bca7fe65beaee391d30da42e937db621564Steve Block);
184d0825bca7fe65beaee391d30da42e937db621564Steve Block
185d0825bca7fe65beaee391d30da42e937db621564Steve Blockstatic PyObject *
186d0825bca7fe65beaee391d30da42e937db621564Steve Blockpy_encode_basestring_ascii(PyObject* self __attribute__((__unused__)), PyObject *pystr) {
187d0825bca7fe65beaee391d30da42e937db621564Steve Block    /* METH_O */
188d0825bca7fe65beaee391d30da42e937db621564Steve Block    if (PyString_Check(pystr)) {
189d0825bca7fe65beaee391d30da42e937db621564Steve Block        return ascii_escape_str(pystr);
190d0825bca7fe65beaee391d30da42e937db621564Steve Block    } else if (PyUnicode_Check(pystr)) {
191d0825bca7fe65beaee391d30da42e937db621564Steve Block        return ascii_escape_unicode(pystr);
192d0825bca7fe65beaee391d30da42e937db621564Steve Block    }
193d0825bca7fe65beaee391d30da42e937db621564Steve Block    PyErr_SetString(PyExc_TypeError, "first argument must be a string");
194d0825bca7fe65beaee391d30da42e937db621564Steve Block    return NULL;
195d0825bca7fe65beaee391d30da42e937db621564Steve Block}
196d0825bca7fe65beaee391d30da42e937db621564Steve Block
197d0825bca7fe65beaee391d30da42e937db621564Steve Block#define DEFN(n, k) \
198d0825bca7fe65beaee391d30da42e937db621564Steve Block    {  \
199d0825bca7fe65beaee391d30da42e937db621564Steve Block        #n, \
200d0825bca7fe65beaee391d30da42e937db621564Steve Block        (PyCFunction)py_ ##n, \
201d0825bca7fe65beaee391d30da42e937db621564Steve Block        k, \
202d0825bca7fe65beaee391d30da42e937db621564Steve Block        pydoc_ ##n \
203d0825bca7fe65beaee391d30da42e937db621564Steve Block    }
204d0825bca7fe65beaee391d30da42e937db621564Steve Blockstatic PyMethodDef speedups_methods[] = {
205d0825bca7fe65beaee391d30da42e937db621564Steve Block    DEFN(encode_basestring_ascii, METH_O),
206d0825bca7fe65beaee391d30da42e937db621564Steve Block    {}
207d0825bca7fe65beaee391d30da42e937db621564Steve Block};
208d0825bca7fe65beaee391d30da42e937db621564Steve Block#undef DEFN
209d0825bca7fe65beaee391d30da42e937db621564Steve Block
210d0825bca7fe65beaee391d30da42e937db621564Steve Blockvoid
211d0825bca7fe65beaee391d30da42e937db621564Steve Blockinit_speedups(void)
212d0825bca7fe65beaee391d30da42e937db621564Steve Block{
213d0825bca7fe65beaee391d30da42e937db621564Steve Block    PyObject *m;
214d0825bca7fe65beaee391d30da42e937db621564Steve Block    m = Py_InitModule4("_speedups", speedups_methods, NULL, NULL, PYTHON_API_VERSION);
215d0825bca7fe65beaee391d30da42e937db621564Steve Block}
216