_speedups.c revision 5821806d5e7f356e8fa4b058a389a808ea183019
1#include "Python.h"
/*
 * Compatibility shim: when the Python headers are older than 2.5 and have not
 * already supplied the Py_ssize_t limits, fall back to plain int.
 */
#if !defined(PY_SSIZE_T_MIN) && PY_VERSION_HEX < 0x02050000
typedef int Py_ssize_t;
#define PY_SSIZE_T_MIN INT_MIN
#define PY_SSIZE_T_MAX INT_MAX
#endif
7
8static Py_ssize_t
9ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars);
10static PyObject *
11ascii_escape_unicode(PyObject *pystr);
12static PyObject *
13ascii_escape_str(PyObject *pystr);
14static PyObject *
15py_encode_basestring_ascii(PyObject* self __attribute__((__unused__)), PyObject *pystr);
16void init_speedups(void);
17
/*
 * True when c can be copied to the output unchanged: printable ASCII
 * (' ' through '~') other than backslash, forward slash and double quote.
 * The argument is parenthesized so that expression arguments parse correctly;
 * it is still evaluated several times, so pass side-effect-free expressions.
 */
#define S_CHAR(c) ((c) >= ' ' && (c) <= '~' && (c) != '\\' && (c) != '/' && (c) != '"')
19
/*
 * Worst-case number of output bytes per input character.  A single escape
 * such as \uXXXX is 6 bytes; on wide-unicode builds a character may be
 * written as two such escapes (a surrogate pair), doubling the bound.
 */
#define MIN_EXPANSION 6
#if defined(Py_UNICODE_WIDE)
#define MAX_EXPANSION (MIN_EXPANSION * 2)
#else
#define MAX_EXPANSION MIN_EXPANSION
#endif
26
27static Py_ssize_t
28ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars) {
29    Py_UNICODE x;
30    output[chars++] = '\\';
31    switch (c) {
32        case '/': output[chars++] = (char)c; break;
33        case '\\': output[chars++] = (char)c; break;
34        case '"': output[chars++] = (char)c; break;
35        case '\b': output[chars++] = 'b'; break;
36        case '\f': output[chars++] = 'f'; break;
37        case '\n': output[chars++] = 'n'; break;
38        case '\r': output[chars++] = 'r'; break;
39        case '\t': output[chars++] = 't'; break;
40        default:
41#ifdef Py_UNICODE_WIDE
42            if (c >= 0x10000) {
43                /* UTF-16 surrogate pair */
44                Py_UNICODE v = c - 0x10000;
45                c = 0xd800 | ((v >> 10) & 0x3ff);
46                output[chars++] = 'u';
47                x = (c & 0xf000) >> 12;
48                output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
49                x = (c & 0x0f00) >> 8;
50                output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
51                x = (c & 0x00f0) >> 4;
52                output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
53                x = (c & 0x000f);
54                output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
55                c = 0xdc00 | (v & 0x3ff);
56                output[chars++] = '\\';
57            }
58#endif
59            output[chars++] = 'u';
60            x = (c & 0xf000) >> 12;
61            output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
62            x = (c & 0x0f00) >> 8;
63            output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
64            x = (c & 0x00f0) >> 4;
65            output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
66            x = (c & 0x000f);
67            output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10);
68    }
69    return chars;
70}
71
72static PyObject *
73ascii_escape_unicode(PyObject *pystr) {
74    Py_ssize_t i;
75    Py_ssize_t input_chars;
76    Py_ssize_t output_size;
77    Py_ssize_t chars;
78    PyObject *rval;
79    char *output;
80    Py_UNICODE *input_unicode;
81
82    input_chars = PyUnicode_GET_SIZE(pystr);
83    input_unicode = PyUnicode_AS_UNICODE(pystr);
84    /* One char input can be up to 6 chars output, estimate 4 of these */
85    output_size = 2 + (MIN_EXPANSION * 4) + input_chars;
86    rval = PyString_FromStringAndSize(NULL, output_size);
87    if (rval == NULL) {
88        return NULL;
89    }
90    output = PyString_AS_STRING(rval);
91    chars = 0;
92    output[chars++] = '"';
93    for (i = 0; i < input_chars; i++) {
94        Py_UNICODE c = input_unicode[i];
95        if (S_CHAR(c)) {
96            output[chars++] = (char)c;
97        } else {
98            chars = ascii_escape_char(c, output, chars);
99        }
100        if (output_size - chars < (1 + MAX_EXPANSION)) {
101            /* There's more than four, so let's resize by a lot */
102            output_size *= 2;
103            /* This is an upper bound */
104            if (output_size > 2 + (input_chars * MAX_EXPANSION)) {
105                output_size = 2 + (input_chars * MAX_EXPANSION);
106            }
107            if (_PyString_Resize(&rval, output_size) == -1) {
108                return NULL;
109            }
110            output = PyString_AS_STRING(rval);
111        }
112    }
113    output[chars++] = '"';
114    if (_PyString_Resize(&rval, chars) == -1) {
115        return NULL;
116    }
117    return rval;
118}
119
120static PyObject *
121ascii_escape_str(PyObject *pystr) {
122    Py_ssize_t i;
123    Py_ssize_t input_chars;
124    Py_ssize_t output_size;
125    Py_ssize_t chars;
126    PyObject *rval;
127    char *output;
128    char *input_str;
129
130    input_chars = PyString_GET_SIZE(pystr);
131    input_str = PyString_AS_STRING(pystr);
132    /* One char input can be up to 6 chars output, estimate 4 of these */
133    output_size = 2 + (MIN_EXPANSION * 4) + input_chars;
134    rval = PyString_FromStringAndSize(NULL, output_size);
135    if (rval == NULL) {
136        return NULL;
137    }
138    output = PyString_AS_STRING(rval);
139    chars = 0;
140    output[chars++] = '"';
141    for (i = 0; i < input_chars; i++) {
142        Py_UNICODE c = (Py_UNICODE)input_str[i];
143        if (S_CHAR(c)) {
144            output[chars++] = (char)c;
145        } else if (c > 0x7F) {
146            /* We hit a non-ASCII character, bail to unicode mode */
147            PyObject *uni;
148            Py_DECREF(rval);
149            uni = PyUnicode_DecodeUTF8(input_str, input_chars, "strict");
150            if (uni == NULL) {
151                return NULL;
152            }
153            rval = ascii_escape_unicode(uni);
154            Py_DECREF(uni);
155            return rval;
156        } else {
157            chars = ascii_escape_char(c, output, chars);
158        }
159        /* An ASCII char can't possibly expand to a surrogate! */
160        if (output_size - chars < (1 + MIN_EXPANSION)) {
161            /* There's more than four, so let's resize by a lot */
162            output_size *= 2;
163            if (output_size > 2 + (input_chars * MIN_EXPANSION)) {
164                output_size = 2 + (input_chars * MIN_EXPANSION);
165            }
166            if (_PyString_Resize(&rval, output_size) == -1) {
167                return NULL;
168            }
169            output = PyString_AS_STRING(rval);
170        }
171    }
172    output[chars++] = '"';
173    if (_PyString_Resize(&rval, chars) == -1) {
174        return NULL;
175    }
176    return rval;
177}
178
179PyDoc_STRVAR(pydoc_encode_basestring_ascii,
180    "encode_basestring_ascii(basestring) -> str\n"
181    "\n"
182    "..."
183);
184
185static PyObject *
186py_encode_basestring_ascii(PyObject* self __attribute__((__unused__)), PyObject *pystr) {
187    /* METH_O */
188    if (PyString_Check(pystr)) {
189        return ascii_escape_str(pystr);
190    } else if (PyUnicode_Check(pystr)) {
191        return ascii_escape_unicode(pystr);
192    }
193    PyErr_SetString(PyExc_TypeError, "first argument must be a string");
194    return NULL;
195}
196
197#define DEFN(n, k) \
198    {  \
199        #n, \
200        (PyCFunction)py_ ##n, \
201        k, \
202        pydoc_ ##n \
203    }
204static PyMethodDef speedups_methods[] = {
205    DEFN(encode_basestring_ascii, METH_O),
206    {}
207};
208#undef DEFN
209
210void
211init_speedups(void)
212{
213    PyObject *m;
214    m = Py_InitModule4("_speedups", speedups_methods, NULL, NULL, PYTHON_API_VERSION);
215}
216