1#include "Python.h" 2#if PY_VERSION_HEX < 0x02050000 && !defined(PY_SSIZE_T_MIN) 3typedef int Py_ssize_t; 4#define PY_SSIZE_T_MAX INT_MAX 5#define PY_SSIZE_T_MIN INT_MIN 6#endif 7 8static Py_ssize_t 9ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars); 10static PyObject * 11ascii_escape_unicode(PyObject *pystr); 12static PyObject * 13ascii_escape_str(PyObject *pystr); 14static PyObject * 15py_encode_basestring_ascii(PyObject* self __attribute__((__unused__)), PyObject *pystr); 16void init_speedups(void); 17 18#define S_CHAR(c) (c >= ' ' && c <= '~' && c != '\\' && c != '/' && c != '"') 19 20#define MIN_EXPANSION 6 21#ifdef Py_UNICODE_WIDE 22#define MAX_EXPANSION (2 * MIN_EXPANSION) 23#else 24#define MAX_EXPANSION MIN_EXPANSION 25#endif 26 27static Py_ssize_t 28ascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars) { 29 Py_UNICODE x; 30 output[chars++] = '\\'; 31 switch (c) { 32 case '/': output[chars++] = (char)c; break; 33 case '\\': output[chars++] = (char)c; break; 34 case '"': output[chars++] = (char)c; break; 35 case '\b': output[chars++] = 'b'; break; 36 case '\f': output[chars++] = 'f'; break; 37 case '\n': output[chars++] = 'n'; break; 38 case '\r': output[chars++] = 'r'; break; 39 case '\t': output[chars++] = 't'; break; 40 default: 41#ifdef Py_UNICODE_WIDE 42 if (c >= 0x10000) { 43 /* UTF-16 surrogate pair */ 44 Py_UNICODE v = c - 0x10000; 45 c = 0xd800 | ((v >> 10) & 0x3ff); 46 output[chars++] = 'u'; 47 x = (c & 0xf000) >> 12; 48 output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10); 49 x = (c & 0x0f00) >> 8; 50 output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10); 51 x = (c & 0x00f0) >> 4; 52 output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10); 53 x = (c & 0x000f); 54 output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10); 55 c = 0xdc00 | (v & 0x3ff); 56 output[chars++] = '\\'; 57 } 58#endif 59 output[chars++] = 'u'; 60 x = (c & 0xf000) >> 12; 61 output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10); 62 x = (c & 0x0f00) >> 8; 63 output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10); 64 x = (c & 0x00f0) >> 4; 65 output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10); 66 x = (c & 0x000f); 67 output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10); 68 } 69 return chars; 70} 71 72static PyObject * 73ascii_escape_unicode(PyObject *pystr) { 74 Py_ssize_t i; 75 Py_ssize_t input_chars; 76 Py_ssize_t output_size; 77 Py_ssize_t chars; 78 PyObject *rval; 79 char *output; 80 Py_UNICODE *input_unicode; 81 82 input_chars = PyUnicode_GET_SIZE(pystr); 83 input_unicode = PyUnicode_AS_UNICODE(pystr); 84 /* One char input can be up to 6 chars output, estimate 4 of these */ 85 output_size = 2 + (MIN_EXPANSION * 4) + input_chars; 86 rval = PyString_FromStringAndSize(NULL, output_size); 87 if (rval == NULL) { 88 return NULL; 89 } 90 output = PyString_AS_STRING(rval); 91 chars = 0; 92 output[chars++] = '"'; 93 for (i = 0; i < input_chars; i++) { 94 Py_UNICODE c = input_unicode[i]; 95 if (S_CHAR(c)) { 96 output[chars++] = (char)c; 97 } else { 98 chars = ascii_escape_char(c, output, chars); 99 } 100 if (output_size - chars < (1 + MAX_EXPANSION)) { 101 /* There's more than four, so let's resize by a lot */ 102 output_size *= 2; 103 /* This is an upper bound */ 104 if (output_size > 2 + (input_chars * MAX_EXPANSION)) { 105 output_size = 2 + (input_chars * MAX_EXPANSION); 106 } 107 if (_PyString_Resize(&rval, output_size) == -1) { 108 return NULL; 109 } 110 output = PyString_AS_STRING(rval); 111 } 112 } 113 output[chars++] = '"'; 114 if (_PyString_Resize(&rval, chars) == -1) { 115 return NULL; 116 } 117 return rval; 118} 119 120static PyObject * 121ascii_escape_str(PyObject *pystr) { 122 Py_ssize_t i; 123 Py_ssize_t input_chars; 124 Py_ssize_t output_size; 125 Py_ssize_t chars; 126 PyObject *rval; 127 char *output; 128 char *input_str; 129 130 input_chars = PyString_GET_SIZE(pystr); 131 input_str = PyString_AS_STRING(pystr); 132 /* One char input can be up to 6 chars output, estimate 4 of these */ 133 output_size = 2 + (MIN_EXPANSION * 4) + input_chars; 134 rval = PyString_FromStringAndSize(NULL, output_size); 135 if (rval == NULL) { 136 return NULL; 137 } 138 output = PyString_AS_STRING(rval); 139 chars = 0; 140 output[chars++] = '"'; 141 for (i = 0; i < input_chars; i++) { 142 Py_UNICODE c = (Py_UNICODE)input_str[i]; 143 if (S_CHAR(c)) { 144 output[chars++] = (char)c; 145 } else if (c > 0x7F) { 146 /* We hit a non-ASCII character, bail to unicode mode */ 147 PyObject *uni; 148 Py_DECREF(rval); 149 uni = PyUnicode_DecodeUTF8(input_str, input_chars, "strict"); 150 if (uni == NULL) { 151 return NULL; 152 } 153 rval = ascii_escape_unicode(uni); 154 Py_DECREF(uni); 155 return rval; 156 } else { 157 chars = ascii_escape_char(c, output, chars); 158 } 159 /* An ASCII char can't possibly expand to a surrogate! */ 160 if (output_size - chars < (1 + MIN_EXPANSION)) { 161 /* There's more than four, so let's resize by a lot */ 162 output_size *= 2; 163 if (output_size > 2 + (input_chars * MIN_EXPANSION)) { 164 output_size = 2 + (input_chars * MIN_EXPANSION); 165 } 166 if (_PyString_Resize(&rval, output_size) == -1) { 167 return NULL; 168 } 169 output = PyString_AS_STRING(rval); 170 } 171 } 172 output[chars++] = '"'; 173 if (_PyString_Resize(&rval, chars) == -1) { 174 return NULL; 175 } 176 return rval; 177} 178 179PyDoc_STRVAR(pydoc_encode_basestring_ascii, 180 "encode_basestring_ascii(basestring) -> str\n" 181 "\n" 182 "..." 183); 184 185static PyObject * 186py_encode_basestring_ascii(PyObject* self __attribute__((__unused__)), PyObject *pystr) { 187 /* METH_O */ 188 if (PyString_Check(pystr)) { 189 return ascii_escape_str(pystr); 190 } else if (PyUnicode_Check(pystr)) { 191 return ascii_escape_unicode(pystr); 192 } 193 PyErr_SetString(PyExc_TypeError, "first argument must be a string"); 194 return NULL; 195} 196 197#define DEFN(n, k) \ 198 { \ 199 #n, \ 200 (PyCFunction)py_ ##n, \ 201 k, \ 202 pydoc_ ##n \ 203 } 204static PyMethodDef speedups_methods[] = { 205 DEFN(encode_basestring_ascii, METH_O), 206 {} 207}; 208#undef DEFN 209 210void 211init_speedups(void) 212{ 213 PyObject *m; 214 m = Py_InitModule4("_speedups", speedups_methods, NULL, NULL, PYTHON_API_VERSION); 215} 216