1d0825bca7fe65beaee391d30da42e937db621564Steve Block#include "Python.h" 2d0825bca7fe65beaee391d30da42e937db621564Steve Block#if PY_VERSION_HEX < 0x02050000 && !defined(PY_SSIZE_T_MIN) 3d0825bca7fe65beaee391d30da42e937db621564Steve Blocktypedef int Py_ssize_t; 4d0825bca7fe65beaee391d30da42e937db621564Steve Block#define PY_SSIZE_T_MAX INT_MAX 5d0825bca7fe65beaee391d30da42e937db621564Steve Block#define PY_SSIZE_T_MIN INT_MIN 6d0825bca7fe65beaee391d30da42e937db621564Steve Block#endif 7d0825bca7fe65beaee391d30da42e937db621564Steve Block 8d0825bca7fe65beaee391d30da42e937db621564Steve Blockstatic Py_ssize_t 9d0825bca7fe65beaee391d30da42e937db621564Steve Blockascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars); 10d0825bca7fe65beaee391d30da42e937db621564Steve Blockstatic PyObject * 11d0825bca7fe65beaee391d30da42e937db621564Steve Blockascii_escape_unicode(PyObject *pystr); 12d0825bca7fe65beaee391d30da42e937db621564Steve Blockstatic PyObject * 13d0825bca7fe65beaee391d30da42e937db621564Steve Blockascii_escape_str(PyObject *pystr); 14d0825bca7fe65beaee391d30da42e937db621564Steve Blockstatic PyObject * 15d0825bca7fe65beaee391d30da42e937db621564Steve Blockpy_encode_basestring_ascii(PyObject* self __attribute__((__unused__)), PyObject *pystr); 16d0825bca7fe65beaee391d30da42e937db621564Steve Blockvoid init_speedups(void); 17d0825bca7fe65beaee391d30da42e937db621564Steve Block 18d0825bca7fe65beaee391d30da42e937db621564Steve Block#define S_CHAR(c) (c >= ' ' && c <= '~' && c != '\\' && c != '/' && c != '"') 19d0825bca7fe65beaee391d30da42e937db621564Steve Block 20d0825bca7fe65beaee391d30da42e937db621564Steve Block#define MIN_EXPANSION 6 21d0825bca7fe65beaee391d30da42e937db621564Steve Block#ifdef Py_UNICODE_WIDE 22d0825bca7fe65beaee391d30da42e937db621564Steve Block#define MAX_EXPANSION (2 * MIN_EXPANSION) 23d0825bca7fe65beaee391d30da42e937db621564Steve Block#else 24d0825bca7fe65beaee391d30da42e937db621564Steve Block#define MAX_EXPANSION MIN_EXPANSION 25d0825bca7fe65beaee391d30da42e937db621564Steve Block#endif 26d0825bca7fe65beaee391d30da42e937db621564Steve Block 27d0825bca7fe65beaee391d30da42e937db621564Steve Blockstatic Py_ssize_t 28d0825bca7fe65beaee391d30da42e937db621564Steve Blockascii_escape_char(Py_UNICODE c, char *output, Py_ssize_t chars) { 29d0825bca7fe65beaee391d30da42e937db621564Steve Block Py_UNICODE x; 30d0825bca7fe65beaee391d30da42e937db621564Steve Block output[chars++] = '\\'; 31d0825bca7fe65beaee391d30da42e937db621564Steve Block switch (c) { 32d0825bca7fe65beaee391d30da42e937db621564Steve Block case '/': output[chars++] = (char)c; break; 33d0825bca7fe65beaee391d30da42e937db621564Steve Block case '\\': output[chars++] = (char)c; break; 34d0825bca7fe65beaee391d30da42e937db621564Steve Block case '"': output[chars++] = (char)c; break; 35d0825bca7fe65beaee391d30da42e937db621564Steve Block case '\b': output[chars++] = 'b'; break; 36d0825bca7fe65beaee391d30da42e937db621564Steve Block case '\f': output[chars++] = 'f'; break; 37d0825bca7fe65beaee391d30da42e937db621564Steve Block case '\n': output[chars++] = 'n'; break; 38d0825bca7fe65beaee391d30da42e937db621564Steve Block case '\r': output[chars++] = 'r'; break; 39d0825bca7fe65beaee391d30da42e937db621564Steve Block case '\t': output[chars++] = 't'; break; 40d0825bca7fe65beaee391d30da42e937db621564Steve Block default: 41d0825bca7fe65beaee391d30da42e937db621564Steve Block#ifdef Py_UNICODE_WIDE 42d0825bca7fe65beaee391d30da42e937db621564Steve Block if (c >= 0x10000) { 43d0825bca7fe65beaee391d30da42e937db621564Steve Block /* UTF-16 surrogate pair */ 44d0825bca7fe65beaee391d30da42e937db621564Steve Block Py_UNICODE v = c - 0x10000; 45d0825bca7fe65beaee391d30da42e937db621564Steve Block c = 0xd800 | ((v >> 10) & 0x3ff); 46d0825bca7fe65beaee391d30da42e937db621564Steve Block output[chars++] = 'u'; 47d0825bca7fe65beaee391d30da42e937db621564Steve Block x = (c & 0xf000) >> 12; 48d0825bca7fe65beaee391d30da42e937db621564Steve Block output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10); 49d0825bca7fe65beaee391d30da42e937db621564Steve Block x = (c & 0x0f00) >> 8; 50d0825bca7fe65beaee391d30da42e937db621564Steve Block output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10); 51d0825bca7fe65beaee391d30da42e937db621564Steve Block x = (c & 0x00f0) >> 4; 52d0825bca7fe65beaee391d30da42e937db621564Steve Block output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10); 53d0825bca7fe65beaee391d30da42e937db621564Steve Block x = (c & 0x000f); 54d0825bca7fe65beaee391d30da42e937db621564Steve Block output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10); 55d0825bca7fe65beaee391d30da42e937db621564Steve Block c = 0xdc00 | (v & 0x3ff); 56d0825bca7fe65beaee391d30da42e937db621564Steve Block output[chars++] = '\\'; 57d0825bca7fe65beaee391d30da42e937db621564Steve Block } 58d0825bca7fe65beaee391d30da42e937db621564Steve Block#endif 59d0825bca7fe65beaee391d30da42e937db621564Steve Block output[chars++] = 'u'; 60d0825bca7fe65beaee391d30da42e937db621564Steve Block x = (c & 0xf000) >> 12; 61d0825bca7fe65beaee391d30da42e937db621564Steve Block output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10); 62d0825bca7fe65beaee391d30da42e937db621564Steve Block x = (c & 0x0f00) >> 8; 63d0825bca7fe65beaee391d30da42e937db621564Steve Block output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10); 64d0825bca7fe65beaee391d30da42e937db621564Steve Block x = (c & 0x00f0) >> 4; 65d0825bca7fe65beaee391d30da42e937db621564Steve Block output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10); 66d0825bca7fe65beaee391d30da42e937db621564Steve Block x = (c & 0x000f); 67d0825bca7fe65beaee391d30da42e937db621564Steve Block output[chars++] = (x < 10) ? '0' + x : 'a' + (x - 10); 68d0825bca7fe65beaee391d30da42e937db621564Steve Block } 69d0825bca7fe65beaee391d30da42e937db621564Steve Block return chars; 70d0825bca7fe65beaee391d30da42e937db621564Steve Block} 71d0825bca7fe65beaee391d30da42e937db621564Steve Block 72d0825bca7fe65beaee391d30da42e937db621564Steve Blockstatic PyObject * 73d0825bca7fe65beaee391d30da42e937db621564Steve Blockascii_escape_unicode(PyObject *pystr) { 74d0825bca7fe65beaee391d30da42e937db621564Steve Block Py_ssize_t i; 75d0825bca7fe65beaee391d30da42e937db621564Steve Block Py_ssize_t input_chars; 76d0825bca7fe65beaee391d30da42e937db621564Steve Block Py_ssize_t output_size; 77d0825bca7fe65beaee391d30da42e937db621564Steve Block Py_ssize_t chars; 78d0825bca7fe65beaee391d30da42e937db621564Steve Block PyObject *rval; 79d0825bca7fe65beaee391d30da42e937db621564Steve Block char *output; 80d0825bca7fe65beaee391d30da42e937db621564Steve Block Py_UNICODE *input_unicode; 81d0825bca7fe65beaee391d30da42e937db621564Steve Block 82d0825bca7fe65beaee391d30da42e937db621564Steve Block input_chars = PyUnicode_GET_SIZE(pystr); 83d0825bca7fe65beaee391d30da42e937db621564Steve Block input_unicode = PyUnicode_AS_UNICODE(pystr); 84d0825bca7fe65beaee391d30da42e937db621564Steve Block /* One char input can be up to 6 chars output, estimate 4 of these */ 85d0825bca7fe65beaee391d30da42e937db621564Steve Block output_size = 2 + (MIN_EXPANSION * 4) + input_chars; 86d0825bca7fe65beaee391d30da42e937db621564Steve Block rval = PyString_FromStringAndSize(NULL, output_size); 87d0825bca7fe65beaee391d30da42e937db621564Steve Block if (rval == NULL) { 88d0825bca7fe65beaee391d30da42e937db621564Steve Block return NULL; 89d0825bca7fe65beaee391d30da42e937db621564Steve Block } 90d0825bca7fe65beaee391d30da42e937db621564Steve Block output = PyString_AS_STRING(rval); 91d0825bca7fe65beaee391d30da42e937db621564Steve Block chars = 0; 92d0825bca7fe65beaee391d30da42e937db621564Steve Block output[chars++] = '"'; 93d0825bca7fe65beaee391d30da42e937db621564Steve Block for (i = 0; i < input_chars; i++) { 94d0825bca7fe65beaee391d30da42e937db621564Steve Block Py_UNICODE c = input_unicode[i]; 95d0825bca7fe65beaee391d30da42e937db621564Steve Block if (S_CHAR(c)) { 96d0825bca7fe65beaee391d30da42e937db621564Steve Block output[chars++] = (char)c; 97d0825bca7fe65beaee391d30da42e937db621564Steve Block } else { 98d0825bca7fe65beaee391d30da42e937db621564Steve Block chars = ascii_escape_char(c, output, chars); 99d0825bca7fe65beaee391d30da42e937db621564Steve Block } 100d0825bca7fe65beaee391d30da42e937db621564Steve Block if (output_size - chars < (1 + MAX_EXPANSION)) { 101d0825bca7fe65beaee391d30da42e937db621564Steve Block /* There's more than four, so let's resize by a lot */ 102d0825bca7fe65beaee391d30da42e937db621564Steve Block output_size *= 2; 103d0825bca7fe65beaee391d30da42e937db621564Steve Block /* This is an upper bound */ 104d0825bca7fe65beaee391d30da42e937db621564Steve Block if (output_size > 2 + (input_chars * MAX_EXPANSION)) { 105d0825bca7fe65beaee391d30da42e937db621564Steve Block output_size = 2 + (input_chars * MAX_EXPANSION); 106d0825bca7fe65beaee391d30da42e937db621564Steve Block } 107d0825bca7fe65beaee391d30da42e937db621564Steve Block if (_PyString_Resize(&rval, output_size) == -1) { 108d0825bca7fe65beaee391d30da42e937db621564Steve Block return NULL; 109d0825bca7fe65beaee391d30da42e937db621564Steve Block } 110d0825bca7fe65beaee391d30da42e937db621564Steve Block output = PyString_AS_STRING(rval); 111d0825bca7fe65beaee391d30da42e937db621564Steve Block } 112d0825bca7fe65beaee391d30da42e937db621564Steve Block } 113d0825bca7fe65beaee391d30da42e937db621564Steve Block output[chars++] = '"'; 114d0825bca7fe65beaee391d30da42e937db621564Steve Block if (_PyString_Resize(&rval, chars) == -1) { 115d0825bca7fe65beaee391d30da42e937db621564Steve Block return NULL; 116d0825bca7fe65beaee391d30da42e937db621564Steve Block } 117d0825bca7fe65beaee391d30da42e937db621564Steve Block return rval; 118d0825bca7fe65beaee391d30da42e937db621564Steve Block} 119d0825bca7fe65beaee391d30da42e937db621564Steve Block 120d0825bca7fe65beaee391d30da42e937db621564Steve Blockstatic PyObject * 121d0825bca7fe65beaee391d30da42e937db621564Steve Blockascii_escape_str(PyObject *pystr) { 122d0825bca7fe65beaee391d30da42e937db621564Steve Block Py_ssize_t i; 123d0825bca7fe65beaee391d30da42e937db621564Steve Block Py_ssize_t input_chars; 124d0825bca7fe65beaee391d30da42e937db621564Steve Block Py_ssize_t output_size; 125d0825bca7fe65beaee391d30da42e937db621564Steve Block Py_ssize_t chars; 126d0825bca7fe65beaee391d30da42e937db621564Steve Block PyObject *rval; 127d0825bca7fe65beaee391d30da42e937db621564Steve Block char *output; 128d0825bca7fe65beaee391d30da42e937db621564Steve Block char *input_str; 129d0825bca7fe65beaee391d30da42e937db621564Steve Block 130d0825bca7fe65beaee391d30da42e937db621564Steve Block input_chars = PyString_GET_SIZE(pystr); 131d0825bca7fe65beaee391d30da42e937db621564Steve Block input_str = PyString_AS_STRING(pystr); 132d0825bca7fe65beaee391d30da42e937db621564Steve Block /* One char input can be up to 6 chars output, estimate 4 of these */ 133d0825bca7fe65beaee391d30da42e937db621564Steve Block output_size = 2 + (MIN_EXPANSION * 4) + input_chars; 134d0825bca7fe65beaee391d30da42e937db621564Steve Block rval = PyString_FromStringAndSize(NULL, output_size); 135d0825bca7fe65beaee391d30da42e937db621564Steve Block if (rval == NULL) { 136d0825bca7fe65beaee391d30da42e937db621564Steve Block return NULL; 137d0825bca7fe65beaee391d30da42e937db621564Steve Block } 138d0825bca7fe65beaee391d30da42e937db621564Steve Block output = PyString_AS_STRING(rval); 139d0825bca7fe65beaee391d30da42e937db621564Steve Block chars = 0; 140d0825bca7fe65beaee391d30da42e937db621564Steve Block output[chars++] = '"'; 141d0825bca7fe65beaee391d30da42e937db621564Steve Block for (i = 0; i < input_chars; i++) { 142d0825bca7fe65beaee391d30da42e937db621564Steve Block Py_UNICODE c = (Py_UNICODE)input_str[i]; 143d0825bca7fe65beaee391d30da42e937db621564Steve Block if (S_CHAR(c)) { 144d0825bca7fe65beaee391d30da42e937db621564Steve Block output[chars++] = (char)c; 145d0825bca7fe65beaee391d30da42e937db621564Steve Block } else if (c > 0x7F) { 146d0825bca7fe65beaee391d30da42e937db621564Steve Block /* We hit a non-ASCII character, bail to unicode mode */ 147d0825bca7fe65beaee391d30da42e937db621564Steve Block PyObject *uni; 148d0825bca7fe65beaee391d30da42e937db621564Steve Block Py_DECREF(rval); 149d0825bca7fe65beaee391d30da42e937db621564Steve Block uni = PyUnicode_DecodeUTF8(input_str, input_chars, "strict"); 150d0825bca7fe65beaee391d30da42e937db621564Steve Block if (uni == NULL) { 151d0825bca7fe65beaee391d30da42e937db621564Steve Block return NULL; 152d0825bca7fe65beaee391d30da42e937db621564Steve Block } 153d0825bca7fe65beaee391d30da42e937db621564Steve Block rval = ascii_escape_unicode(uni); 154d0825bca7fe65beaee391d30da42e937db621564Steve Block Py_DECREF(uni); 155d0825bca7fe65beaee391d30da42e937db621564Steve Block return rval; 156d0825bca7fe65beaee391d30da42e937db621564Steve Block } else { 157d0825bca7fe65beaee391d30da42e937db621564Steve Block chars = ascii_escape_char(c, output, chars); 158d0825bca7fe65beaee391d30da42e937db621564Steve Block } 159d0825bca7fe65beaee391d30da42e937db621564Steve Block /* An ASCII char can't possibly expand to a surrogate! */ 160d0825bca7fe65beaee391d30da42e937db621564Steve Block if (output_size - chars < (1 + MIN_EXPANSION)) { 161d0825bca7fe65beaee391d30da42e937db621564Steve Block /* There's more than four, so let's resize by a lot */ 162d0825bca7fe65beaee391d30da42e937db621564Steve Block output_size *= 2; 163d0825bca7fe65beaee391d30da42e937db621564Steve Block if (output_size > 2 + (input_chars * MIN_EXPANSION)) { 164d0825bca7fe65beaee391d30da42e937db621564Steve Block output_size = 2 + (input_chars * MIN_EXPANSION); 165d0825bca7fe65beaee391d30da42e937db621564Steve Block } 166d0825bca7fe65beaee391d30da42e937db621564Steve Block if (_PyString_Resize(&rval, output_size) == -1) { 167d0825bca7fe65beaee391d30da42e937db621564Steve Block return NULL; 168d0825bca7fe65beaee391d30da42e937db621564Steve Block } 169d0825bca7fe65beaee391d30da42e937db621564Steve Block output = PyString_AS_STRING(rval); 170d0825bca7fe65beaee391d30da42e937db621564Steve Block } 171d0825bca7fe65beaee391d30da42e937db621564Steve Block } 172d0825bca7fe65beaee391d30da42e937db621564Steve Block output[chars++] = '"'; 173d0825bca7fe65beaee391d30da42e937db621564Steve Block if (_PyString_Resize(&rval, chars) == -1) { 174d0825bca7fe65beaee391d30da42e937db621564Steve Block return NULL; 175d0825bca7fe65beaee391d30da42e937db621564Steve Block } 176d0825bca7fe65beaee391d30da42e937db621564Steve Block return rval; 177d0825bca7fe65beaee391d30da42e937db621564Steve Block} 178d0825bca7fe65beaee391d30da42e937db621564Steve Block 179d0825bca7fe65beaee391d30da42e937db621564Steve BlockPyDoc_STRVAR(pydoc_encode_basestring_ascii, 180d0825bca7fe65beaee391d30da42e937db621564Steve Block "encode_basestring_ascii(basestring) -> str\n" 181d0825bca7fe65beaee391d30da42e937db621564Steve Block "\n" 182d0825bca7fe65beaee391d30da42e937db621564Steve Block "..." 183d0825bca7fe65beaee391d30da42e937db621564Steve Block); 184d0825bca7fe65beaee391d30da42e937db621564Steve Block 185d0825bca7fe65beaee391d30da42e937db621564Steve Blockstatic PyObject * 186d0825bca7fe65beaee391d30da42e937db621564Steve Blockpy_encode_basestring_ascii(PyObject* self __attribute__((__unused__)), PyObject *pystr) { 187d0825bca7fe65beaee391d30da42e937db621564Steve Block /* METH_O */ 188d0825bca7fe65beaee391d30da42e937db621564Steve Block if (PyString_Check(pystr)) { 189d0825bca7fe65beaee391d30da42e937db621564Steve Block return ascii_escape_str(pystr); 190d0825bca7fe65beaee391d30da42e937db621564Steve Block } else if (PyUnicode_Check(pystr)) { 191d0825bca7fe65beaee391d30da42e937db621564Steve Block return ascii_escape_unicode(pystr); 192d0825bca7fe65beaee391d30da42e937db621564Steve Block } 193d0825bca7fe65beaee391d30da42e937db621564Steve Block PyErr_SetString(PyExc_TypeError, "first argument must be a string"); 194d0825bca7fe65beaee391d30da42e937db621564Steve Block return NULL; 195d0825bca7fe65beaee391d30da42e937db621564Steve Block} 196d0825bca7fe65beaee391d30da42e937db621564Steve Block 197d0825bca7fe65beaee391d30da42e937db621564Steve Block#define DEFN(n, k) \ 198d0825bca7fe65beaee391d30da42e937db621564Steve Block { \ 199d0825bca7fe65beaee391d30da42e937db621564Steve Block #n, \ 200d0825bca7fe65beaee391d30da42e937db621564Steve Block (PyCFunction)py_ ##n, \ 201d0825bca7fe65beaee391d30da42e937db621564Steve Block k, \ 202d0825bca7fe65beaee391d30da42e937db621564Steve Block pydoc_ ##n \ 203d0825bca7fe65beaee391d30da42e937db621564Steve Block } 204d0825bca7fe65beaee391d30da42e937db621564Steve Blockstatic PyMethodDef speedups_methods[] = { 205d0825bca7fe65beaee391d30da42e937db621564Steve Block DEFN(encode_basestring_ascii, METH_O), 206d0825bca7fe65beaee391d30da42e937db621564Steve Block {} 207d0825bca7fe65beaee391d30da42e937db621564Steve Block}; 208d0825bca7fe65beaee391d30da42e937db621564Steve Block#undef DEFN 209d0825bca7fe65beaee391d30da42e937db621564Steve Block 210d0825bca7fe65beaee391d30da42e937db621564Steve Blockvoid 211d0825bca7fe65beaee391d30da42e937db621564Steve Blockinit_speedups(void) 212d0825bca7fe65beaee391d30da42e937db621564Steve Block{ 213d0825bca7fe65beaee391d30da42e937db621564Steve Block PyObject *m; 214d0825bca7fe65beaee391d30da42e937db621564Steve Block m = Py_InitModule4("_speedups", speedups_methods, NULL, NULL, PYTHON_API_VERSION); 215d0825bca7fe65beaee391d30da42e937db621564Steve Block} 216