1/* 2 * Copyright 2011 - 2014 3 * Andr\xe9 Malo or his licensors, as applicable 4 * 5 * Licensed under the Apache License, Version 2.0 (the "License"); 6 * you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 */ 17 18#include "cext.h" 19EXT_INIT_FUNC; 20 21#define RJSMIN_DULL_BIT (1 << 0) 22#define RJSMIN_PRE_REGEX_BIT (1 << 1) 23#define RJSMIN_REGEX_DULL_BIT (1 << 2) 24#define RJSMIN_REGEX_CC_DULL_BIT (1 << 3) 25#define RJSMIN_ID_LIT_BIT (1 << 4) 26#define RJSMIN_ID_LIT_O_BIT (1 << 5) 27#define RJSMIN_ID_LIT_C_BIT (1 << 6) 28#define RJSMIN_STRING_DULL_BIT (1 << 7) 29#define RJSMIN_SPACE_BIT (1 << 8) 30 31#ifdef EXT3 32typedef Py_UNICODE rchar; 33#else 34typedef unsigned char rchar; 35#endif 36#define U(c) ((rchar)(c)) 37 38#define RJSMIN_IS_DULL(c) ((U(c) > 127) || \ 39 (rjsmin_charmask[U(c) & 0x7F] & RJSMIN_DULL_BIT)) 40 41#define RJSMIN_IS_REGEX_DULL(c) ((U(c) > 127) || \ 42 (rjsmin_charmask[U(c) & 0x7F] & RJSMIN_REGEX_DULL_BIT)) 43 44#define RJSMIN_IS_REGEX_CC_DULL(c) ((U(c) > 127) || \ 45 (rjsmin_charmask[U(c) & 0x7F] & RJSMIN_REGEX_CC_DULL_BIT)) 46 47#define RJSMIN_IS_STRING_DULL(c) ((U(c) > 127) || \ 48 (rjsmin_charmask[U(c) & 0x7F] & RJSMIN_STRING_DULL_BIT)) 49 50#define RJSMIN_IS_ID_LITERAL(c) ((U(c) > 127) || \ 51 (rjsmin_charmask[U(c) & 0x7F] & RJSMIN_ID_LIT_BIT)) 52 53#define RJSMIN_IS_ID_LITERAL_OPEN(c) ((U(c) > 127) || \ 54 (rjsmin_charmask[U(c) & 0x7F] & RJSMIN_ID_LIT_O_BIT)) 55 56#define RJSMIN_IS_ID_LITERAL_CLOSE(c) ((U(c) > 127) || \ 57 (rjsmin_charmask[U(c) & 0x7F] & RJSMIN_ID_LIT_C_BIT)) 58 59#define RJSMIN_IS_SPACE(c) ((U(c) <= 127) && \ 60 (rjsmin_charmask[U(c) & 0x7F] & RJSMIN_SPACE_BIT)) 61 62#define RJSMIN_IS_PRE_REGEX_1(c) ((U(c) <= 127) && \ 63 (rjsmin_charmask[U(c) & 0x7F] & RJSMIN_PRE_REGEX_BIT)) 64 65 66static const unsigned short rjsmin_charmask[128] = { 67 396, 396, 396, 396, 396, 396, 396, 396, 68 396, 396, 2, 396, 396, 2, 396, 396, 69 396, 396, 396, 396, 396, 396, 396, 396, 70 396, 396, 396, 396, 396, 396, 396, 396, 71 396, 175, 76, 141, 253, 141, 143, 76, 72 175, 205, 141, 237, 143, 237, 141, 136, 73 253, 253, 253, 253, 253, 253, 253, 253, 74 253, 253, 143, 143, 141, 143, 141, 143, 75 141, 253, 253, 253, 253, 253, 253, 253, 76 253, 253, 253, 253, 253, 253, 253, 253, 77 253, 253, 253, 253, 253, 253, 253, 253, 78 253, 253, 253, 171, 1, 197, 141, 253, 79 141, 253, 253, 253, 253, 253, 253, 253, 80 253, 253, 253, 253, 253, 253, 253, 253, 81 253, 253, 253, 253, 253, 253, 253, 253, 82 253, 253, 253, 175, 143, 207, 141, 253 83}; 84 85static Py_ssize_t 86rjsmin(const rchar *source, rchar *target, Py_ssize_t length, 87 int keep_bang_comments) 88{ 89 const rchar *reset, *sentinel = source + length; 90 rchar *tstart = target; 91 rchar c, quote; 92 93 while (source < sentinel) { 94 c = *source++; 95 if (RJSMIN_IS_DULL(c)) { 96 *target++ = c; 97 continue; 98 } 99 switch (c) { 100 101 /* String */ 102 case U('\''): case U('"'): 103 reset = source; 104 *target++ = quote = c; 105 while (source < sentinel) { 106 c = *source++; 107 *target++ = c; 108 if (RJSMIN_IS_STRING_DULL(c)) 109 continue; 110 switch (c) { 111 case U('\''): case U('"'): 112 if (c == quote) 113 goto cont; 114 continue; 115 case U('\\'): 116 if (source < sentinel) { 117 c = *source++; 118 *target++ = c; 119 if (c == U('\r') && source < sentinel 120 && *source == U('\n')) 121 *target++ = *source++; 122 } 123 continue; 124 } 125 break; 126 } 127 target -= source - reset; 128 source = reset; 129 continue; 130 131 /* Comment or Regex or something else entirely */ 132 case U('/'): 133 if (!(source < sentinel)) { 134 *target++ = c; 135 } 136 else { 137 switch (*source) { 138 /* Comment */ 139 case U('*'): case U('/'): 140 goto skip_or_copy_ws; 141 142 default: 143 if ( target == tstart 144 || RJSMIN_IS_PRE_REGEX_1(*(target - 1)) 145 || ( 146 (target - tstart >= 6) 147 && *(target - 1) == U('n') 148 && *(target - 2) == U('r') 149 && *(target - 3) == U('u') 150 && *(target - 4) == U('t') 151 && *(target - 5) == U('e') 152 && *(target - 6) == U('r') 153 && ( 154 target - tstart == 6 155 || !RJSMIN_IS_ID_LITERAL(*(target - 7)) 156 ) 157 )) { 158 159 /* Regex */ 160 reset = source; 161 *target++ = U('/'); 162 while (source < sentinel) { 163 c = *source++; 164 *target++ = c; 165 if (RJSMIN_IS_REGEX_DULL(c)) 166 continue; 167 switch (c) { 168 case U('/'): 169 goto cont; 170 case U('\\'): 171 if (source < sentinel) { 172 c = *source++; 173 *target++ = c; 174 if (c == U('\r') || c == U('\n')) 175 break; 176 } 177 continue; 178 case U('['): 179 while (source < sentinel) { 180 c = *source++; 181 *target++ = c; 182 if (RJSMIN_IS_REGEX_CC_DULL(c)) 183 continue; 184 switch (c) { 185 case U('\\'): 186 if (source < sentinel) { 187 c = *source++; 188 *target++ = c; 189 if (c == U('\r') || c == U('\n')) 190 break; 191 } 192 continue; 193 case U(']'): 194 goto cont_regex; 195 } 196 } 197 break; 198 } 199 break; 200 cont_regex: 201 continue; 202 } 203 target -= source - reset; 204 source = reset; 205 } 206 else { 207 /* Just a slash */ 208 *target++ = c; 209 } 210 continue; 211 } 212 } 213 continue; 214 215 /* Whitespace */ 216 default: 217 skip_or_copy_ws: 218 quote = U(' '); 219 --source; 220 while (source < sentinel) { 221 c = *source++; 222 if (RJSMIN_IS_SPACE(c)) 223 continue; 224 switch (c) { 225 case U('\r'): case U('\n'): 226 quote = U('\n'); 227 continue; 228 case U('/'): 229 if (source < sentinel) { 230 switch (*source) { 231 case U('*'): 232 reset = source++; 233 /* copy bang comment, if requested */ 234 if ( keep_bang_comments && source < sentinel 235 && *source == U('!')) { 236 *target++ = U('/'); 237 *target++ = U('*'); 238 *target++ = *source++; 239 while (source < sentinel) { 240 c = *source++; 241 *target++ = c; 242 if (c == U('*') && source < sentinel 243 && *source == U('/')) { 244 *target++ = *source++; 245 reset = NULL; 246 break; 247 } 248 } 249 if (!reset) 250 continue; 251 target -= source - reset; 252 source = reset; 253 } 254 /* strip regular comment */ 255 else { 256 while (source < sentinel) { 257 c = *source++; 258 if (c == U('*') && source < sentinel 259 && *source == U('/')) { 260 ++source; 261 reset = NULL; 262 break; 263 } 264 } 265 if (!reset) 266 continue; 267 source = reset; 268 *target++ = U('/'); 269 } 270 goto cont; 271 case U('/'): 272 ++source; 273 while (source < sentinel) { 274 c = *source++; 275 switch (c) { 276 case U('\n'): 277 break; 278 case U('\r'): 279 if (source < sentinel 280 && *source == U('\n')) 281 ++source; 282 break; 283 default: 284 continue; 285 } 286 break; 287 } 288 quote = U('\n'); 289 continue; 290 } 291 } 292 } 293 --source; 294 break; 295 } 296 297 if ((tstart < target && source < sentinel) 298 && ((quote == U('\n') 299 && RJSMIN_IS_ID_LITERAL_CLOSE(*(target - 1)) 300 && RJSMIN_IS_ID_LITERAL_OPEN(*source)) 301 || 302 (quote == U(' ') 303 && ((RJSMIN_IS_ID_LITERAL(*(target - 1)) 304 && RJSMIN_IS_ID_LITERAL(*source)) 305 || (source < sentinel 306 && ((*(target - 1) == U('+') 307 && *source == U('+')) 308 || (*(target - 1) == U('-') 309 && *source == U('-')))))))) 310 *target++ = quote; 311 } 312 cont: 313 continue; 314 } 315 return (Py_ssize_t)(target - tstart); 316} 317 318 319PyDoc_STRVAR(rjsmin_jsmin__doc__, 320"jsmin(script, keep_bang_comments=False)\n\ 321\n\ 322Minify javascript based on `jsmin.c by Douglas Crockford`_\\.\n\ 323\n\ 324Instead of parsing the stream char by char, it uses a regular\n\ 325expression approach which minifies the whole script with one big\n\ 326substitution regex.\n\ 327\n\ 328.. _jsmin.c by Douglas Crockford:\n\ 329 http://www.crockford.com/javascript/jsmin.c\n\ 330\n\ 331:Note: This is a hand crafted C implementation built on the regex\n\ 332 semantics.\n\ 333\n\ 334:Parameters:\n\ 335 `script` : ``str``\n\ 336 Script to minify\n\ 337\n\ 338 `keep_bang_comments` : ``bool``\n\ 339 Keep comments starting with an exclamation mark? (``/*!...*/``)\n\ 340\n\ 341:Return: Minified script\n\ 342:Rtype: ``str``"); 343 344static PyObject * 345rjsmin_jsmin(PyObject *self, PyObject *args, PyObject *kwds) 346{ 347 PyObject *script, *keep_bang_comments_ = NULL, *result; 348 static char *kwlist[] = {"script", "keep_bang_comments", NULL}; 349 Py_ssize_t slength, length; 350 int keep_bang_comments; 351#ifdef EXT2 352 int uni; 353#define UOBJ "O" 354#endif 355#ifdef EXT3 356#define UOBJ "U" 357#endif 358 359 if (!PyArg_ParseTupleAndKeywords(args, kwds, UOBJ "|O", kwlist, 360 &script, &keep_bang_comments_)) 361 return NULL; 362 363 if (!keep_bang_comments_) 364 keep_bang_comments = 0; 365 else { 366 keep_bang_comments = PyObject_IsTrue(keep_bang_comments_); 367 if (keep_bang_comments == -1) 368 return NULL; 369 } 370 371#ifdef EXT2 372 if (PyUnicode_Check(script)) { 373 if (!(script = PyUnicode_AsUTF8String(script))) 374 return NULL; 375 uni = 1; 376 } 377 else { 378 if (!(script = PyObject_Str(script))) 379 return NULL; 380 uni = 0; 381 } 382#endif 383 384#ifdef EXT3 385 Py_INCREF(script); 386#define PyString_GET_SIZE PyUnicode_GET_SIZE 387#define PyString_AS_STRING PyUnicode_AS_UNICODE 388#define _PyString_Resize PyUnicode_Resize 389#define PyString_FromStringAndSize PyUnicode_FromUnicode 390#endif 391 392 slength = PyString_GET_SIZE(script); 393 if (!(result = PyString_FromStringAndSize(NULL, slength))) { 394 Py_DECREF(script); 395 return NULL; 396 } 397 Py_BEGIN_ALLOW_THREADS 398 length = rjsmin((rchar *)PyString_AS_STRING(script), 399 (rchar *)PyString_AS_STRING(result), 400 slength, keep_bang_comments); 401 Py_END_ALLOW_THREADS 402 403 Py_DECREF(script); 404 if (length < 0) { 405 Py_DECREF(result); 406 return NULL; 407 } 408 if (length != slength && _PyString_Resize(&result, length) == -1) 409 return NULL; 410 411#ifdef EXT2 412 if (uni) { 413 script = PyUnicode_DecodeUTF8(PyString_AS_STRING(result), 414 PyString_GET_SIZE(result), "strict"); 415 Py_DECREF(result); 416 if (!script) 417 return NULL; 418 result = script; 419 } 420#endif 421 return result; 422} 423 424/* ------------------------ BEGIN MODULE DEFINITION ------------------------ */ 425 426EXT_METHODS = { 427 {"jsmin", 428 (PyCFunction)rjsmin_jsmin, METH_VARARGS | METH_KEYWORDS, 429 rjsmin_jsmin__doc__}, 430 431 {NULL} /* Sentinel */ 432}; 433 434PyDoc_STRVAR(EXT_DOCS_VAR, 435"C implementation of rjsmin\n\ 436==========================\n\ 437\n\ 438C implementation of rjsmin."); 439 440 441EXT_DEFINE(EXT_MODULE_NAME, EXT_METHODS_VAR, EXT_DOCS_VAR); 442 443EXT_INIT_FUNC { 444 PyObject *m; 445 446 /* Create the module and populate stuff */ 447 if (!(m = EXT_CREATE(&EXT_DEFINE_VAR))) 448 EXT_INIT_ERROR(NULL); 449 450 EXT_ADD_UNICODE(m, "__author__", "Andr\xe9 Malo", "latin-1"); 451 EXT_ADD_STRING(m, "__docformat__", "restructuredtext en"); 452 453 EXT_INIT_RETURN(m); 454} 455 456/* ------------------------- END MODULE DEFINITION ------------------------- */ 457