16467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou/* stringlib: split implementation */ 26467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou 36467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou#ifndef STRINGLIB_SPLIT_H 46467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou#define STRINGLIB_SPLIT_H 56467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou 66467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou#ifndef STRINGLIB_FASTSEARCH_H 76467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou#error must include "stringlib/fastsearch.h" before including this module 86467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou#endif 96467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou 106467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou/* Overallocate the initial list to reduce the number of reallocs for small 116467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou split sizes. Eg, "A A A A A A A A A A".split() (10 elements) has three 126467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou resizes, to sizes 4, 8, then 16. Most observed string splits are for human 136467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou text (roughly 11 words per line) and field delimited data (usually 1-10 146467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou fields). For large strings the split algorithms are bandwidth limited 156467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou so increasing the preallocation likely will not improve things.*/ 166467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou 176467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou#define MAX_PREALLOC 12 186467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou 196467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou/* 5 splits gives 6 elements */ 206467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou#define PREALLOC_SIZE(maxsplit) \ 216467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou (maxsplit >= MAX_PREALLOC ? MAX_PREALLOC : maxsplit+1) 226467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou 236467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou#define SPLIT_APPEND(data, left, right) \ 246467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou sub = STRINGLIB_NEW((data) + (left), \ 256467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou (right) - (left)); \ 266467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou if (sub == NULL) \ 276467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou goto onError; \ 286467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou if (PyList_Append(list, sub)) { \ 296467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou Py_DECREF(sub); \ 306467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou goto onError; \ 316467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou } \ 326467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou else \ 336467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou Py_DECREF(sub); 346467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou 356467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou#define SPLIT_ADD(data, left, right) { \ 366467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou sub = STRINGLIB_NEW((data) + (left), \ 376467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou (right) - (left)); \ 386467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou if (sub == NULL) \ 396467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou goto onError; \ 406467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou if (count < MAX_PREALLOC) { \ 416467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou PyList_SET_ITEM(list, count, sub); \ 426467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou } else { \ 436467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou if (PyList_Append(list, sub)) { \ 446467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou Py_DECREF(sub); \ 456467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou goto onError; \ 466467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou } \ 476467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou else \ 486467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou Py_DECREF(sub); \ 496467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou } \ 506467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou count++; } 516467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou 526467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou 536467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou/* Always force the list to the expected size. */ 546467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou#define FIX_PREALLOC_SIZE(list) Py_SIZE(list) = count 556467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou 566467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine PitrouPy_LOCAL_INLINE(PyObject *) 576467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitroustringlib_split_whitespace(PyObject* str_obj, 586467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou const STRINGLIB_CHAR* str, Py_ssize_t str_len, 596467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou Py_ssize_t maxcount) 606467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou{ 616467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou Py_ssize_t i, j, count=0; 626467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou PyObject *list = PyList_New(PREALLOC_SIZE(maxcount)); 636467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou PyObject *sub; 646467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou 656467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou if (list == NULL) 666467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou return NULL; 676467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou 686467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou i = j = 0; 696467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou while (maxcount-- > 0) { 706467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou while (i < str_len && STRINGLIB_ISSPACE(str[i])) 716467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou i++; 726467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou if (i == str_len) break; 736467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou j = i; i++; 746467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou while (i < str_len && !STRINGLIB_ISSPACE(str[i])) 756467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou i++; 766467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou#ifndef STRINGLIB_MUTABLE 776467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou if (j == 0 && i == str_len && STRINGLIB_CHECK_EXACT(str_obj)) { 786467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou /* No whitespace in str_obj, so just use it as list[0] */ 796467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou Py_INCREF(str_obj); 806467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou PyList_SET_ITEM(list, 0, (PyObject *)str_obj); 816467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou count++; 826467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou break; 836467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou } 846467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou#endif 856467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou SPLIT_ADD(str, j, i); 866467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou } 876467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou 886467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou if (i < str_len) { 896467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou /* Only occurs when maxcount was reached */ 906467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou /* Skip any remaining whitespace and copy to end of string */ 916467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou while (i < str_len && STRINGLIB_ISSPACE(str[i])) 926467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou i++; 936467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou if (i != str_len) 946467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou SPLIT_ADD(str, i, str_len); 956467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou } 966467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou FIX_PREALLOC_SIZE(list); 976467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou return list; 986467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou 996467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou onError: 1006467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou Py_DECREF(list); 1016467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou return NULL; 1026467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou} 1036467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou 1046467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine PitrouPy_LOCAL_INLINE(PyObject *) 1056467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitroustringlib_split_char(PyObject* str_obj, 1066467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou const STRINGLIB_CHAR* str, Py_ssize_t str_len, 1076467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou const STRINGLIB_CHAR ch, 1086467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou Py_ssize_t maxcount) 1096467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou{ 1106467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou Py_ssize_t i, j, count=0; 1116467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou PyObject *list = PyList_New(PREALLOC_SIZE(maxcount)); 1126467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou PyObject *sub; 1136467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou 1146467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou if (list == NULL) 1156467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou return NULL; 1166467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou 1176467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou i = j = 0; 1186467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou while ((j < str_len) && (maxcount-- > 0)) { 1196467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou for(; j < str_len; j++) { 1206467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou /* I found that using memchr makes no difference */ 1216467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou if (str[j] == ch) { 1226467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou SPLIT_ADD(str, i, j); 1236467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou i = j = j + 1; 1246467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou break; 1256467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou } 1266467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou } 1276467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou } 1286467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou#ifndef STRINGLIB_MUTABLE 1296467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou if (count == 0 && STRINGLIB_CHECK_EXACT(str_obj)) { 1306467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou /* ch not in str_obj, so just use str_obj as list[0] */ 1316467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou Py_INCREF(str_obj); 1326467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou PyList_SET_ITEM(list, 0, (PyObject *)str_obj); 1336467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou count++; 1346467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou } else 1356467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou#endif 1366467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou if (i <= str_len) { 1376467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou SPLIT_ADD(str, i, str_len); 1386467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou } 1396467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou FIX_PREALLOC_SIZE(list); 1406467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou return list; 1416467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou 1426467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou onError: 1436467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou Py_DECREF(list); 1446467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou return NULL; 1456467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou} 1466467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou 1476467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine PitrouPy_LOCAL_INLINE(PyObject *) 1486467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitroustringlib_split(PyObject* str_obj, 1496467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou const STRINGLIB_CHAR* str, Py_ssize_t str_len, 1506467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou const STRINGLIB_CHAR* sep, Py_ssize_t sep_len, 1516467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou Py_ssize_t maxcount) 1526467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou{ 1536467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou Py_ssize_t i, j, pos, count=0; 1546467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou PyObject *list, *sub; 1556467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou 1566467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou if (sep_len == 0) { 1576467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou PyErr_SetString(PyExc_ValueError, "empty separator"); 1586467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou return NULL; 1596467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou } 1606467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou else if (sep_len == 1) 1616467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou return stringlib_split_char(str_obj, str, str_len, sep[0], maxcount); 1626467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou 1636467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou list = PyList_New(PREALLOC_SIZE(maxcount)); 1646467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou if (list == NULL) 1656467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou return NULL; 1666467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou 1676467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou i = j = 0; 1686467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou while (maxcount-- > 0) { 1696467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou pos = fastsearch(str+i, str_len-i, sep, sep_len, -1, FAST_SEARCH); 1706467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou if (pos < 0) 1716467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou break; 1726467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou j = i + pos; 1736467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou SPLIT_ADD(str, i, j); 1746467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou i = j + sep_len; 1756467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou } 1766467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou#ifndef STRINGLIB_MUTABLE 1776467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou if (count == 0 && STRINGLIB_CHECK_EXACT(str_obj)) { 1786467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou /* No match in str_obj, so just use it as list[0] */ 1796467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou Py_INCREF(str_obj); 1806467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou PyList_SET_ITEM(list, 0, (PyObject *)str_obj); 1816467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou count++; 1826467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou } else 1836467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou#endif 1846467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou { 1856467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou SPLIT_ADD(str, i, str_len); 1866467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou } 1876467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou FIX_PREALLOC_SIZE(list); 1886467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou return list; 1896467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou 1906467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou onError: 1916467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou Py_DECREF(list); 1926467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou return NULL; 1936467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou} 1946467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou 1956467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine PitrouPy_LOCAL_INLINE(PyObject *) 1966467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitroustringlib_rsplit_whitespace(PyObject* str_obj, 1976467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou const STRINGLIB_CHAR* str, Py_ssize_t str_len, 1986467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou Py_ssize_t maxcount) 1996467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou{ 2006467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou Py_ssize_t i, j, count=0; 2016467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou PyObject *list = PyList_New(PREALLOC_SIZE(maxcount)); 2026467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou PyObject *sub; 2036467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou 2046467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou if (list == NULL) 2056467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou return NULL; 2066467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou 2076467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou i = j = str_len - 1; 2086467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou while (maxcount-- > 0) { 2096467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou while (i >= 0 && STRINGLIB_ISSPACE(str[i])) 2106467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou i--; 2116467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou if (i < 0) break; 2126467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou j = i; i--; 2136467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou while (i >= 0 && !STRINGLIB_ISSPACE(str[i])) 2146467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou i--; 2156467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou#ifndef STRINGLIB_MUTABLE 2166467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou if (j == str_len - 1 && i < 0 && STRINGLIB_CHECK_EXACT(str_obj)) { 2176467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou /* No whitespace in str_obj, so just use it as list[0] */ 2186467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou Py_INCREF(str_obj); 2196467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou PyList_SET_ITEM(list, 0, (PyObject *)str_obj); 2206467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou count++; 2216467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou break; 2226467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou } 2236467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou#endif 2246467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou SPLIT_ADD(str, i + 1, j + 1); 2256467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou } 2266467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou 2276467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou if (i >= 0) { 2286467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou /* Only occurs when maxcount was reached */ 2296467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou /* Skip any remaining whitespace and copy to beginning of string */ 2306467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou while (i >= 0 && STRINGLIB_ISSPACE(str[i])) 2316467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou i--; 2326467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou if (i >= 0) 2336467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou SPLIT_ADD(str, 0, i + 1); 2346467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou } 2356467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou FIX_PREALLOC_SIZE(list); 2366467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou if (PyList_Reverse(list) < 0) 2376467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou goto onError; 2386467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou return list; 2396467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou 2406467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou onError: 2416467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou Py_DECREF(list); 2426467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou return NULL; 2436467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou} 2446467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou 2456467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine PitrouPy_LOCAL_INLINE(PyObject *) 2466467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitroustringlib_rsplit_char(PyObject* str_obj, 2476467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou const STRINGLIB_CHAR* str, Py_ssize_t str_len, 2486467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou const STRINGLIB_CHAR ch, 2496467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou Py_ssize_t maxcount) 2506467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou{ 2516467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou Py_ssize_t i, j, count=0; 2526467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou PyObject *list = PyList_New(PREALLOC_SIZE(maxcount)); 2536467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou PyObject *sub; 2546467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou 2556467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou if (list == NULL) 2566467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou return NULL; 2576467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou 2586467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou i = j = str_len - 1; 2596467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou while ((i >= 0) && (maxcount-- > 0)) { 2606467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou for(; i >= 0; i--) { 2616467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou if (str[i] == ch) { 2626467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou SPLIT_ADD(str, i + 1, j + 1); 2636467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou j = i = i - 1; 2646467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou break; 2656467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou } 2666467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou } 2676467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou } 2686467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou#ifndef STRINGLIB_MUTABLE 2696467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou if (count == 0 && STRINGLIB_CHECK_EXACT(str_obj)) { 2706467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou /* ch not in str_obj, so just use str_obj as list[0] */ 2716467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou Py_INCREF(str_obj); 2726467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou PyList_SET_ITEM(list, 0, (PyObject *)str_obj); 2736467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou count++; 2746467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou } else 2756467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou#endif 2766467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou if (j >= -1) { 2776467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou SPLIT_ADD(str, 0, j + 1); 2786467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou } 2796467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou FIX_PREALLOC_SIZE(list); 2806467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou if (PyList_Reverse(list) < 0) 2816467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou goto onError; 2826467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou return list; 2836467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou 2846467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou onError: 2856467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou Py_DECREF(list); 2866467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou return NULL; 2876467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou} 2886467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou 2896467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine PitrouPy_LOCAL_INLINE(PyObject *) 2906467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitroustringlib_rsplit(PyObject* str_obj, 2916467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou const STRINGLIB_CHAR* str, Py_ssize_t str_len, 2926467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou const STRINGLIB_CHAR* sep, Py_ssize_t sep_len, 2936467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou Py_ssize_t maxcount) 2946467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou{ 2956467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou Py_ssize_t j, pos, count=0; 2966467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou PyObject *list, *sub; 2976467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou 2986467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou if (sep_len == 0) { 2996467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou PyErr_SetString(PyExc_ValueError, "empty separator"); 3006467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou return NULL; 3016467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou } 3026467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou else if (sep_len == 1) 3036467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou return stringlib_rsplit_char(str_obj, str, str_len, sep[0], maxcount); 3046467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou 3056467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou list = PyList_New(PREALLOC_SIZE(maxcount)); 3066467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou if (list == NULL) 3076467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou return NULL; 3086467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou 3096467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou j = str_len; 3106467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou while (maxcount-- > 0) { 3116467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou pos = fastsearch(str, j, sep, sep_len, -1, FAST_RSEARCH); 3126467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou if (pos < 0) 3136467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou break; 3146467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou SPLIT_ADD(str, pos + sep_len, j); 3156467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou j = pos; 3166467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou } 3176467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou#ifndef STRINGLIB_MUTABLE 3186467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou if (count == 0 && STRINGLIB_CHECK_EXACT(str_obj)) { 3196467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou /* No match in str_obj, so just use it as list[0] */ 3206467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou Py_INCREF(str_obj); 3216467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou PyList_SET_ITEM(list, 0, (PyObject *)str_obj); 3226467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou count++; 3236467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou } else 3246467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou#endif 3256467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou { 3266467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou SPLIT_ADD(str, 0, j); 3276467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou } 3286467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou FIX_PREALLOC_SIZE(list); 3296467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou if (PyList_Reverse(list) < 0) 3306467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou goto onError; 3316467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou return list; 3326467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou 3336467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou onError: 3346467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou Py_DECREF(list); 3356467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou return NULL; 3366467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou} 3376467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou 3386467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine PitrouPy_LOCAL_INLINE(PyObject *) 3396467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitroustringlib_splitlines(PyObject* str_obj, 3406467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou const STRINGLIB_CHAR* str, Py_ssize_t str_len, 3416467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou int keepends) 3426467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou{ 3436467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou /* This does not use the preallocated list because splitlines is 3446467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou usually run with hundreds of newlines. The overhead of 3456467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou switching between PyList_SET_ITEM and append causes about a 3466467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou 2-3% slowdown for that common case. A smarter implementation 3476467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou could move the if check out, so the SET_ITEMs are done first 3486467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou and the appends only done when the prealloc buffer is full. 3496467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou That's too much work for little gain.*/ 3506467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou 3516467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou register Py_ssize_t i; 3526467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou register Py_ssize_t j; 3536467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou PyObject *list = PyList_New(0); 3546467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou PyObject *sub; 3556467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou 3566467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou if (list == NULL) 3576467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou return NULL; 3586467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou 3596467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou for (i = j = 0; i < str_len; ) { 3606467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou Py_ssize_t eol; 3616467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou 3626467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou /* Find a line and append it */ 3636467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou while (i < str_len && !STRINGLIB_ISLINEBREAK(str[i])) 3646467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou i++; 3656467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou 3666467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou /* Skip the line break reading CRLF as one line break */ 3676467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou eol = i; 3686467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou if (i < str_len) { 3696467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou if (str[i] == '\r' && i + 1 < str_len && str[i+1] == '\n') 3706467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou i += 2; 3716467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou else 3726467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou i++; 3736467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou if (keepends) 3746467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou eol = i; 3756467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou } 3766467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou#ifndef STRINGLIB_MUTABLE 3776467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou if (j == 0 && eol == str_len && STRINGLIB_CHECK_EXACT(str_obj)) { 3786467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou /* No linebreak in str_obj, so just use it as list[0] */ 3796467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou if (PyList_Append(list, str_obj)) 3806467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou goto onError; 3816467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou break; 3826467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou } 3836467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou#endif 3846467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou SPLIT_APPEND(str, j, eol); 3856467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou j = i; 3866467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou } 3876467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou return list; 3886467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou 3896467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou onError: 3906467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou Py_DECREF(list); 3916467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou return NULL; 3926467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou} 3936467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou 3946467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou#endif 395