16467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou/* stringlib: split implementation */
26467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou
36467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou#ifndef STRINGLIB_SPLIT_H
46467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou#define STRINGLIB_SPLIT_H
56467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou
66467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou#ifndef STRINGLIB_FASTSEARCH_H
76467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou#error must include "stringlib/fastsearch.h" before including this module
86467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou#endif
96467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou
106467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou/* Overallocate the initial list to reduce the number of reallocs for small
116467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou   split sizes.  Eg, "A A A A A A A A A A".split() (10 elements) has three
126467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou   resizes, to sizes 4, 8, then 16.  Most observed string splits are for human
136467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou   text (roughly 11 words per line) and field delimited data (usually 1-10
146467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou   fields).  For large strings the split algorithms are bandwidth limited
156467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou   so increasing the preallocation likely will not improve things.*/
166467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou
176467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou#define MAX_PREALLOC 12
186467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou
196467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou/* 5 splits gives 6 elements */
206467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou#define PREALLOC_SIZE(maxsplit) \
216467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    (maxsplit >= MAX_PREALLOC ? MAX_PREALLOC : maxsplit+1)
226467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou
236467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou#define SPLIT_APPEND(data, left, right)         \
246467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    sub = STRINGLIB_NEW((data) + (left),        \
256467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou                        (right) - (left));      \
266467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    if (sub == NULL)                            \
276467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou        goto onError;                           \
286467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    if (PyList_Append(list, sub)) {             \
296467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou        Py_DECREF(sub);                         \
306467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou        goto onError;                           \
316467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    }                                           \
326467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    else                                        \
336467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou        Py_DECREF(sub);
346467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou
356467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou#define SPLIT_ADD(data, left, right) {          \
366467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    sub = STRINGLIB_NEW((data) + (left),        \
376467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou                        (right) - (left));      \
386467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    if (sub == NULL)                            \
396467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou        goto onError;                           \
406467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    if (count < MAX_PREALLOC) {                 \
416467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou        PyList_SET_ITEM(list, count, sub);      \
426467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    } else {                                    \
436467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou        if (PyList_Append(list, sub)) {         \
446467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou            Py_DECREF(sub);                     \
456467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou            goto onError;                       \
466467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou        }                                       \
476467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou        else                                    \
486467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou            Py_DECREF(sub);                     \
496467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    }                                           \
506467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    count++; }
516467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou
526467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou
536467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou/* Always force the list to the expected size. */
546467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou#define FIX_PREALLOC_SIZE(list) Py_SIZE(list) = count
556467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou
566467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine PitrouPy_LOCAL_INLINE(PyObject *)
576467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitroustringlib_split_whitespace(PyObject* str_obj,
586467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou                           const STRINGLIB_CHAR* str, Py_ssize_t str_len,
596467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou                           Py_ssize_t maxcount)
606467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou{
616467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    Py_ssize_t i, j, count=0;
626467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
636467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    PyObject *sub;
646467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou
656467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    if (list == NULL)
666467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou        return NULL;
676467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou
686467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    i = j = 0;
696467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    while (maxcount-- > 0) {
706467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou        while (i < str_len && STRINGLIB_ISSPACE(str[i]))
716467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou            i++;
726467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou        if (i == str_len) break;
736467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou        j = i; i++;
746467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou        while (i < str_len && !STRINGLIB_ISSPACE(str[i]))
756467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou            i++;
766467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou#ifndef STRINGLIB_MUTABLE
776467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou        if (j == 0 && i == str_len && STRINGLIB_CHECK_EXACT(str_obj)) {
786467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou            /* No whitespace in str_obj, so just use it as list[0] */
796467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou            Py_INCREF(str_obj);
806467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou            PyList_SET_ITEM(list, 0, (PyObject *)str_obj);
816467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou            count++;
826467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou            break;
836467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou        }
846467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou#endif
856467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou        SPLIT_ADD(str, j, i);
866467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    }
876467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou
886467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    if (i < str_len) {
896467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou        /* Only occurs when maxcount was reached */
906467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou        /* Skip any remaining whitespace and copy to end of string */
916467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou        while (i < str_len && STRINGLIB_ISSPACE(str[i]))
926467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou            i++;
936467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou        if (i != str_len)
946467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou            SPLIT_ADD(str, i, str_len);
956467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    }
966467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    FIX_PREALLOC_SIZE(list);
976467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    return list;
986467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou
996467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou  onError:
1006467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    Py_DECREF(list);
1016467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    return NULL;
1026467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou}
1036467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou
1046467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine PitrouPy_LOCAL_INLINE(PyObject *)
1056467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitroustringlib_split_char(PyObject* str_obj,
1066467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou                     const STRINGLIB_CHAR* str, Py_ssize_t str_len,
1076467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou                     const STRINGLIB_CHAR ch,
1086467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou                     Py_ssize_t maxcount)
1096467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou{
1106467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    Py_ssize_t i, j, count=0;
1116467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
1126467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    PyObject *sub;
1136467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou
1146467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    if (list == NULL)
1156467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou        return NULL;
1166467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou
1176467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    i = j = 0;
1186467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    while ((j < str_len) && (maxcount-- > 0)) {
1196467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou        for(; j < str_len; j++) {
1206467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou            /* I found that using memchr makes no difference */
1216467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou            if (str[j] == ch) {
1226467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou                SPLIT_ADD(str, i, j);
1236467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou                i = j = j + 1;
1246467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou                break;
1256467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou            }
1266467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou        }
1276467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    }
1286467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou#ifndef STRINGLIB_MUTABLE
1296467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    if (count == 0 && STRINGLIB_CHECK_EXACT(str_obj)) {
1306467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou        /* ch not in str_obj, so just use str_obj as list[0] */
1316467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou        Py_INCREF(str_obj);
1326467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou        PyList_SET_ITEM(list, 0, (PyObject *)str_obj);
1336467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou        count++;
1346467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    } else
1356467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou#endif
1366467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    if (i <= str_len) {
1376467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou        SPLIT_ADD(str, i, str_len);
1386467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    }
1396467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    FIX_PREALLOC_SIZE(list);
1406467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    return list;
1416467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou
1426467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou  onError:
1436467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    Py_DECREF(list);
1446467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    return NULL;
1456467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou}
1466467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou
1476467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine PitrouPy_LOCAL_INLINE(PyObject *)
1486467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitroustringlib_split(PyObject* str_obj,
1496467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou                const STRINGLIB_CHAR* str, Py_ssize_t str_len,
1506467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou                const STRINGLIB_CHAR* sep, Py_ssize_t sep_len,
1516467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou                Py_ssize_t maxcount)
1526467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou{
1536467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    Py_ssize_t i, j, pos, count=0;
1546467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    PyObject *list, *sub;
1556467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou
1566467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    if (sep_len == 0) {
1576467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou        PyErr_SetString(PyExc_ValueError, "empty separator");
1586467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou        return NULL;
1596467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    }
1606467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    else if (sep_len == 1)
1616467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou        return stringlib_split_char(str_obj, str, str_len, sep[0], maxcount);
1626467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou
1636467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    list = PyList_New(PREALLOC_SIZE(maxcount));
1646467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    if (list == NULL)
1656467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou        return NULL;
1666467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou
1676467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    i = j = 0;
1686467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    while (maxcount-- > 0) {
1696467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou        pos = fastsearch(str+i, str_len-i, sep, sep_len, -1, FAST_SEARCH);
1706467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou        if (pos < 0)
1716467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou            break;
1726467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou        j = i + pos;
1736467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou        SPLIT_ADD(str, i, j);
1746467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou        i = j + sep_len;
1756467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    }
1766467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou#ifndef STRINGLIB_MUTABLE
1776467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    if (count == 0 && STRINGLIB_CHECK_EXACT(str_obj)) {
1786467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou        /* No match in str_obj, so just use it as list[0] */
1796467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou        Py_INCREF(str_obj);
1806467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou        PyList_SET_ITEM(list, 0, (PyObject *)str_obj);
1816467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou        count++;
1826467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    } else
1836467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou#endif
1846467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    {
1856467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou        SPLIT_ADD(str, i, str_len);
1866467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    }
1876467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    FIX_PREALLOC_SIZE(list);
1886467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    return list;
1896467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou
1906467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou  onError:
1916467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    Py_DECREF(list);
1926467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    return NULL;
1936467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou}
1946467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou
1956467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine PitrouPy_LOCAL_INLINE(PyObject *)
1966467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitroustringlib_rsplit_whitespace(PyObject* str_obj,
1976467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou                            const STRINGLIB_CHAR* str, Py_ssize_t str_len,
1986467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou                            Py_ssize_t maxcount)
1996467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou{
2006467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    Py_ssize_t i, j, count=0;
2016467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2026467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    PyObject *sub;
2036467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou
2046467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    if (list == NULL)
2056467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou        return NULL;
2066467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou
2076467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    i = j = str_len - 1;
2086467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    while (maxcount-- > 0) {
2096467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou        while (i >= 0 && STRINGLIB_ISSPACE(str[i]))
2106467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou            i--;
2116467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou        if (i < 0) break;
2126467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou        j = i; i--;
2136467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou        while (i >= 0 && !STRINGLIB_ISSPACE(str[i]))
2146467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou            i--;
2156467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou#ifndef STRINGLIB_MUTABLE
2166467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou        if (j == str_len - 1 && i < 0 && STRINGLIB_CHECK_EXACT(str_obj)) {
2176467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou            /* No whitespace in str_obj, so just use it as list[0] */
2186467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou            Py_INCREF(str_obj);
2196467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou            PyList_SET_ITEM(list, 0, (PyObject *)str_obj);
2206467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou            count++;
2216467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou            break;
2226467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou        }
2236467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou#endif
2246467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou        SPLIT_ADD(str, i + 1, j + 1);
2256467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    }
2266467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou
2276467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    if (i >= 0) {
2286467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou        /* Only occurs when maxcount was reached */
2296467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou        /* Skip any remaining whitespace and copy to beginning of string */
2306467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou        while (i >= 0 && STRINGLIB_ISSPACE(str[i]))
2316467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou            i--;
2326467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou        if (i >= 0)
2336467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou            SPLIT_ADD(str, 0, i + 1);
2346467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    }
2356467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    FIX_PREALLOC_SIZE(list);
2366467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    if (PyList_Reverse(list) < 0)
2376467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou        goto onError;
2386467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    return list;
2396467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou
2406467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou  onError:
2416467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    Py_DECREF(list);
2426467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    return NULL;
2436467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou}
2446467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou
2456467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine PitrouPy_LOCAL_INLINE(PyObject *)
2466467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitroustringlib_rsplit_char(PyObject* str_obj,
2476467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou                      const STRINGLIB_CHAR* str, Py_ssize_t str_len,
2486467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou                      const STRINGLIB_CHAR ch,
2496467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou                      Py_ssize_t maxcount)
2506467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou{
2516467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    Py_ssize_t i, j, count=0;
2526467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    PyObject *list = PyList_New(PREALLOC_SIZE(maxcount));
2536467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    PyObject *sub;
2546467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou
2556467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    if (list == NULL)
2566467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou        return NULL;
2576467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou
2586467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    i = j = str_len - 1;
2596467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    while ((i >= 0) && (maxcount-- > 0)) {
2606467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou        for(; i >= 0; i--) {
2616467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou            if (str[i] == ch) {
2626467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou                SPLIT_ADD(str, i + 1, j + 1);
2636467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou                j = i = i - 1;
2646467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou                break;
2656467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou            }
2666467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou        }
2676467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    }
2686467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou#ifndef STRINGLIB_MUTABLE
2696467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    if (count == 0 && STRINGLIB_CHECK_EXACT(str_obj)) {
2706467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou        /* ch not in str_obj, so just use str_obj as list[0] */
2716467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou        Py_INCREF(str_obj);
2726467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou        PyList_SET_ITEM(list, 0, (PyObject *)str_obj);
2736467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou        count++;
2746467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    } else
2756467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou#endif
2766467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    if (j >= -1) {
2776467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou        SPLIT_ADD(str, 0, j + 1);
2786467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    }
2796467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    FIX_PREALLOC_SIZE(list);
2806467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    if (PyList_Reverse(list) < 0)
2816467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou        goto onError;
2826467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    return list;
2836467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou
2846467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou  onError:
2856467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    Py_DECREF(list);
2866467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    return NULL;
2876467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou}
2886467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou
2896467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine PitrouPy_LOCAL_INLINE(PyObject *)
2906467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitroustringlib_rsplit(PyObject* str_obj,
2916467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou                 const STRINGLIB_CHAR* str, Py_ssize_t str_len,
2926467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou                 const STRINGLIB_CHAR* sep, Py_ssize_t sep_len,
2936467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou                 Py_ssize_t maxcount)
2946467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou{
2956467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    Py_ssize_t j, pos, count=0;
2966467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    PyObject *list, *sub;
2976467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou
2986467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    if (sep_len == 0) {
2996467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou        PyErr_SetString(PyExc_ValueError, "empty separator");
3006467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou        return NULL;
3016467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    }
3026467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    else if (sep_len == 1)
3036467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou        return stringlib_rsplit_char(str_obj, str, str_len, sep[0], maxcount);
3046467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou
3056467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    list = PyList_New(PREALLOC_SIZE(maxcount));
3066467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    if (list == NULL)
3076467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou        return NULL;
3086467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou
3096467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    j = str_len;
3106467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    while (maxcount-- > 0) {
3116467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou        pos = fastsearch(str, j, sep, sep_len, -1, FAST_RSEARCH);
3126467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou        if (pos < 0)
3136467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou            break;
3146467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou        SPLIT_ADD(str, pos + sep_len, j);
3156467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou        j = pos;
3166467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    }
3176467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou#ifndef STRINGLIB_MUTABLE
3186467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    if (count == 0 && STRINGLIB_CHECK_EXACT(str_obj)) {
3196467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou        /* No match in str_obj, so just use it as list[0] */
3206467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou        Py_INCREF(str_obj);
3216467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou        PyList_SET_ITEM(list, 0, (PyObject *)str_obj);
3226467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou        count++;
3236467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    } else
3246467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou#endif
3256467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    {
3266467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou        SPLIT_ADD(str, 0, j);
3276467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    }
3286467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    FIX_PREALLOC_SIZE(list);
3296467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    if (PyList_Reverse(list) < 0)
3306467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou        goto onError;
3316467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    return list;
3326467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou
3336467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou  onError:
3346467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    Py_DECREF(list);
3356467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    return NULL;
3366467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou}
3376467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou
3386467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine PitrouPy_LOCAL_INLINE(PyObject *)
3396467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitroustringlib_splitlines(PyObject* str_obj,
3406467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou                     const STRINGLIB_CHAR* str, Py_ssize_t str_len,
3416467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou                     int keepends)
3426467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou{
3436467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    /* This does not use the preallocated list because splitlines is
3446467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou       usually run with hundreds of newlines.  The overhead of
3456467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou       switching between PyList_SET_ITEM and append causes about a
3466467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou       2-3% slowdown for that common case.  A smarter implementation
3476467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou       could move the if check out, so the SET_ITEMs are done first
3486467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou       and the appends only done when the prealloc buffer is full.
3496467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou       That's too much work for little gain.*/
3506467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou
3516467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    register Py_ssize_t i;
3526467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    register Py_ssize_t j;
3536467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    PyObject *list = PyList_New(0);
3546467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    PyObject *sub;
3556467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou
3566467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    if (list == NULL)
3576467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou        return NULL;
3586467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou
3596467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    for (i = j = 0; i < str_len; ) {
3606467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou        Py_ssize_t eol;
3616467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou
3626467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou        /* Find a line and append it */
3636467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou        while (i < str_len && !STRINGLIB_ISLINEBREAK(str[i]))
3646467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou            i++;
3656467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou
3666467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou        /* Skip the line break reading CRLF as one line break */
3676467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou        eol = i;
3686467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou        if (i < str_len) {
3696467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou            if (str[i] == '\r' && i + 1 < str_len && str[i+1] == '\n')
3706467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou                i += 2;
3716467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou            else
3726467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou                i++;
3736467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou            if (keepends)
3746467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou                eol = i;
3756467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou        }
3766467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou#ifndef STRINGLIB_MUTABLE
3776467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou        if (j == 0 && eol == str_len && STRINGLIB_CHECK_EXACT(str_obj)) {
3786467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou            /* No linebreak in str_obj, so just use it as list[0] */
3796467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou            if (PyList_Append(list, str_obj))
3806467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou                goto onError;
3816467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou            break;
3826467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou        }
3836467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou#endif
3846467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou        SPLIT_APPEND(str, j, eol);
3856467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou        j = i;
3866467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    }
3876467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    return list;
3886467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou
3896467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou  onError:
3906467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    Py_DECREF(list);
3916467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou    return NULL;
3926467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou}
3936467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou
3946467213bfd10dd45f0ae6fa607c8052a3bdaec23Antoine Pitrou#endif
395