1/* strop module */
2
3#define PY_SSIZE_T_CLEAN
4#include "Python.h"
5#include <ctype.h>
6
7PyDoc_STRVAR(strop_module__doc__,
8"Common string manipulations, optimized for speed.\n"
9"\n"
10"Always use \"import string\" rather than referencing\n"
11"this module directly.");
12
13/* XXX This file assumes that the <ctype.h> is*() functions
14   XXX are defined for all 8-bit characters! */
15
16#define WARN if (PyErr_Warn(PyExc_DeprecationWarning, \
17               "strop functions are obsolete; use string methods")) \
18         return NULL
19
20/* The lstrip(), rstrip() and strip() functions are implemented
21   in do_strip(), which uses an additional parameter to indicate what
22   type of strip should occur. */
23
24#define LEFTSTRIP 0
25#define RIGHTSTRIP 1
26#define BOTHSTRIP 2
27
28
29static PyObject *
30split_whitespace(char *s, Py_ssize_t len, Py_ssize_t maxsplit)
31{
32    Py_ssize_t i = 0, j;
33    int err;
34    Py_ssize_t countsplit = 0;
35    PyObject* item;
36    PyObject *list = PyList_New(0);
37
38    if (list == NULL)
39        return NULL;
40
41    while (i < len) {
42        while (i < len && isspace(Py_CHARMASK(s[i]))) {
43            i = i+1;
44        }
45        j = i;
46        while (i < len && !isspace(Py_CHARMASK(s[i]))) {
47            i = i+1;
48        }
49        if (j < i) {
50            item = PyString_FromStringAndSize(s+j, i-j);
51            if (item == NULL)
52                goto finally;
53
54            err = PyList_Append(list, item);
55            Py_DECREF(item);
56            if (err < 0)
57                goto finally;
58
59            countsplit++;
60            while (i < len && isspace(Py_CHARMASK(s[i]))) {
61                i = i+1;
62            }
63            if (maxsplit && (countsplit >= maxsplit) && i < len) {
64                item = PyString_FromStringAndSize(
65                    s+i, len - i);
66                if (item == NULL)
67                    goto finally;
68
69                err = PyList_Append(list, item);
70                Py_DECREF(item);
71                if (err < 0)
72                    goto finally;
73
74                i = len;
75            }
76        }
77    }
78    return list;
79  finally:
80    Py_DECREF(list);
81    return NULL;
82}
83
84
85PyDoc_STRVAR(splitfields__doc__,
86"split(s [,sep [,maxsplit]]) -> list of strings\n"
87"splitfields(s [,sep [,maxsplit]]) -> list of strings\n"
88"\n"
89"Return a list of the words in the string s, using sep as the\n"
90"delimiter string.  If maxsplit is nonzero, splits into at most\n"
91"maxsplit words.  If sep is not specified, any whitespace string\n"
92"is a separator.  Maxsplit defaults to 0.\n"
93"\n"
94"(split and splitfields are synonymous)");
95
96static PyObject *
97strop_splitfields(PyObject *self, PyObject *args)
98{
99    Py_ssize_t len, n, i, j, err;
100    Py_ssize_t splitcount, maxsplit;
101    char *s, *sub;
102    PyObject *list, *item;
103
104    WARN;
105    sub = NULL;
106    n = 0;
107    splitcount = 0;
108    maxsplit = 0;
109    if (!PyArg_ParseTuple(args, "t#|z#n:split", &s, &len, &sub, &n, &maxsplit))
110        return NULL;
111    if (sub == NULL)
112        return split_whitespace(s, len, maxsplit);
113    if (n == 0) {
114        PyErr_SetString(PyExc_ValueError, "empty separator");
115        return NULL;
116    }
117
118    list = PyList_New(0);
119    if (list == NULL)
120        return NULL;
121
122    i = j = 0;
123    while (i+n <= len) {
124        if (s[i] == sub[0] && (n == 1 || memcmp(s+i, sub, n) == 0)) {
125            item = PyString_FromStringAndSize(s+j, i-j);
126            if (item == NULL)
127                goto fail;
128            err = PyList_Append(list, item);
129            Py_DECREF(item);
130            if (err < 0)
131                goto fail;
132            i = j = i + n;
133            splitcount++;
134            if (maxsplit && (splitcount >= maxsplit))
135                break;
136        }
137        else
138            i++;
139    }
140    item = PyString_FromStringAndSize(s+j, len-j);
141    if (item == NULL)
142        goto fail;
143    err = PyList_Append(list, item);
144    Py_DECREF(item);
145    if (err < 0)
146        goto fail;
147
148    return list;
149
150 fail:
151    Py_DECREF(list);
152    return NULL;
153}
154
155
156PyDoc_STRVAR(joinfields__doc__,
157"join(list [,sep]) -> string\n"
158"joinfields(list [,sep]) -> string\n"
159"\n"
160"Return a string composed of the words in list, with\n"
161"intervening occurrences of sep.  Sep defaults to a single\n"
162"space.\n"
163"\n"
164"(join and joinfields are synonymous)");
165
166static PyObject *
167strop_joinfields(PyObject *self, PyObject *args)
168{
169    PyObject *seq;
170    char *sep = NULL;
171    Py_ssize_t seqlen, seplen = 0;
172    Py_ssize_t i, reslen = 0, slen = 0, sz = 100;
173    PyObject *res = NULL;
174    char* p = NULL;
175    ssizeargfunc getitemfunc;
176
177    WARN;
178    if (!PyArg_ParseTuple(args, "O|t#:join", &seq, &sep, &seplen))
179        return NULL;
180    if (sep == NULL) {
181        sep = " ";
182        seplen = 1;
183    }
184
185    seqlen = PySequence_Size(seq);
186    if (seqlen < 0 && PyErr_Occurred())
187        return NULL;
188
189    if (seqlen == 1) {
190        /* Optimization if there's only one item */
191        PyObject *item = PySequence_GetItem(seq, 0);
192        if (item && !PyString_Check(item)) {
193            PyErr_SetString(PyExc_TypeError,
194                     "first argument must be sequence of strings");
195            Py_DECREF(item);
196            return NULL;
197        }
198        return item;
199    }
200
201    if (!(res = PyString_FromStringAndSize((char*)NULL, sz)))
202        return NULL;
203    p = PyString_AsString(res);
204
205    /* optimize for lists, since it's the most common case.  all others
206     * (tuples and arbitrary sequences) just use the sequence abstract
207     * interface.
208     */
209    if (PyList_Check(seq)) {
210        for (i = 0; i < seqlen; i++) {
211            PyObject *item = PyList_GET_ITEM(seq, i);
212            if (!PyString_Check(item)) {
213                PyErr_SetString(PyExc_TypeError,
214                "first argument must be sequence of strings");
215                Py_DECREF(res);
216                return NULL;
217            }
218            slen = PyString_GET_SIZE(item);
219            if (slen > PY_SSIZE_T_MAX - reslen ||
220                seplen > PY_SSIZE_T_MAX - reslen - seplen) {
221                PyErr_SetString(PyExc_OverflowError,
222                                "input too long");
223                Py_DECREF(res);
224                return NULL;
225            }
226            while (reslen + slen + seplen >= sz) {
227                if (_PyString_Resize(&res, sz * 2) < 0)
228                    return NULL;
229                sz *= 2;
230                p = PyString_AsString(res) + reslen;
231            }
232            if (i > 0) {
233                memcpy(p, sep, seplen);
234                p += seplen;
235                reslen += seplen;
236            }
237            memcpy(p, PyString_AS_STRING(item), slen);
238            p += slen;
239            reslen += slen;
240        }
241        _PyString_Resize(&res, reslen);
242        return res;
243    }
244
245    if (seq->ob_type->tp_as_sequence == NULL ||
246             (getitemfunc = seq->ob_type->tp_as_sequence->sq_item) == NULL)
247    {
248        PyErr_SetString(PyExc_TypeError,
249                        "first argument must be a sequence");
250        return NULL;
251    }
252    /* This is now type safe */
253    for (i = 0; i < seqlen; i++) {
254        PyObject *item = getitemfunc(seq, i);
255        if (!item || !PyString_Check(item)) {
256            PyErr_SetString(PyExc_TypeError,
257                     "first argument must be sequence of strings");
258            Py_DECREF(res);
259            Py_XDECREF(item);
260            return NULL;
261        }
262        slen = PyString_GET_SIZE(item);
263        if (slen > PY_SSIZE_T_MAX - reslen ||
264            seplen > PY_SSIZE_T_MAX - reslen - seplen) {
265            PyErr_SetString(PyExc_OverflowError,
266                            "input too long");
267            Py_DECREF(res);
268            Py_XDECREF(item);
269            return NULL;
270        }
271        while (reslen + slen + seplen >= sz) {
272            if (_PyString_Resize(&res, sz * 2) < 0) {
273                Py_DECREF(item);
274                return NULL;
275            }
276            sz *= 2;
277            p = PyString_AsString(res) + reslen;
278        }
279        if (i > 0) {
280            memcpy(p, sep, seplen);
281            p += seplen;
282            reslen += seplen;
283        }
284        memcpy(p, PyString_AS_STRING(item), slen);
285        p += slen;
286        reslen += slen;
287        Py_DECREF(item);
288    }
289    _PyString_Resize(&res, reslen);
290    return res;
291}
292
293
294PyDoc_STRVAR(find__doc__,
295"find(s, sub [,start [,end]]) -> in\n"
296"\n"
297"Return the lowest index in s where substring sub is found,\n"
298"such that sub is contained within s[start,end].  Optional\n"
299"arguments start and end are interpreted as in slice notation.\n"
300"\n"
301"Return -1 on failure.");
302
303static PyObject *
304strop_find(PyObject *self, PyObject *args)
305{
306    char *s, *sub;
307    Py_ssize_t len, n, i = 0, last = PY_SSIZE_T_MAX;
308
309    WARN;
310    if (!PyArg_ParseTuple(args, "t#t#|nn:find", &s, &len, &sub, &n, &i, &last))
311        return NULL;
312
313    if (last > len)
314        last = len;
315    if (last < 0)
316        last += len;
317    if (last < 0)
318        last = 0;
319    if (i < 0)
320        i += len;
321    if (i < 0)
322        i = 0;
323
324    if (n == 0 && i <= last)
325        return PyInt_FromLong((long)i);
326
327    last -= n;
328    for (; i <= last; ++i)
329        if (s[i] == sub[0] &&
330            (n == 1 || memcmp(&s[i+1], &sub[1], n-1) == 0))
331            return PyInt_FromLong((long)i);
332
333    return PyInt_FromLong(-1L);
334}
335
336
337PyDoc_STRVAR(rfind__doc__,
338"rfind(s, sub [,start [,end]]) -> int\n"
339"\n"
340"Return the highest index in s where substring sub is found,\n"
341"such that sub is contained within s[start,end].  Optional\n"
342"arguments start and end are interpreted as in slice notation.\n"
343"\n"
344"Return -1 on failure.");
345
346static PyObject *
347strop_rfind(PyObject *self, PyObject *args)
348{
349    char *s, *sub;
350    Py_ssize_t len, n, j;
351    Py_ssize_t i = 0, last = PY_SSIZE_T_MAX;
352
353    WARN;
354    if (!PyArg_ParseTuple(args, "t#t#|nn:rfind", &s, &len, &sub, &n, &i, &last))
355        return NULL;
356
357    if (last > len)
358        last = len;
359    if (last < 0)
360        last += len;
361    if (last < 0)
362        last = 0;
363    if (i < 0)
364        i += len;
365    if (i < 0)
366        i = 0;
367
368    if (n == 0 && i <= last)
369        return PyInt_FromLong((long)last);
370
371    for (j = last-n; j >= i; --j)
372        if (s[j] == sub[0] &&
373            (n == 1 || memcmp(&s[j+1], &sub[1], n-1) == 0))
374            return PyInt_FromLong((long)j);
375
376    return PyInt_FromLong(-1L);
377}
378
379
380static PyObject *
381do_strip(PyObject *args, int striptype)
382{
383    char *s;
384    Py_ssize_t len, i, j;
385
386
387    if (PyString_AsStringAndSize(args, &s, &len))
388        return NULL;
389
390    i = 0;
391    if (striptype != RIGHTSTRIP) {
392        while (i < len && isspace(Py_CHARMASK(s[i]))) {
393            i++;
394        }
395    }
396
397    j = len;
398    if (striptype != LEFTSTRIP) {
399        do {
400            j--;
401        } while (j >= i && isspace(Py_CHARMASK(s[j])));
402        j++;
403    }
404
405    if (i == 0 && j == len) {
406        Py_INCREF(args);
407        return args;
408    }
409    else
410        return PyString_FromStringAndSize(s+i, j-i);
411}
412
413
414PyDoc_STRVAR(strip__doc__,
415"strip(s) -> string\n"
416"\n"
417"Return a copy of the string s with leading and trailing\n"
418"whitespace removed.");
419
420static PyObject *
421strop_strip(PyObject *self, PyObject *args)
422{
423    WARN;
424    return do_strip(args, BOTHSTRIP);
425}
426
427
428PyDoc_STRVAR(lstrip__doc__,
429"lstrip(s) -> string\n"
430"\n"
431"Return a copy of the string s with leading whitespace removed.");
432
433static PyObject *
434strop_lstrip(PyObject *self, PyObject *args)
435{
436    WARN;
437    return do_strip(args, LEFTSTRIP);
438}
439
440
441PyDoc_STRVAR(rstrip__doc__,
442"rstrip(s) -> string\n"
443"\n"
444"Return a copy of the string s with trailing whitespace removed.");
445
446static PyObject *
447strop_rstrip(PyObject *self, PyObject *args)
448{
449    WARN;
450    return do_strip(args, RIGHTSTRIP);
451}
452
453
454PyDoc_STRVAR(lower__doc__,
455"lower(s) -> string\n"
456"\n"
457"Return a copy of the string s converted to lowercase.");
458
459static PyObject *
460strop_lower(PyObject *self, PyObject *args)
461{
462    char *s, *s_new;
463    Py_ssize_t i, n;
464    PyObject *newstr;
465    int changed;
466
467    WARN;
468    if (PyString_AsStringAndSize(args, &s, &n))
469        return NULL;
470    newstr = PyString_FromStringAndSize(NULL, n);
471    if (newstr == NULL)
472        return NULL;
473    s_new = PyString_AsString(newstr);
474    changed = 0;
475    for (i = 0; i < n; i++) {
476        int c = Py_CHARMASK(*s++);
477        if (isupper(c)) {
478            changed = 1;
479            *s_new = tolower(c);
480        } else
481            *s_new = c;
482        s_new++;
483    }
484    if (!changed) {
485        Py_DECREF(newstr);
486        Py_INCREF(args);
487        return args;
488    }
489    return newstr;
490}
491
492
493PyDoc_STRVAR(upper__doc__,
494"upper(s) -> string\n"
495"\n"
496"Return a copy of the string s converted to uppercase.");
497
498static PyObject *
499strop_upper(PyObject *self, PyObject *args)
500{
501    char *s, *s_new;
502    Py_ssize_t i, n;
503    PyObject *newstr;
504    int changed;
505
506    WARN;
507    if (PyString_AsStringAndSize(args, &s, &n))
508        return NULL;
509    newstr = PyString_FromStringAndSize(NULL, n);
510    if (newstr == NULL)
511        return NULL;
512    s_new = PyString_AsString(newstr);
513    changed = 0;
514    for (i = 0; i < n; i++) {
515        int c = Py_CHARMASK(*s++);
516        if (islower(c)) {
517            changed = 1;
518            *s_new = toupper(c);
519        } else
520            *s_new = c;
521        s_new++;
522    }
523    if (!changed) {
524        Py_DECREF(newstr);
525        Py_INCREF(args);
526        return args;
527    }
528    return newstr;
529}
530
531
532PyDoc_STRVAR(capitalize__doc__,
533"capitalize(s) -> string\n"
534"\n"
535"Return a copy of the string s with only its first character\n"
536"capitalized.");
537
538static PyObject *
539strop_capitalize(PyObject *self, PyObject *args)
540{
541    char *s, *s_new;
542    Py_ssize_t i, n;
543    PyObject *newstr;
544    int changed;
545
546    WARN;
547    if (PyString_AsStringAndSize(args, &s, &n))
548        return NULL;
549    newstr = PyString_FromStringAndSize(NULL, n);
550    if (newstr == NULL)
551        return NULL;
552    s_new = PyString_AsString(newstr);
553    changed = 0;
554    if (0 < n) {
555        int c = Py_CHARMASK(*s++);
556        if (islower(c)) {
557            changed = 1;
558            *s_new = toupper(c);
559        } else
560            *s_new = c;
561        s_new++;
562    }
563    for (i = 1; i < n; i++) {
564        int c = Py_CHARMASK(*s++);
565        if (isupper(c)) {
566            changed = 1;
567            *s_new = tolower(c);
568        } else
569            *s_new = c;
570        s_new++;
571    }
572    if (!changed) {
573        Py_DECREF(newstr);
574        Py_INCREF(args);
575        return args;
576    }
577    return newstr;
578}
579
580
581PyDoc_STRVAR(expandtabs__doc__,
582"expandtabs(string, [tabsize]) -> string\n"
583"\n"
584"Expand tabs in a string, i.e. replace them by one or more spaces,\n"
585"depending on the current column and the given tab size (default 8).\n"
586"The column number is reset to zero after each newline occurring in the\n"
587"string.  This doesn't understand other non-printing characters.");
588
589static PyObject *
590strop_expandtabs(PyObject *self, PyObject *args)
591{
592    /* Original by Fredrik Lundh */
593    char* e;
594    char* p;
595    char* q;
596    Py_ssize_t i, j;
597    PyObject* out;
598    char* string;
599    Py_ssize_t stringlen;
600    int tabsize = 8;
601
602    WARN;
603    /* Get arguments */
604    if (!PyArg_ParseTuple(args, "s#|i:expandtabs", &string, &stringlen, &tabsize))
605        return NULL;
606    if (tabsize < 1) {
607        PyErr_SetString(PyExc_ValueError,
608                        "tabsize must be at least 1");
609        return NULL;
610    }
611
612    /* First pass: determine size of output string */
613    i = j = 0; /* j: current column; i: total of previous lines */
614    e = string + stringlen;
615    for (p = string; p < e; p++) {
616        if (*p == '\t') {
617            Py_ssize_t incr = tabsize - (j%tabsize);
618            if (j > PY_SSIZE_T_MAX - incr)
619                goto overflow;
620            j += incr;
621        } else {
622            if (j > PY_SSIZE_T_MAX - 1)
623                goto overflow;
624            j++;
625            if (*p == '\n') {
626                if (i > PY_SSIZE_T_MAX - j)
627                    goto overflow;
628                i += j;
629                j = 0;
630            }
631        }
632    }
633
634    if (i > PY_SSIZE_T_MAX - j)
635        goto overflow;
636
637    /* Second pass: create output string and fill it */
638    out = PyString_FromStringAndSize(NULL, i+j);
639    if (out == NULL)
640        return NULL;
641
642    i = 0;
643    q = PyString_AS_STRING(out);
644
645    for (p = string; p < e; p++) {
646        if (*p == '\t') {
647            j = tabsize - (i%tabsize);
648            i += j;
649            while (j-- > 0)
650                *q++ = ' ';
651        } else {
652            *q++ = *p;
653            i++;
654            if (*p == '\n')
655                i = 0;
656        }
657    }
658
659    return out;
660  overflow:
661    PyErr_SetString(PyExc_OverflowError, "result is too long");
662    return NULL;
663}
664
665
666PyDoc_STRVAR(count__doc__,
667"count(s, sub[, start[, end]]) -> int\n"
668"\n"
669"Return the number of occurrences of substring sub in string\n"
670"s[start:end].  Optional arguments start and end are\n"
671"interpreted as in slice notation.");
672
673static PyObject *
674strop_count(PyObject *self, PyObject *args)
675{
676    char *s, *sub;
677    Py_ssize_t len, n;
678    Py_ssize_t i = 0, last = PY_SSIZE_T_MAX;
679    Py_ssize_t m, r;
680
681    WARN;
682    if (!PyArg_ParseTuple(args, "t#t#|nn:count", &s, &len, &sub, &n, &i, &last))
683        return NULL;
684    if (last > len)
685        last = len;
686    if (last < 0)
687        last += len;
688    if (last < 0)
689        last = 0;
690    if (i < 0)
691        i += len;
692    if (i < 0)
693        i = 0;
694    m = last + 1 - n;
695    if (n == 0)
696        return PyInt_FromLong((long) (m-i));
697
698    r = 0;
699    while (i < m) {
700        if (!memcmp(s+i, sub, n)) {
701            r++;
702            i += n;
703        } else {
704            i++;
705        }
706    }
707    return PyInt_FromLong((long) r);
708}
709
710
711PyDoc_STRVAR(swapcase__doc__,
712"swapcase(s) -> string\n"
713"\n"
714"Return a copy of the string s with upper case characters\n"
715"converted to lowercase and vice versa.");
716
717static PyObject *
718strop_swapcase(PyObject *self, PyObject *args)
719{
720    char *s, *s_new;
721    Py_ssize_t i, n;
722    PyObject *newstr;
723    int changed;
724
725    WARN;
726    if (PyString_AsStringAndSize(args, &s, &n))
727        return NULL;
728    newstr = PyString_FromStringAndSize(NULL, n);
729    if (newstr == NULL)
730        return NULL;
731    s_new = PyString_AsString(newstr);
732    changed = 0;
733    for (i = 0; i < n; i++) {
734        int c = Py_CHARMASK(*s++);
735        if (islower(c)) {
736            changed = 1;
737            *s_new = toupper(c);
738        }
739        else if (isupper(c)) {
740            changed = 1;
741            *s_new = tolower(c);
742        }
743        else
744            *s_new = c;
745        s_new++;
746    }
747    if (!changed) {
748        Py_DECREF(newstr);
749        Py_INCREF(args);
750        return args;
751    }
752    return newstr;
753}
754
755
756PyDoc_STRVAR(atoi__doc__,
757"atoi(s [,base]) -> int\n"
758"\n"
759"Return the integer represented by the string s in the given\n"
760"base, which defaults to 10.  The string s must consist of one\n"
761"or more digits, possibly preceded by a sign.  If base is 0, it\n"
762"is chosen from the leading characters of s, 0 for octal, 0x or\n"
763"0X for hexadecimal.  If base is 16, a preceding 0x or 0X is\n"
764"accepted.");
765
766static PyObject *
767strop_atoi(PyObject *self, PyObject *args)
768{
769    char *s, *end;
770    int base = 10;
771    long x;
772    char buffer[256]; /* For errors */
773
774    WARN;
775    if (!PyArg_ParseTuple(args, "s|i:atoi", &s, &base))
776        return NULL;
777
778    if ((base != 0 && base < 2) || base > 36) {
779        PyErr_SetString(PyExc_ValueError, "invalid base for atoi()");
780        return NULL;
781    }
782
783    while (*s && isspace(Py_CHARMASK(*s)))
784        s++;
785    errno = 0;
786    if (base == 0 && s[0] == '0')
787        x = (long) PyOS_strtoul(s, &end, base);
788    else
789        x = PyOS_strtol(s, &end, base);
790    if (end == s || !isalnum(Py_CHARMASK(end[-1])))
791        goto bad;
792    while (*end && isspace(Py_CHARMASK(*end)))
793        end++;
794    if (*end != '\0') {
795  bad:
796        PyOS_snprintf(buffer, sizeof(buffer),
797                      "invalid literal for atoi(): %.200s", s);
798        PyErr_SetString(PyExc_ValueError, buffer);
799        return NULL;
800    }
801    else if (errno != 0) {
802        PyOS_snprintf(buffer, sizeof(buffer),
803                      "atoi() literal too large: %.200s", s);
804        PyErr_SetString(PyExc_ValueError, buffer);
805        return NULL;
806    }
807    return PyInt_FromLong(x);
808}
809
810
811PyDoc_STRVAR(atol__doc__,
812"atol(s [,base]) -> long\n"
813"\n"
814"Return the long integer represented by the string s in the\n"
815"given base, which defaults to 10.  The string s must consist\n"
816"of one or more digits, possibly preceded by a sign.  If base\n"
817"is 0, it is chosen from the leading characters of s, 0 for\n"
818"octal, 0x or 0X for hexadecimal.  If base is 16, a preceding\n"
819"0x or 0X is accepted.  A trailing L or l is not accepted,\n"
820"unless base is 0.");
821
822static PyObject *
823strop_atol(PyObject *self, PyObject *args)
824{
825    char *s, *end;
826    int base = 10;
827    PyObject *x;
828    char buffer[256]; /* For errors */
829
830    WARN;
831    if (!PyArg_ParseTuple(args, "s|i:atol", &s, &base))
832        return NULL;
833
834    if ((base != 0 && base < 2) || base > 36) {
835        PyErr_SetString(PyExc_ValueError, "invalid base for atol()");
836        return NULL;
837    }
838
839    while (*s && isspace(Py_CHARMASK(*s)))
840        s++;
841    if (s[0] == '\0') {
842        PyErr_SetString(PyExc_ValueError, "empty string for atol()");
843        return NULL;
844    }
845    x = PyLong_FromString(s, &end, base);
846    if (x == NULL)
847        return NULL;
848    if (base == 0 && (*end == 'l' || *end == 'L'))
849        end++;
850    while (*end && isspace(Py_CHARMASK(*end)))
851        end++;
852    if (*end != '\0') {
853        PyOS_snprintf(buffer, sizeof(buffer),
854                      "invalid literal for atol(): %.200s", s);
855        PyErr_SetString(PyExc_ValueError, buffer);
856        Py_DECREF(x);
857        return NULL;
858    }
859    return x;
860}
861
862
863PyDoc_STRVAR(atof__doc__,
864"atof(s) -> float\n"
865"\n"
866"Return the floating point number represented by the string s.");
867
868static PyObject *
869strop_atof(PyObject *self, PyObject *args)
870{
871    char *s, *end;
872    double x;
873    char buffer[256]; /* For errors */
874
875    WARN;
876    if (!PyArg_ParseTuple(args, "s:atof", &s))
877        return NULL;
878    while (*s && isspace(Py_CHARMASK(*s)))
879        s++;
880    if (s[0] == '\0') {
881        PyErr_SetString(PyExc_ValueError, "empty string for atof()");
882        return NULL;
883    }
884
885    PyFPE_START_PROTECT("strop_atof", return 0)
886    x = PyOS_string_to_double(s, &end, PyExc_OverflowError);
887    PyFPE_END_PROTECT(x)
888    if (x == -1 && PyErr_Occurred())
889        return NULL;
890    while (*end && isspace(Py_CHARMASK(*end)))
891        end++;
892    if (*end != '\0') {
893        PyOS_snprintf(buffer, sizeof(buffer),
894                      "invalid literal for atof(): %.200s", s);
895        PyErr_SetString(PyExc_ValueError, buffer);
896        return NULL;
897    }
898    return PyFloat_FromDouble(x);
899}
900
901
902PyDoc_STRVAR(maketrans__doc__,
903"maketrans(frm, to) -> string\n"
904"\n"
905"Return a translation table (a string of 256 bytes long)\n"
906"suitable for use in string.translate.  The strings frm and to\n"
907"must be of the same length.");
908
909static PyObject *
910strop_maketrans(PyObject *self, PyObject *args)
911{
912    unsigned char *c, *from=NULL, *to=NULL;
913    Py_ssize_t i, fromlen=0, tolen=0;
914    PyObject *result;
915
916    if (!PyArg_ParseTuple(args, "t#t#:maketrans", &from, &fromlen, &to, &tolen))
917        return NULL;
918
919    if (fromlen != tolen) {
920        PyErr_SetString(PyExc_ValueError,
921                        "maketrans arguments must have same length");
922        return NULL;
923    }
924
925    result = PyString_FromStringAndSize((char *)NULL, 256);
926    if (result == NULL)
927        return NULL;
928    c = (unsigned char *) PyString_AS_STRING((PyStringObject *)result);
929    for (i = 0; i < 256; i++)
930        c[i]=(unsigned char)i;
931    for (i = 0; i < fromlen; i++)
932        c[from[i]]=to[i];
933
934    return result;
935}
936
937
938PyDoc_STRVAR(translate__doc__,
939"translate(s,table [,deletechars]) -> string\n"
940"\n"
941"Return a copy of the string s, where all characters occurring\n"
942"in the optional argument deletechars are removed, and the\n"
943"remaining characters have been mapped through the given\n"
944"translation table, which must be a string of length 256.");
945
946static PyObject *
947strop_translate(PyObject *self, PyObject *args)
948{
949    register char *input, *table, *output;
950    Py_ssize_t i;
951    int c, changed = 0;
952    PyObject *input_obj;
953    char *table1, *output_start, *del_table=NULL;
954    Py_ssize_t inlen, tablen, dellen = 0;
955    PyObject *result;
956    int trans_table[256];
957
958    WARN;
959    if (!PyArg_ParseTuple(args, "St#|t#:translate", &input_obj,
960                          &table1, &tablen, &del_table, &dellen))
961        return NULL;
962    if (tablen != 256) {
963        PyErr_SetString(PyExc_ValueError,
964                      "translation table must be 256 characters long");
965        return NULL;
966    }
967
968    table = table1;
969    inlen = PyString_GET_SIZE(input_obj);
970    result = PyString_FromStringAndSize((char *)NULL, inlen);
971    if (result == NULL)
972        return NULL;
973    output_start = output = PyString_AsString(result);
974    input = PyString_AsString(input_obj);
975
976    if (dellen == 0) {
977        /* If no deletions are required, use faster code */
978        for (i = inlen; --i >= 0; ) {
979            c = Py_CHARMASK(*input++);
980            if (Py_CHARMASK((*output++ = table[c])) != c)
981                changed = 1;
982        }
983        if (changed)
984            return result;
985        Py_DECREF(result);
986        Py_INCREF(input_obj);
987        return input_obj;
988    }
989
990    for (i = 0; i < 256; i++)
991        trans_table[i] = Py_CHARMASK(table[i]);
992
993    for (i = 0; i < dellen; i++)
994        trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
995
996    for (i = inlen; --i >= 0; ) {
997        c = Py_CHARMASK(*input++);
998        if (trans_table[c] != -1)
999            if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
1000                continue;
1001        changed = 1;
1002    }
1003    if (!changed) {
1004        Py_DECREF(result);
1005        Py_INCREF(input_obj);
1006        return input_obj;
1007    }
1008    /* Fix the size of the resulting string */
1009    if (inlen > 0)
1010        _PyString_Resize(&result, output - output_start);
1011    return result;
1012}
1013
1014
1015/* What follows is used for implementing replace().  Perry Stoll. */
1016
1017/*
1018  mymemfind
1019
1020  strstr replacement for arbitrary blocks of memory.
1021
1022  Locates the first occurrence in the memory pointed to by MEM of the
1023  contents of memory pointed to by PAT.  Returns the index into MEM if
1024  found, or -1 if not found.  If len of PAT is greater than length of
1025  MEM, the function returns -1.
1026*/
1027static Py_ssize_t
1028mymemfind(const char *mem, Py_ssize_t len, const char *pat, Py_ssize_t pat_len)
1029{
1030    register Py_ssize_t ii;
1031
1032    /* pattern can not occur in the last pat_len-1 chars */
1033    len -= pat_len;
1034
1035    for (ii = 0; ii <= len; ii++) {
1036        if (mem[ii] == pat[0] &&
1037            (pat_len == 1 ||
1038             memcmp(&mem[ii+1], &pat[1], pat_len-1) == 0)) {
1039            return ii;
1040        }
1041    }
1042    return -1;
1043}
1044
1045/*
1046  mymemcnt
1047
1048   Return the number of distinct times PAT is found in MEM.
1049   meaning mem=1111 and pat==11 returns 2.
1050       mem=11111 and pat==11 also return 2.
1051 */
1052static Py_ssize_t
1053mymemcnt(const char *mem, Py_ssize_t len, const char *pat, Py_ssize_t pat_len)
1054{
1055    register Py_ssize_t offset = 0;
1056    Py_ssize_t nfound = 0;
1057
1058    while (len >= 0) {
1059        offset = mymemfind(mem, len, pat, pat_len);
1060        if (offset == -1)
1061            break;
1062        mem += offset + pat_len;
1063        len -= offset + pat_len;
1064        nfound++;
1065    }
1066    return nfound;
1067}
1068
1069/*
1070   mymemreplace
1071
1072   Return a string in which all occurrences of PAT in memory STR are
1073   replaced with SUB.
1074
1075   If length of PAT is less than length of STR or there are no occurrences
1076   of PAT in STR, then the original string is returned. Otherwise, a new
1077   string is allocated here and returned.
1078
1079   on return, out_len is:
1080       the length of output string, or
1081       -1 if the input string is returned, or
1082       unchanged if an error occurs (no memory).
1083
1084   return value is:
1085       the new string allocated locally, or
1086       NULL if an error occurred.
1087*/
1088static char *
1089mymemreplace(const char *str, Py_ssize_t len,           /* input string */
1090         const char *pat, Py_ssize_t pat_len,           /* pattern string to find */
1091         const char *sub, Py_ssize_t sub_len,           /* substitution string */
1092         Py_ssize_t count,                              /* number of replacements */
1093         Py_ssize_t *out_len)
1094{
1095    char *out_s;
1096    char *new_s;
1097    Py_ssize_t nfound, offset, new_len;
1098
1099    if (len == 0 || pat_len > len)
1100        goto return_same;
1101
1102    /* find length of output string */
1103    nfound = mymemcnt(str, len, pat, pat_len);
1104    if (count < 0)
1105        count = PY_SSIZE_T_MAX;
1106    else if (nfound > count)
1107        nfound = count;
1108    if (nfound == 0)
1109        goto return_same;
1110
1111    new_len = len + nfound*(sub_len - pat_len);
1112    if (new_len == 0) {
1113        /* Have to allocate something for the caller to free(). */
1114        out_s = (char *)PyMem_MALLOC(1);
1115        if (out_s == NULL)
1116            return NULL;
1117        out_s[0] = '\0';
1118    }
1119    else {
1120        assert(new_len > 0);
1121        new_s = (char *)PyMem_MALLOC(new_len);
1122        if (new_s == NULL)
1123            return NULL;
1124        out_s = new_s;
1125
1126        for (; count > 0 && len > 0; --count) {
1127            /* find index of next instance of pattern */
1128            offset = mymemfind(str, len, pat, pat_len);
1129            if (offset == -1)
1130                break;
1131
1132            /* copy non matching part of input string */
1133            memcpy(new_s, str, offset);
1134            str += offset + pat_len;
1135            len -= offset + pat_len;
1136
1137            /* copy substitute into the output string */
1138            new_s += offset;
1139            memcpy(new_s, sub, sub_len);
1140            new_s += sub_len;
1141        }
1142        /* copy any remaining values into output string */
1143        if (len > 0)
1144            memcpy(new_s, str, len);
1145    }
1146    *out_len = new_len;
1147    return out_s;
1148
1149  return_same:
1150    *out_len = -1;
1151    return (char *)str; /* cast away const */
1152}
1153
1154
1155PyDoc_STRVAR(replace__doc__,
1156"replace (str, old, new[, maxsplit]) -> string\n"
1157"\n"
1158"Return a copy of string str with all occurrences of substring\n"
1159"old replaced by new. If the optional argument maxsplit is\n"
1160"given, only the first maxsplit occurrences are replaced.");
1161
1162static PyObject *
1163strop_replace(PyObject *self, PyObject *args)
1164{
1165    char *str, *pat,*sub,*new_s;
1166    Py_ssize_t len,pat_len,sub_len,out_len;
1167    Py_ssize_t count = -1;
1168    PyObject *newstr;
1169
1170    WARN;
1171    if (!PyArg_ParseTuple(args, "t#t#t#|n:replace",
1172                          &str, &len, &pat, &pat_len, &sub, &sub_len,
1173                          &count))
1174        return NULL;
1175    if (pat_len <= 0) {
1176        PyErr_SetString(PyExc_ValueError, "empty pattern string");
1177        return NULL;
1178    }
1179    /* CAUTION:  strop treats a replace count of 0 as infinity, unlke
1180     * current (2.1) string.py and string methods.  Preserve this for
1181     * ... well, hard to say for what <wink>.
1182     */
1183    if (count == 0)
1184        count = -1;
1185    new_s = mymemreplace(str,len,pat,pat_len,sub,sub_len,count,&out_len);
1186    if (new_s == NULL) {
1187        PyErr_NoMemory();
1188        return NULL;
1189    }
1190    if (out_len == -1) {
1191        /* we're returning another reference to the input string */
1192        newstr = PyTuple_GetItem(args, 0);
1193        Py_XINCREF(newstr);
1194    }
1195    else {
1196        newstr = PyString_FromStringAndSize(new_s, out_len);
1197        PyMem_FREE(new_s);
1198    }
1199    return newstr;
1200}
1201
1202
1203/* List of functions defined in the module */
1204
1205static PyMethodDef
1206strop_methods[] = {
1207    {"atof",            strop_atof,        METH_VARARGS, atof__doc__},
1208    {"atoi",            strop_atoi,        METH_VARARGS, atoi__doc__},
1209    {"atol",            strop_atol,        METH_VARARGS, atol__doc__},
1210    {"capitalize",      strop_capitalize,  METH_O,       capitalize__doc__},
1211    {"count",           strop_count,       METH_VARARGS, count__doc__},
1212    {"expandtabs",      strop_expandtabs,  METH_VARARGS, expandtabs__doc__},
1213    {"find",            strop_find,        METH_VARARGS, find__doc__},
1214    {"join",            strop_joinfields,  METH_VARARGS, joinfields__doc__},
1215    {"joinfields",      strop_joinfields,  METH_VARARGS, joinfields__doc__},
1216    {"lstrip",          strop_lstrip,      METH_O,       lstrip__doc__},
1217    {"lower",           strop_lower,       METH_O,       lower__doc__},
1218    {"maketrans",       strop_maketrans,   METH_VARARGS, maketrans__doc__},
1219    {"replace",         strop_replace,     METH_VARARGS, replace__doc__},
1220    {"rfind",           strop_rfind,       METH_VARARGS, rfind__doc__},
1221    {"rstrip",          strop_rstrip,      METH_O,       rstrip__doc__},
1222    {"split",           strop_splitfields, METH_VARARGS, splitfields__doc__},
1223    {"splitfields",     strop_splitfields, METH_VARARGS, splitfields__doc__},
1224    {"strip",           strop_strip,       METH_O,       strip__doc__},
1225    {"swapcase",        strop_swapcase,    METH_O,       swapcase__doc__},
1226    {"translate",       strop_translate,   METH_VARARGS, translate__doc__},
1227    {"upper",           strop_upper,       METH_O,       upper__doc__},
1228    {NULL,              NULL}   /* sentinel */
1229};
1230
1231
1232PyMODINIT_FUNC
1233initstrop(void)
1234{
1235    PyObject *m, *s;
1236    char buf[256];
1237    int c, n;
1238    m = Py_InitModule4("strop", strop_methods, strop_module__doc__,
1239                       (PyObject*)NULL, PYTHON_API_VERSION);
1240    if (m == NULL)
1241        return;
1242
1243    /* Create 'whitespace' object */
1244    n = 0;
1245    for (c = 0; c < 256; c++) {
1246        if (isspace(c))
1247            buf[n++] = c;
1248    }
1249    s = PyString_FromStringAndSize(buf, n);
1250    if (s)
1251        PyModule_AddObject(m, "whitespace", s);
1252
1253    /* Create 'lowercase' object */
1254    n = 0;
1255    for (c = 0; c < 256; c++) {
1256        if (islower(c))
1257            buf[n++] = c;
1258    }
1259    s = PyString_FromStringAndSize(buf, n);
1260    if (s)
1261        PyModule_AddObject(m, "lowercase", s);
1262
1263    /* Create 'uppercase' object */
1264    n = 0;
1265    for (c = 0; c < 256; c++) {
1266        if (isupper(c))
1267            buf[n++] = c;
1268    }
1269    s = PyString_FromStringAndSize(buf, n);
1270    if (s)
1271        PyModule_AddObject(m, "uppercase", s);
1272}
1273