1/* strop module */
2
3#define PY_SSIZE_T_CLEAN
4#include "Python.h"
5#include <ctype.h>
6
7PyDoc_STRVAR(strop_module__doc__,
8"Common string manipulations, optimized for speed.\n"
9"\n"
10"Always use \"import string\" rather than referencing\n"
11"this module directly.");
12
13/* XXX This file assumes that the <ctype.h> is*() functions
14   XXX are defined for all 8-bit characters! */
15
/* Issue the module-wide DeprecationWarning.  When PyErr_Warn() returns
   non-zero the enclosing function returns NULL.  The body is wrapped in
   do { } while (0) so that "WARN;" is exactly one statement and cannot
   capture an "else" that follows it. */
#define WARN do { \
        if (PyErr_Warn(PyExc_DeprecationWarning, \
               "strop functions are obsolete; use string methods")) \
            return NULL; \
    } while (0)
19
/* Selector values for the striptype argument of do_strip(), the shared
   implementation behind strip(), lstrip() and rstrip(). */

#define LEFTSTRIP   0   /* trim leading whitespace only */
#define RIGHTSTRIP  1   /* trim trailing whitespace only */
#define BOTHSTRIP   2   /* trim whitespace at both ends */
27
28
29static PyObject *
30split_whitespace(char *s, Py_ssize_t len, Py_ssize_t maxsplit)
31{
32    Py_ssize_t i = 0, j;
33    int err;
34    Py_ssize_t countsplit = 0;
35    PyObject* item;
36    PyObject *list = PyList_New(0);
37
38    if (list == NULL)
39        return NULL;
40
41    while (i < len) {
42        while (i < len && isspace(Py_CHARMASK(s[i]))) {
43            i = i+1;
44        }
45        j = i;
46        while (i < len && !isspace(Py_CHARMASK(s[i]))) {
47            i = i+1;
48        }
49        if (j < i) {
50            item = PyString_FromStringAndSize(s+j, i-j);
51            if (item == NULL)
52                goto finally;
53
54            err = PyList_Append(list, item);
55            Py_DECREF(item);
56            if (err < 0)
57                goto finally;
58
59            countsplit++;
60            while (i < len && isspace(Py_CHARMASK(s[i]))) {
61                i = i+1;
62            }
63            if (maxsplit && (countsplit >= maxsplit) && i < len) {
64                item = PyString_FromStringAndSize(
65                    s+i, len - i);
66                if (item == NULL)
67                    goto finally;
68
69                err = PyList_Append(list, item);
70                Py_DECREF(item);
71                if (err < 0)
72                    goto finally;
73
74                i = len;
75            }
76        }
77    }
78    return list;
79  finally:
80    Py_DECREF(list);
81    return NULL;
82}
83
84
85PyDoc_STRVAR(splitfields__doc__,
86"split(s [,sep [,maxsplit]]) -> list of strings\n"
87"splitfields(s [,sep [,maxsplit]]) -> list of strings\n"
88"\n"
89"Return a list of the words in the string s, using sep as the\n"
90"delimiter string.  If maxsplit is nonzero, splits into at most\n"
91"maxsplit words.  If sep is not specified, any whitespace string\n"
92"is a separator.  Maxsplit defaults to 0.\n"
93"\n"
94"(split and splitfields are synonymous)");
95
96static PyObject *
97strop_splitfields(PyObject *self, PyObject *args)
98{
99    Py_ssize_t len, n, i, j, err;
100    Py_ssize_t splitcount, maxsplit;
101    char *s, *sub;
102    PyObject *list, *item;
103
104    WARN;
105    sub = NULL;
106    n = 0;
107    splitcount = 0;
108    maxsplit = 0;
109    if (!PyArg_ParseTuple(args, "t#|z#n:split", &s, &len, &sub, &n, &maxsplit))
110        return NULL;
111    if (sub == NULL)
112        return split_whitespace(s, len, maxsplit);
113    if (n == 0) {
114        PyErr_SetString(PyExc_ValueError, "empty separator");
115        return NULL;
116    }
117
118    list = PyList_New(0);
119    if (list == NULL)
120        return NULL;
121
122    i = j = 0;
123    while (i+n <= len) {
124        if (s[i] == sub[0] && (n == 1 || memcmp(s+i, sub, n) == 0)) {
125            item = PyString_FromStringAndSize(s+j, i-j);
126            if (item == NULL)
127                goto fail;
128            err = PyList_Append(list, item);
129            Py_DECREF(item);
130            if (err < 0)
131                goto fail;
132            i = j = i + n;
133            splitcount++;
134            if (maxsplit && (splitcount >= maxsplit))
135                break;
136        }
137        else
138            i++;
139    }
140    item = PyString_FromStringAndSize(s+j, len-j);
141    if (item == NULL)
142        goto fail;
143    err = PyList_Append(list, item);
144    Py_DECREF(item);
145    if (err < 0)
146        goto fail;
147
148    return list;
149
150 fail:
151    Py_DECREF(list);
152    return NULL;
153}
154
155
156PyDoc_STRVAR(joinfields__doc__,
157"join(list [,sep]) -> string\n"
158"joinfields(list [,sep]) -> string\n"
159"\n"
160"Return a string composed of the words in list, with\n"
161"intervening occurrences of sep.  Sep defaults to a single\n"
162"space.\n"
163"\n"
164"(join and joinfields are synonymous)");
165
166static PyObject *
167strop_joinfields(PyObject *self, PyObject *args)
168{
169    PyObject *seq;
170    char *sep = NULL;
171    Py_ssize_t seqlen, seplen = 0;
172    Py_ssize_t i, reslen = 0, slen = 0, sz = 100;
173    PyObject *res = NULL;
174    char* p = NULL;
175    ssizeargfunc getitemfunc;
176
177    WARN;
178    if (!PyArg_ParseTuple(args, "O|t#:join", &seq, &sep, &seplen))
179        return NULL;
180    if (sep == NULL) {
181        sep = " ";
182        seplen = 1;
183    }
184
185    seqlen = PySequence_Size(seq);
186    if (seqlen < 0 && PyErr_Occurred())
187        return NULL;
188
189    if (seqlen == 1) {
190        /* Optimization if there's only one item */
191        PyObject *item = PySequence_GetItem(seq, 0);
192        if (item && !PyString_Check(item)) {
193            PyErr_SetString(PyExc_TypeError,
194                     "first argument must be sequence of strings");
195            Py_DECREF(item);
196            return NULL;
197        }
198        return item;
199    }
200
201    if (!(res = PyString_FromStringAndSize((char*)NULL, sz)))
202        return NULL;
203    p = PyString_AsString(res);
204
205    /* optimize for lists, since it's the most common case.  all others
206     * (tuples and arbitrary sequences) just use the sequence abstract
207     * interface.
208     */
209    if (PyList_Check(seq)) {
210        for (i = 0; i < seqlen; i++) {
211            PyObject *item = PyList_GET_ITEM(seq, i);
212            if (!PyString_Check(item)) {
213                PyErr_SetString(PyExc_TypeError,
214                "first argument must be sequence of strings");
215                Py_DECREF(res);
216                return NULL;
217            }
218            slen = PyString_GET_SIZE(item);
219            if (slen > PY_SSIZE_T_MAX - reslen ||
220                seplen > PY_SSIZE_T_MAX - reslen - seplen) {
221                PyErr_SetString(PyExc_OverflowError,
222                                "input too long");
223                Py_DECREF(res);
224                return NULL;
225            }
226            while (reslen + slen + seplen >= sz) {
227                if (_PyString_Resize(&res, sz * 2) < 0)
228                    return NULL;
229                sz *= 2;
230                p = PyString_AsString(res) + reslen;
231            }
232            if (i > 0) {
233                memcpy(p, sep, seplen);
234                p += seplen;
235                reslen += seplen;
236            }
237            memcpy(p, PyString_AS_STRING(item), slen);
238            p += slen;
239            reslen += slen;
240        }
241        _PyString_Resize(&res, reslen);
242        return res;
243    }
244
245    if (seq->ob_type->tp_as_sequence == NULL ||
246             (getitemfunc = seq->ob_type->tp_as_sequence->sq_item) == NULL)
247    {
248        PyErr_SetString(PyExc_TypeError,
249                        "first argument must be a sequence");
250        return NULL;
251    }
252    /* This is now type safe */
253    for (i = 0; i < seqlen; i++) {
254        PyObject *item = getitemfunc(seq, i);
255        if (!item || !PyString_Check(item)) {
256            PyErr_SetString(PyExc_TypeError,
257                     "first argument must be sequence of strings");
258            Py_DECREF(res);
259            Py_XDECREF(item);
260            return NULL;
261        }
262        slen = PyString_GET_SIZE(item);
263        if (slen > PY_SSIZE_T_MAX - reslen ||
264            seplen > PY_SSIZE_T_MAX - reslen - seplen) {
265            PyErr_SetString(PyExc_OverflowError,
266                            "input too long");
267            Py_DECREF(res);
268            Py_XDECREF(item);
269            return NULL;
270        }
271        while (reslen + slen + seplen >= sz) {
272            if (_PyString_Resize(&res, sz * 2) < 0) {
273                Py_DECREF(item);
274                return NULL;
275            }
276            sz *= 2;
277            p = PyString_AsString(res) + reslen;
278        }
279        if (i > 0) {
280            memcpy(p, sep, seplen);
281            p += seplen;
282            reslen += seplen;
283        }
284        memcpy(p, PyString_AS_STRING(item), slen);
285        p += slen;
286        reslen += slen;
287        Py_DECREF(item);
288    }
289    _PyString_Resize(&res, reslen);
290    return res;
291}
292
293
294PyDoc_STRVAR(find__doc__,
295"find(s, sub [,start [,end]]) -> in\n"
296"\n"
297"Return the lowest index in s where substring sub is found,\n"
298"such that sub is contained within s[start,end].  Optional\n"
299"arguments start and end are interpreted as in slice notation.\n"
300"\n"
301"Return -1 on failure.");
302
303static PyObject *
304strop_find(PyObject *self, PyObject *args)
305{
306    char *s, *sub;
307    Py_ssize_t len, n, i = 0, last = PY_SSIZE_T_MAX;
308
309    WARN;
310    if (!PyArg_ParseTuple(args, "t#t#|nn:find", &s, &len, &sub, &n, &i, &last))
311        return NULL;
312
313    if (last > len)
314        last = len;
315    if (last < 0)
316        last += len;
317    if (last < 0)
318        last = 0;
319    if (i < 0)
320        i += len;
321    if (i < 0)
322        i = 0;
323
324    if (n == 0 && i <= last)
325        return PyInt_FromLong((long)i);
326
327    last -= n;
328    for (; i <= last; ++i)
329        if (s[i] == sub[0] &&
330            (n == 1 || memcmp(&s[i+1], &sub[1], n-1) == 0))
331            return PyInt_FromLong((long)i);
332
333    return PyInt_FromLong(-1L);
334}
335
336
337PyDoc_STRVAR(rfind__doc__,
338"rfind(s, sub [,start [,end]]) -> int\n"
339"\n"
340"Return the highest index in s where substring sub is found,\n"
341"such that sub is contained within s[start,end].  Optional\n"
342"arguments start and end are interpreted as in slice notation.\n"
343"\n"
344"Return -1 on failure.");
345
346static PyObject *
347strop_rfind(PyObject *self, PyObject *args)
348{
349    char *s, *sub;
350    Py_ssize_t len, n, j;
351    Py_ssize_t i = 0, last = PY_SSIZE_T_MAX;
352
353    WARN;
354    if (!PyArg_ParseTuple(args, "t#t#|nn:rfind", &s, &len, &sub, &n, &i, &last))
355        return NULL;
356
357    if (last > len)
358        last = len;
359    if (last < 0)
360        last += len;
361    if (last < 0)
362        last = 0;
363    if (i < 0)
364        i += len;
365    if (i < 0)
366        i = 0;
367
368    if (n == 0 && i <= last)
369        return PyInt_FromLong((long)last);
370
371    for (j = last-n; j >= i; --j)
372        if (s[j] == sub[0] &&
373            (n == 1 || memcmp(&s[j+1], &sub[1], n-1) == 0))
374            return PyInt_FromLong((long)j);
375
376    return PyInt_FromLong(-1L);
377}
378
379
380static PyObject *
381do_strip(PyObject *args, int striptype)
382{
383    char *s;
384    Py_ssize_t len, i, j;
385
386
387    if (PyString_AsStringAndSize(args, &s, &len))
388        return NULL;
389
390    i = 0;
391    if (striptype != RIGHTSTRIP) {
392        while (i < len && isspace(Py_CHARMASK(s[i]))) {
393            i++;
394        }
395    }
396
397    j = len;
398    if (striptype != LEFTSTRIP) {
399        do {
400            j--;
401        } while (j >= i && isspace(Py_CHARMASK(s[j])));
402        j++;
403    }
404
405    if (i == 0 && j == len) {
406        Py_INCREF(args);
407        return args;
408    }
409    else
410        return PyString_FromStringAndSize(s+i, j-i);
411}
412
413
414PyDoc_STRVAR(strip__doc__,
415"strip(s) -> string\n"
416"\n"
417"Return a copy of the string s with leading and trailing\n"
418"whitespace removed.");
419
420static PyObject *
421strop_strip(PyObject *self, PyObject *args)
422{
423    WARN;
424    return do_strip(args, BOTHSTRIP);
425}
426
427
428PyDoc_STRVAR(lstrip__doc__,
429"lstrip(s) -> string\n"
430"\n"
431"Return a copy of the string s with leading whitespace removed.");
432
433static PyObject *
434strop_lstrip(PyObject *self, PyObject *args)
435{
436    WARN;
437    return do_strip(args, LEFTSTRIP);
438}
439
440
441PyDoc_STRVAR(rstrip__doc__,
442"rstrip(s) -> string\n"
443"\n"
444"Return a copy of the string s with trailing whitespace removed.");
445
446static PyObject *
447strop_rstrip(PyObject *self, PyObject *args)
448{
449    WARN;
450    return do_strip(args, RIGHTSTRIP);
451}
452
453
454PyDoc_STRVAR(lower__doc__,
455"lower(s) -> string\n"
456"\n"
457"Return a copy of the string s converted to lowercase.");
458
459static PyObject *
460strop_lower(PyObject *self, PyObject *args)
461{
462    char *s, *s_new;
463    Py_ssize_t i, n;
464    PyObject *newstr;
465    int changed;
466
467    WARN;
468    if (PyString_AsStringAndSize(args, &s, &n))
469        return NULL;
470    newstr = PyString_FromStringAndSize(NULL, n);
471    if (newstr == NULL)
472        return NULL;
473    s_new = PyString_AsString(newstr);
474    changed = 0;
475    for (i = 0; i < n; i++) {
476        int c = Py_CHARMASK(*s++);
477        if (isupper(c)) {
478            changed = 1;
479            *s_new = tolower(c);
480        } else
481            *s_new = c;
482        s_new++;
483    }
484    if (!changed) {
485        Py_DECREF(newstr);
486        Py_INCREF(args);
487        return args;
488    }
489    return newstr;
490}
491
492
493PyDoc_STRVAR(upper__doc__,
494"upper(s) -> string\n"
495"\n"
496"Return a copy of the string s converted to uppercase.");
497
498static PyObject *
499strop_upper(PyObject *self, PyObject *args)
500{
501    char *s, *s_new;
502    Py_ssize_t i, n;
503    PyObject *newstr;
504    int changed;
505
506    WARN;
507    if (PyString_AsStringAndSize(args, &s, &n))
508        return NULL;
509    newstr = PyString_FromStringAndSize(NULL, n);
510    if (newstr == NULL)
511        return NULL;
512    s_new = PyString_AsString(newstr);
513    changed = 0;
514    for (i = 0; i < n; i++) {
515        int c = Py_CHARMASK(*s++);
516        if (islower(c)) {
517            changed = 1;
518            *s_new = toupper(c);
519        } else
520            *s_new = c;
521        s_new++;
522    }
523    if (!changed) {
524        Py_DECREF(newstr);
525        Py_INCREF(args);
526        return args;
527    }
528    return newstr;
529}
530
531
532PyDoc_STRVAR(capitalize__doc__,
533"capitalize(s) -> string\n"
534"\n"
535"Return a copy of the string s with only its first character\n"
536"capitalized.");
537
538static PyObject *
539strop_capitalize(PyObject *self, PyObject *args)
540{
541    char *s, *s_new;
542    Py_ssize_t i, n;
543    PyObject *newstr;
544    int changed;
545
546    WARN;
547    if (PyString_AsStringAndSize(args, &s, &n))
548        return NULL;
549    newstr = PyString_FromStringAndSize(NULL, n);
550    if (newstr == NULL)
551        return NULL;
552    s_new = PyString_AsString(newstr);
553    changed = 0;
554    if (0 < n) {
555        int c = Py_CHARMASK(*s++);
556        if (islower(c)) {
557            changed = 1;
558            *s_new = toupper(c);
559        } else
560            *s_new = c;
561        s_new++;
562    }
563    for (i = 1; i < n; i++) {
564        int c = Py_CHARMASK(*s++);
565        if (isupper(c)) {
566            changed = 1;
567            *s_new = tolower(c);
568        } else
569            *s_new = c;
570        s_new++;
571    }
572    if (!changed) {
573        Py_DECREF(newstr);
574        Py_INCREF(args);
575        return args;
576    }
577    return newstr;
578}
579
580
581PyDoc_STRVAR(expandtabs__doc__,
582"expandtabs(string, [tabsize]) -> string\n"
583"\n"
584"Expand tabs in a string, i.e. replace them by one or more spaces,\n"
585"depending on the current column and the given tab size (default 8).\n"
586"The column number is reset to zero after each newline occurring in the\n"
587"string.  This doesn't understand other non-printing characters.");
588
589static PyObject *
590strop_expandtabs(PyObject *self, PyObject *args)
591{
592    /* Original by Fredrik Lundh */
593    char* e;
594    char* p;
595    char* q;
596    Py_ssize_t i, j, old_j;
597    PyObject* out;
598    char* string;
599    Py_ssize_t stringlen;
600    int tabsize = 8;
601
602    WARN;
603    /* Get arguments */
604    if (!PyArg_ParseTuple(args, "s#|i:expandtabs", &string, &stringlen, &tabsize))
605        return NULL;
606    if (tabsize < 1) {
607        PyErr_SetString(PyExc_ValueError,
608                        "tabsize must be at least 1");
609        return NULL;
610    }
611
612    /* First pass: determine size of output string */
613    i = j = old_j = 0; /* j: current column; i: total of previous lines */
614    e = string + stringlen;
615    for (p = string; p < e; p++) {
616        if (*p == '\t') {
617            j += tabsize - (j%tabsize);
618            if (old_j > j) {
619                PyErr_SetString(PyExc_OverflowError,
620                                "new string is too long");
621                return NULL;
622            }
623            old_j = j;
624        } else {
625            j++;
626            if (*p == '\n') {
627                i += j;
628                j = 0;
629            }
630        }
631    }
632
633    if ((i + j) < 0) {
634        PyErr_SetString(PyExc_OverflowError, "new string is too long");
635        return NULL;
636    }
637
638    /* Second pass: create output string and fill it */
639    out = PyString_FromStringAndSize(NULL, i+j);
640    if (out == NULL)
641        return NULL;
642
643    i = 0;
644    q = PyString_AS_STRING(out);
645
646    for (p = string; p < e; p++) {
647        if (*p == '\t') {
648            j = tabsize - (i%tabsize);
649            i += j;
650            while (j-- > 0)
651                *q++ = ' ';
652        } else {
653            *q++ = *p;
654            i++;
655            if (*p == '\n')
656                i = 0;
657        }
658    }
659
660    return out;
661}
662
663
664PyDoc_STRVAR(count__doc__,
665"count(s, sub[, start[, end]]) -> int\n"
666"\n"
667"Return the number of occurrences of substring sub in string\n"
668"s[start:end].  Optional arguments start and end are\n"
669"interpreted as in slice notation.");
670
671static PyObject *
672strop_count(PyObject *self, PyObject *args)
673{
674    char *s, *sub;
675    Py_ssize_t len, n;
676    Py_ssize_t i = 0, last = PY_SSIZE_T_MAX;
677    Py_ssize_t m, r;
678
679    WARN;
680    if (!PyArg_ParseTuple(args, "t#t#|nn:count", &s, &len, &sub, &n, &i, &last))
681        return NULL;
682    if (last > len)
683        last = len;
684    if (last < 0)
685        last += len;
686    if (last < 0)
687        last = 0;
688    if (i < 0)
689        i += len;
690    if (i < 0)
691        i = 0;
692    m = last + 1 - n;
693    if (n == 0)
694        return PyInt_FromLong((long) (m-i));
695
696    r = 0;
697    while (i < m) {
698        if (!memcmp(s+i, sub, n)) {
699            r++;
700            i += n;
701        } else {
702            i++;
703        }
704    }
705    return PyInt_FromLong((long) r);
706}
707
708
709PyDoc_STRVAR(swapcase__doc__,
710"swapcase(s) -> string\n"
711"\n"
712"Return a copy of the string s with upper case characters\n"
713"converted to lowercase and vice versa.");
714
715static PyObject *
716strop_swapcase(PyObject *self, PyObject *args)
717{
718    char *s, *s_new;
719    Py_ssize_t i, n;
720    PyObject *newstr;
721    int changed;
722
723    WARN;
724    if (PyString_AsStringAndSize(args, &s, &n))
725        return NULL;
726    newstr = PyString_FromStringAndSize(NULL, n);
727    if (newstr == NULL)
728        return NULL;
729    s_new = PyString_AsString(newstr);
730    changed = 0;
731    for (i = 0; i < n; i++) {
732        int c = Py_CHARMASK(*s++);
733        if (islower(c)) {
734            changed = 1;
735            *s_new = toupper(c);
736        }
737        else if (isupper(c)) {
738            changed = 1;
739            *s_new = tolower(c);
740        }
741        else
742            *s_new = c;
743        s_new++;
744    }
745    if (!changed) {
746        Py_DECREF(newstr);
747        Py_INCREF(args);
748        return args;
749    }
750    return newstr;
751}
752
753
754PyDoc_STRVAR(atoi__doc__,
755"atoi(s [,base]) -> int\n"
756"\n"
757"Return the integer represented by the string s in the given\n"
758"base, which defaults to 10.  The string s must consist of one\n"
759"or more digits, possibly preceded by a sign.  If base is 0, it\n"
760"is chosen from the leading characters of s, 0 for octal, 0x or\n"
761"0X for hexadecimal.  If base is 16, a preceding 0x or 0X is\n"
762"accepted.");
763
764static PyObject *
765strop_atoi(PyObject *self, PyObject *args)
766{
767    char *s, *end;
768    int base = 10;
769    long x;
770    char buffer[256]; /* For errors */
771
772    WARN;
773    if (!PyArg_ParseTuple(args, "s|i:atoi", &s, &base))
774        return NULL;
775
776    if ((base != 0 && base < 2) || base > 36) {
777        PyErr_SetString(PyExc_ValueError, "invalid base for atoi()");
778        return NULL;
779    }
780
781    while (*s && isspace(Py_CHARMASK(*s)))
782        s++;
783    errno = 0;
784    if (base == 0 && s[0] == '0')
785        x = (long) PyOS_strtoul(s, &end, base);
786    else
787        x = PyOS_strtol(s, &end, base);
788    if (end == s || !isalnum(Py_CHARMASK(end[-1])))
789        goto bad;
790    while (*end && isspace(Py_CHARMASK(*end)))
791        end++;
792    if (*end != '\0') {
793  bad:
794        PyOS_snprintf(buffer, sizeof(buffer),
795                      "invalid literal for atoi(): %.200s", s);
796        PyErr_SetString(PyExc_ValueError, buffer);
797        return NULL;
798    }
799    else if (errno != 0) {
800        PyOS_snprintf(buffer, sizeof(buffer),
801                      "atoi() literal too large: %.200s", s);
802        PyErr_SetString(PyExc_ValueError, buffer);
803        return NULL;
804    }
805    return PyInt_FromLong(x);
806}
807
808
809PyDoc_STRVAR(atol__doc__,
810"atol(s [,base]) -> long\n"
811"\n"
812"Return the long integer represented by the string s in the\n"
813"given base, which defaults to 10.  The string s must consist\n"
814"of one or more digits, possibly preceded by a sign.  If base\n"
815"is 0, it is chosen from the leading characters of s, 0 for\n"
816"octal, 0x or 0X for hexadecimal.  If base is 16, a preceding\n"
817"0x or 0X is accepted.  A trailing L or l is not accepted,\n"
818"unless base is 0.");
819
820static PyObject *
821strop_atol(PyObject *self, PyObject *args)
822{
823    char *s, *end;
824    int base = 10;
825    PyObject *x;
826    char buffer[256]; /* For errors */
827
828    WARN;
829    if (!PyArg_ParseTuple(args, "s|i:atol", &s, &base))
830        return NULL;
831
832    if ((base != 0 && base < 2) || base > 36) {
833        PyErr_SetString(PyExc_ValueError, "invalid base for atol()");
834        return NULL;
835    }
836
837    while (*s && isspace(Py_CHARMASK(*s)))
838        s++;
839    if (s[0] == '\0') {
840        PyErr_SetString(PyExc_ValueError, "empty string for atol()");
841        return NULL;
842    }
843    x = PyLong_FromString(s, &end, base);
844    if (x == NULL)
845        return NULL;
846    if (base == 0 && (*end == 'l' || *end == 'L'))
847        end++;
848    while (*end && isspace(Py_CHARMASK(*end)))
849        end++;
850    if (*end != '\0') {
851        PyOS_snprintf(buffer, sizeof(buffer),
852                      "invalid literal for atol(): %.200s", s);
853        PyErr_SetString(PyExc_ValueError, buffer);
854        Py_DECREF(x);
855        return NULL;
856    }
857    return x;
858}
859
860
861PyDoc_STRVAR(atof__doc__,
862"atof(s) -> float\n"
863"\n"
864"Return the floating point number represented by the string s.");
865
866static PyObject *
867strop_atof(PyObject *self, PyObject *args)
868{
869    char *s, *end;
870    double x;
871    char buffer[256]; /* For errors */
872
873    WARN;
874    if (!PyArg_ParseTuple(args, "s:atof", &s))
875        return NULL;
876    while (*s && isspace(Py_CHARMASK(*s)))
877        s++;
878    if (s[0] == '\0') {
879        PyErr_SetString(PyExc_ValueError, "empty string for atof()");
880        return NULL;
881    }
882
883    PyFPE_START_PROTECT("strop_atof", return 0)
884    x = PyOS_string_to_double(s, &end, PyExc_OverflowError);
885    PyFPE_END_PROTECT(x)
886    if (x == -1 && PyErr_Occurred())
887        return NULL;
888    while (*end && isspace(Py_CHARMASK(*end)))
889        end++;
890    if (*end != '\0') {
891        PyOS_snprintf(buffer, sizeof(buffer),
892                      "invalid literal for atof(): %.200s", s);
893        PyErr_SetString(PyExc_ValueError, buffer);
894        return NULL;
895    }
896    return PyFloat_FromDouble(x);
897}
898
899
900PyDoc_STRVAR(maketrans__doc__,
901"maketrans(frm, to) -> string\n"
902"\n"
903"Return a translation table (a string of 256 bytes long)\n"
904"suitable for use in string.translate.  The strings frm and to\n"
905"must be of the same length.");
906
907static PyObject *
908strop_maketrans(PyObject *self, PyObject *args)
909{
910    unsigned char *c, *from=NULL, *to=NULL;
911    Py_ssize_t i, fromlen=0, tolen=0;
912    PyObject *result;
913
914    if (!PyArg_ParseTuple(args, "t#t#:maketrans", &from, &fromlen, &to, &tolen))
915        return NULL;
916
917    if (fromlen != tolen) {
918        PyErr_SetString(PyExc_ValueError,
919                        "maketrans arguments must have same length");
920        return NULL;
921    }
922
923    result = PyString_FromStringAndSize((char *)NULL, 256);
924    if (result == NULL)
925        return NULL;
926    c = (unsigned char *) PyString_AS_STRING((PyStringObject *)result);
927    for (i = 0; i < 256; i++)
928        c[i]=(unsigned char)i;
929    for (i = 0; i < fromlen; i++)
930        c[from[i]]=to[i];
931
932    return result;
933}
934
935
936PyDoc_STRVAR(translate__doc__,
937"translate(s,table [,deletechars]) -> string\n"
938"\n"
939"Return a copy of the string s, where all characters occurring\n"
940"in the optional argument deletechars are removed, and the\n"
941"remaining characters have been mapped through the given\n"
942"translation table, which must be a string of length 256.");
943
944static PyObject *
945strop_translate(PyObject *self, PyObject *args)
946{
947    register char *input, *table, *output;
948    Py_ssize_t i;
949    int c, changed = 0;
950    PyObject *input_obj;
951    char *table1, *output_start, *del_table=NULL;
952    Py_ssize_t inlen, tablen, dellen = 0;
953    PyObject *result;
954    int trans_table[256];
955
956    WARN;
957    if (!PyArg_ParseTuple(args, "St#|t#:translate", &input_obj,
958                          &table1, &tablen, &del_table, &dellen))
959        return NULL;
960    if (tablen != 256) {
961        PyErr_SetString(PyExc_ValueError,
962                      "translation table must be 256 characters long");
963        return NULL;
964    }
965
966    table = table1;
967    inlen = PyString_GET_SIZE(input_obj);
968    result = PyString_FromStringAndSize((char *)NULL, inlen);
969    if (result == NULL)
970        return NULL;
971    output_start = output = PyString_AsString(result);
972    input = PyString_AsString(input_obj);
973
974    if (dellen == 0) {
975        /* If no deletions are required, use faster code */
976        for (i = inlen; --i >= 0; ) {
977            c = Py_CHARMASK(*input++);
978            if (Py_CHARMASK((*output++ = table[c])) != c)
979                changed = 1;
980        }
981        if (changed)
982            return result;
983        Py_DECREF(result);
984        Py_INCREF(input_obj);
985        return input_obj;
986    }
987
988    for (i = 0; i < 256; i++)
989        trans_table[i] = Py_CHARMASK(table[i]);
990
991    for (i = 0; i < dellen; i++)
992        trans_table[(int) Py_CHARMASK(del_table[i])] = -1;
993
994    for (i = inlen; --i >= 0; ) {
995        c = Py_CHARMASK(*input++);
996        if (trans_table[c] != -1)
997            if (Py_CHARMASK(*output++ = (char)trans_table[c]) == c)
998                continue;
999        changed = 1;
1000    }
1001    if (!changed) {
1002        Py_DECREF(result);
1003        Py_INCREF(input_obj);
1004        return input_obj;
1005    }
1006    /* Fix the size of the resulting string */
1007    if (inlen > 0)
1008        _PyString_Resize(&result, output - output_start);
1009    return result;
1010}
1011
1012
1013/* What follows is used for implementing replace().  Perry Stoll. */
1014
1015/*
1016  mymemfind
1017
1018  strstr replacement for arbitrary blocks of memory.
1019
1020  Locates the first occurrence in the memory pointed to by MEM of the
1021  contents of memory pointed to by PAT.  Returns the index into MEM if
1022  found, or -1 if not found.  If len of PAT is greater than length of
1023  MEM, the function returns -1.
1024*/
1025static Py_ssize_t
1026mymemfind(const char *mem, Py_ssize_t len, const char *pat, Py_ssize_t pat_len)
1027{
1028    register Py_ssize_t ii;
1029
1030    /* pattern can not occur in the last pat_len-1 chars */
1031    len -= pat_len;
1032
1033    for (ii = 0; ii <= len; ii++) {
1034        if (mem[ii] == pat[0] &&
1035            (pat_len == 1 ||
1036             memcmp(&mem[ii+1], &pat[1], pat_len-1) == 0)) {
1037            return ii;
1038        }
1039    }
1040    return -1;
1041}
1042
1043/*
1044  mymemcnt
1045
1046   Return the number of distinct times PAT is found in MEM.
1047   meaning mem=1111 and pat==11 returns 2.
1048       mem=11111 and pat==11 also return 2.
1049 */
1050static Py_ssize_t
1051mymemcnt(const char *mem, Py_ssize_t len, const char *pat, Py_ssize_t pat_len)
1052{
1053    register Py_ssize_t offset = 0;
1054    Py_ssize_t nfound = 0;
1055
1056    while (len >= 0) {
1057        offset = mymemfind(mem, len, pat, pat_len);
1058        if (offset == -1)
1059            break;
1060        mem += offset + pat_len;
1061        len -= offset + pat_len;
1062        nfound++;
1063    }
1064    return nfound;
1065}
1066
1067/*
1068   mymemreplace
1069
1070   Return a string in which all occurrences of PAT in memory STR are
1071   replaced with SUB.
1072
1073   If length of PAT is less than length of STR or there are no occurrences
1074   of PAT in STR, then the original string is returned. Otherwise, a new
1075   string is allocated here and returned.
1076
1077   on return, out_len is:
1078       the length of output string, or
1079       -1 if the input string is returned, or
1080       unchanged if an error occurs (no memory).
1081
1082   return value is:
1083       the new string allocated locally, or
1084       NULL if an error occurred.
1085*/
1086static char *
1087mymemreplace(const char *str, Py_ssize_t len,           /* input string */
1088         const char *pat, Py_ssize_t pat_len,           /* pattern string to find */
1089         const char *sub, Py_ssize_t sub_len,           /* substitution string */
1090         Py_ssize_t count,                              /* number of replacements */
1091         Py_ssize_t *out_len)
1092{
1093    char *out_s;
1094    char *new_s;
1095    Py_ssize_t nfound, offset, new_len;
1096
1097    if (len == 0 || pat_len > len)
1098        goto return_same;
1099
1100    /* find length of output string */
1101    nfound = mymemcnt(str, len, pat, pat_len);
1102    if (count < 0)
1103        count = PY_SSIZE_T_MAX;
1104    else if (nfound > count)
1105        nfound = count;
1106    if (nfound == 0)
1107        goto return_same;
1108
1109    new_len = len + nfound*(sub_len - pat_len);
1110    if (new_len == 0) {
1111        /* Have to allocate something for the caller to free(). */
1112        out_s = (char *)PyMem_MALLOC(1);
1113        if (out_s == NULL)
1114            return NULL;
1115        out_s[0] = '\0';
1116    }
1117    else {
1118        assert(new_len > 0);
1119        new_s = (char *)PyMem_MALLOC(new_len);
1120        if (new_s == NULL)
1121            return NULL;
1122        out_s = new_s;
1123
1124        for (; count > 0 && len > 0; --count) {
1125            /* find index of next instance of pattern */
1126            offset = mymemfind(str, len, pat, pat_len);
1127            if (offset == -1)
1128                break;
1129
1130            /* copy non matching part of input string */
1131            memcpy(new_s, str, offset);
1132            str += offset + pat_len;
1133            len -= offset + pat_len;
1134
1135            /* copy substitute into the output string */
1136            new_s += offset;
1137            memcpy(new_s, sub, sub_len);
1138            new_s += sub_len;
1139        }
1140        /* copy any remaining values into output string */
1141        if (len > 0)
1142            memcpy(new_s, str, len);
1143    }
1144    *out_len = new_len;
1145    return out_s;
1146
1147  return_same:
1148    *out_len = -1;
1149    return (char *)str; /* cast away const */
1150}
1151
1152
1153PyDoc_STRVAR(replace__doc__,
1154"replace (str, old, new[, maxsplit]) -> string\n"
1155"\n"
1156"Return a copy of string str with all occurrences of substring\n"
1157"old replaced by new. If the optional argument maxsplit is\n"
1158"given, only the first maxsplit occurrences are replaced.");
1159
1160static PyObject *
1161strop_replace(PyObject *self, PyObject *args)
1162{
1163    char *str, *pat,*sub,*new_s;
1164    Py_ssize_t len,pat_len,sub_len,out_len;
1165    Py_ssize_t count = -1;
1166    PyObject *newstr;
1167
1168    WARN;
1169    if (!PyArg_ParseTuple(args, "t#t#t#|n:replace",
1170                          &str, &len, &pat, &pat_len, &sub, &sub_len,
1171                          &count))
1172        return NULL;
1173    if (pat_len <= 0) {
1174        PyErr_SetString(PyExc_ValueError, "empty pattern string");
1175        return NULL;
1176    }
1177    /* CAUTION:  strop treats a replace count of 0 as infinity, unlke
1178     * current (2.1) string.py and string methods.  Preserve this for
1179     * ... well, hard to say for what <wink>.
1180     */
1181    if (count == 0)
1182        count = -1;
1183    new_s = mymemreplace(str,len,pat,pat_len,sub,sub_len,count,&out_len);
1184    if (new_s == NULL) {
1185        PyErr_NoMemory();
1186        return NULL;
1187    }
1188    if (out_len == -1) {
1189        /* we're returning another reference to the input string */
1190        newstr = PyTuple_GetItem(args, 0);
1191        Py_XINCREF(newstr);
1192    }
1193    else {
1194        newstr = PyString_FromStringAndSize(new_s, out_len);
1195        PyMem_FREE(new_s);
1196    }
1197    return newstr;
1198}
1199
1200
1201/* List of functions defined in the module */
1202
1203static PyMethodDef
1204strop_methods[] = {
1205    {"atof",            strop_atof,        METH_VARARGS, atof__doc__},
1206    {"atoi",            strop_atoi,        METH_VARARGS, atoi__doc__},
1207    {"atol",            strop_atol,        METH_VARARGS, atol__doc__},
1208    {"capitalize",      strop_capitalize,  METH_O,       capitalize__doc__},
1209    {"count",           strop_count,       METH_VARARGS, count__doc__},
1210    {"expandtabs",      strop_expandtabs,  METH_VARARGS, expandtabs__doc__},
1211    {"find",            strop_find,        METH_VARARGS, find__doc__},
1212    {"join",            strop_joinfields,  METH_VARARGS, joinfields__doc__},
1213    {"joinfields",      strop_joinfields,  METH_VARARGS, joinfields__doc__},
1214    {"lstrip",          strop_lstrip,      METH_O,       lstrip__doc__},
1215    {"lower",           strop_lower,       METH_O,       lower__doc__},
1216    {"maketrans",       strop_maketrans,   METH_VARARGS, maketrans__doc__},
1217    {"replace",         strop_replace,     METH_VARARGS, replace__doc__},
1218    {"rfind",           strop_rfind,       METH_VARARGS, rfind__doc__},
1219    {"rstrip",          strop_rstrip,      METH_O,       rstrip__doc__},
1220    {"split",           strop_splitfields, METH_VARARGS, splitfields__doc__},
1221    {"splitfields",     strop_splitfields, METH_VARARGS, splitfields__doc__},
1222    {"strip",           strop_strip,       METH_O,       strip__doc__},
1223    {"swapcase",        strop_swapcase,    METH_O,       swapcase__doc__},
1224    {"translate",       strop_translate,   METH_VARARGS, translate__doc__},
1225    {"upper",           strop_upper,       METH_O,       upper__doc__},
1226    {NULL,              NULL}   /* sentinel */
1227};
1228
1229
1230PyMODINIT_FUNC
1231initstrop(void)
1232{
1233    PyObject *m, *s;
1234    char buf[256];
1235    int c, n;
1236    m = Py_InitModule4("strop", strop_methods, strop_module__doc__,
1237                       (PyObject*)NULL, PYTHON_API_VERSION);
1238    if (m == NULL)
1239        return;
1240
1241    /* Create 'whitespace' object */
1242    n = 0;
1243    for (c = 0; c < 256; c++) {
1244        if (isspace(c))
1245            buf[n++] = c;
1246    }
1247    s = PyString_FromStringAndSize(buf, n);
1248    if (s)
1249        PyModule_AddObject(m, "whitespace", s);
1250
1251    /* Create 'lowercase' object */
1252    n = 0;
1253    for (c = 0; c < 256; c++) {
1254        if (islower(c))
1255            buf[n++] = c;
1256    }
1257    s = PyString_FromStringAndSize(buf, n);
1258    if (s)
1259        PyModule_AddObject(m, "lowercase", s);
1260
1261    /* Create 'uppercase' object */
1262    n = 0;
1263    for (c = 0; c < 256; c++) {
1264        if (isupper(c))
1265            buf[n++] = c;
1266    }
1267    s = PyString_FromStringAndSize(buf, n);
1268    if (s)
1269        PyModule_AddObject(m, "uppercase", s);
1270}
1271