stringold.py revision 7e47402264cf87b9bbb61fc9ff610af08add7c7b
1# module 'string' -- A collection of string operations
2
3# Warning: most of the code you see here isn't normally used nowadays.  With
4# Python 1.6, many of these functions are implemented as methods on the
5# standard string object. They used to be implemented by a built-in module
6# called strop, but strop is now obsolete itself.
7
8"""Common string manipulations.
9
10Public module variables:
11
12whitespace -- a string containing all characters considered whitespace
13lowercase -- a string containing all characters considered lowercase letters
14uppercase -- a string containing all characters considered uppercase letters
15letters -- a string containing all characters considered letters
16digits -- a string containing all characters considered decimal digits
17hexdigits -- a string containing all characters considered hexadecimal digits
18octdigits -- a string containing all characters considered octal digits
19
20"""
21
# Character classes in the spirit of C's <ctype.h>
digits = '0123456789'
octdigits = '01234567'
lowercase = 'abcdefghijklmnopqrstuvwxyz'
uppercase = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
letters = lowercase + uppercase
hexdigits = digits + 'abcdef' + 'ABCDEF'
whitespace = ' \t\n\r\v\f'

# Identity table: the 256 characters chr(0) .. chr(255), in order
_idmap = ''.join(map(chr, range(256)))

# Legacy exception names kept for old callers; each one is ValueError
index_error = atoi_error = atof_error = atol_error = ValueError
41
42# convert UPPER CASE letters to lower case
def lower(s):
    """lower(s) -> string

    Hand back the result of s.lower(): the text of s with its
    uppercase letters turned into lowercase ones.

    """
    lowered = s.lower()
    return lowered
50
51# Convert lower case letters to UPPER CASE
def upper(s):
    """upper(s) -> string

    Hand back the result of s.upper(): the text of s with its
    lowercase letters turned into uppercase ones.

    """
    raised = s.upper()
    return raised
59
60# Swap lower case letters and UPPER CASE
def swapcase(s):
    """swapcase(s) -> string

    Hand back the result of s.swapcase(): uppercase letters become
    lowercase and lowercase letters become uppercase.

    """
    flipped = s.swapcase()
    return flipped
69
70# Strip leading and trailing tabs and spaces
def strip(s):
    """strip(s) -> string

    Hand back the result of s.strip(): s without the whitespace
    at its beginning and at its end.

    """
    trimmed = s.strip()
    return trimmed
79
80# Strip leading tabs and spaces
def lstrip(s):
    """lstrip(s) -> string

    Hand back the result of s.lstrip(): s without the whitespace
    at its beginning.

    """
    trimmed = s.lstrip()
    return trimmed
88
89# Strip trailing tabs and spaces
def rstrip(s):
    """rstrip(s) -> string

    Hand back the result of s.rstrip(): s without the whitespace
    at its end.

    """
    trimmed = s.rstrip()
    return trimmed
98
99
100# Split a string into a list of space/tab-separated words
101# NB: split(s) is NOT the same as splitfields(s, ' ')!
def split(s, sep=None, maxsplit=0):
    """split(str [,sep [,maxsplit]]) -> list of strings

    Return a list of the words in the string s, using sep as the
    delimiter string.  If sep is not specified or is None, any
    whitespace string is a separator.  If maxsplit is nonzero, at
    most maxsplit splits are done; a maxsplit of 0 (the default)
    places no limit on the number of splits.

    (split and splitfields are synonymous)

    """
    if maxsplit:
        return s.split(sep, maxsplit)
    # Leave the count out so the method applies its own "no limit"
    # default.  Handing 0 to the method would be taken literally as
    # "perform no splits", contradicting the contract documented above.
    return s.split(sep)
splitfields = split
115
116# Join fields with optional separator
def join(words, sep = ' '):
    """join(list [,sep]) -> string

    Glue the items of words together into one string, placing sep
    between neighbouring items.  When sep is omitted a single space
    is used.

    (joinfields and join are synonymous)

    """
    glued = sep.join(words)
    return glued
joinfields = join
129
# Module-level alias for the built-in apply(); the original note gives
# the motivation as "a little bit of speed".
_apply = apply
132
133# Find substring, raise exception if not found
def index(s, *args):
    """index(s, sub [,start [,end]]) -> int

    Like find but raises ValueError when the substring is not found.

    The arguments after s are handed unchanged to s.index().

    """
    # Extended call syntax does the same job as the deprecated apply().
    return s.index(*args)
141
142# Find last substring, raise exception if not found
def rindex(s, *args):
    """rindex(s, sub [,start [,end]]) -> int

    Like rfind but raises ValueError when the substring is not found.

    The arguments after s are handed unchanged to s.rindex().

    """
    # Extended call syntax does the same job as the deprecated apply().
    return s.rindex(*args)
150
151# Count non-overlapping occurrences of substring
def count(s, *args):
    """count(s, sub[, start[,end]]) -> int

    Return the number of occurrences of substring sub in string
    s[start:end].  Optional arguments start and end are
    interpreted as in slice notation.

    The arguments after s are handed unchanged to s.count().

    """
    # Extended call syntax does the same job as the deprecated apply().
    return s.count(*args)
161
162# Find substring, return -1 if not found
def find(s, *args):
    """find(s, sub [,start [,end]]) -> int

    Return the lowest index in s where substring sub is found,
    such that sub is contained within s[start:end].  Optional
    arguments start and end are interpreted as in slice notation.

    Return -1 on failure.

    The arguments after s are handed unchanged to s.find().

    """
    # Extended call syntax does the same job as the deprecated apply().
    return s.find(*args)
174
175# Find last substring, return -1 if not found
def rfind(s, *args):
    """rfind(s, sub [,start [,end]]) -> int

    Return the highest index in s where substring sub is found,
    such that sub is contained within s[start:end].  Optional
    arguments start and end are interpreted as in slice notation.

    Return -1 on failure.

    The arguments after s are handed unchanged to s.rfind().

    """
    # Extended call syntax does the same job as the deprecated apply().
    return s.rfind(*args)
187
# Module-level aliases for the built-in conversion functions, plus the
# type object of a plain string literal; the original note gives the
# motivation as "a bit of speed".
_float = float
_int = int
_long = long
_StringType = type('')
193
194# Convert string to float
def atof(s):
    """atof(s) -> float

    Return the floating point number represented by the string s.

    Raises TypeError when s is not a string (instances of string
    subclasses are accepted).  Whatever float() raises for a string
    it cannot parse is passed on to the caller.

    """
    # isinstance() rather than an exact type comparison, so string
    # subclasses are not turned away.
    if not isinstance(s, type('')):
        raise TypeError('argument 1: expected string, %s found' %
                        type(s).__name__)
    return float(s)
206
207# Convert string to integer
def atoi(*args):
    """atoi(s [,base]) -> int

    Return the integer represented by the string s in the given
    base, which defaults to 10.  The string s must consist of one
    or more digits, possibly preceded by a sign.  If base is 0, it
    is chosen from the leading characters of s, 0 for octal, 0x or
    0X for hexadecimal.  If base is 16, a preceding 0x or 0X is
    accepted.

    Raises TypeError when called without arguments or when s is not
    a string (instances of string subclasses are accepted).  All
    arguments are handed unchanged to int(), whose own exceptions
    are passed on to the caller.

    """
    if not args:
        raise TypeError('function requires at least 1 argument: %d given' %
                        len(args))
    s = args[0]
    # Don't catch type error resulting from too many arguments to int().  The
    # error message isn't compatible but the error type is, and this function
    # is complicated enough already.
    if not isinstance(s, type('')):
        raise TypeError('argument 1: expected string, %s found' %
                        type(s).__name__)
    return int(*args)
232
233
234# Convert string to long integer
def atol(*args):
    """atol(s [,base]) -> long

    Return the long integer represented by the string s in the
    given base, which defaults to 10.  The string s must consist
    of one or more digits, possibly preceded by a sign.  If base
    is 0, it is chosen from the leading characters of s, 0 for
    octal, 0x or 0X for hexadecimal.  If base is 16, a preceding
    0x or 0X is accepted.  A trailing L or l is not accepted,
    unless base is 0.

    Raises TypeError when called without arguments or when s is not
    a string (instances of string subclasses are accepted).  All
    arguments are handed unchanged to long(), whose own exceptions
    are passed on to the caller.

    """
    if not args:
        raise TypeError('function requires at least 1 argument: %d given' %
                        len(args))
    s = args[0]
    # Don't catch type error resulting from too many arguments to long().  The
    # error message isn't compatible but the error type is, and this function
    # is complicated enough already.
    if not isinstance(s, type('')):
        raise TypeError('argument 1: expected string, %s found' %
                        type(s).__name__)
    return _long(*args)
260
261
262# Left-justify a string
def ljust(s, width):
    """ljust(s, width) -> string

    Give back s followed by as many spaces as it takes to reach
    width characters.  When s already has width characters or more
    it is given back as it is; nothing is ever cut off.

    """
    pad = width - len(s)
    if pad <= 0:
        return s
    filler = ' ' * pad
    return s + filler
274
275# Right-justify a string
def rjust(s, width):
    """rjust(s, width) -> string

    Give back s preceded by as many spaces as it takes to reach
    width characters.  When s already has width characters or more
    it is given back as it is; nothing is ever cut off.

    """
    pad = width - len(s)
    if pad <= 0:
        return s
    filler = ' ' * pad
    return filler + s
287
288# Center a string
def center(s, width):
    """center(s, width) -> string

    Return a centered version of s, in a field of the specified
    width, padded with spaces as needed.  The string is never
    truncated.

    """
    n = width - len(s)
    if n <= 0:
        return s
    # divmod() keeps the padding count an integer whichever division
    # rule the interpreter applies to the / operator.
    half, odd = divmod(n, 2)
    if odd and width % 2:
        # This ensures that center(center(s, i), j) = center(s, j)
        half = half + 1
    return ' ' * half + s + ' ' * (n - half)
304
305# Zero-fill a number, e.g., (12, 3) --> '012' and (-3, 3) --> '-03'
306# Decadent feature: the argument may be a string or a number
307# (Use of this is deprecated; it should be a string as with ljust c.s.)
def zfill(x, width):
    """zfill(x, width) -> string

    Pad a numeric string x with zeros on the left, to fill a field
    of the specified width.  A leading '+' or '-' sign stays in
    front of the inserted zeros.  The string x is never truncated.

    For backward compatibility x may also be a non-string, in which
    case repr(x) is what gets padded.

    """
    if isinstance(x, type('')):
        s = x
    else:
        s = repr(x)
    n = len(s)
    if n >= width:
        return s
    # Slice instead of indexing: an empty string has no first character,
    # and s[0] would raise IndexError for it.
    sign = s[:1]
    if sign in ('-', '+'):
        s = s[1:]
    else:
        sign = ''
    return sign + '0' * (width - n) + s
323
324# Expand tabs in a string.
325# Doesn't take non-printing chars into account, but does understand \n.
def expandtabs(s, tabsize=8):
    """expandtabs(s [,tabsize]) -> string

    Return a copy of the string s with all tab characters replaced
    by the appropriate number of spaces, depending on the current
    column, and the tabsize (default 8).

    The column count restarts after every newline; no other
    character is treated specially.  With a tabsize of zero or less
    the tab characters are simply dropped.

    """
    pieces = []      # output fragments, joined once at the end
    column = 0       # characters emitted since the last newline
    for c in s:
        if c == '\t':
            # A non-positive tabsize has no tab stops to advance to (and
            # zero cannot be used as a modulus), so the tab contributes
            # nothing.
            if tabsize > 0:
                fill = tabsize - column % tabsize
                pieces.append(' ' * fill)
                column = column + fill
        elif c == '\n':
            pieces.append(c)
            column = 0
        else:
            pieces.append(c)
            column = column + 1
    return ''.join(pieces)
343
344# Character translation through look-up table.
def translate(s, table, deletions=""):
    """translate(s,table [,deletechars]) -> string

    Hand back the result of s.translate(table, deletions).  The
    characters listed in the optional deletechars argument are
    taken out, and what is left is mapped through table, which
    must be a string of length 256.

    """
    mapped = s.translate(table, deletions)
    return mapped
355
356# Capitalize a string, e.g. "aBc  dEf" -> "Abc  def".
def capitalize(s):
    """capitalize(s) -> string

    Hand back the result of s.capitalize(): s with only its first
    character capitalized.

    """
    capped = s.capitalize()
    return capped
365
366# Capitalize the words in a string, e.g. " aBc  dEf " -> "Abc Def".
367# See also regsub.capwords().
def capwords(s, sep=None):
    """capwords(s, [sep]) -> string

    Break s into words with s.split(sep), capitalize every word,
    and put the words back together with sep between them (a
    single space when sep is empty or not given).  Note that with
    the default sep this replaces runs of whitespace characters by
    a single space.

    """
    glue = sep or ' '
    capped = [word.capitalize() for word in s.split(sep)]
    return glue.join(capped)
378
379# Construct a translation string
# Cached list form of the identity table; built on first use by maketrans()
_idmapL = None
def maketrans(fromstr, tostr):
    """maketrans(frm, to) -> string

    Return a translation table (a string of 256 bytes long)
    suitable for use in string.translate.  The strings frm and to
    must be of the same length.

    Each character of frm is mapped to the character at the same
    position in to; every other character maps to itself.  When a
    character occurs more than once in frm, its last occurrence
    wins.  Raises ValueError if the two lengths differ.

    """
    if len(fromstr) != len(tostr):
        raise ValueError("maketrans arguments must have same length")
    global _idmapL
    if not _idmapL:
        # One single-character string per code 0..255, in order
        _idmapL = [chr(code) for code in range(256)]
    # Work on a copy so the cached identity list is never modified
    table = _idmapL[:]
    for src, dst in zip(fromstr, tostr):
        table[ord(src)] = dst
    return "".join(table)
399
400# Substring replacement (global)
def replace(s, old, new, maxsplit=0):
    """replace (str, old, new[, maxsplit]) -> string

    Return a copy of string str with all occurrences of substring
    old replaced by new.  If the optional argument maxsplit is
    given and nonzero, only the first maxsplit occurrences are
    replaced; a maxsplit of 0 (the default) places no limit on the
    number of replacements.

    """
    if maxsplit:
        return s.replace(old, new, maxsplit)
    # Leave the count out so the method applies its own "replace all"
    # default.  Handing 0 to the method would be taken literally as
    # "replace nothing", contradicting the contract documented above.
    return s.replace(old, new)
410
411
412# XXX: transitional
413#
414# If string objects do not have methods, then we need to use the old string.py
415# library, which uses strop for many more things than just the few outlined
416# below.
if not hasattr('', 'upper'):
    # String objects have no methods here, so pull in the definitions
    # from the old library instead.
    from stringold import *
421
422# Try importing optional built-in module "strop" -- if it exists,
423# it redefines some string operations that are 100-1000 times faster.
424# It also defines values for whitespace, lowercase and uppercase
425# that match <ctype.h>'s definitions.
426
try:
    from strop import maketrans, lowercase, uppercase, whitespace
except ImportError:
    # No strop module: keep the definitions made earlier in this module
    pass
else:
    # Rebuild letters from the case strings that strop supplied
    letters = lowercase + uppercase
432