string.py revision 857c4c36b962c6e74559e045c7fb43177dd5bcea
1"""A collection of string operations (most are no longer used in Python 1.6).
2
3Warning: most of the code you see here isn't normally used nowadays.  With
4Python 1.6, many of these functions are implemented as methods on the
5standard string object. They used to be implemented by a built-in module
6called strop, but strop is now obsolete itself.
7
8Public module variables:
9
10whitespace -- a string containing all characters considered whitespace
11lowercase -- a string containing all characters considered lowercase letters
12uppercase -- a string containing all characters considered uppercase letters
13letters -- a string containing all characters considered letters
14digits -- a string containing all characters considered decimal digits
15hexdigits -- a string containing all characters considered hexadecimal digits
16octdigits -- a string containing all characters considered octal digits
17
18"""
19
20# Some strings for ctype-style character classification
# Base character classes, spelled out literally.
whitespace = ' \t\n\r\v\f'
lowercase = 'abcdefghijklmnopqrstuvwxyz'
uppercase = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
digits = '0123456789'

# Derived character classes, built from the ones above.
letters = lowercase + uppercase
octdigits = digits[:8]                               # '0' through '7'
hexdigits = digits + lowercase[:6] + uppercase[:6]   # digits, a-f, A-F
28
29# Case conversion helpers
# Identity map: the 256 characters chr(0) .. chr(255), in order.
# Built with plain concatenation so that no string method is needed
# at import time.
_idmap = ''
for _code in range(256):
    _idmap = _idmap + chr(_code)
del _code                       # do not leave the loop variable behind
33
34# Backward compatible names for exceptions
# All four legacy exception names are plain aliases of ValueError.
index_error = atoi_error = atof_error = atol_error = ValueError
39
40# convert UPPER CASE letters to lower case
def lower(s):
    """lower(s) -> string

    Function form of the string method: return s.lower(), a copy of
    s converted to lowercase.

    """
    lowered = s.lower()
    return lowered
48
49# Convert lower case letters to UPPER CASE
def upper(s):
    """upper(s) -> string

    Function form of the string method: return s.upper(), a copy of
    s converted to uppercase.

    """
    uppered = s.upper()
    return uppered
57
58# Swap lower case letters and UPPER CASE
def swapcase(s):
    """swapcase(s) -> string

    Function form of the string method: return s.swapcase(), a copy
    of s in which uppercase characters become lowercase and
    lowercase characters become uppercase.

    """
    swapped = s.swapcase()
    return swapped
67
68# Strip leading and trailing tabs and spaces
def strip(s):
    """strip(s) -> string

    Function form of the string method: return s.strip(), a copy of
    s without its leading and trailing whitespace.

    """
    stripped = s.strip()
    return stripped
77
78# Strip leading tabs and spaces
def lstrip(s):
    """lstrip(s) -> string

    Function form of the string method: return s.lstrip(), a copy of
    s without its leading whitespace.

    """
    stripped = s.lstrip()
    return stripped
86
87# Strip trailing tabs and spaces
def rstrip(s):
    """rstrip(s) -> string

    Function form of the string method: return s.rstrip(), a copy of
    s without its trailing whitespace.

    """
    stripped = s.rstrip()
    return stripped
96
97
98# Split a string into a list of space/tab-separated words
99# NB: split(s) is NOT the same as splitfields(s, ' ')!
def split(s, sep=None, maxsplit=0):
    """split(s [,sep [,maxsplit]]) -> list of strings

    Return s.split(sep, maxsplit): a list of the words in the string
    s, using sep as the delimiter string.  If sep is None or not
    given, any whitespace string is a separator.  If maxsplit is
    nonzero, at most maxsplit splits are made.  Both arguments are
    forwarded unchanged; maxsplit defaults to 0.

    (split and splitfields are synonymous)

    """
    words = s.split(sep, maxsplit)
    return words

# Historical second name for the same function object.
splitfields = split
113
114# Join fields with optional separator
def join(words, sep = ' '):
    """join(list [,sep]) -> string

    Return sep.join(words): a string composed of the words in list,
    with intervening occurrences of sep.  The default separator is a
    single space.

    (joinfields and join are synonymous)

    """
    joined = sep.join(words)
    return joined

# Historical second name for the same function object.
joinfields = join
127
# Module-level alias of the built-in apply(), kept for a little bit of
# speed.  Wrappers in this module call _apply(callable, args) to forward
# the argument tuple they collected through *args.
_apply = apply
130
131# Find substring, raise exception if not found
def index(s, *args):
    """index(s, sub [,start [,end]]) -> int

    Like find but raises ValueError when the substring is not found.

    Every argument after s is forwarded unchanged to s.index().

    """
    # Extended call syntax replaces apply(): same call, no dependency
    # on a module-level alias.
    return s.index(*args)
139
140# Find last substring, raise exception if not found
def rindex(s, *args):
    """rindex(s, sub [,start [,end]]) -> int

    Like rfind but raises ValueError when the substring is not found.

    Every argument after s is forwarded unchanged to s.rindex().

    """
    # Extended call syntax replaces apply(): same call, no dependency
    # on a module-level alias.
    return s.rindex(*args)
148
149# Count non-overlapping occurrences of substring
def count(s, *args):
    """count(s, sub[, start[,end]]) -> int

    Return the number of occurrences of substring sub in string
    s[start:end].  Optional arguments start and end are
    interpreted as in slice notation.

    Every argument after s is forwarded unchanged to s.count().

    """
    # Extended call syntax replaces apply(): same call, no dependency
    # on a module-level alias.
    return s.count(*args)
159
160# Find substring, return -1 if not found
def find(s, *args):
    """find(s, sub [,start [,end]]) -> int

    Return the lowest index in s where substring sub is found,
    such that sub is contained within s[start:end].  Optional
    arguments start and end are interpreted as in slice notation.

    Return -1 on failure.

    Every argument after s is forwarded unchanged to s.find().

    """
    # Extended call syntax replaces apply(): same call, no dependency
    # on a module-level alias.
    return s.find(*args)
172
173# Find last substring, return -1 if not found
def rfind(s, *args):
    """rfind(s, sub [,start [,end]]) -> int

    Return the highest index in s where substring sub is found,
    such that sub is contained within s[start:end].  Optional
    arguments start and end are interpreted as in slice notation.

    Return -1 on failure.

    Every argument after s is forwarded unchanged to s.rfind().

    """
    # Extended call syntax replaces apply(): same call, no dependency
    # on a module-level alias.
    return s.rfind(*args)
185
# Module-level aliases used by atof(), atoi() and atol() below, kept for
# a bit of speed.
_float = float
_int = int
_long = long
# The type object of an ordinary string; the ato*() functions compare
# type(s) against it to reject non-string arguments.
_StringType = type('')
191
192# Convert string to float
def atof(s):
    """atof(s) -> float

    Return the floating point number represented by the string s.

    Raises TypeError when s is not an ordinary string; otherwise the
    conversion is done by float().

    """
    # Reject non-strings first, then convert.
    if type(s) != _StringType:
        raise TypeError('argument 1: expected string, %s found' %
                        type(s).__name__)
    return _float(s)
204
205# Convert string to integer
def atoi(*args):
    """atoi(s [,base]) -> int

    Return the integer represented by the string s in the given
    base, which defaults to 10.  The string s must consist of one
    or more digits, possibly preceded by a sign.  If base is 0, it
    is chosen from the leading characters of s, 0 for octal, 0x or
    0X for hexadecimal.  If base is 16, a preceding 0x or 0X is
    accepted.

    Raises TypeError when called without arguments or when s is not
    an ordinary string.

    """
    if not args:
        raise TypeError('function requires at least 1 argument: %d given' %
                        len(args))
    s = args[0]
    if type(s) != _StringType:
        raise TypeError('argument 1: expected string, %s found' %
                        type(s).__name__)
    # A TypeError caused by passing too many arguments is deliberately
    # left to int() itself: its message differs from the historical one
    # but the exception type is the same.
    return _int(*args)
230
231
232# Convert string to long integer
def atol(*args):
    """atol(s [,base]) -> long

    Return the long integer represented by the string s in the
    given base, which defaults to 10.  The string s must consist
    of one or more digits, possibly preceded by a sign.  If base
    is 0, it is chosen from the leading characters of s, 0 for
    octal, 0x or 0X for hexadecimal.  If base is 16, a preceding
    0x or 0X is accepted.  A trailing L or l is not accepted,
    unless base is 0.

    Raises TypeError when called without arguments or when s is not
    an ordinary string.

    """
    if not args:
        raise TypeError('function requires at least 1 argument: %d given' %
                        len(args))
    s = args[0]
    if type(s) != _StringType:
        raise TypeError('argument 1: expected string, %s found' %
                        type(s).__name__)
    # A TypeError caused by passing too many arguments is deliberately
    # left to long() itself: its message differs from the historical one
    # but the exception type is the same.
    return _long(*args)
258
259
260# Left-justify a string
def ljust(s, width):
    """ljust(s, width) -> string

    Return s padded on the right with spaces so that it is width
    characters long.  When s is already at least width long it is
    returned as is; the string is never truncated.

    """
    pad = width - len(s)
    if pad > 0:
        return s + ' ' * pad
    return s
272
273# Right-justify a string
def rjust(s, width):
    """rjust(s, width) -> string

    Return s padded on the left with spaces so that it is width
    characters long.  When s is already at least width long it is
    returned as is; the string is never truncated.

    """
    pad = width - len(s)
    if pad > 0:
        return ' ' * pad + s
    return s
285
286# Center a string
def center(s, width):
    """center(s, width) -> string

    Return a centered version of s, in a field of the specified
    width, padded with spaces as needed.  The string is never
    truncated.

    When the padding cannot be split evenly, the extra space goes on
    the left if width is odd and on the right if width is even.

    """
    n = width - len(s)
    if n <= 0:
        return s
    # divmod() gives an integer quotient whatever the division mode,
    # so the result can always be used as a repeat count.
    half, odd = divmod(n, 2)
    if odd and width % 2:
        # This ensures that center(center(s, i), j) = center(s, j)
        half = half + 1
    return ' ' * half + s + ' ' * (n - half)
302
303# Zero-fill a number, e.g., (12, 3) --> '012' and (-3, 3) --> '-03'
304# Decadent feature: the argument may be a string or a number
305# (Use of this is deprecated; it should be a string as with ljust c.s.)
def zfill(x, width):
    """zfill(x, width) -> string

    Pad a numeric string x with zeros on the left, to fill a field
    of the specified width.  The string x is never truncated.

    A leading '-' or '+' stays in front of the inserted zeros.  If x
    is not a string, its repr() is padded instead (deprecated usage).
    An empty string is padded to width zeros.

    """
    if isinstance(x, type('')):
        s = x
    else:
        s = repr(x)
    n = len(s)
    if n >= width:
        return s
    sign = ''
    # Slice rather than index so that an empty string has no sign
    # instead of raising IndexError.
    if s[:1] in ('-', '+'):
        sign, s = s[0], s[1:]
    return sign + '0' * (width - n) + s
321
322# Expand tabs in a string.
323# Doesn't take non-printing chars into account, but does understand \n.
def expandtabs(s, tabsize=8):
    """expandtabs(s [,tabsize]) -> string

    Return a copy of the string s with all tab characters replaced
    by the appropriate number of spaces, depending on the current
    column, and the tabsize (default 8).

    The column is reset to zero after each newline; no other
    character is treated specially.  A tabsize of zero or less
    simply removes the tabs.

    """
    pieces = []         # output fragments, joined once at the end
    column = 0          # characters emitted since the last newline
    for c in s:
        if c == '\t':
            if tabsize > 0:
                fill = tabsize - column % tabsize
            else:
                # Avoids dividing by zero; a negative tabsize already
                # produced an empty expansion.
                fill = 0
            pieces.append(' ' * fill)
            column = column + fill
        elif c == '\n':
            pieces.append(c)
            column = 0
        else:
            pieces.append(c)
            column = column + 1
    return ''.join(pieces)
341
342# Character translation through look-up table.
def translate(s, table, deletions=""):
    """translate(s,table [,deletechars]) -> string

    Return s.translate(table, deletions): a copy of the string s
    from which every character occurring in the optional argument
    deletechars has been removed, and whose remaining characters
    have been mapped through the given translation table, which
    must be a string of length 256.

    """
    translated = s.translate(table, deletions)
    return translated
353
354# Capitalize a string, e.g. "aBc  dEf" -> "Abc  def".
def capitalize(s):
    """capitalize(s) -> string

    Function form of the string method: return s.capitalize(), a
    copy of s with only its first character capitalized.

    """
    capitalized = s.capitalize()
    return capitalized
363
364# Capitalize the words in a string, e.g. " aBc  dEf " -> "Abc Def".
365# See also regsub.capwords().
def capwords(s, sep=None):
    """capwords(s, [sep]) -> string

    Split the argument into words with s.split(sep), capitalize
    each word using capitalize, and join the capitalized words
    using join.  The words are joined with sep, or with a single
    space when sep is omitted or otherwise false, so in that case
    runs of whitespace characters are replaced by a single space.

    """
    words = s.split(sep)
    capitalized = map(capitalize, words)
    glue = sep or ' '
    return join(capitalized, glue)
376
377# Construct a translation string
# Cached list form of _idmap; built on the first maketrans() call.
_idmapL = None
def maketrans(fromstr, tostr):
    """maketrans(frm, to) -> string

    Return a translation table (a string of 256 bytes long)
    suitable for use in string.translate.  The strings frm and to
    must be of the same length.

    The table maps every character to itself, except that each
    character frm[i] maps to to[i].  Raises ValueError when the two
    arguments differ in length.

    """
    global _idmapL
    if len(fromstr) != len(tostr):
        raise ValueError("maketrans arguments must have same length")
    if not _idmapL:
        _idmapL = list(_idmap)
    # Work on a copy so the cached identity table is never modified.
    table = _idmapL[:]
    for i in range(len(fromstr)):
        table[ord(fromstr[i])] = tostr[i]
    return "".join(table)
397
398# Substring replacement (global)
def replace(s, old, new, maxsplit=0):
    """replace (str, old, new[, maxsplit]) -> string

    Return s.replace(old, new, maxsplit): a copy of string str with
    all occurrences of substring old replaced by new.  If the
    optional argument maxsplit is given, only the first maxsplit
    occurrences are replaced.  maxsplit is forwarded unchanged and
    defaults to 0.

    """
    replaced = s.replace(old, new, maxsplit)
    return replaced
408
409
410# XXX: transitional
411#
412# If string objects do not have methods, then we need to use the old string.py
413# library, which uses strop for many more things than just the few outlined
414# below.
try:
    # Probe only: the attribute lookup fails when string objects have
    # no methods.
    ''.upper
except AttributeError:
    # Without string methods the wrappers above cannot work, so the
    # names exported by the old implementation take their place.
    from stringold import *
419
420# Try importing optional built-in module "strop" -- if it exists,
421# it redefines some string operations that are 100-1000 times faster.
422# It also defines values for whitespace, lowercase and uppercase
423# that match <ctype.h>'s definitions.
424
try:
    from strop import maketrans, lowercase, uppercase, whitespace
    # lowercase and uppercase were just rebound, so letters has to be
    # rebuilt from the new values.
    letters = lowercase + uppercase
except ImportError:
    pass                                          # Use the original versions
430