string.py revision 5aff7752eb28c6ddaa68738ee77e1947b72e1a58
1"""A collection of string operations (most are no longer used in Python 1.6).
2
3Warning: most of the code you see here isn't normally used nowadays.  With
4Python 1.6, many of these functions are implemented as methods on the
5standard string object. They used to be implemented by a built-in module
6called strop, but strop is now obsolete itself.
7
8Public module variables:
9
10whitespace -- a string containing all characters considered whitespace
11lowercase -- a string containing all characters considered lowercase letters
12uppercase -- a string containing all characters considered uppercase letters
13letters -- a string containing all characters considered letters
14digits -- a string containing all characters considered decimal digits
15hexdigits -- a string containing all characters considered hexadecimal digits
16octdigits -- a string containing all characters considered octal digits
17punctuation -- a string containing all characters considered punctuation
18printable -- a string containing all characters considered printable
19
20"""
21
# Character-class strings, in the spirit of the <ctype.h> predicates
whitespace = ' \t\n\r\v\f'
lowercase = 'abcdefghijklmnopqrstuvwxyz'
uppercase = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
letters = lowercase + uppercase
digits = '0123456789'
hexdigits = digits + 'abcdef' + 'ABCDEF'
octdigits = '01234567'
punctuation = """!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~"""
printable = digits + letters + punctuation + whitespace

# Identity table: the 256 characters chr(0) .. chr(255) in order.
# maketrans() starts from a copy of this when building a translation table.
_idmap = ''.join(map(chr, range(256)))

# Old exception names, kept as aliases of ValueError for backward
# compatibility
index_error = atoi_error = atof_error = atol_error = ValueError
43
# Lowercase conversion: UPPER CASE letters become lower case
def lower(s):
    """lower(s) -> string

    Produce a new string equal to s except that every uppercase
    letter has been replaced by its lowercase form.

    """
    lowered = s.lower()
    return lowered
52
# Uppercase conversion: lower case letters become UPPER CASE
def upper(s):
    """upper(s) -> string

    Produce a new string equal to s except that every lowercase
    letter has been replaced by its uppercase form.

    """
    raised = s.upper()
    return raised
61
# Invert the case of every letter
def swapcase(s):
    """swapcase(s) -> string

    Produce a new string from s in which uppercase letters have
    become lowercase and lowercase letters have become uppercase.

    """
    flipped = s.swapcase()
    return flipped
71
# Remove whitespace from both ends of a string
def strip(s):
    """strip(s) -> string

    Produce a new string equal to s without the whitespace at
    its beginning and at its end.

    """
    trimmed = s.strip()
    return trimmed
81
# Remove whitespace from the start of a string
def lstrip(s):
    """lstrip(s) -> string

    Produce a new string equal to s without its leading whitespace.

    """
    trimmed = s.lstrip()
    return trimmed
90
# Remove whitespace from the end of a string
def rstrip(s):
    """rstrip(s) -> string

    Produce a new string equal to s without its trailing
    whitespace.

    """
    trimmed = s.rstrip()
    return trimmed
100
101
# Break a string into a list of words.
# NB: split(s) and split(s, ' ') differ -- without sep, any run of
# whitespace separates words; with sep=' ', every single space does.
def split(s, sep=None, maxsplit=-1):
    """split(s [,sep [,maxsplit]]) -> list of strings

    Break s apart at each occurrence of the delimiter string sep and
    return the pieces as a list.  When sep is omitted (or None), any
    run of whitespace acts as the delimiter.  sep and maxsplit are
    handed straight to s.split(); the default maxsplit of -1 places
    no limit on the number of splits.

    (splitfields is another name for split)

    """
    pieces = s.split(sep, maxsplit)
    return pieces
splitfields = split
117
# Glue a sequence of strings together with a separator between them
def join(words, sep = ' '):
    """join(list [,sep]) -> string

    Concatenate the strings in list, placing sep between each
    adjacent pair, and return the result.  When sep is omitted a
    single space is used.

    (joinfields is another name for join)

    """
    glued = sep.join(words)
    return glued
joinfields = join
131
# Locate the first occurrence of a substring; failure is an exception
def index(s, *args):
    """index(s, sub [,start [,end]]) -> int

    Same as find, except that ValueError is raised when the
    substring does not occur.

    """
    locate = s.index
    return locate(*args)
140
# Locate the last occurrence of a substring; failure is an exception
def rindex(s, *args):
    """rindex(s, sub [,start [,end]]) -> int

    Same as rfind, except that ValueError is raised when the
    substring does not occur.

    """
    locate = s.rindex
    return locate(*args)
149
# Tally the non-overlapping occurrences of a substring
def count(s, *args):
    """count(s, sub[, start[,end]]) -> int

    Report how many times the substring sub occurs in
    s[start:end].  The optional start and end arguments follow
    slice conventions.

    """
    tally = s.count
    return tally(*args)
160
# Locate the first occurrence of a substring; failure yields -1
def find(s, *args):
    """find(s, sub [,start [,end]]) -> int

    Report the smallest index in s at which the substring sub
    begins, with sub lying entirely inside s[start:end].  The
    optional start and end arguments follow slice conventions.

    The result is -1 when sub does not occur.

    """
    search = s.find
    return search(*args)
173
# Locate the last occurrence of a substring; failure yields -1
def rfind(s, *args):
    """rfind(s, sub [,start [,end]]) -> int

    Report the largest index in s at which the substring sub
    begins, with sub lying entirely inside s[start:end].  The
    optional start and end arguments follow slice conventions.

    The result is -1 when sub does not occur.

    """
    search = s.rfind
    return search(*args)
186
# for a bit of speed: module-level aliases of the builtin conversion
# functions; atof(), atoi() and atol() below call through these names.
_float = float
_int = int
_long = long
_StringType = type('')  # type object of a plain string literal
192
# String -> floating point number
def atof(s):
    """atof(s) -> float

    Convert the string s to the floating point number it
    represents and return that number.

    """
    value = _float(s)
    return value
201
202
# String -> integer
def atoi(s, base=10):
    """atoi(s [,base]) -> int

    Convert the string s to the integer it represents in the given
    base and return that integer.  The base defaults to 10.  s has
    to be made up of one or more digits, optionally preceded by a
    sign.  With base 0 the base is taken from the start of s: a
    leading 0 means octal, a leading 0x or 0X means hexadecimal.
    With base 16 a leading 0x or 0X is allowed.

    """
    value = _int(s, base)
    return value
216
217
# String -> long integer
def atol(s, base=10):
    """atol(s [,base]) -> long

    Convert the string s to the long integer it represents in the
    given base and return that long integer.  The base defaults to
    10.  s has to be made up of one or more digits, optionally
    preceded by a sign.  With base 0 the base is taken from the
    start of s: a leading 0 means octal, a leading 0x or 0X means
    hexadecimal.  With base 16 a leading 0x or 0X is allowed.  A
    trailing L or l is only accepted when base is 0.

    """
    value = _long(s, base)
    return value
232
233
# Pad a string on the right so that it sits at the left of a field
def ljust(s, width):
    """ljust(s, width) -> string

    Place s at the left edge of a field width characters wide,
    filling the remainder with spaces, and return the result.  A
    string longer than width is returned whole, never cut short.

    """
    padded = s.ljust(width)
    return padded
244
# Pad a string on the left so that it sits at the right of a field
def rjust(s, width):
    """rjust(s, width) -> string

    Place s at the right edge of a field width characters wide,
    filling the remainder with spaces, and return the result.  A
    string longer than width is returned whole, never cut short.

    """
    padded = s.rjust(width)
    return padded
255
# Pad a string on both sides so that it sits in the middle of a field
def center(s, width):
    """center(s, width) -> string

    Place s in the middle of a field width characters wide,
    filling both sides with spaces, and return the result.  A
    string longer than width is returned whole, never cut short.

    """
    padded = s.center(width)
    return padded
266
# Zero-fill a number, e.g., (12, 3) --> '012' and (-3, 3) --> '-03'
# Decadent feature: the argument may be a string or a number
# (Use of this is deprecated; it should be a string as with ljust c.s.)
def zfill(x, width):
    """zfill(x, width) -> string

    Pad a numeric string x with zeros on the left, to fill a field
    of the specified width.  The string x is never truncated.  A
    leading '+' or '-' sign is kept in front of the inserted zeros.
    If x is not a string, its repr() is padded instead (deprecated
    usage).  An empty string yields a string of width zeros.

    """
    if isinstance(x, type('')):
        s = x
    else:
        s = repr(x)
    n = len(s)
    if n >= width:
        return s
    sign = ''
    # Slice instead of indexing: s may be empty here (e.g. zfill('', 3)),
    # and s[0] would raise IndexError in that case.
    if s[:1] in ('-', '+'):
        sign, s = s[0], s[1:]
    return sign + '0' * (width - n) + s
285
# Replace tab characters by spaces.
# Non-printing characters are not accounted for, but \n is understood.
def expandtabs(s, tabsize=8):
    """expandtabs(s [,tabsize]) -> string

    Produce a new string from s in which each tab character has
    been replaced by as many spaces as the current column and the
    tabsize (8 unless given) call for.

    """
    expanded = s.expandtabs(tabsize)
    return expanded
297
# Map characters through a look-up table, optionally deleting some first.
def translate(s, table, deletions=""):
    """translate(s,table [,deletions]) -> string

    Produce a new string from s: every character that occurs in
    the optional argument deletions is dropped, and each remaining
    character is looked up in the translation table, which must be
    a string of length 256.  Unicode strings do not allow the
    deletions argument.

    """
    if not deletions:
        # Appending s[:0] converts an 8-bit table to Unicode when s is
        # Unicode.  As a consequence table *cannot* be a dictionary
        # here -- call u.translate() directly for that feature.
        return s.translate(table + s[:0])
    return s.translate(table, deletions)
316
# Capitalize the first character only, e.g. "aBc  dEf" -> "Abc  def".
def capitalize(s):
    """capitalize(s) -> string

    Produce a new string from s in which just the first
    character is capitalized.

    """
    capped = s.capitalize()
    return capped
326
# Capitalize every word of a string, e.g. " aBc  dEf " -> "Abc Def".
# See also regsub.capwords().
def capwords(s, sep=None):
    """capwords(s, [sep]) -> string

    Break s into words with split, run each word through
    capitalize, and put the results back together with join.
    When sep is omitted (or None), words are separated by runs of
    whitespace and are rejoined with a single space; otherwise sep
    is used both to split and to join.

    """
    words = s.split(sep)
    capitalized = map(capitalize, words)
    return join(capitalized, sep or ' ')
339
# Build a translation table for translate()
_idmapL = None
def maketrans(fromstr, tostr):
    """maketrans(frm, to) -> string

    Build and return a translation table (a string 256 characters
    long) that can be passed to string.translate.  Each character
    of frm is mapped to the character at the same position in to;
    all other characters map to themselves.  frm and to must have
    equal length, otherwise ValueError is raised.

    """
    global _idmapL
    if len(fromstr) != len(tostr):
        raise ValueError("maketrans arguments must have same length")
    if not _idmapL:
        # First call: cache the identity table as a list of characters
        _idmapL = list(_idmap)
    table = _idmapL[:]
    for pos in range(len(fromstr)):
        table[ord(fromstr[pos])] = tostr[pos]
    return joinfields(table, "")
360
# Replace occurrences of a substring throughout a string
def replace(s, old, new, maxsplit=-1):
    """replace (str, old, new[, maxsplit]) -> string

    Produce a new string from str in which occurrences of the
    substring old have been replaced by new.  Without maxsplit
    every occurrence is replaced; with it, only the first
    maxsplit occurrences are.

    """
    substituted = s.replace(old, new, maxsplit)
    return substituted
371
372
# Try importing optional built-in module "strop" -- if it exists,
# it redefines some string operations that are 100-1000 times faster.
# It also defines values for whitespace, lowercase and uppercase
# that match <ctype.h>'s definitions.
#
# Only maketrans and the three character-class strings are taken from
# strop here; they replace the definitions made earlier in this module.
# If strop is unavailable the pure-Python definitions stay in effect.

try:
    from strop import maketrans, lowercase, uppercase, whitespace
    # Rebuild letters so it agrees with the lowercase/uppercase just imported.
    # NOTE: printable is not rebuilt in this block.
    letters = lowercase + uppercase
except ImportError:
    pass                                          # Use the original versions
383