1# module 'string' -- A collection of string operations
2
3# Warning: most of the code you see here isn't normally used nowadays.  With
4# Python 1.6, many of these functions are implemented as methods on the
5# standard string object. They used to be implemented by a built-in module
6# called strop, but strop is now obsolete itself.
7
8"""Common string manipulations.
9
10Public module variables:
11
12whitespace -- a string containing all characters considered whitespace
13lowercase -- a string containing all characters considered lowercase letters
14uppercase -- a string containing all characters considered uppercase letters
15letters -- a string containing all characters considered letters
16digits -- a string containing all characters considered decimal digits
17hexdigits -- a string containing all characters considered hexadecimal digits
18octdigits -- a string containing all characters considered octal digits
19
20"""
# Issue the "removed in Python 3.0" warning as soon as this module is
# imported.  stacklevel=2 asks the warnings machinery to report the warning
# against the caller's frame rather than against this line.
from warnings import warnpy3k
warnpy3k("the stringold module has been removed in Python 3.0", stacklevel=2)
# Drop the helper so it does not linger as an attribute of this module.
del warnpy3k
24
# Character-class strings, used for ctype-style classification.
# Digit classes first, then the alphabetic ones, then whitespace.
digits = '0123456789'
octdigits = '01234567'
hexdigits = digits + 'abcdef' + 'ABCDEF'
lowercase = 'abcdefghijklmnopqrstuvwxyz'
uppercase = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
letters = lowercase + uppercase
whitespace = ' \t\n\r\v\f'
33
# Case conversion helpers
# Identity mapping: a 256-character string whose character at position n is
# chr(n).  Built with a single join instead of 256 string concatenations, so
# no loop variable is left behind that would need deleting afterwards.
_idmap = ''.join(map(chr, range(256)))
38
# Backward compatible names for exceptions: all four historical error names
# are plain aliases of the built-in ValueError.
index_error = atoi_error = atof_error = atol_error = ValueError
44
# Lowercase conversion (thin wrapper around the string method)
def lower(s):
    """lower(s) -> string

    Return a lowercased copy of the string s, as produced by
    s.lower().

    """
    lowered = s.lower()
    return lowered
53
# Uppercase conversion (thin wrapper around the string method)
def upper(s):
    """upper(s) -> string

    Return an uppercased copy of the string s, as produced by
    s.upper().

    """
    uppered = s.upper()
    return uppered
62
# Exchange the case of every letter (thin wrapper around the string method)
def swapcase(s):
    """swapcase(s) -> string

    Return a copy of the string s in which lowercase letters
    become uppercase and uppercase letters become lowercase.

    """
    swapped = s.swapcase()
    return swapped
72
# Remove whitespace from both ends of a string
def strip(s):
    """strip(s) -> string

    Return a copy of the string s without its leading and
    trailing whitespace.

    """
    stripped = s.strip()
    return stripped
82
# Remove whitespace from the start of a string
def lstrip(s):
    """lstrip(s) -> string

    Return a copy of the string s without its leading whitespace.

    """
    stripped = s.lstrip()
    return stripped
91
# Remove whitespace from the end of a string
def rstrip(s):
    """rstrip(s) -> string

    Return a copy of the string s without its trailing
    whitespace.

    """
    stripped = s.rstrip()
    return stripped
101
102
# Split a string into a list of words
def split(s, sep=None, maxsplit=0):
    """split(str [,sep [,maxsplit]]) -> list of strings

    Return a list of the words in the string s, using sep as the
    delimiter string.  If sep is not specified or is None, any
    whitespace string is a separator.  If maxsplit is nonzero, at
    most maxsplit splits are done (giving at most maxsplit+1
    words).  Maxsplit defaults to 0, which means no limit.

    (split and splitfields are synonymous)

    """
    # The string method treats 0 as "perform no splits at all" and a
    # negative value as "no limit"; this function documents 0 as "no
    # limit", so translate before delegating.
    if maxsplit <= 0:
        maxsplit = -1
    return s.split(sep, maxsplit)
splitfields = split
117
# Concatenate a sequence of words, placing a separator between them
def join(words, sep = ' '):
    """join(list [,sep]) -> string

    Return the words in list glued together into one string,
    with sep placed between adjacent words.  When sep is omitted
    a single space is used.

    (joinfields and join are synonymous)

    """
    joined = sep.join(words)
    return joined
joinfields = join
131
# for a little bit of speed
# Private module-level alias of the builtin apply(), which calls a function
# with its positional arguments supplied as a tuple.
_apply = apply
134
# Find substring, raise exception if not found
def index(s, *args):
    """index(s, sub [,start [,end]]) -> int

    Like find but raises ValueError when the substring is not found.

    """
    # Forward sub/start/end unchanged with extended call syntax rather
    # than through the deprecated apply() builtin.
    return s.index(*args)
143
# Find last substring, raise exception if not found
def rindex(s, *args):
    """rindex(s, sub [,start [,end]]) -> int

    Like rfind but raises ValueError when the substring is not found.

    """
    # Forward sub/start/end unchanged with extended call syntax rather
    # than through the deprecated apply() builtin.
    return s.rindex(*args)
152
# Count non-overlapping occurrences of substring
def count(s, *args):
    """count(s, sub[, start[,end]]) -> int

    Return the number of occurrences of substring sub in string
    s[start:end].  Optional arguments start and end are
    interpreted as in slice notation.

    """
    # Forward sub/start/end unchanged with extended call syntax rather
    # than through the deprecated apply() builtin.
    return s.count(*args)
163
# Find substring, return -1 if not found
def find(s, *args):
    """find(s, sub [,start [,end]]) -> int

    Return the lowest index in s where substring sub is found,
    such that sub is contained within s[start:end].  Optional
    arguments start and end are interpreted as in slice notation.

    Return -1 on failure.

    """
    # Forward sub/start/end unchanged with extended call syntax rather
    # than through the deprecated apply() builtin.
    return s.find(*args)
176
# Find last substring, return -1 if not found
def rfind(s, *args):
    """rfind(s, sub [,start [,end]]) -> int

    Return the highest index in s where substring sub is found,
    such that sub is contained within s[start:end].  Optional
    arguments start and end are interpreted as in slice notation.

    Return -1 on failure.

    """
    # Forward sub/start/end unchanged with extended call syntax rather
    # than through the deprecated apply() builtin.
    return s.rfind(*args)
189
# for a bit of speed
# Private module-level aliases of the builtin conversion types, plus the
# type object of a plain string literal.
_float = float
_int = int
_long = long
_StringType = type('')
195
# Convert string to float
def atof(s):
    """atof(s) -> float

    Return the floating point number represented by the string s.

    Raises TypeError if s is not a string; float() itself raises
    ValueError for a string that does not spell a number.

    """
    # isinstance (rather than an exact type comparison) also admits
    # subclasses of str.
    if not isinstance(s, str):
        raise TypeError('argument 1: expected string, %s found' %
                        type(s).__name__)
    return float(s)
208
# Convert string to integer
def atoi(*args):
    """atoi(s [,base]) -> int

    Return the integer represented by the string s in the given
    base, which defaults to 10.  The string s must consist of one
    or more digits, possibly preceded by a sign.  If base is 0, it
    is chosen from the leading characters of s, 0 for octal, 0x or
    0X for hexadecimal.  If base is 16, a preceding 0x or 0X is
    accepted.

    Raises TypeError when called without arguments or when s is not
    a string.

    """
    if not args:
        raise TypeError('function requires at least 1 argument: %d given' %
                        len(args))
    s = args[0]
    # isinstance (rather than an exact type comparison) also admits
    # subclasses of str.
    if not isinstance(s, str):
        raise TypeError('argument 1: expected string, %s found' %
                        type(s).__name__)
    # Don't catch type error resulting from too many arguments to int().  The
    # error message isn't compatible but the error type is.
    return int(*args)
234
235
# Convert string to long integer
def atol(*args):
    """atol(s [,base]) -> long

    Return the long integer represented by the string s in the
    given base, which defaults to 10.  The string s must consist
    of one or more digits, possibly preceded by a sign.  If base
    is 0, it is chosen from the leading characters of s, 0 for
    octal, 0x or 0X for hexadecimal.  If base is 16, a preceding
    0x or 0X is accepted.  A trailing L or l is not accepted,
    unless base is 0.

    Raises TypeError when called without arguments or when s is not
    a string.

    """
    if not args:
        raise TypeError('function requires at least 1 argument: %d given' %
                        len(args))
    s = args[0]
    # isinstance (rather than an exact type comparison) also admits
    # subclasses of str.
    if not isinstance(s, str):
        raise TypeError('argument 1: expected string, %s found' %
                        type(s).__name__)
    # Don't catch type error resulting from too many arguments to long().  The
    # error message isn't compatible but the error type is.
    return long(*args)
262
263
# Pad a string on the right so that it is flush left in its field
def ljust(s, width):
    """ljust(s, width) -> string

    Return s followed by as many spaces as are needed to reach
    the given width.  A string already at least that wide is
    returned unchanged; nothing is ever cut off.

    """
    missing = width - len(s)
    if missing > 0:
        return s + ' ' * missing
    return s
276
# Pad a string on the left so that it is flush right in its field
def rjust(s, width):
    """rjust(s, width) -> string

    Return s preceded by as many spaces as are needed to reach
    the given width.  A string already at least that wide is
    returned unchanged; nothing is ever cut off.

    """
    missing = width - len(s)
    if missing > 0:
        return ' ' * missing + s
    return s
289
# Center a string
def center(s, width):
    """center(s, width) -> string

    Return a centered version of s, in a field of the specified
    width, padded with spaces as needed.  The string is never
    truncated.

    """
    n = width - len(s)
    if n <= 0:
        return s
    # Explicit floor division: the left padding must be an integer
    # count whether or not true division is in effect.
    half = n // 2
    if n % 2 and width % 2:
        # This ensures that center(center(s, i), j) = center(s, j)
        half = half + 1
    return ' ' * half + s + ' ' * (n - half)
306
# Zero-fill a number, e.g., (12, 3) --> '012' and (-3, 3) --> '-03'
# Decadent feature: the argument may be a string or a number
# (Use of this is deprecated; it should be a string as with ljust c.s.)
def zfill(x, width):
    """zfill(x, width) -> string

    Pad a numeric string x with zeros on the left, to fill a field
    of the specified width.  The string x is never truncated.  A
    leading '+' or '-' sign stays in front of the inserted zeros.
    If x is not a string, its repr() is padded instead.

    """
    if isinstance(x, str):
        s = x
    else:
        s = repr(x)
    n = len(s)
    if n >= width:
        return s
    sign = ''
    # Slice instead of indexing so that an empty string is simply padded
    # rather than raising IndexError.
    if s[:1] in ('-', '+'):
        sign, s = s[0], s[1:]
    return sign + '0' * (width - n) + s
325
# Expand tabs in a string.
# Doesn't take non-printing chars into account, but does understand \n.
def expandtabs(s, tabsize=8):
    """expandtabs(s [,tabsize]) -> string

    Return a copy of the string s with all tab characters replaced
    by the appropriate number of spaces, depending on the current
    column, and the tabsize (default 8).  The column is reset to
    zero after each newline.  If tabsize is zero or negative, tab
    characters are simply removed.

    """
    pieces = []
    column = 0  # number of characters emitted since the last newline
    for c in s:
        if c == '\t':
            # A non-positive tabsize has no tab stops to advance to (and
            # zero would divide by zero below), so the tab is dropped.
            if tabsize > 0:
                fill = tabsize - column % tabsize
                pieces.append(' ' * fill)
                column = column + fill
        else:
            pieces.append(c)
            if c == '\n':
                column = 0
            else:
                column = column + 1
    return ''.join(pieces)
345
# Map characters through a look-up table, optionally deleting some first.
def translate(s, table, deletions=""):
    """translate(s,table [,deletechars]) -> string

    Return a copy of the string s from which every character
    listed in the optional argument deletechars has been dropped
    and whose remaining characters have been looked up in the
    translation table, which must be a string of length 256.

    """
    translated = s.translate(table, deletions)
    return translated
357
# Capitalize a string, e.g. "aBc  dEf" -> "Abc  def".
def capitalize(s):
    """capitalize(s) -> string

    Return a copy of the string s in which the first character
    is capitalized and the rest is lowercased, as produced by
    s.capitalize().

    """
    capitalized = s.capitalize()
    return capitalized
367
# Capitalize the words in a string, e.g. " aBc  dEf " -> "Abc Def".
def capwords(s, sep=None):
    """capwords(s, [sep]) -> string

    Break s into words on sep, capitalize every word, and glue
    the words back together with sep (or with a single space when
    sep is None or empty).  With the default separator, runs of
    whitespace therefore collapse into one space.

    """
    glue = sep or ' '
    capitalized = [word.capitalize() for word in s.split(sep)]
    return glue.join(capitalized)
379
# Construct a translation string
# Cached identity table (a list of the 256 single characters), built on the
# first call to maketrans.
_idmapL = None
def maketrans(fromstr, tostr):
    """maketrans(frm, to) -> string

    Return a translation table (a string of 256 bytes long)
    suitable for use in string.translate.  The strings frm and to
    must be of the same length; each character of frm is mapped
    to the character at the same position in to.

    Raises ValueError if the two strings differ in length.

    """
    global _idmapL
    if len(fromstr) != len(tostr):
        raise ValueError("maketrans arguments must have same length")
    if _idmapL is None:
        _idmapL = [chr(code) for code in range(256)]
    # Work on a copy so the cached identity table is never modified.
    table = _idmapL[:]
    for src, dst in zip(fromstr, tostr):
        table[ord(src)] = dst
    return "".join(table)
400
# Substring replacement (global)
def replace(s, old, new, maxsplit=0):
    """replace (str, old, new[, maxsplit]) -> string

    Return a copy of string str with all occurrences of substring
    old replaced by new. If the optional argument maxsplit is
    given and nonzero, only the first maxsplit occurrences are
    replaced.

    """
    # The string method treats a count of 0 as "replace nothing" and a
    # negative count as "replace everything"; the default of 0 here is
    # meant as "no limit", so translate before delegating.
    if maxsplit <= 0:
        maxsplit = -1
    return s.replace(old, new, maxsplit)
411
412
# XXX: transitional
#
# If string objects do not have methods, then we need to use the old string.py
# library, which uses strop for many more things than just the few outlined
# below.
try:
    # Attribute probe only; the looked-up method is discarded.
    ''.upper
except AttributeError:
    # Reached only when string objects have no upper attribute: every public
    # name of stringold then replaces the same-named definition above.
    # NOTE(review): the import-time warning in this file calls this module
    # "stringold" -- confirm this is not importing the module into itself.
    from stringold import *
422
# Try importing optional built-in module "strop" -- if it exists,
# it redefines some string operations that are 100-1000 times faster.
# It also defines values for whitespace, lowercase and uppercase
# that match <ctype.h>'s definitions.

try:
    # On success these four names rebind the pure-Python definitions above.
    from strop import maketrans, lowercase, uppercase, whitespace
    # Recompute letters so that it agrees with the imported case strings.
    letters = lowercase + uppercase
except ImportError:
    pass                                          # Use the original versions
433