1c636014c430620325f8d213e9ba10d925991b8d7Guido van Rossum# module 'string' -- A collection of string operations
2c636014c430620325f8d213e9ba10d925991b8d7Guido van Rossum
3226ae6ca122f814dabdc40178c7b9656caf729c2Barry Warsaw# Warning: most of the code you see here isn't normally used nowadays.  With
4226ae6ca122f814dabdc40178c7b9656caf729c2Barry Warsaw# Python 1.6, many of these functions are implemented as methods on the
5226ae6ca122f814dabdc40178c7b9656caf729c2Barry Warsaw# standard string object. They used to be implemented by a built-in module
6226ae6ca122f814dabdc40178c7b9656caf729c2Barry Warsaw# called strop, but strop is now obsolete itself.
7c636014c430620325f8d213e9ba10d925991b8d7Guido van Rossum
82003204ba71ab3cac54015ef9fc6171f4e825953Guido van Rossum"""Common string manipulations.
92003204ba71ab3cac54015ef9fc6171f4e825953Guido van Rossum
102003204ba71ab3cac54015ef9fc6171f4e825953Guido van RossumPublic module variables:
112003204ba71ab3cac54015ef9fc6171f4e825953Guido van Rossum
122003204ba71ab3cac54015ef9fc6171f4e825953Guido van Rossumwhitespace -- a string containing all characters considered whitespace
132003204ba71ab3cac54015ef9fc6171f4e825953Guido van Rossumlowercase -- a string containing all characters considered lowercase letters
142003204ba71ab3cac54015ef9fc6171f4e825953Guido van Rossumuppercase -- a string containing all characters considered uppercase letters
152003204ba71ab3cac54015ef9fc6171f4e825953Guido van Rossumletters -- a string containing all characters considered letters
162003204ba71ab3cac54015ef9fc6171f4e825953Guido van Rossumdigits -- a string containing all characters considered decimal digits
172003204ba71ab3cac54015ef9fc6171f4e825953Guido van Rossumhexdigits -- a string containing all characters considered hexadecimal digits
182003204ba71ab3cac54015ef9fc6171f4e825953Guido van Rossumoctdigits -- a string containing all characters considered octal digits
192003204ba71ab3cac54015ef9fc6171f4e825953Guido van Rossum
202003204ba71ab3cac54015ef9fc6171f4e825953Guido van Rossum"""
216071cc8fb027d4b7163742d578a1b8666afe1d1cBrett Cannonfrom warnings import warnpy3k
226071cc8fb027d4b7163742d578a1b8666afe1d1cBrett Cannonwarnpy3k("the stringold module has been removed in Python 3.0", stacklevel=2)
236071cc8fb027d4b7163742d578a1b8666afe1d1cBrett Cannondel warnpy3k
242003204ba71ab3cac54015ef9fc6171f4e825953Guido van Rossum
# Character-class strings, in the spirit of C's <ctype.h> predicates
digits = '0123456789'
octdigits = '01234567'
hexdigits = digits + 'abcdef' + 'ABCDEF'
lowercase = 'abcdefghijklmnopqrstuvwxyz'
uppercase = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
letters = lowercase + uppercase
whitespace = ' \t\n\r\v\f'
33c636014c430620325f8d213e9ba10d925991b8d7Guido van Rossum
# Identity translation table: the 256 characters chr(0)..chr(255), in order.
# Built with a single join instead of 256 successive concatenations, which
# also means no loop variable is left behind to clean up.
_idmap = ''.join(map(chr, range(256)))
38c636014c430620325f8d213e9ba10d925991b8d7Guido van Rossum
# Backward compatible names for exceptions: every legacy error name is
# simply another name for ValueError.
index_error = atoi_error = atof_error = atol_error = ValueError
44710c352d058856830e7c003bce7cccd4c10024eaGuido van Rossum
# Convert UPPER CASE letters to lower case
def lower(s):
    """lower(s) -> string

    Return a lowercased copy of the string s, as produced by the
    string's own lower() method.

    """
    lowered = s.lower()
    return lowered
53c636014c430620325f8d213e9ba10d925991b8d7Guido van Rossum
# Convert lower case letters to UPPER CASE
def upper(s):
    """upper(s) -> string

    Return an uppercased copy of the string s, as produced by the
    string's own upper() method.

    """
    uppered = s.upper()
    return uppered
62c636014c430620325f8d213e9ba10d925991b8d7Guido van Rossum
# Swap lower case letters and UPPER CASE
def swapcase(s):
    """swapcase(s) -> string

    Return a copy of the string s in which uppercase characters
    have become lowercase and lowercase characters uppercase.

    """
    swapped = s.swapcase()
    return swapped
72c636014c430620325f8d213e9ba10d925991b8d7Guido van Rossum
# Strip leading and trailing whitespace
def strip(s):
    """strip(s) -> string

    Return a copy of the string s from which whitespace at both
    ends has been removed.

    """
    stripped = s.strip()
    return stripped
82c636014c430620325f8d213e9ba10d925991b8d7Guido van Rossum
# Strip leading whitespace
def lstrip(s):
    """lstrip(s) -> string

    Return a copy of the string s from which whitespace at the
    start has been removed.

    """
    stripped = s.lstrip()
    return stripped
91306a8a633800f2f37d054fbf48a3e0628faf3073Guido van Rossum
# Strip trailing whitespace
def rstrip(s):
    """rstrip(s) -> string

    Return a copy of the string s from which whitespace at the
    end has been removed.

    """
    stripped = s.rstrip()
    return stripped
101306a8a633800f2f37d054fbf48a3e0628faf3073Guido van Rossum
102306a8a633800f2f37d054fbf48a3e0628faf3073Guido van Rossum
# Split a string into a list of words
def split(s, sep=None, maxsplit=0):
    """split(str [,sep [,maxsplit]]) -> list of strings

    Return a list of the words in the string s, using sep as the
    delimiter string.  If sep is None or not specified, any
    whitespace string is a separator.  If maxsplit is nonzero, at
    most maxsplit splits are done, so the result has at most
    maxsplit+1 items.  Maxsplit defaults to 0, which means there is
    no limit on the number of splits.

    (split and splitfields are synonymous)

    """
    if maxsplit == 0:
        # The string method reads a count of 0 literally ("do not split
        # at all"); here 0 is the documented "no limit" default, so the
        # count must not be forwarded.
        return s.split(sep)
    return s.split(sep, maxsplit)
splitfields = split
117fac38b7c40caac93f119e3af90ced0edd3d963dcGuido van Rossum
# Join fields with optional separator
def join(words, sep = ' '):
    """join(list [,sep]) -> string

    Return the words in list glued together into one string, with
    sep placed between neighbouring words.  When sep is omitted a
    single space is used.

    (joinfields and join are synonymous)

    """
    joined = sep.join(words)
    return joined
joinfields = join
131fac38b7c40caac93f119e3af90ced0edd3d963dcGuido van Rossum
# Private module-level alias for the builtin apply(), kept for a little
# bit of speed.
_apply = apply
1342003204ba71ab3cac54015ef9fc6171f4e825953Guido van Rossum
# Find substring, raise exception if not found
def index(s, *args):
    """index(s, sub [,start [,end]]) -> int

    Like find but raises ValueError when the substring is not found.

    The arguments after s are handed unchanged to the index() method
    of s.

    """
    # Extended call syntax replaces the deprecated apply() builtin.
    return s.index(*args)
143d316607732aa70361d5793f6b301b70fab7ca367Guido van Rossum
# Find last substring, raise exception if not found
def rindex(s, *args):
    """rindex(s, sub [,start [,end]]) -> int

    Like rfind but raises ValueError when the substring is not found.

    The arguments after s are handed unchanged to the rindex() method
    of s.

    """
    # Extended call syntax replaces the deprecated apply() builtin.
    return s.rindex(*args)
152b6775db241f5fe5e3dc2ca09fc6c9e6164d4b2afGuido van Rossum
# Count non-overlapping occurrences of substring
def count(s, *args):
    """count(s, sub[, start[,end]]) -> int

    Return the number of occurrences of substring sub in string
    s[start:end].  Optional arguments start and end are
    interpreted as in slice notation.

    The arguments after s are handed unchanged to the count() method
    of s.

    """
    # Extended call syntax replaces the deprecated apply() builtin.
    return s.count(*args)
163e65cce5eec23812d77a54095209c923937cc3c92Guido van Rossum
# Find substring, return -1 if not found
def find(s, *args):
    """find(s, sub [,start [,end]]) -> int

    Return the lowest index in s where substring sub is found,
    such that sub is contained within s[start:end].  Optional
    arguments start and end are interpreted as in slice notation.

    Return -1 on failure.

    """
    # Extended call syntax replaces the deprecated apply() builtin.
    return s.find(*args)
176c636014c430620325f8d213e9ba10d925991b8d7Guido van Rossum
# Find last substring, return -1 if not found
def rfind(s, *args):
    """rfind(s, sub [,start [,end]]) -> int

    Return the highest index in s where substring sub is found,
    such that sub is contained within s[start:end].  Optional
    arguments start and end are interpreted as in slice notation.

    Return -1 on failure.

    """
    # Extended call syntax replaces the deprecated apply() builtin.
    return s.rfind(*args)
189226ae6ca122f814dabdc40178c7b9656caf729c2Barry Warsaw
# Private module-level aliases for builtins, kept for a bit of speed.
_float = float
_int = int
_long = long
# The type object of an ordinary string literal.
_StringType = type('')
195d0753e20b256057a6320e95e43974f053f4123f1Guido van Rossum
# Convert string to float
def atof(s):
    """atof(s) -> float

    Return the floating point number represented by the string s.

    Raises TypeError when s is not a string; whatever float() raises
    for an unparsable string is passed through.

    """
    # isinstance() rather than an exact type comparison, so that
    # instances of str subclasses are accepted as strings too.
    if not isinstance(s, str):
        raise TypeError('argument 1: expected string, %s found' %
                        type(s).__name__)
    return float(s)
208e61fa0a1e4c6598f286f54772c7e065c49dc17baGuido van Rossum
# Convert string to integer
def atoi(*args):
    """atoi(s [,base]) -> int

    Return the integer represented by the string s in the given
    base, which defaults to 10.  The string s must consist of one
    or more digits, possibly preceded by a sign.  If base is 0, it
    is chosen from the leading characters of s, 0 for octal, 0x or
    0X for hexadecimal.  If base is 16, a preceding 0x or 0X is
    accepted.

    Raises TypeError when called without arguments or when s is not
    a string.

    """
    if not args:
        raise TypeError('function requires at least 1 argument: %d given' %
                        len(args))
    s = args[0]
    # isinstance() rather than an exact type comparison, so that
    # instances of str subclasses are accepted as strings too.
    if not isinstance(s, str):
        raise TypeError('argument 1: expected string, %s found' %
                        type(s).__name__)
    # Too many arguments are deliberately left for int() itself to reject:
    # the error message differs but the error type (TypeError) is the same.
    return int(*args)
234226ae6ca122f814dabdc40178c7b9656caf729c2Barry Warsaw
235c636014c430620325f8d213e9ba10d925991b8d7Guido van Rossum
# Convert string to long integer
def atol(*args):
    """atol(s [,base]) -> long

    Return the long integer represented by the string s in the
    given base, which defaults to 10.  The string s must consist
    of one or more digits, possibly preceded by a sign.  If base
    is 0, it is chosen from the leading characters of s, 0 for
    octal, 0x or 0X for hexadecimal.  If base is 16, a preceding
    0x or 0X is accepted.  A trailing L or l is not accepted,
    unless base is 0.

    Raises TypeError when called without arguments or when s is not
    a string.

    """
    if not args:
        raise TypeError('function requires at least 1 argument: %d given' %
                        len(args))
    s = args[0]
    # isinstance() rather than an exact type comparison, so that
    # instances of str subclasses are accepted as strings too.
    if not isinstance(s, str):
        raise TypeError('argument 1: expected string, %s found' %
                        type(s).__name__)
    # Too many arguments are deliberately left for long() itself to reject:
    # the error message differs but the error type (TypeError) is the same.
    # _long is this module's alias for the builtin long.
    return _long(*args)
262226ae6ca122f814dabdc40178c7b9656caf729c2Barry Warsaw
263e61fa0a1e4c6598f286f54772c7e065c49dc17baGuido van Rossum
# Left-justify a string
def ljust(s, width):
    """ljust(s, width) -> string

    Return s followed by as many spaces as it takes to reach the
    given field width.  A string already at least that long is
    returned as is; nothing is ever cut off.

    """
    pad = width - len(s)
    if pad > 0:
        return s + ' ' * pad
    return s
276c636014c430620325f8d213e9ba10d925991b8d7Guido van Rossum
# Right-justify a string
def rjust(s, width):
    """rjust(s, width) -> string

    Return s preceded by as many spaces as it takes to reach the
    given field width.  A string already at least that long is
    returned as is; nothing is ever cut off.

    """
    pad = width - len(s)
    if pad > 0:
        return ' ' * pad + s
    return s
289c636014c430620325f8d213e9ba10d925991b8d7Guido van Rossum
# Center a string
def center(s, width):
    """center(s, width) -> string

    Return a centered version of s, in a field of the specified
    width, padded with spaces as needed.  The string is never
    truncated.

    """
    n = width - len(s)
    if n <= 0:
        return s
    # Floor division: the padding must be a whole number of spaces no
    # matter which division semantics are in effect.
    half = n // 2
    if n % 2 and width % 2:
        # This ensures that center(center(s, i), j) = center(s, j)
        half = half + 1
    return ' ' * half + s + ' ' * (n - half)
306c636014c430620325f8d213e9ba10d925991b8d7Guido van Rossum
# Zero-fill a number, e.g., (12, 3) --> '012' and (-3, 3) --> '-03'
# Decadent feature: the argument may be a string or a number
# (Use of this is deprecated; it should be a string as with ljust c.s.)
def zfill(x, width):
    """zfill(x, width) -> string

    Pad a numeric string x with zeros on the left, to fill a field
    of the specified width.  The string x is never truncated.  A
    leading '-' or '+' sign stays in front of the inserted zeros.

    If x is not a string, its repr() is padded instead.

    """
    if isinstance(x, str):
        s = x
    else:
        s = repr(x)
    n = len(s)
    if n >= width:
        return s
    sign = ''
    # Slice rather than index, so an empty string (which has no first
    # character) is padded like any other instead of raising IndexError.
    if s[:1] in ('-', '+'):
        sign, s = s[0], s[1:]
    return sign + '0' * (width - n) + s
3256ff2e90c518113433b3ecf34b8c17d434079a414Guido van Rossum
# Expand tabs in a string.
# Doesn't take non-printing chars into account, but does understand \n.
def expandtabs(s, tabsize=8):
    """expandtabs(s [,tabsize]) -> string

    Return a copy of the string s with all tab characters replaced
    by the appropriate number of spaces, depending on the current
    column, and the tabsize (default 8).  The column count starts
    over after every newline character.

    """
    # Collect the output pieces and join them once at the end, tracking
    # the current column as a number, instead of growing strings one
    # character at a time.
    pieces = []
    column = 0
    for c in s:
        if c == '\t':
            # Spaces needed to reach the next multiple of tabsize.
            c = ' ' * (tabsize - column % tabsize)
        pieces.append(c)
        if c == '\n':
            column = 0
        else:
            column = column + len(c)
    return ''.join(pieces)
3452db91358def94cf8081f27b736988320d14eba39Guido van Rossum
# Character translation through look-up table.
def translate(s, table, deletions=""):
    """translate(s,table [,deletechars]) -> string

    Return a copy of the string s from which every character that
    occurs in the optional argument deletechars has been dropped,
    and whose remaining characters have been mapped through the
    given translation table, which must be a string of length 256.

    """
    translated = s.translate(table, deletions)
    return translated
3572db91358def94cf8081f27b736988320d14eba39Guido van Rossum
# Capitalize a string, e.g. "aBc  dEf" -> "Abc  def".
def capitalize(s):
    """capitalize(s) -> string

    Return a copy of the string s in which only the first character
    is capitalized.

    """
    capitalized = s.capitalize()
    return capitalized
3678775d8b9dc0e3a199b1da57ed481a417f047e1cfGuido van Rossum
# Capitalize the words in a string, e.g. " aBc  dEf " -> "Abc Def".
def capwords(s, sep=None):
    """capwords(s, [sep]) -> string

    Break the argument into words on sep, capitalize every word,
    and glue the capitalized words back together with sep, or with
    a single space when sep is empty or not given.  Note that in
    the default case runs of whitespace characters therefore
    collapse into a single space.

    """
    glue = sep or ' '
    words = [word.capitalize() for word in s.split(sep)]
    return glue.join(words)
3798775d8b9dc0e3a199b1da57ed481a417f047e1cfGuido van Rossum
380ed7253ca507ab2755400b3aab612e18efc7249e2Guido van Rossum# Construct a translation string
# Lazily built list form of _idmap; copied and patched on every call.
_idmapL = None
def maketrans(fromstr, tostr):
    """maketrans(fromstr, tostr) -> string

    Return a translation table suitable for use in string.translate.
    The table starts as a copy of the identity map _idmap; then, for
    each position i, the entry for character fromstr[i] is replaced
    by tostr[i].  The strings fromstr and tostr must be of the same
    length.

    Raises ValueError if the two arguments differ in length.

    """
    global _idmapL
    if len(fromstr) != len(tostr):
        raise ValueError("maketrans arguments must have same length")
    if not _idmapL:
        # Build the list form of the identity map once and cache it.
        _idmapL = list(_idmap)
    # Work on a copy so the cached identity list is never modified.
    table = _idmapL[:]
    for src, dst in zip(fromstr, tostr):
        table[ord(src)] = dst
    return "".join(table)
4008775d8b9dc0e3a199b1da57ed481a417f047e1cfGuido van Rossum
4011eb9a81eb92dd5d133c8584bad8aecd5b8f0ccd1Guido van Rossum# Substring replacement (global)
def replace(s, old, new, maxsplit=0):
    """replace (str, old, new[, maxsplit]) -> string

    Return a copy of string str with all occurrences of substring
    old replaced by new. If the optional argument maxsplit is
    given and is not 0, only the first maxsplit occurrences are
    replaced.  A maxsplit of 0 (the default) means no limit.

    """
    # Do not forward the "no limit" default of 0 to the string method:
    # a replace() method that takes a count of 0 literally would
    # replace nothing, turning the default call into a no-op.
    if maxsplit == 0:
        return s.replace(old, new)
    return s.replace(old, new, maxsplit)
4111eb9a81eb92dd5d133c8584bad8aecd5b8f0ccd1Guido van Rossum
4121eb9a81eb92dd5d133c8584bad8aecd5b8f0ccd1Guido van Rossum
# XXX: transitional
#
# If string objects do not have methods, then we need to use the old string.py
# library, which uses strop for many more things than just the few outlined
# below.
#
# The bare attribute access ''.upper is only a probe: it raises
# AttributeError when string objects have no methods.  In that case every
# public name from stringold is imported here; because this runs after the
# definitions above, those imported names replace the method-based versions.
try:
    ''.upper
except AttributeError:
    from stringold import *
422226ae6ca122f814dabdc40178c7b9656caf729c2Barry Warsaw
# Try importing the optional built-in module "strop".  If it exists, its
# maketrans replaces the pure-Python version defined above, and its values
# for whitespace, lowercase and uppercase (which match <ctype.h>'s
# definitions) replace the ones defined in this module.  letters is then
# recomputed so that it stays consistent with the imported lowercase and
# uppercase.  If strop is not available, the definitions made earlier in
# this module are left untouched.

try:
    from strop import maketrans, lowercase, uppercase, whitespace
    letters = lowercase + uppercase
except ImportError:
    pass                                          # Use the original versions
433