# string.py revision 34f173110fbe9f765c43f38b5a5731b5539eb295
# module 'string' -- A collection of string operations

# Warning: most of the code you see here isn't normally used nowadays.
# At the end of this file most functions are replaced by built-in
# functions imported from built-in module "strop".
6
# Some strings for ctype-style character classification.
# (The "strop" import at the end of this file may replace whitespace,
# lowercase and uppercase with values matching <ctype.h>.)
whitespace = ' \t\n\r\v\f'
lowercase = 'abcdefghijklmnopqrstuvwxyz'
uppercase = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
letters = lowercase + uppercase
digits = '0123456789'
octdigits = digits[:8]
hexdigits = digits + 'abcdefABCDEF'

# Case conversion helpers: 256-character lookup tables indexed by ord(c).
# _idmap maps every character to itself; the other tables differ from it
# only in the A-Z and/or a-z ranges.
_idmap = ''
for _code in range(256):
	_idmap = _idmap + chr(_code)
del _code
_lower = _idmap[:ord('A')] + lowercase + _idmap[ord('Z')+1:]
_upper = _idmap[:ord('a')] + uppercase + _idmap[ord('z')+1:]
# _lower already folds A-Z to a-z; overwriting its a-z slots with the
# upper case letters yields the case-swapping table.
_swapcase = _lower[:ord('a')] + uppercase + _lower[ord('z')+1:]

# Backward compatible names for exceptions
index_error = atoi_error = atof_error = atol_error = ValueError
29
# Convert UPPER CASE letters to lower case.
# Characters outside A-Z are passed through unchanged.
def lower(s):
	# _lower is the 256-entry table that maps A-Z onto a-z.
	return translate(s, _lower)
36
# Convert lower case letters to UPPER CASE.
# Characters outside a-z are passed through unchanged.
def upper(s):
	# _upper is the 256-entry table that maps a-z onto A-Z.
	return translate(s, _upper)
43
# Swap lower case letters and UPPER CASE.
# Non-letters are passed through unchanged.
def swapcase(s):
	# _swapcase maps a-z onto A-Z and A-Z onto a-z in one table.
	return translate(s, _swapcase)
50
# Strip leading and trailing whitespace
# (any character found in the module-level 'whitespace' string).
def strip(s):
	# Trimming the right end first and then the left end selects the
	# same slice as trimming both ends of the original string.
	return lstrip(rstrip(s))
57
# Strip leading whitespace
# (any character found in the module-level 'whitespace' string).
def lstrip(s):
	start = 0
	stop = len(s)
	while start < stop:
		if s[start] not in whitespace: break
		start = start + 1
	return s[start:]
63
# Strip trailing whitespace
# (any character found in the module-level 'whitespace' string).
def rstrip(s):
	stop = len(s)
	while stop > 0:
		if s[stop-1] not in whitespace: break
		stop = stop - 1
	return s[:stop]
69
70
# Split a string into a list of whitespace-separated words.
# NB: split(s) is NOT the same as splitfields(s, ' ')!
# If sep is given (not None) the work is delegated to splitfields().
# maxsplit limits the number of splits: after that many words have been
# split off, the rest of the string (minus its leading whitespace) is
# returned as the final element.  Zero or negative means "no limit".
def split(s, sep=None, maxsplit=0):
	if sep is not None: return splitfields(s, sep, maxsplit)
	res = []
	i, n = 0, len(s)
	# A string of length n cannot contain more than n words, so n
	# serves as "unlimited".
	if maxsplit <= 0: maxsplit = n
	count = 0
	while i < n:
		# Skip the whitespace run in front of the next word
		while i < n and s[i] in whitespace: i = i+1
		if i == n: break
		if count >= maxsplit:
			# Split budget used up: keep the remainder in one piece
			res.append(s[i:])
			break
		count = count + 1
		j = i
		while j < n and s[j] not in whitespace: j = j+1
		res.append(s[i:j])
		i = j
	return res
85
# Split a string into fields separated by a given string.
# NB: splitfields(s, ' ') is NOT the same as split(s)!
# splitfields(s, '') returns [s] (in analogy with split() in nawk).
# If sep is None the work is delegated to split().
# At most maxsplit splits are done (the remainder of the string becomes
# the last field); zero or negative means "no limit".
def splitfields(s, sep=None, maxsplit=0):
	if sep is None: return split(s, None, maxsplit)
	nsep = len(sep)
	if nsep == 0:
		return [s]
	ns = len(s)
	# There can never be more than ns separators in s, so ns serves as
	# "unlimited".  This also keeps a negative maxsplit from stopping
	# the scan after the very first separator.
	if maxsplit <= 0: maxsplit = ns
	res = []
	i = j = 0		# i: start of current field, j: scan position
	count = 0
	while j+nsep <= ns and count < maxsplit:
		if s[j:j+nsep] == sep:
			count = count + 1
			res.append(s[i:j])
			i = j = j + nsep
		else:
			j = j + 1
	# Whatever follows the last separator used (possibly '') is a field too
	res.append(s[i:])
	return res
110
# Join words with a separator (a single space by default) between them.
# This is simply another name for joinfields().
def join(words, sep = ' '):
	joined = joinfields(words, sep)
	return joined
114
# Join fields with optional separator (a single space by default).
# An empty sequence of words gives the empty string.
def joinfields(words, sep = ' '):
	joined = ''
	for word in words:
		joined = joined + (sep + word)
	# Every word was prefixed with sep, so drop the one in front
	skip = len(sep)
	return joined[skip:]
121
# Find substring, raise exception if not found.
# Same as find(), except that a miss raises ValueError instead of
# returning -1.
def index(s, sub, i = 0):
	pos = find(s, sub, i)
	if pos >= 0:
		return pos
	raise ValueError('substring not found in string.index')
128
# Find last substring, raise exception if not found.
# Same as rfind(), except that a miss raises ValueError instead of
# returning -1.
def rindex(s, sub, i = 0):
	res = rfind(s, sub, i)
	if res < 0:
		# Name the function that actually failed (not string.index)
		raise ValueError('substring not found in string.rindex')
	return res
135
# Count non-overlapping occurrences of substring sub in s, starting the
# scan at index i.  A negative i counts from the end of s (clamped to 0).
# The empty substring matches once at every position from i up to and
# including len(s).
def count(s, sub, i = 0):
	if i < 0: i = max(0, i + len(s))
	n = len(sub)
	m = len(s) + 1 - n	# one past the last possible match position
	if n == 0:
		# A start index beyond the end must give 0, not a negative count
		return max(0, m-i)
	r = 0
	while i < m:
		if sub == s[i:i+n]:
			r = r+1
			i = i+n		# jump past the match: no overlaps
		else:
			i = i+1
	return r
150
# Find substring, return -1 if not found.
# The scan starts at index i; a negative i counts from the end of s
# (clamped to 0).  The lowest matching index is returned.
def find(s, sub, i = 0):
	if i < 0: i = max(0, i + len(s))
	width = len(sub)
	last = len(s) - width	# highest index where sub could still fit
	pos = i
	while pos <= last:
		if s[pos:pos+width] == sub:
			return pos
		pos = pos + 1
	return -1
160
# Find last substring, return -1 if not found.
# Only matches starting at index i or later are considered; a negative
# i counts from the end of s (clamped to 0).
def rfind(s, sub, i = 0):
	if i < 0: i = max(0, i + len(s))
	width = len(sub)
	# Walk backwards from the highest position where sub could fit;
	# the first hit is the last occurrence.
	pos = len(s) - width
	while pos >= i:
		if s[pos:pos+width] == sub:
			return pos
		pos = pos - 1
	return -1
171
# Convert string to float.
# Accepts an optional sign, digits with an optional fractional part and
# an optional exponent.  Raises ValueError for anything else.
def atof(str):
	import regex
	sign = ''
	s = str
	if s and s[0] in '+-':
		sign = s[0]
		s = s[1:]
	if not s:
		raise ValueError('non-float argument to string.atof')
	# Drop leading zeros that are followed by another digit
	# (presumably so eval() never sees a literal starting with 0 --
	# TODO confirm)
	while s[0] == '0' and len(s) > 1 and s[1] in digits: s = s[1:]
	# regex.match() returns the length of the match, so this demands
	# that the pattern covers the whole string
	if regex.match('[0-9]*\(\.[0-9]*\)?\([eE][-+]?[0-9]+\)?', s) != len(s):
		raise ValueError('non-float argument to string.atof')
	# NOTE: eval() is only reached for text that matched the pattern
	# above, i.e. digits, '.', 'e'/'E' and signs.
	try:
		return float(eval(sign + s))
	except (SyntaxError, NameError):
		# Every part of the pattern is optional, so '.' (a syntax
		# error) and 'e5' (evaluated as a name) get this far.
		raise ValueError('non-float argument to string.atof')
189
# Convert string to integer.
# Accepts an optional sign followed by decimal digits; raises ValueError
# for anything else, and for any base other than 10.
def atoi(str, base=10):
	if base != 10:
		# We only get here if strop doesn't define atoi()
		raise ValueError("this string.atoi doesn't support base != 10")
	text = str
	prefix = ''
	if text[:1] in ('+', '-'):
		prefix, text = text[0], text[1:]
	if not text:
		raise ValueError('non-integer argument to string.atoi')
	# Strip leading zeros, but keep a lone '0'
	while len(text) > 1 and text[0] == '0':
		text = text[1:]
	for ch in text:
		if ch not in digits:
			raise ValueError('non-integer argument to string.atoi')
	# Only a sign and decimal digits can reach eval() here
	return eval(prefix + text)
207
# Convert string to long integer.
# Accepts an optional sign followed by decimal digits; raises ValueError
# for anything else, and for any base other than 10.
def atol(str, base=10):
	if base != 10:
		# We only get here if strop doesn't define atol()
		raise ValueError("this string.atol doesn't support base != 10")
	text = str
	prefix = ''
	if text[:1] in ('+', '-'):
		prefix, text = text[0], text[1:]
	if not text:
		raise ValueError('non-integer argument to string.atol')
	# Strip leading zeros, but keep a lone '0'
	while len(text) > 1 and text[0] == '0':
		text = text[1:]
	for ch in text:
		if ch not in digits:
			raise ValueError('non-integer argument to string.atol')
	# The 'L' suffix makes eval() read the digits as a long literal
	return eval(prefix + text + 'L')
225
# Left-justify a string in a field of the given width by padding it
# with spaces on the right.  The string is never truncated.
def ljust(s, width):
	pad = width - len(s)
	if pad > 0:
		return s + ' '*pad
	return s
231
# Right-justify a string in a field of the given width by padding it
# with spaces on the left.  The string is never truncated.
def rjust(s, width):
	pad = width - len(s)
	if pad > 0:
		return ' '*pad + s
	return s
237
# Center a string in a field of the given width, padding with spaces.
# A string that is already at least 'width' long is returned unchanged.
def center(s, width):
	n = width - len(s)
	if n <= 0: return s
	# divmod() always yields an integer quotient, so the left padding
	# does not depend on how the '/' operator divides integers
	half, odd = divmod(n, 2)
	if odd and width%2:
		# This ensures that center(center(s, i), j) = center(s, j)
		half = half+1
	return ' '*half + s + ' '*(n-half)
247
# Zero-fill a number, e.g., (12, 3) --> '012' and (-3, 3) --> '-03'
# Decadent feature: the argument may be a string or a number
# (Use of this is deprecated; it should be a string as with ljust c.s.)
# A leading sign stays in front of the inserted zeros and counts
# towards the width.  The string is never truncated.
def zfill(x, width):
	if type(x) == type(''): s = x
	else: s = repr(x)
	n = len(s)
	if n >= width: return s
	sign = ''
	# Slice rather than index: the empty string has no s[0]
	if s[:1] in ('-', '+'):
		sign, s = s[0], s[1:]
	return sign + '0'*(width-n) + s
260
# Expand tabs in a string: each tab is replaced by the spaces needed to
# reach the next multiple of tabsize columns.
# Doesn't take non-printing chars into account, but does understand \n.
def expandtabs(s, tabsize=8):
	done = ''	# completed lines
	current = ''	# the line being built; its length is the column
	for ch in s:
		if ch == '\t':
			ch = ' ' * (tabsize - len(current) % tabsize)
		current = current + ch
		if ch == '\n':
			# A newline resets the column count
			done, current = done + current, ''
	return done + current
273
# Character translation through look-up table.
# Each character of s that does not occur in deletions is replaced by
# table[ord(c)]; characters that occur in deletions are dropped.
# Raises TypeError unless table is a string of exactly 256 characters.
def translate(s, table, deletions=""):
	if not (type(table) == type('') and len(table) == 256):
		raise TypeError("translation table must be 256 characters long")
	kept = ""
	for ch in s:
		if ch in deletions:
			continue
		kept = kept + table[ord(ch)]
	return kept
283
# Capitalize a string, e.g. "aBc  dEf" -> "Abc  def":
# the first character is upper-cased, all the others are lower-cased.
def capitalize(s):
	head, tail = s[:1], s[1:]
	return upper(head) + lower(tail)
287
# Capitalize the words in a string, e.g. " aBc  dEf " -> "Abc Def".
# With the default sep=None, words are split on whitespace and joined
# with single spaces.  With an explicit sep, words are split on sep and
# joined with that same sep, so the separators survive.
# See also regsub.capwords().
def capwords(s, sep=None):
	# 'sep or " "': None (whitespace splitting) rejoins with one space
	return join(map(capitalize, split(s, sep)), sep or ' ')
292
# Construct a translation string: a 256-character table, usable with
# translate(), that maps each character of fromstr to the character at
# the same position in tostr and every other character to itself.
# Raises ValueError if the two strings differ in length.
_idmapL = None		# list form of _idmap, built on first use
def maketrans(fromstr, tostr):
	global _idmapL
	if len(fromstr) != len(tostr):
		raise ValueError("maketrans arguments must have same length")
	if not _idmapL:
		chars = []
		for ch in _idmap:
			chars.append(ch)
		_idmapL = chars
	# Work on a copy so the cached identity list stays untouched
	table = _idmapL[:]
	codes = []
	for ch in fromstr:
		codes.append(ord(ch))
	for k in range(len(codes)):
		table[codes[k]] = tostr[k]
	return joinfields(table, "")
306
# Try importing optional built-in module "strop" -- if it exists,
# it redefines some string operations that are 100-1000 times faster.
# It also defines values for whitespace, lowercase and uppercase
# that match <ctype.h>'s definitions.

try:
	from strop import *
except ImportError:
	pass # Use the original, slow versions
else:
	# lowercase/uppercase may just have been replaced: rebuild letters
	letters = lowercase + uppercase
317