# stringold.py revision 2ab19920fc0ba6a0054aa4556bef94199aa432fc
# module 'string' -- A collection of string operations

# Warning: most of the code you see here isn't normally used nowadays.
# At the end of this file most functions are replaced by built-in
# functions imported from built-in module "strop", when it is available.
6
# Some strings for ctype-style character classification.
# Each constant is a plain string holding every character of its class,
# so membership is tested with "c in <constant>".
whitespace = ' \t\n\r\v\f'	# space, tab, newline, return, vertical tab, form feed
lowercase = 'abcdefghijklmnopqrstuvwxyz'
uppercase = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
letters = lowercase + uppercase
digits = '0123456789'
hexdigits = digits + 'abcdef' + 'ABCDEF'
octdigits = '01234567'
15
# Case conversion helpers.
# _idmap is the identity table: _idmap[n] == chr(n) for n in 0..255.
# Each derived table is indexed by a character code and yields the
# converted character; only the letter ranges differ from _idmap.
# (Built with map() so that no loop variable is left in the module.)
_idmap = ''.join(map(chr, range(256)))
_lower = _idmap[:ord('A')] + lowercase + _idmap[ord('Z')+1:]
_upper = _idmap[:ord('a')] + uppercase + _idmap[ord('z')+1:]
# Start from _upper (a-z already mapped to A-Z), then map A-Z to a-z.
_swapcase = _upper[:ord('A')] + lowercase + _upper[ord('Z')+1:]
23
# Backward compatible names for exceptions.
# All four are plain aliases of ValueError, which is what the functions
# in this file raise.
index_error = ValueError
atoi_error = ValueError
atof_error = ValueError
atol_error = ValueError
29
# Convert UPPER CASE letters to lower case.
# s is the string to convert.  Returns a new string in which every
# character has been looked up, by character code, in the _lower table.
# The pieces are joined once at the end so the work is linear in len(s).
def lower(s):
	return ''.join([_lower[ord(c)] for c in s])
36
# Convert lower case letters to UPPER CASE.
# s is the string to convert.  Returns a new string in which every
# character has been looked up, by character code, in the _upper table.
# The pieces are joined once at the end so the work is linear in len(s).
def upper(s):
	return ''.join([_upper[ord(c)] for c in s])
43
# Swap lower case letters and UPPER CASE.
# s is the string to convert.  Returns a new string in which every
# character has been looked up, by character code, in the _swapcase table.
# The pieces are joined once at the end so the work is linear in len(s).
def swapcase(s):
	return ''.join([_swapcase[ord(c)] for c in s])
50
# Strip leading and trailing whitespace.
# Every character listed in the module-level 'whitespace' string counts,
# not only tabs and spaces.  Returns the slice of s that remains, which
# is the empty string when s is empty or all whitespace.
def strip(s):
	i, j = 0, len(s)
	# Advance i past leading whitespace, then pull j back past trailing
	# whitespace; "i < j" stops both scans on an all-whitespace string.
	while i < j and s[i] in whitespace: i = i+1
	while i < j and s[j-1] in whitespace: j = j-1
	return s[i:j]
57
# Split a string into a list of whitespace-separated words.
# NB: split(s) is NOT the same as splitfields(s, ' ')!
# Runs of whitespace act as a single separator and leading/trailing
# whitespace produces no empty words, so split('') returns [].
# When sep is given (not None) the call is handed to splitfields(s, sep).
def split(s, sep=None):
	if sep is not None: return splitfields(s, sep)
	res = []
	i, n = 0, len(s)
	while i < n:
		# Skip the whitespace in front of the next word.
		while i < n and s[i] in whitespace: i = i+1
		if i == n: break
		# s[i] starts a word; j runs to the first whitespace after it.
		j = i
		while j < n and s[j] not in whitespace: j = j+1
		res.append(s[i:j])
		i = j
	return res
72
# Split a string into fields separated by a given string.
# NB: splitfields(s, ' ') is NOT the same as split(s)!
# splitfields(s, '') returns [s] (in analogy with split() in nawk)
# Every occurrence of sep ends a field, so adjacent separators and a
# leading or trailing separator produce empty-string fields.
# When sep is None the call is handed to split(s).
def splitfields(s, sep=None):
	if sep is None: return split(s)
	res = []
	nsep = len(sep)
	if nsep == 0:
		return [s]
	ns = len(s)
	# i marks the start of the current field, j is the scan position.
	i = j = 0
	while j+nsep <= ns:
		if s[j:j+nsep] == sep:
			res.append(s[i:j])
			# Resume scanning right after the separator just consumed.
			i = j = j + nsep
		else:
			j = j + 1
	# Whatever follows the last separator (possibly '') is the final field.
	res.append(s[i:])
	return res
92
# Join words with spaces between them.
# words is a sequence of strings; sep (a single space by default) is put
# between consecutive words.  This is a thin wrapper around joinfields().
def join(words, sep = ' '):
	return joinfields(words, sep)
96
# Join fields with optional separator.
# words is a sequence of strings; sep (a single space by default) is put
# between consecutive items.  Returns '' for an empty sequence.
# The string join method does the work in one pass instead of growing
# the result by one concatenation per word.
def joinfields(words, sep = ' '):
	return sep.join(words)
103
# Find substring, raise exception if not found.
# s is the string to search, sub the substring to look for and i the
# offset at which the search starts; all three are passed to find().
# Returns the offset reported by find(); raises ValueError when find()
# reports a negative value.
def index(s, sub, i = 0):
	res = find(s, sub, i)
	if res < 0:
		# Call form of raise: accepted by both old and new interpreters.
		raise ValueError('substring not found in string.index')
	return res
110
# Find last substring, raise exception if not found.
# s is the string to search, sub the substring to look for and i the
# offset at which the search starts; all three are passed to rfind().
# Returns the offset reported by rfind(); raises ValueError when rfind()
# reports a negative value.
def rindex(s, sub, i = 0):
	res = rfind(s, sub, i)
	if res < 0:
		# Call form of raise: accepted by both old and new interpreters.
		# The message names this function (it used to say string.index).
		raise ValueError('substring not found in string.rindex')
	return res
117
# Count non-overlapping occurrences of substring.
# s is the string to search, sub the substring and i the offset at which
# counting starts; a negative i is taken relative to the end of s and
# clamped at 0.
# An empty sub is counted once at every position from i up to and
# including len(s); the result is never negative, even when i lies
# beyond the end of s.
def count(s, sub, i = 0):
	if i < 0: i = max(0, i + len(s))
	n = len(sub)
	# m is one past the last offset at which sub could still fit in s.
	m = len(s) + 1 - n
	if n == 0: return max(0, m-i)
	r = 0
	while i < m:
		if sub == s[i:i+n]:
			r = r+1
			# Jump over the match so occurrences cannot overlap.
			i = i+n
		else:
			i = i+1
	return r
132
# Find substring, return -1 if not found.
# s is the string to search, sub the substring and i the offset at which
# the search starts; a negative i is taken relative to the end of s and
# clamped at 0.
# Returns the lowest offset >= i at which sub occurs in s, or -1.
def find(s, sub, i = 0):
	if i < 0: i = max(0, i + len(s))
	n = len(sub)
	# m is one past the last offset at which sub could still fit in s.
	m = len(s) + 1 - n
	while i < m:
		if sub == s[i:i+n]: return i
		i = i+1
	return -1
142
# Find last substring, return -1 if not found.
# s is the string to search, sub the substring and i the lowest offset
# that may be reported; a negative i is taken relative to the end of s
# and clamped at 0.
# Returns the highest offset >= i at which sub occurs in s, or -1.
def rfind(s, sub, i = 0):
	if i < 0: i = max(0, i + len(s))
	n = len(sub)
	# Start at the last offset where sub still fits and walk backwards,
	# so the first match seen is the answer and the scan can stop there.
	j = len(s) - n
	while j >= i:
		if sub == s[j:j+n]: return j
		j = j-1
	return -1
153
# Convert string to float.
# str is the text to convert: an optional sign, then digits with an
# optional fractional part and an optional exponent.  Nothing else is
# accepted (no surrounding whitespace, no 'inf'/'nan').
# Returns the value as a float; raises ValueError for an empty string,
# a bare sign, or any text that is not a number of the above form.
def atof(str):
	import re
	sign = ''
	s = str
	if s and s[0] in '+-':
		sign = s[0]
		s = s[1:]
	if not s:
		raise ValueError('non-float argument to string.atof')
	# \Z anchors at the very end so a trailing newline is rejected too.
	if not re.match(r'[0-9]*(\.[0-9]*)?([eE][-+]?[0-9]+)?\Z', s):
		raise ValueError('non-float argument to string.atof')
	# The pattern still lets through digit-less text such as '.' or 'e5';
	# float() rejects those.  float() is used instead of eval() so the
	# argument is never executed as code and leading zeros need no
	# special treatment.
	try:
		return float(sign + s)
	except ValueError:
		raise ValueError('non-float argument to string.atof')
171
# Convert string to integer.
# str is the text to convert: an optional sign followed by one or more
# decimal digits.  base must be 10; other bases are not supported by
# this fallback implementation.
# Returns the integer value; raises ValueError for base != 10, for an
# empty string or a bare sign, and for any non-digit character.
def atoi(str, base=10):
	if base != 10:
		# We only get here if strop doesn't define atoi()
		raise ValueError("this string.atoi doesn't support base != 10")
	sign = ''
	s = str
	if s and s[0] in '+-':
		sign = s[0]
		s = s[1:]
	if not s:
		raise ValueError('non-integer argument to string.atoi')
	for c in s:
		if c not in digits:
			raise ValueError('non-integer argument to string.atoi')
	# int() reads leading zeros as decimal, so they need not be stripped,
	# and unlike eval() it never executes the argument as code.
	return int(sign + s)
189
# Convert string to long integer.
# str is the text to convert: an optional sign followed by one or more
# decimal digits.  base must be 10; other bases are not supported by
# this fallback implementation.
# Returns the value as a long where the interpreter has a separate long
# type and as a plain int otherwise; raises ValueError for base != 10,
# for an empty string or a bare sign, and for any non-digit character.
def atol(str, base=10):
	if base != 10:
		# We only get here if strop doesn't define atol()
		raise ValueError("this string.atol doesn't support base != 10")
	sign = ''
	s = str
	if s and s[0] in '+-':
		sign = s[0]
		s = s[1:]
	if not s:
		raise ValueError('non-integer argument to string.atol')
	for c in s:
		if c not in digits:
			raise ValueError('non-integer argument to string.atol')
	# Use the long constructor when it exists; interpreters without a
	# separate long type have unbounded ints, so int() serves there.
	try:
		convert = long
	except NameError:
		convert = int
	return convert(sign + s)
207
# Left-justify a string.
# Returns s padded on the right with spaces to a total length of width.
# s is returned unchanged (never truncated) when it is already at least
# width characters long.
def ljust(s, width):
	n = width - len(s)
	if n <= 0: return s
	return s + ' '*n
213
# Right-justify a string.
# Returns s padded on the left with spaces to a total length of width.
# s is returned unchanged (never truncated) when it is already at least
# width characters long.
def rjust(s, width):
	n = width - len(s)
	if n <= 0: return s
	return ' '*n + s
219
# Center a string.
# Returns s padded with spaces on both sides to a total length of width.
# s is returned unchanged (never truncated) when it is already at least
# width characters long.  When the padding cannot be split evenly the
# extra space goes on the left if width is odd and on the right if it
# is even.
def center(s, width):
	n = width - len(s)
	if n <= 0: return s
	# Floor division keeps half an integer on every interpreter; plain
	# "/" yields a float under true division and breaks ' '*half.
	half = n//2
	if n%2 and width%2:
		# This ensures that center(center(s, i), j) = center(s, j)
		half = half+1
	return ' '*half +  s + ' '*(n-half)
229
# Zero-fill a number, e.g., (12, 3) --> '012' and (-3, 3) --> '-03'
# Decadent feature: the argument may be a string or a number
# (Use of this is deprecated; it should be a string as with ljust c.s.)
# A non-string x is converted with repr().  Zeros are inserted after a
# leading '+' or '-' so the sign stays in front.  The text is returned
# unchanged (never truncated) when it is already at least width long;
# an empty string is simply filled with zeros.
def zfill(x, width):
	if isinstance(x, str): s = x
	else: s = repr(x)
	n = len(s)
	if n >= width: return s
	sign = ''
	# "s and" guards the empty string, for which s[0] would fail.
	if s and s[0] in ('-', '+'):
		sign, s = s[0], s[1:]
	return sign + '0'*(width-n) + s
242
# Expand tabs in a string.
# Doesn't take non-printing chars into account, but does understand \n.
# Each tab is replaced by enough spaces to reach the next multiple of
# tabsize columns, counting columns from the start of the current line.
# tabsize defaults to 8; with a tabsize of zero or less tabs are simply
# removed.
def expandtabs(s, tabsize=8):
	pieces = []
	column = 0	# number of characters emitted on the current line
	for c in s:
		if c == '\t':
			if tabsize > 0:
				c = ' '*(tabsize - column%tabsize)
			else:
				# Avoids dividing by zero; negative sizes already gave ''.
				c = ''
		pieces.append(c)
		if c == '\n':
			column = 0
		else:
			column = column + len(c)
	# One join at the end instead of re-copying the line per character.
	return ''.join(pieces)
255
256
# Try importing optional built-in module "strop" -- if it exists,
# it redefines some string operations that are 100-1000 times faster.
# It also defines values for whitespace, lowercase and uppercase
# that match <ctype.h>'s definitions.

try:
	from strop import *
	# lowercase and uppercase may just have been replaced, so rebuild
	# letters from the new values.
	letters = lowercase + uppercase
except ImportError:
	pass # Use the original, slow versions
267