stringold.py revision a6bb6be95f4a04fdf7a09fcc92432273877af049
1# module 'string' -- A collection of string operations
2
3# Warning: most of the code you see here isn't normally used nowadays.
4# At the end of this file most functions are replaced by built-in
5# functions imported from built-in module "strop".
6
7"""Common string manipulations.
8
9Public module variables:
10
11whitespace -- a string containing all characters considered whitespace
12lowercase -- a string containing all characters considered lowercase letters
13uppercase -- a string containing all characters considered uppercase letters
14letters -- a string containing all characters considered letters
15digits -- a string containing all characters considered decimal digits
16hexdigits -- a string containing all characters considered hexadecimal digits
17octdigits -- a string containing all characters considered octal digits
18
19"""
20
21# Some strings for ctype-style character classification
whitespace = ' \t\n\r\v\f'
lowercase = 'abcdefghijklmnopqrstuvwxyz'
uppercase = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
letters = lowercase + uppercase
digits = '0123456789'
hexdigits = digits + 'abcdef' + 'ABCDEF'
octdigits = '01234567'

# Case conversion helpers: 256-character lookup tables indexed by ord(c).
# _idmap maps every character to itself; the other tables differ from it
# only in the 'A'-'Z' and/or 'a'-'z' ranges.
_idmap = ''
i = 0
while i < 256:
	_idmap = _idmap + chr(i)
	i = i + 1
del i
_lower = _idmap[:ord('A')] + lowercase + _idmap[ord('Z')+1:]
_upper = _idmap[:ord('a')] + uppercase + _idmap[ord('z')+1:]
_swapcase = (_idmap[:ord('A')] + lowercase +
	     _idmap[ord('Z')+1:ord('a')] + uppercase +
	     _idmap[ord('z')+1:])

# Backward compatible names for exceptions
index_error = atoi_error = atof_error = atol_error = ValueError
43
44# convert UPPER CASE letters to lower case
def lower(s):
	"""lower(s) -> string

	Return a copy of the string s in which every character has
	been mapped through the module's _lower table, i.e. 'A'-'Z'
	become 'a'-'z' and everything else is left alone.

	"""
	converted = ''
	for code in map(ord, s):
		converted = converted + _lower[code]
	return converted
55
56# Convert lower case letters to UPPER CASE
def upper(s):
	"""upper(s) -> string

	Return a copy of the string s in which every character has
	been mapped through the module's _upper table, i.e. 'a'-'z'
	become 'A'-'Z' and everything else is left alone.

	"""
	converted = ''
	for code in map(ord, s):
		converted = converted + _upper[code]
	return converted
67
68# Swap lower case letters and UPPER CASE
def swapcase(s):
	"""swapcase(s) -> string

	Return a copy of the string s in which lowercase letters are
	replaced by uppercase ones and uppercase letters by lowercase
	ones (each character is mapped through the _swapcase table).

	"""
	converted = ''
	for code in map(ord, s):
		converted = converted + _swapcase[code]
	return converted
80
81# Strip leading and trailing tabs and spaces
def strip(s):
	"""strip(s) -> string

	Return the part of the string s that remains after dropping
	every leading and trailing character found in the module
	variable whitespace.

	"""
	start = 0
	end = len(s)
	# Advance past leading whitespace ...
	while start < end and s[start] in whitespace:
		start = start + 1
	# ... then back up over trailing whitespace.
	while end > start and s[end-1] in whitespace:
		end = end - 1
	return s[start:end]
93
94# Strip leading tabs and spaces
def lstrip(s):
	"""lstrip(s) -> string

	Return the part of the string s that remains after dropping
	every leading character found in the module variable
	whitespace.

	"""
	length = len(s)
	start = 0
	while start < length:
		if s[start] not in whitespace:
			break
		start = start + 1
	return s[start:length]
104
105# Strip trailing tabs and spaces
def rstrip(s):
	"""rstrip(s) -> string

	Return the part of the string s that remains after dropping
	every trailing character found in the module variable
	whitespace.

	"""
	end = len(s)
	while end > 0:
		if s[end-1] not in whitespace:
			break
		end = end - 1
	return s[0:end]
116
117
118# Split a string into a list of space/tab-separated words
119# NB: split(s) is NOT the same as splitfields(s, ' ')!
def split(s, sep=None, maxsplit=0):
	"""split(str [,sep [,maxsplit]]) -> list of strings

	Return a list of the words in the string s.  When sep is
	given, the work is delegated to splitfields(s, sep, maxsplit).
	Otherwise words are separated by runs of whitespace characters.
	If maxsplit is positive, at most maxsplit words are split off
	and whatever follows (minus its leading whitespace) is
	returned as one final element.  Maxsplit defaults to 0, which
	means no limit.

	"""
	if sep is not None:
		return splitfields(s, sep, maxsplit)
	words = []
	length = len(s)
	limit = maxsplit
	if limit <= 0:
		# A string of n characters never holds more than n words
		limit = length
	pos = 0
	while pos < length:
		if s[pos] in whitespace:
			pos = pos + 1
			continue
		# pos is now at the first character of a word
		if len(words) >= limit:
			words.append(s[pos:])
			break
		stop = pos + 1
		while stop < length and s[stop] not in whitespace:
			stop = stop + 1
		words.append(s[pos:stop])
		pos = stop
	return words
148
# Split a string into fields separated by a given string
150# NB: splitfields(s, ' ') is NOT the same as split(s)!
151# splitfields(s, '') returns [s] (in analogy with split() in nawk)
def splitfields(s, sep=None, maxsplit=0):
	"""splitfields(str [,sep [,maxsplit]]) -> list of strings

	Return a list of the fields of the string s, using sep as the
	delimiter string.  When sep is None the work is delegated to
	split(s, None, maxsplit).  An empty sep yields [s].  If
	maxsplit is positive, at most maxsplit separators are honoured
	and the rest of the string is returned as the final field.
	Maxsplit defaults to 0, which means no limit.

	"""
	if sep is None:
		return split(s, None, maxsplit)
	width = len(sep)
	if width == 0:
		return [s]
	total = len(s)
	limit = maxsplit
	if limit <= 0:
		limit = total
	fields = []
	start = 0			# where the current field begins
	scan = 0			# where we look for sep next
	last_start = total - width	# last index at which sep still fits
	while scan <= last_start and len(fields) < limit:
		if s[scan:scan+width] != sep:
			scan = scan + 1
			continue
		fields.append(s[start:scan])
		scan = scan + width
		start = scan
	fields.append(s[start:])
	return fields
182
183# Join words with spaces between them
def join(words, sep = ' '):
	"""join(list [,sep]) -> string

	Return a string made of the words in list with sep placed
	between consecutive words.  Sep defaults to a single space.

	(join simply calls joinfields; the two are synonymous)

	"""
	joined = joinfields(words, sep)
	return joined
195
196# Join fields with optional separator
def joinfields(words, sep = ' '):
	"""joinfields(list [,sep]) -> string

	Return a string made of the words in list with sep placed
	between consecutive words.  The default separator is a single
	space.  An empty list gives the empty string.

	(joinfields and join are synonymous)

	"""
	skip = len(sep)
	joined = ''
	for word in words:
		# Every word is prefixed with sep, including the first one ...
		joined = joined + sep + word
	# ... so the leading separator has to be cut off again.
	return joined[skip:]
211
212# Find substring, raise exception if not found
def index(s, sub, i = 0, last=None):
	"""index(s, sub [,start [,end]]) -> int

	Return the lowest index in s where substring sub is found,
	such that sub is contained within s[start:end].  The search
	itself is done by find(); start and end are passed on to it.

	Raise ValueError if not found.

	"""
	if last is None:
		last = len(s)
	position = find(s, sub, i, last)
	if position >= 0:
		return position
	raise ValueError('substring not found in string.index')
228
229# Find last substring, raise exception if not found
def rindex(s, sub, i = 0, last=None):
	"""rindex(s, sub [,start [,end]]) -> int

	Return the highest index in s where substring sub is found,
	such that sub is contained within s[start:end].  The search
	itself is done by rfind(); start and end are passed on to it.

	Raise ValueError if not found.

	"""
	if last is None: last = len(s)
	res = rfind(s, sub, i, last)
	if res < 0:
		# The message names this function, not string.index
		raise ValueError('substring not found in string.rindex')
	return res
245
246# Count non-overlapping occurrences of substring
def count(s, sub, i = 0, last=None):
	"""count(s, sub[, start[,end]]) -> int

	Return the number of non-overlapping occurrences of substring
	sub in string s[start:end].  A negative start or end is taken
	relative to the end of s, and an end beyond len(s) is clipped
	to len(s).  The empty substring is counted once for every
	position from start to end inclusive.  The result is never
	negative.

	"""
	Slen = len(s)  # cache this value, for speed
	if last is None:
		last = Slen
	elif last < 0:
		last = max(0, last + Slen)
	elif last > Slen:
		last = Slen
	if i < 0: i = max(0, i + Slen)
	n = len(sub)
	m = last + 1 - n	# one past the last index where sub still fits
	if n == 0:
		# m - i goes negative when start lies beyond end; an
		# empty range holds zero matches, not a negative number.
		return max(0, m - i)
	r = 0
	while i < m:
		if sub == s[i:i+n]:
			r = r+1
			i = i+n		# skip the match: occurrences don't overlap
		else:
			i = i+1
	return r
274
275# Find substring, return -1 if not found
def find(s, sub, i = 0, last=None):
	"""find(s, sub [,start [,end]]) -> int

	Return the lowest index in s where substring sub is found,
	such that sub is contained within s[start:end].  A negative
	start or end is taken relative to the end of s, and an end
	beyond len(s) is clipped to len(s).

	Return -1 on failure.

	"""
	length = len(s)
	if last is None or last > length:
		last = length
	elif last < 0:
		last = max(0, last + length)
	if i < 0:
		i = max(0, i + length)
	width = len(sub)
	final = last - width	# highest index at which sub still fits
	pos = i
	while pos <= final:
		if s[pos:pos+width] == sub:
			return pos
		pos = pos + 1
	return -1
300
301# Find last substring, return -1 if not found
def rfind(s, sub, i = 0, last=None):
	"""rfind(s, sub [,start [,end]]) -> int

	Return the highest index in s where substring sub is found,
	such that sub is contained within s[start:end].  A negative
	start or end is taken relative to the end of s, and an end
	beyond len(s) is clipped to len(s).

	Return -1 on failure.

	"""
	length = len(s)
	if last is None or last > length:
		last = length
	elif last < 0:
		last = max(0, last + length)
	if i < 0:
		i = max(0, i + length)
	width = len(sub)
	# Walk down from the highest index at which sub still fits; the
	# first hit found this way is the highest matching index.
	pos = last - width
	while pos >= i:
		if s[pos:pos+width] == sub:
			return pos
		pos = pos - 1
	return -1
327
328# "Safe" environment for eval()
safe_env = {}
safe_env["__builtins__"] = {}	# evaluated text gets an empty builtins table
330
331# Convert string to float
re = None
def atof(str):
	"""atof(s) -> float

	Return the floating point number represented by the string s.
	Surrounding whitespace and one leading sign are accepted.

	Raise ValueError if s is empty or is not a floating point
	literal.

	"""
	# re is imported on first use: None means "not tried yet",
	# 0 means "tried, but the module is not available".
	global re
	if re is None:
		# Don't fail if re doesn't exist -- just skip the syntax check
		try:
			import re
		except ImportError:
			re = 0
	sign = ''
	s = strip(str)
	if s and s[0] in '+-':
		sign = s[0]
		s = s[1:]
	if not s:
		raise ValueError('non-float argument to string.atof')
	# Drop leading zeros that are followed by another digit
	# (presumably so that eval() does not see an octal literal).
	while s[0] == '0' and len(s) > 1 and s[1] in digits: s = s[1:]
	if re and not re.match(r'[0-9]*(\.[0-9]*)?([eE][-+]?[0-9]+)?$', s):
		raise ValueError('non-float argument to string.atof')
	# NOTE(security): the text is handed to eval().  The pattern
	# above is the only guard; when re is unavailable any Python
	# expression that works without builtins gets evaluated.  Do
	# not feed untrusted input to this fallback.
	try:
		return float(eval(sign + s, safe_env))
	except (SyntaxError, NameError):
		# NameError: the pattern lets through mantissa-less
		# input such as 'e5', which eval() reads as a name.
		raise ValueError('non-float argument to string.atof')
360
361# Convert string to integer
def atoi(str, base=10):
	"""atoi(s [,base]) -> int

	Return the integer represented by the string s.  The string
	must consist of one or more decimal digits, possibly preceded
	by a sign; surrounding whitespace is ignored.

	This implementation only handles base 10 and raises ValueError
	for any other base, as well as for strings that are not
	integers.

	"""
	if base != 10:
		# We only get here if strop doesn't define atoi()
		raise ValueError("this string.atoi doesn't support base != 10")
	text = strip(str)
	sign = ''
	if text[:1] in ('+', '-'):
		sign = text[0]
		text = text[1:]
	if not text:
		raise ValueError('non-integer argument to string.atoi')
	# Remove leading zeros, but keep the last character
	while len(text) > 1 and text[0] == '0':
		text = text[1:]
	for ch in text:
		if ch not in digits:
			raise ValueError('non-integer argument to string.atoi')
	# Only digits and an optional sign are left at this point
	return eval(sign + text, safe_env)
388
389# Convert string to long integer
def atol(str, base=10):
	"""atol(s [,base]) -> long

	Return the long integer represented by the string s.  The
	string must consist of one or more decimal digits, possibly
	preceded by a sign; surrounding whitespace is ignored.  A
	trailing L or l is not accepted.

	This implementation only handles base 10 and raises ValueError
	for any other base, as well as for strings that are not
	integers.

	"""
	if base != 10:
		# We only get here if strop doesn't define atol()
		raise ValueError("this string.atol doesn't support base != 10")
	number = strip(str)
	sign = ''
	if number[:1] in ('+', '-'):
		sign, number = number[0], number[1:]
	if not number:
		raise ValueError('non-integer argument to string.atol')
	# Remove leading zeros, but keep the last character
	while len(number) > 1 and number[0] == '0':
		number = number[1:]
	for ch in number:
		if ch not in digits:
			raise ValueError('non-integer argument to string.atol')
	# The 'L' suffix makes the evaluated literal a long integer
	literal = sign + number + 'L'
	return eval(literal, safe_env)
417
418# Left-justify a string
def ljust(s, width):
	"""ljust(s, width) -> string

	Return s followed by as many spaces as are needed to make the
	result width characters long.  If s is already that long or
	longer it is returned unchanged; it is never truncated.

	"""
	padding = width - len(s)
	if padding > 0:
		return s + ' '*padding
	return s
430
431# Right-justify a string
def rjust(s, width):
	"""rjust(s, width) -> string

	Return s preceded by as many spaces as are needed to make the
	result width characters long.  If s is already that long or
	longer it is returned unchanged; it is never truncated.

	"""
	padding = width - len(s)
	if padding > 0:
		return ' '*padding + s
	return s
443
444# Center a string
def center(s, width):
	"""center(s, width) -> string

	Return a centered version of s, in a field of the specified
	width, padded with spaces as needed.  The string is never
	truncated.  When the padding cannot be divided evenly, the
	extra space goes to the left if width is odd and to the right
	if width is even.

	"""
	n = width - len(s)
	if n <= 0: return s
	# divmod() gives an integer quotient whatever the division
	# mode is; a plain n/2 becomes a float under true division
	# and cannot be used as a repeat count.
	half, odd = divmod(n, 2)
	if odd and width%2:
		# This ensures that center(center(s, i), j) = center(s, j)
		half = half+1
	return ' '*half +  s + ' '*(n-half)
460
461# Zero-fill a number, e.g., (12, 3) --> '012' and (-3, 3) --> '-03'
462# Decadent feature: the argument may be a string or a number
463# (Use of this is deprecated; it should be a string as with ljust c.s.)
def zfill(x, width):
	"""zfill(x, width) -> string

	Pad a numeric string x with zeros on the left, to fill a field
	of the specified width.  A leading '+' or '-' stays in front
	of the inserted zeros.  The string x is never truncated.  An
	empty string is padded to width zeros.

	If x is not a string, its repr() is used (deprecated).

	"""
	if isinstance(x, type('')): s = x
	else: s = repr(x)
	n = len(s)
	if n >= width: return s
	sign = ''
	# s[:1] rather than s[0]: s may be empty here when width > 0
	if s[:1] in ('-', '+'):
		sign, s = s[0], s[1:]
	return sign + '0'*(width-n) + s
479
480# Expand tabs in a string.
481# Doesn't take non-printing chars into account, but does understand \n.
def expandtabs(s, tabsize=8):
	"""expandtabs(s [,tabsize]) -> string

	Return a copy of the string s in which each tab character is
	replaced by enough spaces to reach the next multiple of
	tabsize (default 8).  The column count starts again after
	every newline; no other character is treated specially.

	"""
	finished = ''	# complete lines, each including its newline
	current = ''	# the line being built; its length is the column
	for ch in s:
		if ch == '\t':
			column = len(current)
			current = current + ' '*(tabsize - column%tabsize)
		elif ch == '\n':
			finished = finished + current + ch
			current = ''
		else:
			current = current + ch
	return finished + current
499
500# Character translation through look-up table.
def translate(s, table, deletions=""):
	"""translate(s,table [,deletechars]) -> string

	Return a copy of the string s from which all characters that
	occur in the optional argument deletechars have been removed,
	and in which every remaining character c has been replaced by
	table[ord(c)].

	Raise TypeError unless table is a string of length 256.

	"""
	if not (type(table) == type('') and len(table) == 256):
		raise TypeError("translation table must be 256 characters long")
	kept = ""
	for ch in s:
		if ch in deletions:
			continue
		kept = kept + table[ord(ch)]
	return kept
518
519# Capitalize a string, e.g. "aBc  dEf" -> "Abc  def".
def capitalize(s):
	"""capitalize(s) -> string

	Return a copy of the string s whose first character has been
	passed through upper() and whose remaining characters have
	been passed through lower().

	"""
	head = upper(s[:1])
	tail = lower(s[1:])
	return head + tail
528
529# Capitalize the words in a string, e.g. " aBc  dEf " -> "Abc Def".
530# See also regsub.capwords().
def capwords(s, sep=None):
	"""capwords(s, [sep]) -> string

	Split the argument into words using split(s, sep), capitalize
	each word using capitalize, and join the capitalized words
	using join, with sep (or a single space when sep is not given)
	between them.  With the default sep this replaces runs of
	whitespace characters by a single space.

	"""
	capitalized = []
	for word in split(s, sep):
		capitalized.append(capitalize(word))
	return join(capitalized, sep or ' ')
541
542# Construct a translation string
_idmapL = None
def maketrans(fromstr, tostr):
	"""maketrans(frm, to) -> string

	Return a translation table (a string of 256 characters)
	suitable for use in string.translate, in which each character
	of frm is mapped to the character at the same position in to
	and every other character is mapped to itself.

	Raise ValueError if frm and to differ in length.

	"""
	global _idmapL
	if len(fromstr) != len(tostr):
		raise ValueError("maketrans arguments must have same length")
	if not _idmapL:
		# Turn the identity table into a list of characters once
		# and keep it around for later calls.
		chars = []
		for ch in _idmap:
			chars.append(ch)
		_idmapL = chars
	table = _idmapL[:]	# work on a copy, the cached list stays intact
	for pos in range(len(fromstr)):
		table[ord(fromstr[pos])] = tostr[pos]
	return joinfields(table, "")
562
563# Substring replacement (global)
def replace(str, old, new, maxsplit=0):
	"""replace (str, old, new[, maxsplit]) -> string

	Return a copy of string str with the occurrences of substring
	old replaced by new.  The string is cut apart with
	splitfields(str, old, maxsplit) and glued together again with
	new, so a positive maxsplit limits the number of replacements
	to the first maxsplit occurrences.

	"""
	pieces = splitfields(str, old, maxsplit)
	return joinfields(pieces, new)
573
574
575# Try importing optional built-in module "strop" -- if it exists,
576# it redefines some string operations that are 100-1000 times faster.
577# It also defines values for whitespace, lowercase and uppercase
578# that match <ctype.h>'s definitions.
579
580try:
581	from strop import *
582	letters = lowercase + uppercase
583except ImportError:
584	pass # Use the original, slow versions
585