stringold.py revision 2ab19920fc0ba6a0054aa4556bef94199aa432fc
1# module 'string' -- A collection of string operations 2 3# Warning: most of the code you see here isn't normally used nowadays. 4# At the end of this file most functions are replaced by built-in 5# functions imported from built-in module "strop". 6 7# Some strings for ctype-style character classification 8whitespace = ' \t\n\r\v\f' 9lowercase = 'abcdefghijklmnopqrstuvwxyz' 10uppercase = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ' 11letters = lowercase + uppercase 12digits = '0123456789' 13hexdigits = digits + 'abcdef' + 'ABCDEF' 14octdigits = '01234567' 15 16# Case conversion helpers 17_idmap = '' 18for i in range(256): _idmap = _idmap + chr(i) 19_lower = _idmap[:ord('A')] + lowercase + _idmap[ord('Z')+1:] 20_upper = _idmap[:ord('a')] + uppercase + _idmap[ord('z')+1:] 21_swapcase = _upper[:ord('A')] + lowercase + _upper[ord('Z')+1:] 22del i 23 24# Backward compatible names for exceptions 25index_error = ValueError 26atoi_error = ValueError 27atof_error = ValueError 28atol_error = ValueError 29 30# convert UPPER CASE letters to lower case 31def lower(s): 32 res = '' 33 for c in s: 34 res = res + _lower[ord(c)] 35 return res 36 37# Convert lower case letters to UPPER CASE 38def upper(s): 39 res = '' 40 for c in s: 41 res = res + _upper[ord(c)] 42 return res 43 44# Swap lower case letters and UPPER CASE 45def swapcase(s): 46 res = '' 47 for c in s: 48 res = res + _swapcase[ord(c)] 49 return res 50 51# Strip leading and trailing tabs and spaces 52def strip(s): 53 i, j = 0, len(s) 54 while i < j and s[i] in whitespace: i = i+1 55 while i < j and s[j-1] in whitespace: j = j-1 56 return s[i:j] 57 58# Split a string into a list of space/tab-separated words 59# NB: split(s) is NOT the same as splitfields(s, ' ')! 
def split(s, sep=None):
    """Split s into a list of words.

    With sep omitted or None, words are maximal runs of characters that
    are not in the module-level `whitespace` string; leading, trailing and
    repeated whitespace never produces empty words.  With any other sep
    the call is forwarded to splitfields(s, sep).
    """
    if sep is not None:
        return splitfields(s, sep)
    res = []
    i, n = 0, len(s)
    while i < n:
        # Skip the whitespace run in front of the next word.
        while i < n and s[i] in whitespace:
            i = i + 1
        if i == n:
            break
        # Scan to the end of the word.
        j = i
        while j < n and s[j] not in whitespace:
            j = j + 1
        res.append(s[i:j])
        i = j
    return res


# Split a list into fields separated by a given string
# NB: splitfields(s, ' ') is NOT the same as split(s)!
# splitfields(s, '') returns [s] (in analogy with split() in nawk)
def splitfields(s, sep=None):
    """Split s at every occurrence of the separator string sep.

    Adjacent separators produce empty fields.  An empty sep returns [s];
    sep omitted or None forwards to split(s).
    """
    if sep is None:
        return split(s)
    res = []
    nsep = len(sep)
    if nsep == 0:
        return [s]
    ns = len(s)
    i = j = 0       # i: start of the current field, j: scan position
    while j + nsep <= ns:
        if s[j:j+nsep] == sep:
            res.append(s[i:j])
            i = j = j + nsep
        else:
            j = j + 1
    res.append(s[i:])
    return res


# Join words with spaces between them
def join(words, sep=' '):
    """Concatenate the strings in words, with sep between them."""
    return joinfields(words, sep)


# Join fields with optional separator
def joinfields(words, sep=' '):
    """Concatenate the strings in words, with sep between them.

    An empty sequence yields ''.
    """
    # One pass instead of re-copying the accumulated result per word.
    return sep.join(words)


# Find substring, raise exception if not found
def index(s, sub, i=0):
    """Like find(), but raise ValueError when sub does not occur."""
    res = find(s, sub, i)
    if res < 0:
        raise ValueError('substring not found in string.index')
    return res


# Find last substring, raise exception if not found
def rindex(s, sub, i=0):
    """Like rfind(), but raise ValueError when sub does not occur."""
    res = rfind(s, sub, i)
    if res < 0:
        raise ValueError('substring not found in string.rindex')
    return res


# Count non-overlapping occurrences of substring
def count(s, sub, i=0):
    """Return the number of non-overlapping occurrences of sub in s[i:].

    A negative i counts from the end of s.  An empty sub matches once at
    every position from i through len(s), and never a negative number of
    times.
    """
    if i < 0:
        i = max(0, i + len(s))
    n = len(sub)
    m = len(s) + 1 - n      # one past the last index where sub could start
    if n == 0:
        return max(0, m - i)
    r = 0
    while i < m:
        if sub == s[i:i+n]:
            r = r + 1
            i = i + n       # jump past the match: occurrences may not overlap
        else:
            i = i + 1
    return r


# Find substring, return -1 if not found
def find(s, sub, i=0):
    """Return the lowest index >= i at which sub occurs in s, or -1.

    A negative i counts from the end of s.
    """
    if i < 0:
        i = max(0, i + len(s))
    n = len(sub)
    m = len(s) + 1 - n      # one past the last index where sub could start
    while i < m:
        if sub == s[i:i+n]:
            return i
        i = i + 1
    return -1
# Find last substring, return -1 if not found
def rfind(s, sub, i=0):
    """Return the highest index >= i at which sub occurs in s, or -1.

    A negative i counts from the end of s.
    """
    if i < 0:
        i = max(0, i + len(s))
    n = len(sub)
    m = len(s) + 1 - n      # one past the last index where sub could start
    r = -1
    while i < m:
        if sub == s[i:i+n]:
            r = i           # remember the match, keep looking for a later one
        i = i + 1
    return r


# Convert string to float
def atof(str):
    """Convert a decimal string to a float.

    Accepted form: an optional sign, then digits with an optional
    fractional part ('12', '12.', '12.5') or a bare fraction ('.5'), then
    an optional exponent ('e10', 'E-3').  Surrounding whitespace and
    spellings such as 'inf' or 'nan' are rejected.  Raises ValueError for
    anything else.

    The parameter name shadows the builtin `str`; it is kept so that
    keyword callers keep working.
    """
    import re
    sign = ''
    s = str
    if s and s[0] in '+-':
        sign = s[0]
        s = s[1:]
    if not s:
        raise ValueError('non-float argument to string.atof')
    # At least one mantissa digit is required, so inputs like 'e5' or '.'
    # are rejected here.  \Z (not $) so a trailing newline does not match.
    pattern = r'([0-9]+(\.[0-9]*)?|\.[0-9]+)([eE][-+]?[0-9]+)?\Z'
    if re.match(pattern, s) is None:
        raise ValueError('non-float argument to string.atof')
    # float() parses the validated text directly; no eval() involved.
    return float(sign + s)


def _decimal_literal(text, message):
    """Validate an optionally signed run of ASCII decimal digits.

    Returns the text unchanged when it is valid; raises ValueError(message)
    when it is empty, consists of a sign only, or contains any character
    other than 0-9 after the optional sign.
    """
    body = text
    if body and body[0] in '+-':
        body = body[1:]
    if not body:
        raise ValueError(message)
    for c in body:
        if c < '0' or c > '9':
            raise ValueError(message)
    return text


# Convert string to integer
def atoi(str, base=10):
    """Convert an optionally signed decimal string to an integer.

    Only base 10 is supported by this fallback implementation; any other
    base raises ValueError, as does a malformed string.
    """
    if base != 10:
        # We only get here if strop doesn't define atoi()
        raise ValueError("this string.atoi doesn't support base != 10")
    return int(_decimal_literal(str, 'non-integer argument to string.atoi'))


# Python 2 has a separate arbitrary-precision `long` type; on Python 3
# `int` already is one.
try:
    _long = long
except NameError:
    _long = int


# Convert string to long integer
def atol(str, base=10):
    """Convert an optionally signed decimal string to a long integer.

    Only base 10 is supported by this fallback implementation; any other
    base raises ValueError, as does a malformed string.
    """
    if base != 10:
        # We only get here if strop doesn't define atol()
        raise ValueError("this string.atol doesn't support base != 10")
    return _long(_decimal_literal(str, 'non-integer argument to string.atol'))


# Left-justify a string
def ljust(s, width):
    """Return s padded on the right with spaces to at least width chars.

    s is returned unchanged when it is already width characters or longer.
    """
    n = width - len(s)
    if n <= 0:
        return s
    return s + ' '*n
# Right-justify a string
def rjust(s, width):
    """Return s padded on the left with spaces to at least width chars.

    s is returned unchanged when it is already width characters or longer.
    """
    n = width - len(s)
    if n <= 0:
        return s
    return ' '*n + s


# Center a string
def center(s, width):
    """Return s centered in a field of width characters, padded with spaces.

    s is returned unchanged when it is already width characters or longer.
    """
    n = width - len(s)
    if n <= 0:
        return s
    # Floor division: plain "/" yields a float on Python 3, which cannot
    # be used as a repeat count.
    half = n // 2
    if n % 2 and width % 2:
        # This ensures that center(center(s, i), j) = center(s, j)
        half = half + 1
    return ' '*half + s + ' '*(n-half)


# Zero-fill a number, e.g., (12, 3) --> '012' and (-3, 3) --> '-03'
# Decadent feature: the argument may be a string or a number
# (Use of this is deprecated; it should be a string as with ljust c.s.)
def zfill(x, width):
    """Pad x on the left with zeros until it is width characters long.

    A leading '+' or '-' stays in front of the inserted zeros.  A
    non-string x is first converted with repr().  An empty string is
    filled entirely with zeros.
    """
    if isinstance(x, type('')):
        s = x
    else:
        s = repr(x)
    n = len(s)
    if n >= width:
        return s
    sign = ''
    # "s and" guards the empty string, which has no first character.
    if s and s[0] in ('-', '+'):
        sign, s = s[0], s[1:]
    return sign + '0'*(width-n) + s


# Expand tabs in a string.
# Doesn't take non-printing chars into account, but does understand \n.
def expandtabs(s, tabsize=8):
    """Return s with each tab replaced by spaces up to the next tab stop.

    Tab stops are every tabsize columns (default 8); the column counter
    restarts after each newline.  With tabsize <= 0 tabs are simply
    removed.
    """
    pieces = []
    column = 0      # width of the current output line so far
    for c in s:
        if c == '\t':
            if tabsize > 0:
                c = ' ' * (tabsize - column % tabsize)
            else:
                c = ''
        pieces.append(c)
        if c == '\n':
            column = 0
        else:
            column = column + len(c)
    return ''.join(pieces)


# Try importing optional built-in module "strop" -- if it exists,
# it redefines some string operations that are 100-1000 times faster.
# It also defines values for whitespace, lowercase and uppercase
# that match <ctype.h>'s definitions.

try:
    from strop import *
    letters = lowercase + uppercase
except ImportError:
    pass # Use the original, slow versions