# stringold.py revision 7e47402264cf87b9bbb61fc9ff610af08add7c7b
# module 'string' -- A collection of string operations

# Warning: most of the code you see here isn't normally used nowadays.  With
# Python 1.6, many of these functions are implemented as methods on the
# standard string object.  They used to be implemented by a built-in module
# called strop, but strop is now obsolete itself.

"""Common string manipulations.

Public module variables:

whitespace -- a string containing all characters considered whitespace
lowercase -- a string containing all characters considered lowercase letters
uppercase -- a string containing all characters considered uppercase letters
letters -- a string containing all characters considered letters
digits -- a string containing all characters considered decimal digits
hexdigits -- a string containing all characters considered hexadecimal digits
octdigits -- a string containing all characters considered octal digits

"""

# Some strings for ctype-style character classification
whitespace = ' \t\n\r\v\f'
lowercase = 'abcdefghijklmnopqrstuvwxyz'
uppercase = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
letters = lowercase + uppercase
digits = '0123456789'
hexdigits = digits + 'abcdef' + 'ABCDEF'
octdigits = '01234567'

# Case conversion helpers
# Identity table: the 256 characters chr(0)..chr(255), in order.
_idmap = ''.join(map(chr, range(256)))

# Backward compatible names for exceptions
index_error = ValueError
atoi_error = ValueError
atof_error = ValueError
atol_error = ValueError

# convert UPPER CASE letters to lower case
def lower(s):
    """lower(s) -> string

    Return a copy of the string s converted to lowercase.

    """
    return s.lower()

# Convert lower case letters to UPPER CASE
def upper(s):
    """upper(s) -> string

    Return a copy of the string s converted to uppercase.

    """
    return s.upper()

# Swap lower case letters and UPPER CASE
def swapcase(s):
    """swapcase(s) -> string

    Return a copy of the string s with upper case characters
    converted to lowercase and vice versa.

    """
    return s.swapcase()

# Strip leading and trailing whitespace
def strip(s):
    """strip(s) -> string

    Return a copy of the string s with leading and trailing
    whitespace removed.

    """
    return s.strip()

# Strip leading whitespace
def lstrip(s):
    """lstrip(s) -> string

    Return a copy of the string s with leading whitespace removed.

    """
    return s.lstrip()

# Strip trailing whitespace
def rstrip(s):
    """rstrip(s) -> string

    Return a copy of the string s with trailing whitespace
    removed.

    """
    return s.rstrip()


# Split a string into a list of space/tab-separated words
# NB: split(s) is NOT the same as splitfields(s, ' ')!
def split(s, sep=None, maxsplit=0):
    """split(str [,sep [,maxsplit]]) -> list of strings

    Return a list of the words in the string s, using sep as the
    delimiter string.  If maxsplit is nonzero, at most maxsplit
    splits are performed.  If sep is not specified, any whitespace
    string is a separator.  Maxsplit defaults to 0, meaning no limit.

    (split and splitfields are synonymous)

    """
    # The string method treats 0 as "perform no splits at all", whereas
    # this function documents 0 as "no limit"; translate to the method's
    # own "no limit" value so the default call really splits.
    if not maxsplit:
        maxsplit = -1
    return s.split(sep, maxsplit)
splitfields = split

# Join fields with optional separator
def join(words, sep = ' '):
    """join(list [,sep]) -> string

    Return a string composed of the words in list, with
    intervening occurrences of sep.  The default separator is a
    single space.

    (joinfields and join are synonymous)

    """
    return sep.join(words)
joinfields = join

# Historical speed alias.  The wrappers below use the f(*args) call
# syntax directly; the name is only kept so existing references to it
# keep working.
try:
    _apply = apply
except NameError:
    # No apply() builtin on this interpreter; provide an equivalent.
    def _apply(func, args):
        return func(*args)

# Find substring, raise exception if not found
def index(s, *args):
    """index(s, sub [,start [,end]]) -> int

    Like find but raises ValueError when the substring is not found.

    """
    return s.index(*args)

# Find last substring, raise exception if not found
def rindex(s, *args):
    """rindex(s, sub [,start [,end]]) -> int

    Like rfind but raises ValueError when the substring is not found.

    """
    return s.rindex(*args)

# Count non-overlapping occurrences of substring
def count(s, *args):
    """count(s, sub[, start[,end]]) -> int

    Return the number of occurrences of substring sub in string
    s[start:end].  Optional arguments start and end are
    interpreted as in slice notation.

    """
    return s.count(*args)

# Find substring, return -1 if not found
def find(s, *args):
    """find(s, sub [,start [,end]]) -> int

    Return the lowest index in s where substring sub is found,
    such that sub is contained within s[start:end].  Optional
    arguments start and end are interpreted as in slice notation.

    Return -1 on failure.

    """
    return s.find(*args)

# Find last substring, return -1 if not found
def rfind(s, *args):
    """rfind(s, sub [,start [,end]]) -> int

    Return the highest index in s where substring sub is found,
    such that sub is contained within s[start:end].  Optional
    arguments start and end are interpreted as in slice notation.

    Return -1 on failure.

    """
    return s.rfind(*args)

# for a bit of speed
_float = float
_int = int
try:
    _long = long
except NameError:
    # No separate long type on this interpreter; int is unbounded.
    _long = int
_StringType = type('')

# Shared argument checking for atoi() and atol()
def _convert_string(converter, args):
    """Call converter(*args) after checking that args[0] is a string.

    Raises TypeError when args is empty or its first item is not a
    string.
    """
    if not args:
        raise TypeError('function requires at least 1 argument: %d given' %
                        len(args))
    s = args[0]
    if not isinstance(s, _StringType):
        raise TypeError('argument 1: expected string, %s found' %
                        type(s).__name__)
    # Don't catch type error resulting from too many arguments to the
    # converter.  The error message isn't compatible but the error type
    # is, and this function is complicated enough already.
    return converter(*args)

# Convert string to float
def atof(s):
    """atof(s) -> float

    Return the floating point number represented by the string s.
    Raises TypeError if s is not a string.

    """
    if isinstance(s, _StringType):
        return _float(s)
    raise TypeError('argument 1: expected string, %s found' %
                    type(s).__name__)

# Convert string to integer
def atoi(*args):
    """atoi(s [,base]) -> int

    Return the integer represented by the string s in the given
    base, which defaults to 10.  The string s must consist of one
    or more digits, possibly preceded by a sign.  If base is 0, it
    is chosen from the leading characters of s, 0 for octal, 0x or
    0X for hexadecimal.  If base is 16, a preceding 0x or 0X is
    accepted.  Raises TypeError if s is missing or not a string.

    """
    return _convert_string(_int, args)


# Convert string to long integer
def atol(*args):
    """atol(s [,base]) -> long

    Return the long integer represented by the string s in the
    given base, which defaults to 10.  The string s must consist
    of one or more digits, possibly preceded by a sign.  If base
    is 0, it is chosen from the leading characters of s, 0 for
    octal, 0x or 0X for hexadecimal.  If base is 16, a preceding
    0x or 0X is accepted.  A trailing L or l is not accepted,
    unless base is 0.  Raises TypeError if s is missing or not a
    string.

    """
    return _convert_string(_long, args)


# Left-justify a string
def ljust(s, width):
    """ljust(s, width) -> string

    Return a left-justified version of s, in a field of the
    specified width, padded with spaces as needed.  The string is
    never truncated.

    """
    n = width - len(s)
    if n <= 0: return s
    return s + ' '*n

# Right-justify a string
def rjust(s, width):
    """rjust(s, width) -> string

    Return a right-justified version of s, in a field of the
    specified width, padded with spaces as needed.  The string is
    never truncated.

    """
    n = width - len(s)
    if n <= 0: return s
    return ' '*n + s

# Center a string
def center(s, width):
    """center(s, width) -> string

    Return a centered version of s, in a field of the specified
    width, padded with spaces as needed.  The string is never
    truncated.

    """
    n = width - len(s)
    if n <= 0: return s
    # divmod keeps the padding an integer whatever '/' means on the
    # running interpreter.
    half, odd = divmod(n, 2)
    if odd and width%2:
        # This ensures that center(center(s, i), j) = center(s, j)
        half = half+1
    return ' '*half + s + ' '*(n-half)

# Zero-fill a number, e.g., (12, 3) --> '012' and (-3, 3) --> '-03'
# Decadent feature: the argument may be a string or a number
# (Use of this is deprecated; it should be a string as with ljust c.s.)
def zfill(x, width):
    """zfill(x, width) -> string

    Pad a numeric string x with zeros on the left, to fill a field
    of the specified width.  The string x is never truncated.  A
    leading sign stays in front of the inserted zeros.

    """
    if isinstance(x, _StringType): s = x
    else: s = repr(x)
    n = len(s)
    if n >= width: return s
    sign = ''
    # Slice rather than index so that an empty string is padded instead
    # of raising IndexError.
    if s[:1] in ('-', '+'):
        sign, s = s[0], s[1:]
    return sign + '0'*(width-n) + s

# Expand tabs in a string.
# Doesn't take non-printing chars into account, but does understand \n.
def expandtabs(s, tabsize=8):
    """expandtabs(s [,tabsize]) -> string

    Return a copy of the string s with all tab characters replaced
    by the appropriate number of spaces, depending on the current
    column, and the tabsize (default 8).  The column restarts at 0
    after each newline.

    """
    # Collect the pieces and join once instead of growing a string one
    # character at a time; 'column' is the width of the current line.
    pieces = []
    column = 0
    for c in s:
        if c == '\t':
            c = ' '*(tabsize - column % tabsize)
        pieces.append(c)
        if c == '\n':
            column = 0
        else:
            column = column + len(c)
    return ''.join(pieces)

# Character translation through look-up table.
def translate(s, table, deletions=""):
    """translate(s,table [,deletechars]) -> string

    Return a copy of the string s, where all characters occurring
    in the optional argument deletechars are removed, and the
    remaining characters have been mapped through the given
    translation table, which must be a string of length 256.

    """
    return s.translate(table, deletions)

# Capitalize a string, e.g. "aBc  dEf" -> "Abc  def".
def capitalize(s):
    """capitalize(s) -> string

    Return a copy of the string s with only its first character
    capitalized.

    """
    return s.capitalize()

# Capitalize the words in a string, e.g. " aBc  dEf " -> "Abc Def".
# See also regsub.capwords().
def capwords(s, sep=None):
    """capwords(s, [sep]) -> string

    Split the argument into words using split, capitalize each
    word using capitalize, and join the capitalized words using
    join.  Note that this replaces runs of whitespace characters by
    a single space.

    """
    # With no separator given, the words are re-joined by one space.
    return (sep or ' ').join(map(capitalize, s.split(sep)))

# Construct a translation string
_idmapL = None
def maketrans(fromstr, tostr):
    """maketrans(frm, to) -> string

    Return a translation table (a string of 256 bytes long)
    suitable for use in string.translate.  The strings frm and to
    must be of the same length; ValueError is raised otherwise.

    """
    if len(fromstr) != len(tostr):
        raise ValueError("maketrans arguments must have same length")
    global _idmapL
    if not _idmapL:
        # Cache the identity table as a list on first use.
        _idmapL = list(_idmap)
    L = _idmapL[:]
    for src, dst in zip(fromstr, tostr):
        L[ord(src)] = dst
    return "".join(L)

# Substring replacement (global)
def replace(s, old, new, maxsplit=0):
    """replace (str, old, new[, maxsplit]) -> string

    Return a copy of string str with all occurrences of substring
    old replaced by new.  If the optional argument maxsplit is
    given and nonzero, only the first maxsplit occurrences are
    replaced.

    """
    # The string method treats 0 as "replace nothing", whereas this
    # function documents the default as "replace all"; translate to
    # the method's own "no limit" value.
    if not maxsplit:
        maxsplit = -1
    return s.replace(old, new, maxsplit)


# XXX: transitional
#
# If string objects do not have methods, then we need to use the old string.py
# library, which uses strop for many more things than just the few outlined
# below.
# NOTE(review): the header names this file stringold.py, so this would be
# a self-import -- presumably copied verbatim from string.py; confirm
# before relying on this fallback.
try:
    ''.upper
except AttributeError:
    from stringold import *

# Try importing optional built-in module "strop" -- if it exists,
# it redefines some string operations that are 100-1000 times faster.
# It also defines values for whitespace, lowercase and uppercase
# that match <ctype.h>'s definitions.

try:
    from strop import maketrans, lowercase, uppercase, whitespace
    letters = lowercase + uppercase
except ImportError:
    pass                                          # Use the original versions