# stringold.py revision a6bb6be95f4a04fdf7a09fcc92432273877af049
# module 'string' -- A collection of string operations

# Warning: most of the code you see here isn't normally used nowadays.
# At the end of this file most functions are replaced by built-in
# functions imported from built-in module "strop".

"""Common string manipulations.

Public module variables:

whitespace -- a string containing all characters considered whitespace
lowercase -- a string containing all characters considered lowercase letters
uppercase -- a string containing all characters considered uppercase letters
letters -- a string containing all characters considered letters
digits -- a string containing all characters considered decimal digits
hexdigits -- a string containing all characters considered hexadecimal digits
octdigits -- a string containing all characters considered octal digits

"""

# Some strings for ctype-style character classification
whitespace = ' \t\n\r\v\f'
lowercase = 'abcdefghijklmnopqrstuvwxyz'
uppercase = 'ABCDEFGHIJKLMNOPQRSTUVWXYZ'
letters = lowercase + uppercase
digits = '0123456789'
hexdigits = digits + 'abcdef' + 'ABCDEF'
octdigits = '01234567'

# Case conversion helpers: 256-entry lookup tables indexed by ord(c).
# _idmap maps every character code to itself; the others overwrite the
# A-Z and/or a-z ranges with the opposite case.
_idmap = ''.join(map(chr, range(256)))
_lower = _idmap[:ord('A')] + lowercase + _idmap[ord('Z')+1:]
_upper = _idmap[:ord('a')] + uppercase + _idmap[ord('z')+1:]
_swapcase = _upper[:ord('A')] + lowercase + _upper[ord('Z')+1:]

# Backward compatible names for exceptions
index_error = ValueError
atoi_error = ValueError
atof_error = ValueError
atol_error = ValueError

# Integer type used by atol(): 'long' where the interpreter has one,
# plain 'int' otherwise.
try:
    _long = long
except NameError:
    _long = int


# Convert UPPER CASE letters to lower case
def lower(s):
    """lower(s) -> string

    Return a copy of the string s converted to lowercase.

    Each character is mapped through a 256-entry table, so every
    character of s must have a code below 256.

    """
    return ''.join([_lower[ord(c)] for c in s])


# Convert lower case letters to UPPER CASE
def upper(s):
    """upper(s) -> string

    Return a copy of the string s converted to uppercase.

    Each character is mapped through a 256-entry table, so every
    character of s must have a code below 256.

    """
    return ''.join([_upper[ord(c)] for c in s])


# Swap lower case letters and UPPER CASE
def swapcase(s):
    """swapcase(s) -> string

    Return a copy of the string s with upper case characters
    converted to lowercase and vice versa.

    """
    return ''.join([_swapcase[ord(c)] for c in s])


# Strip leading and trailing whitespace
def strip(s):
    """strip(s) -> string

    Return a copy of the string s with leading and trailing
    whitespace removed.

    """
    i, j = 0, len(s)
    while i < j and s[i] in whitespace:
        i = i + 1
    while i < j and s[j-1] in whitespace:
        j = j - 1
    return s[i:j]


# Strip leading whitespace
def lstrip(s):
    """lstrip(s) -> string

    Return a copy of the string s with leading whitespace removed.

    """
    i, j = 0, len(s)
    while i < j and s[i] in whitespace:
        i = i + 1
    return s[i:j]


# Strip trailing whitespace
def rstrip(s):
    """rstrip(s) -> string

    Return a copy of the string s with trailing whitespace
    removed.

    """
    i, j = 0, len(s)
    while i < j and s[j-1] in whitespace:
        j = j - 1
    return s[i:j]


# Split a string into a list of space/tab-separated words
# NB: split(s) is NOT the same as splitfields(s, ' ')!
def split(s, sep=None, maxsplit=0):
    """split(str [,sep [,maxsplit]]) -> list of strings

    Return a list of the words in the string s, using sep as the
    delimiter string.  If sep is not specified (None), any run of
    whitespace is a separator and leading/trailing whitespace
    produces no empty words.  If maxsplit is positive, at most
    maxsplit splits are made and the unsplit remainder becomes the
    last element.  Maxsplit defaults to 0, meaning no limit.

    (split and splitfields are synonymous)

    """
    if sep is not None:
        return splitfields(s, sep, maxsplit)
    res = []
    i, n = 0, len(s)
    if maxsplit <= 0:
        # There can never be more than n words, so n means "unlimited".
        maxsplit = n
    nwords = 0
    while i < n:
        while i < n and s[i] in whitespace:
            i = i + 1
        if i == n:
            break
        if nwords >= maxsplit:
            # Split budget exhausted: the rest goes in as one element.
            res.append(s[i:])
            break
        j = i
        while j < n and s[j] not in whitespace:
            j = j + 1
        nwords = nwords + 1
        res.append(s[i:j])
        i = j
    return res


# Split a list into fields separated by a given string
# NB: splitfields(s, ' ') is NOT the same as split(s)!
# splitfields(s, '') returns [s] (in analogy with split() in nawk)
def splitfields(s, sep=None, maxsplit=0):
    """splitfields(str [,sep [,maxsplit]]) -> list of strings

    Return a list of the fields in the string s, using sep as the
    delimiter string.  If sep is not specified (None), the call is
    forwarded to split() and any run of whitespace is a separator.
    An empty sep returns [s].  If maxsplit is positive, at most
    maxsplit splits are made and the unsplit remainder becomes the
    last element.  Maxsplit defaults to 0, meaning no limit.

    (split and splitfields are synonymous)

    """
    if sep is None:
        return split(s, None, maxsplit)
    res = []
    nsep = len(sep)
    if nsep == 0:
        return [s]
    ns = len(s)
    if maxsplit <= 0:
        maxsplit = ns
    i = j = 0
    nsplits = 0
    while j + nsep <= ns:
        if s[j:j+nsep] == sep:
            nsplits = nsplits + 1
            res.append(s[i:j])
            i = j = j + nsep
            if nsplits >= maxsplit:
                break
        else:
            j = j + 1
    # Whatever follows the last separator (possibly '') is the final field.
    res.append(s[i:])
    return res


# Join words with spaces between them
def join(words, sep=' '):
    """join(list [,sep]) -> string

    Return a string composed of the words in list, with
    intervening occurrences of sep.  Sep defaults to a single
    space.

    (joinfields and join are synonymous)

    """
    return joinfields(words, sep)


# Join fields with optional separator
def joinfields(words, sep=' '):
    """joinfields(list [,sep]) -> string

    Return a string composed of the words in list, with
    intervening occurrences of sep.  The default separator is a
    single space.  An empty list yields the empty string.

    (joinfields and join are synonymous)

    """
    return sep.join(words)


# Normalise optional slice bounds for count(), find() and rfind()
def _clip_range(s, i, last):
    """Return (start, end) with negative/None/oversized bounds resolved.

    A None end means len(s); a negative bound counts from the end
    of s and is clamped at 0; an end beyond len(s) is clamped to
    len(s).  A start beyond len(s) is returned unchanged.

    """
    slen = len(s)
    if last is None:
        last = slen
    elif last < 0:
        last = max(0, last + slen)
    elif last > slen:
        last = slen
    if i < 0:
        i = max(0, i + slen)
    return i, last


# Find substring, raise exception if not found
def index(s, sub, i=0, last=None):
    """index(s, sub [,start [,end]]) -> int

    Return the lowest index in s where substring sub is found,
    such that sub is contained within s[start:end].  Optional
    arguments start and end are interpreted as in slice notation.

    Raise ValueError if not found.

    """
    if last is None:
        last = len(s)
    res = find(s, sub, i, last)
    if res < 0:
        raise ValueError('substring not found in string.index')
    return res


# Find last substring, raise exception if not found
def rindex(s, sub, i=0, last=None):
    """rindex(s, sub [,start [,end]]) -> int

    Return the highest index in s where substring sub is found,
    such that sub is contained within s[start:end].  Optional
    arguments start and end are interpreted as in slice notation.

    Raise ValueError if not found.

    """
    if last is None:
        last = len(s)
    res = rfind(s, sub, i, last)
    if res < 0:
        raise ValueError('substring not found in string.rindex')
    return res


# Count non-overlapping occurrences of substring
def count(s, sub, i=0, last=None):
    """count(s, sub[, start[,end]]) -> int

    Return the number of non-overlapping occurrences of substring
    sub in string s[start:end].  Optional arguments start and end
    are interpreted as in slice notation.  An empty sub matches at
    every position from start to end inclusive.

    """
    i, last = _clip_range(s, i, last)
    n = len(sub)
    m = last + 1 - n        # one past the last index where sub can start
    if n == 0:
        # Clamp: a start beyond the end must give 0, not a negative count.
        return max(0, m - i)
    r = 0
    while i < m:
        if sub == s[i:i+n]:
            r = r + 1
            i = i + n       # skip the match so occurrences don't overlap
        else:
            i = i + 1
    return r


# Find substring, return -1 if not found
def find(s, sub, i=0, last=None):
    """find(s, sub [,start [,end]]) -> int

    Return the lowest index in s where substring sub is found,
    such that sub is contained within s[start:end].  Optional
    arguments start and end are interpreted as in slice notation.

    Return -1 on failure.

    """
    i, last = _clip_range(s, i, last)
    n = len(sub)
    m = last + 1 - n        # one past the last index where sub can start
    while i < m:
        if sub == s[i:i+n]:
            return i
        i = i + 1
    return -1


# Find last substring, return -1 if not found
def rfind(s, sub, i=0, last=None):
    """rfind(s, sub [,start [,end]]) -> int

    Return the highest index in s where substring sub is found,
    such that sub is contained within s[start:end].  Optional
    arguments start and end are interpreted as in slice notation.

    Return -1 on failure.

    """
    i, last = _clip_range(s, i, last)
    n = len(sub)
    m = last + 1 - n        # one past the last index where sub can start
    r = -1
    # Scan forward and remember the most recent match.
    while i < m:
        if sub == s[i:i+n]:
            r = i
        i = i + 1
    return r


# "Safe" environment for eval().  No longer used by this module (the
# ato* functions call float()/int() directly instead of eval()); the
# name is kept because it is a public module attribute.
safe_env = {"__builtins__": {}}

# Syntax accepted by atof() after the optional sign has been removed.
_float_pattern = r'[0-9]*(\.[0-9]*)?([eE][-+]?[0-9]+)?$'

# Convert string to float
# 're' is imported lazily by atof(): None = not tried yet, 0 = unavailable.
re = None
def atof(str):
    """atof(s) -> float

    Return the floating point number represented by the string s.
    Surrounding whitespace and an optional leading sign are
    accepted.

    Raise ValueError if s does not represent a float.

    """
    global re
    if re is None:
        # Don't fail if re doesn't exist -- just skip the syntax check
        try:
            import re
        except ImportError:
            re = 0
    sign = ''
    s = strip(str)
    if s and s[0] in '+-':
        sign = s[0]
        s = s[1:]
    if not s:
        raise ValueError('non-float argument to string.atof')
    while s[0] == '0' and len(s) > 1 and s[1] in digits:
        s = s[1:]
    if re and not re.match(_float_pattern, s):
        raise ValueError('non-float argument to string.atof')
    # float() is used rather than eval(): strings such as 'e5' or '.'
    # pass the pattern above but are not numbers, and without 're' the
    # text is not pre-validated at all.
    try:
        return float(sign + s)
    except ValueError:
        raise ValueError('non-float argument to string.atof')


# Convert string to integer
def atoi(str, base=10):
    """atoi(s [,base]) -> int

    Return the integer represented by the string s.  The string s
    must consist of one or more decimal digits, possibly preceded
    by a sign and surrounded by whitespace.

    This pure-Python fallback only supports base 10; any other
    base raises ValueError.  (The strop replacement, when present,
    is what provides other bases.)

    Raise ValueError if s is not a valid integer.

    """
    if base != 10:
        # We only get here if strop doesn't define atoi()
        raise ValueError("this string.atoi doesn't support base != 10")
    sign = ''
    s = strip(str)
    if s and s[0] in '+-':
        sign = s[0]
        s = s[1:]
    if not s:
        raise ValueError('non-integer argument to string.atoi')
    while s[0] == '0' and len(s) > 1:
        s = s[1:]
    for c in s:
        if c not in digits:
            raise ValueError('non-integer argument to string.atoi')
    return int(sign + s)


# Convert string to long integer
def atol(str, base=10):
    """atol(s [,base]) -> long

    Return the long integer represented by the string s.  The
    string s must consist of one or more decimal digits, possibly
    preceded by a sign and surrounded by whitespace.  A trailing L
    or l is not accepted.

    This pure-Python fallback only supports base 10; any other
    base raises ValueError.  (The strop replacement, when present,
    is what provides other bases.)

    Raise ValueError if s is not a valid integer.

    """
    if base != 10:
        # We only get here if strop doesn't define atol()
        raise ValueError("this string.atol doesn't support base != 10")
    sign = ''
    s = strip(str)
    if s and s[0] in '+-':
        sign = s[0]
        s = s[1:]
    if not s:
        raise ValueError('non-integer argument to string.atol')
    while s[0] == '0' and len(s) > 1:
        s = s[1:]
    for c in s:
        if c not in digits:
            raise ValueError('non-integer argument to string.atol')
    return _long(sign + s)


# Left-justify a string
def ljust(s, width):
    """ljust(s, width) -> string

    Return a left-justified version of s, in a field of the
    specified width, padded with spaces as needed.  The string is
    never truncated.

    """
    n = width - len(s)
    if n <= 0:
        return s
    return s + ' '*n


# Right-justify a string
def rjust(s, width):
    """rjust(s, width) -> string

    Return a right-justified version of s, in a field of the
    specified width, padded with spaces as needed.  The string is
    never truncated.

    """
    n = width - len(s)
    if n <= 0:
        return s
    return ' '*n + s


# Center a string
def center(s, width):
    """center(s, width) -> string

    Return a centered version of s, in a field of the specified
    width, padded with spaces as needed.  The string is never
    truncated.

    """
    n = width - len(s)
    if n <= 0:
        return s
    half = n // 2           # floor division: must stay an int
    if n % 2 and width % 2:
        # This ensures that center(center(s, i), j) = center(s, j)
        half = half + 1
    return ' '*half + s + ' '*(n-half)


# Zero-fill a number, e.g., (12, 3) --> '012' and (-3, 3) --> '-03'
# Decadent feature: the argument may be a string or a number
# (Use of this is deprecated; it should be a string as with ljust c.s.)
def zfill(x, width):
    """zfill(x, width) -> string

    Pad a numeric string x with zeros on the left, to fill a field
    of the specified width.  A leading sign stays in front of the
    zeros.  The string x is never truncated.  A non-string x is
    converted with repr() first.

    """
    if isinstance(x, str):
        s = x
    else:
        s = repr(x)
    n = len(s)
    if n >= width:
        return s
    sign = ''
    # s[:1] rather than s[0] so that an empty string is handled.
    if s[:1] in ('-', '+'):
        sign, s = s[0], s[1:]
    return sign + '0'*(width-n) + s


# Expand tabs in a string.
# Doesn't take non-printing chars into account, but does understand \n.
def expandtabs(s, tabsize=8):
    """expandtabs(s [,tabsize]) -> string

    Return a copy of the string s with all tab characters replaced
    by the appropriate number of spaces, depending on the current
    column, and the tabsize (default 8).  The column is reset to 0
    after each newline.

    """
    pieces = []
    column = 0              # characters emitted since the last newline
    for c in s:
        if c == '\t':
            c = ' '*(tabsize - column % tabsize)
        pieces.append(c)
        if c == '\n':
            column = 0
        else:
            column = column + len(c)
    return ''.join(pieces)


# Character translation through look-up table.
def translate(s, table, deletions=""):
    """translate(s,table [,deletechars]) -> string

    Return a copy of the string s, where all characters occurring
    in the optional argument deletechars are removed, and the
    remaining characters have been mapped through the given
    translation table, which must be a string of length 256.

    Raise TypeError if table is not a string of length 256.

    """
    if not isinstance(table, str) or len(table) != 256:
        raise TypeError("translation table must be 256 characters long")
    return ''.join([table[ord(c)] for c in s if c not in deletions])


# Capitalize a string, e.g. "aBc dEf" -> "Abc def".
def capitalize(s):
    """capitalize(s) -> string

    Return a copy of the string s with its first character
    converted to uppercase and the rest converted to lowercase.

    """
    return upper(s[:1]) + lower(s[1:])


# Capitalize the words in a string, e.g. " aBc dEf " -> "Abc Def".
# See also regsub.capwords().
def capwords(s, sep=None):
    """capwords(s, [sep]) -> string

    Split the argument into words using split, capitalize each
    word using capitalize, and join the capitalized words using
    join.  When sep is not given, this replaces runs of whitespace
    characters by a single space and drops leading and trailing
    whitespace.

    """
    return join([capitalize(word) for word in split(s, sep)], sep or ' ')


# Construct a translation string
_idmapL = None              # cached list form of _idmap, built on first use
def maketrans(fromstr, tostr):
    """maketrans(frm, to) -> string

    Return a translation table (a string 256 characters long)
    suitable for use in string.translate.  The strings frm and to
    must be of the same length; the character frm[k] is mapped to
    to[k] and every other character maps to itself.

    Raise ValueError if the two strings differ in length.

    """
    global _idmapL
    if len(fromstr) != len(tostr):
        raise ValueError("maketrans arguments must have same length")
    if not _idmapL:
        _idmapL = list(_idmap)
    L = _idmapL[:]          # copy so the cached identity map stays pristine
    for src, dst in zip(fromstr, tostr):
        L[ord(src)] = dst
    return joinfields(L, "")


# Substring replacement (global)
def replace(str, old, new, maxsplit=0):
    """replace (str, old, new[, maxsplit]) -> string

    Return a copy of string str with all occurrences of substring
    old replaced by new.  If the optional argument maxsplit is
    given and positive, only the first maxsplit occurrences are
    replaced.  An empty old leaves str unchanged.

    """
    return joinfields(splitfields(str, old, maxsplit), new)


# Try importing optional built-in module "strop" -- if it exists,
# it redefines some string operations that are 100-1000 times faster.
# It also defines values for whitespace, lowercase and uppercase
# that match <ctype.h>'s definitions.

try:
    from strop import *
    letters = lowercase + uppercase
except ImportError:
    pass                    # Use the original, slow versions