"""
HyperParser
===========
This module defines the HyperParser class, which provides advanced parsing
abilities for the ParenMatch and other extensions.
The HyperParser uses PyParser. PyParser is intended mostly to give information
on the proper indentation of code. HyperParser gives some information on the
structure of code, used by extensions to help the user.
"""

import string
import keyword
from idlelib import PyParse


class HyperParser:
    """Analyze the statement surrounding an index of an editor text widget.

    Attributes set by __init__ / set_index:
      editwin        -- the editor window object passed to the constructor
      text           -- editwin.text, the widget the indices refer to
      rawtext        -- the text handed to the parser, without the trailing
                        space and newline that __init__ appends
      stopatindex    -- text index ("<line>.end") matching the end of rawtext
      bracketing     -- result of parser.get_last_stmt_bracketing(); each
                        entry is used here as (offset in rawtext, level)
      isopener       -- isopener[i] is True when bracketing[i] has a higher
                        level than bracketing[i-1]
      indexinrawtext -- offset in rawtext of the index being analyzed
      indexbracket   -- position in bracketing of the entry the index
                        belongs to
    """

    def __init__(self, editwin, index):
        """Initialize the HyperParser to analyze the surroundings of the given
        index.

        editwin must provide text, indentwidth, tabwidth, context_use_ps1,
        num_context_lines and _build_char_in_string_func(); index is an index
        of editwin.text.
        """

        self.editwin = editwin
        self.text = text = editwin.text

        parser = PyParse.Parser(editwin.indentwidth, editwin.tabwidth)

        # The integer part of a "line.column" index is the line number.
        lno = int(float(text.index(index)))
        # The analyzed text always ends at the end of the index's line.
        stopatindex = "%d.end" % lno

        if not editwin.context_use_ps1:
            # Try growing amounts of context until the parser reports a good
            # place to start parsing, or until we reach the first line.
            for context in editwin.num_context_lines:
                startat = max(lno - context, 1)
                startatindex = repr(startat) + ".0"
                # We add the newline because PyParse requires a newline at end.
                # We add a space so that index won't be at end of line, so that
                # its status will be the same as the char before it, if should.
                parser.set_str(text.get(startatindex, stopatindex) + ' \n')
                bod = parser.find_good_parse_start(
                    editwin._build_char_in_string_func(startatindex))
                if bod is not None or startat == 1:
                    break
            parser.set_lo(bod or 0)
        else:
            # Start right after the previous range tagged "console", or at
            # the beginning of the text when there is no such range.
            r = text.tag_prevrange("console", index)
            if r:
                startatindex = r[1]
            else:
                startatindex = "1.0"
            # We add the newline because PyParse requires a newline at end.
            # We add a space so that index won't be at end of line, so that
            # its status will be the same as the char before it, if should.
            parser.set_str(text.get(startatindex, stopatindex) + ' \n')
            parser.set_lo(0)

        # We want what the parser has, except for the last newline and space.
        self.rawtext = parser.str[:-2]
        # As far as I can see, parser.str preserves the statement we are in,
        # so that stopatindex can be used to synchronize the string with the
        # text box indices.
        self.stopatindex = stopatindex
        bracketing = parser.get_last_stmt_bracketing()
        self.bracketing = bracketing
        # Find which entries of bracketing are openers. These always
        # correspond to a character of rawtext.
        self.isopener = [i > 0 and bracketing[i][1] > bracketing[i-1][1]
                         for i in range(len(bracketing))]

        self.set_index(index)

    def set_index(self, index):
        """Set the index to which the functions relate. Note that it must be
        in the same statement.

        Raises ValueError if index lies before the start of the analyzed text.
        """
        # rawtext ends at stopatindex, so the offset of index in rawtext is
        # the length of rawtext minus the length of the text after index.
        indexinrawtext = (len(self.rawtext) -
                          len(self.text.get(index, self.stopatindex)))
        if indexinrawtext < 0:
            raise ValueError("The index given is before the analyzed statement")
        self.indexinrawtext = indexinrawtext

        # Find the rightmost bracket to which index belongs.
        bracketing = self.bracketing
        last = len(bracketing) - 1
        which = 0
        while which < last and bracketing[which+1][0] < indexinrawtext:
            which += 1
        # An entry that starts exactly at the index also counts, unless it
        # is an opener.
        if (which < last and
                bracketing[which+1][0] == indexinrawtext and
                not self.isopener[which+1]):
            which += 1
        self.indexbracket = which

    def is_in_string(self):
        """Is the index given to the HyperParser in a string?"""
        # The bracket to which we belong should be an opener.
        # If it's an opener, it has to have a character.
        return (self.isopener[self.indexbracket] and
                self.rawtext[self.bracketing[self.indexbracket][0]]
                in ('"', "'"))

    def is_in_code(self):
        """Is the index given to the HyperParser in normal code?

        Normal code means: not inside a string and not inside a comment.
        """
        return (not self.isopener[self.indexbracket] or
                self.rawtext[self.bracketing[self.indexbracket][0]]
                not in ('#', '"', "'"))

    def get_surrounding_brackets(self, openers='([{', mustclose=False):
        """If the index given to the HyperParser is surrounded by a bracket
        defined in openers (or at least has one before it), return the
        indices of the opening bracket and the closing bracket (or the
        end of line, whichever comes first).
        If it is not surrounded by brackets, or the end of line comes before
        the closing bracket and mustclose is True, returns None.

        openers is a string of the opening characters to look for. The
        result, when not None, is a (beforeindex, afterindex) pair of text
        widget indices.
        """
        bracketing = self.bracketing
        rawlen = len(self.rawtext)

        # Walk backwards to the nearest opener, of a wanted kind, that is
        # not nested deeper than every entry passed on the way.
        bracketinglevel = bracketing[self.indexbracket][1]
        before = self.indexbracket
        while (not self.isopener[before] or
               self.rawtext[bracketing[before][0]] not in openers or
               bracketing[before][1] > bracketinglevel):
            before -= 1
            if before < 0:
                return None
            bracketinglevel = min(bracketinglevel, bracketing[before][1])

        # Walk forwards to the first entry whose level drops below that of
        # the opener; this is where the bracket is closed.
        after = self.indexbracket + 1
        while (after < len(bracketing) and
               bracketing[after][1] >= bracketinglevel):
            after += 1

        # Offsets in rawtext are converted to text indices by counting
        # characters back from stopatindex.
        beforeindex = self.text.index(
            "%s-%dc" % (self.stopatindex, rawlen - bracketing[before][0]))
        if after >= len(bracketing) or bracketing[after][0] > rawlen:
            # The bracket is not closed inside the analyzed text.
            if mustclose:
                return None
            afterindex = self.stopatindex
        else:
            # We are after a real char, so it is a ')' and we give the index
            # before it.
            afterindex = self.text.index(
                "%s-%dc" % (self.stopatindex,
                            rawlen - (bracketing[after][0] - 1)))

        return beforeindex, afterindex

    # This string includes all chars that may be in a white space
    _whitespace_chars = " \t\n\\"
    # This string includes all chars that may be in an identifier
    _id_chars = string.ascii_letters + string.digits + "_"
    # This string includes all chars that may be the first char of an identifier
    _id_first_chars = string.ascii_letters + "_"
    # Names which keyword.iskeyword() accepts on Python 3 but which are
    # ordinary values and may therefore start an expression.
    _id_keywords = frozenset(("True", "False", "None"))

    # Given a string and pos, return the number of chars in the identifier
    # which ends at pos, or 0 if there is no such one. Saved words are not
    # identifiers, except for True, False and None (see _id_keywords).
    # The search never goes back beyond limit.
    def _eat_identifier(self, str, limit, pos):
        i = pos
        while i > limit and str[i-1] in self._id_chars:
            i -= 1
        if i < pos:
            word = str[i:pos]
            if (str[i] not in self._id_first_chars or
                    (keyword.iskeyword(word) and
                     word not in self._id_keywords)):
                # Starts with a digit, or is a reserved word: not an
                # identifier.
                i = pos
        return pos - i

    def get_expression(self):
        """Return a string with the Python expression which ends at the given
        index, which is empty if there is no real one.

        Raises ValueError if the index is not inside normal code (see
        is_in_code).
        """
        if not self.is_in_code():
            raise ValueError("get_expression should only be called if index "
                             "is inside a code.")

        rawtext = self.rawtext
        bracketing = self.bracketing

        # The scan goes backwards from the index; brck_limit is the start of
        # the bracketing entry we are currently in.
        brck_index = self.indexbracket
        brck_limit = bracketing[brck_index][0]
        pos = self.indexinrawtext

        last_identifier_pos = pos
        # True while a dot must not be eaten: at the start, and right after
        # a dot or a bracket has been eaten.
        postdot_phase = True

        while True:
            # Eat whitespaces, comments, and if postdot_phase is False - one dot
            while True:
                if pos > brck_limit and rawtext[pos-1] in self._whitespace_chars:
                    # Eat a whitespace
                    pos -= 1
                elif (not postdot_phase and
                      pos > brck_limit and rawtext[pos-1] == '.'):
                    # Eat a dot
                    pos -= 1
                    postdot_phase = True
                # The next line will fail if we are *inside* a comment, but we
                # shouldn't be.
                elif (pos == brck_limit and brck_index > 0 and
                      rawtext[bracketing[brck_index-1][0]] == '#'):
                    # Eat a comment
                    brck_index -= 2
                    brck_limit = bracketing[brck_index][0]
                    pos = bracketing[brck_index+1][0]
                else:
                    # If we didn't eat anything, quit.
                    break

            if not postdot_phase:
                # We didn't find a dot, so the expression end at the last
                # identifier pos.
                break

            ret = self._eat_identifier(rawtext, brck_limit, pos)
            if ret:
                # There is an identifier to eat
                pos = pos - ret
                last_identifier_pos = pos
                # Now, in order to continue the search, we must find a dot.
                postdot_phase = False
                # (the loop continues now)

            elif pos == brck_limit:
                # We are at a bracketing limit. If it is a closing bracket,
                # eat the bracket, otherwise, stop the search.
                level = bracketing[brck_index][1]
                while brck_index > 0 and bracketing[brck_index-1][1] > level:
                    brck_index -= 1
                if bracketing[brck_index][0] == brck_limit:
                    # We were not at the end of a closing bracket
                    break
                pos = bracketing[brck_index][0]
                brck_index -= 1
                brck_limit = bracketing[brck_index][0]
                last_identifier_pos = pos
                if rawtext[pos] in "([":
                    # [] and () may be used after an identifier, so we
                    # continue. postdot_phase is True, so we don't allow a dot.
                    pass
                else:
                    # We can't continue after other types of brackets
                    if rawtext[pos] in "'\"":
                        # Scan a string prefix
                        while pos > 0 and rawtext[pos - 1] in "rRbBuU":
                            pos -= 1
                        last_identifier_pos = pos
                    break

            else:
                # We've found an operator or something.
                break

        return rawtext[last_identifier_pos:self.indexinrawtext]