"""
HyperParser
===========
This module defines the HyperParser class, which provides advanced parsing
abilities for the ParenMatch and other extensions.
HyperParser uses PyParse. PyParse is intended mostly to give information
on the proper indentation of code. HyperParser gives some information on the
structure of code, which extensions use to help the user.
"""
10
11import string
12import keyword
13from idlelib import PyParse
14
class HyperParser:
    """Answer structural questions about the code around a text index.

    An instance keeps the raw text that was handed to the PyParse parser,
    the bracketing of the last statement as reported by that parser, and the
    position of the current index inside both.  The query methods
    (is_in_string, is_in_code, get_surrounding_brackets, get_expression)
    all relate to the index set by __init__ or set_index.
    """

    def __init__(self, editwin, index):
        """Initialize the HyperParser to analyze the surroundings of the given
        index.

        editwin is the editor window object; its text, indentwidth, tabwidth,
        context_use_ps1, num_context_lines and _build_char_in_string_func
        attributes are used.  index is an index into editwin.text.
        """

        self.editwin = editwin
        self.text = text = editwin.text

        parser = PyParse.Parser(editwin.indentwidth, editwin.tabwidth)

        # Keep only the integer (line) part of the resolved text index.
        lno = int(float(text.index(index)))
        # In both modes the analyzed text stops at the end of that line.
        stopatindex = "%d.end" % lno

        if not editwin.context_use_ps1:
            # Try increasingly large contexts until the parser finds a good
            # place to start parsing, or the context reaches the first line.
            for context in editwin.num_context_lines:
                startat = max(lno - context, 1)
                startatindex = repr(startat) + ".0"
                # We add the newline because PyParse requires a newline at end.
                # We add a space so that index won't be at end of line, so that
                # its status will be the same as the char before it, if should.
                parser.set_str(text.get(startatindex, stopatindex)+' \n')
                bod = parser.find_good_parse_start(
                          editwin._build_char_in_string_func(startatindex))
                if bod is not None or startat == 1:
                    break
            parser.set_lo(bod or 0)
        else:
            # Start right after the previous range tagged "console", or at
            # the beginning of the text if there is no such range.
            r = text.tag_prevrange("console", index)
            if r:
                startatindex = r[1]
            else:
                startatindex = "1.0"
            # Same trailing space and newline as in the other branch.
            parser.set_str(text.get(startatindex, stopatindex)+' \n')
            parser.set_lo(0)

        # We want what the parser has, except for the last newline and space.
        self.rawtext = parser.str[:-2]
        # As far as I can see, parser.str preserves the statement we are in,
        # so that stopatindex can be used to synchronize the string with the
        # text box indices.
        self.stopatindex = stopatindex
        self.bracketing = parser.get_last_stmt_bracketing()
        # find which pairs of bracketing are openers. These always correspond
        # to a character of rawtext.
        # An entry is an opener when its level (item 1) is higher than the
        # level of the entry before it; the first entry never is.
        self.isopener = [i > 0 and
                         self.bracketing[i][1] > self.bracketing[i-1][1]
                         for i in range(len(self.bracketing))]

        self.set_index(index)

    def set_index(self, index):
        """Set the index to which the functions relate. Note that it must be
        in the same statement.

        Raises ValueError if the index lies before the analyzed text.
        """
        # rawtext ends at stopatindex, so the offset of index inside rawtext
        # is found by measuring the text between index and stopatindex.
        indexinrawtext = (len(self.rawtext) -
                          len(self.text.get(index, self.stopatindex)))
        if indexinrawtext < 0:
            raise ValueError("The index given is before the analyzed statement")
        self.indexinrawtext = indexinrawtext
        # find the rightmost bracket to which index belongs
        self.indexbracket = 0
        while (self.indexbracket < len(self.bracketing)-1 and
               self.bracketing[self.indexbracket+1][0] < self.indexinrawtext):
            self.indexbracket += 1
        # An entry located exactly at the index is also taken, unless it is
        # an opener.
        if (self.indexbracket < len(self.bracketing)-1 and
                self.bracketing[self.indexbracket+1][0] == self.indexinrawtext
                and not self.isopener[self.indexbracket+1]):
            self.indexbracket += 1

    def is_in_string(self):
        """Is the index given to the HyperParser in a string?"""
        # The bracket to which we belong should be an opener.
        # If it's an opener, it has to have a character.
        return (self.isopener[self.indexbracket] and
                self.rawtext[self.bracketing[self.indexbracket][0]]
                in ('"', "'"))

    def is_in_code(self):
        """Is the index given to the HyperParser in normal code?

        Normal code means the index is neither in a string nor in a comment.
        """
        return (not self.isopener[self.indexbracket] or
                self.rawtext[self.bracketing[self.indexbracket][0]]
                not in ('#', '"', "'"))

    def get_surrounding_brackets(self, openers='([{', mustclose=False):
        """If the index given to the HyperParser is surrounded by a bracket
        defined in openers (or at least has one before it), return the
        indices of the opening bracket and the closing bracket (or the
        end of line, whichever comes first).
        If it is not surrounded by brackets, or the end of line comes before
        the closing bracket and mustclose is True, returns None.
        """
        bracketinglevel = self.bracketing[self.indexbracket][1]
        # Walk backwards to the nearest enclosing opener that is one of the
        # requested characters, tracking the lowest level seen on the way.
        before = self.indexbracket
        while (not self.isopener[before] or
               self.rawtext[self.bracketing[before][0]] not in openers or
               self.bracketing[before][1] > bracketinglevel):
            before -= 1
            if before < 0:
                return None
            bracketinglevel = min(bracketinglevel, self.bracketing[before][1])
        # Walk forwards to the first entry whose level drops below that of
        # the opener found above.
        after = self.indexbracket + 1
        while (after < len(self.bracketing) and
               self.bracketing[after][1] >= bracketinglevel):
            after += 1

        # Positions in rawtext are converted to text indices by counting
        # characters back from stopatindex.
        beforeindex = self.text.index("%s-%dc" %
            (self.stopatindex, len(self.rawtext)-self.bracketing[before][0]))
        if (after >= len(self.bracketing) or
                self.bracketing[after][0] > len(self.rawtext)):
            if mustclose:
                return None
            afterindex = self.stopatindex
        else:
            # We are after a real char, so it is a ')' and we give the index
            # before it.
            afterindex = self.text.index("%s-%dc" %
                (self.stopatindex,
                 len(self.rawtext)-(self.bracketing[after][0]-1)))

        return beforeindex, afterindex

    # This string includes all chars that may be in a white space
    _whitespace_chars = " \t\n\\"
    # This string includes all chars that may be in an identifier
    _id_chars = string.ascii_letters + string.digits + "_"
    # This string includes all chars that may be the first char of an identifier
    _id_first_chars = string.ascii_letters + "_"
    # Words that keyword.iskeyword() may report as keywords but which can
    # still start an expression (e.g. "None.__class__"), so they are eaten
    # like ordinary identifiers.
    _id_keywords = frozenset(("True", "False", "None"))

    # Given a string and pos, return the number of chars in the identifier
    # which ends at pos, or 0 if there is no such one. Saved words are not
    # identifiers, except for the constants listed in _id_keywords.
    # The search never goes back past limit.
    def _eat_identifier(self, str, limit, pos):
        i = pos
        while i > limit and str[i-1] in self._id_chars:
            i -= 1
        if i < pos:
            word = str[i:pos]
            # Reject words starting with a digit and real keywords.
            if (str[i] not in self._id_first_chars or
                    (keyword.iskeyword(word) and
                     word not in self._id_keywords)):
                i = pos
        return pos - i

    def get_expression(self):
        """Return a string with the Python expression which ends at the given
        index, which is empty if there is no real one.

        Raises ValueError if the index is not inside normal code.
        """
        if not self.is_in_code():
            raise ValueError("get_expression should only be called if index "
                             "is inside a code.")

        rawtext = self.rawtext
        bracketing = self.bracketing

        # The scan goes backwards from the index.  brck_limit is the position
        # of the bracketing entry the scan currently belongs to; the scan
        # must not pass it without handling the bracket explicitly.
        brck_index = self.indexbracket
        brck_limit = bracketing[brck_index][0]
        pos = self.indexinrawtext

        last_identifier_pos = pos
        # True while an identifier (or bracketed part) is expected next;
        # False after one was eaten, when only a dot can extend the
        # expression further to the left.
        postdot_phase = True

        while True:
            # Eat whitespaces, comments, and if postdot_phase is False - one dot
            while True:
                if pos>brck_limit and rawtext[pos-1] in self._whitespace_chars:
                    # Eat a whitespace
                    pos -= 1
                elif (not postdot_phase and
                      pos > brck_limit and rawtext[pos-1] == '.'):
                    # Eat a dot
                    pos -= 1
                    postdot_phase = True
                # The next line will fail if we are *inside* a comment, but we
                # shouldn't be.
                elif (pos == brck_limit and brck_index > 0 and
                      rawtext[bracketing[brck_index-1][0]] == '#'):
                    # Eat a comment
                    brck_index -= 2
                    brck_limit = bracketing[brck_index][0]
                    pos = bracketing[brck_index+1][0]
                else:
                    # If we didn't eat anything, quit.
                    break

            if not postdot_phase:
                # We didn't find a dot, so the expression end at the last
                # identifier pos.
                break

            ret = self._eat_identifier(rawtext, brck_limit, pos)
            if ret:
                # There is an identifier to eat
                pos = pos - ret
                last_identifier_pos = pos
                # Now, in order to continue the search, we must find a dot.
                postdot_phase = False
                # (the loop continues now)

            elif pos == brck_limit:
                # We are at a bracketing limit. If it is a closing bracket,
                # eat the bracket, otherwise, stop the search.
                level = bracketing[brck_index][1]
                while brck_index > 0 and bracketing[brck_index-1][1] > level:
                    brck_index -= 1
                if bracketing[brck_index][0] == brck_limit:
                    # We were not at the end of a closing bracket
                    break
                pos = bracketing[brck_index][0]
                brck_index -= 1
                brck_limit = bracketing[brck_index][0]
                last_identifier_pos = pos
                if rawtext[pos] in "([":
                    # [] and () may be used after an identifier, so we
                    # continue. postdot_phase is True, so we don't allow a dot.
                    pass
                else:
                    # We can't continue after other types of brackets
                    if rawtext[pos] in "'\"":
                        # Scan a string prefix
                        while pos > 0 and rawtext[pos - 1] in "rRbBuU":
                            pos -= 1
                        last_identifier_pos = pos
                    break

            else:
                # We've found an operator or something.
                break

        return rawtext[last_identifier_pos:self.indexinrawtext]
247