1"""Provide advanced parsing abilities for ParenMatch and other extensions.
2
3HyperParser uses PyParser.  PyParser mostly gives information on the
4proper indentation of code.  HyperParser gives additional information on
5the structure of code.
6"""
7
8import string
9import keyword
10from idlelib import PyParse
11
class HyperParser:
    """Give structural information about the statement around a text index.

    Instances keep the following state, all of which is set by __init__
    and set_index:

    editwin        -- the editor window object passed to the constructor
    text           -- editwin.text, the text widget being analyzed
    rawtext        -- the string handed to the parser, without the
                      trailing space and newline added for parsing
    stopatindex    -- text index of the end of the line containing the
                      index; it corresponds to the end of rawtext
    bracketing     -- result of parser.get_last_stmt_bracketing(); used
                      here as a sequence of (position in rawtext,
                      nesting level) pairs
    isopener       -- list of booleans parallel to bracketing; True
                      where the level rises relative to the previous
                      entry (never True for entry 0)
    indexinrawtext -- offset in rawtext of the current index
    indexbracket   -- position in bracketing of the rightmost entry to
                      which the current index belongs
    """

    def __init__(self, editwin, index):
        """To initialize, analyze the surroundings of the given index.

        editwin must provide text, indentwidth, tabwidth,
        context_use_ps1, num_context_lines and
        _build_char_in_string_func; index is a text widget index.
        """

        self.editwin = editwin
        self.text = text = editwin.text

        parser = PyParse.Parser(editwin.indentwidth, editwin.tabwidth)

        # text.index() gives "line.column"; keep only the line number.
        lno = int(float(text.index(index)))
        # Both branches below analyze text up to the end of that line.
        stopatindex = "%d.end" % lno

        if not editwin.context_use_ps1:
            # Try increasingly large windows of preceding lines until the
            # parser finds a good place to start (or we reach line 1).
            for context in editwin.num_context_lines:
                startat = max(lno - context, 1)
                startatindex = repr(startat) + ".0"
                # We add the newline because PyParse requires a newline
                # at end. We add a space so that index won't be at end
                # of line, so that its status will be the same as the
                # char before it, if should.
                parser.set_str(text.get(startatindex, stopatindex)+' \n')
                bod = parser.find_good_parse_start(
                          editwin._build_char_in_string_func(startatindex))
                if bod is not None or startat == 1:
                    break
            parser.set_lo(bod or 0)
        else:
            # Start right after the previous range tagged "console", or
            # at the very beginning of the text if there is none.
            r = text.tag_prevrange("console", index)
            if r:
                startatindex = r[1]
            else:
                startatindex = "1.0"
            # We add the newline because PyParse requires it. We add a
            # space so that index won't be at end of line, so that its
            # status will be the same as the char before it, if should.
            parser.set_str(text.get(startatindex, stopatindex)+' \n')
            parser.set_lo(0)

        # We want what the parser has, minus the last newline and space.
        self.rawtext = parser.str[:-2]
        # Parser.str apparently preserves the statement we are in, so
        # that stopatindex can be used to synchronize the string with
        # the text box indices.
        self.stopatindex = stopatindex
        self.bracketing = parser.get_last_stmt_bracketing()
        # find which pairs of bracketing are openers. These always
        # correspond to a character of rawtext.
        self.isopener = [i>0 and self.bracketing[i][1] >
                         self.bracketing[i-1][1]
                         for i in range(len(self.bracketing))]

        self.set_index(index)

    def set_index(self, index):
        """Set the index to which the functions relate.

        The index must be in the same statement.

        Raises ValueError if the text between index and the end of the
        analyzed line is longer than the analyzed text, i.e. index lies
        before the start of rawtext.
        """
        indexinrawtext = (len(self.rawtext) -
                          len(self.text.get(index, self.stopatindex)))
        if indexinrawtext < 0:
            raise ValueError("Index %s precedes the analyzed statement"
                             % index)
        self.indexinrawtext = indexinrawtext
        # find the rightmost bracket to which index belongs
        self.indexbracket = 0
        while (self.indexbracket < len(self.bracketing)-1 and
               self.bracketing[self.indexbracket+1][0] < self.indexinrawtext):
            self.indexbracket += 1
        # An entry located exactly at the index is also taken when it is
        # not an opener.
        if (self.indexbracket < len(self.bracketing)-1 and
            self.bracketing[self.indexbracket+1][0] == self.indexinrawtext and
           not self.isopener[self.indexbracket+1]):
            self.indexbracket += 1

    def is_in_string(self):
        """Is the index given to the HyperParser in a string?"""
        # The bracket to which we belong should be an opener.
        # If it's an opener, it has to have a character.
        return (self.isopener[self.indexbracket] and
                self.rawtext[self.bracketing[self.indexbracket][0]]
                in ('"', "'"))

    def is_in_code(self):
        """Is the index given to the HyperParser in normal code?

        True unless the current bracketing entry is an opener whose
        character is a quote or '#'.
        """
        return (not self.isopener[self.indexbracket] or
                self.rawtext[self.bracketing[self.indexbracket][0]]
                not in ('#', '"', "'"))

    def get_surrounding_brackets(self, openers='([{', mustclose=False):
        """Return bracket indexes or None.

        If the index given to the HyperParser is surrounded by a
        bracket defined in openers (or at least has one before it),
        return the indices of the opening bracket and the closing
        bracket (or the end of line, whichever comes first).

        If it is not surrounded by brackets, or the end of line comes
        before the closing bracket and mustclose is True, returns None.
        """

        bracketinglevel = self.bracketing[self.indexbracket][1]
        before = self.indexbracket
        # Walk backwards to the nearest opener listed in `openers` that
        # is not nested deeper than the lowest level seen on the way.
        while (not self.isopener[before] or
              self.rawtext[self.bracketing[before][0]] not in openers or
              self.bracketing[before][1] > bracketinglevel):
            before -= 1
            if before < 0:
                return None
            bracketinglevel = min(bracketinglevel, self.bracketing[before][1])
        # Walk forwards to the first entry whose level drops below the
        # level of the opener found above.
        after = self.indexbracket + 1
        while (after < len(self.bracketing) and
              self.bracketing[after][1] >= bracketinglevel):
            after += 1

        # Offsets in rawtext are converted to text indices by counting
        # characters back from stopatindex.
        beforeindex = self.text.index("%s-%dc" %
            (self.stopatindex, len(self.rawtext)-self.bracketing[before][0]))
        if (after >= len(self.bracketing) or
           self.bracketing[after][0] > len(self.rawtext)):
            if mustclose:
                return None
            afterindex = self.stopatindex
        else:
            # We are after a real char, so it is a ')' and we give the
            # index before it.
            afterindex = self.text.index(
                "%s-%dc" % (self.stopatindex,
                 len(self.rawtext)-(self.bracketing[after][0]-1)))

        return beforeindex, afterindex

    # Ascii chars that may be in a white space
    _whitespace_chars = " \t\n\\"
    # Ascii chars that may be in an identifier
    _id_chars = string.ascii_letters + string.digits + "_"
    # Ascii chars that may be the first char of an identifier
    _id_first_chars = string.ascii_letters + "_"
    # Names that keyword.iskeyword() may report as keywords but which
    # can start an expression just like an identifier (e.g. "None.x").
    _id_keywords = frozenset(("True", "False", "None"))

    # Given a string and pos, return the number of chars in the
    # identifier which ends at pos, or 0 if there is no such one. Saved
    # words are not identifiers, except for those in _id_keywords.
    # Only str[limit:pos] is examined.
    def _eat_identifier(self, str, limit, pos):
        i = pos
        while i > limit and str[i-1] in self._id_chars:
            i -= 1
        if i < pos:
            candidate = str[i:pos]
            if (str[i] not in self._id_first_chars or
                    (keyword.iskeyword(candidate) and
                     candidate not in self._id_keywords)):
                # Starts with a digit, or is a real reserved word.
                i = pos
        return pos - i

    def get_expression(self):
        """Return a string with the Python expression which ends at the
        given index, which is empty if there is no real one.

        Raises ValueError if the index is not in normal code (see
        is_in_code).
        """
        if not self.is_in_code():
            raise ValueError("get_expression should only be called "
                             "if index is inside a code.")

        rawtext = self.rawtext
        bracketing = self.bracketing

        brck_index = self.indexbracket
        brck_limit = bracketing[brck_index][0]
        pos = self.indexinrawtext

        # Start of the expression found so far; the scan moves leftwards.
        last_identifier_pos = pos
        # True while an identifier or bracket is expected next, False
        # while a dot is required in order to continue.
        postdot_phase = True

        while 1:
            # Eat whitespaces, comments, and if postdot_phase is False - a dot
            while 1:
                if pos>brck_limit and rawtext[pos-1] in self._whitespace_chars:
                    # Eat a whitespace
                    pos -= 1
                elif (not postdot_phase and
                      pos > brck_limit and rawtext[pos-1] == '.'):
                    # Eat a dot
                    pos -= 1
                    postdot_phase = True
                # The next line will fail if we are *inside* a comment,
                # but we shouldn't be.
                elif (pos == brck_limit and brck_index > 0 and
                      rawtext[bracketing[brck_index-1][0]] == '#'):
                    # Eat a comment
                    brck_index -= 2
                    brck_limit = bracketing[brck_index][0]
                    pos = bracketing[brck_index+1][0]
                else:
                    # If we didn't eat anything, quit.
                    break

            if not postdot_phase:
                # We didn't find a dot, so the expression end at the
                # last identifier pos.
                break

            ret = self._eat_identifier(rawtext, brck_limit, pos)
            if ret:
                # There is an identifier to eat
                pos = pos - ret
                last_identifier_pos = pos
                # Now, to continue the search, we must find a dot.
                postdot_phase = False
                # (the loop continues now)

            elif pos == brck_limit:
                # We are at a bracketing limit. If it is a closing
                # bracket, eat the bracket, otherwise, stop the search.
                level = bracketing[brck_index][1]
                while brck_index > 0 and bracketing[brck_index-1][1] > level:
                    brck_index -= 1
                if bracketing[brck_index][0] == brck_limit:
                    # We were not at the end of a closing bracket
                    break
                pos = bracketing[brck_index][0]
                brck_index -= 1
                brck_limit = bracketing[brck_index][0]
                last_identifier_pos = pos
                if rawtext[pos] in "([":
                    # [] and () may be used after an identifier, so we
                    # continue. postdot_phase is True, so we don't allow a dot.
                    pass
                else:
                    # We can't continue after other types of brackets
                    if rawtext[pos] in "'\"":
                        # Scan a string prefix
                        while pos > 0 and rawtext[pos - 1] in "rRbBuU":
                            pos -= 1
                        last_identifier_pos = pos
                    break

            else:
                # We've found an operator or something.
                break

        return rawtext[last_identifier_pos:self.indexinrawtext]
251
252
if __name__ == '__main__':
    # Executed as a script: run the HyperParser unit tests verbosely.
    from unittest import main as run_tests
    run_tests(module='idlelib.idle_test.test_hyperparser', verbosity=2)
256