1/*
2 * Copyright (C) 2009 Google Inc. All rights reserved.
3 *
4 * Redistribution and use in source and binary forms, with or without
5 * modification, are permitted provided that the following conditions are
6 * met:
7 *
8 *     * Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 *     * Redistributions in binary form must reproduce the above
11 * copyright notice, this list of conditions and the following disclaimer
12 * in the documentation and/or other materials provided with the
13 * distribution.
14 *     * Neither the name of Google Inc. nor the names of its
15 * contributors may be used to endorse or promote products derived from
16 * this software without specific prior written permission.
17 *
18 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */
30
31// Generate js file as follows:
32//
33// re2c -isc WebCore/inspector/front-end/SourceHTMLTokenizer.re2js \
34// | sed 's|^yy\([^:]*\)*\:|case \1:|' \
35// | sed 's|[*]cursor[+][+]|this._charAt(cursor++)|' \
36// | sed 's|[[*][+][+]cursor|this._charAt(++cursor)|' \
37// | sed 's|[*]cursor|this._charAt(cursor)|' \
38// | sed 's|yych = \*\([^;]*\)|yych = this._charAt\1|' \
39// | sed 's|goto case \([^;]*\)|{ gotoCase = \1; continue; }|' \
40// | sed 's|unsigned\ int|var|' \
41// | sed 's|var\ yych|case 1: var yych|'
42
WebInspector.SourceHTMLTokenizer = function()
{
    WebInspector.SourceTokenizer.call(this);

    // Lexer conditions. As the generated code determines their order,
    // the numeric values below must not be renumbered by hand.
    var lexConditions = {
        INITIAL: 0,
        COMMENT: 1,
        DOCTYPE: 2,
        TAG: 3,
        DSTRING: 4,
        SSTRING: 5
    };
    this._lexConditions = lexConditions;

    // Switch labels for the entry point of every lexer condition. The state
    // machine refers to them as this.case_<CONDITION> (see re2c:condprefix).
    var conditionLabelBase = 1000;
    this.case_INITIAL = conditionLabelBase + lexConditions.INITIAL;
    this.case_COMMENT = conditionLabelBase + lexConditions.COMMENT;
    this.case_DOCTYPE = conditionLabelBase + lexConditions.DOCTYPE;
    this.case_TAG = conditionLabelBase + lexConditions.TAG;
    this.case_DSTRING = conditionLabelBase + lexConditions.DSTRING;
    this.case_SSTRING = conditionLabelBase + lexConditions.SSTRING;

    // Parser state flags. Apart from INITIAL (no flag set) each one occupies
    // its own bit, so they can be combined and tested with bitwise operators.
    var parseConditions = {
        INITIAL: 0,
        ATTRIBUTE: 1 << 0,
        ATTRIBUTE_VALUE: 1 << 1,
        LINKIFY: 1 << 2,
        A_NODE: 1 << 3,
        SCRIPT: 1 << 4
    };
    this._parseConditions = parseConditions;

    // Condition pair describing the tokenizer state before any input is seen.
    this.initialCondition = {
        lexCondition: lexConditions.INITIAL,
        parseCondition: parseConditions.INITIAL
    };
}
74
75WebInspector.SourceHTMLTokenizer.prototype = {
76    _isExpectingAttribute: function()
77    {
78        return this._parseCondition & this._parseConditions.ATTRIBUTE;
79    },
80
81    _isExpectingAttributeValue: function()
82    {
83        return this._parseCondition & this._parseConditions.ATTRIBUTE_VALUE;
84    },
85
86    _setExpectingAttribute: function()
87    {
88        if (this._isExpectingAttributeValue())
89            this._parseCondition ^= this._parseConditions.ATTRIBUTE_VALUE;
90        this._parseCondition |= this._parseConditions.ATTRIBUTE;
91    },
92
93    _setExpectingAttributeValue: function()
94    {
95        if (this._isExpectingAttribute())
96            this._parseCondition ^= this._parseConditions.ATTRIBUTE;
97        this._parseCondition |= this._parseConditions.ATTRIBUTE_VALUE;
98    },
99
100    _stringToken: function(cursor, stringEnds)
101    {
102        if (!this._isExpectingAttributeValue()) {
103            this.tokenType = null;
104            return cursor;
105        }
106        this.tokenType = this._attrValueTokenType();
107        if (stringEnds)
108            this._setExpectingAttribute();
109        return cursor;
110    },
111
112    _attrValueTokenType: function()
113    {
114        if (this._parseCondition & this._parseConditions.LINKIFY) {
115            if (this._parseCondition & this._parseConditions.A_NODE)
116                return "html-external-link";
117            return "html-resource-link";
118        }
119        return "html-attribute-value";
120    },
121
122    nextToken: function(cursor)
123    {
124        var cursorOnEnter = cursor;
125        var gotoCase = 1;
126        while (1) {
127            switch (gotoCase)
128            // Following comment is replaced with generated state machine.
129            /*!re2c
130                re2c:define:YYCTYPE  = "var";
131                re2c:define:YYCURSOR = cursor;
132                re2c:define:YYGETCONDITION = "this.getLexCondition";
133                re2c:define:YYSETCONDITION = "this.setLexCondition";
134                re2c:condprefix = "case this.case_";
135                re2c:condenumprefix = "this._lexConditions.";
136                re2c:yyfill:enable = 0;
137                re2c:labelprefix = "case ";
138                re2c:indent:top = 2;
139                re2c:indent:string = "    ";
140
141                CommentContent = ([^-\r\n] | ("--" [^>]))*;
142                Comment = "<!--" CommentContent "-->";
143                CommentStart = "<!--" CommentContent [\r\n];
144                CommentEnd = CommentContent "-->";
145
146                DocTypeStart = "<!" [Dd] [Oo] [Cc] [Tt] [Yy] [Pp] [Ee];
147                DocTypeContent = [^\r\n>]*;
148
149                ScriptStart = "<" [Ss] [Cc] [Rr] [Ii] [Pp] [Tt];
150                ScriptEnd = "</" [Ss] [Cc] [Rr] [Ii] [Pp] [Tt];
151
152                LT = "<" | "</";
153                GT = ">";
154                EqualSign = "=";
155
156                DoubleStringContent = [^\r\n\"]*;
157                SingleStringContent = [^\r\n\']*;
158                StringLiteral = "\"" DoubleStringContent "\"" | "'" SingleStringContent "'";
159                DoubleStringStart = "\"" DoubleStringContent [\r\n];
160                DoubleStringEnd = DoubleStringContent "\"";
161                SingleStringStart = "'" SingleStringContent [\r\n];
162                SingleStringEnd = SingleStringContent "'";
163
164                Identifier = [^ \r\n"'<>\[\]=]+;
165
166                <INITIAL> Comment { this.tokenType = "html-comment"; return cursor; }
167                <INITIAL> CommentStart => COMMENT { this.tokenType = "html-comment"; return cursor; }
168                <COMMENT> CommentContent => COMMENT { this.tokenType = "html-comment"; return cursor; }
169                <COMMENT> CommentEnd => INITIAL { this.tokenType = "html-comment"; return cursor; }
170
171                <INITIAL> DocTypeStart => DOCTYPE { this.tokenType = "html-doctype"; return cursor; }
172                <DOCTYPE> DocTypeContent => DOCTYPE { this.tokenType = "html-doctype"; return cursor; }
173                <DOCTYPE> GT => INITIAL { this.tokenType = "html-doctype"; return cursor; }
174
175                <INITIAL> ScriptStart => TAG
176                {
177                    this.tokenType = "html-tag";
178                    this._parseCondition = this._parseConditions.SCRIPT;
179                    this._setExpectingAttribute();
180                    return cursor;
181                }
182
183                <INITIAL> ScriptEnd => TAG
184                {
185                    this.tokenType = "html-tag";
186                    this._parseCondition = this._parseConditions.INITIAL;
187                    return cursor;
188                }
189
190                <INITIAL> LT => TAG
191                {
192                    if (this._parseCondition & this._parseConditions.SCRIPT) {
193                        // Do not tokenize script tag contents, keep lexer state although processing "<".
194                        this.setLexCondition(this._lexConditions.INITIAL);
195                        this.tokenType = null;
196                        return cursor;
197                    }
198
199                    this._parseCondition = this._parseConditions.INITIAL;
200                    this.tokenType = "html-tag";
201                    return cursor;
202                }
203
204                <TAG> GT => INITIAL
205                {
206                    if (this._parseCondition & this._parseConditions.SCRIPT) {
207                        // Do not tokenize script tag contents.
208                        this.tokenType = null;
209                        return cursor;
210                    }
211
212                    this._parseCondition = this._parseConditions.INITIAL;
213                    this.tokenType = "html-tag";
214                    return cursor;
215                }
216
217                <TAG> StringLiteral { return this._stringToken(cursor, true); }
218                <TAG> DoubleStringStart => DSTRING { return this._stringToken(cursor); }
219                <DSTRING> DoubleStringContent => DSTRING { return this._stringToken(cursor); }
220                <DSTRING> DoubleStringEnd => TAG { return this._stringToken(cursor, true); }
221                <TAG> SingleStringStart => SSTRING { return this._stringToken(cursor); }
222                <SSTRING> SingleStringContent => SSTRING { return this._stringToken(cursor); }
223                <SSTRING> SingleStringEnd => TAG { return this._stringToken(cursor, true); }
224
225                <TAG> EqualSign => TAG
226                {
227                    if (this._isExpectingAttribute())
228                        this._setExpectingAttributeValue();
229                    this.tokenType = null;
230                    return cursor;
231                }
232
233                <TAG> Identifier
234                {
235                    if (this._parseCondition === this._parseConditions.SCRIPT) {
236                        // Fall through if expecting attributes.
237                        this.tokenType = null;
238                        return cursor;
239                    }
240
241                    if (this._parseCondition === this._parseConditions.INITIAL) {
242                        this.tokenType = "html-tag";
243                        this._setExpectingAttribute();
244                        var token = this._line.substring(cursorOnEnter, cursor);
245                        if (token === "a")
246                            this._parseCondition |= this._parseConditions.A_NODE;
247                        else if (this._parseCondition & this._parseConditions.A_NODE)
248                            this._parseCondition ^= this._parseConditions.A_NODE;
249                    } else if (this._isExpectingAttribute()) {
250                        var token = this._line.substring(cursorOnEnter, cursor);
251                        if (token === "href" || token === "src")
252                            this._parseCondition |= this._parseConditions.LINKIFY;
253                        else if (this._parseCondition |= this._parseConditions.LINKIFY)
254                            this._parseCondition ^= this._parseConditions.LINKIFY;
255                        this.tokenType = "html-attribute-name";
256                    } else if (this._isExpectingAttributeValue())
257                        this.tokenType = this._attrValueTokenType();
258                    else
259                        this.tokenType = null;
260                    return cursor;
261                }
262                <*> [^] { this.tokenType = null; return cursor; }
263            */
264        }
265    }
266}
267
// Chain the prototypes so that SourceHTMLTokenizer instances inherit the members of WebInspector.SourceTokenizer.prototype.
WebInspector.SourceHTMLTokenizer.prototype.__proto__ = WebInspector.SourceTokenizer.prototype;
269