/*
 * Copyright (C) 2009 Google Inc. All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are
 * met:
 *
 *     * Redistributions of source code must retain the above copyright
 * notice, this list of conditions and the following disclaimer.
 *     * Redistributions in binary form must reproduce the above
 * copyright notice, this list of conditions and the following disclaimer
 * in the documentation and/or other materials provided with the
 * distribution.
 *     * Neither the name of Google Inc. nor the names of its
 * contributors may be used to endorse or promote products derived from
 * this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29 */ 30 31// Generate js file as follows: 32// 33// re2c -isc WebCore/inspector/front-end/SourceHTMLTokenizer.re2js \ 34// | sed 's|^yy\([^:]*\)*\:|case \1:|' \ 35// | sed 's|[*]cursor[+][+]|this._charAt(cursor++)|' \ 36// | sed 's|[[*][+][+]cursor|this._charAt(++cursor)|' \ 37// | sed 's|[*]cursor|this._charAt(cursor)|' \ 38// | sed 's|yych = \*\([^;]*\)|yych = this._charAt\1|' \ 39// | sed 's|goto case \([^;]*\)|{ gotoCase = \1; continue; }|' \ 40// | sed 's|unsigned\ int|var|' \ 41// | sed 's|var\ yych|case 1: var yych|' 42 43WebInspector.SourceHTMLTokenizer = function() 44{ 45 WebInspector.SourceTokenizer.call(this); 46 47 // The order is determined by the generated code. 48 this._lexConditions = { 49 INITIAL: 0, 50 COMMENT: 1, 51 DOCTYPE: 2, 52 TAG: 3, 53 DSTRING: 4, 54 SSTRING: 5 55 }; 56 this.case_INITIAL = 1000; 57 this.case_COMMENT = 1001; 58 this.case_DOCTYPE = 1002; 59 this.case_TAG = 1003; 60 this.case_DSTRING = 1004; 61 this.case_SSTRING = 1005; 62 63 this._parseConditions = { 64 INITIAL: 0, 65 ATTRIBUTE: 1, 66 ATTRIBUTE_VALUE: 2, 67 LINKIFY: 4, 68 A_NODE: 8, 69 SCRIPT: 16 70 }; 71 72 this.initialCondition = { lexCondition: this._lexConditions.INITIAL, parseCondition: this._parseConditions.INITIAL }; 73} 74 75WebInspector.SourceHTMLTokenizer.prototype = { 76 _isExpectingAttribute: function() 77 { 78 return this._parseCondition & this._parseConditions.ATTRIBUTE; 79 }, 80 81 _isExpectingAttributeValue: function() 82 { 83 return this._parseCondition & this._parseConditions.ATTRIBUTE_VALUE; 84 }, 85 86 _setExpectingAttribute: function() 87 { 88 if (this._isExpectingAttributeValue()) 89 this._parseCondition ^= this._parseConditions.ATTRIBUTE_VALUE; 90 this._parseCondition |= this._parseConditions.ATTRIBUTE; 91 }, 92 93 _setExpectingAttributeValue: function() 94 { 95 if (this._isExpectingAttribute()) 96 this._parseCondition ^= this._parseConditions.ATTRIBUTE; 97 this._parseCondition |= this._parseConditions.ATTRIBUTE_VALUE; 98 }, 99 100 _stringToken: 
function(cursor, stringEnds) 101 { 102 if (!this._isExpectingAttributeValue()) { 103 this.tokenType = null; 104 return cursor; 105 } 106 this.tokenType = this._attrValueTokenType(); 107 if (stringEnds) 108 this._setExpectingAttribute(); 109 return cursor; 110 }, 111 112 _attrValueTokenType: function() 113 { 114 if (this._parseCondition & this._parseConditions.LINKIFY) { 115 if (this._parseCondition & this._parseConditions.A_NODE) 116 return "html-external-link"; 117 return "html-resource-link"; 118 } 119 return "html-attribute-value"; 120 }, 121 122 nextToken: function(cursor) 123 { 124 var cursorOnEnter = cursor; 125 var gotoCase = 1; 126 while (1) { 127 switch (gotoCase) 128 // Following comment is replaced with generated state machine. 129 /*!re2c 130 re2c:define:YYCTYPE = "var"; 131 re2c:define:YYCURSOR = cursor; 132 re2c:define:YYGETCONDITION = "this.getLexCondition"; 133 re2c:define:YYSETCONDITION = "this.setLexCondition"; 134 re2c:condprefix = "case this.case_"; 135 re2c:condenumprefix = "this._lexConditions."; 136 re2c:yyfill:enable = 0; 137 re2c:labelprefix = "case "; 138 re2c:indent:top = 2; 139 re2c:indent:string = " "; 140 141 CommentContent = ([^-\r\n] | ("--" [^>]))*; 142 Comment = "<!--" CommentContent "-->"; 143 CommentStart = "<!--" CommentContent [\r\n]; 144 CommentEnd = CommentContent "-->"; 145 146 DocTypeStart = "<!" 
[Dd] [Oo] [Cc] [Tt] [Yy] [Pp] [Ee]; 147 DocTypeContent = [^\r\n>]*; 148 149 ScriptStart = "<" [Ss] [Cc] [Rr] [Ii] [Pp] [Tt]; 150 ScriptEnd = "</" [Ss] [Cc] [Rr] [Ii] [Pp] [Tt]; 151 152 LT = "<" | "</"; 153 GT = ">"; 154 EqualSign = "="; 155 156 DoubleStringContent = [^\r\n\"]*; 157 SingleStringContent = [^\r\n\']*; 158 StringLiteral = "\"" DoubleStringContent "\"" | "'" SingleStringContent "'"; 159 DoubleStringStart = "\"" DoubleStringContent [\r\n]; 160 DoubleStringEnd = DoubleStringContent "\""; 161 SingleStringStart = "'" SingleStringContent [\r\n]; 162 SingleStringEnd = SingleStringContent "'"; 163 164 Identifier = [^ \r\n"'<>\[\]=]+; 165 166 <INITIAL> Comment { this.tokenType = "html-comment"; return cursor; } 167 <INITIAL> CommentStart => COMMENT { this.tokenType = "html-comment"; return cursor; } 168 <COMMENT> CommentContent => COMMENT { this.tokenType = "html-comment"; return cursor; } 169 <COMMENT> CommentEnd => INITIAL { this.tokenType = "html-comment"; return cursor; } 170 171 <INITIAL> DocTypeStart => DOCTYPE { this.tokenType = "html-doctype"; return cursor; } 172 <DOCTYPE> DocTypeContent => DOCTYPE { this.tokenType = "html-doctype"; return cursor; } 173 <DOCTYPE> GT => INITIAL { this.tokenType = "html-doctype"; return cursor; } 174 175 <INITIAL> ScriptStart => TAG 176 { 177 this.tokenType = "html-tag"; 178 this._parseCondition = this._parseConditions.SCRIPT; 179 this._setExpectingAttribute(); 180 return cursor; 181 } 182 183 <INITIAL> ScriptEnd => TAG 184 { 185 this.tokenType = "html-tag"; 186 this._parseCondition = this._parseConditions.INITIAL; 187 return cursor; 188 } 189 190 <INITIAL> LT => TAG 191 { 192 if (this._parseCondition & this._parseConditions.SCRIPT) { 193 // Do not tokenize script tag contents, keep lexer state although processing "<". 
194 this.setLexCondition(this._lexConditions.INITIAL); 195 this.tokenType = null; 196 return cursor; 197 } 198 199 this._parseCondition = this._parseConditions.INITIAL; 200 this.tokenType = "html-tag"; 201 return cursor; 202 } 203 204 <TAG> GT => INITIAL 205 { 206 if (this._parseCondition & this._parseConditions.SCRIPT) { 207 // Do not tokenize script tag contents. 208 this.tokenType = null; 209 return cursor; 210 } 211 212 this._parseCondition = this._parseConditions.INITIAL; 213 this.tokenType = "html-tag"; 214 return cursor; 215 } 216 217 <TAG> StringLiteral { return this._stringToken(cursor, true); } 218 <TAG> DoubleStringStart => DSTRING { return this._stringToken(cursor); } 219 <DSTRING> DoubleStringContent => DSTRING { return this._stringToken(cursor); } 220 <DSTRING> DoubleStringEnd => TAG { return this._stringToken(cursor, true); } 221 <TAG> SingleStringStart => SSTRING { return this._stringToken(cursor); } 222 <SSTRING> SingleStringContent => SSTRING { return this._stringToken(cursor); } 223 <SSTRING> SingleStringEnd => TAG { return this._stringToken(cursor, true); } 224 225 <TAG> EqualSign => TAG 226 { 227 if (this._isExpectingAttribute()) 228 this._setExpectingAttributeValue(); 229 this.tokenType = null; 230 return cursor; 231 } 232 233 <TAG> Identifier 234 { 235 if (this._parseCondition === this._parseConditions.SCRIPT) { 236 // Fall through if expecting attributes. 
237 this.tokenType = null; 238 return cursor; 239 } 240 241 if (this._parseCondition === this._parseConditions.INITIAL) { 242 this.tokenType = "html-tag"; 243 this._setExpectingAttribute(); 244 var token = this._line.substring(cursorOnEnter, cursor); 245 if (token === "a") 246 this._parseCondition |= this._parseConditions.A_NODE; 247 else if (this._parseCondition & this._parseConditions.A_NODE) 248 this._parseCondition ^= this._parseConditions.A_NODE; 249 } else if (this._isExpectingAttribute()) { 250 var token = this._line.substring(cursorOnEnter, cursor); 251 if (token === "href" || token === "src") 252 this._parseCondition |= this._parseConditions.LINKIFY; 253 else if (this._parseCondition |= this._parseConditions.LINKIFY) 254 this._parseCondition ^= this._parseConditions.LINKIFY; 255 this.tokenType = "html-attribute-name"; 256 } else if (this._isExpectingAttributeValue()) 257 this.tokenType = this._attrValueTokenType(); 258 else 259 this.tokenType = null; 260 return cursor; 261 } 262 <*> [^] { this.tokenType = null; return cursor; } 263 */ 264 } 265 } 266} 267 268WebInspector.SourceHTMLTokenizer.prototype.__proto__ = WebInspector.SourceTokenizer.prototype; 269