/*
 [The "BSD licence"]
 Copyright (c) 2013 Terence Parr
 All rights reserved.

 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions
 are met:
 1. Redistributions of source code must retain the above copyright
    notice, this list of conditions and the following disclaimer.
 2. Redistributions in binary form must reproduce the above copyright
    notice, this list of conditions and the following disclaimer in the
    documentation and/or other materials provided with the distribution.
 3. The name of the author may not be used to endorse or promote products
    derived from this software without specific prior written permission.

 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

/** XML lexer derived from ANTLR v4 ref guide book example.
 *
 *  The lexer uses three modes:
 *    - the default mode tokenizes everything OUTSIDE of a tag;
 *    - INSIDE tokenizes the contents of a tag, from '<' or '<?xml' up to
 *      the matching '>', '/>' or '?>';
 *    - PROC_INSTR accumulates a processing instruction into one PI token.
 */
lexer grammar XMLLexer;

// Default "mode": Everything OUTSIDE of a tag

/** An XML comment, delimiters included; emitted as a token (not skipped). */
COMMENT     :   '<!--' .*? '-->' ;

/** A CDATA section, delimiters included. */
CDATA       :   '<![CDATA[' .*? ']]>' ;

/** Scarf all DTD stuff, Entity Declarations like <!ENTITY ...>,
 *  and Notation Declarations <!NOTATION ...>
 */
DTD         :   '<!' .*? '>'            -> skip ;

/** A named entity reference such as &amp; */
EntityRef   :   '&' Name ';' ;

/** A numeric character reference, decimal (&#NN;) or hexadecimal (&#xHH;). */
CharRef     :   '&#' DIGIT+ ';'
            |   '&#x' HEXDIGIT+ ';'
            ;

/** A run of spaces, tabs and (CR)LF line breaks outside of a tag. */
SEA_WS      :   (' '|'\t'|'\r'? '\n')+ ;

/** Start of an element tag; switches to INSIDE until the tag is closed. */
OPEN        :   '<'                     -> pushMode(INSIDE) ;

/** Start of the XML declaration; requires whitespace after '<?xml'. */
XMLDeclOpen :   '<?xml' S               -> pushMode(INSIDE) ;

/** Start of any other '<?' Name construct. `more` keeps the matched text so
 *  that it becomes the beginning of the PI token produced in PROC_INSTR.
 */
SPECIAL_OPEN:   '<?' Name               -> more, pushMode(PROC_INSTR) ;

TEXT        :   ~[<&]+ ;        // match any 16 bit char other than < and &

// ----------------- Everything INSIDE of a tag ---------------------
mode INSIDE;

CLOSE       :   '>'                     -> popMode ;
SPECIAL_CLOSE:  '?>'                    -> popMode ; // close <?xml...?>
SLASH_CLOSE :   '/>'                    -> popMode ;
SLASH       :   '/' ;
EQUALS      :   '=' ;

/** A double- or single-quoted attribute value; may not contain '<' or the
 *  quote character that delimits it.
 */
STRING      :   '"' ~[<"]* '"'
            |   '\'' ~[<']* '\''
            ;

/** An element or attribute name. */
Name        :   NameStartChar NameChar* ;

/** A single whitespace character inside a tag; discarded. */
S           :   [ \t\r\n]               -> skip ;

fragment
HEXDIGIT    :   [a-fA-F0-9] ;

fragment
DIGIT       :   [0-9] ;

/** Any character allowed after the first character of a Name. */
fragment
NameChar    :   NameStartChar
            |   '-' | '_' | '.' | DIGIT
            |   '\u00B7'
            |   '\u0300'..'\u036F'
            |   '\u203F'..'\u2040'
            ;

/** Any character allowed as the first character of a Name.
 *
 *  Follows the NameStartChar production of the XML 1.0 specification for
 *  the Basic Multilingual Plane: ':' and '_' plus the listed letter ranges.
 *  The underscore and the ranges below U+2070 were previously missing, so
 *  names such as "_id" or names starting with an accented Latin, Greek or
 *  Cyrillic letter could not be tokenized.
 *  The supplementary range U+10000..U+EFFFF is intentionally left out: it
 *  cannot be written with the 4-digit \uXXXX escapes used in this file.
 */
fragment
NameStartChar
            :   [:_a-zA-Z]
            |   '\u00C0'..'\u00D6'
            |   '\u00D8'..'\u00F6'
            |   '\u00F8'..'\u02FF'
            |   '\u0370'..'\u037D'
            |   '\u037F'..'\u1FFF'
            |   '\u200C'..'\u200D'
            |   '\u2070'..'\u218F'
            |   '\u2C00'..'\u2FEF'
            |   '\u3001'..'\uD7FF'
            |   '\uF900'..'\uFDCF'
            |   '\uFDF0'..'\uFFFD'
            ;

// ----------------- Handle <? ... ?> ---------------------
mode PROC_INSTR;

/** Closes the processing instruction. Because SPECIAL_OPEN and IGNORE use
 *  `more`, the PI token text spans from '<?' through '?>'.
 */
PI          :   '?>'                    -> popMode ; // close <?...?>

/** Consumes one character of the processing instruction body at a time. */
IGNORE      :   .                       -> more ;