/*
 [The "BSD licence"]
 Copyright (c) 2013 Terence Parr
 All rights reserved.

 Redistribution and use in source and binary forms, with or without
 modification, are permitted provided that the following conditions
 are met:
 1. Redistributions of source code must retain the above copyright
    notice, this list of conditions and the following disclaimer.
 2. Redistributions in binary form must reproduce the above copyright
    notice, this list of conditions and the following disclaimer in the
    documentation and/or other materials provided with the distribution.
 3. The name of the author may not be used to endorse or promote products
    derived from this software without specific prior written permission.

 THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

/** XML lexer derived from ANTLR v4 ref guide book example */
lexer grammar XMLLexer;

// Default "mode": Everything OUTSIDE of a tag

// XML comment: '<!--', then the shortest possible run of any characters,
// ending at the first '-->'. Emitted as a token (there is no skip command).
COMMENT     :   '<!--' .*? '-->' ;
// CDATA section: '<![CDATA[', then the shortest possible run of any
// characters, ending at the first ']]>'. The token text includes both
// delimiters.
CDATA       :   '<![CDATA[' .*? ']]>' ;
/** Scarf all DTD stuff, Entity Declarations like <!ENTITY ...>,
 *  and Notation Declarations <!NOTATION ...>
 *
 *  Matches '<!' up to the FIRST following '>' (non-greedy) and discards
 *  the text via skip, so no token is emitted. Because the match ends at
 *  the first '>', a declaration that itself contains a '>' is consumed
 *  only up to that character.
 */
DTD         :   '<!' .*? '>'            -> skip ;
// Named entity reference: '&', a Name, then ';' (for example &amp;).
// Name is the rule declared further down, under mode INSIDE.
EntityRef   :   '&' Name ';' ;
// Numeric character reference, in either of two forms:
//   decimal      '&#'  followed by one or more DIGIT,    then ';'
//   hexadecimal  '&#x' followed by one or more HEXDIGIT, then ';'
CharRef     :   '&#' DIGIT+ ';'
            |   '&#x' HEXDIGIT+ ';'
            ;
// Whitespace outside of a tag: one or more of space, tab, or a newline
// ('\n', optionally preceded by '\r'). A '\r' that is not followed by '\n'
// is not matched by this rule. Declared before TEXT, which can match the
// same characters.
SEA_WS      :   (' '|'\t'|'\r'? '\n')+ ;

// Start of a tag: emits OPEN for '<' and pushes mode INSIDE so that the
// tag's contents are tokenized by the INSIDE rules.
OPEN        :   '<'                     -> pushMode(INSIDE) ;
// Start of the XML declaration: the literal '<?xml' followed by exactly one
// whitespace character (the S pattern), then pushes mode INSIDE.
XMLDeclOpen :   '<?xml' S               -> pushMode(INSIDE) ;
// Start of a processing instruction: '<?' followed by a Name.
// 'more' means no token is emitted here; the matched text is carried over
// into the next token the lexer emits. The lexer then switches to mode
// PROC_INSTR to consume the rest of the instruction.
SPECIAL_OPEN:   '<?' Name               -> more, pushMode(PROC_INSTR) ;

// Character data outside of a tag: one or more characters, none of which
// is '<' or '&'.
TEXT        :   ~[<&]+ ;        // match any 16 bit char other than < and &

// ----------------- Everything INSIDE of a tag ---------------------
// Entered through pushMode(INSIDE) from OPEN and XMLDeclOpen; left through
// popMode in CLOSE, SPECIAL_CLOSE and SLASH_CLOSE.
mode INSIDE;

// End of a tag: emits CLOSE for '>' and pops back to the previous mode.
CLOSE       :   '>'                     -> popMode ;
// End of the XML declaration: emits SPECIAL_CLOSE for '?>' and pops back to
// the previous mode.
SPECIAL_CLOSE:  '?>'                    -> popMode ; // close <?xml...?>
// End of a self-closing tag: emits SLASH_CLOSE for '/>' and pops back to
// the previous mode.
SLASH_CLOSE :   '/>'                    -> popMode ;
// A lone '/' inside a tag (as in the '</' of a closing tag). Does not
// change the lexer mode.
SLASH       :   '/' ;
// The '=' between an attribute name and its value.
EQUALS      :   '=' ;
// Quoted value inside a tag, delimited by double or single quotes.
// The content may be empty and may contain any character except '<' and
// the delimiting quote character itself; the other quote kind is allowed.
// The token text includes the surrounding quotes.
STRING      :   '"' ~[<"]* '"'
            |   '\'' ~[<']* '\''
            ;
// Element or attribute name: one NameStartChar followed by zero or more
// NameChar. Also referenced by EntityRef and SPECIAL_OPEN in the default
// mode.
Name        :   NameStartChar NameChar* ;
// A single whitespace character (space, tab, CR or LF) inside a tag;
// discarded via skip. XMLDeclOpen also references this pattern.
S           :   [ \t\r\n]               -> skip ;

// One hexadecimal digit, upper or lower case. Fragment: only usable from
// other lexer rules (CharRef), never emitted as a token.
fragment
HEXDIGIT    :   [a-fA-F0-9] ;

// One decimal digit. Fragment: only usable from other lexer rules
// (CharRef, NameChar), never emitted as a token.
fragment
DIGIT       :   [0-9] ;

// Any character allowed after the first character of a Name: everything
// NameStartChar accepts, plus '-', '_', '.', decimal digits, U+00B7, and
// the ranges U+0300-U+036F and U+203F-U+2040.
fragment
NameChar    :   NameStartChar
            |   '-' | '_' | '.' | DIGIT
            |   '\u00B7'
            |   '\u0300'..'\u036F'
            |   '\u203F'..'\u2040'
            ;

// Character allowed as the FIRST character of a Name.
//
// Follows the NameStartChar production of the XML 1.0 specification for the
// Basic Multilingual Plane: ':', '_', ASCII letters and the listed Unicode
// ranges. '_' is included here so that names beginning with an underscore
// (e.g. "_id") lex as a Name; previously '_' was only accepted in
// non-initial position through NameChar. The ranges U+00C0-U+00D6,
// U+00D8-U+00F6, U+00F8-U+02FF, U+0370-U+037D, U+037F-U+1FFF and
// U+200C-U+200D are likewise part of the specification's production.
// The supplementary-plane range (U+10000-U+EFFFF) is not covered because
// it cannot be written with the 4-digit \uXXXX escapes used in this file.
fragment
NameStartChar
            :   [_:a-zA-Z]
            |   '\u00C0'..'\u00D6'
            |   '\u00D8'..'\u00F6'
            |   '\u00F8'..'\u02FF'
            |   '\u0370'..'\u037D'
            |   '\u037F'..'\u1FFF'
            |   '\u200C'..'\u200D'
            |   '\u2070'..'\u218F'
            |   '\u2C00'..'\u2FEF'
            |   '\u3001'..'\uD7FF'
            |   '\uF900'..'\uFDCF'
            |   '\uFDF0'..'\uFFFD'
            ;

// ----------------- Handle <? ... ?> ---------------------
// Entered through pushMode(PROC_INSTR) from SPECIAL_OPEN; left through
// popMode in PI.
mode PROC_INSTR;

// End of a processing instruction: emits the PI token on '?>' and pops back
// to the previous mode. SPECIAL_OPEN and IGNORE use 'more', so the text they
// matched is carried into this token.
PI          :   '?>'                    -> popMode ; // close <?...?>
// Any single character inside a processing instruction. 'more' emits no
// token; the character is carried over into the next emitted token.
// Declared after PI.
IGNORE      :   .                       -> more ;