// t012lexerXML.g revision 324c4644fee44b9898524c09511bd33c3f12e2df
/**
 * XML parser by Oliver Zeigermann October 10, 2005.
 *
 * Lexer-only grammar: DOCUMENT is the only non-fragment rule in this file,
 * so a whole XML document is matched as one DOCUMENT token while the
 * embedded actions record what was recognised.
 */
lexer grammar t012lexerXML;

// Generate the lexer for the JavaScript target.
options { language = JavaScript; }
6
@lexer::members {
// Messages recorded by the rule actions, in the order they were emitted.
this.lout = [];

// Append one message to the recorded output.
this.output = function(message) {
    this.lout[this.lout.length] = message;
};
}
13
/**
 * Top-level token: an entire XML document.
 * An optional XML declaration and an optional DOCTYPE come first, followed
 * by exactly one root element; whitespace may surround each part.
 */
DOCUMENT
    :   XMLDECL?
        WS?
        DOCTYPE?
        WS?
        ELEMENT
        WS?
    ;
17
/**
 * Document type declaration: <!DOCTYPE name [external id] [internal subset] >.
 *
 * Reports the root element name, the PUBLIC and/or SYSTEM identifier when an
 * external id is given, and the raw internal subset (brackets included).
 *
 * Whitespace is optional directly after the name and after the internal
 * subset, so compact forms such as <!DOCTYPE doc> and
 * <!DOCTYPE doc [ ... ] > are accepted as well as the spaced-out ones.
 */
fragment DOCTYPE
    :
        '<!DOCTYPE' WS rootElementName=GENERIC_ID
        {this.output("ROOTELEMENT: "+$rootElementName.text);}
        // Optional: the next thing may be an external id, '[' or '>'.
        WS?
        (
            ( 'SYSTEM' WS sys1=VALUE
                {this.output("SYSTEM: "+$sys1.text);}

            | 'PUBLIC' WS pub=VALUE WS sys2=VALUE
                {this.output("PUBLIC: "+$pub.text);}
                {this.output("SYSTEM: "+$sys2.text);}
            )
            ( WS )?
        )?
        ( dtd=INTERNAL_DTD
            {this.output("INTERNAL DTD: "+$dtd.text);}
            // Whitespace may separate the internal subset from the closing '>'.
            WS?
        )?
        '>'
    ;
38
/** Internal DTD subset: everything from '[' up to and including the first ']'. */
fragment INTERNAL_DTD
    :   '['
        ( ~']' )*
        ']'
    ;
40
/**
 * Processing instruction: "<?", a target name, optional attributes, "?>".
 * Reports the target name; each attribute is reported by ATTRIBUTE.
 */
fragment PI
    :   '<?' piTarget=GENERIC_ID ( WS )?
        { this.output("PI: "+$piTarget.text); }
        (
            ATTRIBUTE ( WS )?
        )*
        '?>'
    ;
46
/**
 * XML declaration: "<?" followed by "xml" in any letter case, then
 * optional attributes, closed by "?>". Reports that a declaration was seen.
 */
fragment XMLDECL
    :   '<?'
        ( 'x' | 'X' )
        ( 'm' | 'M' )
        ( 'l' | 'L' )
        ( WS )?
        { this.output("XML declaration"); }
        (
            ATTRIBUTE ( WS )?
        )*
        '?>'
    ;
52
53
/**
 * One element: either an empty-element tag, or a start tag followed by any
 * mix of nested elements, character data, CDATA sections, comments and
 * processing instructions, and then an end tag.
 *
 * Character data, CDATA sections and comments are reported here with their
 * full matched text; tags and processing instructions report themselves.
 */
fragment ELEMENT
    : ( START_TAG
            ( ELEMENT
            | t=PCDATA
                {this.output("PCDATA: \""+$t.text+"\"");}
            | t=CDATA
                {this.output("CDATA: \""+$t.text+"\"");}
            | t=COMMENT
                {this.output("Comment: \""+$t.text+"\"");}
            // No label: PI reports itself and its text is not used here.
            | PI
            )*
            END_TAG
        | EMPTY_ELEMENT
        )
    ;
69
/**
 * Opening tag: '<', the element name, optional attributes, '>'.
 * Whitespace is tolerated around the name. Reports the element name.
 */
fragment START_TAG
    :   '<' ( WS )? tagName=GENERIC_ID ( WS )?
        { this.output("Start Tag: "+$tagName.text); }
        (
            ATTRIBUTE ( WS )?
        )*
        '>'
    ;
75
/**
 * Empty-element tag: '<', the element name, optional attributes, "/>".
 * Whitespace is tolerated around the name. Reports the element name.
 */
fragment EMPTY_ELEMENT
    :   '<' ( WS )? tagName=GENERIC_ID ( WS )?
        { this.output("Empty Element: "+$tagName.text); }
        (
            ATTRIBUTE ( WS )?
        )*
        '/>'
    ;
81
/**
 * Attribute: name, '=', quoted value, with optional whitespace around '='.
 * Reports name=value; the value text keeps its surrounding quotes.
 */
fragment ATTRIBUTE
    :   attrName=GENERIC_ID ( WS )? '=' ( WS )? attrValue=VALUE
        { this.output("Attr: "+$attrName.text+"="+$attrValue.text); }
    ;
86
/**
 * Closing tag: "</", the element name, '>'; whitespace is tolerated around
 * the name. Reports the element name once the whole tag has been matched.
 */
fragment END_TAG
    :   '</' ( WS )? tagName=GENERIC_ID ( WS )? '>'
        { this.output("End Tag: "+$tagName.text); }
    ;
91
/** Comment: "<!--" up to and including the first "-->" that follows. */
fragment COMMENT
    :   '<!--'
        ( options { greedy = false; } : . )*
        '-->'
    ;
95
/** CDATA section: "<![CDATA[" up to and including the first "]]>" that follows. */
fragment CDATA
    :   '<![CDATA['
        ( options { greedy = false; } : . )*
        ']]>'
    ;
99
/** Character data: one or more characters, none of which is '<'. */
fragment PCDATA
    :   ( ~'<' )+
    ;
101
/**
 * Quoted value, in double or single quotes. The value may not contain its
 * own quote character, and the surrounding quotes are part of the match.
 */
fragment VALUE
    :   '\"' ( ~'\"' )* '\"'
    |   '\'' ( ~'\'' )* '\''
    ;
107
/**
 * Name: starts with a LETTER, '_' or ':' and continues with any number of
 * LETTERs, digits, '.', '-', '_' or ':'. The continuation loop is
 * explicitly greedy, so the name takes as many characters as it can.
 */
fragment GENERIC_ID
    :   ( LETTER | '_' | ':' )
        ( options { greedy = true; } :
            LETTER
        |   '0'..'9'
        |   '.'
        |   '-'
        |   '_'
        |   ':'
        )*
    ;
112
/** A single ASCII letter, lower or upper case. */
fragment LETTER
    :   'a'..'z' | 'A'..'Z'
    ;
117
/** One or more whitespace characters: space, tab, or a line break (LF, CRLF or CR). */
fragment WS
    :   ( ' ' | '\t' | '\n' | '\r\n' | '\r' )+
    ;
127
128