#!/usr/bin/ruby
# encoding: utf-8

require 'antlr3/test/functional'

# Functional test for an ANTLR3 lexer grammar that recognizes a small XML
# document.
#
# The grammar is declared inline. DOCUMENT is its only non-fragment rule;
# every other rule is a `fragment` that DOCUMENT reaches directly or
# indirectly. Embedded actions call `say` with a description of each construct
# as it is matched, and the test compares the accumulated `lexer.output`
# against an expected transcript.
#
# NOTE(review): `say` and `output` are presumably supplied by
# ANTLR3::Test::CaptureOutput, and ANTLR3::Test::RaiseErrors presumably turns
# recognition errors into exceptions -- confirm against the test support
# library.
class XMLLexerTest < ANTLR3::Test::Functional
  # The heredoc below is ANTLR grammar source, not Ruby. `\%` inside the
  # @members action escapes ANTLR's template character so that the generated
  # Ruby contains a plain %(...) string literal.
  inline_grammar( <<-'END' )
    lexer grammar XML;
    options { language = Ruby; }
    
    @members {
      include ANTLR3::Test::CaptureOutput
      include ANTLR3::Test::RaiseErrors
      
      def quote(text)
        text = text.gsub(/\"/, '\\"')
        \%("#{ text }")
      end
    }
    
    DOCUMENT
        :  XMLDECL? WS? DOCTYPE? WS? ELEMENT WS? 
        ;
    
    fragment DOCTYPE
        :
            '<!DOCTYPE' WS rootElementName=GENERIC_ID 
            {say("ROOTELEMENT: " + $rootElementName.text)}
            WS
            ( 
                ( 'SYSTEM' WS sys1=VALUE
            {say("SYSTEM: " + $sys1.text)}
                    
                | 'PUBLIC' WS pub=VALUE WS sys2=VALUE
                    {say("PUBLIC: " + $pub.text)}
                    {say("SYSTEM: " + $sys2.text)}   
                )
                ( WS )?
            )?
            ( dtd=INTERNAL_DTD
                {say("INTERNAL DTD: " + $dtd.text)}
            )?
        '>'
      ;
    
    fragment INTERNAL_DTD : '[' (options {greedy=false;} : .)* ']' ;
    
    fragment PI :
            '<?' target=GENERIC_ID WS? 
              {say("PI: " + $target.text)}
            ( ATTRIBUTE WS? )*  '?>'
      ;
    
    fragment XMLDECL :
            '<?' ('x'|'X') ('m'|'M') ('l'|'L') WS? 
              {say("XML declaration")}
            ( ATTRIBUTE WS? )*  '?>'
      ;
    
    
    fragment ELEMENT
        : ( START_TAG
                (ELEMENT
                | t=PCDATA
                    {say("PCDATA: " << quote($t.text))}
                | t=CDATA
                    {say("CDATA: " << quote($t.text))}
                | t=COMMENT
                    {say("Comment: " << quote($t.text))}
                | pi=PI
                )*
                END_TAG
            | EMPTY_ELEMENT
            )
        ;
    
    fragment START_TAG 
        : '<' WS? name=GENERIC_ID WS?
              {say("Start Tag: " + $name.text)}
            ( ATTRIBUTE WS? )* '>'
        ;
    
    fragment EMPTY_ELEMENT 
        : '<' WS? name=GENERIC_ID WS?
              {say("Empty Element: " + $name.text)}
            ( ATTRIBUTE WS? )* '/>'
        ;
    
    fragment ATTRIBUTE 
        : name=GENERIC_ID WS? '=' WS? value=VALUE
            {say("Attr: " + $name.text + " = "+ $value.text)}
        ;
    
    fragment END_TAG 
        : '</' WS? name=GENERIC_ID WS? '>'
            {say("End Tag: " + $name.text)}
        ;
    
    fragment COMMENT
      :	'<!--' (options {greedy=false;} : .)* '-->'
      ;
    
    fragment CDATA
      :	'<![CDATA[' (options {greedy=false;} : .)* ']]>'
      ;
    
    fragment PCDATA : (~'<')+ ; 
    
    fragment VALUE : 
            ( '\"' (~'\"')* '\"'
            | '\'' (~'\'')* '\''
            )
      ;
    
    fragment GENERIC_ID 
        : ( LETTER | '_' | ':') 
            ( options {greedy=true;} : LETTER | '0'..'9' | '.' | '-' | '_' | ':' )*
      ;
    
    fragment LETTER
      : 'a'..'z' 
      | 'A'..'Z'
      ;
    
    fragment WS  :
            (   ' '
            |   '\t'
            |  ( '\n'
                |	'\r\n'
                |	'\r'
                )
            )+
        ;    
  END
  
  # Lexes a document exercising every construct the grammar names: an XML
  # declaration, a DOCTYPE with an internal DTD, attributes in both quote
  # styles, a comment, PCDATA (including non-ASCII text and entity
  # references), a CDATA section, a processing instruction, an empty element
  # and a nested start/end tag pair.
  #
  # The misspelled `<!ELMENT` inside the internal DTD is part of the fixture:
  # INTERNAL_DTD accepts any characters up to the closing `]`, and the expected
  # transcript repeats the text verbatim.
  it "should be valid" do
    lexer = XML::Lexer.new( <<-'END'.fixed_indent( 0 ) )
      <?xml version='1.0'?>
      <!DOCTYPE component [
      <!ELEMENT component (PCDATA|sub)*>
      <!ATTLIST component
                attr CDATA #IMPLIED
                attr2 CDATA #IMPLIED
      >
      <!ELMENT sub EMPTY>
      
      ]>
      <component attr="val'ue" attr2='val"ue'>
      <!-- This is a comment -->
      Text
      <![CDATA[huhu]]>
      öäüß
      &amp;
      &lt;
      <?xtal cursor='11'?>
      <sub/>
      <sub></sub>
      </component>
    END
    
    # Drain the lexer. The tokens themselves are discarded; iterating is what
    # runs the grammar actions that populate `lexer.output`.
    lexer.map { |tk| tk }
    
    lexer.output.should == <<-'END'.fixed_indent( 0 )
      XML declaration
      Attr: version = '1.0'
      ROOTELEMENT: component
      INTERNAL DTD: [
      <!ELEMENT component (PCDATA|sub)*>
      <!ATTLIST component
                attr CDATA #IMPLIED
                attr2 CDATA #IMPLIED
      >
      <!ELMENT sub EMPTY>
      
      ]
      Start Tag: component
      Attr: attr = "val'ue"
      Attr: attr2 = 'val"ue'
      PCDATA: "
      "
      Comment: "<!-- This is a comment -->"
      PCDATA: "
      Text
      "
      CDATA: "<![CDATA[huhu]]>"
      PCDATA: "
      öäüß
      &amp;
      &lt;
      "
      PI: xtal
      Attr: cursor = '11'
      PCDATA: "
      "
      Empty Element: sub
      PCDATA: "
      "
      Start Tag: sub
      End Tag: sub
      PCDATA: "
      "
      End Tag: component
    END
  end

end
207