#!/usr/bin/ruby
# encoding: utf-8

require 'antlr3/test/functional'

# Functional test for a lexer-only XML grammar.
#
# The grammar has a single non-fragment rule (DOCUMENT); every other rule is a
# fragment reached from it.  Embedded actions call +say+ to record what was
# recognized, and the example below compares the recorded text against the
# expected transcript.
class XMLLexerTest < ANTLR3::Test::Functional
  inline_grammar( <<-'END' )
    lexer grammar XML;
    options { language = Ruby; }
    
    @members {
      include ANTLR3::Test::CaptureOutput
      include ANTLR3::Test::RaiseErrors
      
      def quote(text)
        text = text.gsub(/\"/, '\\"')
        \%("#{ text }")
      end
    }
    
    // The only rule that produces a token: one whole document.
    DOCUMENT
        :  XMLDECL? WS? DOCTYPE? WS? ELEMENT WS? 
        ;
    
    fragment DOCTYPE
        :
            '<!DOCTYPE' WS rootElementName=GENERIC_ID 
            {say("ROOTELEMENT: " + $rootElementName.text)}
            WS
            ( 
                ( 'SYSTEM' WS sys1=VALUE
                    {say("SYSTEM: " + $sys1.text)}
                    
                | 'PUBLIC' WS pub=VALUE WS sys2=VALUE
                    {say("PUBLIC: " + $pub.text)}
                    {say("SYSTEM: " + $sys2.text)}   
                )
                ( WS )?
            )?
            ( dtd=INTERNAL_DTD
                {say("INTERNAL DTD: " + $dtd.text)}
            )?
            '>'
        ;
    
    fragment INTERNAL_DTD : '[' (options {greedy=false;} : .)* ']' ;
    
    fragment PI :
            '<?' target=GENERIC_ID WS? 
              {say("PI: " + $target.text)}
            ( ATTRIBUTE WS? )*  '?>'
        ;
    
    fragment XMLDECL :
            '<?' ('x'|'X') ('m'|'M') ('l'|'L') WS? 
              {say("XML declaration")}
            ( ATTRIBUTE WS? )*  '?>'
        ;
    
    
    fragment ELEMENT
        : ( START_TAG
                (ELEMENT
                | t=PCDATA
                    {say("PCDATA: " << quote($t.text))}
                | t=CDATA
                    {say("CDATA: " << quote($t.text))}
                | t=COMMENT
                    {say("Comment: " << quote($t.text))}
                | pi=PI
                )*
                END_TAG
            | EMPTY_ELEMENT
            )
        ;
    
    fragment START_TAG 
        : '<' WS? name=GENERIC_ID WS?
              {say("Start Tag: " + $name.text)}
            ( ATTRIBUTE WS? )* '>'
        ;
    
    fragment EMPTY_ELEMENT 
        : '<' WS? name=GENERIC_ID WS?
              {say("Empty Element: " + $name.text)}
            ( ATTRIBUTE WS? )* '/>'
        ;
    
    fragment ATTRIBUTE 
        : name=GENERIC_ID WS? '=' WS? value=VALUE
            {say("Attr: " + $name.text + " = "+ $value.text)}
        ;
    
    fragment END_TAG 
        : '</' WS? name=GENERIC_ID WS? '>'
            {say("End Tag: " + $name.text)}
        ;
    
    fragment COMMENT
        :   '<!--' (options {greedy=false;} : .)* '-->'
        ;
    
    fragment CDATA
        :   '<![CDATA[' (options {greedy=false;} : .)* ']]>'
        ;
    
    // Character data is anything up to the next opening angle bracket.
    fragment PCDATA : (~'<')+ ; 
    
    fragment VALUE : 
            ( '\"' (~'\"')* '\"'
            | '\'' (~'\'')* '\''
            )
        ;
    
    fragment GENERIC_ID 
        : ( LETTER | '_' | ':') 
            ( options {greedy=true;} : LETTER | '0'..'9' | '.' | '-' | '_' | ':' )*
        ;
    
    fragment LETTER
        : 'a'..'z' 
        | 'A'..'Z'
        ;
    
    fragment WS  :
            (   ' '
            |   '\t'
            |  ( '\n'
                |   '\r\n'
                |   '\r'
                )
            )+
        ;    
  END
  
  it "should be valid" do
    # Markup characters inside character data are written as entity
    # references (&amp; / &lt;): PCDATA is (~'<')+, so a literal '<' there
    # could not be matched as text.
    lexer = XML::Lexer.new( <<-'END'.fixed_indent( 0 ) )
      <?xml version='1.0'?>
      <!DOCTYPE component [
      <!ELEMENT component (PCDATA|sub)*>
      <!ATTLIST component
                attr CDATA #IMPLIED
                attr2 CDATA #IMPLIED
      >
      <!ELMENT sub EMPTY>
      
      ]>
      <component attr="val'ue" attr2='val"ue'>
      <!-- This is a comment -->
      Text
      <![CDATA[huhu]]>
      öäüß
      &amp;
      &lt;
      <?xtal cursor='11'?>
      <sub/>
      <sub></sub>
      </component>
    END
    
    # Drain the lexer so that every embedded action runs and records its
    # output; the tokens themselves are not inspected.
    lexer.map { |tk| tk }
    
    lexer.output.should == <<-'END'.fixed_indent( 0 )
      XML declaration
      Attr: version = '1.0'
      ROOTELEMENT: component
      INTERNAL DTD: [
      <!ELEMENT component (PCDATA|sub)*>
      <!ATTLIST component
                attr CDATA #IMPLIED
                attr2 CDATA #IMPLIED
      >
      <!ELMENT sub EMPTY>
      
      ]
      Start Tag: component
      Attr: attr = "val'ue"
      Attr: attr2 = 'val"ue'
      PCDATA: "
      "
      Comment: "<!-- This is a comment -->"
      PCDATA: "
      Text
      "
      CDATA: "<![CDATA[huhu]]>"
      PCDATA: "
      öäüß
      &amp;
      &lt;
      "
      PI: xtal
      Attr: cursor = '11'
      PCDATA: "
      "
      Empty Element: sub
      PCDATA: "
      "
      Start Tag: sub
      End Tag: sub
      PCDATA: "
      "
      End Tag: component
    END
  end

end