1import antlr3
2import testbase
3import unittest
4import os
5import sys
6from cStringIO import StringIO
7import difflib
8import textwrap
9
class t012lexerXML(testbase.ANTLRTest):
    """Lexer tests for the ``t012lexerXMLLexer.g`` grammar.

    ``testValid`` lexes a reference document and compares the text the lexer
    collected in ``outbuf`` with a stored expectation.  The
    ``testMalformedInput*`` cases feed broken markup and check which
    recognition error is raised, and where.
    """

    def setUp(self):
        """Compile the grammar under test before each test method."""
        self.compileGrammar('t012lexerXMLLexer.g')


    def lexerClass(self, base):
        """Return a subclass of ``base`` that raises on the first error.

        The subclass discards error messages and re-raises the recognition
        error handed to ``reportError`` instead of recovering from it.

        NOTE(review): presumably called by the testbase machinery while
        ``getLexer`` builds the lexer -- confirm against testbase.
        """
        class TLexer(base):
            def emitErrorMessage(self, msg):
                # report errors to /dev/null
                pass

            def reportError(self, re):
                # no error recovery yet, just crash!
                raise re

        return TLexer


    def _readUtf8(self, path):
        """Return the contents of the file at ``path`` decoded as UTF-8.

        The handle is closed explicitly rather than being left for the
        garbage collector to clean up.
        """
        handle = open(path)
        try:
            return unicode(handle.read(), 'utf-8')
        finally:
            handle.close()


    def _drain(self, lexer, eofType):
        """Pull tokens from ``lexer`` until one of type ``eofType`` appears."""
        while lexer.nextToken().type != eofType:
            pass


    def _assertLexerError(self, text, excClass, unexpectedType,
                          charPositionInLine, line):
        """Lex ``text`` and check that it fails with the expected error.

        The test fails if the lexer reaches EOF without raising ``excClass``,
        or if the raised exception's ``unexpectedType``,
        ``charPositionInLine`` or ``line`` differ from the given values.
        Any other exception type propagates unchanged.
        """
        lexer = self.getLexer(antlr3.StringStream(text))

        try:
            self._drain(lexer, antlr3.EOF)
        except excClass:
            # sys.exc_info() keeps this valid on every Python version;
            # neither "except X, exc" nor "except X as exc" is portable.
            exc = sys.exc_info()[1]
        else:
            self.fail(
                'lexer accepted malformed input without raising %s'
                % excClass.__name__
                )

        # assertEqual (unlike a bare assert) is still checked under -O.
        self.assertEqual(exc.unexpectedType, unexpectedType)
        self.assertEqual(exc.charPositionInLine, charPositionInLine)
        self.assertEqual(exc.line, line)


    def testValid(self):
        """Lex the ``.input`` file next to this module and compare the
        lexer's ``outbuf`` text with the matching ``.output`` file."""
        basePath = os.path.splitext(__file__)[0]

        stream = antlr3.StringStream(self._readUtf8(basePath + '.input'))
        lexer = self.getLexer(stream)
        self._drain(lexer, self.lexerModule.EOF)

        output = unicode(lexer.outbuf.getvalue(), 'utf-8')
        testOutput = self._readUtf8(basePath + '.output')

        if output != testOutput:
            # Report a line-by-line diff, escaped to ASCII so the failure
            # message is printable regardless of the console encoding.
            diff = difflib.Differ().compare(
                output.splitlines(True), testOutput.splitlines(True)
                )
            self.fail(
                ''.join([l.encode('ascii', 'backslashreplace') for l in diff])
                )


    def testMalformedInput1(self):
        """A stray attribute name without a value must raise
        NoViableAltException at the closing ``>``."""
        text = textwrap.dedent("""\
        <?xml version='1.0'?>
        <document d>
        </document>
        """)

        self._assertLexerError(
            text, antlr3.NoViableAltException,
            unexpectedType='>', charPositionInLine=11, line=2
            )


    def testMalformedInput2(self):
        """A misspelled ``<?xml`` declaration must raise
        MismatchedSetException at the offending character."""
        text = textwrap.dedent("""\
        <?tml version='1.0'?>
        <document>
        </document>
        """)

        self._assertLexerError(
            text, antlr3.MismatchedSetException,
            unexpectedType='t', charPositionInLine=2, line=1
            )


    def testMalformedInput3(self):
        """A tag name broken by a space must raise NoViableAltException
        inside the start tag."""
        text = textwrap.dedent("""\
        <?xml version='1.0'?>
        <docu ment attr="foo">
        </document>
        """)

        self._assertLexerError(
            text, antlr3.NoViableAltException,
            unexpectedType='a', charPositionInLine=11, line=2
            )
123
124
125
# Run the test cases in this module when it is executed as a script.
if __name__ == "__main__":
    unittest.main()
128
129
130## # run an infinite loop with randomly mangled input
131## while True:
132##     print "ping"
133
134##     input = """\
135## <?xml version='1.0'?>
136## <!DOCTYPE component [
137## <!ELEMENT component (PCDATA|sub)*>
138## <!ATTLIST component
139##           attr CDATA #IMPLIED
140##           attr2 CDATA #IMPLIED
141## >
142## <!ELMENT sub EMPTY>
143
144## ]>
145## <component attr="val'ue" attr2='val"ue'>
146## <!-- This is a comment -->
147## Text
148## <![CDATA[huhu]]>
149## &amp;
150## &lt;
151## <?xtal cursor='11'?>
152## <sub/>
153## <sub></sub>
154## </component>
155## """
156
157##     import random
158##     input = list(input) # make it mutable
159##     for _ in range(3):
160##         p1 = random.randrange(len(input))
161##         p2 = random.randrange(len(input))
162
163##         c1 = input[p1]
164##         input[p1] = input[p2]
165##         input[p2] = c1
166##     input = ''.join(input) # back to string
167
168##     stream = antlr3.StringStream(input)
169##     lexer = Lexer(stream)
170
171##     try:
172##         while True:
173##             token = lexer.nextToken()
174##             if token.type == EOF:
175##                 break
176
177##     except antlr3.RecognitionException, exc:
178##         print exc
179##         for l in input.splitlines()[0:exc.line]:
180##             print l
181##         print ' '*exc.charPositionInLine + '^'
182
183##     except BaseException, exc:
184##         print '\n'.join(['%02d: %s' % (idx+1, l) for idx, l in enumerate(input.splitlines())])
185##         print "%s at %d:%d" % (exc, stream.line, stream.charPositionInLine)
186##         print
187
188##         raise
189
190