"""Tests for the lexer generated from the grammar t012lexerXMLLexer.g."""

import antlr3
import testbase
import unittest
import os
import sys
from cStringIO import StringIO
import difflib
import textwrap


class t012lexerXML(testbase.ANTLRTest):
    """Runs the generated XML lexer over one valid and three malformed inputs."""

    def setUp(self):
        """Compile the grammar under test before every test method."""
        self.compileGrammar('t012lexerXMLLexer.g')


    def lexerClass(self, base):
        """Return a subclass of `base` that raises on the first lexing error.

        NOTE(review): presumably invoked by testbase.ANTLRTest when it builds
        the lexer returned by getLexer() -- confirm against testbase.
        """
        class TLexer(base):
            def emitErrorMessage(self, msg):
                # report errors to /dev/null
                pass

            def reportError(self, re):
                # no error recovery yet, just crash!
                raise re

        return TLexer


    def _readSiblingFile(self, extension):
        """Return the UTF-8 decoded content of the file next to this module.

        The path is this module's path with its extension replaced by
        `extension` (e.g. '.input').  The file handle is always closed, even
        if reading or decoding fails.
        """
        path = os.path.splitext(__file__)[0] + extension
        fp = open(path)
        try:
            return unicode(fp.read(), 'utf-8')
        finally:
            fp.close()


    def _assertLexerRaises(self, text, excClass, unexpectedType, line,
                           charPositionInLine):
        """Lex `text` to EOF and check that it fails in the expected way.

        The test fails if the lexer reaches EOF without raising `excClass`,
        or if the raised exception's unexpectedType, charPositionInLine or
        line differ from the expected values.  Exceptions of any other type
        propagate unchanged.
        """
        lexer = self.getLexer(antlr3.StringStream(text))

        try:
            while True:
                token = lexer.nextToken()
                if token.type == antlr3.EOF:
                    break

        except excClass:
            # sys.exc_info() instead of "except X, exc" / "except X as exc"
            # so that this module parses on every Python version.
            exc = sys.exc_info()[1]

        else:
            self.fail(
                "lexer reached EOF without raising %s" % excClass.__name__
                )

        # unittest assertions rather than bare assert statements: the latter
        # are removed when Python runs with -O.
        self.assertEqual(exc.unexpectedType, unexpectedType)
        self.assertEqual(exc.charPositionInLine, charPositionInLine)
        self.assertEqual(exc.line, line)


    def testValid(self):
        """Lex the '.input' file and compare lexer.outbuf with '.output'."""
        stream = antlr3.StringStream(self._readSiblingFile('.input'))
        lexer = self.getLexer(stream)

        while True:
            token = lexer.nextToken()
            if token.type == self.lexerModule.EOF:
                break

        # NOTE(review): outbuf is presumably filled by actions in the
        # grammar -- confirm in t012lexerXMLLexer.g.
        output = unicode(lexer.outbuf.getvalue(), 'utf-8')
        testOutput = self._readSiblingFile('.output')

        if output != testOutput:
            d = difflib.Differ()
            r = d.compare(output.splitlines(1), testOutput.splitlines(1))
            self.fail(
                ''.join([l.encode('ascii', 'backslashreplace') for l in r])
                )


    def testMalformedInput1(self):
        """A NoViableAltException is expected at the '>' on line 2."""
        input = textwrap.dedent("""\
        <?xml version='1.0'?>
        <document d>
        </document>
        """)

        self._assertLexerRaises(
            input, antlr3.NoViableAltException,
            unexpectedType='>', line=2, charPositionInLine=11
            )


    def testMalformedInput2(self):
        """A MismatchedSetException is expected at the 't' on line 1."""
        input = textwrap.dedent("""\
        <?tml version='1.0'?>
        <document>
        </document>
        """)

        self._assertLexerRaises(
            input, antlr3.MismatchedSetException,
            unexpectedType='t', line=1, charPositionInLine=2
            )


    def testMalformedInput3(self):
        """A NoViableAltException is expected at the 'a' on line 2."""
        input = textwrap.dedent("""\
        <?xml version='1.0'?>
        <docu ment attr="foo">
        </document>
        """)

        self._assertLexerRaises(
            input, antlr3.NoViableAltException,
            unexpectedType='a', line=2, charPositionInLine=11
            )



if __name__ == '__main__':
    unittest.main()


## # run an infinite loop with randomly mangled input
## while True:
##     print "ping"

##     input = """\
## <?xml version='1.0'?>
## <!DOCTYPE component [
## <!ELEMENT component (PCDATA|sub)*>
## <!ATTLIST component
##           attr CDATA #IMPLIED
##           attr2 CDATA #IMPLIED
## >
## <!ELMENT sub EMPTY>

## ]>
## <component attr="val'ue" attr2='val"ue'>
## <!-- This is a comment -->
## Text
## <![CDATA[huhu]]>
## &
## <
## <?xtal cursor='11'?>
## <sub/>
## <sub></sub>
## </component>
## """

##     import random
##     input = list(input) # make it mutable
##     for _ in range(3):
##         p1 = random.randrange(len(input))
##         p2 = random.randrange(len(input))

##         c1 = input[p1]
##         input[p1] = input[p2]
##         input[p2] = c1
##     input = ''.join(input) # back to string

##     stream = antlr3.StringStream(input)
##     lexer = Lexer(stream)

##     try:
##         while True:
##             token = lexer.nextToken()
##             if token.type == EOF:
##                 break

##     except antlr3.RecognitionException, exc:
##         print exc
##         for l in input.splitlines()[0:exc.line]:
##             print l
##         print ' '*exc.charPositionInLine + '^'

##     except BaseException, exc:
##         print '\n'.join(['%02d: %s' % (idx+1, l) for idx, l in enumerate(input.splitlines())])
##         print "%s at %d:%d" % (exc, stream.line, stream.charPositionInLine)
##         print

##         raise