import antlr3
import testbase
import unittest
import os
import sys
from cStringIO import StringIO
import difflib
import textwrap


def _readUtf8(path):
    """Return the contents of the file at `path`, decoded as UTF-8.

    The handle is closed explicitly rather than being left for the
    garbage collector to reclaim.
    """
    fp = open(path)
    try:
        return unicode(fp.read(), 'utf-8')
    finally:
        fp.close()


class t012lexerXML(testbase.ANTLRTest):
    """Lexer tests for the t012lexerXMLLexer.g grammar."""

    def setUp(self):
        self.compileGrammar('t012lexerXMLLexer.g')


    def lexerClass(self, base):
        """Return a subclass of `base` that raises on the first lexer error.

        NOTE(review): presumably called by testbase while getLexer() builds
        the lexer -- confirm against testbase.ANTLRTest.
        """
        class TLexer(base):
            def emitErrorMessage(self, msg):
                # report errors to /dev/null
                pass

            def reportError(self, re):
                # no error recovery yet, just crash!
                raise re

        return TLexer


    def _expectLexerError(self, text, excClass):
        """Lex `text` to EOF and return the `excClass` instance it raises.

        The test fails if the lexer reaches EOF without raising; exceptions
        of any other type propagate unchanged.
        """
        lexer = self.getLexer(antlr3.StringStream(text))

        try:
            while True:
                token = lexer.nextToken()
                if token.type == antlr3.EOF:
                    break

        except excClass:
            # sys.exc_info() is used instead of "except X, exc" so that the
            # module parses on every Python version.
            return sys.exc_info()[1]

        self.fail(
            'lexer reached EOF without raising %s' % excClass.__name__
            )


    def testValid(self):
        """Lex the .input file next to this module and diff the lexer's
        output buffer against the .output file."""
        basePath = os.path.splitext(__file__)[0]

        stream = antlr3.StringStream(_readUtf8(basePath + '.input'))
        lexer = self.getLexer(stream)

        while True:
            token = lexer.nextToken()
            if token.type == self.lexerModule.EOF:
                break


        output = unicode(lexer.outbuf.getvalue(), 'utf-8')
        testOutput = _readUtf8(basePath + '.output')

        if output != testOutput:
            d = difflib.Differ()
            r = d.compare(output.splitlines(1), testOutput.splitlines(1))
            # Escape non-ASCII characters so the failure message can be
            # printed on any console.
            self.fail(
                ''.join([l.encode('ascii', 'backslashreplace') for l in r])
                )


    def testMalformedInput1(self):
        """A stray attribute name without a value is rejected at the '>'."""
        text = textwrap.dedent("""\
        <?xml version='1.0'?>
        <document d>
        </document>
        """)

        exc = self._expectLexerError(text, antlr3.NoViableAltException)

        # assertEqual (unlike a bare assert) still runs under "python -O".
        self.assertEqual(exc.unexpectedType, '>')
        self.assertEqual(exc.charPositionInLine, 11)
        self.assertEqual(exc.line, 2)


    def testMalformedInput2(self):
        """A processing instruction starting with '<?tml' is rejected at
        the 't'."""
        text = textwrap.dedent("""\
        <?tml version='1.0'?>
        <document>
        </document>
        """)

        exc = self._expectLexerError(text, antlr3.MismatchedSetException)

        self.assertEqual(exc.unexpectedType, 't')
        self.assertEqual(exc.charPositionInLine, 2)
        self.assertEqual(exc.line, 1)


    def testMalformedInput3(self):
        """A space inside the tag name makes the lexer fail at the 'a' of
        'attr'."""
        text = textwrap.dedent("""\
        <?xml version='1.0'?>
        <docu ment attr="foo">
        </document>
        """)

        exc = self._expectLexerError(text, antlr3.NoViableAltException)

        self.assertEqual(exc.unexpectedType, 'a')
        self.assertEqual(exc.charPositionInLine, 11)
        self.assertEqual(exc.line, 2)



if __name__ == '__main__':
    unittest.main()


## # run an infinite loop with randomly mangled input
## while True:
##     print "ping"

##     input = """\
## <?xml version='1.0'?>
## <!DOCTYPE component [
## <!ELEMENT component (PCDATA|sub)*>
## <!ATTLIST component
##           attr CDATA #IMPLIED
##           attr2 CDATA #IMPLIED
## >
## <!ELMENT sub EMPTY>

## ]>
## <component attr="val'ue" attr2='val"ue'>
## <!-- This is a comment -->
## Text
## <![CDATA[huhu]]>
## &
## <
## <?xtal cursor='11'?>
## <sub/>
## <sub></sub>
## </component>
## """

##     import random
##     input = list(input) # make it mutable
##     for _ in range(3):
##         p1 = random.randrange(len(input))
##         p2 = random.randrange(len(input))

##         c1 = input[p1]
##         input[p1] = input[p2]
##         input[p2] = c1
##     input = ''.join(input) # back to string

##     stream = antlr3.StringStream(input)
##     lexer = Lexer(stream)

##     try:
##         while True:
##             token = lexer.nextToken()
##             if token.type == EOF:
##                 break

##     except antlr3.RecognitionException, exc:
##         print exc
##         for l in input.splitlines()[0:exc.line]:
##             print l
##         print ' '*exc.charPositionInLine + '^'

##     except BaseException, exc:
##         print '\n'.join(['%02d: %s' % (idx+1, l) for idx, l in enumerate(input.splitlines())])
##         print "%s at %d:%d" % (exc, stream.line, stream.charPositionInLine)
##         print

##         raise