# test_pyexpat.py revision 265a804af22d2513403c295d9218ba41753ca218
1# Very simple test - Parse a file and print what happens 2 3# XXX TypeErrors on calling handlers, or on bad return values from a 4# handler, are obscure and unhelpful. 5 6import pyexpat 7 8class Outputter: 9 def StartElementHandler(self, name, attrs): 10 print 'Start element:\n\t', repr(name), attrs 11 12 def EndElementHandler(self, name): 13 print 'End element:\n\t', repr(name) 14 15 def CharacterDataHandler(self, data): 16 data = data.strip() 17 if data: 18 print 'Character data:' 19 print '\t', repr(data) 20 21 def ProcessingInstructionHandler(self, target, data): 22 print 'PI:\n\t', repr(target), repr(data) 23 24 def StartNamespaceDeclHandler(self, prefix, uri): 25 print 'NS decl:\n\t', repr(prefix), repr(uri) 26 27 def EndNamespaceDeclHandler(self, prefix): 28 print 'End of NS decl:\n\t', repr(prefix) 29 30 def StartCdataSectionHandler(self): 31 print 'Start of CDATA section' 32 33 def EndCdataSectionHandler(self): 34 print 'End of CDATA section' 35 36 def CommentHandler(self, text): 37 print 'Comment:\n\t', repr(text) 38 39 def NotationDeclHandler(self, *args): 40 name, base, sysid, pubid = args 41 print 'Notation declared:', args 42 43 def UnparsedEntityDeclHandler(self, *args): 44 entityName, base, systemId, publicId, notationName = args 45 print 'Unparsed entity decl:\n\t', args 46 47 def NotStandaloneHandler(self, userData): 48 print 'Not standalone' 49 return 1 50 51 def ExternalEntityRefHandler(self, *args): 52 context, base, sysId, pubId = args 53 print 'External entity ref:', args 54 return 1 55 56 def DefaultHandler(self, userData): 57 pass 58 59 def DefaultHandlerExpand(self, userData): 60 pass 61 62 63def confirm(ok): 64 if ok: 65 print "OK." 66 else: 67 print "Not OK." 
68 69out = Outputter() 70parser = pyexpat.ParserCreate(namespace_separator='!') 71 72# Test getting/setting returns_unicode 73parser.returns_unicode = 0; confirm(parser.returns_unicode == 0) 74parser.returns_unicode = 1; confirm(parser.returns_unicode == 1) 75parser.returns_unicode = 2; confirm(parser.returns_unicode == 1) 76parser.returns_unicode = 0; confirm(parser.returns_unicode == 0) 77 78HANDLER_NAMES = [ 79 'StartElementHandler', 'EndElementHandler', 80 'CharacterDataHandler', 'ProcessingInstructionHandler', 81 'UnparsedEntityDeclHandler', 'NotationDeclHandler', 82 'StartNamespaceDeclHandler', 'EndNamespaceDeclHandler', 83 'CommentHandler', 'StartCdataSectionHandler', 84 'EndCdataSectionHandler', 85 'DefaultHandler', 'DefaultHandlerExpand', 86 #'NotStandaloneHandler', 87 'ExternalEntityRefHandler' 88 ] 89for name in HANDLER_NAMES: 90 setattr(parser, name, getattr(out, name)) 91 92data = '''\ 93<?xml version="1.0" encoding="iso-8859-1" standalone="no"?> 94<?xml-stylesheet href="stylesheet.css"?> 95<!-- comment data --> 96<!DOCTYPE quotations SYSTEM "quotations.dtd" [ 97<!ELEMENT root ANY> 98<!NOTATION notation SYSTEM "notation.jpeg"> 99<!ENTITY acirc "â"> 100<!ENTITY external_entity SYSTEM "entity.file"> 101<!ENTITY unparsed_entity SYSTEM "entity.file" NDATA notation> 102%unparsed_entity; 103]> 104 105<root attr1="value1" attr2="value2ὀ"> 106<myns:subelement xmlns:myns="http://www.python.org/namespace"> 107 Contents of subelements 108</myns:subelement> 109<sub2><![CDATA[contents of CDATA section]]></sub2> 110&external_entity; 111</root> 112''' 113 114# Produce UTF-8 output 115parser.returns_unicode = 0 116try: 117 parser.Parse(data, 1) 118except pyexpat.error: 119 print '** Error', parser.ErrorCode, pyexpat.ErrorString(parser.ErrorCode) 120 print '** Line', parser.ErrorLineNumber 121 print '** Column', parser.ErrorColumnNumber 122 print '** Byte', parser.ErrorByteIndex 123 124# Try the parse again, this time producing Unicode output 125parser = 
pyexpat.ParserCreate(namespace_separator='!') 126parser.returns_unicode = 1 127 128for name in HANDLER_NAMES: 129 setattr(parser, name, getattr(out, name)) 130try: 131 parser.Parse(data, 1) 132except pyexpat.error: 133 print '** Error', parser.ErrorCode, pyexpat.ErrorString(parser.ErrorCode) 134 print '** Line', parser.ErrorLineNumber 135 print '** Column', parser.ErrorColumnNumber 136 print '** Byte', parser.ErrorByteIndex 137 138# Try parsing a file 139parser = pyexpat.ParserCreate(namespace_separator='!') 140parser.returns_unicode = 1 141 142for name in HANDLER_NAMES: 143 setattr(parser, name, getattr(out, name)) 144import StringIO 145file = StringIO.StringIO(data) 146try: 147 parser.ParseFile(file) 148except pyexpat.error: 149 print '** Error', parser.ErrorCode, pyexpat.ErrorString(parser.ErrorCode) 150 print '** Line', parser.ErrorLineNumber 151 print '** Column', parser.ErrorColumnNumber 152 print '** Byte', parser.ErrorByteIndex 153