"""Helper functions for XML.

This module has misc. helper functions for working with XML DOM nodes."""

import re
from compat import *

import os
if os.name != "java":
    # Regular Python: use the standard-library DOM implementation.
    from xml.dom import minidom
    from xml.sax import saxutils

    def parseDocument(s):
        """Parse the XML text `s` and return the resulting DOM document."""
        return minidom.parseString(s)
else:
    # os.name == "java" (Jython): use the Java XML parser instead.
    from javax.xml.parsers import *
    import java

    # A single DocumentBuilder is created at import time and reused for
    # every parseDocument() call.
    builder = DocumentBuilderFactory.newInstance().newDocumentBuilder()

    def parseDocument(s):
        """Parse the XML text `s` and return the resulting DOM document."""
        # NOTE(review): getBytes() is called without a charset argument, so
        # the bytes handed to the parser depend on the JVM's default
        # encoding - confirm this is what callers expect.
        stream = java.io.ByteArrayInputStream(java.lang.String(s).getBytes())
        return builder.parse(stream)
def parseAndStripWhitespace(s):
    """Parse the XML text `s` and return its whitespace-stripped root element.

    Any failure while parsing is reported as a SyntaxError carrying the
    original error's message.  Formatting problems detected by
    stripWhitespace() are raised as SyntaxError as well.
    """
    import sys

    try:
        element = parseDocument(s).documentElement
    except (KeyboardInterrupt, SystemExit):
        #Never disguise an interpreter shutdown request as a parse error
        raise
    except BaseException:
        #sys.exc_info() is used instead of "except X, e" / "except X as e"
        #so that this parses on old and new Python versions alike
        e = sys.exc_info()[1]
        raise SyntaxError(str(e))
    stripWhitespace(element)
    return element


#Goes through a DOM tree and removes whitespace besides child elements,
#as long as this whitespace is correctly tab-ified
def stripWhitespace(element, tab=0):
    """Remove the indentation text nodes under `element`, recursively.

    `tab` is the nesting depth of `element`.  Text between child elements
    must be exactly a newline followed by tab+1 tab characters, and the text
    before the closing tag must be a newline followed by `tab` tabs.  An
    element holding a single text node is treated as text content and is
    left alone, unless that text is just the closing-tag indentation, in
    which case it is removed.

    Raises SyntaxError for empty elements, for wrongly indented whitespace
    and for node types other than elements and text.
    """
    element.normalize()

    lastSpacer = "\n" + ("\t"*tab)
    spacer = lastSpacer + "\t"

    #Zero children aren't allowed (i.e. <empty/>)
    #This makes writing output simpler, and matches Canonical XML
    if element.childNodes.length==0: #DON'T DO len(element.childNodes) - doesn't work in Jython
        raise SyntaxError("Empty XML elements not allowed")

    #If there's a single child, it must be text content
    if element.childNodes.length==1:
        onlyChild = element.firstChild
        if onlyChild.nodeType == onlyChild.TEXT_NODE:
            #If it's an empty element, remove
            if onlyChild.data == lastSpacer:
                element.removeChild(onlyChild)
            return
        #If not text content, give an error
        elif onlyChild.nodeType == onlyChild.ELEMENT_NODE:
            raise SyntaxError("Bad whitespace under '%s'" % element.tagName)
        else:
            raise SyntaxError("Unexpected node type in XML document")

    #Otherwise there are multiple children
    child = element.firstChild
    while child is not None:
        if child.nodeType == child.ELEMENT_NODE:
            stripWhitespace(child, tab+1)
            child = child.nextSibling
        elif child.nodeType == child.TEXT_NODE:
            if child == element.lastChild:
                if child.data != lastSpacer:
                    raise SyntaxError("Bad whitespace under '%s'" % element.tagName)
            elif child.data != spacer:
                raise SyntaxError("Bad whitespace under '%s'" % element.tagName)
            #Fetch the sibling before unlinking the node we are standing on
            following = child.nextSibling
            element.removeChild(child)
            child = following
        else:
            raise SyntaxError("Unexpected node type in XML document")


def checkName(element, name):
    """Check that `element` is an element node called `name`.

    If `name` is None only the node type is checked.  Raises SyntaxError
    when either check fails.
    """
    if element.nodeType != element.ELEMENT_NODE:
        raise SyntaxError("Missing element: '%s'" % name)

    if name is None:
        return

    if element.tagName != name:
        raise SyntaxError("Wrong element name: should be '%s', is '%s'" % (name, element.tagName))

def getChild(element, index, name=None):
    """Return the child of `element` at position `index`.

    The child must exist and must be an element node; if `name` is given
    its tag name must match too.  Raises SyntaxError otherwise.
    """
    if element.nodeType != element.ELEMENT_NODE:
        raise SyntaxError("Wrong node type in getChild()")

    child = element.childNodes.item(index)
    if child is None:
        raise SyntaxError("Missing child: '%s'" % name)
    checkName(child, name)
    return child

def getChildIter(element, index):
    """Return a cursor over the children of `element`, starting at `index`.

    The returned object has two methods: next() returns the next child, or
    None once the children are exhausted, and checkEnd() raises SyntaxError
    if some children have not been consumed yet.
    """
    class ChildIter:
        def __init__(self, element, index):
            self.element = element
            self.index = index

        def next(self):
            #Use .length, not len(): len() on childNodes doesn't work in Jython
            if self.index < self.element.childNodes.length:
                retVal = self.element.childNodes.item(self.index)
                self.index += 1
            else:
                retVal = None
            return retVal

        def checkEnd(self):
            if self.index != self.element.childNodes.length:
                raise SyntaxError("Too many elements under: '%s'" % self.element.tagName)
    return ChildIter(element, index)

def getChildOrNone(element, index):
    """Return the child of `element` at position `index`, or None if absent.

    Raises SyntaxError if `element` is not an element node.
    """
    if element.nodeType != element.ELEMENT_NODE:
        raise SyntaxError("Wrong node type in getChildOrNone()")
    child = element.childNodes.item(index)
    return child

def getLastChild(element, index, name=None):
    """Return the child at `index`, which must also be the last child.

    The child must be an element node and, if `name` is given, must carry
    that tag name.  Raises SyntaxError if the child is missing, is followed
    by further children, or fails the name check.
    """
    if element.nodeType != element.ELEMENT_NODE:
        raise SyntaxError("Wrong node type in getLastChild()")

    child = element.childNodes.item(index)
    if child is None:
        raise SyntaxError("Missing child: '%s'" % name)
    if child != element.lastChild:
        raise SyntaxError("Too many elements under: '%s'" % element.tagName)
    checkName(child, name)
    return child

#Regular expressions for syntax-checking attribute and element content.
#They are applied with re.match(), which anchors at the start of the value;
#the trailing \Z anchors the end, so the whole value has to match.
nsRegEx = r"http://trevp\.net/cryptoID\Z"
cryptoIDRegEx = r"([a-km-z3-9]{5}\.){3}[a-km-z3-9]{5}\Z"
urlRegEx = r"http(s)?://.{1,100}\Z"
sha1Base64RegEx = r"[A-Za-z0-9+/]{27}=\Z"
base64RegEx = r"[A-Za-z0-9+/]+={0,4}\Z"
certsListRegEx = r"(0)?(1)?(2)?(3)?(4)?(5)?(6)?(7)?(8)?(9)?\Z"
keyRegEx = r"[A-Z]\Z"
keysListRegEx = r"(A)?(B)?(C)?(D)?(E)?(F)?(G)?(H)?(I)?(J)?(K)?(L)?(M)?(N)?(O)?(P)?(Q)?(R)?(S)?(T)?(U)?(V)?(W)?(X)?(Y)?(Z)?\Z"
dateTimeRegEx = r"\d\d\d\d-\d\d-\d\dT\d\d:\d\d:\d\dZ\Z"
shortStringRegEx = r".{1,100}\Z"
exprRegEx = r"[a-zA-Z0-9 ,()]{1,200}\Z"
#The alternatives are grouped so that \Z applies to both of them
notAfterDeltaRegEx = r"(0|[1-9][0-9]{0,8})\Z" #A number from 0 to (1 billion)-1
booleanRegEx = r"(true|false)\Z"

def getReqAttribute(element, attrName, regEx=""):
    """Return and remove the required attribute `attrName` of `element`.

    The value must match `regEx` (the default, an empty pattern, accepts
    anything).  On success the attribute is removed from the element, so
    that checkNoMoreAttributes() can later detect leftovers, and its value
    is returned via str().  Raises SyntaxError if `element` is not an
    element node, if the attribute is missing or empty, or if its value
    does not match.
    """
    if element.nodeType != element.ELEMENT_NODE:
        raise SyntaxError("Wrong node type in getReqAttribute()")

    value = element.getAttribute(attrName)
    if not value:
        raise SyntaxError("Missing Attribute: " + attrName)
    if not re.match(regEx, value):
        raise SyntaxError("Bad Attribute Value for '%s': '%s' " % (attrName, value))
    element.removeAttribute(attrName)
    return str(value) #de-unicode it; this is needed for bsddb, for example

def getAttribute(element, attrName, regEx=""):
    """Return and remove the optional attribute `attrName` of `element`.

    If the attribute is present and non-empty, its value must match `regEx`
    (the default, an empty pattern, accepts anything); the attribute is
    then removed from the element and its value returned via str().  If
    the attribute is absent or empty, None is returned and the element is
    left untouched.  Raises SyntaxError if `element` is not an element node
    or if the value does not match.
    """
    if element.nodeType != element.ELEMENT_NODE:
        raise SyntaxError("Wrong node type in getAttribute()")

    value = element.getAttribute(attrName)
    if not value:
        return None
    if not re.match(regEx, value):
        raise SyntaxError("Bad Attribute Value for '%s': '%s' " % (attrName, value))
    element.removeAttribute(attrName)
    return str(value) #de-unicode it; this is needed for bsddb, for example

def checkNoMoreAttributes(element):
    """Raise SyntaxError if `element` still has any attributes.

    Also raises SyntaxError if `element` is not an element node.
    """
    if element.nodeType != element.ELEMENT_NODE:
        raise SyntaxError("Wrong node type in checkNoMoreAttributes()")

    if element.attributes.length!=0:
        raise SyntaxError("Extra attributes on '%s'" % element.tagName)

def getText(element, regEx=""):
    """Return the text held by the first child of `element`.

    The first child must be a text node whose data matches `regEx` (the
    default, an empty pattern, accepts anything).  The data is returned via
    str().  Raises SyntaxError if the element has no children, if the first
    child is not a text node, or if the text does not match.
    """
    textNode = element.firstChild
    if textNode is None:
        raise SyntaxError("Empty element '%s'" % element.tagName)
    if textNode.nodeType != textNode.TEXT_NODE:
        raise SyntaxError("Non-text node: '%s'" % element.tagName)
    if not re.match(regEx, textNode.data):
        raise SyntaxError("Bad Text Value for '%s': '%s' " % (element.tagName, textNode.data))
    return str(textNode.data) #de-unicode it; this is needed for bsddb, for example

#Function for adding tabs to a string
def indent(s, steps, ch="\t"):
    """Prefix every line of `s` with `steps` copies of `ch`.

    A trailing newline in `s` is kept as the end of the result; no
    indentation is added after it.  An empty `s`, or an empty prefix
    (`steps` of 0 or an empty `ch`), returns `s` unchanged.
    """
    tabs = ch*steps
    #Nothing to indent, or nothing to indent with.  The empty-prefix case
    #needs its own exit because s[:-len(tabs)] would be s[:0], i.e. ""
    if not s or not tabs:
        return s
    indented = tabs + s.replace("\n", "\n"+tabs)
    if s[-1] == "\n":
        #Drop the prefix that was added after the final newline
        indented = indented[ : -len(tabs)]
    return indented

def escape(s):
    """Return `s` with XML special characters escaped by saxutils.escape()."""
    #NOTE(review): saxutils is imported at module level only when
    #os.name != "java", so this looks like it raises NameError under
    #Jython - confirm whether escape() is ever called there
    return saxutils.escape(s)