1"""Implementation of the DOM Level 3 'LS-Load' feature."""
2
3import copy
4import xml.dom
5
6from xml.dom.NodeFilter import NodeFilter
7
8
# Names bound by a star-import of this module; every other class defined
# in this file has to be imported by name.
__all__ = ["DOMBuilder", "DOMEntityResolver", "DOMInputSource"]
10
11
class Options:
    """Bag of parser settings, one attribute per DOMBuilder feature.

    A DOMBuilder keeps an instance of this class and hands a copy of it
    to the ExpatBuilder class; the class attributes below are the
    defaults every new instance starts with.
    """

    # The DOMBuilder class in LoadSave restricts which of these values
    # may actually be changed through the DOM Level 3 LoadSave feature.

    # Stored as an int rather than a bool.
    namespaces = 1

    # Remaining feature flags, in alphabetical order.
    cdata_sections = True
    charset_overrides_xml_encoding = True
    comments = True
    create_entity_ref_nodes = True
    datatype_normalization = False
    entities = True
    external_dtd_subset = True
    external_general_entities = True
    external_parameter_entities = True
    infoset = False
    namespace_declarations = True
    supported_mediatypes_only = False
    validate = False
    validate_if_schema = False
    validation = False
    whitespace_in_element_content = True

    # Collaborator objects; DOMBuilder.parse() fills these in on its
    # private copy of the options before building.
    errorHandler = None
    filter = None
42
43
class DOMBuilder:
    """Synchronous document builder for the DOM Level 3 'LS-Load' feature.

    Feature changes made through setFeature() are recorded on a private
    Options instance.  parse() copies that instance, attaches the current
    filter and error handler, and passes it to xml.dom.expatbuilder.
    """

    entityResolver = None
    errorHandler = None
    filter = None

    ACTION_REPLACE = 1
    ACTION_APPEND_AS_CHILDREN = 2
    ACTION_INSERT_AFTER = 3
    ACTION_INSERT_BEFORE = 4

    _legal_actions = (ACTION_REPLACE, ACTION_APPEND_AS_CHILDREN,
                      ACTION_INSERT_AFTER, ACTION_INSERT_BEFORE)

    def __init__(self):
        self._options = Options()

    # Plain accessor methods for the three collaborator attributes.

    def _get_entityResolver(self):
        return self.entityResolver
    def _set_entityResolver(self, entityResolver):
        self.entityResolver = entityResolver

    def _get_errorHandler(self):
        return self.errorHandler
    def _set_errorHandler(self, errorHandler):
        self.errorHandler = errorHandler

    def _get_filter(self):
        return self.filter
    def _set_filter(self, filter):
        self.filter = filter

    def setFeature(self, name, state):
        """Switch the feature *name* on (true *state*) or off.

        Raises xml.dom.NotFoundErr if the feature is unknown, and
        xml.dom.NotSupportedErr if it is known but the requested state
        has no entry in the _settings table.
        """
        if not self.supportsFeature(name):
            raise xml.dom.NotFoundErr("unknown feature: " + repr(name))
        key = (_name_xform(name), int(bool(state)))
        try:
            settings = self._settings[key]
        except KeyError:
            raise xml.dom.NotSupportedErr(
                "unsupported feature: %r" % (name,))
        for option, value in settings:
            setattr(self._options, option, value)

    def supportsFeature(self, name):
        """Return true if *name* corresponds to an attribute of the options."""
        return hasattr(self._options, _name_xform(name))

    def canSetFeature(self, name, state):
        """Return true if setFeature(name, state) would be accepted."""
        key = (_name_xform(name), state and 1 or 0)
        return key in self._settings

    # This dictionary maps from (feature,value) to a list of
    # (option,value) pairs that should be set on the Options object.
    # If a (feature,value) setting is not in this dictionary, it is
    # not supported by the DOMBuilder.
    #
    _settings = {
        ("namespace_declarations", 0): [
            ("namespace_declarations", 0)],
        ("namespace_declarations", 1): [
            ("namespace_declarations", 1)],
        ("validation", 0): [
            ("validation", 0)],
        ("external_general_entities", 0): [
            ("external_general_entities", 0)],
        ("external_general_entities", 1): [
            ("external_general_entities", 1)],
        ("external_parameter_entities", 0): [
            ("external_parameter_entities", 0)],
        ("external_parameter_entities", 1): [
            ("external_parameter_entities", 1)],
        ("validate_if_schema", 0): [
            ("validate_if_schema", 0)],
        ("create_entity_ref_nodes", 0): [
            ("create_entity_ref_nodes", 0)],
        ("create_entity_ref_nodes", 1): [
            ("create_entity_ref_nodes", 1)],
        ("entities", 0): [
            ("create_entity_ref_nodes", 0),
            ("entities", 0)],
        ("entities", 1): [
            ("entities", 1)],
        ("whitespace_in_element_content", 0): [
            ("whitespace_in_element_content", 0)],
        ("whitespace_in_element_content", 1): [
            ("whitespace_in_element_content", 1)],
        ("cdata_sections", 0): [
            ("cdata_sections", 0)],
        ("cdata_sections", 1): [
            ("cdata_sections", 1)],
        ("comments", 0): [
            ("comments", 0)],
        ("comments", 1): [
            ("comments", 1)],
        ("charset_overrides_xml_encoding", 0): [
            ("charset_overrides_xml_encoding", 0)],
        ("charset_overrides_xml_encoding", 1): [
            ("charset_overrides_xml_encoding", 1)],
        ("infoset", 0): [],
        ("infoset", 1): [
            ("namespace_declarations", 0),
            ("validate_if_schema", 0),
            ("create_entity_ref_nodes", 0),
            ("entities", 0),
            ("cdata_sections", 0),
            ("datatype_normalization", 1),
            ("whitespace_in_element_content", 1),
            ("comments", 1),
            ("charset_overrides_xml_encoding", 1)],
        ("supported_mediatypes_only", 0): [
            ("supported_mediatypes_only", 0)],
        ("namespaces", 0): [
            ("namespaces", 0)],
        ("namespaces", 1): [
            ("namespaces", 1)],
    }

    def getFeature(self, name):
        """Return the current value of the feature *name*.

        "infoset" is not stored; it is derived from the options that
        setFeature("infoset", True) assigns, so it reads true exactly
        while all of them still hold those values.

        Raises xml.dom.NotFoundErr for an unknown feature.
        """
        xname = _name_xform(name)
        if xname == "infoset":
            # Must be handled before the attribute lookup: the options
            # object always has an 'infoset' attribute (supportsFeature()
            # relies on it), and nothing in _settings ever updates it.
            options = self._options
            return (options.datatype_normalization
                    and options.whitespace_in_element_content
                    and options.comments
                    and options.charset_overrides_xml_encoding
                    and not (options.namespace_declarations
                             or options.validate_if_schema
                             or options.create_entity_ref_nodes
                             or options.entities
                             or options.cdata_sections))
        try:
            return getattr(self._options, xname)
        except AttributeError:
            raise xml.dom.NotFoundErr("feature %s not known" % repr(name))

    def parseURI(self, uri):
        """Resolve *uri* to an input source and parse it.

        Uses self.entityResolver when one is set, otherwise a fresh
        DOMEntityResolver.
        """
        resolver = self.entityResolver or DOMEntityResolver()
        return self.parse(resolver.resolveEntity(None, uri))

    def parse(self, input):
        """Build a document from *input* and return the builder's result.

        *input* must provide 'byteStream' and 'systemId' attributes.  The
        byte stream is used when it is not None; otherwise the systemId,
        if non-empty, is opened as a URL.  A stream opened here is closed
        again before returning; a caller-supplied stream is left open.
        """
        options = copy.copy(self._options)
        options.filter = self.filter
        options.errorHandler = self.errorHandler
        fp = input.byteStream
        # The system identifier belongs to the input source, not to the
        # options object (which has no such attribute).
        if fp is None and input.systemId:
            try:
                from urllib.request import urlopen
            except ImportError:
                from urllib2 import urlopen
            fp = urlopen(input.systemId)
            try:
                return self._parse_bytestream(fp, options)
            finally:
                fp.close()
        return self._parse_bytestream(fp, options)

    def parseWithContext(self, input, cnode, action):
        """Not implemented; only validates *action* before giving up.

        Raises ValueError for an action outside _legal_actions and
        NotImplementedError otherwise.
        """
        if action not in self._legal_actions:
            raise ValueError("not a legal action")
        raise NotImplementedError("Haven't written this yet...")

    def _parse_bytestream(self, stream, options):
        """Feed *stream* to a builder created from *options*."""
        # Imported lazily, at the point of use.
        import xml.dom.expatbuilder
        builder = xml.dom.expatbuilder.makeBuilder(options)
        return builder.parseFile(stream)
206
207
208def _name_xform(name):
209    return name.lower().replace('-', '_')
210
211
class DOMEntityResolver(object):
    """Turn a system identifier into a populated DOMInputSource.

    The URL opener is created on first use and then kept on the
    instance.
    """

    __slots__ = '_opener',

    def resolveEntity(self, publicId, systemId):
        """Open *systemId* and return an input source describing it.

        The returned source carries both identifiers, the opened byte
        stream, the charset reported by the transport (or None) and,
        when the URL path does not end in '/', a base URI naming the
        directory that contains the resource.
        """
        assert systemId is not None
        source = DOMInputSource()
        source.publicId = publicId
        source.systemId = systemId
        source.byteStream = self._get_opener().open(systemId)

        # use the encoding reported by the transport, if any
        source.encoding = self._guess_media_encoding(source)

        # work out the base URI if we can
        import posixpath, urlparse
        scheme, netloc, path, params, query, fragment = \
            urlparse.urlparse(systemId)
        # XXX should we check the scheme here as well?
        if path and not path.endswith("/"):
            directory = posixpath.dirname(path) + "/"
            source.baseURI = urlparse.urlunparse(
                (scheme, netloc, directory, params, query, fragment))

        return source

    def _get_opener(self):
        """Return the cached opener, creating it on the first call."""
        try:
            opener = self._opener
        except AttributeError:
            # The slot has not been filled yet.
            opener = self._opener = self._create_opener()
        return opener

    def _create_opener(self):
        """Build the opener used to fetch entities."""
        import urllib2
        return urllib2.build_opener()

    def _guess_media_encoding(self, source):
        """Return the lower-cased charset from the stream's headers.

        Returns None when there is no Content-Type header or it has no
        charset parameter.
        """
        headers = source.byteStream.info()
        if "Content-Type" not in headers:
            return None
        prefix = "charset="
        for item in headers.getplist():
            if item.startswith(prefix):
                return item[len(prefix):].lower()
        return None
254
255
class DOMInputSource(object):
    """Record describing one input to be parsed.

    Instances carry exactly the seven attributes named in __slots__;
    all of them start out as None.
    """

    __slots__ = ('byteStream', 'characterStream', 'stringData',
                 'encoding', 'publicId', 'systemId', 'baseURI')

    def __init__(self):
        self.byteStream = self.characterStream = self.stringData = None
        self.encoding = self.publicId = None
        self.systemId = self.baseURI = None

    # Accessor pairs: each one simply reads or assigns the attribute
    # of the same name.

    def _get_byteStream(self):
        return self.byteStream

    def _set_byteStream(self, byteStream):
        self.byteStream = byteStream

    def _get_characterStream(self):
        return self.characterStream

    def _set_characterStream(self, characterStream):
        self.characterStream = characterStream

    def _get_stringData(self):
        return self.stringData

    def _set_stringData(self, data):
        self.stringData = data

    def _get_encoding(self):
        return self.encoding

    def _set_encoding(self, encoding):
        self.encoding = encoding

    def _get_publicId(self):
        return self.publicId

    def _set_publicId(self, publicId):
        self.publicId = publicId

    def _get_systemId(self):
        return self.systemId

    def _set_systemId(self, systemId):
        self.systemId = systemId

    def _get_baseURI(self):
        return self.baseURI

    def _set_baseURI(self, uri):
        self.baseURI = uri
303
304
class DOMBuilderFilter:
    """Base class for filters that tailor how a DOM instance is built.

    Both hook methods accept by default, so a subclass only needs to
    override the one it cares about.
    """

    # Strictly speaking this class is unnecessary: a concrete filter can
    # simply provide acceptNode() and startContainer() itself.  Deriving
    # from it just makes it easy to implement only one of the two.

    FILTER_ACCEPT, FILTER_REJECT, FILTER_SKIP, FILTER_INTERRUPT = 1, 2, 3, 4

    whatToShow = NodeFilter.SHOW_ALL

    def _get_whatToShow(self):
        return self.whatToShow

    def acceptNode(self, element):
        """Default verdict for *element*: FILTER_ACCEPT."""
        return self.FILTER_ACCEPT

    def startContainer(self, element):
        """Default verdict for *element*: FILTER_ACCEPT."""
        return self.FILTER_ACCEPT
330
# In this file NodeFilter is used only for DOMBuilderFilter.whatToShow;
# remove the name from the module namespace now that the class exists.
del NodeFilter
332
333
334class DocumentLS:
335    """Mixin to create documents that conform to the load/save spec."""
336
337    async = False
338
339    def _get_async(self):
340        return False
341    def _set_async(self, async):
342        if async:
343            raise xml.dom.NotSupportedErr(
344                "asynchronous document loading is not supported")
345
346    def abort(self):
347        # What does it mean to "clear" a document?  Does the
348        # documentElement disappear?
349        raise NotImplementedError(
350            "haven't figured out what this means yet")
351
352    def load(self, uri):
353        raise NotImplementedError("haven't written this yet")
354
355    def loadXML(self, source):
356        raise NotImplementedError("haven't written this yet")
357
358    def saveXML(self, snode):
359        if snode is None:
360            snode = self
361        elif snode.ownerDocument is not self:
362            raise xml.dom.WrongDocumentErr()
363        return snode.toxml()
364
365
class DOMImplementationLS:
    """Factory methods for the load/save objects defined in this module."""

    MODE_SYNCHRONOUS = 1
    MODE_ASYNCHRONOUS = 2

    def createDOMBuilder(self, mode, schemaType):
        """Return a new DOMBuilder for synchronous *mode*.

        Raises xml.dom.NotSupportedErr when *schemaType* is not None or
        when asynchronous mode is requested, and ValueError for any
        other mode value.
        """
        if schemaType is not None:
            raise xml.dom.NotSupportedErr(
                "schemaType not yet supported")
        if mode == self.MODE_ASYNCHRONOUS:
            raise xml.dom.NotSupportedErr(
                "asynchronous builders are not supported")
        if mode != self.MODE_SYNCHRONOUS:
            raise ValueError("unknown value for mode")
        return DOMBuilder()

    def createDOMWriter(self):
        """Not available; always raises NotImplementedError."""
        raise NotImplementedError(
            "the writer interface hasn't been written yet!")

    def createDOMInputSource(self):
        """Return a new, empty DOMInputSource."""
        return DOMInputSource()
387