1# markdown/html4.py
2#
# Add html4 serialization to older versions of ElementTree
4# Taken from ElementTree 1.3 preview with slight modifications
5#
6# Copyright (c) 1999-2007 by Fredrik Lundh.  All rights reserved.
7#
8# fredrik@pythonware.com
9# http://www.pythonware.com
10#
11# --------------------------------------------------------------------
12# The ElementTree toolkit is
13#
14# Copyright (c) 1999-2007 by Fredrik Lundh
15#
16# By obtaining, using, and/or copying this software and/or its
17# associated documentation, you agree that you have read, understood,
18# and will comply with the following terms and conditions:
19#
20# Permission to use, copy, modify, and distribute this software and
21# its associated documentation for any purpose and without fee is
22# hereby granted, provided that the above copyright notice appears in
23# all copies, and that both that copyright notice and this permission
24# notice appear in supporting documentation, and that the name of
25# Secret Labs AB or the author not be used in advertising or publicity
26# pertaining to distribution of the software without specific, written
27# prior permission.
28#
29# SECRET LABS AB AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH REGARD
30# TO THIS SOFTWARE, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANT-
31# ABILITY AND FITNESS.  IN NO EVENT SHALL SECRET LABS AB OR THE AUTHOR
32# BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
33# DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
34# WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
35# ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
36# OF THIS SOFTWARE.
37# --------------------------------------------------------------------
38
39
40import markdown
41ElementTree = markdown.etree.ElementTree
42QName = markdown.etree.QName
43Comment = markdown.etree.Comment
44PI = markdown.etree.PI
45ProcessingInstruction = markdown.etree.ProcessingInstruction
46
# Names of the HTML 4 elements that have no content.  ``_serialize_html``
# omits the closing tag for every element listed here.
HTML_EMPTY = ("area", "base", "basefont", "br", "col", "frame", "hr",
              "img", "input", "isindex", "link", "meta", "param")

try:
    # Prefer a set so membership tests do not scan the whole tuple.
    HTML_EMPTY = set(HTML_EMPTY)
except NameError:
    # No ``set`` builtin available on this interpreter; keep the tuple.
    pass
54
# Maps namespace URIs to their conventional prefixes.  ``_namespaces``
# consults this table for a URI it has not seen yet, before falling back
# to a generated "ns<N>" prefix.
_namespace_map = {
    # "well-known" namespace prefixes
    "http://www.w3.org/XML/1998/namespace": "xml",
    "http://www.w3.org/1999/xhtml": "html",
    "http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf",
    "http://schemas.xmlsoap.org/wsdl/": "wsdl",
    # xml schema
    "http://www.w3.org/2001/XMLSchema": "xs",
    "http://www.w3.org/2001/XMLSchema-instance": "xsi",
    # Dublin Core
    "http://purl.org/dc/elements/1.1/": "dc",
}
67
68
69def _raise_serialization_error(text):
70    raise TypeError(
71        "cannot serialize %r (type %s)" % (text, type(text).__name__)
72        )
73
74def _encode(text, encoding):
75    try:
76        return text.encode(encoding, "xmlcharrefreplace")
77    except (TypeError, AttributeError):
78        _raise_serialization_error(text)
79
80def _escape_cdata(text, encoding):
81    # escape character data
82    try:
83        # it's worth avoiding do-nothing calls for strings that are
84        # shorter than 500 character, or so.  assume that's, by far,
85        # the most common case in most applications.
86        if "&" in text:
87            text = text.replace("&", "&")
88        if "<" in text:
89            text = text.replace("<", "&lt;")
90        if ">" in text:
91            text = text.replace(">", "&gt;")
92        return text.encode(encoding, "xmlcharrefreplace")
93    except (TypeError, AttributeError):
94        _raise_serialization_error(text)
95
96
97def _escape_attrib(text, encoding):
98    # escape attribute value
99    try:
100        if "&" in text:
101            text = text.replace("&", "&amp;")
102        if "<" in text:
103            text = text.replace("<", "&lt;")
104        if ">" in text:
105            text = text.replace(">", "&gt;")
106        if "\"" in text:
107            text = text.replace("\"", "&quot;")
108        if "\n" in text:
109            text = text.replace("\n", "&#10;")
110        return text.encode(encoding, "xmlcharrefreplace")
111    except (TypeError, AttributeError):
112        _raise_serialization_error(text)
113
114def _escape_attrib_html(text, encoding):
115    # escape attribute value
116    try:
117        if "&" in text:
118            text = text.replace("&", "&amp;")
119        if ">" in text:
120            text = text.replace(">", "&gt;")
121        if "\"" in text:
122            text = text.replace("\"", "&quot;")
123        return text.encode(encoding, "xmlcharrefreplace")
124    except (TypeError, AttributeError):
125        _raise_serialization_error(text)
126
127
def _serialize_html(write, elem, encoding, qnames, namespaces):
    """Recursively write *elem* and its subtree as HTML.

    write -- callable that receives each output fragment.
    elem -- element to serialize; its tag, text, attributes, children and
        tail are written.
    encoding -- encoding handed to the escape/encode helpers.
    qnames -- mapping from tags and attribute names to their serialized
        names; a value of None means the element itself produces no
        markup and only its text and children are written.
    namespaces -- mapping from namespace URI to prefix, written as
        ``xmlns`` attributes on this element; the recursive calls for
        child elements always pass None.
    """
    tag = elem.tag
    text = elem.text
    if tag is Comment:
        write("<!--%s-->" % _escape_cdata(text, encoding))
    elif tag is ProcessingInstruction:
        write("<?%s?>" % _escape_cdata(text, encoding))
    else:
        # Translate the tag into its serialized name.
        tag = qnames[tag]
        if tag is None:
            # No markup for this element: emit only its text and children.
            if text:
                write(_escape_cdata(text, encoding))
            for e in elem:
                _serialize_html(write, e, encoding, qnames, None)
        else:
            write("<" + tag)
            items = elem.items()
            if items or namespaces:
                items.sort() # lexical order
                for k, v in items:
                    if isinstance(k, QName):
                        k = k.text
                    if isinstance(v, QName):
                        # QName values are looked up, not escaped.
                        v = qnames[v.text]
                    else:
                        v = _escape_attrib_html(v, encoding)
                    # FIXME: handle boolean attributes
                    write(" %s=\"%s\"" % (qnames[k], v))
                if namespaces:
                    items = namespaces.items()
                    items.sort(key=lambda x: x[1]) # sort on prefix
                    # Each item is (uri, prefix): v is the URI, k the prefix.
                    for v, k in items:
                        if k:
                            # Non-empty prefix -> "xmlns:prefix"; an empty
                            # prefix yields a plain "xmlns" attribute.
                            k = ":" + k
                        write(" xmlns%s=\"%s\"" % (
                            k.encode(encoding),
                            _escape_attrib(v, encoding)
                            ))
            write(">")
            # From here on the lowercased name is used, both for the
            # comparisons below and for the closing tag.
            tag = tag.lower()
            if text:
                if tag == "script" or tag == "style":
                    # Script and style content is encoded but not escaped.
                    write(_encode(text, encoding))
                else:
                    write(_escape_cdata(text, encoding))
            for e in elem:
                _serialize_html(write, e, encoding, qnames, None)
            # Elements listed in HTML_EMPTY get no closing tag.
            if tag not in HTML_EMPTY:
                write("</" + tag + ">")
    # The tail (text following the element) is written for every node
    # kind, including comments and processing instructions.
    if elem.tail:
        write(_escape_cdata(elem.tail, encoding))
179
def write_html(root, f,
          # keyword arguments
          encoding="us-ascii",
          default_namespace=None):
    """Serialize the tree rooted at *root* as HTML and write it to *f*.

    root -- root element of the tree to serialize; must not be None.
    f -- either an object with a ``write`` method, or a file name.  A
        file name is opened for binary writing and the file is closed
        again before this function returns, even if serialization fails.
        An object supplied by the caller is never closed here.
    encoding -- output encoding; a false value selects "us-ascii".
    default_namespace -- forwarded unchanged to ``_namespaces``.
    """
    assert root is not None
    # Remember whether the file handle is ours, so that only a handle
    # opened here gets closed.
    opened_here = not hasattr(f, "write")
    if opened_here:
        f = open(f, "wb")
    try:
        write = f.write
        if not encoding:
            encoding = "us-ascii"
        qnames, namespaces = _namespaces(
                root, encoding, default_namespace
                )
        _serialize_html(
                    write, root, encoding, qnames, namespaces
                    )
    finally:
        if opened_here:
            f.close()
196
197# --------------------------------------------------------------------
198# serialization support
199
def _namespaces(elem, encoding, default_namespace=None):
    """Collect the qualified names and namespaces used in a tree.

    Walks *elem* and all of its descendants and returns a tuple
    ``(qnames, namespaces)``:

    qnames -- maps each tag, attribute name and QName value found in the
        tree to its serialized name, encoded with *encoding*.  It also
        maps None to None.
    namespaces -- maps namespace URIs to the prefixes chosen for them.
        If *default_namespace* is given it is mapped to the empty prefix.
        The "xml" prefix is never recorded here.

    Raises ValueError if *default_namespace* is set and an unqualified
    name is encountered.  Tags that are not a QName, a string, None,
    Comment or PI are reported through ``_raise_serialization_error``.
    """
    # identify namespaces used in this tree

    # maps qnames to *encoded* prefix:local names
    qnames = {None: None}

    # maps uri:s to prefixes
    namespaces = {}
    if default_namespace:
        namespaces[default_namespace] = ""

    def encode(text):
        return text.encode(encoding)

    def add_qname(qname):
        # calculate serialized qname representation
        try:
            if qname[:1] == "{":
                # "{uri}local" form: split into namespace URI and local name.
                uri, tag = qname[1:].split("}", 1)
                prefix = namespaces.get(uri)
                if prefix is None:
                    # First occurrence of this URI: use the well-known
                    # prefix if there is one, otherwise generate "ns<N>"
                    # from the number of namespaces recorded so far.
                    prefix = _namespace_map.get(uri)
                    if prefix is None:
                        prefix = "ns%d" % len(namespaces)
                    if prefix != "xml":
                        namespaces[uri] = prefix
                if prefix:
                    qnames[qname] = encode("%s:%s" % (prefix, tag))
                else:
                    qnames[qname] = encode(tag) # default element
            else:
                if default_namespace:
                    # FIXME: can this be handled in XML 1.0?
                    raise ValueError(
                        "cannot use non-qualified names with "
                        "default_namespace option"
                        )
                qnames[qname] = encode(qname)
        except TypeError:
            _raise_serialization_error(qname)

    # populate qname and namespaces table
    try:
        iterate = elem.iter
    except AttributeError:
        iterate = elem.getiterator # cET compatibility
    # NOTE: the loop variable reuses (and rebinds) the ``elem`` parameter.
    for elem in iterate():
        tag = elem.tag
        if isinstance(tag, QName) and tag.text not in qnames:
            add_qname(tag.text)
        elif isinstance(tag, basestring):
            if tag not in qnames:
                add_qname(tag)
        elif tag is not None and tag is not Comment and tag is not PI:
            _raise_serialization_error(tag)
        # Attribute names are always registered; attribute values only
        # when they are QName instances.
        for key, value in elem.items():
            if isinstance(key, QName):
                key = key.text
            if key not in qnames:
                add_qname(key)
            if isinstance(value, QName) and value.text not in qnames:
                add_qname(value.text)
        # Element text may itself be a QName that needs a serialized name.
        text = elem.text
        if isinstance(text, QName) and text.text not in qnames:
            add_qname(text.text)
    return qnames, namespaces
266
def to_html_string(element, encoding=None):
    """Serialize *element* as HTML and return the output as one string.

    The fragments produced by ``write_html`` are collected in memory
    through a minimal object exposing only a ``write`` attribute, then
    joined.  *encoding* is forwarded to ``write_html`` unchanged.
    """
    chunks = []

    class _Collector:
        pass

    # Every write() call appends its fragment to the list.
    sink = _Collector()
    sink.write = chunks.append

    root = ElementTree(element).getroot()
    write_html(root, sink, encoding)
    return "".join(chunks)
275