1/*
2 * HTMLtree.c : implementation of access function for an HTML tree.
3 *
4 * See Copyright for the status of this software.
5 *
6 * daniel@veillard.com
7 */
8
9
10#define IN_LIBXML
11#include "libxml.h"
12#ifdef LIBXML_HTML_ENABLED
13
14#include <string.h> /* for memset() only ! */
15
16#ifdef HAVE_CTYPE_H
17#include <ctype.h>
18#endif
19#ifdef HAVE_STDLIB_H
20#include <stdlib.h>
21#endif
22
23#include <libxml/xmlmemory.h>
24#include <libxml/HTMLparser.h>
25#include <libxml/HTMLtree.h>
26#include <libxml/entities.h>
27#include <libxml/valid.h>
28#include <libxml/xmlerror.h>
29#include <libxml/parserInternals.h>
30#include <libxml/globals.h>
31#include <libxml/uri.h>
32
33/************************************************************************
34 *									*
35 *   		Getting/Setting encoding meta tags			*
36 *									*
37 ************************************************************************/
38
39/**
40 * htmlGetMetaEncoding:
41 * @doc:  the document
42 *
43 * Encoding definition lookup in the Meta tags
44 *
45 * Returns the current encoding as flagged in the HTML source
46 */
47const xmlChar *
48htmlGetMetaEncoding(htmlDocPtr doc) {
49    htmlNodePtr cur;
50    const xmlChar *content;
51    const xmlChar *encoding;
52
53    if (doc == NULL)
54	return(NULL);
55    cur = doc->children;
56
57    /*
58     * Search the html
59     */
60    while (cur != NULL) {
61	if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
62	    if (xmlStrEqual(cur->name, BAD_CAST"html"))
63		break;
64	    if (xmlStrEqual(cur->name, BAD_CAST"head"))
65		goto found_head;
66	    if (xmlStrEqual(cur->name, BAD_CAST"meta"))
67		goto found_meta;
68	}
69	cur = cur->next;
70    }
71    if (cur == NULL)
72	return(NULL);
73    cur = cur->children;
74
75    /*
76     * Search the head
77     */
78    while (cur != NULL) {
79	if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
80	    if (xmlStrEqual(cur->name, BAD_CAST"head"))
81		break;
82	    if (xmlStrEqual(cur->name, BAD_CAST"meta"))
83		goto found_meta;
84	}
85	cur = cur->next;
86    }
87    if (cur == NULL)
88	return(NULL);
89found_head:
90    cur = cur->children;
91
92    /*
93     * Search the meta elements
94     */
95found_meta:
96    while (cur != NULL) {
97	if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
98	    if (xmlStrEqual(cur->name, BAD_CAST"meta")) {
99		xmlAttrPtr attr = cur->properties;
100		int http;
101		const xmlChar *value;
102
103		content = NULL;
104		http = 0;
105		while (attr != NULL) {
106		    if ((attr->children != NULL) &&
107		        (attr->children->type == XML_TEXT_NODE) &&
108		        (attr->children->next == NULL)) {
109			value = attr->children->content;
110			if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
111			 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
112			    http = 1;
113			else if ((value != NULL)
114			 && (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
115			    content = value;
116			if ((http != 0) && (content != NULL))
117			    goto found_content;
118		    }
119		    attr = attr->next;
120		}
121	    }
122	}
123	cur = cur->next;
124    }
125    return(NULL);
126
127found_content:
128    encoding = xmlStrstr(content, BAD_CAST"charset=");
129    if (encoding == NULL)
130	encoding = xmlStrstr(content, BAD_CAST"Charset=");
131    if (encoding == NULL)
132	encoding = xmlStrstr(content, BAD_CAST"CHARSET=");
133    if (encoding != NULL) {
134	encoding += 8;
135    } else {
136	encoding = xmlStrstr(content, BAD_CAST"charset =");
137	if (encoding == NULL)
138	    encoding = xmlStrstr(content, BAD_CAST"Charset =");
139	if (encoding == NULL)
140	    encoding = xmlStrstr(content, BAD_CAST"CHARSET =");
141	if (encoding != NULL)
142	    encoding += 9;
143    }
144    if (encoding != NULL) {
145	while ((*encoding == ' ') || (*encoding == '\t')) encoding++;
146    }
147    return(encoding);
148}
149
150/**
151 * htmlSetMetaEncoding:
152 * @doc:  the document
153 * @encoding:  the encoding string
154 *
155 * Sets the current encoding in the Meta tags
156 * NOTE: this will not change the document content encoding, just
157 * the META flag associated.
158 *
159 * Returns 0 in case of success and -1 in case of error
160 */
161int
162htmlSetMetaEncoding(htmlDocPtr doc, const xmlChar *encoding) {
163    htmlNodePtr cur, meta = NULL, head = NULL;
164    const xmlChar *content = NULL;
165    char newcontent[100];
166
167
168    if (doc == NULL)
169	return(-1);
170
171    /* html isn't a real encoding it's just libxml2 way to get entities */
172    if (!xmlStrcasecmp(encoding, BAD_CAST "html"))
173        return(-1);
174
175    if (encoding != NULL) {
176	snprintf(newcontent, sizeof(newcontent), "text/html; charset=%s",
177                (char *)encoding);
178	newcontent[sizeof(newcontent) - 1] = 0;
179    }
180
181    cur = doc->children;
182
183    /*
184     * Search the html
185     */
186    while (cur != NULL) {
187	if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
188	    if (xmlStrcasecmp(cur->name, BAD_CAST"html") == 0)
189		break;
190	    if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
191		goto found_head;
192	    if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0)
193		goto found_meta;
194	}
195	cur = cur->next;
196    }
197    if (cur == NULL)
198	return(-1);
199    cur = cur->children;
200
201    /*
202     * Search the head
203     */
204    while (cur != NULL) {
205	if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
206	    if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
207		break;
208	    if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) {
209                head = cur->parent;
210		goto found_meta;
211            }
212	}
213	cur = cur->next;
214    }
215    if (cur == NULL)
216	return(-1);
217found_head:
218    head = cur;
219    if (cur->children == NULL)
220        goto create;
221    cur = cur->children;
222
223found_meta:
224    /*
225     * Search and update all the remaining the meta elements carrying
226     * encoding informations
227     */
228    while (cur != NULL) {
229	if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
230	    if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) {
231		xmlAttrPtr attr = cur->properties;
232		int http;
233		const xmlChar *value;
234
235		content = NULL;
236		http = 0;
237		while (attr != NULL) {
238		    if ((attr->children != NULL) &&
239		        (attr->children->type == XML_TEXT_NODE) &&
240		        (attr->children->next == NULL)) {
241			value = attr->children->content;
242			if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
243			 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
244			    http = 1;
245			else
246                        {
247                           if ((value != NULL) &&
248                               (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
249			       content = value;
250                        }
251		        if ((http != 0) && (content != NULL))
252			    break;
253		    }
254		    attr = attr->next;
255		}
256		if ((http != 0) && (content != NULL)) {
257		    meta = cur;
258		    break;
259		}
260
261	    }
262	}
263	cur = cur->next;
264    }
265create:
266    if (meta == NULL) {
267        if ((encoding != NULL) && (head != NULL)) {
268            /*
269             * Create a new Meta element with the right attributes
270             */
271
272            meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
273            if (head->children == NULL)
274                xmlAddChild(head, meta);
275            else
276                xmlAddPrevSibling(head->children, meta);
277            xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
278            xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
279        }
280    } else {
281        /* change the document only if there is a real encoding change */
282        if (xmlStrcasestr(content, encoding) == NULL) {
283            xmlSetProp(meta, BAD_CAST"content", BAD_CAST newcontent);
284        }
285    }
286
287
288    return(0);
289}
290
/**
 * htmlBooleanAttrs:
 *
 * NULL-terminated list of the HTML attributes which will be output
 * in minimized form, i.e. <option selected="selected"> will be
 * output as <option selected>, as per XSLT 1.0 16.2 "HTML Output Method"
 *
 */
static const char *htmlBooleanAttrs[] = {
    "checked",
    "compact",
    "declare",
    "defer",
    "disabled",
    "ismap",
    "multiple",
    "nohref",
    "noresize",
    "noshade",
    "nowrap",
    "readonly",
    "selected",
    NULL
};
304
305
306/**
307 * htmlIsBooleanAttr:
308 * @name:  the name of the attribute to check
309 *
310 * Determine if a given attribute is a boolean attribute.
311 *
312 * returns: false if the attribute is not boolean, true otherwise.
313 */
314int
315htmlIsBooleanAttr(const xmlChar *name)
316{
317    int i = 0;
318
319    while (htmlBooleanAttrs[i] != NULL) {
320        if (xmlStrcasecmp((const xmlChar *)htmlBooleanAttrs[i], name) == 0)
321            return 1;
322        i++;
323    }
324    return 0;
325}
326
327#ifdef LIBXML_OUTPUT_ENABLED
328/*
329 * private routine exported from xmlIO.c
330 */
331xmlOutputBufferPtr
332xmlAllocOutputBufferInternal(xmlCharEncodingHandlerPtr encoder);
333/************************************************************************
334 *									*
335 * 			Output error handlers				*
336 *									*
337 ************************************************************************/
338/**
339 * htmlSaveErrMemory:
340 * @extra:  extra informations
341 *
342 * Handle an out of memory condition
343 */
344static void
345htmlSaveErrMemory(const char *extra)
346{
347    __xmlSimpleError(XML_FROM_OUTPUT, XML_ERR_NO_MEMORY, NULL, NULL, extra);
348}
349
350/**
351 * htmlSaveErr:
352 * @code:  the error number
353 * @node:  the location of the error.
354 * @extra:  extra informations
355 *
356 * Handle an out of memory condition
357 */
358static void
359htmlSaveErr(int code, xmlNodePtr node, const char *extra)
360{
361    const char *msg = NULL;
362
363    switch(code) {
364        case XML_SAVE_NOT_UTF8:
365	    msg = "string is not in UTF-8\n";
366	    break;
367	case XML_SAVE_CHAR_INVALID:
368	    msg = "invalid character value\n";
369	    break;
370	case XML_SAVE_UNKNOWN_ENCODING:
371	    msg = "unknown encoding %s\n";
372	    break;
373	case XML_SAVE_NO_DOCTYPE:
374	    msg = "HTML has no DOCTYPE\n";
375	    break;
376	default:
377	    msg = "unexpected error number\n";
378    }
379    __xmlSimpleError(XML_FROM_OUTPUT, code, node, msg, extra);
380}
381
382/************************************************************************
383 *									*
384 *   		Dumping HTML tree content to a simple buffer		*
385 *									*
386 ************************************************************************/
387
388static int
389htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
390	           int format);
391
392/**
393 * htmlNodeDumpFormat:
394 * @buf:  the HTML buffer output
395 * @doc:  the document
396 * @cur:  the current node
397 * @format:  should formatting spaces been added
398 *
399 * Dump an HTML node, recursive behaviour,children are printed too.
400 *
401 * Returns the number of byte written or -1 in case of error
402 */
403static int
404htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
405	           int format) {
406    unsigned int use;
407    int ret;
408    xmlOutputBufferPtr outbuf;
409
410    if (cur == NULL) {
411	return (-1);
412    }
413    if (buf == NULL) {
414	return (-1);
415    }
416    outbuf = (xmlOutputBufferPtr) xmlMalloc(sizeof(xmlOutputBuffer));
417    if (outbuf == NULL) {
418        htmlSaveErrMemory("allocating HTML output buffer");
419	return (-1);
420    }
421    memset(outbuf, 0, (size_t) sizeof(xmlOutputBuffer));
422    outbuf->buffer = buf;
423    outbuf->encoder = NULL;
424    outbuf->writecallback = NULL;
425    outbuf->closecallback = NULL;
426    outbuf->context = NULL;
427    outbuf->written = 0;
428
429    use = buf->use;
430    htmlNodeDumpFormatOutput(outbuf, doc, cur, NULL, format);
431    xmlFree(outbuf);
432    ret = buf->use - use;
433    return (ret);
434}
435
436/**
437 * htmlNodeDump:
438 * @buf:  the HTML buffer output
439 * @doc:  the document
440 * @cur:  the current node
441 *
442 * Dump an HTML node, recursive behaviour,children are printed too,
443 * and formatting returns are added.
444 *
445 * Returns the number of byte written or -1 in case of error
446 */
447int
448htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
449    xmlInitParser();
450
451    return(htmlNodeDumpFormat(buf, doc, cur, 1));
452}
453
454/**
455 * htmlNodeDumpFileFormat:
456 * @out:  the FILE pointer
457 * @doc:  the document
458 * @cur:  the current node
459 * @encoding: the document encoding
460 * @format:  should formatting spaces been added
461 *
462 * Dump an HTML node, recursive behaviour,children are printed too.
463 *
464 * TODO: if encoding == NULL try to save in the doc encoding
465 *
466 * returns: the number of byte written or -1 in case of failure.
467 */
468int
469htmlNodeDumpFileFormat(FILE *out, xmlDocPtr doc,
470	               xmlNodePtr cur, const char *encoding, int format) {
471    xmlOutputBufferPtr buf;
472    xmlCharEncodingHandlerPtr handler = NULL;
473    int ret;
474
475    xmlInitParser();
476
477    if (encoding != NULL) {
478	xmlCharEncoding enc;
479
480	enc = xmlParseCharEncoding(encoding);
481	if (enc != XML_CHAR_ENCODING_UTF8) {
482	    handler = xmlFindCharEncodingHandler(encoding);
483	    if (handler == NULL)
484		return(-1);
485	}
486    }
487
488    /*
489     * Fallback to HTML or ASCII when the encoding is unspecified
490     */
491    if (handler == NULL)
492	handler = xmlFindCharEncodingHandler("HTML");
493    if (handler == NULL)
494	handler = xmlFindCharEncodingHandler("ascii");
495
496    /*
497     * save the content to a temp buffer.
498     */
499    buf = xmlOutputBufferCreateFile(out, handler);
500    if (buf == NULL) return(0);
501
502    htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
503
504    ret = xmlOutputBufferClose(buf);
505    return(ret);
506}
507
508/**
509 * htmlNodeDumpFile:
510 * @out:  the FILE pointer
511 * @doc:  the document
512 * @cur:  the current node
513 *
514 * Dump an HTML node, recursive behaviour,children are printed too,
515 * and formatting returns are added.
516 */
517void
518htmlNodeDumpFile(FILE *out, xmlDocPtr doc, xmlNodePtr cur) {
519    htmlNodeDumpFileFormat(out, doc, cur, NULL, 1);
520}
521
522/**
523 * htmlDocDumpMemoryFormat:
524 * @cur:  the document
525 * @mem:  OUT: the memory pointer
526 * @size:  OUT: the memory length
527 * @format:  should formatting spaces been added
528 *
529 * Dump an HTML document in memory and return the xmlChar * and it's size.
530 * It's up to the caller to free the memory.
531 */
532void
533htmlDocDumpMemoryFormat(xmlDocPtr cur, xmlChar**mem, int *size, int format) {
534    xmlOutputBufferPtr buf;
535    xmlCharEncodingHandlerPtr handler = NULL;
536    const char *encoding;
537
538    xmlInitParser();
539
540    if ((mem == NULL) || (size == NULL))
541        return;
542    if (cur == NULL) {
543	*mem = NULL;
544	*size = 0;
545	return;
546    }
547
548    encoding = (const char *) htmlGetMetaEncoding(cur);
549
550    if (encoding != NULL) {
551	xmlCharEncoding enc;
552
553	enc = xmlParseCharEncoding(encoding);
554	if (enc != cur->charset) {
555	    if (cur->charset != XML_CHAR_ENCODING_UTF8) {
556		/*
557		 * Not supported yet
558		 */
559		*mem = NULL;
560		*size = 0;
561		return;
562	    }
563
564	    handler = xmlFindCharEncodingHandler(encoding);
565	    if (handler == NULL) {
566		*mem = NULL;
567		*size = 0;
568		return;
569	    }
570	} else {
571	    handler = xmlFindCharEncodingHandler(encoding);
572	}
573    }
574
575    /*
576     * Fallback to HTML or ASCII when the encoding is unspecified
577     */
578    if (handler == NULL)
579	handler = xmlFindCharEncodingHandler("HTML");
580    if (handler == NULL)
581	handler = xmlFindCharEncodingHandler("ascii");
582
583    buf = xmlAllocOutputBufferInternal(handler);
584    if (buf == NULL) {
585	*mem = NULL;
586	*size = 0;
587	return;
588    }
589
590	htmlDocContentDumpFormatOutput(buf, cur, NULL, format);
591
592    xmlOutputBufferFlush(buf);
593    if (buf->conv != NULL) {
594	*size = buf->conv->use;
595	*mem = xmlStrndup(buf->conv->content, *size);
596    } else {
597	*size = buf->buffer->use;
598	*mem = xmlStrndup(buf->buffer->content, *size);
599    }
600    (void)xmlOutputBufferClose(buf);
601}
602
603/**
604 * htmlDocDumpMemory:
605 * @cur:  the document
606 * @mem:  OUT: the memory pointer
607 * @size:  OUT: the memory length
608 *
609 * Dump an HTML document in memory and return the xmlChar * and it's size.
610 * It's up to the caller to free the memory.
611 */
612void
613htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) {
614	htmlDocDumpMemoryFormat(cur, mem, size, 1);
615}
616
617
618/************************************************************************
619 *									*
620 *   		Dumping HTML tree content to an I/O output buffer	*
621 *									*
622 ************************************************************************/
623
624void xmlNsListDumpOutput(xmlOutputBufferPtr buf, xmlNsPtr cur);
625
626/**
627 * htmlDtdDumpOutput:
628 * @buf:  the HTML buffer output
629 * @doc:  the document
630 * @encoding:  the encoding string
631 *
632 * TODO: check whether encoding is needed
633 *
634 * Dump the HTML document DTD, if any.
635 */
636static void
637htmlDtdDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
638	          const char *encoding ATTRIBUTE_UNUSED) {
639    xmlDtdPtr cur = doc->intSubset;
640
641    if (cur == NULL) {
642	htmlSaveErr(XML_SAVE_NO_DOCTYPE, (xmlNodePtr) doc, NULL);
643	return;
644    }
645    xmlOutputBufferWriteString(buf, "<!DOCTYPE ");
646    xmlOutputBufferWriteString(buf, (const char *)cur->name);
647    if (cur->ExternalID != NULL) {
648	xmlOutputBufferWriteString(buf, " PUBLIC ");
649	xmlBufferWriteQuotedString(buf->buffer, cur->ExternalID);
650	if (cur->SystemID != NULL) {
651	    xmlOutputBufferWriteString(buf, " ");
652	    xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
653	}
654    }  else if (cur->SystemID != NULL) {
655	xmlOutputBufferWriteString(buf, " SYSTEM ");
656	xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
657    }
658    xmlOutputBufferWriteString(buf, ">\n");
659}
660
661/**
662 * htmlAttrDumpOutput:
663 * @buf:  the HTML buffer output
664 * @doc:  the document
665 * @cur:  the attribute pointer
666 * @encoding:  the encoding string
667 *
668 * Dump an HTML attribute
669 */
670static void
671htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur,
672	           const char *encoding ATTRIBUTE_UNUSED) {
673    xmlChar *value;
674
675    /*
676     * TODO: The html output method should not escape a & character
677     *       occurring in an attribute value immediately followed by
678     *       a { character (see Section B.7.1 of the HTML 4.0 Recommendation).
679     */
680
681    if (cur == NULL) {
682	return;
683    }
684    xmlOutputBufferWriteString(buf, " ");
685    if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
686        xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
687	xmlOutputBufferWriteString(buf, ":");
688    }
689    xmlOutputBufferWriteString(buf, (const char *)cur->name);
690    if ((cur->children != NULL) && (!htmlIsBooleanAttr(cur->name))) {
691	value = xmlNodeListGetString(doc, cur->children, 0);
692	if (value) {
693	    xmlOutputBufferWriteString(buf, "=");
694	    if ((cur->ns == NULL) && (cur->parent != NULL) &&
695		(cur->parent->ns == NULL) &&
696		((!xmlStrcasecmp(cur->name, BAD_CAST "href")) ||
697	         (!xmlStrcasecmp(cur->name, BAD_CAST "action")) ||
698		 (!xmlStrcasecmp(cur->name, BAD_CAST "src")) ||
699		 ((!xmlStrcasecmp(cur->name, BAD_CAST "name")) &&
700		  (!xmlStrcasecmp(cur->parent->name, BAD_CAST "a"))))) {
701		xmlChar *escaped;
702		xmlChar *tmp = value;
703
704		while (IS_BLANK_CH(*tmp)) tmp++;
705
706		escaped = xmlURIEscapeStr(tmp, BAD_CAST"@/:=?;#%&,+");
707		if (escaped != NULL) {
708		    xmlBufferWriteQuotedString(buf->buffer, escaped);
709		    xmlFree(escaped);
710		} else {
711		    xmlBufferWriteQuotedString(buf->buffer, value);
712		}
713	    } else {
714		xmlBufferWriteQuotedString(buf->buffer, value);
715	    }
716	    xmlFree(value);
717	} else  {
718	    xmlOutputBufferWriteString(buf, "=\"\"");
719	}
720    }
721}
722
723/**
724 * htmlAttrListDumpOutput:
725 * @buf:  the HTML buffer output
726 * @doc:  the document
727 * @cur:  the first attribute pointer
728 * @encoding:  the encoding string
729 *
730 * Dump a list of HTML attributes
731 */
732static void
733htmlAttrListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, const char *encoding) {
734    if (cur == NULL) {
735	return;
736    }
737    while (cur != NULL) {
738        htmlAttrDumpOutput(buf, doc, cur, encoding);
739	cur = cur->next;
740    }
741}
742
743
744
745/**
746 * htmlNodeListDumpOutput:
747 * @buf:  the HTML buffer output
748 * @doc:  the document
749 * @cur:  the first node
750 * @encoding:  the encoding string
751 * @format:  should formatting spaces been added
752 *
753 * Dump an HTML node list, recursive behaviour,children are printed too.
754 */
755static void
756htmlNodeListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
757	               xmlNodePtr cur, const char *encoding, int format) {
758    if (cur == NULL) {
759	return;
760    }
761    while (cur != NULL) {
762        htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
763	cur = cur->next;
764    }
765}
766
767/**
768 * htmlNodeDumpFormatOutput:
769 * @buf:  the HTML buffer output
770 * @doc:  the document
771 * @cur:  the current node
772 * @encoding:  the encoding string
773 * @format:  should formatting spaces been added
774 *
775 * Dump an HTML node, recursive behaviour,children are printed too.
776 */
777void
778htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
779	                 xmlNodePtr cur, const char *encoding, int format) {
780    const htmlElemDesc * info;
781
782    xmlInitParser();
783
784    if ((cur == NULL) || (buf == NULL)) {
785	return;
786    }
787    /*
788     * Special cases.
789     */
790    if (cur->type == XML_DTD_NODE)
791	return;
792    if ((cur->type == XML_HTML_DOCUMENT_NODE) ||
793        (cur->type == XML_DOCUMENT_NODE)){
794	htmlDocContentDumpOutput(buf, (xmlDocPtr) cur, encoding);
795	return;
796    }
797    if (cur->type == XML_ATTRIBUTE_NODE) {
798        htmlAttrDumpOutput(buf, doc, (xmlAttrPtr) cur, encoding);
799	return;
800    }
801    if (cur->type == HTML_TEXT_NODE) {
802	if (cur->content != NULL) {
803	    if (((cur->name == (const xmlChar *)xmlStringText) ||
804		 (cur->name != (const xmlChar *)xmlStringTextNoenc)) &&
805		((cur->parent == NULL) ||
806		 ((xmlStrcasecmp(cur->parent->name, BAD_CAST "script")) &&
807		  (xmlStrcasecmp(cur->parent->name, BAD_CAST "style"))))) {
808		xmlChar *buffer;
809
810		buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
811		if (buffer != NULL) {
812		    xmlOutputBufferWriteString(buf, (const char *)buffer);
813		    xmlFree(buffer);
814		}
815	    } else {
816		xmlOutputBufferWriteString(buf, (const char *)cur->content);
817	    }
818	}
819	return;
820    }
821    if (cur->type == HTML_COMMENT_NODE) {
822	if (cur->content != NULL) {
823	    xmlOutputBufferWriteString(buf, "<!--");
824	    xmlOutputBufferWriteString(buf, (const char *)cur->content);
825	    xmlOutputBufferWriteString(buf, "-->");
826	}
827	return;
828    }
829    if (cur->type == HTML_PI_NODE) {
830	if (cur->name == NULL)
831	    return;
832	xmlOutputBufferWriteString(buf, "<?");
833	xmlOutputBufferWriteString(buf, (const char *)cur->name);
834	if (cur->content != NULL) {
835	    xmlOutputBufferWriteString(buf, " ");
836	    xmlOutputBufferWriteString(buf, (const char *)cur->content);
837	}
838	xmlOutputBufferWriteString(buf, ">");
839	return;
840    }
841    if (cur->type == HTML_ENTITY_REF_NODE) {
842        xmlOutputBufferWriteString(buf, "&");
843	xmlOutputBufferWriteString(buf, (const char *)cur->name);
844        xmlOutputBufferWriteString(buf, ";");
845	return;
846    }
847    if (cur->type == HTML_PRESERVE_NODE) {
848	if (cur->content != NULL) {
849	    xmlOutputBufferWriteString(buf, (const char *)cur->content);
850	}
851	return;
852    }
853
854    /*
855     * Get specific HTML info for that node.
856     */
857    if (cur->ns == NULL)
858	info = htmlTagLookup(cur->name);
859    else
860	info = NULL;
861
862    xmlOutputBufferWriteString(buf, "<");
863    if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
864        xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
865	xmlOutputBufferWriteString(buf, ":");
866    }
867    xmlOutputBufferWriteString(buf, (const char *)cur->name);
868    if (cur->nsDef)
869	xmlNsListDumpOutput(buf, cur->nsDef);
870    if (cur->properties != NULL)
871        htmlAttrListDumpOutput(buf, doc, cur->properties, encoding);
872
873    if ((info != NULL) && (info->empty)) {
874        xmlOutputBufferWriteString(buf, ">");
875	if ((format) && (!info->isinline) && (cur->next != NULL)) {
876	    if ((cur->next->type != HTML_TEXT_NODE) &&
877		(cur->next->type != HTML_ENTITY_REF_NODE) &&
878		(cur->parent != NULL) &&
879		(cur->parent->name != NULL) &&
880		(cur->parent->name[0] != 'p')) /* p, pre, param */
881		xmlOutputBufferWriteString(buf, "\n");
882	}
883	return;
884    }
885    if (((cur->type == XML_ELEMENT_NODE) || (cur->content == NULL)) &&
886	(cur->children == NULL)) {
887        if ((info != NULL) && (info->saveEndTag != 0) &&
888	    (xmlStrcmp(BAD_CAST info->name, BAD_CAST "html")) &&
889	    (xmlStrcmp(BAD_CAST info->name, BAD_CAST "body"))) {
890	    xmlOutputBufferWriteString(buf, ">");
891	} else {
892	    xmlOutputBufferWriteString(buf, "></");
893            if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
894                xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
895                xmlOutputBufferWriteString(buf, ":");
896            }
897	    xmlOutputBufferWriteString(buf, (const char *)cur->name);
898	    xmlOutputBufferWriteString(buf, ">");
899	}
900	if ((format) && (cur->next != NULL) &&
901            (info != NULL) && (!info->isinline)) {
902	    if ((cur->next->type != HTML_TEXT_NODE) &&
903		(cur->next->type != HTML_ENTITY_REF_NODE) &&
904		(cur->parent != NULL) &&
905		(cur->parent->name != NULL) &&
906		(cur->parent->name[0] != 'p')) /* p, pre, param */
907		xmlOutputBufferWriteString(buf, "\n");
908	}
909	return;
910    }
911    xmlOutputBufferWriteString(buf, ">");
912    if ((cur->type != XML_ELEMENT_NODE) &&
913	(cur->content != NULL)) {
914	    /*
915	     * Uses the OutputBuffer property to automatically convert
916	     * invalids to charrefs
917	     */
918
919            xmlOutputBufferWriteString(buf, (const char *) cur->content);
920    }
921    if (cur->children != NULL) {
922        if ((format) && (info != NULL) && (!info->isinline) &&
923	    (cur->children->type != HTML_TEXT_NODE) &&
924	    (cur->children->type != HTML_ENTITY_REF_NODE) &&
925	    (cur->children != cur->last) &&
926	    (cur->name != NULL) &&
927	    (cur->name[0] != 'p')) /* p, pre, param */
928	    xmlOutputBufferWriteString(buf, "\n");
929	htmlNodeListDumpOutput(buf, doc, cur->children, encoding, format);
930        if ((format) && (info != NULL) && (!info->isinline) &&
931	    (cur->last->type != HTML_TEXT_NODE) &&
932	    (cur->last->type != HTML_ENTITY_REF_NODE) &&
933	    (cur->children != cur->last) &&
934	    (cur->name != NULL) &&
935	    (cur->name[0] != 'p')) /* p, pre, param */
936	    xmlOutputBufferWriteString(buf, "\n");
937    }
938    xmlOutputBufferWriteString(buf, "</");
939    if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
940        xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
941	xmlOutputBufferWriteString(buf, ":");
942    }
943    xmlOutputBufferWriteString(buf, (const char *)cur->name);
944    xmlOutputBufferWriteString(buf, ">");
945    if ((format) && (info != NULL) && (!info->isinline) &&
946	(cur->next != NULL)) {
947        if ((cur->next->type != HTML_TEXT_NODE) &&
948	    (cur->next->type != HTML_ENTITY_REF_NODE) &&
949	    (cur->parent != NULL) &&
950	    (cur->parent->name != NULL) &&
951	    (cur->parent->name[0] != 'p')) /* p, pre, param */
952	    xmlOutputBufferWriteString(buf, "\n");
953    }
954}
955
956/**
957 * htmlNodeDumpOutput:
958 * @buf:  the HTML buffer output
959 * @doc:  the document
960 * @cur:  the current node
961 * @encoding:  the encoding string
962 *
963 * Dump an HTML node, recursive behaviour,children are printed too,
964 * and formatting returns/spaces are added.
965 */
966void
967htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
968	           xmlNodePtr cur, const char *encoding) {
969    htmlNodeDumpFormatOutput(buf, doc, cur, encoding, 1);
970}
971
972/**
973 * htmlDocContentDumpFormatOutput:
974 * @buf:  the HTML buffer output
975 * @cur:  the document
976 * @encoding:  the encoding string
977 * @format:  should formatting spaces been added
978 *
979 * Dump an HTML document.
980 */
981void
982htmlDocContentDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
983	                       const char *encoding, int format) {
984    int type;
985
986    xmlInitParser();
987
988    if ((buf == NULL) || (cur == NULL))
989        return;
990
991    /*
992     * force to output the stuff as HTML, especially for entities
993     */
994    type = cur->type;
995    cur->type = XML_HTML_DOCUMENT_NODE;
996    if (cur->intSubset != NULL) {
997        htmlDtdDumpOutput(buf, cur, NULL);
998    }
999    if (cur->children != NULL) {
1000        htmlNodeListDumpOutput(buf, cur, cur->children, encoding, format);
1001    }
1002    xmlOutputBufferWriteString(buf, "\n");
1003    cur->type = (xmlElementType) type;
1004}
1005
1006/**
1007 * htmlDocContentDumpOutput:
1008 * @buf:  the HTML buffer output
1009 * @cur:  the document
1010 * @encoding:  the encoding string
1011 *
1012 * Dump an HTML document. Formating return/spaces are added.
1013 */
1014void
1015htmlDocContentDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
1016	                 const char *encoding) {
1017    htmlDocContentDumpFormatOutput(buf, cur, encoding, 1);
1018}
1019
1020/************************************************************************
1021 *									*
1022 *		Saving functions front-ends				*
1023 *									*
1024 ************************************************************************/
1025
1026/**
1027 * htmlDocDump:
1028 * @f:  the FILE*
1029 * @cur:  the document
1030 *
1031 * Dump an HTML document to an open FILE.
1032 *
1033 * returns: the number of byte written or -1 in case of failure.
1034 */
1035int
1036htmlDocDump(FILE *f, xmlDocPtr cur) {
1037    xmlOutputBufferPtr buf;
1038    xmlCharEncodingHandlerPtr handler = NULL;
1039    const char *encoding;
1040    int ret;
1041
1042    xmlInitParser();
1043
1044    if ((cur == NULL) || (f == NULL)) {
1045	return(-1);
1046    }
1047
1048    encoding = (const char *) htmlGetMetaEncoding(cur);
1049
1050    if (encoding != NULL) {
1051	xmlCharEncoding enc;
1052
1053	enc = xmlParseCharEncoding(encoding);
1054	if (enc != cur->charset) {
1055	    if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1056		/*
1057		 * Not supported yet
1058		 */
1059		return(-1);
1060	    }
1061
1062	    handler = xmlFindCharEncodingHandler(encoding);
1063	    if (handler == NULL)
1064		return(-1);
1065	} else {
1066	    handler = xmlFindCharEncodingHandler(encoding);
1067	}
1068    }
1069
1070    /*
1071     * Fallback to HTML or ASCII when the encoding is unspecified
1072     */
1073    if (handler == NULL)
1074	handler = xmlFindCharEncodingHandler("HTML");
1075    if (handler == NULL)
1076	handler = xmlFindCharEncodingHandler("ascii");
1077
1078    buf = xmlOutputBufferCreateFile(f, handler);
1079    if (buf == NULL) return(-1);
1080    htmlDocContentDumpOutput(buf, cur, NULL);
1081
1082    ret = xmlOutputBufferClose(buf);
1083    return(ret);
1084}
1085
1086/**
1087 * htmlSaveFile:
1088 * @filename:  the filename (or URL)
1089 * @cur:  the document
1090 *
1091 * Dump an HTML document to a file. If @filename is "-" the stdout file is
1092 * used.
1093 * returns: the number of byte written or -1 in case of failure.
1094 */
1095int
1096htmlSaveFile(const char *filename, xmlDocPtr cur) {
1097    xmlOutputBufferPtr buf;
1098    xmlCharEncodingHandlerPtr handler = NULL;
1099    const char *encoding;
1100    int ret;
1101
1102    if ((cur == NULL) || (filename == NULL))
1103        return(-1);
1104
1105    xmlInitParser();
1106
1107    encoding = (const char *) htmlGetMetaEncoding(cur);
1108
1109    if (encoding != NULL) {
1110	xmlCharEncoding enc;
1111
1112	enc = xmlParseCharEncoding(encoding);
1113	if (enc != cur->charset) {
1114	    if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1115		/*
1116		 * Not supported yet
1117		 */
1118		return(-1);
1119	    }
1120
1121	    handler = xmlFindCharEncodingHandler(encoding);
1122	    if (handler == NULL)
1123		return(-1);
1124	}
1125    }
1126
1127    /*
1128     * Fallback to HTML or ASCII when the encoding is unspecified
1129     */
1130    if (handler == NULL)
1131	handler = xmlFindCharEncodingHandler("HTML");
1132    if (handler == NULL)
1133	handler = xmlFindCharEncodingHandler("ascii");
1134
1135    /*
1136     * save the content to a temp buffer.
1137     */
1138    buf = xmlOutputBufferCreateFilename(filename, handler, cur->compression);
1139    if (buf == NULL) return(0);
1140
1141    htmlDocContentDumpOutput(buf, cur, NULL);
1142
1143    ret = xmlOutputBufferClose(buf);
1144    return(ret);
1145}
1146
1147/**
1148 * htmlSaveFileFormat:
1149 * @filename:  the filename
1150 * @cur:  the document
1151 * @format:  should formatting spaces been added
1152 * @encoding: the document encoding
1153 *
1154 * Dump an HTML document to a file using a given encoding.
1155 *
1156 * returns: the number of byte written or -1 in case of failure.
1157 */
1158int
1159htmlSaveFileFormat(const char *filename, xmlDocPtr cur,
1160	           const char *encoding, int format) {
1161    xmlOutputBufferPtr buf;
1162    xmlCharEncodingHandlerPtr handler = NULL;
1163    int ret;
1164
1165    if ((cur == NULL) || (filename == NULL))
1166        return(-1);
1167
1168    xmlInitParser();
1169
1170    if (encoding != NULL) {
1171	xmlCharEncoding enc;
1172
1173	enc = xmlParseCharEncoding(encoding);
1174	if (enc != cur->charset) {
1175	    if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1176		/*
1177		 * Not supported yet
1178		 */
1179		return(-1);
1180	    }
1181
1182	    handler = xmlFindCharEncodingHandler(encoding);
1183	    if (handler == NULL)
1184		return(-1);
1185	}
1186        htmlSetMetaEncoding(cur, (const xmlChar *) encoding);
1187    } else {
1188	htmlSetMetaEncoding(cur, (const xmlChar *) "UTF-8");
1189    }
1190
1191    /*
1192     * Fallback to HTML or ASCII when the encoding is unspecified
1193     */
1194    if (handler == NULL)
1195	handler = xmlFindCharEncodingHandler("HTML");
1196    if (handler == NULL)
1197	handler = xmlFindCharEncodingHandler("ascii");
1198
1199    /*
1200     * save the content to a temp buffer.
1201     */
1202    buf = xmlOutputBufferCreateFilename(filename, handler, 0);
1203    if (buf == NULL) return(0);
1204
1205    htmlDocContentDumpFormatOutput(buf, cur, encoding, format);
1206
1207    ret = xmlOutputBufferClose(buf);
1208    return(ret);
1209}
1210
/**
 * htmlSaveFileEnc:
 * @filename:  the filename
 * @cur:  the document
 * @encoding: the document encoding
 *
 * Dump an HTML document to a file using a given encoding, with formatting
 * returns/spaces requested. This is a convenience front-end that calls
 * htmlSaveFileFormat() with its format argument set to 1.
 *
 * returns: the value returned by htmlSaveFileFormat().
 */
int
htmlSaveFileEnc(const char *filename, xmlDocPtr cur, const char *encoding) {
    return(htmlSaveFileFormat(filename, cur, encoding, 1));
}
1226
1227#endif /* LIBXML_OUTPUT_ENABLED */
1228
1229#define bottom_HTMLtree
1230#include "elfgcchack.h"
1231#endif /* LIBXML_HTML_ENABLED */
1232