1/*
2 * HTMLtree.c : implementation of access function for an HTML tree.
3 *
4 * See Copyright for the status of this software.
5 *
6 * daniel@veillard.com
7 */
8
9
10#define IN_LIBXML
11#include "libxml.h"
12#ifdef LIBXML_HTML_ENABLED
13
14#include <string.h> /* for memset() only ! */
15
16#ifdef HAVE_CTYPE_H
17#include <ctype.h>
18#endif
19#ifdef HAVE_STDLIB_H
20#include <stdlib.h>
21#endif
22
23#include <libxml/xmlmemory.h>
24#include <libxml/HTMLparser.h>
25#include <libxml/HTMLtree.h>
26#include <libxml/entities.h>
27#include <libxml/valid.h>
28#include <libxml/xmlerror.h>
29#include <libxml/parserInternals.h>
30#include <libxml/globals.h>
31#include <libxml/uri.h>
32
33/************************************************************************
34 *									*
35 *   		Getting/Setting encoding meta tags			*
36 *									*
37 ************************************************************************/
38
39/**
40 * htmlGetMetaEncoding:
41 * @doc:  the document
42 *
43 * Encoding definition lookup in the Meta tags
44 *
45 * Returns the current encoding as flagged in the HTML source
46 */
47const xmlChar *
48htmlGetMetaEncoding(htmlDocPtr doc) {
49    htmlNodePtr cur;
50    const xmlChar *content;
51    const xmlChar *encoding;
52
53    if (doc == NULL)
54	return(NULL);
55    cur = doc->children;
56
57    /*
58     * Search the html
59     */
60    while (cur != NULL) {
61	if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
62	    if (xmlStrEqual(cur->name, BAD_CAST"html"))
63		break;
64	    if (xmlStrEqual(cur->name, BAD_CAST"head"))
65		goto found_head;
66	    if (xmlStrEqual(cur->name, BAD_CAST"meta"))
67		goto found_meta;
68	}
69	cur = cur->next;
70    }
71    if (cur == NULL)
72	return(NULL);
73    cur = cur->children;
74
75    /*
76     * Search the head
77     */
78    while (cur != NULL) {
79	if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
80	    if (xmlStrEqual(cur->name, BAD_CAST"head"))
81		break;
82	    if (xmlStrEqual(cur->name, BAD_CAST"meta"))
83		goto found_meta;
84	}
85	cur = cur->next;
86    }
87    if (cur == NULL)
88	return(NULL);
89found_head:
90    cur = cur->children;
91
92    /*
93     * Search the meta elements
94     */
95found_meta:
96    while (cur != NULL) {
97	if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
98	    if (xmlStrEqual(cur->name, BAD_CAST"meta")) {
99		xmlAttrPtr attr = cur->properties;
100		int http;
101		const xmlChar *value;
102
103		content = NULL;
104		http = 0;
105		while (attr != NULL) {
106		    if ((attr->children != NULL) &&
107		        (attr->children->type == XML_TEXT_NODE) &&
108		        (attr->children->next == NULL)) {
109			value = attr->children->content;
110			if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
111			 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
112			    http = 1;
113			else if ((value != NULL)
114			 && (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
115			    content = value;
116			if ((http != 0) && (content != NULL))
117			    goto found_content;
118		    }
119		    attr = attr->next;
120		}
121	    }
122	}
123	cur = cur->next;
124    }
125    return(NULL);
126
127found_content:
128    encoding = xmlStrstr(content, BAD_CAST"charset=");
129    if (encoding == NULL)
130	encoding = xmlStrstr(content, BAD_CAST"Charset=");
131    if (encoding == NULL)
132	encoding = xmlStrstr(content, BAD_CAST"CHARSET=");
133    if (encoding != NULL) {
134	encoding += 8;
135    } else {
136	encoding = xmlStrstr(content, BAD_CAST"charset =");
137	if (encoding == NULL)
138	    encoding = xmlStrstr(content, BAD_CAST"Charset =");
139	if (encoding == NULL)
140	    encoding = xmlStrstr(content, BAD_CAST"CHARSET =");
141	if (encoding != NULL)
142	    encoding += 9;
143    }
144    if (encoding != NULL) {
145	while ((*encoding == ' ') || (*encoding == '\t')) encoding++;
146    }
147    return(encoding);
148}
149
150/**
151 * htmlSetMetaEncoding:
152 * @doc:  the document
153 * @encoding:  the encoding string
154 *
155 * Sets the current encoding in the Meta tags
156 * NOTE: this will not change the document content encoding, just
157 * the META flag associated.
158 *
159 * Returns 0 in case of success and -1 in case of error
160 */
161int
162htmlSetMetaEncoding(htmlDocPtr doc, const xmlChar *encoding) {
163    htmlNodePtr cur, meta = NULL, head = NULL;
164    const xmlChar *content = NULL;
165    char newcontent[100];
166
167
168    if (doc == NULL)
169	return(-1);
170
171    /* html isn't a real encoding it's just libxml2 way to get entities */
172    if (!xmlStrcasecmp(encoding, BAD_CAST "html"))
173        return(-1);
174
175    if (encoding != NULL) {
176	snprintf(newcontent, sizeof(newcontent), "text/html; charset=%s",
177                (char *)encoding);
178	newcontent[sizeof(newcontent) - 1] = 0;
179    }
180
181    cur = doc->children;
182
183    /*
184     * Search the html
185     */
186    while (cur != NULL) {
187	if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
188	    if (xmlStrcasecmp(cur->name, BAD_CAST"html") == 0)
189		break;
190	    if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
191		goto found_head;
192	    if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0)
193		goto found_meta;
194	}
195	cur = cur->next;
196    }
197    if (cur == NULL)
198	return(-1);
199    cur = cur->children;
200
201    /*
202     * Search the head
203     */
204    while (cur != NULL) {
205	if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
206	    if (xmlStrcasecmp(cur->name, BAD_CAST"head") == 0)
207		break;
208	    if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) {
209                head = cur->parent;
210		goto found_meta;
211            }
212	}
213	cur = cur->next;
214    }
215    if (cur == NULL)
216	return(-1);
217found_head:
218    head = cur;
219    if (cur->children == NULL)
220        goto create;
221    cur = cur->children;
222
223found_meta:
224    /*
225     * Search and update all the remaining the meta elements carrying
226     * encoding informations
227     */
228    while (cur != NULL) {
229	if ((cur->type == XML_ELEMENT_NODE) && (cur->name != NULL)) {
230	    if (xmlStrcasecmp(cur->name, BAD_CAST"meta") == 0) {
231		xmlAttrPtr attr = cur->properties;
232		int http;
233		const xmlChar *value;
234
235		content = NULL;
236		http = 0;
237		while (attr != NULL) {
238		    if ((attr->children != NULL) &&
239		        (attr->children->type == XML_TEXT_NODE) &&
240		        (attr->children->next == NULL)) {
241			value = attr->children->content;
242			if ((!xmlStrcasecmp(attr->name, BAD_CAST"http-equiv"))
243			 && (!xmlStrcasecmp(value, BAD_CAST"Content-Type")))
244			    http = 1;
245			else
246                        {
247                           if ((value != NULL) &&
248                               (!xmlStrcasecmp(attr->name, BAD_CAST"content")))
249			       content = value;
250                        }
251		        if ((http != 0) && (content != NULL))
252			    break;
253		    }
254		    attr = attr->next;
255		}
256		if ((http != 0) && (content != NULL)) {
257		    meta = cur;
258		    break;
259		}
260
261	    }
262	}
263	cur = cur->next;
264    }
265create:
266    if (meta == NULL) {
267        if ((encoding != NULL) && (head != NULL)) {
268            /*
269             * Create a new Meta element with the right attributes
270             */
271
272            meta = xmlNewDocNode(doc, NULL, BAD_CAST"meta", NULL);
273            if (head->children == NULL)
274                xmlAddChild(head, meta);
275            else
276                xmlAddPrevSibling(head->children, meta);
277            xmlNewProp(meta, BAD_CAST"http-equiv", BAD_CAST"Content-Type");
278            xmlNewProp(meta, BAD_CAST"content", BAD_CAST newcontent);
279        }
280    } else {
281        /* change the document only if there is a real encoding change */
282        if (xmlStrcasestr(content, encoding) == NULL) {
283            xmlSetProp(meta, BAD_CAST"content", BAD_CAST newcontent);
284        }
285    }
286
287
288    return(0);
289}
290
/**
 * htmlBooleanAttrs:
 *
 * NULL-terminated list of the HTML attributes which will be output
 * in minimized form, i.e. <option selected="selected"> will be
 * output as <option selected>, as per XSLT 1.0 16.2 "HTML Output Method"
 *
 */
static const char* htmlBooleanAttrs[] = {
    "checked",
    "compact",
    "declare",
    "defer",
    "disabled",
    "ismap",
    "multiple",
    "nohref",
    "noresize",
    "noshade",
    "nowrap",
    "readonly",
    "selected",
    NULL
};
304
305
306/**
307 * htmlIsBooleanAttr:
308 * @name:  the name of the attribute to check
309 *
310 * Determine if a given attribute is a boolean attribute.
311 *
312 * returns: false if the attribute is not boolean, true otherwise.
313 */
314int
315htmlIsBooleanAttr(const xmlChar *name)
316{
317    int i = 0;
318
319    while (htmlBooleanAttrs[i] != NULL) {
320        if (xmlStrcasecmp((const xmlChar *)htmlBooleanAttrs[i], name) == 0)
321            return 1;
322        i++;
323    }
324    return 0;
325}
326
327#ifdef LIBXML_OUTPUT_ENABLED
328/*
329 * private routine exported from xmlIO.c
330 */
331xmlOutputBufferPtr
332xmlAllocOutputBufferInternal(xmlCharEncodingHandlerPtr encoder);
333/************************************************************************
334 *									*
335 * 			Output error handlers				*
336 *									*
337 ************************************************************************/
338/**
339 * htmlSaveErrMemory:
340 * @extra:  extra informations
341 *
342 * Handle an out of memory condition
343 */
344static void
345htmlSaveErrMemory(const char *extra)
346{
347    __xmlSimpleError(XML_FROM_OUTPUT, XML_ERR_NO_MEMORY, NULL, NULL, extra);
348}
349
350/**
351 * htmlSaveErr:
352 * @code:  the error number
353 * @node:  the location of the error.
354 * @extra:  extra informations
355 *
356 * Handle an out of memory condition
357 */
358static void
359htmlSaveErr(int code, xmlNodePtr node, const char *extra)
360{
361    const char *msg = NULL;
362
363    switch(code) {
364        case XML_SAVE_NOT_UTF8:
365	    msg = "string is not in UTF-8\n";
366	    break;
367	case XML_SAVE_CHAR_INVALID:
368	    msg = "invalid character value\n";
369	    break;
370	case XML_SAVE_UNKNOWN_ENCODING:
371	    msg = "unknown encoding %s\n";
372	    break;
373	case XML_SAVE_NO_DOCTYPE:
374	    msg = "HTML has no DOCTYPE\n";
375	    break;
376	default:
377	    msg = "unexpected error number\n";
378    }
379    __xmlSimpleError(XML_FROM_OUTPUT, code, node, msg, extra);
380}
381
382/************************************************************************
383 *									*
384 *   		Dumping HTML tree content to a simple buffer		*
385 *									*
386 ************************************************************************/
387
388static int
389htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
390	           int format);
391
392/**
393 * htmlNodeDumpFormat:
394 * @buf:  the HTML buffer output
395 * @doc:  the document
396 * @cur:  the current node
397 * @format:  should formatting spaces been added
398 *
399 * Dump an HTML node, recursive behaviour,children are printed too.
400 *
401 * Returns the number of byte written or -1 in case of error
402 */
403static int
404htmlNodeDumpFormat(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur,
405	           int format) {
406    unsigned int use;
407    int ret;
408    xmlOutputBufferPtr outbuf;
409
410    if (cur == NULL) {
411	return (-1);
412    }
413    if (buf == NULL) {
414	return (-1);
415    }
416    outbuf = (xmlOutputBufferPtr) xmlMalloc(sizeof(xmlOutputBuffer));
417    if (outbuf == NULL) {
418        htmlSaveErrMemory("allocating HTML output buffer");
419	return (-1);
420    }
421    memset(outbuf, 0, (size_t) sizeof(xmlOutputBuffer));
422    outbuf->buffer = buf;
423    outbuf->encoder = NULL;
424    outbuf->writecallback = NULL;
425    outbuf->closecallback = NULL;
426    outbuf->context = NULL;
427    outbuf->written = 0;
428
429    use = buf->use;
430    htmlNodeDumpFormatOutput(outbuf, doc, cur, NULL, format);
431    xmlFree(outbuf);
432    ret = buf->use - use;
433    return (ret);
434}
435
436/**
437 * htmlNodeDump:
438 * @buf:  the HTML buffer output
439 * @doc:  the document
440 * @cur:  the current node
441 *
442 * Dump an HTML node, recursive behaviour,children are printed too,
443 * and formatting returns are added.
444 *
445 * Returns the number of byte written or -1 in case of error
446 */
447int
448htmlNodeDump(xmlBufferPtr buf, xmlDocPtr doc, xmlNodePtr cur) {
449    xmlInitParser();
450
451    return(htmlNodeDumpFormat(buf, doc, cur, 1));
452}
453
454/**
455 * htmlNodeDumpFileFormat:
456 * @out:  the FILE pointer
457 * @doc:  the document
458 * @cur:  the current node
459 * @encoding: the document encoding
460 * @format:  should formatting spaces been added
461 *
462 * Dump an HTML node, recursive behaviour,children are printed too.
463 *
464 * TODO: if encoding == NULL try to save in the doc encoding
465 *
466 * returns: the number of byte written or -1 in case of failure.
467 */
468int
469htmlNodeDumpFileFormat(FILE *out, xmlDocPtr doc,
470	               xmlNodePtr cur, const char *encoding, int format) {
471    xmlOutputBufferPtr buf;
472    xmlCharEncodingHandlerPtr handler = NULL;
473    int ret;
474
475    xmlInitParser();
476
477    if (encoding != NULL) {
478	xmlCharEncoding enc;
479
480	enc = xmlParseCharEncoding(encoding);
481	if (enc != XML_CHAR_ENCODING_UTF8) {
482	    handler = xmlFindCharEncodingHandler(encoding);
483	    if (handler == NULL)
484		htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
485	}
486    }
487
488    /*
489     * Fallback to HTML or ASCII when the encoding is unspecified
490     */
491    if (handler == NULL)
492	handler = xmlFindCharEncodingHandler("HTML");
493    if (handler == NULL)
494	handler = xmlFindCharEncodingHandler("ascii");
495
496    /*
497     * save the content to a temp buffer.
498     */
499    buf = xmlOutputBufferCreateFile(out, handler);
500    if (buf == NULL) return(0);
501
502    htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
503
504    ret = xmlOutputBufferClose(buf);
505    return(ret);
506}
507
508/**
509 * htmlNodeDumpFile:
510 * @out:  the FILE pointer
511 * @doc:  the document
512 * @cur:  the current node
513 *
514 * Dump an HTML node, recursive behaviour,children are printed too,
515 * and formatting returns are added.
516 */
517void
518htmlNodeDumpFile(FILE *out, xmlDocPtr doc, xmlNodePtr cur) {
519    htmlNodeDumpFileFormat(out, doc, cur, NULL, 1);
520}
521
522/**
523 * htmlDocDumpMemoryFormat:
524 * @cur:  the document
525 * @mem:  OUT: the memory pointer
526 * @size:  OUT: the memory length
527 * @format:  should formatting spaces been added
528 *
529 * Dump an HTML document in memory and return the xmlChar * and it's size.
530 * It's up to the caller to free the memory.
531 */
532void
533htmlDocDumpMemoryFormat(xmlDocPtr cur, xmlChar**mem, int *size, int format) {
534    xmlOutputBufferPtr buf;
535    xmlCharEncodingHandlerPtr handler = NULL;
536    const char *encoding;
537
538    xmlInitParser();
539
540    if ((mem == NULL) || (size == NULL))
541        return;
542    if (cur == NULL) {
543	*mem = NULL;
544	*size = 0;
545	return;
546    }
547
548    encoding = (const char *) htmlGetMetaEncoding(cur);
549
550    if (encoding != NULL) {
551	xmlCharEncoding enc;
552
553	enc = xmlParseCharEncoding(encoding);
554	if (enc != cur->charset) {
555	    if (cur->charset != XML_CHAR_ENCODING_UTF8) {
556		/*
557		 * Not supported yet
558		 */
559		*mem = NULL;
560		*size = 0;
561		return;
562	    }
563
564	    handler = xmlFindCharEncodingHandler(encoding);
565	    if (handler == NULL)
566                htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
567
568	} else {
569	    handler = xmlFindCharEncodingHandler(encoding);
570	}
571    }
572
573    /*
574     * Fallback to HTML or ASCII when the encoding is unspecified
575     */
576    if (handler == NULL)
577	handler = xmlFindCharEncodingHandler("HTML");
578    if (handler == NULL)
579	handler = xmlFindCharEncodingHandler("ascii");
580
581    buf = xmlAllocOutputBufferInternal(handler);
582    if (buf == NULL) {
583	*mem = NULL;
584	*size = 0;
585	return;
586    }
587
588    htmlDocContentDumpFormatOutput(buf, cur, NULL, format);
589
590    xmlOutputBufferFlush(buf);
591    if (buf->conv != NULL) {
592	*size = buf->conv->use;
593	*mem = xmlStrndup(buf->conv->content, *size);
594    } else {
595	*size = buf->buffer->use;
596	*mem = xmlStrndup(buf->buffer->content, *size);
597    }
598    (void)xmlOutputBufferClose(buf);
599}
600
601/**
602 * htmlDocDumpMemory:
603 * @cur:  the document
604 * @mem:  OUT: the memory pointer
605 * @size:  OUT: the memory length
606 *
607 * Dump an HTML document in memory and return the xmlChar * and it's size.
608 * It's up to the caller to free the memory.
609 */
610void
611htmlDocDumpMemory(xmlDocPtr cur, xmlChar**mem, int *size) {
612	htmlDocDumpMemoryFormat(cur, mem, size, 1);
613}
614
615
616/************************************************************************
617 *									*
618 *   		Dumping HTML tree content to an I/O output buffer	*
619 *									*
620 ************************************************************************/
621
622void xmlNsListDumpOutput(xmlOutputBufferPtr buf, xmlNsPtr cur);
623
624/**
625 * htmlDtdDumpOutput:
626 * @buf:  the HTML buffer output
627 * @doc:  the document
628 * @encoding:  the encoding string
629 *
630 * TODO: check whether encoding is needed
631 *
632 * Dump the HTML document DTD, if any.
633 */
634static void
635htmlDtdDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
636	          const char *encoding ATTRIBUTE_UNUSED) {
637    xmlDtdPtr cur = doc->intSubset;
638
639    if (cur == NULL) {
640	htmlSaveErr(XML_SAVE_NO_DOCTYPE, (xmlNodePtr) doc, NULL);
641	return;
642    }
643    xmlOutputBufferWriteString(buf, "<!DOCTYPE ");
644    xmlOutputBufferWriteString(buf, (const char *)cur->name);
645    if (cur->ExternalID != NULL) {
646	xmlOutputBufferWriteString(buf, " PUBLIC ");
647	xmlBufferWriteQuotedString(buf->buffer, cur->ExternalID);
648	if (cur->SystemID != NULL) {
649	    xmlOutputBufferWriteString(buf, " ");
650	    xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
651	}
652    }  else if (cur->SystemID != NULL) {
653	xmlOutputBufferWriteString(buf, " SYSTEM ");
654	xmlBufferWriteQuotedString(buf->buffer, cur->SystemID);
655    }
656    xmlOutputBufferWriteString(buf, ">\n");
657}
658
659/**
660 * htmlAttrDumpOutput:
661 * @buf:  the HTML buffer output
662 * @doc:  the document
663 * @cur:  the attribute pointer
664 * @encoding:  the encoding string
665 *
666 * Dump an HTML attribute
667 */
668static void
669htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur,
670	           const char *encoding ATTRIBUTE_UNUSED) {
671    xmlChar *value;
672
673    /*
674     * TODO: The html output method should not escape a & character
675     *       occurring in an attribute value immediately followed by
676     *       a { character (see Section B.7.1 of the HTML 4.0 Recommendation).
677     */
678
679    if (cur == NULL) {
680	return;
681    }
682    xmlOutputBufferWriteString(buf, " ");
683    if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
684        xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
685	xmlOutputBufferWriteString(buf, ":");
686    }
687    xmlOutputBufferWriteString(buf, (const char *)cur->name);
688    if ((cur->children != NULL) && (!htmlIsBooleanAttr(cur->name))) {
689	value = xmlNodeListGetString(doc, cur->children, 0);
690	if (value) {
691	    xmlOutputBufferWriteString(buf, "=");
692	    if ((cur->ns == NULL) && (cur->parent != NULL) &&
693		(cur->parent->ns == NULL) &&
694		((!xmlStrcasecmp(cur->name, BAD_CAST "href")) ||
695	         (!xmlStrcasecmp(cur->name, BAD_CAST "action")) ||
696		 (!xmlStrcasecmp(cur->name, BAD_CAST "src")) ||
697		 ((!xmlStrcasecmp(cur->name, BAD_CAST "name")) &&
698		  (!xmlStrcasecmp(cur->parent->name, BAD_CAST "a"))))) {
699		xmlChar *escaped;
700		xmlChar *tmp = value;
701
702		while (IS_BLANK_CH(*tmp)) tmp++;
703
704		escaped = xmlURIEscapeStr(tmp, BAD_CAST"@/:=?;#%&,+");
705		if (escaped != NULL) {
706		    xmlBufferWriteQuotedString(buf->buffer, escaped);
707		    xmlFree(escaped);
708		} else {
709		    xmlBufferWriteQuotedString(buf->buffer, value);
710		}
711	    } else {
712		xmlBufferWriteQuotedString(buf->buffer, value);
713	    }
714	    xmlFree(value);
715	} else  {
716	    xmlOutputBufferWriteString(buf, "=\"\"");
717	}
718    }
719}
720
721/**
722 * htmlAttrListDumpOutput:
723 * @buf:  the HTML buffer output
724 * @doc:  the document
725 * @cur:  the first attribute pointer
726 * @encoding:  the encoding string
727 *
728 * Dump a list of HTML attributes
729 */
730static void
731htmlAttrListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur, const char *encoding) {
732    if (cur == NULL) {
733	return;
734    }
735    while (cur != NULL) {
736        htmlAttrDumpOutput(buf, doc, cur, encoding);
737	cur = cur->next;
738    }
739}
740
741
742
743/**
744 * htmlNodeListDumpOutput:
745 * @buf:  the HTML buffer output
746 * @doc:  the document
747 * @cur:  the first node
748 * @encoding:  the encoding string
749 * @format:  should formatting spaces been added
750 *
751 * Dump an HTML node list, recursive behaviour,children are printed too.
752 */
753static void
754htmlNodeListDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
755	               xmlNodePtr cur, const char *encoding, int format) {
756    if (cur == NULL) {
757	return;
758    }
759    while (cur != NULL) {
760        htmlNodeDumpFormatOutput(buf, doc, cur, encoding, format);
761	cur = cur->next;
762    }
763}
764
765/**
766 * htmlNodeDumpFormatOutput:
767 * @buf:  the HTML buffer output
768 * @doc:  the document
769 * @cur:  the current node
770 * @encoding:  the encoding string
771 * @format:  should formatting spaces been added
772 *
773 * Dump an HTML node, recursive behaviour,children are printed too.
774 */
775void
776htmlNodeDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
777	                 xmlNodePtr cur, const char *encoding, int format) {
778    const htmlElemDesc * info;
779
780    xmlInitParser();
781
782    if ((cur == NULL) || (buf == NULL)) {
783	return;
784    }
785    /*
786     * Special cases.
787     */
788    if (cur->type == XML_DTD_NODE)
789	return;
790    if ((cur->type == XML_HTML_DOCUMENT_NODE) ||
791        (cur->type == XML_DOCUMENT_NODE)){
792	htmlDocContentDumpOutput(buf, (xmlDocPtr) cur, encoding);
793	return;
794    }
795    if (cur->type == XML_ATTRIBUTE_NODE) {
796        htmlAttrDumpOutput(buf, doc, (xmlAttrPtr) cur, encoding);
797	return;
798    }
799    if (cur->type == HTML_TEXT_NODE) {
800	if (cur->content != NULL) {
801	    if (((cur->name == (const xmlChar *)xmlStringText) ||
802		 (cur->name != (const xmlChar *)xmlStringTextNoenc)) &&
803		((cur->parent == NULL) ||
804		 ((xmlStrcasecmp(cur->parent->name, BAD_CAST "script")) &&
805		  (xmlStrcasecmp(cur->parent->name, BAD_CAST "style"))))) {
806		xmlChar *buffer;
807
808		buffer = xmlEncodeEntitiesReentrant(doc, cur->content);
809		if (buffer != NULL) {
810		    xmlOutputBufferWriteString(buf, (const char *)buffer);
811		    xmlFree(buffer);
812		}
813	    } else {
814		xmlOutputBufferWriteString(buf, (const char *)cur->content);
815	    }
816	}
817	return;
818    }
819    if (cur->type == HTML_COMMENT_NODE) {
820	if (cur->content != NULL) {
821	    xmlOutputBufferWriteString(buf, "<!--");
822	    xmlOutputBufferWriteString(buf, (const char *)cur->content);
823	    xmlOutputBufferWriteString(buf, "-->");
824	}
825	return;
826    }
827    if (cur->type == HTML_PI_NODE) {
828	if (cur->name == NULL)
829	    return;
830	xmlOutputBufferWriteString(buf, "<?");
831	xmlOutputBufferWriteString(buf, (const char *)cur->name);
832	if (cur->content != NULL) {
833	    xmlOutputBufferWriteString(buf, " ");
834	    xmlOutputBufferWriteString(buf, (const char *)cur->content);
835	}
836	xmlOutputBufferWriteString(buf, ">");
837	return;
838    }
839    if (cur->type == HTML_ENTITY_REF_NODE) {
840        xmlOutputBufferWriteString(buf, "&");
841	xmlOutputBufferWriteString(buf, (const char *)cur->name);
842        xmlOutputBufferWriteString(buf, ";");
843	return;
844    }
845    if (cur->type == HTML_PRESERVE_NODE) {
846	if (cur->content != NULL) {
847	    xmlOutputBufferWriteString(buf, (const char *)cur->content);
848	}
849	return;
850    }
851
852    /*
853     * Get specific HTML info for that node.
854     */
855    if (cur->ns == NULL)
856	info = htmlTagLookup(cur->name);
857    else
858	info = NULL;
859
860    xmlOutputBufferWriteString(buf, "<");
861    if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
862        xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
863	xmlOutputBufferWriteString(buf, ":");
864    }
865    xmlOutputBufferWriteString(buf, (const char *)cur->name);
866    if (cur->nsDef)
867	xmlNsListDumpOutput(buf, cur->nsDef);
868    if (cur->properties != NULL)
869        htmlAttrListDumpOutput(buf, doc, cur->properties, encoding);
870
871    if ((info != NULL) && (info->empty)) {
872        xmlOutputBufferWriteString(buf, ">");
873	if ((format) && (!info->isinline) && (cur->next != NULL)) {
874	    if ((cur->next->type != HTML_TEXT_NODE) &&
875		(cur->next->type != HTML_ENTITY_REF_NODE) &&
876		(cur->parent != NULL) &&
877		(cur->parent->name != NULL) &&
878		(cur->parent->name[0] != 'p')) /* p, pre, param */
879		xmlOutputBufferWriteString(buf, "\n");
880	}
881	return;
882    }
883    if (((cur->type == XML_ELEMENT_NODE) || (cur->content == NULL)) &&
884	(cur->children == NULL)) {
885        if ((info != NULL) && (info->saveEndTag != 0) &&
886	    (xmlStrcmp(BAD_CAST info->name, BAD_CAST "html")) &&
887	    (xmlStrcmp(BAD_CAST info->name, BAD_CAST "body"))) {
888	    xmlOutputBufferWriteString(buf, ">");
889	} else {
890	    xmlOutputBufferWriteString(buf, "></");
891            if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
892                xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
893                xmlOutputBufferWriteString(buf, ":");
894            }
895	    xmlOutputBufferWriteString(buf, (const char *)cur->name);
896	    xmlOutputBufferWriteString(buf, ">");
897	}
898	if ((format) && (cur->next != NULL) &&
899            (info != NULL) && (!info->isinline)) {
900	    if ((cur->next->type != HTML_TEXT_NODE) &&
901		(cur->next->type != HTML_ENTITY_REF_NODE) &&
902		(cur->parent != NULL) &&
903		(cur->parent->name != NULL) &&
904		(cur->parent->name[0] != 'p')) /* p, pre, param */
905		xmlOutputBufferWriteString(buf, "\n");
906	}
907	return;
908    }
909    xmlOutputBufferWriteString(buf, ">");
910    if ((cur->type != XML_ELEMENT_NODE) &&
911	(cur->content != NULL)) {
912	    /*
913	     * Uses the OutputBuffer property to automatically convert
914	     * invalids to charrefs
915	     */
916
917            xmlOutputBufferWriteString(buf, (const char *) cur->content);
918    }
919    if (cur->children != NULL) {
920        if ((format) && (info != NULL) && (!info->isinline) &&
921	    (cur->children->type != HTML_TEXT_NODE) &&
922	    (cur->children->type != HTML_ENTITY_REF_NODE) &&
923	    (cur->children != cur->last) &&
924	    (cur->name != NULL) &&
925	    (cur->name[0] != 'p')) /* p, pre, param */
926	    xmlOutputBufferWriteString(buf, "\n");
927	htmlNodeListDumpOutput(buf, doc, cur->children, encoding, format);
928        if ((format) && (info != NULL) && (!info->isinline) &&
929	    (cur->last->type != HTML_TEXT_NODE) &&
930	    (cur->last->type != HTML_ENTITY_REF_NODE) &&
931	    (cur->children != cur->last) &&
932	    (cur->name != NULL) &&
933	    (cur->name[0] != 'p')) /* p, pre, param */
934	    xmlOutputBufferWriteString(buf, "\n");
935    }
936    xmlOutputBufferWriteString(buf, "</");
937    if ((cur->ns != NULL) && (cur->ns->prefix != NULL)) {
938        xmlOutputBufferWriteString(buf, (const char *)cur->ns->prefix);
939	xmlOutputBufferWriteString(buf, ":");
940    }
941    xmlOutputBufferWriteString(buf, (const char *)cur->name);
942    xmlOutputBufferWriteString(buf, ">");
943    if ((format) && (info != NULL) && (!info->isinline) &&
944	(cur->next != NULL)) {
945        if ((cur->next->type != HTML_TEXT_NODE) &&
946	    (cur->next->type != HTML_ENTITY_REF_NODE) &&
947	    (cur->parent != NULL) &&
948	    (cur->parent->name != NULL) &&
949	    (cur->parent->name[0] != 'p')) /* p, pre, param */
950	    xmlOutputBufferWriteString(buf, "\n");
951    }
952}
953
954/**
955 * htmlNodeDumpOutput:
956 * @buf:  the HTML buffer output
957 * @doc:  the document
958 * @cur:  the current node
959 * @encoding:  the encoding string
960 *
961 * Dump an HTML node, recursive behaviour,children are printed too,
962 * and formatting returns/spaces are added.
963 */
964void
965htmlNodeDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc,
966	           xmlNodePtr cur, const char *encoding) {
967    htmlNodeDumpFormatOutput(buf, doc, cur, encoding, 1);
968}
969
970/**
971 * htmlDocContentDumpFormatOutput:
972 * @buf:  the HTML buffer output
973 * @cur:  the document
974 * @encoding:  the encoding string
975 * @format:  should formatting spaces been added
976 *
977 * Dump an HTML document.
978 */
979void
980htmlDocContentDumpFormatOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
981	                       const char *encoding, int format) {
982    int type;
983
984    xmlInitParser();
985
986    if ((buf == NULL) || (cur == NULL))
987        return;
988
989    /*
990     * force to output the stuff as HTML, especially for entities
991     */
992    type = cur->type;
993    cur->type = XML_HTML_DOCUMENT_NODE;
994    if (cur->intSubset != NULL) {
995        htmlDtdDumpOutput(buf, cur, NULL);
996    }
997    if (cur->children != NULL) {
998        htmlNodeListDumpOutput(buf, cur, cur->children, encoding, format);
999    }
1000    xmlOutputBufferWriteString(buf, "\n");
1001    cur->type = (xmlElementType) type;
1002}
1003
1004/**
1005 * htmlDocContentDumpOutput:
1006 * @buf:  the HTML buffer output
1007 * @cur:  the document
1008 * @encoding:  the encoding string
1009 *
1010 * Dump an HTML document. Formating return/spaces are added.
1011 */
1012void
1013htmlDocContentDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr cur,
1014	                 const char *encoding) {
1015    htmlDocContentDumpFormatOutput(buf, cur, encoding, 1);
1016}
1017
1018/************************************************************************
1019 *									*
1020 *		Saving functions front-ends				*
1021 *									*
1022 ************************************************************************/
1023
1024/**
1025 * htmlDocDump:
1026 * @f:  the FILE*
1027 * @cur:  the document
1028 *
1029 * Dump an HTML document to an open FILE.
1030 *
1031 * returns: the number of byte written or -1 in case of failure.
1032 */
1033int
1034htmlDocDump(FILE *f, xmlDocPtr cur) {
1035    xmlOutputBufferPtr buf;
1036    xmlCharEncodingHandlerPtr handler = NULL;
1037    const char *encoding;
1038    int ret;
1039
1040    xmlInitParser();
1041
1042    if ((cur == NULL) || (f == NULL)) {
1043	return(-1);
1044    }
1045
1046    encoding = (const char *) htmlGetMetaEncoding(cur);
1047
1048    if (encoding != NULL) {
1049	xmlCharEncoding enc;
1050
1051	enc = xmlParseCharEncoding(encoding);
1052	if (enc != cur->charset) {
1053	    if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1054		/*
1055		 * Not supported yet
1056		 */
1057		return(-1);
1058	    }
1059
1060	    handler = xmlFindCharEncodingHandler(encoding);
1061	    if (handler == NULL)
1062		htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
1063	} else {
1064	    handler = xmlFindCharEncodingHandler(encoding);
1065	}
1066    }
1067
1068    /*
1069     * Fallback to HTML or ASCII when the encoding is unspecified
1070     */
1071    if (handler == NULL)
1072	handler = xmlFindCharEncodingHandler("HTML");
1073    if (handler == NULL)
1074	handler = xmlFindCharEncodingHandler("ascii");
1075
1076    buf = xmlOutputBufferCreateFile(f, handler);
1077    if (buf == NULL) return(-1);
1078    htmlDocContentDumpOutput(buf, cur, NULL);
1079
1080    ret = xmlOutputBufferClose(buf);
1081    return(ret);
1082}
1083
1084/**
1085 * htmlSaveFile:
1086 * @filename:  the filename (or URL)
1087 * @cur:  the document
1088 *
1089 * Dump an HTML document to a file. If @filename is "-" the stdout file is
1090 * used.
1091 * returns: the number of byte written or -1 in case of failure.
1092 */
1093int
1094htmlSaveFile(const char *filename, xmlDocPtr cur) {
1095    xmlOutputBufferPtr buf;
1096    xmlCharEncodingHandlerPtr handler = NULL;
1097    const char *encoding;
1098    int ret;
1099
1100    if ((cur == NULL) || (filename == NULL))
1101        return(-1);
1102
1103    xmlInitParser();
1104
1105    encoding = (const char *) htmlGetMetaEncoding(cur);
1106
1107    if (encoding != NULL) {
1108	xmlCharEncoding enc;
1109
1110	enc = xmlParseCharEncoding(encoding);
1111	if (enc != cur->charset) {
1112	    if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1113		/*
1114		 * Not supported yet
1115		 */
1116		return(-1);
1117	    }
1118
1119	    handler = xmlFindCharEncodingHandler(encoding);
1120	    if (handler == NULL)
1121		htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
1122	}
1123    }
1124
1125    /*
1126     * Fallback to HTML or ASCII when the encoding is unspecified
1127     */
1128    if (handler == NULL)
1129	handler = xmlFindCharEncodingHandler("HTML");
1130    if (handler == NULL)
1131	handler = xmlFindCharEncodingHandler("ascii");
1132
1133    /*
1134     * save the content to a temp buffer.
1135     */
1136    buf = xmlOutputBufferCreateFilename(filename, handler, cur->compression);
1137    if (buf == NULL) return(0);
1138
1139    htmlDocContentDumpOutput(buf, cur, NULL);
1140
1141    ret = xmlOutputBufferClose(buf);
1142    return(ret);
1143}
1144
1145/**
1146 * htmlSaveFileFormat:
1147 * @filename:  the filename
1148 * @cur:  the document
1149 * @format:  should formatting spaces been added
1150 * @encoding: the document encoding
1151 *
1152 * Dump an HTML document to a file using a given encoding.
1153 *
1154 * returns: the number of byte written or -1 in case of failure.
1155 */
1156int
1157htmlSaveFileFormat(const char *filename, xmlDocPtr cur,
1158	           const char *encoding, int format) {
1159    xmlOutputBufferPtr buf;
1160    xmlCharEncodingHandlerPtr handler = NULL;
1161    int ret;
1162
1163    if ((cur == NULL) || (filename == NULL))
1164        return(-1);
1165
1166    xmlInitParser();
1167
1168    if (encoding != NULL) {
1169	xmlCharEncoding enc;
1170
1171	enc = xmlParseCharEncoding(encoding);
1172	if (enc != cur->charset) {
1173	    if (cur->charset != XML_CHAR_ENCODING_UTF8) {
1174		/*
1175		 * Not supported yet
1176		 */
1177		return(-1);
1178	    }
1179
1180	    handler = xmlFindCharEncodingHandler(encoding);
1181	    if (handler == NULL)
1182		htmlSaveErr(XML_SAVE_UNKNOWN_ENCODING, NULL, encoding);
1183	}
1184        htmlSetMetaEncoding(cur, (const xmlChar *) encoding);
1185    } else {
1186	htmlSetMetaEncoding(cur, (const xmlChar *) "UTF-8");
1187    }
1188
1189    /*
1190     * Fallback to HTML or ASCII when the encoding is unspecified
1191     */
1192    if (handler == NULL)
1193	handler = xmlFindCharEncodingHandler("HTML");
1194    if (handler == NULL)
1195	handler = xmlFindCharEncodingHandler("ascii");
1196
1197    /*
1198     * save the content to a temp buffer.
1199     */
1200    buf = xmlOutputBufferCreateFilename(filename, handler, 0);
1201    if (buf == NULL) return(0);
1202
1203    htmlDocContentDumpFormatOutput(buf, cur, encoding, format);
1204
1205    ret = xmlOutputBufferClose(buf);
1206    return(ret);
1207}
1208
1209/**
1210 * htmlSaveFileEnc:
1211 * @filename:  the filename
1212 * @cur:  the document
1213 * @encoding: the document encoding
1214 *
1215 * Dump an HTML document to a file using a given encoding
1216 * and formatting returns/spaces are added.
1217 *
1218 * returns: the number of byte written or -1 in case of failure.
1219 */
1220int
1221htmlSaveFileEnc(const char *filename, xmlDocPtr cur, const char *encoding) {
1222    return(htmlSaveFileFormat(filename, cur, encoding, 1));
1223}
1224
1225#endif /* LIBXML_OUTPUT_ENABLED */
1226
1227#define bottom_HTMLtree
1228#include "elfgcchack.h"
1229#endif /* LIBXML_HTML_ENABLED */
1230