1#include "XMLHandler.h"
2
3#include <algorithm>
4#include <expat.h>
5#include <stdio.h>
6#include <string.h>
7#include <fcntl.h>
8#include <unistd.h>
9#include <errno.h>
10
11#define NS_SEPARATOR 1
12#define MORE_INDENT "  "
13
// Returns a copy of s in which the characters '<', '>' and '&' are replaced
// by the entities "&lt;", "&gt;" and "&amp;". Every other character is
// copied through unchanged.
static string
xml_text_escape(const string& s)
{
    string escaped;
    for (string::const_iterator it = s.begin(); it != s.end(); ++it) {
        const char ch = *it;
        if (ch == '<') {
            escaped.append("&lt;");
        } else if (ch == '>') {
            escaped.append("&gt;");
        } else if (ch == '&') {
            escaped.append("&amp;");
        } else {
            escaped.push_back(ch);
        }
    }
    return escaped;
}
38
// Returns a copy of s that is safe to place between double quotes as an XML
// attribute value.
//
// Besides the double quote itself, '&' and '<' are escaped: the XML grammar
// does not allow either of them to appear literally inside an attribute
// value, so leaving them untouched would produce a document that cannot be
// parsed back.
static string
xml_attr_escape(const string& s)
{
    string result;
    const size_t N = s.length();
    for (size_t i=0; i<N; i++) {
        char c = s[i];
        switch (c) {
            case '&':
                result += "&amp;";
                break;
            case '<':
                result += "&lt;";
                break;
            case '\"':
                result += "&quot;";
                break;
            default:
                result += c;
                break;
        }
    }
    return result;
}
57
58XMLNamespaceMap::XMLNamespaceMap()
59{
60}
61
62XMLNamespaceMap::XMLNamespaceMap(char const*const* nspaces)
63
64{
65    while (*nspaces) {
66        m_map[nspaces[1]] = nspaces[0];
67        nspaces += 2;
68    }
69}
70
71string
72XMLNamespaceMap::Get(const string& ns) const
73{
74    if (ns == "xml") {
75        return ns;
76    }
77    map<string,string>::const_iterator it = m_map.find(ns);
78    if (it == m_map.end()) {
79        return "";
80    } else {
81        return it->second;
82    }
83}
84
85string
86XMLNamespaceMap::GetPrefix(const string& ns) const
87{
88    if (ns == "") {
89        return "";
90    }
91    map<string,string>::const_iterator it = m_map.find(ns);
92    if (it != m_map.end()) {
93        if (it->second == "") {
94            return "";
95        } else {
96            return it->second + ":";
97        }
98    } else {
99        return ":"; // invalid
100    }
101}
102
103void
104XMLNamespaceMap::AddToAttributes(vector<XMLAttribute>* attrs) const
105{
106    map<string,string>::const_iterator it;
107    for (it=m_map.begin(); it!=m_map.end(); it++) {
108        if (it->second == "xml") {
109            continue;
110        }
111        XMLAttribute attr;
112        if (it->second == "") {
113            attr.name = "xmlns";
114        } else {
115            attr.name = "xmlns:";
116            attr.name += it->second;
117        }
118        attr.value = it->first;
119        attrs->push_back(attr);
120    }
121}
122
123XMLAttribute::XMLAttribute()
124{
125}
126
127XMLAttribute::XMLAttribute(const XMLAttribute& that)
128    :ns(that.ns),
129     name(that.name),
130     value(that.value)
131{
132}
133
134XMLAttribute::XMLAttribute(string n, string na, string v)
135    :ns(n),
136     name(na),
137     value(v)
138{
139}
140
141XMLAttribute::~XMLAttribute()
142{
143}
144
145int
146XMLAttribute::Compare(const XMLAttribute& that) const
147{
148    if (ns != that.ns) {
149        return ns < that.ns ? -1 : 1;
150    }
151    if (name != that.name) {
152        return name < that.name ? -1 : 1;
153    }
154    return 0;
155}
156
157string
158XMLAttribute::Find(const vector<XMLAttribute>& list, const string& ns, const string& name,
159                    const string& def)
160{
161    const size_t N = list.size();
162    for (size_t i=0; i<N; i++) {
163        const XMLAttribute& attr = list[i];
164        if (attr.ns == ns && attr.name == name) {
165            return attr.value;
166        }
167    }
168    return def;
169}
170
171struct xml_handler_data {
172    vector<XMLHandler*> stack;
173    XML_Parser parser;
174    vector<vector<XMLAttribute>*> attributes;
175    string filename;
176};
177
178XMLNode::XMLNode()
179{
180}
181
182XMLNode::~XMLNode()
183{
184//    for_each(m_children.begin(), m_children.end(), delete_object<XMLNode>);
185}
186
187XMLNode*
188XMLNode::Clone() const
189{
190    switch (m_type) {
191        case ELEMENT: {
192            XMLNode* e = XMLNode::NewElement(m_pos, m_ns, m_name, m_attrs, m_pretty);
193            const size_t N = m_children.size();
194            for (size_t i=0; i<N; i++) {
195                e->m_children.push_back(m_children[i]->Clone());
196            }
197            return e;
198        }
199        case TEXT: {
200            return XMLNode::NewText(m_pos, m_text, m_pretty);
201        }
202        default:
203            return NULL;
204    }
205}
206
207XMLNode*
208XMLNode::NewElement(const SourcePos& pos, const string& ns, const string& name,
209                        const vector<XMLAttribute>& attrs, int pretty)
210{
211    XMLNode* node = new XMLNode();
212        node->m_type = ELEMENT;
213        node->m_pretty = pretty;
214        node->m_pos = pos;
215        node->m_ns = ns;
216        node->m_name = name;
217        node->m_attrs = attrs;
218    return node;
219}
220
221XMLNode*
222XMLNode::NewText(const SourcePos& pos, const string& text, int pretty)
223{
224    XMLNode* node = new XMLNode();
225        node->m_type = TEXT;
226        node->m_pretty = pretty;
227        node->m_pos = pos;
228        node->m_text = text;
229    return node;
230}
231
232void
233XMLNode::SetPrettyRecursive(int value)
234{
235    m_pretty = value;
236    const size_t N = m_children.size();
237    for (size_t i=0; i<N; i++) {
238        m_children[i]->SetPrettyRecursive(value);
239    }
240}
241
242string
243XMLNode::ContentsToString(const XMLNamespaceMap& nspaces) const
244{
245    return contents_to_string(nspaces, "");
246}
247
248string
249XMLNode::ToString(const XMLNamespaceMap& nspaces) const
250{
251    return to_string(nspaces, "");
252}
253
254string
255XMLNode::OpenTagToString(const XMLNamespaceMap& nspaces, int pretty) const
256{
257    return open_tag_to_string(nspaces, "", pretty);
258}
259
260string
261XMLNode::contents_to_string(const XMLNamespaceMap& nspaces, const string& indent) const
262{
263    string result;
264    const size_t N = m_children.size();
265    for (size_t i=0; i<N; i++) {
266        const XMLNode* child = m_children[i];
267        switch (child->Type()) {
268        case ELEMENT:
269            if (m_pretty == PRETTY) {
270                result += '\n';
271                result += indent;
272            }
273        case TEXT:
274            result += child->to_string(nspaces, indent);
275            break;
276        }
277    }
278    return result;
279}
280
// Returns str with leading and trailing whitespace (as classified by
// isspace) removed. An empty or all-whitespace input yields an empty string.
//
// Characters are converted to unsigned char before being handed to
// isspace(): passing a negative char value (any byte >= 0x80 where char is
// signed) is undefined behavior. Working with indices also avoids forming a
// pointer one before the buffer when str is empty.
string
trim_string(const string& str)
{
    size_t begin = 0;
    size_t end = str.length();
    while (begin < end && isspace((unsigned char)str[begin])) {
        begin++;
    }
    while (end > begin && isspace((unsigned char)str[end-1])) {
        end--;
    }
    return str.substr(begin, end-begin);
}
295
296string
297XMLNode::open_tag_to_string(const XMLNamespaceMap& nspaces, const string& indent, int pretty) const
298{
299    if (m_type != ELEMENT) {
300        return "";
301    }
302    string result = "<";
303    result += nspaces.GetPrefix(m_ns);
304    result += m_name;
305
306    vector<XMLAttribute> attrs = m_attrs;
307
308    sort(attrs.begin(), attrs.end());
309
310    const size_t N = attrs.size();
311    for (size_t i=0; i<N; i++) {
312        const XMLAttribute& attr = attrs[i];
313        if (i == 0 || m_pretty == EXACT || pretty == EXACT) {
314            result += ' ';
315        }
316        else {
317            result += "\n";
318            result += indent;
319            result += MORE_INDENT;
320            result += MORE_INDENT;
321        }
322        result += nspaces.GetPrefix(attr.ns);
323        result += attr.name;
324        result += "=\"";
325        result += xml_attr_escape(attr.value);
326        result += '\"';
327    }
328
329    if (m_children.size() > 0) {
330        result += '>';
331    } else {
332        result += " />";
333    }
334    return result;
335}
336
337string
338XMLNode::to_string(const XMLNamespaceMap& nspaces, const string& indent) const
339{
340    switch (m_type)
341    {
342        case TEXT: {
343            if (m_pretty == EXACT) {
344                return xml_text_escape(m_text);
345            } else {
346                return xml_text_escape(trim_string(m_text));
347            }
348        }
349        case ELEMENT: {
350            string result = open_tag_to_string(nspaces, indent, PRETTY);
351
352            if (m_children.size() > 0) {
353                result += contents_to_string(nspaces, indent + MORE_INDENT);
354
355                if (m_pretty == PRETTY && m_children.size() > 0) {
356                    result += '\n';
357                    result += indent;
358                }
359
360                result += "</";
361                result += nspaces.GetPrefix(m_ns);
362                result += m_name;
363                result += '>';
364            }
365            return result;
366        }
367        default:
368            return "";
369    }
370}
371
372string
373XMLNode::CollapseTextContents() const
374{
375    if (m_type == TEXT) {
376        return m_text;
377    }
378    else if (m_type == ELEMENT) {
379        string result;
380
381        const size_t N=m_children.size();
382        for (size_t i=0; i<N; i++) {
383            result += m_children[i]->CollapseTextContents();
384        }
385
386        return result;
387    }
388    else {
389        return "";
390    }
391}
392
393vector<XMLNode*>
394XMLNode::GetElementsByName(const string& ns, const string& name) const
395{
396    vector<XMLNode*> result;
397    const size_t N=m_children.size();
398    for (size_t i=0; i<N; i++) {
399        XMLNode* child = m_children[i];
400        if (child->m_type == ELEMENT && child->m_ns == ns && child->m_name == name) {
401            result.push_back(child);
402        }
403    }
404    return result;
405}
406
407XMLNode*
408XMLNode::GetElementByNameAt(const string& ns, const string& name, size_t index) const
409{
410    vector<XMLNode*> result;
411    const size_t N=m_children.size();
412    for (size_t i=0; i<N; i++) {
413        XMLNode* child = m_children[i];
414        if (child->m_type == ELEMENT && child->m_ns == ns && child->m_name == name) {
415            if (index == 0) {
416                return child;
417            } else {
418                index--;
419            }
420        }
421    }
422    return NULL;
423}
424
425size_t
426XMLNode::CountElementsByName(const string& ns, const string& name) const
427{
428    size_t result = 0;
429    const size_t N=m_children.size();
430    for (size_t i=0; i<N; i++) {
431        XMLNode* child = m_children[i];
432        if (child->m_type == ELEMENT && child->m_ns == ns && child->m_name == name) {
433            result++;
434        }
435    }
436    return result;
437}
438
439string
440XMLNode::GetAttribute(const string& ns, const string& name, const string& def) const
441{
442    return XMLAttribute::Find(m_attrs, ns, name, def);
443}
444
445static void
446parse_namespace(const char* data, string* ns, string* name)
447{
448    const char* p = strchr(data, NS_SEPARATOR);
449    if (p != NULL) {
450        ns->assign(data, p-data);
451        name->assign(p+1);
452    } else {
453        ns->assign("");
454        name->assign(data);
455    }
456}
457
458static void
459convert_attrs(const char** in, vector<XMLAttribute>* out)
460{
461    while (*in) {
462        XMLAttribute attr;
463        parse_namespace(in[0], &attr.ns, &attr.name);
464        attr.value = in[1];
465        out->push_back(attr);
466        in += 2;
467    }
468}
469
470static bool
471list_contains(const vector<XMLHandler*>& stack, XMLHandler* handler)
472{
473    const size_t N = stack.size();
474    for (size_t i=0; i<N; i++) {
475        if (stack[i] == handler) {
476            return true;
477        }
478    }
479    return false;
480}
481
482static void XMLCALL
483start_element_handler(void *userData, const char *name, const char **attrs)
484{
485    xml_handler_data* data = (xml_handler_data*)userData;
486
487    XMLHandler* handler = data->stack[data->stack.size()-1];
488
489    SourcePos pos(data->filename, (int)XML_GetCurrentLineNumber(data->parser));
490    string nsString;
491    string nameString;
492    XMLHandler* next = handler;
493    vector<XMLAttribute> attributes;
494
495    parse_namespace(name, &nsString, &nameString);
496    convert_attrs(attrs, &attributes);
497
498    handler->OnStartElement(pos, nsString, nameString, attributes, &next);
499
500    if (next == NULL) {
501        next = handler;
502    }
503
504    if (next != handler) {
505        next->elementPos = pos;
506        next->elementNamespace = nsString;
507        next->elementName = nameString;
508        next->elementAttributes = attributes;
509    }
510
511    data->stack.push_back(next);
512}
513
514static void XMLCALL
515end_element_handler(void *userData, const char *name)
516{
517    xml_handler_data* data = (xml_handler_data*)userData;
518
519    XMLHandler* handler = data->stack[data->stack.size()-1];
520    data->stack.pop_back();
521
522    SourcePos pos(data->filename, (int)XML_GetCurrentLineNumber(data->parser));
523
524    if (!list_contains(data->stack, handler)) {
525        handler->OnDone(pos);
526        if (data->stack.size() > 1) {
527            // not top one
528            delete handler;
529        }
530    }
531
532    handler = data->stack[data->stack.size()-1];
533
534    string nsString;
535    string nameString;
536
537    parse_namespace(name, &nsString, &nameString);
538
539    handler->OnEndElement(pos, nsString, nameString);
540}
541
542static void XMLCALL
543text_handler(void *userData, const XML_Char *s, int len)
544{
545    xml_handler_data* data = (xml_handler_data*)userData;
546    XMLHandler* handler = data->stack[data->stack.size()-1];
547    SourcePos pos(data->filename, (int)XML_GetCurrentLineNumber(data->parser));
548    handler->OnText(pos, string(s, len));
549}
550
551static void XMLCALL
552comment_handler(void *userData, const char *comment)
553{
554    xml_handler_data* data = (xml_handler_data*)userData;
555    XMLHandler* handler = data->stack[data->stack.size()-1];
556    SourcePos pos(data->filename, (int)XML_GetCurrentLineNumber(data->parser));
557    handler->OnComment(pos, string(comment));
558}
559
560bool
561XMLHandler::ParseFile(const string& filename, XMLHandler* handler)
562{
563    char buf[16384];
564    int fd = open(filename.c_str(), O_RDONLY);
565    if (fd < 0) {
566        SourcePos(filename, -1).Error("Unable to open file for read: %s", strerror(errno));
567        return false;
568    }
569
570    XML_Parser parser = XML_ParserCreateNS(NULL, NS_SEPARATOR);
571    xml_handler_data state;
572    state.stack.push_back(handler);
573    state.parser = parser;
574    state.filename = filename;
575
576    XML_SetUserData(parser, &state);
577    XML_SetElementHandler(parser, start_element_handler, end_element_handler);
578    XML_SetCharacterDataHandler(parser, text_handler);
579    XML_SetCommentHandler(parser, comment_handler);
580
581    ssize_t len;
582    bool done;
583    do {
584        len = read(fd, buf, sizeof(buf));
585        done = len < (ssize_t)sizeof(buf);
586        if (len < 0) {
587            SourcePos(filename, -1).Error("Error reading file: %s\n", strerror(errno));
588            close(fd);
589            return false;
590        }
591        if (XML_Parse(parser, buf, len, done) == XML_STATUS_ERROR) {
592            SourcePos(filename, (int)XML_GetCurrentLineNumber(parser)).Error(
593                    "Error parsing XML: %s\n", XML_ErrorString(XML_GetErrorCode(parser)));
594            close(fd);
595            return false;
596        }
597    } while (!done);
598
599    XML_ParserFree(parser);
600
601    close(fd);
602
603    return true;
604}
605
606bool
607XMLHandler::ParseString(const string& filename, const string& text, XMLHandler* handler)
608{
609    XML_Parser parser = XML_ParserCreateNS(NULL, NS_SEPARATOR);
610    xml_handler_data state;
611    state.stack.push_back(handler);
612    state.parser = parser;
613    state.filename = filename;
614
615    XML_SetUserData(parser, &state);
616    XML_SetElementHandler(parser, start_element_handler, end_element_handler);
617    XML_SetCharacterDataHandler(parser, text_handler);
618    XML_SetCommentHandler(parser, comment_handler);
619
620    if (XML_Parse(parser, text.c_str(), text.size(), true) == XML_STATUS_ERROR) {
621        SourcePos(filename, (int)XML_GetCurrentLineNumber(parser)).Error(
622                "Error parsing XML: %s\n", XML_ErrorString(XML_GetErrorCode(parser)));
623        return false;
624    }
625
626    XML_ParserFree(parser);
627
628    return true;
629}
630
631XMLHandler::XMLHandler()
632{
633}
634
635XMLHandler::~XMLHandler()
636{
637}
638
639int
640XMLHandler::OnStartElement(const SourcePos& pos, const string& ns, const string& name,
641                            const vector<XMLAttribute>& attrs, XMLHandler** next)
642{
643    return 0;
644}
645
646int
647XMLHandler::OnEndElement(const SourcePos& pos, const string& ns, const string& name)
648{
649    return 0;
650}
651
652int
653XMLHandler::OnText(const SourcePos& pos, const string& text)
654{
655    return 0;
656}
657
658int
659XMLHandler::OnComment(const SourcePos& pos, const string& text)
660{
661    return 0;
662}
663
664int
665XMLHandler::OnDone(const SourcePos& pos)
666{
667    return 0;
668}
669
670TopElementHandler::TopElementHandler(const string& ns, const string& name, XMLHandler* next)
671    :m_ns(ns),
672     m_name(name),
673     m_next(next)
674{
675}
676
677int
678TopElementHandler::OnStartElement(const SourcePos& pos, const string& ns, const string& name,
679                            const vector<XMLAttribute>& attrs, XMLHandler** next)
680{
681    *next = m_next;
682    return 0;
683}
684
685int
686TopElementHandler::OnEndElement(const SourcePos& pos, const string& ns, const string& name)
687{
688    return 0;
689}
690
691int
692TopElementHandler::OnText(const SourcePos& pos, const string& text)
693{
694    return 0;
695}
696
697int
698TopElementHandler::OnDone(const SourcePos& pos)
699{
700    return 0;
701}
702
703
704NodeHandler::NodeHandler(XMLNode* root, int pretty)
705    :m_root(root),
706     m_pretty(pretty)
707{
708    if (root != NULL) {
709        m_nodes.push_back(root);
710    }
711}
712
713NodeHandler::~NodeHandler()
714{
715}
716
717int
718NodeHandler::OnStartElement(const SourcePos& pos, const string& ns, const string& name,
719                            const vector<XMLAttribute>& attrs, XMLHandler** next)
720{
721    int pretty;
722    if (XMLAttribute::Find(attrs, XMLNS_XMLNS, "space", "") == "preserve") {
723        pretty = XMLNode::EXACT;
724    } else {
725        if (m_root == NULL) {
726            pretty = m_pretty;
727        } else {
728            pretty = m_nodes[m_nodes.size()-1]->Pretty();
729        }
730    }
731    XMLNode* n = XMLNode::NewElement(pos, ns, name, attrs, pretty);
732    if (m_root == NULL) {
733        m_root = n;
734    } else {
735        m_nodes[m_nodes.size()-1]->EditChildren().push_back(n);
736    }
737    m_nodes.push_back(n);
738    return 0;
739}
740
741int
742NodeHandler::OnEndElement(const SourcePos& pos, const string& ns, const string& name)
743{
744    m_nodes.pop_back();
745    return 0;
746}
747
748int
749NodeHandler::OnText(const SourcePos& pos, const string& text)
750{
751    if (m_root == NULL) {
752        return 1;
753    }
754    XMLNode* n = XMLNode::NewText(pos, text, m_nodes[m_nodes.size()-1]->Pretty());
755    m_nodes[m_nodes.size()-1]->EditChildren().push_back(n);
756    return 0;
757}
758
759int
760NodeHandler::OnComment(const SourcePos& pos, const string& text)
761{
762    return 0;
763}
764
765int
766NodeHandler::OnDone(const SourcePos& pos)
767{
768    return 0;
769}
770
771XMLNode*
772NodeHandler::ParseFile(const string& filename, int pretty)
773{
774    NodeHandler handler(NULL, pretty);
775    if (!XMLHandler::ParseFile(filename, &handler)) {
776        fprintf(stderr, "error parsing file: %s\n", filename.c_str());
777        return NULL;
778    }
779    return handler.Root();
780}
781
782XMLNode*
783NodeHandler::ParseString(const string& filename, const string& text, int pretty)
784{
785    NodeHandler handler(NULL, pretty);
786    if (!XMLHandler::ParseString(filename, text, &handler)) {
787        fprintf(stderr, "error parsing file: %s\n", filename.c_str());
788        return NULL;
789    }
790    return handler.Root();
791}
792
793
794