15821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Copyright (c) 2012 The Chromium Authors. All rights reserved.
25821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// Use of this source code is governed by a BSD-style license that can be
35821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// found in the LICENSE file.
45821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
55821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#ifndef THIRD_PARTY_LIBXML_CHROMIUM_LIBXML_UTILS_H_
65821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#define THIRD_PARTY_LIBXML_CHROMIUM_LIBXML_UTILS_H_
75821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#pragma once
85821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
95821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include <string>
105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "libxml/xmlreader.h"
125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)#include "libxml/xmlwriter.h"
135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
// Converts a libxml string (xmlChar*) into a UTF-8 std::string.
// |xmlstring| may be NULL, in which case an empty string is returned.
// Only the declaration lives in this header; the definition is elsewhere.
std::string XmlStringToStdString(const xmlChar* xmlstring);
175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// libxml uses a global error function pointer for reporting errors.
195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// A ScopedXmlErrorFunc object lets you change the global error pointer
205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// for the duration of the object's lifetime.
215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)class ScopedXmlErrorFunc {
225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) public:
235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ScopedXmlErrorFunc(void* context, xmlGenericErrorFunc func) {
245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    old_error_func_ = xmlGenericError;
255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    old_error_context_ = xmlGenericErrorContext;
265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    xmlSetGenericErrorFunc(context, func);
275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ~ScopedXmlErrorFunc() {
295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    xmlSetGenericErrorFunc(old_error_context_, old_error_func_);
305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) private:
335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  xmlGenericErrorFunc old_error_func_;
345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void* old_error_context_;
355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)};
365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// XmlReader is a wrapper class around libxml's xmlReader,
385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// providing a simplified C++ API.
395821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)class XmlReader {
405821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) public:
415821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  XmlReader();
425821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ~XmlReader();
435821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
445821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Load a document into the reader from memory.  |input| must be UTF-8 and
455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // exist for the lifetime of this object.  Returns false on error.
465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // TODO(evanm): handle encodings other than UTF-8?
475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool Load(const std::string& input);
485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Load a document into the reader from a file.  Returns false on error.
505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool LoadFile(const std::string& file_path);
515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Wrappers around libxml functions -----------------------------------------
535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Read() advances to the next node.  Returns false on EOF or error.
555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool Read() { return xmlTextReaderRead(reader_) == 1; }
565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Next(), when pointing at an opening tag, advances to the node after
585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // the matching closing tag.  Returns false on EOF or error.
595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool Next() { return xmlTextReaderNext(reader_) == 1; }
605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Return the depth in the tree of the current node.
625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int Depth() { return xmlTextReaderDepth(reader_); }
635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Returns the "local" name of the current node.
655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // For a tag like <foo:bar>, this is the string "foo:bar".
665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  std::string NodeName() {
675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return XmlStringToStdString(xmlTextReaderConstLocalName(reader_));
685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // When pointing at a tag, retrieves the value of an attribute.
715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Returns false on failure.
725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // E.g. for <foo bar:baz="a">, NodeAttribute("bar:baz", &value)
735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // returns true and |value| is set to "a".
745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool NodeAttribute(const char* name, std::string* value);
755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
763551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)  // Returns true if the node is a closing element (e.g. </foo>).
773551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)  bool IsClosingElement();
783551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)
795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Helper functions not provided by libxml ----------------------------------
805821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
815821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Return the string content within an element.
825821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // "<foo>bar</foo>" is a sequence of three nodes:
835821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // (1) open tag, (2) text, (3) close tag.
845821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // With the reader currently at (1), this returns the text of (2),
855821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // and advances past (3).
865821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Returns false on error.
875821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool ReadElementContent(std::string* content);
885821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
895821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Skip to the next opening tag, returning false if we reach a closing
905821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // tag or EOF first.
915821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // If currently on an opening tag, doesn't advance at all.
925821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool SkipToElement();
935821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
945821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) private:
955821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Returns the libxml node type of the current node.
965821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  int NodeType() { return xmlTextReaderNodeType(reader_); }
975821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
985821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // The underlying libxml xmlTextReader.
995821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  xmlTextReaderPtr reader_;
1005821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)};
1015821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1025821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// XmlWriter is a wrapper class around libxml's xmlWriter,
1035821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// providing a simplified C++ API.
1045821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// StartWriting must be called before other methods, and StopWriting
1055821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)// must be called before GetWrittenString() will return results.
1065821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)class XmlWriter {
1075821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) public:
1085821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  XmlWriter();
1095821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  ~XmlWriter();
1105821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1115821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Allocates the xmlTextWriter and an xmlBuffer and starts an XML document.
1125821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // This must be called before any other functions. By default, indenting is
1135821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // set to true.
1145821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void StartWriting();
1155821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1165821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Ends the XML document and frees the xmlTextWriter.
1175821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // This must be called before GetWrittenString() is called.
1185821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void StopWriting();
1195821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Wrappers around libxml functions -----------------------------------------
1205821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1215821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // All following elements will be indented to match their depth.
1225821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void StartIndenting() { xmlTextWriterSetIndent(writer_, 1); }
1235821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1245821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // All follow elements will not be indented.
1255821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  void StopIndenting() { xmlTextWriterSetIndent(writer_, 0); }
1265821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1275821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Start an element with the given name. All future elements added will be
1285821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // children of this element, until it is ended. Returns false on error.
1295821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool StartElement(const std::string& element_name) {
1305821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return xmlTextWriterStartElement(writer_,
1315821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                     BAD_CAST element_name.c_str()) >= 0;
1325821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1335821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1345821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Ends the current open element. Returns false on error.
1355821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool EndElement() {
1365821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return xmlTextWriterEndElement(writer_) >= 0;
1375821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1385821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1393551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)  // Appends to the content of the current open element.
1403551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)  bool AppendElementContent(const std::string& content) {
1413551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)    return xmlTextWriterWriteString(writer_,
1423551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)                                    BAD_CAST content.c_str()) >= 0;
1433551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)  }
1443551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)
1455821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Adds an attribute to the current open element. Returns false on error.
1465821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool AddAttribute(const std::string& attribute_name,
1475821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    const std::string& attribute_value) {
1485821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return xmlTextWriterWriteAttribute(writer_,
1495821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                       BAD_CAST attribute_name.c_str(),
1505821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                       BAD_CAST attribute_value.c_str()) >= 0;
1515821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1525821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1535821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Adds a new element with name |element_name| and content |content|
1545821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // to the buffer. Example: <|element_name|>|content|</|element_name|>
1555821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Returns false on errors.
1565821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  bool WriteElement(const std::string& element_name,
1575821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                    const std::string& content) {
1585821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return xmlTextWriterWriteElement(writer_,
1595821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                     BAD_CAST element_name.c_str(),
1605821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)                                     BAD_CAST content.c_str()) >= 0;
1615821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1625821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1635821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Helper functions not provided by xmlTextWriter ---------------------------
1645821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1655821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Returns the string that has been written to the buffer.
1665821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  std::string GetWrittenString() {
1675821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    if (buffer_ == NULL)
1685821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)      return "";
1695821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)    return XmlStringToStdString(buffer_->content);
1705821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  }
1715821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1725821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles) private:
1735821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // The underlying libxml xmlTextWriter.
1745821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  xmlTextWriterPtr writer_;
1755821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
1765821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  // Stores the output.
1775821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)  xmlBufferPtr buffer_;
1785821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)};
1795821806d5e7f356e8fa4b058a389a808ea183019Torne (Richard Coles)
#endif  // THIRD_PARTY_LIBXML_CHROMIUM_LIBXML_UTILS_H_
181