106741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved. 206741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch// Use of this source code is governed by a BSD-style license that can be 306741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch// found in the LICENSE file. 406741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch 506741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch#ifndef CHROME_COMMON_LIBXML_UTILS_H__ 606741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch#define CHROME_COMMON_LIBXML_UTILS_H__ 73345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick#pragma once 806741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch 906741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch#include <string> 1006741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch 1106741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch#include "libxml/xmlreader.h" 1206741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch#include "libxml/xmlwriter.h" 1306741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch 1472a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsenclass FilePath; 1572a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen 1606741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch// Converts a libxml xmlChar* into a UTF-8 std::string. 1706741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch// NULL inputs produce an empty string. 1806741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdochstd::string XmlStringToStdString(const xmlChar* xmlstring); 1906741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch 2006741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch// libxml uses a global error function pointer for reporting errors. 2106741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch// A ScopedXmlErrorFunc object lets you change the global error pointer 2206741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch// for the duration of the object's lifetime. 2306741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdochclass ScopedXmlErrorFunc { 2406741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch public: 2506741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch ScopedXmlErrorFunc(void* context, xmlGenericErrorFunc func) { 2606741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch old_error_func_ = xmlGenericError; 2706741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch old_error_context_ = xmlGenericErrorContext; 2806741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch xmlSetGenericErrorFunc(context, func); 2906741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch } 3006741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch ~ScopedXmlErrorFunc() { 3106741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch xmlSetGenericErrorFunc(old_error_context_, old_error_func_); 3206741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch } 3306741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch 3406741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch private: 3506741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch xmlGenericErrorFunc old_error_func_; 3606741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch void* old_error_context_; 3706741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch}; 3806741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch 3906741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch// XmlReader is a wrapper class around libxml's xmlReader, 4006741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch// providing a simplified C++ API. 4106741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdochclass XmlReader { 4206741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch public: 4306741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch XmlReader(); 4406741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch ~XmlReader(); 4506741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch 4606741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch // Load a document into the reader from memory. |input| must be UTF-8 and 4706741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch // exist for the lifetime of this object. Returns false on error. 4806741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch // TODO(evanm): handle encodings other than UTF-8? 4906741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch bool Load(const std::string& input); 5006741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch 5106741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch // Load a document into the reader from a file. Returns false on error. 5272a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen bool LoadFile(const FilePath& file_path); 5306741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch 5406741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch // Wrappers around libxml functions ----------------------------------------- 5506741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch 5606741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch // Read() advances to the next node. Returns false on EOF or error. 5706741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch bool Read() { return xmlTextReaderRead(reader_) == 1; } 5806741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch 5906741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch // Next(), when pointing at an opening tag, advances to the node after 6006741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch // the matching closing tag. Returns false on EOF or error. 6106741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch bool Next() { return xmlTextReaderNext(reader_) == 1; } 6206741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch 6306741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch // Return the depth in the tree of the current node. 6406741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch int Depth() { return xmlTextReaderDepth(reader_); } 6506741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch 6606741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch // Returns the "local" name of the current node. 6706741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch // For a tag like <foo:bar>, this is the string "foo:bar". 6806741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch std::string NodeName() { 6906741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch return XmlStringToStdString(xmlTextReaderConstLocalName(reader_)); 7006741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch } 7106741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch 7206741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch // When pointing at a tag, retrieves the value of an attribute. 7306741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch // Returns false on failure. 7406741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch // E.g. for <foo bar:baz="a">, NodeAttribute("bar:baz", &value) 7506741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch // returns true and |value| is set to "a". 7606741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch bool NodeAttribute(const char* name, std::string* value); 7706741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch 7806741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch // Helper functions not provided by libxml ---------------------------------- 7906741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch 8006741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch // Return the string content within an element. 8106741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch // "<foo>bar</foo>" is a sequence of three nodes: 8206741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch // (1) open tag, (2) text, (3) close tag. 8306741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch // With the reader currently at (1), this returns the text of (2), 8406741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch // and advances past (3). 8506741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch // Returns false on error. 8606741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch bool ReadElementContent(std::string* content); 8706741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch 8806741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch // Skip to the next opening tag, returning false if we reach a closing 8906741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch // tag or EOF first. 9006741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch // If currently on an opening tag, doesn't advance at all. 9106741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch bool SkipToElement(); 9206741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch 9306741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch // Returns the errors reported by libxml, if any. 9406741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch // (libxml normally just dumps these errors to stderr.) 9506741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch const std::string& errors() const { return errors_; } 9606741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch 9706741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch private: 9806741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch // A callback for libxml to report errors. 9906741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch static void GenericErrorCallback(void* context, const char* msg, ...); 10006741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch 10106741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch // Returns the libxml node type of the current node. 10206741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch int NodeType() { return xmlTextReaderNodeType(reader_); } 10306741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch 10406741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch // The underlying libxml xmlTextReader. 10506741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch xmlTextReaderPtr reader_; 10606741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch 10706741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch // error_func_ is used to reassign libxml's global error function 10806741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch // to report errors into |errors_| for the lifetime of this object. 10906741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch ScopedXmlErrorFunc error_func_; 11006741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch std::string errors_; 11106741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch}; 11206741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch 11306741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch// XmlWriter is a wrapper class around libxml's xmlWriter, 11406741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch// providing a simplified C++ API. 11506741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch// StartWriting must be called before other methods, and StopWriting 11606741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch// must be called before GetWrittenString() will return results. 11706741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdochclass XmlWriter { 11806741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch public: 11906741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch XmlWriter(); 12006741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch ~XmlWriter(); 12106741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch 12206741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch // Allocates the xmlTextWriter and an xmlBuffer and starts an XML document. 12306741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch // This must be called before any other functions. By default, indenting is 12406741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch // set to true. 12506741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch void StartWriting(); 12606741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch 12706741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch // Ends the XML document and frees the xmlTextWriter. 12806741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch // This must be called before GetWrittenString() is called. 12906741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch void StopWriting(); 13006741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch // Wrappers around libxml functions ----------------------------------------- 13106741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch 13206741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch // All following elements will be indented to match their depth. 13306741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch void StartIndenting() { xmlTextWriterSetIndent(writer_, 1); } 13406741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch 13506741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch // All follow elements will not be indented. 13606741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch void StopIndenting() { xmlTextWriterSetIndent(writer_, 0); } 13706741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch 13806741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch // Start an element with the given name. All future elements added will be 13906741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch // children of this element, until it is ended. Returns false on error. 14006741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch bool StartElement(const std::string& element_name) { 14106741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch return xmlTextWriterStartElement(writer_, 14206741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch BAD_CAST element_name.c_str()) >= 0; 14306741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch } 14406741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch 14506741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch // Ends the current open element. Returns false on error. 14606741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch bool EndElement() { 14706741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch return xmlTextWriterEndElement(writer_) >= 0; 14806741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch } 14906741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch 15006741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch // Adds an attribute to the current open element. Returns false on error. 15106741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch bool AddAttribute(const std::string& attribute_name, 15206741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch const std::string& attribute_value) { 15306741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch return xmlTextWriterWriteAttribute(writer_, 15406741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch BAD_CAST attribute_name.c_str(), 15506741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch BAD_CAST attribute_value.c_str()) >= 0; 15606741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch } 15706741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch 15806741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch // Adds a new element with name |element_name| and content |content| 15906741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch // to the buffer. Example: <|element_name|>|content|</|element_name|> 16006741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch // Returns false on errors. 16106741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch bool WriteElement(const std::string& element_name, 16206741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch const std::string& content) { 16306741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch return xmlTextWriterWriteElement(writer_, 16406741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch BAD_CAST element_name.c_str(), 16506741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch BAD_CAST content.c_str()) >= 0; 16606741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch } 16706741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch 16806741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch // Helper functions not provided by xmlTextWriter --------------------------- 16906741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch 17006741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch // Returns the string that has been written to the buffer. 17106741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch std::string GetWrittenString() { 17206741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch if (buffer_ == NULL) 17306741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch return ""; 17406741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch return XmlStringToStdString(buffer_->content); 17506741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch } 17606741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch 17706741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch private: 17806741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch // The underlying libxml xmlTextWriter. 17906741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch xmlTextWriterPtr writer_; 18006741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch 18106741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch // Stores the output. 18206741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch xmlBufferPtr buffer_; 18306741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch}; 18406741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch 18506741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch#endif // CHROME_COMMON_LIBXML_UTILS_H__ 186