// Copyright (c) 2006-2008 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
406741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch
506741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch#ifndef CHROME_COMMON_LIBXML_UTILS_H__
606741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch#define CHROME_COMMON_LIBXML_UTILS_H__
73345a6884c488ff3a535c2c9acdd33d74b37e311Iain Merrick#pragma once
806741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch
906741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch#include <string>
1006741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch
1106741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch#include "libxml/xmlreader.h"
1206741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch#include "libxml/xmlwriter.h"
1306741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch
1472a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsenclass FilePath;
1572a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen
// Converts a libxml xmlChar* into a UTF-8 std::string.
// NULL inputs produce an empty string.
1806741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdochstd::string XmlStringToStdString(const xmlChar* xmlstring);
1906741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch
2006741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch// libxml uses a global error function pointer for reporting errors.
2106741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch// A ScopedXmlErrorFunc object lets you change the global error pointer
2206741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch// for the duration of the object's lifetime.
2306741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdochclass ScopedXmlErrorFunc {
2406741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch public:
2506741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch  ScopedXmlErrorFunc(void* context, xmlGenericErrorFunc func) {
2606741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch    old_error_func_ = xmlGenericError;
2706741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch    old_error_context_ = xmlGenericErrorContext;
2806741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch    xmlSetGenericErrorFunc(context, func);
2906741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch  }
3006741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch  ~ScopedXmlErrorFunc() {
3106741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch    xmlSetGenericErrorFunc(old_error_context_, old_error_func_);
3206741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch  }
3306741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch
3406741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch private:
3506741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch  xmlGenericErrorFunc old_error_func_;
3606741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch  void* old_error_context_;
3706741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch};
3806741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch
3906741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch// XmlReader is a wrapper class around libxml's xmlReader,
4006741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch// providing a simplified C++ API.
4106741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdochclass XmlReader {
4206741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch public:
4306741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch  XmlReader();
4406741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch  ~XmlReader();
4506741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch
4606741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch  // Load a document into the reader from memory.  |input| must be UTF-8 and
4706741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch  // exist for the lifetime of this object.  Returns false on error.
4806741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch  // TODO(evanm): handle encodings other than UTF-8?
4906741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch  bool Load(const std::string& input);
5006741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch
5106741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch  // Load a document into the reader from a file.  Returns false on error.
5272a454cd3513ac24fbdd0e0cb9ad70b86a99b801Kristian Monsen  bool LoadFile(const FilePath& file_path);
5306741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch
5406741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch  // Wrappers around libxml functions -----------------------------------------
5506741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch
5606741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch  // Read() advances to the next node.  Returns false on EOF or error.
5706741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch  bool Read() { return xmlTextReaderRead(reader_) == 1; }
5806741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch
5906741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch  // Next(), when pointing at an opening tag, advances to the node after
6006741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch  // the matching closing tag.  Returns false on EOF or error.
6106741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch  bool Next() { return xmlTextReaderNext(reader_) == 1; }
6206741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch
6306741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch  // Return the depth in the tree of the current node.
6406741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch  int Depth() { return xmlTextReaderDepth(reader_); }
6506741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch
6606741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch  // Returns the "local" name of the current node.
6706741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch  // For a tag like <foo:bar>, this is the string "foo:bar".
6806741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch  std::string NodeName() {
6906741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch    return XmlStringToStdString(xmlTextReaderConstLocalName(reader_));
7006741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch  }
7106741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch
7206741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch  // When pointing at a tag, retrieves the value of an attribute.
7306741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch  // Returns false on failure.
7406741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch  // E.g. for <foo bar:baz="a">, NodeAttribute("bar:baz", &value)
7506741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch  // returns true and |value| is set to "a".
7606741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch  bool NodeAttribute(const char* name, std::string* value);
7706741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch
7806741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch  // Helper functions not provided by libxml ----------------------------------
7906741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch
8006741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch  // Return the string content within an element.
8106741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch  // "<foo>bar</foo>" is a sequence of three nodes:
8206741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch  // (1) open tag, (2) text, (3) close tag.
8306741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch  // With the reader currently at (1), this returns the text of (2),
8406741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch  // and advances past (3).
8506741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch  // Returns false on error.
8606741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch  bool ReadElementContent(std::string* content);
8706741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch
8806741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch  // Skip to the next opening tag, returning false if we reach a closing
8906741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch  // tag or EOF first.
9006741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch  // If currently on an opening tag, doesn't advance at all.
9106741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch  bool SkipToElement();
9206741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch
9306741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch  // Returns the errors reported by libxml, if any.
9406741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch  // (libxml normally just dumps these errors to stderr.)
9506741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch  const std::string& errors() const { return errors_; }
9606741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch
9706741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch private:
9806741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch  // A callback for libxml to report errors.
9906741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch  static void GenericErrorCallback(void* context, const char* msg, ...);
10006741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch
10106741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch  // Returns the libxml node type of the current node.
10206741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch  int NodeType() { return xmlTextReaderNodeType(reader_); }
10306741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch
10406741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch  // The underlying libxml xmlTextReader.
10506741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch  xmlTextReaderPtr reader_;
10606741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch
10706741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch  // error_func_ is used to reassign libxml's global error function
10806741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch  // to report errors into |errors_| for the lifetime of this object.
10906741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch  ScopedXmlErrorFunc error_func_;
11006741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch  std::string errors_;
11106741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch};
11206741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch
11306741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch// XmlWriter is a wrapper class around libxml's xmlWriter,
11406741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch// providing a simplified C++ API.
11506741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch// StartWriting must be called before other methods, and StopWriting
11606741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch// must be called before GetWrittenString() will return results.
11706741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdochclass XmlWriter {
11806741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch public:
11906741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch  XmlWriter();
12006741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch  ~XmlWriter();
12106741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch
12206741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch  // Allocates the xmlTextWriter and an xmlBuffer and starts an XML document.
12306741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch  // This must be called before any other functions. By default, indenting is
12406741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch  // set to true.
12506741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch  void StartWriting();
12606741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch
12706741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch  // Ends the XML document and frees the xmlTextWriter.
12806741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch  // This must be called before GetWrittenString() is called.
12906741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch  void StopWriting();
13006741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch  // Wrappers around libxml functions -----------------------------------------
13106741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch
13206741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch  // All following elements will be indented to match their depth.
13306741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch  void StartIndenting() { xmlTextWriterSetIndent(writer_, 1); }
13406741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch
13506741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch  // All follow elements will not be indented.
13606741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch  void StopIndenting() { xmlTextWriterSetIndent(writer_, 0); }
13706741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch
13806741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch  // Start an element with the given name. All future elements added will be
13906741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch  // children of this element, until it is ended. Returns false on error.
14006741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch  bool StartElement(const std::string& element_name) {
14106741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch    return xmlTextWriterStartElement(writer_,
14206741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch                                     BAD_CAST element_name.c_str()) >= 0;
14306741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch  }
14406741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch
14506741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch  // Ends the current open element. Returns false on error.
14606741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch  bool EndElement() {
14706741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch    return xmlTextWriterEndElement(writer_) >= 0;
14806741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch  }
14906741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch
15006741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch  // Adds an attribute to the current open element. Returns false on error.
15106741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch  bool AddAttribute(const std::string& attribute_name,
15206741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch                    const std::string& attribute_value) {
15306741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch    return xmlTextWriterWriteAttribute(writer_,
15406741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch                                       BAD_CAST attribute_name.c_str(),
15506741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch                                       BAD_CAST attribute_value.c_str()) >= 0;
15606741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch  }
15706741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch
15806741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch  // Adds a new element with name |element_name| and content |content|
15906741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch  // to the buffer. Example: <|element_name|>|content|</|element_name|>
16006741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch  // Returns false on errors.
16106741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch  bool WriteElement(const std::string& element_name,
16206741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch                    const std::string& content) {
16306741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch    return xmlTextWriterWriteElement(writer_,
16406741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch                                     BAD_CAST element_name.c_str(),
16506741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch                                     BAD_CAST content.c_str()) >= 0;
16606741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch  }
16706741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch
16806741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch  // Helper functions not provided by xmlTextWriter ---------------------------
16906741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch
17006741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch  // Returns the string that has been written to the buffer.
17106741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch  std::string GetWrittenString() {
17206741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch    if (buffer_ == NULL)
17306741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch      return "";
17406741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch    return XmlStringToStdString(buffer_->content);
17506741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch  }
17606741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch
17706741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch private:
17806741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch  // The underlying libxml xmlTextWriter.
17906741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch  xmlTextWriterPtr writer_;
18006741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch
18106741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch  // Stores the output.
18206741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch  xmlBufferPtr buffer_;
18306741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch};
18406741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch
18506741cbc25cd4227a9fba40dfd0273bfcc1a587aBen Murdoch#endif  // CHROME_COMMON_LIBXML_UTILS_H__
186