// Protocol Buffers - Google's data interchange format
// Copyright 2008 Google Inc.  All rights reserved.
// http://code.google.com/p/protobuf/
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are
// met:
//
//     * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
//     * Redistributions in binary form must reproduce the above
// copyright notice, this list of conditions and the following disclaimer
// in the documentation and/or other materials provided with the
// distribution.
//     * Neither the name of Google Inc. nor the names of its
// contributors may be used to endorse or promote products derived from
// this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

// Author: kenton@google.com (Kenton Varda)
//  Based on original Protocol Buffers design by
//  Sanjay Ghemawat, Jeff Dean, and others.
//
// Class for parsing tokenized text from a ZeroCopyInputStream.
36fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 37fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville#ifndef GOOGLE_PROTOBUF_IO_TOKENIZER_H__ 38fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville#define GOOGLE_PROTOBUF_IO_TOKENIZER_H__ 39fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 40fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville#include <string> 41fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville#include <google/protobuf/stubs/common.h> 42fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 43fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Savillenamespace google { 44fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Savillenamespace protobuf { 45fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Savillenamespace io { 46fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 47fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Savilleclass ZeroCopyInputStream; // zero_copy_stream.h 48fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 49fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// Defined in this file. 50fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Savilleclass ErrorCollector; 51fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Savilleclass Tokenizer; 52fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 53fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// Abstract interface for an object which collects the errors that occur 54fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// during parsing. A typical implementation might simply print the errors 55fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// to stdout. 
56fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Savilleclass LIBPROTOBUF_EXPORT ErrorCollector { 57fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville public: 58fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville inline ErrorCollector() {} 59fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville virtual ~ErrorCollector(); 60fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 61fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // Indicates that there was an error in the input at the given line and 62fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // column numbers. The numbers are zero-based, so you may want to add 63fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // 1 to each before printing them. 64fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville virtual void AddError(int line, int column, const string& message) = 0; 65fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 66d0332953cda33fb4f8e24ebff9c49159b69c43d6Wink Saville // Indicates that there was a warning in the input at the given line and 67d0332953cda33fb4f8e24ebff9c49159b69c43d6Wink Saville // column numbers. The numbers are zero-based, so you may want to add 68d0332953cda33fb4f8e24ebff9c49159b69c43d6Wink Saville // 1 to each before printing them. 69d0332953cda33fb4f8e24ebff9c49159b69c43d6Wink Saville virtual void AddWarning(int line, int column, const string& message) { } 70d0332953cda33fb4f8e24ebff9c49159b69c43d6Wink Saville 71fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville private: 72fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(ErrorCollector); 73fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville}; 74fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 75fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// This class converts a stream of raw text into a stream of tokens for 76fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// the protocol definition parser to parse. 
The tokens recognized are 77fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// similar to those that make up the C language; see the TokenType enum for 78fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// precise descriptions. Whitespace and comments are skipped. By default, 79fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// C- and C++-style comments are recognized, but other styles can be used by 80fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// calling set_comment_style(). 81fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Savilleclass LIBPROTOBUF_EXPORT Tokenizer { 82fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville public: 83fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // Construct a Tokenizer that reads and tokenizes text from the given 84fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // input stream and writes errors to the given error_collector. 85fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // The caller keeps ownership of input and error_collector. 86fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville Tokenizer(ZeroCopyInputStream* input, ErrorCollector* error_collector); 87fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville ~Tokenizer(); 88fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 89fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville enum TokenType { 90fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville TYPE_START, // Next() has not yet been called. 91fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville TYPE_END, // End of input reached. "text" is empty. 92fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 93fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville TYPE_IDENTIFIER, // A sequence of letters, digits, and underscores, not 94fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // starting with a digit. 
It is an error for a number 95fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // to be followed by an identifier with no space in 96fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // between. 97fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville TYPE_INTEGER, // A sequence of digits representing an integer. Normally 98fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // the digits are decimal, but a prefix of "0x" indicates 99fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // a hex number and a leading zero indicates octal, just 100fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // like with C numeric literals. A leading negative sign 101fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // is NOT included in the token; it's up to the parser to 102fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // interpret the unary minus operator on its own. 103fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville TYPE_FLOAT, // A floating point literal, with a fractional part and/or 104fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // an exponent. Always in decimal. Again, never 105fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // negative. 106fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville TYPE_STRING, // A quoted sequence of escaped characters. Either single 107fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // or double quotes can be used, but they must match. 108fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // A string literal cannot cross a line break. 109fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville TYPE_SYMBOL, // Any other printable character, like '!' or '+'. 110fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // Symbols are always a single character, so "!+$%" is 111fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // four tokens. 
112fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville }; 113fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 114fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // Structure representing a token read from the token stream. 115fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville struct Token { 116fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville TokenType type; 117fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville string text; // The exact text of the token as it appeared in 118fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // the input. e.g. tokens of TYPE_STRING will still 119fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // be escaped and in quotes. 120fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 121fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // "line" and "column" specify the position of the first character of 122fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // the token within the input stream. They are zero-based. 123fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville int line; 124fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville int column; 125fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville }; 126fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 127fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // Get the current token. This is updated when Next() is called. Before 128fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // the first call to Next(), current() has type TYPE_START and no contents. 129fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville const Token& current(); 130fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 131fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // Advance to the next token. Returns false if the end of the input is 132fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // reached. 
133fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville bool Next(); 134fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 135fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // Parse helpers --------------------------------------------------- 136fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 137fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // Parses a TYPE_FLOAT token. This never fails, so long as the text actually 138fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // comes from a TYPE_FLOAT token parsed by Tokenizer. If it doesn't, the 139fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // result is undefined (possibly an assert failure). 140fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville static double ParseFloat(const string& text); 141fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 142fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // Parses a TYPE_STRING token. This never fails, so long as the text actually 143fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // comes from a TYPE_STRING token parsed by Tokenizer. If it doesn't, the 144fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // result is undefined (possibly an assert failure). 145fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville static void ParseString(const string& text, string* output); 146fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 147fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // Identical to ParseString, but appends to output. 148fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville static void ParseStringAppend(const string& text, string* output); 149fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 150fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // Parses a TYPE_INTEGER token. Returns false if the result would be 151fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // greater than max_value. Otherwise, returns true and sets *output to the 152fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // result. 
If the text is not from a Token of type TYPE_INTEGER originally 153fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // parsed by a Tokenizer, the result is undefined (possibly an assert 154fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // failure). 155fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville static bool ParseInteger(const string& text, uint64 max_value, 156fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville uint64* output); 157fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 158fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // Options --------------------------------------------------------- 159fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 160fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // Set true to allow floats to be suffixed with the letter 'f'. Tokens 161fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // which would otherwise be integers but which have the 'f' suffix will be 162fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // forced to be interpreted as floats. For all other purposes, the 'f' is 163fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // ignored. 164fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville void set_allow_f_after_float(bool value) { allow_f_after_float_ = value; } 165fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 166fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // Valid values for set_comment_style(). 167fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville enum CommentStyle { 168fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // Line comments begin with "//", block comments are delimited by "/*" and 169fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // "*/". 170fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville CPP_COMMENT_STYLE, 171fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // Line comments begin with "#". No way to write block comments. 
172fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville SH_COMMENT_STYLE 173fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville }; 174fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 175fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // Sets the comment style. 176fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville void set_comment_style(CommentStyle style) { comment_style_ = style; } 177fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 178fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // ----------------------------------------------------------------- 179fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville private: 180fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville GOOGLE_DISALLOW_EVIL_CONSTRUCTORS(Tokenizer); 181fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 182fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville Token current_; // Returned by current(). 183fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 184fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville ZeroCopyInputStream* input_; 185fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville ErrorCollector* error_collector_; 186fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 187fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville char current_char_; // == buffer_[buffer_pos_], updated by NextChar(). 188fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville const char* buffer_; // Current buffer returned from input_. 189fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville int buffer_size_; // Size of buffer_. 190fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville int buffer_pos_; // Current position within the buffer. 191fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville bool read_error_; // Did we previously encounter a read error? 192fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 193fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // Line and column number of current_char_ within the whole input stream. 
194fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville int line_; 195fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville int column_; 196fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 197fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // Position in buffer_ where StartToken() was called. If the token 198fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // started in the previous buffer, this is zero, and current_.text already 199fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // contains the part of the token from the previous buffer. If not 200fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // currently parsing a token, this is -1. 201fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville int token_start_; 202fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 203fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // Options. 204fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville bool allow_f_after_float_; 205fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville CommentStyle comment_style_; 206fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 207fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // Since we count columns we need to interpret tabs somehow. We'll take 208fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // the standard 8-character definition for lack of any way to do better. 209fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville static const int kTabWidth = 8; 210fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 211fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // ----------------------------------------------------------------- 212fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // Helper methods. 213fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 214fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // Consume this character and advance to the next one. 
215fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville void NextChar(); 216fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 217fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // Read a new buffer from the input. 218fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville void Refresh(); 219fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 220fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // Called when the current character is the first character of a new 221fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // token (not including whitespace or comments). 222fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville inline void StartToken(); 223fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // Called when the current character is the first character after the 224fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // end of the last token. After this returns, current_.text will 225fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // contain all text consumed since StartToken() was called. 226fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville inline void EndToken(); 227fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 228fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // Convenience method to add an error at the current line and column. 229fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville void AddError(const string& message) { 230fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville error_collector_->AddError(line_, column_, message); 231fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville } 232fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 233fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // ----------------------------------------------------------------- 234fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // The following four methods are used to consume tokens of specific 235fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // types. 
They are actually used to consume all characters *after* 236fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // the first, since the calling function consumes the first character 237fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // in order to decide what kind of token is being read. 238fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 239fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // Read and consume a string, ending when the given delimiter is 240fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // consumed. 241fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville void ConsumeString(char delimiter); 242fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 243fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // Read and consume a number, returning TYPE_FLOAT or TYPE_INTEGER 244fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // depending on what was read. This needs to know if the first 245fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // character was a zero in order to correctly recognize hex and octal 246fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // numbers. 247fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // It also needs to know if the first characted was a . to parse floating 248fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // point correctly. 249fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville TokenType ConsumeNumber(bool started_with_zero, bool started_with_dot); 250fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 251fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // Consume the rest of a line. 252fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville void ConsumeLineComment(); 253fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // Consume until "*/". 
254fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville void ConsumeBlockComment(); 255fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 256fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // ----------------------------------------------------------------- 257fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // These helper methods make the parsing code more readable. The 258fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // "character classes" refered to are defined at the top of the .cc file. 259fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // Basically it is a C++ class with one method: 260fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // static bool InClass(char c); 261fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // The method returns true if c is a member of this "class", like "Letter" 262fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // or "Digit". 263fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 264fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // Returns true if the current character is of the given character 265fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // class, but does not consume anything. 266fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville template<typename CharacterClass> 267fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville inline bool LookingAt(); 268fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 269fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // If the current character is in the given class, consume it and return 270fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // true. Otherwise return false. 271fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // e.g. 
TryConsumeOne<Letter>() 272fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville template<typename CharacterClass> 273fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville inline bool TryConsumeOne(); 274fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 275fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // Like above, but try to consume the specific character indicated. 276fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville inline bool TryConsume(char c); 277fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 278fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // Consume zero or more of the given character class. 279fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville template<typename CharacterClass> 280fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville inline void ConsumeZeroOrMore(); 281fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 282fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // Consume one or more of the given character class or log the given 283fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // error message. 284fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville // e.g. 
ConsumeOneOrMore<Digit>("Expected digits."); 285fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville template<typename CharacterClass> 286fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville inline void ConsumeOneOrMore(const char* error); 287fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville}; 288fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 289fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville// inline methods ==================================================== 290fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Savilleinline const Tokenizer::Token& Tokenizer::current() { 291fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville return current_; 292fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville} 293fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 294fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Savilleinline void Tokenizer::ParseString(const string& text, string* output) { 295fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville output->clear(); 296fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville ParseStringAppend(text, output); 297fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville} 298fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 299fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville} // namespace io 300fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville} // namespace protobuf 301fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville 302fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville} // namespace google 303fbaaef999ba563838ebd00874ed8a1c01fbf286dWink Saville#endif // GOOGLE_PROTOBUF_IO_TOKENIZER_H__ 304