tokenizer.h revision d3868032626d59662ff73b372b5d584c1d144c53
1d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch// Copyright (c) 2013 The Chromium Authors. All rights reserved. 2d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch// Use of this source code is governed by a BSD-style license that can be 3d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch// found in the LICENSE file. 4d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch 5d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch#ifndef TOOLS_GN_TOKENIZER_H_ 6d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch#define TOOLS_GN_TOKENIZER_H_ 7d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch 8d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch#include <vector> 9d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch 10d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch#include "base/basictypes.h" 11d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch#include "base/strings/string_piece.h" 12d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch#include "tools/gn/err.h" 13d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch#include "tools/gn/token.h" 14d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch 15d3868032626d59662ff73b372b5d584c1d144c53Ben Murdochclass InputFile; 16d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch 17d3868032626d59662ff73b372b5d584c1d144c53Ben Murdochclass Tokenizer { 18d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch public: 19d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch static std::vector<Token> Tokenize(const InputFile* input_file, Err* err); 20d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch 21d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch // Counts lines in the given buffer (the first line is "1") and returns 22d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch // the byte offset of the beginning of that line, or (size_t)-1 if there 23d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch // aren't that many lines in the file. Note that this will return the byte 24d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch // one past the end of the input if the last character is a newline. 25d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch // 26d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch // This is a helper function for error output so that the tokenizer's 27d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch // notion of lines can be used elsewhere. 28d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch static size_t ByteOffsetOfNthLine(const base::StringPiece& buf, int n); 29d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch 30d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch // Returns true if the given offset of the string piece counts as a newline. 31d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch // The offset must be in the buffer. 32d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch static bool IsNewline(const base::StringPiece& buffer, size_t offset); 33d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch 34d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch static bool IsIdentifierFirstChar(char c) { 35d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z') || c == '_'; 36d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch } 37d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch 38d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch static bool IsIdentifierContinuingChar(char c) { 39d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch // Also allow digits after the first char. 40d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch return IsIdentifierFirstChar(c) || (c >= '0' && c <= '9'); 41d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch } 42d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch 43d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch private: 44d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch // InputFile must outlive the tokenizer and all generated tokens. 45d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch explicit Tokenizer(const InputFile* input_file, Err* err); 46d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch ~Tokenizer(); 47d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch 48d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch std::vector<Token> Run(); 49d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch 50d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch void AdvanceToNextToken(); 51d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch Token::Type ClassifyCurrent() const; 52d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch void AdvanceToEndOfToken(const Location& location, Token::Type type); 53d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch 54d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch bool IsCurrentWhitespace() const; 55d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch bool IsCurrentNewline() const; 56d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch bool IsCurrentStringTerminator(char quote_char) const; 57d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch 58d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch bool CanIncrement() const { return cur_ < input_.size(); } 59d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch 60d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch // Increments the current location by one. 61d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch void Advance(); 62d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch 63d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch // Returns the current character in the file as a location. 64d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch Location GetCurrentLocation() const; 65d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch 66d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch Err GetErrorForInvalidToken(const Location& location) const; 67d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch 68d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch bool done() const { return at_end() || has_error(); } 69d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch 70d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch bool at_end() const { return cur_ == input_.size(); } 71d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch char cur_char() const { return input_[cur_]; } 72d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch 73d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch bool has_error() const { return err_->has_error(); } 74d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch 75d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch const InputFile* input_file_; 76d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch const base::StringPiece input_; 77d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch Err* err_; 78d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch size_t cur_; // Byte offset into input buffer. 79d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch 80d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch int line_number_; 81d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch int char_in_line_; 82d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch 83d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch DISALLOW_COPY_AND_ASSIGN(Tokenizer); 84d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch}; 85d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch 86d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch#endif // TOOLS_GN_TOKENIZER_H_ 87