1// Copyright (c) 2013 The Chromium Authors. All rights reserved. 2// Use of this source code is governed by a BSD-style license that can be 3// found in the LICENSE file. 4 5#ifndef TOOLS_GN_TOKENIZER_H_ 6#define TOOLS_GN_TOKENIZER_H_ 7 8#include <vector> 9 10#include "base/basictypes.h" 11#include "base/strings/string_piece.h" 12#include "base/strings/string_util.h" 13#include "tools/gn/err.h" 14#include "tools/gn/token.h" 15 16class InputFile; 17 18class Tokenizer { 19 public: 20 static std::vector<Token> Tokenize(const InputFile* input_file, Err* err); 21 22 // Counts lines in the given buffer (the first line is "1") and returns 23 // the byte offset of the beginning of that line, or (size_t)-1 if there 24 // aren't that many lines in the file. Note that this will return the byte 25 // one past the end of the input if the last character is a newline. 26 // 27 // This is a helper function for error output so that the tokenizer's 28 // notion of lines can be used elsewhere. 29 static size_t ByteOffsetOfNthLine(const base::StringPiece& buf, int n); 30 31 // Returns true if the given offset of the string piece counts as a newline. 32 // The offset must be in the buffer. 33 static bool IsNewline(const base::StringPiece& buffer, size_t offset); 34 35 static bool IsIdentifierFirstChar(char c) { 36 return IsAsciiAlpha(c) || c == '_'; 37 } 38 39 static bool IsIdentifierContinuingChar(char c) { 40 // Also allow digits after the first char. 41 return IsIdentifierFirstChar(c) || IsAsciiDigit(c); 42 } 43 44 private: 45 // InputFile must outlive the tokenizer and all generated tokens. 46 explicit Tokenizer(const InputFile* input_file, Err* err); 47 ~Tokenizer(); 48 49 std::vector<Token> Run(); 50 51 void AdvanceToNextToken(); 52 Token::Type ClassifyCurrent() const; 53 void AdvanceToEndOfToken(const Location& location, Token::Type type); 54 55 bool IsCurrentWhitespace() const; 56 bool IsCurrentNewline() const; 57 bool IsCurrentStringTerminator(char quote_char) const; 58 59 bool CanIncrement() const { return cur_ < input_.size(); } 60 61 // Increments the current location by one. 62 void Advance(); 63 64 // Returns the current character in the file as a location. 65 Location GetCurrentLocation() const; 66 67 Err GetErrorForInvalidToken(const Location& location) const; 68 69 bool done() const { return at_end() || has_error(); } 70 71 bool at_end() const { return cur_ == input_.size(); } 72 char cur_char() const { return input_[cur_]; } 73 74 bool has_error() const { return err_->has_error(); } 75 76 std::vector<Token> tokens_; 77 78 const InputFile* input_file_; 79 const base::StringPiece input_; 80 Err* err_; 81 size_t cur_; // Byte offset into input buffer. 82 83 int line_number_; 84 int char_in_line_; 85 86 DISALLOW_COPY_AND_ASSIGN(Tokenizer); 87}; 88 89#endif // TOOLS_GN_TOKENIZER_H_ 90