1// Copyright (c) 2013 The Chromium Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style license that can be
3// found in the LICENSE file.
4
5#ifndef TOOLS_GN_TOKENIZER_H_
6#define TOOLS_GN_TOKENIZER_H_
7
8#include <vector>
9
10#include "base/basictypes.h"
11#include "base/strings/string_piece.h"
12#include "base/strings/string_util.h"
13#include "tools/gn/err.h"
14#include "tools/gn/token.h"
15
16class InputFile;
17
18class Tokenizer {
19 public:
20  static std::vector<Token> Tokenize(const InputFile* input_file, Err* err);
21
22  // Counts lines in the given buffer (the first line is "1") and returns
23  // the byte offset of the beginning of that line, or (size_t)-1 if there
24  // aren't that many lines in the file. Note that this will return the byte
25  // one past the end of the input if the last character is a newline.
26  //
27  // This is a helper function for error output so that the tokenizer's
28  // notion of lines can be used elsewhere.
29  static size_t ByteOffsetOfNthLine(const base::StringPiece& buf, int n);
30
31  // Returns true if the given offset of the string piece counts as a newline.
32  // The offset must be in the buffer.
33  static bool IsNewline(const base::StringPiece& buffer, size_t offset);
34
35  static bool IsIdentifierFirstChar(char c) {
36    return IsAsciiAlpha(c) || c == '_';
37  }
38
39  static bool IsIdentifierContinuingChar(char c) {
40    // Also allow digits after the first char.
41    return IsIdentifierFirstChar(c) || IsAsciiDigit(c);
42  }
43
44 private:
45  // InputFile must outlive the tokenizer and all generated tokens.
46  explicit Tokenizer(const InputFile* input_file, Err* err);
47  ~Tokenizer();
48
49  std::vector<Token> Run();
50
51  void AdvanceToNextToken();
52  Token::Type ClassifyCurrent() const;
53  void AdvanceToEndOfToken(const Location& location, Token::Type type);
54
55  bool IsCurrentWhitespace() const;
56  bool IsCurrentNewline() const;
57  bool IsCurrentStringTerminator(char quote_char) const;
58
59  bool CanIncrement() const { return cur_ < input_.size(); }
60
61  // Increments the current location by one.
62  void Advance();
63
64  // Returns the current character in the file as a location.
65  Location GetCurrentLocation() const;
66
67  Err GetErrorForInvalidToken(const Location& location) const;
68
69  bool done() const { return at_end() || has_error(); }
70
71  bool at_end() const { return cur_ == input_.size(); }
72  char cur_char() const { return input_[cur_]; }
73
74  bool has_error() const { return err_->has_error(); }
75
76  std::vector<Token> tokens_;
77
78  const InputFile* input_file_;
79  const base::StringPiece input_;
80  Err* err_;
81  size_t cur_;  // Byte offset into input buffer.
82
83  int line_number_;
84  int char_in_line_;
85
86  DISALLOW_COPY_AND_ASSIGN(Tokenizer);
87};
88
89#endif  // TOOLS_GN_TOKENIZER_H_
90