1d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch// Copyright (c) 2013 The Chromium Authors. All rights reserved. 2d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch// Use of this source code is governed by a BSD-style license that can be 3d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch// found in the LICENSE file. 4d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch 5d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch#include "tools/gn/tokenizer.h" 6d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch 7d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch#include "base/logging.h" 81320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci#include "base/strings/string_util.h" 9d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch#include "tools/gn/input_file.h" 10d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch 11d3868032626d59662ff73b372b5d584c1d144c53Ben Murdochnamespace { 12d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch 13d3868032626d59662ff73b372b5d584c1d144c53Ben Murdochbool CouldBeTwoCharOperatorBegin(char c) { 14d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch return c == '<' || c == '>' || c == '!' || c == '=' || c == '-' || 15d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch c == '+' || c == '|' || c == '&'; 16d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch} 17d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch 18d3868032626d59662ff73b372b5d584c1d144c53Ben Murdochbool CouldBeTwoCharOperatorEnd(char c) { 19d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch return c == '=' || c == '|' || c == '&'; 20d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch} 21d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch 22d3868032626d59662ff73b372b5d584c1d144c53Ben Murdochbool CouldBeOneCharOperator(char c) { 23d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch return c == '=' || c == '<' || c == '>' || c == '+' || c == '!' || 24d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch c == ':' || c == '|' || c == '&' || c == '-'; 25d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch} 26d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch 27d3868032626d59662ff73b372b5d584c1d144c53Ben Murdochbool CouldBeOperator(char c) { 28d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch return CouldBeOneCharOperator(c) || CouldBeTwoCharOperatorBegin(c); 29d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch} 30d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch 31d3868032626d59662ff73b372b5d584c1d144c53Ben Murdochbool IsScoperChar(char c) { 32d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch return c == '(' || c == ')' || c == '[' || c == ']' || c == '{' || c == '}'; 33d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch} 34d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch 353551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)Token::Type GetSpecificOperatorType(base::StringPiece value) { 363551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) if (value == "=") 373551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) return Token::EQUAL; 383551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) if (value == "+") 393551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) return Token::PLUS; 403551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) if (value == "-") 413551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) return Token::MINUS; 423551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) if (value == "+=") 433551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) return Token::PLUS_EQUALS; 443551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) if (value == "-=") 453551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) return Token::MINUS_EQUALS; 463551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) if (value == "==") 473551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) return Token::EQUAL_EQUAL; 483551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) if (value == "!=") 493551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) return Token::NOT_EQUAL; 503551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) if (value == "<=") 513551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) return Token::LESS_EQUAL; 523551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) if (value == ">=") 533551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) return Token::GREATER_EQUAL; 543551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) if (value == "<") 553551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) return Token::LESS_THAN; 563551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) if (value == ">") 573551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) return Token::GREATER_THAN; 583551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) if (value == "&&") 593551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) return Token::BOOLEAN_AND; 603551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) if (value == "||") 613551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) return Token::BOOLEAN_OR; 623551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) if (value == "!") 633551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) return Token::BANG; 64effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch if (value == ".") 65effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch return Token::DOT; 663551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) return Token::INVALID; 673551c9c881056c480085172ff9840cab31610854Torne (Richard Coles)} 683551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) 69d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch} // namespace 70d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch 71d3868032626d59662ff73b372b5d584c1d144c53Ben MurdochTokenizer::Tokenizer(const InputFile* input_file, Err* err) 72d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch : input_file_(input_file), 73d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch input_(input_file->contents()), 74d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch err_(err), 75d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch cur_(0), 76d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch line_number_(1), 77d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch char_in_line_(1) { 78d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch} 79d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch 80d3868032626d59662ff73b372b5d584c1d144c53Ben MurdochTokenizer::~Tokenizer() { 81d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch} 82d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch 83d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch// static 84d3868032626d59662ff73b372b5d584c1d144c53Ben Murdochstd::vector<Token> Tokenizer::Tokenize(const InputFile* input_file, Err* err) { 85d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch Tokenizer t(input_file, err); 86d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch return t.Run(); 87d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch} 88d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch 89d3868032626d59662ff73b372b5d584c1d144c53Ben Murdochstd::vector<Token> Tokenizer::Run() { 903551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) DCHECK(tokens_.empty()); 91d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch while (!done()) { 92d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch AdvanceToNextToken(); 93d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch if (done()) 94d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch break; 95d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch Location location = GetCurrentLocation(); 96d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch 97d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch Token::Type type = ClassifyCurrent(); 98d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch if (type == Token::INVALID) { 99d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch *err_ = GetErrorForInvalidToken(location); 100d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch break; 101d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch } 102d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch size_t token_begin = cur_; 103d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch AdvanceToEndOfToken(location, type); 104d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch if (has_error()) 105d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch break; 106d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch size_t token_end = cur_; 107d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch 1083551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) base::StringPiece token_value(&input_.data()[token_begin], 1093551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) token_end - token_begin); 1103551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) 1111320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (type == Token::UNCLASSIFIED_OPERATOR) { 1123551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) type = GetSpecificOperatorType(token_value); 1131320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci } else if (type == Token::IDENTIFIER) { 1143551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) if (token_value == "if") 1153551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) type = Token::IF; 1163551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) else if (token_value == "else") 1173551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) type = Token::ELSE; 1183551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) else if (token_value == "true") 1193551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) type = Token::TRUE_TOKEN; 1203551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) else if (token_value == "false") 1213551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) type = Token::FALSE_TOKEN; 1221320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci } else if (type == Token::UNCLASSIFIED_COMMENT) { 1231320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (AtStartOfLine(token_begin) && 1241320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci // If it's a standalone comment, but is a continuation of a comment on 1251320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci // a previous line, then instead make it a continued suffix comment. 1261320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci (tokens_.empty() || tokens_.back().type() != Token::SUFFIX_COMMENT || 1271320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci tokens_.back().location().line_number() + 1 != 1281320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci location.line_number() || 1291320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci tokens_.back().location().char_offset() != location.char_offset())) { 1301320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci type = Token::LINE_COMMENT; 1311320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci Advance(); // The current \n. 1321320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci // If this comment is separated from the next syntax element, then we 1331320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci // want to tag it as a block comment. This will become a standalone 1341320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci // statement at the parser level to keep this comment separate, rather 1351320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci // than attached to the subsequent statement. 1361320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci while (!at_end() && IsCurrentWhitespace()) { 1371320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (IsCurrentNewline()) { 1381320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci type = Token::BLOCK_COMMENT; 1391320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci break; 1401320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci } 1411320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci Advance(); 1421320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci } 1431320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci } else { 1441320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci type = Token::SUFFIX_COMMENT; 1451320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci } 1463551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) } 1473551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) 1481320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci tokens_.push_back(Token(location, type, token_value)); 149d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch } 150d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch if (err_->has_error()) 1513551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) tokens_.clear(); 1523551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) return tokens_; 153d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch} 154d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch 155d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch// static 156d3868032626d59662ff73b372b5d584c1d144c53Ben Murdochsize_t Tokenizer::ByteOffsetOfNthLine(const base::StringPiece& buf, int n) { 157116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch DCHECK_GT(n, 0); 158d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch 159d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch if (n == 1) 160d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch return 0; 161d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch 162116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch int cur_line = 1; 163116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch size_t cur_byte = 0; 164d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch while (cur_byte < buf.size()) { 165d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch if (IsNewline(buf, cur_byte)) { 166d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch cur_line++; 167d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch if (cur_line == n) 168d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch return cur_byte + 1; 169d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch } 170d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch cur_byte++; 171d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch } 172116680a4aac90f2aa7413d9095a592090648e557Ben Murdoch return static_cast<size_t>(-1); 173d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch} 174d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch 175d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch// static 176d3868032626d59662ff73b372b5d584c1d144c53Ben Murdochbool Tokenizer::IsNewline(const base::StringPiece& buffer, size_t offset) { 177d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch DCHECK(offset < buffer.size()); 178d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch // We may need more logic here to handle different line ending styles. 179d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch return buffer[offset] == '\n'; 180d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch} 181d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch 182d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch 183d3868032626d59662ff73b372b5d584c1d144c53Ben Murdochvoid Tokenizer::AdvanceToNextToken() { 184d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch while (!at_end() && IsCurrentWhitespace()) 185d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch Advance(); 186d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch} 187d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch 188d3868032626d59662ff73b372b5d584c1d144c53Ben MurdochToken::Type Tokenizer::ClassifyCurrent() const { 189d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch DCHECK(!at_end()); 190d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch char next_char = cur_char(); 191cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) if (IsAsciiDigit(next_char)) 192d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch return Token::INTEGER; 193d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch if (next_char == '"') 194d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch return Token::STRING; 195d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch 196d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch // Note: '-' handled specially below. 197d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch if (next_char != '-' && CouldBeOperator(next_char)) 1983551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) return Token::UNCLASSIFIED_OPERATOR; 199d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch 200d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch if (IsIdentifierFirstChar(next_char)) 201d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch return Token::IDENTIFIER; 202d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch 2033551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) if (next_char == '[') 2043551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) return Token::LEFT_BRACKET; 2053551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) if (next_char == ']') 2063551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) return Token::RIGHT_BRACKET; 2073551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) if (next_char == '(') 2083551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) return Token::LEFT_PAREN; 2093551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) if (next_char == ')') 2103551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) return Token::RIGHT_PAREN; 2113551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) if (next_char == '{') 2123551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) return Token::LEFT_BRACE; 2133551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) if (next_char == '}') 2143551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) return Token::RIGHT_BRACE; 2153551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) 216effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch if (next_char == '.') 217effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch return Token::DOT; 2183551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) if (next_char == ',') 2193551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) return Token::COMMA; 220d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch 221d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch if (next_char == '#') 2221320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return Token::UNCLASSIFIED_COMMENT; 223d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch 224d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch // For the case of '-' differentiate between a negative number and anything 225d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch // else. 226d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch if (next_char == '-') { 227d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch if (!CanIncrement()) 2283551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) return Token::UNCLASSIFIED_OPERATOR; // Just the minus before end of 2293551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) // file. 230d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch char following_char = input_[cur_ + 1]; 231cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) if (IsAsciiDigit(following_char)) 232d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch return Token::INTEGER; 2333551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) return Token::UNCLASSIFIED_OPERATOR; 234d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch } 235d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch 236d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch return Token::INVALID; 237d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch} 238d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch 239d3868032626d59662ff73b372b5d584c1d144c53Ben Murdochvoid Tokenizer::AdvanceToEndOfToken(const Location& location, 240d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch Token::Type type) { 241d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch switch (type) { 242d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch case Token::INTEGER: 243d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch do { 244d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch Advance(); 245cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) } while (!at_end() && IsAsciiDigit(cur_char())); 246d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch if (!at_end()) { 247d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch // Require the char after a number to be some kind of space, scope, 248d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch // or operator. 249d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch char c = cur_char(); 250d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch if (!IsCurrentWhitespace() && !CouldBeOperator(c) && 2513551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) !IsScoperChar(c) && c != ',') { 252d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch *err_ = Err(GetCurrentLocation(), 253cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) "This is not a valid number.", 254cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) "Learn to count."); 255d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch // Highlight the number. 256d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch err_->AppendRange(LocationRange(location, GetCurrentLocation())); 257d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch } 258d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch } 259d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch break; 260d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch 261d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch case Token::STRING: { 262d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch char initial = cur_char(); 263d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch Advance(); // Advance past initial " 264d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch for (;;) { 265d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch if (at_end()) { 266cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) *err_ = Err(LocationRange(location, GetCurrentLocation()), 267cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) "Unterminated string literal.", 268cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) "Don't leave me hanging like this!"); 269d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch break; 270d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch } 271d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch if (IsCurrentStringTerminator(initial)) { 272d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch Advance(); // Skip past last " 273d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch break; 274d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch } else if (cur_char() == '\n') { 275cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) *err_ = Err(LocationRange(location, GetCurrentLocation()), 276cedac228d2dd51db4b79ea1e72c7f249408ee061Torne (Richard Coles) "Newline in string constant."); 277d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch } 278d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch Advance(); 279d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch } 280d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch break; 281d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch } 282d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch 2833551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) case Token::UNCLASSIFIED_OPERATOR: 284d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch // Some operators are two characters, some are one. 285d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch if (CouldBeTwoCharOperatorBegin(cur_char())) { 286d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch if (CanIncrement() && CouldBeTwoCharOperatorEnd(input_[cur_ + 1])) 287d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch Advance(); 288d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch } 289d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch Advance(); 290d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch break; 291d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch 292d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch case Token::IDENTIFIER: 293d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch while (!at_end() && IsIdentifierContinuingChar(cur_char())) 294d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch Advance(); 295d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch break; 296d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch 2973551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) case Token::LEFT_BRACKET: 2983551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) case Token::RIGHT_BRACKET: 2993551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) case Token::LEFT_BRACE: 3003551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) case Token::RIGHT_BRACE: 3013551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) case Token::LEFT_PAREN: 3023551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) case Token::RIGHT_PAREN: 303effb81e5f8246d0db0270817048dc992db66e9fbBen Murdoch case Token::DOT: 3043551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) case Token::COMMA: 305d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch Advance(); // All are one char. 306d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch break; 307d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch 3081320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci case Token::UNCLASSIFIED_COMMENT: 309d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch // Eat to EOL. 310d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch while (!at_end() && !IsCurrentNewline()) 311d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch Advance(); 312d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch break; 313d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch 314d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch case Token::INVALID: 3153551c9c881056c480085172ff9840cab31610854Torne (Richard Coles) default: 316d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch *err_ = Err(location, "Everything is all messed up", 317d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch "Please insert system disk in drive A: and press any key."); 318d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch NOTREACHED(); 319d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch return; 320d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch } 321d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch} 322d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch 3231320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tuccibool Tokenizer::AtStartOfLine(size_t location) const { 3241320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci while (location > 0) { 3251320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci --location; 3261320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci char c = input_[location]; 3271320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (c == '\n') 3281320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return true; 3291320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci if (c != ' ') 3301320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return false; 3311320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci } 3321320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return true; 3331320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci} 3341320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci 335d3868032626d59662ff73b372b5d584c1d144c53Ben Murdochbool Tokenizer::IsCurrentWhitespace() const { 336d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch DCHECK(!at_end()); 337d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch char c = input_[cur_]; 3381320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci // Note that tab (0x09), vertical tab (0x0B), and formfeed (0x0C) are illegal. 3391320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return c == 0x0A || c == 0x0D || c == 0x20; 340d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch} 341d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch 342d3868032626d59662ff73b372b5d584c1d144c53Ben Murdochbool Tokenizer::IsCurrentStringTerminator(char quote_char) const { 343d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch DCHECK(!at_end()); 344d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch if (cur_char() != quote_char) 345d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch return false; 346d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch 347d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch // Check for escaping. \" is not a string terminator, but \\" is. Count 348d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch // the number of preceeding backslashes. 349d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch int num_backslashes = 0; 350d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch for (int i = static_cast<int>(cur_) - 1; i >= 0 && input_[i] == '\\'; i--) 351d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch num_backslashes++; 352d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch 353d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch // Even backslashes mean that they were escaping each other and don't count 354d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch // as escaping this quote. 355d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch return (num_backslashes % 2) == 0; 356d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch} 357d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch 358d3868032626d59662ff73b372b5d584c1d144c53Ben Murdochbool Tokenizer::IsCurrentNewline() const { 359d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch return IsNewline(input_, cur_); 360d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch} 361d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch 362d3868032626d59662ff73b372b5d584c1d144c53Ben Murdochvoid Tokenizer::Advance() { 363d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch DCHECK(cur_ < input_.size()); 364d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch if (IsCurrentNewline()) { 365d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch line_number_++; 366d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch char_in_line_ = 1; 367d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch } else { 368d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch char_in_line_++; 369d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch } 370d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch cur_++; 371d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch} 372d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch 373d3868032626d59662ff73b372b5d584c1d144c53Ben MurdochLocation Tokenizer::GetCurrentLocation() const { 3741320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci return Location( 3751320f92c476a1ad9d19dba2a48c72b75566198e9Primiano Tucci input_file_, line_number_, char_in_line_, static_cast<int>(cur_)); 376d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch} 377d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch 378d3868032626d59662ff73b372b5d584c1d144c53Ben MurdochErr Tokenizer::GetErrorForInvalidToken(const Location& location) const { 379d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch std::string help; 380d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch if (cur_char() == ';') { 381d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch // Semicolon. 382d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch help = "Semicolons are not needed, delete this one."; 383d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch } else if (cur_char() == '\t') { 384d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch // Tab. 385d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch help = "You got a tab character in here. Tabs are evil. " 386d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch "Convert to spaces."; 387d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch } else if (cur_char() == '/' && cur_ + 1 < input_.size() && 388d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch (input_[cur_ + 1] == '/' || input_[cur_ + 1] == '*')) { 389d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch // Different types of comments. 390d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch help = "Comments should start with # instead"; 391d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch } else { 392d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch help = "I have no idea what this is."; 393d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch } 394d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch 395d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch return Err(location, "Invalid token.", help); 396d3868032626d59662ff73b372b5d584c1d144c53Ben Murdoch} 397