UnwrappedLineParser.h revision 526ed11ad9743c773df76bd1649d33fb92c2b8cb
1//===--- UnwrappedLineParser.h - Format C++ code ----------------*- C++ -*-===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9/// 10/// \file 11/// \brief This file contains the declaration of the UnwrappedLineParser, 12/// which turns a stream of tokens into UnwrappedLines. 13/// 14/// This is EXPERIMENTAL code under heavy development. It is not in a state yet, 15/// where it can be used to format real code. 16/// 17//===----------------------------------------------------------------------===// 18 19#ifndef LLVM_CLANG_FORMAT_UNWRAPPED_LINE_PARSER_H 20#define LLVM_CLANG_FORMAT_UNWRAPPED_LINE_PARSER_H 21 22#include "clang/Basic/IdentifierTable.h" 23#include "clang/Basic/SourceManager.h" 24#include "clang/Format/Format.h" 25#include "clang/Lex/Lexer.h" 26 27#include <vector> 28 29namespace clang { 30namespace format { 31 32/// \brief A wrapper around a \c Token storing information about the 33/// whitespace characters preceeding it. 34struct FormatToken { 35 FormatToken() 36 : NewlinesBefore(0), HasUnescapedNewline(false), WhiteSpaceLength(0), 37 TokenLength(0), IsFirst(false), MustBreakBefore(false) { 38 } 39 40 /// \brief The \c Token. 41 Token Tok; 42 43 /// \brief The number of newlines immediately before the \c Token. 44 /// 45 /// This can be used to determine what the user wrote in the original code 46 /// and thereby e.g. leave an empty line between two function definitions. 47 unsigned NewlinesBefore; 48 49 /// \brief Whether there is at least one unescaped newline before the \c 50 /// Token. 51 bool HasUnescapedNewline; 52 53 /// \brief The location of the start of the whitespace immediately preceeding 54 /// the \c Token. 55 /// 56 /// Used together with \c WhiteSpaceLength to create a \c Replacement. 57 SourceLocation WhiteSpaceStart; 58 59 /// \brief The length in characters of the whitespace immediately preceeding 60 /// the \c Token. 61 unsigned WhiteSpaceLength; 62 63 /// \brief The length of the non-whitespace parts of the token. This is 64 /// necessary because we need to handle escaped newlines that are stored 65 /// with the token. 66 unsigned TokenLength; 67 68 /// \brief Indicates that this is the first token. 69 bool IsFirst; 70 71 /// \brief Whether there must be a line break before this token. 72 /// 73 /// This happens for example when a preprocessor directive ended directly 74 /// before the token. 75 bool MustBreakBefore; 76 77 // FIXME: We currently assume that there is exactly one token in this vector 78 // except for the very last token that does not have any children. 79 /// \brief All tokens that logically follow this token. 80 std::vector<FormatToken> Children; 81}; 82 83/// \brief An unwrapped line is a sequence of \c Token, that we would like to 84/// put on a single line if there was no column limit. 85/// 86/// This is used as a main interface between the \c UnwrappedLineParser and the 87/// \c UnwrappedLineFormatter. The key property is that changing the formatting 88/// within an unwrapped line does not affect any other unwrapped lines. 89struct UnwrappedLine { 90 UnwrappedLine() : Level(0), InPPDirective(false) { 91 } 92 93 /// \brief The \c Token comprising this \c UnwrappedLine. 94 FormatToken RootToken; 95 96 /// \brief The indent level of the \c UnwrappedLine. 97 unsigned Level; 98 99 /// \brief Whether this \c UnwrappedLine is part of a preprocessor directive. 100 bool InPPDirective; 101}; 102 103class UnwrappedLineConsumer { 104public: 105 virtual ~UnwrappedLineConsumer() { 106 } 107 virtual void consumeUnwrappedLine(const UnwrappedLine &Line) = 0; 108}; 109 110class FormatTokenSource { 111public: 112 virtual ~FormatTokenSource() { 113 } 114 virtual FormatToken getNextToken() = 0; 115}; 116 117class UnwrappedLineParser { 118public: 119 UnwrappedLineParser(const FormatStyle &Style, FormatTokenSource &Tokens, 120 UnwrappedLineConsumer &Callback); 121 122 /// Returns true in case of a structural error. 123 bool parse(); 124 125private: 126 bool parseFile(); 127 bool parseLevel(bool HasOpeningBrace); 128 bool parseBlock(unsigned AddLevels = 1); 129 void parsePPDirective(); 130 void parsePPDefine(); 131 void parsePPUnknown(); 132 void parseComments(); 133 void parseStructuralElement(); 134 void parseParens(); 135 void parseIfThenElse(); 136 void parseForOrWhileLoop(); 137 void parseDoWhile(); 138 void parseLabel(); 139 void parseCaseLabel(); 140 void parseSwitch(); 141 void parseNamespace(); 142 void parseAccessSpecifier(); 143 void parseEnum(); 144 void parseStructOrClass(); 145 void addUnwrappedLine(); 146 bool eof() const; 147 void nextToken(); 148 void readToken(); 149 150 // FIXME: We are constantly running into bugs where Line.Level is incorrectly 151 // subtracted from beyond 0. Introduce a method to subtract from Line.Level 152 // and use that everywhere in the Parser. 153 llvm::OwningPtr<UnwrappedLine> Line; 154 bool RootTokenInitialized; 155 FormatToken *LastInCurrentLine; 156 FormatToken FormatTok; 157 bool MustBreakBeforeNextToken; 158 159 const FormatStyle &Style; 160 FormatTokenSource *Tokens; 161 UnwrappedLineConsumer &Callback; 162}; 163 164} // end namespace format 165} // end namespace clang 166 167#endif // LLVM_CLANG_FORMAT_UNWRAPPED_LINE_PARSER_H 168