LiteralSupport.h revision 064c3f1ed782eb641df6bc00beec069b3c80fa8a
1//===--- LiteralSupport.h ---------------------------------------*- C++ -*-===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file defines the NumericLiteralParser, CharLiteralParser, and 11// StringLiteralParser interfaces. 12// 13//===----------------------------------------------------------------------===// 14 15#ifndef CLANG_LITERALSUPPORT_H 16#define CLANG_LITERALSUPPORT_H 17 18#include "clang/Basic/CharInfo.h" 19#include "clang/Basic/LLVM.h" 20#include "clang/Basic/TokenKinds.h" 21#include "llvm/ADT/APFloat.h" 22#include "llvm/ADT/SmallString.h" 23#include "llvm/ADT/StringRef.h" 24#include "llvm/Support/DataTypes.h" 25 26namespace clang { 27 28class DiagnosticsEngine; 29class Preprocessor; 30class Token; 31class SourceLocation; 32class TargetInfo; 33class SourceManager; 34class LangOptions; 35 36/// NumericLiteralParser - This performs strict semantic analysis of the content 37/// of a ppnumber, classifying it as either integer, floating, or erroneous, 38/// determines the radix of the value and can convert it to a useful value. 39class NumericLiteralParser { 40 Preprocessor &PP; // needed for diagnostics 41 42 const char *const ThisTokBegin; 43 const char *const ThisTokEnd; 44 const char *DigitsBegin, *SuffixBegin; // markers 45 const char *s; // cursor 46 47 unsigned radix; 48 49 bool saw_exponent, saw_period, saw_ud_suffix; 50 51public: 52 NumericLiteralParser(StringRef TokSpelling, 53 SourceLocation TokLoc, 54 Preprocessor &PP); 55 bool hadError; 56 bool isUnsigned; 57 bool isLong; // This is *not* set for long long. 58 bool isLongLong; 59 bool isFloat; // 1.0f 60 bool isImaginary; // 1.0i 61 bool isMicrosoftInteger; // Microsoft suffix extension i8, i16, i32, or i64. 62 63 bool isIntegerLiteral() const { 64 return !saw_period && !saw_exponent; 65 } 66 bool isFloatingLiteral() const { 67 return saw_period || saw_exponent; 68 } 69 70 bool hasUDSuffix() const { 71 return saw_ud_suffix; 72 } 73 StringRef getUDSuffix() const { 74 assert(saw_ud_suffix); 75 return StringRef(SuffixBegin, ThisTokEnd - SuffixBegin); 76 } 77 unsigned getUDSuffixOffset() const { 78 assert(saw_ud_suffix); 79 return SuffixBegin - ThisTokBegin; 80 } 81 82 static bool isValidUDSuffix(const LangOptions &LangOpts, StringRef Suffix); 83 84 unsigned getRadix() const { return radix; } 85 86 /// GetIntegerValue - Convert this numeric literal value to an APInt that 87 /// matches Val's input width. If there is an overflow (i.e., if the unsigned 88 /// value read is larger than the APInt's bits will hold), set Val to the low 89 /// bits of the result and return true. Otherwise, return false. 90 bool GetIntegerValue(llvm::APInt &Val); 91 92 /// GetFloatValue - Convert this numeric literal to a floating value, using 93 /// the specified APFloat fltSemantics (specifying float, double, etc). 94 /// The optional bool isExact (passed-by-reference) has its value 95 /// set to true if the returned APFloat can represent the number in the 96 /// literal exactly, and false otherwise. 97 llvm::APFloat::opStatus GetFloatValue(llvm::APFloat &Result); 98 99private: 100 101 void ParseNumberStartingWithZero(SourceLocation TokLoc); 102 103 static bool isDigitSeparator(char C) { return C == '\''; } 104 105 enum CheckSeparatorKind { CSK_BeforeDigits, CSK_AfterDigits }; 106 107 /// \brief Ensure that we don't have a digit separator here. 108 void checkSeparator(SourceLocation TokLoc, const char *Pos, 109 CheckSeparatorKind IsAfterDigits); 110 111 /// SkipHexDigits - Read and skip over any hex digits, up to End. 112 /// Return a pointer to the first non-hex digit or End. 113 const char *SkipHexDigits(const char *ptr) { 114 while (ptr != ThisTokEnd && (isHexDigit(*ptr) || isDigitSeparator(*ptr))) 115 ptr++; 116 return ptr; 117 } 118 119 /// SkipOctalDigits - Read and skip over any octal digits, up to End. 120 /// Return a pointer to the first non-hex digit or End. 121 const char *SkipOctalDigits(const char *ptr) { 122 while (ptr != ThisTokEnd && 123 ((*ptr >= '0' && *ptr <= '7') || isDigitSeparator(*ptr))) 124 ptr++; 125 return ptr; 126 } 127 128 /// SkipDigits - Read and skip over any digits, up to End. 129 /// Return a pointer to the first non-hex digit or End. 130 const char *SkipDigits(const char *ptr) { 131 while (ptr != ThisTokEnd && (isDigit(*ptr) || isDigitSeparator(*ptr))) 132 ptr++; 133 return ptr; 134 } 135 136 /// SkipBinaryDigits - Read and skip over any binary digits, up to End. 137 /// Return a pointer to the first non-binary digit or End. 138 const char *SkipBinaryDigits(const char *ptr) { 139 while (ptr != ThisTokEnd && 140 (*ptr == '0' || *ptr == '1' || isDigitSeparator(*ptr))) 141 ptr++; 142 return ptr; 143 } 144 145}; 146 147/// CharLiteralParser - Perform interpretation and semantic analysis of a 148/// character literal. 149class CharLiteralParser { 150 uint64_t Value; 151 tok::TokenKind Kind; 152 bool IsMultiChar; 153 bool HadError; 154 SmallString<32> UDSuffixBuf; 155 unsigned UDSuffixOffset; 156public: 157 CharLiteralParser(const char *begin, const char *end, 158 SourceLocation Loc, Preprocessor &PP, 159 tok::TokenKind kind); 160 161 bool hadError() const { return HadError; } 162 bool isAscii() const { return Kind == tok::char_constant; } 163 bool isWide() const { return Kind == tok::wide_char_constant; } 164 bool isUTF16() const { return Kind == tok::utf16_char_constant; } 165 bool isUTF32() const { return Kind == tok::utf32_char_constant; } 166 bool isMultiChar() const { return IsMultiChar; } 167 uint64_t getValue() const { return Value; } 168 StringRef getUDSuffix() const { return UDSuffixBuf; } 169 unsigned getUDSuffixOffset() const { 170 assert(!UDSuffixBuf.empty() && "no ud-suffix"); 171 return UDSuffixOffset; 172 } 173}; 174 175/// StringLiteralParser - This decodes string escape characters and performs 176/// wide string analysis and Translation Phase #6 (concatenation of string 177/// literals) (C99 5.1.1.2p1). 178class StringLiteralParser { 179 const SourceManager &SM; 180 const LangOptions &Features; 181 const TargetInfo &Target; 182 DiagnosticsEngine *Diags; 183 184 unsigned MaxTokenLength; 185 unsigned SizeBound; 186 unsigned CharByteWidth; 187 tok::TokenKind Kind; 188 SmallString<512> ResultBuf; 189 char *ResultPtr; // cursor 190 SmallString<32> UDSuffixBuf; 191 unsigned UDSuffixToken; 192 unsigned UDSuffixOffset; 193public: 194 StringLiteralParser(const Token *StringToks, unsigned NumStringToks, 195 Preprocessor &PP, bool Complain = true); 196 StringLiteralParser(const Token *StringToks, unsigned NumStringToks, 197 const SourceManager &sm, const LangOptions &features, 198 const TargetInfo &target, DiagnosticsEngine *diags = 0) 199 : SM(sm), Features(features), Target(target), Diags(diags), 200 MaxTokenLength(0), SizeBound(0), CharByteWidth(0), Kind(tok::unknown), 201 ResultPtr(ResultBuf.data()), hadError(false), Pascal(false) { 202 init(StringToks, NumStringToks); 203 } 204 205 206 bool hadError; 207 bool Pascal; 208 209 StringRef GetString() const { 210 return StringRef(ResultBuf.data(), GetStringLength()); 211 } 212 unsigned GetStringLength() const { return ResultPtr-ResultBuf.data(); } 213 214 unsigned GetNumStringChars() const { 215 return GetStringLength() / CharByteWidth; 216 } 217 /// getOffsetOfStringByte - This function returns the offset of the 218 /// specified byte of the string data represented by Token. This handles 219 /// advancing over escape sequences in the string. 220 /// 221 /// If the Diagnostics pointer is non-null, then this will do semantic 222 /// checking of the string literal and emit errors and warnings. 223 unsigned getOffsetOfStringByte(const Token &TheTok, unsigned ByteNo) const; 224 225 bool isAscii() const { return Kind == tok::string_literal; } 226 bool isWide() const { return Kind == tok::wide_string_literal; } 227 bool isUTF8() const { return Kind == tok::utf8_string_literal; } 228 bool isUTF16() const { return Kind == tok::utf16_string_literal; } 229 bool isUTF32() const { return Kind == tok::utf32_string_literal; } 230 bool isPascal() const { return Pascal; } 231 232 StringRef getUDSuffix() const { return UDSuffixBuf; } 233 234 /// Get the index of a token containing a ud-suffix. 235 unsigned getUDSuffixToken() const { 236 assert(!UDSuffixBuf.empty() && "no ud-suffix"); 237 return UDSuffixToken; 238 } 239 /// Get the spelling offset of the first byte of the ud-suffix. 240 unsigned getUDSuffixOffset() const { 241 assert(!UDSuffixBuf.empty() && "no ud-suffix"); 242 return UDSuffixOffset; 243 } 244 245private: 246 void init(const Token *StringToks, unsigned NumStringToks); 247 bool CopyStringFragment(const Token &Tok, const char *TokBegin, 248 StringRef Fragment); 249 void DiagnoseLexingError(SourceLocation Loc); 250}; 251 252} // end namespace clang 253 254#endif 255