LiteralSupport.h revision 859b6227694033dd6eaf3991a2b80877a406c382
1//===--- LiteralSupport.h ---------------------------------------*- C++ -*-===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file defines the NumericLiteralParser, CharLiteralParser, and 11// StringLiteralParser interfaces. 12// 13//===----------------------------------------------------------------------===// 14 15#ifndef CLANG_LITERALSUPPORT_H 16#define CLANG_LITERALSUPPORT_H 17 18#include "clang/Basic/CharInfo.h" 19#include "clang/Basic/LLVM.h" 20#include "clang/Basic/TokenKinds.h" 21#include "llvm/ADT/APFloat.h" 22#include "llvm/ADT/SmallString.h" 23#include "llvm/ADT/StringRef.h" 24#include "llvm/Support/DataTypes.h" 25 26namespace clang { 27 28class DiagnosticsEngine; 29class Preprocessor; 30class Token; 31class SourceLocation; 32class TargetInfo; 33class SourceManager; 34class LangOptions; 35 36/// NumericLiteralParser - This performs strict semantic analysis of the content 37/// of a ppnumber, classifying it as either integer, floating, or erroneous, 38/// determines the radix of the value and can convert it to a useful value. 39class NumericLiteralParser { 40 Preprocessor &PP; // needed for diagnostics 41 42 const char *const ThisTokBegin; 43 const char *const ThisTokEnd; 44 const char *DigitsBegin, *SuffixBegin; // markers 45 const char *s; // cursor 46 47 unsigned radix; 48 49 bool saw_exponent, saw_period, saw_ud_suffix; 50 51public: 52 NumericLiteralParser(StringRef TokSpelling, 53 SourceLocation TokLoc, 54 Preprocessor &PP); 55 bool hadError; 56 bool isUnsigned; 57 bool isLong; // This is *not* set for long long. 58 bool isLongLong; 59 bool isFloat; // 1.0f 60 bool isImaginary; // 1.0i 61 bool isMicrosoftInteger; // Microsoft suffix extension i8, i16, i32, or i64. 62 63 bool isIntegerLiteral() const { 64 return !saw_period && !saw_exponent; 65 } 66 bool isFloatingLiteral() const { 67 return saw_period || saw_exponent; 68 } 69 70 bool hasUDSuffix() const { 71 return saw_ud_suffix; 72 } 73 StringRef getUDSuffix() const { 74 assert(saw_ud_suffix); 75 return StringRef(SuffixBegin, ThisTokEnd - SuffixBegin); 76 } 77 unsigned getUDSuffixOffset() const { 78 assert(saw_ud_suffix); 79 return SuffixBegin - ThisTokBegin; 80 } 81 82 static bool isValidUDSuffix(const LangOptions &LangOpts, StringRef Suffix); 83 84 unsigned getRadix() const { return radix; } 85 86 /// GetIntegerValue - Convert this numeric literal value to an APInt that 87 /// matches Val's input width. If there is an overflow (i.e., if the unsigned 88 /// value read is larger than the APInt's bits will hold), set Val to the low 89 /// bits of the result and return true. Otherwise, return false. 90 bool GetIntegerValue(llvm::APInt &Val); 91 92 /// GetFloatValue - Convert this numeric literal to a floating value, using 93 /// the specified APFloat fltSemantics (specifying float, double, etc). 94 /// The optional bool isExact (passed-by-reference) has its value 95 /// set to true if the returned APFloat can represent the number in the 96 /// literal exactly, and false otherwise. 97 llvm::APFloat::opStatus GetFloatValue(llvm::APFloat &Result); 98 99private: 100 101 void ParseNumberStartingWithZero(SourceLocation TokLoc); 102 103 static bool isDigitSeparator(char C) { return C == '\''; } 104 105 /// \brief Ensure that we don't have a digit separator here. 106 void checkSeparator(SourceLocation TokLoc, const char *Pos, 107 bool IsAfterDigits); 108 109 /// SkipHexDigits - Read and skip over any hex digits, up to End. 110 /// Return a pointer to the first non-hex digit or End. 111 const char *SkipHexDigits(const char *ptr) { 112 while (ptr != ThisTokEnd && (isHexDigit(*ptr) || isDigitSeparator(*ptr))) 113 ptr++; 114 return ptr; 115 } 116 117 /// SkipOctalDigits - Read and skip over any octal digits, up to End. 118 /// Return a pointer to the first non-hex digit or End. 119 const char *SkipOctalDigits(const char *ptr) { 120 while (ptr != ThisTokEnd && 121 ((*ptr >= '0' && *ptr <= '7') || isDigitSeparator(*ptr))) 122 ptr++; 123 return ptr; 124 } 125 126 /// SkipDigits - Read and skip over any digits, up to End. 127 /// Return a pointer to the first non-hex digit or End. 128 const char *SkipDigits(const char *ptr) { 129 while (ptr != ThisTokEnd && (isDigit(*ptr) || isDigitSeparator(*ptr))) 130 ptr++; 131 return ptr; 132 } 133 134 /// SkipBinaryDigits - Read and skip over any binary digits, up to End. 135 /// Return a pointer to the first non-binary digit or End. 136 const char *SkipBinaryDigits(const char *ptr) { 137 while (ptr != ThisTokEnd && 138 (*ptr == '0' || *ptr == '1' || isDigitSeparator(*ptr))) 139 ptr++; 140 return ptr; 141 } 142 143}; 144 145/// CharLiteralParser - Perform interpretation and semantic analysis of a 146/// character literal. 147class CharLiteralParser { 148 uint64_t Value; 149 tok::TokenKind Kind; 150 bool IsMultiChar; 151 bool HadError; 152 SmallString<32> UDSuffixBuf; 153 unsigned UDSuffixOffset; 154public: 155 CharLiteralParser(const char *begin, const char *end, 156 SourceLocation Loc, Preprocessor &PP, 157 tok::TokenKind kind); 158 159 bool hadError() const { return HadError; } 160 bool isAscii() const { return Kind == tok::char_constant; } 161 bool isWide() const { return Kind == tok::wide_char_constant; } 162 bool isUTF16() const { return Kind == tok::utf16_char_constant; } 163 bool isUTF32() const { return Kind == tok::utf32_char_constant; } 164 bool isMultiChar() const { return IsMultiChar; } 165 uint64_t getValue() const { return Value; } 166 StringRef getUDSuffix() const { return UDSuffixBuf; } 167 unsigned getUDSuffixOffset() const { 168 assert(!UDSuffixBuf.empty() && "no ud-suffix"); 169 return UDSuffixOffset; 170 } 171}; 172 173/// StringLiteralParser - This decodes string escape characters and performs 174/// wide string analysis and Translation Phase #6 (concatenation of string 175/// literals) (C99 5.1.1.2p1). 176class StringLiteralParser { 177 const SourceManager &SM; 178 const LangOptions &Features; 179 const TargetInfo &Target; 180 DiagnosticsEngine *Diags; 181 182 unsigned MaxTokenLength; 183 unsigned SizeBound; 184 unsigned CharByteWidth; 185 tok::TokenKind Kind; 186 SmallString<512> ResultBuf; 187 char *ResultPtr; // cursor 188 SmallString<32> UDSuffixBuf; 189 unsigned UDSuffixToken; 190 unsigned UDSuffixOffset; 191public: 192 StringLiteralParser(const Token *StringToks, unsigned NumStringToks, 193 Preprocessor &PP, bool Complain = true); 194 StringLiteralParser(const Token *StringToks, unsigned NumStringToks, 195 const SourceManager &sm, const LangOptions &features, 196 const TargetInfo &target, DiagnosticsEngine *diags = 0) 197 : SM(sm), Features(features), Target(target), Diags(diags), 198 MaxTokenLength(0), SizeBound(0), CharByteWidth(0), Kind(tok::unknown), 199 ResultPtr(ResultBuf.data()), hadError(false), Pascal(false) { 200 init(StringToks, NumStringToks); 201 } 202 203 204 bool hadError; 205 bool Pascal; 206 207 StringRef GetString() const { 208 return StringRef(ResultBuf.data(), GetStringLength()); 209 } 210 unsigned GetStringLength() const { return ResultPtr-ResultBuf.data(); } 211 212 unsigned GetNumStringChars() const { 213 return GetStringLength() / CharByteWidth; 214 } 215 /// getOffsetOfStringByte - This function returns the offset of the 216 /// specified byte of the string data represented by Token. This handles 217 /// advancing over escape sequences in the string. 218 /// 219 /// If the Diagnostics pointer is non-null, then this will do semantic 220 /// checking of the string literal and emit errors and warnings. 221 unsigned getOffsetOfStringByte(const Token &TheTok, unsigned ByteNo) const; 222 223 bool isAscii() const { return Kind == tok::string_literal; } 224 bool isWide() const { return Kind == tok::wide_string_literal; } 225 bool isUTF8() const { return Kind == tok::utf8_string_literal; } 226 bool isUTF16() const { return Kind == tok::utf16_string_literal; } 227 bool isUTF32() const { return Kind == tok::utf32_string_literal; } 228 bool isPascal() const { return Pascal; } 229 230 StringRef getUDSuffix() const { return UDSuffixBuf; } 231 232 /// Get the index of a token containing a ud-suffix. 233 unsigned getUDSuffixToken() const { 234 assert(!UDSuffixBuf.empty() && "no ud-suffix"); 235 return UDSuffixToken; 236 } 237 /// Get the spelling offset of the first byte of the ud-suffix. 238 unsigned getUDSuffixOffset() const { 239 assert(!UDSuffixBuf.empty() && "no ud-suffix"); 240 return UDSuffixOffset; 241 } 242 243private: 244 void init(const Token *StringToks, unsigned NumStringToks); 245 bool CopyStringFragment(const Token &Tok, const char *TokBegin, 246 StringRef Fragment); 247 void DiagnoseLexingError(SourceLocation Loc); 248}; 249 250} // end namespace clang 251 252#endif 253