//===--- LiteralSupport.h ---------------------------------------*- C++ -*-===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file defines the NumericLiteralParser, CharLiteralParser, and
// StringLiteralParser interfaces.
//
//===----------------------------------------------------------------------===//
145f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
15176edba5311f6eff0cad2631449885ddf4fbc9eaStephen Hines#ifndef LLVM_CLANG_LEX_LITERALSUPPORT_H
16176edba5311f6eff0cad2631449885ddf4fbc9eaStephen Hines#define LLVM_CLANG_LEX_LITERALSUPPORT_H
175f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
183f6f51e28231f65de9c2dd150a2d757b2162cfa3Jordan Rose#include "clang/Basic/CharInfo.h"
19686775deca8b8685eb90801495880e3abdd844c2Chris Lattner#include "clang/Basic/LLVM.h"
2030a2e16f6c27f888dd11eba6bbbae1e980078fcbChandler Carruth#include "clang/Basic/TokenKinds.h"
2194c939dc1d4958b62ea5a89294dd8b2905f3191fJohn McCall#include "llvm/ADT/APFloat.h"
224967a710c84587c654b56c828382219c3937dacbPirama Arumuga Nainar#include "llvm/ADT/ArrayRef.h"
235f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer#include "llvm/ADT/SmallString.h"
24fc97ea29b1afd9e87341bce2b0cbb0c7172b7dd8Dmitri Gribenko#include "llvm/ADT/StringRef.h"
2503013fa9a0bf1ef4b907f5fec006c8f4000fdd21Michael J. Spencer#include "llvm/Support/DataTypes.h"
265f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
275f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencernamespace clang {
285f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
// Forward declarations.  Within this header these types appear only behind
// pointers or references, or as parameter types of function declarations, so
// their full definitions are not required here.
class DiagnosticsEngine;
class LangOptions;
class Preprocessor;
class SourceLocation;
class SourceManager;
class TargetInfo;
class Token;
361eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
37651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines/// Copy characters from Input to Buf, expanding any UCNs.
38651f13cea278ec967336033dd032faef0e9fc2ecStephen Hinesvoid expandUCNs(SmallVectorImpl<char> &Buf, StringRef Input);
39651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines
405f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// NumericLiteralParser - This performs strict semantic analysis of the content
415f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// of a ppnumber, classifying it as either integer, floating, or erroneous,
425f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// determines the radix of the value and can convert it to a useful value.
435f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencerclass NumericLiteralParser {
445f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  Preprocessor &PP; // needed for diagnostics
451eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
465f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  const char *const ThisTokBegin;
475f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  const char *const ThisTokEnd;
485f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  const char *DigitsBegin, *SuffixBegin; // markers
495f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  const char *s; // cursor
501eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
515f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  unsigned radix;
521eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
53b453ad3214d00acc51c9aa702c76c58354d84b84Richard Smith  bool saw_exponent, saw_period, saw_ud_suffix;
541eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
55651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines  SmallString<32> UDSuffixBuf;
56651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines
575f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencerpublic:
58fc97ea29b1afd9e87341bce2b0cbb0c7172b7dd8Dmitri Gribenko  NumericLiteralParser(StringRef TokSpelling,
59fc97ea29b1afd9e87341bce2b0cbb0c7172b7dd8Dmitri Gribenko                       SourceLocation TokLoc,
60fc97ea29b1afd9e87341bce2b0cbb0c7172b7dd8Dmitri Gribenko                       Preprocessor &PP);
6158878f85ab89b13e9eea4af3ccf055e42c557bc8Pirama Arumuga Nainar  bool hadError : 1;
6258878f85ab89b13e9eea4af3ccf055e42c557bc8Pirama Arumuga Nainar  bool isUnsigned : 1;
6358878f85ab89b13e9eea4af3ccf055e42c557bc8Pirama Arumuga Nainar  bool isLong : 1;          // This is *not* set for long long.
6458878f85ab89b13e9eea4af3ccf055e42c557bc8Pirama Arumuga Nainar  bool isLongLong : 1;
654967a710c84587c654b56c828382219c3937dacbPirama Arumuga Nainar  bool isHalf : 1;          // 1.0h
6658878f85ab89b13e9eea4af3ccf055e42c557bc8Pirama Arumuga Nainar  bool isFloat : 1;         // 1.0f
6758878f85ab89b13e9eea4af3ccf055e42c557bc8Pirama Arumuga Nainar  bool isImaginary : 1;     // 1.0i
684967a710c84587c654b56c828382219c3937dacbPirama Arumuga Nainar  bool isFloat128 : 1;      // 1.0q
6958878f85ab89b13e9eea4af3ccf055e42c557bc8Pirama Arumuga Nainar  uint8_t MicrosoftInteger; // Microsoft suffix extension i8, i16, i32, or i64.
701eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
711eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump  bool isIntegerLiteral() const {
72506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner    return !saw_period && !saw_exponent;
735f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  }
745f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  bool isFloatingLiteral() const {
75506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner    return saw_period || saw_exponent;
765f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  }
77b453ad3214d00acc51c9aa702c76c58354d84b84Richard Smith
78b453ad3214d00acc51c9aa702c76c58354d84b84Richard Smith  bool hasUDSuffix() const {
79b453ad3214d00acc51c9aa702c76c58354d84b84Richard Smith    return saw_ud_suffix;
80b453ad3214d00acc51c9aa702c76c58354d84b84Richard Smith  }
81b453ad3214d00acc51c9aa702c76c58354d84b84Richard Smith  StringRef getUDSuffix() const {
82b453ad3214d00acc51c9aa702c76c58354d84b84Richard Smith    assert(saw_ud_suffix);
83651f13cea278ec967336033dd032faef0e9fc2ecStephen Hines    return UDSuffixBuf;
84b453ad3214d00acc51c9aa702c76c58354d84b84Richard Smith  }
85b453ad3214d00acc51c9aa702c76c58354d84b84Richard Smith  unsigned getUDSuffixOffset() const {
86b453ad3214d00acc51c9aa702c76c58354d84b84Richard Smith    assert(saw_ud_suffix);
87b453ad3214d00acc51c9aa702c76c58354d84b84Richard Smith    return SuffixBegin - ThisTokBegin;
885f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  }
891eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
904ac537b0f07f2efb9fcf081f60d54e6cfb1cf2d5Richard Smith  static bool isValidUDSuffix(const LangOptions &LangOpts, StringRef Suffix);
914ac537b0f07f2efb9fcf081f60d54e6cfb1cf2d5Richard Smith
925f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  unsigned getRadix() const { return radix; }
931eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
945f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  /// GetIntegerValue - Convert this numeric literal value to an APInt that
955f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  /// matches Val's input width.  If there is an overflow (i.e., if the unsigned
965f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  /// value read is larger than the APInt's bits will hold), set Val to the low
975f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  /// bits of the result and return true.  Otherwise, return false.
985f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  bool GetIntegerValue(llvm::APInt &Val);
991eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
100525a05093a4816af961fe2bc6b8a81c17e2e26c2Chris Lattner  /// GetFloatValue - Convert this numeric literal to a floating value, using
101525a05093a4816af961fe2bc6b8a81c17e2e26c2Chris Lattner  /// the specified APFloat fltSemantics (specifying float, double, etc).
102427d5af5b601985093b6b4b33ba1e30fc24d86dcTed Kremenek  /// The optional bool isExact (passed-by-reference) has its value
103427d5af5b601985093b6b4b33ba1e30fc24d86dcTed Kremenek  /// set to true if the returned APFloat can represent the number in the
104427d5af5b601985093b6b4b33ba1e30fc24d86dcTed Kremenek  /// literal exactly, and false otherwise.
10594c939dc1d4958b62ea5a89294dd8b2905f3191fJohn McCall  llvm::APFloat::opStatus GetFloatValue(llvm::APFloat &Result);
1065f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
1071eb4433ac451dc16f4133a88af2d002ac26c58efMike Stumpprivate:
1081eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
109368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner  void ParseNumberStartingWithZero(SourceLocation TokLoc);
1104967a710c84587c654b56c828382219c3937dacbPirama Arumuga Nainar  void ParseDecimalOrOctalCommon(SourceLocation TokLoc);
1111eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
112859b6227694033dd6eaf3991a2b80877a406c382Richard Smith  static bool isDigitSeparator(char C) { return C == '\''; }
113859b6227694033dd6eaf3991a2b80877a406c382Richard Smith
1144967a710c84587c654b56c828382219c3937dacbPirama Arumuga Nainar  /// \brief Determine whether the sequence of characters [Start, End) contains
1154967a710c84587c654b56c828382219c3937dacbPirama Arumuga Nainar  /// any real digits (not digit separators).
1164967a710c84587c654b56c828382219c3937dacbPirama Arumuga Nainar  bool containsDigits(const char *Start, const char *End) {
1174967a710c84587c654b56c828382219c3937dacbPirama Arumuga Nainar    return Start != End && (Start + 1 != End || !isDigitSeparator(Start[0]));
1184967a710c84587c654b56c828382219c3937dacbPirama Arumuga Nainar  }
1194967a710c84587c654b56c828382219c3937dacbPirama Arumuga Nainar
120064c3f1ed782eb641df6bc00beec069b3c80fa8aRichard Smith  enum CheckSeparatorKind { CSK_BeforeDigits, CSK_AfterDigits };
121064c3f1ed782eb641df6bc00beec069b3c80fa8aRichard Smith
122859b6227694033dd6eaf3991a2b80877a406c382Richard Smith  /// \brief Ensure that we don't have a digit separator here.
123859b6227694033dd6eaf3991a2b80877a406c382Richard Smith  void checkSeparator(SourceLocation TokLoc, const char *Pos,
124064c3f1ed782eb641df6bc00beec069b3c80fa8aRichard Smith                      CheckSeparatorKind IsAfterDigits);
125859b6227694033dd6eaf3991a2b80877a406c382Richard Smith
1265f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  /// SkipHexDigits - Read and skip over any hex digits, up to End.
1275f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  /// Return a pointer to the first non-hex digit or End.
1285f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  const char *SkipHexDigits(const char *ptr) {
129859b6227694033dd6eaf3991a2b80877a406c382Richard Smith    while (ptr != ThisTokEnd && (isHexDigit(*ptr) || isDigitSeparator(*ptr)))
1305f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      ptr++;
1315f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    return ptr;
1325f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  }
1331eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
1345f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  /// SkipOctalDigits - Read and skip over any octal digits, up to End.
1355f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  /// Return a pointer to the first non-hex digit or End.
1365f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  const char *SkipOctalDigits(const char *ptr) {
137859b6227694033dd6eaf3991a2b80877a406c382Richard Smith    while (ptr != ThisTokEnd &&
138859b6227694033dd6eaf3991a2b80877a406c382Richard Smith           ((*ptr >= '0' && *ptr <= '7') || isDigitSeparator(*ptr)))
1395f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      ptr++;
1405f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    return ptr;
1415f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  }
1421eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
1435f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  /// SkipDigits - Read and skip over any digits, up to End.
1445f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  /// Return a pointer to the first non-hex digit or End.
1455f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  const char *SkipDigits(const char *ptr) {
146859b6227694033dd6eaf3991a2b80877a406c382Richard Smith    while (ptr != ThisTokEnd && (isDigit(*ptr) || isDigitSeparator(*ptr)))
1475f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      ptr++;
1485f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    return ptr;
1495f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  }
1501eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
1515f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  /// SkipBinaryDigits - Read and skip over any binary digits, up to End.
1525f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  /// Return a pointer to the first non-binary digit or End.
1535f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  const char *SkipBinaryDigits(const char *ptr) {
154859b6227694033dd6eaf3991a2b80877a406c382Richard Smith    while (ptr != ThisTokEnd &&
155859b6227694033dd6eaf3991a2b80877a406c382Richard Smith           (*ptr == '0' || *ptr == '1' || isDigitSeparator(*ptr)))
1565f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      ptr++;
1575f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    return ptr;
1585f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  }
1591eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
1605f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer};
1615f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
1625f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// CharLiteralParser - Perform interpretation and semantic analysis of a
1635f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// character literal.
1645f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencerclass CharLiteralParser {
1654bc11af9bed1d4a247e3db1fcb754d410ad99099Sanjiv Gupta  uint64_t Value;
1665cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  tok::TokenKind Kind;
1672a1c363f38e59a5044fc349aa7e538a50954c244Eli Friedman  bool IsMultiChar;
1685f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  bool HadError;
1695cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith  SmallString<32> UDSuffixBuf;
170dd66be718f23c8149d74ae8b011b002e11e8d5baRichard Smith  unsigned UDSuffixOffset;
1715f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencerpublic:
1725f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  CharLiteralParser(const char *begin, const char *end,
1735cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor                    SourceLocation Loc, Preprocessor &PP,
1745cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor                    tok::TokenKind kind);
1755f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
1765f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  bool hadError() const { return HadError; }
1775cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  bool isAscii() const { return Kind == tok::char_constant; }
1785cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  bool isWide() const { return Kind == tok::wide_char_constant; }
1794967a710c84587c654b56c828382219c3937dacbPirama Arumuga Nainar  bool isUTF8() const { return Kind == tok::utf8_char_constant; }
1805cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  bool isUTF16() const { return Kind == tok::utf16_char_constant; }
1815cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  bool isUTF32() const { return Kind == tok::utf32_char_constant; }
1822a1c363f38e59a5044fc349aa7e538a50954c244Eli Friedman  bool isMultiChar() const { return IsMultiChar; }
1834bc11af9bed1d4a247e3db1fcb754d410ad99099Sanjiv Gupta  uint64_t getValue() const { return Value; }
1845cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith  StringRef getUDSuffix() const { return UDSuffixBuf; }
185dd66be718f23c8149d74ae8b011b002e11e8d5baRichard Smith  unsigned getUDSuffixOffset() const {
186dd66be718f23c8149d74ae8b011b002e11e8d5baRichard Smith    assert(!UDSuffixBuf.empty() && "no ud-suffix");
187dd66be718f23c8149d74ae8b011b002e11e8d5baRichard Smith    return UDSuffixOffset;
188dd66be718f23c8149d74ae8b011b002e11e8d5baRichard Smith  }
1895f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer};
1905f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
1915f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// StringLiteralParser - This decodes string escape characters and performs
1925f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// wide string analysis and Translation Phase #6 (concatenation of string
1935f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// literals) (C99 5.1.1.2p1).
1945f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencerclass StringLiteralParser {
1956c66f07854c1334a1ce9eae1428d61d54182a6e1Chris Lattner  const SourceManager &SM;
1966c66f07854c1334a1ce9eae1428d61d54182a6e1Chris Lattner  const LangOptions &Features;
1976c66f07854c1334a1ce9eae1428d61d54182a6e1Chris Lattner  const TargetInfo &Target;
198d6471f7c1921c7802804ce3ff6fe9768310f72b9David Blaikie  DiagnosticsEngine *Diags;
1996c66f07854c1334a1ce9eae1428d61d54182a6e1Chris Lattner
2005f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  unsigned MaxTokenLength;
2015f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  unsigned SizeBound;
2025cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  unsigned CharByteWidth;
2035cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  tok::TokenKind Kind;
204f7ccbad5d9949e7ddd1cbef43d482553b811e026Dylan Noblesmith  SmallString<512> ResultBuf;
2055f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  char *ResultPtr; // cursor
2065cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith  SmallString<32> UDSuffixBuf;
207dd66be718f23c8149d74ae8b011b002e11e8d5baRichard Smith  unsigned UDSuffixToken;
208dd66be718f23c8149d74ae8b011b002e11e8d5baRichard Smith  unsigned UDSuffixOffset;
2095f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencerpublic:
210c568f1e98938584c0ef0b12ae5018ff7d90a4072Stephen Hines  StringLiteralParser(ArrayRef<Token> StringToks,
211b90f4b3fb94056609da9cca5eef7358d95a363b2Douglas Gregor                      Preprocessor &PP, bool Complain = true);
212c568f1e98938584c0ef0b12ae5018ff7d90a4072Stephen Hines  StringLiteralParser(ArrayRef<Token> StringToks,
2130833dd0675c25cbb35671c7a2006d511d5c77ce3Chris Lattner                      const SourceManager &sm, const LangOptions &features,
2146bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines                      const TargetInfo &target,
2156bcf27bb9a4b5c3f79cb44c0e4654a6d7619ad89Stephen Hines                      DiagnosticsEngine *diags = nullptr)
216403de3f932b5d1d3e4e58f69960000911d04dd2aArgyrios Kyrtzidis    : SM(sm), Features(features), Target(target), Diags(diags),
2175cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor      MaxTokenLength(0), SizeBound(0), CharByteWidth(0), Kind(tok::unknown),
2185cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor      ResultPtr(ResultBuf.data()), hadError(false), Pascal(false) {
219c568f1e98938584c0ef0b12ae5018ff7d90a4072Stephen Hines    init(StringToks);
2200833dd0675c25cbb35671c7a2006d511d5c77ce3Chris Lattner  }
2210833dd0675c25cbb35671c7a2006d511d5c77ce3Chris Lattner
2220833dd0675c25cbb35671c7a2006d511d5c77ce3Chris Lattner
2235f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  bool hadError;
224ee98ac5a1330db432b188dd2d38b6631aac47bf1Anders Carlsson  bool Pascal;
2251eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
226686775deca8b8685eb90801495880e3abdd844c2Chris Lattner  StringRef GetString() const {
227686775deca8b8685eb90801495880e3abdd844c2Chris Lattner    return StringRef(ResultBuf.data(), GetStringLength());
22865aa6885818d4b4eea2e5a9d12085b2398148662Jay Foad  }
229403de3f932b5d1d3e4e58f69960000911d04dd2aArgyrios Kyrtzidis  unsigned GetStringLength() const { return ResultPtr-ResultBuf.data(); }
230dbb1ecc32ca122b07b7c98fd0a8f6f53985adaccChris Lattner
231dbb1ecc32ca122b07b7c98fd0a8f6f53985adaccChris Lattner  unsigned GetNumStringChars() const {
2325cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor    return GetStringLength() / CharByteWidth;
2331eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump  }
234719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner  /// getOffsetOfStringByte - This function returns the offset of the
235719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner  /// specified byte of the string data represented by Token.  This handles
236719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner  /// advancing over escape sequences in the string.
23791f54ce93bec136fb9e18740b895cf1c1339524bChris Lattner  ///
23891f54ce93bec136fb9e18740b895cf1c1339524bChris Lattner  /// If the Diagnostics pointer is non-null, then this will do semantic
23991f54ce93bec136fb9e18740b895cf1c1339524bChris Lattner  /// checking of the string literal and emit errors and warnings.
2406c66f07854c1334a1ce9eae1428d61d54182a6e1Chris Lattner  unsigned getOffsetOfStringByte(const Token &TheTok, unsigned ByteNo) const;
2415cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor
24264f45a24b19eb89ff88f7c3ff0df9be8e861ac97Eli Friedman  bool isAscii() const { return Kind == tok::string_literal; }
24364f45a24b19eb89ff88f7c3ff0df9be8e861ac97Eli Friedman  bool isWide() const { return Kind == tok::wide_string_literal; }
24464f45a24b19eb89ff88f7c3ff0df9be8e861ac97Eli Friedman  bool isUTF8() const { return Kind == tok::utf8_string_literal; }
24564f45a24b19eb89ff88f7c3ff0df9be8e861ac97Eli Friedman  bool isUTF16() const { return Kind == tok::utf16_string_literal; }
24664f45a24b19eb89ff88f7c3ff0df9be8e861ac97Eli Friedman  bool isUTF32() const { return Kind == tok::utf32_string_literal; }
24764f45a24b19eb89ff88f7c3ff0df9be8e861ac97Eli Friedman  bool isPascal() const { return Pascal; }
2485cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor
2495cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith  StringRef getUDSuffix() const { return UDSuffixBuf; }
2505cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith
251dd66be718f23c8149d74ae8b011b002e11e8d5baRichard Smith  /// Get the index of a token containing a ud-suffix.
252dd66be718f23c8149d74ae8b011b002e11e8d5baRichard Smith  unsigned getUDSuffixToken() const {
253dd66be718f23c8149d74ae8b011b002e11e8d5baRichard Smith    assert(!UDSuffixBuf.empty() && "no ud-suffix");
254dd66be718f23c8149d74ae8b011b002e11e8d5baRichard Smith    return UDSuffixToken;
255dd66be718f23c8149d74ae8b011b002e11e8d5baRichard Smith  }
256dd66be718f23c8149d74ae8b011b002e11e8d5baRichard Smith  /// Get the spelling offset of the first byte of the ud-suffix.
257dd66be718f23c8149d74ae8b011b002e11e8d5baRichard Smith  unsigned getUDSuffixOffset() const {
258dd66be718f23c8149d74ae8b011b002e11e8d5baRichard Smith    assert(!UDSuffixBuf.empty() && "no ud-suffix");
259dd66be718f23c8149d74ae8b011b002e11e8d5baRichard Smith    return UDSuffixOffset;
260dd66be718f23c8149d74ae8b011b002e11e8d5baRichard Smith  }
261dd66be718f23c8149d74ae8b011b002e11e8d5baRichard Smith
2620833dd0675c25cbb35671c7a2006d511d5c77ce3Chris Lattnerprivate:
263c568f1e98938584c0ef0b12ae5018ff7d90a4072Stephen Hines  void init(ArrayRef<Token> StringToks);
264e5f0588840b20897631cc8110344fd2745ef4caaRichard Smith  bool CopyStringFragment(const Token &Tok, const char *TokBegin,
265e5f0588840b20897631cc8110344fd2745ef4caaRichard Smith                          StringRef Fragment);
2663144749f8bf9bbf7c027f2161a930bff80ad6f72Argyrios Kyrtzidis  void DiagnoseLexingError(SourceLocation Loc);
2675f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer};
2681eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
2695f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer}  // end namespace clang
2705f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
2716fa5f0943a84233b2e1ec9716eae55643225bfd4Chris Lattner#endif
272