15f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer//===--- LiteralSupport.cpp - Code to parse and process literals ----------===//
25f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer//
35f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer//                     The LLVM Compiler Infrastructure
45f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer//
50bc735ffcfb223c0186419547abaa5c84482663eChris Lattner// This file is distributed under the University of Illinois Open Source
60bc735ffcfb223c0186419547abaa5c84482663eChris Lattner// License. See LICENSE.TXT for details.
75f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer//
85f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer//===----------------------------------------------------------------------===//
95f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer//
105f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer// This file implements the NumericLiteralParser, CharLiteralParser, and
115f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer// StringLiteralParser interfaces.
125f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer//
135f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer//===----------------------------------------------------------------------===//
145f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
155f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer#include "clang/Lex/LiteralSupport.h"
165f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer#include "clang/Lex/Preprocessor.h"
17500d3297d2a21edeac4d46cbcbe21bc2352c2a28Chris Lattner#include "clang/Lex/LexDiagnostic.h"
18136f93a2b660fc780fc2dba82a6f42ca4dc9164aChris Lattner#include "clang/Basic/TargetInfo.h"
195f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer#include "llvm/ADT/StringExtras.h"
209fe8c74a93ac8e92512615c5f83e7a328b3b0544David Blaikie#include "llvm/Support/ErrorHandling.h"
215f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencerusing namespace clang;
225f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
/// HexDigitValue - Translate one hexadecimal digit character into its numeric
/// value (0-15).  Both lower-case and upper-case letters are accepted.  Any
/// character that is not a hex digit produces -1.
static int HexDigitValue(char C) {
  switch (C) {
  case '0': case '1': case '2': case '3': case '4':
  case '5': case '6': case '7': case '8': case '9':
    return C - '0';
  case 'a': case 'b': case 'c': case 'd': case 'e': case 'f':
    return 10 + (C - 'a');
  case 'A': case 'B': case 'C': case 'D': case 'E': case 'F':
    return 10 + (C - 'A');
  default:
    return -1;
  }
}
315f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
325cee1195584fa8672253139c86e922daeda69b9eDouglas Gregorstatic unsigned getCharWidth(tok::TokenKind kind, const TargetInfo &Target) {
335cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  switch (kind) {
34b219cfc4d75f0a03630b7c4509ef791b7e97b2c8David Blaikie  default: llvm_unreachable("Unknown token type!");
355cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  case tok::char_constant:
365cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  case tok::string_literal:
375cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  case tok::utf8_string_literal:
385cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor    return Target.getCharWidth();
395cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  case tok::wide_char_constant:
405cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  case tok::wide_string_literal:
415cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor    return Target.getWCharWidth();
425cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  case tok::utf16_char_constant:
435cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  case tok::utf16_string_literal:
445cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor    return Target.getChar16Width();
455cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  case tok::utf32_char_constant:
465cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  case tok::utf32_string_literal:
475cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor    return Target.getChar32Width();
485cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  }
495cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor}
505cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor
515f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// ProcessCharEscape - Parse a standard C escape sequence, which can occur in
525f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// either a character or a string literal.
535f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencerstatic unsigned ProcessCharEscape(const char *&ThisTokBuf,
545f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer                                  const char *ThisTokEnd, bool &HadError,
555cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor                                  FullSourceLoc Loc, unsigned CharWidth,
56d6471f7c1921c7802804ce3ff6fe9768310f72b9David Blaikie                                  DiagnosticsEngine *Diags) {
575f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  // Skip the '\' char.
585f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  ++ThisTokBuf;
595f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
605f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  // We know that this character can't be off the end of the buffer, because
615f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  // that would have been \", which would not have been the end of string.
625f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  unsigned ResultChar = *ThisTokBuf++;
635f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  switch (ResultChar) {
645f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  // These map to themselves.
655f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  case '\\': case '\'': case '"': case '?': break;
661eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
675f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    // These have fixed mappings.
685f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  case 'a':
695f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    // TODO: K&R: the meaning of '\\a' is different in traditional C
705f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    ResultChar = 7;
715f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    break;
725f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  case 'b':
735f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    ResultChar = 8;
745f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    break;
755f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  case 'e':
7691f54ce93bec136fb9e18740b895cf1c1339524bChris Lattner    if (Diags)
7791f54ce93bec136fb9e18740b895cf1c1339524bChris Lattner      Diags->Report(Loc, diag::ext_nonstandard_escape) << "e";
785f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    ResultChar = 27;
795f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    break;
803c54801fbc67d8df2fed0711a2e2022db6b1bbcfEli Friedman  case 'E':
8191f54ce93bec136fb9e18740b895cf1c1339524bChris Lattner    if (Diags)
8291f54ce93bec136fb9e18740b895cf1c1339524bChris Lattner      Diags->Report(Loc, diag::ext_nonstandard_escape) << "E";
833c54801fbc67d8df2fed0711a2e2022db6b1bbcfEli Friedman    ResultChar = 27;
843c54801fbc67d8df2fed0711a2e2022db6b1bbcfEli Friedman    break;
855f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  case 'f':
865f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    ResultChar = 12;
875f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    break;
885f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  case 'n':
895f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    ResultChar = 10;
905f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    break;
915f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  case 'r':
925f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    ResultChar = 13;
935f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    break;
945f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  case 't':
955f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    ResultChar = 9;
965f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    break;
975f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  case 'v':
985f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    ResultChar = 11;
995f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    break;
1005f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  case 'x': { // Hex escape.
1015f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    ResultChar = 0;
1025f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    if (ThisTokBuf == ThisTokEnd || !isxdigit(*ThisTokBuf)) {
10391f54ce93bec136fb9e18740b895cf1c1339524bChris Lattner      if (Diags)
10491f54ce93bec136fb9e18740b895cf1c1339524bChris Lattner        Diags->Report(Loc, diag::err_hex_escape_no_digits);
1055f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      HadError = 1;
1065f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      break;
1075f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    }
1081eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
1095f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    // Hex escapes are a maximal series of hex digits.
1105f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    bool Overflow = false;
1115f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    for (; ThisTokBuf != ThisTokEnd; ++ThisTokBuf) {
1125f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      int CharVal = HexDigitValue(ThisTokBuf[0]);
1135f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      if (CharVal == -1) break;
114c29bbde0a14a664d6843b21d3791478d1f4d2833Chris Lattner      // About to shift out a digit?
115c29bbde0a14a664d6843b21d3791478d1f4d2833Chris Lattner      Overflow |= (ResultChar & 0xF0000000) ? true : false;
1165f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      ResultChar <<= 4;
1175f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      ResultChar |= CharVal;
1185f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    }
1195f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
1205f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    // See if any bits will be truncated when evaluated as a character.
1215f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    if (CharWidth != 32 && (ResultChar >> CharWidth) != 0) {
1225f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      Overflow = true;
1235f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      ResultChar &= ~0U >> (32-CharWidth);
1245f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    }
1251eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
1265f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    // Check for overflow.
12791f54ce93bec136fb9e18740b895cf1c1339524bChris Lattner    if (Overflow && Diags)   // Too many digits to fit in
12891f54ce93bec136fb9e18740b895cf1c1339524bChris Lattner      Diags->Report(Loc, diag::warn_hex_escape_too_large);
1295f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    break;
1305f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  }
1315f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  case '0': case '1': case '2': case '3':
1325f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  case '4': case '5': case '6': case '7': {
1335f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    // Octal escapes.
1345f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    --ThisTokBuf;
1355f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    ResultChar = 0;
1365f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
1375f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    // Octal escapes are a series of octal digits with maximum length 3.
1385f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    // "\0123" is a two digit sequence equal to "\012" "3".
1395f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    unsigned NumDigits = 0;
1405f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    do {
1415f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      ResultChar <<= 3;
1425f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      ResultChar |= *ThisTokBuf++ - '0';
1435f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      ++NumDigits;
1445f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    } while (ThisTokBuf != ThisTokEnd && NumDigits < 3 &&
1455f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer             ThisTokBuf[0] >= '0' && ThisTokBuf[0] <= '7');
1461eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
1475f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    // Check for overflow.  Reject '\777', but not L'\777'.
1485f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    if (CharWidth != 32 && (ResultChar >> CharWidth) != 0) {
14991f54ce93bec136fb9e18740b895cf1c1339524bChris Lattner      if (Diags)
15091f54ce93bec136fb9e18740b895cf1c1339524bChris Lattner        Diags->Report(Loc, diag::warn_octal_escape_too_large);
1515f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      ResultChar &= ~0U >> (32-CharWidth);
1525f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    }
1535f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    break;
1545f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  }
1551eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
1565f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    // Otherwise, these are not valid escapes.
1575f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  case '(': case '{': case '[': case '%':
1585f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    // GCC accepts these as extensions.  We warn about them as such though.
15991f54ce93bec136fb9e18740b895cf1c1339524bChris Lattner    if (Diags)
16091f54ce93bec136fb9e18740b895cf1c1339524bChris Lattner      Diags->Report(Loc, diag::ext_nonstandard_escape)
161b90f4b3fb94056609da9cca5eef7358d95a363b2Douglas Gregor        << std::string()+(char)ResultChar;
162f01fdff97b245caac98100d232c760b4d0531411Eli Friedman    break;
1635f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  default:
16491f54ce93bec136fb9e18740b895cf1c1339524bChris Lattner    if (Diags == 0)
165b90f4b3fb94056609da9cca5eef7358d95a363b2Douglas Gregor      break;
166b90f4b3fb94056609da9cca5eef7358d95a363b2Douglas Gregor
16723ef69d197ba3b5e9602f7161fee50990059502aTed Kremenek    if (isgraph(ResultChar))
16891f54ce93bec136fb9e18740b895cf1c1339524bChris Lattner      Diags->Report(Loc, diag::ext_unknown_escape)
16991f54ce93bec136fb9e18740b895cf1c1339524bChris Lattner        << std::string()+(char)ResultChar;
170ac92d829111bc19d1cc97cd85c3c04bc39b969d1Chris Lattner    else
17191f54ce93bec136fb9e18740b895cf1c1339524bChris Lattner      Diags->Report(Loc, diag::ext_unknown_escape)
17291f54ce93bec136fb9e18740b895cf1c1339524bChris Lattner        << "x"+llvm::utohexstr(ResultChar);
1735f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    break;
1745f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  }
1751eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
1765f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  return ResultChar;
1775f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer}
1785f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
1790e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff/// ProcessUCNEscape - Read the Universal Character Name, check constraints and
18059705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber/// return the UTF32.
18159705aee3fe01aa6fb6962dd11350161b47983d9Nico Weberstatic bool ProcessUCNEscape(const char *&ThisTokBuf, const char *ThisTokEnd,
18259705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber                             uint32_t &UcnVal, unsigned short &UcnLen,
183d6471f7c1921c7802804ce3ff6fe9768310f72b9David Blaikie                             FullSourceLoc Loc, DiagnosticsEngine *Diags,
1846c66f07854c1334a1ce9eae1428d61d54182a6e1Chris Lattner                             const LangOptions &Features) {
1856c66f07854c1334a1ce9eae1428d61d54182a6e1Chris Lattner  if (!Features.CPlusPlus && !Features.C99 && Diags)
186872a45e91778eb0b706ff57272fe547d4512eb19Chris Lattner    Diags->Report(Loc, diag::warn_ucn_not_valid_in_c89);
1871eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
1884e93b34fdb798abfa0534062a139f2c37cbf876eSteve Naroff  // Save the beginning of the string (for error diagnostics).
1894e93b34fdb798abfa0534062a139f2c37cbf876eSteve Naroff  const char *ThisTokBegin = ThisTokBuf;
1901eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
1910e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  // Skip the '\u' char's.
1920e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  ThisTokBuf += 2;
1930e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff
1940e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  if (ThisTokBuf == ThisTokEnd || !isxdigit(*ThisTokBuf)) {
1956c66f07854c1334a1ce9eae1428d61d54182a6e1Chris Lattner    if (Diags)
196872a45e91778eb0b706ff57272fe547d4512eb19Chris Lattner      Diags->Report(Loc, diag::err_ucn_escape_no_digits);
19759705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber    return false;
1980e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  }
19959705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber  UcnLen = (ThisTokBuf[-1] == 'u' ? 4 : 8);
20056bedefe92ae8f604d14bea75cc3040ab32337c2Fariborz Jahanian  unsigned short UcnLenSave = UcnLen;
20159705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber  for (; ThisTokBuf != ThisTokEnd && UcnLenSave; ++ThisTokBuf, UcnLenSave--) {
2020e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff    int CharVal = HexDigitValue(ThisTokBuf[0]);
2030e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff    if (CharVal == -1) break;
2040e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff    UcnVal <<= 4;
2050e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff    UcnVal |= CharVal;
2060e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  }
2070e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  // If we didn't consume the proper number of digits, there is a problem.
20859705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber  if (UcnLenSave) {
209872a45e91778eb0b706ff57272fe547d4512eb19Chris Lattner    if (Diags) {
2107ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner      SourceLocation L =
2117ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner        Lexer::AdvanceToTokenCharacter(Loc, ThisTokBuf-ThisTokBegin,
2127ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner                                       Loc.getManager(), Features);
2137ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner      Diags->Report(FullSourceLoc(L, Loc.getManager()),
2147ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner                    diag::err_ucn_escape_incomplete);
215872a45e91778eb0b706ff57272fe547d4512eb19Chris Lattner    }
21659705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber    return false;
2170e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  }
2181eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump  // Check UCN constraints (C99 6.4.3p2).
2190e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  if ((UcnVal < 0xa0 &&
2200e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff      (UcnVal != 0x24 && UcnVal != 0x40 && UcnVal != 0x60 )) // $, @, `
2211eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump      || (UcnVal >= 0xD800 && UcnVal <= 0xDFFF)
2228a5c0cd90b8d607ca284274000ed8716b836d253Steve Naroff      || (UcnVal > 0x10FFFF)) /* the maximum legal UTF32 value */ {
2236c66f07854c1334a1ce9eae1428d61d54182a6e1Chris Lattner    if (Diags)
224872a45e91778eb0b706ff57272fe547d4512eb19Chris Lattner      Diags->Report(Loc, diag::err_ucn_escape_invalid);
22559705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber    return false;
22659705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber  }
22759705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber  return true;
22859705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber}
22959705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber
23059705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber/// EncodeUCNEscape - Read the Universal Character Name, check constraints and
23159705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber/// convert the UTF32 to UTF8 or UTF16. This is a subroutine of
23259705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber/// StringLiteralParser. When we decide to implement UCN's for identifiers,
23359705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber/// we will likely rework our support for UCN's.
23459705aee3fe01aa6fb6962dd11350161b47983d9Nico Weberstatic void EncodeUCNEscape(const char *&ThisTokBuf, const char *ThisTokEnd,
235a95880d6513c617bb96634bcc1f16c6bdb80dedcChris Lattner                            char *&ResultBuf, bool &HadError,
2365cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor                            FullSourceLoc Loc, unsigned CharByteWidth,
237d6471f7c1921c7802804ce3ff6fe9768310f72b9David Blaikie                            DiagnosticsEngine *Diags,
238d6471f7c1921c7802804ce3ff6fe9768310f72b9David Blaikie                            const LangOptions &Features) {
23959705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber  typedef uint32_t UTF32;
24059705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber  UTF32 UcnVal = 0;
24159705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber  unsigned short UcnLen = 0;
242a95880d6513c617bb96634bcc1f16c6bdb80dedcChris Lattner  if (!ProcessUCNEscape(ThisTokBuf, ThisTokEnd, UcnVal, UcnLen, Loc, Diags,
243a95880d6513c617bb96634bcc1f16c6bdb80dedcChris Lattner                        Features)) {
2440e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff    HadError = 1;
2450e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff    return;
2460e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  }
24759705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber
2485cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  assert((CharByteWidth == 1 || CharByteWidth == 2 || CharByteWidth) &&
2495cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor         "only character widths of 1, 2, or 4 bytes supported");
250a0f15b0848405ae16d63bd5d78c862a6526b338aNico Weber
2515cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  (void)UcnLen;
2525cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  assert((UcnLen== 4 || UcnLen== 8) && "only ucn length of 4 or 8 supported");
2535cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor
2545cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  if (CharByteWidth == 4) {
2555cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor    // Note: our internal rep of wide char tokens is always little-endian.
2565cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor    *ResultBuf++ = (UcnVal & 0x000000FF);
2575cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor    *ResultBuf++ = (UcnVal & 0x0000FF00) >> 8;
2585cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor    *ResultBuf++ = (UcnVal & 0x00FF0000) >> 16;
2595cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor    *ResultBuf++ = (UcnVal & 0xFF000000) >> 24;
2605cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor    return;
2615cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  }
262a0f15b0848405ae16d63bd5d78c862a6526b338aNico Weber
2635cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  if (CharByteWidth == 2) {
264a0f15b0848405ae16d63bd5d78c862a6526b338aNico Weber    // Convert to UTF16.
265a0f15b0848405ae16d63bd5d78c862a6526b338aNico Weber    if (UcnVal < (UTF32)0xFFFF) {
266a0f15b0848405ae16d63bd5d78c862a6526b338aNico Weber      *ResultBuf++ = (UcnVal & 0x000000FF);
267a0f15b0848405ae16d63bd5d78c862a6526b338aNico Weber      *ResultBuf++ = (UcnVal & 0x0000FF00) >> 8;
268a0f15b0848405ae16d63bd5d78c862a6526b338aNico Weber      return;
269a0f15b0848405ae16d63bd5d78c862a6526b338aNico Weber    }
270a95880d6513c617bb96634bcc1f16c6bdb80dedcChris Lattner    if (Diags) Diags->Report(Loc, diag::warn_ucn_escape_too_large);
271a0f15b0848405ae16d63bd5d78c862a6526b338aNico Weber
272a0f15b0848405ae16d63bd5d78c862a6526b338aNico Weber    typedef uint16_t UTF16;
273a0f15b0848405ae16d63bd5d78c862a6526b338aNico Weber    UcnVal -= 0x10000;
274a0f15b0848405ae16d63bd5d78c862a6526b338aNico Weber    UTF16 surrogate1 = 0xD800 + (UcnVal >> 10);
275a0f15b0848405ae16d63bd5d78c862a6526b338aNico Weber    UTF16 surrogate2 = 0xDC00 + (UcnVal & 0x3FF);
276a0f15b0848405ae16d63bd5d78c862a6526b338aNico Weber    *ResultBuf++ = (surrogate1 & 0x000000FF);
277a0f15b0848405ae16d63bd5d78c862a6526b338aNico Weber    *ResultBuf++ = (surrogate1 & 0x0000FF00) >> 8;
278a0f15b0848405ae16d63bd5d78c862a6526b338aNico Weber    *ResultBuf++ = (surrogate2 & 0x000000FF);
279a0f15b0848405ae16d63bd5d78c862a6526b338aNico Weber    *ResultBuf++ = (surrogate2 & 0x0000FF00) >> 8;
28056bedefe92ae8f604d14bea75cc3040ab32337c2Fariborz Jahanian    return;
28156bedefe92ae8f604d14bea75cc3040ab32337c2Fariborz Jahanian  }
2825cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor
2835cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  assert(CharByteWidth == 1 && "UTF-8 encoding is only for 1 byte characters");
2845cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor
2850e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  // Now that we've parsed/checked the UCN, we convert from UTF32->UTF8.
2860e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  // The conversion below was inspired by:
2870e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  //   http://www.unicode.org/Public/PROGRAMS/CVTUTF/ConvertUTF.c
2881eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump  // First, we determine how many bytes the result will require.
2894e93b34fdb798abfa0534062a139f2c37cbf876eSteve Naroff  typedef uint8_t UTF8;
2900e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff
2910e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  unsigned short bytesToWrite = 0;
2920e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  if (UcnVal < (UTF32)0x80)
2930e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff    bytesToWrite = 1;
2940e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  else if (UcnVal < (UTF32)0x800)
2950e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff    bytesToWrite = 2;
2960e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  else if (UcnVal < (UTF32)0x10000)
2970e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff    bytesToWrite = 3;
2980e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  else
2990e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff    bytesToWrite = 4;
3001eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
3010e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  const unsigned byteMask = 0xBF;
3020e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  const unsigned byteMark = 0x80;
3031eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
3040e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  // Once the bits are split out into bytes of UTF8, this is a mask OR-ed
3058a5c0cd90b8d607ca284274000ed8716b836d253Steve Naroff  // into the first byte, depending on how many bytes follow.
3061eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump  static const UTF8 firstByteMark[5] = {
3078a5c0cd90b8d607ca284274000ed8716b836d253Steve Naroff    0x00, 0x00, 0xC0, 0xE0, 0xF0
3080e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  };
3090e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  // Finally, we write the bytes into ResultBuf.
3100e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  ResultBuf += bytesToWrite;
3110e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  switch (bytesToWrite) { // note: everything falls through.
3120e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff    case 4: *--ResultBuf = (UTF8)((UcnVal | byteMark) & byteMask); UcnVal >>= 6;
3130e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff    case 3: *--ResultBuf = (UTF8)((UcnVal | byteMark) & byteMask); UcnVal >>= 6;
3140e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff    case 2: *--ResultBuf = (UTF8)((UcnVal | byteMark) & byteMask); UcnVal >>= 6;
3150e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff    case 1: *--ResultBuf = (UTF8) (UcnVal | firstByteMark[bytesToWrite]);
3160e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  }
3170e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  // Update the buffer.
3180e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  ResultBuf += bytesToWrite;
3190e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff}
3205f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
3215f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
3225f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///       integer-constant: [C99 6.4.4.1]
3235f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///         decimal-constant integer-suffix
3245f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///         octal-constant integer-suffix
3255f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///         hexadecimal-constant integer-suffix
3261eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump///       decimal-constant:
3275f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///         nonzero-digit
3285f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///         decimal-constant digit
3291eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump///       octal-constant:
3305f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///         0
3315f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///         octal-constant octal-digit
3321eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump///       hexadecimal-constant:
3335f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///         hexadecimal-prefix hexadecimal-digit
3345f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///         hexadecimal-constant hexadecimal-digit
3355f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///       hexadecimal-prefix: one of
3365f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///         0x 0X
3375f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///       integer-suffix:
3385f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///         unsigned-suffix [long-suffix]
3395f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///         unsigned-suffix [long-long-suffix]
3405f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///         long-suffix [unsigned-suffix]
///         long-long-suffix [unsigned-suffix]
3425f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///       nonzero-digit:
3435f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///         1 2 3 4 5 6 7 8 9
3445f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///       octal-digit:
3455f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///         0 1 2 3 4 5 6 7
3465f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///       hexadecimal-digit:
3475f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///         0 1 2 3 4 5 6 7 8 9
3485f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///         a b c d e f
3495f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///         A B C D E F
3505f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///       unsigned-suffix: one of
3515f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///         u U
3525f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///       long-suffix: one of
3535f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///         l L
3541eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump///       long-long-suffix: one of
3555f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///         ll LL
3565f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///
3575f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///       floating-constant: [C99 6.4.4.2]
3585f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///         TODO: add rules...
3595f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///
3605f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid SpencerNumericLiteralParser::
3615f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid SpencerNumericLiteralParser(const char *begin, const char *end,
3625f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer                     SourceLocation TokLoc, Preprocessor &pp)
3635f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  : PP(pp), ThisTokBegin(begin), ThisTokEnd(end) {
3641eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
365c29bbde0a14a664d6843b21d3791478d1f4d2833Chris Lattner  // This routine assumes that the range begin/end matches the regex for integer
366c29bbde0a14a664d6843b21d3791478d1f4d2833Chris Lattner  // and FP constants (specifically, the 'pp-number' regex), and assumes that
367c29bbde0a14a664d6843b21d3791478d1f4d2833Chris Lattner  // the byte at "*end" is both valid and not part of the regex.  Because of
368c29bbde0a14a664d6843b21d3791478d1f4d2833Chris Lattner  // this, it doesn't have to check for 'overscan' in various places.
369c29bbde0a14a664d6843b21d3791478d1f4d2833Chris Lattner  assert(!isalnum(*end) && *end != '.' && *end != '_' &&
370c29bbde0a14a664d6843b21d3791478d1f4d2833Chris Lattner         "Lexer didn't maximally munch?");
3711eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
3725f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  s = DigitsBegin = begin;
3735f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  saw_exponent = false;
3745f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  saw_period = false;
3755f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  isLong = false;
3765f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  isUnsigned = false;
3775f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  isLongLong = false;
3786e400c286b485e28d04a742ea87860ddfefa672eChris Lattner  isFloat = false;
379506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner  isImaginary = false;
380b79fe2d28777652a4df4f49dc876cbec060ca90eMike Stump  isMicrosoftInteger = false;
3815f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  hadError = false;
3821eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
3835f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  if (*s == '0') { // parse radix
384368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    ParseNumberStartingWithZero(TokLoc);
385368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    if (hadError)
386368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner      return;
3875f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  } else { // the first digit is non-zero
3885f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    radix = 10;
3895f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    s = SkipDigits(s);
3905f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    if (s == ThisTokEnd) {
3915f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      // Done.
392016765e3453db2e302efe53905e99cdb25501234Christopher Lamb    } else if (isxdigit(*s) && !(*s == 'e' || *s == 'E')) {
393ac92d829111bc19d1cc97cd85c3c04bc39b969d1Chris Lattner      PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-begin),
3945f9e272e632e951b1efe824cd16acb4d96077930Chris Lattner              diag::err_invalid_decimal_digit) << StringRef(s, 1);
395ac92d829111bc19d1cc97cd85c3c04bc39b969d1Chris Lattner      hadError = true;
3965f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      return;
3975f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    } else if (*s == '.') {
3985f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      s++;
3995f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      saw_period = true;
4005f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      s = SkipDigits(s);
4011eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump    }
4024411f46050216a139ab2fc7ff145ec384d11ec7fChris Lattner    if ((*s == 'e' || *s == 'E')) { // exponent
40370f66ab053f36ab3df7a778d09bcb2b4b0fec1f8Chris Lattner      const char *Exponent = s;
4045f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      s++;
4055f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      saw_exponent = true;
4065f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      if (*s == '+' || *s == '-')  s++; // sign
4075f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      const char *first_non_digit = SkipDigits(s);
4080b7f69d789ca1f76582ee9a336e25861fd0c1416Chris Lattner      if (first_non_digit != s) {
4095f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer        s = first_non_digit;
4100b7f69d789ca1f76582ee9a336e25861fd0c1416Chris Lattner      } else {
411ac92d829111bc19d1cc97cd85c3c04bc39b969d1Chris Lattner        PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, Exponent-begin),
412ac92d829111bc19d1cc97cd85c3c04bc39b969d1Chris Lattner                diag::err_exponent_has_no_digits);
413ac92d829111bc19d1cc97cd85c3c04bc39b969d1Chris Lattner        hadError = true;
4140b7f69d789ca1f76582ee9a336e25861fd0c1416Chris Lattner        return;
4155f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      }
4165f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    }
4175f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  }
4185f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
4195f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  SuffixBegin = s;
4201eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
421506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner  // Parse the suffix.  At this point we can classify whether we have an FP or
422506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner  // integer constant.
423506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner  bool isFPConstant = isFloatingLiteral();
4241eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
425506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner  // Loop over all of the characters of the suffix.  If we see something bad,
426506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner  // we break out of the loop.
427506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner  for (; s != ThisTokEnd; ++s) {
428506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner    switch (*s) {
429506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner    case 'f':      // FP Suffix for "float"
430506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner    case 'F':
431506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner      if (!isFPConstant) break;  // Error for integer constant.
4326e400c286b485e28d04a742ea87860ddfefa672eChris Lattner      if (isFloat || isLong) break; // FF, LF invalid.
4336e400c286b485e28d04a742ea87860ddfefa672eChris Lattner      isFloat = true;
434506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner      continue;  // Success.
435506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner    case 'u':
436506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner    case 'U':
437506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner      if (isFPConstant) break;  // Error for floating constant.
438506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner      if (isUnsigned) break;    // Cannot be repeated.
439506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner      isUnsigned = true;
440506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner      continue;  // Success.
441506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner    case 'l':
442506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner    case 'L':
443506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner      if (isLong || isLongLong) break;  // Cannot be repeated.
4446e400c286b485e28d04a742ea87860ddfefa672eChris Lattner      if (isFloat) break;               // LF invalid.
4451eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
446506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner      // Check for long long.  The L's need to be adjacent and the same case.
447506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner      if (s+1 != ThisTokEnd && s[1] == s[0]) {
448506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner        if (isFPConstant) break;        // long long invalid for floats.
449506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner        isLongLong = true;
450506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner        ++s;  // Eat both of them.
451506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner      } else {
4525f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer        isLong = true;
4535f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      }
454506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner      continue;  // Success.
455506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner    case 'i':
456c637415a96c16abc7e28ef83c6c105716f7e8936Chris Lattner    case 'I':
45762ec1f2fd7368542bb926c04797fb07023547694Francois Pichet      if (PP.getLangOptions().MicrosoftExt) {
458a8be02b655b76e4dbe776b0c62bc3c450dc6feabFariborz Jahanian        if (isFPConstant || isLong || isLongLong) break;
4596e8c7acb61b2c7f421d6e1aba8a7a84e96ab6981Nuno Lopes
4600c29b22f4384500cc0d04f3072cc5d5d58d10d6cSteve Naroff        // Allow i8, i16, i32, i64, and i128.
461b79fe2d28777652a4df4f49dc876cbec060ca90eMike Stump        if (s + 1 != ThisTokEnd) {
462b79fe2d28777652a4df4f49dc876cbec060ca90eMike Stump          switch (s[1]) {
463b79fe2d28777652a4df4f49dc876cbec060ca90eMike Stump            case '8':
464b79fe2d28777652a4df4f49dc876cbec060ca90eMike Stump              s += 2; // i8 suffix
465b79fe2d28777652a4df4f49dc876cbec060ca90eMike Stump              isMicrosoftInteger = true;
4666e8c7acb61b2c7f421d6e1aba8a7a84e96ab6981Nuno Lopes              break;
467b79fe2d28777652a4df4f49dc876cbec060ca90eMike Stump            case '1':
4686e8c7acb61b2c7f421d6e1aba8a7a84e96ab6981Nuno Lopes              if (s + 2 == ThisTokEnd) break;
469d062b604548be6e2f85f6f63a461702e5ea14115Francois Pichet              if (s[2] == '6') {
470d062b604548be6e2f85f6f63a461702e5ea14115Francois Pichet                s += 3; // i16 suffix
471d062b604548be6e2f85f6f63a461702e5ea14115Francois Pichet                isMicrosoftInteger = true;
472d062b604548be6e2f85f6f63a461702e5ea14115Francois Pichet              }
4736e8c7acb61b2c7f421d6e1aba8a7a84e96ab6981Nuno Lopes              else if (s[2] == '2') {
4746e8c7acb61b2c7f421d6e1aba8a7a84e96ab6981Nuno Lopes                if (s + 3 == ThisTokEnd) break;
475d062b604548be6e2f85f6f63a461702e5ea14115Francois Pichet                if (s[3] == '8') {
476d062b604548be6e2f85f6f63a461702e5ea14115Francois Pichet                  s += 4; // i128 suffix
477d062b604548be6e2f85f6f63a461702e5ea14115Francois Pichet                  isMicrosoftInteger = true;
478d062b604548be6e2f85f6f63a461702e5ea14115Francois Pichet                }
479b79fe2d28777652a4df4f49dc876cbec060ca90eMike Stump              }
4806e8c7acb61b2c7f421d6e1aba8a7a84e96ab6981Nuno Lopes              break;
481b79fe2d28777652a4df4f49dc876cbec060ca90eMike Stump            case '3':
4826e8c7acb61b2c7f421d6e1aba8a7a84e96ab6981Nuno Lopes              if (s + 2 == ThisTokEnd) break;
483d062b604548be6e2f85f6f63a461702e5ea14115Francois Pichet              if (s[2] == '2') {
484d062b604548be6e2f85f6f63a461702e5ea14115Francois Pichet                s += 3; // i32 suffix
485d062b604548be6e2f85f6f63a461702e5ea14115Francois Pichet                isLong = true;
486d062b604548be6e2f85f6f63a461702e5ea14115Francois Pichet                isMicrosoftInteger = true;
487d062b604548be6e2f85f6f63a461702e5ea14115Francois Pichet              }
4886e8c7acb61b2c7f421d6e1aba8a7a84e96ab6981Nuno Lopes              break;
489b79fe2d28777652a4df4f49dc876cbec060ca90eMike Stump            case '6':
4906e8c7acb61b2c7f421d6e1aba8a7a84e96ab6981Nuno Lopes              if (s + 2 == ThisTokEnd) break;
491d062b604548be6e2f85f6f63a461702e5ea14115Francois Pichet              if (s[2] == '4') {
492d062b604548be6e2f85f6f63a461702e5ea14115Francois Pichet                s += 3; // i64 suffix
493d062b604548be6e2f85f6f63a461702e5ea14115Francois Pichet                isLongLong = true;
494d062b604548be6e2f85f6f63a461702e5ea14115Francois Pichet                isMicrosoftInteger = true;
495d062b604548be6e2f85f6f63a461702e5ea14115Francois Pichet              }
4966e8c7acb61b2c7f421d6e1aba8a7a84e96ab6981Nuno Lopes              break;
497b79fe2d28777652a4df4f49dc876cbec060ca90eMike Stump            default:
498b79fe2d28777652a4df4f49dc876cbec060ca90eMike Stump              break;
499b79fe2d28777652a4df4f49dc876cbec060ca90eMike Stump          }
500b79fe2d28777652a4df4f49dc876cbec060ca90eMike Stump          break;
5010c29b22f4384500cc0d04f3072cc5d5d58d10d6cSteve Naroff        }
5020c29b22f4384500cc0d04f3072cc5d5d58d10d6cSteve Naroff      }
5030c29b22f4384500cc0d04f3072cc5d5d58d10d6cSteve Naroff      // fall through.
504506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner    case 'j':
505506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner    case 'J':
506506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner      if (isImaginary) break;   // Cannot be repeated.
507506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner      PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-begin),
508506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner              diag::ext_imaginary_constant);
509506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner      isImaginary = true;
510506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner      continue;  // Success.
5115f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    }
512506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner    // If we reached here, there was an error.
513506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner    break;
514506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner  }
5151eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
516506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner  // Report an error if there are any.
517506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner  if (s != ThisTokEnd) {
518ac92d829111bc19d1cc97cd85c3c04bc39b969d1Chris Lattner    PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-begin),
519ac92d829111bc19d1cc97cd85c3c04bc39b969d1Chris Lattner            isFPConstant ? diag::err_invalid_suffix_float_constant :
520ac92d829111bc19d1cc97cd85c3c04bc39b969d1Chris Lattner                           diag::err_invalid_suffix_integer_constant)
5215f9e272e632e951b1efe824cd16acb4d96077930Chris Lattner      << StringRef(SuffixBegin, ThisTokEnd-SuffixBegin);
522ac92d829111bc19d1cc97cd85c3c04bc39b969d1Chris Lattner    hadError = true;
523506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner    return;
5245f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  }
5255f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer}
5265f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
527368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner/// ParseNumberStartingWithZero - This method is called when the first character
528368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner/// of the number is found to be a zero.  This means it is either an octal
529368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner/// number (like '04') or a hex number ('0x123a') a binary number ('0b1010') or
5301eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump/// a floating point number (01239.123e4).  Eat the prefix, determining the
531368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner/// radix etc.
532368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattnervoid NumericLiteralParser::ParseNumberStartingWithZero(SourceLocation TokLoc) {
533368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner  assert(s[0] == '0' && "Invalid method call");
534368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner  s++;
5351eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
536368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner  // Handle a hex number like 0x1234.
537368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner  if ((*s == 'x' || *s == 'X') && (isxdigit(s[1]) || s[1] == '.')) {
538368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    s++;
539368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    radix = 16;
540368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    DigitsBegin = s;
541368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    s = SkipHexDigits(s);
542368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    if (s == ThisTokEnd) {
543368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner      // Done.
544368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    } else if (*s == '.') {
545368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner      s++;
546368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner      saw_period = true;
547368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner      s = SkipHexDigits(s);
548368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    }
549368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    // A binary exponent can appear with or with a '.'. If dotted, the
5501eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump    // binary exponent is required.
5511155c42e7b1b4e401bb0a331a6d715d637958c75Douglas Gregor    if (*s == 'p' || *s == 'P') {
552368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner      const char *Exponent = s;
553368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner      s++;
554368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner      saw_exponent = true;
555368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner      if (*s == '+' || *s == '-')  s++; // sign
556368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner      const char *first_non_digit = SkipDigits(s);
5576ea623823f8532670480425b573f35115404b4a0Chris Lattner      if (first_non_digit == s) {
558ac92d829111bc19d1cc97cd85c3c04bc39b969d1Chris Lattner        PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, Exponent-ThisTokBegin),
559ac92d829111bc19d1cc97cd85c3c04bc39b969d1Chris Lattner                diag::err_exponent_has_no_digits);
560ac92d829111bc19d1cc97cd85c3c04bc39b969d1Chris Lattner        hadError = true;
5616ea623823f8532670480425b573f35115404b4a0Chris Lattner        return;
562368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner      }
5636ea623823f8532670480425b573f35115404b4a0Chris Lattner      s = first_non_digit;
5641eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
5651155c42e7b1b4e401bb0a331a6d715d637958c75Douglas Gregor      if (!PP.getLangOptions().HexFloats)
566ac92d829111bc19d1cc97cd85c3c04bc39b969d1Chris Lattner        PP.Diag(TokLoc, diag::ext_hexconstant_invalid);
567368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    } else if (saw_period) {
568ac92d829111bc19d1cc97cd85c3c04bc39b969d1Chris Lattner      PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-ThisTokBegin),
569ac92d829111bc19d1cc97cd85c3c04bc39b969d1Chris Lattner              diag::err_hexconstant_requires_exponent);
570ac92d829111bc19d1cc97cd85c3c04bc39b969d1Chris Lattner      hadError = true;
571368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    }
572368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    return;
573368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner  }
5741eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
575368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner  // Handle simple binary numbers 0b01010
576368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner  if (*s == 'b' || *s == 'B') {
577368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    // 0b101010 is a GCC extension.
578413d355e38755a71f106dbc0ac900ca989070916Chris Lattner    PP.Diag(TokLoc, diag::ext_binary_literal);
579368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    ++s;
580368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    radix = 2;
581368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    DigitsBegin = s;
582368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    s = SkipBinaryDigits(s);
583368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    if (s == ThisTokEnd) {
584368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner      // Done.
585368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    } else if (isxdigit(*s)) {
586ac92d829111bc19d1cc97cd85c3c04bc39b969d1Chris Lattner      PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-ThisTokBegin),
5875f9e272e632e951b1efe824cd16acb4d96077930Chris Lattner              diag::err_invalid_binary_digit) << StringRef(s, 1);
588ac92d829111bc19d1cc97cd85c3c04bc39b969d1Chris Lattner      hadError = true;
589368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    }
590413d355e38755a71f106dbc0ac900ca989070916Chris Lattner    // Other suffixes will be diagnosed by the caller.
591368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    return;
592368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner  }
5931eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
594368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner  // For now, the radix is set to 8. If we discover that we have a
595368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner  // floating point constant, the radix will change to 10. Octal floating
5961eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump  // point constants are not permitted (only decimal and hexadecimal).
597368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner  radix = 8;
598368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner  DigitsBegin = s;
599368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner  s = SkipOctalDigits(s);
600368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner  if (s == ThisTokEnd)
601368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    return; // Done, simple octal number like 01234
6021eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
603413d355e38755a71f106dbc0ac900ca989070916Chris Lattner  // If we have some other non-octal digit that *is* a decimal digit, see if
604413d355e38755a71f106dbc0ac900ca989070916Chris Lattner  // this is part of a floating point number like 094.123 or 09e1.
605413d355e38755a71f106dbc0ac900ca989070916Chris Lattner  if (isdigit(*s)) {
606413d355e38755a71f106dbc0ac900ca989070916Chris Lattner    const char *EndDecimal = SkipDigits(s);
607413d355e38755a71f106dbc0ac900ca989070916Chris Lattner    if (EndDecimal[0] == '.' || EndDecimal[0] == 'e' || EndDecimal[0] == 'E') {
608413d355e38755a71f106dbc0ac900ca989070916Chris Lattner      s = EndDecimal;
609413d355e38755a71f106dbc0ac900ca989070916Chris Lattner      radix = 10;
610413d355e38755a71f106dbc0ac900ca989070916Chris Lattner    }
611413d355e38755a71f106dbc0ac900ca989070916Chris Lattner  }
6121eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
613413d355e38755a71f106dbc0ac900ca989070916Chris Lattner  // If we have a hex digit other than 'e' (which denotes a FP exponent) then
614413d355e38755a71f106dbc0ac900ca989070916Chris Lattner  // the code is using an incorrect base.
615368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner  if (isxdigit(*s) && *s != 'e' && *s != 'E') {
616ac92d829111bc19d1cc97cd85c3c04bc39b969d1Chris Lattner    PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-ThisTokBegin),
6175f9e272e632e951b1efe824cd16acb4d96077930Chris Lattner            diag::err_invalid_octal_digit) << StringRef(s, 1);
618ac92d829111bc19d1cc97cd85c3c04bc39b969d1Chris Lattner    hadError = true;
619368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    return;
620368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner  }
6211eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
622368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner  if (*s == '.') {
623368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    s++;
624368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    radix = 10;
625368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    saw_period = true;
626413d355e38755a71f106dbc0ac900ca989070916Chris Lattner    s = SkipDigits(s); // Skip suffix.
627368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner  }
628368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner  if (*s == 'e' || *s == 'E') { // exponent
629368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    const char *Exponent = s;
630368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    s++;
631368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    radix = 10;
632368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    saw_exponent = true;
633368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    if (*s == '+' || *s == '-')  s++; // sign
634368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    const char *first_non_digit = SkipDigits(s);
635368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    if (first_non_digit != s) {
636368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner      s = first_non_digit;
637368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    } else {
6381eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump      PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, Exponent-ThisTokBegin),
639ac92d829111bc19d1cc97cd85c3c04bc39b969d1Chris Lattner              diag::err_exponent_has_no_digits);
640ac92d829111bc19d1cc97cd85c3c04bc39b969d1Chris Lattner      hadError = true;
641368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner      return;
642368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    }
643368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner  }
644368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner}
645368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner
646368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner
6475f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// GetIntegerValue - Convert this numeric literal value to an APInt that
6485f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// matches Val's input width.  If there is an overflow, set Val to the low bits
6495f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// of the result and return true.  Otherwise, return false.
6505f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencerbool NumericLiteralParser::GetIntegerValue(llvm::APInt &Val) {
651a179be34c1a3c0190e6b9e39dee2197651f44a5dDaniel Dunbar  // Fast path: Compute a conservative bound on the maximum number of
652a179be34c1a3c0190e6b9e39dee2197651f44a5dDaniel Dunbar  // bits per digit in this radix. If we can't possibly overflow a
653a179be34c1a3c0190e6b9e39dee2197651f44a5dDaniel Dunbar  // uint64 based on that bound then do the simple conversion to
654a179be34c1a3c0190e6b9e39dee2197651f44a5dDaniel Dunbar  // integer. This avoids the expensive overflow checking below, and
655a179be34c1a3c0190e6b9e39dee2197651f44a5dDaniel Dunbar  // handles the common cases that matter (small decimal integers and
656a179be34c1a3c0190e6b9e39dee2197651f44a5dDaniel Dunbar  // hex/octal values which don't overflow).
657a179be34c1a3c0190e6b9e39dee2197651f44a5dDaniel Dunbar  unsigned MaxBitsPerDigit = 1;
6581eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump  while ((1U << MaxBitsPerDigit) < radix)
659a179be34c1a3c0190e6b9e39dee2197651f44a5dDaniel Dunbar    MaxBitsPerDigit += 1;
660a179be34c1a3c0190e6b9e39dee2197651f44a5dDaniel Dunbar  if ((SuffixBegin - DigitsBegin) * MaxBitsPerDigit <= 64) {
661a179be34c1a3c0190e6b9e39dee2197651f44a5dDaniel Dunbar    uint64_t N = 0;
662a179be34c1a3c0190e6b9e39dee2197651f44a5dDaniel Dunbar    for (s = DigitsBegin; s != SuffixBegin; ++s)
663a179be34c1a3c0190e6b9e39dee2197651f44a5dDaniel Dunbar      N = N*radix + HexDigitValue(*s);
664a179be34c1a3c0190e6b9e39dee2197651f44a5dDaniel Dunbar
665a179be34c1a3c0190e6b9e39dee2197651f44a5dDaniel Dunbar    // This will truncate the value to Val's input width. Simply check
666a179be34c1a3c0190e6b9e39dee2197651f44a5dDaniel Dunbar    // for overflow by comparing.
667a179be34c1a3c0190e6b9e39dee2197651f44a5dDaniel Dunbar    Val = N;
668a179be34c1a3c0190e6b9e39dee2197651f44a5dDaniel Dunbar    return Val.getZExtValue() != N;
669a179be34c1a3c0190e6b9e39dee2197651f44a5dDaniel Dunbar  }
670a179be34c1a3c0190e6b9e39dee2197651f44a5dDaniel Dunbar
6715f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  Val = 0;
6725f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  s = DigitsBegin;
6735f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
6745f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  llvm::APInt RadixVal(Val.getBitWidth(), radix);
6755f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  llvm::APInt CharVal(Val.getBitWidth(), 0);
6765f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  llvm::APInt OldVal = Val;
6771eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
6785f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  bool OverflowOccurred = false;
6795f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  while (s < SuffixBegin) {
6805f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    unsigned C = HexDigitValue(*s++);
6811eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
6825f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    // If this letter is out of bound for this radix, reject it.
6835f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    assert(C < radix && "NumericLiteralParser ctor should have rejected this");
6841eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
6855f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    CharVal = C;
6861eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
6875f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    // Add the digit to the value in the appropriate radix.  If adding in digits
6885f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    // made the value smaller, then this overflowed.
6895f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    OldVal = Val;
6905f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
6915f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    // Multiply by radix, did overflow occur on the multiply?
6925f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    Val *= RadixVal;
6935f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    OverflowOccurred |= Val.udiv(RadixVal) != OldVal;
6945f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
6955f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    // Add value, did overflow occur on the value?
696d70cb645702bdbb42aee58403306a7c47e0d901cDaniel Dunbar    //   (a + b) ult b  <=> overflow
6975f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    Val += CharVal;
6985f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    OverflowOccurred |= Val.ult(CharVal);
6995f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  }
7005f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  return OverflowOccurred;
7015f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer}
7025f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
70394c939dc1d4958b62ea5a89294dd8b2905f3191fJohn McCallllvm::APFloat::opStatus
70494c939dc1d4958b62ea5a89294dd8b2905f3191fJohn McCallNumericLiteralParser::GetFloatValue(llvm::APFloat &Result) {
705427d5af5b601985093b6b4b33ba1e30fc24d86dcTed Kremenek  using llvm::APFloat;
7061eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
707e9f195f15ffe96d0a220c872ab12d0630a633c44Erick Tryzelaar  unsigned n = std::min(SuffixBegin - ThisTokBegin, ThisTokEnd - ThisTokBegin);
70894c939dc1d4958b62ea5a89294dd8b2905f3191fJohn McCall  return Result.convertFromString(StringRef(ThisTokBegin, n),
70994c939dc1d4958b62ea5a89294dd8b2905f3191fJohn McCall                                  APFloat::rmNearestTiesToEven);
7105f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer}
7115f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
7125f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
7132fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///       character-literal: [C++0x lex.ccon]
7142fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         ' c-char-sequence '
7152fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         u' c-char-sequence '
7162fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         U' c-char-sequence '
7172fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         L' c-char-sequence '
7182fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///       c-char-sequence:
7192fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         c-char
7202fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         c-char-sequence c-char
7212fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///       c-char:
7222fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         any member of the source character set except the single-quote ',
7232fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///           backslash \, or new-line character
7242fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         escape-sequence
7252fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         universal-character-name
7262fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///       escape-sequence: [C++0x lex.ccon]
7272fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         simple-escape-sequence
7282fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         octal-escape-sequence
7292fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         hexadecimal-escape-sequence
7302fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///       simple-escape-sequence:
731ddddd48da72bc29d1c3f388ed91ea5549328129eNAKAMURA Takumi///         one of \' \" \? \\ \a \b \f \n \r \t \v
7322fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///       octal-escape-sequence:
7332fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         \ octal-digit
7342fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         \ octal-digit octal-digit
7352fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         \ octal-digit octal-digit octal-digit
7362fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///       hexadecimal-escape-sequence:
7372fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         \x hexadecimal-digit
7382fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         hexadecimal-escape-sequence hexadecimal-digit
7392fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///       universal-character-name:
7402fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         \u hex-quad
7412fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         \U hex-quad hex-quad
7422fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///       hex-quad:
7432fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         hex-digit hex-digit hex-digit hex-digit
7442fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///
7455f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid SpencerCharLiteralParser::CharLiteralParser(const char *begin, const char *end,
7465cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor                                     SourceLocation Loc, Preprocessor &PP,
7475cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor                                     tok::TokenKind kind) {
7485f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  // At this point we know that the character matches the regex "L?'.*'".
7495f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  HadError = false;
7501eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
7515cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  Kind = kind;
7525cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor
7535cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  // Determine if this is a wide or UTF character.
7545cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  if (Kind == tok::wide_char_constant || Kind == tok::utf16_char_constant ||
7555cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor      Kind == tok::utf32_char_constant) {
7565cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor    ++begin;
7575cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  }
7581eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
7595f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  // Skip over the entry quote.
7605f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  assert(begin[0] == '\'' && "Invalid token lexed");
7615f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  ++begin;
7625f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
7631eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump  // FIXME: The "Value" is an uint64_t so we can handle char literals of
764fc8f0e14ad142ed811e90fbd9a30e419e301c717Chris Lattner  // up to 64-bits.
7655f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  // FIXME: This extensively assumes that 'char' is 8-bits.
76698be4943e8dc4f3905629a7102668960873cf863Chris Lattner  assert(PP.getTargetInfo().getCharWidth() == 8 &&
7675f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer         "Assumes char is 8 bits");
768e3ad881e4e9620e941dabd4e78dacdb028b85682Chris Lattner  assert(PP.getTargetInfo().getIntWidth() <= 64 &&
769e3ad881e4e9620e941dabd4e78dacdb028b85682Chris Lattner         (PP.getTargetInfo().getIntWidth() & 7) == 0 &&
770e3ad881e4e9620e941dabd4e78dacdb028b85682Chris Lattner         "Assumes sizeof(int) on target is <= 64 and a multiple of char");
771e3ad881e4e9620e941dabd4e78dacdb028b85682Chris Lattner  assert(PP.getTargetInfo().getWCharWidth() <= 64 &&
772e3ad881e4e9620e941dabd4e78dacdb028b85682Chris Lattner         "Assumes sizeof(wchar) on target is <= 64");
7734bc11af9bed1d4a247e3db1fcb754d410ad99099Sanjiv Gupta
7741eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump  // This is what we will use for overflow detection
7754bc11af9bed1d4a247e3db1fcb754d410ad99099Sanjiv Gupta  llvm::APInt LitVal(PP.getTargetInfo().getIntWidth(), 0);
7761eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
777e3ad881e4e9620e941dabd4e78dacdb028b85682Chris Lattner  unsigned NumCharsSoFar = 0;
7781c6c64b5181a960c7d4cace4995a938d4dfa6fbfChris Lattner  bool Warned = false;
7795f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  while (begin[0] != '\'') {
7804bc11af9bed1d4a247e3db1fcb754d410ad99099Sanjiv Gupta    uint64_t ResultChar;
78159705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber
78259705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber      // Is this a Universal Character Name escape?
7835f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    if (begin[0] != '\\')     // If this is a normal character, consume it.
78417c8c84fbc5cbde336fdef8fffe63c08a955ade9Douglas Gregor      ResultChar = (unsigned char)*begin++;
78559705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber    else {                    // Otherwise, this is an escape character.
7860473cd52eac6f1e831777ed899be3ea4509c7b24Craig Topper      unsigned CharWidth = getCharWidth(Kind, PP.getTargetInfo());
78759705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber      // Check for UCN.
78859705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber      if (begin[1] == 'u' || begin[1] == 'U') {
78959705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber        uint32_t utf32 = 0;
79059705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber        unsigned short UcnLen = 0;
791872a45e91778eb0b706ff57272fe547d4512eb19Chris Lattner        if (!ProcessUCNEscape(begin, end, utf32, UcnLen,
792872a45e91778eb0b706ff57272fe547d4512eb19Chris Lattner                              FullSourceLoc(Loc, PP.getSourceManager()),
7936c66f07854c1334a1ce9eae1428d61d54182a6e1Chris Lattner                              &PP.getDiagnostics(), PP.getLangOptions())) {
79459705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber          HadError = 1;
79559705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber        }
79659705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber        ResultChar = utf32;
7970473cd52eac6f1e831777ed899be3ea4509c7b24Craig Topper        if (CharWidth != 32 && (ResultChar >> CharWidth) != 0) {
7980473cd52eac6f1e831777ed899be3ea4509c7b24Craig Topper          PP.Diag(Loc, diag::warn_ucn_escape_too_large);
7990473cd52eac6f1e831777ed899be3ea4509c7b24Craig Topper          ResultChar &= ~0U >> (32-CharWidth);
8000473cd52eac6f1e831777ed899be3ea4509c7b24Craig Topper        }
80159705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber      } else {
80259705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber        // Otherwise, this is a non-UCN escape character.  Process it.
80391f54ce93bec136fb9e18740b895cf1c1339524bChris Lattner        ResultChar = ProcessCharEscape(begin, end, HadError,
80491f54ce93bec136fb9e18740b895cf1c1339524bChris Lattner                                       FullSourceLoc(Loc,PP.getSourceManager()),
8055cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor                                       CharWidth, &PP.getDiagnostics());
80659705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber      }
80759705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber    }
8085f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
8095f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    // If this is a multi-character constant (e.g. 'abc'), handle it.  These are
8105f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    // implementation defined (C99 6.4.4.4p10).
811e3ad881e4e9620e941dabd4e78dacdb028b85682Chris Lattner    if (NumCharsSoFar) {
8125cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor      if (!isAscii()) {
8135f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer        // Emulate GCC's (unintentional?) behavior: L'ab' -> L'b'.
8144bc11af9bed1d4a247e3db1fcb754d410ad99099Sanjiv Gupta        LitVal = 0;
8155f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      } else {
8165f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer        // Narrow character literals act as though their value is concatenated
817e3ad881e4e9620e941dabd4e78dacdb028b85682Chris Lattner        // in this implementation, but warn on overflow.
8181c6c64b5181a960c7d4cace4995a938d4dfa6fbfChris Lattner        if (LitVal.countLeadingZeros() < 8 && !Warned) {
8195f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer          PP.Diag(Loc, diag::warn_char_constant_too_large);
8201c6c64b5181a960c7d4cace4995a938d4dfa6fbfChris Lattner          Warned = true;
8211c6c64b5181a960c7d4cace4995a938d4dfa6fbfChris Lattner        }
8224bc11af9bed1d4a247e3db1fcb754d410ad99099Sanjiv Gupta        LitVal <<= 8;
8235f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      }
8245f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    }
8251eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
8264bc11af9bed1d4a247e3db1fcb754d410ad99099Sanjiv Gupta    LitVal = LitVal + ResultChar;
827e3ad881e4e9620e941dabd4e78dacdb028b85682Chris Lattner    ++NumCharsSoFar;
828e3ad881e4e9620e941dabd4e78dacdb028b85682Chris Lattner  }
829e3ad881e4e9620e941dabd4e78dacdb028b85682Chris Lattner
830e3ad881e4e9620e941dabd4e78dacdb028b85682Chris Lattner  // If this is the second character being processed, do special handling.
831e3ad881e4e9620e941dabd4e78dacdb028b85682Chris Lattner  if (NumCharsSoFar > 1) {
832e3ad881e4e9620e941dabd4e78dacdb028b85682Chris Lattner    // Warn about discarding the top bits for multi-char wide-character
833e3ad881e4e9620e941dabd4e78dacdb028b85682Chris Lattner    // constants (L'abcd').
8345cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor    if (!isAscii())
8355cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor      PP.Diag(Loc, diag::warn_extraneous_char_constant);
836e3ad881e4e9620e941dabd4e78dacdb028b85682Chris Lattner    else if (NumCharsSoFar != 4)
837e3ad881e4e9620e941dabd4e78dacdb028b85682Chris Lattner      PP.Diag(Loc, diag::ext_multichar_character_literal);
838e3ad881e4e9620e941dabd4e78dacdb028b85682Chris Lattner    else
839e3ad881e4e9620e941dabd4e78dacdb028b85682Chris Lattner      PP.Diag(Loc, diag::ext_four_char_character_literal);
8402a1c363f38e59a5044fc349aa7e538a50954c244Eli Friedman    IsMultiChar = true;
841930b71a4a7dedf70a73e5fd875bae7df452b80a9Daniel Dunbar  } else
842930b71a4a7dedf70a73e5fd875bae7df452b80a9Daniel Dunbar    IsMultiChar = false;
8434bc11af9bed1d4a247e3db1fcb754d410ad99099Sanjiv Gupta
8444bc11af9bed1d4a247e3db1fcb754d410ad99099Sanjiv Gupta  // Transfer the value from APInt to uint64_t
8454bc11af9bed1d4a247e3db1fcb754d410ad99099Sanjiv Gupta  Value = LitVal.getZExtValue();
8461eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
8475f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  // If this is a single narrow character, sign extend it (e.g. '\xFF' is "-1")
8485f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  // if 'char' is signed for this target (C99 6.4.4.4p10).  Note that multiple
8495f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  // character constants are not sign extended in the this implementation:
8505f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  // '\xFF\xFF' = 65536 and '\x0\xFF' = 255, which matches GCC.
8515cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  if (isAscii() && NumCharsSoFar == 1 && (Value & 128) &&
85215b91764d08e886391c865c4a444d7b51141c284Eli Friedman      PP.getLangOptions().CharIsSigned)
8535f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    Value = (signed char)Value;
8545f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer}
8555f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
8565f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
8572fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///       string-literal: [C++0x lex.string]
8582fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         encoding-prefix " [s-char-sequence] "
8592fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         encoding-prefix R raw-string
8602fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///       encoding-prefix:
8612fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         u8
8622fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         u
8632fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         U
8642fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         L
8655f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///       s-char-sequence:
8665f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///         s-char
8675f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///         s-char-sequence s-char
8685f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///       s-char:
8692fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         any member of the source character set except the double-quote ",
8702fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///           backslash \, or new-line character
8712fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         escape-sequence
8725f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///         universal-character-name
8732fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///       raw-string:
8742fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         " d-char-sequence ( r-char-sequence ) d-char-sequence "
8752fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///       r-char-sequence:
8762fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         r-char
8772fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         r-char-sequence r-char
8782fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///       r-char:
8792fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         any member of the source character set, except a right parenthesis )
8802fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///           followed by the initial d-char-sequence (which may be empty)
8812fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///           followed by a double quote ".
8822fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///       d-char-sequence:
8832fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         d-char
8842fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         d-char-sequence d-char
8852fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///       d-char:
8862fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         any member of the basic source character set except:
8872fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///           space, the left parenthesis (, the right parenthesis ),
8882fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///           the backslash \, and the control characters representing horizontal
8892fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///           tab, vertical tab, form feed, and newline.
8902fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///       escape-sequence: [C++0x lex.ccon]
8912fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         simple-escape-sequence
8922fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         octal-escape-sequence
8932fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         hexadecimal-escape-sequence
8942fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///       simple-escape-sequence:
895ddddd48da72bc29d1c3f388ed91ea5549328129eNAKAMURA Takumi///         one of \' \" \? \\ \a \b \f \n \r \t \v
8962fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///       octal-escape-sequence:
8972fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         \ octal-digit
8982fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         \ octal-digit octal-digit
8992fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         \ octal-digit octal-digit octal-digit
9002fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///       hexadecimal-escape-sequence:
9012fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         \x hexadecimal-digit
9022fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         hexadecimal-escape-sequence hexadecimal-digit
9035f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///       universal-character-name:
9045f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///         \u hex-quad
9055f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///         \U hex-quad hex-quad
9065f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///       hex-quad:
9075f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///         hex-digit hex-digit hex-digit hex-digit
9085f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///
9095f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid SpencerStringLiteralParser::
910d217773f106856a11879ec79dc468efefaf2ee75Chris LattnerStringLiteralParser(const Token *StringToks, unsigned NumStringToks,
9110833dd0675c25cbb35671c7a2006d511d5c77ce3Chris Lattner                    Preprocessor &PP, bool Complain)
9120833dd0675c25cbb35671c7a2006d511d5c77ce3Chris Lattner  : SM(PP.getSourceManager()), Features(PP.getLangOptions()),
913403de3f932b5d1d3e4e58f69960000911d04dd2aArgyrios Kyrtzidis    Target(PP.getTargetInfo()), Diags(Complain ? &PP.getDiagnostics() : 0),
9145cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor    MaxTokenLength(0), SizeBound(0), CharByteWidth(0), Kind(tok::unknown),
9155cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor    ResultPtr(ResultBuf.data()), hadError(false), Pascal(false) {
9160833dd0675c25cbb35671c7a2006d511d5c77ce3Chris Lattner  init(StringToks, NumStringToks);
9170833dd0675c25cbb35671c7a2006d511d5c77ce3Chris Lattner}
9180833dd0675c25cbb35671c7a2006d511d5c77ce3Chris Lattner
9190833dd0675c25cbb35671c7a2006d511d5c77ce3Chris Lattnervoid StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){
920403de3f932b5d1d3e4e58f69960000911d04dd2aArgyrios Kyrtzidis  // The literal token may have come from an invalid source location (e.g. due
921403de3f932b5d1d3e4e58f69960000911d04dd2aArgyrios Kyrtzidis  // to a PCH error), in which case the token length will be 0.
922403de3f932b5d1d3e4e58f69960000911d04dd2aArgyrios Kyrtzidis  if (NumStringToks == 0 || StringToks[0].getLength() < 2) {
923403de3f932b5d1d3e4e58f69960000911d04dd2aArgyrios Kyrtzidis    hadError = true;
924403de3f932b5d1d3e4e58f69960000911d04dd2aArgyrios Kyrtzidis    return;
925403de3f932b5d1d3e4e58f69960000911d04dd2aArgyrios Kyrtzidis  }
926403de3f932b5d1d3e4e58f69960000911d04dd2aArgyrios Kyrtzidis
9275f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  // Scan all of the string portions, remember the max individual token length,
9285f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  // computing a bound on the concatenated string length, and see whether any
9295f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  // piece is a wide-string.  If any of the string portions is a wide-string
9305f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  // literal, the result is a wide-string literal [C99 6.4.5p4].
931403de3f932b5d1d3e4e58f69960000911d04dd2aArgyrios Kyrtzidis  assert(NumStringToks && "expected at least one token");
9326cf750298d3621d8a10a6dd07fcee8e274b9d94dSean Hunt  MaxTokenLength = StringToks[0].getLength();
933403de3f932b5d1d3e4e58f69960000911d04dd2aArgyrios Kyrtzidis  assert(StringToks[0].getLength() >= 2 && "literal token is invalid!");
9346cf750298d3621d8a10a6dd07fcee8e274b9d94dSean Hunt  SizeBound = StringToks[0].getLength()-2;  // -2 for "".
9355cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  Kind = StringToks[0].getKind();
9366cf750298d3621d8a10a6dd07fcee8e274b9d94dSean Hunt
9376cf750298d3621d8a10a6dd07fcee8e274b9d94dSean Hunt  hadError = false;
9385f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
9395f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  // Implement Translation Phase #6: concatenation of string literals
9405f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  /// (C99 5.1.1.2p1).  The common case is only one string fragment.
9415f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  for (unsigned i = 1; i != NumStringToks; ++i) {
942403de3f932b5d1d3e4e58f69960000911d04dd2aArgyrios Kyrtzidis    if (StringToks[i].getLength() < 2) {
943403de3f932b5d1d3e4e58f69960000911d04dd2aArgyrios Kyrtzidis      hadError = true;
944403de3f932b5d1d3e4e58f69960000911d04dd2aArgyrios Kyrtzidis      return;
945403de3f932b5d1d3e4e58f69960000911d04dd2aArgyrios Kyrtzidis    }
946403de3f932b5d1d3e4e58f69960000911d04dd2aArgyrios Kyrtzidis
9475f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    // The string could be shorter than this if it needs cleaning, but this is a
9485f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    // reasonable bound, which is all we need.
949403de3f932b5d1d3e4e58f69960000911d04dd2aArgyrios Kyrtzidis    assert(StringToks[i].getLength() >= 2 && "literal token is invalid!");
9506cf750298d3621d8a10a6dd07fcee8e274b9d94dSean Hunt    SizeBound += StringToks[i].getLength()-2;  // -2 for "".
9511eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
9525f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    // Remember maximum string piece length.
9536cf750298d3621d8a10a6dd07fcee8e274b9d94dSean Hunt    if (StringToks[i].getLength() > MaxTokenLength)
9546cf750298d3621d8a10a6dd07fcee8e274b9d94dSean Hunt      MaxTokenLength = StringToks[i].getLength();
9551eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
9565cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor    // Remember if we see any wide or utf-8/16/32 strings.
9575cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor    // Also check for illegal concatenations.
9585cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor    if (StringToks[i].isNot(Kind) && StringToks[i].isNot(tok::string_literal)) {
9595cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor      if (isAscii()) {
9605cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor        Kind = StringToks[i].getKind();
9615cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor      } else {
9625cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor        if (Diags)
9635cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor          Diags->Report(FullSourceLoc(StringToks[i].getLocation(), SM),
9645cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor                        diag::err_unsupported_string_concat);
9655cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor        hadError = true;
9665cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor      }
9675cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor    }
9685f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  }
969dbb1ecc32ca122b07b7c98fd0a8f6f53985adaccChris Lattner
9705f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  // Include space for the null terminator.
9715f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  ++SizeBound;
9721eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
9735f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  // TODO: K&R warning: "traditional C rejects string constant concatenation"
9741eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
9755cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  // Get the width in bytes of char/wchar_t/char16_t/char32_t
9765cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  CharByteWidth = getCharWidth(Kind, Target);
9775cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  assert((CharByteWidth & 7) == 0 && "Assumes character size is byte multiple");
9785cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  CharByteWidth /= 8;
9791eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
9805f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  // The output buffer size needs to be large enough to hold wide characters.
9815f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  // This is a worst-case assumption which basically corresponds to L"" "long".
9825cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  SizeBound *= CharByteWidth;
9831eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
9845f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  // Size the temporary buffer to hold the result string data.
9855f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  ResultBuf.resize(SizeBound);
9861eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
9875f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  // Likewise, but for each string piece.
9885f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  llvm::SmallString<512> TokenBuf;
9895f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  TokenBuf.resize(MaxTokenLength);
9901eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
9915f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  // Loop over all the strings, getting their spelling, and expanding them to
9925f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  // wide strings as appropriate.
9935f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  ResultPtr = &ResultBuf[0];   // Next byte to fill in.
9941eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
995ee98ac5a1330db432b188dd2d38b6631aac47bf1Anders Carlsson  Pascal = false;
9961eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
9975f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  for (unsigned i = 0, e = NumStringToks; i != e; ++i) {
9985f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    const char *ThisTokBuf = &TokenBuf[0];
9995f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    // Get the spelling of the token, which eliminates trigraphs, etc.  We know
10005f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    // that ThisTokBuf points to a buffer that is big enough for the whole token
10015f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    // and 'spelled' tokens can only shrink.
100250f6af7a6d6951a63f3da7d4c5a7d3965bf73b63Douglas Gregor    bool StringInvalid = false;
10030833dd0675c25cbb35671c7a2006d511d5c77ce3Chris Lattner    unsigned ThisTokLen =
1004b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner      Lexer::getSpelling(StringToks[i], ThisTokBuf, SM, Features,
1005b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner                         &StringInvalid);
100650f6af7a6d6951a63f3da7d4c5a7d3965bf73b63Douglas Gregor    if (StringInvalid) {
10075cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor      hadError = true;
100850f6af7a6d6951a63f3da7d4c5a7d3965bf73b63Douglas Gregor      continue;
100950f6af7a6d6951a63f3da7d4c5a7d3965bf73b63Douglas Gregor    }
101050f6af7a6d6951a63f3da7d4c5a7d3965bf73b63Douglas Gregor
10115f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    const char *ThisTokEnd = ThisTokBuf+ThisTokLen-1;  // Skip end quote.
10125f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    // TODO: Input character set mapping support.
10131eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
10141661d717563d6a27dec3da69deba2b2efaa45802Craig Topper    // Skip marker for wide or unicode strings.
10155cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor    if (ThisTokBuf[0] == 'L' || ThisTokBuf[0] == 'u' || ThisTokBuf[0] == 'U') {
10165f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      ++ThisTokBuf;
10175cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor      // Skip 8 of u8 marker for utf8 strings.
10185cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor      if (ThisTokBuf[0] == '8')
10195cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor        ++ThisTokBuf;
102056bedefe92ae8f604d14bea75cc3040ab32337c2Fariborz Jahanian    }
10211eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
10222fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper    // Check for raw string
10232fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper    if (ThisTokBuf[0] == 'R') {
10242fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper      ThisTokBuf += 2; // skip R"
10251eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
10262fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper      const char *Prefix = ThisTokBuf;
10272fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper      while (ThisTokBuf[0] != '(')
1028ee98ac5a1330db432b188dd2d38b6631aac47bf1Anders Carlsson        ++ThisTokBuf;
10292fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper      ++ThisTokBuf; // skip '('
10302fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper
10312fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper      // remove same number of characters from the end
10322fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper      if (ThisTokEnd >= ThisTokBuf + (ThisTokBuf - Prefix))
10332fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper        ThisTokEnd -= (ThisTokBuf - Prefix);
10342fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper
10352fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper      // Copy the string over
10362fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper      CopyStringFragment(StringRef(ThisTokBuf, ThisTokEnd - ThisTokBuf));
10372fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper    } else {
10382fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper      assert(ThisTokBuf[0] == '"' && "Expected quote, lexer broken?");
10392fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper      ++ThisTokBuf; // skip "
10402fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper
10412fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper      // Check if this is a pascal string
10422fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper      if (Features.PascalStrings && ThisTokBuf + 1 != ThisTokEnd &&
10432fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper          ThisTokBuf[0] == '\\' && ThisTokBuf[1] == 'p') {
10441eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
10452fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper        // If the \p sequence is found in the first token, we have a pascal string
10462fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper        // Otherwise, if we already have a pascal string, ignore the first \p
10472fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper        if (i == 0) {
10485f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer          ++ThisTokBuf;
10492fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper          Pascal = true;
10502fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper        } else if (Pascal)
10512fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper          ThisTokBuf += 2;
10525f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      }
10531eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
10542fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper      while (ThisTokBuf != ThisTokEnd) {
10552fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper        // Is this a span of non-escape characters?
10562fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper        if (ThisTokBuf[0] != '\\') {
10572fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper          const char *InStart = ThisTokBuf;
10582fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper          do {
10592fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper            ++ThisTokBuf;
10602fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper          } while (ThisTokBuf != ThisTokEnd && ThisTokBuf[0] != '\\');
10612fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper
10622fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper          // Copy the character span over.
10632fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper          CopyStringFragment(StringRef(InStart, ThisTokBuf - InStart));
10642fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper          continue;
10652fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper        }
10662fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper        // Is this a Universal Character Name escape?
10672fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper        if (ThisTokBuf[1] == 'u' || ThisTokBuf[1] == 'U') {
10682fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper          EncodeUCNEscape(ThisTokBuf, ThisTokEnd, ResultPtr,
10692fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper                          hadError, FullSourceLoc(StringToks[i].getLocation(),SM),
10702fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper                          CharByteWidth, Diags, Features);
10712fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper          continue;
10722fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper        }
10732fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper        // Otherwise, this is a non-UCN escape character.  Process it.
10742fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper        unsigned ResultChar =
10752fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper          ProcessCharEscape(ThisTokBuf, ThisTokEnd, hadError,
10762fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper                            FullSourceLoc(StringToks[i].getLocation(), SM),
10772fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper                            CharByteWidth*8, Diags);
10782fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper
10792fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper        // Note: our internal rep of wide char tokens is always little-endian.
10802fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper        *ResultPtr++ = ResultChar & 0xFF;
10811eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
10822fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper        for (unsigned i = 1, e = CharByteWidth; i != e; ++i)
10832fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper          *ResultPtr++ = ResultChar >> i*8;
10842fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper      }
10855f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    }
10865f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  }
10871eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
1088bbee00b6456e90a09f63c83c20233e6c5ad6000aChris Lattner  if (Pascal) {
1089ee98ac5a1330db432b188dd2d38b6631aac47bf1Anders Carlsson    ResultBuf[0] = ResultPtr-&ResultBuf[0]-1;
10905cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor    ResultBuf[0] /= CharByteWidth;
1091bbee00b6456e90a09f63c83c20233e6c5ad6000aChris Lattner
1092bbee00b6456e90a09f63c83c20233e6c5ad6000aChris Lattner    // Verify that pascal strings aren't too large.
10930833dd0675c25cbb35671c7a2006d511d5c77ce3Chris Lattner    if (GetStringLength() > 256) {
10940833dd0675c25cbb35671c7a2006d511d5c77ce3Chris Lattner      if (Diags)
10950833dd0675c25cbb35671c7a2006d511d5c77ce3Chris Lattner        Diags->Report(FullSourceLoc(StringToks[0].getLocation(), SM),
10960833dd0675c25cbb35671c7a2006d511d5c77ce3Chris Lattner                      diag::err_pascal_string_too_long)
10970833dd0675c25cbb35671c7a2006d511d5c77ce3Chris Lattner          << SourceRange(StringToks[0].getLocation(),
10980833dd0675c25cbb35671c7a2006d511d5c77ce3Chris Lattner                         StringToks[NumStringToks-1].getLocation());
10995cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor      hadError = true;
110057d7dde770c67b282e7fb77b1b81e429910937b3Eli Friedman      return;
110157d7dde770c67b282e7fb77b1b81e429910937b3Eli Friedman    }
11020833dd0675c25cbb35671c7a2006d511d5c77ce3Chris Lattner  } else if (Diags) {
1103427c492d368d6ecf409fa8053eecb5cd0e779c5bDouglas Gregor    // Complain if this string literal has too many characters.
1104a95880d6513c617bb96634bcc1f16c6bdb80dedcChris Lattner    unsigned MaxChars = Features.CPlusPlus? 65536 : Features.C99 ? 4095 : 509;
1105427c492d368d6ecf409fa8053eecb5cd0e779c5bDouglas Gregor
1106427c492d368d6ecf409fa8053eecb5cd0e779c5bDouglas Gregor    if (GetNumStringChars() > MaxChars)
11070833dd0675c25cbb35671c7a2006d511d5c77ce3Chris Lattner      Diags->Report(FullSourceLoc(StringToks[0].getLocation(), SM),
11080833dd0675c25cbb35671c7a2006d511d5c77ce3Chris Lattner                    diag::ext_string_too_long)
1109427c492d368d6ecf409fa8053eecb5cd0e779c5bDouglas Gregor        << GetNumStringChars() << MaxChars
1110a95880d6513c617bb96634bcc1f16c6bdb80dedcChris Lattner        << (Features.CPlusPlus ? 2 : Features.C99 ? 1 : 0)
1111427c492d368d6ecf409fa8053eecb5cd0e779c5bDouglas Gregor        << SourceRange(StringToks[0].getLocation(),
1112427c492d368d6ecf409fa8053eecb5cd0e779c5bDouglas Gregor                       StringToks[NumStringToks-1].getLocation());
1113bbee00b6456e90a09f63c83c20233e6c5ad6000aChris Lattner  }
11145f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer}
1115719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner
1116719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner
11172fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper/// copyStringFragment - This function copies from Start to End into ResultPtr.
11182fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper/// Performs widening for multi-byte characters.
111903720fce6e709661af020f3e4e6dfd08a96e8044Craig Toppervoid StringLiteralParser::CopyStringFragment(StringRef Fragment) {
11202fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper  // Copy the character span over.
11212fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper  if (CharByteWidth == 1) {
11222fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper    memcpy(ResultPtr, Fragment.data(), Fragment.size());
11232fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper    ResultPtr += Fragment.size();
11242fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper  } else {
11252fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper    // Note: our internal rep of wide char tokens is always little-endian.
11262fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper    for (StringRef::iterator I=Fragment.begin(), E=Fragment.end(); I!=E; ++I) {
11272fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper      *ResultPtr++ = *I;
11282fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper      // Add zeros at the end.
11292fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper      for (unsigned i = 1, e = CharByteWidth; i != e; ++i)
11302fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper        *ResultPtr++ = 0;
11312fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper    }
11322fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper  }
11332fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper}
11342fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper
11352fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper
1136719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner/// getOffsetOfStringByte - This function returns the offset of the
1137719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner/// specified byte of the string data represented by Token.  This handles
1138719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner/// advancing over escape sequences in the string.
1139719e61573f27c11057ecfe0dd8f141621602c571Chris Lattnerunsigned StringLiteralParser::getOffsetOfStringByte(const Token &Tok,
11406c66f07854c1334a1ce9eae1428d61d54182a6e1Chris Lattner                                                    unsigned ByteNo) const {
1141719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner  // Get the spelling of the token.
1142ca1475ea0e76da6b852796610139ed9b49c8d4a6Chris Lattner  llvm::SmallString<32> SpellingBuffer;
11436cf750298d3621d8a10a6dd07fcee8e274b9d94dSean Hunt  SpellingBuffer.resize(Tok.getLength());
11441eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
114550f6af7a6d6951a63f3da7d4c5a7d3965bf73b63Douglas Gregor  bool StringInvalid = false;
1146719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner  const char *SpellingPtr = &SpellingBuffer[0];
1147b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner  unsigned TokLen = Lexer::getSpelling(Tok, SpellingPtr, SM, Features,
1148b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner                                       &StringInvalid);
114991f54ce93bec136fb9e18740b895cf1c1339524bChris Lattner  if (StringInvalid)
115050f6af7a6d6951a63f3da7d4c5a7d3965bf73b63Douglas Gregor    return 0;
1151719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner
11525cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  assert(SpellingPtr[0] != 'L' && SpellingPtr[0] != 'u' &&
11535cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor         SpellingPtr[0] != 'U' && "Doesn't handle wide or utf strings yet");
1154719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner
11551eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
1156719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner  const char *SpellingStart = SpellingPtr;
1157719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner  const char *SpellingEnd = SpellingPtr+TokLen;
1158719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner
1159719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner  // Skip over the leading quote.
1160719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner  assert(SpellingPtr[0] == '"' && "Should be a string literal!");
1161719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner  ++SpellingPtr;
11621eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
1163719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner  // Skip over bytes until we find the offset we're looking for.
1164719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner  while (ByteNo) {
1165719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner    assert(SpellingPtr < SpellingEnd && "Didn't find byte offset!");
11661eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
1167719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner    // Step over non-escapes simply.
1168719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner    if (*SpellingPtr != '\\') {
1169719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner      ++SpellingPtr;
1170719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner      --ByteNo;
1171719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner      continue;
1172719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner    }
11731eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
1174719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner    // Otherwise, this is an escape character.  Advance over it.
1175719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner    bool HadError = false;
1176719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner    ProcessCharEscape(SpellingPtr, SpellingEnd, HadError,
1177ca1475ea0e76da6b852796610139ed9b49c8d4a6Chris Lattner                      FullSourceLoc(Tok.getLocation(), SM),
11785cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor                      CharByteWidth*8, Diags);
1179719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner    assert(!HadError && "This method isn't valid on erroneous strings");
1180719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner    --ByteNo;
1181719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner  }
11821eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
1183719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner  return SpellingPtr-SpellingStart;
1184719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner}
1185