LiteralSupport.cpp revision 62ec1f2fd7368542bb926c04797fb07023547694
15f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer//===--- LiteralSupport.cpp - Code to parse and process literals ----------===//
25f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer//
35f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer//                     The LLVM Compiler Infrastructure
45f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer//
50bc735ffcfb223c0186419547abaa5c84482663eChris Lattner// This file is distributed under the University of Illinois Open Source
60bc735ffcfb223c0186419547abaa5c84482663eChris Lattner// License. See LICENSE.TXT for details.
75f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer//
85f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer//===----------------------------------------------------------------------===//
95f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer//
105f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer// This file implements the NumericLiteralParser, CharLiteralParser, and
115f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer// StringLiteralParser interfaces.
125f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer//
135f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer//===----------------------------------------------------------------------===//
145f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
155f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer#include "clang/Lex/LiteralSupport.h"
165f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer#include "clang/Lex/Preprocessor.h"
17500d3297d2a21edeac4d46cbcbe21bc2352c2a28Chris Lattner#include "clang/Lex/LexDiagnostic.h"
18136f93a2b660fc780fc2dba82a6f42ca4dc9164aChris Lattner#include "clang/Basic/TargetInfo.h"
195f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer#include "llvm/ADT/StringExtras.h"
205f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencerusing namespace clang;
215f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
/// HexDigitValue - Return the value of the specified hex digit, or -1 if it's
/// not valid.
///
/// Accepts '0'-'9', 'a'-'f' and 'A'-'F'.  The classification is done with
/// plain range comparisons, so it is safe to call with any char value
/// (including negative ones on targets where char is signed).
static int HexDigitValue(char C) {
  if (C >= '0' && C <= '9') return C-'0';
  if (C >= 'a' && C <= 'f') return C-'a'+10;
  if (C >= 'A' && C <= 'F') return C-'A'+10;
  return -1;
}
305f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
315cee1195584fa8672253139c86e922daeda69b9eDouglas Gregorstatic unsigned getCharWidth(tok::TokenKind kind, const TargetInfo &Target) {
325cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  switch (kind) {
335cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  default: assert(0 && "Unknown token type!");
345cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  case tok::char_constant:
355cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  case tok::string_literal:
365cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  case tok::utf8_string_literal:
375cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor    return Target.getCharWidth();
385cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  case tok::wide_char_constant:
395cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  case tok::wide_string_literal:
405cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor    return Target.getWCharWidth();
415cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  case tok::utf16_char_constant:
425cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  case tok::utf16_string_literal:
435cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor    return Target.getChar16Width();
445cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  case tok::utf32_char_constant:
455cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  case tok::utf32_string_literal:
465cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor    return Target.getChar32Width();
475cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  }
485cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor}
495cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor
505f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// ProcessCharEscape - Parse a standard C escape sequence, which can occur in
515f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// either a character or a string literal.
525f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencerstatic unsigned ProcessCharEscape(const char *&ThisTokBuf,
535f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer                                  const char *ThisTokEnd, bool &HadError,
545cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor                                  FullSourceLoc Loc, unsigned CharWidth,
555cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor                                  Diagnostic *Diags) {
565f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  // Skip the '\' char.
575f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  ++ThisTokBuf;
585f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
595f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  // We know that this character can't be off the end of the buffer, because
605f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  // that would have been \", which would not have been the end of string.
615f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  unsigned ResultChar = *ThisTokBuf++;
625f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  switch (ResultChar) {
635f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  // These map to themselves.
645f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  case '\\': case '\'': case '"': case '?': break;
651eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
665f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    // These have fixed mappings.
675f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  case 'a':
685f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    // TODO: K&R: the meaning of '\\a' is different in traditional C
695f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    ResultChar = 7;
705f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    break;
715f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  case 'b':
725f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    ResultChar = 8;
735f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    break;
745f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  case 'e':
7591f54ce93bec136fb9e18740b895cf1c1339524bChris Lattner    if (Diags)
7691f54ce93bec136fb9e18740b895cf1c1339524bChris Lattner      Diags->Report(Loc, diag::ext_nonstandard_escape) << "e";
775f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    ResultChar = 27;
785f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    break;
793c54801fbc67d8df2fed0711a2e2022db6b1bbcfEli Friedman  case 'E':
8091f54ce93bec136fb9e18740b895cf1c1339524bChris Lattner    if (Diags)
8191f54ce93bec136fb9e18740b895cf1c1339524bChris Lattner      Diags->Report(Loc, diag::ext_nonstandard_escape) << "E";
823c54801fbc67d8df2fed0711a2e2022db6b1bbcfEli Friedman    ResultChar = 27;
833c54801fbc67d8df2fed0711a2e2022db6b1bbcfEli Friedman    break;
845f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  case 'f':
855f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    ResultChar = 12;
865f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    break;
875f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  case 'n':
885f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    ResultChar = 10;
895f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    break;
905f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  case 'r':
915f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    ResultChar = 13;
925f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    break;
935f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  case 't':
945f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    ResultChar = 9;
955f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    break;
965f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  case 'v':
975f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    ResultChar = 11;
985f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    break;
995f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  case 'x': { // Hex escape.
1005f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    ResultChar = 0;
1015f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    if (ThisTokBuf == ThisTokEnd || !isxdigit(*ThisTokBuf)) {
10291f54ce93bec136fb9e18740b895cf1c1339524bChris Lattner      if (Diags)
10391f54ce93bec136fb9e18740b895cf1c1339524bChris Lattner        Diags->Report(Loc, diag::err_hex_escape_no_digits);
1045f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      HadError = 1;
1055f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      break;
1065f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    }
1071eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
1085f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    // Hex escapes are a maximal series of hex digits.
1095f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    bool Overflow = false;
1105f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    for (; ThisTokBuf != ThisTokEnd; ++ThisTokBuf) {
1115f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      int CharVal = HexDigitValue(ThisTokBuf[0]);
1125f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      if (CharVal == -1) break;
113c29bbde0a14a664d6843b21d3791478d1f4d2833Chris Lattner      // About to shift out a digit?
114c29bbde0a14a664d6843b21d3791478d1f4d2833Chris Lattner      Overflow |= (ResultChar & 0xF0000000) ? true : false;
1155f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      ResultChar <<= 4;
1165f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      ResultChar |= CharVal;
1175f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    }
1185f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
1195f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    // See if any bits will be truncated when evaluated as a character.
1205f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    if (CharWidth != 32 && (ResultChar >> CharWidth) != 0) {
1215f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      Overflow = true;
1225f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      ResultChar &= ~0U >> (32-CharWidth);
1235f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    }
1241eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
1255f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    // Check for overflow.
12691f54ce93bec136fb9e18740b895cf1c1339524bChris Lattner    if (Overflow && Diags)   // Too many digits to fit in
12791f54ce93bec136fb9e18740b895cf1c1339524bChris Lattner      Diags->Report(Loc, diag::warn_hex_escape_too_large);
1285f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    break;
1295f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  }
1305f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  case '0': case '1': case '2': case '3':
1315f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  case '4': case '5': case '6': case '7': {
1325f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    // Octal escapes.
1335f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    --ThisTokBuf;
1345f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    ResultChar = 0;
1355f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
1365f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    // Octal escapes are a series of octal digits with maximum length 3.
1375f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    // "\0123" is a two digit sequence equal to "\012" "3".
1385f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    unsigned NumDigits = 0;
1395f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    do {
1405f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      ResultChar <<= 3;
1415f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      ResultChar |= *ThisTokBuf++ - '0';
1425f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      ++NumDigits;
1435f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    } while (ThisTokBuf != ThisTokEnd && NumDigits < 3 &&
1445f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer             ThisTokBuf[0] >= '0' && ThisTokBuf[0] <= '7');
1451eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
1465f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    // Check for overflow.  Reject '\777', but not L'\777'.
1475f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    if (CharWidth != 32 && (ResultChar >> CharWidth) != 0) {
14891f54ce93bec136fb9e18740b895cf1c1339524bChris Lattner      if (Diags)
14991f54ce93bec136fb9e18740b895cf1c1339524bChris Lattner        Diags->Report(Loc, diag::warn_octal_escape_too_large);
1505f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      ResultChar &= ~0U >> (32-CharWidth);
1515f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    }
1525f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    break;
1535f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  }
1541eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
1555f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    // Otherwise, these are not valid escapes.
1565f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  case '(': case '{': case '[': case '%':
1575f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    // GCC accepts these as extensions.  We warn about them as such though.
15891f54ce93bec136fb9e18740b895cf1c1339524bChris Lattner    if (Diags)
15991f54ce93bec136fb9e18740b895cf1c1339524bChris Lattner      Diags->Report(Loc, diag::ext_nonstandard_escape)
160b90f4b3fb94056609da9cca5eef7358d95a363b2Douglas Gregor        << std::string()+(char)ResultChar;
161f01fdff97b245caac98100d232c760b4d0531411Eli Friedman    break;
1625f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  default:
16391f54ce93bec136fb9e18740b895cf1c1339524bChris Lattner    if (Diags == 0)
164b90f4b3fb94056609da9cca5eef7358d95a363b2Douglas Gregor      break;
165b90f4b3fb94056609da9cca5eef7358d95a363b2Douglas Gregor
16623ef69d197ba3b5e9602f7161fee50990059502aTed Kremenek    if (isgraph(ResultChar))
16791f54ce93bec136fb9e18740b895cf1c1339524bChris Lattner      Diags->Report(Loc, diag::ext_unknown_escape)
16891f54ce93bec136fb9e18740b895cf1c1339524bChris Lattner        << std::string()+(char)ResultChar;
169ac92d829111bc19d1cc97cd85c3c04bc39b969d1Chris Lattner    else
17091f54ce93bec136fb9e18740b895cf1c1339524bChris Lattner      Diags->Report(Loc, diag::ext_unknown_escape)
17191f54ce93bec136fb9e18740b895cf1c1339524bChris Lattner        << "x"+llvm::utohexstr(ResultChar);
1725f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    break;
1735f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  }
1741eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
1755f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  return ResultChar;
1765f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer}
1775f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
1780e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff/// ProcessUCNEscape - Read the Universal Character Name, check constraints and
17959705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber/// return the UTF32.
18059705aee3fe01aa6fb6962dd11350161b47983d9Nico Weberstatic bool ProcessUCNEscape(const char *&ThisTokBuf, const char *ThisTokEnd,
18159705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber                             uint32_t &UcnVal, unsigned short &UcnLen,
182872a45e91778eb0b706ff57272fe547d4512eb19Chris Lattner                             FullSourceLoc Loc, Diagnostic *Diags,
1836c66f07854c1334a1ce9eae1428d61d54182a6e1Chris Lattner                             const LangOptions &Features) {
1846c66f07854c1334a1ce9eae1428d61d54182a6e1Chris Lattner  if (!Features.CPlusPlus && !Features.C99 && Diags)
185872a45e91778eb0b706ff57272fe547d4512eb19Chris Lattner    Diags->Report(Loc, diag::warn_ucn_not_valid_in_c89);
1861eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
1874e93b34fdb798abfa0534062a139f2c37cbf876eSteve Naroff  // Save the beginning of the string (for error diagnostics).
1884e93b34fdb798abfa0534062a139f2c37cbf876eSteve Naroff  const char *ThisTokBegin = ThisTokBuf;
1891eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
1900e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  // Skip the '\u' char's.
1910e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  ThisTokBuf += 2;
1920e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff
1930e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  if (ThisTokBuf == ThisTokEnd || !isxdigit(*ThisTokBuf)) {
1946c66f07854c1334a1ce9eae1428d61d54182a6e1Chris Lattner    if (Diags)
195872a45e91778eb0b706ff57272fe547d4512eb19Chris Lattner      Diags->Report(Loc, diag::err_ucn_escape_no_digits);
19659705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber    return false;
1970e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  }
19859705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber  UcnLen = (ThisTokBuf[-1] == 'u' ? 4 : 8);
19956bedefe92ae8f604d14bea75cc3040ab32337c2Fariborz Jahanian  unsigned short UcnLenSave = UcnLen;
20059705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber  for (; ThisTokBuf != ThisTokEnd && UcnLenSave; ++ThisTokBuf, UcnLenSave--) {
2010e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff    int CharVal = HexDigitValue(ThisTokBuf[0]);
2020e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff    if (CharVal == -1) break;
2030e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff    UcnVal <<= 4;
2040e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff    UcnVal |= CharVal;
2050e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  }
2060e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  // If we didn't consume the proper number of digits, there is a problem.
20759705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber  if (UcnLenSave) {
208872a45e91778eb0b706ff57272fe547d4512eb19Chris Lattner    if (Diags) {
2097ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner      SourceLocation L =
2107ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner        Lexer::AdvanceToTokenCharacter(Loc, ThisTokBuf-ThisTokBegin,
2117ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner                                       Loc.getManager(), Features);
2127ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner      Diags->Report(FullSourceLoc(L, Loc.getManager()),
2137ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner                    diag::err_ucn_escape_incomplete);
214872a45e91778eb0b706ff57272fe547d4512eb19Chris Lattner    }
21559705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber    return false;
2160e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  }
2171eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump  // Check UCN constraints (C99 6.4.3p2).
2180e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  if ((UcnVal < 0xa0 &&
2190e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff      (UcnVal != 0x24 && UcnVal != 0x40 && UcnVal != 0x60 )) // $, @, `
2201eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump      || (UcnVal >= 0xD800 && UcnVal <= 0xDFFF)
2218a5c0cd90b8d607ca284274000ed8716b836d253Steve Naroff      || (UcnVal > 0x10FFFF)) /* the maximum legal UTF32 value */ {
2226c66f07854c1334a1ce9eae1428d61d54182a6e1Chris Lattner    if (Diags)
223872a45e91778eb0b706ff57272fe547d4512eb19Chris Lattner      Diags->Report(Loc, diag::err_ucn_escape_invalid);
22459705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber    return false;
22559705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber  }
22659705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber  return true;
22759705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber}
22859705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber
22959705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber/// EncodeUCNEscape - Read the Universal Character Name, check constraints and
23059705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber/// convert the UTF32 to UTF8 or UTF16. This is a subroutine of
23159705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber/// StringLiteralParser. When we decide to implement UCN's for identifiers,
23259705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber/// we will likely rework our support for UCN's.
23359705aee3fe01aa6fb6962dd11350161b47983d9Nico Weberstatic void EncodeUCNEscape(const char *&ThisTokBuf, const char *ThisTokEnd,
234a95880d6513c617bb96634bcc1f16c6bdb80dedcChris Lattner                            char *&ResultBuf, bool &HadError,
2355cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor                            FullSourceLoc Loc, unsigned CharByteWidth,
2365cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor                            Diagnostic *Diags, const LangOptions &Features) {
23759705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber  typedef uint32_t UTF32;
23859705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber  UTF32 UcnVal = 0;
23959705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber  unsigned short UcnLen = 0;
240a95880d6513c617bb96634bcc1f16c6bdb80dedcChris Lattner  if (!ProcessUCNEscape(ThisTokBuf, ThisTokEnd, UcnVal, UcnLen, Loc, Diags,
241a95880d6513c617bb96634bcc1f16c6bdb80dedcChris Lattner                        Features)) {
2420e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff    HadError = 1;
2430e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff    return;
2440e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  }
24559705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber
2465cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  assert((CharByteWidth == 1 || CharByteWidth == 2 || CharByteWidth) &&
2475cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor         "only character widths of 1, 2, or 4 bytes supported");
248a0f15b0848405ae16d63bd5d78c862a6526b338aNico Weber
2495cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  (void)UcnLen;
2505cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  assert((UcnLen== 4 || UcnLen== 8) && "only ucn length of 4 or 8 supported");
2515cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor
2525cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  if (CharByteWidth == 4) {
2535cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor    // Note: our internal rep of wide char tokens is always little-endian.
2545cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor    *ResultBuf++ = (UcnVal & 0x000000FF);
2555cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor    *ResultBuf++ = (UcnVal & 0x0000FF00) >> 8;
2565cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor    *ResultBuf++ = (UcnVal & 0x00FF0000) >> 16;
2575cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor    *ResultBuf++ = (UcnVal & 0xFF000000) >> 24;
2585cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor    return;
2595cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  }
260a0f15b0848405ae16d63bd5d78c862a6526b338aNico Weber
2615cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  if (CharByteWidth == 2) {
262a0f15b0848405ae16d63bd5d78c862a6526b338aNico Weber    // Convert to UTF16.
263a0f15b0848405ae16d63bd5d78c862a6526b338aNico Weber    if (UcnVal < (UTF32)0xFFFF) {
264a0f15b0848405ae16d63bd5d78c862a6526b338aNico Weber      *ResultBuf++ = (UcnVal & 0x000000FF);
265a0f15b0848405ae16d63bd5d78c862a6526b338aNico Weber      *ResultBuf++ = (UcnVal & 0x0000FF00) >> 8;
266a0f15b0848405ae16d63bd5d78c862a6526b338aNico Weber      return;
267a0f15b0848405ae16d63bd5d78c862a6526b338aNico Weber    }
268a95880d6513c617bb96634bcc1f16c6bdb80dedcChris Lattner    if (Diags) Diags->Report(Loc, diag::warn_ucn_escape_too_large);
269a0f15b0848405ae16d63bd5d78c862a6526b338aNico Weber
270a0f15b0848405ae16d63bd5d78c862a6526b338aNico Weber    typedef uint16_t UTF16;
271a0f15b0848405ae16d63bd5d78c862a6526b338aNico Weber    UcnVal -= 0x10000;
272a0f15b0848405ae16d63bd5d78c862a6526b338aNico Weber    UTF16 surrogate1 = 0xD800 + (UcnVal >> 10);
273a0f15b0848405ae16d63bd5d78c862a6526b338aNico Weber    UTF16 surrogate2 = 0xDC00 + (UcnVal & 0x3FF);
274a0f15b0848405ae16d63bd5d78c862a6526b338aNico Weber    *ResultBuf++ = (surrogate1 & 0x000000FF);
275a0f15b0848405ae16d63bd5d78c862a6526b338aNico Weber    *ResultBuf++ = (surrogate1 & 0x0000FF00) >> 8;
276a0f15b0848405ae16d63bd5d78c862a6526b338aNico Weber    *ResultBuf++ = (surrogate2 & 0x000000FF);
277a0f15b0848405ae16d63bd5d78c862a6526b338aNico Weber    *ResultBuf++ = (surrogate2 & 0x0000FF00) >> 8;
27856bedefe92ae8f604d14bea75cc3040ab32337c2Fariborz Jahanian    return;
27956bedefe92ae8f604d14bea75cc3040ab32337c2Fariborz Jahanian  }
2805cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor
2815cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  assert(CharByteWidth == 1 && "UTF-8 encoding is only for 1 byte characters");
2825cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor
2830e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  // Now that we've parsed/checked the UCN, we convert from UTF32->UTF8.
2840e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  // The conversion below was inspired by:
2850e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  //   http://www.unicode.org/Public/PROGRAMS/CVTUTF/ConvertUTF.c
2861eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump  // First, we determine how many bytes the result will require.
2874e93b34fdb798abfa0534062a139f2c37cbf876eSteve Naroff  typedef uint8_t UTF8;
2880e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff
2890e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  unsigned short bytesToWrite = 0;
2900e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  if (UcnVal < (UTF32)0x80)
2910e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff    bytesToWrite = 1;
2920e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  else if (UcnVal < (UTF32)0x800)
2930e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff    bytesToWrite = 2;
2940e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  else if (UcnVal < (UTF32)0x10000)
2950e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff    bytesToWrite = 3;
2960e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  else
2970e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff    bytesToWrite = 4;
2981eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
2990e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  const unsigned byteMask = 0xBF;
3000e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  const unsigned byteMark = 0x80;
3011eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
3020e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  // Once the bits are split out into bytes of UTF8, this is a mask OR-ed
3038a5c0cd90b8d607ca284274000ed8716b836d253Steve Naroff  // into the first byte, depending on how many bytes follow.
3041eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump  static const UTF8 firstByteMark[5] = {
3058a5c0cd90b8d607ca284274000ed8716b836d253Steve Naroff    0x00, 0x00, 0xC0, 0xE0, 0xF0
3060e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  };
3070e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  // Finally, we write the bytes into ResultBuf.
3080e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  ResultBuf += bytesToWrite;
3090e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  switch (bytesToWrite) { // note: everything falls through.
3100e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff    case 4: *--ResultBuf = (UTF8)((UcnVal | byteMark) & byteMask); UcnVal >>= 6;
3110e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff    case 3: *--ResultBuf = (UTF8)((UcnVal | byteMark) & byteMask); UcnVal >>= 6;
3120e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff    case 2: *--ResultBuf = (UTF8)((UcnVal | byteMark) & byteMask); UcnVal >>= 6;
3130e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff    case 1: *--ResultBuf = (UTF8) (UcnVal | firstByteMark[bytesToWrite]);
3140e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  }
3150e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  // Update the buffer.
3160e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  ResultBuf += bytesToWrite;
3170e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff}
3185f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
3195f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
///       integer-constant: [C99 6.4.4.1]
///         decimal-constant integer-suffix
///         octal-constant integer-suffix
///         hexadecimal-constant integer-suffix
///       decimal-constant:
///         nonzero-digit
///         decimal-constant digit
///       octal-constant:
///         0
///         octal-constant octal-digit
///       hexadecimal-constant:
///         hexadecimal-prefix hexadecimal-digit
///         hexadecimal-constant hexadecimal-digit
///       hexadecimal-prefix: one of
///         0x 0X
///       integer-suffix:
///         unsigned-suffix [long-suffix]
///         unsigned-suffix [long-long-suffix]
///         long-suffix [unsigned-suffix]
///         long-long-suffix [unsigned-suffix]
///       nonzero-digit:
///         1 2 3 4 5 6 7 8 9
///       octal-digit:
///         0 1 2 3 4 5 6 7
///       hexadecimal-digit:
///         0 1 2 3 4 5 6 7 8 9
///         a b c d e f
///         A B C D E F
///       unsigned-suffix: one of
///         u U
///       long-suffix: one of
///         l L
///       long-long-suffix: one of
///         ll LL
///
///       floating-constant: [C99 6.4.4.2]
///         TODO: add rules...
///
3585f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid SpencerNumericLiteralParser::
3595f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid SpencerNumericLiteralParser(const char *begin, const char *end,
3605f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer                     SourceLocation TokLoc, Preprocessor &pp)
3615f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  : PP(pp), ThisTokBegin(begin), ThisTokEnd(end) {
3621eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
363c29bbde0a14a664d6843b21d3791478d1f4d2833Chris Lattner  // This routine assumes that the range begin/end matches the regex for integer
364c29bbde0a14a664d6843b21d3791478d1f4d2833Chris Lattner  // and FP constants (specifically, the 'pp-number' regex), and assumes that
365c29bbde0a14a664d6843b21d3791478d1f4d2833Chris Lattner  // the byte at "*end" is both valid and not part of the regex.  Because of
366c29bbde0a14a664d6843b21d3791478d1f4d2833Chris Lattner  // this, it doesn't have to check for 'overscan' in various places.
367c29bbde0a14a664d6843b21d3791478d1f4d2833Chris Lattner  assert(!isalnum(*end) && *end != '.' && *end != '_' &&
368c29bbde0a14a664d6843b21d3791478d1f4d2833Chris Lattner         "Lexer didn't maximally munch?");
3691eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
3705f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  s = DigitsBegin = begin;
3715f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  saw_exponent = false;
3725f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  saw_period = false;
3735f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  isLong = false;
3745f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  isUnsigned = false;
3755f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  isLongLong = false;
3766e400c286b485e28d04a742ea87860ddfefa672eChris Lattner  isFloat = false;
377506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner  isImaginary = false;
378b79fe2d28777652a4df4f49dc876cbec060ca90eMike Stump  isMicrosoftInteger = false;
3795f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  hadError = false;
3801eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
3815f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  if (*s == '0') { // parse radix
382368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    ParseNumberStartingWithZero(TokLoc);
383368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    if (hadError)
384368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner      return;
3855f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  } else { // the first digit is non-zero
3865f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    radix = 10;
3875f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    s = SkipDigits(s);
3885f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    if (s == ThisTokEnd) {
3895f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      // Done.
390016765e3453db2e302efe53905e99cdb25501234Christopher Lamb    } else if (isxdigit(*s) && !(*s == 'e' || *s == 'E')) {
391ac92d829111bc19d1cc97cd85c3c04bc39b969d1Chris Lattner      PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-begin),
3925f9e272e632e951b1efe824cd16acb4d96077930Chris Lattner              diag::err_invalid_decimal_digit) << StringRef(s, 1);
393ac92d829111bc19d1cc97cd85c3c04bc39b969d1Chris Lattner      hadError = true;
3945f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      return;
3955f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    } else if (*s == '.') {
3965f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      s++;
3975f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      saw_period = true;
3985f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      s = SkipDigits(s);
3991eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump    }
4004411f46050216a139ab2fc7ff145ec384d11ec7fChris Lattner    if ((*s == 'e' || *s == 'E')) { // exponent
40170f66ab053f36ab3df7a778d09bcb2b4b0fec1f8Chris Lattner      const char *Exponent = s;
4025f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      s++;
4035f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      saw_exponent = true;
4045f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      if (*s == '+' || *s == '-')  s++; // sign
4055f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      const char *first_non_digit = SkipDigits(s);
4060b7f69d789ca1f76582ee9a336e25861fd0c1416Chris Lattner      if (first_non_digit != s) {
4075f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer        s = first_non_digit;
4080b7f69d789ca1f76582ee9a336e25861fd0c1416Chris Lattner      } else {
409ac92d829111bc19d1cc97cd85c3c04bc39b969d1Chris Lattner        PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, Exponent-begin),
410ac92d829111bc19d1cc97cd85c3c04bc39b969d1Chris Lattner                diag::err_exponent_has_no_digits);
411ac92d829111bc19d1cc97cd85c3c04bc39b969d1Chris Lattner        hadError = true;
4120b7f69d789ca1f76582ee9a336e25861fd0c1416Chris Lattner        return;
4135f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      }
4145f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    }
4155f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  }
4165f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
4175f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  SuffixBegin = s;
4181eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
419506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner  // Parse the suffix.  At this point we can classify whether we have an FP or
420506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner  // integer constant.
421506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner  bool isFPConstant = isFloatingLiteral();
4221eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
423506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner  // Loop over all of the characters of the suffix.  If we see something bad,
424506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner  // we break out of the loop.
425506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner  for (; s != ThisTokEnd; ++s) {
426506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner    switch (*s) {
427506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner    case 'f':      // FP Suffix for "float"
428506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner    case 'F':
429506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner      if (!isFPConstant) break;  // Error for integer constant.
4306e400c286b485e28d04a742ea87860ddfefa672eChris Lattner      if (isFloat || isLong) break; // FF, LF invalid.
4316e400c286b485e28d04a742ea87860ddfefa672eChris Lattner      isFloat = true;
432506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner      continue;  // Success.
433506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner    case 'u':
434506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner    case 'U':
435506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner      if (isFPConstant) break;  // Error for floating constant.
436506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner      if (isUnsigned) break;    // Cannot be repeated.
437506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner      isUnsigned = true;
438506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner      continue;  // Success.
439506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner    case 'l':
440506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner    case 'L':
441506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner      if (isLong || isLongLong) break;  // Cannot be repeated.
4426e400c286b485e28d04a742ea87860ddfefa672eChris Lattner      if (isFloat) break;               // LF invalid.
4431eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
444506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner      // Check for long long.  The L's need to be adjacent and the same case.
445506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner      if (s+1 != ThisTokEnd && s[1] == s[0]) {
446506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner        if (isFPConstant) break;        // long long invalid for floats.
447506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner        isLongLong = true;
448506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner        ++s;  // Eat both of them.
449506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner      } else {
4505f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer        isLong = true;
4515f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      }
452506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner      continue;  // Success.
453506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner    case 'i':
454c637415a96c16abc7e28ef83c6c105716f7e8936Chris Lattner    case 'I':
45562ec1f2fd7368542bb926c04797fb07023547694Francois Pichet      if (PP.getLangOptions().MicrosoftExt) {
456a8be02b655b76e4dbe776b0c62bc3c450dc6feabFariborz Jahanian        if (isFPConstant || isLong || isLongLong) break;
4576e8c7acb61b2c7f421d6e1aba8a7a84e96ab6981Nuno Lopes
4580c29b22f4384500cc0d04f3072cc5d5d58d10d6cSteve Naroff        // Allow i8, i16, i32, i64, and i128.
459b79fe2d28777652a4df4f49dc876cbec060ca90eMike Stump        if (s + 1 != ThisTokEnd) {
460b79fe2d28777652a4df4f49dc876cbec060ca90eMike Stump          switch (s[1]) {
461b79fe2d28777652a4df4f49dc876cbec060ca90eMike Stump            case '8':
462b79fe2d28777652a4df4f49dc876cbec060ca90eMike Stump              s += 2; // i8 suffix
463b79fe2d28777652a4df4f49dc876cbec060ca90eMike Stump              isMicrosoftInteger = true;
4646e8c7acb61b2c7f421d6e1aba8a7a84e96ab6981Nuno Lopes              break;
465b79fe2d28777652a4df4f49dc876cbec060ca90eMike Stump            case '1':
4666e8c7acb61b2c7f421d6e1aba8a7a84e96ab6981Nuno Lopes              if (s + 2 == ThisTokEnd) break;
467d062b604548be6e2f85f6f63a461702e5ea14115Francois Pichet              if (s[2] == '6') {
468d062b604548be6e2f85f6f63a461702e5ea14115Francois Pichet                s += 3; // i16 suffix
469d062b604548be6e2f85f6f63a461702e5ea14115Francois Pichet                isMicrosoftInteger = true;
470d062b604548be6e2f85f6f63a461702e5ea14115Francois Pichet              }
4716e8c7acb61b2c7f421d6e1aba8a7a84e96ab6981Nuno Lopes              else if (s[2] == '2') {
4726e8c7acb61b2c7f421d6e1aba8a7a84e96ab6981Nuno Lopes                if (s + 3 == ThisTokEnd) break;
473d062b604548be6e2f85f6f63a461702e5ea14115Francois Pichet                if (s[3] == '8') {
474d062b604548be6e2f85f6f63a461702e5ea14115Francois Pichet                  s += 4; // i128 suffix
475d062b604548be6e2f85f6f63a461702e5ea14115Francois Pichet                  isMicrosoftInteger = true;
476d062b604548be6e2f85f6f63a461702e5ea14115Francois Pichet                }
477b79fe2d28777652a4df4f49dc876cbec060ca90eMike Stump              }
4786e8c7acb61b2c7f421d6e1aba8a7a84e96ab6981Nuno Lopes              break;
479b79fe2d28777652a4df4f49dc876cbec060ca90eMike Stump            case '3':
4806e8c7acb61b2c7f421d6e1aba8a7a84e96ab6981Nuno Lopes              if (s + 2 == ThisTokEnd) break;
481d062b604548be6e2f85f6f63a461702e5ea14115Francois Pichet              if (s[2] == '2') {
482d062b604548be6e2f85f6f63a461702e5ea14115Francois Pichet                s += 3; // i32 suffix
483d062b604548be6e2f85f6f63a461702e5ea14115Francois Pichet                isLong = true;
484d062b604548be6e2f85f6f63a461702e5ea14115Francois Pichet                isMicrosoftInteger = true;
485d062b604548be6e2f85f6f63a461702e5ea14115Francois Pichet              }
4866e8c7acb61b2c7f421d6e1aba8a7a84e96ab6981Nuno Lopes              break;
487b79fe2d28777652a4df4f49dc876cbec060ca90eMike Stump            case '6':
4886e8c7acb61b2c7f421d6e1aba8a7a84e96ab6981Nuno Lopes              if (s + 2 == ThisTokEnd) break;
489d062b604548be6e2f85f6f63a461702e5ea14115Francois Pichet              if (s[2] == '4') {
490d062b604548be6e2f85f6f63a461702e5ea14115Francois Pichet                s += 3; // i64 suffix
491d062b604548be6e2f85f6f63a461702e5ea14115Francois Pichet                isLongLong = true;
492d062b604548be6e2f85f6f63a461702e5ea14115Francois Pichet                isMicrosoftInteger = true;
493d062b604548be6e2f85f6f63a461702e5ea14115Francois Pichet              }
4946e8c7acb61b2c7f421d6e1aba8a7a84e96ab6981Nuno Lopes              break;
495b79fe2d28777652a4df4f49dc876cbec060ca90eMike Stump            default:
496b79fe2d28777652a4df4f49dc876cbec060ca90eMike Stump              break;
497b79fe2d28777652a4df4f49dc876cbec060ca90eMike Stump          }
498b79fe2d28777652a4df4f49dc876cbec060ca90eMike Stump          break;
4990c29b22f4384500cc0d04f3072cc5d5d58d10d6cSteve Naroff        }
5000c29b22f4384500cc0d04f3072cc5d5d58d10d6cSteve Naroff      }
5010c29b22f4384500cc0d04f3072cc5d5d58d10d6cSteve Naroff      // fall through.
502506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner    case 'j':
503506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner    case 'J':
504506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner      if (isImaginary) break;   // Cannot be repeated.
505506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner      PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-begin),
506506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner              diag::ext_imaginary_constant);
507506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner      isImaginary = true;
508506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner      continue;  // Success.
5095f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    }
510506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner    // If we reached here, there was an error.
511506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner    break;
512506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner  }
5131eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
514506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner  // Report an error if there are any.
515506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner  if (s != ThisTokEnd) {
516ac92d829111bc19d1cc97cd85c3c04bc39b969d1Chris Lattner    PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-begin),
517ac92d829111bc19d1cc97cd85c3c04bc39b969d1Chris Lattner            isFPConstant ? diag::err_invalid_suffix_float_constant :
518ac92d829111bc19d1cc97cd85c3c04bc39b969d1Chris Lattner                           diag::err_invalid_suffix_integer_constant)
5195f9e272e632e951b1efe824cd16acb4d96077930Chris Lattner      << StringRef(SuffixBegin, ThisTokEnd-SuffixBegin);
520ac92d829111bc19d1cc97cd85c3c04bc39b969d1Chris Lattner    hadError = true;
521506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner    return;
5225f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  }
5235f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer}
5245f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
525368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner/// ParseNumberStartingWithZero - This method is called when the first character
526368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner/// of the number is found to be a zero.  This means it is either an octal
527368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner/// number (like '04') or a hex number ('0x123a') a binary number ('0b1010') or
5281eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump/// a floating point number (01239.123e4).  Eat the prefix, determining the
529368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner/// radix etc.
530368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattnervoid NumericLiteralParser::ParseNumberStartingWithZero(SourceLocation TokLoc) {
531368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner  assert(s[0] == '0' && "Invalid method call");
532368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner  s++;
5331eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
534368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner  // Handle a hex number like 0x1234.
535368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner  if ((*s == 'x' || *s == 'X') && (isxdigit(s[1]) || s[1] == '.')) {
536368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    s++;
537368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    radix = 16;
538368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    DigitsBegin = s;
539368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    s = SkipHexDigits(s);
540368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    if (s == ThisTokEnd) {
541368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner      // Done.
542368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    } else if (*s == '.') {
543368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner      s++;
544368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner      saw_period = true;
545368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner      s = SkipHexDigits(s);
546368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    }
547368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    // A binary exponent can appear with or with a '.'. If dotted, the
5481eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump    // binary exponent is required.
5491155c42e7b1b4e401bb0a331a6d715d637958c75Douglas Gregor    if (*s == 'p' || *s == 'P') {
550368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner      const char *Exponent = s;
551368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner      s++;
552368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner      saw_exponent = true;
553368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner      if (*s == '+' || *s == '-')  s++; // sign
554368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner      const char *first_non_digit = SkipDigits(s);
5556ea623823f8532670480425b573f35115404b4a0Chris Lattner      if (first_non_digit == s) {
556ac92d829111bc19d1cc97cd85c3c04bc39b969d1Chris Lattner        PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, Exponent-ThisTokBegin),
557ac92d829111bc19d1cc97cd85c3c04bc39b969d1Chris Lattner                diag::err_exponent_has_no_digits);
558ac92d829111bc19d1cc97cd85c3c04bc39b969d1Chris Lattner        hadError = true;
5596ea623823f8532670480425b573f35115404b4a0Chris Lattner        return;
560368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner      }
5616ea623823f8532670480425b573f35115404b4a0Chris Lattner      s = first_non_digit;
5621eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
5638c723404c34e9347fae473489194e8c200c4ac83Sean Hunt      // In C++0x, we cannot support hexadecmial floating literals because
5648c723404c34e9347fae473489194e8c200c4ac83Sean Hunt      // they conflict with user-defined literals, so we warn in previous
5658c723404c34e9347fae473489194e8c200c4ac83Sean Hunt      // versions of C++ by default.
5661155c42e7b1b4e401bb0a331a6d715d637958c75Douglas Gregor      if (!PP.getLangOptions().HexFloats)
567ac92d829111bc19d1cc97cd85c3c04bc39b969d1Chris Lattner        PP.Diag(TokLoc, diag::ext_hexconstant_invalid);
568368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    } else if (saw_period) {
569ac92d829111bc19d1cc97cd85c3c04bc39b969d1Chris Lattner      PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-ThisTokBegin),
570ac92d829111bc19d1cc97cd85c3c04bc39b969d1Chris Lattner              diag::err_hexconstant_requires_exponent);
571ac92d829111bc19d1cc97cd85c3c04bc39b969d1Chris Lattner      hadError = true;
572368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    }
573368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    return;
574368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner  }
5751eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
576368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner  // Handle simple binary numbers 0b01010
577368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner  if (*s == 'b' || *s == 'B') {
578368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    // 0b101010 is a GCC extension.
579413d355e38755a71f106dbc0ac900ca989070916Chris Lattner    PP.Diag(TokLoc, diag::ext_binary_literal);
580368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    ++s;
581368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    radix = 2;
582368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    DigitsBegin = s;
583368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    s = SkipBinaryDigits(s);
584368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    if (s == ThisTokEnd) {
585368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner      // Done.
586368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    } else if (isxdigit(*s)) {
587ac92d829111bc19d1cc97cd85c3c04bc39b969d1Chris Lattner      PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-ThisTokBegin),
5885f9e272e632e951b1efe824cd16acb4d96077930Chris Lattner              diag::err_invalid_binary_digit) << StringRef(s, 1);
589ac92d829111bc19d1cc97cd85c3c04bc39b969d1Chris Lattner      hadError = true;
590368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    }
591413d355e38755a71f106dbc0ac900ca989070916Chris Lattner    // Other suffixes will be diagnosed by the caller.
592368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    return;
593368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner  }
5941eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
595368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner  // For now, the radix is set to 8. If we discover that we have a
596368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner  // floating point constant, the radix will change to 10. Octal floating
5971eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump  // point constants are not permitted (only decimal and hexadecimal).
598368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner  radix = 8;
599368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner  DigitsBegin = s;
600368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner  s = SkipOctalDigits(s);
601368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner  if (s == ThisTokEnd)
602368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    return; // Done, simple octal number like 01234
6031eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
604413d355e38755a71f106dbc0ac900ca989070916Chris Lattner  // If we have some other non-octal digit that *is* a decimal digit, see if
605413d355e38755a71f106dbc0ac900ca989070916Chris Lattner  // this is part of a floating point number like 094.123 or 09e1.
606413d355e38755a71f106dbc0ac900ca989070916Chris Lattner  if (isdigit(*s)) {
607413d355e38755a71f106dbc0ac900ca989070916Chris Lattner    const char *EndDecimal = SkipDigits(s);
608413d355e38755a71f106dbc0ac900ca989070916Chris Lattner    if (EndDecimal[0] == '.' || EndDecimal[0] == 'e' || EndDecimal[0] == 'E') {
609413d355e38755a71f106dbc0ac900ca989070916Chris Lattner      s = EndDecimal;
610413d355e38755a71f106dbc0ac900ca989070916Chris Lattner      radix = 10;
611413d355e38755a71f106dbc0ac900ca989070916Chris Lattner    }
612413d355e38755a71f106dbc0ac900ca989070916Chris Lattner  }
6131eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
614413d355e38755a71f106dbc0ac900ca989070916Chris Lattner  // If we have a hex digit other than 'e' (which denotes a FP exponent) then
615413d355e38755a71f106dbc0ac900ca989070916Chris Lattner  // the code is using an incorrect base.
616368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner  if (isxdigit(*s) && *s != 'e' && *s != 'E') {
617ac92d829111bc19d1cc97cd85c3c04bc39b969d1Chris Lattner    PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-ThisTokBegin),
6185f9e272e632e951b1efe824cd16acb4d96077930Chris Lattner            diag::err_invalid_octal_digit) << StringRef(s, 1);
619ac92d829111bc19d1cc97cd85c3c04bc39b969d1Chris Lattner    hadError = true;
620368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    return;
621368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner  }
6221eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
623368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner  if (*s == '.') {
624368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    s++;
625368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    radix = 10;
626368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    saw_period = true;
627413d355e38755a71f106dbc0ac900ca989070916Chris Lattner    s = SkipDigits(s); // Skip suffix.
628368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner  }
629368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner  if (*s == 'e' || *s == 'E') { // exponent
630368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    const char *Exponent = s;
631368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    s++;
632368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    radix = 10;
633368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    saw_exponent = true;
634368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    if (*s == '+' || *s == '-')  s++; // sign
635368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    const char *first_non_digit = SkipDigits(s);
636368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    if (first_non_digit != s) {
637368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner      s = first_non_digit;
638368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    } else {
6391eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump      PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, Exponent-ThisTokBegin),
640ac92d829111bc19d1cc97cd85c3c04bc39b969d1Chris Lattner              diag::err_exponent_has_no_digits);
641ac92d829111bc19d1cc97cd85c3c04bc39b969d1Chris Lattner      hadError = true;
642368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner      return;
643368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    }
644368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner  }
645368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner}
646368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner
647368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner
6485f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// GetIntegerValue - Convert this numeric literal value to an APInt that
6495f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// matches Val's input width.  If there is an overflow, set Val to the low bits
6505f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// of the result and return true.  Otherwise, return false.
6515f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencerbool NumericLiteralParser::GetIntegerValue(llvm::APInt &Val) {
652a179be34c1a3c0190e6b9e39dee2197651f44a5dDaniel Dunbar  // Fast path: Compute a conservative bound on the maximum number of
653a179be34c1a3c0190e6b9e39dee2197651f44a5dDaniel Dunbar  // bits per digit in this radix. If we can't possibly overflow a
654a179be34c1a3c0190e6b9e39dee2197651f44a5dDaniel Dunbar  // uint64 based on that bound then do the simple conversion to
655a179be34c1a3c0190e6b9e39dee2197651f44a5dDaniel Dunbar  // integer. This avoids the expensive overflow checking below, and
656a179be34c1a3c0190e6b9e39dee2197651f44a5dDaniel Dunbar  // handles the common cases that matter (small decimal integers and
657a179be34c1a3c0190e6b9e39dee2197651f44a5dDaniel Dunbar  // hex/octal values which don't overflow).
658a179be34c1a3c0190e6b9e39dee2197651f44a5dDaniel Dunbar  unsigned MaxBitsPerDigit = 1;
6591eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump  while ((1U << MaxBitsPerDigit) < radix)
660a179be34c1a3c0190e6b9e39dee2197651f44a5dDaniel Dunbar    MaxBitsPerDigit += 1;
661a179be34c1a3c0190e6b9e39dee2197651f44a5dDaniel Dunbar  if ((SuffixBegin - DigitsBegin) * MaxBitsPerDigit <= 64) {
662a179be34c1a3c0190e6b9e39dee2197651f44a5dDaniel Dunbar    uint64_t N = 0;
663a179be34c1a3c0190e6b9e39dee2197651f44a5dDaniel Dunbar    for (s = DigitsBegin; s != SuffixBegin; ++s)
664a179be34c1a3c0190e6b9e39dee2197651f44a5dDaniel Dunbar      N = N*radix + HexDigitValue(*s);
665a179be34c1a3c0190e6b9e39dee2197651f44a5dDaniel Dunbar
666a179be34c1a3c0190e6b9e39dee2197651f44a5dDaniel Dunbar    // This will truncate the value to Val's input width. Simply check
667a179be34c1a3c0190e6b9e39dee2197651f44a5dDaniel Dunbar    // for overflow by comparing.
668a179be34c1a3c0190e6b9e39dee2197651f44a5dDaniel Dunbar    Val = N;
669a179be34c1a3c0190e6b9e39dee2197651f44a5dDaniel Dunbar    return Val.getZExtValue() != N;
670a179be34c1a3c0190e6b9e39dee2197651f44a5dDaniel Dunbar  }
671a179be34c1a3c0190e6b9e39dee2197651f44a5dDaniel Dunbar
6725f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  Val = 0;
6735f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  s = DigitsBegin;
6745f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
6755f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  llvm::APInt RadixVal(Val.getBitWidth(), radix);
6765f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  llvm::APInt CharVal(Val.getBitWidth(), 0);
6775f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  llvm::APInt OldVal = Val;
6781eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
6795f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  bool OverflowOccurred = false;
6805f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  while (s < SuffixBegin) {
6815f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    unsigned C = HexDigitValue(*s++);
6821eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
6835f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    // If this letter is out of bound for this radix, reject it.
6845f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    assert(C < radix && "NumericLiteralParser ctor should have rejected this");
6851eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
6865f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    CharVal = C;
6871eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
6885f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    // Add the digit to the value in the appropriate radix.  If adding in digits
6895f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    // made the value smaller, then this overflowed.
6905f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    OldVal = Val;
6915f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
6925f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    // Multiply by radix, did overflow occur on the multiply?
6935f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    Val *= RadixVal;
6945f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    OverflowOccurred |= Val.udiv(RadixVal) != OldVal;
6955f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
6965f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    // Add value, did overflow occur on the value?
697d70cb645702bdbb42aee58403306a7c47e0d901cDaniel Dunbar    //   (a + b) ult b  <=> overflow
6985f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    Val += CharVal;
6995f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    OverflowOccurred |= Val.ult(CharVal);
7005f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  }
7015f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  return OverflowOccurred;
7025f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer}
7035f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
70494c939dc1d4958b62ea5a89294dd8b2905f3191fJohn McCallllvm::APFloat::opStatus
70594c939dc1d4958b62ea5a89294dd8b2905f3191fJohn McCallNumericLiteralParser::GetFloatValue(llvm::APFloat &Result) {
706427d5af5b601985093b6b4b33ba1e30fc24d86dcTed Kremenek  using llvm::APFloat;
7071eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
708e9f195f15ffe96d0a220c872ab12d0630a633c44Erick Tryzelaar  unsigned n = std::min(SuffixBegin - ThisTokBegin, ThisTokEnd - ThisTokBegin);
70994c939dc1d4958b62ea5a89294dd8b2905f3191fJohn McCall  return Result.convertFromString(StringRef(ThisTokBegin, n),
71094c939dc1d4958b62ea5a89294dd8b2905f3191fJohn McCall                                  APFloat::rmNearestTiesToEven);
7115f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer}
7125f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
7135f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
7142fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///       character-literal: [C++0x lex.ccon]
7152fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         ' c-char-sequence '
7162fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         u' c-char-sequence '
7172fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         U' c-char-sequence '
7182fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         L' c-char-sequence '
7192fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///       c-char-sequence:
7202fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         c-char
7212fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         c-char-sequence c-char
7222fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///       c-char:
7232fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         any member of the source character set except the single-quote ',
7242fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///           backslash \, or new-line character
7252fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         escape-sequence
7262fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         universal-character-name
7272fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///       escape-sequence: [C++0x lex.ccon]
7282fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         simple-escape-sequence
7292fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         octal-escape-sequence
7302fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         hexadecimal-escape-sequence
7312fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///       simple-escape-sequence:
732ddddd48da72bc29d1c3f388ed91ea5549328129eNAKAMURA Takumi///         one of \' \" \? \\ \a \b \f \n \r \t \v
7332fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///       octal-escape-sequence:
7342fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         \ octal-digit
7352fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         \ octal-digit octal-digit
7362fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         \ octal-digit octal-digit octal-digit
7372fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///       hexadecimal-escape-sequence:
7382fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         \x hexadecimal-digit
7392fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         hexadecimal-escape-sequence hexadecimal-digit
7402fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///       universal-character-name:
7412fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         \u hex-quad
7422fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         \U hex-quad hex-quad
7432fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///       hex-quad:
7442fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         hex-digit hex-digit hex-digit hex-digit
7452fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///
7465f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid SpencerCharLiteralParser::CharLiteralParser(const char *begin, const char *end,
7475cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor                                     SourceLocation Loc, Preprocessor &PP,
7485cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor                                     tok::TokenKind kind) {
7495f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  // At this point we know that the character matches the regex "L?'.*'".
7505f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  HadError = false;
7511eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
7525cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  Kind = kind;
7535cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor
7545cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  // Determine if this is a wide or UTF character.
7555cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  if (Kind == tok::wide_char_constant || Kind == tok::utf16_char_constant ||
7565cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor      Kind == tok::utf32_char_constant) {
7575cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor    ++begin;
7585cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  }
7591eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
7605f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  // Skip over the entry quote.
7615f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  assert(begin[0] == '\'' && "Invalid token lexed");
7625f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  ++begin;
7635f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
7641eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump  // FIXME: The "Value" is an uint64_t so we can handle char literals of
765fc8f0e14ad142ed811e90fbd9a30e419e301c717Chris Lattner  // up to 64-bits.
7665f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  // FIXME: This extensively assumes that 'char' is 8-bits.
76798be4943e8dc4f3905629a7102668960873cf863Chris Lattner  assert(PP.getTargetInfo().getCharWidth() == 8 &&
7685f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer         "Assumes char is 8 bits");
769e3ad881e4e9620e941dabd4e78dacdb028b85682Chris Lattner  assert(PP.getTargetInfo().getIntWidth() <= 64 &&
770e3ad881e4e9620e941dabd4e78dacdb028b85682Chris Lattner         (PP.getTargetInfo().getIntWidth() & 7) == 0 &&
771e3ad881e4e9620e941dabd4e78dacdb028b85682Chris Lattner         "Assumes sizeof(int) on target is <= 64 and a multiple of char");
772e3ad881e4e9620e941dabd4e78dacdb028b85682Chris Lattner  assert(PP.getTargetInfo().getWCharWidth() <= 64 &&
773e3ad881e4e9620e941dabd4e78dacdb028b85682Chris Lattner         "Assumes sizeof(wchar) on target is <= 64");
7744bc11af9bed1d4a247e3db1fcb754d410ad99099Sanjiv Gupta
7751eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump  // This is what we will use for overflow detection
7764bc11af9bed1d4a247e3db1fcb754d410ad99099Sanjiv Gupta  llvm::APInt LitVal(PP.getTargetInfo().getIntWidth(), 0);
7771eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
778e3ad881e4e9620e941dabd4e78dacdb028b85682Chris Lattner  unsigned NumCharsSoFar = 0;
7791c6c64b5181a960c7d4cace4995a938d4dfa6fbfChris Lattner  bool Warned = false;
7805f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  while (begin[0] != '\'') {
7814bc11af9bed1d4a247e3db1fcb754d410ad99099Sanjiv Gupta    uint64_t ResultChar;
78259705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber
78359705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber      // Is this a Universal Character Name escape?
7845f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    if (begin[0] != '\\')     // If this is a normal character, consume it.
7855f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      ResultChar = *begin++;
78659705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber    else {                    // Otherwise, this is an escape character.
7870473cd52eac6f1e831777ed899be3ea4509c7b24Craig Topper      unsigned CharWidth = getCharWidth(Kind, PP.getTargetInfo());
78859705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber      // Check for UCN.
78959705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber      if (begin[1] == 'u' || begin[1] == 'U') {
79059705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber        uint32_t utf32 = 0;
79159705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber        unsigned short UcnLen = 0;
792872a45e91778eb0b706ff57272fe547d4512eb19Chris Lattner        if (!ProcessUCNEscape(begin, end, utf32, UcnLen,
793872a45e91778eb0b706ff57272fe547d4512eb19Chris Lattner                              FullSourceLoc(Loc, PP.getSourceManager()),
7946c66f07854c1334a1ce9eae1428d61d54182a6e1Chris Lattner                              &PP.getDiagnostics(), PP.getLangOptions())) {
79559705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber          HadError = 1;
79659705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber        }
79759705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber        ResultChar = utf32;
7980473cd52eac6f1e831777ed899be3ea4509c7b24Craig Topper        if (CharWidth != 32 && (ResultChar >> CharWidth) != 0) {
7990473cd52eac6f1e831777ed899be3ea4509c7b24Craig Topper          PP.Diag(Loc, diag::warn_ucn_escape_too_large);
8000473cd52eac6f1e831777ed899be3ea4509c7b24Craig Topper          ResultChar &= ~0U >> (32-CharWidth);
8010473cd52eac6f1e831777ed899be3ea4509c7b24Craig Topper        }
80259705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber      } else {
80359705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber        // Otherwise, this is a non-UCN escape character.  Process it.
80491f54ce93bec136fb9e18740b895cf1c1339524bChris Lattner        ResultChar = ProcessCharEscape(begin, end, HadError,
80591f54ce93bec136fb9e18740b895cf1c1339524bChris Lattner                                       FullSourceLoc(Loc,PP.getSourceManager()),
8065cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor                                       CharWidth, &PP.getDiagnostics());
80759705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber      }
80859705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber    }
8095f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
8105f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    // If this is a multi-character constant (e.g. 'abc'), handle it.  These are
8115f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    // implementation defined (C99 6.4.4.4p10).
812e3ad881e4e9620e941dabd4e78dacdb028b85682Chris Lattner    if (NumCharsSoFar) {
8135cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor      if (!isAscii()) {
8145f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer        // Emulate GCC's (unintentional?) behavior: L'ab' -> L'b'.
8154bc11af9bed1d4a247e3db1fcb754d410ad99099Sanjiv Gupta        LitVal = 0;
8165f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      } else {
8175f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer        // Narrow character literals act as though their value is concatenated
818e3ad881e4e9620e941dabd4e78dacdb028b85682Chris Lattner        // in this implementation, but warn on overflow.
8191c6c64b5181a960c7d4cace4995a938d4dfa6fbfChris Lattner        if (LitVal.countLeadingZeros() < 8 && !Warned) {
8205f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer          PP.Diag(Loc, diag::warn_char_constant_too_large);
8211c6c64b5181a960c7d4cace4995a938d4dfa6fbfChris Lattner          Warned = true;
8221c6c64b5181a960c7d4cace4995a938d4dfa6fbfChris Lattner        }
8234bc11af9bed1d4a247e3db1fcb754d410ad99099Sanjiv Gupta        LitVal <<= 8;
8245f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      }
8255f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    }
8261eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
8274bc11af9bed1d4a247e3db1fcb754d410ad99099Sanjiv Gupta    LitVal = LitVal + ResultChar;
828e3ad881e4e9620e941dabd4e78dacdb028b85682Chris Lattner    ++NumCharsSoFar;
829e3ad881e4e9620e941dabd4e78dacdb028b85682Chris Lattner  }
830e3ad881e4e9620e941dabd4e78dacdb028b85682Chris Lattner
831e3ad881e4e9620e941dabd4e78dacdb028b85682Chris Lattner  // If this is the second character being processed, do special handling.
832e3ad881e4e9620e941dabd4e78dacdb028b85682Chris Lattner  if (NumCharsSoFar > 1) {
833e3ad881e4e9620e941dabd4e78dacdb028b85682Chris Lattner    // Warn about discarding the top bits for multi-char wide-character
834e3ad881e4e9620e941dabd4e78dacdb028b85682Chris Lattner    // constants (L'abcd').
8355cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor    if (!isAscii())
8365cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor      PP.Diag(Loc, diag::warn_extraneous_char_constant);
837e3ad881e4e9620e941dabd4e78dacdb028b85682Chris Lattner    else if (NumCharsSoFar != 4)
838e3ad881e4e9620e941dabd4e78dacdb028b85682Chris Lattner      PP.Diag(Loc, diag::ext_multichar_character_literal);
839e3ad881e4e9620e941dabd4e78dacdb028b85682Chris Lattner    else
840e3ad881e4e9620e941dabd4e78dacdb028b85682Chris Lattner      PP.Diag(Loc, diag::ext_four_char_character_literal);
8412a1c363f38e59a5044fc349aa7e538a50954c244Eli Friedman    IsMultiChar = true;
842930b71a4a7dedf70a73e5fd875bae7df452b80a9Daniel Dunbar  } else
843930b71a4a7dedf70a73e5fd875bae7df452b80a9Daniel Dunbar    IsMultiChar = false;
8444bc11af9bed1d4a247e3db1fcb754d410ad99099Sanjiv Gupta
8454bc11af9bed1d4a247e3db1fcb754d410ad99099Sanjiv Gupta  // Transfer the value from APInt to uint64_t
8464bc11af9bed1d4a247e3db1fcb754d410ad99099Sanjiv Gupta  Value = LitVal.getZExtValue();
8471eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
8485f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  // If this is a single narrow character, sign extend it (e.g. '\xFF' is "-1")
8495f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  // if 'char' is signed for this target (C99 6.4.4.4p10).  Note that multiple
8505f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  // character constants are not sign extended in the this implementation:
8515f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  // '\xFF\xFF' = 65536 and '\x0\xFF' = 255, which matches GCC.
8525cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  if (isAscii() && NumCharsSoFar == 1 && (Value & 128) &&
85315b91764d08e886391c865c4a444d7b51141c284Eli Friedman      PP.getLangOptions().CharIsSigned)
8545f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    Value = (signed char)Value;
8555f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer}
8565f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
8575f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
8582fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///       string-literal: [C++0x lex.string]
8592fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         encoding-prefix " [s-char-sequence] "
8602fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         encoding-prefix R raw-string
8612fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///       encoding-prefix:
8622fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         u8
8632fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         u
8642fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         U
8652fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         L
8665f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///       s-char-sequence:
8675f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///         s-char
8685f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///         s-char-sequence s-char
8695f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///       s-char:
8702fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         any member of the source character set except the double-quote ",
8712fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///           backslash \, or new-line character
8722fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         escape-sequence
8735f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///         universal-character-name
8742fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///       raw-string:
8752fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         " d-char-sequence ( r-char-sequence ) d-char-sequence "
8762fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///       r-char-sequence:
8772fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         r-char
8782fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         r-char-sequence r-char
8792fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///       r-char:
8802fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         any member of the source character set, except a right parenthesis )
8812fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///           followed by the initial d-char-sequence (which may be empty)
8822fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///           followed by a double quote ".
8832fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///       d-char-sequence:
8842fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         d-char
8852fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         d-char-sequence d-char
8862fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///       d-char:
8872fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         any member of the basic source character set except:
8882fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///           space, the left parenthesis (, the right parenthesis ),
8892fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///           the backslash \, and the control characters representing horizontal
8902fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///           tab, vertical tab, form feed, and newline.
8912fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///       escape-sequence: [C++0x lex.ccon]
8922fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         simple-escape-sequence
8932fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         octal-escape-sequence
8942fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         hexadecimal-escape-sequence
8952fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///       simple-escape-sequence:
896ddddd48da72bc29d1c3f388ed91ea5549328129eNAKAMURA Takumi///         one of \' \" \? \\ \a \b \f \n \r \t \v
8972fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///       octal-escape-sequence:
8982fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         \ octal-digit
8992fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         \ octal-digit octal-digit
9002fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         \ octal-digit octal-digit octal-digit
9012fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///       hexadecimal-escape-sequence:
9022fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         \x hexadecimal-digit
9032fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         hexadecimal-escape-sequence hexadecimal-digit
9045f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///       universal-character-name:
9055f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///         \u hex-quad
9065f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///         \U hex-quad hex-quad
9075f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///       hex-quad:
9085f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///         hex-digit hex-digit hex-digit hex-digit
9095f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///
9105f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid SpencerStringLiteralParser::
911d217773f106856a11879ec79dc468efefaf2ee75Chris LattnerStringLiteralParser(const Token *StringToks, unsigned NumStringToks,
9120833dd0675c25cbb35671c7a2006d511d5c77ce3Chris Lattner                    Preprocessor &PP, bool Complain)
9130833dd0675c25cbb35671c7a2006d511d5c77ce3Chris Lattner  : SM(PP.getSourceManager()), Features(PP.getLangOptions()),
914403de3f932b5d1d3e4e58f69960000911d04dd2aArgyrios Kyrtzidis    Target(PP.getTargetInfo()), Diags(Complain ? &PP.getDiagnostics() : 0),
9155cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor    MaxTokenLength(0), SizeBound(0), CharByteWidth(0), Kind(tok::unknown),
9165cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor    ResultPtr(ResultBuf.data()), hadError(false), Pascal(false) {
9170833dd0675c25cbb35671c7a2006d511d5c77ce3Chris Lattner  init(StringToks, NumStringToks);
9180833dd0675c25cbb35671c7a2006d511d5c77ce3Chris Lattner}
9190833dd0675c25cbb35671c7a2006d511d5c77ce3Chris Lattner
9200833dd0675c25cbb35671c7a2006d511d5c77ce3Chris Lattnervoid StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){
921403de3f932b5d1d3e4e58f69960000911d04dd2aArgyrios Kyrtzidis  // The literal token may have come from an invalid source location (e.g. due
922403de3f932b5d1d3e4e58f69960000911d04dd2aArgyrios Kyrtzidis  // to a PCH error), in which case the token length will be 0.
923403de3f932b5d1d3e4e58f69960000911d04dd2aArgyrios Kyrtzidis  if (NumStringToks == 0 || StringToks[0].getLength() < 2) {
924403de3f932b5d1d3e4e58f69960000911d04dd2aArgyrios Kyrtzidis    hadError = true;
925403de3f932b5d1d3e4e58f69960000911d04dd2aArgyrios Kyrtzidis    return;
926403de3f932b5d1d3e4e58f69960000911d04dd2aArgyrios Kyrtzidis  }
927403de3f932b5d1d3e4e58f69960000911d04dd2aArgyrios Kyrtzidis
9285f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  // Scan all of the string portions, remember the max individual token length,
9295f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  // computing a bound on the concatenated string length, and see whether any
9305f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  // piece is a wide-string.  If any of the string portions is a wide-string
9315f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  // literal, the result is a wide-string literal [C99 6.4.5p4].
932403de3f932b5d1d3e4e58f69960000911d04dd2aArgyrios Kyrtzidis  assert(NumStringToks && "expected at least one token");
9336cf750298d3621d8a10a6dd07fcee8e274b9d94dSean Hunt  MaxTokenLength = StringToks[0].getLength();
934403de3f932b5d1d3e4e58f69960000911d04dd2aArgyrios Kyrtzidis  assert(StringToks[0].getLength() >= 2 && "literal token is invalid!");
9356cf750298d3621d8a10a6dd07fcee8e274b9d94dSean Hunt  SizeBound = StringToks[0].getLength()-2;  // -2 for "".
9365cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  Kind = StringToks[0].getKind();
9376cf750298d3621d8a10a6dd07fcee8e274b9d94dSean Hunt
9386cf750298d3621d8a10a6dd07fcee8e274b9d94dSean Hunt  hadError = false;
9395f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
9405f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  // Implement Translation Phase #6: concatenation of string literals
9415f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  /// (C99 5.1.1.2p1).  The common case is only one string fragment.
9425f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  for (unsigned i = 1; i != NumStringToks; ++i) {
943403de3f932b5d1d3e4e58f69960000911d04dd2aArgyrios Kyrtzidis    if (StringToks[i].getLength() < 2) {
944403de3f932b5d1d3e4e58f69960000911d04dd2aArgyrios Kyrtzidis      hadError = true;
945403de3f932b5d1d3e4e58f69960000911d04dd2aArgyrios Kyrtzidis      return;
946403de3f932b5d1d3e4e58f69960000911d04dd2aArgyrios Kyrtzidis    }
947403de3f932b5d1d3e4e58f69960000911d04dd2aArgyrios Kyrtzidis
9485f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    // The string could be shorter than this if it needs cleaning, but this is a
9495f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    // reasonable bound, which is all we need.
950403de3f932b5d1d3e4e58f69960000911d04dd2aArgyrios Kyrtzidis    assert(StringToks[i].getLength() >= 2 && "literal token is invalid!");
9516cf750298d3621d8a10a6dd07fcee8e274b9d94dSean Hunt    SizeBound += StringToks[i].getLength()-2;  // -2 for "".
9521eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
9535f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    // Remember maximum string piece length.
9546cf750298d3621d8a10a6dd07fcee8e274b9d94dSean Hunt    if (StringToks[i].getLength() > MaxTokenLength)
9556cf750298d3621d8a10a6dd07fcee8e274b9d94dSean Hunt      MaxTokenLength = StringToks[i].getLength();
9561eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
9575cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor    // Remember if we see any wide or utf-8/16/32 strings.
9585cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor    // Also check for illegal concatenations.
9595cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor    if (StringToks[i].isNot(Kind) && StringToks[i].isNot(tok::string_literal)) {
9605cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor      if (isAscii()) {
9615cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor        Kind = StringToks[i].getKind();
9625cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor      } else {
9635cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor        if (Diags)
9645cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor          Diags->Report(FullSourceLoc(StringToks[i].getLocation(), SM),
9655cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor                        diag::err_unsupported_string_concat);
9665cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor        hadError = true;
9675cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor      }
9685cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor    }
9695f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  }
970dbb1ecc32ca122b07b7c98fd0a8f6f53985adaccChris Lattner
9715f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  // Include space for the null terminator.
9725f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  ++SizeBound;
9731eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
9745f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  // TODO: K&R warning: "traditional C rejects string constant concatenation"
9751eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
9765cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  // Get the width in bytes of char/wchar_t/char16_t/char32_t
9775cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  CharByteWidth = getCharWidth(Kind, Target);
9785cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  assert((CharByteWidth & 7) == 0 && "Assumes character size is byte multiple");
9795cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  CharByteWidth /= 8;
9801eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
9815f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  // The output buffer size needs to be large enough to hold wide characters.
9825f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  // This is a worst-case assumption which basically corresponds to L"" "long".
9835cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  SizeBound *= CharByteWidth;
9841eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
9855f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  // Size the temporary buffer to hold the result string data.
9865f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  ResultBuf.resize(SizeBound);
9871eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
9885f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  // Likewise, but for each string piece.
9895f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  llvm::SmallString<512> TokenBuf;
9905f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  TokenBuf.resize(MaxTokenLength);
9911eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
9925f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  // Loop over all the strings, getting their spelling, and expanding them to
9935f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  // wide strings as appropriate.
9945f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  ResultPtr = &ResultBuf[0];   // Next byte to fill in.
9951eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
996ee98ac5a1330db432b188dd2d38b6631aac47bf1Anders Carlsson  Pascal = false;
9971eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
9985f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  for (unsigned i = 0, e = NumStringToks; i != e; ++i) {
9995f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    const char *ThisTokBuf = &TokenBuf[0];
10005f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    // Get the spelling of the token, which eliminates trigraphs, etc.  We know
10015f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    // that ThisTokBuf points to a buffer that is big enough for the whole token
10025f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    // and 'spelled' tokens can only shrink.
100350f6af7a6d6951a63f3da7d4c5a7d3965bf73b63Douglas Gregor    bool StringInvalid = false;
10040833dd0675c25cbb35671c7a2006d511d5c77ce3Chris Lattner    unsigned ThisTokLen =
1005b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner      Lexer::getSpelling(StringToks[i], ThisTokBuf, SM, Features,
1006b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner                         &StringInvalid);
100750f6af7a6d6951a63f3da7d4c5a7d3965bf73b63Douglas Gregor    if (StringInvalid) {
10085cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor      hadError = true;
100950f6af7a6d6951a63f3da7d4c5a7d3965bf73b63Douglas Gregor      continue;
101050f6af7a6d6951a63f3da7d4c5a7d3965bf73b63Douglas Gregor    }
101150f6af7a6d6951a63f3da7d4c5a7d3965bf73b63Douglas Gregor
10125f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    const char *ThisTokEnd = ThisTokBuf+ThisTokLen-1;  // Skip end quote.
10135f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    // TODO: Input character set mapping support.
10141eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
10151661d717563d6a27dec3da69deba2b2efaa45802Craig Topper    // Skip marker for wide or unicode strings.
10165cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor    if (ThisTokBuf[0] == 'L' || ThisTokBuf[0] == 'u' || ThisTokBuf[0] == 'U') {
10175f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      ++ThisTokBuf;
10185cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor      // Skip 8 of u8 marker for utf8 strings.
10195cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor      if (ThisTokBuf[0] == '8')
10205cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor        ++ThisTokBuf;
102156bedefe92ae8f604d14bea75cc3040ab32337c2Fariborz Jahanian    }
10221eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
10232fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper    // Check for raw string
10242fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper    if (ThisTokBuf[0] == 'R') {
10252fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper      ThisTokBuf += 2; // skip R"
10261eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
10272fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper      const char *Prefix = ThisTokBuf;
10282fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper      while (ThisTokBuf[0] != '(')
1029ee98ac5a1330db432b188dd2d38b6631aac47bf1Anders Carlsson        ++ThisTokBuf;
10302fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper      ++ThisTokBuf; // skip '('
10312fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper
10322fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper      // remove same number of characters from the end
10332fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper      if (ThisTokEnd >= ThisTokBuf + (ThisTokBuf - Prefix))
10342fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper        ThisTokEnd -= (ThisTokBuf - Prefix);
10352fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper
10362fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper      // Copy the string over
10372fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper      CopyStringFragment(StringRef(ThisTokBuf, ThisTokEnd - ThisTokBuf));
10382fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper    } else {
10392fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper      assert(ThisTokBuf[0] == '"' && "Expected quote, lexer broken?");
10402fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper      ++ThisTokBuf; // skip "
10412fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper
10422fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper      // Check if this is a pascal string
10432fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper      if (Features.PascalStrings && ThisTokBuf + 1 != ThisTokEnd &&
10442fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper          ThisTokBuf[0] == '\\' && ThisTokBuf[1] == 'p') {
10451eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
10462fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper        // If the \p sequence is found in the first token, we have a pascal string
10472fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper        // Otherwise, if we already have a pascal string, ignore the first \p
10482fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper        if (i == 0) {
10495f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer          ++ThisTokBuf;
10502fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper          Pascal = true;
10512fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper        } else if (Pascal)
10522fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper          ThisTokBuf += 2;
10535f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      }
10541eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
10552fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper      while (ThisTokBuf != ThisTokEnd) {
10562fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper        // Is this a span of non-escape characters?
10572fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper        if (ThisTokBuf[0] != '\\') {
10582fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper          const char *InStart = ThisTokBuf;
10592fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper          do {
10602fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper            ++ThisTokBuf;
10612fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper          } while (ThisTokBuf != ThisTokEnd && ThisTokBuf[0] != '\\');
10622fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper
10632fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper          // Copy the character span over.
10642fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper          CopyStringFragment(StringRef(InStart, ThisTokBuf - InStart));
10652fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper          continue;
10662fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper        }
10672fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper        // Is this a Universal Character Name escape?
10682fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper        if (ThisTokBuf[1] == 'u' || ThisTokBuf[1] == 'U') {
10692fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper          EncodeUCNEscape(ThisTokBuf, ThisTokEnd, ResultPtr,
10702fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper                          hadError, FullSourceLoc(StringToks[i].getLocation(),SM),
10712fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper                          CharByteWidth, Diags, Features);
10722fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper          continue;
10732fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper        }
10742fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper        // Otherwise, this is a non-UCN escape character.  Process it.
10752fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper        unsigned ResultChar =
10762fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper          ProcessCharEscape(ThisTokBuf, ThisTokEnd, hadError,
10772fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper                            FullSourceLoc(StringToks[i].getLocation(), SM),
10782fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper                            CharByteWidth*8, Diags);
10792fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper
10802fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper        // Note: our internal rep of wide char tokens is always little-endian.
10812fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper        *ResultPtr++ = ResultChar & 0xFF;
10821eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
10832fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper        for (unsigned i = 1, e = CharByteWidth; i != e; ++i)
10842fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper          *ResultPtr++ = ResultChar >> i*8;
10852fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper      }
10865f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    }
10875f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  }
10881eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
1089bbee00b6456e90a09f63c83c20233e6c5ad6000aChris Lattner  if (Pascal) {
1090ee98ac5a1330db432b188dd2d38b6631aac47bf1Anders Carlsson    ResultBuf[0] = ResultPtr-&ResultBuf[0]-1;
10915cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor    ResultBuf[0] /= CharByteWidth;
1092bbee00b6456e90a09f63c83c20233e6c5ad6000aChris Lattner
1093bbee00b6456e90a09f63c83c20233e6c5ad6000aChris Lattner    // Verify that pascal strings aren't too large.
10940833dd0675c25cbb35671c7a2006d511d5c77ce3Chris Lattner    if (GetStringLength() > 256) {
10950833dd0675c25cbb35671c7a2006d511d5c77ce3Chris Lattner      if (Diags)
10960833dd0675c25cbb35671c7a2006d511d5c77ce3Chris Lattner        Diags->Report(FullSourceLoc(StringToks[0].getLocation(), SM),
10970833dd0675c25cbb35671c7a2006d511d5c77ce3Chris Lattner                      diag::err_pascal_string_too_long)
10980833dd0675c25cbb35671c7a2006d511d5c77ce3Chris Lattner          << SourceRange(StringToks[0].getLocation(),
10990833dd0675c25cbb35671c7a2006d511d5c77ce3Chris Lattner                         StringToks[NumStringToks-1].getLocation());
11005cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor      hadError = true;
110157d7dde770c67b282e7fb77b1b81e429910937b3Eli Friedman      return;
110257d7dde770c67b282e7fb77b1b81e429910937b3Eli Friedman    }
11030833dd0675c25cbb35671c7a2006d511d5c77ce3Chris Lattner  } else if (Diags) {
1104427c492d368d6ecf409fa8053eecb5cd0e779c5bDouglas Gregor    // Complain if this string literal has too many characters.
1105a95880d6513c617bb96634bcc1f16c6bdb80dedcChris Lattner    unsigned MaxChars = Features.CPlusPlus? 65536 : Features.C99 ? 4095 : 509;
1106427c492d368d6ecf409fa8053eecb5cd0e779c5bDouglas Gregor
1107427c492d368d6ecf409fa8053eecb5cd0e779c5bDouglas Gregor    if (GetNumStringChars() > MaxChars)
11080833dd0675c25cbb35671c7a2006d511d5c77ce3Chris Lattner      Diags->Report(FullSourceLoc(StringToks[0].getLocation(), SM),
11090833dd0675c25cbb35671c7a2006d511d5c77ce3Chris Lattner                    diag::ext_string_too_long)
1110427c492d368d6ecf409fa8053eecb5cd0e779c5bDouglas Gregor        << GetNumStringChars() << MaxChars
1111a95880d6513c617bb96634bcc1f16c6bdb80dedcChris Lattner        << (Features.CPlusPlus ? 2 : Features.C99 ? 1 : 0)
1112427c492d368d6ecf409fa8053eecb5cd0e779c5bDouglas Gregor        << SourceRange(StringToks[0].getLocation(),
1113427c492d368d6ecf409fa8053eecb5cd0e779c5bDouglas Gregor                       StringToks[NumStringToks-1].getLocation());
1114bbee00b6456e90a09f63c83c20233e6c5ad6000aChris Lattner  }
11155f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer}
1116719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner
1117719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner
11182fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper/// copyStringFragment - This function copies from Start to End into ResultPtr.
11192fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper/// Performs widening for multi-byte characters.
112003720fce6e709661af020f3e4e6dfd08a96e8044Craig Toppervoid StringLiteralParser::CopyStringFragment(StringRef Fragment) {
11212fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper  // Copy the character span over.
11222fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper  if (CharByteWidth == 1) {
11232fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper    memcpy(ResultPtr, Fragment.data(), Fragment.size());
11242fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper    ResultPtr += Fragment.size();
11252fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper  } else {
11262fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper    // Note: our internal rep of wide char tokens is always little-endian.
11272fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper    for (StringRef::iterator I=Fragment.begin(), E=Fragment.end(); I!=E; ++I) {
11282fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper      *ResultPtr++ = *I;
11292fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper      // Add zeros at the end.
11302fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper      for (unsigned i = 1, e = CharByteWidth; i != e; ++i)
11312fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper        *ResultPtr++ = 0;
11322fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper    }
11332fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper  }
11342fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper}
11352fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper
11362fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper
1137719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner/// getOffsetOfStringByte - This function returns the offset of the
1138719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner/// specified byte of the string data represented by Token.  This handles
1139719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner/// advancing over escape sequences in the string.
1140719e61573f27c11057ecfe0dd8f141621602c571Chris Lattnerunsigned StringLiteralParser::getOffsetOfStringByte(const Token &Tok,
11416c66f07854c1334a1ce9eae1428d61d54182a6e1Chris Lattner                                                    unsigned ByteNo) const {
1142719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner  // Get the spelling of the token.
1143ca1475ea0e76da6b852796610139ed9b49c8d4a6Chris Lattner  llvm::SmallString<32> SpellingBuffer;
11446cf750298d3621d8a10a6dd07fcee8e274b9d94dSean Hunt  SpellingBuffer.resize(Tok.getLength());
11451eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
114650f6af7a6d6951a63f3da7d4c5a7d3965bf73b63Douglas Gregor  bool StringInvalid = false;
1147719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner  const char *SpellingPtr = &SpellingBuffer[0];
1148b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner  unsigned TokLen = Lexer::getSpelling(Tok, SpellingPtr, SM, Features,
1149b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner                                       &StringInvalid);
115091f54ce93bec136fb9e18740b895cf1c1339524bChris Lattner  if (StringInvalid)
115150f6af7a6d6951a63f3da7d4c5a7d3965bf73b63Douglas Gregor    return 0;
1152719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner
11535cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  assert(SpellingPtr[0] != 'L' && SpellingPtr[0] != 'u' &&
11545cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor         SpellingPtr[0] != 'U' && "Doesn't handle wide or utf strings yet");
1155719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner
11561eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
1157719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner  const char *SpellingStart = SpellingPtr;
1158719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner  const char *SpellingEnd = SpellingPtr+TokLen;
1159719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner
1160719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner  // Skip over the leading quote.
1161719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner  assert(SpellingPtr[0] == '"' && "Should be a string literal!");
1162719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner  ++SpellingPtr;
11631eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
1164719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner  // Skip over bytes until we find the offset we're looking for.
1165719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner  while (ByteNo) {
1166719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner    assert(SpellingPtr < SpellingEnd && "Didn't find byte offset!");
11671eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
1168719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner    // Step over non-escapes simply.
1169719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner    if (*SpellingPtr != '\\') {
1170719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner      ++SpellingPtr;
1171719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner      --ByteNo;
1172719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner      continue;
1173719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner    }
11741eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
1175719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner    // Otherwise, this is an escape character.  Advance over it.
1176719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner    bool HadError = false;
1177719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner    ProcessCharEscape(SpellingPtr, SpellingEnd, HadError,
1178ca1475ea0e76da6b852796610139ed9b49c8d4a6Chris Lattner                      FullSourceLoc(Tok.getLocation(), SM),
11795cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor                      CharByteWidth*8, Diags);
1180719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner    assert(!HadError && "This method isn't valid on erroneous strings");
1181719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner    --ByteNo;
1182719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner  }
11831eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
1184719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner  return SpellingPtr-SpellingStart;
1185719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner}
1186