LiteralSupport.cpp revision b453ad3214d00acc51c9aa702c76c58354d84b84
15f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer//===--- LiteralSupport.cpp - Code to parse and process literals ----------===//
25f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer//
35f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer//                     The LLVM Compiler Infrastructure
45f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer//
50bc735ffcfb223c0186419547abaa5c84482663eChris Lattner// This file is distributed under the University of Illinois Open Source
60bc735ffcfb223c0186419547abaa5c84482663eChris Lattner// License. See LICENSE.TXT for details.
75f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer//
85f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer//===----------------------------------------------------------------------===//
95f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer//
105f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer// This file implements the NumericLiteralParser, CharLiteralParser, and
115f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer// StringLiteralParser interfaces.
125f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer//
135f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer//===----------------------------------------------------------------------===//
145f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
155f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer#include "clang/Lex/LiteralSupport.h"
165f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer#include "clang/Lex/Preprocessor.h"
17500d3297d2a21edeac4d46cbcbe21bc2352c2a28Chris Lattner#include "clang/Lex/LexDiagnostic.h"
18136f93a2b660fc780fc2dba82a6f42ca4dc9164aChris Lattner#include "clang/Basic/TargetInfo.h"
19f74a4587629615ffd13bd0724868f86ba8c8f27bEli Friedman#include "clang/Basic/ConvertUTF.h"
205f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer#include "llvm/ADT/StringExtras.h"
219fe8c74a93ac8e92512615c5f83e7a328b3b0544David Blaikie#include "llvm/Support/ErrorHandling.h"
225f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencerusing namespace clang;
235f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
/// HexDigitValue - Return the numeric value (0-15) of the hexadecimal digit
/// \p C, or -1 if \p C is not one of [0-9a-fA-F].
static int HexDigitValue(char C) {
  if (C >= '0' && C <= '9') return C - '0';        // decimal digits
  if (C >= 'a' && C <= 'f') return C - 'a' + 10;   // lower-case a-f
  if (C >= 'A' && C <= 'F') return C - 'A' + 10;   // upper-case A-F
  return -1;
}
325f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
335cee1195584fa8672253139c86e922daeda69b9eDouglas Gregorstatic unsigned getCharWidth(tok::TokenKind kind, const TargetInfo &Target) {
345cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  switch (kind) {
35b219cfc4d75f0a03630b7c4509ef791b7e97b2c8David Blaikie  default: llvm_unreachable("Unknown token type!");
365cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  case tok::char_constant:
375cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  case tok::string_literal:
385cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  case tok::utf8_string_literal:
395cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor    return Target.getCharWidth();
405cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  case tok::wide_char_constant:
415cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  case tok::wide_string_literal:
425cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor    return Target.getWCharWidth();
435cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  case tok::utf16_char_constant:
445cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  case tok::utf16_string_literal:
455cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor    return Target.getChar16Width();
465cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  case tok::utf32_char_constant:
475cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  case tok::utf32_string_literal:
485cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor    return Target.getChar32Width();
495cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  }
505cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor}
515cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor
525f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// ProcessCharEscape - Parse a standard C escape sequence, which can occur in
535f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// either a character or a string literal.
545f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencerstatic unsigned ProcessCharEscape(const char *&ThisTokBuf,
555f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer                                  const char *ThisTokEnd, bool &HadError,
565cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor                                  FullSourceLoc Loc, unsigned CharWidth,
57d6471f7c1921c7802804ce3ff6fe9768310f72b9David Blaikie                                  DiagnosticsEngine *Diags) {
585f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  // Skip the '\' char.
595f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  ++ThisTokBuf;
605f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
615f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  // We know that this character can't be off the end of the buffer, because
625f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  // that would have been \", which would not have been the end of string.
635f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  unsigned ResultChar = *ThisTokBuf++;
645f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  switch (ResultChar) {
655f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  // These map to themselves.
665f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  case '\\': case '\'': case '"': case '?': break;
671eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
685f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    // These have fixed mappings.
695f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  case 'a':
705f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    // TODO: K&R: the meaning of '\\a' is different in traditional C
715f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    ResultChar = 7;
725f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    break;
735f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  case 'b':
745f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    ResultChar = 8;
755f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    break;
765f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  case 'e':
7791f54ce93bec136fb9e18740b895cf1c1339524bChris Lattner    if (Diags)
7891f54ce93bec136fb9e18740b895cf1c1339524bChris Lattner      Diags->Report(Loc, diag::ext_nonstandard_escape) << "e";
795f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    ResultChar = 27;
805f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    break;
813c54801fbc67d8df2fed0711a2e2022db6b1bbcfEli Friedman  case 'E':
8291f54ce93bec136fb9e18740b895cf1c1339524bChris Lattner    if (Diags)
8391f54ce93bec136fb9e18740b895cf1c1339524bChris Lattner      Diags->Report(Loc, diag::ext_nonstandard_escape) << "E";
843c54801fbc67d8df2fed0711a2e2022db6b1bbcfEli Friedman    ResultChar = 27;
853c54801fbc67d8df2fed0711a2e2022db6b1bbcfEli Friedman    break;
865f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  case 'f':
875f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    ResultChar = 12;
885f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    break;
895f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  case 'n':
905f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    ResultChar = 10;
915f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    break;
925f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  case 'r':
935f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    ResultChar = 13;
945f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    break;
955f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  case 't':
965f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    ResultChar = 9;
975f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    break;
985f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  case 'v':
995f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    ResultChar = 11;
1005f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    break;
1015f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  case 'x': { // Hex escape.
1025f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    ResultChar = 0;
1035f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    if (ThisTokBuf == ThisTokEnd || !isxdigit(*ThisTokBuf)) {
10491f54ce93bec136fb9e18740b895cf1c1339524bChris Lattner      if (Diags)
10591f54ce93bec136fb9e18740b895cf1c1339524bChris Lattner        Diags->Report(Loc, diag::err_hex_escape_no_digits);
1065f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      HadError = 1;
1075f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      break;
1085f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    }
1091eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
1105f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    // Hex escapes are a maximal series of hex digits.
1115f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    bool Overflow = false;
1125f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    for (; ThisTokBuf != ThisTokEnd; ++ThisTokBuf) {
1135f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      int CharVal = HexDigitValue(ThisTokBuf[0]);
1145f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      if (CharVal == -1) break;
115c29bbde0a14a664d6843b21d3791478d1f4d2833Chris Lattner      // About to shift out a digit?
116c29bbde0a14a664d6843b21d3791478d1f4d2833Chris Lattner      Overflow |= (ResultChar & 0xF0000000) ? true : false;
1175f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      ResultChar <<= 4;
1185f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      ResultChar |= CharVal;
1195f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    }
1205f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
1215f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    // See if any bits will be truncated when evaluated as a character.
1225f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    if (CharWidth != 32 && (ResultChar >> CharWidth) != 0) {
1235f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      Overflow = true;
1245f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      ResultChar &= ~0U >> (32-CharWidth);
1255f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    }
1261eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
1275f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    // Check for overflow.
12891f54ce93bec136fb9e18740b895cf1c1339524bChris Lattner    if (Overflow && Diags)   // Too many digits to fit in
12991f54ce93bec136fb9e18740b895cf1c1339524bChris Lattner      Diags->Report(Loc, diag::warn_hex_escape_too_large);
1305f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    break;
1315f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  }
1325f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  case '0': case '1': case '2': case '3':
1335f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  case '4': case '5': case '6': case '7': {
1345f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    // Octal escapes.
1355f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    --ThisTokBuf;
1365f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    ResultChar = 0;
1375f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
1385f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    // Octal escapes are a series of octal digits with maximum length 3.
1395f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    // "\0123" is a two digit sequence equal to "\012" "3".
1405f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    unsigned NumDigits = 0;
1415f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    do {
1425f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      ResultChar <<= 3;
1435f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      ResultChar |= *ThisTokBuf++ - '0';
1445f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      ++NumDigits;
1455f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    } while (ThisTokBuf != ThisTokEnd && NumDigits < 3 &&
1465f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer             ThisTokBuf[0] >= '0' && ThisTokBuf[0] <= '7');
1471eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
1485f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    // Check for overflow.  Reject '\777', but not L'\777'.
1495f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    if (CharWidth != 32 && (ResultChar >> CharWidth) != 0) {
15091f54ce93bec136fb9e18740b895cf1c1339524bChris Lattner      if (Diags)
15191f54ce93bec136fb9e18740b895cf1c1339524bChris Lattner        Diags->Report(Loc, diag::warn_octal_escape_too_large);
1525f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      ResultChar &= ~0U >> (32-CharWidth);
1535f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    }
1545f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    break;
1555f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  }
1561eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
1575f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    // Otherwise, these are not valid escapes.
1585f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  case '(': case '{': case '[': case '%':
1595f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    // GCC accepts these as extensions.  We warn about them as such though.
16091f54ce93bec136fb9e18740b895cf1c1339524bChris Lattner    if (Diags)
16191f54ce93bec136fb9e18740b895cf1c1339524bChris Lattner      Diags->Report(Loc, diag::ext_nonstandard_escape)
162b90f4b3fb94056609da9cca5eef7358d95a363b2Douglas Gregor        << std::string()+(char)ResultChar;
163f01fdff97b245caac98100d232c760b4d0531411Eli Friedman    break;
1645f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  default:
16591f54ce93bec136fb9e18740b895cf1c1339524bChris Lattner    if (Diags == 0)
166b90f4b3fb94056609da9cca5eef7358d95a363b2Douglas Gregor      break;
167b90f4b3fb94056609da9cca5eef7358d95a363b2Douglas Gregor
16823ef69d197ba3b5e9602f7161fee50990059502aTed Kremenek    if (isgraph(ResultChar))
16991f54ce93bec136fb9e18740b895cf1c1339524bChris Lattner      Diags->Report(Loc, diag::ext_unknown_escape)
17091f54ce93bec136fb9e18740b895cf1c1339524bChris Lattner        << std::string()+(char)ResultChar;
171ac92d829111bc19d1cc97cd85c3c04bc39b969d1Chris Lattner    else
17291f54ce93bec136fb9e18740b895cf1c1339524bChris Lattner      Diags->Report(Loc, diag::ext_unknown_escape)
17391f54ce93bec136fb9e18740b895cf1c1339524bChris Lattner        << "x"+llvm::utohexstr(ResultChar);
1745f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    break;
1755f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  }
1761eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
1775f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  return ResultChar;
1785f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer}
1795f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
1800e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff/// ProcessUCNEscape - Read the Universal Character Name, check constraints and
18159705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber/// return the UTF32.
18259705aee3fe01aa6fb6962dd11350161b47983d9Nico Weberstatic bool ProcessUCNEscape(const char *&ThisTokBuf, const char *ThisTokEnd,
18359705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber                             uint32_t &UcnVal, unsigned short &UcnLen,
184d6471f7c1921c7802804ce3ff6fe9768310f72b9David Blaikie                             FullSourceLoc Loc, DiagnosticsEngine *Diags,
185be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell                             const LangOptions &Features,
186be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell                             bool in_char_string_literal = false) {
1876c66f07854c1334a1ce9eae1428d61d54182a6e1Chris Lattner  if (!Features.CPlusPlus && !Features.C99 && Diags)
188872a45e91778eb0b706ff57272fe547d4512eb19Chris Lattner    Diags->Report(Loc, diag::warn_ucn_not_valid_in_c89);
1891eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
1904e93b34fdb798abfa0534062a139f2c37cbf876eSteve Naroff  // Save the beginning of the string (for error diagnostics).
1914e93b34fdb798abfa0534062a139f2c37cbf876eSteve Naroff  const char *ThisTokBegin = ThisTokBuf;
1921eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
1930e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  // Skip the '\u' char's.
1940e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  ThisTokBuf += 2;
1950e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff
1960e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  if (ThisTokBuf == ThisTokEnd || !isxdigit(*ThisTokBuf)) {
1976c66f07854c1334a1ce9eae1428d61d54182a6e1Chris Lattner    if (Diags)
198872a45e91778eb0b706ff57272fe547d4512eb19Chris Lattner      Diags->Report(Loc, diag::err_ucn_escape_no_digits);
19959705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber    return false;
2000e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  }
20159705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber  UcnLen = (ThisTokBuf[-1] == 'u' ? 4 : 8);
20256bedefe92ae8f604d14bea75cc3040ab32337c2Fariborz Jahanian  unsigned short UcnLenSave = UcnLen;
20359705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber  for (; ThisTokBuf != ThisTokEnd && UcnLenSave; ++ThisTokBuf, UcnLenSave--) {
2040e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff    int CharVal = HexDigitValue(ThisTokBuf[0]);
2050e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff    if (CharVal == -1) break;
2060e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff    UcnVal <<= 4;
2070e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff    UcnVal |= CharVal;
2080e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  }
2090e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  // If we didn't consume the proper number of digits, there is a problem.
21059705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber  if (UcnLenSave) {
211872a45e91778eb0b706ff57272fe547d4512eb19Chris Lattner    if (Diags) {
2127ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner      SourceLocation L =
2137ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner        Lexer::AdvanceToTokenCharacter(Loc, ThisTokBuf-ThisTokBegin,
2147ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner                                       Loc.getManager(), Features);
2157ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner      Diags->Report(FullSourceLoc(L, Loc.getManager()),
2167ef5c27eb6e8ebe58b52013246c06753c3613263Chris Lattner                    diag::err_ucn_escape_incomplete);
217872a45e91778eb0b706ff57272fe547d4512eb19Chris Lattner    }
21859705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber    return false;
2190e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  }
220be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell  // Check UCN constraints (C99 6.4.3p2) [C++11 lex.charset p2]
221be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell  bool invalid_ucn = (0xD800<=UcnVal && UcnVal<=0xDFFF) // surrogate codepoints
222be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell                       || 0x10FFFF < UcnVal; // maximum legal UTF32 value
223be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell
224be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell  // C++11 allows UCNs that refer to control characters and basic source
225be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell  // characters inside character and string literals
226be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell  if (!Features.CPlusPlus0x || !in_char_string_literal) {
227be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell    if ((UcnVal < 0xa0 &&
228be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell         (UcnVal != 0x24 && UcnVal != 0x40 && UcnVal != 0x60 ))) {  // $, @, `
229be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell      invalid_ucn = true;
230be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell    }
231be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell  }
232be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell
233be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell  if (invalid_ucn) {
2346c66f07854c1334a1ce9eae1428d61d54182a6e1Chris Lattner    if (Diags)
235872a45e91778eb0b706ff57272fe547d4512eb19Chris Lattner      Diags->Report(Loc, diag::err_ucn_escape_invalid);
23659705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber    return false;
23759705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber  }
23859705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber  return true;
23959705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber}
24059705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber
24159705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber/// EncodeUCNEscape - Read the Universal Character Name, check constraints and
24259705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber/// convert the UTF32 to UTF8 or UTF16. This is a subroutine of
24359705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber/// StringLiteralParser. When we decide to implement UCN's for identifiers,
24459705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber/// we will likely rework our support for UCN's.
24559705aee3fe01aa6fb6962dd11350161b47983d9Nico Weberstatic void EncodeUCNEscape(const char *&ThisTokBuf, const char *ThisTokEnd,
246a95880d6513c617bb96634bcc1f16c6bdb80dedcChris Lattner                            char *&ResultBuf, bool &HadError,
2475cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor                            FullSourceLoc Loc, unsigned CharByteWidth,
248d6471f7c1921c7802804ce3ff6fe9768310f72b9David Blaikie                            DiagnosticsEngine *Diags,
249d6471f7c1921c7802804ce3ff6fe9768310f72b9David Blaikie                            const LangOptions &Features) {
25059705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber  typedef uint32_t UTF32;
25159705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber  UTF32 UcnVal = 0;
25259705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber  unsigned short UcnLen = 0;
253a95880d6513c617bb96634bcc1f16c6bdb80dedcChris Lattner  if (!ProcessUCNEscape(ThisTokBuf, ThisTokEnd, UcnVal, UcnLen, Loc, Diags,
254a95880d6513c617bb96634bcc1f16c6bdb80dedcChris Lattner                        Features)) {
2550e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff    HadError = 1;
2560e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff    return;
2570e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  }
25859705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber
2595cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  assert((CharByteWidth == 1 || CharByteWidth == 2 || CharByteWidth) &&
2605cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor         "only character widths of 1, 2, or 4 bytes supported");
261a0f15b0848405ae16d63bd5d78c862a6526b338aNico Weber
2625cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  (void)UcnLen;
2635cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  assert((UcnLen== 4 || UcnLen== 8) && "only ucn length of 4 or 8 supported");
2645cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor
2655cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  if (CharByteWidth == 4) {
266caf1f26777c3adf2556c3af7bf9e01bd8ead17d9Eli Friedman    // FIXME: Make the type of the result buffer correct instead of
267caf1f26777c3adf2556c3af7bf9e01bd8ead17d9Eli Friedman    // using reinterpret_cast.
268caf1f26777c3adf2556c3af7bf9e01bd8ead17d9Eli Friedman    UTF32 *ResultPtr = reinterpret_cast<UTF32*>(ResultBuf);
269caf1f26777c3adf2556c3af7bf9e01bd8ead17d9Eli Friedman    *ResultPtr = UcnVal;
270caf1f26777c3adf2556c3af7bf9e01bd8ead17d9Eli Friedman    ResultBuf += 4;
2715cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor    return;
2725cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  }
273a0f15b0848405ae16d63bd5d78c862a6526b338aNico Weber
2745cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  if (CharByteWidth == 2) {
275caf1f26777c3adf2556c3af7bf9e01bd8ead17d9Eli Friedman    // FIXME: Make the type of the result buffer correct instead of
276caf1f26777c3adf2556c3af7bf9e01bd8ead17d9Eli Friedman    // using reinterpret_cast.
277caf1f26777c3adf2556c3af7bf9e01bd8ead17d9Eli Friedman    UTF16 *ResultPtr = reinterpret_cast<UTF16*>(ResultBuf);
278caf1f26777c3adf2556c3af7bf9e01bd8ead17d9Eli Friedman
279a0f15b0848405ae16d63bd5d78c862a6526b338aNico Weber    if (UcnVal < (UTF32)0xFFFF) {
280caf1f26777c3adf2556c3af7bf9e01bd8ead17d9Eli Friedman      *ResultPtr = UcnVal;
281caf1f26777c3adf2556c3af7bf9e01bd8ead17d9Eli Friedman      ResultBuf += 2;
282a0f15b0848405ae16d63bd5d78c862a6526b338aNico Weber      return;
283a0f15b0848405ae16d63bd5d78c862a6526b338aNico Weber    }
284a0f15b0848405ae16d63bd5d78c862a6526b338aNico Weber
285caf1f26777c3adf2556c3af7bf9e01bd8ead17d9Eli Friedman    // Convert to UTF16.
286a0f15b0848405ae16d63bd5d78c862a6526b338aNico Weber    UcnVal -= 0x10000;
287caf1f26777c3adf2556c3af7bf9e01bd8ead17d9Eli Friedman    *ResultPtr     = 0xD800 + (UcnVal >> 10);
288caf1f26777c3adf2556c3af7bf9e01bd8ead17d9Eli Friedman    *(ResultPtr+1) = 0xDC00 + (UcnVal & 0x3FF);
289caf1f26777c3adf2556c3af7bf9e01bd8ead17d9Eli Friedman    ResultBuf += 4;
29056bedefe92ae8f604d14bea75cc3040ab32337c2Fariborz Jahanian    return;
29156bedefe92ae8f604d14bea75cc3040ab32337c2Fariborz Jahanian  }
2925cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor
2935cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  assert(CharByteWidth == 1 && "UTF-8 encoding is only for 1 byte characters");
2945cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor
2950e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  // Now that we've parsed/checked the UCN, we convert from UTF32->UTF8.
2960e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  // The conversion below was inspired by:
2970e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  //   http://www.unicode.org/Public/PROGRAMS/CVTUTF/ConvertUTF.c
2981eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump  // First, we determine how many bytes the result will require.
2994e93b34fdb798abfa0534062a139f2c37cbf876eSteve Naroff  typedef uint8_t UTF8;
3000e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff
3010e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  unsigned short bytesToWrite = 0;
3020e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  if (UcnVal < (UTF32)0x80)
3030e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff    bytesToWrite = 1;
3040e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  else if (UcnVal < (UTF32)0x800)
3050e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff    bytesToWrite = 2;
3060e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  else if (UcnVal < (UTF32)0x10000)
3070e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff    bytesToWrite = 3;
3080e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  else
3090e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff    bytesToWrite = 4;
3101eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
3110e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  const unsigned byteMask = 0xBF;
3120e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  const unsigned byteMark = 0x80;
3131eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
3140e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  // Once the bits are split out into bytes of UTF8, this is a mask OR-ed
3158a5c0cd90b8d607ca284274000ed8716b836d253Steve Naroff  // into the first byte, depending on how many bytes follow.
3161eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump  static const UTF8 firstByteMark[5] = {
3178a5c0cd90b8d607ca284274000ed8716b836d253Steve Naroff    0x00, 0x00, 0xC0, 0xE0, 0xF0
3180e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  };
3190e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  // Finally, we write the bytes into ResultBuf.
3200e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  ResultBuf += bytesToWrite;
3210e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  switch (bytesToWrite) { // note: everything falls through.
3220e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff    case 4: *--ResultBuf = (UTF8)((UcnVal | byteMark) & byteMask); UcnVal >>= 6;
3230e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff    case 3: *--ResultBuf = (UTF8)((UcnVal | byteMark) & byteMask); UcnVal >>= 6;
3240e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff    case 2: *--ResultBuf = (UTF8)((UcnVal | byteMark) & byteMask); UcnVal >>= 6;
3250e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff    case 1: *--ResultBuf = (UTF8) (UcnVal | firstByteMark[bytesToWrite]);
3260e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  }
3270e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  // Update the buffer.
3280e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  ResultBuf += bytesToWrite;
3290e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff}
3305f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
3315f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
3325f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///       integer-constant: [C99 6.4.4.1]
3335f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///         decimal-constant integer-suffix
3345f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///         octal-constant integer-suffix
3355f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///         hexadecimal-constant integer-suffix
336b453ad3214d00acc51c9aa702c76c58354d84b84Richard Smith///       user-defined-integer-literal: [C++11 lex.ext]
337b453ad3214d00acc51c9aa702c76c58354d84b84Richard Smith///         decimal-literal ud-suffix
338b453ad3214d00acc51c9aa702c76c58354d84b84Richard Smith///         octal-literal ud-suffix
339b453ad3214d00acc51c9aa702c76c58354d84b84Richard Smith///         hexadecimal-literal ud-suffix
3401eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump///       decimal-constant:
3415f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///         nonzero-digit
3425f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///         decimal-constant digit
3431eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump///       octal-constant:
3445f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///         0
3455f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///         octal-constant octal-digit
3461eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump///       hexadecimal-constant:
3475f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///         hexadecimal-prefix hexadecimal-digit
3485f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///         hexadecimal-constant hexadecimal-digit
3495f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///       hexadecimal-prefix: one of
3505f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///         0x 0X
3515f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///       integer-suffix:
3525f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///         unsigned-suffix [long-suffix]
3535f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///         unsigned-suffix [long-long-suffix]
3545f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///         long-suffix [unsigned-suffix]
3555f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///         long-long-suffix [unsigned-suffix]
3565f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///       nonzero-digit:
3575f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///         1 2 3 4 5 6 7 8 9
3585f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///       octal-digit:
3595f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///         0 1 2 3 4 5 6 7
3605f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///       hexadecimal-digit:
3615f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///         0 1 2 3 4 5 6 7 8 9
3625f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///         a b c d e f
3635f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///         A B C D E F
3645f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///       unsigned-suffix: one of
3655f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///         u U
3665f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///       long-suffix: one of
3675f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///         l L
3681eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump///       long-long-suffix: one of
3695f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///         ll LL
3705f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///
3715f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///       floating-constant: [C99 6.4.4.2]
3725f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///         TODO: add rules...
3735f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///
3745f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid SpencerNumericLiteralParser::
3755f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid SpencerNumericLiteralParser(const char *begin, const char *end,
3765f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer                     SourceLocation TokLoc, Preprocessor &pp)
3775f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  : PP(pp), ThisTokBegin(begin), ThisTokEnd(end) {
3781eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
379c29bbde0a14a664d6843b21d3791478d1f4d2833Chris Lattner  // This routine assumes that the range begin/end matches the regex for integer
380c29bbde0a14a664d6843b21d3791478d1f4d2833Chris Lattner  // and FP constants (specifically, the 'pp-number' regex), and assumes that
381c29bbde0a14a664d6843b21d3791478d1f4d2833Chris Lattner  // the byte at "*end" is both valid and not part of the regex.  Because of
382c29bbde0a14a664d6843b21d3791478d1f4d2833Chris Lattner  // this, it doesn't have to check for 'overscan' in various places.
383c29bbde0a14a664d6843b21d3791478d1f4d2833Chris Lattner  assert(!isalnum(*end) && *end != '.' && *end != '_' &&
384c29bbde0a14a664d6843b21d3791478d1f4d2833Chris Lattner         "Lexer didn't maximally munch?");
3851eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
3865f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  s = DigitsBegin = begin;
3875f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  saw_exponent = false;
3885f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  saw_period = false;
389b453ad3214d00acc51c9aa702c76c58354d84b84Richard Smith  saw_ud_suffix = false;
3905f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  isLong = false;
3915f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  isUnsigned = false;
3925f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  isLongLong = false;
3936e400c286b485e28d04a742ea87860ddfefa672eChris Lattner  isFloat = false;
394506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner  isImaginary = false;
395b79fe2d28777652a4df4f49dc876cbec060ca90eMike Stump  isMicrosoftInteger = false;
3965f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  hadError = false;
3971eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
3985f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  if (*s == '0') { // parse radix
399368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    ParseNumberStartingWithZero(TokLoc);
400368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    if (hadError)
401368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner      return;
4025f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  } else { // the first digit is non-zero
4035f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    radix = 10;
4045f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    s = SkipDigits(s);
4055f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    if (s == ThisTokEnd) {
4065f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      // Done.
407016765e3453db2e302efe53905e99cdb25501234Christopher Lamb    } else if (isxdigit(*s) && !(*s == 'e' || *s == 'E')) {
408ac92d829111bc19d1cc97cd85c3c04bc39b969d1Chris Lattner      PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-begin),
4095f9e272e632e951b1efe824cd16acb4d96077930Chris Lattner              diag::err_invalid_decimal_digit) << StringRef(s, 1);
410ac92d829111bc19d1cc97cd85c3c04bc39b969d1Chris Lattner      hadError = true;
4115f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      return;
4125f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    } else if (*s == '.') {
4135f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      s++;
4145f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      saw_period = true;
4155f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      s = SkipDigits(s);
4161eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump    }
4174411f46050216a139ab2fc7ff145ec384d11ec7fChris Lattner    if ((*s == 'e' || *s == 'E')) { // exponent
41870f66ab053f36ab3df7a778d09bcb2b4b0fec1f8Chris Lattner      const char *Exponent = s;
4195f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      s++;
4205f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      saw_exponent = true;
4215f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      if (*s == '+' || *s == '-')  s++; // sign
4225f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      const char *first_non_digit = SkipDigits(s);
4230b7f69d789ca1f76582ee9a336e25861fd0c1416Chris Lattner      if (first_non_digit != s) {
4245f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer        s = first_non_digit;
4250b7f69d789ca1f76582ee9a336e25861fd0c1416Chris Lattner      } else {
426ac92d829111bc19d1cc97cd85c3c04bc39b969d1Chris Lattner        PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, Exponent-begin),
427ac92d829111bc19d1cc97cd85c3c04bc39b969d1Chris Lattner                diag::err_exponent_has_no_digits);
428ac92d829111bc19d1cc97cd85c3c04bc39b969d1Chris Lattner        hadError = true;
4290b7f69d789ca1f76582ee9a336e25861fd0c1416Chris Lattner        return;
4305f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      }
4315f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    }
4325f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  }
4335f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
4345f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  SuffixBegin = s;
4351eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
436506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner  // Parse the suffix.  At this point we can classify whether we have an FP or
437506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner  // integer constant.
438506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner  bool isFPConstant = isFloatingLiteral();
4391eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
440506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner  // Loop over all of the characters of the suffix.  If we see something bad,
441506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner  // we break out of the loop.
442506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner  for (; s != ThisTokEnd; ++s) {
443506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner    switch (*s) {
444506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner    case 'f':      // FP Suffix for "float"
445506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner    case 'F':
446506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner      if (!isFPConstant) break;  // Error for integer constant.
4476e400c286b485e28d04a742ea87860ddfefa672eChris Lattner      if (isFloat || isLong) break; // FF, LF invalid.
4486e400c286b485e28d04a742ea87860ddfefa672eChris Lattner      isFloat = true;
449506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner      continue;  // Success.
450506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner    case 'u':
451506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner    case 'U':
452506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner      if (isFPConstant) break;  // Error for floating constant.
453506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner      if (isUnsigned) break;    // Cannot be repeated.
454506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner      isUnsigned = true;
455506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner      continue;  // Success.
456506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner    case 'l':
457506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner    case 'L':
458506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner      if (isLong || isLongLong) break;  // Cannot be repeated.
4596e400c286b485e28d04a742ea87860ddfefa672eChris Lattner      if (isFloat) break;               // LF invalid.
4601eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
461506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner      // Check for long long.  The L's need to be adjacent and the same case.
462506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner      if (s+1 != ThisTokEnd && s[1] == s[0]) {
463506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner        if (isFPConstant) break;        // long long invalid for floats.
464506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner        isLongLong = true;
465506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner        ++s;  // Eat both of them.
466506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner      } else {
4675f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer        isLong = true;
4685f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      }
469506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner      continue;  // Success.
470506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner    case 'i':
471c637415a96c16abc7e28ef83c6c105716f7e8936Chris Lattner    case 'I':
47262ec1f2fd7368542bb926c04797fb07023547694Francois Pichet      if (PP.getLangOptions().MicrosoftExt) {
473a8be02b655b76e4dbe776b0c62bc3c450dc6feabFariborz Jahanian        if (isFPConstant || isLong || isLongLong) break;
4746e8c7acb61b2c7f421d6e1aba8a7a84e96ab6981Nuno Lopes
4750c29b22f4384500cc0d04f3072cc5d5d58d10d6cSteve Naroff        // Allow i8, i16, i32, i64, and i128.
476b79fe2d28777652a4df4f49dc876cbec060ca90eMike Stump        if (s + 1 != ThisTokEnd) {
477b79fe2d28777652a4df4f49dc876cbec060ca90eMike Stump          switch (s[1]) {
478b79fe2d28777652a4df4f49dc876cbec060ca90eMike Stump            case '8':
479b79fe2d28777652a4df4f49dc876cbec060ca90eMike Stump              s += 2; // i8 suffix
480b79fe2d28777652a4df4f49dc876cbec060ca90eMike Stump              isMicrosoftInteger = true;
4816e8c7acb61b2c7f421d6e1aba8a7a84e96ab6981Nuno Lopes              break;
482b79fe2d28777652a4df4f49dc876cbec060ca90eMike Stump            case '1':
4836e8c7acb61b2c7f421d6e1aba8a7a84e96ab6981Nuno Lopes              if (s + 2 == ThisTokEnd) break;
484d062b604548be6e2f85f6f63a461702e5ea14115Francois Pichet              if (s[2] == '6') {
485d062b604548be6e2f85f6f63a461702e5ea14115Francois Pichet                s += 3; // i16 suffix
486d062b604548be6e2f85f6f63a461702e5ea14115Francois Pichet                isMicrosoftInteger = true;
487d062b604548be6e2f85f6f63a461702e5ea14115Francois Pichet              }
4886e8c7acb61b2c7f421d6e1aba8a7a84e96ab6981Nuno Lopes              else if (s[2] == '2') {
4896e8c7acb61b2c7f421d6e1aba8a7a84e96ab6981Nuno Lopes                if (s + 3 == ThisTokEnd) break;
490d062b604548be6e2f85f6f63a461702e5ea14115Francois Pichet                if (s[3] == '8') {
491d062b604548be6e2f85f6f63a461702e5ea14115Francois Pichet                  s += 4; // i128 suffix
492d062b604548be6e2f85f6f63a461702e5ea14115Francois Pichet                  isMicrosoftInteger = true;
493d062b604548be6e2f85f6f63a461702e5ea14115Francois Pichet                }
494b79fe2d28777652a4df4f49dc876cbec060ca90eMike Stump              }
4956e8c7acb61b2c7f421d6e1aba8a7a84e96ab6981Nuno Lopes              break;
496b79fe2d28777652a4df4f49dc876cbec060ca90eMike Stump            case '3':
4976e8c7acb61b2c7f421d6e1aba8a7a84e96ab6981Nuno Lopes              if (s + 2 == ThisTokEnd) break;
498d062b604548be6e2f85f6f63a461702e5ea14115Francois Pichet              if (s[2] == '2') {
499d062b604548be6e2f85f6f63a461702e5ea14115Francois Pichet                s += 3; // i32 suffix
500d062b604548be6e2f85f6f63a461702e5ea14115Francois Pichet                isLong = true;
501d062b604548be6e2f85f6f63a461702e5ea14115Francois Pichet                isMicrosoftInteger = true;
502d062b604548be6e2f85f6f63a461702e5ea14115Francois Pichet              }
5036e8c7acb61b2c7f421d6e1aba8a7a84e96ab6981Nuno Lopes              break;
504b79fe2d28777652a4df4f49dc876cbec060ca90eMike Stump            case '6':
5056e8c7acb61b2c7f421d6e1aba8a7a84e96ab6981Nuno Lopes              if (s + 2 == ThisTokEnd) break;
506d062b604548be6e2f85f6f63a461702e5ea14115Francois Pichet              if (s[2] == '4') {
507d062b604548be6e2f85f6f63a461702e5ea14115Francois Pichet                s += 3; // i64 suffix
508d062b604548be6e2f85f6f63a461702e5ea14115Francois Pichet                isLongLong = true;
509d062b604548be6e2f85f6f63a461702e5ea14115Francois Pichet                isMicrosoftInteger = true;
510d062b604548be6e2f85f6f63a461702e5ea14115Francois Pichet              }
5116e8c7acb61b2c7f421d6e1aba8a7a84e96ab6981Nuno Lopes              break;
512b79fe2d28777652a4df4f49dc876cbec060ca90eMike Stump            default:
513b79fe2d28777652a4df4f49dc876cbec060ca90eMike Stump              break;
514b79fe2d28777652a4df4f49dc876cbec060ca90eMike Stump          }
515b79fe2d28777652a4df4f49dc876cbec060ca90eMike Stump          break;
5160c29b22f4384500cc0d04f3072cc5d5d58d10d6cSteve Naroff        }
5170c29b22f4384500cc0d04f3072cc5d5d58d10d6cSteve Naroff      }
5180c29b22f4384500cc0d04f3072cc5d5d58d10d6cSteve Naroff      // fall through.
519506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner    case 'j':
520506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner    case 'J':
521506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner      if (isImaginary) break;   // Cannot be repeated.
522506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner      PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-begin),
523506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner              diag::ext_imaginary_constant);
524506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner      isImaginary = true;
525506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner      continue;  // Success.
5265f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    }
527b453ad3214d00acc51c9aa702c76c58354d84b84Richard Smith    // If we reached here, there was an error or a ud-suffix.
528506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner    break;
529506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner  }
5301eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
531506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner  if (s != ThisTokEnd) {
532b453ad3214d00acc51c9aa702c76c58354d84b84Richard Smith    if (PP.getLangOptions().CPlusPlus0x && s == SuffixBegin && *s == '_') {
533b453ad3214d00acc51c9aa702c76c58354d84b84Richard Smith      // We have a ud-suffix! By C++11 [lex.ext]p10, ud-suffixes not starting
534b453ad3214d00acc51c9aa702c76c58354d84b84Richard Smith      // with an '_' are ill-formed.
535b453ad3214d00acc51c9aa702c76c58354d84b84Richard Smith      saw_ud_suffix = true;
536b453ad3214d00acc51c9aa702c76c58354d84b84Richard Smith      return;
537b453ad3214d00acc51c9aa702c76c58354d84b84Richard Smith    }
538b453ad3214d00acc51c9aa702c76c58354d84b84Richard Smith
539b453ad3214d00acc51c9aa702c76c58354d84b84Richard Smith    // Report an error if there are any.
540b453ad3214d00acc51c9aa702c76c58354d84b84Richard Smith    PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, SuffixBegin-begin),
541ac92d829111bc19d1cc97cd85c3c04bc39b969d1Chris Lattner            isFPConstant ? diag::err_invalid_suffix_float_constant :
542ac92d829111bc19d1cc97cd85c3c04bc39b969d1Chris Lattner                           diag::err_invalid_suffix_integer_constant)
5435f9e272e632e951b1efe824cd16acb4d96077930Chris Lattner      << StringRef(SuffixBegin, ThisTokEnd-SuffixBegin);
544ac92d829111bc19d1cc97cd85c3c04bc39b969d1Chris Lattner    hadError = true;
545506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner    return;
5465f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  }
5475f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer}
5485f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
549368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner/// ParseNumberStartingWithZero - This method is called when the first character
550368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner/// of the number is found to be a zero.  This means it is either an octal
551368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner/// number (like '04') or a hex number ('0x123a') a binary number ('0b1010') or
5521eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump/// a floating point number (01239.123e4).  Eat the prefix, determining the
553368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner/// radix etc.
554368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattnervoid NumericLiteralParser::ParseNumberStartingWithZero(SourceLocation TokLoc) {
555368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner  assert(s[0] == '0' && "Invalid method call");
556368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner  s++;
5571eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
558368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner  // Handle a hex number like 0x1234.
559368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner  if ((*s == 'x' || *s == 'X') && (isxdigit(s[1]) || s[1] == '.')) {
560368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    s++;
561368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    radix = 16;
562368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    DigitsBegin = s;
563368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    s = SkipHexDigits(s);
56466b0ebac276353f3ff7d41eaba3e6d24d48663b7Aaron Ballman    bool noSignificand = (s == DigitsBegin);
565368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    if (s == ThisTokEnd) {
566368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner      // Done.
567368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    } else if (*s == '.') {
568368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner      s++;
569368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner      saw_period = true;
57066b0ebac276353f3ff7d41eaba3e6d24d48663b7Aaron Ballman      const char *floatDigitsBegin = s;
571368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner      s = SkipHexDigits(s);
57266b0ebac276353f3ff7d41eaba3e6d24d48663b7Aaron Ballman      noSignificand &= (floatDigitsBegin == s);
57366b0ebac276353f3ff7d41eaba3e6d24d48663b7Aaron Ballman    }
57466b0ebac276353f3ff7d41eaba3e6d24d48663b7Aaron Ballman
57566b0ebac276353f3ff7d41eaba3e6d24d48663b7Aaron Ballman    if (noSignificand) {
57666b0ebac276353f3ff7d41eaba3e6d24d48663b7Aaron Ballman      PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-ThisTokBegin), \
57766b0ebac276353f3ff7d41eaba3e6d24d48663b7Aaron Ballman        diag::err_hexconstant_requires_digits);
57866b0ebac276353f3ff7d41eaba3e6d24d48663b7Aaron Ballman      hadError = true;
57966b0ebac276353f3ff7d41eaba3e6d24d48663b7Aaron Ballman      return;
580368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    }
58166b0ebac276353f3ff7d41eaba3e6d24d48663b7Aaron Ballman
582368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    // A binary exponent can appear with or with a '.'. If dotted, the
5831eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump    // binary exponent is required.
5841155c42e7b1b4e401bb0a331a6d715d637958c75Douglas Gregor    if (*s == 'p' || *s == 'P') {
585368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner      const char *Exponent = s;
586368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner      s++;
587368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner      saw_exponent = true;
588368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner      if (*s == '+' || *s == '-')  s++; // sign
589368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner      const char *first_non_digit = SkipDigits(s);
5906ea623823f8532670480425b573f35115404b4a0Chris Lattner      if (first_non_digit == s) {
591ac92d829111bc19d1cc97cd85c3c04bc39b969d1Chris Lattner        PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, Exponent-ThisTokBegin),
592ac92d829111bc19d1cc97cd85c3c04bc39b969d1Chris Lattner                diag::err_exponent_has_no_digits);
593ac92d829111bc19d1cc97cd85c3c04bc39b969d1Chris Lattner        hadError = true;
5946ea623823f8532670480425b573f35115404b4a0Chris Lattner        return;
595368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner      }
5966ea623823f8532670480425b573f35115404b4a0Chris Lattner      s = first_non_digit;
5971eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
5981155c42e7b1b4e401bb0a331a6d715d637958c75Douglas Gregor      if (!PP.getLangOptions().HexFloats)
599ac92d829111bc19d1cc97cd85c3c04bc39b969d1Chris Lattner        PP.Diag(TokLoc, diag::ext_hexconstant_invalid);
600368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    } else if (saw_period) {
601ac92d829111bc19d1cc97cd85c3c04bc39b969d1Chris Lattner      PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-ThisTokBegin),
602ac92d829111bc19d1cc97cd85c3c04bc39b969d1Chris Lattner              diag::err_hexconstant_requires_exponent);
603ac92d829111bc19d1cc97cd85c3c04bc39b969d1Chris Lattner      hadError = true;
604368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    }
605368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    return;
606368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner  }
6071eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
608368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner  // Handle simple binary numbers 0b01010
609368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner  if (*s == 'b' || *s == 'B') {
610368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    // 0b101010 is a GCC extension.
611413d355e38755a71f106dbc0ac900ca989070916Chris Lattner    PP.Diag(TokLoc, diag::ext_binary_literal);
612368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    ++s;
613368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    radix = 2;
614368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    DigitsBegin = s;
615368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    s = SkipBinaryDigits(s);
616368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    if (s == ThisTokEnd) {
617368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner      // Done.
618368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    } else if (isxdigit(*s)) {
619ac92d829111bc19d1cc97cd85c3c04bc39b969d1Chris Lattner      PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-ThisTokBegin),
6205f9e272e632e951b1efe824cd16acb4d96077930Chris Lattner              diag::err_invalid_binary_digit) << StringRef(s, 1);
621ac92d829111bc19d1cc97cd85c3c04bc39b969d1Chris Lattner      hadError = true;
622368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    }
623413d355e38755a71f106dbc0ac900ca989070916Chris Lattner    // Other suffixes will be diagnosed by the caller.
624368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    return;
625368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner  }
6261eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
627368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner  // For now, the radix is set to 8. If we discover that we have a
628368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner  // floating point constant, the radix will change to 10. Octal floating
6291eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump  // point constants are not permitted (only decimal and hexadecimal).
630368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner  radix = 8;
631368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner  DigitsBegin = s;
632368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner  s = SkipOctalDigits(s);
633368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner  if (s == ThisTokEnd)
634368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    return; // Done, simple octal number like 01234
6351eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
636413d355e38755a71f106dbc0ac900ca989070916Chris Lattner  // If we have some other non-octal digit that *is* a decimal digit, see if
637413d355e38755a71f106dbc0ac900ca989070916Chris Lattner  // this is part of a floating point number like 094.123 or 09e1.
638413d355e38755a71f106dbc0ac900ca989070916Chris Lattner  if (isdigit(*s)) {
639413d355e38755a71f106dbc0ac900ca989070916Chris Lattner    const char *EndDecimal = SkipDigits(s);
640413d355e38755a71f106dbc0ac900ca989070916Chris Lattner    if (EndDecimal[0] == '.' || EndDecimal[0] == 'e' || EndDecimal[0] == 'E') {
641413d355e38755a71f106dbc0ac900ca989070916Chris Lattner      s = EndDecimal;
642413d355e38755a71f106dbc0ac900ca989070916Chris Lattner      radix = 10;
643413d355e38755a71f106dbc0ac900ca989070916Chris Lattner    }
644413d355e38755a71f106dbc0ac900ca989070916Chris Lattner  }
6451eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
646413d355e38755a71f106dbc0ac900ca989070916Chris Lattner  // If we have a hex digit other than 'e' (which denotes a FP exponent) then
647413d355e38755a71f106dbc0ac900ca989070916Chris Lattner  // the code is using an incorrect base.
648368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner  if (isxdigit(*s) && *s != 'e' && *s != 'E') {
649ac92d829111bc19d1cc97cd85c3c04bc39b969d1Chris Lattner    PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-ThisTokBegin),
6505f9e272e632e951b1efe824cd16acb4d96077930Chris Lattner            diag::err_invalid_octal_digit) << StringRef(s, 1);
651ac92d829111bc19d1cc97cd85c3c04bc39b969d1Chris Lattner    hadError = true;
652368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    return;
653368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner  }
6541eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
655368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner  if (*s == '.') {
656368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    s++;
657368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    radix = 10;
658368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    saw_period = true;
659413d355e38755a71f106dbc0ac900ca989070916Chris Lattner    s = SkipDigits(s); // Skip suffix.
660368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner  }
661368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner  if (*s == 'e' || *s == 'E') { // exponent
662368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    const char *Exponent = s;
663368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    s++;
664368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    radix = 10;
665368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    saw_exponent = true;
666368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    if (*s == '+' || *s == '-')  s++; // sign
667368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    const char *first_non_digit = SkipDigits(s);
668368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    if (first_non_digit != s) {
669368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner      s = first_non_digit;
670368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    } else {
6711eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump      PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, Exponent-ThisTokBegin),
672ac92d829111bc19d1cc97cd85c3c04bc39b969d1Chris Lattner              diag::err_exponent_has_no_digits);
673ac92d829111bc19d1cc97cd85c3c04bc39b969d1Chris Lattner      hadError = true;
674368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner      return;
675368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    }
676368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner  }
677368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner}
678368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner
679368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner
6805f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// GetIntegerValue - Convert this numeric literal value to an APInt that
6815f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// matches Val's input width.  If there is an overflow, set Val to the low bits
6825f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// of the result and return true.  Otherwise, return false.
6835f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencerbool NumericLiteralParser::GetIntegerValue(llvm::APInt &Val) {
684a179be34c1a3c0190e6b9e39dee2197651f44a5dDaniel Dunbar  // Fast path: Compute a conservative bound on the maximum number of
685a179be34c1a3c0190e6b9e39dee2197651f44a5dDaniel Dunbar  // bits per digit in this radix. If we can't possibly overflow a
686a179be34c1a3c0190e6b9e39dee2197651f44a5dDaniel Dunbar  // uint64 based on that bound then do the simple conversion to
687a179be34c1a3c0190e6b9e39dee2197651f44a5dDaniel Dunbar  // integer. This avoids the expensive overflow checking below, and
688a179be34c1a3c0190e6b9e39dee2197651f44a5dDaniel Dunbar  // handles the common cases that matter (small decimal integers and
689a179be34c1a3c0190e6b9e39dee2197651f44a5dDaniel Dunbar  // hex/octal values which don't overflow).
690a179be34c1a3c0190e6b9e39dee2197651f44a5dDaniel Dunbar  unsigned MaxBitsPerDigit = 1;
6911eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump  while ((1U << MaxBitsPerDigit) < radix)
692a179be34c1a3c0190e6b9e39dee2197651f44a5dDaniel Dunbar    MaxBitsPerDigit += 1;
693a179be34c1a3c0190e6b9e39dee2197651f44a5dDaniel Dunbar  if ((SuffixBegin - DigitsBegin) * MaxBitsPerDigit <= 64) {
694a179be34c1a3c0190e6b9e39dee2197651f44a5dDaniel Dunbar    uint64_t N = 0;
695a179be34c1a3c0190e6b9e39dee2197651f44a5dDaniel Dunbar    for (s = DigitsBegin; s != SuffixBegin; ++s)
696a179be34c1a3c0190e6b9e39dee2197651f44a5dDaniel Dunbar      N = N*radix + HexDigitValue(*s);
697a179be34c1a3c0190e6b9e39dee2197651f44a5dDaniel Dunbar
698a179be34c1a3c0190e6b9e39dee2197651f44a5dDaniel Dunbar    // This will truncate the value to Val's input width. Simply check
699a179be34c1a3c0190e6b9e39dee2197651f44a5dDaniel Dunbar    // for overflow by comparing.
700a179be34c1a3c0190e6b9e39dee2197651f44a5dDaniel Dunbar    Val = N;
701a179be34c1a3c0190e6b9e39dee2197651f44a5dDaniel Dunbar    return Val.getZExtValue() != N;
702a179be34c1a3c0190e6b9e39dee2197651f44a5dDaniel Dunbar  }
703a179be34c1a3c0190e6b9e39dee2197651f44a5dDaniel Dunbar
7045f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  Val = 0;
7055f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  s = DigitsBegin;
7065f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
7075f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  llvm::APInt RadixVal(Val.getBitWidth(), radix);
7085f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  llvm::APInt CharVal(Val.getBitWidth(), 0);
7095f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  llvm::APInt OldVal = Val;
7101eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
7115f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  bool OverflowOccurred = false;
7125f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  while (s < SuffixBegin) {
7135f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    unsigned C = HexDigitValue(*s++);
7141eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
7155f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    // If this letter is out of bound for this radix, reject it.
7165f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    assert(C < radix && "NumericLiteralParser ctor should have rejected this");
7171eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
7185f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    CharVal = C;
7191eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
7205f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    // Add the digit to the value in the appropriate radix.  If adding in digits
7215f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    // made the value smaller, then this overflowed.
7225f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    OldVal = Val;
7235f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
7245f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    // Multiply by radix, did overflow occur on the multiply?
7255f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    Val *= RadixVal;
7265f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    OverflowOccurred |= Val.udiv(RadixVal) != OldVal;
7275f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
7285f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    // Add value, did overflow occur on the value?
729d70cb645702bdbb42aee58403306a7c47e0d901cDaniel Dunbar    //   (a + b) ult b  <=> overflow
7305f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    Val += CharVal;
7315f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    OverflowOccurred |= Val.ult(CharVal);
7325f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  }
7335f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  return OverflowOccurred;
7345f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer}
7355f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
73694c939dc1d4958b62ea5a89294dd8b2905f3191fJohn McCallllvm::APFloat::opStatus
73794c939dc1d4958b62ea5a89294dd8b2905f3191fJohn McCallNumericLiteralParser::GetFloatValue(llvm::APFloat &Result) {
738427d5af5b601985093b6b4b33ba1e30fc24d86dcTed Kremenek  using llvm::APFloat;
7391eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
740e9f195f15ffe96d0a220c872ab12d0630a633c44Erick Tryzelaar  unsigned n = std::min(SuffixBegin - ThisTokBegin, ThisTokEnd - ThisTokBegin);
74194c939dc1d4958b62ea5a89294dd8b2905f3191fJohn McCall  return Result.convertFromString(StringRef(ThisTokBegin, n),
74294c939dc1d4958b62ea5a89294dd8b2905f3191fJohn McCall                                  APFloat::rmNearestTiesToEven);
7435f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer}
7445f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
7455f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
7465cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith///       user-defined-character-literal: [C++11 lex.ext]
7475cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith///         character-literal ud-suffix
7485cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith///       ud-suffix:
7495cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith///         identifier
7505cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith///       character-literal: [C++11 lex.ccon]
7512fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         ' c-char-sequence '
7522fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         u' c-char-sequence '
7532fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         U' c-char-sequence '
7542fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         L' c-char-sequence '
7552fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///       c-char-sequence:
7562fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         c-char
7572fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         c-char-sequence c-char
7582fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///       c-char:
7592fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         any member of the source character set except the single-quote ',
7602fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///           backslash \, or new-line character
7612fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         escape-sequence
7622fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         universal-character-name
7635cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith///       escape-sequence:
7642fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         simple-escape-sequence
7652fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         octal-escape-sequence
7662fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         hexadecimal-escape-sequence
7672fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///       simple-escape-sequence:
768ddddd48da72bc29d1c3f388ed91ea5549328129eNAKAMURA Takumi///         one of \' \" \? \\ \a \b \f \n \r \t \v
7692fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///       octal-escape-sequence:
7702fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         \ octal-digit
7712fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         \ octal-digit octal-digit
7722fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         \ octal-digit octal-digit octal-digit
7732fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///       hexadecimal-escape-sequence:
7742fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         \x hexadecimal-digit
7752fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         hexadecimal-escape-sequence hexadecimal-digit
7765cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith///       universal-character-name: [C++11 lex.charset]
7772fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         \u hex-quad
7782fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         \U hex-quad hex-quad
7792fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///       hex-quad:
7802fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         hex-digit hex-digit hex-digit hex-digit
7812fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///
7825f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid SpencerCharLiteralParser::CharLiteralParser(const char *begin, const char *end,
7835cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor                                     SourceLocation Loc, Preprocessor &PP,
7845cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor                                     tok::TokenKind kind) {
785be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell  // At this point we know that the character matches the regex "(L|u|U)?'.*'".
7865f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  HadError = false;
7871eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
7885cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  Kind = kind;
7895cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor
790be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell  // Skip over wide character determinant.
791be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell  if (Kind != tok::char_constant) {
7925cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor    ++begin;
7935cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  }
7941eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
7955f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  // Skip over the entry quote.
7965f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  assert(begin[0] == '\'' && "Invalid token lexed");
7975f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  ++begin;
7985f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
7995cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith  // Remove an optional ud-suffix.
8005cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith  if (end[-1] != '\'') {
8015cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith    const char *UDSuffixEnd = end;
8025cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith    do {
8035cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith      --end;
8045cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith    } while (end[-1] != '\'');
8055cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith    UDSuffixBuf.assign(end, UDSuffixEnd);
806dd66be718f23c8149d74ae8b011b002e11e8d5baRichard Smith    UDSuffixOffset = end - begin + 1;
8075cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith  }
8085cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith
809be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell  // Trim the ending quote.
8105cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith  assert(end != begin && "Invalid token lexed");
811be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell  --end;
812be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell
8131eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump  // FIXME: The "Value" is an uint64_t so we can handle char literals of
814fc8f0e14ad142ed811e90fbd9a30e419e301c717Chris Lattner  // up to 64-bits.
8155f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  // FIXME: This extensively assumes that 'char' is 8-bits.
81698be4943e8dc4f3905629a7102668960873cf863Chris Lattner  assert(PP.getTargetInfo().getCharWidth() == 8 &&
8175f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer         "Assumes char is 8 bits");
818e3ad881e4e9620e941dabd4e78dacdb028b85682Chris Lattner  assert(PP.getTargetInfo().getIntWidth() <= 64 &&
819e3ad881e4e9620e941dabd4e78dacdb028b85682Chris Lattner         (PP.getTargetInfo().getIntWidth() & 7) == 0 &&
820e3ad881e4e9620e941dabd4e78dacdb028b85682Chris Lattner         "Assumes sizeof(int) on target is <= 64 and a multiple of char");
821e3ad881e4e9620e941dabd4e78dacdb028b85682Chris Lattner  assert(PP.getTargetInfo().getWCharWidth() <= 64 &&
822e3ad881e4e9620e941dabd4e78dacdb028b85682Chris Lattner         "Assumes sizeof(wchar) on target is <= 64");
8234bc11af9bed1d4a247e3db1fcb754d410ad99099Sanjiv Gupta
824be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell  SmallVector<uint32_t,4> codepoint_buffer;
825be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell  codepoint_buffer.resize(end-begin);
826be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell  uint32_t *buffer_begin = &codepoint_buffer.front();
827be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell  uint32_t *buffer_end = buffer_begin + codepoint_buffer.size();
828be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell
829be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell  // Unicode escapes representing characters that cannot be correctly
830be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell  // represented in a single code unit are disallowed in character literals
831be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell  // by this implementation.
832be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell  uint32_t largest_character_for_kind;
833be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell  if (tok::wide_char_constant == Kind) {
834be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell    largest_character_for_kind = 0xFFFFFFFFu >> (32-PP.getTargetInfo().getWCharWidth());
835be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell  } else if (tok::utf16_char_constant == Kind) {
836be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell    largest_character_for_kind = 0xFFFF;
837be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell  } else if (tok::utf32_char_constant == Kind) {
838be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell    largest_character_for_kind = 0x10FFFF;
839be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell  } else {
840be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell    largest_character_for_kind = 0x7Fu;
841be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell  }
8425f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
843be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell  while (begin!=end) {
844be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell    // Is this a span of non-escape characters?
845be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell    if (begin[0] != '\\') {
846be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell      char const *start = begin;
847be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell      do {
848be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell        ++begin;
849be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell      } while (begin != end && *begin != '\\');
850be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell
85191359302b822d829afa93c0dadf5f7ce6e19fbc6Eli Friedman      char const *tmp_in_start = start;
85291359302b822d829afa93c0dadf5f7ce6e19fbc6Eli Friedman      uint32_t *tmp_out_start = buffer_begin;
853be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell      ConversionResult res =
854be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell      ConvertUTF8toUTF32(reinterpret_cast<UTF8 const **>(&start),
855be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell                         reinterpret_cast<UTF8 const *>(begin),
856be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell                         &buffer_begin,buffer_end,strictConversion);
857be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell      if (res!=conversionOK) {
85891359302b822d829afa93c0dadf5f7ce6e19fbc6Eli Friedman        // If we see bad encoding for unprefixed character literals, warn and
85991359302b822d829afa93c0dadf5f7ce6e19fbc6Eli Friedman        // simply copy the byte values, for compatibility with gcc and
86091359302b822d829afa93c0dadf5f7ce6e19fbc6Eli Friedman        // older versions of clang.
86191359302b822d829afa93c0dadf5f7ce6e19fbc6Eli Friedman        bool NoErrorOnBadEncoding = isAscii();
86291359302b822d829afa93c0dadf5f7ce6e19fbc6Eli Friedman        unsigned Msg = diag::err_bad_character_encoding;
86391359302b822d829afa93c0dadf5f7ce6e19fbc6Eli Friedman        if (NoErrorOnBadEncoding)
86491359302b822d829afa93c0dadf5f7ce6e19fbc6Eli Friedman          Msg = diag::warn_bad_character_encoding;
86591359302b822d829afa93c0dadf5f7ce6e19fbc6Eli Friedman        PP.Diag(Loc, Msg);
86691359302b822d829afa93c0dadf5f7ce6e19fbc6Eli Friedman        if (NoErrorOnBadEncoding) {
86791359302b822d829afa93c0dadf5f7ce6e19fbc6Eli Friedman          start = tmp_in_start;
86891359302b822d829afa93c0dadf5f7ce6e19fbc6Eli Friedman          buffer_begin = tmp_out_start;
86991359302b822d829afa93c0dadf5f7ce6e19fbc6Eli Friedman          for ( ; start != begin; ++start, ++buffer_begin)
87091359302b822d829afa93c0dadf5f7ce6e19fbc6Eli Friedman            *buffer_begin = static_cast<uint8_t>(*start);
87191359302b822d829afa93c0dadf5f7ce6e19fbc6Eli Friedman        } else {
87291359302b822d829afa93c0dadf5f7ce6e19fbc6Eli Friedman          HadError = true;
87391359302b822d829afa93c0dadf5f7ce6e19fbc6Eli Friedman        }
8745f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      } else {
87591359302b822d829afa93c0dadf5f7ce6e19fbc6Eli Friedman        for (; tmp_out_start <buffer_begin; ++tmp_out_start) {
87691359302b822d829afa93c0dadf5f7ce6e19fbc6Eli Friedman          if (*tmp_out_start > largest_character_for_kind) {
877be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell            HadError = true;
878be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell            PP.Diag(Loc, diag::err_character_too_large);
879be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell          }
8801c6c64b5181a960c7d4cace4995a938d4dfa6fbfChris Lattner        }
8815f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      }
882be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell
883be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell      continue;
8845f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    }
885be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell    // Is this a Universal Character Name excape?
886be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell    if (begin[1] == 'u' || begin[1] == 'U') {
887be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell      unsigned short UcnLen = 0;
888be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell      if (!ProcessUCNEscape(begin, end, *buffer_begin, UcnLen,
889be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell                            FullSourceLoc(Loc, PP.getSourceManager()),
890be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell                            &PP.getDiagnostics(), PP.getLangOptions(),
891be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell                            true))
892be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell      {
893be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell        HadError = true;
894be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell      } else if (*buffer_begin > largest_character_for_kind) {
895be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell        HadError = true;
896be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell        PP.Diag(Loc,diag::err_character_too_large);
897be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell      }
8981eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
899be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell      ++buffer_begin;
900be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell      continue;
901be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell    }
902be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell    unsigned CharWidth = getCharWidth(Kind, PP.getTargetInfo());
903be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell    uint64_t result =
904be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell    ProcessCharEscape(begin, end, HadError,
905be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell                      FullSourceLoc(Loc,PP.getSourceManager()),
906be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell                      CharWidth, &PP.getDiagnostics());
907be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell    *buffer_begin++ = result;
908e3ad881e4e9620e941dabd4e78dacdb028b85682Chris Lattner  }
909e3ad881e4e9620e941dabd4e78dacdb028b85682Chris Lattner
910be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell  unsigned NumCharsSoFar = buffer_begin-&codepoint_buffer.front();
911be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell
912e3ad881e4e9620e941dabd4e78dacdb028b85682Chris Lattner  if (NumCharsSoFar > 1) {
913be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell    if (isWide())
9145cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor      PP.Diag(Loc, diag::warn_extraneous_char_constant);
915be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell    else if (isAscii() && NumCharsSoFar == 4)
916be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell      PP.Diag(Loc, diag::ext_four_char_character_literal);
917be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell    else if (isAscii())
918e3ad881e4e9620e941dabd4e78dacdb028b85682Chris Lattner      PP.Diag(Loc, diag::ext_multichar_character_literal);
919e3ad881e4e9620e941dabd4e78dacdb028b85682Chris Lattner    else
920be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell      PP.Diag(Loc, diag::err_multichar_utf_character_literal);
9212a1c363f38e59a5044fc349aa7e538a50954c244Eli Friedman    IsMultiChar = true;
922930b71a4a7dedf70a73e5fd875bae7df452b80a9Daniel Dunbar  } else
923930b71a4a7dedf70a73e5fd875bae7df452b80a9Daniel Dunbar    IsMultiChar = false;
9244bc11af9bed1d4a247e3db1fcb754d410ad99099Sanjiv Gupta
925be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell  llvm::APInt LitVal(PP.getTargetInfo().getIntWidth(), 0);
926be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell
927be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell  // Narrow character literals act as though their value is concatenated
928be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell  // in this implementation, but warn on overflow.
929be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell  bool multi_char_too_long = false;
930be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell  if (isAscii() && isMultiChar()) {
931be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell    LitVal = 0;
932be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell    for (size_t i=0;i<NumCharsSoFar;++i) {
933be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell      // check for enough leading zeros to shift into
934be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell      multi_char_too_long |= (LitVal.countLeadingZeros() < 8);
935be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell      LitVal <<= 8;
936be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell      LitVal = LitVal + (codepoint_buffer[i] & 0xFF);
937be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell    }
938be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell  } else if (NumCharsSoFar > 0) {
939be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell    // otherwise just take the last character
940be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell    LitVal = buffer_begin[-1];
941be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell  }
942be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell
943be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell  if (!HadError && multi_char_too_long) {
944be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell    PP.Diag(Loc,diag::warn_char_constant_too_large);
945be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell  }
946be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell
9474bc11af9bed1d4a247e3db1fcb754d410ad99099Sanjiv Gupta  // Transfer the value from APInt to uint64_t
9484bc11af9bed1d4a247e3db1fcb754d410ad99099Sanjiv Gupta  Value = LitVal.getZExtValue();
9491eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
9505f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  // If this is a single narrow character, sign extend it (e.g. '\xFF' is "-1")
9515f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  // if 'char' is signed for this target (C99 6.4.4.4p10).  Note that multiple
9525f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  // character constants are not sign extended in the this implementation:
9535f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  // '\xFF\xFF' = 65536 and '\x0\xFF' = 255, which matches GCC.
9545cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  if (isAscii() && NumCharsSoFar == 1 && (Value & 128) &&
95515b91764d08e886391c865c4a444d7b51141c284Eli Friedman      PP.getLangOptions().CharIsSigned)
9565f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    Value = (signed char)Value;
9575f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer}
9585f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
9595f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
9602fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///       string-literal: [C++0x lex.string]
9612fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         encoding-prefix " [s-char-sequence] "
9622fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         encoding-prefix R raw-string
9632fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///       encoding-prefix:
9642fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         u8
9652fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         u
9662fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         U
9672fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         L
9685f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///       s-char-sequence:
9695f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///         s-char
9705f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///         s-char-sequence s-char
9715f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///       s-char:
9722fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         any member of the source character set except the double-quote ",
9732fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///           backslash \, or new-line character
9742fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         escape-sequence
9755f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///         universal-character-name
9762fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///       raw-string:
9772fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         " d-char-sequence ( r-char-sequence ) d-char-sequence "
9782fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///       r-char-sequence:
9792fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         r-char
9802fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         r-char-sequence r-char
9812fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///       r-char:
9822fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         any member of the source character set, except a right parenthesis )
9832fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///           followed by the initial d-char-sequence (which may be empty)
9842fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///           followed by a double quote ".
9852fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///       d-char-sequence:
9862fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         d-char
9872fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         d-char-sequence d-char
9882fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///       d-char:
9892fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         any member of the basic source character set except:
9902fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///           space, the left parenthesis (, the right parenthesis ),
9912fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///           the backslash \, and the control characters representing horizontal
9922fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///           tab, vertical tab, form feed, and newline.
9932fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///       escape-sequence: [C++0x lex.ccon]
9942fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         simple-escape-sequence
9952fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         octal-escape-sequence
9962fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         hexadecimal-escape-sequence
9972fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///       simple-escape-sequence:
998ddddd48da72bc29d1c3f388ed91ea5549328129eNAKAMURA Takumi///         one of \' \" \? \\ \a \b \f \n \r \t \v
9992fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///       octal-escape-sequence:
10002fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         \ octal-digit
10012fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         \ octal-digit octal-digit
10022fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         \ octal-digit octal-digit octal-digit
10032fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///       hexadecimal-escape-sequence:
10042fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         \x hexadecimal-digit
10052fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         hexadecimal-escape-sequence hexadecimal-digit
10065f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///       universal-character-name:
10075f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///         \u hex-quad
10085f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///         \U hex-quad hex-quad
10095f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///       hex-quad:
10105f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///         hex-digit hex-digit hex-digit hex-digit
10115f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///
10125f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid SpencerStringLiteralParser::
1013d217773f106856a11879ec79dc468efefaf2ee75Chris LattnerStringLiteralParser(const Token *StringToks, unsigned NumStringToks,
10140833dd0675c25cbb35671c7a2006d511d5c77ce3Chris Lattner                    Preprocessor &PP, bool Complain)
10150833dd0675c25cbb35671c7a2006d511d5c77ce3Chris Lattner  : SM(PP.getSourceManager()), Features(PP.getLangOptions()),
1016403de3f932b5d1d3e4e58f69960000911d04dd2aArgyrios Kyrtzidis    Target(PP.getTargetInfo()), Diags(Complain ? &PP.getDiagnostics() : 0),
10175cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor    MaxTokenLength(0), SizeBound(0), CharByteWidth(0), Kind(tok::unknown),
10185cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor    ResultPtr(ResultBuf.data()), hadError(false), Pascal(false) {
10190833dd0675c25cbb35671c7a2006d511d5c77ce3Chris Lattner  init(StringToks, NumStringToks);
10200833dd0675c25cbb35671c7a2006d511d5c77ce3Chris Lattner}
10210833dd0675c25cbb35671c7a2006d511d5c77ce3Chris Lattner
10220833dd0675c25cbb35671c7a2006d511d5c77ce3Chris Lattnervoid StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){
1023403de3f932b5d1d3e4e58f69960000911d04dd2aArgyrios Kyrtzidis  // The literal token may have come from an invalid source location (e.g. due
1024403de3f932b5d1d3e4e58f69960000911d04dd2aArgyrios Kyrtzidis  // to a PCH error), in which case the token length will be 0.
1025403de3f932b5d1d3e4e58f69960000911d04dd2aArgyrios Kyrtzidis  if (NumStringToks == 0 || StringToks[0].getLength() < 2) {
1026403de3f932b5d1d3e4e58f69960000911d04dd2aArgyrios Kyrtzidis    hadError = true;
1027403de3f932b5d1d3e4e58f69960000911d04dd2aArgyrios Kyrtzidis    return;
1028403de3f932b5d1d3e4e58f69960000911d04dd2aArgyrios Kyrtzidis  }
1029403de3f932b5d1d3e4e58f69960000911d04dd2aArgyrios Kyrtzidis
10305f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  // Scan all of the string portions, remember the max individual token length,
10315f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  // computing a bound on the concatenated string length, and see whether any
10325f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  // piece is a wide-string.  If any of the string portions is a wide-string
10335f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  // literal, the result is a wide-string literal [C99 6.4.5p4].
1034403de3f932b5d1d3e4e58f69960000911d04dd2aArgyrios Kyrtzidis  assert(NumStringToks && "expected at least one token");
10356cf750298d3621d8a10a6dd07fcee8e274b9d94dSean Hunt  MaxTokenLength = StringToks[0].getLength();
1036403de3f932b5d1d3e4e58f69960000911d04dd2aArgyrios Kyrtzidis  assert(StringToks[0].getLength() >= 2 && "literal token is invalid!");
10376cf750298d3621d8a10a6dd07fcee8e274b9d94dSean Hunt  SizeBound = StringToks[0].getLength()-2;  // -2 for "".
10385cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  Kind = StringToks[0].getKind();
10396cf750298d3621d8a10a6dd07fcee8e274b9d94dSean Hunt
10406cf750298d3621d8a10a6dd07fcee8e274b9d94dSean Hunt  hadError = false;
10415f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
10425f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  // Implement Translation Phase #6: concatenation of string literals
10435f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  /// (C99 5.1.1.2p1).  The common case is only one string fragment.
10445f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  for (unsigned i = 1; i != NumStringToks; ++i) {
1045403de3f932b5d1d3e4e58f69960000911d04dd2aArgyrios Kyrtzidis    if (StringToks[i].getLength() < 2) {
1046403de3f932b5d1d3e4e58f69960000911d04dd2aArgyrios Kyrtzidis      hadError = true;
1047403de3f932b5d1d3e4e58f69960000911d04dd2aArgyrios Kyrtzidis      return;
1048403de3f932b5d1d3e4e58f69960000911d04dd2aArgyrios Kyrtzidis    }
1049403de3f932b5d1d3e4e58f69960000911d04dd2aArgyrios Kyrtzidis
10505f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    // The string could be shorter than this if it needs cleaning, but this is a
10515f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    // reasonable bound, which is all we need.
1052403de3f932b5d1d3e4e58f69960000911d04dd2aArgyrios Kyrtzidis    assert(StringToks[i].getLength() >= 2 && "literal token is invalid!");
10536cf750298d3621d8a10a6dd07fcee8e274b9d94dSean Hunt    SizeBound += StringToks[i].getLength()-2;  // -2 for "".
10541eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
10555f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    // Remember maximum string piece length.
10566cf750298d3621d8a10a6dd07fcee8e274b9d94dSean Hunt    if (StringToks[i].getLength() > MaxTokenLength)
10576cf750298d3621d8a10a6dd07fcee8e274b9d94dSean Hunt      MaxTokenLength = StringToks[i].getLength();
10581eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
10595cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor    // Remember if we see any wide or utf-8/16/32 strings.
10605cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor    // Also check for illegal concatenations.
10615cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor    if (StringToks[i].isNot(Kind) && StringToks[i].isNot(tok::string_literal)) {
10625cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor      if (isAscii()) {
10635cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor        Kind = StringToks[i].getKind();
10645cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor      } else {
10655cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor        if (Diags)
10665cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor          Diags->Report(FullSourceLoc(StringToks[i].getLocation(), SM),
10675cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor                        diag::err_unsupported_string_concat);
10685cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor        hadError = true;
10695cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor      }
10705cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor    }
10715f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  }
1072dbb1ecc32ca122b07b7c98fd0a8f6f53985adaccChris Lattner
10735f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  // Include space for the null terminator.
10745f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  ++SizeBound;
10751eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
10765f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  // TODO: K&R warning: "traditional C rejects string constant concatenation"
10771eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
10785cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  // Get the width in bytes of char/wchar_t/char16_t/char32_t
10795cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  CharByteWidth = getCharWidth(Kind, Target);
10805cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  assert((CharByteWidth & 7) == 0 && "Assumes character size is byte multiple");
10815cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  CharByteWidth /= 8;
10821eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
10835f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  // The output buffer size needs to be large enough to hold wide characters.
10845f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  // This is a worst-case assumption which basically corresponds to L"" "long".
10855cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  SizeBound *= CharByteWidth;
10861eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
10875f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  // Size the temporary buffer to hold the result string data.
10885f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  ResultBuf.resize(SizeBound);
10891eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
10905f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  // Likewise, but for each string piece.
1091f7ccbad5d9949e7ddd1cbef43d482553b811e026Dylan Noblesmith  SmallString<512> TokenBuf;
10925f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  TokenBuf.resize(MaxTokenLength);
10931eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
10945f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  // Loop over all the strings, getting their spelling, and expanding them to
10955f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  // wide strings as appropriate.
10965f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  ResultPtr = &ResultBuf[0];   // Next byte to fill in.
10971eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
1098ee98ac5a1330db432b188dd2d38b6631aac47bf1Anders Carlsson  Pascal = false;
10991eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
11005cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith  SourceLocation UDSuffixTokLoc;
11015cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith
11025f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  for (unsigned i = 0, e = NumStringToks; i != e; ++i) {
11035f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    const char *ThisTokBuf = &TokenBuf[0];
11045f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    // Get the spelling of the token, which eliminates trigraphs, etc.  We know
11055f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    // that ThisTokBuf points to a buffer that is big enough for the whole token
11065f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    // and 'spelled' tokens can only shrink.
110750f6af7a6d6951a63f3da7d4c5a7d3965bf73b63Douglas Gregor    bool StringInvalid = false;
11080833dd0675c25cbb35671c7a2006d511d5c77ce3Chris Lattner    unsigned ThisTokLen =
1109b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner      Lexer::getSpelling(StringToks[i], ThisTokBuf, SM, Features,
1110b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner                         &StringInvalid);
111150f6af7a6d6951a63f3da7d4c5a7d3965bf73b63Douglas Gregor    if (StringInvalid) {
11125cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor      hadError = true;
111350f6af7a6d6951a63f3da7d4c5a7d3965bf73b63Douglas Gregor      continue;
111450f6af7a6d6951a63f3da7d4c5a7d3965bf73b63Douglas Gregor    }
111550f6af7a6d6951a63f3da7d4c5a7d3965bf73b63Douglas Gregor
11165cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith    const char *ThisTokEnd = ThisTokBuf+ThisTokLen;
11175cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith
11185cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith    // Remove an optional ud-suffix.
11195cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith    if (ThisTokEnd[-1] != '"') {
11205cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith      const char *UDSuffixEnd = ThisTokEnd;
11215cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith      do {
11225cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith        --ThisTokEnd;
11235cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith      } while (ThisTokEnd[-1] != '"');
11245cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith
11255cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith      StringRef UDSuffix(ThisTokEnd, UDSuffixEnd - ThisTokEnd);
11265cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith
11275cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith      if (UDSuffixBuf.empty()) {
11285cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith        UDSuffixBuf.assign(UDSuffix);
1129dd66be718f23c8149d74ae8b011b002e11e8d5baRichard Smith        UDSuffixToken = i;
1130dd66be718f23c8149d74ae8b011b002e11e8d5baRichard Smith        UDSuffixOffset = ThisTokEnd - ThisTokBuf;
11315cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith        UDSuffixTokLoc = StringToks[i].getLocation();
11325cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith      } else if (!UDSuffixBuf.equals(UDSuffix)) {
11335cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith        // C++11 [lex.ext]p8: At the end of phase 6, if a string literal is the
11345cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith        // result of a concatenation involving at least one user-defined-string-
11355cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith        // literal, all the participating user-defined-string-literals shall
11365cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith        // have the same ud-suffix.
11375cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith        if (Diags) {
11385cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith          SourceLocation TokLoc = StringToks[i].getLocation();
11395cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith          Diags->Report(TokLoc, diag::err_string_concat_mixed_suffix)
11405cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith            << UDSuffixBuf << UDSuffix
11415cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith            << SourceRange(UDSuffixTokLoc, UDSuffixTokLoc)
11425cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith            << SourceRange(TokLoc, TokLoc);
11435cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith        }
11445cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith        hadError = true;
11455cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith      }
11465cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith    }
11475cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith
11485cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith    // Strip the end quote.
11495cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith    --ThisTokEnd;
11505cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith
11515f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    // TODO: Input character set mapping support.
11521eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
11531661d717563d6a27dec3da69deba2b2efaa45802Craig Topper    // Skip marker for wide or unicode strings.
11545cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor    if (ThisTokBuf[0] == 'L' || ThisTokBuf[0] == 'u' || ThisTokBuf[0] == 'U') {
11555f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      ++ThisTokBuf;
11565cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor      // Skip 8 of u8 marker for utf8 strings.
11575cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor      if (ThisTokBuf[0] == '8')
11585cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor        ++ThisTokBuf;
115956bedefe92ae8f604d14bea75cc3040ab32337c2Fariborz Jahanian    }
11601eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
11612fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper    // Check for raw string
11622fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper    if (ThisTokBuf[0] == 'R') {
11632fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper      ThisTokBuf += 2; // skip R"
11641eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
11652fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper      const char *Prefix = ThisTokBuf;
11662fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper      while (ThisTokBuf[0] != '(')
1167ee98ac5a1330db432b188dd2d38b6631aac47bf1Anders Carlsson        ++ThisTokBuf;
11682fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper      ++ThisTokBuf; // skip '('
11692fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper
11702fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper      // remove same number of characters from the end
11712fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper      if (ThisTokEnd >= ThisTokBuf + (ThisTokBuf - Prefix))
11722fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper        ThisTokEnd -= (ThisTokBuf - Prefix);
11732fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper
11742fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper      // Copy the string over
1175f74a4587629615ffd13bd0724868f86ba8c8f27bEli Friedman      if (CopyStringFragment(StringRef(ThisTokBuf,ThisTokEnd-ThisTokBuf)))
1176f74a4587629615ffd13bd0724868f86ba8c8f27bEli Friedman      {
117791359302b822d829afa93c0dadf5f7ce6e19fbc6Eli Friedman        if (DiagnoseBadString(StringToks[i]))
117891359302b822d829afa93c0dadf5f7ce6e19fbc6Eli Friedman          hadError = true;
1179f74a4587629615ffd13bd0724868f86ba8c8f27bEli Friedman      }
1180f74a4587629615ffd13bd0724868f86ba8c8f27bEli Friedman
11812fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper    } else {
11822fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper      assert(ThisTokBuf[0] == '"' && "Expected quote, lexer broken?");
11832fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper      ++ThisTokBuf; // skip "
11842fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper
11852fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper      // Check if this is a pascal string
11862fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper      if (Features.PascalStrings && ThisTokBuf + 1 != ThisTokEnd &&
11872fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper          ThisTokBuf[0] == '\\' && ThisTokBuf[1] == 'p') {
11881eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
11892fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper        // If the \p sequence is found in the first token, we have a pascal string
11902fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper        // Otherwise, if we already have a pascal string, ignore the first \p
11912fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper        if (i == 0) {
11925f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer          ++ThisTokBuf;
11932fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper          Pascal = true;
11942fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper        } else if (Pascal)
11952fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper          ThisTokBuf += 2;
11965f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      }
11971eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
11982fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper      while (ThisTokBuf != ThisTokEnd) {
11992fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper        // Is this a span of non-escape characters?
12002fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper        if (ThisTokBuf[0] != '\\') {
12012fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper          const char *InStart = ThisTokBuf;
12022fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper          do {
12032fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper            ++ThisTokBuf;
12042fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper          } while (ThisTokBuf != ThisTokEnd && ThisTokBuf[0] != '\\');
12052fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper
12062fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper          // Copy the character span over.
1207f74a4587629615ffd13bd0724868f86ba8c8f27bEli Friedman          if (CopyStringFragment(StringRef(InStart,ThisTokBuf-InStart)))
1208f74a4587629615ffd13bd0724868f86ba8c8f27bEli Friedman          {
120991359302b822d829afa93c0dadf5f7ce6e19fbc6Eli Friedman            if (DiagnoseBadString(StringToks[i]))
121091359302b822d829afa93c0dadf5f7ce6e19fbc6Eli Friedman              hadError = true;
1211f74a4587629615ffd13bd0724868f86ba8c8f27bEli Friedman          }
12122fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper          continue;
12132fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper        }
12142fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper        // Is this a Universal Character Name escape?
12152fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper        if (ThisTokBuf[1] == 'u' || ThisTokBuf[1] == 'U') {
12162fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper          EncodeUCNEscape(ThisTokBuf, ThisTokEnd, ResultPtr,
12172fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper                          hadError, FullSourceLoc(StringToks[i].getLocation(),SM),
12182fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper                          CharByteWidth, Diags, Features);
12192fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper          continue;
12202fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper        }
12212fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper        // Otherwise, this is a non-UCN escape character.  Process it.
12222fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper        unsigned ResultChar =
12232fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper          ProcessCharEscape(ThisTokBuf, ThisTokEnd, hadError,
12242fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper                            FullSourceLoc(StringToks[i].getLocation(), SM),
12252fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper                            CharByteWidth*8, Diags);
12262fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper
1227caf1f26777c3adf2556c3af7bf9e01bd8ead17d9Eli Friedman        if (CharByteWidth == 4) {
1228caf1f26777c3adf2556c3af7bf9e01bd8ead17d9Eli Friedman          // FIXME: Make the type of the result buffer correct instead of
1229caf1f26777c3adf2556c3af7bf9e01bd8ead17d9Eli Friedman          // using reinterpret_cast.
1230caf1f26777c3adf2556c3af7bf9e01bd8ead17d9Eli Friedman          UTF32 *ResultWidePtr = reinterpret_cast<UTF32*>(ResultPtr);
12319b483df983759d51d61d54e8ae34bff423d15403Nico Weber          *ResultWidePtr = ResultChar;
1232caf1f26777c3adf2556c3af7bf9e01bd8ead17d9Eli Friedman          ResultPtr += 4;
1233caf1f26777c3adf2556c3af7bf9e01bd8ead17d9Eli Friedman        } else if (CharByteWidth == 2) {
1234caf1f26777c3adf2556c3af7bf9e01bd8ead17d9Eli Friedman          // FIXME: Make the type of the result buffer correct instead of
1235caf1f26777c3adf2556c3af7bf9e01bd8ead17d9Eli Friedman          // using reinterpret_cast.
1236caf1f26777c3adf2556c3af7bf9e01bd8ead17d9Eli Friedman          UTF16 *ResultWidePtr = reinterpret_cast<UTF16*>(ResultPtr);
12379b483df983759d51d61d54e8ae34bff423d15403Nico Weber          *ResultWidePtr = ResultChar & 0xFFFF;
1238caf1f26777c3adf2556c3af7bf9e01bd8ead17d9Eli Friedman          ResultPtr += 2;
1239caf1f26777c3adf2556c3af7bf9e01bd8ead17d9Eli Friedman        } else {
1240caf1f26777c3adf2556c3af7bf9e01bd8ead17d9Eli Friedman          assert(CharByteWidth == 1 && "Unexpected char width");
1241caf1f26777c3adf2556c3af7bf9e01bd8ead17d9Eli Friedman          *ResultPtr++ = ResultChar & 0xFF;
1242caf1f26777c3adf2556c3af7bf9e01bd8ead17d9Eli Friedman        }
12432fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper      }
12445f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    }
12455f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  }
12461eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
1247bbee00b6456e90a09f63c83c20233e6c5ad6000aChris Lattner  if (Pascal) {
124822508f410b3d727d5c557af3304c0a1bad94999eEli Friedman    if (CharByteWidth == 4) {
124922508f410b3d727d5c557af3304c0a1bad94999eEli Friedman      // FIXME: Make the type of the result buffer correct instead of
125022508f410b3d727d5c557af3304c0a1bad94999eEli Friedman      // using reinterpret_cast.
125122508f410b3d727d5c557af3304c0a1bad94999eEli Friedman      UTF32 *ResultWidePtr = reinterpret_cast<UTF32*>(ResultBuf.data());
125222508f410b3d727d5c557af3304c0a1bad94999eEli Friedman      ResultWidePtr[0] = GetNumStringChars() - 1;
125322508f410b3d727d5c557af3304c0a1bad94999eEli Friedman    } else if (CharByteWidth == 2) {
125422508f410b3d727d5c557af3304c0a1bad94999eEli Friedman      // FIXME: Make the type of the result buffer correct instead of
125522508f410b3d727d5c557af3304c0a1bad94999eEli Friedman      // using reinterpret_cast.
125622508f410b3d727d5c557af3304c0a1bad94999eEli Friedman      UTF16 *ResultWidePtr = reinterpret_cast<UTF16*>(ResultBuf.data());
125722508f410b3d727d5c557af3304c0a1bad94999eEli Friedman      ResultWidePtr[0] = GetNumStringChars() - 1;
125822508f410b3d727d5c557af3304c0a1bad94999eEli Friedman    } else {
125922508f410b3d727d5c557af3304c0a1bad94999eEli Friedman      assert(CharByteWidth == 1 && "Unexpected char width");
126022508f410b3d727d5c557af3304c0a1bad94999eEli Friedman      ResultBuf[0] = GetNumStringChars() - 1;
126122508f410b3d727d5c557af3304c0a1bad94999eEli Friedman    }
1262bbee00b6456e90a09f63c83c20233e6c5ad6000aChris Lattner
1263bbee00b6456e90a09f63c83c20233e6c5ad6000aChris Lattner    // Verify that pascal strings aren't too large.
12640833dd0675c25cbb35671c7a2006d511d5c77ce3Chris Lattner    if (GetStringLength() > 256) {
12650833dd0675c25cbb35671c7a2006d511d5c77ce3Chris Lattner      if (Diags)
12660833dd0675c25cbb35671c7a2006d511d5c77ce3Chris Lattner        Diags->Report(FullSourceLoc(StringToks[0].getLocation(), SM),
12670833dd0675c25cbb35671c7a2006d511d5c77ce3Chris Lattner                      diag::err_pascal_string_too_long)
12680833dd0675c25cbb35671c7a2006d511d5c77ce3Chris Lattner          << SourceRange(StringToks[0].getLocation(),
12690833dd0675c25cbb35671c7a2006d511d5c77ce3Chris Lattner                         StringToks[NumStringToks-1].getLocation());
12705cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor      hadError = true;
127157d7dde770c67b282e7fb77b1b81e429910937b3Eli Friedman      return;
127257d7dde770c67b282e7fb77b1b81e429910937b3Eli Friedman    }
12730833dd0675c25cbb35671c7a2006d511d5c77ce3Chris Lattner  } else if (Diags) {
1274427c492d368d6ecf409fa8053eecb5cd0e779c5bDouglas Gregor    // Complain if this string literal has too many characters.
1275a95880d6513c617bb96634bcc1f16c6bdb80dedcChris Lattner    unsigned MaxChars = Features.CPlusPlus? 65536 : Features.C99 ? 4095 : 509;
1276427c492d368d6ecf409fa8053eecb5cd0e779c5bDouglas Gregor
1277427c492d368d6ecf409fa8053eecb5cd0e779c5bDouglas Gregor    if (GetNumStringChars() > MaxChars)
12780833dd0675c25cbb35671c7a2006d511d5c77ce3Chris Lattner      Diags->Report(FullSourceLoc(StringToks[0].getLocation(), SM),
12790833dd0675c25cbb35671c7a2006d511d5c77ce3Chris Lattner                    diag::ext_string_too_long)
1280427c492d368d6ecf409fa8053eecb5cd0e779c5bDouglas Gregor        << GetNumStringChars() << MaxChars
1281a95880d6513c617bb96634bcc1f16c6bdb80dedcChris Lattner        << (Features.CPlusPlus ? 2 : Features.C99 ? 1 : 0)
1282427c492d368d6ecf409fa8053eecb5cd0e779c5bDouglas Gregor        << SourceRange(StringToks[0].getLocation(),
1283427c492d368d6ecf409fa8053eecb5cd0e779c5bDouglas Gregor                       StringToks[NumStringToks-1].getLocation());
1284bbee00b6456e90a09f63c83c20233e6c5ad6000aChris Lattner  }
12855f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer}
1286719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner
1287719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner
12882fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper/// copyStringFragment - This function copies from Start to End into ResultPtr.
12892fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper/// Performs widening for multi-byte characters.
1290f74a4587629615ffd13bd0724868f86ba8c8f27bEli Friedmanbool StringLiteralParser::CopyStringFragment(StringRef Fragment) {
1291f74a4587629615ffd13bd0724868f86ba8c8f27bEli Friedman  assert(CharByteWidth==1 || CharByteWidth==2 || CharByteWidth==4);
1292f74a4587629615ffd13bd0724868f86ba8c8f27bEli Friedman  ConversionResult result = conversionOK;
12932fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper  // Copy the character span over.
12942fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper  if (CharByteWidth == 1) {
129591359302b822d829afa93c0dadf5f7ce6e19fbc6Eli Friedman    if (!isLegalUTF8Sequence(reinterpret_cast<const UTF8*>(Fragment.begin()),
129691359302b822d829afa93c0dadf5f7ce6e19fbc6Eli Friedman                             reinterpret_cast<const UTF8*>(Fragment.end())))
129791359302b822d829afa93c0dadf5f7ce6e19fbc6Eli Friedman      result = sourceIllegal;
12982fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper    memcpy(ResultPtr, Fragment.data(), Fragment.size());
12992fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper    ResultPtr += Fragment.size();
1300f74a4587629615ffd13bd0724868f86ba8c8f27bEli Friedman  } else if (CharByteWidth == 2) {
1301f74a4587629615ffd13bd0724868f86ba8c8f27bEli Friedman    UTF8 const *sourceStart = (UTF8 const *)Fragment.data();
1302f74a4587629615ffd13bd0724868f86ba8c8f27bEli Friedman    // FIXME: Make the type of the result buffer correct instead of
1303f74a4587629615ffd13bd0724868f86ba8c8f27bEli Friedman    // using reinterpret_cast.
1304f74a4587629615ffd13bd0724868f86ba8c8f27bEli Friedman    UTF16 *targetStart = reinterpret_cast<UTF16*>(ResultPtr);
130591359302b822d829afa93c0dadf5f7ce6e19fbc6Eli Friedman    ConversionFlags flags = strictConversion;
1306f74a4587629615ffd13bd0724868f86ba8c8f27bEli Friedman    result = ConvertUTF8toUTF16(
1307f74a4587629615ffd13bd0724868f86ba8c8f27bEli Friedman	    &sourceStart,sourceStart + Fragment.size(),
1308f74a4587629615ffd13bd0724868f86ba8c8f27bEli Friedman        &targetStart,targetStart + 2*Fragment.size(),flags);
1309f74a4587629615ffd13bd0724868f86ba8c8f27bEli Friedman    if (result==conversionOK)
1310f74a4587629615ffd13bd0724868f86ba8c8f27bEli Friedman      ResultPtr = reinterpret_cast<char*>(targetStart);
1311f74a4587629615ffd13bd0724868f86ba8c8f27bEli Friedman  } else if (CharByteWidth == 4) {
1312f74a4587629615ffd13bd0724868f86ba8c8f27bEli Friedman    UTF8 const *sourceStart = (UTF8 const *)Fragment.data();
1313f74a4587629615ffd13bd0724868f86ba8c8f27bEli Friedman    // FIXME: Make the type of the result buffer correct instead of
1314f74a4587629615ffd13bd0724868f86ba8c8f27bEli Friedman    // using reinterpret_cast.
1315f74a4587629615ffd13bd0724868f86ba8c8f27bEli Friedman    UTF32 *targetStart = reinterpret_cast<UTF32*>(ResultPtr);
131691359302b822d829afa93c0dadf5f7ce6e19fbc6Eli Friedman    ConversionFlags flags = strictConversion;
1317f74a4587629615ffd13bd0724868f86ba8c8f27bEli Friedman    result = ConvertUTF8toUTF32(
1318f74a4587629615ffd13bd0724868f86ba8c8f27bEli Friedman        &sourceStart,sourceStart + Fragment.size(),
1319f74a4587629615ffd13bd0724868f86ba8c8f27bEli Friedman        &targetStart,targetStart + 4*Fragment.size(),flags);
1320f74a4587629615ffd13bd0724868f86ba8c8f27bEli Friedman    if (result==conversionOK)
1321f74a4587629615ffd13bd0724868f86ba8c8f27bEli Friedman      ResultPtr = reinterpret_cast<char*>(targetStart);
13222fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper  }
1323f74a4587629615ffd13bd0724868f86ba8c8f27bEli Friedman  assert((result != targetExhausted)
1324f74a4587629615ffd13bd0724868f86ba8c8f27bEli Friedman         && "ConvertUTF8toUTFXX exhausted target buffer");
1325f74a4587629615ffd13bd0724868f86ba8c8f27bEli Friedman  return result != conversionOK;
13262fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper}
13272fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper
132891359302b822d829afa93c0dadf5f7ce6e19fbc6Eli Friedmanbool StringLiteralParser::DiagnoseBadString(const Token &Tok) {
132991359302b822d829afa93c0dadf5f7ce6e19fbc6Eli Friedman  // If we see bad encoding for unprefixed string literals, warn and
133091359302b822d829afa93c0dadf5f7ce6e19fbc6Eli Friedman  // simply copy the byte values, for compatibility with gcc and older
133191359302b822d829afa93c0dadf5f7ce6e19fbc6Eli Friedman  // versions of clang.
133291359302b822d829afa93c0dadf5f7ce6e19fbc6Eli Friedman  bool NoErrorOnBadEncoding = isAscii();
133391359302b822d829afa93c0dadf5f7ce6e19fbc6Eli Friedman  unsigned Msg = NoErrorOnBadEncoding ? diag::warn_bad_string_encoding :
133491359302b822d829afa93c0dadf5f7ce6e19fbc6Eli Friedman                                        diag::err_bad_string_encoding;
133591359302b822d829afa93c0dadf5f7ce6e19fbc6Eli Friedman  if (Diags)
133691359302b822d829afa93c0dadf5f7ce6e19fbc6Eli Friedman    Diags->Report(FullSourceLoc(Tok.getLocation(), SM), Msg);
133791359302b822d829afa93c0dadf5f7ce6e19fbc6Eli Friedman  return !NoErrorOnBadEncoding;
133891359302b822d829afa93c0dadf5f7ce6e19fbc6Eli Friedman}
13392fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper
1340719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner/// getOffsetOfStringByte - This function returns the offset of the
1341719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner/// specified byte of the string data represented by Token.  This handles
1342719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner/// advancing over escape sequences in the string.
1343719e61573f27c11057ecfe0dd8f141621602c571Chris Lattnerunsigned StringLiteralParser::getOffsetOfStringByte(const Token &Tok,
13446c66f07854c1334a1ce9eae1428d61d54182a6e1Chris Lattner                                                    unsigned ByteNo) const {
1345719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner  // Get the spelling of the token.
1346f7ccbad5d9949e7ddd1cbef43d482553b811e026Dylan Noblesmith  SmallString<32> SpellingBuffer;
13476cf750298d3621d8a10a6dd07fcee8e274b9d94dSean Hunt  SpellingBuffer.resize(Tok.getLength());
13481eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
134950f6af7a6d6951a63f3da7d4c5a7d3965bf73b63Douglas Gregor  bool StringInvalid = false;
1350719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner  const char *SpellingPtr = &SpellingBuffer[0];
1351b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner  unsigned TokLen = Lexer::getSpelling(Tok, SpellingPtr, SM, Features,
1352b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner                                       &StringInvalid);
135391f54ce93bec136fb9e18740b895cf1c1339524bChris Lattner  if (StringInvalid)
135450f6af7a6d6951a63f3da7d4c5a7d3965bf73b63Douglas Gregor    return 0;
1355719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner
13565cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  assert(SpellingPtr[0] != 'L' && SpellingPtr[0] != 'u' &&
13575cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor         SpellingPtr[0] != 'U' && "Doesn't handle wide or utf strings yet");
1358719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner
13591eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
1360719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner  const char *SpellingStart = SpellingPtr;
1361719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner  const char *SpellingEnd = SpellingPtr+TokLen;
1362719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner
1363719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner  // Skip over the leading quote.
1364719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner  assert(SpellingPtr[0] == '"' && "Should be a string literal!");
1365719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner  ++SpellingPtr;
13661eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
1367719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner  // Skip over bytes until we find the offset we're looking for.
1368719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner  while (ByteNo) {
1369719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner    assert(SpellingPtr < SpellingEnd && "Didn't find byte offset!");
13701eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
1371719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner    // Step over non-escapes simply.
1372719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner    if (*SpellingPtr != '\\') {
1373719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner      ++SpellingPtr;
1374719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner      --ByteNo;
1375719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner      continue;
1376719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner    }
13771eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
1378719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner    // Otherwise, this is an escape character.  Advance over it.
1379719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner    bool HadError = false;
1380719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner    ProcessCharEscape(SpellingPtr, SpellingEnd, HadError,
1381ca1475ea0e76da6b852796610139ed9b49c8d4a6Chris Lattner                      FullSourceLoc(Tok.getLocation(), SM),
13825cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor                      CharByteWidth*8, Diags);
1383719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner    assert(!HadError && "This method isn't valid on erroneous strings");
1384719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner    --ByteNo;
1385719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner  }
13861eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
1387719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner  return SpellingPtr-SpellingStart;
1388719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner}
1389