15f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer//===--- LiteralSupport.cpp - Code to parse and process literals ----------===//
25f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer//
35f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer//                     The LLVM Compiler Infrastructure
45f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer//
50bc735ffcfb223c0186419547abaa5c84482663eChris Lattner// This file is distributed under the University of Illinois Open Source
60bc735ffcfb223c0186419547abaa5c84482663eChris Lattner// License. See LICENSE.TXT for details.
75f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer//
85f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer//===----------------------------------------------------------------------===//
95f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer//
105f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer// This file implements the NumericLiteralParser, CharLiteralParser, and
115f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer// StringLiteralParser interfaces.
125f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer//
135f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer//===----------------------------------------------------------------------===//
145f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
155f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer#include "clang/Lex/LiteralSupport.h"
163f6f51e28231f65de9c2dd150a2d757b2162cfa3Jordan Rose#include "clang/Basic/CharInfo.h"
1755fc873017f10f6f566b182b70f6fc22aefa3464Chandler Carruth#include "clang/Basic/TargetInfo.h"
1855fc873017f10f6f566b182b70f6fc22aefa3464Chandler Carruth#include "clang/Lex/LexDiagnostic.h"
1955fc873017f10f6f566b182b70f6fc22aefa3464Chandler Carruth#include "clang/Lex/Preprocessor.h"
205f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer#include "llvm/ADT/StringExtras.h"
21cb5620c9b213f4bd323912159fdddda35e258a14Dmitri Gribenko#include "llvm/Support/ConvertUTF.h"
229fe8c74a93ac8e92512615c5f83e7a328b3b0544David Blaikie#include "llvm/Support/ErrorHandling.h"
23cb5620c9b213f4bd323912159fdddda35e258a14Dmitri Gribenko
245f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencerusing namespace clang;
255f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
265cee1195584fa8672253139c86e922daeda69b9eDouglas Gregorstatic unsigned getCharWidth(tok::TokenKind kind, const TargetInfo &Target) {
275cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  switch (kind) {
28b219cfc4d75f0a03630b7c4509ef791b7e97b2c8David Blaikie  default: llvm_unreachable("Unknown token type!");
295cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  case tok::char_constant:
305cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  case tok::string_literal:
315cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  case tok::utf8_string_literal:
325cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor    return Target.getCharWidth();
335cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  case tok::wide_char_constant:
345cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  case tok::wide_string_literal:
355cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor    return Target.getWCharWidth();
365cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  case tok::utf16_char_constant:
375cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  case tok::utf16_string_literal:
385cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor    return Target.getChar16Width();
395cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  case tok::utf32_char_constant:
405cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  case tok::utf32_string_literal:
415cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor    return Target.getChar32Width();
425cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  }
435cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor}
445cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor
455bffbe5c1033967fe49c7a638fdcea99d30d573aSeth Cantrellstatic CharSourceRange MakeCharSourceRange(const LangOptions &Features,
465bffbe5c1033967fe49c7a638fdcea99d30d573aSeth Cantrell                                           FullSourceLoc TokLoc,
475bffbe5c1033967fe49c7a638fdcea99d30d573aSeth Cantrell                                           const char *TokBegin,
485bffbe5c1033967fe49c7a638fdcea99d30d573aSeth Cantrell                                           const char *TokRangeBegin,
495bffbe5c1033967fe49c7a638fdcea99d30d573aSeth Cantrell                                           const char *TokRangeEnd) {
505bffbe5c1033967fe49c7a638fdcea99d30d573aSeth Cantrell  SourceLocation Begin =
515bffbe5c1033967fe49c7a638fdcea99d30d573aSeth Cantrell    Lexer::AdvanceToTokenCharacter(TokLoc, TokRangeBegin - TokBegin,
525bffbe5c1033967fe49c7a638fdcea99d30d573aSeth Cantrell                                   TokLoc.getManager(), Features);
535bffbe5c1033967fe49c7a638fdcea99d30d573aSeth Cantrell  SourceLocation End =
545bffbe5c1033967fe49c7a638fdcea99d30d573aSeth Cantrell    Lexer::AdvanceToTokenCharacter(Begin, TokRangeEnd - TokRangeBegin,
555bffbe5c1033967fe49c7a638fdcea99d30d573aSeth Cantrell                                   TokLoc.getManager(), Features);
565bffbe5c1033967fe49c7a638fdcea99d30d573aSeth Cantrell  return CharSourceRange::getCharRange(Begin, End);
575bffbe5c1033967fe49c7a638fdcea99d30d573aSeth Cantrell}
585bffbe5c1033967fe49c7a638fdcea99d30d573aSeth Cantrell
59e5f0588840b20897631cc8110344fd2745ef4caaRichard Smith/// \brief Produce a diagnostic highlighting some portion of a literal.
60e5f0588840b20897631cc8110344fd2745ef4caaRichard Smith///
61e5f0588840b20897631cc8110344fd2745ef4caaRichard Smith/// Emits the diagnostic \p DiagID, highlighting the range of characters from
62e5f0588840b20897631cc8110344fd2745ef4caaRichard Smith/// \p TokRangeBegin (inclusive) to \p TokRangeEnd (exclusive), which must be
63e5f0588840b20897631cc8110344fd2745ef4caaRichard Smith/// a substring of a spelling buffer for the token beginning at \p TokBegin.
64e5f0588840b20897631cc8110344fd2745ef4caaRichard Smithstatic DiagnosticBuilder Diag(DiagnosticsEngine *Diags,
65e5f0588840b20897631cc8110344fd2745ef4caaRichard Smith                              const LangOptions &Features, FullSourceLoc TokLoc,
66e5f0588840b20897631cc8110344fd2745ef4caaRichard Smith                              const char *TokBegin, const char *TokRangeBegin,
67e5f0588840b20897631cc8110344fd2745ef4caaRichard Smith                              const char *TokRangeEnd, unsigned DiagID) {
68e5f0588840b20897631cc8110344fd2745ef4caaRichard Smith  SourceLocation Begin =
69e5f0588840b20897631cc8110344fd2745ef4caaRichard Smith    Lexer::AdvanceToTokenCharacter(TokLoc, TokRangeBegin - TokBegin,
70e5f0588840b20897631cc8110344fd2745ef4caaRichard Smith                                   TokLoc.getManager(), Features);
715bffbe5c1033967fe49c7a638fdcea99d30d573aSeth Cantrell  return Diags->Report(Begin, DiagID) <<
725bffbe5c1033967fe49c7a638fdcea99d30d573aSeth Cantrell    MakeCharSourceRange(Features, TokLoc, TokBegin, TokRangeBegin, TokRangeEnd);
73e5f0588840b20897631cc8110344fd2745ef4caaRichard Smith}
74e5f0588840b20897631cc8110344fd2745ef4caaRichard Smith
755f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// ProcessCharEscape - Parse a standard C escape sequence, which can occur in
765f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// either a character or a string literal.
77e5f0588840b20897631cc8110344fd2745ef4caaRichard Smithstatic unsigned ProcessCharEscape(const char *ThisTokBegin,
78e5f0588840b20897631cc8110344fd2745ef4caaRichard Smith                                  const char *&ThisTokBuf,
795f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer                                  const char *ThisTokEnd, bool &HadError,
805cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor                                  FullSourceLoc Loc, unsigned CharWidth,
81e5f0588840b20897631cc8110344fd2745ef4caaRichard Smith                                  DiagnosticsEngine *Diags,
82e5f0588840b20897631cc8110344fd2745ef4caaRichard Smith                                  const LangOptions &Features) {
83e5f0588840b20897631cc8110344fd2745ef4caaRichard Smith  const char *EscapeBegin = ThisTokBuf;
84e5f0588840b20897631cc8110344fd2745ef4caaRichard Smith
855f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  // Skip the '\' char.
865f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  ++ThisTokBuf;
875f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
885f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  // We know that this character can't be off the end of the buffer, because
895f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  // that would have been \", which would not have been the end of string.
905f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  unsigned ResultChar = *ThisTokBuf++;
915f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  switch (ResultChar) {
925f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  // These map to themselves.
935f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  case '\\': case '\'': case '"': case '?': break;
941eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
955f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    // These have fixed mappings.
965f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  case 'a':
975f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    // TODO: K&R: the meaning of '\\a' is different in traditional C
985f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    ResultChar = 7;
995f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    break;
1005f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  case 'b':
1015f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    ResultChar = 8;
1025f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    break;
1035f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  case 'e':
10491f54ce93bec136fb9e18740b895cf1c1339524bChris Lattner    if (Diags)
105e5f0588840b20897631cc8110344fd2745ef4caaRichard Smith      Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
106e5f0588840b20897631cc8110344fd2745ef4caaRichard Smith           diag::ext_nonstandard_escape) << "e";
1075f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    ResultChar = 27;
1085f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    break;
1093c54801fbc67d8df2fed0711a2e2022db6b1bbcfEli Friedman  case 'E':
11091f54ce93bec136fb9e18740b895cf1c1339524bChris Lattner    if (Diags)
111e5f0588840b20897631cc8110344fd2745ef4caaRichard Smith      Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
112e5f0588840b20897631cc8110344fd2745ef4caaRichard Smith           diag::ext_nonstandard_escape) << "E";
1133c54801fbc67d8df2fed0711a2e2022db6b1bbcfEli Friedman    ResultChar = 27;
1143c54801fbc67d8df2fed0711a2e2022db6b1bbcfEli Friedman    break;
1155f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  case 'f':
1165f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    ResultChar = 12;
1175f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    break;
1185f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  case 'n':
1195f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    ResultChar = 10;
1205f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    break;
1215f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  case 'r':
1225f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    ResultChar = 13;
1235f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    break;
1245f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  case 't':
1255f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    ResultChar = 9;
1265f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    break;
1275f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  case 'v':
1285f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    ResultChar = 11;
1295f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    break;
1305f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  case 'x': { // Hex escape.
1315f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    ResultChar = 0;
1323f6f51e28231f65de9c2dd150a2d757b2162cfa3Jordan Rose    if (ThisTokBuf == ThisTokEnd || !isHexDigit(*ThisTokBuf)) {
13391f54ce93bec136fb9e18740b895cf1c1339524bChris Lattner      if (Diags)
134e5f0588840b20897631cc8110344fd2745ef4caaRichard Smith        Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
1355209e2bc4d18e679dcacfd6f6a0120aa1d4a757fJordan Rose             diag::err_hex_escape_no_digits) << "x";
1365f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      HadError = 1;
1375f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      break;
1385f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    }
1391eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
1405f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    // Hex escapes are a maximal series of hex digits.
1415f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    bool Overflow = false;
1425f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    for (; ThisTokBuf != ThisTokEnd; ++ThisTokBuf) {
143728bb4c41844b1df98eb35e7fa98eb5ffa9d65a6Jordan Rose      int CharVal = llvm::hexDigitValue(ThisTokBuf[0]);
1445f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      if (CharVal == -1) break;
145c29bbde0a14a664d6843b21d3791478d1f4d2833Chris Lattner      // About to shift out a digit?
146c29bbde0a14a664d6843b21d3791478d1f4d2833Chris Lattner      Overflow |= (ResultChar & 0xF0000000) ? true : false;
1475f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      ResultChar <<= 4;
1485f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      ResultChar |= CharVal;
1495f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    }
1505f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
1515f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    // See if any bits will be truncated when evaluated as a character.
1525f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    if (CharWidth != 32 && (ResultChar >> CharWidth) != 0) {
1535f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      Overflow = true;
1545f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      ResultChar &= ~0U >> (32-CharWidth);
1555f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    }
1561eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
1575f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    // Check for overflow.
15891f54ce93bec136fb9e18740b895cf1c1339524bChris Lattner    if (Overflow && Diags)   // Too many digits to fit in
159e5f0588840b20897631cc8110344fd2745ef4caaRichard Smith      Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
160b3da613977f6b77dee2b382eeff5713168a4ca18Eli Friedman           diag::err_hex_escape_too_large);
1615f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    break;
1625f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  }
1635f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  case '0': case '1': case '2': case '3':
1645f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  case '4': case '5': case '6': case '7': {
1655f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    // Octal escapes.
1665f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    --ThisTokBuf;
1675f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    ResultChar = 0;
1685f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
1695f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    // Octal escapes are a series of octal digits with maximum length 3.
1705f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    // "\0123" is a two digit sequence equal to "\012" "3".
1715f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    unsigned NumDigits = 0;
1725f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    do {
1735f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      ResultChar <<= 3;
1745f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      ResultChar |= *ThisTokBuf++ - '0';
1755f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      ++NumDigits;
1765f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    } while (ThisTokBuf != ThisTokEnd && NumDigits < 3 &&
1775f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer             ThisTokBuf[0] >= '0' && ThisTokBuf[0] <= '7');
1781eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
1795f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    // Check for overflow.  Reject '\777', but not L'\777'.
1805f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    if (CharWidth != 32 && (ResultChar >> CharWidth) != 0) {
18191f54ce93bec136fb9e18740b895cf1c1339524bChris Lattner      if (Diags)
182e5f0588840b20897631cc8110344fd2745ef4caaRichard Smith        Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
183b3da613977f6b77dee2b382eeff5713168a4ca18Eli Friedman             diag::err_octal_escape_too_large);
1845f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      ResultChar &= ~0U >> (32-CharWidth);
1855f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    }
1865f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    break;
1875f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  }
1881eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
1895f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    // Otherwise, these are not valid escapes.
1905f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  case '(': case '{': case '[': case '%':
1915f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    // GCC accepts these as extensions.  We warn about them as such though.
19291f54ce93bec136fb9e18740b895cf1c1339524bChris Lattner    if (Diags)
193e5f0588840b20897631cc8110344fd2745ef4caaRichard Smith      Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
194e5f0588840b20897631cc8110344fd2745ef4caaRichard Smith           diag::ext_nonstandard_escape)
195e5f0588840b20897631cc8110344fd2745ef4caaRichard Smith        << std::string(1, ResultChar);
196f01fdff97b245caac98100d232c760b4d0531411Eli Friedman    break;
1975f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  default:
19891f54ce93bec136fb9e18740b895cf1c1339524bChris Lattner    if (Diags == 0)
199b90f4b3fb94056609da9cca5eef7358d95a363b2Douglas Gregor      break;
200e5f0588840b20897631cc8110344fd2745ef4caaRichard Smith
2013f6f51e28231f65de9c2dd150a2d757b2162cfa3Jordan Rose    if (isPrintable(ResultChar))
202e5f0588840b20897631cc8110344fd2745ef4caaRichard Smith      Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
203e5f0588840b20897631cc8110344fd2745ef4caaRichard Smith           diag::ext_unknown_escape)
204e5f0588840b20897631cc8110344fd2745ef4caaRichard Smith        << std::string(1, ResultChar);
205ac92d829111bc19d1cc97cd85c3c04bc39b969d1Chris Lattner    else
206e5f0588840b20897631cc8110344fd2745ef4caaRichard Smith      Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
207e5f0588840b20897631cc8110344fd2745ef4caaRichard Smith           diag::ext_unknown_escape)
208e5f0588840b20897631cc8110344fd2745ef4caaRichard Smith        << "x" + llvm::utohexstr(ResultChar);
2095f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    break;
2105f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  }
2111eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
2125f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  return ResultChar;
2135f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer}
2145f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
2150e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff/// ProcessUCNEscape - Read the Universal Character Name, check constraints and
21659705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber/// return the UTF32.
21726b75c07317a3b50a8a00a1623e3ef38af1d8349Richard Smithstatic bool ProcessUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf,
21826b75c07317a3b50a8a00a1623e3ef38af1d8349Richard Smith                             const char *ThisTokEnd,
21959705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber                             uint32_t &UcnVal, unsigned short &UcnLen,
220d6471f7c1921c7802804ce3ff6fe9768310f72b9David Blaikie                             FullSourceLoc Loc, DiagnosticsEngine *Diags,
221be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell                             const LangOptions &Features,
222be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell                             bool in_char_string_literal = false) {
22326b75c07317a3b50a8a00a1623e3ef38af1d8349Richard Smith  const char *UcnBegin = ThisTokBuf;
2241eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
2250e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  // Skip the '\u' char's.
2260e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  ThisTokBuf += 2;
2270e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff
2283f6f51e28231f65de9c2dd150a2d757b2162cfa3Jordan Rose  if (ThisTokBuf == ThisTokEnd || !isHexDigit(*ThisTokBuf)) {
2296c66f07854c1334a1ce9eae1428d61d54182a6e1Chris Lattner    if (Diags)
230e5f0588840b20897631cc8110344fd2745ef4caaRichard Smith      Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
2315209e2bc4d18e679dcacfd6f6a0120aa1d4a757fJordan Rose           diag::err_hex_escape_no_digits) << StringRef(&ThisTokBuf[-1], 1);
23259705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber    return false;
2330e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  }
23459705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber  UcnLen = (ThisTokBuf[-1] == 'u' ? 4 : 8);
23556bedefe92ae8f604d14bea75cc3040ab32337c2Fariborz Jahanian  unsigned short UcnLenSave = UcnLen;
23659705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber  for (; ThisTokBuf != ThisTokEnd && UcnLenSave; ++ThisTokBuf, UcnLenSave--) {
237728bb4c41844b1df98eb35e7fa98eb5ffa9d65a6Jordan Rose    int CharVal = llvm::hexDigitValue(ThisTokBuf[0]);
2380e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff    if (CharVal == -1) break;
2390e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff    UcnVal <<= 4;
2400e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff    UcnVal |= CharVal;
2410e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  }
2420e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  // If we didn't consume the proper number of digits, there is a problem.
24359705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber  if (UcnLenSave) {
244e5f0588840b20897631cc8110344fd2745ef4caaRichard Smith    if (Diags)
245e5f0588840b20897631cc8110344fd2745ef4caaRichard Smith      Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
246e5f0588840b20897631cc8110344fd2745ef4caaRichard Smith           diag::err_ucn_escape_incomplete);
24759705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber    return false;
2480e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  }
24926b75c07317a3b50a8a00a1623e3ef38af1d8349Richard Smith
250be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell  // Check UCN constraints (C99 6.4.3p2) [C++11 lex.charset p2]
25126b75c07317a3b50a8a00a1623e3ef38af1d8349Richard Smith  if ((0xD800 <= UcnVal && UcnVal <= 0xDFFF) || // surrogate codepoints
25226b75c07317a3b50a8a00a1623e3ef38af1d8349Richard Smith      UcnVal > 0x10FFFF) {                      // maximum legal UTF32 value
25326b75c07317a3b50a8a00a1623e3ef38af1d8349Richard Smith    if (Diags)
254e5f0588840b20897631cc8110344fd2745ef4caaRichard Smith      Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
255e5f0588840b20897631cc8110344fd2745ef4caaRichard Smith           diag::err_ucn_escape_invalid);
25626b75c07317a3b50a8a00a1623e3ef38af1d8349Richard Smith    return false;
25726b75c07317a3b50a8a00a1623e3ef38af1d8349Richard Smith  }
258be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell
259be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell  // C++11 allows UCNs that refer to control characters and basic source
260be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell  // characters inside character and string literals
26126b75c07317a3b50a8a00a1623e3ef38af1d8349Richard Smith  if (UcnVal < 0xa0 &&
26226b75c07317a3b50a8a00a1623e3ef38af1d8349Richard Smith      (UcnVal != 0x24 && UcnVal != 0x40 && UcnVal != 0x60)) {  // $, @, `
26380ad52f327b532bded5c5b0ee38779d841c6cd35Richard Smith    bool IsError = (!Features.CPlusPlus11 || !in_char_string_literal);
26426b75c07317a3b50a8a00a1623e3ef38af1d8349Richard Smith    if (Diags) {
26526b75c07317a3b50a8a00a1623e3ef38af1d8349Richard Smith      char BasicSCSChar = UcnVal;
26626b75c07317a3b50a8a00a1623e3ef38af1d8349Richard Smith      if (UcnVal >= 0x20 && UcnVal < 0x7f)
267e5f0588840b20897631cc8110344fd2745ef4caaRichard Smith        Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
268e5f0588840b20897631cc8110344fd2745ef4caaRichard Smith             IsError ? diag::err_ucn_escape_basic_scs :
269e5f0588840b20897631cc8110344fd2745ef4caaRichard Smith                       diag::warn_cxx98_compat_literal_ucn_escape_basic_scs)
270e5f0588840b20897631cc8110344fd2745ef4caaRichard Smith            << StringRef(&BasicSCSChar, 1);
27126b75c07317a3b50a8a00a1623e3ef38af1d8349Richard Smith      else
272e5f0588840b20897631cc8110344fd2745ef4caaRichard Smith        Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
273e5f0588840b20897631cc8110344fd2745ef4caaRichard Smith             IsError ? diag::err_ucn_control_character :
274e5f0588840b20897631cc8110344fd2745ef4caaRichard Smith                       diag::warn_cxx98_compat_literal_ucn_control_character);
275be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell    }
27626b75c07317a3b50a8a00a1623e3ef38af1d8349Richard Smith    if (IsError)
27726b75c07317a3b50a8a00a1623e3ef38af1d8349Richard Smith      return false;
278be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell  }
279be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell
280e5f0588840b20897631cc8110344fd2745ef4caaRichard Smith  if (!Features.CPlusPlus && !Features.C99 && Diags)
281e5f0588840b20897631cc8110344fd2745ef4caaRichard Smith    Diag(Diags, Features, Loc, ThisTokBegin, UcnBegin, ThisTokBuf,
282bfec916e5fc40f22ac11267e78a024cd8dcf3bbfJordan Rose         diag::warn_ucn_not_valid_in_c89_literal);
283e5f0588840b20897631cc8110344fd2745ef4caaRichard Smith
28459705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber  return true;
28559705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber}
28659705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber
287df9ef1bc8c3780307ab2ed81bf5e31c23310b936Richard Smith/// MeasureUCNEscape - Determine the number of bytes within the resulting string
288df9ef1bc8c3780307ab2ed81bf5e31c23310b936Richard Smith/// which this UCN will occupy.
289df9ef1bc8c3780307ab2ed81bf5e31c23310b936Richard Smithstatic int MeasureUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf,
290df9ef1bc8c3780307ab2ed81bf5e31c23310b936Richard Smith                            const char *ThisTokEnd, unsigned CharByteWidth,
291df9ef1bc8c3780307ab2ed81bf5e31c23310b936Richard Smith                            const LangOptions &Features, bool &HadError) {
292df9ef1bc8c3780307ab2ed81bf5e31c23310b936Richard Smith  // UTF-32: 4 bytes per escape.
293df9ef1bc8c3780307ab2ed81bf5e31c23310b936Richard Smith  if (CharByteWidth == 4)
294df9ef1bc8c3780307ab2ed81bf5e31c23310b936Richard Smith    return 4;
295df9ef1bc8c3780307ab2ed81bf5e31c23310b936Richard Smith
296df9ef1bc8c3780307ab2ed81bf5e31c23310b936Richard Smith  uint32_t UcnVal = 0;
297df9ef1bc8c3780307ab2ed81bf5e31c23310b936Richard Smith  unsigned short UcnLen = 0;
298df9ef1bc8c3780307ab2ed81bf5e31c23310b936Richard Smith  FullSourceLoc Loc;
299df9ef1bc8c3780307ab2ed81bf5e31c23310b936Richard Smith
300df9ef1bc8c3780307ab2ed81bf5e31c23310b936Richard Smith  if (!ProcessUCNEscape(ThisTokBegin, ThisTokBuf, ThisTokEnd, UcnVal,
301df9ef1bc8c3780307ab2ed81bf5e31c23310b936Richard Smith                        UcnLen, Loc, 0, Features, true)) {
302df9ef1bc8c3780307ab2ed81bf5e31c23310b936Richard Smith    HadError = true;
303df9ef1bc8c3780307ab2ed81bf5e31c23310b936Richard Smith    return 0;
304df9ef1bc8c3780307ab2ed81bf5e31c23310b936Richard Smith  }
305df9ef1bc8c3780307ab2ed81bf5e31c23310b936Richard Smith
306df9ef1bc8c3780307ab2ed81bf5e31c23310b936Richard Smith  // UTF-16: 2 bytes for BMP, 4 bytes otherwise.
307df9ef1bc8c3780307ab2ed81bf5e31c23310b936Richard Smith  if (CharByteWidth == 2)
308df9ef1bc8c3780307ab2ed81bf5e31c23310b936Richard Smith    return UcnVal <= 0xFFFF ? 2 : 4;
309df9ef1bc8c3780307ab2ed81bf5e31c23310b936Richard Smith
310df9ef1bc8c3780307ab2ed81bf5e31c23310b936Richard Smith  // UTF-8.
311df9ef1bc8c3780307ab2ed81bf5e31c23310b936Richard Smith  if (UcnVal < 0x80)
312df9ef1bc8c3780307ab2ed81bf5e31c23310b936Richard Smith    return 1;
313df9ef1bc8c3780307ab2ed81bf5e31c23310b936Richard Smith  if (UcnVal < 0x800)
314df9ef1bc8c3780307ab2ed81bf5e31c23310b936Richard Smith    return 2;
315df9ef1bc8c3780307ab2ed81bf5e31c23310b936Richard Smith  if (UcnVal < 0x10000)
316df9ef1bc8c3780307ab2ed81bf5e31c23310b936Richard Smith    return 3;
317df9ef1bc8c3780307ab2ed81bf5e31c23310b936Richard Smith  return 4;
318df9ef1bc8c3780307ab2ed81bf5e31c23310b936Richard Smith}
319df9ef1bc8c3780307ab2ed81bf5e31c23310b936Richard Smith
32059705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber/// EncodeUCNEscape - Read the Universal Character Name, check constraints and
32159705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber/// convert the UTF32 to UTF8 or UTF16. This is a subroutine of
32259705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber/// StringLiteralParser. When we decide to implement UCN's for identifiers,
32359705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber/// we will likely rework our support for UCN's.
32426b75c07317a3b50a8a00a1623e3ef38af1d8349Richard Smithstatic void EncodeUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf,
32526b75c07317a3b50a8a00a1623e3ef38af1d8349Richard Smith                            const char *ThisTokEnd,
326a95880d6513c617bb96634bcc1f16c6bdb80dedcChris Lattner                            char *&ResultBuf, bool &HadError,
3275cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor                            FullSourceLoc Loc, unsigned CharByteWidth,
328d6471f7c1921c7802804ce3ff6fe9768310f72b9David Blaikie                            DiagnosticsEngine *Diags,
329d6471f7c1921c7802804ce3ff6fe9768310f72b9David Blaikie                            const LangOptions &Features) {
33059705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber  typedef uint32_t UTF32;
33159705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber  UTF32 UcnVal = 0;
33259705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber  unsigned short UcnLen = 0;
33326b75c07317a3b50a8a00a1623e3ef38af1d8349Richard Smith  if (!ProcessUCNEscape(ThisTokBegin, ThisTokBuf, ThisTokEnd, UcnVal, UcnLen,
33426b75c07317a3b50a8a00a1623e3ef38af1d8349Richard Smith                        Loc, Diags, Features, true)) {
335df9ef1bc8c3780307ab2ed81bf5e31c23310b936Richard Smith    HadError = true;
3360e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff    return;
3370e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  }
33859705aee3fe01aa6fb6962dd11350161b47983d9Nico Weber
3395cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  assert((CharByteWidth == 1 || CharByteWidth == 2 || CharByteWidth) &&
3405cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor         "only character widths of 1, 2, or 4 bytes supported");
341a0f15b0848405ae16d63bd5d78c862a6526b338aNico Weber
3425cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  (void)UcnLen;
3435cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  assert((UcnLen== 4 || UcnLen== 8) && "only ucn length of 4 or 8 supported");
3445cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor
3455cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  if (CharByteWidth == 4) {
346caf1f26777c3adf2556c3af7bf9e01bd8ead17d9Eli Friedman    // FIXME: Make the type of the result buffer correct instead of
347caf1f26777c3adf2556c3af7bf9e01bd8ead17d9Eli Friedman    // using reinterpret_cast.
348caf1f26777c3adf2556c3af7bf9e01bd8ead17d9Eli Friedman    UTF32 *ResultPtr = reinterpret_cast<UTF32*>(ResultBuf);
349caf1f26777c3adf2556c3af7bf9e01bd8ead17d9Eli Friedman    *ResultPtr = UcnVal;
350caf1f26777c3adf2556c3af7bf9e01bd8ead17d9Eli Friedman    ResultBuf += 4;
3515cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor    return;
3525cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  }
353a0f15b0848405ae16d63bd5d78c862a6526b338aNico Weber
3545cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  if (CharByteWidth == 2) {
355caf1f26777c3adf2556c3af7bf9e01bd8ead17d9Eli Friedman    // FIXME: Make the type of the result buffer correct instead of
356caf1f26777c3adf2556c3af7bf9e01bd8ead17d9Eli Friedman    // using reinterpret_cast.
357caf1f26777c3adf2556c3af7bf9e01bd8ead17d9Eli Friedman    UTF16 *ResultPtr = reinterpret_cast<UTF16*>(ResultBuf);
358caf1f26777c3adf2556c3af7bf9e01bd8ead17d9Eli Friedman
35959b26d84b64510158e23d80eba077b844b7e0b04Richard Smith    if (UcnVal <= (UTF32)0xFFFF) {
360caf1f26777c3adf2556c3af7bf9e01bd8ead17d9Eli Friedman      *ResultPtr = UcnVal;
361caf1f26777c3adf2556c3af7bf9e01bd8ead17d9Eli Friedman      ResultBuf += 2;
362a0f15b0848405ae16d63bd5d78c862a6526b338aNico Weber      return;
363a0f15b0848405ae16d63bd5d78c862a6526b338aNico Weber    }
364a0f15b0848405ae16d63bd5d78c862a6526b338aNico Weber
365caf1f26777c3adf2556c3af7bf9e01bd8ead17d9Eli Friedman    // Convert to UTF16.
366a0f15b0848405ae16d63bd5d78c862a6526b338aNico Weber    UcnVal -= 0x10000;
367caf1f26777c3adf2556c3af7bf9e01bd8ead17d9Eli Friedman    *ResultPtr     = 0xD800 + (UcnVal >> 10);
368caf1f26777c3adf2556c3af7bf9e01bd8ead17d9Eli Friedman    *(ResultPtr+1) = 0xDC00 + (UcnVal & 0x3FF);
369caf1f26777c3adf2556c3af7bf9e01bd8ead17d9Eli Friedman    ResultBuf += 4;
37056bedefe92ae8f604d14bea75cc3040ab32337c2Fariborz Jahanian    return;
37156bedefe92ae8f604d14bea75cc3040ab32337c2Fariborz Jahanian  }
3725cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor
3735cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  assert(CharByteWidth == 1 && "UTF-8 encoding is only for 1 byte characters");
3745cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor
3750e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  // Now that we've parsed/checked the UCN, we convert from UTF32->UTF8.
3760e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  // The conversion below was inspired by:
3770e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  //   http://www.unicode.org/Public/PROGRAMS/CVTUTF/ConvertUTF.c
3781eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump  // First, we determine how many bytes the result will require.
3794e93b34fdb798abfa0534062a139f2c37cbf876eSteve Naroff  typedef uint8_t UTF8;
3800e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff
3810e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  unsigned short bytesToWrite = 0;
3820e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  if (UcnVal < (UTF32)0x80)
3830e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff    bytesToWrite = 1;
3840e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  else if (UcnVal < (UTF32)0x800)
3850e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff    bytesToWrite = 2;
3860e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  else if (UcnVal < (UTF32)0x10000)
3870e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff    bytesToWrite = 3;
3880e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  else
3890e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff    bytesToWrite = 4;
3901eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
3910e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  const unsigned byteMask = 0xBF;
3920e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  const unsigned byteMark = 0x80;
3931eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
3940e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  // Once the bits are split out into bytes of UTF8, this is a mask OR-ed
3958a5c0cd90b8d607ca284274000ed8716b836d253Steve Naroff  // into the first byte, depending on how many bytes follow.
3961eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump  static const UTF8 firstByteMark[5] = {
3978a5c0cd90b8d607ca284274000ed8716b836d253Steve Naroff    0x00, 0x00, 0xC0, 0xE0, 0xF0
3980e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  };
3990e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  // Finally, we write the bytes into ResultBuf.
4000e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  ResultBuf += bytesToWrite;
4010e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  switch (bytesToWrite) { // note: everything falls through.
4025d704fef6e0a7db07329a37290684b17bf6badf8Benjamin Kramer  case 4: *--ResultBuf = (UTF8)((UcnVal | byteMark) & byteMask); UcnVal >>= 6;
4035d704fef6e0a7db07329a37290684b17bf6badf8Benjamin Kramer  case 3: *--ResultBuf = (UTF8)((UcnVal | byteMark) & byteMask); UcnVal >>= 6;
4045d704fef6e0a7db07329a37290684b17bf6badf8Benjamin Kramer  case 2: *--ResultBuf = (UTF8)((UcnVal | byteMark) & byteMask); UcnVal >>= 6;
4055d704fef6e0a7db07329a37290684b17bf6badf8Benjamin Kramer  case 1: *--ResultBuf = (UTF8) (UcnVal | firstByteMark[bytesToWrite]);
4060e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  }
4070e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  // Update the buffer.
4080e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff  ResultBuf += bytesToWrite;
4090e3e3eb3879d5a7aaca4a393706149ddef8544f1Steve Naroff}
4105f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
4115f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
4125f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///       integer-constant: [C99 6.4.4.1]
4135f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///         decimal-constant integer-suffix
4145f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///         octal-constant integer-suffix
4155f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///         hexadecimal-constant integer-suffix
4164ac537b0f07f2efb9fcf081f60d54e6cfb1cf2d5Richard Smith///         binary-literal integer-suffix [GNU, C++1y]
41749d517467c3dcd2c67e8a6c740ba5160e37625f7Richard Smith///       user-defined-integer-literal: [C++11 lex.ext]
418b453ad3214d00acc51c9aa702c76c58354d84b84Richard Smith///         decimal-literal ud-suffix
419b453ad3214d00acc51c9aa702c76c58354d84b84Richard Smith///         octal-literal ud-suffix
420b453ad3214d00acc51c9aa702c76c58354d84b84Richard Smith///         hexadecimal-literal ud-suffix
4214ac537b0f07f2efb9fcf081f60d54e6cfb1cf2d5Richard Smith///         binary-literal ud-suffix [GNU, C++1y]
4221eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump///       decimal-constant:
4235f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///         nonzero-digit
4245f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///         decimal-constant digit
4251eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump///       octal-constant:
4265f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///         0
4275f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///         octal-constant octal-digit
4281eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump///       hexadecimal-constant:
4295f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///         hexadecimal-prefix hexadecimal-digit
4305f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///         hexadecimal-constant hexadecimal-digit
4315f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///       hexadecimal-prefix: one of
4325f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///         0x 0X
4334ac537b0f07f2efb9fcf081f60d54e6cfb1cf2d5Richard Smith///       binary-literal:
4344ac537b0f07f2efb9fcf081f60d54e6cfb1cf2d5Richard Smith///         0b binary-digit
4354ac537b0f07f2efb9fcf081f60d54e6cfb1cf2d5Richard Smith///         0B binary-digit
4364ac537b0f07f2efb9fcf081f60d54e6cfb1cf2d5Richard Smith///         binary-literal binary-digit
4375f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///       integer-suffix:
4385f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///         unsigned-suffix [long-suffix]
4395f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///         unsigned-suffix [long-long-suffix]
4405f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///         long-suffix [unsigned-suffix]
4415f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///         long-long-suffix [unsigned-sufix]
4425f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///       nonzero-digit:
4435f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///         1 2 3 4 5 6 7 8 9
4445f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///       octal-digit:
4455f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///         0 1 2 3 4 5 6 7
4465f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///       hexadecimal-digit:
4475f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///         0 1 2 3 4 5 6 7 8 9
4485f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///         a b c d e f
4495f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///         A B C D E F
4504ac537b0f07f2efb9fcf081f60d54e6cfb1cf2d5Richard Smith///       binary-digit:
4514ac537b0f07f2efb9fcf081f60d54e6cfb1cf2d5Richard Smith///         0
4524ac537b0f07f2efb9fcf081f60d54e6cfb1cf2d5Richard Smith///         1
4535f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///       unsigned-suffix: one of
4545f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///         u U
4555f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///       long-suffix: one of
4565f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///         l L
4571eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump///       long-long-suffix: one of
4585f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///         ll LL
4595f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///
4605f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///       floating-constant: [C99 6.4.4.2]
4615f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///         TODO: add rules...
4625f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///
463fc97ea29b1afd9e87341bce2b0cbb0c7172b7dd8Dmitri GribenkoNumericLiteralParser::NumericLiteralParser(StringRef TokSpelling,
464fc97ea29b1afd9e87341bce2b0cbb0c7172b7dd8Dmitri Gribenko                                           SourceLocation TokLoc,
465fc97ea29b1afd9e87341bce2b0cbb0c7172b7dd8Dmitri Gribenko                                           Preprocessor &PP)
466fc97ea29b1afd9e87341bce2b0cbb0c7172b7dd8Dmitri Gribenko  : PP(PP), ThisTokBegin(TokSpelling.begin()), ThisTokEnd(TokSpelling.end()) {
4671eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
468c29bbde0a14a664d6843b21d3791478d1f4d2833Chris Lattner  // This routine assumes that the range begin/end matches the regex for integer
469c29bbde0a14a664d6843b21d3791478d1f4d2833Chris Lattner  // and FP constants (specifically, the 'pp-number' regex), and assumes that
470c29bbde0a14a664d6843b21d3791478d1f4d2833Chris Lattner  // the byte at "*end" is both valid and not part of the regex.  Because of
471c29bbde0a14a664d6843b21d3791478d1f4d2833Chris Lattner  // this, it doesn't have to check for 'overscan' in various places.
4723f6f51e28231f65de9c2dd150a2d757b2162cfa3Jordan Rose  assert(!isPreprocessingNumberBody(*ThisTokEnd) && "didn't maximally munch?");
4731eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
474fc97ea29b1afd9e87341bce2b0cbb0c7172b7dd8Dmitri Gribenko  s = DigitsBegin = ThisTokBegin;
4755f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  saw_exponent = false;
4765f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  saw_period = false;
477b453ad3214d00acc51c9aa702c76c58354d84b84Richard Smith  saw_ud_suffix = false;
4785f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  isLong = false;
4795f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  isUnsigned = false;
4805f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  isLongLong = false;
4816e400c286b485e28d04a742ea87860ddfefa672eChris Lattner  isFloat = false;
482506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner  isImaginary = false;
483b79fe2d28777652a4df4f49dc876cbec060ca90eMike Stump  isMicrosoftInteger = false;
4845f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  hadError = false;
4851eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
4865f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  if (*s == '0') { // parse radix
487368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    ParseNumberStartingWithZero(TokLoc);
488368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    if (hadError)
489368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner      return;
4905f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  } else { // the first digit is non-zero
4915f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    radix = 10;
4925f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    s = SkipDigits(s);
4935f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    if (s == ThisTokEnd) {
4945f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      // Done.
4953f6f51e28231f65de9c2dd150a2d757b2162cfa3Jordan Rose    } else if (isHexDigit(*s) && !(*s == 'e' || *s == 'E')) {
496fc97ea29b1afd9e87341bce2b0cbb0c7172b7dd8Dmitri Gribenko      PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s - ThisTokBegin),
4975f9e272e632e951b1efe824cd16acb4d96077930Chris Lattner              diag::err_invalid_decimal_digit) << StringRef(s, 1);
498ac92d829111bc19d1cc97cd85c3c04bc39b969d1Chris Lattner      hadError = true;
4995f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      return;
5005f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    } else if (*s == '.') {
5015f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      s++;
5025f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      saw_period = true;
5035f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      s = SkipDigits(s);
5041eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump    }
5054411f46050216a139ab2fc7ff145ec384d11ec7fChris Lattner    if ((*s == 'e' || *s == 'E')) { // exponent
50670f66ab053f36ab3df7a778d09bcb2b4b0fec1f8Chris Lattner      const char *Exponent = s;
5075f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      s++;
5085f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      saw_exponent = true;
5095f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      if (*s == '+' || *s == '-')  s++; // sign
5105f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      const char *first_non_digit = SkipDigits(s);
5110b7f69d789ca1f76582ee9a336e25861fd0c1416Chris Lattner      if (first_non_digit != s) {
5125f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer        s = first_non_digit;
5130b7f69d789ca1f76582ee9a336e25861fd0c1416Chris Lattner      } else {
514fc97ea29b1afd9e87341bce2b0cbb0c7172b7dd8Dmitri Gribenko        PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, Exponent - ThisTokBegin),
515ac92d829111bc19d1cc97cd85c3c04bc39b969d1Chris Lattner                diag::err_exponent_has_no_digits);
516ac92d829111bc19d1cc97cd85c3c04bc39b969d1Chris Lattner        hadError = true;
5170b7f69d789ca1f76582ee9a336e25861fd0c1416Chris Lattner        return;
5185f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      }
5195f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    }
5205f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  }
5215f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
5225f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  SuffixBegin = s;
5231eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
524506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner  // Parse the suffix.  At this point we can classify whether we have an FP or
525506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner  // integer constant.
526506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner  bool isFPConstant = isFloatingLiteral();
5274ac537b0f07f2efb9fcf081f60d54e6cfb1cf2d5Richard Smith  const char *ImaginarySuffixLoc = 0;
5281eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
529506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner  // Loop over all of the characters of the suffix.  If we see something bad,
530506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner  // we break out of the loop.
531506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner  for (; s != ThisTokEnd; ++s) {
532506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner    switch (*s) {
533506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner    case 'f':      // FP Suffix for "float"
534506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner    case 'F':
535506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner      if (!isFPConstant) break;  // Error for integer constant.
5366e400c286b485e28d04a742ea87860ddfefa672eChris Lattner      if (isFloat || isLong) break; // FF, LF invalid.
5376e400c286b485e28d04a742ea87860ddfefa672eChris Lattner      isFloat = true;
538506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner      continue;  // Success.
539506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner    case 'u':
540506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner    case 'U':
541506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner      if (isFPConstant) break;  // Error for floating constant.
542506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner      if (isUnsigned) break;    // Cannot be repeated.
543506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner      isUnsigned = true;
544506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner      continue;  // Success.
545506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner    case 'l':
546506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner    case 'L':
547506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner      if (isLong || isLongLong) break;  // Cannot be repeated.
5486e400c286b485e28d04a742ea87860ddfefa672eChris Lattner      if (isFloat) break;               // LF invalid.
5491eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
550506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner      // Check for long long.  The L's need to be adjacent and the same case.
551506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner      if (s+1 != ThisTokEnd && s[1] == s[0]) {
552506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner        if (isFPConstant) break;        // long long invalid for floats.
553506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner        isLongLong = true;
554506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner        ++s;  // Eat both of them.
555506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner      } else {
5565f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer        isLong = true;
5575f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      }
558506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner      continue;  // Success.
559506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner    case 'i':
560c637415a96c16abc7e28ef83c6c105716f7e8936Chris Lattner    case 'I':
5614e4d08403ca5cfd4d558fa2936215d3a4e5a528dDavid Blaikie      if (PP.getLangOpts().MicrosoftExt) {
562a8be02b655b76e4dbe776b0c62bc3c450dc6feabFariborz Jahanian        if (isFPConstant || isLong || isLongLong) break;
5636e8c7acb61b2c7f421d6e1aba8a7a84e96ab6981Nuno Lopes
5640c29b22f4384500cc0d04f3072cc5d5d58d10d6cSteve Naroff        // Allow i8, i16, i32, i64, and i128.
565b79fe2d28777652a4df4f49dc876cbec060ca90eMike Stump        if (s + 1 != ThisTokEnd) {
566b79fe2d28777652a4df4f49dc876cbec060ca90eMike Stump          switch (s[1]) {
567b79fe2d28777652a4df4f49dc876cbec060ca90eMike Stump            case '8':
568b79fe2d28777652a4df4f49dc876cbec060ca90eMike Stump              s += 2; // i8 suffix
569b79fe2d28777652a4df4f49dc876cbec060ca90eMike Stump              isMicrosoftInteger = true;
5706e8c7acb61b2c7f421d6e1aba8a7a84e96ab6981Nuno Lopes              break;
571b79fe2d28777652a4df4f49dc876cbec060ca90eMike Stump            case '1':
5726e8c7acb61b2c7f421d6e1aba8a7a84e96ab6981Nuno Lopes              if (s + 2 == ThisTokEnd) break;
573d062b604548be6e2f85f6f63a461702e5ea14115Francois Pichet              if (s[2] == '6') {
574d062b604548be6e2f85f6f63a461702e5ea14115Francois Pichet                s += 3; // i16 suffix
575d062b604548be6e2f85f6f63a461702e5ea14115Francois Pichet                isMicrosoftInteger = true;
576d062b604548be6e2f85f6f63a461702e5ea14115Francois Pichet              }
5776e8c7acb61b2c7f421d6e1aba8a7a84e96ab6981Nuno Lopes              else if (s[2] == '2') {
5786e8c7acb61b2c7f421d6e1aba8a7a84e96ab6981Nuno Lopes                if (s + 3 == ThisTokEnd) break;
579d062b604548be6e2f85f6f63a461702e5ea14115Francois Pichet                if (s[3] == '8') {
580d062b604548be6e2f85f6f63a461702e5ea14115Francois Pichet                  s += 4; // i128 suffix
581d062b604548be6e2f85f6f63a461702e5ea14115Francois Pichet                  isMicrosoftInteger = true;
582d062b604548be6e2f85f6f63a461702e5ea14115Francois Pichet                }
583b79fe2d28777652a4df4f49dc876cbec060ca90eMike Stump              }
5846e8c7acb61b2c7f421d6e1aba8a7a84e96ab6981Nuno Lopes              break;
585b79fe2d28777652a4df4f49dc876cbec060ca90eMike Stump            case '3':
5866e8c7acb61b2c7f421d6e1aba8a7a84e96ab6981Nuno Lopes              if (s + 2 == ThisTokEnd) break;
587d062b604548be6e2f85f6f63a461702e5ea14115Francois Pichet              if (s[2] == '2') {
588d062b604548be6e2f85f6f63a461702e5ea14115Francois Pichet                s += 3; // i32 suffix
589d062b604548be6e2f85f6f63a461702e5ea14115Francois Pichet                isLong = true;
590d062b604548be6e2f85f6f63a461702e5ea14115Francois Pichet                isMicrosoftInteger = true;
591d062b604548be6e2f85f6f63a461702e5ea14115Francois Pichet              }
5926e8c7acb61b2c7f421d6e1aba8a7a84e96ab6981Nuno Lopes              break;
593b79fe2d28777652a4df4f49dc876cbec060ca90eMike Stump            case '6':
5946e8c7acb61b2c7f421d6e1aba8a7a84e96ab6981Nuno Lopes              if (s + 2 == ThisTokEnd) break;
595d062b604548be6e2f85f6f63a461702e5ea14115Francois Pichet              if (s[2] == '4') {
596d062b604548be6e2f85f6f63a461702e5ea14115Francois Pichet                s += 3; // i64 suffix
597d062b604548be6e2f85f6f63a461702e5ea14115Francois Pichet                isLongLong = true;
598d062b604548be6e2f85f6f63a461702e5ea14115Francois Pichet                isMicrosoftInteger = true;
599d062b604548be6e2f85f6f63a461702e5ea14115Francois Pichet              }
6006e8c7acb61b2c7f421d6e1aba8a7a84e96ab6981Nuno Lopes              break;
601b79fe2d28777652a4df4f49dc876cbec060ca90eMike Stump            default:
602b79fe2d28777652a4df4f49dc876cbec060ca90eMike Stump              break;
603b79fe2d28777652a4df4f49dc876cbec060ca90eMike Stump          }
604b79fe2d28777652a4df4f49dc876cbec060ca90eMike Stump          break;
6050c29b22f4384500cc0d04f3072cc5d5d58d10d6cSteve Naroff        }
6060c29b22f4384500cc0d04f3072cc5d5d58d10d6cSteve Naroff      }
6070c29b22f4384500cc0d04f3072cc5d5d58d10d6cSteve Naroff      // fall through.
608506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner    case 'j':
609506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner    case 'J':
610506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner      if (isImaginary) break;   // Cannot be repeated.
611506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner      isImaginary = true;
6124ac537b0f07f2efb9fcf081f60d54e6cfb1cf2d5Richard Smith      ImaginarySuffixLoc = s;
613506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner      continue;  // Success.
6145f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    }
615b453ad3214d00acc51c9aa702c76c58354d84b84Richard Smith    // If we reached here, there was an error or a ud-suffix.
616506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner    break;
617506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner  }
6181eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
619506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner  if (s != ThisTokEnd) {
6204ac537b0f07f2efb9fcf081f60d54e6cfb1cf2d5Richard Smith    if (isValidUDSuffix(PP.getLangOpts(),
6214ac537b0f07f2efb9fcf081f60d54e6cfb1cf2d5Richard Smith                        StringRef(SuffixBegin, ThisTokEnd - SuffixBegin))) {
6224ac537b0f07f2efb9fcf081f60d54e6cfb1cf2d5Richard Smith      // Any suffix pieces we might have parsed are actually part of the
6234ac537b0f07f2efb9fcf081f60d54e6cfb1cf2d5Richard Smith      // ud-suffix.
6244ac537b0f07f2efb9fcf081f60d54e6cfb1cf2d5Richard Smith      isLong = false;
6254ac537b0f07f2efb9fcf081f60d54e6cfb1cf2d5Richard Smith      isUnsigned = false;
6264ac537b0f07f2efb9fcf081f60d54e6cfb1cf2d5Richard Smith      isLongLong = false;
6274ac537b0f07f2efb9fcf081f60d54e6cfb1cf2d5Richard Smith      isFloat = false;
6284ac537b0f07f2efb9fcf081f60d54e6cfb1cf2d5Richard Smith      isImaginary = false;
6294ac537b0f07f2efb9fcf081f60d54e6cfb1cf2d5Richard Smith      isMicrosoftInteger = false;
6304ac537b0f07f2efb9fcf081f60d54e6cfb1cf2d5Richard Smith
631b453ad3214d00acc51c9aa702c76c58354d84b84Richard Smith      saw_ud_suffix = true;
632b453ad3214d00acc51c9aa702c76c58354d84b84Richard Smith      return;
633b453ad3214d00acc51c9aa702c76c58354d84b84Richard Smith    }
634b453ad3214d00acc51c9aa702c76c58354d84b84Richard Smith
635b453ad3214d00acc51c9aa702c76c58354d84b84Richard Smith    // Report an error if there are any.
636fc97ea29b1afd9e87341bce2b0cbb0c7172b7dd8Dmitri Gribenko    PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, SuffixBegin - ThisTokBegin),
637ac92d829111bc19d1cc97cd85c3c04bc39b969d1Chris Lattner            isFPConstant ? diag::err_invalid_suffix_float_constant :
638ac92d829111bc19d1cc97cd85c3c04bc39b969d1Chris Lattner                           diag::err_invalid_suffix_integer_constant)
6395f9e272e632e951b1efe824cd16acb4d96077930Chris Lattner      << StringRef(SuffixBegin, ThisTokEnd-SuffixBegin);
640ac92d829111bc19d1cc97cd85c3c04bc39b969d1Chris Lattner    hadError = true;
641506b8dec4ed3db3c60bf9e0dd37901f0cf3d6749Chris Lattner    return;
6425f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  }
6434ac537b0f07f2efb9fcf081f60d54e6cfb1cf2d5Richard Smith
6444ac537b0f07f2efb9fcf081f60d54e6cfb1cf2d5Richard Smith  if (isImaginary) {
6454ac537b0f07f2efb9fcf081f60d54e6cfb1cf2d5Richard Smith    PP.Diag(PP.AdvanceToTokenCharacter(TokLoc,
6464ac537b0f07f2efb9fcf081f60d54e6cfb1cf2d5Richard Smith                                       ImaginarySuffixLoc - ThisTokBegin),
6474ac537b0f07f2efb9fcf081f60d54e6cfb1cf2d5Richard Smith            diag::ext_imaginary_constant);
6484ac537b0f07f2efb9fcf081f60d54e6cfb1cf2d5Richard Smith  }
6494ac537b0f07f2efb9fcf081f60d54e6cfb1cf2d5Richard Smith}
6504ac537b0f07f2efb9fcf081f60d54e6cfb1cf2d5Richard Smith
6514ac537b0f07f2efb9fcf081f60d54e6cfb1cf2d5Richard Smith/// Determine whether a suffix is a valid ud-suffix. We avoid treating reserved
6524ac537b0f07f2efb9fcf081f60d54e6cfb1cf2d5Richard Smith/// suffixes as ud-suffixes, because the diagnostic experience is better if we
6534ac537b0f07f2efb9fcf081f60d54e6cfb1cf2d5Richard Smith/// treat it as an invalid suffix.
6544ac537b0f07f2efb9fcf081f60d54e6cfb1cf2d5Richard Smithbool NumericLiteralParser::isValidUDSuffix(const LangOptions &LangOpts,
6554ac537b0f07f2efb9fcf081f60d54e6cfb1cf2d5Richard Smith                                           StringRef Suffix) {
6564ac537b0f07f2efb9fcf081f60d54e6cfb1cf2d5Richard Smith  if (!LangOpts.CPlusPlus11 || Suffix.empty())
6574ac537b0f07f2efb9fcf081f60d54e6cfb1cf2d5Richard Smith    return false;
6584ac537b0f07f2efb9fcf081f60d54e6cfb1cf2d5Richard Smith
6594ac537b0f07f2efb9fcf081f60d54e6cfb1cf2d5Richard Smith  // By C++11 [lex.ext]p10, ud-suffixes starting with an '_' are always valid.
6604ac537b0f07f2efb9fcf081f60d54e6cfb1cf2d5Richard Smith  if (Suffix[0] == '_')
6614ac537b0f07f2efb9fcf081f60d54e6cfb1cf2d5Richard Smith    return true;
6624ac537b0f07f2efb9fcf081f60d54e6cfb1cf2d5Richard Smith
6634ac537b0f07f2efb9fcf081f60d54e6cfb1cf2d5Richard Smith  // In C++11, there are no library suffixes.
6644ac537b0f07f2efb9fcf081f60d54e6cfb1cf2d5Richard Smith  if (!LangOpts.CPlusPlus1y)
6654ac537b0f07f2efb9fcf081f60d54e6cfb1cf2d5Richard Smith    return false;
6664ac537b0f07f2efb9fcf081f60d54e6cfb1cf2d5Richard Smith
6674ac537b0f07f2efb9fcf081f60d54e6cfb1cf2d5Richard Smith  // In C++1y, "s", "h", "min", "ms", "us", and "ns" are used in the library.
6684ac537b0f07f2efb9fcf081f60d54e6cfb1cf2d5Richard Smith  return llvm::StringSwitch<bool>(Suffix)
6694ac537b0f07f2efb9fcf081f60d54e6cfb1cf2d5Richard Smith      .Cases("h", "min", "s", true)
6704ac537b0f07f2efb9fcf081f60d54e6cfb1cf2d5Richard Smith      .Cases("ms", "us", "ns", true)
6714ac537b0f07f2efb9fcf081f60d54e6cfb1cf2d5Richard Smith      .Default(false);
6725f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer}
6735f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
674368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner/// ParseNumberStartingWithZero - This method is called when the first character
675368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner/// of the number is found to be a zero.  This means it is either an octal
676368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner/// number (like '04'), a hex number ('0x123a'), a binary number ('0b1010'), or
6771eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump/// a floating point number (01239.123e4).  Eat the prefix, determining the
678368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner/// radix etc.
679368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattnervoid NumericLiteralParser::ParseNumberStartingWithZero(SourceLocation TokLoc) {
680368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner  assert(s[0] == '0' && "Invalid method call");
681368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner  s++;
6821eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
683368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner  // Handle a hex number like 0x1234.
6843f6f51e28231f65de9c2dd150a2d757b2162cfa3Jordan Rose  if ((*s == 'x' || *s == 'X') && (isHexDigit(s[1]) || s[1] == '.')) {
685368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    s++;
686368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    radix = 16;
687368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    DigitsBegin = s;
688368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    s = SkipHexDigits(s);
68966b0ebac276353f3ff7d41eaba3e6d24d48663b7Aaron Ballman    bool noSignificand = (s == DigitsBegin);
690368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    if (s == ThisTokEnd) {
691368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner      // Done.
692368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    } else if (*s == '.') {
693368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner      s++;
694368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner      saw_period = true;
69566b0ebac276353f3ff7d41eaba3e6d24d48663b7Aaron Ballman      const char *floatDigitsBegin = s;
696368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner      s = SkipHexDigits(s);
69766b0ebac276353f3ff7d41eaba3e6d24d48663b7Aaron Ballman      noSignificand &= (floatDigitsBegin == s);
69866b0ebac276353f3ff7d41eaba3e6d24d48663b7Aaron Ballman    }
69966b0ebac276353f3ff7d41eaba3e6d24d48663b7Aaron Ballman
70066b0ebac276353f3ff7d41eaba3e6d24d48663b7Aaron Ballman    if (noSignificand) {
701fc97ea29b1afd9e87341bce2b0cbb0c7172b7dd8Dmitri Gribenko      PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s - ThisTokBegin),
70266b0ebac276353f3ff7d41eaba3e6d24d48663b7Aaron Ballman        diag::err_hexconstant_requires_digits);
70366b0ebac276353f3ff7d41eaba3e6d24d48663b7Aaron Ballman      hadError = true;
70466b0ebac276353f3ff7d41eaba3e6d24d48663b7Aaron Ballman      return;
705368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    }
70666b0ebac276353f3ff7d41eaba3e6d24d48663b7Aaron Ballman
707368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    // A binary exponent can appear with or without a '.'. If dotted, the
7081eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump    // binary exponent is required.
7091155c42e7b1b4e401bb0a331a6d715d637958c75Douglas Gregor    if (*s == 'p' || *s == 'P') {
710368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner      const char *Exponent = s;
711368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner      s++;
712368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner      saw_exponent = true;
713368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner      if (*s == '+' || *s == '-')  s++; // sign
714368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner      const char *first_non_digit = SkipDigits(s);
7156ea623823f8532670480425b573f35115404b4a0Chris Lattner      if (first_non_digit == s) {
716ac92d829111bc19d1cc97cd85c3c04bc39b969d1Chris Lattner        PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, Exponent-ThisTokBegin),
717ac92d829111bc19d1cc97cd85c3c04bc39b969d1Chris Lattner                diag::err_exponent_has_no_digits);
718ac92d829111bc19d1cc97cd85c3c04bc39b969d1Chris Lattner        hadError = true;
7196ea623823f8532670480425b573f35115404b4a0Chris Lattner        return;
720368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner      }
7216ea623823f8532670480425b573f35115404b4a0Chris Lattner      s = first_non_digit;
7221eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
7234e4d08403ca5cfd4d558fa2936215d3a4e5a528dDavid Blaikie      if (!PP.getLangOpts().HexFloats)
724ac92d829111bc19d1cc97cd85c3c04bc39b969d1Chris Lattner        PP.Diag(TokLoc, diag::ext_hexconstant_invalid);
725368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    } else if (saw_period) {
726ac92d829111bc19d1cc97cd85c3c04bc39b969d1Chris Lattner      PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-ThisTokBegin),
727ac92d829111bc19d1cc97cd85c3c04bc39b969d1Chris Lattner              diag::err_hexconstant_requires_exponent);
728ac92d829111bc19d1cc97cd85c3c04bc39b969d1Chris Lattner      hadError = true;
729368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    }
730368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    return;
731368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner  }
7321eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
733368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner  // Handle simple binary numbers 0b01010
734368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner  if (*s == 'b' || *s == 'B') {
7352fcf0de8a122ddb265d8c32d6ac1012c070e4f24Richard Smith    // 0b101010 is a C++1y / GCC extension.
7362fcf0de8a122ddb265d8c32d6ac1012c070e4f24Richard Smith    PP.Diag(TokLoc,
7372fcf0de8a122ddb265d8c32d6ac1012c070e4f24Richard Smith            PP.getLangOpts().CPlusPlus1y
7382fcf0de8a122ddb265d8c32d6ac1012c070e4f24Richard Smith              ? diag::warn_cxx11_compat_binary_literal
7392fcf0de8a122ddb265d8c32d6ac1012c070e4f24Richard Smith              : PP.getLangOpts().CPlusPlus
7402fcf0de8a122ddb265d8c32d6ac1012c070e4f24Richard Smith                ? diag::ext_binary_literal_cxx1y
7412fcf0de8a122ddb265d8c32d6ac1012c070e4f24Richard Smith                : diag::ext_binary_literal);
742368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    ++s;
743368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    radix = 2;
744368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    DigitsBegin = s;
745368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    s = SkipBinaryDigits(s);
746368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    if (s == ThisTokEnd) {
747368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner      // Done.
7483f6f51e28231f65de9c2dd150a2d757b2162cfa3Jordan Rose    } else if (isHexDigit(*s)) {
749ac92d829111bc19d1cc97cd85c3c04bc39b969d1Chris Lattner      PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-ThisTokBegin),
7505f9e272e632e951b1efe824cd16acb4d96077930Chris Lattner              diag::err_invalid_binary_digit) << StringRef(s, 1);
751ac92d829111bc19d1cc97cd85c3c04bc39b969d1Chris Lattner      hadError = true;
752368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    }
753413d355e38755a71f106dbc0ac900ca989070916Chris Lattner    // Other suffixes will be diagnosed by the caller.
754368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    return;
755368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner  }
7561eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
757368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner  // For now, the radix is set to 8. If we discover that we have a
758368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner  // floating point constant, the radix will change to 10. Octal floating
7591eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump  // point constants are not permitted (only decimal and hexadecimal).
760368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner  radix = 8;
761368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner  DigitsBegin = s;
762368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner  s = SkipOctalDigits(s);
763368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner  if (s == ThisTokEnd)
764368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    return; // Done, simple octal number like 01234
7651eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
766413d355e38755a71f106dbc0ac900ca989070916Chris Lattner  // If we have some other non-octal digit that *is* a decimal digit, see if
767413d355e38755a71f106dbc0ac900ca989070916Chris Lattner  // this is part of a floating point number like 094.123 or 09e1.
7683f6f51e28231f65de9c2dd150a2d757b2162cfa3Jordan Rose  if (isDigit(*s)) {
769413d355e38755a71f106dbc0ac900ca989070916Chris Lattner    const char *EndDecimal = SkipDigits(s);
770413d355e38755a71f106dbc0ac900ca989070916Chris Lattner    if (EndDecimal[0] == '.' || EndDecimal[0] == 'e' || EndDecimal[0] == 'E') {
771413d355e38755a71f106dbc0ac900ca989070916Chris Lattner      s = EndDecimal;
772413d355e38755a71f106dbc0ac900ca989070916Chris Lattner      radix = 10;
773413d355e38755a71f106dbc0ac900ca989070916Chris Lattner    }
774413d355e38755a71f106dbc0ac900ca989070916Chris Lattner  }
7751eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
776413d355e38755a71f106dbc0ac900ca989070916Chris Lattner  // If we have a hex digit other than 'e' (which denotes a FP exponent) then
777413d355e38755a71f106dbc0ac900ca989070916Chris Lattner  // the code is using an incorrect base.
7783f6f51e28231f65de9c2dd150a2d757b2162cfa3Jordan Rose  if (isHexDigit(*s) && *s != 'e' && *s != 'E') {
779ac92d829111bc19d1cc97cd85c3c04bc39b969d1Chris Lattner    PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-ThisTokBegin),
7805f9e272e632e951b1efe824cd16acb4d96077930Chris Lattner            diag::err_invalid_octal_digit) << StringRef(s, 1);
781ac92d829111bc19d1cc97cd85c3c04bc39b969d1Chris Lattner    hadError = true;
782368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    return;
783368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner  }
7841eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
785368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner  if (*s == '.') {
786368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    s++;
787368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    radix = 10;
788368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    saw_period = true;
789413d355e38755a71f106dbc0ac900ca989070916Chris Lattner    s = SkipDigits(s); // Skip suffix.
790368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner  }
791368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner  if (*s == 'e' || *s == 'E') { // exponent
792368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    const char *Exponent = s;
793368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    s++;
794368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    radix = 10;
795368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    saw_exponent = true;
796368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    if (*s == '+' || *s == '-')  s++; // sign
797368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    const char *first_non_digit = SkipDigits(s);
798368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    if (first_non_digit != s) {
799368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner      s = first_non_digit;
800368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    } else {
8011eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump      PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, Exponent-ThisTokBegin),
802ac92d829111bc19d1cc97cd85c3c04bc39b969d1Chris Lattner              diag::err_exponent_has_no_digits);
803ac92d829111bc19d1cc97cd85c3c04bc39b969d1Chris Lattner      hadError = true;
804368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner      return;
805368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner    }
806368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner  }
807368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner}
808368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner
8092fd6956fd3020cdaf7452ee68435376125c60355Jordan Rosestatic bool alwaysFitsInto64Bits(unsigned Radix, unsigned NumDigits) {
810191046d0504cec221655e9821f264f492a70f0b1Dmitri Gribenko  switch (Radix) {
811191046d0504cec221655e9821f264f492a70f0b1Dmitri Gribenko  case 2:
812191046d0504cec221655e9821f264f492a70f0b1Dmitri Gribenko    return NumDigits <= 64;
813191046d0504cec221655e9821f264f492a70f0b1Dmitri Gribenko  case 8:
814191046d0504cec221655e9821f264f492a70f0b1Dmitri Gribenko    return NumDigits <= 64 / 3; // Digits are groups of 3 bits.
815191046d0504cec221655e9821f264f492a70f0b1Dmitri Gribenko  case 10:
816191046d0504cec221655e9821f264f492a70f0b1Dmitri Gribenko    return NumDigits <= 19; // floor(log10(2^64))
817191046d0504cec221655e9821f264f492a70f0b1Dmitri Gribenko  case 16:
818191046d0504cec221655e9821f264f492a70f0b1Dmitri Gribenko    return NumDigits <= 64 / 4; // Digits are groups of 4 bits.
819191046d0504cec221655e9821f264f492a70f0b1Dmitri Gribenko  default:
820191046d0504cec221655e9821f264f492a70f0b1Dmitri Gribenko    llvm_unreachable("impossible Radix");
821191046d0504cec221655e9821f264f492a70f0b1Dmitri Gribenko  }
822191046d0504cec221655e9821f264f492a70f0b1Dmitri Gribenko}
823368328c88bd46f471bbf85f05438b4f2eb95df5bChris Lattner
8245f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// GetIntegerValue - Convert this numeric literal value to an APInt that
8255f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// matches Val's input width.  If there is an overflow, set Val to the low bits
8265f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer/// of the result and return true.  Otherwise, return false.
8275f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencerbool NumericLiteralParser::GetIntegerValue(llvm::APInt &Val) {
828a179be34c1a3c0190e6b9e39dee2197651f44a5dDaniel Dunbar  // Fast path: Compute a conservative bound on the maximum number of
829a179be34c1a3c0190e6b9e39dee2197651f44a5dDaniel Dunbar  // bits per digit in this radix. If we can't possibly overflow a
830a179be34c1a3c0190e6b9e39dee2197651f44a5dDaniel Dunbar  // uint64 based on that bound then do the simple conversion to
831a179be34c1a3c0190e6b9e39dee2197651f44a5dDaniel Dunbar  // integer. This avoids the expensive overflow checking below, and
832a179be34c1a3c0190e6b9e39dee2197651f44a5dDaniel Dunbar  // handles the common cases that matter (small decimal integers and
833a179be34c1a3c0190e6b9e39dee2197651f44a5dDaniel Dunbar  // hex/octal values which don't overflow).
834191046d0504cec221655e9821f264f492a70f0b1Dmitri Gribenko  const unsigned NumDigits = SuffixBegin - DigitsBegin;
8352fd6956fd3020cdaf7452ee68435376125c60355Jordan Rose  if (alwaysFitsInto64Bits(radix, NumDigits)) {
836a179be34c1a3c0190e6b9e39dee2197651f44a5dDaniel Dunbar    uint64_t N = 0;
837191046d0504cec221655e9821f264f492a70f0b1Dmitri Gribenko    for (const char *Ptr = DigitsBegin; Ptr != SuffixBegin; ++Ptr)
838728bb4c41844b1df98eb35e7fa98eb5ffa9d65a6Jordan Rose      N = N * radix + llvm::hexDigitValue(*Ptr);
839a179be34c1a3c0190e6b9e39dee2197651f44a5dDaniel Dunbar
840a179be34c1a3c0190e6b9e39dee2197651f44a5dDaniel Dunbar    // This will truncate the value to Val's input width. Simply check
841a179be34c1a3c0190e6b9e39dee2197651f44a5dDaniel Dunbar    // for overflow by comparing.
842a179be34c1a3c0190e6b9e39dee2197651f44a5dDaniel Dunbar    Val = N;
843a179be34c1a3c0190e6b9e39dee2197651f44a5dDaniel Dunbar    return Val.getZExtValue() != N;
844a179be34c1a3c0190e6b9e39dee2197651f44a5dDaniel Dunbar  }
845a179be34c1a3c0190e6b9e39dee2197651f44a5dDaniel Dunbar
8465f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  Val = 0;
847191046d0504cec221655e9821f264f492a70f0b1Dmitri Gribenko  const char *Ptr = DigitsBegin;
8485f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
8495f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  llvm::APInt RadixVal(Val.getBitWidth(), radix);
8505f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  llvm::APInt CharVal(Val.getBitWidth(), 0);
8515f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  llvm::APInt OldVal = Val;
8521eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
8535f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  bool OverflowOccurred = false;
854191046d0504cec221655e9821f264f492a70f0b1Dmitri Gribenko  while (Ptr < SuffixBegin) {
855728bb4c41844b1df98eb35e7fa98eb5ffa9d65a6Jordan Rose    unsigned C = llvm::hexDigitValue(*Ptr++);
8561eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
8575f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    // If this letter is out of bound for this radix, reject it.
8585f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    assert(C < radix && "NumericLiteralParser ctor should have rejected this");
8591eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
8605f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    CharVal = C;
8611eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
8625f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    // Add the digit to the value in the appropriate radix.  If adding in digits
8635f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    // made the value smaller, then this overflowed.
8645f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    OldVal = Val;
8655f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
8665f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    // Multiply by radix, did overflow occur on the multiply?
8675f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    Val *= RadixVal;
8685f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    OverflowOccurred |= Val.udiv(RadixVal) != OldVal;
8695f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
8705f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    // Add value, did overflow occur on the value?
871d70cb645702bdbb42aee58403306a7c47e0d901cDaniel Dunbar    //   (a + b) ult b  <=> overflow
8725f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    Val += CharVal;
8735f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    OverflowOccurred |= Val.ult(CharVal);
8745f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  }
8755f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  return OverflowOccurred;
8765f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer}
8775f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
87894c939dc1d4958b62ea5a89294dd8b2905f3191fJohn McCallllvm::APFloat::opStatus
87994c939dc1d4958b62ea5a89294dd8b2905f3191fJohn McCallNumericLiteralParser::GetFloatValue(llvm::APFloat &Result) {
880427d5af5b601985093b6b4b33ba1e30fc24d86dcTed Kremenek  using llvm::APFloat;
8811eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
882e9f195f15ffe96d0a220c872ab12d0630a633c44Erick Tryzelaar  unsigned n = std::min(SuffixBegin - ThisTokBegin, ThisTokEnd - ThisTokBegin);
88394c939dc1d4958b62ea5a89294dd8b2905f3191fJohn McCall  return Result.convertFromString(StringRef(ThisTokBegin, n),
88494c939dc1d4958b62ea5a89294dd8b2905f3191fJohn McCall                                  APFloat::rmNearestTiesToEven);
8855f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer}
8865f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
8875f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
88858f9ce1664cd4fa67da48679a11df6aed25ce1fcJames Dennett/// \verbatim
8895cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith///       user-defined-character-literal: [C++11 lex.ext]
8905cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith///         character-literal ud-suffix
8915cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith///       ud-suffix:
8925cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith///         identifier
8935cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith///       character-literal: [C++11 lex.ccon]
8942fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         ' c-char-sequence '
8952fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         u' c-char-sequence '
8962fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         U' c-char-sequence '
8972fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         L' c-char-sequence '
8982fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///       c-char-sequence:
8992fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         c-char
9002fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         c-char-sequence c-char
9012fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///       c-char:
9022fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         any member of the source character set except the single-quote ',
9032fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///           backslash \, or new-line character
9042fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         escape-sequence
9052fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         universal-character-name
9065cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith///       escape-sequence:
9072fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         simple-escape-sequence
9082fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         octal-escape-sequence
9092fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         hexadecimal-escape-sequence
9102fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///       simple-escape-sequence:
911ddddd48da72bc29d1c3f388ed91ea5549328129eNAKAMURA Takumi///         one of \' \" \? \\ \a \b \f \n \r \t \v
9122fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///       octal-escape-sequence:
9132fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         \ octal-digit
9142fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         \ octal-digit octal-digit
9152fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         \ octal-digit octal-digit octal-digit
9162fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///       hexadecimal-escape-sequence:
9172fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         \x hexadecimal-digit
9182fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         hexadecimal-escape-sequence hexadecimal-digit
9195cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith///       universal-character-name: [C++11 lex.charset]
9202fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         \u hex-quad
9212fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         \U hex-quad hex-quad
9222fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///       hex-quad:
9232fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         hex-digit hex-digit hex-digit hex-digit
92458f9ce1664cd4fa67da48679a11df6aed25ce1fcJames Dennett/// \endverbatim
9252fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///
9265f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid SpencerCharLiteralParser::CharLiteralParser(const char *begin, const char *end,
9275cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor                                     SourceLocation Loc, Preprocessor &PP,
9285cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor                                     tok::TokenKind kind) {
929be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell  // At this point we know that the character matches the regex "(L|u|U)?'.*'".
9305f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  HadError = false;
9311eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
9325cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  Kind = kind;
9335cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor
93426b75c07317a3b50a8a00a1623e3ef38af1d8349Richard Smith  const char *TokBegin = begin;
93526b75c07317a3b50a8a00a1623e3ef38af1d8349Richard Smith
936be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell  // Skip over wide character determinant.
937be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell  if (Kind != tok::char_constant) {
9385cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor    ++begin;
9395cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  }
9401eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
9415f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  // Skip over the entry quote.
9425f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  assert(begin[0] == '\'' && "Invalid token lexed");
9435f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  ++begin;
9445f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
9455cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith  // Remove an optional ud-suffix.
9465cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith  if (end[-1] != '\'') {
9475cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith    const char *UDSuffixEnd = end;
9485cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith    do {
9495cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith      --end;
9505cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith    } while (end[-1] != '\'');
9515cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith    UDSuffixBuf.assign(end, UDSuffixEnd);
95226b75c07317a3b50a8a00a1623e3ef38af1d8349Richard Smith    UDSuffixOffset = end - TokBegin;
9535cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith  }
9545cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith
955be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell  // Trim the ending quote.
9565cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith  assert(end != begin && "Invalid token lexed");
957be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell  --end;
958be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell
9591eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump  // FIXME: The "Value" is an uint64_t so we can handle char literals of
960fc8f0e14ad142ed811e90fbd9a30e419e301c717Chris Lattner  // up to 64-bits.
9615f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  // FIXME: This extensively assumes that 'char' is 8-bits.
96298be4943e8dc4f3905629a7102668960873cf863Chris Lattner  assert(PP.getTargetInfo().getCharWidth() == 8 &&
9635f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer         "Assumes char is 8 bits");
964e3ad881e4e9620e941dabd4e78dacdb028b85682Chris Lattner  assert(PP.getTargetInfo().getIntWidth() <= 64 &&
965e3ad881e4e9620e941dabd4e78dacdb028b85682Chris Lattner         (PP.getTargetInfo().getIntWidth() & 7) == 0 &&
966e3ad881e4e9620e941dabd4e78dacdb028b85682Chris Lattner         "Assumes sizeof(int) on target is <= 64 and a multiple of char");
967e3ad881e4e9620e941dabd4e78dacdb028b85682Chris Lattner  assert(PP.getTargetInfo().getWCharWidth() <= 64 &&
968e3ad881e4e9620e941dabd4e78dacdb028b85682Chris Lattner         "Assumes sizeof(wchar) on target is <= 64");
9694bc11af9bed1d4a247e3db1fcb754d410ad99099Sanjiv Gupta
970be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell  SmallVector<uint32_t,4> codepoint_buffer;
971be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell  codepoint_buffer.resize(end-begin);
972be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell  uint32_t *buffer_begin = &codepoint_buffer.front();
973be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell  uint32_t *buffer_end = buffer_begin + codepoint_buffer.size();
974be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell
975be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell  // Unicode escapes representing characters that cannot be correctly
976be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell  // represented in a single code unit are disallowed in character literals
977be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell  // by this implementation.
978be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell  uint32_t largest_character_for_kind;
979be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell  if (tok::wide_char_constant == Kind) {
980be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell    largest_character_for_kind = 0xFFFFFFFFu >> (32-PP.getTargetInfo().getWCharWidth());
981be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell  } else if (tok::utf16_char_constant == Kind) {
982be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell    largest_character_for_kind = 0xFFFF;
983be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell  } else if (tok::utf32_char_constant == Kind) {
984be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell    largest_character_for_kind = 0x10FFFF;
985be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell  } else {
986be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell    largest_character_for_kind = 0x7Fu;
987be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell  }
9885f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
989be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell  while (begin!=end) {
990be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell    // Is this a span of non-escape characters?
991be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell    if (begin[0] != '\\') {
992be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell      char const *start = begin;
993be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell      do {
994be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell        ++begin;
995be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell      } while (begin != end && *begin != '\\');
996be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell
99791359302b822d829afa93c0dadf5f7ce6e19fbc6Eli Friedman      char const *tmp_in_start = start;
99891359302b822d829afa93c0dadf5f7ce6e19fbc6Eli Friedman      uint32_t *tmp_out_start = buffer_begin;
999be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell      ConversionResult res =
1000be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell      ConvertUTF8toUTF32(reinterpret_cast<UTF8 const **>(&start),
1001be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell                         reinterpret_cast<UTF8 const *>(begin),
1002be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell                         &buffer_begin,buffer_end,strictConversion);
1003be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell      if (res!=conversionOK) {
100491359302b822d829afa93c0dadf5f7ce6e19fbc6Eli Friedman        // If we see bad encoding for unprefixed character literals, warn and
100591359302b822d829afa93c0dadf5f7ce6e19fbc6Eli Friedman        // simply copy the byte values, for compatibility with gcc and
100691359302b822d829afa93c0dadf5f7ce6e19fbc6Eli Friedman        // older versions of clang.
100791359302b822d829afa93c0dadf5f7ce6e19fbc6Eli Friedman        bool NoErrorOnBadEncoding = isAscii();
100891359302b822d829afa93c0dadf5f7ce6e19fbc6Eli Friedman        unsigned Msg = diag::err_bad_character_encoding;
100991359302b822d829afa93c0dadf5f7ce6e19fbc6Eli Friedman        if (NoErrorOnBadEncoding)
101091359302b822d829afa93c0dadf5f7ce6e19fbc6Eli Friedman          Msg = diag::warn_bad_character_encoding;
101191359302b822d829afa93c0dadf5f7ce6e19fbc6Eli Friedman        PP.Diag(Loc, Msg);
101291359302b822d829afa93c0dadf5f7ce6e19fbc6Eli Friedman        if (NoErrorOnBadEncoding) {
101391359302b822d829afa93c0dadf5f7ce6e19fbc6Eli Friedman          start = tmp_in_start;
101491359302b822d829afa93c0dadf5f7ce6e19fbc6Eli Friedman          buffer_begin = tmp_out_start;
101591359302b822d829afa93c0dadf5f7ce6e19fbc6Eli Friedman          for ( ; start != begin; ++start, ++buffer_begin)
101691359302b822d829afa93c0dadf5f7ce6e19fbc6Eli Friedman            *buffer_begin = static_cast<uint8_t>(*start);
101791359302b822d829afa93c0dadf5f7ce6e19fbc6Eli Friedman        } else {
101891359302b822d829afa93c0dadf5f7ce6e19fbc6Eli Friedman          HadError = true;
101991359302b822d829afa93c0dadf5f7ce6e19fbc6Eli Friedman        }
10205f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      } else {
102191359302b822d829afa93c0dadf5f7ce6e19fbc6Eli Friedman        for (; tmp_out_start <buffer_begin; ++tmp_out_start) {
102291359302b822d829afa93c0dadf5f7ce6e19fbc6Eli Friedman          if (*tmp_out_start > largest_character_for_kind) {
1023be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell            HadError = true;
1024be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell            PP.Diag(Loc, diag::err_character_too_large);
1025be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell          }
10261c6c64b5181a960c7d4cace4995a938d4dfa6fbfChris Lattner        }
10275f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      }
1028be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell
1029be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell      continue;
10305f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    }
    // Is this a Universal Character Name escape?
1032be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell    if (begin[1] == 'u' || begin[1] == 'U') {
1033be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell      unsigned short UcnLen = 0;
103426b75c07317a3b50a8a00a1623e3ef38af1d8349Richard Smith      if (!ProcessUCNEscape(TokBegin, begin, end, *buffer_begin, UcnLen,
1035be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell                            FullSourceLoc(Loc, PP.getSourceManager()),
10364e4d08403ca5cfd4d558fa2936215d3a4e5a528dDavid Blaikie                            &PP.getDiagnostics(), PP.getLangOpts(),
1037be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell                            true))
1038be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell      {
1039be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell        HadError = true;
1040be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell      } else if (*buffer_begin > largest_character_for_kind) {
1041be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell        HadError = true;
1042e5f0588840b20897631cc8110344fd2745ef4caaRichard Smith        PP.Diag(Loc, diag::err_character_too_large);
1043be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell      }
10441eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
1045be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell      ++buffer_begin;
1046be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell      continue;
1047be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell    }
1048be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell    unsigned CharWidth = getCharWidth(Kind, PP.getTargetInfo());
1049be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell    uint64_t result =
1050e5f0588840b20897631cc8110344fd2745ef4caaRichard Smith      ProcessCharEscape(TokBegin, begin, end, HadError,
1051e5f0588840b20897631cc8110344fd2745ef4caaRichard Smith                        FullSourceLoc(Loc,PP.getSourceManager()),
1052e5f0588840b20897631cc8110344fd2745ef4caaRichard Smith                        CharWidth, &PP.getDiagnostics(), PP.getLangOpts());
1053be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell    *buffer_begin++ = result;
1054e3ad881e4e9620e941dabd4e78dacdb028b85682Chris Lattner  }
1055e3ad881e4e9620e941dabd4e78dacdb028b85682Chris Lattner
1056be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell  unsigned NumCharsSoFar = buffer_begin-&codepoint_buffer.front();
1057be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell
1058e3ad881e4e9620e941dabd4e78dacdb028b85682Chris Lattner  if (NumCharsSoFar > 1) {
1059be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell    if (isWide())
10605cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor      PP.Diag(Loc, diag::warn_extraneous_char_constant);
1061be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell    else if (isAscii() && NumCharsSoFar == 4)
1062be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell      PP.Diag(Loc, diag::ext_four_char_character_literal);
1063be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell    else if (isAscii())
1064e3ad881e4e9620e941dabd4e78dacdb028b85682Chris Lattner      PP.Diag(Loc, diag::ext_multichar_character_literal);
1065e3ad881e4e9620e941dabd4e78dacdb028b85682Chris Lattner    else
1066be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell      PP.Diag(Loc, diag::err_multichar_utf_character_literal);
10672a1c363f38e59a5044fc349aa7e538a50954c244Eli Friedman    IsMultiChar = true;
1068930b71a4a7dedf70a73e5fd875bae7df452b80a9Daniel Dunbar  } else
1069930b71a4a7dedf70a73e5fd875bae7df452b80a9Daniel Dunbar    IsMultiChar = false;
10704bc11af9bed1d4a247e3db1fcb754d410ad99099Sanjiv Gupta
1071be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell  llvm::APInt LitVal(PP.getTargetInfo().getIntWidth(), 0);
1072be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell
1073be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell  // Narrow character literals act as though their value is concatenated
1074be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell  // in this implementation, but warn on overflow.
1075be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell  bool multi_char_too_long = false;
1076be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell  if (isAscii() && isMultiChar()) {
1077be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell    LitVal = 0;
1078be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell    for (size_t i=0;i<NumCharsSoFar;++i) {
1079be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell      // check for enough leading zeros to shift into
1080be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell      multi_char_too_long |= (LitVal.countLeadingZeros() < 8);
1081be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell      LitVal <<= 8;
1082be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell      LitVal = LitVal + (codepoint_buffer[i] & 0xFF);
1083be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell    }
1084be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell  } else if (NumCharsSoFar > 0) {
1085be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell    // otherwise just take the last character
1086be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell    LitVal = buffer_begin[-1];
1087be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell  }
1088be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell
1089be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell  if (!HadError && multi_char_too_long) {
1090be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell    PP.Diag(Loc,diag::warn_char_constant_too_large);
1091be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell  }
1092be773526230b5a7121a8b321b05f2e53fa473f5cSeth Cantrell
10934bc11af9bed1d4a247e3db1fcb754d410ad99099Sanjiv Gupta  // Transfer the value from APInt to uint64_t
10944bc11af9bed1d4a247e3db1fcb754d410ad99099Sanjiv Gupta  Value = LitVal.getZExtValue();
10951eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
10965f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  // If this is a single narrow character, sign extend it (e.g. '\xFF' is "-1")
10975f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  // if 'char' is signed for this target (C99 6.4.4.4p10).  Note that multiple
10985f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  // character constants are not sign extended in this implementation:
10995f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  // '\xFF\xFF' = 65535 and '\x0\xFF' = 255, which matches GCC.
11005cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  if (isAscii() && NumCharsSoFar == 1 && (Value & 128) &&
11014e4d08403ca5cfd4d558fa2936215d3a4e5a528dDavid Blaikie      PP.getLangOpts().CharIsSigned)
11025f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    Value = (signed char)Value;
11035f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer}
11045f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
1105a1263cfb87151771fd74a1d2b676ae2ba172b72cJames Dennett/// \verbatim
11062fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///       string-literal: [C++0x lex.string]
11072fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         encoding-prefix " [s-char-sequence] "
11082fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         encoding-prefix R raw-string
11092fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///       encoding-prefix:
11102fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         u8
11112fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         u
11122fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         U
11132fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         L
11145f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///       s-char-sequence:
11155f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///         s-char
11165f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///         s-char-sequence s-char
11175f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///       s-char:
11182fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         any member of the source character set except the double-quote ",
11192fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///           backslash \, or new-line character
11202fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         escape-sequence
11215f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///         universal-character-name
11222fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///       raw-string:
11232fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         " d-char-sequence ( r-char-sequence ) d-char-sequence "
11242fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///       r-char-sequence:
11252fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         r-char
11262fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         r-char-sequence r-char
11272fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///       r-char:
11282fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         any member of the source character set, except a right parenthesis )
11292fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///           followed by the initial d-char-sequence (which may be empty)
11302fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///           followed by a double quote ".
11312fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///       d-char-sequence:
11322fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         d-char
11332fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         d-char-sequence d-char
11342fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///       d-char:
11352fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         any member of the basic source character set except:
11362fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///           space, the left parenthesis (, the right parenthesis ),
11372fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///           the backslash \, and the control characters representing horizontal
11382fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///           tab, vertical tab, form feed, and newline.
11392fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///       escape-sequence: [C++0x lex.ccon]
11402fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         simple-escape-sequence
11412fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         octal-escape-sequence
11422fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         hexadecimal-escape-sequence
11432fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///       simple-escape-sequence:
1144ddddd48da72bc29d1c3f388ed91ea5549328129eNAKAMURA Takumi///         one of \' \" \? \\ \a \b \f \n \r \t \v
11452fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///       octal-escape-sequence:
11462fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         \ octal-digit
11472fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         \ octal-digit octal-digit
11482fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         \ octal-digit octal-digit octal-digit
11492fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///       hexadecimal-escape-sequence:
11502fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         \x hexadecimal-digit
11512fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper///         hexadecimal-escape-sequence hexadecimal-digit
11525f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///       universal-character-name:
11535f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///         \u hex-quad
11545f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///         \U hex-quad hex-quad
11555f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///       hex-quad:
11565f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///         hex-digit hex-digit hex-digit hex-digit
1157a1263cfb87151771fd74a1d2b676ae2ba172b72cJames Dennett/// \endverbatim
11585f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer///
11595f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid SpencerStringLiteralParser::
1160d217773f106856a11879ec79dc468efefaf2ee75Chris LattnerStringLiteralParser(const Token *StringToks, unsigned NumStringToks,
11610833dd0675c25cbb35671c7a2006d511d5c77ce3Chris Lattner                    Preprocessor &PP, bool Complain)
11624e4d08403ca5cfd4d558fa2936215d3a4e5a528dDavid Blaikie  : SM(PP.getSourceManager()), Features(PP.getLangOpts()),
1163403de3f932b5d1d3e4e58f69960000911d04dd2aArgyrios Kyrtzidis    Target(PP.getTargetInfo()), Diags(Complain ? &PP.getDiagnostics() : 0),
11645cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor    MaxTokenLength(0), SizeBound(0), CharByteWidth(0), Kind(tok::unknown),
11655cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor    ResultPtr(ResultBuf.data()), hadError(false), Pascal(false) {
11660833dd0675c25cbb35671c7a2006d511d5c77ce3Chris Lattner  init(StringToks, NumStringToks);
11670833dd0675c25cbb35671c7a2006d511d5c77ce3Chris Lattner}
11680833dd0675c25cbb35671c7a2006d511d5c77ce3Chris Lattner
11690833dd0675c25cbb35671c7a2006d511d5c77ce3Chris Lattnervoid StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){
1170403de3f932b5d1d3e4e58f69960000911d04dd2aArgyrios Kyrtzidis  // The literal token may have come from an invalid source location (e.g. due
1171403de3f932b5d1d3e4e58f69960000911d04dd2aArgyrios Kyrtzidis  // to a PCH error), in which case the token length will be 0.
11723144749f8bf9bbf7c027f2161a930bff80ad6f72Argyrios Kyrtzidis  if (NumStringToks == 0 || StringToks[0].getLength() < 2)
11733144749f8bf9bbf7c027f2161a930bff80ad6f72Argyrios Kyrtzidis    return DiagnoseLexingError(SourceLocation());
1174403de3f932b5d1d3e4e58f69960000911d04dd2aArgyrios Kyrtzidis
11755f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  // Scan all of the string portions, remember the max individual token length,
11765f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  // computing a bound on the concatenated string length, and see whether any
11775f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  // piece is a wide-string.  If any of the string portions is a wide-string
11785f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  // literal, the result is a wide-string literal [C99 6.4.5p4].
1179403de3f932b5d1d3e4e58f69960000911d04dd2aArgyrios Kyrtzidis  assert(NumStringToks && "expected at least one token");
11806cf750298d3621d8a10a6dd07fcee8e274b9d94dSean Hunt  MaxTokenLength = StringToks[0].getLength();
1181403de3f932b5d1d3e4e58f69960000911d04dd2aArgyrios Kyrtzidis  assert(StringToks[0].getLength() >= 2 && "literal token is invalid!");
11826cf750298d3621d8a10a6dd07fcee8e274b9d94dSean Hunt  SizeBound = StringToks[0].getLength()-2;  // -2 for "".
11835cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  Kind = StringToks[0].getKind();
11846cf750298d3621d8a10a6dd07fcee8e274b9d94dSean Hunt
11856cf750298d3621d8a10a6dd07fcee8e274b9d94dSean Hunt  hadError = false;
11865f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer
11875f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  // Implement Translation Phase #6: concatenation of string literals
11885f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  /// (C99 5.1.1.2p1).  The common case is only one string fragment.
11895f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  for (unsigned i = 1; i != NumStringToks; ++i) {
11903144749f8bf9bbf7c027f2161a930bff80ad6f72Argyrios Kyrtzidis    if (StringToks[i].getLength() < 2)
11913144749f8bf9bbf7c027f2161a930bff80ad6f72Argyrios Kyrtzidis      return DiagnoseLexingError(StringToks[i].getLocation());
1192403de3f932b5d1d3e4e58f69960000911d04dd2aArgyrios Kyrtzidis
11935f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    // The string could be shorter than this if it needs cleaning, but this is a
11945f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    // reasonable bound, which is all we need.
1195403de3f932b5d1d3e4e58f69960000911d04dd2aArgyrios Kyrtzidis    assert(StringToks[i].getLength() >= 2 && "literal token is invalid!");
11966cf750298d3621d8a10a6dd07fcee8e274b9d94dSean Hunt    SizeBound += StringToks[i].getLength()-2;  // -2 for "".
11971eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
11985f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    // Remember maximum string piece length.
11996cf750298d3621d8a10a6dd07fcee8e274b9d94dSean Hunt    if (StringToks[i].getLength() > MaxTokenLength)
12006cf750298d3621d8a10a6dd07fcee8e274b9d94dSean Hunt      MaxTokenLength = StringToks[i].getLength();
12011eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
12025cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor    // Remember if we see any wide or utf-8/16/32 strings.
12035cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor    // Also check for illegal concatenations.
12045cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor    if (StringToks[i].isNot(Kind) && StringToks[i].isNot(tok::string_literal)) {
12055cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor      if (isAscii()) {
12065cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor        Kind = StringToks[i].getKind();
12075cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor      } else {
12085cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor        if (Diags)
1209e5f0588840b20897631cc8110344fd2745ef4caaRichard Smith          Diags->Report(StringToks[i].getLocation(),
12105cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor                        diag::err_unsupported_string_concat);
12115cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor        hadError = true;
12125cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor      }
12135cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor    }
12145f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  }
1215dbb1ecc32ca122b07b7c98fd0a8f6f53985adaccChris Lattner
12165f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  // Include space for the null terminator.
12175f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  ++SizeBound;
12181eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
12195f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  // TODO: K&R warning: "traditional C rejects string constant concatenation"
12201eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
12215cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  // Get the width in bytes of char/wchar_t/char16_t/char32_t
12225cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  CharByteWidth = getCharWidth(Kind, Target);
12235cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  assert((CharByteWidth & 7) == 0 && "Assumes character size is byte multiple");
12245cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  CharByteWidth /= 8;
12251eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
12265f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  // The output buffer size needs to be large enough to hold wide characters.
12275f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  // This is a worst-case assumption which basically corresponds to L"" "long".
12285cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  SizeBound *= CharByteWidth;
12291eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
12305f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  // Size the temporary buffer to hold the result string data.
12315f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  ResultBuf.resize(SizeBound);
12321eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
12335f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  // Likewise, but for each string piece.
1234f7ccbad5d9949e7ddd1cbef43d482553b811e026Dylan Noblesmith  SmallString<512> TokenBuf;
12355f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  TokenBuf.resize(MaxTokenLength);
12361eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
12375f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  // Loop over all the strings, getting their spelling, and expanding them to
12385f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  // wide strings as appropriate.
12395f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  ResultPtr = &ResultBuf[0];   // Next byte to fill in.
12401eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
1241ee98ac5a1330db432b188dd2d38b6631aac47bf1Anders Carlsson  Pascal = false;
12421eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
12435cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith  SourceLocation UDSuffixTokLoc;
12445cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith
12455f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  for (unsigned i = 0, e = NumStringToks; i != e; ++i) {
12465f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    const char *ThisTokBuf = &TokenBuf[0];
12475f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    // Get the spelling of the token, which eliminates trigraphs, etc.  We know
12485f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    // that ThisTokBuf points to a buffer that is big enough for the whole token
12495f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    // and 'spelled' tokens can only shrink.
125050f6af7a6d6951a63f3da7d4c5a7d3965bf73b63Douglas Gregor    bool StringInvalid = false;
12510833dd0675c25cbb35671c7a2006d511d5c77ce3Chris Lattner    unsigned ThisTokLen =
1252b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner      Lexer::getSpelling(StringToks[i], ThisTokBuf, SM, Features,
1253b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner                         &StringInvalid);
12543144749f8bf9bbf7c027f2161a930bff80ad6f72Argyrios Kyrtzidis    if (StringInvalid)
12553144749f8bf9bbf7c027f2161a930bff80ad6f72Argyrios Kyrtzidis      return DiagnoseLexingError(StringToks[i].getLocation());
125650f6af7a6d6951a63f3da7d4c5a7d3965bf73b63Douglas Gregor
125726b75c07317a3b50a8a00a1623e3ef38af1d8349Richard Smith    const char *ThisTokBegin = ThisTokBuf;
12585cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith    const char *ThisTokEnd = ThisTokBuf+ThisTokLen;
12595cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith
12605cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith    // Remove an optional ud-suffix.
12615cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith    if (ThisTokEnd[-1] != '"') {
12625cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith      const char *UDSuffixEnd = ThisTokEnd;
12635cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith      do {
12645cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith        --ThisTokEnd;
12655cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith      } while (ThisTokEnd[-1] != '"');
12665cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith
12675cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith      StringRef UDSuffix(ThisTokEnd, UDSuffixEnd - ThisTokEnd);
12685cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith
12695cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith      if (UDSuffixBuf.empty()) {
12705cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith        UDSuffixBuf.assign(UDSuffix);
1271dd66be718f23c8149d74ae8b011b002e11e8d5baRichard Smith        UDSuffixToken = i;
1272dd66be718f23c8149d74ae8b011b002e11e8d5baRichard Smith        UDSuffixOffset = ThisTokEnd - ThisTokBuf;
12735cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith        UDSuffixTokLoc = StringToks[i].getLocation();
12745cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith      } else if (!UDSuffixBuf.equals(UDSuffix)) {
12755cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith        // C++11 [lex.ext]p8: At the end of phase 6, if a string literal is the
12765cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith        // result of a concatenation involving at least one user-defined-string-
12775cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith        // literal, all the participating user-defined-string-literals shall
12785cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith        // have the same ud-suffix.
12795cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith        if (Diags) {
12805cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith          SourceLocation TokLoc = StringToks[i].getLocation();
12815cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith          Diags->Report(TokLoc, diag::err_string_concat_mixed_suffix)
12825cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith            << UDSuffixBuf << UDSuffix
12835cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith            << SourceRange(UDSuffixTokLoc, UDSuffixTokLoc)
12845cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith            << SourceRange(TokLoc, TokLoc);
12855cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith        }
12865cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith        hadError = true;
12875cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith      }
12885cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith    }
12895cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith
12905cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith    // Strip the end quote.
12915cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith    --ThisTokEnd;
12925cc2c6eb67b6e5361bbe96f79b519fd62ec666d6Richard Smith
12935f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    // TODO: Input character set mapping support.
12941eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
12951661d717563d6a27dec3da69deba2b2efaa45802Craig Topper    // Skip marker for wide or unicode strings.
12965cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor    if (ThisTokBuf[0] == 'L' || ThisTokBuf[0] == 'u' || ThisTokBuf[0] == 'U') {
12975f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      ++ThisTokBuf;
12985cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor      // Skip 8 of u8 marker for utf8 strings.
12995cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor      if (ThisTokBuf[0] == '8')
13005cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor        ++ThisTokBuf;
130156bedefe92ae8f604d14bea75cc3040ab32337c2Fariborz Jahanian    }
13021eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
13032fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper    // Check for raw string
13042fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper    if (ThisTokBuf[0] == 'R') {
13052fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper      ThisTokBuf += 2; // skip R"
13061eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
13072fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper      const char *Prefix = ThisTokBuf;
13082fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper      while (ThisTokBuf[0] != '(')
1309ee98ac5a1330db432b188dd2d38b6631aac47bf1Anders Carlsson        ++ThisTokBuf;
13102fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper      ++ThisTokBuf; // skip '('
13112fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper
131249d517467c3dcd2c67e8a6c740ba5160e37625f7Richard Smith      // Remove same number of characters from the end
131349d517467c3dcd2c67e8a6c740ba5160e37625f7Richard Smith      ThisTokEnd -= ThisTokBuf - Prefix;
131449d517467c3dcd2c67e8a6c740ba5160e37625f7Richard Smith      assert(ThisTokEnd >= ThisTokBuf && "malformed raw string literal");
13152fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper
13162fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper      // Copy the string over
1317e5f0588840b20897631cc8110344fd2745ef4caaRichard Smith      if (CopyStringFragment(StringToks[i], ThisTokBegin,
1318e5f0588840b20897631cc8110344fd2745ef4caaRichard Smith                             StringRef(ThisTokBuf, ThisTokEnd - ThisTokBuf)))
1319e5f0588840b20897631cc8110344fd2745ef4caaRichard Smith        hadError = true;
13202fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper    } else {
132107a075870110a8376ddc1cd09412a0ec00987153Argyrios Kyrtzidis      if (ThisTokBuf[0] != '"') {
132207a075870110a8376ddc1cd09412a0ec00987153Argyrios Kyrtzidis        // The file may have come from PCH and then changed after loading the
132307a075870110a8376ddc1cd09412a0ec00987153Argyrios Kyrtzidis        // PCH; Fail gracefully.
13243144749f8bf9bbf7c027f2161a930bff80ad6f72Argyrios Kyrtzidis        return DiagnoseLexingError(StringToks[i].getLocation());
132507a075870110a8376ddc1cd09412a0ec00987153Argyrios Kyrtzidis      }
13262fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper      ++ThisTokBuf; // skip "
13272fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper
13282fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper      // Check if this is a pascal string
13292fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper      if (Features.PascalStrings && ThisTokBuf + 1 != ThisTokEnd &&
13302fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper          ThisTokBuf[0] == '\\' && ThisTokBuf[1] == 'p') {
13311eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
13322fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper        // If the \p sequence is found in the first token, we have a pascal string
13332fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper        // Otherwise, if we already have a pascal string, ignore the first \p
13342fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper        if (i == 0) {
13355f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer          ++ThisTokBuf;
13362fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper          Pascal = true;
13372fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper        } else if (Pascal)
13382fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper          ThisTokBuf += 2;
13395f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer      }
13401eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
13412fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper      while (ThisTokBuf != ThisTokEnd) {
13422fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper        // Is this a span of non-escape characters?
13432fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper        if (ThisTokBuf[0] != '\\') {
13442fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper          const char *InStart = ThisTokBuf;
13452fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper          do {
13462fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper            ++ThisTokBuf;
13472fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper          } while (ThisTokBuf != ThisTokEnd && ThisTokBuf[0] != '\\');
13482fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper
13492fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper          // Copy the character span over.
1350e5f0588840b20897631cc8110344fd2745ef4caaRichard Smith          if (CopyStringFragment(StringToks[i], ThisTokBegin,
1351e5f0588840b20897631cc8110344fd2745ef4caaRichard Smith                                 StringRef(InStart, ThisTokBuf - InStart)))
1352e5f0588840b20897631cc8110344fd2745ef4caaRichard Smith            hadError = true;
13532fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper          continue;
13542fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper        }
13552fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper        // Is this a Universal Character Name escape?
13562fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper        if (ThisTokBuf[1] == 'u' || ThisTokBuf[1] == 'U') {
135726b75c07317a3b50a8a00a1623e3ef38af1d8349Richard Smith          EncodeUCNEscape(ThisTokBegin, ThisTokBuf, ThisTokEnd,
135826b75c07317a3b50a8a00a1623e3ef38af1d8349Richard Smith                          ResultPtr, hadError,
135926b75c07317a3b50a8a00a1623e3ef38af1d8349Richard Smith                          FullSourceLoc(StringToks[i].getLocation(), SM),
13602fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper                          CharByteWidth, Diags, Features);
13612fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper          continue;
13622fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper        }
13632fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper        // Otherwise, this is a non-UCN escape character.  Process it.
13642fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper        unsigned ResultChar =
1365e5f0588840b20897631cc8110344fd2745ef4caaRichard Smith          ProcessCharEscape(ThisTokBegin, ThisTokBuf, ThisTokEnd, hadError,
13662fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper                            FullSourceLoc(StringToks[i].getLocation(), SM),
1367e5f0588840b20897631cc8110344fd2745ef4caaRichard Smith                            CharByteWidth*8, Diags, Features);
13682fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper
1369caf1f26777c3adf2556c3af7bf9e01bd8ead17d9Eli Friedman        if (CharByteWidth == 4) {
1370caf1f26777c3adf2556c3af7bf9e01bd8ead17d9Eli Friedman          // FIXME: Make the type of the result buffer correct instead of
1371caf1f26777c3adf2556c3af7bf9e01bd8ead17d9Eli Friedman          // using reinterpret_cast.
1372caf1f26777c3adf2556c3af7bf9e01bd8ead17d9Eli Friedman          UTF32 *ResultWidePtr = reinterpret_cast<UTF32*>(ResultPtr);
13739b483df983759d51d61d54e8ae34bff423d15403Nico Weber          *ResultWidePtr = ResultChar;
1374caf1f26777c3adf2556c3af7bf9e01bd8ead17d9Eli Friedman          ResultPtr += 4;
1375caf1f26777c3adf2556c3af7bf9e01bd8ead17d9Eli Friedman        } else if (CharByteWidth == 2) {
1376caf1f26777c3adf2556c3af7bf9e01bd8ead17d9Eli Friedman          // FIXME: Make the type of the result buffer correct instead of
1377caf1f26777c3adf2556c3af7bf9e01bd8ead17d9Eli Friedman          // using reinterpret_cast.
1378caf1f26777c3adf2556c3af7bf9e01bd8ead17d9Eli Friedman          UTF16 *ResultWidePtr = reinterpret_cast<UTF16*>(ResultPtr);
13799b483df983759d51d61d54e8ae34bff423d15403Nico Weber          *ResultWidePtr = ResultChar & 0xFFFF;
1380caf1f26777c3adf2556c3af7bf9e01bd8ead17d9Eli Friedman          ResultPtr += 2;
1381caf1f26777c3adf2556c3af7bf9e01bd8ead17d9Eli Friedman        } else {
1382caf1f26777c3adf2556c3af7bf9e01bd8ead17d9Eli Friedman          assert(CharByteWidth == 1 && "Unexpected char width");
1383caf1f26777c3adf2556c3af7bf9e01bd8ead17d9Eli Friedman          *ResultPtr++ = ResultChar & 0xFF;
1384caf1f26777c3adf2556c3af7bf9e01bd8ead17d9Eli Friedman        }
13852fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper      }
13865f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer    }
13875f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer  }
13881eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
1389bbee00b6456e90a09f63c83c20233e6c5ad6000aChris Lattner  if (Pascal) {
139022508f410b3d727d5c557af3304c0a1bad94999eEli Friedman    if (CharByteWidth == 4) {
139122508f410b3d727d5c557af3304c0a1bad94999eEli Friedman      // FIXME: Make the type of the result buffer correct instead of
139222508f410b3d727d5c557af3304c0a1bad94999eEli Friedman      // using reinterpret_cast.
139322508f410b3d727d5c557af3304c0a1bad94999eEli Friedman      UTF32 *ResultWidePtr = reinterpret_cast<UTF32*>(ResultBuf.data());
139422508f410b3d727d5c557af3304c0a1bad94999eEli Friedman      ResultWidePtr[0] = GetNumStringChars() - 1;
139522508f410b3d727d5c557af3304c0a1bad94999eEli Friedman    } else if (CharByteWidth == 2) {
139622508f410b3d727d5c557af3304c0a1bad94999eEli Friedman      // FIXME: Make the type of the result buffer correct instead of
139722508f410b3d727d5c557af3304c0a1bad94999eEli Friedman      // using reinterpret_cast.
139822508f410b3d727d5c557af3304c0a1bad94999eEli Friedman      UTF16 *ResultWidePtr = reinterpret_cast<UTF16*>(ResultBuf.data());
139922508f410b3d727d5c557af3304c0a1bad94999eEli Friedman      ResultWidePtr[0] = GetNumStringChars() - 1;
140022508f410b3d727d5c557af3304c0a1bad94999eEli Friedman    } else {
140122508f410b3d727d5c557af3304c0a1bad94999eEli Friedman      assert(CharByteWidth == 1 && "Unexpected char width");
140222508f410b3d727d5c557af3304c0a1bad94999eEli Friedman      ResultBuf[0] = GetNumStringChars() - 1;
140322508f410b3d727d5c557af3304c0a1bad94999eEli Friedman    }
1404bbee00b6456e90a09f63c83c20233e6c5ad6000aChris Lattner
1405bbee00b6456e90a09f63c83c20233e6c5ad6000aChris Lattner    // Verify that pascal strings aren't too large.
14060833dd0675c25cbb35671c7a2006d511d5c77ce3Chris Lattner    if (GetStringLength() > 256) {
1407e5f0588840b20897631cc8110344fd2745ef4caaRichard Smith      if (Diags)
1408e5f0588840b20897631cc8110344fd2745ef4caaRichard Smith        Diags->Report(StringToks[0].getLocation(),
14090833dd0675c25cbb35671c7a2006d511d5c77ce3Chris Lattner                      diag::err_pascal_string_too_long)
14100833dd0675c25cbb35671c7a2006d511d5c77ce3Chris Lattner          << SourceRange(StringToks[0].getLocation(),
14110833dd0675c25cbb35671c7a2006d511d5c77ce3Chris Lattner                         StringToks[NumStringToks-1].getLocation());
14125cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor      hadError = true;
141357d7dde770c67b282e7fb77b1b81e429910937b3Eli Friedman      return;
141457d7dde770c67b282e7fb77b1b81e429910937b3Eli Friedman    }
14150833dd0675c25cbb35671c7a2006d511d5c77ce3Chris Lattner  } else if (Diags) {
1416427c492d368d6ecf409fa8053eecb5cd0e779c5bDouglas Gregor    // Complain if this string literal has too many characters.
1417a95880d6513c617bb96634bcc1f16c6bdb80dedcChris Lattner    unsigned MaxChars = Features.CPlusPlus? 65536 : Features.C99 ? 4095 : 509;
14185d704fef6e0a7db07329a37290684b17bf6badf8Benjamin Kramer
1419427c492d368d6ecf409fa8053eecb5cd0e779c5bDouglas Gregor    if (GetNumStringChars() > MaxChars)
1420e5f0588840b20897631cc8110344fd2745ef4caaRichard Smith      Diags->Report(StringToks[0].getLocation(),
14210833dd0675c25cbb35671c7a2006d511d5c77ce3Chris Lattner                    diag::ext_string_too_long)
1422427c492d368d6ecf409fa8053eecb5cd0e779c5bDouglas Gregor        << GetNumStringChars() << MaxChars
1423a95880d6513c617bb96634bcc1f16c6bdb80dedcChris Lattner        << (Features.CPlusPlus ? 2 : Features.C99 ? 1 : 0)
1424427c492d368d6ecf409fa8053eecb5cd0e779c5bDouglas Gregor        << SourceRange(StringToks[0].getLocation(),
1425427c492d368d6ecf409fa8053eecb5cd0e779c5bDouglas Gregor                       StringToks[NumStringToks-1].getLocation());
1426bbee00b6456e90a09f63c83c20233e6c5ad6000aChris Lattner  }
14275f016e2cb5d11daeb237544de1c5d59f20fe1a6eReid Spencer}
1428719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner
14295d704fef6e0a7db07329a37290684b17bf6badf8Benjamin Kramerstatic const char *resyncUTF8(const char *Err, const char *End) {
14305d704fef6e0a7db07329a37290684b17bf6badf8Benjamin Kramer  if (Err == End)
14315d704fef6e0a7db07329a37290684b17bf6badf8Benjamin Kramer    return End;
14325d704fef6e0a7db07329a37290684b17bf6badf8Benjamin Kramer  End = Err + std::min<unsigned>(getNumBytesForUTF8(*Err), End-Err);
14335d704fef6e0a7db07329a37290684b17bf6badf8Benjamin Kramer  while (++Err != End && (*Err & 0xC0) == 0x80)
14345d704fef6e0a7db07329a37290684b17bf6badf8Benjamin Kramer    ;
14355d704fef6e0a7db07329a37290684b17bf6badf8Benjamin Kramer  return Err;
14365bffbe5c1033967fe49c7a638fdcea99d30d573aSeth Cantrell}
14375bffbe5c1033967fe49c7a638fdcea99d30d573aSeth Cantrell
1438e5f0588840b20897631cc8110344fd2745ef4caaRichard Smith/// \brief This function copies from Fragment, which is a sequence of bytes
1439e5f0588840b20897631cc8110344fd2745ef4caaRichard Smith/// within Tok's contents (which begin at TokBegin) into ResultPtr.
14402fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper/// Performs widening for multi-byte characters.
1441e5f0588840b20897631cc8110344fd2745ef4caaRichard Smithbool StringLiteralParser::CopyStringFragment(const Token &Tok,
1442e5f0588840b20897631cc8110344fd2745ef4caaRichard Smith                                             const char *TokBegin,
1443e5f0588840b20897631cc8110344fd2745ef4caaRichard Smith                                             StringRef Fragment) {
1444e5f0588840b20897631cc8110344fd2745ef4caaRichard Smith  const UTF8 *ErrorPtrTmp;
1445e5f0588840b20897631cc8110344fd2745ef4caaRichard Smith  if (ConvertUTF8toWide(CharByteWidth, Fragment, ResultPtr, ErrorPtrTmp))
1446e5f0588840b20897631cc8110344fd2745ef4caaRichard Smith    return false;
14472fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper
144891359302b822d829afa93c0dadf5f7ce6e19fbc6Eli Friedman  // If we see bad encoding for unprefixed string literals, warn and
144991359302b822d829afa93c0dadf5f7ce6e19fbc6Eli Friedman  // simply copy the byte values, for compatibility with gcc and older
145091359302b822d829afa93c0dadf5f7ce6e19fbc6Eli Friedman  // versions of clang.
145191359302b822d829afa93c0dadf5f7ce6e19fbc6Eli Friedman  bool NoErrorOnBadEncoding = isAscii();
1452e5f0588840b20897631cc8110344fd2745ef4caaRichard Smith  if (NoErrorOnBadEncoding) {
1453e5f0588840b20897631cc8110344fd2745ef4caaRichard Smith    memcpy(ResultPtr, Fragment.data(), Fragment.size());
1454e5f0588840b20897631cc8110344fd2745ef4caaRichard Smith    ResultPtr += Fragment.size();
1455e5f0588840b20897631cc8110344fd2745ef4caaRichard Smith  }
14565bffbe5c1033967fe49c7a638fdcea99d30d573aSeth Cantrell
1457e5f0588840b20897631cc8110344fd2745ef4caaRichard Smith  if (Diags) {
14585bffbe5c1033967fe49c7a638fdcea99d30d573aSeth Cantrell    const char *ErrorPtr = reinterpret_cast<const char *>(ErrorPtrTmp);
14595bffbe5c1033967fe49c7a638fdcea99d30d573aSeth Cantrell
14605bffbe5c1033967fe49c7a638fdcea99d30d573aSeth Cantrell    FullSourceLoc SourceLoc(Tok.getLocation(), SM);
14615bffbe5c1033967fe49c7a638fdcea99d30d573aSeth Cantrell    const DiagnosticBuilder &Builder =
14625bffbe5c1033967fe49c7a638fdcea99d30d573aSeth Cantrell      Diag(Diags, Features, SourceLoc, TokBegin,
14635d704fef6e0a7db07329a37290684b17bf6badf8Benjamin Kramer           ErrorPtr, resyncUTF8(ErrorPtr, Fragment.end()),
14645bffbe5c1033967fe49c7a638fdcea99d30d573aSeth Cantrell           NoErrorOnBadEncoding ? diag::warn_bad_string_encoding
14655bffbe5c1033967fe49c7a638fdcea99d30d573aSeth Cantrell                                : diag::err_bad_string_encoding);
14665bffbe5c1033967fe49c7a638fdcea99d30d573aSeth Cantrell
14675d704fef6e0a7db07329a37290684b17bf6badf8Benjamin Kramer    const char *NextStart = resyncUTF8(ErrorPtr, Fragment.end());
14685bffbe5c1033967fe49c7a638fdcea99d30d573aSeth Cantrell    StringRef NextFragment(NextStart, Fragment.end()-NextStart);
14695bffbe5c1033967fe49c7a638fdcea99d30d573aSeth Cantrell
14704f056ac7f8d837822dbf8ab7cdd6849a9b0ad12fBenjamin Kramer    // Decode into a dummy buffer.
14714f056ac7f8d837822dbf8ab7cdd6849a9b0ad12fBenjamin Kramer    SmallString<512> Dummy;
14724f056ac7f8d837822dbf8ab7cdd6849a9b0ad12fBenjamin Kramer    Dummy.reserve(Fragment.size() * CharByteWidth);
14734f056ac7f8d837822dbf8ab7cdd6849a9b0ad12fBenjamin Kramer    char *Ptr = Dummy.data();
14744f056ac7f8d837822dbf8ab7cdd6849a9b0ad12fBenjamin Kramer
147582c6dc72adc30e785ce5bc6e8b43ae92070d2e08David Blaikie    while (!Builder.hasMaxRanges() &&
14764f056ac7f8d837822dbf8ab7cdd6849a9b0ad12fBenjamin Kramer           !ConvertUTF8toWide(CharByteWidth, NextFragment, Ptr, ErrorPtrTmp)) {
14775bffbe5c1033967fe49c7a638fdcea99d30d573aSeth Cantrell      const char *ErrorPtr = reinterpret_cast<const char *>(ErrorPtrTmp);
14785d704fef6e0a7db07329a37290684b17bf6badf8Benjamin Kramer      NextStart = resyncUTF8(ErrorPtr, Fragment.end());
14795bffbe5c1033967fe49c7a638fdcea99d30d573aSeth Cantrell      Builder << MakeCharSourceRange(Features, SourceLoc, TokBegin,
14805bffbe5c1033967fe49c7a638fdcea99d30d573aSeth Cantrell                                     ErrorPtr, NextStart);
14815bffbe5c1033967fe49c7a638fdcea99d30d573aSeth Cantrell      NextFragment = StringRef(NextStart, Fragment.end()-NextStart);
14825bffbe5c1033967fe49c7a638fdcea99d30d573aSeth Cantrell    }
1483e5f0588840b20897631cc8110344fd2745ef4caaRichard Smith  }
148491359302b822d829afa93c0dadf5f7ce6e19fbc6Eli Friedman  return !NoErrorOnBadEncoding;
148591359302b822d829afa93c0dadf5f7ce6e19fbc6Eli Friedman}
14862fa4e86b4fdada3b9ecbbbd99965b83ed879f69bCraig Topper
14873144749f8bf9bbf7c027f2161a930bff80ad6f72Argyrios Kyrtzidisvoid StringLiteralParser::DiagnoseLexingError(SourceLocation Loc) {
14883144749f8bf9bbf7c027f2161a930bff80ad6f72Argyrios Kyrtzidis  hadError = true;
14893144749f8bf9bbf7c027f2161a930bff80ad6f72Argyrios Kyrtzidis  if (Diags)
14903144749f8bf9bbf7c027f2161a930bff80ad6f72Argyrios Kyrtzidis    Diags->Report(Loc, diag::err_lexing_string);
14913144749f8bf9bbf7c027f2161a930bff80ad6f72Argyrios Kyrtzidis}
14923144749f8bf9bbf7c027f2161a930bff80ad6f72Argyrios Kyrtzidis
1493719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner/// getOffsetOfStringByte - This function returns the offset of the
1494719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner/// specified byte of the string data represented by Token.  This handles
1495719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner/// advancing over escape sequences in the string.
1496719e61573f27c11057ecfe0dd8f141621602c571Chris Lattnerunsigned StringLiteralParser::getOffsetOfStringByte(const Token &Tok,
14976c66f07854c1334a1ce9eae1428d61d54182a6e1Chris Lattner                                                    unsigned ByteNo) const {
1498719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner  // Get the spelling of the token.
1499f7ccbad5d9949e7ddd1cbef43d482553b811e026Dylan Noblesmith  SmallString<32> SpellingBuffer;
15006cf750298d3621d8a10a6dd07fcee8e274b9d94dSean Hunt  SpellingBuffer.resize(Tok.getLength());
15011eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
150250f6af7a6d6951a63f3da7d4c5a7d3965bf73b63Douglas Gregor  bool StringInvalid = false;
1503719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner  const char *SpellingPtr = &SpellingBuffer[0];
1504b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner  unsigned TokLen = Lexer::getSpelling(Tok, SpellingPtr, SM, Features,
1505b0607279cb98bbf2bbfe0db170aed39ef91e86a2Chris Lattner                                       &StringInvalid);
150691f54ce93bec136fb9e18740b895cf1c1339524bChris Lattner  if (StringInvalid)
150750f6af7a6d6951a63f3da7d4c5a7d3965bf73b63Douglas Gregor    return 0;
1508719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner
1509df9ef1bc8c3780307ab2ed81bf5e31c23310b936Richard Smith  const char *SpellingStart = SpellingPtr;
1510df9ef1bc8c3780307ab2ed81bf5e31c23310b936Richard Smith  const char *SpellingEnd = SpellingPtr+TokLen;
1511df9ef1bc8c3780307ab2ed81bf5e31c23310b936Richard Smith
1512df9ef1bc8c3780307ab2ed81bf5e31c23310b936Richard Smith  // Handle UTF-8 strings just like narrow strings.
1513df9ef1bc8c3780307ab2ed81bf5e31c23310b936Richard Smith  if (SpellingPtr[0] == 'u' && SpellingPtr[1] == '8')
1514df9ef1bc8c3780307ab2ed81bf5e31c23310b936Richard Smith    SpellingPtr += 2;
1515df9ef1bc8c3780307ab2ed81bf5e31c23310b936Richard Smith
15165cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor  assert(SpellingPtr[0] != 'L' && SpellingPtr[0] != 'u' &&
15175cee1195584fa8672253139c86e922daeda69b9eDouglas Gregor         SpellingPtr[0] != 'U' && "Doesn't handle wide or utf strings yet");
1518719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner
1519df9ef1bc8c3780307ab2ed81bf5e31c23310b936Richard Smith  // For raw string literals, this is easy.
1520df9ef1bc8c3780307ab2ed81bf5e31c23310b936Richard Smith  if (SpellingPtr[0] == 'R') {
1521df9ef1bc8c3780307ab2ed81bf5e31c23310b936Richard Smith    assert(SpellingPtr[1] == '"' && "Should be a raw string literal!");
1522df9ef1bc8c3780307ab2ed81bf5e31c23310b936Richard Smith    // Skip 'R"'.
1523df9ef1bc8c3780307ab2ed81bf5e31c23310b936Richard Smith    SpellingPtr += 2;
1524df9ef1bc8c3780307ab2ed81bf5e31c23310b936Richard Smith    while (*SpellingPtr != '(') {
1525df9ef1bc8c3780307ab2ed81bf5e31c23310b936Richard Smith      ++SpellingPtr;
1526df9ef1bc8c3780307ab2ed81bf5e31c23310b936Richard Smith      assert(SpellingPtr < SpellingEnd && "Missing ( for raw string literal");
1527df9ef1bc8c3780307ab2ed81bf5e31c23310b936Richard Smith    }
1528df9ef1bc8c3780307ab2ed81bf5e31c23310b936Richard Smith    // Skip '('.
1529df9ef1bc8c3780307ab2ed81bf5e31c23310b936Richard Smith    ++SpellingPtr;
1530df9ef1bc8c3780307ab2ed81bf5e31c23310b936Richard Smith    return SpellingPtr - SpellingStart + ByteNo;
1531df9ef1bc8c3780307ab2ed81bf5e31c23310b936Richard Smith  }
15321eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
1533df9ef1bc8c3780307ab2ed81bf5e31c23310b936Richard Smith  // Skip over the leading quote
1534719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner  assert(SpellingPtr[0] == '"' && "Should be a string literal!");
1535719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner  ++SpellingPtr;
15361eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
1537719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner  // Skip over bytes until we find the offset we're looking for.
1538719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner  while (ByteNo) {
1539719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner    assert(SpellingPtr < SpellingEnd && "Didn't find byte offset!");
15401eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
1541719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner    // Step over non-escapes simply.
1542719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner    if (*SpellingPtr != '\\') {
1543719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner      ++SpellingPtr;
1544719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner      --ByteNo;
1545719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner      continue;
1546719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner    }
15471eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
1548719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner    // Otherwise, this is an escape character.  Advance over it.
1549719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner    bool HadError = false;
1550df9ef1bc8c3780307ab2ed81bf5e31c23310b936Richard Smith    if (SpellingPtr[1] == 'u' || SpellingPtr[1] == 'U') {
1551df9ef1bc8c3780307ab2ed81bf5e31c23310b936Richard Smith      const char *EscapePtr = SpellingPtr;
1552df9ef1bc8c3780307ab2ed81bf5e31c23310b936Richard Smith      unsigned Len = MeasureUCNEscape(SpellingStart, SpellingPtr, SpellingEnd,
1553df9ef1bc8c3780307ab2ed81bf5e31c23310b936Richard Smith                                      1, Features, HadError);
1554df9ef1bc8c3780307ab2ed81bf5e31c23310b936Richard Smith      if (Len > ByteNo) {
1555df9ef1bc8c3780307ab2ed81bf5e31c23310b936Richard Smith        // ByteNo is somewhere within the escape sequence.
1556df9ef1bc8c3780307ab2ed81bf5e31c23310b936Richard Smith        SpellingPtr = EscapePtr;
1557df9ef1bc8c3780307ab2ed81bf5e31c23310b936Richard Smith        break;
1558df9ef1bc8c3780307ab2ed81bf5e31c23310b936Richard Smith      }
1559df9ef1bc8c3780307ab2ed81bf5e31c23310b936Richard Smith      ByteNo -= Len;
1560df9ef1bc8c3780307ab2ed81bf5e31c23310b936Richard Smith    } else {
1561e5f0588840b20897631cc8110344fd2745ef4caaRichard Smith      ProcessCharEscape(SpellingStart, SpellingPtr, SpellingEnd, HadError,
1562df9ef1bc8c3780307ab2ed81bf5e31c23310b936Richard Smith                        FullSourceLoc(Tok.getLocation(), SM),
1563e5f0588840b20897631cc8110344fd2745ef4caaRichard Smith                        CharByteWidth*8, Diags, Features);
1564df9ef1bc8c3780307ab2ed81bf5e31c23310b936Richard Smith      --ByteNo;
1565df9ef1bc8c3780307ab2ed81bf5e31c23310b936Richard Smith    }
1566719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner    assert(!HadError && "This method isn't valid on erroneous strings");
1567719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner  }
15681eb4433ac451dc16f4133a88af2d002ac26c58efMike Stump
1569719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner  return SpellingPtr-SpellingStart;
1570719e61573f27c11057ecfe0dd8f141621602c571Chris Lattner}
1571