LiteralSupport.cpp revision 0b7f69d789ca1f76582ee9a336e25861fd0c1416
1//===--- LiteralSupport.cpp - Code to parse and process literals ----------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file implements the NumericLiteralParser, CharLiteralParser, and
11// StringLiteralParser interfaces.
12//
13//===----------------------------------------------------------------------===//
14
15#include "clang/Lex/LiteralSupport.h"
16#include "clang/Lex/Preprocessor.h"
17#include "clang/Basic/Diagnostic.h"
18#include "clang/Basic/SourceManager.h"
19#include "clang/Basic/TargetInfo.h"
20#include "llvm/ADT/StringExtras.h"
21using namespace clang;
22
/// HexDigitValue - Map a hexadecimal digit character ('0'-'9', 'a'-'f' or
/// 'A'-'F') to its numeric value in the range 0..15.  Any other character
/// yields -1.
static int HexDigitValue(char C) {
  int Value = -1;
  if ('0' <= C && C <= '9')
    Value = C - '0';
  else if ('a' <= C && C <= 'f')
    Value = 10 + (C - 'a');
  else if ('A' <= C && C <= 'F')
    Value = 10 + (C - 'A');
  return Value;
}
31
32/// ProcessCharEscape - Parse a standard C escape sequence, which can occur in
33/// either a character or a string literal.
34static unsigned ProcessCharEscape(const char *&ThisTokBuf,
35                                  const char *ThisTokEnd, bool &HadError,
36                                  SourceLocation Loc, bool IsWide,
37                                  Preprocessor &PP) {
38  // Skip the '\' char.
39  ++ThisTokBuf;
40
41  // We know that this character can't be off the end of the buffer, because
42  // that would have been \", which would not have been the end of string.
43  unsigned ResultChar = *ThisTokBuf++;
44  switch (ResultChar) {
45  // These map to themselves.
46  case '\\': case '\'': case '"': case '?': break;
47
48    // These have fixed mappings.
49  case 'a':
50    // TODO: K&R: the meaning of '\\a' is different in traditional C
51    ResultChar = 7;
52    break;
53  case 'b':
54    ResultChar = 8;
55    break;
56  case 'e':
57    PP.Diag(Loc, diag::ext_nonstandard_escape, "e");
58    ResultChar = 27;
59    break;
60  case 'f':
61    ResultChar = 12;
62    break;
63  case 'n':
64    ResultChar = 10;
65    break;
66  case 'r':
67    ResultChar = 13;
68    break;
69  case 't':
70    ResultChar = 9;
71    break;
72  case 'v':
73    ResultChar = 11;
74    break;
75
76    //case 'u': case 'U':  // FIXME: UCNs.
77  case 'x': { // Hex escape.
78    ResultChar = 0;
79    if (ThisTokBuf == ThisTokEnd || !isxdigit(*ThisTokBuf)) {
80      PP.Diag(Loc, diag::err_hex_escape_no_digits);
81      HadError = 1;
82      break;
83    }
84
85    // Hex escapes are a maximal series of hex digits.
86    bool Overflow = false;
87    for (; ThisTokBuf != ThisTokEnd; ++ThisTokBuf) {
88      int CharVal = HexDigitValue(ThisTokBuf[0]);
89      if (CharVal == -1) break;
90      Overflow |= (ResultChar & 0xF0000000) ? true : false;  // About to shift out a digit?
91      ResultChar <<= 4;
92      ResultChar |= CharVal;
93    }
94
95    // See if any bits will be truncated when evaluated as a character.
96    unsigned CharWidth = PP.getTargetInfo().getCharWidth(IsWide);
97
98    if (CharWidth != 32 && (ResultChar >> CharWidth) != 0) {
99      Overflow = true;
100      ResultChar &= ~0U >> (32-CharWidth);
101    }
102
103    // Check for overflow.
104    if (Overflow)   // Too many digits to fit in
105      PP.Diag(Loc, diag::warn_hex_escape_too_large);
106    break;
107  }
108  case '0': case '1': case '2': case '3':
109  case '4': case '5': case '6': case '7': {
110    // Octal escapes.
111    --ThisTokBuf;
112    ResultChar = 0;
113
114    // Octal escapes are a series of octal digits with maximum length 3.
115    // "\0123" is a two digit sequence equal to "\012" "3".
116    unsigned NumDigits = 0;
117    do {
118      ResultChar <<= 3;
119      ResultChar |= *ThisTokBuf++ - '0';
120      ++NumDigits;
121    } while (ThisTokBuf != ThisTokEnd && NumDigits < 3 &&
122             ThisTokBuf[0] >= '0' && ThisTokBuf[0] <= '7');
123
124    // Check for overflow.  Reject '\777', but not L'\777'.
125    unsigned CharWidth = PP.getTargetInfo().getCharWidth(IsWide);
126
127    if (CharWidth != 32 && (ResultChar >> CharWidth) != 0) {
128      PP.Diag(Loc, diag::warn_octal_escape_too_large);
129      ResultChar &= ~0U >> (32-CharWidth);
130    }
131    break;
132  }
133
134    // Otherwise, these are not valid escapes.
135  case '(': case '{': case '[': case '%':
136    // GCC accepts these as extensions.  We warn about them as such though.
137    if (!PP.getLangOptions().NoExtensions) {
138      PP.Diag(Loc, diag::ext_nonstandard_escape,
139              std::string()+(char)ResultChar);
140      break;
141    }
142    // FALL THROUGH.
143  default:
144    if (isgraph(ThisTokBuf[0])) {
145      PP.Diag(Loc, diag::ext_unknown_escape, std::string()+(char)ResultChar);
146    } else {
147      PP.Diag(Loc, diag::ext_unknown_escape, "x"+llvm::utohexstr(ResultChar));
148    }
149    break;
150  }
151
152  return ResultChar;
153}
154
155
156
157
158///       integer-constant: [C99 6.4.4.1]
159///         decimal-constant integer-suffix
160///         octal-constant integer-suffix
161///         hexadecimal-constant integer-suffix
162///       decimal-constant:
163///         nonzero-digit
164///         decimal-constant digit
165///       octal-constant:
166///         0
167///         octal-constant octal-digit
168///       hexadecimal-constant:
169///         hexadecimal-prefix hexadecimal-digit
170///         hexadecimal-constant hexadecimal-digit
171///       hexadecimal-prefix: one of
172///         0x 0X
173///       integer-suffix:
174///         unsigned-suffix [long-suffix]
175///         unsigned-suffix [long-long-suffix]
176///         long-suffix [unsigned-suffix]
///         long-long-suffix [unsigned-suffix]
178///       nonzero-digit:
179///         1 2 3 4 5 6 7 8 9
180///       octal-digit:
181///         0 1 2 3 4 5 6 7
182///       hexadecimal-digit:
183///         0 1 2 3 4 5 6 7 8 9
184///         a b c d e f
185///         A B C D E F
186///       unsigned-suffix: one of
187///         u U
188///       long-suffix: one of
189///         l L
190///       long-long-suffix: one of
191///         ll LL
192///
193///       floating-constant: [C99 6.4.4.2]
194///         TODO: add rules...
195///
196
197NumericLiteralParser::
198NumericLiteralParser(const char *begin, const char *end,
199                     SourceLocation TokLoc, Preprocessor &pp)
200  : PP(pp), ThisTokBegin(begin), ThisTokEnd(end) {
201  s = DigitsBegin = begin;
202  saw_exponent = false;
203  saw_period = false;
204  isLong = false;
205  isUnsigned = false;
206  isLongLong = false;
207  isFloat = false;
208  isImaginary = false;
209  hadError = false;
210
211  if (*s == '0') { // parse radix
212    s++;
213    if ((*s == 'x' || *s == 'X') && (isxdigit(s[1]) || s[1] == '.')) {
214      s++;
215      radix = 16;
216      DigitsBegin = s;
217      s = SkipHexDigits(s);
218      if (s == ThisTokEnd) {
219        // Done.
220      } else if (*s == '.') {
221        s++;
222        saw_period = true;
223        s = SkipHexDigits(s);
224      }
225      // A binary exponent can appear with or with a '.'. If dotted, the
226      // binary exponent is required.
227      if ((*s == 'p' || *s == 'P') && PP.getLangOptions().HexFloats) {
228        s++;
229        saw_exponent = true;
230        if (*s == '+' || *s == '-')  s++; // sign
231        const char *first_non_digit = SkipDigits(s);
232        if (first_non_digit != s) {
233          s = first_non_digit;
234        } else {
235          Diag(PP.AdvanceToTokenCharacter(TokLoc, s-begin),
236               diag::err_exponent_has_no_digits);
237          return;
238        }
239      } else if (saw_period) {
240        Diag(PP.AdvanceToTokenCharacter(TokLoc, s-begin),
241             diag::err_hexconstant_requires_exponent);
242        return;
243      }
244    } else if (*s == 'b' || *s == 'B') {
245      // 0b101010 is a GCC extension.
246      ++s;
247      radix = 2;
248      DigitsBegin = s;
249      s = SkipBinaryDigits(s);
250      if (s == ThisTokEnd) {
251        // Done.
252      } else if (isxdigit(*s)) {
253        Diag(PP.AdvanceToTokenCharacter(TokLoc, s-begin),
254             diag::err_invalid_binary_digit, std::string(s, s+1));
255        return;
256      }
257      PP.Diag(TokLoc, diag::ext_binary_literal);
258    } else {
259      // For now, the radix is set to 8. If we discover that we have a
260      // floating point constant, the radix will change to 10. Octal floating
261      // point constants are not permitted (only decimal and hexadecimal).
262      radix = 8;
263      DigitsBegin = s;
264      s = SkipOctalDigits(s);
265      if (s == ThisTokEnd) {
266        // Done.
267      } else if (isxdigit(*s) && !(*s == 'e' || *s == 'E')) {
268        Diag(PP.AdvanceToTokenCharacter(TokLoc, s-begin),
269             diag::err_invalid_octal_digit, std::string(s, s+1));
270        return;
271      } else if (*s == '.') {
272        s++;
273        radix = 10;
274        saw_period = true;
275        s = SkipDigits(s);
276      }
277      if (*s == 'e' || *s == 'E') { // exponent
278        s++;
279        radix = 10;
280        saw_exponent = true;
281        if (*s == '+' || *s == '-')  s++; // sign
282        const char *first_non_digit = SkipDigits(s);
283        if (first_non_digit != s) {
284          s = first_non_digit;
285        } else {
286          Diag(PP.AdvanceToTokenCharacter(TokLoc, s-begin),
287               diag::err_exponent_has_no_digits);
288          return;
289        }
290      }
291    }
292  } else { // the first digit is non-zero
293    radix = 10;
294    s = SkipDigits(s);
295    if (s == ThisTokEnd) {
296      // Done.
297    } else if (isxdigit(*s) && !(*s == 'e' || *s == 'E')) {
298      Diag(PP.AdvanceToTokenCharacter(TokLoc, s-begin),
299           diag::err_invalid_decimal_digit, std::string(s, s+1));
300      return;
301    } else if (*s == '.') {
302      s++;
303      saw_period = true;
304      s = SkipDigits(s);
305    }
306    if (*s == 'e' || *s == 'E') { // exponent
307      s++;
308      saw_exponent = true;
309      if (*s == '+' || *s == '-')  s++; // sign
310      const char *first_non_digit = SkipDigits(s);
311      if (first_non_digit != s) {
312        s = first_non_digit;
313      } else {
314        Diag(PP.AdvanceToTokenCharacter(TokLoc, s-begin),
315             diag::err_exponent_has_no_digits);
316        return;
317      }
318    }
319  }
320
321  SuffixBegin = s;
322
323  // Parse the suffix.  At this point we can classify whether we have an FP or
324  // integer constant.
325  bool isFPConstant = isFloatingLiteral();
326
327  // Loop over all of the characters of the suffix.  If we see something bad,
328  // we break out of the loop.
329  for (; s != ThisTokEnd; ++s) {
330    switch (*s) {
331    case 'f':      // FP Suffix for "float"
332    case 'F':
333      if (!isFPConstant) break;  // Error for integer constant.
334      if (isFloat || isLong) break; // FF, LF invalid.
335      isFloat = true;
336      continue;  // Success.
337    case 'u':
338    case 'U':
339      if (isFPConstant) break;  // Error for floating constant.
340      if (isUnsigned) break;    // Cannot be repeated.
341      isUnsigned = true;
342      continue;  // Success.
343    case 'l':
344    case 'L':
345      if (isLong || isLongLong) break;  // Cannot be repeated.
346      if (isFloat) break;               // LF invalid.
347
348      // Check for long long.  The L's need to be adjacent and the same case.
349      if (s+1 != ThisTokEnd && s[1] == s[0]) {
350        if (isFPConstant) break;        // long long invalid for floats.
351        isLongLong = true;
352        ++s;  // Eat both of them.
353      } else {
354        isLong = true;
355      }
356      continue;  // Success.
357    case 'i':
358      if (PP.getLangOptions().Microsoft) {
359        // Allow i8, i16, i32, i64, and i128.
360        if (++s == ThisTokEnd) break;
361        switch (*s) {
362          case '8':
363            s++; // i8 suffix
364            break;
365          case '1':
366            if (++s == ThisTokEnd) break;
367            if (*s == '6') s++; // i16 suffix
368            else if (*s == '2') {
369              if (++s == ThisTokEnd) break;
370              if (*s == '8') s++; // i128 suffix
371            }
372            break;
373          case '3':
374            if (++s == ThisTokEnd) break;
375            if (*s == '2') s++; // i32 suffix
376            break;
377          case '6':
378            if (++s == ThisTokEnd) break;
379            if (*s == '4') s++; // i64 suffix
380            break;
381          default:
382            break;
383        }
384        break;
385      }
386      // fall through.
387    case 'I':
388    case 'j':
389    case 'J':
390      if (isImaginary) break;   // Cannot be repeated.
391      PP.Diag(PP.AdvanceToTokenCharacter(TokLoc, s-begin),
392              diag::ext_imaginary_constant);
393      isImaginary = true;
394      continue;  // Success.
395    }
396    // If we reached here, there was an error.
397    break;
398  }
399
400  // Report an error if there are any.
401  if (s != ThisTokEnd) {
402    Diag(PP.AdvanceToTokenCharacter(TokLoc, s-begin),
403         isFPConstant ? diag::err_invalid_suffix_float_constant :
404                        diag::err_invalid_suffix_integer_constant,
405         std::string(SuffixBegin, ThisTokEnd));
406    return;
407  }
408}
409
410/// GetIntegerValue - Convert this numeric literal value to an APInt that
411/// matches Val's input width.  If there is an overflow, set Val to the low bits
412/// of the result and return true.  Otherwise, return false.
413bool NumericLiteralParser::GetIntegerValue(llvm::APInt &Val) {
414  Val = 0;
415  s = DigitsBegin;
416
417  llvm::APInt RadixVal(Val.getBitWidth(), radix);
418  llvm::APInt CharVal(Val.getBitWidth(), 0);
419  llvm::APInt OldVal = Val;
420
421  bool OverflowOccurred = false;
422  while (s < SuffixBegin) {
423    unsigned C = HexDigitValue(*s++);
424
425    // If this letter is out of bound for this radix, reject it.
426    assert(C < radix && "NumericLiteralParser ctor should have rejected this");
427
428    CharVal = C;
429
430    // Add the digit to the value in the appropriate radix.  If adding in digits
431    // made the value smaller, then this overflowed.
432    OldVal = Val;
433
434    // Multiply by radix, did overflow occur on the multiply?
435    Val *= RadixVal;
436    OverflowOccurred |= Val.udiv(RadixVal) != OldVal;
437
438    OldVal = Val;
439    // Add value, did overflow occur on the value?
440    Val += CharVal;
441    OverflowOccurred |= Val.ult(OldVal);
442    OverflowOccurred |= Val.ult(CharVal);
443  }
444  return OverflowOccurred;
445}
446
447llvm::APFloat NumericLiteralParser::
448GetFloatValue(const llvm::fltSemantics &Format, bool* isExact) {
449  using llvm::APFloat;
450
451  llvm::SmallVector<char,256> floatChars;
452  for (unsigned i = 0, n = ThisTokEnd-ThisTokBegin; i != n; ++i)
453    floatChars.push_back(ThisTokBegin[i]);
454
455  floatChars.push_back('\0');
456
457  APFloat V (Format, APFloat::fcZero, false);
458  APFloat::opStatus status;
459
460  status = V.convertFromString(&floatChars[0],APFloat::rmNearestTiesToEven);
461
462  if (isExact)
463    *isExact = status == APFloat::opOK;
464
465  return V;
466}
467
468void NumericLiteralParser::Diag(SourceLocation Loc, unsigned DiagID,
469          const std::string &M) {
470  PP.Diag(Loc, DiagID, M);
471  hadError = true;
472}
473
474
475CharLiteralParser::CharLiteralParser(const char *begin, const char *end,
476                                     SourceLocation Loc, Preprocessor &PP) {
477  // At this point we know that the character matches the regex "L?'.*'".
478  HadError = false;
479  Value = 0;
480
481  // Determine if this is a wide character.
482  IsWide = begin[0] == 'L';
483  if (IsWide) ++begin;
484
485  // Skip over the entry quote.
486  assert(begin[0] == '\'' && "Invalid token lexed");
487  ++begin;
488
489  // FIXME: This assumes that 'int' is 32-bits in overflow calculation, and the
490  // size of "value".
491  assert(PP.getTargetInfo().getIntWidth() == 32 &&
492         "Assumes sizeof(int) == 4 for now");
493  // FIXME: This assumes that wchar_t is 32-bits for now.
494  assert(PP.getTargetInfo().getWCharWidth() == 32 &&
495         "Assumes sizeof(wchar_t) == 4 for now");
496  // FIXME: This extensively assumes that 'char' is 8-bits.
497  assert(PP.getTargetInfo().getCharWidth() == 8 &&
498         "Assumes char is 8 bits");
499
500  bool isFirstChar = true;
501  bool isMultiChar = false;
502  while (begin[0] != '\'') {
503    unsigned ResultChar;
504    if (begin[0] != '\\')     // If this is a normal character, consume it.
505      ResultChar = *begin++;
506    else                      // Otherwise, this is an escape character.
507      ResultChar = ProcessCharEscape(begin, end, HadError, Loc, IsWide, PP);
508
509    // If this is a multi-character constant (e.g. 'abc'), handle it.  These are
510    // implementation defined (C99 6.4.4.4p10).
511    if (!isFirstChar) {
512      // If this is the second character being processed, do special handling.
513      if (!isMultiChar) {
514        isMultiChar = true;
515
516        // Warn about discarding the top bits for multi-char wide-character
517        // constants (L'abcd').
518        if (IsWide)
519          PP.Diag(Loc, diag::warn_extraneous_wide_char_constant);
520      }
521
522      if (IsWide) {
523        // Emulate GCC's (unintentional?) behavior: L'ab' -> L'b'.
524        Value = 0;
525      } else {
526        // Narrow character literals act as though their value is concatenated
527        // in this implementation.
528        if (((Value << 8) >> 8) != Value)
529          PP.Diag(Loc, diag::warn_char_constant_too_large);
530        Value <<= 8;
531      }
532    }
533
534    Value += ResultChar;
535    isFirstChar = false;
536  }
537
538  // If this is a single narrow character, sign extend it (e.g. '\xFF' is "-1")
539  // if 'char' is signed for this target (C99 6.4.4.4p10).  Note that multiple
540  // character constants are not sign extended in the this implementation:
541  // '\xFF\xFF' = 65536 and '\x0\xFF' = 255, which matches GCC.
542  if (!IsWide && !isMultiChar && (Value & 128) &&
543      PP.getTargetInfo().isCharSigned())
544    Value = (signed char)Value;
545}
546
547
548///       string-literal: [C99 6.4.5]
549///          " [s-char-sequence] "
550///         L" [s-char-sequence] "
551///       s-char-sequence:
552///         s-char
553///         s-char-sequence s-char
554///       s-char:
555///         any source character except the double quote ",
556///           backslash \, or newline character
557///         escape-character
558///         universal-character-name
559///       escape-character: [C99 6.4.4.4]
560///         \ escape-code
561///         universal-character-name
562///       escape-code:
563///         character-escape-code
564///         octal-escape-code
565///         hex-escape-code
566///       character-escape-code: one of
567///         n t b r f v a
568///         \ ' " ?
569///       octal-escape-code:
570///         octal-digit
571///         octal-digit octal-digit
572///         octal-digit octal-digit octal-digit
573///       hex-escape-code:
574///         x hex-digit
575///         hex-escape-code hex-digit
576///       universal-character-name:
577///         \u hex-quad
578///         \U hex-quad hex-quad
579///       hex-quad:
580///         hex-digit hex-digit hex-digit hex-digit
581///
582StringLiteralParser::
583StringLiteralParser(const Token *StringToks, unsigned NumStringToks,
584                    Preprocessor &pp, TargetInfo &t)
585  : PP(pp), Target(t) {
586  // Scan all of the string portions, remember the max individual token length,
587  // computing a bound on the concatenated string length, and see whether any
588  // piece is a wide-string.  If any of the string portions is a wide-string
589  // literal, the result is a wide-string literal [C99 6.4.5p4].
590  MaxTokenLength = StringToks[0].getLength();
591  SizeBound = StringToks[0].getLength()-2;  // -2 for "".
592  AnyWide = StringToks[0].is(tok::wide_string_literal);
593
594  hadError = false;
595
596  // Implement Translation Phase #6: concatenation of string literals
597  /// (C99 5.1.1.2p1).  The common case is only one string fragment.
598  for (unsigned i = 1; i != NumStringToks; ++i) {
599    // The string could be shorter than this if it needs cleaning, but this is a
600    // reasonable bound, which is all we need.
601    SizeBound += StringToks[i].getLength()-2;  // -2 for "".
602
603    // Remember maximum string piece length.
604    if (StringToks[i].getLength() > MaxTokenLength)
605      MaxTokenLength = StringToks[i].getLength();
606
607    // Remember if we see any wide strings.
608    AnyWide |= StringToks[i].is(tok::wide_string_literal);
609  }
610
611
612  // Include space for the null terminator.
613  ++SizeBound;
614
615  // TODO: K&R warning: "traditional C rejects string constant concatenation"
616
617  // Get the width in bytes of wchar_t.  If no wchar_t strings are used, do not
618  // query the target.  As such, wchar_tByteWidth is only valid if AnyWide=true.
619  wchar_tByteWidth = ~0U;
620  if (AnyWide) {
621    wchar_tByteWidth = Target.getWCharWidth();
622    assert((wchar_tByteWidth & 7) == 0 && "Assumes wchar_t is byte multiple!");
623    wchar_tByteWidth /= 8;
624  }
625
626  // The output buffer size needs to be large enough to hold wide characters.
627  // This is a worst-case assumption which basically corresponds to L"" "long".
628  if (AnyWide)
629    SizeBound *= wchar_tByteWidth;
630
631  // Size the temporary buffer to hold the result string data.
632  ResultBuf.resize(SizeBound);
633
634  // Likewise, but for each string piece.
635  llvm::SmallString<512> TokenBuf;
636  TokenBuf.resize(MaxTokenLength);
637
638  // Loop over all the strings, getting their spelling, and expanding them to
639  // wide strings as appropriate.
640  ResultPtr = &ResultBuf[0];   // Next byte to fill in.
641
642  Pascal = false;
643
644  for (unsigned i = 0, e = NumStringToks; i != e; ++i) {
645    const char *ThisTokBuf = &TokenBuf[0];
646    // Get the spelling of the token, which eliminates trigraphs, etc.  We know
647    // that ThisTokBuf points to a buffer that is big enough for the whole token
648    // and 'spelled' tokens can only shrink.
649    unsigned ThisTokLen = PP.getSpelling(StringToks[i], ThisTokBuf);
650    const char *ThisTokEnd = ThisTokBuf+ThisTokLen-1;  // Skip end quote.
651
652    // TODO: Input character set mapping support.
653
654    // Skip L marker for wide strings.
655    bool ThisIsWide = false;
656    if (ThisTokBuf[0] == 'L') {
657      ++ThisTokBuf;
658      ThisIsWide = true;
659    }
660
661    assert(ThisTokBuf[0] == '"' && "Expected quote, lexer broken?");
662    ++ThisTokBuf;
663
664    // Check if this is a pascal string
665    if (pp.getLangOptions().PascalStrings && ThisTokBuf + 1 != ThisTokEnd &&
666        ThisTokBuf[0] == '\\' && ThisTokBuf[1] == 'p') {
667
668      // If the \p sequence is found in the first token, we have a pascal string
669      // Otherwise, if we already have a pascal string, ignore the first \p
670      if (i == 0) {
671        ++ThisTokBuf;
672        Pascal = true;
673      } else if (Pascal)
674        ThisTokBuf += 2;
675    }
676
677    while (ThisTokBuf != ThisTokEnd) {
678      // Is this a span of non-escape characters?
679      if (ThisTokBuf[0] != '\\') {
680        const char *InStart = ThisTokBuf;
681        do {
682          ++ThisTokBuf;
683        } while (ThisTokBuf != ThisTokEnd && ThisTokBuf[0] != '\\');
684
685        // Copy the character span over.
686        unsigned Len = ThisTokBuf-InStart;
687        if (!AnyWide) {
688          memcpy(ResultPtr, InStart, Len);
689          ResultPtr += Len;
690        } else {
691          // Note: our internal rep of wide char tokens is always little-endian.
692          for (; Len; --Len, ++InStart) {
693            *ResultPtr++ = InStart[0];
694            // Add zeros at the end.
695            for (unsigned i = 1, e = wchar_tByteWidth; i != e; ++i)
696            *ResultPtr++ = 0;
697          }
698        }
699        continue;
700      }
701
702      // Otherwise, this is an escape character.  Process it.
703      unsigned ResultChar = ProcessCharEscape(ThisTokBuf, ThisTokEnd, hadError,
704                                              StringToks[i].getLocation(),
705                                              ThisIsWide, PP);
706
707      // Note: our internal rep of wide char tokens is always little-endian.
708      *ResultPtr++ = ResultChar & 0xFF;
709
710      if (AnyWide) {
711        for (unsigned i = 1, e = wchar_tByteWidth; i != e; ++i)
712          *ResultPtr++ = ResultChar >> i*8;
713      }
714    }
715  }
716
717  // Add zero terminator.
718  *ResultPtr = 0;
719  if (AnyWide) {
720    for (unsigned i = 1, e = wchar_tByteWidth; i != e; ++i)
721    *ResultPtr++ = 0;
722  }
723
724  if (Pascal)
725    ResultBuf[0] = ResultPtr-&ResultBuf[0]-1;
726}
727