1//===- LLLexer.cpp - Lexer for .ll Files ----------------------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// Implement the Lexer for .ll files.
11//
12//===----------------------------------------------------------------------===//
13
14#include "LLLexer.h"
15#include "llvm/ADT/StringExtras.h"
16#include "llvm/ADT/Twine.h"
17#include "llvm/AsmParser/Parser.h"
18#include "llvm/IR/DerivedTypes.h"
19#include "llvm/IR/Instruction.h"
20#include "llvm/IR/LLVMContext.h"
21#include "llvm/Support/ErrorHandling.h"
22#include "llvm/Support/MathExtras.h"
23#include "llvm/Support/MemoryBuffer.h"
24#include "llvm/Support/SourceMgr.h"
25#include "llvm/Support/raw_ostream.h"
26#include <cctype>
27#include <cstdio>
28#include <cstdlib>
29#include <cstring>
30using namespace llvm;
31
32bool LLLexer::Error(LocTy ErrorLoc, const Twine &Msg) const {
33  ErrorInfo = SM.GetMessage(ErrorLoc, SourceMgr::DK_Error, Msg);
34  return true;
35}
36
37void LLLexer::Warning(LocTy WarningLoc, const Twine &Msg) const {
38  SM.PrintMessage(WarningLoc, SourceMgr::DK_Warning, Msg);
39}
40
41//===----------------------------------------------------------------------===//
42// Helper functions.
43//===----------------------------------------------------------------------===//
44
45// atoull - Convert an ascii string of decimal digits into the unsigned long
46// long representation... this does not have to do input error checking,
47// because we know that the input will be matched by a suitable regex...
48//
49uint64_t LLLexer::atoull(const char *Buffer, const char *End) {
50  uint64_t Result = 0;
51  for (; Buffer != End; Buffer++) {
52    uint64_t OldRes = Result;
53    Result *= 10;
54    Result += *Buffer-'0';
55    if (Result < OldRes) {  // Uh, oh, overflow detected!!!
56      Error("constant bigger than 64 bits detected!");
57      return 0;
58    }
59  }
60  return Result;
61}
62
63uint64_t LLLexer::HexIntToVal(const char *Buffer, const char *End) {
64  uint64_t Result = 0;
65  for (; Buffer != End; ++Buffer) {
66    uint64_t OldRes = Result;
67    Result *= 16;
68    Result += hexDigitValue(*Buffer);
69
70    if (Result < OldRes) {   // Uh, oh, overflow detected!!!
71      Error("constant bigger than 64 bits detected!");
72      return 0;
73    }
74  }
75  return Result;
76}
77
78void LLLexer::HexToIntPair(const char *Buffer, const char *End,
79                           uint64_t Pair[2]) {
80  Pair[0] = 0;
81  if (End - Buffer >= 16) {
82    for (int i = 0; i < 16; i++, Buffer++) {
83      assert(Buffer != End);
84      Pair[0] *= 16;
85      Pair[0] += hexDigitValue(*Buffer);
86    }
87  }
88  Pair[1] = 0;
89  for (int i = 0; i < 16 && Buffer != End; i++, Buffer++) {
90    Pair[1] *= 16;
91    Pair[1] += hexDigitValue(*Buffer);
92  }
93  if (Buffer != End)
94    Error("constant bigger than 128 bits detected!");
95}
96
97/// FP80HexToIntPair - translate an 80 bit FP80 number (20 hexits) into
98/// { low64, high16 } as usual for an APInt.
99void LLLexer::FP80HexToIntPair(const char *Buffer, const char *End,
100                           uint64_t Pair[2]) {
101  Pair[1] = 0;
102  for (int i=0; i<4 && Buffer != End; i++, Buffer++) {
103    assert(Buffer != End);
104    Pair[1] *= 16;
105    Pair[1] += hexDigitValue(*Buffer);
106  }
107  Pair[0] = 0;
108  for (int i = 0; i < 16 && Buffer != End; i++, Buffer++) {
109    Pair[0] *= 16;
110    Pair[0] += hexDigitValue(*Buffer);
111  }
112  if (Buffer != End)
113    Error("constant bigger than 128 bits detected!");
114}
115
116// UnEscapeLexed - Run through the specified buffer and change \xx codes to the
117// appropriate character.
118static void UnEscapeLexed(std::string &Str) {
119  if (Str.empty()) return;
120
121  char *Buffer = &Str[0], *EndBuffer = Buffer+Str.size();
122  char *BOut = Buffer;
123  for (char *BIn = Buffer; BIn != EndBuffer; ) {
124    if (BIn[0] == '\\') {
125      if (BIn < EndBuffer-1 && BIn[1] == '\\') {
126        *BOut++ = '\\'; // Two \ becomes one
127        BIn += 2;
128      } else if (BIn < EndBuffer-2 &&
129                 isxdigit(static_cast<unsigned char>(BIn[1])) &&
130                 isxdigit(static_cast<unsigned char>(BIn[2]))) {
131        *BOut = hexDigitValue(BIn[1]) * 16 + hexDigitValue(BIn[2]);
132        BIn += 3;                           // Skip over handled chars
133        ++BOut;
134      } else {
135        *BOut++ = *BIn++;
136      }
137    } else {
138      *BOut++ = *BIn++;
139    }
140  }
141  Str.resize(BOut-Buffer);
142}
143
/// isLabelChar - Return true for [-a-zA-Z$._0-9].
static bool isLabelChar(char C) {
  switch (C) {
  case '-':
  case '$':
  case '.':
  case '_':
    return true;
  default:
    // Cast first: passing a negative char to isalnum is undefined.
    return isalnum(static_cast<unsigned char>(C)) != 0;
  }
}
149
150
151/// isLabelTail - Return true if this pointer points to a valid end of a label.
152static const char *isLabelTail(const char *CurPtr) {
153  while (1) {
154    if (CurPtr[0] == ':') return CurPtr+1;
155    if (!isLabelChar(CurPtr[0])) return nullptr;
156    ++CurPtr;
157  }
158}
159
160
161
162//===----------------------------------------------------------------------===//
163// Lexer definition.
164//===----------------------------------------------------------------------===//
165
166LLLexer::LLLexer(StringRef StartBuf, SourceMgr &sm, SMDiagnostic &Err,
167                 LLVMContext &C)
168  : CurBuf(StartBuf), ErrorInfo(Err), SM(sm), Context(C), APFloatVal(0.0) {
169  CurPtr = CurBuf.begin();
170}
171
172int LLLexer::getNextChar() {
173  char CurChar = *CurPtr++;
174  switch (CurChar) {
175  default: return (unsigned char)CurChar;
176  case 0:
177    // A nul character in the stream is either the end of the current buffer or
178    // a random nul in the file.  Disambiguate that here.
179    if (CurPtr-1 != CurBuf.end())
180      return 0;  // Just whitespace.
181
182    // Otherwise, return end of file.
183    --CurPtr;  // Another call to lex will return EOF again.
184    return EOF;
185  }
186}
187
188
189lltok::Kind LLLexer::LexToken() {
190  TokStart = CurPtr;
191
192  int CurChar = getNextChar();
193  switch (CurChar) {
194  default:
195    // Handle letters: [a-zA-Z_]
196    if (isalpha(static_cast<unsigned char>(CurChar)) || CurChar == '_')
197      return LexIdentifier();
198
199    return lltok::Error;
200  case EOF: return lltok::Eof;
201  case 0:
202  case ' ':
203  case '\t':
204  case '\n':
205  case '\r':
206    // Ignore whitespace.
207    return LexToken();
208  case '+': return LexPositive();
209  case '@': return LexAt();
210  case '$': return LexDollar();
211  case '%': return LexPercent();
212  case '"': return LexQuote();
213  case '.':
214    if (const char *Ptr = isLabelTail(CurPtr)) {
215      CurPtr = Ptr;
216      StrVal.assign(TokStart, CurPtr-1);
217      return lltok::LabelStr;
218    }
219    if (CurPtr[0] == '.' && CurPtr[1] == '.') {
220      CurPtr += 2;
221      return lltok::dotdotdot;
222    }
223    return lltok::Error;
224  case ';':
225    SkipLineComment();
226    return LexToken();
227  case '!': return LexExclaim();
228  case '#': return LexHash();
229  case '0': case '1': case '2': case '3': case '4':
230  case '5': case '6': case '7': case '8': case '9':
231  case '-':
232    return LexDigitOrNegative();
233  case '=': return lltok::equal;
234  case '[': return lltok::lsquare;
235  case ']': return lltok::rsquare;
236  case '{': return lltok::lbrace;
237  case '}': return lltok::rbrace;
238  case '<': return lltok::less;
239  case '>': return lltok::greater;
240  case '(': return lltok::lparen;
241  case ')': return lltok::rparen;
242  case ',': return lltok::comma;
243  case '*': return lltok::star;
244  case '|': return lltok::bar;
245  }
246}
247
248void LLLexer::SkipLineComment() {
249  while (1) {
250    if (CurPtr[0] == '\n' || CurPtr[0] == '\r' || getNextChar() == EOF)
251      return;
252  }
253}
254
255/// Lex all tokens that start with an @ character.
256///   GlobalVar   @\"[^\"]*\"
257///   GlobalVar   @[-a-zA-Z$._][-a-zA-Z$._0-9]*
258///   GlobalVarID @[0-9]+
259lltok::Kind LLLexer::LexAt() {
260  return LexVar(lltok::GlobalVar, lltok::GlobalID);
261}
262
263lltok::Kind LLLexer::LexDollar() {
264  if (const char *Ptr = isLabelTail(TokStart)) {
265    CurPtr = Ptr;
266    StrVal.assign(TokStart, CurPtr - 1);
267    return lltok::LabelStr;
268  }
269
270  // Handle DollarStringConstant: $\"[^\"]*\"
271  if (CurPtr[0] == '"') {
272    ++CurPtr;
273
274    while (1) {
275      int CurChar = getNextChar();
276
277      if (CurChar == EOF) {
278        Error("end of file in COMDAT variable name");
279        return lltok::Error;
280      }
281      if (CurChar == '"') {
282        StrVal.assign(TokStart + 2, CurPtr - 1);
283        UnEscapeLexed(StrVal);
284        if (StringRef(StrVal).find_first_of(0) != StringRef::npos) {
285          Error("Null bytes are not allowed in names");
286          return lltok::Error;
287        }
288        return lltok::ComdatVar;
289      }
290    }
291  }
292
293  // Handle ComdatVarName: $[-a-zA-Z$._][-a-zA-Z$._0-9]*
294  if (ReadVarName())
295    return lltok::ComdatVar;
296
297  return lltok::Error;
298}
299
300/// ReadString - Read a string until the closing quote.
301lltok::Kind LLLexer::ReadString(lltok::Kind kind) {
302  const char *Start = CurPtr;
303  while (1) {
304    int CurChar = getNextChar();
305
306    if (CurChar == EOF) {
307      Error("end of file in string constant");
308      return lltok::Error;
309    }
310    if (CurChar == '"') {
311      StrVal.assign(Start, CurPtr-1);
312      UnEscapeLexed(StrVal);
313      return kind;
314    }
315  }
316}
317
318/// ReadVarName - Read the rest of a token containing a variable name.
319bool LLLexer::ReadVarName() {
320  const char *NameStart = CurPtr;
321  if (isalpha(static_cast<unsigned char>(CurPtr[0])) ||
322      CurPtr[0] == '-' || CurPtr[0] == '$' ||
323      CurPtr[0] == '.' || CurPtr[0] == '_') {
324    ++CurPtr;
325    while (isalnum(static_cast<unsigned char>(CurPtr[0])) ||
326           CurPtr[0] == '-' || CurPtr[0] == '$' ||
327           CurPtr[0] == '.' || CurPtr[0] == '_')
328      ++CurPtr;
329
330    StrVal.assign(NameStart, CurPtr);
331    return true;
332  }
333  return false;
334}
335
336lltok::Kind LLLexer::LexVar(lltok::Kind Var, lltok::Kind VarID) {
337  // Handle StringConstant: \"[^\"]*\"
338  if (CurPtr[0] == '"') {
339    ++CurPtr;
340
341    while (1) {
342      int CurChar = getNextChar();
343
344      if (CurChar == EOF) {
345        Error("end of file in global variable name");
346        return lltok::Error;
347      }
348      if (CurChar == '"') {
349        StrVal.assign(TokStart+2, CurPtr-1);
350        UnEscapeLexed(StrVal);
351        if (StringRef(StrVal).find_first_of(0) != StringRef::npos) {
352          Error("Null bytes are not allowed in names");
353          return lltok::Error;
354        }
355        return Var;
356      }
357    }
358  }
359
360  // Handle VarName: [-a-zA-Z$._][-a-zA-Z$._0-9]*
361  if (ReadVarName())
362    return Var;
363
364  // Handle VarID: [0-9]+
365  if (isdigit(static_cast<unsigned char>(CurPtr[0]))) {
366    for (++CurPtr; isdigit(static_cast<unsigned char>(CurPtr[0])); ++CurPtr)
367      /*empty*/;
368
369    uint64_t Val = atoull(TokStart+1, CurPtr);
370    if ((unsigned)Val != Val)
371      Error("invalid value number (too large)!");
372    UIntVal = unsigned(Val);
373    return VarID;
374  }
375  return lltok::Error;
376}
377
378/// Lex all tokens that start with a % character.
379///   LocalVar   ::= %\"[^\"]*\"
380///   LocalVar   ::= %[-a-zA-Z$._][-a-zA-Z$._0-9]*
381///   LocalVarID ::= %[0-9]+
382lltok::Kind LLLexer::LexPercent() {
383  return LexVar(lltok::LocalVar, lltok::LocalVarID);
384}
385
386/// Lex all tokens that start with a " character.
387///   QuoteLabel        "[^"]+":
388///   StringConstant    "[^"]*"
389lltok::Kind LLLexer::LexQuote() {
390  lltok::Kind kind = ReadString(lltok::StringConstant);
391  if (kind == lltok::Error || kind == lltok::Eof)
392    return kind;
393
394  if (CurPtr[0] == ':') {
395    ++CurPtr;
396    if (StringRef(StrVal).find_first_of(0) != StringRef::npos) {
397      Error("Null bytes are not allowed in names");
398      kind = lltok::Error;
399    } else {
400      kind = lltok::LabelStr;
401    }
402  }
403
404  return kind;
405}
406
407/// Lex all tokens that start with a ! character.
408///    !foo
409///    !
410lltok::Kind LLLexer::LexExclaim() {
411  // Lex a metadata name as a MetadataVar.
412  if (isalpha(static_cast<unsigned char>(CurPtr[0])) ||
413      CurPtr[0] == '-' || CurPtr[0] == '$' ||
414      CurPtr[0] == '.' || CurPtr[0] == '_' || CurPtr[0] == '\\') {
415    ++CurPtr;
416    while (isalnum(static_cast<unsigned char>(CurPtr[0])) ||
417           CurPtr[0] == '-' || CurPtr[0] == '$' ||
418           CurPtr[0] == '.' || CurPtr[0] == '_' || CurPtr[0] == '\\')
419      ++CurPtr;
420
421    StrVal.assign(TokStart+1, CurPtr);   // Skip !
422    UnEscapeLexed(StrVal);
423    return lltok::MetadataVar;
424  }
425  return lltok::exclaim;
426}
427
428/// Lex all tokens that start with a # character.
429///    AttrGrpID ::= #[0-9]+
430lltok::Kind LLLexer::LexHash() {
431  // Handle AttrGrpID: #[0-9]+
432  if (isdigit(static_cast<unsigned char>(CurPtr[0]))) {
433    for (++CurPtr; isdigit(static_cast<unsigned char>(CurPtr[0])); ++CurPtr)
434      /*empty*/;
435
436    uint64_t Val = atoull(TokStart+1, CurPtr);
437    if ((unsigned)Val != Val)
438      Error("invalid value number (too large)!");
439    UIntVal = unsigned(Val);
440    return lltok::AttrGrpID;
441  }
442
443  return lltok::Error;
444}
445
446/// Lex a label, integer type, keyword, or hexadecimal integer constant.
447///    Label           [-a-zA-Z$._0-9]+:
448///    IntegerType     i[0-9]+
449///    Keyword         sdiv, float, ...
450///    HexIntConstant  [us]0x[0-9A-Fa-f]+
451lltok::Kind LLLexer::LexIdentifier() {
452  const char *StartChar = CurPtr;
453  const char *IntEnd = CurPtr[-1] == 'i' ? nullptr : StartChar;
454  const char *KeywordEnd = nullptr;
455
456  for (; isLabelChar(*CurPtr); ++CurPtr) {
457    // If we decide this is an integer, remember the end of the sequence.
458    if (!IntEnd && !isdigit(static_cast<unsigned char>(*CurPtr)))
459      IntEnd = CurPtr;
460    if (!KeywordEnd && !isalnum(static_cast<unsigned char>(*CurPtr)) &&
461        *CurPtr != '_')
462      KeywordEnd = CurPtr;
463  }
464
465  // If we stopped due to a colon, this really is a label.
466  if (*CurPtr == ':') {
467    StrVal.assign(StartChar-1, CurPtr++);
468    return lltok::LabelStr;
469  }
470
471  // Otherwise, this wasn't a label.  If this was valid as an integer type,
472  // return it.
473  if (!IntEnd) IntEnd = CurPtr;
474  if (IntEnd != StartChar) {
475    CurPtr = IntEnd;
476    uint64_t NumBits = atoull(StartChar, CurPtr);
477    if (NumBits < IntegerType::MIN_INT_BITS ||
478        NumBits > IntegerType::MAX_INT_BITS) {
479      Error("bitwidth for integer type out of range!");
480      return lltok::Error;
481    }
482    TyVal = IntegerType::get(Context, NumBits);
483    return lltok::Type;
484  }
485
486  // Otherwise, this was a letter sequence.  See which keyword this is.
487  if (!KeywordEnd) KeywordEnd = CurPtr;
488  CurPtr = KeywordEnd;
489  --StartChar;
490  StringRef Keyword(StartChar, CurPtr - StartChar);
491#define KEYWORD(STR)                                                           \
492  do {                                                                         \
493    if (Keyword == #STR)                                                       \
494      return lltok::kw_##STR;                                                  \
495  } while (0)
496
497  KEYWORD(true);    KEYWORD(false);
498  KEYWORD(declare); KEYWORD(define);
499  KEYWORD(global);  KEYWORD(constant);
500
501  KEYWORD(private);
502  KEYWORD(internal);
503  KEYWORD(available_externally);
504  KEYWORD(linkonce);
505  KEYWORD(linkonce_odr);
506  KEYWORD(weak); // Use as a linkage, and a modifier for "cmpxchg".
507  KEYWORD(weak_odr);
508  KEYWORD(appending);
509  KEYWORD(dllimport);
510  KEYWORD(dllexport);
511  KEYWORD(common);
512  KEYWORD(default);
513  KEYWORD(hidden);
514  KEYWORD(protected);
515  KEYWORD(unnamed_addr);
516  KEYWORD(externally_initialized);
517  KEYWORD(extern_weak);
518  KEYWORD(external);
519  KEYWORD(thread_local);
520  KEYWORD(localdynamic);
521  KEYWORD(initialexec);
522  KEYWORD(localexec);
523  KEYWORD(zeroinitializer);
524  KEYWORD(undef);
525  KEYWORD(null);
526  KEYWORD(none);
527  KEYWORD(to);
528  KEYWORD(caller);
529  KEYWORD(within);
530  KEYWORD(from);
531  KEYWORD(tail);
532  KEYWORD(musttail);
533  KEYWORD(notail);
534  KEYWORD(target);
535  KEYWORD(triple);
536  KEYWORD(unwind);
537  KEYWORD(deplibs);             // FIXME: Remove in 4.0.
538  KEYWORD(datalayout);
539  KEYWORD(volatile);
540  KEYWORD(atomic);
541  KEYWORD(unordered);
542  KEYWORD(monotonic);
543  KEYWORD(acquire);
544  KEYWORD(release);
545  KEYWORD(acq_rel);
546  KEYWORD(seq_cst);
547  KEYWORD(singlethread);
548
549  KEYWORD(nnan);
550  KEYWORD(ninf);
551  KEYWORD(nsz);
552  KEYWORD(arcp);
553  KEYWORD(fast);
554  KEYWORD(nuw);
555  KEYWORD(nsw);
556  KEYWORD(exact);
557  KEYWORD(inbounds);
558  KEYWORD(align);
559  KEYWORD(addrspace);
560  KEYWORD(section);
561  KEYWORD(alias);
562  KEYWORD(module);
563  KEYWORD(asm);
564  KEYWORD(sideeffect);
565  KEYWORD(alignstack);
566  KEYWORD(inteldialect);
567  KEYWORD(gc);
568  KEYWORD(prefix);
569  KEYWORD(prologue);
570
571  KEYWORD(ccc);
572  KEYWORD(fastcc);
573  KEYWORD(coldcc);
574  KEYWORD(x86_stdcallcc);
575  KEYWORD(x86_fastcallcc);
576  KEYWORD(x86_thiscallcc);
577  KEYWORD(x86_vectorcallcc);
578  KEYWORD(arm_apcscc);
579  KEYWORD(arm_aapcscc);
580  KEYWORD(arm_aapcs_vfpcc);
581  KEYWORD(msp430_intrcc);
582  KEYWORD(ptx_kernel);
583  KEYWORD(ptx_device);
584  KEYWORD(spir_kernel);
585  KEYWORD(spir_func);
586  KEYWORD(intel_ocl_bicc);
587  KEYWORD(x86_64_sysvcc);
588  KEYWORD(x86_64_win64cc);
589  KEYWORD(webkit_jscc);
590  KEYWORD(anyregcc);
591  KEYWORD(preserve_mostcc);
592  KEYWORD(preserve_allcc);
593  KEYWORD(ghccc);
594  KEYWORD(x86_intrcc);
595  KEYWORD(hhvmcc);
596  KEYWORD(hhvm_ccc);
597  KEYWORD(cxx_fast_tlscc);
598
599  KEYWORD(cc);
600  KEYWORD(c);
601
602  KEYWORD(attributes);
603
604  KEYWORD(alwaysinline);
605  KEYWORD(argmemonly);
606  KEYWORD(builtin);
607  KEYWORD(byval);
608  KEYWORD(inalloca);
609  KEYWORD(cold);
610  KEYWORD(convergent);
611  KEYWORD(dereferenceable);
612  KEYWORD(dereferenceable_or_null);
613  KEYWORD(inaccessiblememonly);
614  KEYWORD(inaccessiblemem_or_argmemonly);
615  KEYWORD(inlinehint);
616  KEYWORD(inreg);
617  KEYWORD(jumptable);
618  KEYWORD(minsize);
619  KEYWORD(naked);
620  KEYWORD(nest);
621  KEYWORD(noalias);
622  KEYWORD(nobuiltin);
623  KEYWORD(nocapture);
624  KEYWORD(noduplicate);
625  KEYWORD(noimplicitfloat);
626  KEYWORD(noinline);
627  KEYWORD(norecurse);
628  KEYWORD(nonlazybind);
629  KEYWORD(nonnull);
630  KEYWORD(noredzone);
631  KEYWORD(noreturn);
632  KEYWORD(nounwind);
633  KEYWORD(optnone);
634  KEYWORD(optsize);
635  KEYWORD(readnone);
636  KEYWORD(readonly);
637  KEYWORD(returned);
638  KEYWORD(returns_twice);
639  KEYWORD(signext);
640  KEYWORD(sret);
641  KEYWORD(ssp);
642  KEYWORD(sspreq);
643  KEYWORD(sspstrong);
644  KEYWORD(safestack);
645  KEYWORD(sanitize_address);
646  KEYWORD(sanitize_thread);
647  KEYWORD(sanitize_memory);
648  KEYWORD(uwtable);
649  KEYWORD(zeroext);
650
651  KEYWORD(type);
652  KEYWORD(opaque);
653
654  KEYWORD(comdat);
655
656  // Comdat types
657  KEYWORD(any);
658  KEYWORD(exactmatch);
659  KEYWORD(largest);
660  KEYWORD(noduplicates);
661  KEYWORD(samesize);
662
663  KEYWORD(eq); KEYWORD(ne); KEYWORD(slt); KEYWORD(sgt); KEYWORD(sle);
664  KEYWORD(sge); KEYWORD(ult); KEYWORD(ugt); KEYWORD(ule); KEYWORD(uge);
665  KEYWORD(oeq); KEYWORD(one); KEYWORD(olt); KEYWORD(ogt); KEYWORD(ole);
666  KEYWORD(oge); KEYWORD(ord); KEYWORD(uno); KEYWORD(ueq); KEYWORD(une);
667
668  KEYWORD(xchg); KEYWORD(nand); KEYWORD(max); KEYWORD(min); KEYWORD(umax);
669  KEYWORD(umin);
670
671  KEYWORD(x);
672  KEYWORD(blockaddress);
673
674  // Metadata types.
675  KEYWORD(distinct);
676
677  // Use-list order directives.
678  KEYWORD(uselistorder);
679  KEYWORD(uselistorder_bb);
680
681  KEYWORD(personality);
682  KEYWORD(cleanup);
683  KEYWORD(catch);
684  KEYWORD(filter);
685#undef KEYWORD
686
687  // Keywords for types.
688#define TYPEKEYWORD(STR, LLVMTY)                                               \
689  do {                                                                         \
690    if (Keyword == STR) {                                                      \
691      TyVal = LLVMTY;                                                          \
692      return lltok::Type;                                                      \
693    }                                                                          \
694  } while (false)
695  TYPEKEYWORD("void",      Type::getVoidTy(Context));
696  TYPEKEYWORD("half",      Type::getHalfTy(Context));
697  TYPEKEYWORD("float",     Type::getFloatTy(Context));
698  TYPEKEYWORD("double",    Type::getDoubleTy(Context));
699  TYPEKEYWORD("x86_fp80",  Type::getX86_FP80Ty(Context));
700  TYPEKEYWORD("fp128",     Type::getFP128Ty(Context));
701  TYPEKEYWORD("ppc_fp128", Type::getPPC_FP128Ty(Context));
702  TYPEKEYWORD("label",     Type::getLabelTy(Context));
703  TYPEKEYWORD("metadata",  Type::getMetadataTy(Context));
704  TYPEKEYWORD("x86_mmx",   Type::getX86_MMXTy(Context));
705  TYPEKEYWORD("token",     Type::getTokenTy(Context));
706#undef TYPEKEYWORD
707
708  // Keywords for instructions.
709#define INSTKEYWORD(STR, Enum)                                                 \
710  do {                                                                         \
711    if (Keyword == #STR) {                                                     \
712      UIntVal = Instruction::Enum;                                             \
713      return lltok::kw_##STR;                                                  \
714    }                                                                          \
715  } while (false)
716
717  INSTKEYWORD(add,   Add);  INSTKEYWORD(fadd,   FAdd);
718  INSTKEYWORD(sub,   Sub);  INSTKEYWORD(fsub,   FSub);
719  INSTKEYWORD(mul,   Mul);  INSTKEYWORD(fmul,   FMul);
720  INSTKEYWORD(udiv,  UDiv); INSTKEYWORD(sdiv,  SDiv); INSTKEYWORD(fdiv,  FDiv);
721  INSTKEYWORD(urem,  URem); INSTKEYWORD(srem,  SRem); INSTKEYWORD(frem,  FRem);
722  INSTKEYWORD(shl,   Shl);  INSTKEYWORD(lshr,  LShr); INSTKEYWORD(ashr,  AShr);
723  INSTKEYWORD(and,   And);  INSTKEYWORD(or,    Or);   INSTKEYWORD(xor,   Xor);
724  INSTKEYWORD(icmp,  ICmp); INSTKEYWORD(fcmp,  FCmp);
725
726  INSTKEYWORD(phi,         PHI);
727  INSTKEYWORD(call,        Call);
728  INSTKEYWORD(trunc,       Trunc);
729  INSTKEYWORD(zext,        ZExt);
730  INSTKEYWORD(sext,        SExt);
731  INSTKEYWORD(fptrunc,     FPTrunc);
732  INSTKEYWORD(fpext,       FPExt);
733  INSTKEYWORD(uitofp,      UIToFP);
734  INSTKEYWORD(sitofp,      SIToFP);
735  INSTKEYWORD(fptoui,      FPToUI);
736  INSTKEYWORD(fptosi,      FPToSI);
737  INSTKEYWORD(inttoptr,    IntToPtr);
738  INSTKEYWORD(ptrtoint,    PtrToInt);
739  INSTKEYWORD(bitcast,     BitCast);
740  INSTKEYWORD(addrspacecast, AddrSpaceCast);
741  INSTKEYWORD(select,      Select);
742  INSTKEYWORD(va_arg,      VAArg);
743  INSTKEYWORD(ret,         Ret);
744  INSTKEYWORD(br,          Br);
745  INSTKEYWORD(switch,      Switch);
746  INSTKEYWORD(indirectbr,  IndirectBr);
747  INSTKEYWORD(invoke,      Invoke);
748  INSTKEYWORD(resume,      Resume);
749  INSTKEYWORD(unreachable, Unreachable);
750
751  INSTKEYWORD(alloca,      Alloca);
752  INSTKEYWORD(load,        Load);
753  INSTKEYWORD(store,       Store);
754  INSTKEYWORD(cmpxchg,     AtomicCmpXchg);
755  INSTKEYWORD(atomicrmw,   AtomicRMW);
756  INSTKEYWORD(fence,       Fence);
757  INSTKEYWORD(getelementptr, GetElementPtr);
758
759  INSTKEYWORD(extractelement, ExtractElement);
760  INSTKEYWORD(insertelement,  InsertElement);
761  INSTKEYWORD(shufflevector,  ShuffleVector);
762  INSTKEYWORD(extractvalue,   ExtractValue);
763  INSTKEYWORD(insertvalue,    InsertValue);
764  INSTKEYWORD(landingpad,     LandingPad);
765  INSTKEYWORD(cleanupret,     CleanupRet);
766  INSTKEYWORD(catchret,       CatchRet);
767  INSTKEYWORD(catchswitch,  CatchSwitch);
768  INSTKEYWORD(catchpad,     CatchPad);
769  INSTKEYWORD(cleanuppad,   CleanupPad);
770#undef INSTKEYWORD
771
772#define DWKEYWORD(TYPE, TOKEN)                                                 \
773  do {                                                                         \
774    if (Keyword.startswith("DW_" #TYPE "_")) {                                 \
775      StrVal.assign(Keyword.begin(), Keyword.end());                           \
776      return lltok::TOKEN;                                                     \
777    }                                                                          \
778  } while (false)
779  DWKEYWORD(TAG, DwarfTag);
780  DWKEYWORD(ATE, DwarfAttEncoding);
781  DWKEYWORD(VIRTUALITY, DwarfVirtuality);
782  DWKEYWORD(LANG, DwarfLang);
783  DWKEYWORD(OP, DwarfOp);
784  DWKEYWORD(MACINFO, DwarfMacinfo);
785#undef DWKEYWORD
786
787  if (Keyword.startswith("DIFlag")) {
788    StrVal.assign(Keyword.begin(), Keyword.end());
789    return lltok::DIFlag;
790  }
791
792  // Check for [us]0x[0-9A-Fa-f]+ which are Hexadecimal constant generated by
793  // the CFE to avoid forcing it to deal with 64-bit numbers.
794  if ((TokStart[0] == 'u' || TokStart[0] == 's') &&
795      TokStart[1] == '0' && TokStart[2] == 'x' &&
796      isxdigit(static_cast<unsigned char>(TokStart[3]))) {
797    int len = CurPtr-TokStart-3;
798    uint32_t bits = len * 4;
799    StringRef HexStr(TokStart + 3, len);
800    if (!std::all_of(HexStr.begin(), HexStr.end(), isxdigit)) {
801      // Bad token, return it as an error.
802      CurPtr = TokStart+3;
803      return lltok::Error;
804    }
805    APInt Tmp(bits, HexStr, 16);
806    uint32_t activeBits = Tmp.getActiveBits();
807    if (activeBits > 0 && activeBits < bits)
808      Tmp = Tmp.trunc(activeBits);
809    APSIntVal = APSInt(Tmp, TokStart[0] == 'u');
810    return lltok::APSInt;
811  }
812
813  // If this is "cc1234", return this as just "cc".
814  if (TokStart[0] == 'c' && TokStart[1] == 'c') {
815    CurPtr = TokStart+2;
816    return lltok::kw_cc;
817  }
818
819  // Finally, if this isn't known, return an error.
820  CurPtr = TokStart+1;
821  return lltok::Error;
822}
823
824/// Lex all tokens that start with a 0x prefix, knowing they match and are not
825/// labels.
826///    HexFPConstant     0x[0-9A-Fa-f]+
827///    HexFP80Constant   0xK[0-9A-Fa-f]+
828///    HexFP128Constant  0xL[0-9A-Fa-f]+
829///    HexPPC128Constant 0xM[0-9A-Fa-f]+
830///    HexHalfConstant   0xH[0-9A-Fa-f]+
831lltok::Kind LLLexer::Lex0x() {
832  CurPtr = TokStart + 2;
833
834  char Kind;
835  if ((CurPtr[0] >= 'K' && CurPtr[0] <= 'M') || CurPtr[0] == 'H') {
836    Kind = *CurPtr++;
837  } else {
838    Kind = 'J';
839  }
840
841  if (!isxdigit(static_cast<unsigned char>(CurPtr[0]))) {
842    // Bad token, return it as an error.
843    CurPtr = TokStart+1;
844    return lltok::Error;
845  }
846
847  while (isxdigit(static_cast<unsigned char>(CurPtr[0])))
848    ++CurPtr;
849
850  if (Kind == 'J') {
851    // HexFPConstant - Floating point constant represented in IEEE format as a
852    // hexadecimal number for when exponential notation is not precise enough.
853    // Half, Float, and double only.
854    APFloatVal = APFloat(BitsToDouble(HexIntToVal(TokStart+2, CurPtr)));
855    return lltok::APFloat;
856  }
857
858  uint64_t Pair[2];
859  switch (Kind) {
860  default: llvm_unreachable("Unknown kind!");
861  case 'K':
862    // F80HexFPConstant - x87 long double in hexadecimal format (10 bytes)
863    FP80HexToIntPair(TokStart+3, CurPtr, Pair);
864    APFloatVal = APFloat(APFloat::x87DoubleExtended, APInt(80, Pair));
865    return lltok::APFloat;
866  case 'L':
867    // F128HexFPConstant - IEEE 128-bit in hexadecimal format (16 bytes)
868    HexToIntPair(TokStart+3, CurPtr, Pair);
869    APFloatVal = APFloat(APFloat::IEEEquad, APInt(128, Pair));
870    return lltok::APFloat;
871  case 'M':
872    // PPC128HexFPConstant - PowerPC 128-bit in hexadecimal format (16 bytes)
873    HexToIntPair(TokStart+3, CurPtr, Pair);
874    APFloatVal = APFloat(APFloat::PPCDoubleDouble, APInt(128, Pair));
875    return lltok::APFloat;
876  case 'H':
877    APFloatVal = APFloat(APFloat::IEEEhalf,
878                         APInt(16,HexIntToVal(TokStart+3, CurPtr)));
879    return lltok::APFloat;
880  }
881}
882
883/// Lex tokens for a label or a numeric constant, possibly starting with -.
884///    Label             [-a-zA-Z$._0-9]+:
885///    NInteger          -[0-9]+
886///    FPConstant        [-+]?[0-9]+[.][0-9]*([eE][-+]?[0-9]+)?
887///    PInteger          [0-9]+
888///    HexFPConstant     0x[0-9A-Fa-f]+
889///    HexFP80Constant   0xK[0-9A-Fa-f]+
890///    HexFP128Constant  0xL[0-9A-Fa-f]+
891///    HexPPC128Constant 0xM[0-9A-Fa-f]+
892lltok::Kind LLLexer::LexDigitOrNegative() {
893  // If the letter after the negative is not a number, this is probably a label.
894  if (!isdigit(static_cast<unsigned char>(TokStart[0])) &&
895      !isdigit(static_cast<unsigned char>(CurPtr[0]))) {
896    // Okay, this is not a number after the -, it's probably a label.
897    if (const char *End = isLabelTail(CurPtr)) {
898      StrVal.assign(TokStart, End-1);
899      CurPtr = End;
900      return lltok::LabelStr;
901    }
902
903    return lltok::Error;
904  }
905
906  // At this point, it is either a label, int or fp constant.
907
908  // Skip digits, we have at least one.
909  for (; isdigit(static_cast<unsigned char>(CurPtr[0])); ++CurPtr)
910    /*empty*/;
911
912  // Check to see if this really is a label afterall, e.g. "-1:".
913  if (isLabelChar(CurPtr[0]) || CurPtr[0] == ':') {
914    if (const char *End = isLabelTail(CurPtr)) {
915      StrVal.assign(TokStart, End-1);
916      CurPtr = End;
917      return lltok::LabelStr;
918    }
919  }
920
921  // If the next character is a '.', then it is a fp value, otherwise its
922  // integer.
923  if (CurPtr[0] != '.') {
924    if (TokStart[0] == '0' && TokStart[1] == 'x')
925      return Lex0x();
926    APSIntVal = APSInt(StringRef(TokStart, CurPtr - TokStart));
927    return lltok::APSInt;
928  }
929
930  ++CurPtr;
931
932  // Skip over [0-9]*([eE][-+]?[0-9]+)?
933  while (isdigit(static_cast<unsigned char>(CurPtr[0]))) ++CurPtr;
934
935  if (CurPtr[0] == 'e' || CurPtr[0] == 'E') {
936    if (isdigit(static_cast<unsigned char>(CurPtr[1])) ||
937        ((CurPtr[1] == '-' || CurPtr[1] == '+') &&
938          isdigit(static_cast<unsigned char>(CurPtr[2])))) {
939      CurPtr += 2;
940      while (isdigit(static_cast<unsigned char>(CurPtr[0]))) ++CurPtr;
941    }
942  }
943
944  APFloatVal = APFloat(std::atof(TokStart));
945  return lltok::APFloat;
946}
947
948/// Lex a floating point constant starting with +.
949///    FPConstant  [-+]?[0-9]+[.][0-9]*([eE][-+]?[0-9]+)?
950lltok::Kind LLLexer::LexPositive() {
951  // If the letter after the negative is a number, this is probably not a
952  // label.
953  if (!isdigit(static_cast<unsigned char>(CurPtr[0])))
954    return lltok::Error;
955
956  // Skip digits.
957  for (++CurPtr; isdigit(static_cast<unsigned char>(CurPtr[0])); ++CurPtr)
958    /*empty*/;
959
960  // At this point, we need a '.'.
961  if (CurPtr[0] != '.') {
962    CurPtr = TokStart+1;
963    return lltok::Error;
964  }
965
966  ++CurPtr;
967
968  // Skip over [0-9]*([eE][-+]?[0-9]+)?
969  while (isdigit(static_cast<unsigned char>(CurPtr[0]))) ++CurPtr;
970
971  if (CurPtr[0] == 'e' || CurPtr[0] == 'E') {
972    if (isdigit(static_cast<unsigned char>(CurPtr[1])) ||
973        ((CurPtr[1] == '-' || CurPtr[1] == '+') &&
974        isdigit(static_cast<unsigned char>(CurPtr[2])))) {
975      CurPtr += 2;
976      while (isdigit(static_cast<unsigned char>(CurPtr[0]))) ++CurPtr;
977    }
978  }
979
980  APFloatVal = APFloat(std::atof(TokStart));
981  return lltok::APFloat;
982}
983