1//===- LLLexer.cpp - Lexer for .ll Files ----------------------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// Implement the Lexer for .ll files.
11//
12//===----------------------------------------------------------------------===//
13
14#include "LLLexer.h"
15#include "llvm/ADT/StringExtras.h"
16#include "llvm/ADT/Twine.h"
17#include "llvm/AsmParser/Parser.h"
18#include "llvm/IR/DerivedTypes.h"
19#include "llvm/IR/Instruction.h"
20#include "llvm/IR/LLVMContext.h"
21#include "llvm/Support/ErrorHandling.h"
22#include "llvm/Support/MathExtras.h"
23#include "llvm/Support/MemoryBuffer.h"
24#include "llvm/Support/SourceMgr.h"
25#include "llvm/Support/raw_ostream.h"
26#include <cctype>
27#include <cstdio>
28#include <cstdlib>
29#include <cstring>
30using namespace llvm;
31
/// Error - Build an error diagnostic for \p Msg at \p ErrorLoc through the
/// source manager and store it in ErrorInfo.  Always returns true.
bool LLLexer::Error(LocTy ErrorLoc, const Twine &Msg) const {
  ErrorInfo = SM.GetMessage(ErrorLoc, SourceMgr::DK_Error, Msg);
  return true;
}
36
/// Warning - Print \p Msg as a warning diagnostic at \p WarningLoc through the
/// source manager.
void LLLexer::Warning(LocTy WarningLoc, const Twine &Msg) const {
  SM.PrintMessage(WarningLoc, SourceMgr::DK_Warning, Msg);
}
40
41//===----------------------------------------------------------------------===//
42// Helper functions.
43//===----------------------------------------------------------------------===//
44
45// atoull - Convert an ascii string of decimal digits into the unsigned long
46// long representation... this does not have to do input error checking,
47// because we know that the input will be matched by a suitable regex...
48//
49uint64_t LLLexer::atoull(const char *Buffer, const char *End) {
50  uint64_t Result = 0;
51  for (; Buffer != End; Buffer++) {
52    uint64_t OldRes = Result;
53    Result *= 10;
54    Result += *Buffer-'0';
55    if (Result < OldRes) {  // Uh, oh, overflow detected!!!
56      Error("constant bigger than 64 bits detected!");
57      return 0;
58    }
59  }
60  return Result;
61}
62
63uint64_t LLLexer::HexIntToVal(const char *Buffer, const char *End) {
64  uint64_t Result = 0;
65  for (; Buffer != End; ++Buffer) {
66    uint64_t OldRes = Result;
67    Result *= 16;
68    Result += hexDigitValue(*Buffer);
69
70    if (Result < OldRes) {   // Uh, oh, overflow detected!!!
71      Error("constant bigger than 64 bits detected!");
72      return 0;
73    }
74  }
75  return Result;
76}
77
78void LLLexer::HexToIntPair(const char *Buffer, const char *End,
79                           uint64_t Pair[2]) {
80  Pair[0] = 0;
81  for (int i=0; i<16; i++, Buffer++) {
82    assert(Buffer != End);
83    Pair[0] *= 16;
84    Pair[0] += hexDigitValue(*Buffer);
85  }
86  Pair[1] = 0;
87  for (int i=0; i<16 && Buffer != End; i++, Buffer++) {
88    Pair[1] *= 16;
89    Pair[1] += hexDigitValue(*Buffer);
90  }
91  if (Buffer != End)
92    Error("constant bigger than 128 bits detected!");
93}
94
95/// FP80HexToIntPair - translate an 80 bit FP80 number (20 hexits) into
96/// { low64, high16 } as usual for an APInt.
97void LLLexer::FP80HexToIntPair(const char *Buffer, const char *End,
98                           uint64_t Pair[2]) {
99  Pair[1] = 0;
100  for (int i=0; i<4 && Buffer != End; i++, Buffer++) {
101    assert(Buffer != End);
102    Pair[1] *= 16;
103    Pair[1] += hexDigitValue(*Buffer);
104  }
105  Pair[0] = 0;
106  for (int i=0; i<16; i++, Buffer++) {
107    Pair[0] *= 16;
108    Pair[0] += hexDigitValue(*Buffer);
109  }
110  if (Buffer != End)
111    Error("constant bigger than 128 bits detected!");
112}
113
114// UnEscapeLexed - Run through the specified buffer and change \xx codes to the
115// appropriate character.
116static void UnEscapeLexed(std::string &Str) {
117  if (Str.empty()) return;
118
119  char *Buffer = &Str[0], *EndBuffer = Buffer+Str.size();
120  char *BOut = Buffer;
121  for (char *BIn = Buffer; BIn != EndBuffer; ) {
122    if (BIn[0] == '\\') {
123      if (BIn < EndBuffer-1 && BIn[1] == '\\') {
124        *BOut++ = '\\'; // Two \ becomes one
125        BIn += 2;
126      } else if (BIn < EndBuffer-2 &&
127                 isxdigit(static_cast<unsigned char>(BIn[1])) &&
128                 isxdigit(static_cast<unsigned char>(BIn[2]))) {
129        *BOut = hexDigitValue(BIn[1]) * 16 + hexDigitValue(BIn[2]);
130        BIn += 3;                           // Skip over handled chars
131        ++BOut;
132      } else {
133        *BOut++ = *BIn++;
134      }
135    } else {
136      *BOut++ = *BIn++;
137    }
138  }
139  Str.resize(BOut-Buffer);
140}
141
/// isLabelChar - Return true if C may appear in a label, i.e. it matches
/// [-a-zA-Z$._0-9].
static bool isLabelChar(char C) {
  switch (C) {
  case '-':
  case '$':
  case '.':
  case '_':
    return true;
  default:
    return isalnum(static_cast<unsigned char>(C)) != 0;
  }
}
147
148
149/// isLabelTail - Return true if this pointer points to a valid end of a label.
150static const char *isLabelTail(const char *CurPtr) {
151  while (1) {
152    if (CurPtr[0] == ':') return CurPtr+1;
153    if (!isLabelChar(CurPtr[0])) return nullptr;
154    ++CurPtr;
155  }
156}
157
158
159
160//===----------------------------------------------------------------------===//
161// Lexer definition.
162//===----------------------------------------------------------------------===//
163
/// Construct a lexer over \p StartBuf.  The source manager, diagnostic object
/// and context are stored by the member initializers, APFloatVal starts out as
/// 0.0, and lexing begins at the first character of the buffer.
LLLexer::LLLexer(MemoryBuffer *StartBuf, SourceMgr &sm, SMDiagnostic &Err,
                 LLVMContext &C)
  : CurBuf(StartBuf), ErrorInfo(Err), SM(sm), Context(C), APFloatVal(0.0) {
  // Start reading at the beginning of the buffer.
  CurPtr = CurBuf->getBufferStart();
}
169
/// getFilename - Return the identifier of the buffer currently being lexed.
std::string LLLexer::getFilename() const {
  return CurBuf->getBufferIdentifier();
}
173
174int LLLexer::getNextChar() {
175  char CurChar = *CurPtr++;
176  switch (CurChar) {
177  default: return (unsigned char)CurChar;
178  case 0:
179    // A nul character in the stream is either the end of the current buffer or
180    // a random nul in the file.  Disambiguate that here.
181    if (CurPtr-1 != CurBuf->getBufferEnd())
182      return 0;  // Just whitespace.
183
184    // Otherwise, return end of file.
185    --CurPtr;  // Another call to lex will return EOF again.
186    return EOF;
187  }
188}
189
190
191lltok::Kind LLLexer::LexToken() {
192  TokStart = CurPtr;
193
194  int CurChar = getNextChar();
195  switch (CurChar) {
196  default:
197    // Handle letters: [a-zA-Z_]
198    if (isalpha(static_cast<unsigned char>(CurChar)) || CurChar == '_')
199      return LexIdentifier();
200
201    return lltok::Error;
202  case EOF: return lltok::Eof;
203  case 0:
204  case ' ':
205  case '\t':
206  case '\n':
207  case '\r':
208    // Ignore whitespace.
209    return LexToken();
210  case '+': return LexPositive();
211  case '@': return LexAt();
212  case '$': return LexDollar();
213  case '%': return LexPercent();
214  case '"': return LexQuote();
215  case '.':
216    if (const char *Ptr = isLabelTail(CurPtr)) {
217      CurPtr = Ptr;
218      StrVal.assign(TokStart, CurPtr-1);
219      return lltok::LabelStr;
220    }
221    if (CurPtr[0] == '.' && CurPtr[1] == '.') {
222      CurPtr += 2;
223      return lltok::dotdotdot;
224    }
225    return lltok::Error;
226  case ';':
227    SkipLineComment();
228    return LexToken();
229  case '!': return LexExclaim();
230  case '#': return LexHash();
231  case '0': case '1': case '2': case '3': case '4':
232  case '5': case '6': case '7': case '8': case '9':
233  case '-':
234    return LexDigitOrNegative();
235  case '=': return lltok::equal;
236  case '[': return lltok::lsquare;
237  case ']': return lltok::rsquare;
238  case '{': return lltok::lbrace;
239  case '}': return lltok::rbrace;
240  case '<': return lltok::less;
241  case '>': return lltok::greater;
242  case '(': return lltok::lparen;
243  case ')': return lltok::rparen;
244  case ',': return lltok::comma;
245  case '*': return lltok::star;
246  case '\\': return lltok::backslash;
247  }
248}
249
250void LLLexer::SkipLineComment() {
251  while (1) {
252    if (CurPtr[0] == '\n' || CurPtr[0] == '\r' || getNextChar() == EOF)
253      return;
254  }
255}
256
257/// LexAt - Lex all tokens that start with an @ character:
258///   GlobalVar   @\"[^\"]*\"
259///   GlobalVar   @[-a-zA-Z$._][-a-zA-Z$._0-9]*
260///   GlobalVarID @[0-9]+
261lltok::Kind LLLexer::LexAt() {
262  // Handle AtStringConstant: @\"[^\"]*\"
263  if (CurPtr[0] == '"') {
264    ++CurPtr;
265
266    while (1) {
267      int CurChar = getNextChar();
268
269      if (CurChar == EOF) {
270        Error("end of file in global variable name");
271        return lltok::Error;
272      }
273      if (CurChar == '"') {
274        StrVal.assign(TokStart+2, CurPtr-1);
275        UnEscapeLexed(StrVal);
276        if (StringRef(StrVal).find_first_of(0) != StringRef::npos) {
277          Error("Null bytes are not allowed in names");
278          return lltok::Error;
279        }
280        return lltok::GlobalVar;
281      }
282    }
283  }
284
285  // Handle GlobalVarName: @[-a-zA-Z$._][-a-zA-Z$._0-9]*
286  if (ReadVarName())
287    return lltok::GlobalVar;
288
289  // Handle GlobalVarID: @[0-9]+
290  if (isdigit(static_cast<unsigned char>(CurPtr[0]))) {
291    for (++CurPtr; isdigit(static_cast<unsigned char>(CurPtr[0])); ++CurPtr)
292      /*empty*/;
293
294    uint64_t Val = atoull(TokStart+1, CurPtr);
295    if ((unsigned)Val != Val)
296      Error("invalid value number (too large)!");
297    UIntVal = unsigned(Val);
298    return lltok::GlobalID;
299  }
300
301  return lltok::Error;
302}
303
304lltok::Kind LLLexer::LexDollar() {
305  if (const char *Ptr = isLabelTail(TokStart)) {
306    CurPtr = Ptr;
307    StrVal.assign(TokStart, CurPtr - 1);
308    return lltok::LabelStr;
309  }
310
311  // Handle DollarStringConstant: $\"[^\"]*\"
312  if (CurPtr[0] == '"') {
313    ++CurPtr;
314
315    while (1) {
316      int CurChar = getNextChar();
317
318      if (CurChar == EOF) {
319        Error("end of file in COMDAT variable name");
320        return lltok::Error;
321      }
322      if (CurChar == '"') {
323        StrVal.assign(TokStart + 2, CurPtr - 1);
324        UnEscapeLexed(StrVal);
325        if (StringRef(StrVal).find_first_of(0) != StringRef::npos) {
326          Error("Null bytes are not allowed in names");
327          return lltok::Error;
328        }
329        return lltok::ComdatVar;
330      }
331    }
332  }
333
334  // Handle ComdatVarName: $[-a-zA-Z$._][-a-zA-Z$._0-9]*
335  if (ReadVarName())
336    return lltok::ComdatVar;
337
338  return lltok::Error;
339}
340
341/// ReadString - Read a string until the closing quote.
342lltok::Kind LLLexer::ReadString(lltok::Kind kind) {
343  const char *Start = CurPtr;
344  while (1) {
345    int CurChar = getNextChar();
346
347    if (CurChar == EOF) {
348      Error("end of file in string constant");
349      return lltok::Error;
350    }
351    if (CurChar == '"') {
352      StrVal.assign(Start, CurPtr-1);
353      UnEscapeLexed(StrVal);
354      return kind;
355    }
356  }
357}
358
359/// ReadVarName - Read the rest of a token containing a variable name.
360bool LLLexer::ReadVarName() {
361  const char *NameStart = CurPtr;
362  if (isalpha(static_cast<unsigned char>(CurPtr[0])) ||
363      CurPtr[0] == '-' || CurPtr[0] == '$' ||
364      CurPtr[0] == '.' || CurPtr[0] == '_') {
365    ++CurPtr;
366    while (isalnum(static_cast<unsigned char>(CurPtr[0])) ||
367           CurPtr[0] == '-' || CurPtr[0] == '$' ||
368           CurPtr[0] == '.' || CurPtr[0] == '_')
369      ++CurPtr;
370
371    StrVal.assign(NameStart, CurPtr);
372    return true;
373  }
374  return false;
375}
376
377/// LexPercent - Lex all tokens that start with a % character:
378///   LocalVar   ::= %\"[^\"]*\"
379///   LocalVar   ::= %[-a-zA-Z$._][-a-zA-Z$._0-9]*
380///   LocalVarID ::= %[0-9]+
381lltok::Kind LLLexer::LexPercent() {
382  // Handle LocalVarName: %\"[^\"]*\"
383  if (CurPtr[0] == '"') {
384    ++CurPtr;
385    return ReadString(lltok::LocalVar);
386  }
387
388  // Handle LocalVarName: %[-a-zA-Z$._][-a-zA-Z$._0-9]*
389  if (ReadVarName())
390    return lltok::LocalVar;
391
392  // Handle LocalVarID: %[0-9]+
393  if (isdigit(static_cast<unsigned char>(CurPtr[0]))) {
394    for (++CurPtr; isdigit(static_cast<unsigned char>(CurPtr[0])); ++CurPtr)
395      /*empty*/;
396
397    uint64_t Val = atoull(TokStart+1, CurPtr);
398    if ((unsigned)Val != Val)
399      Error("invalid value number (too large)!");
400    UIntVal = unsigned(Val);
401    return lltok::LocalVarID;
402  }
403
404  return lltok::Error;
405}
406
407/// LexQuote - Lex all tokens that start with a " character:
408///   QuoteLabel        "[^"]+":
409///   StringConstant    "[^"]*"
410lltok::Kind LLLexer::LexQuote() {
411  lltok::Kind kind = ReadString(lltok::StringConstant);
412  if (kind == lltok::Error || kind == lltok::Eof)
413    return kind;
414
415  if (CurPtr[0] == ':') {
416    ++CurPtr;
417    kind = lltok::LabelStr;
418  }
419
420  return kind;
421}
422
423/// LexExclaim:
424///    !foo
425///    !
426lltok::Kind LLLexer::LexExclaim() {
427  // Lex a metadata name as a MetadataVar.
428  if (isalpha(static_cast<unsigned char>(CurPtr[0])) ||
429      CurPtr[0] == '-' || CurPtr[0] == '$' ||
430      CurPtr[0] == '.' || CurPtr[0] == '_' || CurPtr[0] == '\\') {
431    ++CurPtr;
432    while (isalnum(static_cast<unsigned char>(CurPtr[0])) ||
433           CurPtr[0] == '-' || CurPtr[0] == '$' ||
434           CurPtr[0] == '.' || CurPtr[0] == '_' || CurPtr[0] == '\\')
435      ++CurPtr;
436
437    StrVal.assign(TokStart+1, CurPtr);   // Skip !
438    UnEscapeLexed(StrVal);
439    return lltok::MetadataVar;
440  }
441  return lltok::exclaim;
442}
443
444/// LexHash - Lex all tokens that start with a # character:
445///    AttrGrpID ::= #[0-9]+
446lltok::Kind LLLexer::LexHash() {
447  // Handle AttrGrpID: #[0-9]+
448  if (isdigit(static_cast<unsigned char>(CurPtr[0]))) {
449    for (++CurPtr; isdigit(static_cast<unsigned char>(CurPtr[0])); ++CurPtr)
450      /*empty*/;
451
452    uint64_t Val = atoull(TokStart+1, CurPtr);
453    if ((unsigned)Val != Val)
454      Error("invalid value number (too large)!");
455    UIntVal = unsigned(Val);
456    return lltok::AttrGrpID;
457  }
458
459  return lltok::Error;
460}
461
462/// LexIdentifier: Handle several related productions:
463///    Label           [-a-zA-Z$._0-9]+:
464///    IntegerType     i[0-9]+
465///    Keyword         sdiv, float, ...
466///    HexIntConstant  [us]0x[0-9A-Fa-f]+
467lltok::Kind LLLexer::LexIdentifier() {
468  const char *StartChar = CurPtr;
469  const char *IntEnd = CurPtr[-1] == 'i' ? nullptr : StartChar;
470  const char *KeywordEnd = nullptr;
471
472  for (; isLabelChar(*CurPtr); ++CurPtr) {
473    // If we decide this is an integer, remember the end of the sequence.
474    if (!IntEnd && !isdigit(static_cast<unsigned char>(*CurPtr)))
475      IntEnd = CurPtr;
476    if (!KeywordEnd && !isalnum(static_cast<unsigned char>(*CurPtr)) &&
477        *CurPtr != '_')
478      KeywordEnd = CurPtr;
479  }
480
481  // If we stopped due to a colon, this really is a label.
482  if (*CurPtr == ':') {
483    StrVal.assign(StartChar-1, CurPtr++);
484    return lltok::LabelStr;
485  }
486
487  // Otherwise, this wasn't a label.  If this was valid as an integer type,
488  // return it.
489  if (!IntEnd) IntEnd = CurPtr;
490  if (IntEnd != StartChar) {
491    CurPtr = IntEnd;
492    uint64_t NumBits = atoull(StartChar, CurPtr);
493    if (NumBits < IntegerType::MIN_INT_BITS ||
494        NumBits > IntegerType::MAX_INT_BITS) {
495      Error("bitwidth for integer type out of range!");
496      return lltok::Error;
497    }
498    TyVal = IntegerType::get(Context, NumBits);
499    return lltok::Type;
500  }
501
502  // Otherwise, this was a letter sequence.  See which keyword this is.
503  if (!KeywordEnd) KeywordEnd = CurPtr;
504  CurPtr = KeywordEnd;
505  --StartChar;
506  unsigned Len = CurPtr-StartChar;
507#define KEYWORD(STR)                                                    \
508  do {                                                                  \
509    if (Len == strlen(#STR) && !memcmp(StartChar, #STR, strlen(#STR)))  \
510      return lltok::kw_##STR;                                           \
511  } while (0)
512
513  KEYWORD(true);    KEYWORD(false);
514  KEYWORD(declare); KEYWORD(define);
515  KEYWORD(global);  KEYWORD(constant);
516
517  KEYWORD(private);
518  KEYWORD(internal);
519  KEYWORD(linker_private);        // NOTE: deprecated, for parser compatibility
520  KEYWORD(linker_private_weak);   // NOTE: deprecated, for parser compatibility
521  KEYWORD(available_externally);
522  KEYWORD(linkonce);
523  KEYWORD(linkonce_odr);
524  KEYWORD(weak); // Use as a linkage, and a modifier for "cmpxchg".
525  KEYWORD(weak_odr);
526  KEYWORD(appending);
527  KEYWORD(dllimport);
528  KEYWORD(dllexport);
529  KEYWORD(common);
530  KEYWORD(default);
531  KEYWORD(hidden);
532  KEYWORD(protected);
533  KEYWORD(unnamed_addr);
534  KEYWORD(externally_initialized);
535  KEYWORD(extern_weak);
536  KEYWORD(external);
537  KEYWORD(thread_local);
538  KEYWORD(localdynamic);
539  KEYWORD(initialexec);
540  KEYWORD(localexec);
541  KEYWORD(zeroinitializer);
542  KEYWORD(undef);
543  KEYWORD(null);
544  KEYWORD(to);
545  KEYWORD(tail);
546  KEYWORD(musttail);
547  KEYWORD(target);
548  KEYWORD(triple);
549  KEYWORD(unwind);
550  KEYWORD(deplibs);             // FIXME: Remove in 4.0.
551  KEYWORD(datalayout);
552  KEYWORD(volatile);
553  KEYWORD(atomic);
554  KEYWORD(unordered);
555  KEYWORD(monotonic);
556  KEYWORD(acquire);
557  KEYWORD(release);
558  KEYWORD(acq_rel);
559  KEYWORD(seq_cst);
560  KEYWORD(singlethread);
561
562  KEYWORD(nnan);
563  KEYWORD(ninf);
564  KEYWORD(nsz);
565  KEYWORD(arcp);
566  KEYWORD(fast);
567  KEYWORD(nuw);
568  KEYWORD(nsw);
569  KEYWORD(exact);
570  KEYWORD(inbounds);
571  KEYWORD(align);
572  KEYWORD(addrspace);
573  KEYWORD(section);
574  KEYWORD(alias);
575  KEYWORD(module);
576  KEYWORD(asm);
577  KEYWORD(sideeffect);
578  KEYWORD(alignstack);
579  KEYWORD(inteldialect);
580  KEYWORD(gc);
581  KEYWORD(prefix);
582
583  KEYWORD(ccc);
584  KEYWORD(fastcc);
585  KEYWORD(coldcc);
586  KEYWORD(x86_stdcallcc);
587  KEYWORD(x86_fastcallcc);
588  KEYWORD(x86_thiscallcc);
589  KEYWORD(arm_apcscc);
590  KEYWORD(arm_aapcscc);
591  KEYWORD(arm_aapcs_vfpcc);
592  KEYWORD(msp430_intrcc);
593  KEYWORD(ptx_kernel);
594  KEYWORD(ptx_device);
595  KEYWORD(spir_kernel);
596  KEYWORD(spir_func);
597  KEYWORD(intel_ocl_bicc);
598  KEYWORD(x86_64_sysvcc);
599  KEYWORD(x86_64_win64cc);
600  KEYWORD(webkit_jscc);
601  KEYWORD(anyregcc);
602  KEYWORD(preserve_mostcc);
603  KEYWORD(preserve_allcc);
604
605  KEYWORD(cc);
606  KEYWORD(c);
607
608  KEYWORD(attributes);
609
610  KEYWORD(alwaysinline);
611  KEYWORD(builtin);
612  KEYWORD(byval);
613  KEYWORD(inalloca);
614  KEYWORD(cold);
615  KEYWORD(inlinehint);
616  KEYWORD(inreg);
617  KEYWORD(jumptable);
618  KEYWORD(minsize);
619  KEYWORD(naked);
620  KEYWORD(nest);
621  KEYWORD(noalias);
622  KEYWORD(nobuiltin);
623  KEYWORD(nocapture);
624  KEYWORD(noduplicate);
625  KEYWORD(noimplicitfloat);
626  KEYWORD(noinline);
627  KEYWORD(nonlazybind);
628  KEYWORD(nonnull);
629  KEYWORD(noredzone);
630  KEYWORD(noreturn);
631  KEYWORD(nounwind);
632  KEYWORD(optnone);
633  KEYWORD(optsize);
634  KEYWORD(readnone);
635  KEYWORD(readonly);
636  KEYWORD(returned);
637  KEYWORD(returns_twice);
638  KEYWORD(signext);
639  KEYWORD(sret);
640  KEYWORD(ssp);
641  KEYWORD(sspreq);
642  KEYWORD(sspstrong);
643  KEYWORD(sanitize_address);
644  KEYWORD(sanitize_thread);
645  KEYWORD(sanitize_memory);
646  KEYWORD(uwtable);
647  KEYWORD(zeroext);
648
649  KEYWORD(type);
650  KEYWORD(opaque);
651
652  KEYWORD(comdat);
653
654  // Comdat types
655  KEYWORD(any);
656  KEYWORD(exactmatch);
657  KEYWORD(largest);
658  KEYWORD(noduplicates);
659  KEYWORD(samesize);
660
661  KEYWORD(eq); KEYWORD(ne); KEYWORD(slt); KEYWORD(sgt); KEYWORD(sle);
662  KEYWORD(sge); KEYWORD(ult); KEYWORD(ugt); KEYWORD(ule); KEYWORD(uge);
663  KEYWORD(oeq); KEYWORD(one); KEYWORD(olt); KEYWORD(ogt); KEYWORD(ole);
664  KEYWORD(oge); KEYWORD(ord); KEYWORD(uno); KEYWORD(ueq); KEYWORD(une);
665
666  KEYWORD(xchg); KEYWORD(nand); KEYWORD(max); KEYWORD(min); KEYWORD(umax);
667  KEYWORD(umin);
668
669  KEYWORD(x);
670  KEYWORD(blockaddress);
671
672  KEYWORD(personality);
673  KEYWORD(cleanup);
674  KEYWORD(catch);
675  KEYWORD(filter);
676#undef KEYWORD
677
678  // Keywords for types.
679#define TYPEKEYWORD(STR, LLVMTY) \
680  if (Len == strlen(STR) && !memcmp(StartChar, STR, strlen(STR))) { \
681    TyVal = LLVMTY; return lltok::Type; }
682  TYPEKEYWORD("void",      Type::getVoidTy(Context));
683  TYPEKEYWORD("half",      Type::getHalfTy(Context));
684  TYPEKEYWORD("float",     Type::getFloatTy(Context));
685  TYPEKEYWORD("double",    Type::getDoubleTy(Context));
686  TYPEKEYWORD("x86_fp80",  Type::getX86_FP80Ty(Context));
687  TYPEKEYWORD("fp128",     Type::getFP128Ty(Context));
688  TYPEKEYWORD("ppc_fp128", Type::getPPC_FP128Ty(Context));
689  TYPEKEYWORD("label",     Type::getLabelTy(Context));
690  TYPEKEYWORD("metadata",  Type::getMetadataTy(Context));
691  TYPEKEYWORD("x86_mmx",   Type::getX86_MMXTy(Context));
692#undef TYPEKEYWORD
693
694  // Keywords for instructions.
695#define INSTKEYWORD(STR, Enum) \
696  if (Len == strlen(#STR) && !memcmp(StartChar, #STR, strlen(#STR))) { \
697    UIntVal = Instruction::Enum; return lltok::kw_##STR; }
698
699  INSTKEYWORD(add,   Add);  INSTKEYWORD(fadd,   FAdd);
700  INSTKEYWORD(sub,   Sub);  INSTKEYWORD(fsub,   FSub);
701  INSTKEYWORD(mul,   Mul);  INSTKEYWORD(fmul,   FMul);
702  INSTKEYWORD(udiv,  UDiv); INSTKEYWORD(sdiv,  SDiv); INSTKEYWORD(fdiv,  FDiv);
703  INSTKEYWORD(urem,  URem); INSTKEYWORD(srem,  SRem); INSTKEYWORD(frem,  FRem);
704  INSTKEYWORD(shl,   Shl);  INSTKEYWORD(lshr,  LShr); INSTKEYWORD(ashr,  AShr);
705  INSTKEYWORD(and,   And);  INSTKEYWORD(or,    Or);   INSTKEYWORD(xor,   Xor);
706  INSTKEYWORD(icmp,  ICmp); INSTKEYWORD(fcmp,  FCmp);
707
708  INSTKEYWORD(phi,         PHI);
709  INSTKEYWORD(call,        Call);
710  INSTKEYWORD(trunc,       Trunc);
711  INSTKEYWORD(zext,        ZExt);
712  INSTKEYWORD(sext,        SExt);
713  INSTKEYWORD(fptrunc,     FPTrunc);
714  INSTKEYWORD(fpext,       FPExt);
715  INSTKEYWORD(uitofp,      UIToFP);
716  INSTKEYWORD(sitofp,      SIToFP);
717  INSTKEYWORD(fptoui,      FPToUI);
718  INSTKEYWORD(fptosi,      FPToSI);
719  INSTKEYWORD(inttoptr,    IntToPtr);
720  INSTKEYWORD(ptrtoint,    PtrToInt);
721  INSTKEYWORD(bitcast,     BitCast);
722  INSTKEYWORD(addrspacecast, AddrSpaceCast);
723  INSTKEYWORD(select,      Select);
724  INSTKEYWORD(va_arg,      VAArg);
725  INSTKEYWORD(ret,         Ret);
726  INSTKEYWORD(br,          Br);
727  INSTKEYWORD(switch,      Switch);
728  INSTKEYWORD(indirectbr,  IndirectBr);
729  INSTKEYWORD(invoke,      Invoke);
730  INSTKEYWORD(resume,      Resume);
731  INSTKEYWORD(unreachable, Unreachable);
732
733  INSTKEYWORD(alloca,      Alloca);
734  INSTKEYWORD(load,        Load);
735  INSTKEYWORD(store,       Store);
736  INSTKEYWORD(cmpxchg,     AtomicCmpXchg);
737  INSTKEYWORD(atomicrmw,   AtomicRMW);
738  INSTKEYWORD(fence,       Fence);
739  INSTKEYWORD(getelementptr, GetElementPtr);
740
741  INSTKEYWORD(extractelement, ExtractElement);
742  INSTKEYWORD(insertelement,  InsertElement);
743  INSTKEYWORD(shufflevector,  ShuffleVector);
744  INSTKEYWORD(extractvalue,   ExtractValue);
745  INSTKEYWORD(insertvalue,    InsertValue);
746  INSTKEYWORD(landingpad,     LandingPad);
747#undef INSTKEYWORD
748
749  // Check for [us]0x[0-9A-Fa-f]+ which are Hexadecimal constant generated by
750  // the CFE to avoid forcing it to deal with 64-bit numbers.
751  if ((TokStart[0] == 'u' || TokStart[0] == 's') &&
752      TokStart[1] == '0' && TokStart[2] == 'x' &&
753      isxdigit(static_cast<unsigned char>(TokStart[3]))) {
754    int len = CurPtr-TokStart-3;
755    uint32_t bits = len * 4;
756    APInt Tmp(bits, StringRef(TokStart+3, len), 16);
757    uint32_t activeBits = Tmp.getActiveBits();
758    if (activeBits > 0 && activeBits < bits)
759      Tmp = Tmp.trunc(activeBits);
760    APSIntVal = APSInt(Tmp, TokStart[0] == 'u');
761    return lltok::APSInt;
762  }
763
764  // If this is "cc1234", return this as just "cc".
765  if (TokStart[0] == 'c' && TokStart[1] == 'c') {
766    CurPtr = TokStart+2;
767    return lltok::kw_cc;
768  }
769
770  // Finally, if this isn't known, return an error.
771  CurPtr = TokStart+1;
772  return lltok::Error;
773}
774
775
776/// Lex0x: Handle productions that start with 0x, knowing that it matches and
777/// that this is not a label:
778///    HexFPConstant     0x[0-9A-Fa-f]+
779///    HexFP80Constant   0xK[0-9A-Fa-f]+
780///    HexFP128Constant  0xL[0-9A-Fa-f]+
781///    HexPPC128Constant 0xM[0-9A-Fa-f]+
782///    HexHalfConstant   0xH[0-9A-Fa-f]+
783lltok::Kind LLLexer::Lex0x() {
784  CurPtr = TokStart + 2;
785
786  char Kind;
787  if ((CurPtr[0] >= 'K' && CurPtr[0] <= 'M') || CurPtr[0] == 'H') {
788    Kind = *CurPtr++;
789  } else {
790    Kind = 'J';
791  }
792
793  if (!isxdigit(static_cast<unsigned char>(CurPtr[0]))) {
794    // Bad token, return it as an error.
795    CurPtr = TokStart+1;
796    return lltok::Error;
797  }
798
799  while (isxdigit(static_cast<unsigned char>(CurPtr[0])))
800    ++CurPtr;
801
802  if (Kind == 'J') {
803    // HexFPConstant - Floating point constant represented in IEEE format as a
804    // hexadecimal number for when exponential notation is not precise enough.
805    // Half, Float, and double only.
806    APFloatVal = APFloat(BitsToDouble(HexIntToVal(TokStart+2, CurPtr)));
807    return lltok::APFloat;
808  }
809
810  uint64_t Pair[2];
811  switch (Kind) {
812  default: llvm_unreachable("Unknown kind!");
813  case 'K':
814    // F80HexFPConstant - x87 long double in hexadecimal format (10 bytes)
815    FP80HexToIntPair(TokStart+3, CurPtr, Pair);
816    APFloatVal = APFloat(APFloat::x87DoubleExtended, APInt(80, Pair));
817    return lltok::APFloat;
818  case 'L':
819    // F128HexFPConstant - IEEE 128-bit in hexadecimal format (16 bytes)
820    HexToIntPair(TokStart+3, CurPtr, Pair);
821    APFloatVal = APFloat(APFloat::IEEEquad, APInt(128, Pair));
822    return lltok::APFloat;
823  case 'M':
824    // PPC128HexFPConstant - PowerPC 128-bit in hexadecimal format (16 bytes)
825    HexToIntPair(TokStart+3, CurPtr, Pair);
826    APFloatVal = APFloat(APFloat::PPCDoubleDouble, APInt(128, Pair));
827    return lltok::APFloat;
828  case 'H':
829    APFloatVal = APFloat(APFloat::IEEEhalf,
830                         APInt(16,HexIntToVal(TokStart+3, CurPtr)));
831    return lltok::APFloat;
832  }
833}
834
835/// LexIdentifier: Handle several related productions:
836///    Label             [-a-zA-Z$._0-9]+:
837///    NInteger          -[0-9]+
838///    FPConstant        [-+]?[0-9]+[.][0-9]*([eE][-+]?[0-9]+)?
839///    PInteger          [0-9]+
840///    HexFPConstant     0x[0-9A-Fa-f]+
841///    HexFP80Constant   0xK[0-9A-Fa-f]+
842///    HexFP128Constant  0xL[0-9A-Fa-f]+
843///    HexPPC128Constant 0xM[0-9A-Fa-f]+
844lltok::Kind LLLexer::LexDigitOrNegative() {
845  // If the letter after the negative is not a number, this is probably a label.
846  if (!isdigit(static_cast<unsigned char>(TokStart[0])) &&
847      !isdigit(static_cast<unsigned char>(CurPtr[0]))) {
848    // Okay, this is not a number after the -, it's probably a label.
849    if (const char *End = isLabelTail(CurPtr)) {
850      StrVal.assign(TokStart, End-1);
851      CurPtr = End;
852      return lltok::LabelStr;
853    }
854
855    return lltok::Error;
856  }
857
858  // At this point, it is either a label, int or fp constant.
859
860  // Skip digits, we have at least one.
861  for (; isdigit(static_cast<unsigned char>(CurPtr[0])); ++CurPtr)
862    /*empty*/;
863
864  // Check to see if this really is a label afterall, e.g. "-1:".
865  if (isLabelChar(CurPtr[0]) || CurPtr[0] == ':') {
866    if (const char *End = isLabelTail(CurPtr)) {
867      StrVal.assign(TokStart, End-1);
868      CurPtr = End;
869      return lltok::LabelStr;
870    }
871  }
872
873  // If the next character is a '.', then it is a fp value, otherwise its
874  // integer.
875  if (CurPtr[0] != '.') {
876    if (TokStart[0] == '0' && TokStart[1] == 'x')
877      return Lex0x();
878    unsigned Len = CurPtr-TokStart;
879    uint32_t numBits = ((Len * 64) / 19) + 2;
880    APInt Tmp(numBits, StringRef(TokStart, Len), 10);
881    if (TokStart[0] == '-') {
882      uint32_t minBits = Tmp.getMinSignedBits();
883      if (minBits > 0 && minBits < numBits)
884        Tmp = Tmp.trunc(minBits);
885      APSIntVal = APSInt(Tmp, false);
886    } else {
887      uint32_t activeBits = Tmp.getActiveBits();
888      if (activeBits > 0 && activeBits < numBits)
889        Tmp = Tmp.trunc(activeBits);
890      APSIntVal = APSInt(Tmp, true);
891    }
892    return lltok::APSInt;
893  }
894
895  ++CurPtr;
896
897  // Skip over [0-9]*([eE][-+]?[0-9]+)?
898  while (isdigit(static_cast<unsigned char>(CurPtr[0]))) ++CurPtr;
899
900  if (CurPtr[0] == 'e' || CurPtr[0] == 'E') {
901    if (isdigit(static_cast<unsigned char>(CurPtr[1])) ||
902        ((CurPtr[1] == '-' || CurPtr[1] == '+') &&
903          isdigit(static_cast<unsigned char>(CurPtr[2])))) {
904      CurPtr += 2;
905      while (isdigit(static_cast<unsigned char>(CurPtr[0]))) ++CurPtr;
906    }
907  }
908
909  APFloatVal = APFloat(std::atof(TokStart));
910  return lltok::APFloat;
911}
912
913///    FPConstant  [-+]?[0-9]+[.][0-9]*([eE][-+]?[0-9]+)?
914lltok::Kind LLLexer::LexPositive() {
915  // If the letter after the negative is a number, this is probably not a
916  // label.
917  if (!isdigit(static_cast<unsigned char>(CurPtr[0])))
918    return lltok::Error;
919
920  // Skip digits.
921  for (++CurPtr; isdigit(static_cast<unsigned char>(CurPtr[0])); ++CurPtr)
922    /*empty*/;
923
924  // At this point, we need a '.'.
925  if (CurPtr[0] != '.') {
926    CurPtr = TokStart+1;
927    return lltok::Error;
928  }
929
930  ++CurPtr;
931
932  // Skip over [0-9]*([eE][-+]?[0-9]+)?
933  while (isdigit(static_cast<unsigned char>(CurPtr[0]))) ++CurPtr;
934
935  if (CurPtr[0] == 'e' || CurPtr[0] == 'E') {
936    if (isdigit(static_cast<unsigned char>(CurPtr[1])) ||
937        ((CurPtr[1] == '-' || CurPtr[1] == '+') &&
938        isdigit(static_cast<unsigned char>(CurPtr[2])))) {
939      CurPtr += 2;
940      while (isdigit(static_cast<unsigned char>(CurPtr[0]))) ++CurPtr;
941    }
942  }
943
944  APFloatVal = APFloat(std::atof(TokStart));
945  return lltok::APFloat;
946}
947