LLLexer.cpp revision d1e1703c39742f3c9fc3d27a442ff59bbdbfb5aa
1//===- LLLexer.cpp - Lexer for .ll Files ----------------------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// Implement the Lexer for .ll files.
11//
12//===----------------------------------------------------------------------===//
13
14#include "LLLexer.h"
15#include "llvm/DerivedTypes.h"
16#include "llvm/Instruction.h"
17#include "llvm/LLVMContext.h"
18#include "llvm/ADT/Twine.h"
19#include "llvm/Assembly/Parser.h"
20#include "llvm/Support/ErrorHandling.h"
21#include "llvm/Support/MemoryBuffer.h"
22#include "llvm/Support/MathExtras.h"
23#include "llvm/Support/SourceMgr.h"
24#include "llvm/Support/raw_ostream.h"
25#include <cstdio>
26#include <cstdlib>
27#include <cstring>
28using namespace llvm;
29
30bool LLLexer::Error(LocTy ErrorLoc, const Twine &Msg) const {
31  ErrorInfo = SM.GetMessage(ErrorLoc, Msg, "error");
32  return true;
33}
34
35//===----------------------------------------------------------------------===//
36// Helper functions.
37//===----------------------------------------------------------------------===//
38
39// atoull - Convert an ascii string of decimal digits into the unsigned long
40// long representation... this does not have to do input error checking,
41// because we know that the input will be matched by a suitable regex...
42//
43uint64_t LLLexer::atoull(const char *Buffer, const char *End) {
44  uint64_t Result = 0;
45  for (; Buffer != End; Buffer++) {
46    uint64_t OldRes = Result;
47    Result *= 10;
48    Result += *Buffer-'0';
49    if (Result < OldRes) {  // Uh, oh, overflow detected!!!
50      Error("constant bigger than 64 bits detected!");
51      return 0;
52    }
53  }
54  return Result;
55}
56
57uint64_t LLLexer::HexIntToVal(const char *Buffer, const char *End) {
58  uint64_t Result = 0;
59  for (; Buffer != End; ++Buffer) {
60    uint64_t OldRes = Result;
61    Result *= 16;
62    char C = *Buffer;
63    if (C >= '0' && C <= '9')
64      Result += C-'0';
65    else if (C >= 'A' && C <= 'F')
66      Result += C-'A'+10;
67    else if (C >= 'a' && C <= 'f')
68      Result += C-'a'+10;
69
70    if (Result < OldRes) {   // Uh, oh, overflow detected!!!
71      Error("constant bigger than 64 bits detected!");
72      return 0;
73    }
74  }
75  return Result;
76}
77
78void LLLexer::HexToIntPair(const char *Buffer, const char *End,
79                           uint64_t Pair[2]) {
80  Pair[0] = 0;
81  for (int i=0; i<16; i++, Buffer++) {
82    assert(Buffer != End);
83    Pair[0] *= 16;
84    char C = *Buffer;
85    if (C >= '0' && C <= '9')
86      Pair[0] += C-'0';
87    else if (C >= 'A' && C <= 'F')
88      Pair[0] += C-'A'+10;
89    else if (C >= 'a' && C <= 'f')
90      Pair[0] += C-'a'+10;
91  }
92  Pair[1] = 0;
93  for (int i=0; i<16 && Buffer != End; i++, Buffer++) {
94    Pair[1] *= 16;
95    char C = *Buffer;
96    if (C >= '0' && C <= '9')
97      Pair[1] += C-'0';
98    else if (C >= 'A' && C <= 'F')
99      Pair[1] += C-'A'+10;
100    else if (C >= 'a' && C <= 'f')
101      Pair[1] += C-'a'+10;
102  }
103  if (Buffer != End)
104    Error("constant bigger than 128 bits detected!");
105}
106
107/// FP80HexToIntPair - translate an 80 bit FP80 number (20 hexits) into
108/// { low64, high16 } as usual for an APInt.
109void LLLexer::FP80HexToIntPair(const char *Buffer, const char *End,
110                           uint64_t Pair[2]) {
111  Pair[1] = 0;
112  for (int i=0; i<4 && Buffer != End; i++, Buffer++) {
113    assert(Buffer != End);
114    Pair[1] *= 16;
115    char C = *Buffer;
116    if (C >= '0' && C <= '9')
117      Pair[1] += C-'0';
118    else if (C >= 'A' && C <= 'F')
119      Pair[1] += C-'A'+10;
120    else if (C >= 'a' && C <= 'f')
121      Pair[1] += C-'a'+10;
122  }
123  Pair[0] = 0;
124  for (int i=0; i<16; i++, Buffer++) {
125    Pair[0] *= 16;
126    char C = *Buffer;
127    if (C >= '0' && C <= '9')
128      Pair[0] += C-'0';
129    else if (C >= 'A' && C <= 'F')
130      Pair[0] += C-'A'+10;
131    else if (C >= 'a' && C <= 'f')
132      Pair[0] += C-'a'+10;
133  }
134  if (Buffer != End)
135    Error("constant bigger than 128 bits detected!");
136}
137
// UnEscapeLexed - Run through the specified buffer and change \xx codes to the
// appropriate character.
//
// The string is rewritten in place and shrunk to the unescaped length:
//   - "\\" (two backslashes) becomes a single backslash;
//   - a backslash followed by two hex digits becomes the byte with that value;
//   - any other backslash is copied through unchanged.
static void UnEscapeLexed(std::string &Str) {
  if (Str.empty()) return;

  char *Buffer = &Str[0], *EndBuffer = Buffer+Str.size();
  char *BOut = Buffer;
  for (char *BIn = Buffer; BIn != EndBuffer; ) {
    if (BIn[0] != '\\') {
      *BOut++ = *BIn++;
      continue;
    }

    // Compare remaining lengths instead of forming EndBuffer-1/EndBuffer-2,
    // which would point before the buffer for very short strings.
    if (EndBuffer - BIn >= 2 && BIn[1] == '\\') {
      *BOut++ = '\\'; // Two \ becomes one
      BIn += 2;
    } else if (EndBuffer - BIn >= 3 &&
               // <cctype> classifiers need an unsigned char value; a plain
               // (possibly negative) char is undefined behavior.
               isxdigit(static_cast<unsigned char>(BIn[1])) &&
               isxdigit(static_cast<unsigned char>(BIn[2]))) {
      // Convert from a private copy of the two digits, so nothing has to be
      // written past the escape (BIn[3] may be the string's terminator slot).
      const char Hex[3] = { BIn[1], BIn[2], 0 };
      *BOut++ = static_cast<char>(strtol(Hex, 0, 16));
      BIn += 3;                           // Skip over handled chars
    } else {
      *BOut++ = *BIn++;
    }
  }
  Str.resize(BOut-Buffer);
}
165
/// isLabelChar - Return true for [-a-zA-Z$._0-9].
static bool isLabelChar(char C) {
  // isalnum requires a value representable as unsigned char (or EOF); passing
  // a negative plain char is undefined behavior, so convert first.
  return isalnum(static_cast<unsigned char>(C)) ||
         C == '-' || C == '$' || C == '.' || C == '_';
}
170
171
172/// isLabelTail - Return true if this pointer points to a valid end of a label.
173static const char *isLabelTail(const char *CurPtr) {
174  while (1) {
175    if (CurPtr[0] == ':') return CurPtr+1;
176    if (!isLabelChar(CurPtr[0])) return 0;
177    ++CurPtr;
178  }
179}
180
181
182
183//===----------------------------------------------------------------------===//
184// Lexer definition.
185//===----------------------------------------------------------------------===//
186
187LLLexer::LLLexer(MemoryBuffer *StartBuf, SourceMgr &sm, SMDiagnostic &Err,
188                 LLVMContext &C)
189  : CurBuf(StartBuf), ErrorInfo(Err), SM(sm), Context(C), APFloatVal(0.0) {
190  CurPtr = CurBuf->getBufferStart();
191}
192
193std::string LLLexer::getFilename() const {
194  return CurBuf->getBufferIdentifier();
195}
196
197int LLLexer::getNextChar() {
198  char CurChar = *CurPtr++;
199  switch (CurChar) {
200  default: return (unsigned char)CurChar;
201  case 0:
202    // A nul character in the stream is either the end of the current buffer or
203    // a random nul in the file.  Disambiguate that here.
204    if (CurPtr-1 != CurBuf->getBufferEnd())
205      return 0;  // Just whitespace.
206
207    // Otherwise, return end of file.
208    --CurPtr;  // Another call to lex will return EOF again.
209    return EOF;
210  }
211}
212
213
214lltok::Kind LLLexer::LexToken() {
215  TokStart = CurPtr;
216
217  int CurChar = getNextChar();
218  switch (CurChar) {
219  default:
220    // Handle letters: [a-zA-Z_]
221    if (isalpha(CurChar) || CurChar == '_')
222      return LexIdentifier();
223
224    return lltok::Error;
225  case EOF: return lltok::Eof;
226  case 0:
227  case ' ':
228  case '\t':
229  case '\n':
230  case '\r':
231    // Ignore whitespace.
232    return LexToken();
233  case '+': return LexPositive();
234  case '@': return LexAt();
235  case '%': return LexPercent();
236  case '"': return LexQuote();
237  case '.':
238    if (const char *Ptr = isLabelTail(CurPtr)) {
239      CurPtr = Ptr;
240      StrVal.assign(TokStart, CurPtr-1);
241      return lltok::LabelStr;
242    }
243    if (CurPtr[0] == '.' && CurPtr[1] == '.') {
244      CurPtr += 2;
245      return lltok::dotdotdot;
246    }
247    return lltok::Error;
248  case '$':
249    if (const char *Ptr = isLabelTail(CurPtr)) {
250      CurPtr = Ptr;
251      StrVal.assign(TokStart, CurPtr-1);
252      return lltok::LabelStr;
253    }
254    return lltok::Error;
255  case ';':
256    SkipLineComment();
257    return LexToken();
258  case '!': return LexExclaim();
259  case '0': case '1': case '2': case '3': case '4':
260  case '5': case '6': case '7': case '8': case '9':
261  case '-':
262    return LexDigitOrNegative();
263  case '=': return lltok::equal;
264  case '[': return lltok::lsquare;
265  case ']': return lltok::rsquare;
266  case '{': return lltok::lbrace;
267  case '}': return lltok::rbrace;
268  case '<': return lltok::less;
269  case '>': return lltok::greater;
270  case '(': return lltok::lparen;
271  case ')': return lltok::rparen;
272  case ',': return lltok::comma;
273  case '*': return lltok::star;
274  case '\\': return lltok::backslash;
275  }
276}
277
278void LLLexer::SkipLineComment() {
279  while (1) {
280    if (CurPtr[0] == '\n' || CurPtr[0] == '\r' || getNextChar() == EOF)
281      return;
282  }
283}
284
285/// LexAt - Lex all tokens that start with an @ character:
286///   GlobalVar   @\"[^\"]*\"
287///   GlobalVar   @[-a-zA-Z$._][-a-zA-Z$._0-9]*
288///   GlobalVarID @[0-9]+
289lltok::Kind LLLexer::LexAt() {
290  // Handle AtStringConstant: @\"[^\"]*\"
291  if (CurPtr[0] == '"') {
292    ++CurPtr;
293
294    while (1) {
295      int CurChar = getNextChar();
296
297      if (CurChar == EOF) {
298        Error("end of file in global variable name");
299        return lltok::Error;
300      }
301      if (CurChar == '"') {
302        StrVal.assign(TokStart+2, CurPtr-1);
303        UnEscapeLexed(StrVal);
304        return lltok::GlobalVar;
305      }
306    }
307  }
308
309  // Handle GlobalVarName: @[-a-zA-Z$._][-a-zA-Z$._0-9]*
310  if (isalpha(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' ||
311      CurPtr[0] == '.' || CurPtr[0] == '_') {
312    ++CurPtr;
313    while (isalnum(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' ||
314           CurPtr[0] == '.' || CurPtr[0] == '_')
315      ++CurPtr;
316
317    StrVal.assign(TokStart+1, CurPtr);   // Skip @
318    return lltok::GlobalVar;
319  }
320
321  // Handle GlobalVarID: @[0-9]+
322  if (isdigit(CurPtr[0])) {
323    for (++CurPtr; isdigit(CurPtr[0]); ++CurPtr)
324      /*empty*/;
325
326    uint64_t Val = atoull(TokStart+1, CurPtr);
327    if ((unsigned)Val != Val)
328      Error("invalid value number (too large)!");
329    UIntVal = unsigned(Val);
330    return lltok::GlobalID;
331  }
332
333  return lltok::Error;
334}
335
336
337/// LexPercent - Lex all tokens that start with a % character:
338///   LocalVar   ::= %\"[^\"]*\"
339///   LocalVar   ::= %[-a-zA-Z$._][-a-zA-Z$._0-9]*
340///   LocalVarID ::= %[0-9]+
341lltok::Kind LLLexer::LexPercent() {
342  // Handle LocalVarName: %\"[^\"]*\"
343  if (CurPtr[0] == '"') {
344    ++CurPtr;
345
346    while (1) {
347      int CurChar = getNextChar();
348
349      if (CurChar == EOF) {
350        Error("end of file in string constant");
351        return lltok::Error;
352      }
353      if (CurChar == '"') {
354        StrVal.assign(TokStart+2, CurPtr-1);
355        UnEscapeLexed(StrVal);
356        return lltok::LocalVar;
357      }
358    }
359  }
360
361  // Handle LocalVarName: %[-a-zA-Z$._][-a-zA-Z$._0-9]*
362  if (isalpha(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' ||
363      CurPtr[0] == '.' || CurPtr[0] == '_') {
364    ++CurPtr;
365    while (isalnum(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' ||
366           CurPtr[0] == '.' || CurPtr[0] == '_')
367      ++CurPtr;
368
369    StrVal.assign(TokStart+1, CurPtr);   // Skip %
370    return lltok::LocalVar;
371  }
372
373  // Handle LocalVarID: %[0-9]+
374  if (isdigit(CurPtr[0])) {
375    for (++CurPtr; isdigit(CurPtr[0]); ++CurPtr)
376      /*empty*/;
377
378    uint64_t Val = atoull(TokStart+1, CurPtr);
379    if ((unsigned)Val != Val)
380      Error("invalid value number (too large)!");
381    UIntVal = unsigned(Val);
382    return lltok::LocalVarID;
383  }
384
385  return lltok::Error;
386}
387
388/// LexQuote - Lex all tokens that start with a " character:
389///   QuoteLabel        "[^"]+":
390///   StringConstant    "[^"]*"
391lltok::Kind LLLexer::LexQuote() {
392  while (1) {
393    int CurChar = getNextChar();
394
395    if (CurChar == EOF) {
396      Error("end of file in quoted string");
397      return lltok::Error;
398    }
399
400    if (CurChar != '"') continue;
401
402    if (CurPtr[0] != ':') {
403      StrVal.assign(TokStart+1, CurPtr-1);
404      UnEscapeLexed(StrVal);
405      return lltok::StringConstant;
406    }
407
408    ++CurPtr;
409    StrVal.assign(TokStart+1, CurPtr-2);
410    UnEscapeLexed(StrVal);
411    return lltok::LabelStr;
412  }
413}
414
/// JustWhitespaceNewLine - Return true if only spaces and tabs separate Ptr
/// from the next '\n' or '\r'.  On success Ptr is advanced to that line
/// terminator; otherwise Ptr is left untouched.
static bool JustWhitespaceNewLine(const char *&Ptr) {
  const char *Scan = Ptr;
  for (; *Scan == ' ' || *Scan == '\t'; ++Scan)
    /*empty*/;

  const bool AtLineEnd = (*Scan == '\n' || *Scan == '\r');
  if (AtLineEnd)
    Ptr = Scan;
  return AtLineEnd;
}
425
426/// LexExclaim:
427///    !foo
428///    !
429lltok::Kind LLLexer::LexExclaim() {
430  // Lex a metadata name as a MetadataVar.
431  if (isalpha(CurPtr[0])) {
432    ++CurPtr;
433    while (isalnum(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' ||
434           CurPtr[0] == '.' || CurPtr[0] == '_')
435      ++CurPtr;
436
437    StrVal.assign(TokStart+1, CurPtr);   // Skip !
438    return lltok::MetadataVar;
439  }
440  return lltok::exclaim;
441}
442
443/// LexIdentifier: Handle several related productions:
444///    Label           [-a-zA-Z$._0-9]+:
445///    IntegerType     i[0-9]+
446///    Keyword         sdiv, float, ...
447///    HexIntConstant  [us]0x[0-9A-Fa-f]+
448lltok::Kind LLLexer::LexIdentifier() {
449  const char *StartChar = CurPtr;
450  const char *IntEnd = CurPtr[-1] == 'i' ? 0 : StartChar;
451  const char *KeywordEnd = 0;
452
453  for (; isLabelChar(*CurPtr); ++CurPtr) {
454    // If we decide this is an integer, remember the end of the sequence.
455    if (!IntEnd && !isdigit(*CurPtr)) IntEnd = CurPtr;
456    if (!KeywordEnd && !isalnum(*CurPtr) && *CurPtr != '_') KeywordEnd = CurPtr;
457  }
458
459  // If we stopped due to a colon, this really is a label.
460  if (*CurPtr == ':') {
461    StrVal.assign(StartChar-1, CurPtr++);
462    return lltok::LabelStr;
463  }
464
465  // Otherwise, this wasn't a label.  If this was valid as an integer type,
466  // return it.
467  if (IntEnd == 0) IntEnd = CurPtr;
468  if (IntEnd != StartChar) {
469    CurPtr = IntEnd;
470    uint64_t NumBits = atoull(StartChar, CurPtr);
471    if (NumBits < IntegerType::MIN_INT_BITS ||
472        NumBits > IntegerType::MAX_INT_BITS) {
473      Error("bitwidth for integer type out of range!");
474      return lltok::Error;
475    }
476    TyVal = IntegerType::get(Context, NumBits);
477    return lltok::Type;
478  }
479
480  // Otherwise, this was a letter sequence.  See which keyword this is.
481  if (KeywordEnd == 0) KeywordEnd = CurPtr;
482  CurPtr = KeywordEnd;
483  --StartChar;
484  unsigned Len = CurPtr-StartChar;
485#define KEYWORD(STR) \
486  if (Len == strlen(#STR) && !memcmp(StartChar, #STR, strlen(#STR))) \
487    return lltok::kw_##STR;
488
489  KEYWORD(begin);   KEYWORD(end);
490  KEYWORD(true);    KEYWORD(false);
491  KEYWORD(declare); KEYWORD(define);
492  KEYWORD(global);  KEYWORD(constant);
493
494  KEYWORD(private);
495  KEYWORD(linker_private);
496  KEYWORD(linker_private_weak);
497  KEYWORD(linker_private_weak_def_auto);
498  KEYWORD(internal);
499  KEYWORD(available_externally);
500  KEYWORD(linkonce);
501  KEYWORD(linkonce_odr);
502  KEYWORD(weak);
503  KEYWORD(weak_odr);
504  KEYWORD(appending);
505  KEYWORD(dllimport);
506  KEYWORD(dllexport);
507  KEYWORD(common);
508  KEYWORD(default);
509  KEYWORD(hidden);
510  KEYWORD(protected);
511  KEYWORD(extern_weak);
512  KEYWORD(external);
513  KEYWORD(thread_local);
514  KEYWORD(zeroinitializer);
515  KEYWORD(undef);
516  KEYWORD(null);
517  KEYWORD(to);
518  KEYWORD(tail);
519  KEYWORD(target);
520  KEYWORD(triple);
521  KEYWORD(deplibs);
522  KEYWORD(datalayout);
523  KEYWORD(volatile);
524  KEYWORD(nuw);
525  KEYWORD(nsw);
526  KEYWORD(exact);
527  KEYWORD(inbounds);
528  KEYWORD(align);
529  KEYWORD(addrspace);
530  KEYWORD(section);
531  KEYWORD(alias);
532  KEYWORD(module);
533  KEYWORD(asm);
534  KEYWORD(sideeffect);
535  KEYWORD(alignstack);
536  KEYWORD(gc);
537
538  KEYWORD(ccc);
539  KEYWORD(fastcc);
540  KEYWORD(coldcc);
541  KEYWORD(x86_stdcallcc);
542  KEYWORD(x86_fastcallcc);
543  KEYWORD(x86_thiscallcc);
544  KEYWORD(arm_apcscc);
545  KEYWORD(arm_aapcscc);
546  KEYWORD(arm_aapcs_vfpcc);
547  KEYWORD(msp430_intrcc);
548  KEYWORD(ptx_kernel);
549  KEYWORD(ptx_device);
550
551  KEYWORD(cc);
552  KEYWORD(c);
553
554  KEYWORD(signext);
555  KEYWORD(zeroext);
556  KEYWORD(inreg);
557  KEYWORD(sret);
558  KEYWORD(nounwind);
559  KEYWORD(noreturn);
560  KEYWORD(noalias);
561  KEYWORD(nocapture);
562  KEYWORD(byval);
563  KEYWORD(nest);
564  KEYWORD(readnone);
565  KEYWORD(readonly);
566
567  KEYWORD(inlinehint);
568  KEYWORD(noinline);
569  KEYWORD(alwaysinline);
570  KEYWORD(optsize);
571  KEYWORD(ssp);
572  KEYWORD(sspreq);
573  KEYWORD(noredzone);
574  KEYWORD(noimplicitfloat);
575  KEYWORD(naked);
576
577  KEYWORD(type);
578  KEYWORD(opaque);
579
580  KEYWORD(eq); KEYWORD(ne); KEYWORD(slt); KEYWORD(sgt); KEYWORD(sle);
581  KEYWORD(sge); KEYWORD(ult); KEYWORD(ugt); KEYWORD(ule); KEYWORD(uge);
582  KEYWORD(oeq); KEYWORD(one); KEYWORD(olt); KEYWORD(ogt); KEYWORD(ole);
583  KEYWORD(oge); KEYWORD(ord); KEYWORD(uno); KEYWORD(ueq); KEYWORD(une);
584
585  KEYWORD(x);
586  KEYWORD(blockaddress);
587#undef KEYWORD
588
589  // Keywords for types.
590#define TYPEKEYWORD(STR, LLVMTY) \
591  if (Len == strlen(STR) && !memcmp(StartChar, STR, strlen(STR))) { \
592    TyVal = LLVMTY; return lltok::Type; }
593  TYPEKEYWORD("void",      Type::getVoidTy(Context));
594  TYPEKEYWORD("float",     Type::getFloatTy(Context));
595  TYPEKEYWORD("double",    Type::getDoubleTy(Context));
596  TYPEKEYWORD("x86_fp80",  Type::getX86_FP80Ty(Context));
597  TYPEKEYWORD("fp128",     Type::getFP128Ty(Context));
598  TYPEKEYWORD("ppc_fp128", Type::getPPC_FP128Ty(Context));
599  TYPEKEYWORD("label",     Type::getLabelTy(Context));
600  TYPEKEYWORD("metadata",  Type::getMetadataTy(Context));
601  TYPEKEYWORD("x86_mmx",   Type::getX86_MMXTy(Context));
602#undef TYPEKEYWORD
603
604  // Handle special forms for autoupgrading.  Drop these in LLVM 3.0.  This is
605  // to avoid conflicting with the sext/zext instructions, below.
606  if (Len == 4 && !memcmp(StartChar, "sext", 4)) {
607    // Scan CurPtr ahead, seeing if there is just whitespace before the newline.
608    if (JustWhitespaceNewLine(CurPtr))
609      return lltok::kw_signext;
610  } else if (Len == 4 && !memcmp(StartChar, "zext", 4)) {
611    // Scan CurPtr ahead, seeing if there is just whitespace before the newline.
612    if (JustWhitespaceNewLine(CurPtr))
613      return lltok::kw_zeroext;
614  } else if (Len == 6 && !memcmp(StartChar, "malloc", 6)) {
615    // FIXME: Remove in LLVM 3.0.
616    // Autoupgrade malloc instruction.
617    return lltok::kw_malloc;
618  } else if (Len == 4 && !memcmp(StartChar, "free", 4)) {
619    // FIXME: Remove in LLVM 3.0.
620    // Autoupgrade malloc instruction.
621    return lltok::kw_free;
622  }
623
624  // Keywords for instructions.
625#define INSTKEYWORD(STR, Enum) \
626  if (Len == strlen(#STR) && !memcmp(StartChar, #STR, strlen(#STR))) { \
627    UIntVal = Instruction::Enum; return lltok::kw_##STR; }
628
629  INSTKEYWORD(add,   Add);  INSTKEYWORD(fadd,   FAdd);
630  INSTKEYWORD(sub,   Sub);  INSTKEYWORD(fsub,   FSub);
631  INSTKEYWORD(mul,   Mul);  INSTKEYWORD(fmul,   FMul);
632  INSTKEYWORD(udiv,  UDiv); INSTKEYWORD(sdiv,  SDiv); INSTKEYWORD(fdiv,  FDiv);
633  INSTKEYWORD(urem,  URem); INSTKEYWORD(srem,  SRem); INSTKEYWORD(frem,  FRem);
634  INSTKEYWORD(shl,   Shl);  INSTKEYWORD(lshr,  LShr); INSTKEYWORD(ashr,  AShr);
635  INSTKEYWORD(and,   And);  INSTKEYWORD(or,    Or);   INSTKEYWORD(xor,   Xor);
636  INSTKEYWORD(icmp,  ICmp); INSTKEYWORD(fcmp,  FCmp);
637
638  INSTKEYWORD(phi,         PHI);
639  INSTKEYWORD(call,        Call);
640  INSTKEYWORD(trunc,       Trunc);
641  INSTKEYWORD(zext,        ZExt);
642  INSTKEYWORD(sext,        SExt);
643  INSTKEYWORD(fptrunc,     FPTrunc);
644  INSTKEYWORD(fpext,       FPExt);
645  INSTKEYWORD(uitofp,      UIToFP);
646  INSTKEYWORD(sitofp,      SIToFP);
647  INSTKEYWORD(fptoui,      FPToUI);
648  INSTKEYWORD(fptosi,      FPToSI);
649  INSTKEYWORD(inttoptr,    IntToPtr);
650  INSTKEYWORD(ptrtoint,    PtrToInt);
651  INSTKEYWORD(bitcast,     BitCast);
652  INSTKEYWORD(select,      Select);
653  INSTKEYWORD(va_arg,      VAArg);
654  INSTKEYWORD(ret,         Ret);
655  INSTKEYWORD(br,          Br);
656  INSTKEYWORD(switch,      Switch);
657  INSTKEYWORD(indirectbr,  IndirectBr);
658  INSTKEYWORD(invoke,      Invoke);
659  INSTKEYWORD(unwind,      Unwind);
660  INSTKEYWORD(unreachable, Unreachable);
661
662  INSTKEYWORD(alloca,      Alloca);
663  INSTKEYWORD(load,        Load);
664  INSTKEYWORD(store,       Store);
665  INSTKEYWORD(getelementptr, GetElementPtr);
666
667  INSTKEYWORD(extractelement, ExtractElement);
668  INSTKEYWORD(insertelement,  InsertElement);
669  INSTKEYWORD(shufflevector,  ShuffleVector);
670  INSTKEYWORD(getresult,      ExtractValue);
671  INSTKEYWORD(extractvalue,   ExtractValue);
672  INSTKEYWORD(insertvalue,    InsertValue);
673#undef INSTKEYWORD
674
675  // Check for [us]0x[0-9A-Fa-f]+ which are Hexadecimal constant generated by
676  // the CFE to avoid forcing it to deal with 64-bit numbers.
677  if ((TokStart[0] == 'u' || TokStart[0] == 's') &&
678      TokStart[1] == '0' && TokStart[2] == 'x' && isxdigit(TokStart[3])) {
679    int len = CurPtr-TokStart-3;
680    uint32_t bits = len * 4;
681    APInt Tmp(bits, StringRef(TokStart+3, len), 16);
682    uint32_t activeBits = Tmp.getActiveBits();
683    if (activeBits > 0 && activeBits < bits)
684      Tmp.trunc(activeBits);
685    APSIntVal = APSInt(Tmp, TokStart[0] == 'u');
686    return lltok::APSInt;
687  }
688
689  // If this is "cc1234", return this as just "cc".
690  if (TokStart[0] == 'c' && TokStart[1] == 'c') {
691    CurPtr = TokStart+2;
692    return lltok::kw_cc;
693  }
694
695  // If this starts with "call", return it as CALL.  This is to support old
696  // broken .ll files.  FIXME: remove this with LLVM 3.0.
697  if (CurPtr-TokStart > 4 && !memcmp(TokStart, "call", 4)) {
698    CurPtr = TokStart+4;
699    UIntVal = Instruction::Call;
700    return lltok::kw_call;
701  }
702
703  // Finally, if this isn't known, return an error.
704  CurPtr = TokStart+1;
705  return lltok::Error;
706}
707
708
709/// Lex0x: Handle productions that start with 0x, knowing that it matches and
710/// that this is not a label:
711///    HexFPConstant     0x[0-9A-Fa-f]+
712///    HexFP80Constant   0xK[0-9A-Fa-f]+
713///    HexFP128Constant  0xL[0-9A-Fa-f]+
714///    HexPPC128Constant 0xM[0-9A-Fa-f]+
715lltok::Kind LLLexer::Lex0x() {
716  CurPtr = TokStart + 2;
717
718  char Kind;
719  if (CurPtr[0] >= 'K' && CurPtr[0] <= 'M') {
720    Kind = *CurPtr++;
721  } else {
722    Kind = 'J';
723  }
724
725  if (!isxdigit(CurPtr[0])) {
726    // Bad token, return it as an error.
727    CurPtr = TokStart+1;
728    return lltok::Error;
729  }
730
731  while (isxdigit(CurPtr[0]))
732    ++CurPtr;
733
734  if (Kind == 'J') {
735    // HexFPConstant - Floating point constant represented in IEEE format as a
736    // hexadecimal number for when exponential notation is not precise enough.
737    // Float and double only.
738    APFloatVal = APFloat(BitsToDouble(HexIntToVal(TokStart+2, CurPtr)));
739    return lltok::APFloat;
740  }
741
742  uint64_t Pair[2];
743  switch (Kind) {
744  default: llvm_unreachable("Unknown kind!");
745  case 'K':
746    // F80HexFPConstant - x87 long double in hexadecimal format (10 bytes)
747    FP80HexToIntPair(TokStart+3, CurPtr, Pair);
748    APFloatVal = APFloat(APInt(80, 2, Pair));
749    return lltok::APFloat;
750  case 'L':
751    // F128HexFPConstant - IEEE 128-bit in hexadecimal format (16 bytes)
752    HexToIntPair(TokStart+3, CurPtr, Pair);
753    APFloatVal = APFloat(APInt(128, 2, Pair), true);
754    return lltok::APFloat;
755  case 'M':
756    // PPC128HexFPConstant - PowerPC 128-bit in hexadecimal format (16 bytes)
757    HexToIntPair(TokStart+3, CurPtr, Pair);
758    APFloatVal = APFloat(APInt(128, 2, Pair));
759    return lltok::APFloat;
760  }
761}
762
763/// LexIdentifier: Handle several related productions:
764///    Label             [-a-zA-Z$._0-9]+:
765///    NInteger          -[0-9]+
766///    FPConstant        [-+]?[0-9]+[.][0-9]*([eE][-+]?[0-9]+)?
767///    PInteger          [0-9]+
768///    HexFPConstant     0x[0-9A-Fa-f]+
769///    HexFP80Constant   0xK[0-9A-Fa-f]+
770///    HexFP128Constant  0xL[0-9A-Fa-f]+
771///    HexPPC128Constant 0xM[0-9A-Fa-f]+
772lltok::Kind LLLexer::LexDigitOrNegative() {
773  // If the letter after the negative is a number, this is probably a label.
774  if (!isdigit(TokStart[0]) && !isdigit(CurPtr[0])) {
775    // Okay, this is not a number after the -, it's probably a label.
776    if (const char *End = isLabelTail(CurPtr)) {
777      StrVal.assign(TokStart, End-1);
778      CurPtr = End;
779      return lltok::LabelStr;
780    }
781
782    return lltok::Error;
783  }
784
785  // At this point, it is either a label, int or fp constant.
786
787  // Skip digits, we have at least one.
788  for (; isdigit(CurPtr[0]); ++CurPtr)
789    /*empty*/;
790
791  // Check to see if this really is a label afterall, e.g. "-1:".
792  if (isLabelChar(CurPtr[0]) || CurPtr[0] == ':') {
793    if (const char *End = isLabelTail(CurPtr)) {
794      StrVal.assign(TokStart, End-1);
795      CurPtr = End;
796      return lltok::LabelStr;
797    }
798  }
799
800  // If the next character is a '.', then it is a fp value, otherwise its
801  // integer.
802  if (CurPtr[0] != '.') {
803    if (TokStart[0] == '0' && TokStart[1] == 'x')
804      return Lex0x();
805    unsigned Len = CurPtr-TokStart;
806    uint32_t numBits = ((Len * 64) / 19) + 2;
807    APInt Tmp(numBits, StringRef(TokStart, Len), 10);
808    if (TokStart[0] == '-') {
809      uint32_t minBits = Tmp.getMinSignedBits();
810      if (minBits > 0 && minBits < numBits)
811        Tmp.trunc(minBits);
812      APSIntVal = APSInt(Tmp, false);
813    } else {
814      uint32_t activeBits = Tmp.getActiveBits();
815      if (activeBits > 0 && activeBits < numBits)
816        Tmp.trunc(activeBits);
817      APSIntVal = APSInt(Tmp, true);
818    }
819    return lltok::APSInt;
820  }
821
822  ++CurPtr;
823
824  // Skip over [0-9]*([eE][-+]?[0-9]+)?
825  while (isdigit(CurPtr[0])) ++CurPtr;
826
827  if (CurPtr[0] == 'e' || CurPtr[0] == 'E') {
828    if (isdigit(CurPtr[1]) ||
829        ((CurPtr[1] == '-' || CurPtr[1] == '+') && isdigit(CurPtr[2]))) {
830      CurPtr += 2;
831      while (isdigit(CurPtr[0])) ++CurPtr;
832    }
833  }
834
835  APFloatVal = APFloat(atof(TokStart));
836  return lltok::APFloat;
837}
838
839///    FPConstant  [-+]?[0-9]+[.][0-9]*([eE][-+]?[0-9]+)?
840lltok::Kind LLLexer::LexPositive() {
841  // If the letter after the negative is a number, this is probably not a
842  // label.
843  if (!isdigit(CurPtr[0]))
844    return lltok::Error;
845
846  // Skip digits.
847  for (++CurPtr; isdigit(CurPtr[0]); ++CurPtr)
848    /*empty*/;
849
850  // At this point, we need a '.'.
851  if (CurPtr[0] != '.') {
852    CurPtr = TokStart+1;
853    return lltok::Error;
854  }
855
856  ++CurPtr;
857
858  // Skip over [0-9]*([eE][-+]?[0-9]+)?
859  while (isdigit(CurPtr[0])) ++CurPtr;
860
861  if (CurPtr[0] == 'e' || CurPtr[0] == 'E') {
862    if (isdigit(CurPtr[1]) ||
863        ((CurPtr[1] == '-' || CurPtr[1] == '+') && isdigit(CurPtr[2]))) {
864      CurPtr += 2;
865      while (isdigit(CurPtr[0])) ++CurPtr;
866    }
867  }
868
869  APFloatVal = APFloat(atof(TokStart));
870  return lltok::APFloat;
871}
872