LLLexer.cpp revision 385f5a99ecc7fee48a7539bc63d3e1d3b5089c0d
1//===- LLLexer.cpp - Lexer for .ll Files ----------------------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// Implement the Lexer for .ll files.
11//
12//===----------------------------------------------------------------------===//
13
14#include "LLLexer.h"
15#include "llvm/DerivedTypes.h"
16#include "llvm/Instruction.h"
17#include "llvm/Support/MemoryBuffer.h"
18#include "llvm/Support/MathExtras.h"
19#include "llvm/Support/raw_ostream.h"
20#include "llvm/Assembly/Parser.h"
21#include <cstdlib>
22#include <cstring>
23using namespace llvm;
24
25bool LLLexer::Error(LocTy ErrorLoc, const std::string &Msg) const {
26  // Scan backward to find the start of the line.
27  const char *LineStart = ErrorLoc;
28  while (LineStart != CurBuf->getBufferStart() &&
29         LineStart[-1] != '\n' && LineStart[-1] != '\r')
30    --LineStart;
31  // Get the end of the line.
32  const char *LineEnd = ErrorLoc;
33  while (LineEnd != CurBuf->getBufferEnd() &&
34         LineEnd[0] != '\n' && LineEnd[0] != '\r')
35    ++LineEnd;
36
37  unsigned LineNo = 1;
38  for (const char *FP = CurBuf->getBufferStart(); FP != ErrorLoc; ++FP)
39    if (*FP == '\n') ++LineNo;
40
41  std::string LineContents(LineStart, LineEnd);
42  ErrorInfo.setError(Msg, LineNo, ErrorLoc-LineStart, LineContents);
43  return true;
44}
45
46//===----------------------------------------------------------------------===//
47// Helper functions.
48//===----------------------------------------------------------------------===//
49
50// atoull - Convert an ascii string of decimal digits into the unsigned long
51// long representation... this does not have to do input error checking,
52// because we know that the input will be matched by a suitable regex...
53//
54uint64_t LLLexer::atoull(const char *Buffer, const char *End) {
55  uint64_t Result = 0;
56  for (; Buffer != End; Buffer++) {
57    uint64_t OldRes = Result;
58    Result *= 10;
59    Result += *Buffer-'0';
60    if (Result < OldRes) {  // Uh, oh, overflow detected!!!
61      Error("constant bigger than 64 bits detected!");
62      return 0;
63    }
64  }
65  return Result;
66}
67
68uint64_t LLLexer::HexIntToVal(const char *Buffer, const char *End) {
69  uint64_t Result = 0;
70  for (; Buffer != End; ++Buffer) {
71    uint64_t OldRes = Result;
72    Result *= 16;
73    char C = *Buffer;
74    if (C >= '0' && C <= '9')
75      Result += C-'0';
76    else if (C >= 'A' && C <= 'F')
77      Result += C-'A'+10;
78    else if (C >= 'a' && C <= 'f')
79      Result += C-'a'+10;
80
81    if (Result < OldRes) {   // Uh, oh, overflow detected!!!
82      Error("constant bigger than 64 bits detected!");
83      return 0;
84    }
85  }
86  return Result;
87}
88
89void LLLexer::HexToIntPair(const char *Buffer, const char *End,
90                           uint64_t Pair[2]) {
91  Pair[0] = 0;
92  for (int i=0; i<16; i++, Buffer++) {
93    assert(Buffer != End);
94    Pair[0] *= 16;
95    char C = *Buffer;
96    if (C >= '0' && C <= '9')
97      Pair[0] += C-'0';
98    else if (C >= 'A' && C <= 'F')
99      Pair[0] += C-'A'+10;
100    else if (C >= 'a' && C <= 'f')
101      Pair[0] += C-'a'+10;
102  }
103  Pair[1] = 0;
104  for (int i=0; i<16 && Buffer != End; i++, Buffer++) {
105    Pair[1] *= 16;
106    char C = *Buffer;
107    if (C >= '0' && C <= '9')
108      Pair[1] += C-'0';
109    else if (C >= 'A' && C <= 'F')
110      Pair[1] += C-'A'+10;
111    else if (C >= 'a' && C <= 'f')
112      Pair[1] += C-'a'+10;
113  }
114  if (Buffer != End)
115    Error("constant bigger than 128 bits detected!");
116}
117
118/// FP80HexToIntPair - translate an 80 bit FP80 number (20 hexits) into
119/// { low64, high16 } as usual for an APInt.
120void LLLexer::FP80HexToIntPair(const char *Buffer, const char *End,
121                           uint64_t Pair[2]) {
122  Pair[1] = 0;
123  for (int i=0; i<4 && Buffer != End; i++, Buffer++) {
124    assert(Buffer != End);
125    Pair[1] *= 16;
126    char C = *Buffer;
127    if (C >= '0' && C <= '9')
128      Pair[1] += C-'0';
129    else if (C >= 'A' && C <= 'F')
130      Pair[1] += C-'A'+10;
131    else if (C >= 'a' && C <= 'f')
132      Pair[1] += C-'a'+10;
133  }
134  Pair[0] = 0;
135  for (int i=0; i<16; i++, Buffer++) {
136    Pair[0] *= 16;
137    char C = *Buffer;
138    if (C >= '0' && C <= '9')
139      Pair[0] += C-'0';
140    else if (C >= 'A' && C <= 'F')
141      Pair[0] += C-'A'+10;
142    else if (C >= 'a' && C <= 'f')
143      Pair[0] += C-'a'+10;
144  }
145  if (Buffer != End)
146    Error("constant bigger than 128 bits detected!");
147}
148
// UnEscapeHexDigit - Return the numeric value of the hexadecimal digit C, or
// -1 if C is not a hexadecimal digit.
static int UnEscapeHexDigit(char C) {
  if (C >= '0' && C <= '9') return C-'0';
  if (C >= 'A' && C <= 'F') return C-'A'+10;
  if (C >= 'a' && C <= 'f') return C-'a'+10;
  return -1;
}

// UnEscapeLexed - Run through the specified string and rewrite it in place:
//   "\\" (two backslashes)  becomes a single backslash,
//   "\xx" (two hex digits)  becomes the character with that value,
// and every other character, including a backslash that starts neither form,
// is copied unchanged.  The string is shrunk to the decoded length.
//
// The hex digits are decoded directly, so no character outside the string's
// contents is ever written and no <cctype> function sees a negative value.
static void UnEscapeLexed(std::string &Str) {
  if (Str.empty()) return;

  char *Buffer = &Str[0], *EndBuffer = Buffer+Str.size();
  char *BOut = Buffer;
  for (char *BIn = Buffer; BIn != EndBuffer; ) {
    if (BIn[0] == '\\') {
      // The output cursor never overtakes the input cursor, so reading ahead
      // before writing is safe.
      if (EndBuffer-BIn >= 2 && BIn[1] == '\\') {
        *BOut++ = '\\'; // Two \ becomes one
        BIn += 2;
        continue;
      }
      if (EndBuffer-BIn >= 3) {
        const int Hi = UnEscapeHexDigit(BIn[1]);
        const int Lo = UnEscapeHexDigit(BIn[2]);
        if (Hi >= 0 && Lo >= 0) {
          *BOut++ = (char)(Hi*16 + Lo);     // Convert to number
          BIn += 3;                         // Skip over handled chars
          continue;
        }
      }
    }
    *BOut++ = *BIn++;
  }
  Str.resize(BOut-Buffer);
}
176
/// isLabelChar - Return true for [-a-zA-Z$._0-9].
///
/// The character is converted to unsigned char before it is handed to
/// isalnum: the <cctype> functions are only defined for values representable
/// as unsigned char (or EOF), and a plain char holding a byte >= 0x80 may be
/// negative.
static bool isLabelChar(char C) {
  return isalnum(static_cast<unsigned char>(C)) ||
         C == '-' || C == '$' || C == '.' || C == '_';
}
181
182
183/// isLabelTail - Return true if this pointer points to a valid end of a label.
184static const char *isLabelTail(const char *CurPtr) {
185  while (1) {
186    if (CurPtr[0] == ':') return CurPtr+1;
187    if (!isLabelChar(CurPtr[0])) return 0;
188    ++CurPtr;
189  }
190}
191
192
193
194//===----------------------------------------------------------------------===//
195// Lexer definition.
196//===----------------------------------------------------------------------===//
197
198LLLexer::LLLexer(MemoryBuffer *StartBuf, ParseError &Err)
199  : CurBuf(StartBuf), ErrorInfo(Err), APFloatVal(0.0) {
200  CurPtr = CurBuf->getBufferStart();
201}
202
203std::string LLLexer::getFilename() const {
204  return CurBuf->getBufferIdentifier();
205}
206
207int LLLexer::getNextChar() {
208  char CurChar = *CurPtr++;
209  switch (CurChar) {
210  default: return (unsigned char)CurChar;
211  case 0:
212    // A nul character in the stream is either the end of the current buffer or
213    // a random nul in the file.  Disambiguate that here.
214    if (CurPtr-1 != CurBuf->getBufferEnd())
215      return 0;  // Just whitespace.
216
217    // Otherwise, return end of file.
218    --CurPtr;  // Another call to lex will return EOF again.
219    return EOF;
220  }
221}
222
223
224lltok::Kind LLLexer::LexToken() {
225  TokStart = CurPtr;
226
227  int CurChar = getNextChar();
228  switch (CurChar) {
229  default:
230    // Handle letters: [a-zA-Z_]
231    if (isalpha(CurChar) || CurChar == '_')
232      return LexIdentifier();
233
234    return lltok::Error;
235  case EOF: return lltok::Eof;
236  case 0:
237  case ' ':
238  case '\t':
239  case '\n':
240  case '\r':
241    // Ignore whitespace.
242    return LexToken();
243  case '+': return LexPositive();
244  case '@': return LexAt();
245  case '%': return LexPercent();
246  case '"': return LexQuote();
247  case '.':
248    if (const char *Ptr = isLabelTail(CurPtr)) {
249      CurPtr = Ptr;
250      StrVal.assign(TokStart, CurPtr-1);
251      return lltok::LabelStr;
252    }
253    if (CurPtr[0] == '.' && CurPtr[1] == '.') {
254      CurPtr += 2;
255      return lltok::dotdotdot;
256    }
257    return lltok::Error;
258  case '$':
259    if (const char *Ptr = isLabelTail(CurPtr)) {
260      CurPtr = Ptr;
261      StrVal.assign(TokStart, CurPtr-1);
262      return lltok::LabelStr;
263    }
264    return lltok::Error;
265  case ';':
266    SkipLineComment();
267    return LexToken();
268  case '!': return lltok::Metadata;
269  case '0': case '1': case '2': case '3': case '4':
270  case '5': case '6': case '7': case '8': case '9':
271  case '-':
272    return LexDigitOrNegative();
273  case '=': return lltok::equal;
274  case '[': return lltok::lsquare;
275  case ']': return lltok::rsquare;
276  case '{': return lltok::lbrace;
277  case '}': return lltok::rbrace;
278  case '<': return lltok::less;
279  case '>': return lltok::greater;
280  case '(': return lltok::lparen;
281  case ')': return lltok::rparen;
282  case ',': return lltok::comma;
283  case '*': return lltok::star;
284  case '\\': return lltok::backslash;
285  }
286}
287
288void LLLexer::SkipLineComment() {
289  while (1) {
290    if (CurPtr[0] == '\n' || CurPtr[0] == '\r' || getNextChar() == EOF)
291      return;
292  }
293}
294
295/// LexAt - Lex all tokens that start with an @ character:
296///   GlobalVar   @\"[^\"]*\"
297///   GlobalVar   @[-a-zA-Z$._][-a-zA-Z$._0-9]*
298///   GlobalVarID @[0-9]+
299lltok::Kind LLLexer::LexAt() {
300  // Handle AtStringConstant: @\"[^\"]*\"
301  if (CurPtr[0] == '"') {
302    ++CurPtr;
303
304    while (1) {
305      int CurChar = getNextChar();
306
307      if (CurChar == EOF) {
308        Error("end of file in global variable name");
309        return lltok::Error;
310      }
311      if (CurChar == '"') {
312        StrVal.assign(TokStart+2, CurPtr-1);
313        UnEscapeLexed(StrVal);
314        return lltok::GlobalVar;
315      }
316    }
317  }
318
319  // Handle GlobalVarName: @[-a-zA-Z$._][-a-zA-Z$._0-9]*
320  if (isalpha(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' ||
321      CurPtr[0] == '.' || CurPtr[0] == '_') {
322    ++CurPtr;
323    while (isalnum(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' ||
324           CurPtr[0] == '.' || CurPtr[0] == '_')
325      ++CurPtr;
326
327    StrVal.assign(TokStart+1, CurPtr);   // Skip @
328    return lltok::GlobalVar;
329  }
330
331  // Handle GlobalVarID: @[0-9]+
332  if (isdigit(CurPtr[0])) {
333    for (++CurPtr; isdigit(CurPtr[0]); ++CurPtr)
334      /*empty*/;
335
336    uint64_t Val = atoull(TokStart+1, CurPtr);
337    if ((unsigned)Val != Val)
338      Error("invalid value number (too large)!");
339    UIntVal = unsigned(Val);
340    return lltok::GlobalID;
341  }
342
343  return lltok::Error;
344}
345
346
347/// LexPercent - Lex all tokens that start with a % character:
348///   LocalVar   ::= %\"[^\"]*\"
349///   LocalVar   ::= %[-a-zA-Z$._][-a-zA-Z$._0-9]*
350///   LocalVarID ::= %[0-9]+
351lltok::Kind LLLexer::LexPercent() {
352  // Handle LocalVarName: %\"[^\"]*\"
353  if (CurPtr[0] == '"') {
354    ++CurPtr;
355
356    while (1) {
357      int CurChar = getNextChar();
358
359      if (CurChar == EOF) {
360        Error("end of file in string constant");
361        return lltok::Error;
362      }
363      if (CurChar == '"') {
364        StrVal.assign(TokStart+2, CurPtr-1);
365        UnEscapeLexed(StrVal);
366        return lltok::LocalVar;
367      }
368    }
369  }
370
371  // Handle LocalVarName: %[-a-zA-Z$._][-a-zA-Z$._0-9]*
372  if (isalpha(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' ||
373      CurPtr[0] == '.' || CurPtr[0] == '_') {
374    ++CurPtr;
375    while (isalnum(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' ||
376           CurPtr[0] == '.' || CurPtr[0] == '_')
377      ++CurPtr;
378
379    StrVal.assign(TokStart+1, CurPtr);   // Skip %
380    return lltok::LocalVar;
381  }
382
383  // Handle LocalVarID: %[0-9]+
384  if (isdigit(CurPtr[0])) {
385    for (++CurPtr; isdigit(CurPtr[0]); ++CurPtr)
386      /*empty*/;
387
388    uint64_t Val = atoull(TokStart+1, CurPtr);
389    if ((unsigned)Val != Val)
390      Error("invalid value number (too large)!");
391    UIntVal = unsigned(Val);
392    return lltok::LocalVarID;
393  }
394
395  return lltok::Error;
396}
397
398/// LexQuote - Lex all tokens that start with a " character:
399///   QuoteLabel        "[^"]+":
400///   StringConstant    "[^"]*"
401lltok::Kind LLLexer::LexQuote() {
402  while (1) {
403    int CurChar = getNextChar();
404
405    if (CurChar == EOF) {
406      Error("end of file in quoted string");
407      return lltok::Error;
408    }
409
410    if (CurChar != '"') continue;
411
412    if (CurPtr[0] != ':') {
413      StrVal.assign(TokStart+1, CurPtr-1);
414      UnEscapeLexed(StrVal);
415      return lltok::StringConstant;
416    }
417
418    ++CurPtr;
419    StrVal.assign(TokStart+1, CurPtr-2);
420    UnEscapeLexed(StrVal);
421    return lltok::LabelStr;
422  }
423}
424
/// JustWhitespaceNewLine - Return true if only spaces and tabs separate Ptr
/// from the next '\n' or '\r'.  On success Ptr is advanced to that line
/// terminator; on failure Ptr is left untouched.
static bool JustWhitespaceNewLine(const char *&Ptr) {
  const char *Scan = Ptr;
  for (; *Scan == ' ' || *Scan == '\t'; ++Scan) {
    // Skip horizontal whitespace.
  }
  if (*Scan != '\n' && *Scan != '\r')
    return false;
  Ptr = Scan;
  return true;
}
435
436
437/// LexIdentifier: Handle several related productions:
438///    Label           [-a-zA-Z$._0-9]+:
439///    IntegerType     i[0-9]+
440///    Keyword         sdiv, float, ...
441///    HexIntConstant  [us]0x[0-9A-Fa-f]+
442lltok::Kind LLLexer::LexIdentifier() {
443  const char *StartChar = CurPtr;
444  const char *IntEnd = CurPtr[-1] == 'i' ? 0 : StartChar;
445  const char *KeywordEnd = 0;
446
447  for (; isLabelChar(*CurPtr); ++CurPtr) {
448    // If we decide this is an integer, remember the end of the sequence.
449    if (!IntEnd && !isdigit(*CurPtr)) IntEnd = CurPtr;
450    if (!KeywordEnd && !isalnum(*CurPtr) && *CurPtr != '_') KeywordEnd = CurPtr;
451  }
452
453  // If we stopped due to a colon, this really is a label.
454  if (*CurPtr == ':') {
455    StrVal.assign(StartChar-1, CurPtr++);
456    return lltok::LabelStr;
457  }
458
459  // Otherwise, this wasn't a label.  If this was valid as an integer type,
460  // return it.
461  if (IntEnd == 0) IntEnd = CurPtr;
462  if (IntEnd != StartChar) {
463    CurPtr = IntEnd;
464    uint64_t NumBits = atoull(StartChar, CurPtr);
465    if (NumBits < IntegerType::MIN_INT_BITS ||
466        NumBits > IntegerType::MAX_INT_BITS) {
467      Error("bitwidth for integer type out of range!");
468      return lltok::Error;
469    }
470    TyVal = IntegerType::get(NumBits);
471    return lltok::Type;
472  }
473
474  // Otherwise, this was a letter sequence.  See which keyword this is.
475  if (KeywordEnd == 0) KeywordEnd = CurPtr;
476  CurPtr = KeywordEnd;
477  --StartChar;
478  unsigned Len = CurPtr-StartChar;
479#define KEYWORD(STR) \
480  if (Len == strlen(#STR) && !memcmp(StartChar, #STR, strlen(#STR))) \
481    return lltok::kw_##STR;
482
483  KEYWORD(begin);   KEYWORD(end);
484  KEYWORD(true);    KEYWORD(false);
485  KEYWORD(declare); KEYWORD(define);
486  KEYWORD(global);  KEYWORD(constant);
487
488  KEYWORD(private);
489  KEYWORD(internal);
490  KEYWORD(available_externally);
491  KEYWORD(linkonce);
492  KEYWORD(linkonce_odr);
493  KEYWORD(weak);
494  KEYWORD(weak_odr);
495  KEYWORD(appending);
496  KEYWORD(dllimport);
497  KEYWORD(dllexport);
498  KEYWORD(common);
499  KEYWORD(default);
500  KEYWORD(hidden);
501  KEYWORD(protected);
502  KEYWORD(extern_weak);
503  KEYWORD(external);
504  KEYWORD(thread_local);
505  KEYWORD(zeroinitializer);
506  KEYWORD(undef);
507  KEYWORD(null);
508  KEYWORD(to);
509  KEYWORD(tail);
510  KEYWORD(target);
511  KEYWORD(triple);
512  KEYWORD(deplibs);
513  KEYWORD(datalayout);
514  KEYWORD(volatile);
515  KEYWORD(align);
516  KEYWORD(addrspace);
517  KEYWORD(section);
518  KEYWORD(alias);
519  KEYWORD(module);
520  KEYWORD(asm);
521  KEYWORD(sideeffect);
522  KEYWORD(gc);
523
524  KEYWORD(ccc);
525  KEYWORD(fastcc);
526  KEYWORD(coldcc);
527  KEYWORD(x86_stdcallcc);
528  KEYWORD(x86_fastcallcc);
529  KEYWORD(arm_apcscc);
530  KEYWORD(arm_aapcscc);
531  KEYWORD(arm_aapcs_vfpcc);
532
533  KEYWORD(cc);
534  KEYWORD(c);
535
536  KEYWORD(signext);
537  KEYWORD(zeroext);
538  KEYWORD(inreg);
539  KEYWORD(sret);
540  KEYWORD(nounwind);
541  KEYWORD(noreturn);
542  KEYWORD(noalias);
543  KEYWORD(nocapture);
544  KEYWORD(byval);
545  KEYWORD(nest);
546  KEYWORD(readnone);
547  KEYWORD(readonly);
548
549  KEYWORD(noinline);
550  KEYWORD(alwaysinline);
551  KEYWORD(optsize);
552  KEYWORD(ssp);
553  KEYWORD(sspreq);
554  KEYWORD(noredzone);
555  KEYWORD(noimplicitfloat);
556
557  KEYWORD(type);
558  KEYWORD(opaque);
559
560  KEYWORD(eq); KEYWORD(ne); KEYWORD(slt); KEYWORD(sgt); KEYWORD(sle);
561  KEYWORD(sge); KEYWORD(ult); KEYWORD(ugt); KEYWORD(ule); KEYWORD(uge);
562  KEYWORD(oeq); KEYWORD(one); KEYWORD(olt); KEYWORD(ogt); KEYWORD(ole);
563  KEYWORD(oge); KEYWORD(ord); KEYWORD(uno); KEYWORD(ueq); KEYWORD(une);
564
565  KEYWORD(x);
566#undef KEYWORD
567
568  // Keywords for types.
569#define TYPEKEYWORD(STR, LLVMTY) \
570  if (Len == strlen(STR) && !memcmp(StartChar, STR, strlen(STR))) { \
571    TyVal = LLVMTY; return lltok::Type; }
572  TYPEKEYWORD("void",      Type::VoidTy);
573  TYPEKEYWORD("float",     Type::FloatTy);
574  TYPEKEYWORD("double",    Type::DoubleTy);
575  TYPEKEYWORD("x86_fp80",  Type::X86_FP80Ty);
576  TYPEKEYWORD("fp128",     Type::FP128Ty);
577  TYPEKEYWORD("ppc_fp128", Type::PPC_FP128Ty);
578  TYPEKEYWORD("label",     Type::LabelTy);
579  TYPEKEYWORD("metadata",  Type::MetadataTy);
580#undef TYPEKEYWORD
581
582  // Handle special forms for autoupgrading.  Drop these in LLVM 3.0.  This is
583  // to avoid conflicting with the sext/zext instructions, below.
584  if (Len == 4 && !memcmp(StartChar, "sext", 4)) {
585    // Scan CurPtr ahead, seeing if there is just whitespace before the newline.
586    if (JustWhitespaceNewLine(CurPtr))
587      return lltok::kw_signext;
588  } else if (Len == 4 && !memcmp(StartChar, "zext", 4)) {
589    // Scan CurPtr ahead, seeing if there is just whitespace before the newline.
590    if (JustWhitespaceNewLine(CurPtr))
591      return lltok::kw_zeroext;
592  }
593
594  // Keywords for instructions.
595#define INSTKEYWORD(STR, Enum) \
596  if (Len == strlen(#STR) && !memcmp(StartChar, #STR, strlen(#STR))) { \
597    UIntVal = Instruction::Enum; return lltok::kw_##STR; }
598
599  INSTKEYWORD(add,   Add);  INSTKEYWORD(fadd,   FAdd);
600  INSTKEYWORD(sub,   Sub);  INSTKEYWORD(fsub,   FSub);
601  INSTKEYWORD(mul,   Mul);  INSTKEYWORD(fmul,   FMul);
602  INSTKEYWORD(udiv,  UDiv); INSTKEYWORD(sdiv,  SDiv); INSTKEYWORD(fdiv,  FDiv);
603  INSTKEYWORD(urem,  URem); INSTKEYWORD(srem,  SRem); INSTKEYWORD(frem,  FRem);
604  INSTKEYWORD(shl,   Shl);  INSTKEYWORD(lshr,  LShr); INSTKEYWORD(ashr,  AShr);
605  INSTKEYWORD(and,   And);  INSTKEYWORD(or,    Or);   INSTKEYWORD(xor,   Xor);
606  INSTKEYWORD(icmp,  ICmp); INSTKEYWORD(fcmp,  FCmp);
607  INSTKEYWORD(vicmp, VICmp); INSTKEYWORD(vfcmp, VFCmp);
608
609  INSTKEYWORD(phi,         PHI);
610  INSTKEYWORD(call,        Call);
611  INSTKEYWORD(trunc,       Trunc);
612  INSTKEYWORD(zext,        ZExt);
613  INSTKEYWORD(sext,        SExt);
614  INSTKEYWORD(fptrunc,     FPTrunc);
615  INSTKEYWORD(fpext,       FPExt);
616  INSTKEYWORD(uitofp,      UIToFP);
617  INSTKEYWORD(sitofp,      SIToFP);
618  INSTKEYWORD(fptoui,      FPToUI);
619  INSTKEYWORD(fptosi,      FPToSI);
620  INSTKEYWORD(inttoptr,    IntToPtr);
621  INSTKEYWORD(ptrtoint,    PtrToInt);
622  INSTKEYWORD(bitcast,     BitCast);
623  INSTKEYWORD(select,      Select);
624  INSTKEYWORD(va_arg,      VAArg);
625  INSTKEYWORD(ret,         Ret);
626  INSTKEYWORD(br,          Br);
627  INSTKEYWORD(switch,      Switch);
628  INSTKEYWORD(invoke,      Invoke);
629  INSTKEYWORD(unwind,      Unwind);
630  INSTKEYWORD(unreachable, Unreachable);
631
632  INSTKEYWORD(malloc,      Malloc);
633  INSTKEYWORD(alloca,      Alloca);
634  INSTKEYWORD(free,        Free);
635  INSTKEYWORD(load,        Load);
636  INSTKEYWORD(store,       Store);
637  INSTKEYWORD(getelementptr, GetElementPtr);
638
639  INSTKEYWORD(extractelement, ExtractElement);
640  INSTKEYWORD(insertelement,  InsertElement);
641  INSTKEYWORD(shufflevector,  ShuffleVector);
642  INSTKEYWORD(getresult,      ExtractValue);
643  INSTKEYWORD(extractvalue,   ExtractValue);
644  INSTKEYWORD(insertvalue,    InsertValue);
645#undef INSTKEYWORD
646
647  // Check for [us]0x[0-9A-Fa-f]+ which are Hexadecimal constant generated by
648  // the CFE to avoid forcing it to deal with 64-bit numbers.
649  if ((TokStart[0] == 'u' || TokStart[0] == 's') &&
650      TokStart[1] == '0' && TokStart[2] == 'x' && isxdigit(TokStart[3])) {
651    int len = CurPtr-TokStart-3;
652    uint32_t bits = len * 4;
653    APInt Tmp(bits, TokStart+3, len, 16);
654    uint32_t activeBits = Tmp.getActiveBits();
655    if (activeBits > 0 && activeBits < bits)
656      Tmp.trunc(activeBits);
657    APSIntVal = APSInt(Tmp, TokStart[0] == 'u');
658    return lltok::APSInt;
659  }
660
661  // If this is "cc1234", return this as just "cc".
662  if (TokStart[0] == 'c' && TokStart[1] == 'c') {
663    CurPtr = TokStart+2;
664    return lltok::kw_cc;
665  }
666
667  // If this starts with "call", return it as CALL.  This is to support old
668  // broken .ll files.  FIXME: remove this with LLVM 3.0.
669  if (CurPtr-TokStart > 4 && !memcmp(TokStart, "call", 4)) {
670    CurPtr = TokStart+4;
671    UIntVal = Instruction::Call;
672    return lltok::kw_call;
673  }
674
675  // Finally, if this isn't known, return an error.
676  CurPtr = TokStart+1;
677  return lltok::Error;
678}
679
680
681/// Lex0x: Handle productions that start with 0x, knowing that it matches and
682/// that this is not a label:
683///    HexFPConstant     0x[0-9A-Fa-f]+
684///    HexFP80Constant   0xK[0-9A-Fa-f]+
685///    HexFP128Constant  0xL[0-9A-Fa-f]+
686///    HexPPC128Constant 0xM[0-9A-Fa-f]+
687lltok::Kind LLLexer::Lex0x() {
688  CurPtr = TokStart + 2;
689
690  char Kind;
691  if (CurPtr[0] >= 'K' && CurPtr[0] <= 'M') {
692    Kind = *CurPtr++;
693  } else {
694    Kind = 'J';
695  }
696
697  if (!isxdigit(CurPtr[0])) {
698    // Bad token, return it as an error.
699    CurPtr = TokStart+1;
700    return lltok::Error;
701  }
702
703  while (isxdigit(CurPtr[0]))
704    ++CurPtr;
705
706  if (Kind == 'J') {
707    // HexFPConstant - Floating point constant represented in IEEE format as a
708    // hexadecimal number for when exponential notation is not precise enough.
709    // Float and double only.
710    APFloatVal = APFloat(BitsToDouble(HexIntToVal(TokStart+2, CurPtr)));
711    return lltok::APFloat;
712  }
713
714  uint64_t Pair[2];
715  switch (Kind) {
716  default: assert(0 && "Unknown kind!");
717  case 'K':
718    // F80HexFPConstant - x87 long double in hexadecimal format (10 bytes)
719    FP80HexToIntPair(TokStart+3, CurPtr, Pair);
720    APFloatVal = APFloat(APInt(80, 2, Pair));
721    return lltok::APFloat;
722  case 'L':
723    // F128HexFPConstant - IEEE 128-bit in hexadecimal format (16 bytes)
724    HexToIntPair(TokStart+3, CurPtr, Pair);
725    APFloatVal = APFloat(APInt(128, 2, Pair), true);
726    return lltok::APFloat;
727  case 'M':
728    // PPC128HexFPConstant - PowerPC 128-bit in hexadecimal format (16 bytes)
729    HexToIntPair(TokStart+3, CurPtr, Pair);
730    APFloatVal = APFloat(APInt(128, 2, Pair));
731    return lltok::APFloat;
732  }
733}
734
735/// LexIdentifier: Handle several related productions:
736///    Label             [-a-zA-Z$._0-9]+:
737///    NInteger          -[0-9]+
738///    FPConstant        [-+]?[0-9]+[.][0-9]*([eE][-+]?[0-9]+)?
739///    PInteger          [0-9]+
740///    HexFPConstant     0x[0-9A-Fa-f]+
741///    HexFP80Constant   0xK[0-9A-Fa-f]+
742///    HexFP128Constant  0xL[0-9A-Fa-f]+
743///    HexPPC128Constant 0xM[0-9A-Fa-f]+
744lltok::Kind LLLexer::LexDigitOrNegative() {
745  // If the letter after the negative is a number, this is probably a label.
746  if (!isdigit(TokStart[0]) && !isdigit(CurPtr[0])) {
747    // Okay, this is not a number after the -, it's probably a label.
748    if (const char *End = isLabelTail(CurPtr)) {
749      StrVal.assign(TokStart, End-1);
750      CurPtr = End;
751      return lltok::LabelStr;
752    }
753
754    return lltok::Error;
755  }
756
757  // At this point, it is either a label, int or fp constant.
758
759  // Skip digits, we have at least one.
760  for (; isdigit(CurPtr[0]); ++CurPtr)
761    /*empty*/;
762
763  // Check to see if this really is a label afterall, e.g. "-1:".
764  if (isLabelChar(CurPtr[0]) || CurPtr[0] == ':') {
765    if (const char *End = isLabelTail(CurPtr)) {
766      StrVal.assign(TokStart, End-1);
767      CurPtr = End;
768      return lltok::LabelStr;
769    }
770  }
771
772  // If the next character is a '.', then it is a fp value, otherwise its
773  // integer.
774  if (CurPtr[0] != '.') {
775    if (TokStart[0] == '0' && TokStart[1] == 'x')
776      return Lex0x();
777    unsigned Len = CurPtr-TokStart;
778    uint32_t numBits = ((Len * 64) / 19) + 2;
779    APInt Tmp(numBits, TokStart, Len, 10);
780    if (TokStart[0] == '-') {
781      uint32_t minBits = Tmp.getMinSignedBits();
782      if (minBits > 0 && minBits < numBits)
783        Tmp.trunc(minBits);
784      APSIntVal = APSInt(Tmp, false);
785    } else {
786      uint32_t activeBits = Tmp.getActiveBits();
787      if (activeBits > 0 && activeBits < numBits)
788        Tmp.trunc(activeBits);
789      APSIntVal = APSInt(Tmp, true);
790    }
791    return lltok::APSInt;
792  }
793
794  ++CurPtr;
795
796  // Skip over [0-9]*([eE][-+]?[0-9]+)?
797  while (isdigit(CurPtr[0])) ++CurPtr;
798
799  if (CurPtr[0] == 'e' || CurPtr[0] == 'E') {
800    if (isdigit(CurPtr[1]) ||
801        ((CurPtr[1] == '-' || CurPtr[1] == '+') && isdigit(CurPtr[2]))) {
802      CurPtr += 2;
803      while (isdigit(CurPtr[0])) ++CurPtr;
804    }
805  }
806
807  APFloatVal = APFloat(atof(TokStart));
808  return lltok::APFloat;
809}
810
811///    FPConstant  [-+]?[0-9]+[.][0-9]*([eE][-+]?[0-9]+)?
812lltok::Kind LLLexer::LexPositive() {
813  // If the letter after the negative is a number, this is probably not a
814  // label.
815  if (!isdigit(CurPtr[0]))
816    return lltok::Error;
817
818  // Skip digits.
819  for (++CurPtr; isdigit(CurPtr[0]); ++CurPtr)
820    /*empty*/;
821
822  // At this point, we need a '.'.
823  if (CurPtr[0] != '.') {
824    CurPtr = TokStart+1;
825    return lltok::Error;
826  }
827
828  ++CurPtr;
829
830  // Skip over [0-9]*([eE][-+]?[0-9]+)?
831  while (isdigit(CurPtr[0])) ++CurPtr;
832
833  if (CurPtr[0] == 'e' || CurPtr[0] == 'E') {
834    if (isdigit(CurPtr[1]) ||
835        ((CurPtr[1] == '-' || CurPtr[1] == '+') && isdigit(CurPtr[2]))) {
836      CurPtr += 2;
837      while (isdigit(CurPtr[0])) ++CurPtr;
838    }
839  }
840
841  APFloatVal = APFloat(atof(TokStart));
842  return lltok::APFloat;
843}
844