LLParser.h revision f3a789d931de6b5be729c33ff476fb20f0badbb1
1//===-- LLParser.h - Parser Class -------------------------------*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10//  This file defines the parser class for .ll files.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_ASMPARSER_LLPARSER_H
15#define LLVM_ASMPARSER_LLPARSER_H
16
17#include "LLLexer.h"
18#include "llvm/Module.h"
19#include "llvm/Type.h"
20#include "llvm/ADT/DenseMap.h"
21#include "llvm/Support/ValueHandle.h"
22#include <map>
23
24namespace llvm {
25  class Module;
26  class OpaqueType;
27  class Function;
28  class Value;
29  class BasicBlock;
30  class Instruction;
31  class Constant;
32  class GlobalValue;
33  class MDString;
34  class MDNode;
35
36  /// ValID - Represents a reference of a definition of some sort with no type.
37  /// There are several cases where we have to parse the value but where the
38  /// type can depend on later context.  This may either be a numeric reference
39  /// or a symbolic (%var) reference.  This is just a discriminated union.
40  struct ValID {
41    enum {
42      t_LocalID, t_GlobalID,      // ID in UIntVal.
43      t_LocalName, t_GlobalName,  // Name in StrVal.
44      t_APSInt, t_APFloat,        // Value in APSIntVal/APFloatVal.
45      t_Null, t_Undef, t_Zero,    // No value.
46      t_EmptyArray,               // No value:  []
47      t_Constant,                 // Value in ConstantVal.
48      t_InlineAsm,                // Value in StrVal/StrVal2/UIntVal.
49      t_MDNode,                   // Value in MDNodeVal.
50      t_MDString                  // Value in MDStringVal.
51    } Kind;
52
53    LLLexer::LocTy Loc;
54    unsigned UIntVal;
55    std::string StrVal, StrVal2;
56    APSInt APSIntVal;
57    APFloat APFloatVal;
58    Constant *ConstantVal;
59    MDNode *MDNodeVal;
60    MDString *MDStringVal;
61    ValID() : APFloatVal(0.0) {}
62
63    bool operator<(const ValID &RHS) const {
64      if (Kind == t_LocalID || Kind == t_GlobalID)
65        return UIntVal < RHS.UIntVal;
66      assert((Kind == t_LocalName || Kind == t_GlobalName) &&
67             "Ordering not defined for this ValID kind yet");
68      return StrVal < RHS.StrVal;
69    }
70  };
71
72  class LLParser {
73  public:
74    typedef LLLexer::LocTy LocTy;
75  private:
76    LLVMContext &Context;
77    LLLexer Lex;
78    Module *M;
79
80    // Instruction metadata resolution.  Each instruction can have a list of
81    // MDRef info associated with them.
82    //
83    // The simpler approach of just creating temporary MDNodes and then calling
84    // RAUW on them when the definition is processed doesn't work because some
85    // instruction metadata kinds, such as dbg, get stored in the IR in an
86    // "optimized" format which doesn't participate in the normal value use
87    // lists. This means that RAUW doesn't work, even on temporary MDNodes
88    // which otherwise support RAUW. Instead, we defer resolving MDNode
89    // references until the definitions have been processed.
90    struct MDRef {
91      SMLoc Loc;
92      unsigned MDKind, MDSlot;
93    };
94    DenseMap<Instruction*, std::vector<MDRef> > ForwardRefInstMetadata;
95
96    // Type resolution handling data structures.
97    std::map<std::string, std::pair<PATypeHolder, LocTy> > ForwardRefTypes;
98    std::map<unsigned, std::pair<PATypeHolder, LocTy> > ForwardRefTypeIDs;
99    std::vector<PATypeHolder> NumberedTypes;
100    std::vector<TrackingVH<MDNode> > NumberedMetadata;
101    std::map<unsigned, std::pair<TrackingVH<MDNode>, LocTy> > ForwardRefMDNodes;
102    struct UpRefRecord {
103      /// Loc - This is the location of the upref.
104      LocTy Loc;
105
106      /// NestingLevel - The number of nesting levels that need to be popped
107      /// before this type is resolved.
108      unsigned NestingLevel;
109
110      /// LastContainedTy - This is the type at the current binding level for
111      /// the type.  Every time we reduce the nesting level, this gets updated.
112      const Type *LastContainedTy;
113
114      /// UpRefTy - This is the actual opaque type that the upreference is
115      /// represented with.
116      OpaqueType *UpRefTy;
117
118      UpRefRecord(LocTy L, unsigned NL, OpaqueType *URTy)
119        : Loc(L), NestingLevel(NL), LastContainedTy((Type*)URTy),
120          UpRefTy(URTy) {}
121    };
122    std::vector<UpRefRecord> UpRefs;
123
124    // Global Value reference information.
125    std::map<std::string, std::pair<GlobalValue*, LocTy> > ForwardRefVals;
126    std::map<unsigned, std::pair<GlobalValue*, LocTy> > ForwardRefValIDs;
127    std::vector<GlobalValue*> NumberedVals;
128
129    // References to blockaddress.  The key is the function ValID, the value is
130    // a list of references to blocks in that function.
131    std::map<ValID, std::vector<std::pair<ValID, GlobalValue*> > >
132      ForwardRefBlockAddresses;
133
134  public:
135    LLParser(MemoryBuffer *F, SourceMgr &SM, SMDiagnostic &Err, Module *m) :
136      Context(m->getContext()), Lex(F, SM, Err, m->getContext()),
137      M(m) {}
138    bool Run();
139
140    LLVMContext& getContext() { return Context; }
141
142  private:
143
144    bool Error(LocTy L, const Twine &Msg) const {
145      return Lex.Error(L, Msg);
146    }
147    bool TokError(const Twine &Msg) const {
148      return Error(Lex.getLoc(), Msg);
149    }
150
151    /// GetGlobalVal - Get a value with the specified name or ID, creating a
152    /// forward reference record if needed.  This can return null if the value
153    /// exists but does not have the right type.
154    GlobalValue *GetGlobalVal(const std::string &N, const Type *Ty, LocTy Loc);
155    GlobalValue *GetGlobalVal(unsigned ID, const Type *Ty, LocTy Loc);
156
157    // Helper Routines.
158    bool ParseToken(lltok::Kind T, const char *ErrMsg);
159    bool EatIfPresent(lltok::Kind T) {
160      if (Lex.getKind() != T) return false;
161      Lex.Lex();
162      return true;
163    }
164    bool ParseOptionalToken(lltok::Kind T, bool &Present, LocTy *Loc = 0) {
165      if (Lex.getKind() != T) {
166        Present = false;
167      } else {
168        if (Loc)
169          *Loc = Lex.getLoc();
170        Lex.Lex();
171        Present = true;
172      }
173      return false;
174    }
175    bool ParseStringConstant(std::string &Result);
176    bool ParseUInt32(unsigned &Val);
177    bool ParseUInt32(unsigned &Val, LocTy &Loc) {
178      Loc = Lex.getLoc();
179      return ParseUInt32(Val);
180    }
181    bool ParseOptionalAddrSpace(unsigned &AddrSpace);
182    bool ParseOptionalAttrs(unsigned &Attrs, unsigned AttrKind);
183    bool ParseOptionalLinkage(unsigned &Linkage, bool &HasLinkage);
184    bool ParseOptionalLinkage(unsigned &Linkage) {
185      bool HasLinkage; return ParseOptionalLinkage(Linkage, HasLinkage);
186    }
187    bool ParseOptionalVisibility(unsigned &Visibility);
188    bool ParseOptionalCallingConv(CallingConv::ID &CC);
189    bool ParseOptionalAlignment(unsigned &Alignment);
190    bool ParseOptionalStackAlignment(unsigned &Alignment);
191    bool ParseOptionalCommaAlign(unsigned &Alignment, bool &AteExtraComma);
192    bool ParseIndexList(SmallVectorImpl<unsigned> &Indices,bool &AteExtraComma);
193    bool ParseIndexList(SmallVectorImpl<unsigned> &Indices) {
194      bool AteExtraComma;
195      if (ParseIndexList(Indices, AteExtraComma)) return true;
196      if (AteExtraComma)
197        return TokError("expected index");
198      return false;
199    }
200
201    // Top-Level Entities
202    bool ParseTopLevelEntities();
203    bool ValidateEndOfModule();
204    bool ParseTargetDefinition();
205    bool ParseDepLibs();
206    bool ParseModuleAsm();
207    bool ParseUnnamedType();
208    bool ParseNamedType();
209    bool ParseDeclare();
210    bool ParseDefine();
211
212    bool ParseGlobalType(bool &IsConstant);
213    bool ParseUnnamedGlobal();
214    bool ParseNamedGlobal();
215    bool ParseGlobal(const std::string &Name, LocTy Loc, unsigned Linkage,
216                     bool HasLinkage, unsigned Visibility);
217    bool ParseAlias(const std::string &Name, LocTy Loc, unsigned Visibility);
218    bool ParseStandaloneMetadata();
219    bool ParseNamedMetadata();
220    bool ParseMDString(MDString *&Result);
221    bool ParseMDNodeID(MDNode *&Result);
222    bool ParseMDNodeID(MDNode *&Result, unsigned &SlotNo);
223
224    // Type Parsing.
225    bool ParseType(PATypeHolder &Result, bool AllowVoid = false);
226    bool ParseType(PATypeHolder &Result, LocTy &Loc, bool AllowVoid = false) {
227      Loc = Lex.getLoc();
228      return ParseType(Result, AllowVoid);
229    }
230    bool ParseTypeRec(PATypeHolder &H);
231    bool ParseStructType(PATypeHolder &H, bool Packed);
232    bool ParseArrayVectorType(PATypeHolder &H, bool isVector);
233    bool ParseFunctionType(PATypeHolder &Result);
234    PATypeHolder HandleUpRefs(const Type *Ty);
235
236    // Function Semantic Analysis.
237    class PerFunctionState {
238      LLParser &P;
239      Function &F;
240      std::map<std::string, std::pair<Value*, LocTy> > ForwardRefVals;
241      std::map<unsigned, std::pair<Value*, LocTy> > ForwardRefValIDs;
242      std::vector<Value*> NumberedVals;
243
244      /// FunctionNumber - If this is an unnamed function, this is the slot
245      /// number of it, otherwise it is -1.
246      int FunctionNumber;
247    public:
248      PerFunctionState(LLParser &p, Function &f, int FunctionNumber);
249      ~PerFunctionState();
250
251      Function &getFunction() const { return F; }
252
253      bool FinishFunction();
254
255      /// GetVal - Get a value with the specified name or ID, creating a
256      /// forward reference record if needed.  This can return null if the value
257      /// exists but does not have the right type.
258      Value *GetVal(const std::string &Name, const Type *Ty, LocTy Loc);
259      Value *GetVal(unsigned ID, const Type *Ty, LocTy Loc);
260
261      /// SetInstName - After an instruction is parsed and inserted into its
262      /// basic block, this installs its name.
263      bool SetInstName(int NameID, const std::string &NameStr, LocTy NameLoc,
264                       Instruction *Inst);
265
266      /// GetBB - Get a basic block with the specified name or ID, creating a
267      /// forward reference record if needed.  This can return null if the value
268      /// is not a BasicBlock.
269      BasicBlock *GetBB(const std::string &Name, LocTy Loc);
270      BasicBlock *GetBB(unsigned ID, LocTy Loc);
271
272      /// DefineBB - Define the specified basic block, which is either named or
273      /// unnamed.  If there is an error, this returns null otherwise it returns
274      /// the block being defined.
275      BasicBlock *DefineBB(const std::string &Name, LocTy Loc);
276    };
277
278    bool ConvertValIDToValue(const Type *Ty, ValID &ID, Value *&V,
279                             PerFunctionState *PFS);
280
281    bool ParseValue(const Type *Ty, Value *&V, PerFunctionState &PFS);
282    bool ParseValue(const Type *Ty, Value *&V, LocTy &Loc,
283                    PerFunctionState &PFS) {
284      Loc = Lex.getLoc();
285      return ParseValue(Ty, V, PFS);
286    }
287
288    bool ParseTypeAndValue(Value *&V, PerFunctionState &PFS);
289    bool ParseTypeAndValue(Value *&V, LocTy &Loc, PerFunctionState &PFS) {
290      Loc = Lex.getLoc();
291      return ParseTypeAndValue(V, PFS);
292    }
293    bool ParseTypeAndBasicBlock(BasicBlock *&BB, LocTy &Loc,
294                                PerFunctionState &PFS);
295    bool ParseTypeAndBasicBlock(BasicBlock *&BB, PerFunctionState &PFS) {
296      LocTy Loc;
297      return ParseTypeAndBasicBlock(BB, Loc, PFS);
298    }
299
300
301    struct ParamInfo {
302      LocTy Loc;
303      Value *V;
304      unsigned Attrs;
305      ParamInfo(LocTy loc, Value *v, unsigned attrs)
306        : Loc(loc), V(v), Attrs(attrs) {}
307    };
308    bool ParseParameterList(SmallVectorImpl<ParamInfo> &ArgList,
309                            PerFunctionState &PFS);
310
311    // Constant Parsing.
312    bool ParseValID(ValID &ID, PerFunctionState *PFS = NULL);
313    bool ParseGlobalValue(const Type *Ty, Constant *&V);
314    bool ParseGlobalTypeAndValue(Constant *&V);
315    bool ParseGlobalValueVector(SmallVectorImpl<Constant*> &Elts);
316    bool ParseMetadataListValue(ValID &ID, PerFunctionState *PFS);
317    bool ParseMetadataValue(ValID &ID, PerFunctionState *PFS);
318    bool ParseMDNodeVector(SmallVectorImpl<Value*> &, PerFunctionState *PFS);
319    bool ParseInstructionMetadata(Instruction *Inst, PerFunctionState *PFS);
320
321    // Function Parsing.
322    struct ArgInfo {
323      LocTy Loc;
324      PATypeHolder Type;
325      unsigned Attrs;
326      std::string Name;
327      ArgInfo(LocTy L, PATypeHolder Ty, unsigned Attr, const std::string &N)
328        : Loc(L), Type(Ty), Attrs(Attr), Name(N) {}
329    };
330    bool ParseArgumentList(std::vector<ArgInfo> &ArgList,
331                           bool &isVarArg, bool inType);
332    bool ParseFunctionHeader(Function *&Fn, bool isDefine);
333    bool ParseFunctionBody(Function &Fn);
334    bool ParseBasicBlock(PerFunctionState &PFS);
335
336    // Instruction Parsing.  Each instruction parsing routine can return with a
337    // normal result, an error result, or return having eaten an extra comma.
338    enum InstResult { InstNormal = 0, InstError = 1, InstExtraComma = 2 };
339    int ParseInstruction(Instruction *&Inst, BasicBlock *BB,
340                         PerFunctionState &PFS);
341    bool ParseCmpPredicate(unsigned &Pred, unsigned Opc);
342
343    int ParseRet(Instruction *&Inst, BasicBlock *BB, PerFunctionState &PFS);
344    bool ParseBr(Instruction *&Inst, PerFunctionState &PFS);
345    bool ParseSwitch(Instruction *&Inst, PerFunctionState &PFS);
346    bool ParseIndirectBr(Instruction *&Inst, PerFunctionState &PFS);
347    bool ParseInvoke(Instruction *&Inst, PerFunctionState &PFS);
348
349    bool ParseArithmetic(Instruction *&I, PerFunctionState &PFS, unsigned Opc,
350                         unsigned OperandType);
351    bool ParseLogical(Instruction *&I, PerFunctionState &PFS, unsigned Opc);
352    bool ParseCompare(Instruction *&I, PerFunctionState &PFS, unsigned Opc);
353    bool ParseCast(Instruction *&I, PerFunctionState &PFS, unsigned Opc);
354    bool ParseSelect(Instruction *&I, PerFunctionState &PFS);
355    bool ParseVA_Arg(Instruction *&I, PerFunctionState &PFS);
356    bool ParseExtractElement(Instruction *&I, PerFunctionState &PFS);
357    bool ParseInsertElement(Instruction *&I, PerFunctionState &PFS);
358    bool ParseShuffleVector(Instruction *&I, PerFunctionState &PFS);
359    int ParsePHI(Instruction *&I, PerFunctionState &PFS);
360    bool ParseCall(Instruction *&I, PerFunctionState &PFS, bool isTail);
361    int ParseAlloc(Instruction *&I, PerFunctionState &PFS);
362    int ParseLoad(Instruction *&I, PerFunctionState &PFS, bool isVolatile);
363    int ParseStore(Instruction *&I, PerFunctionState &PFS, bool isVolatile);
364    bool ParseGetResult(Instruction *&I, PerFunctionState &PFS);
365    int ParseGetElementPtr(Instruction *&I, PerFunctionState &PFS);
366    int ParseExtractValue(Instruction *&I, PerFunctionState &PFS);
367    int ParseInsertValue(Instruction *&I, PerFunctionState &PFS);
368
369    bool ResolveForwardRefBlockAddresses(Function *TheFn,
370                             std::vector<std::pair<ValID, GlobalValue*> > &Refs,
371                                         PerFunctionState *PFS);
372  };
373} // End llvm namespace
374
375#endif
376