LLParser.h revision 36b56886974eae4f9c5ebc96befd3e7bfe5de338
1//===-- LLParser.h - Parser Class -------------------------------*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10//  This file defines the parser class for .ll files.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_ASMPARSER_LLPARSER_H
15#define LLVM_ASMPARSER_LLPARSER_H
16
17#include "LLLexer.h"
18#include "llvm/ADT/DenseMap.h"
19#include "llvm/ADT/StringMap.h"
20#include "llvm/IR/Attributes.h"
21#include "llvm/IR/Instructions.h"
22#include "llvm/IR/Module.h"
23#include "llvm/IR/Operator.h"
24#include "llvm/IR/Type.h"
25#include "llvm/IR/ValueHandle.h"
26#include <map>
27
28namespace llvm {
29  class Module;
30  class OpaqueType;
31  class Function;
32  class Value;
33  class BasicBlock;
34  class Instruction;
35  class Constant;
36  class GlobalValue;
37  class MDString;
38  class MDNode;
39  class StructType;
40
41  /// ValID - Represents a reference of a definition of some sort with no type.
42  /// There are several cases where we have to parse the value but where the
43  /// type can depend on later context.  This may either be a numeric reference
44  /// or a symbolic (%var) reference.  This is just a discriminated union.
45  struct ValID {
46    enum {
47      t_LocalID, t_GlobalID,      // ID in UIntVal.
48      t_LocalName, t_GlobalName,  // Name in StrVal.
49      t_APSInt, t_APFloat,        // Value in APSIntVal/APFloatVal.
50      t_Null, t_Undef, t_Zero,    // No value.
51      t_EmptyArray,               // No value:  []
52      t_Constant,                 // Value in ConstantVal.
53      t_InlineAsm,                // Value in StrVal/StrVal2/UIntVal.
54      t_MDNode,                   // Value in MDNodeVal.
55      t_MDString,                 // Value in MDStringVal.
56      t_ConstantStruct,           // Value in ConstantStructElts.
57      t_PackedConstantStruct      // Value in ConstantStructElts.
58    } Kind;
59
60    LLLexer::LocTy Loc;
61    unsigned UIntVal;
62    std::string StrVal, StrVal2;
63    APSInt APSIntVal;
64    APFloat APFloatVal;
65    Constant *ConstantVal;
66    MDNode *MDNodeVal;
67    MDString *MDStringVal;
68    Constant **ConstantStructElts;
69
70    ValID() : Kind(t_LocalID), APFloatVal(0.0) {}
71    ~ValID() {
72      if (Kind == t_ConstantStruct || Kind == t_PackedConstantStruct)
73        delete [] ConstantStructElts;
74    }
75
76    bool operator<(const ValID &RHS) const {
77      if (Kind == t_LocalID || Kind == t_GlobalID)
78        return UIntVal < RHS.UIntVal;
79      assert((Kind == t_LocalName || Kind == t_GlobalName ||
80              Kind == t_ConstantStruct || Kind == t_PackedConstantStruct) &&
81             "Ordering not defined for this ValID kind yet");
82      return StrVal < RHS.StrVal;
83    }
84  };
85
86  class LLParser {
87  public:
88    typedef LLLexer::LocTy LocTy;
89  private:
90    LLVMContext &Context;
91    LLLexer Lex;
92    Module *M;
93
94    // Instruction metadata resolution.  Each instruction can have a list of
95    // MDRef info associated with them.
96    //
97    // The simpler approach of just creating temporary MDNodes and then calling
98    // RAUW on them when the definition is processed doesn't work because some
99    // instruction metadata kinds, such as dbg, get stored in the IR in an
100    // "optimized" format which doesn't participate in the normal value use
101    // lists. This means that RAUW doesn't work, even on temporary MDNodes
102    // which otherwise support RAUW. Instead, we defer resolving MDNode
103    // references until the definitions have been processed.
104    struct MDRef {
105      SMLoc Loc;
106      unsigned MDKind, MDSlot;
107    };
108    DenseMap<Instruction*, std::vector<MDRef> > ForwardRefInstMetadata;
109
110    SmallVector<Instruction*, 64> InstsWithTBAATag;
111
112    // Type resolution handling data structures.  The location is set when we
113    // have processed a use of the type but not a definition yet.
114    StringMap<std::pair<Type*, LocTy> > NamedTypes;
115    std::vector<std::pair<Type*, LocTy> > NumberedTypes;
116
117    std::vector<TrackingVH<MDNode> > NumberedMetadata;
118    std::map<unsigned, std::pair<TrackingVH<MDNode>, LocTy> > ForwardRefMDNodes;
119
120    // Global Value reference information.
121    std::map<std::string, std::pair<GlobalValue*, LocTy> > ForwardRefVals;
122    std::map<unsigned, std::pair<GlobalValue*, LocTy> > ForwardRefValIDs;
123    std::vector<GlobalValue*> NumberedVals;
124
125    // References to blockaddress.  The key is the function ValID, the value is
126    // a list of references to blocks in that function.
127    std::map<ValID, std::vector<std::pair<ValID, GlobalValue*> > >
128      ForwardRefBlockAddresses;
129
130    // Attribute builder reference information.
131    std::map<Value*, std::vector<unsigned> > ForwardRefAttrGroups;
132    std::map<unsigned, AttrBuilder> NumberedAttrBuilders;
133
134  public:
135    LLParser(MemoryBuffer *F, SourceMgr &SM, SMDiagnostic &Err, Module *m) :
136      Context(m->getContext()), Lex(F, SM, Err, m->getContext()),
137      M(m) {}
138    bool Run();
139
140    LLVMContext &getContext() { return Context; }
141
142  private:
143
144    bool Error(LocTy L, const Twine &Msg) const {
145      return Lex.Error(L, Msg);
146    }
147    bool TokError(const Twine &Msg) const {
148      return Error(Lex.getLoc(), Msg);
149    }
150
151    /// GetGlobalVal - Get a value with the specified name or ID, creating a
152    /// forward reference record if needed.  This can return null if the value
153    /// exists but does not have the right type.
154    GlobalValue *GetGlobalVal(const std::string &N, Type *Ty, LocTy Loc);
155    GlobalValue *GetGlobalVal(unsigned ID, Type *Ty, LocTy Loc);
156
157    // Helper Routines.
158    bool ParseToken(lltok::Kind T, const char *ErrMsg);
159    bool EatIfPresent(lltok::Kind T) {
160      if (Lex.getKind() != T) return false;
161      Lex.Lex();
162      return true;
163    }
164
165    FastMathFlags EatFastMathFlagsIfPresent() {
166      FastMathFlags FMF;
167      while (true)
168        switch (Lex.getKind()) {
169        case lltok::kw_fast: FMF.setUnsafeAlgebra();   Lex.Lex(); continue;
170        case lltok::kw_nnan: FMF.setNoNaNs();          Lex.Lex(); continue;
171        case lltok::kw_ninf: FMF.setNoInfs();          Lex.Lex(); continue;
172        case lltok::kw_nsz:  FMF.setNoSignedZeros();   Lex.Lex(); continue;
173        case lltok::kw_arcp: FMF.setAllowReciprocal(); Lex.Lex(); continue;
174        default: return FMF;
175        }
176      return FMF;
177    }
178
179    bool ParseOptionalToken(lltok::Kind T, bool &Present, LocTy *Loc = 0) {
180      if (Lex.getKind() != T) {
181        Present = false;
182      } else {
183        if (Loc)
184          *Loc = Lex.getLoc();
185        Lex.Lex();
186        Present = true;
187      }
188      return false;
189    }
190    bool ParseStringConstant(std::string &Result);
191    bool ParseUInt32(unsigned &Val);
192    bool ParseUInt32(unsigned &Val, LocTy &Loc) {
193      Loc = Lex.getLoc();
194      return ParseUInt32(Val);
195    }
196
197    bool ParseTLSModel(GlobalVariable::ThreadLocalMode &TLM);
198    bool ParseOptionalThreadLocal(GlobalVariable::ThreadLocalMode &TLM);
199    bool ParseOptionalAddrSpace(unsigned &AddrSpace);
200    bool ParseOptionalParamAttrs(AttrBuilder &B);
201    bool ParseOptionalReturnAttrs(AttrBuilder &B);
202    bool ParseOptionalLinkage(unsigned &Linkage, bool &HasLinkage);
203    bool ParseOptionalLinkage(unsigned &Linkage) {
204      bool HasLinkage; return ParseOptionalLinkage(Linkage, HasLinkage);
205    }
206    bool ParseOptionalVisibility(unsigned &Visibility);
207    bool ParseOptionalDLLStorageClass(unsigned &DLLStorageClass);
208    bool ParseOptionalCallingConv(CallingConv::ID &CC);
209    bool ParseOptionalAlignment(unsigned &Alignment);
210    bool ParseScopeAndOrdering(bool isAtomic, SynchronizationScope &Scope,
211                               AtomicOrdering &Ordering);
212    bool ParseOrdering(AtomicOrdering &Ordering);
213    bool ParseOptionalStackAlignment(unsigned &Alignment);
214    bool ParseOptionalCommaAlign(unsigned &Alignment, bool &AteExtraComma);
215    bool ParseOptionalCommaInAlloca(bool &IsInAlloca);
216    bool ParseIndexList(SmallVectorImpl<unsigned> &Indices,bool &AteExtraComma);
217    bool ParseIndexList(SmallVectorImpl<unsigned> &Indices) {
218      bool AteExtraComma;
219      if (ParseIndexList(Indices, AteExtraComma)) return true;
220      if (AteExtraComma)
221        return TokError("expected index");
222      return false;
223    }
224
225    // Top-Level Entities
226    bool ParseTopLevelEntities();
227    bool ValidateEndOfModule();
228    bool ParseTargetDefinition();
229    bool ParseModuleAsm();
230    bool ParseDepLibs();        // FIXME: Remove in 4.0.
231    bool ParseUnnamedType();
232    bool ParseNamedType();
233    bool ParseDeclare();
234    bool ParseDefine();
235
236    bool ParseGlobalType(bool &IsConstant);
237    bool ParseUnnamedGlobal();
238    bool ParseNamedGlobal();
239    bool ParseGlobal(const std::string &Name, LocTy Loc, unsigned Linkage,
240                     bool HasLinkage, unsigned Visibility,
241                     unsigned DLLStorageClass);
242    bool ParseAlias(const std::string &Name, LocTy Loc, unsigned Visibility,
243                    unsigned DLLStorageClass);
244    bool ParseStandaloneMetadata();
245    bool ParseNamedMetadata();
246    bool ParseMDString(MDString *&Result);
247    bool ParseMDNodeID(MDNode *&Result);
248    bool ParseMDNodeID(MDNode *&Result, unsigned &SlotNo);
249    bool ParseUnnamedAttrGrp();
250    bool ParseFnAttributeValuePairs(AttrBuilder &B,
251                                    std::vector<unsigned> &FwdRefAttrGrps,
252                                    bool inAttrGrp, LocTy &BuiltinLoc);
253
254    // Type Parsing.
255    bool ParseType(Type *&Result, bool AllowVoid = false);
256    bool ParseType(Type *&Result, LocTy &Loc, bool AllowVoid = false) {
257      Loc = Lex.getLoc();
258      return ParseType(Result, AllowVoid);
259    }
260    bool ParseAnonStructType(Type *&Result, bool Packed);
261    bool ParseStructBody(SmallVectorImpl<Type*> &Body);
262    bool ParseStructDefinition(SMLoc TypeLoc, StringRef Name,
263                               std::pair<Type*, LocTy> &Entry,
264                               Type *&ResultTy);
265
266    bool ParseArrayVectorType(Type *&Result, bool isVector);
267    bool ParseFunctionType(Type *&Result);
268
269    // Function Semantic Analysis.
270    class PerFunctionState {
271      LLParser &P;
272      Function &F;
273      std::map<std::string, std::pair<Value*, LocTy> > ForwardRefVals;
274      std::map<unsigned, std::pair<Value*, LocTy> > ForwardRefValIDs;
275      std::vector<Value*> NumberedVals;
276
277      /// FunctionNumber - If this is an unnamed function, this is the slot
278      /// number of it, otherwise it is -1.
279      int FunctionNumber;
280    public:
281      PerFunctionState(LLParser &p, Function &f, int FunctionNumber);
282      ~PerFunctionState();
283
284      Function &getFunction() const { return F; }
285
286      bool FinishFunction();
287
288      /// GetVal - Get a value with the specified name or ID, creating a
289      /// forward reference record if needed.  This can return null if the value
290      /// exists but does not have the right type.
291      Value *GetVal(const std::string &Name, Type *Ty, LocTy Loc);
292      Value *GetVal(unsigned ID, Type *Ty, LocTy Loc);
293
294      /// SetInstName - After an instruction is parsed and inserted into its
295      /// basic block, this installs its name.
296      bool SetInstName(int NameID, const std::string &NameStr, LocTy NameLoc,
297                       Instruction *Inst);
298
299      /// GetBB - Get a basic block with the specified name or ID, creating a
300      /// forward reference record if needed.  This can return null if the value
301      /// is not a BasicBlock.
302      BasicBlock *GetBB(const std::string &Name, LocTy Loc);
303      BasicBlock *GetBB(unsigned ID, LocTy Loc);
304
305      /// DefineBB - Define the specified basic block, which is either named or
306      /// unnamed.  If there is an error, this returns null otherwise it returns
307      /// the block being defined.
308      BasicBlock *DefineBB(const std::string &Name, LocTy Loc);
309    };
310
311    bool ConvertValIDToValue(Type *Ty, ValID &ID, Value *&V,
312                             PerFunctionState *PFS);
313
314    bool ParseValue(Type *Ty, Value *&V, PerFunctionState *PFS);
315    bool ParseValue(Type *Ty, Value *&V, PerFunctionState &PFS) {
316      return ParseValue(Ty, V, &PFS);
317    }
318    bool ParseValue(Type *Ty, Value *&V, LocTy &Loc,
319                    PerFunctionState &PFS) {
320      Loc = Lex.getLoc();
321      return ParseValue(Ty, V, &PFS);
322    }
323
324    bool ParseTypeAndValue(Value *&V, PerFunctionState *PFS);
325    bool ParseTypeAndValue(Value *&V, PerFunctionState &PFS) {
326      return ParseTypeAndValue(V, &PFS);
327    }
328    bool ParseTypeAndValue(Value *&V, LocTy &Loc, PerFunctionState &PFS) {
329      Loc = Lex.getLoc();
330      return ParseTypeAndValue(V, PFS);
331    }
332    bool ParseTypeAndBasicBlock(BasicBlock *&BB, LocTy &Loc,
333                                PerFunctionState &PFS);
334    bool ParseTypeAndBasicBlock(BasicBlock *&BB, PerFunctionState &PFS) {
335      LocTy Loc;
336      return ParseTypeAndBasicBlock(BB, Loc, PFS);
337    }
338
339
340    struct ParamInfo {
341      LocTy Loc;
342      Value *V;
343      AttributeSet Attrs;
344      ParamInfo(LocTy loc, Value *v, AttributeSet attrs)
345        : Loc(loc), V(v), Attrs(attrs) {}
346    };
347    bool ParseParameterList(SmallVectorImpl<ParamInfo> &ArgList,
348                            PerFunctionState &PFS);
349
350    // Constant Parsing.
351    bool ParseValID(ValID &ID, PerFunctionState *PFS = NULL);
352    bool ParseGlobalValue(Type *Ty, Constant *&V);
353    bool ParseGlobalTypeAndValue(Constant *&V);
354    bool ParseGlobalValueVector(SmallVectorImpl<Constant*> &Elts);
355    bool ParseMetadataListValue(ValID &ID, PerFunctionState *PFS);
356    bool ParseMetadataValue(ValID &ID, PerFunctionState *PFS);
357    bool ParseMDNodeVector(SmallVectorImpl<Value*> &, PerFunctionState *PFS);
358    bool ParseInstructionMetadata(Instruction *Inst, PerFunctionState *PFS);
359
360    // Function Parsing.
361    struct ArgInfo {
362      LocTy Loc;
363      Type *Ty;
364      AttributeSet Attrs;
365      std::string Name;
366      ArgInfo(LocTy L, Type *ty, AttributeSet Attr, const std::string &N)
367        : Loc(L), Ty(ty), Attrs(Attr), Name(N) {}
368    };
369    bool ParseArgumentList(SmallVectorImpl<ArgInfo> &ArgList, bool &isVarArg);
370    bool ParseFunctionHeader(Function *&Fn, bool isDefine);
371    bool ParseFunctionBody(Function &Fn);
372    bool ParseBasicBlock(PerFunctionState &PFS);
373
374    // Instruction Parsing.  Each instruction parsing routine can return with a
375    // normal result, an error result, or return having eaten an extra comma.
376    enum InstResult { InstNormal = 0, InstError = 1, InstExtraComma = 2 };
377    int ParseInstruction(Instruction *&Inst, BasicBlock *BB,
378                         PerFunctionState &PFS);
379    bool ParseCmpPredicate(unsigned &Pred, unsigned Opc);
380
381    bool ParseRet(Instruction *&Inst, BasicBlock *BB, PerFunctionState &PFS);
382    bool ParseBr(Instruction *&Inst, PerFunctionState &PFS);
383    bool ParseSwitch(Instruction *&Inst, PerFunctionState &PFS);
384    bool ParseIndirectBr(Instruction *&Inst, PerFunctionState &PFS);
385    bool ParseInvoke(Instruction *&Inst, PerFunctionState &PFS);
386    bool ParseResume(Instruction *&Inst, PerFunctionState &PFS);
387
388    bool ParseArithmetic(Instruction *&I, PerFunctionState &PFS, unsigned Opc,
389                         unsigned OperandType);
390    bool ParseLogical(Instruction *&I, PerFunctionState &PFS, unsigned Opc);
391    bool ParseCompare(Instruction *&I, PerFunctionState &PFS, unsigned Opc);
392    bool ParseCast(Instruction *&I, PerFunctionState &PFS, unsigned Opc);
393    bool ParseSelect(Instruction *&I, PerFunctionState &PFS);
394    bool ParseVA_Arg(Instruction *&I, PerFunctionState &PFS);
395    bool ParseExtractElement(Instruction *&I, PerFunctionState &PFS);
396    bool ParseInsertElement(Instruction *&I, PerFunctionState &PFS);
397    bool ParseShuffleVector(Instruction *&I, PerFunctionState &PFS);
398    int ParsePHI(Instruction *&I, PerFunctionState &PFS);
399    bool ParseLandingPad(Instruction *&I, PerFunctionState &PFS);
400    bool ParseCall(Instruction *&I, PerFunctionState &PFS, bool isTail);
401    int ParseAlloc(Instruction *&I, PerFunctionState &PFS);
402    int ParseLoad(Instruction *&I, PerFunctionState &PFS);
403    int ParseStore(Instruction *&I, PerFunctionState &PFS);
404    int ParseCmpXchg(Instruction *&I, PerFunctionState &PFS);
405    int ParseAtomicRMW(Instruction *&I, PerFunctionState &PFS);
406    int ParseFence(Instruction *&I, PerFunctionState &PFS);
407    int ParseGetElementPtr(Instruction *&I, PerFunctionState &PFS);
408    int ParseExtractValue(Instruction *&I, PerFunctionState &PFS);
409    int ParseInsertValue(Instruction *&I, PerFunctionState &PFS);
410
411    bool ResolveForwardRefBlockAddresses(Function *TheFn,
412                             std::vector<std::pair<ValID, GlobalValue*> > &Refs,
413                                         PerFunctionState *PFS);
414  };
415} // End llvm namespace
416
417#endif
418