1//===-- LLParser.h - Parser Class -------------------------------*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10//  This file defines the parser class for .ll files.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_LIB_ASMPARSER_LLPARSER_H
15#define LLVM_LIB_ASMPARSER_LLPARSER_H
16
17#include "LLLexer.h"
18#include "llvm/ADT/DenseMap.h"
19#include "llvm/ADT/StringMap.h"
20#include "llvm/IR/Attributes.h"
21#include "llvm/IR/Instructions.h"
22#include "llvm/IR/Module.h"
23#include "llvm/IR/Operator.h"
24#include "llvm/IR/Type.h"
25#include "llvm/IR/ValueHandle.h"
26#include <map>
27
28namespace llvm {
29  class Module;
30  class OpaqueType;
31  class Function;
32  class Value;
33  class BasicBlock;
34  class Instruction;
35  class Constant;
36  class GlobalValue;
37  class Comdat;
38  class MDString;
39  class MDNode;
40  class StructType;
41
42  /// ValID - Represents a reference of a definition of some sort with no type.
43  /// There are several cases where we have to parse the value but where the
44  /// type can depend on later context.  This may either be a numeric reference
45  /// or a symbolic (%var) reference.  This is just a discriminated union.
46  struct ValID {
47    enum {
48      t_LocalID, t_GlobalID,      // ID in UIntVal.
49      t_LocalName, t_GlobalName,  // Name in StrVal.
50      t_APSInt, t_APFloat,        // Value in APSIntVal/APFloatVal.
51      t_Null, t_Undef, t_Zero,    // No value.
52      t_EmptyArray,               // No value:  []
53      t_Constant,                 // Value in ConstantVal.
54      t_InlineAsm,                // Value in StrVal/StrVal2/UIntVal.
55      t_ConstantStruct,           // Value in ConstantStructElts.
56      t_PackedConstantStruct      // Value in ConstantStructElts.
57    } Kind;
58
59    LLLexer::LocTy Loc;
60    unsigned UIntVal;
61    std::string StrVal, StrVal2;
62    APSInt APSIntVal;
63    APFloat APFloatVal;
64    Constant *ConstantVal;
65    Constant **ConstantStructElts;
66
67    ValID() : Kind(t_LocalID), APFloatVal(0.0) {}
68    ~ValID() {
69      if (Kind == t_ConstantStruct || Kind == t_PackedConstantStruct)
70        delete [] ConstantStructElts;
71    }
72
73    bool operator<(const ValID &RHS) const {
74      if (Kind == t_LocalID || Kind == t_GlobalID)
75        return UIntVal < RHS.UIntVal;
76      assert((Kind == t_LocalName || Kind == t_GlobalName ||
77              Kind == t_ConstantStruct || Kind == t_PackedConstantStruct) &&
78             "Ordering not defined for this ValID kind yet");
79      return StrVal < RHS.StrVal;
80    }
81  };
82
83  class LLParser {
84  public:
85    typedef LLLexer::LocTy LocTy;
86  private:
87    LLVMContext &Context;
88    LLLexer Lex;
89    Module *M;
90
91    // Instruction metadata resolution.  Each instruction can have a list of
92    // MDRef info associated with them.
93    //
94    // The simpler approach of just creating temporary MDNodes and then calling
95    // RAUW on them when the definition is processed doesn't work because some
96    // instruction metadata kinds, such as dbg, get stored in the IR in an
97    // "optimized" format which doesn't participate in the normal value use
98    // lists. This means that RAUW doesn't work, even on temporary MDNodes
99    // which otherwise support RAUW. Instead, we defer resolving MDNode
100    // references until the definitions have been processed.
101    struct MDRef {
102      SMLoc Loc;
103      unsigned MDKind, MDSlot;
104    };
105
106    SmallVector<Instruction*, 64> InstsWithTBAATag;
107
108    // Type resolution handling data structures.  The location is set when we
109    // have processed a use of the type but not a definition yet.
110    StringMap<std::pair<Type*, LocTy> > NamedTypes;
111    std::map<unsigned, std::pair<Type*, LocTy> > NumberedTypes;
112
113    std::map<unsigned, TrackingMDNodeRef> NumberedMetadata;
114    std::map<unsigned, std::pair<TempMDTuple, LocTy>> ForwardRefMDNodes;
115
116    // Global Value reference information.
117    std::map<std::string, std::pair<GlobalValue*, LocTy> > ForwardRefVals;
118    std::map<unsigned, std::pair<GlobalValue*, LocTy> > ForwardRefValIDs;
119    std::vector<GlobalValue*> NumberedVals;
120
121    // Comdat forward reference information.
122    std::map<std::string, LocTy> ForwardRefComdats;
123
124    // References to blockaddress.  The key is the function ValID, the value is
125    // a list of references to blocks in that function.
126    std::map<ValID, std::map<ValID, GlobalValue *>> ForwardRefBlockAddresses;
127    class PerFunctionState;
128    /// Reference to per-function state to allow basic blocks to be
129    /// forward-referenced by blockaddress instructions within the same
130    /// function.
131    PerFunctionState *BlockAddressPFS;
132
133    // Attribute builder reference information.
134    std::map<Value*, std::vector<unsigned> > ForwardRefAttrGroups;
135    std::map<unsigned, AttrBuilder> NumberedAttrBuilders;
136
137  public:
138    LLParser(StringRef F, SourceMgr &SM, SMDiagnostic &Err, Module *m)
139        : Context(m->getContext()), Lex(F, SM, Err, m->getContext()), M(m),
140          BlockAddressPFS(nullptr) {}
141    bool Run();
142
143    LLVMContext &getContext() { return Context; }
144
145  private:
146
147    bool Error(LocTy L, const Twine &Msg) const {
148      return Lex.Error(L, Msg);
149    }
150    bool TokError(const Twine &Msg) const {
151      return Error(Lex.getLoc(), Msg);
152    }
153
154    /// GetGlobalVal - Get a value with the specified name or ID, creating a
155    /// forward reference record if needed.  This can return null if the value
156    /// exists but does not have the right type.
157    GlobalValue *GetGlobalVal(const std::string &N, Type *Ty, LocTy Loc);
158    GlobalValue *GetGlobalVal(unsigned ID, Type *Ty, LocTy Loc);
159
160    /// Get a Comdat with the specified name, creating a forward reference
161    /// record if needed.
162    Comdat *getComdat(const std::string &N, LocTy Loc);
163
164    // Helper Routines.
165    bool ParseToken(lltok::Kind T, const char *ErrMsg);
166    bool EatIfPresent(lltok::Kind T) {
167      if (Lex.getKind() != T) return false;
168      Lex.Lex();
169      return true;
170    }
171
172    FastMathFlags EatFastMathFlagsIfPresent() {
173      FastMathFlags FMF;
174      while (true)
175        switch (Lex.getKind()) {
176        case lltok::kw_fast: FMF.setUnsafeAlgebra();   Lex.Lex(); continue;
177        case lltok::kw_nnan: FMF.setNoNaNs();          Lex.Lex(); continue;
178        case lltok::kw_ninf: FMF.setNoInfs();          Lex.Lex(); continue;
179        case lltok::kw_nsz:  FMF.setNoSignedZeros();   Lex.Lex(); continue;
180        case lltok::kw_arcp: FMF.setAllowReciprocal(); Lex.Lex(); continue;
181        default: return FMF;
182        }
183      return FMF;
184    }
185
186    bool ParseOptionalToken(lltok::Kind T, bool &Present,
187                            LocTy *Loc = nullptr) {
188      if (Lex.getKind() != T) {
189        Present = false;
190      } else {
191        if (Loc)
192          *Loc = Lex.getLoc();
193        Lex.Lex();
194        Present = true;
195      }
196      return false;
197    }
198    bool ParseStringConstant(std::string &Result);
199    bool ParseUInt32(unsigned &Val);
200    bool ParseUInt32(unsigned &Val, LocTy &Loc) {
201      Loc = Lex.getLoc();
202      return ParseUInt32(Val);
203    }
204    bool ParseUInt64(uint64_t &Val);
205    bool ParseUInt64(uint64_t &Val, LocTy &Loc) {
206      Loc = Lex.getLoc();
207      return ParseUInt64(Val);
208    }
209
210    bool ParseTLSModel(GlobalVariable::ThreadLocalMode &TLM);
211    bool ParseOptionalThreadLocal(GlobalVariable::ThreadLocalMode &TLM);
212    bool parseOptionalUnnamedAddr(bool &UnnamedAddr) {
213      return ParseOptionalToken(lltok::kw_unnamed_addr, UnnamedAddr);
214    }
215    bool ParseOptionalAddrSpace(unsigned &AddrSpace);
216    bool ParseOptionalParamAttrs(AttrBuilder &B);
217    bool ParseOptionalReturnAttrs(AttrBuilder &B);
218    bool ParseOptionalLinkage(unsigned &Linkage, bool &HasLinkage);
219    bool ParseOptionalLinkage(unsigned &Linkage) {
220      bool HasLinkage; return ParseOptionalLinkage(Linkage, HasLinkage);
221    }
222    bool ParseOptionalVisibility(unsigned &Visibility);
223    bool ParseOptionalDLLStorageClass(unsigned &DLLStorageClass);
224    bool ParseOptionalCallingConv(unsigned &CC);
225    bool ParseOptionalAlignment(unsigned &Alignment);
226    bool ParseOptionalDerefAttrBytes(lltok::Kind AttrKind, uint64_t &Bytes);
227    bool ParseScopeAndOrdering(bool isAtomic, SynchronizationScope &Scope,
228                               AtomicOrdering &Ordering);
229    bool ParseOrdering(AtomicOrdering &Ordering);
230    bool ParseOptionalStackAlignment(unsigned &Alignment);
231    bool ParseOptionalCommaAlign(unsigned &Alignment, bool &AteExtraComma);
232    bool ParseOptionalCommaInAlloca(bool &IsInAlloca);
233    bool ParseIndexList(SmallVectorImpl<unsigned> &Indices,bool &AteExtraComma);
234    bool ParseIndexList(SmallVectorImpl<unsigned> &Indices) {
235      bool AteExtraComma;
236      if (ParseIndexList(Indices, AteExtraComma)) return true;
237      if (AteExtraComma)
238        return TokError("expected index");
239      return false;
240    }
241
242    // Top-Level Entities
243    bool ParseTopLevelEntities();
244    bool ValidateEndOfModule();
245    bool ParseTargetDefinition();
246    bool ParseModuleAsm();
247    bool ParseDepLibs();        // FIXME: Remove in 4.0.
248    bool ParseUnnamedType();
249    bool ParseNamedType();
250    bool ParseDeclare();
251    bool ParseDefine();
252
253    bool ParseGlobalType(bool &IsConstant);
254    bool ParseUnnamedGlobal();
255    bool ParseNamedGlobal();
256    bool ParseGlobal(const std::string &Name, LocTy Loc, unsigned Linkage,
257                     bool HasLinkage, unsigned Visibility,
258                     unsigned DLLStorageClass,
259                     GlobalVariable::ThreadLocalMode TLM, bool UnnamedAddr);
260    bool ParseAlias(const std::string &Name, LocTy Loc, unsigned Linkage,
261                    unsigned Visibility, unsigned DLLStorageClass,
262                    GlobalVariable::ThreadLocalMode TLM, bool UnnamedAddr);
263    bool parseComdat();
264    bool ParseStandaloneMetadata();
265    bool ParseNamedMetadata();
266    bool ParseMDString(MDString *&Result);
267    bool ParseMDNodeID(MDNode *&Result);
268    bool ParseUnnamedAttrGrp();
269    bool ParseFnAttributeValuePairs(AttrBuilder &B,
270                                    std::vector<unsigned> &FwdRefAttrGrps,
271                                    bool inAttrGrp, LocTy &BuiltinLoc);
272
273    // Type Parsing.
274    bool ParseType(Type *&Result, const Twine &Msg, bool AllowVoid = false);
275    bool ParseType(Type *&Result, bool AllowVoid = false) {
276      return ParseType(Result, "expected type", AllowVoid);
277    }
278    bool ParseType(Type *&Result, const Twine &Msg, LocTy &Loc,
279                   bool AllowVoid = false) {
280      Loc = Lex.getLoc();
281      return ParseType(Result, Msg, AllowVoid);
282    }
283    bool ParseType(Type *&Result, LocTy &Loc, bool AllowVoid = false) {
284      Loc = Lex.getLoc();
285      return ParseType(Result, AllowVoid);
286    }
287    bool ParseAnonStructType(Type *&Result, bool Packed);
288    bool ParseStructBody(SmallVectorImpl<Type*> &Body);
289    bool ParseStructDefinition(SMLoc TypeLoc, StringRef Name,
290                               std::pair<Type*, LocTy> &Entry,
291                               Type *&ResultTy);
292
293    bool ParseArrayVectorType(Type *&Result, bool isVector);
294    bool ParseFunctionType(Type *&Result);
295
296    // Function Semantic Analysis.
297    class PerFunctionState {
298      LLParser &P;
299      Function &F;
300      std::map<std::string, std::pair<Value*, LocTy> > ForwardRefVals;
301      std::map<unsigned, std::pair<Value*, LocTy> > ForwardRefValIDs;
302      std::vector<Value*> NumberedVals;
303
304      /// FunctionNumber - If this is an unnamed function, this is the slot
305      /// number of it, otherwise it is -1.
306      int FunctionNumber;
307    public:
308      PerFunctionState(LLParser &p, Function &f, int FunctionNumber);
309      ~PerFunctionState();
310
311      Function &getFunction() const { return F; }
312
313      bool FinishFunction();
314
315      /// GetVal - Get a value with the specified name or ID, creating a
316      /// forward reference record if needed.  This can return null if the value
317      /// exists but does not have the right type.
318      Value *GetVal(const std::string &Name, Type *Ty, LocTy Loc);
319      Value *GetVal(unsigned ID, Type *Ty, LocTy Loc);
320
321      /// SetInstName - After an instruction is parsed and inserted into its
322      /// basic block, this installs its name.
323      bool SetInstName(int NameID, const std::string &NameStr, LocTy NameLoc,
324                       Instruction *Inst);
325
326      /// GetBB - Get a basic block with the specified name or ID, creating a
327      /// forward reference record if needed.  This can return null if the value
328      /// is not a BasicBlock.
329      BasicBlock *GetBB(const std::string &Name, LocTy Loc);
330      BasicBlock *GetBB(unsigned ID, LocTy Loc);
331
332      /// DefineBB - Define the specified basic block, which is either named or
333      /// unnamed.  If there is an error, this returns null otherwise it returns
334      /// the block being defined.
335      BasicBlock *DefineBB(const std::string &Name, LocTy Loc);
336
337      bool resolveForwardRefBlockAddresses();
338    };
339
340    bool ConvertValIDToValue(Type *Ty, ValID &ID, Value *&V,
341                             PerFunctionState *PFS);
342
343    bool ParseValue(Type *Ty, Value *&V, PerFunctionState *PFS);
344    bool ParseValue(Type *Ty, Value *&V, PerFunctionState &PFS) {
345      return ParseValue(Ty, V, &PFS);
346    }
347    bool ParseValue(Type *Ty, Value *&V, LocTy &Loc,
348                    PerFunctionState &PFS) {
349      Loc = Lex.getLoc();
350      return ParseValue(Ty, V, &PFS);
351    }
352
353    bool ParseTypeAndValue(Value *&V, PerFunctionState *PFS);
354    bool ParseTypeAndValue(Value *&V, PerFunctionState &PFS) {
355      return ParseTypeAndValue(V, &PFS);
356    }
357    bool ParseTypeAndValue(Value *&V, LocTy &Loc, PerFunctionState &PFS) {
358      Loc = Lex.getLoc();
359      return ParseTypeAndValue(V, PFS);
360    }
361    bool ParseTypeAndBasicBlock(BasicBlock *&BB, LocTy &Loc,
362                                PerFunctionState &PFS);
363    bool ParseTypeAndBasicBlock(BasicBlock *&BB, PerFunctionState &PFS) {
364      LocTy Loc;
365      return ParseTypeAndBasicBlock(BB, Loc, PFS);
366    }
367
368
369    struct ParamInfo {
370      LocTy Loc;
371      Value *V;
372      AttributeSet Attrs;
373      ParamInfo(LocTy loc, Value *v, AttributeSet attrs)
374        : Loc(loc), V(v), Attrs(attrs) {}
375    };
376    bool ParseParameterList(SmallVectorImpl<ParamInfo> &ArgList,
377                            PerFunctionState &PFS,
378                            bool IsMustTailCall = false,
379                            bool InVarArgsFunc = false);
380
381    // Constant Parsing.
382    bool ParseValID(ValID &ID, PerFunctionState *PFS = nullptr);
383    bool ParseGlobalValue(Type *Ty, Constant *&V);
384    bool ParseGlobalTypeAndValue(Constant *&V);
385    bool ParseGlobalValueVector(SmallVectorImpl<Constant *> &Elts);
386    bool parseOptionalComdat(StringRef GlobalName, Comdat *&C);
387    bool ParseMetadataAsValue(Value *&V, PerFunctionState &PFS);
388    bool ParseValueAsMetadata(Metadata *&MD, const Twine &TypeMsg,
389                              PerFunctionState *PFS);
390    bool ParseMetadata(Metadata *&MD, PerFunctionState *PFS);
391    bool ParseMDTuple(MDNode *&MD, bool IsDistinct = false);
392    bool ParseMDNode(MDNode *&MD);
393    bool ParseMDNodeTail(MDNode *&MD);
394    bool ParseMDNodeVector(SmallVectorImpl<Metadata *> &MDs);
395    bool ParseInstructionMetadata(Instruction *Inst, PerFunctionState *PFS);
396
397    template <class FieldTy>
398    bool ParseMDField(LocTy Loc, StringRef Name, FieldTy &Result);
399    template <class FieldTy> bool ParseMDField(StringRef Name, FieldTy &Result);
400    template <class ParserTy>
401    bool ParseMDFieldsImplBody(ParserTy parseField);
402    template <class ParserTy>
403    bool ParseMDFieldsImpl(ParserTy parseField, LocTy &ClosingLoc);
404    bool ParseSpecializedMDNode(MDNode *&N, bool IsDistinct = false);
405
406#define HANDLE_SPECIALIZED_MDNODE_LEAF(CLASS)                                  \
407  bool Parse##CLASS(MDNode *&Result, bool IsDistinct);
408#include "llvm/IR/Metadata.def"
409
410    // Function Parsing.
411    struct ArgInfo {
412      LocTy Loc;
413      Type *Ty;
414      AttributeSet Attrs;
415      std::string Name;
416      ArgInfo(LocTy L, Type *ty, AttributeSet Attr, const std::string &N)
417        : Loc(L), Ty(ty), Attrs(Attr), Name(N) {}
418    };
419    bool ParseArgumentList(SmallVectorImpl<ArgInfo> &ArgList, bool &isVarArg);
420    bool ParseFunctionHeader(Function *&Fn, bool isDefine);
421    bool ParseFunctionBody(Function &Fn);
422    bool ParseBasicBlock(PerFunctionState &PFS);
423
424    enum TailCallType { TCT_None, TCT_Tail, TCT_MustTail };
425
426    // Instruction Parsing.  Each instruction parsing routine can return with a
427    // normal result, an error result, or return having eaten an extra comma.
428    enum InstResult { InstNormal = 0, InstError = 1, InstExtraComma = 2 };
429    int ParseInstruction(Instruction *&Inst, BasicBlock *BB,
430                         PerFunctionState &PFS);
431    bool ParseCmpPredicate(unsigned &Pred, unsigned Opc);
432
433    bool ParseRet(Instruction *&Inst, BasicBlock *BB, PerFunctionState &PFS);
434    bool ParseBr(Instruction *&Inst, PerFunctionState &PFS);
435    bool ParseSwitch(Instruction *&Inst, PerFunctionState &PFS);
436    bool ParseIndirectBr(Instruction *&Inst, PerFunctionState &PFS);
437    bool ParseInvoke(Instruction *&Inst, PerFunctionState &PFS);
438    bool ParseResume(Instruction *&Inst, PerFunctionState &PFS);
439
440    bool ParseArithmetic(Instruction *&I, PerFunctionState &PFS, unsigned Opc,
441                         unsigned OperandType);
442    bool ParseLogical(Instruction *&I, PerFunctionState &PFS, unsigned Opc);
443    bool ParseCompare(Instruction *&I, PerFunctionState &PFS, unsigned Opc);
444    bool ParseCast(Instruction *&I, PerFunctionState &PFS, unsigned Opc);
445    bool ParseSelect(Instruction *&I, PerFunctionState &PFS);
446    bool ParseVA_Arg(Instruction *&I, PerFunctionState &PFS);
447    bool ParseExtractElement(Instruction *&I, PerFunctionState &PFS);
448    bool ParseInsertElement(Instruction *&I, PerFunctionState &PFS);
449    bool ParseShuffleVector(Instruction *&I, PerFunctionState &PFS);
450    int ParsePHI(Instruction *&I, PerFunctionState &PFS);
451    bool ParseLandingPad(Instruction *&I, PerFunctionState &PFS);
452    bool ParseCall(Instruction *&I, PerFunctionState &PFS,
453                   CallInst::TailCallKind IsTail);
454    int ParseAlloc(Instruction *&I, PerFunctionState &PFS);
455    int ParseLoad(Instruction *&I, PerFunctionState &PFS);
456    int ParseStore(Instruction *&I, PerFunctionState &PFS);
457    int ParseCmpXchg(Instruction *&I, PerFunctionState &PFS);
458    int ParseAtomicRMW(Instruction *&I, PerFunctionState &PFS);
459    int ParseFence(Instruction *&I, PerFunctionState &PFS);
460    int ParseGetElementPtr(Instruction *&I, PerFunctionState &PFS);
461    int ParseExtractValue(Instruction *&I, PerFunctionState &PFS);
462    int ParseInsertValue(Instruction *&I, PerFunctionState &PFS);
463
464    // Use-list order directives.
465    bool ParseUseListOrder(PerFunctionState *PFS = nullptr);
466    bool ParseUseListOrderBB();
467    bool ParseUseListOrderIndexes(SmallVectorImpl<unsigned> &Indexes);
468    bool sortUseListOrder(Value *V, ArrayRef<unsigned> Indexes, SMLoc Loc);
469  };
470} // End llvm namespace
471
472#endif
473