1//===-- LLParser.h - Parser Class -------------------------------*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10//  This file defines the parser class for .ll files.
11//
12//===----------------------------------------------------------------------===//
13
14#ifndef LLVM_LIB_ASMPARSER_LLPARSER_H
15#define LLVM_LIB_ASMPARSER_LLPARSER_H
16
17#include "LLLexer.h"
18#include "llvm/ADT/Optional.h"
19#include "llvm/ADT/StringMap.h"
20#include "llvm/IR/Attributes.h"
21#include "llvm/IR/Instructions.h"
22#include "llvm/IR/Module.h"
23#include "llvm/IR/Operator.h"
24#include "llvm/IR/Type.h"
25#include "llvm/IR/ValueHandle.h"
26#include <map>
27
28namespace llvm {
29  class Module;
30  class OpaqueType;
31  class Function;
32  class Value;
33  class BasicBlock;
34  class Instruction;
35  class Constant;
36  class GlobalValue;
37  class Comdat;
38  class MDString;
39  class MDNode;
40  struct SlotMapping;
41  class StructType;
42
43  /// ValID - Represents a reference of a definition of some sort with no type.
44  /// There are several cases where we have to parse the value but where the
45  /// type can depend on later context.  This may either be a numeric reference
46  /// or a symbolic (%var) reference.  This is just a discriminated union.
47  struct ValID {
48    enum {
49      t_LocalID, t_GlobalID,           // ID in UIntVal.
50      t_LocalName, t_GlobalName,       // Name in StrVal.
51      t_APSInt, t_APFloat,             // Value in APSIntVal/APFloatVal.
52      t_Null, t_Undef, t_Zero, t_None, // No value.
53      t_EmptyArray,                    // No value:  []
54      t_Constant,                      // Value in ConstantVal.
55      t_InlineAsm,                     // Value in FTy/StrVal/StrVal2/UIntVal.
56      t_ConstantStruct,                // Value in ConstantStructElts.
57      t_PackedConstantStruct           // Value in ConstantStructElts.
58    } Kind = t_LocalID;
59
60    LLLexer::LocTy Loc;
61    unsigned UIntVal;
62    FunctionType *FTy = nullptr;
63    std::string StrVal, StrVal2;
64    APSInt APSIntVal;
65    APFloat APFloatVal{0.0};
66    Constant *ConstantVal;
67    std::unique_ptr<Constant *[]> ConstantStructElts;
68
69    ValID() = default;
70    ValID(const ValID &RHS)
71        : Kind(RHS.Kind), Loc(RHS.Loc), UIntVal(RHS.UIntVal), FTy(RHS.FTy),
72          StrVal(RHS.StrVal), StrVal2(RHS.StrVal2), APSIntVal(RHS.APSIntVal),
73          APFloatVal(RHS.APFloatVal), ConstantVal(RHS.ConstantVal) {
74      assert(!RHS.ConstantStructElts);
75    }
76
77    bool operator<(const ValID &RHS) const {
78      if (Kind == t_LocalID || Kind == t_GlobalID)
79        return UIntVal < RHS.UIntVal;
80      assert((Kind == t_LocalName || Kind == t_GlobalName ||
81              Kind == t_ConstantStruct || Kind == t_PackedConstantStruct) &&
82             "Ordering not defined for this ValID kind yet");
83      return StrVal < RHS.StrVal;
84    }
85  };
86
87  class LLParser {
88  public:
89    typedef LLLexer::LocTy LocTy;
90  private:
91    LLVMContext &Context;
92    LLLexer Lex;
93    Module *M;
94    SlotMapping *Slots;
95
96    // Instruction metadata resolution.  Each instruction can have a list of
97    // MDRef info associated with them.
98    //
99    // The simpler approach of just creating temporary MDNodes and then calling
100    // RAUW on them when the definition is processed doesn't work because some
101    // instruction metadata kinds, such as dbg, get stored in the IR in an
102    // "optimized" format which doesn't participate in the normal value use
103    // lists. This means that RAUW doesn't work, even on temporary MDNodes
104    // which otherwise support RAUW. Instead, we defer resolving MDNode
105    // references until the definitions have been processed.
106    struct MDRef {
107      SMLoc Loc;
108      unsigned MDKind, MDSlot;
109    };
110
111    SmallVector<Instruction*, 64> InstsWithTBAATag;
112
113    // Type resolution handling data structures.  The location is set when we
114    // have processed a use of the type but not a definition yet.
115    StringMap<std::pair<Type*, LocTy> > NamedTypes;
116    std::map<unsigned, std::pair<Type*, LocTy> > NumberedTypes;
117
118    std::map<unsigned, TrackingMDNodeRef> NumberedMetadata;
119    std::map<unsigned, std::pair<TempMDTuple, LocTy>> ForwardRefMDNodes;
120
121    // Global Value reference information.
122    std::map<std::string, std::pair<GlobalValue*, LocTy> > ForwardRefVals;
123    std::map<unsigned, std::pair<GlobalValue*, LocTy> > ForwardRefValIDs;
124    std::vector<GlobalValue*> NumberedVals;
125
126    // Comdat forward reference information.
127    std::map<std::string, LocTy> ForwardRefComdats;
128
129    // References to blockaddress.  The key is the function ValID, the value is
130    // a list of references to blocks in that function.
131    std::map<ValID, std::map<ValID, GlobalValue *>> ForwardRefBlockAddresses;
132    class PerFunctionState;
133    /// Reference to per-function state to allow basic blocks to be
134    /// forward-referenced by blockaddress instructions within the same
135    /// function.
136    PerFunctionState *BlockAddressPFS;
137
138    // Attribute builder reference information.
139    std::map<Value*, std::vector<unsigned> > ForwardRefAttrGroups;
140    std::map<unsigned, AttrBuilder> NumberedAttrBuilders;
141
142  public:
143    LLParser(StringRef F, SourceMgr &SM, SMDiagnostic &Err, Module *M,
144             SlotMapping *Slots = nullptr)
145        : Context(M->getContext()), Lex(F, SM, Err, M->getContext()), M(M),
146          Slots(Slots), BlockAddressPFS(nullptr) {}
147    bool Run();
148
149    bool parseStandaloneConstantValue(Constant *&C, const SlotMapping *Slots);
150
151    bool parseTypeAtBeginning(Type *&Ty, unsigned &Read,
152                              const SlotMapping *Slots);
153
154    LLVMContext &getContext() { return Context; }
155
156  private:
157
158    bool Error(LocTy L, const Twine &Msg) const {
159      return Lex.Error(L, Msg);
160    }
161    bool TokError(const Twine &Msg) const {
162      return Error(Lex.getLoc(), Msg);
163    }
164
165    /// Restore the internal name and slot mappings using the mappings that
166    /// were created at an earlier parsing stage.
167    void restoreParsingState(const SlotMapping *Slots);
168
169    /// GetGlobalVal - Get a value with the specified name or ID, creating a
170    /// forward reference record if needed.  This can return null if the value
171    /// exists but does not have the right type.
172    GlobalValue *GetGlobalVal(const std::string &N, Type *Ty, LocTy Loc);
173    GlobalValue *GetGlobalVal(unsigned ID, Type *Ty, LocTy Loc);
174
175    /// Get a Comdat with the specified name, creating a forward reference
176    /// record if needed.
177    Comdat *getComdat(const std::string &N, LocTy Loc);
178
179    // Helper Routines.
180    bool ParseToken(lltok::Kind T, const char *ErrMsg);
181    bool EatIfPresent(lltok::Kind T) {
182      if (Lex.getKind() != T) return false;
183      Lex.Lex();
184      return true;
185    }
186
187    FastMathFlags EatFastMathFlagsIfPresent() {
188      FastMathFlags FMF;
189      while (true)
190        switch (Lex.getKind()) {
191        case lltok::kw_fast: FMF.setUnsafeAlgebra();   Lex.Lex(); continue;
192        case lltok::kw_nnan: FMF.setNoNaNs();          Lex.Lex(); continue;
193        case lltok::kw_ninf: FMF.setNoInfs();          Lex.Lex(); continue;
194        case lltok::kw_nsz:  FMF.setNoSignedZeros();   Lex.Lex(); continue;
195        case lltok::kw_arcp: FMF.setAllowReciprocal(); Lex.Lex(); continue;
196        default: return FMF;
197        }
198      return FMF;
199    }
200
201    bool ParseOptionalToken(lltok::Kind T, bool &Present,
202                            LocTy *Loc = nullptr) {
203      if (Lex.getKind() != T) {
204        Present = false;
205      } else {
206        if (Loc)
207          *Loc = Lex.getLoc();
208        Lex.Lex();
209        Present = true;
210      }
211      return false;
212    }
213    bool ParseStringConstant(std::string &Result);
214    bool ParseUInt32(unsigned &Val);
215    bool ParseUInt32(unsigned &Val, LocTy &Loc) {
216      Loc = Lex.getLoc();
217      return ParseUInt32(Val);
218    }
219    bool ParseUInt64(uint64_t &Val);
220    bool ParseUInt64(uint64_t &Val, LocTy &Loc) {
221      Loc = Lex.getLoc();
222      return ParseUInt64(Val);
223    }
224
225    bool ParseStringAttribute(AttrBuilder &B);
226
227    bool ParseTLSModel(GlobalVariable::ThreadLocalMode &TLM);
228    bool ParseOptionalThreadLocal(GlobalVariable::ThreadLocalMode &TLM);
229    bool ParseOptionalUnnamedAddr(GlobalVariable::UnnamedAddr &UnnamedAddr);
230    bool ParseOptionalAddrSpace(unsigned &AddrSpace);
231    bool ParseOptionalParamAttrs(AttrBuilder &B);
232    bool ParseOptionalReturnAttrs(AttrBuilder &B);
233    bool ParseOptionalLinkage(unsigned &Linkage, bool &HasLinkage,
234                              unsigned &Visibility, unsigned &DLLStorageClass);
235    void ParseOptionalVisibility(unsigned &Visibility);
236    void ParseOptionalDLLStorageClass(unsigned &DLLStorageClass);
237    bool ParseOptionalCallingConv(unsigned &CC);
238    bool ParseOptionalAlignment(unsigned &Alignment);
239    bool ParseOptionalDerefAttrBytes(lltok::Kind AttrKind, uint64_t &Bytes);
240    bool ParseScopeAndOrdering(bool isAtomic, SynchronizationScope &Scope,
241                               AtomicOrdering &Ordering);
242    bool ParseOrdering(AtomicOrdering &Ordering);
243    bool ParseOptionalStackAlignment(unsigned &Alignment);
244    bool ParseOptionalCommaAlign(unsigned &Alignment, bool &AteExtraComma);
245    bool ParseOptionalCommaInAlloca(bool &IsInAlloca);
246    bool parseAllocSizeArguments(unsigned &ElemSizeArg,
247                                 Optional<unsigned> &HowManyArg);
248    bool ParseIndexList(SmallVectorImpl<unsigned> &Indices,
249                        bool &AteExtraComma);
250    bool ParseIndexList(SmallVectorImpl<unsigned> &Indices) {
251      bool AteExtraComma;
252      if (ParseIndexList(Indices, AteExtraComma)) return true;
253      if (AteExtraComma)
254        return TokError("expected index");
255      return false;
256    }
257
258    // Top-Level Entities
259    bool ParseTopLevelEntities();
260    bool ValidateEndOfModule();
261    bool ParseTargetDefinition();
262    bool ParseModuleAsm();
263    bool ParseSourceFileName();
264    bool ParseDepLibs();        // FIXME: Remove in 4.0.
265    bool ParseUnnamedType();
266    bool ParseNamedType();
267    bool ParseDeclare();
268    bool ParseDefine();
269
270    bool ParseGlobalType(bool &IsConstant);
271    bool ParseUnnamedGlobal();
272    bool ParseNamedGlobal();
273    bool ParseGlobal(const std::string &Name, LocTy Loc, unsigned Linkage,
274                     bool HasLinkage, unsigned Visibility,
275                     unsigned DLLStorageClass,
276                     GlobalVariable::ThreadLocalMode TLM,
277                     GlobalVariable::UnnamedAddr UnnamedAddr);
278    bool parseIndirectSymbol(const std::string &Name, LocTy Loc,
279                             unsigned Linkage, unsigned Visibility,
280                             unsigned DLLStorageClass,
281                             GlobalVariable::ThreadLocalMode TLM,
282                             GlobalVariable::UnnamedAddr UnnamedAddr);
283    bool parseComdat();
284    bool ParseStandaloneMetadata();
285    bool ParseNamedMetadata();
286    bool ParseMDString(MDString *&Result);
287    bool ParseMDNodeID(MDNode *&Result);
288    bool ParseUnnamedAttrGrp();
289    bool ParseFnAttributeValuePairs(AttrBuilder &B,
290                                    std::vector<unsigned> &FwdRefAttrGrps,
291                                    bool inAttrGrp, LocTy &BuiltinLoc);
292
293    // Type Parsing.
294    bool ParseType(Type *&Result, const Twine &Msg, bool AllowVoid = false);
295    bool ParseType(Type *&Result, bool AllowVoid = false) {
296      return ParseType(Result, "expected type", AllowVoid);
297    }
298    bool ParseType(Type *&Result, const Twine &Msg, LocTy &Loc,
299                   bool AllowVoid = false) {
300      Loc = Lex.getLoc();
301      return ParseType(Result, Msg, AllowVoid);
302    }
303    bool ParseType(Type *&Result, LocTy &Loc, bool AllowVoid = false) {
304      Loc = Lex.getLoc();
305      return ParseType(Result, AllowVoid);
306    }
307    bool ParseAnonStructType(Type *&Result, bool Packed);
308    bool ParseStructBody(SmallVectorImpl<Type*> &Body);
309    bool ParseStructDefinition(SMLoc TypeLoc, StringRef Name,
310                               std::pair<Type*, LocTy> &Entry,
311                               Type *&ResultTy);
312
313    bool ParseArrayVectorType(Type *&Result, bool isVector);
314    bool ParseFunctionType(Type *&Result);
315
316    // Function Semantic Analysis.
317    class PerFunctionState {
318      LLParser &P;
319      Function &F;
320      std::map<std::string, std::pair<Value*, LocTy> > ForwardRefVals;
321      std::map<unsigned, std::pair<Value*, LocTy> > ForwardRefValIDs;
322      std::vector<Value*> NumberedVals;
323
324      /// FunctionNumber - If this is an unnamed function, this is the slot
325      /// number of it, otherwise it is -1.
326      int FunctionNumber;
327    public:
328      PerFunctionState(LLParser &p, Function &f, int FunctionNumber);
329      ~PerFunctionState();
330
331      Function &getFunction() const { return F; }
332
333      bool FinishFunction();
334
335      /// GetVal - Get a value with the specified name or ID, creating a
336      /// forward reference record if needed.  This can return null if the value
337      /// exists but does not have the right type.
338      Value *GetVal(const std::string &Name, Type *Ty, LocTy Loc);
339      Value *GetVal(unsigned ID, Type *Ty, LocTy Loc);
340
341      /// SetInstName - After an instruction is parsed and inserted into its
342      /// basic block, this installs its name.
343      bool SetInstName(int NameID, const std::string &NameStr, LocTy NameLoc,
344                       Instruction *Inst);
345
346      /// GetBB - Get a basic block with the specified name or ID, creating a
347      /// forward reference record if needed.  This can return null if the value
348      /// is not a BasicBlock.
349      BasicBlock *GetBB(const std::string &Name, LocTy Loc);
350      BasicBlock *GetBB(unsigned ID, LocTy Loc);
351
352      /// DefineBB - Define the specified basic block, which is either named or
353      /// unnamed.  If there is an error, this returns null otherwise it returns
354      /// the block being defined.
355      BasicBlock *DefineBB(const std::string &Name, LocTy Loc);
356
357      bool resolveForwardRefBlockAddresses();
358    };
359
360    bool ConvertValIDToValue(Type *Ty, ValID &ID, Value *&V,
361                             PerFunctionState *PFS);
362
363    bool parseConstantValue(Type *Ty, Constant *&C);
364    bool ParseValue(Type *Ty, Value *&V, PerFunctionState *PFS);
365    bool ParseValue(Type *Ty, Value *&V, PerFunctionState &PFS) {
366      return ParseValue(Ty, V, &PFS);
367    }
368
369    bool ParseValue(Type *Ty, Value *&V, LocTy &Loc,
370                    PerFunctionState &PFS) {
371      Loc = Lex.getLoc();
372      return ParseValue(Ty, V, &PFS);
373    }
374
375    bool ParseTypeAndValue(Value *&V, PerFunctionState *PFS);
376    bool ParseTypeAndValue(Value *&V, PerFunctionState &PFS) {
377      return ParseTypeAndValue(V, &PFS);
378    }
379    bool ParseTypeAndValue(Value *&V, LocTy &Loc, PerFunctionState &PFS) {
380      Loc = Lex.getLoc();
381      return ParseTypeAndValue(V, PFS);
382    }
383    bool ParseTypeAndBasicBlock(BasicBlock *&BB, LocTy &Loc,
384                                PerFunctionState &PFS);
385    bool ParseTypeAndBasicBlock(BasicBlock *&BB, PerFunctionState &PFS) {
386      LocTy Loc;
387      return ParseTypeAndBasicBlock(BB, Loc, PFS);
388    }
389
390
391    struct ParamInfo {
392      LocTy Loc;
393      Value *V;
394      AttributeSet Attrs;
395      ParamInfo(LocTy loc, Value *v, AttributeSet attrs)
396        : Loc(loc), V(v), Attrs(attrs) {}
397    };
398    bool ParseParameterList(SmallVectorImpl<ParamInfo> &ArgList,
399                            PerFunctionState &PFS,
400                            bool IsMustTailCall = false,
401                            bool InVarArgsFunc = false);
402
403    bool
404    ParseOptionalOperandBundles(SmallVectorImpl<OperandBundleDef> &BundleList,
405                                PerFunctionState &PFS);
406
407    bool ParseExceptionArgs(SmallVectorImpl<Value *> &Args,
408                            PerFunctionState &PFS);
409
410    // Constant Parsing.
411    bool ParseValID(ValID &ID, PerFunctionState *PFS = nullptr);
412    bool ParseGlobalValue(Type *Ty, Constant *&V);
413    bool ParseGlobalTypeAndValue(Constant *&V);
414    bool ParseGlobalValueVector(SmallVectorImpl<Constant *> &Elts);
415    bool parseOptionalComdat(StringRef GlobalName, Comdat *&C);
416    bool ParseMetadataAsValue(Value *&V, PerFunctionState &PFS);
417    bool ParseValueAsMetadata(Metadata *&MD, const Twine &TypeMsg,
418                              PerFunctionState *PFS);
419    bool ParseMetadata(Metadata *&MD, PerFunctionState *PFS);
420    bool ParseMDTuple(MDNode *&MD, bool IsDistinct = false);
421    bool ParseMDNode(MDNode *&MD);
422    bool ParseMDNodeTail(MDNode *&MD);
423    bool ParseMDNodeVector(SmallVectorImpl<Metadata *> &MDs);
424    bool ParseMetadataAttachment(unsigned &Kind, MDNode *&MD);
425    bool ParseInstructionMetadata(Instruction &Inst);
426    bool ParseGlobalObjectMetadataAttachment(GlobalObject &GO);
427    bool ParseOptionalFunctionMetadata(Function &F);
428
429    template <class FieldTy>
430    bool ParseMDField(LocTy Loc, StringRef Name, FieldTy &Result);
431    template <class FieldTy> bool ParseMDField(StringRef Name, FieldTy &Result);
432    template <class ParserTy>
433    bool ParseMDFieldsImplBody(ParserTy parseField);
434    template <class ParserTy>
435    bool ParseMDFieldsImpl(ParserTy parseField, LocTy &ClosingLoc);
436    bool ParseSpecializedMDNode(MDNode *&N, bool IsDistinct = false);
437
438#define HANDLE_SPECIALIZED_MDNODE_LEAF(CLASS)                                  \
439  bool Parse##CLASS(MDNode *&Result, bool IsDistinct);
440#include "llvm/IR/Metadata.def"
441
442    // Function Parsing.
443    struct ArgInfo {
444      LocTy Loc;
445      Type *Ty;
446      AttributeSet Attrs;
447      std::string Name;
448      ArgInfo(LocTy L, Type *ty, AttributeSet Attr, const std::string &N)
449        : Loc(L), Ty(ty), Attrs(Attr), Name(N) {}
450    };
451    bool ParseArgumentList(SmallVectorImpl<ArgInfo> &ArgList, bool &isVarArg);
452    bool ParseFunctionHeader(Function *&Fn, bool isDefine);
453    bool ParseFunctionBody(Function &Fn);
454    bool ParseBasicBlock(PerFunctionState &PFS);
455
456    enum TailCallType { TCT_None, TCT_Tail, TCT_MustTail };
457
458    // Instruction Parsing.  Each instruction parsing routine can return with a
459    // normal result, an error result, or return having eaten an extra comma.
460    enum InstResult { InstNormal = 0, InstError = 1, InstExtraComma = 2 };
461    int ParseInstruction(Instruction *&Inst, BasicBlock *BB,
462                         PerFunctionState &PFS);
463    bool ParseCmpPredicate(unsigned &Pred, unsigned Opc);
464
465    bool ParseRet(Instruction *&Inst, BasicBlock *BB, PerFunctionState &PFS);
466    bool ParseBr(Instruction *&Inst, PerFunctionState &PFS);
467    bool ParseSwitch(Instruction *&Inst, PerFunctionState &PFS);
468    bool ParseIndirectBr(Instruction *&Inst, PerFunctionState &PFS);
469    bool ParseInvoke(Instruction *&Inst, PerFunctionState &PFS);
470    bool ParseResume(Instruction *&Inst, PerFunctionState &PFS);
471    bool ParseCleanupRet(Instruction *&Inst, PerFunctionState &PFS);
472    bool ParseCatchRet(Instruction *&Inst, PerFunctionState &PFS);
473    bool ParseCatchSwitch(Instruction *&Inst, PerFunctionState &PFS);
474    bool ParseCatchPad(Instruction *&Inst, PerFunctionState &PFS);
475    bool ParseCleanupPad(Instruction *&Inst, PerFunctionState &PFS);
476
477    bool ParseArithmetic(Instruction *&I, PerFunctionState &PFS, unsigned Opc,
478                         unsigned OperandType);
479    bool ParseLogical(Instruction *&I, PerFunctionState &PFS, unsigned Opc);
480    bool ParseCompare(Instruction *&I, PerFunctionState &PFS, unsigned Opc);
481    bool ParseCast(Instruction *&I, PerFunctionState &PFS, unsigned Opc);
482    bool ParseSelect(Instruction *&I, PerFunctionState &PFS);
483    bool ParseVA_Arg(Instruction *&I, PerFunctionState &PFS);
484    bool ParseExtractElement(Instruction *&I, PerFunctionState &PFS);
485    bool ParseInsertElement(Instruction *&I, PerFunctionState &PFS);
486    bool ParseShuffleVector(Instruction *&I, PerFunctionState &PFS);
487    int ParsePHI(Instruction *&I, PerFunctionState &PFS);
488    bool ParseLandingPad(Instruction *&I, PerFunctionState &PFS);
489    bool ParseCall(Instruction *&I, PerFunctionState &PFS,
490                   CallInst::TailCallKind IsTail);
491    int ParseAlloc(Instruction *&I, PerFunctionState &PFS);
492    int ParseLoad(Instruction *&I, PerFunctionState &PFS);
493    int ParseStore(Instruction *&I, PerFunctionState &PFS);
494    int ParseCmpXchg(Instruction *&I, PerFunctionState &PFS);
495    int ParseAtomicRMW(Instruction *&I, PerFunctionState &PFS);
496    int ParseFence(Instruction *&I, PerFunctionState &PFS);
497    int ParseGetElementPtr(Instruction *&I, PerFunctionState &PFS);
498    int ParseExtractValue(Instruction *&I, PerFunctionState &PFS);
499    int ParseInsertValue(Instruction *&I, PerFunctionState &PFS);
500
501    // Use-list order directives.
502    bool ParseUseListOrder(PerFunctionState *PFS = nullptr);
503    bool ParseUseListOrderBB();
504    bool ParseUseListOrderIndexes(SmallVectorImpl<unsigned> &Indexes);
505    bool sortUseListOrder(Value *V, ArrayRef<unsigned> Indexes, SMLoc Loc);
506  };
507} // End llvm namespace
508
509#endif
510