// LLParser.h revision f3a789d931de6b5be729c33ff476fb20f0badbb1
1//===-- LLParser.h - Parser Class -------------------------------*- C++ -*-===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file defines the parser class for .ll files. 11// 12//===----------------------------------------------------------------------===// 13 14#ifndef LLVM_ASMPARSER_LLPARSER_H 15#define LLVM_ASMPARSER_LLPARSER_H 16 17#include "LLLexer.h" 18#include "llvm/Module.h" 19#include "llvm/Type.h" 20#include "llvm/ADT/DenseMap.h" 21#include "llvm/Support/ValueHandle.h" 22#include <map> 23 24namespace llvm { 25 class Module; 26 class OpaqueType; 27 class Function; 28 class Value; 29 class BasicBlock; 30 class Instruction; 31 class Constant; 32 class GlobalValue; 33 class MDString; 34 class MDNode; 35 36 /// ValID - Represents a reference of a definition of some sort with no type. 37 /// There are several cases where we have to parse the value but where the 38 /// type can depend on later context. This may either be a numeric reference 39 /// or a symbolic (%var) reference. This is just a discriminated union. 40 struct ValID { 41 enum { 42 t_LocalID, t_GlobalID, // ID in UIntVal. 43 t_LocalName, t_GlobalName, // Name in StrVal. 44 t_APSInt, t_APFloat, // Value in APSIntVal/APFloatVal. 45 t_Null, t_Undef, t_Zero, // No value. 46 t_EmptyArray, // No value: [] 47 t_Constant, // Value in ConstantVal. 48 t_InlineAsm, // Value in StrVal/StrVal2/UIntVal. 49 t_MDNode, // Value in MDNodeVal. 50 t_MDString // Value in MDStringVal. 
51 } Kind; 52 53 LLLexer::LocTy Loc; 54 unsigned UIntVal; 55 std::string StrVal, StrVal2; 56 APSInt APSIntVal; 57 APFloat APFloatVal; 58 Constant *ConstantVal; 59 MDNode *MDNodeVal; 60 MDString *MDStringVal; 61 ValID() : APFloatVal(0.0) {} 62 63 bool operator<(const ValID &RHS) const { 64 if (Kind == t_LocalID || Kind == t_GlobalID) 65 return UIntVal < RHS.UIntVal; 66 assert((Kind == t_LocalName || Kind == t_GlobalName) && 67 "Ordering not defined for this ValID kind yet"); 68 return StrVal < RHS.StrVal; 69 } 70 }; 71 72 class LLParser { 73 public: 74 typedef LLLexer::LocTy LocTy; 75 private: 76 LLVMContext &Context; 77 LLLexer Lex; 78 Module *M; 79 80 // Instruction metadata resolution. Each instruction can have a list of 81 // MDRef info associated with them. 82 // 83 // The simpler approach of just creating temporary MDNodes and then calling 84 // RAUW on them when the definition is processed doesn't work because some 85 // instruction metadata kinds, such as dbg, get stored in the IR in an 86 // "optimized" format which doesn't participate in the normal value use 87 // lists. This means that RAUW doesn't work, even on temporary MDNodes 88 // which otherwise support RAUW. Instead, we defer resolving MDNode 89 // references until the definitions have been processed. 90 struct MDRef { 91 SMLoc Loc; 92 unsigned MDKind, MDSlot; 93 }; 94 DenseMap<Instruction*, std::vector<MDRef> > ForwardRefInstMetadata; 95 96 // Type resolution handling data structures. 97 std::map<std::string, std::pair<PATypeHolder, LocTy> > ForwardRefTypes; 98 std::map<unsigned, std::pair<PATypeHolder, LocTy> > ForwardRefTypeIDs; 99 std::vector<PATypeHolder> NumberedTypes; 100 std::vector<TrackingVH<MDNode> > NumberedMetadata; 101 std::map<unsigned, std::pair<TrackingVH<MDNode>, LocTy> > ForwardRefMDNodes; 102 struct UpRefRecord { 103 /// Loc - This is the location of the upref. 
104 LocTy Loc; 105 106 /// NestingLevel - The number of nesting levels that need to be popped 107 /// before this type is resolved. 108 unsigned NestingLevel; 109 110 /// LastContainedTy - This is the type at the current binding level for 111 /// the type. Every time we reduce the nesting level, this gets updated. 112 const Type *LastContainedTy; 113 114 /// UpRefTy - This is the actual opaque type that the upreference is 115 /// represented with. 116 OpaqueType *UpRefTy; 117 118 UpRefRecord(LocTy L, unsigned NL, OpaqueType *URTy) 119 : Loc(L), NestingLevel(NL), LastContainedTy((Type*)URTy), 120 UpRefTy(URTy) {} 121 }; 122 std::vector<UpRefRecord> UpRefs; 123 124 // Global Value reference information. 125 std::map<std::string, std::pair<GlobalValue*, LocTy> > ForwardRefVals; 126 std::map<unsigned, std::pair<GlobalValue*, LocTy> > ForwardRefValIDs; 127 std::vector<GlobalValue*> NumberedVals; 128 129 // References to blockaddress. The key is the function ValID, the value is 130 // a list of references to blocks in that function. 131 std::map<ValID, std::vector<std::pair<ValID, GlobalValue*> > > 132 ForwardRefBlockAddresses; 133 134 public: 135 LLParser(MemoryBuffer *F, SourceMgr &SM, SMDiagnostic &Err, Module *m) : 136 Context(m->getContext()), Lex(F, SM, Err, m->getContext()), 137 M(m) {} 138 bool Run(); 139 140 LLVMContext& getContext() { return Context; } 141 142 private: 143 144 bool Error(LocTy L, const Twine &Msg) const { 145 return Lex.Error(L, Msg); 146 } 147 bool TokError(const Twine &Msg) const { 148 return Error(Lex.getLoc(), Msg); 149 } 150 151 /// GetGlobalVal - Get a value with the specified name or ID, creating a 152 /// forward reference record if needed. This can return null if the value 153 /// exists but does not have the right type. 154 GlobalValue *GetGlobalVal(const std::string &N, const Type *Ty, LocTy Loc); 155 GlobalValue *GetGlobalVal(unsigned ID, const Type *Ty, LocTy Loc); 156 157 // Helper Routines. 
158 bool ParseToken(lltok::Kind T, const char *ErrMsg); 159 bool EatIfPresent(lltok::Kind T) { 160 if (Lex.getKind() != T) return false; 161 Lex.Lex(); 162 return true; 163 } 164 bool ParseOptionalToken(lltok::Kind T, bool &Present, LocTy *Loc = 0) { 165 if (Lex.getKind() != T) { 166 Present = false; 167 } else { 168 if (Loc) 169 *Loc = Lex.getLoc(); 170 Lex.Lex(); 171 Present = true; 172 } 173 return false; 174 } 175 bool ParseStringConstant(std::string &Result); 176 bool ParseUInt32(unsigned &Val); 177 bool ParseUInt32(unsigned &Val, LocTy &Loc) { 178 Loc = Lex.getLoc(); 179 return ParseUInt32(Val); 180 } 181 bool ParseOptionalAddrSpace(unsigned &AddrSpace); 182 bool ParseOptionalAttrs(unsigned &Attrs, unsigned AttrKind); 183 bool ParseOptionalLinkage(unsigned &Linkage, bool &HasLinkage); 184 bool ParseOptionalLinkage(unsigned &Linkage) { 185 bool HasLinkage; return ParseOptionalLinkage(Linkage, HasLinkage); 186 } 187 bool ParseOptionalVisibility(unsigned &Visibility); 188 bool ParseOptionalCallingConv(CallingConv::ID &CC); 189 bool ParseOptionalAlignment(unsigned &Alignment); 190 bool ParseOptionalStackAlignment(unsigned &Alignment); 191 bool ParseOptionalCommaAlign(unsigned &Alignment, bool &AteExtraComma); 192 bool ParseIndexList(SmallVectorImpl<unsigned> &Indices,bool &AteExtraComma); 193 bool ParseIndexList(SmallVectorImpl<unsigned> &Indices) { 194 bool AteExtraComma; 195 if (ParseIndexList(Indices, AteExtraComma)) return true; 196 if (AteExtraComma) 197 return TokError("expected index"); 198 return false; 199 } 200 201 // Top-Level Entities 202 bool ParseTopLevelEntities(); 203 bool ValidateEndOfModule(); 204 bool ParseTargetDefinition(); 205 bool ParseDepLibs(); 206 bool ParseModuleAsm(); 207 bool ParseUnnamedType(); 208 bool ParseNamedType(); 209 bool ParseDeclare(); 210 bool ParseDefine(); 211 212 bool ParseGlobalType(bool &IsConstant); 213 bool ParseUnnamedGlobal(); 214 bool ParseNamedGlobal(); 215 bool ParseGlobal(const std::string &Name, LocTy Loc, 
unsigned Linkage, 216 bool HasLinkage, unsigned Visibility); 217 bool ParseAlias(const std::string &Name, LocTy Loc, unsigned Visibility); 218 bool ParseStandaloneMetadata(); 219 bool ParseNamedMetadata(); 220 bool ParseMDString(MDString *&Result); 221 bool ParseMDNodeID(MDNode *&Result); 222 bool ParseMDNodeID(MDNode *&Result, unsigned &SlotNo); 223 224 // Type Parsing. 225 bool ParseType(PATypeHolder &Result, bool AllowVoid = false); 226 bool ParseType(PATypeHolder &Result, LocTy &Loc, bool AllowVoid = false) { 227 Loc = Lex.getLoc(); 228 return ParseType(Result, AllowVoid); 229 } 230 bool ParseTypeRec(PATypeHolder &H); 231 bool ParseStructType(PATypeHolder &H, bool Packed); 232 bool ParseArrayVectorType(PATypeHolder &H, bool isVector); 233 bool ParseFunctionType(PATypeHolder &Result); 234 PATypeHolder HandleUpRefs(const Type *Ty); 235 236 // Function Semantic Analysis. 237 class PerFunctionState { 238 LLParser &P; 239 Function &F; 240 std::map<std::string, std::pair<Value*, LocTy> > ForwardRefVals; 241 std::map<unsigned, std::pair<Value*, LocTy> > ForwardRefValIDs; 242 std::vector<Value*> NumberedVals; 243 244 /// FunctionNumber - If this is an unnamed function, this is the slot 245 /// number of it, otherwise it is -1. 246 int FunctionNumber; 247 public: 248 PerFunctionState(LLParser &p, Function &f, int FunctionNumber); 249 ~PerFunctionState(); 250 251 Function &getFunction() const { return F; } 252 253 bool FinishFunction(); 254 255 /// GetVal - Get a value with the specified name or ID, creating a 256 /// forward reference record if needed. This can return null if the value 257 /// exists but does not have the right type. 258 Value *GetVal(const std::string &Name, const Type *Ty, LocTy Loc); 259 Value *GetVal(unsigned ID, const Type *Ty, LocTy Loc); 260 261 /// SetInstName - After an instruction is parsed and inserted into its 262 /// basic block, this installs its name. 
263 bool SetInstName(int NameID, const std::string &NameStr, LocTy NameLoc, 264 Instruction *Inst); 265 266 /// GetBB - Get a basic block with the specified name or ID, creating a 267 /// forward reference record if needed. This can return null if the value 268 /// is not a BasicBlock. 269 BasicBlock *GetBB(const std::string &Name, LocTy Loc); 270 BasicBlock *GetBB(unsigned ID, LocTy Loc); 271 272 /// DefineBB - Define the specified basic block, which is either named or 273 /// unnamed. If there is an error, this returns null otherwise it returns 274 /// the block being defined. 275 BasicBlock *DefineBB(const std::string &Name, LocTy Loc); 276 }; 277 278 bool ConvertValIDToValue(const Type *Ty, ValID &ID, Value *&V, 279 PerFunctionState *PFS); 280 281 bool ParseValue(const Type *Ty, Value *&V, PerFunctionState &PFS); 282 bool ParseValue(const Type *Ty, Value *&V, LocTy &Loc, 283 PerFunctionState &PFS) { 284 Loc = Lex.getLoc(); 285 return ParseValue(Ty, V, PFS); 286 } 287 288 bool ParseTypeAndValue(Value *&V, PerFunctionState &PFS); 289 bool ParseTypeAndValue(Value *&V, LocTy &Loc, PerFunctionState &PFS) { 290 Loc = Lex.getLoc(); 291 return ParseTypeAndValue(V, PFS); 292 } 293 bool ParseTypeAndBasicBlock(BasicBlock *&BB, LocTy &Loc, 294 PerFunctionState &PFS); 295 bool ParseTypeAndBasicBlock(BasicBlock *&BB, PerFunctionState &PFS) { 296 LocTy Loc; 297 return ParseTypeAndBasicBlock(BB, Loc, PFS); 298 } 299 300 301 struct ParamInfo { 302 LocTy Loc; 303 Value *V; 304 unsigned Attrs; 305 ParamInfo(LocTy loc, Value *v, unsigned attrs) 306 : Loc(loc), V(v), Attrs(attrs) {} 307 }; 308 bool ParseParameterList(SmallVectorImpl<ParamInfo> &ArgList, 309 PerFunctionState &PFS); 310 311 // Constant Parsing. 
312 bool ParseValID(ValID &ID, PerFunctionState *PFS = NULL); 313 bool ParseGlobalValue(const Type *Ty, Constant *&V); 314 bool ParseGlobalTypeAndValue(Constant *&V); 315 bool ParseGlobalValueVector(SmallVectorImpl<Constant*> &Elts); 316 bool ParseMetadataListValue(ValID &ID, PerFunctionState *PFS); 317 bool ParseMetadataValue(ValID &ID, PerFunctionState *PFS); 318 bool ParseMDNodeVector(SmallVectorImpl<Value*> &, PerFunctionState *PFS); 319 bool ParseInstructionMetadata(Instruction *Inst, PerFunctionState *PFS); 320 321 // Function Parsing. 322 struct ArgInfo { 323 LocTy Loc; 324 PATypeHolder Type; 325 unsigned Attrs; 326 std::string Name; 327 ArgInfo(LocTy L, PATypeHolder Ty, unsigned Attr, const std::string &N) 328 : Loc(L), Type(Ty), Attrs(Attr), Name(N) {} 329 }; 330 bool ParseArgumentList(std::vector<ArgInfo> &ArgList, 331 bool &isVarArg, bool inType); 332 bool ParseFunctionHeader(Function *&Fn, bool isDefine); 333 bool ParseFunctionBody(Function &Fn); 334 bool ParseBasicBlock(PerFunctionState &PFS); 335 336 // Instruction Parsing. Each instruction parsing routine can return with a 337 // normal result, an error result, or return having eaten an extra comma. 
338 enum InstResult { InstNormal = 0, InstError = 1, InstExtraComma = 2 }; 339 int ParseInstruction(Instruction *&Inst, BasicBlock *BB, 340 PerFunctionState &PFS); 341 bool ParseCmpPredicate(unsigned &Pred, unsigned Opc); 342 343 int ParseRet(Instruction *&Inst, BasicBlock *BB, PerFunctionState &PFS); 344 bool ParseBr(Instruction *&Inst, PerFunctionState &PFS); 345 bool ParseSwitch(Instruction *&Inst, PerFunctionState &PFS); 346 bool ParseIndirectBr(Instruction *&Inst, PerFunctionState &PFS); 347 bool ParseInvoke(Instruction *&Inst, PerFunctionState &PFS); 348 349 bool ParseArithmetic(Instruction *&I, PerFunctionState &PFS, unsigned Opc, 350 unsigned OperandType); 351 bool ParseLogical(Instruction *&I, PerFunctionState &PFS, unsigned Opc); 352 bool ParseCompare(Instruction *&I, PerFunctionState &PFS, unsigned Opc); 353 bool ParseCast(Instruction *&I, PerFunctionState &PFS, unsigned Opc); 354 bool ParseSelect(Instruction *&I, PerFunctionState &PFS); 355 bool ParseVA_Arg(Instruction *&I, PerFunctionState &PFS); 356 bool ParseExtractElement(Instruction *&I, PerFunctionState &PFS); 357 bool ParseInsertElement(Instruction *&I, PerFunctionState &PFS); 358 bool ParseShuffleVector(Instruction *&I, PerFunctionState &PFS); 359 int ParsePHI(Instruction *&I, PerFunctionState &PFS); 360 bool ParseCall(Instruction *&I, PerFunctionState &PFS, bool isTail); 361 int ParseAlloc(Instruction *&I, PerFunctionState &PFS); 362 int ParseLoad(Instruction *&I, PerFunctionState &PFS, bool isVolatile); 363 int ParseStore(Instruction *&I, PerFunctionState &PFS, bool isVolatile); 364 bool ParseGetResult(Instruction *&I, PerFunctionState &PFS); 365 int ParseGetElementPtr(Instruction *&I, PerFunctionState &PFS); 366 int ParseExtractValue(Instruction *&I, PerFunctionState &PFS); 367 int ParseInsertValue(Instruction *&I, PerFunctionState &PFS); 368 369 bool ResolveForwardRefBlockAddresses(Function *TheFn, 370 std::vector<std::pair<ValID, GlobalValue*> > &Refs, 371 PerFunctionState *PFS); 372 }; 
373} // End llvm namespace 374 375#endif 376