1//===--- DebugIR.cpp - Transform debug metadata to allow debugging IR -----===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// A Module transform pass that emits a succinct version of the IR and replaces 11// the source file metadata to allow debuggers to step through the IR. 12// 13// FIXME: instead of replacing debug metadata, this pass should allow for 14// additional metadata to be used to point capable debuggers to the IR file 15// without destroying the mapping to the original source file. 16// 17//===----------------------------------------------------------------------===// 18 19#define DEBUG_TYPE "debug-ir" 20 21#include "llvm/ADT/ValueMap.h" 22#include "llvm/Assembly/AssemblyAnnotationWriter.h" 23#include "llvm/DebugInfo.h" 24#include "llvm/DIBuilder.h" 25#include "llvm/InstVisitor.h" 26#include "llvm/IR/DataLayout.h" 27#include "llvm/IR/Instruction.h" 28#include "llvm/IR/Module.h" 29#include "llvm/Transforms/Instrumentation.h" 30#include "llvm/Transforms/Utils/Cloning.h" 31#include "llvm/Support/Debug.h" 32#include "llvm/Support/ToolOutputFile.h" 33#include "llvm/Support/FormattedStream.h" 34#include "llvm/Support/FileSystem.h" 35#include "llvm/Support/Path.h" 36 37#include "DebugIR.h" 38 39#include <string> 40 41#define STR_HELPER(x) #x 42#define STR(x) STR_HELPER(x) 43 44using namespace llvm; 45 46namespace { 47 48/// Builds a map of Value* to line numbers on which the Value appears in a 49/// textual representation of the IR by plugging into the AssemblyWriter by 50/// masquerading as an AssemblyAnnotationWriter. 51class ValueToLineMap : public AssemblyAnnotationWriter { 52 ValueMap<const Value *, unsigned int> Lines; 53 typedef ValueMap<const Value *, unsigned int>::const_iterator LineIter; 54 55 void addEntry(const Value *V, formatted_raw_ostream &Out) { 56 Out.flush(); 57 Lines.insert(std::make_pair(V, Out.getLine() + 1)); 58 } 59 60public: 61 62 /// Prints Module to a null buffer in order to build the map of Value pointers 63 /// to line numbers. 64 ValueToLineMap(const Module *M) { 65 raw_null_ostream ThrowAway; 66 M->print(ThrowAway, this); 67 } 68 69 // This function is called after an Instruction, GlobalValue, or GlobalAlias 70 // is printed. 71 void printInfoComment(const Value &V, formatted_raw_ostream &Out) { 72 addEntry(&V, Out); 73 } 74 75 void emitFunctionAnnot(const Function *F, formatted_raw_ostream &Out) { 76 addEntry(F, Out); 77 } 78 79 /// If V appears on a line in the textual IR representation, sets Line to the 80 /// line number and returns true, otherwise returns false. 81 bool getLine(const Value *V, unsigned int &Line) const { 82 LineIter i = Lines.find(V); 83 if (i != Lines.end()) { 84 Line = i->second; 85 return true; 86 } 87 return false; 88 } 89}; 90 91/// Removes debug intrisncs like llvm.dbg.declare and llvm.dbg.value. 92class DebugIntrinsicsRemover : public InstVisitor<DebugIntrinsicsRemover> { 93 void remove(Instruction &I) { I.eraseFromParent(); } 94 95public: 96 static void process(Module &M) { 97 DebugIntrinsicsRemover Remover; 98 Remover.visit(&M); 99 } 100 void visitDbgDeclareInst(DbgDeclareInst &I) { remove(I); } 101 void visitDbgValueInst(DbgValueInst &I) { remove(I); } 102 void visitDbgInfoIntrinsic(DbgInfoIntrinsic &I) { remove(I); } 103}; 104 105/// Removes debug metadata (!dbg) nodes from all instructions, and optionally 106/// metadata named "llvm.dbg.cu" if RemoveNamedInfo is true. 107class DebugMetadataRemover : public InstVisitor<DebugMetadataRemover> { 108 bool RemoveNamedInfo; 109 110public: 111 static void process(Module &M, bool RemoveNamedInfo = true) { 112 DebugMetadataRemover Remover(RemoveNamedInfo); 113 Remover.run(&M); 114 } 115 116 DebugMetadataRemover(bool RemoveNamedInfo) 117 : RemoveNamedInfo(RemoveNamedInfo) {} 118 119 void visitInstruction(Instruction &I) { 120 if (I.getMetadata(LLVMContext::MD_dbg)) 121 I.setMetadata(LLVMContext::MD_dbg, 0); 122 } 123 124 void run(Module *M) { 125 // Remove debug metadata attached to instructions 126 visit(M); 127 128 if (RemoveNamedInfo) { 129 // Remove CU named metadata (and all children nodes) 130 NamedMDNode *Node = M->getNamedMetadata("llvm.dbg.cu"); 131 if (Node) 132 M->eraseNamedMetadata(Node); 133 } 134 } 135}; 136 137/// Updates debug metadata in a Module: 138/// - changes Filename/Directory to values provided on construction 139/// - adds/updates line number (DebugLoc) entries associated with each 140/// instruction to reflect the instruction's location in an LLVM IR file 141class DIUpdater : public InstVisitor<DIUpdater> { 142 /// Builder of debug information 143 DIBuilder Builder; 144 145 /// Helper for type attributes/sizes/etc 146 DataLayout Layout; 147 148 /// Map of Value* to line numbers 149 const ValueToLineMap LineTable; 150 151 /// Map of Value* (in original Module) to Value* (in optional cloned Module) 152 const ValueToValueMapTy *VMap; 153 154 /// Directory of debug metadata 155 DebugInfoFinder Finder; 156 157 /// Source filename and directory 158 StringRef Filename; 159 StringRef Directory; 160 161 // CU nodes needed when creating DI subprograms 162 MDNode *FileNode; 163 MDNode *LexicalBlockFileNode; 164 const MDNode *CUNode; 165 166 ValueMap<const Function *, MDNode *> SubprogramDescriptors; 167 DenseMap<const Type *, MDNode *> TypeDescriptors; 168 169public: 170 DIUpdater(Module &M, StringRef Filename = StringRef(), 171 StringRef Directory = StringRef(), const Module *DisplayM = 0, 172 const ValueToValueMapTy *VMap = 0) 173 : Builder(M), Layout(&M), LineTable(DisplayM ? DisplayM : &M), VMap(VMap), 174 Finder(), Filename(Filename), Directory(Directory), FileNode(0), 175 LexicalBlockFileNode(0), CUNode(0) { 176 Finder.processModule(M); 177 visit(&M); 178 } 179 180 ~DIUpdater() { Builder.finalize(); } 181 182 void visitModule(Module &M) { 183 if (Finder.compile_unit_count() > 1) 184 report_fatal_error("DebugIR pass supports only a signle compile unit per " 185 "Module."); 186 createCompileUnit( 187 Finder.compile_unit_count() == 1 ? *Finder.compile_unit_begin() : 0); 188 } 189 190 void visitFunction(Function &F) { 191 if (F.isDeclaration() || findDISubprogram(&F)) 192 return; 193 194 StringRef MangledName = F.getName(); 195 DICompositeType Sig = createFunctionSignature(&F); 196 197 // find line of function declaration 198 unsigned Line = 0; 199 if (!findLine(&F, Line)) { 200 DEBUG(dbgs() << "WARNING: No line for Function " << F.getName().str() 201 << "\n"); 202 return; 203 } 204 205 Instruction *FirstInst = F.begin()->begin(); 206 unsigned ScopeLine = 0; 207 if (!findLine(FirstInst, ScopeLine)) { 208 DEBUG(dbgs() << "WARNING: No line for 1st Instruction in Function " 209 << F.getName().str() << "\n"); 210 return; 211 } 212 213 bool Local = F.hasInternalLinkage(); 214 bool IsDefinition = !F.isDeclaration(); 215 bool IsOptimized = false; 216 217 int FuncFlags = llvm::DIDescriptor::FlagPrototyped; 218 assert(CUNode && FileNode); 219 DISubprogram Sub = Builder.createFunction( 220 DICompileUnit(CUNode), F.getName(), MangledName, DIFile(FileNode), Line, 221 Sig, Local, IsDefinition, ScopeLine, FuncFlags, IsOptimized, &F); 222 assert(Sub.isSubprogram()); 223 DEBUG(dbgs() << "create subprogram mdnode " << *Sub << ": " 224 << "\n"); 225 226 SubprogramDescriptors.insert(std::make_pair(&F, Sub)); 227 } 228 229 void visitInstruction(Instruction &I) { 230 DebugLoc Loc(I.getDebugLoc()); 231 232 /// If a ValueToValueMap is provided, use it to get the real instruction as 233 /// the line table was generated on a clone of the module on which we are 234 /// operating. 235 Value *RealInst = 0; 236 if (VMap) 237 RealInst = VMap->lookup(&I); 238 239 if (!RealInst) 240 RealInst = &I; 241 242 unsigned Col = 0; // FIXME: support columns 243 unsigned Line; 244 if (!LineTable.getLine(RealInst, Line)) { 245 // Instruction has no line, it may have been removed (in the module that 246 // will be passed to the debugger) so there is nothing to do here. 247 DEBUG(dbgs() << "WARNING: no LineTable entry for instruction " << RealInst 248 << "\n"); 249 DEBUG(RealInst->dump()); 250 return; 251 } 252 253 DebugLoc NewLoc; 254 if (!Loc.isUnknown()) 255 // I had a previous debug location: re-use the DebugLoc 256 NewLoc = DebugLoc::get(Line, Col, Loc.getScope(RealInst->getContext()), 257 Loc.getInlinedAt(RealInst->getContext())); 258 else if (MDNode *scope = findScope(&I)) 259 NewLoc = DebugLoc::get(Line, Col, scope, 0); 260 else { 261 DEBUG(dbgs() << "WARNING: no valid scope for instruction " << &I 262 << ". no DebugLoc will be present." 263 << "\n"); 264 return; 265 } 266 267 addDebugLocation(I, NewLoc); 268 } 269 270private: 271 272 void createCompileUnit(MDNode *CUToReplace) { 273 std::string Flags; 274 bool IsOptimized = false; 275 StringRef Producer; 276 unsigned RuntimeVersion(0); 277 StringRef SplitName; 278 279 if (CUToReplace) { 280 // save fields from existing CU to re-use in the new CU 281 DICompileUnit ExistingCU(CUToReplace); 282 Producer = ExistingCU.getProducer(); 283 IsOptimized = ExistingCU.isOptimized(); 284 Flags = ExistingCU.getFlags(); 285 RuntimeVersion = ExistingCU.getRunTimeVersion(); 286 SplitName = ExistingCU.getSplitDebugFilename(); 287 } else { 288 Producer = 289 "LLVM Version " STR(LLVM_VERSION_MAJOR) "." STR(LLVM_VERSION_MINOR); 290 } 291 292 CUNode = 293 Builder.createCompileUnit(dwarf::DW_LANG_C99, Filename, Directory, 294 Producer, IsOptimized, Flags, RuntimeVersion); 295 296 if (CUToReplace) 297 CUToReplace->replaceAllUsesWith(const_cast<MDNode *>(CUNode)); 298 299 DICompileUnit CU(CUNode); 300 FileNode = Builder.createFile(Filename, Directory); 301 LexicalBlockFileNode = Builder.createLexicalBlockFile(CU, DIFile(FileNode)); 302 } 303 304 /// Returns the MDNode* that represents the DI scope to associate with I 305 MDNode *findScope(const Instruction *I) { 306 const Function *F = I->getParent()->getParent(); 307 if (MDNode *ret = findDISubprogram(F)) 308 return ret; 309 310 DEBUG(dbgs() << "WARNING: Using fallback lexical block file scope " 311 << LexicalBlockFileNode << " as scope for instruction " << I 312 << "\n"); 313 return LexicalBlockFileNode; 314 } 315 316 /// Returns the MDNode* that is the descriptor for F 317 MDNode *findDISubprogram(const Function *F) { 318 typedef ValueMap<const Function *, MDNode *>::const_iterator FuncNodeIter; 319 FuncNodeIter i = SubprogramDescriptors.find(F); 320 if (i != SubprogramDescriptors.end()) 321 return i->second; 322 323 DEBUG(dbgs() << "searching for DI scope node for Function " << F 324 << " in a list of " << Finder.subprogram_count() 325 << " subprogram nodes" 326 << "\n"); 327 328 for (DebugInfoFinder::iterator i = Finder.subprogram_begin(), 329 e = Finder.subprogram_end(); 330 i != e; ++i) { 331 DISubprogram S(*i); 332 if (S.getFunction() == F) { 333 DEBUG(dbgs() << "Found DISubprogram " << *i << " for function " 334 << S.getFunction() << "\n"); 335 return *i; 336 } 337 } 338 DEBUG(dbgs() << "unable to find DISubprogram node for function " 339 << F->getName().str() << "\n"); 340 return 0; 341 } 342 343 /// Sets Line to the line number on which V appears and returns true. If a 344 /// line location for V is not found, returns false. 345 bool findLine(const Value *V, unsigned &Line) { 346 if (LineTable.getLine(V, Line)) 347 return true; 348 349 if (VMap) { 350 Value *mapped = VMap->lookup(V); 351 if (mapped && LineTable.getLine(mapped, Line)) 352 return true; 353 } 354 return false; 355 } 356 357 std::string getTypeName(Type *T) { 358 std::string TypeName; 359 raw_string_ostream TypeStream(TypeName); 360 T->print(TypeStream); 361 TypeStream.flush(); 362 return TypeName; 363 } 364 365 /// Returns the MDNode that represents type T if it is already created, or 0 366 /// if it is not. 367 MDNode *getType(const Type *T) { 368 typedef DenseMap<const Type *, MDNode *>::const_iterator TypeNodeIter; 369 TypeNodeIter i = TypeDescriptors.find(T); 370 if (i != TypeDescriptors.end()) 371 return i->second; 372 return 0; 373 } 374 375 /// Returns a DebugInfo type from an LLVM type T. 376 DIDerivedType getOrCreateType(Type *T) { 377 MDNode *N = getType(T); 378 if (N) 379 return DIDerivedType(N); 380 else if (T->isVoidTy()) 381 return DIDerivedType(0); 382 else if (T->isStructTy()) { 383 N = Builder.createStructType( 384 DIScope(LexicalBlockFileNode), T->getStructName(), DIFile(FileNode), 385 0, Layout.getTypeSizeInBits(T), Layout.getABITypeAlignment(T), 0, 386 DIType(0), DIArray(0)); // filled in later 387 388 // N is added to the map (early) so that element search below can find it, 389 // so as to avoid infinite recursion for structs that contain pointers to 390 // their own type. 391 TypeDescriptors[T] = N; 392 DICompositeType StructDescriptor(N); 393 394 SmallVector<Value *, 4> Elements; 395 for (unsigned i = 0; i < T->getStructNumElements(); ++i) 396 Elements.push_back(getOrCreateType(T->getStructElementType(i))); 397 398 // set struct elements 399 StructDescriptor.setTypeArray(Builder.getOrCreateArray(Elements)); 400 } else if (T->isPointerTy()) { 401 Type *PointeeTy = T->getPointerElementType(); 402 if (!(N = getType(PointeeTy))) 403 N = Builder.createPointerType( 404 getOrCreateType(PointeeTy), Layout.getPointerSizeInBits(), 405 Layout.getPrefTypeAlignment(T), getTypeName(T)); 406 } else if (T->isArrayTy()) { 407 SmallVector<Value *, 1> Subrange; 408 Subrange.push_back( 409 Builder.getOrCreateSubrange(0, T->getArrayNumElements() - 1)); 410 411 N = Builder.createArrayType(Layout.getTypeSizeInBits(T), 412 Layout.getPrefTypeAlignment(T), 413 getOrCreateType(T->getArrayElementType()), 414 Builder.getOrCreateArray(Subrange)); 415 } else { 416 int encoding = llvm::dwarf::DW_ATE_signed; 417 if (T->isIntegerTy()) 418 encoding = llvm::dwarf::DW_ATE_unsigned; 419 else if (T->isFloatingPointTy()) 420 encoding = llvm::dwarf::DW_ATE_float; 421 422 N = Builder.createBasicType(getTypeName(T), T->getPrimitiveSizeInBits(), 423 0, encoding); 424 } 425 TypeDescriptors[T] = N; 426 return DIDerivedType(N); 427 } 428 429 /// Returns a DebugInfo type that represents a function signature for Func. 430 DICompositeType createFunctionSignature(const Function *Func) { 431 SmallVector<Value *, 4> Params; 432 DIDerivedType ReturnType(getOrCreateType(Func->getReturnType())); 433 Params.push_back(ReturnType); 434 435 const Function::ArgumentListType &Args(Func->getArgumentList()); 436 for (Function::ArgumentListType::const_iterator i = Args.begin(), 437 e = Args.end(); 438 i != e; ++i) { 439 Type *T(i->getType()); 440 Params.push_back(getOrCreateType(T)); 441 } 442 443 DIArray ParamArray = Builder.getOrCreateArray(Params); 444 return Builder.createSubroutineType(DIFile(FileNode), ParamArray); 445 } 446 447 /// Associates Instruction I with debug location Loc. 448 void addDebugLocation(Instruction &I, DebugLoc Loc) { 449 MDNode *MD = Loc.getAsMDNode(I.getContext()); 450 I.setMetadata(LLVMContext::MD_dbg, MD); 451 } 452}; 453 454/// Sets Filename/Directory from the Module identifier and returns true, or 455/// false if source information is not present. 456bool getSourceInfoFromModule(const Module &M, std::string &Directory, 457 std::string &Filename) { 458 std::string PathStr(M.getModuleIdentifier()); 459 if (PathStr.length() == 0 || PathStr == "<stdin>") 460 return false; 461 462 Filename = sys::path::filename(PathStr); 463 SmallVector<char, 16> Path(PathStr.begin(), PathStr.end()); 464 sys::path::remove_filename(Path); 465 Directory = StringRef(Path.data(), Path.size()); 466 return true; 467} 468 469// Sets Filename/Directory from debug information in M and returns true, or 470// false if no debug information available, or cannot be parsed. 471bool getSourceInfoFromDI(const Module &M, std::string &Directory, 472 std::string &Filename) { 473 NamedMDNode *CUNode = M.getNamedMetadata("llvm.dbg.cu"); 474 if (!CUNode || CUNode->getNumOperands() == 0) 475 return false; 476 477 DICompileUnit CU(CUNode->getOperand(0)); 478 if (!CU.Verify()) 479 return false; 480 481 Filename = CU.getFilename(); 482 Directory = CU.getDirectory(); 483 return true; 484} 485 486} // anonymous namespace 487 488namespace llvm { 489 490bool DebugIR::getSourceInfo(const Module &M) { 491 ParsedPath = getSourceInfoFromDI(M, Directory, Filename) || 492 getSourceInfoFromModule(M, Directory, Filename); 493 return ParsedPath; 494} 495 496bool DebugIR::updateExtension(StringRef NewExtension) { 497 size_t dot = Filename.find_last_of("."); 498 if (dot == std::string::npos) 499 return false; 500 501 Filename.erase(dot); 502 Filename += NewExtension.str(); 503 return true; 504} 505 506void DebugIR::generateFilename(OwningPtr<int> &fd) { 507 SmallVector<char, 16> PathVec; 508 fd.reset(new int); 509 sys::fs::createTemporaryFile("debug-ir", "ll", *fd, PathVec); 510 StringRef Path(PathVec.data(), PathVec.size()); 511 Filename = sys::path::filename(Path); 512 sys::path::remove_filename(PathVec); 513 Directory = StringRef(PathVec.data(), PathVec.size()); 514 515 GeneratedPath = true; 516} 517 518std::string DebugIR::getPath() { 519 SmallVector<char, 16> Path; 520 sys::path::append(Path, Directory, Filename); 521 Path.resize(Filename.size() + Directory.size() + 2); 522 Path[Filename.size() + Directory.size() + 1] = '\0'; 523 return std::string(Path.data()); 524} 525 526void DebugIR::writeDebugBitcode(const Module *M, int *fd) { 527 OwningPtr<raw_fd_ostream> Out; 528 std::string error; 529 530 if (!fd) { 531 std::string Path = getPath(); 532 Out.reset(new raw_fd_ostream(Path.c_str(), error)); 533 DEBUG(dbgs() << "WRITING debug bitcode from Module " << M << " to file " 534 << Path << "\n"); 535 } else { 536 DEBUG(dbgs() << "WRITING debug bitcode from Module " << M << " to fd " 537 << *fd << "\n"); 538 Out.reset(new raw_fd_ostream(*fd, true)); 539 } 540 541 M->print(*Out, 0); 542 Out->close(); 543} 544 545void DebugIR::createDebugInfo(Module &M, OwningPtr<Module> &DisplayM) { 546 if (M.getFunctionList().size() == 0) 547 // no functions -- no debug info needed 548 return; 549 550 OwningPtr<ValueToValueMapTy> VMap; 551 552 if (WriteSourceToDisk && (HideDebugIntrinsics || HideDebugMetadata)) { 553 VMap.reset(new ValueToValueMapTy); 554 DisplayM.reset(CloneModule(&M, *VMap)); 555 556 if (HideDebugIntrinsics) 557 DebugIntrinsicsRemover::process(*DisplayM); 558 559 if (HideDebugMetadata) 560 DebugMetadataRemover::process(*DisplayM); 561 } 562 563 DIUpdater R(M, Filename, Directory, DisplayM.get(), VMap.get()); 564} 565 566bool DebugIR::isMissingPath() { return Filename.empty() || Directory.empty(); } 567 568bool DebugIR::runOnModule(Module &M) { 569 OwningPtr<int> fd; 570 571 if (isMissingPath() && !getSourceInfo(M)) { 572 if (!WriteSourceToDisk) 573 report_fatal_error("DebugIR unable to determine file name in input. " 574 "Ensure Module contains an identifier, a valid " 575 "DICompileUnit, or construct DebugIR with " 576 "non-empty Filename/Directory parameters."); 577 else 578 generateFilename(fd); 579 } 580 581 if (!GeneratedPath && WriteSourceToDisk) 582 updateExtension(".debug-ll"); 583 584 // Clear line numbers. Keep debug info (if any) if we were able to read the 585 // file name from the DICompileUnit descriptor. 586 DebugMetadataRemover::process(M, !ParsedPath); 587 588 OwningPtr<Module> DisplayM; 589 createDebugInfo(M, DisplayM); 590 if (WriteSourceToDisk) { 591 Module *OutputM = DisplayM.get() ? DisplayM.get() : &M; 592 writeDebugBitcode(OutputM, fd.get()); 593 } 594 595 DEBUG(M.dump()); 596 return true; 597} 598 599bool DebugIR::runOnModule(Module &M, std::string &Path) { 600 bool result = runOnModule(M); 601 Path = getPath(); 602 return result; 603} 604 605} // llvm namespace 606 607char DebugIR::ID = 0; 608INITIALIZE_PASS(DebugIR, "debug-ir", "Enable debugging IR", false, false) 609 610ModulePass *llvm::createDebugIRPass(bool HideDebugIntrinsics, 611 bool HideDebugMetadata, StringRef Directory, 612 StringRef Filename) { 613 return new DebugIR(HideDebugIntrinsics, HideDebugMetadata, Directory, 614 Filename); 615} 616 617ModulePass *llvm::createDebugIRPass() { return new DebugIR(); } 618