DebugIR.cpp revision 36b56886974eae4f9c5ebc96befd3e7bfe5de338
1//===--- DebugIR.cpp - Transform debug metadata to allow debugging IR -----===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// A Module transform pass that emits a succinct version of the IR and replaces
11// the source file metadata to allow debuggers to step through the IR.
12//
13// FIXME: instead of replacing debug metadata, this pass should allow for
14// additional metadata to be used to point capable debuggers to the IR file
15// without destroying the mapping to the original source file.
16//
17//===----------------------------------------------------------------------===//
18
19#define DEBUG_TYPE "debug-ir"
20
21#include "llvm/IR/ValueMap.h"
22#include "DebugIR.h"
23#include "llvm/IR/AssemblyAnnotationWriter.h"
24#include "llvm/IR/DIBuilder.h"
25#include "llvm/IR/DataLayout.h"
26#include "llvm/IR/DebugInfo.h"
27#include "llvm/IR/InstVisitor.h"
28#include "llvm/IR/Instruction.h"
29#include "llvm/IR/LLVMContext.h"
30#include "llvm/IR/Module.h"
31#include "llvm/Support/Debug.h"
32#include "llvm/Support/FileSystem.h"
33#include "llvm/Support/FormattedStream.h"
34#include "llvm/Support/Path.h"
35#include "llvm/Support/ToolOutputFile.h"
36#include "llvm/Transforms/Instrumentation.h"
37#include "llvm/Transforms/Utils/Cloning.h"
38#include <string>
39
// Two-level stringification helpers: STR(x) forwards to STR_HELPER so that a
// macro argument is expanded first and only then turned into a string literal
// by the `#` operator (used below to build the default producer string).
#define STR_HELPER(x) #x
#define STR(x) STR_HELPER(x)
42
43using namespace llvm;
44
45namespace {
46
47/// Builds a map of Value* to line numbers on which the Value appears in a
48/// textual representation of the IR by plugging into the AssemblyWriter by
49/// masquerading as an AssemblyAnnotationWriter.
50class ValueToLineMap : public AssemblyAnnotationWriter {
51  ValueMap<const Value *, unsigned int> Lines;
52  typedef ValueMap<const Value *, unsigned int>::const_iterator LineIter;
53
54  void addEntry(const Value *V, formatted_raw_ostream &Out) {
55    Out.flush();
56    Lines.insert(std::make_pair(V, Out.getLine() + 1));
57  }
58
59public:
60
61  /// Prints Module to a null buffer in order to build the map of Value pointers
62  /// to line numbers.
63  ValueToLineMap(const Module *M) {
64    raw_null_ostream ThrowAway;
65    M->print(ThrowAway, this);
66  }
67
68  // This function is called after an Instruction, GlobalValue, or GlobalAlias
69  // is printed.
70  void printInfoComment(const Value &V, formatted_raw_ostream &Out) override {
71    addEntry(&V, Out);
72  }
73
74  void emitFunctionAnnot(const Function *F,
75                         formatted_raw_ostream &Out) override {
76    addEntry(F, Out);
77  }
78
79  /// If V appears on a line in the textual IR representation, sets Line to the
80  /// line number and returns true, otherwise returns false.
81  bool getLine(const Value *V, unsigned int &Line) const {
82    LineIter i = Lines.find(V);
83    if (i != Lines.end()) {
84      Line = i->second;
85      return true;
86    }
87    return false;
88  }
89};
90
91/// Removes debug intrisncs like llvm.dbg.declare and llvm.dbg.value.
92class DebugIntrinsicsRemover : public InstVisitor<DebugIntrinsicsRemover> {
93  void remove(Instruction &I) { I.eraseFromParent(); }
94
95public:
96  static void process(Module &M) {
97    DebugIntrinsicsRemover Remover;
98    Remover.visit(&M);
99  }
100  void visitDbgDeclareInst(DbgDeclareInst &I) { remove(I); }
101  void visitDbgValueInst(DbgValueInst &I) { remove(I); }
102  void visitDbgInfoIntrinsic(DbgInfoIntrinsic &I) { remove(I); }
103};
104
105/// Removes debug metadata (!dbg) nodes from all instructions, and optionally
106/// metadata named "llvm.dbg.cu" if RemoveNamedInfo is true.
107class DebugMetadataRemover : public InstVisitor<DebugMetadataRemover> {
108  bool RemoveNamedInfo;
109
110public:
111  static void process(Module &M, bool RemoveNamedInfo = true) {
112    DebugMetadataRemover Remover(RemoveNamedInfo);
113    Remover.run(&M);
114  }
115
116  DebugMetadataRemover(bool RemoveNamedInfo)
117      : RemoveNamedInfo(RemoveNamedInfo) {}
118
119  void visitInstruction(Instruction &I) {
120    if (I.getMetadata(LLVMContext::MD_dbg))
121      I.setMetadata(LLVMContext::MD_dbg, 0);
122  }
123
124  void run(Module *M) {
125    // Remove debug metadata attached to instructions
126    visit(M);
127
128    if (RemoveNamedInfo) {
129      // Remove CU named metadata (and all children nodes)
130      NamedMDNode *Node = M->getNamedMetadata("llvm.dbg.cu");
131      if (Node)
132        M->eraseNamedMetadata(Node);
133    }
134  }
135};
136
137/// Updates debug metadata in a Module:
138///   - changes Filename/Directory to values provided on construction
139///   - adds/updates line number (DebugLoc) entries associated with each
140///     instruction to reflect the instruction's location in an LLVM IR file
141class DIUpdater : public InstVisitor<DIUpdater> {
142  /// Builder of debug information
143  DIBuilder Builder;
144
145  /// Helper for type attributes/sizes/etc
146  DataLayout Layout;
147
148  /// Map of Value* to line numbers
149  const ValueToLineMap LineTable;
150
151  /// Map of Value* (in original Module) to Value* (in optional cloned Module)
152  const ValueToValueMapTy *VMap;
153
154  /// Directory of debug metadata
155  DebugInfoFinder Finder;
156
157  /// Source filename and directory
158  StringRef Filename;
159  StringRef Directory;
160
161  // CU nodes needed when creating DI subprograms
162  MDNode *FileNode;
163  MDNode *LexicalBlockFileNode;
164  const MDNode *CUNode;
165
166  ValueMap<const Function *, MDNode *> SubprogramDescriptors;
167  DenseMap<const Type *, MDNode *> TypeDescriptors;
168
169public:
170  DIUpdater(Module &M, StringRef Filename = StringRef(),
171            StringRef Directory = StringRef(), const Module *DisplayM = 0,
172            const ValueToValueMapTy *VMap = 0)
173      : Builder(M), Layout(&M), LineTable(DisplayM ? DisplayM : &M), VMap(VMap),
174        Finder(), Filename(Filename), Directory(Directory), FileNode(0),
175        LexicalBlockFileNode(0), CUNode(0) {
176    Finder.processModule(M);
177    visit(&M);
178  }
179
180  ~DIUpdater() { Builder.finalize(); }
181
182  void visitModule(Module &M) {
183    if (Finder.compile_unit_count() > 1)
184      report_fatal_error("DebugIR pass supports only a signle compile unit per "
185                         "Module.");
186    createCompileUnit(Finder.compile_unit_count() == 1 ?
187                      (MDNode*)*Finder.compile_units().begin() : 0);
188  }
189
190  void visitFunction(Function &F) {
191    if (F.isDeclaration() || findDISubprogram(&F))
192      return;
193
194    StringRef MangledName = F.getName();
195    DICompositeType Sig = createFunctionSignature(&F);
196
197    // find line of function declaration
198    unsigned Line = 0;
199    if (!findLine(&F, Line)) {
200      DEBUG(dbgs() << "WARNING: No line for Function " << F.getName().str()
201                   << "\n");
202      return;
203    }
204
205    Instruction *FirstInst = F.begin()->begin();
206    unsigned ScopeLine = 0;
207    if (!findLine(FirstInst, ScopeLine)) {
208      DEBUG(dbgs() << "WARNING: No line for 1st Instruction in Function "
209                   << F.getName().str() << "\n");
210      return;
211    }
212
213    bool Local = F.hasInternalLinkage();
214    bool IsDefinition = !F.isDeclaration();
215    bool IsOptimized = false;
216
217    int FuncFlags = llvm::DIDescriptor::FlagPrototyped;
218    assert(CUNode && FileNode);
219    DISubprogram Sub = Builder.createFunction(
220        DICompileUnit(CUNode), F.getName(), MangledName, DIFile(FileNode), Line,
221        Sig, Local, IsDefinition, ScopeLine, FuncFlags, IsOptimized, &F);
222    assert(Sub.isSubprogram());
223    DEBUG(dbgs() << "create subprogram mdnode " << *Sub << ": "
224                 << "\n");
225
226    SubprogramDescriptors.insert(std::make_pair(&F, Sub));
227  }
228
229  void visitInstruction(Instruction &I) {
230    DebugLoc Loc(I.getDebugLoc());
231
232    /// If a ValueToValueMap is provided, use it to get the real instruction as
233    /// the line table was generated on a clone of the module on which we are
234    /// operating.
235    Value *RealInst = 0;
236    if (VMap)
237      RealInst = VMap->lookup(&I);
238
239    if (!RealInst)
240      RealInst = &I;
241
242    unsigned Col = 0; // FIXME: support columns
243    unsigned Line;
244    if (!LineTable.getLine(RealInst, Line)) {
245      // Instruction has no line, it may have been removed (in the module that
246      // will be passed to the debugger) so there is nothing to do here.
247      DEBUG(dbgs() << "WARNING: no LineTable entry for instruction " << RealInst
248                   << "\n");
249      DEBUG(RealInst->dump());
250      return;
251    }
252
253    DebugLoc NewLoc;
254    if (!Loc.isUnknown())
255      // I had a previous debug location: re-use the DebugLoc
256      NewLoc = DebugLoc::get(Line, Col, Loc.getScope(RealInst->getContext()),
257                             Loc.getInlinedAt(RealInst->getContext()));
258    else if (MDNode *scope = findScope(&I))
259      NewLoc = DebugLoc::get(Line, Col, scope, 0);
260    else {
261      DEBUG(dbgs() << "WARNING: no valid scope for instruction " << &I
262                   << ". no DebugLoc will be present."
263                   << "\n");
264      return;
265    }
266
267    addDebugLocation(I, NewLoc);
268  }
269
270private:
271
272  void createCompileUnit(MDNode *CUToReplace) {
273    std::string Flags;
274    bool IsOptimized = false;
275    StringRef Producer;
276    unsigned RuntimeVersion(0);
277    StringRef SplitName;
278
279    if (CUToReplace) {
280      // save fields from existing CU to re-use in the new CU
281      DICompileUnit ExistingCU(CUToReplace);
282      Producer = ExistingCU.getProducer();
283      IsOptimized = ExistingCU.isOptimized();
284      Flags = ExistingCU.getFlags();
285      RuntimeVersion = ExistingCU.getRunTimeVersion();
286      SplitName = ExistingCU.getSplitDebugFilename();
287    } else {
288      Producer =
289          "LLVM Version " STR(LLVM_VERSION_MAJOR) "." STR(LLVM_VERSION_MINOR);
290    }
291
292    CUNode =
293        Builder.createCompileUnit(dwarf::DW_LANG_C99, Filename, Directory,
294                                  Producer, IsOptimized, Flags, RuntimeVersion);
295
296    if (CUToReplace)
297      CUToReplace->replaceAllUsesWith(const_cast<MDNode *>(CUNode));
298
299    DICompileUnit CU(CUNode);
300    FileNode = Builder.createFile(Filename, Directory);
301    LexicalBlockFileNode = Builder.createLexicalBlockFile(CU, DIFile(FileNode));
302  }
303
304  /// Returns the MDNode* that represents the DI scope to associate with I
305  MDNode *findScope(const Instruction *I) {
306    const Function *F = I->getParent()->getParent();
307    if (MDNode *ret = findDISubprogram(F))
308      return ret;
309
310    DEBUG(dbgs() << "WARNING: Using fallback lexical block file scope "
311                 << LexicalBlockFileNode << " as scope for instruction " << I
312                 << "\n");
313    return LexicalBlockFileNode;
314  }
315
316  /// Returns the MDNode* that is the descriptor for F
317  MDNode *findDISubprogram(const Function *F) {
318    typedef ValueMap<const Function *, MDNode *>::const_iterator FuncNodeIter;
319    FuncNodeIter i = SubprogramDescriptors.find(F);
320    if (i != SubprogramDescriptors.end())
321      return i->second;
322
323    DEBUG(dbgs() << "searching for DI scope node for Function " << F
324                 << " in a list of " << Finder.subprogram_count()
325                 << " subprogram nodes"
326                 << "\n");
327
328    for (DISubprogram S : Finder.subprograms()) {
329      if (S.getFunction() == F) {
330        DEBUG(dbgs() << "Found DISubprogram " << S << " for function "
331                     << S.getFunction() << "\n");
332        return S;
333      }
334    }
335    DEBUG(dbgs() << "unable to find DISubprogram node for function "
336                 << F->getName().str() << "\n");
337    return 0;
338  }
339
340  /// Sets Line to the line number on which V appears and returns true. If a
341  /// line location for V is not found, returns false.
342  bool findLine(const Value *V, unsigned &Line) {
343    if (LineTable.getLine(V, Line))
344      return true;
345
346    if (VMap) {
347      Value *mapped = VMap->lookup(V);
348      if (mapped && LineTable.getLine(mapped, Line))
349        return true;
350    }
351    return false;
352  }
353
354  std::string getTypeName(Type *T) {
355    std::string TypeName;
356    raw_string_ostream TypeStream(TypeName);
357    T->print(TypeStream);
358    TypeStream.flush();
359    return TypeName;
360  }
361
362  /// Returns the MDNode that represents type T if it is already created, or 0
363  /// if it is not.
364  MDNode *getType(const Type *T) {
365    typedef DenseMap<const Type *, MDNode *>::const_iterator TypeNodeIter;
366    TypeNodeIter i = TypeDescriptors.find(T);
367    if (i != TypeDescriptors.end())
368      return i->second;
369    return 0;
370  }
371
372  /// Returns a DebugInfo type from an LLVM type T.
373  DIDerivedType getOrCreateType(Type *T) {
374    MDNode *N = getType(T);
375    if (N)
376      return DIDerivedType(N);
377    else if (T->isVoidTy())
378      return DIDerivedType(0);
379    else if (T->isStructTy()) {
380      N = Builder.createStructType(
381          DIScope(LexicalBlockFileNode), T->getStructName(), DIFile(FileNode),
382          0, Layout.getTypeSizeInBits(T), Layout.getABITypeAlignment(T), 0,
383          DIType(0), DIArray(0)); // filled in later
384
385      // N is added to the map (early) so that element search below can find it,
386      // so as to avoid infinite recursion for structs that contain pointers to
387      // their own type.
388      TypeDescriptors[T] = N;
389      DICompositeType StructDescriptor(N);
390
391      SmallVector<Value *, 4> Elements;
392      for (unsigned i = 0; i < T->getStructNumElements(); ++i)
393        Elements.push_back(getOrCreateType(T->getStructElementType(i)));
394
395      // set struct elements
396      StructDescriptor.setTypeArray(Builder.getOrCreateArray(Elements));
397    } else if (T->isPointerTy()) {
398      Type *PointeeTy = T->getPointerElementType();
399      if (!(N = getType(PointeeTy)))
400        N = Builder.createPointerType(
401            getOrCreateType(PointeeTy), Layout.getPointerTypeSizeInBits(T),
402            Layout.getPrefTypeAlignment(T), getTypeName(T));
403    } else if (T->isArrayTy()) {
404      SmallVector<Value *, 1> Subrange;
405      Subrange.push_back(
406          Builder.getOrCreateSubrange(0, T->getArrayNumElements() - 1));
407
408      N = Builder.createArrayType(Layout.getTypeSizeInBits(T),
409                                  Layout.getPrefTypeAlignment(T),
410                                  getOrCreateType(T->getArrayElementType()),
411                                  Builder.getOrCreateArray(Subrange));
412    } else {
413      int encoding = llvm::dwarf::DW_ATE_signed;
414      if (T->isIntegerTy())
415        encoding = llvm::dwarf::DW_ATE_unsigned;
416      else if (T->isFloatingPointTy())
417        encoding = llvm::dwarf::DW_ATE_float;
418
419      N = Builder.createBasicType(getTypeName(T), T->getPrimitiveSizeInBits(),
420                                  0, encoding);
421    }
422    TypeDescriptors[T] = N;
423    return DIDerivedType(N);
424  }
425
426  /// Returns a DebugInfo type that represents a function signature for Func.
427  DICompositeType createFunctionSignature(const Function *Func) {
428    SmallVector<Value *, 4> Params;
429    DIDerivedType ReturnType(getOrCreateType(Func->getReturnType()));
430    Params.push_back(ReturnType);
431
432    const Function::ArgumentListType &Args(Func->getArgumentList());
433    for (Function::ArgumentListType::const_iterator i = Args.begin(),
434                                                    e = Args.end();
435         i != e; ++i) {
436      Type *T(i->getType());
437      Params.push_back(getOrCreateType(T));
438    }
439
440    DIArray ParamArray = Builder.getOrCreateArray(Params);
441    return Builder.createSubroutineType(DIFile(FileNode), ParamArray);
442  }
443
444  /// Associates Instruction I with debug location Loc.
445  void addDebugLocation(Instruction &I, DebugLoc Loc) {
446    MDNode *MD = Loc.getAsMDNode(I.getContext());
447    I.setMetadata(LLVMContext::MD_dbg, MD);
448  }
449};
450
451/// Sets Filename/Directory from the Module identifier and returns true, or
452/// false if source information is not present.
453bool getSourceInfoFromModule(const Module &M, std::string &Directory,
454                             std::string &Filename) {
455  std::string PathStr(M.getModuleIdentifier());
456  if (PathStr.length() == 0 || PathStr == "<stdin>")
457    return false;
458
459  Filename = sys::path::filename(PathStr);
460  SmallVector<char, 16> Path(PathStr.begin(), PathStr.end());
461  sys::path::remove_filename(Path);
462  Directory = StringRef(Path.data(), Path.size());
463  return true;
464}
465
466// Sets Filename/Directory from debug information in M and returns true, or
467// false if no debug information available, or cannot be parsed.
468bool getSourceInfoFromDI(const Module &M, std::string &Directory,
469                         std::string &Filename) {
470  NamedMDNode *CUNode = M.getNamedMetadata("llvm.dbg.cu");
471  if (!CUNode || CUNode->getNumOperands() == 0)
472    return false;
473
474  DICompileUnit CU(CUNode->getOperand(0));
475  if (!CU.Verify())
476    return false;
477
478  Filename = CU.getFilename();
479  Directory = CU.getDirectory();
480  return true;
481}
482
483} // anonymous namespace
484
485namespace llvm {
486
487bool DebugIR::getSourceInfo(const Module &M) {
488  ParsedPath = getSourceInfoFromDI(M, Directory, Filename) ||
489               getSourceInfoFromModule(M, Directory, Filename);
490  return ParsedPath;
491}
492
493bool DebugIR::updateExtension(StringRef NewExtension) {
494  size_t dot = Filename.find_last_of(".");
495  if (dot == std::string::npos)
496    return false;
497
498  Filename.erase(dot);
499  Filename += NewExtension.str();
500  return true;
501}
502
503void DebugIR::generateFilename(std::unique_ptr<int> &fd) {
504  SmallVector<char, 16> PathVec;
505  fd.reset(new int);
506  sys::fs::createTemporaryFile("debug-ir", "ll", *fd, PathVec);
507  StringRef Path(PathVec.data(), PathVec.size());
508  Filename = sys::path::filename(Path);
509  sys::path::remove_filename(PathVec);
510  Directory = StringRef(PathVec.data(), PathVec.size());
511
512  GeneratedPath = true;
513}
514
515std::string DebugIR::getPath() {
516  SmallVector<char, 16> Path;
517  sys::path::append(Path, Directory, Filename);
518  Path.resize(Filename.size() + Directory.size() + 2);
519  Path[Filename.size() + Directory.size() + 1] = '\0';
520  return std::string(Path.data());
521}
522
523void DebugIR::writeDebugBitcode(const Module *M, int *fd) {
524  std::unique_ptr<raw_fd_ostream> Out;
525  std::string error;
526
527  if (!fd) {
528    std::string Path = getPath();
529    Out.reset(new raw_fd_ostream(Path.c_str(), error, sys::fs::F_Text));
530    DEBUG(dbgs() << "WRITING debug bitcode from Module " << M << " to file "
531                 << Path << "\n");
532  } else {
533    DEBUG(dbgs() << "WRITING debug bitcode from Module " << M << " to fd "
534                 << *fd << "\n");
535    Out.reset(new raw_fd_ostream(*fd, true));
536  }
537
538  M->print(*Out, 0);
539  Out->close();
540}
541
542void DebugIR::createDebugInfo(Module &M, std::unique_ptr<Module> &DisplayM) {
543  if (M.getFunctionList().size() == 0)
544    // no functions -- no debug info needed
545    return;
546
547  std::unique_ptr<ValueToValueMapTy> VMap;
548
549  if (WriteSourceToDisk && (HideDebugIntrinsics || HideDebugMetadata)) {
550    VMap.reset(new ValueToValueMapTy);
551    DisplayM.reset(CloneModule(&M, *VMap));
552
553    if (HideDebugIntrinsics)
554      DebugIntrinsicsRemover::process(*DisplayM);
555
556    if (HideDebugMetadata)
557      DebugMetadataRemover::process(*DisplayM);
558  }
559
560  DIUpdater R(M, Filename, Directory, DisplayM.get(), VMap.get());
561}
562
563bool DebugIR::isMissingPath() { return Filename.empty() || Directory.empty(); }
564
565bool DebugIR::runOnModule(Module &M) {
566  std::unique_ptr<int> fd;
567
568  if (isMissingPath() && !getSourceInfo(M)) {
569    if (!WriteSourceToDisk)
570      report_fatal_error("DebugIR unable to determine file name in input. "
571                         "Ensure Module contains an identifier, a valid "
572                         "DICompileUnit, or construct DebugIR with "
573                         "non-empty Filename/Directory parameters.");
574    else
575      generateFilename(fd);
576  }
577
578  if (!GeneratedPath && WriteSourceToDisk)
579    updateExtension(".debug-ll");
580
581  // Clear line numbers. Keep debug info (if any) if we were able to read the
582  // file name from the DICompileUnit descriptor.
583  DebugMetadataRemover::process(M, !ParsedPath);
584
585  std::unique_ptr<Module> DisplayM;
586  createDebugInfo(M, DisplayM);
587  if (WriteSourceToDisk) {
588    Module *OutputM = DisplayM.get() ? DisplayM.get() : &M;
589    writeDebugBitcode(OutputM, fd.get());
590  }
591
592  DEBUG(M.dump());
593  return true;
594}
595
596bool DebugIR::runOnModule(Module &M, std::string &Path) {
597  bool result = runOnModule(M);
598  Path = getPath();
599  return result;
600}
601
602} // llvm namespace
603
// Static pass identifier and pass registration under the name "debug-ir" with
// the description "Enable debugging IR".
// NOTE(review): the two trailing `false` arguments are presumably the
// INITIALIZE_PASS "CFG only" and "is analysis" flags -- confirm against the
// macro's definition.
char DebugIR::ID = 0;
INITIALIZE_PASS(DebugIR, "debug-ir", "Enable debugging IR", false, false)
606
607ModulePass *llvm::createDebugIRPass(bool HideDebugIntrinsics,
608                                    bool HideDebugMetadata, StringRef Directory,
609                                    StringRef Filename) {
610  return new DebugIR(HideDebugIntrinsics, HideDebugMetadata, Directory,
611                     Filename);
612}
613
614ModulePass *llvm::createDebugIRPass() { return new DebugIR(); }
615