1//===-- llvm/lib/CodeGen/AsmPrinter/WinCodeViewLineTables.cpp --*- C++ -*--===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file contains support for writing line tables info into COFF files.
11//
12//===----------------------------------------------------------------------===//
13
14#include "WinCodeViewLineTables.h"
15#include "llvm/MC/MCExpr.h"
16#include "llvm/MC/MCSymbol.h"
17#include "llvm/Support/COFF.h"
18
19namespace llvm {
20
21StringRef WinCodeViewLineTables::getFullFilepath(const MDNode *S) {
22  assert(S);
23  assert((isa<DICompileUnit>(S) || isa<DIFile>(S) || isa<DISubprogram>(S) ||
24          isa<DILexicalBlockBase>(S)) &&
25         "Unexpected scope info");
26
27  auto *Scope = cast<DIScope>(S);
28  StringRef Dir = Scope->getDirectory(),
29            Filename = Scope->getFilename();
30  std::string &Filepath =
31      DirAndFilenameToFilepathMap[std::make_pair(Dir, Filename)];
32  if (!Filepath.empty())
33    return Filepath;
34
35  // Clang emits directory and relative filename info into the IR, but CodeView
36  // operates on full paths.  We could change Clang to emit full paths too, but
37  // that would increase the IR size and probably not needed for other users.
38  // For now, just concatenate and canonicalize the path here.
39  if (Filename.find(':') == 1)
40    Filepath = Filename;
41  else
42    Filepath = (Dir + "\\" + Filename).str();
43
44  // Canonicalize the path.  We have to do it textually because we may no longer
45  // have access the file in the filesystem.
46  // First, replace all slashes with backslashes.
47  std::replace(Filepath.begin(), Filepath.end(), '/', '\\');
48
49  // Remove all "\.\" with "\".
50  size_t Cursor = 0;
51  while ((Cursor = Filepath.find("\\.\\", Cursor)) != std::string::npos)
52    Filepath.erase(Cursor, 2);
53
54  // Replace all "\XXX\..\" with "\".  Don't try too hard though as the original
55  // path should be well-formatted, e.g. start with a drive letter, etc.
56  Cursor = 0;
57  while ((Cursor = Filepath.find("\\..\\", Cursor)) != std::string::npos) {
58    // Something's wrong if the path starts with "\..\", abort.
59    if (Cursor == 0)
60      break;
61
62    size_t PrevSlash = Filepath.rfind('\\', Cursor - 1);
63    if (PrevSlash == std::string::npos)
64      // Something's wrong, abort.
65      break;
66
67    Filepath.erase(PrevSlash, Cursor + 3 - PrevSlash);
68    // The next ".." might be following the one we've just erased.
69    Cursor = PrevSlash;
70  }
71
72  // Remove all duplicate backslashes.
73  Cursor = 0;
74  while ((Cursor = Filepath.find("\\\\", Cursor)) != std::string::npos)
75    Filepath.erase(Cursor, 1);
76
77  return Filepath;
78}
79
80void WinCodeViewLineTables::maybeRecordLocation(DebugLoc DL,
81                                                const MachineFunction *MF) {
82  const MDNode *Scope = DL.getScope();
83  if (!Scope)
84    return;
85  StringRef Filename = getFullFilepath(Scope);
86
87  // Skip this instruction if it has the same file:line as the previous one.
88  assert(CurFn);
89  if (!CurFn->Instrs.empty()) {
90    const InstrInfoTy &LastInstr = InstrInfo[CurFn->Instrs.back()];
91    if (LastInstr.Filename == Filename && LastInstr.LineNumber == DL.getLine())
92      return;
93  }
94  FileNameRegistry.add(Filename);
95
96  MCSymbol *MCL = Asm->MMI->getContext().createTempSymbol();
97  Asm->OutStreamer->EmitLabel(MCL);
98  CurFn->Instrs.push_back(MCL);
99  InstrInfo[MCL] = InstrInfoTy(Filename, DL.getLine(), DL.getCol());
100}
101
102WinCodeViewLineTables::WinCodeViewLineTables(AsmPrinter *AP)
103    : Asm(nullptr), CurFn(nullptr) {
104  MachineModuleInfo *MMI = AP->MMI;
105
106  // If module doesn't have named metadata anchors or COFF debug section
107  // is not available, skip any debug info related stuff.
108  if (!MMI->getModule()->getNamedMetadata("llvm.dbg.cu") ||
109      !AP->getObjFileLowering().getCOFFDebugSymbolsSection())
110    return;
111
112  // Tell MMI that we have debug info.
113  MMI->setDebugInfoAvailability(true);
114  Asm = AP;
115}
116
117void WinCodeViewLineTables::endModule() {
118  if (FnDebugInfo.empty())
119    return;
120
121  assert(Asm != nullptr);
122  Asm->OutStreamer->SwitchSection(
123      Asm->getObjFileLowering().getCOFFDebugSymbolsSection());
124  Asm->EmitInt32(COFF::DEBUG_SECTION_MAGIC);
125
126  // The COFF .debug$S section consists of several subsections, each starting
127  // with a 4-byte control code (e.g. 0xF1, 0xF2, etc) and then a 4-byte length
128  // of the payload followed by the payload itself.  The subsections are 4-byte
129  // aligned.
130
131  // Emit per-function debug information.  This code is extracted into a
132  // separate function for readability.
133  for (size_t I = 0, E = VisitedFunctions.size(); I != E; ++I)
134    emitDebugInfoForFunction(VisitedFunctions[I]);
135
136  // This subsection holds a file index to offset in string table table.
137  Asm->OutStreamer->AddComment("File index to string table offset subsection");
138  Asm->EmitInt32(COFF::DEBUG_INDEX_SUBSECTION);
139  size_t NumFilenames = FileNameRegistry.Infos.size();
140  Asm->EmitInt32(8 * NumFilenames);
141  for (size_t I = 0, E = FileNameRegistry.Filenames.size(); I != E; ++I) {
142    StringRef Filename = FileNameRegistry.Filenames[I];
143    // For each unique filename, just write its offset in the string table.
144    Asm->EmitInt32(FileNameRegistry.Infos[Filename].StartOffset);
145    // The function name offset is not followed by any additional data.
146    Asm->EmitInt32(0);
147  }
148
149  // This subsection holds the string table.
150  Asm->OutStreamer->AddComment("String table");
151  Asm->EmitInt32(COFF::DEBUG_STRING_TABLE_SUBSECTION);
152  Asm->EmitInt32(FileNameRegistry.LastOffset);
153  // The payload starts with a null character.
154  Asm->EmitInt8(0);
155
156  for (size_t I = 0, E = FileNameRegistry.Filenames.size(); I != E; ++I) {
157    // Just emit unique filenames one by one, separated by a null character.
158    Asm->OutStreamer->EmitBytes(FileNameRegistry.Filenames[I]);
159    Asm->EmitInt8(0);
160  }
161
162  // No more subsections. Fill with zeros to align the end of the section by 4.
163  Asm->OutStreamer->EmitFill((-FileNameRegistry.LastOffset) % 4, 0);
164
165  clear();
166}
167
168static void EmitLabelDiff(MCStreamer &Streamer,
169                          const MCSymbol *From, const MCSymbol *To,
170                          unsigned int Size = 4) {
171  MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None;
172  MCContext &Context = Streamer.getContext();
173  const MCExpr *FromRef = MCSymbolRefExpr::create(From, Variant, Context),
174               *ToRef   = MCSymbolRefExpr::create(To, Variant, Context);
175  const MCExpr *AddrDelta =
176      MCBinaryExpr::create(MCBinaryExpr::Sub, ToRef, FromRef, Context);
177  Streamer.EmitValue(AddrDelta, Size);
178}
179
180void WinCodeViewLineTables::emitDebugInfoForFunction(const Function *GV) {
181  // For each function there is a separate subsection
182  // which holds the PC to file:line table.
183  const MCSymbol *Fn = Asm->getSymbol(GV);
184  assert(Fn);
185
186  const FunctionInfo &FI = FnDebugInfo[GV];
187  if (FI.Instrs.empty())
188    return;
189  assert(FI.End && "Don't know where the function ends?");
190
191  StringRef GVName = GV->getName();
192  StringRef FuncName;
193  if (auto *SP = getDISubprogram(GV))
194    FuncName = SP->getDisplayName();
195
196  // FIXME Clang currently sets DisplayName to "bar" for a C++
197  // "namespace_foo::bar" function, see PR21528.  Luckily, dbghelp.dll is trying
198  // to demangle display names anyways, so let's just put a mangled name into
199  // the symbols subsection until Clang gives us what we need.
200  if (GVName.startswith("\01?"))
201    FuncName = GVName.substr(1);
202  // Emit a symbol subsection, required by VS2012+ to find function boundaries.
203  MCSymbol *SymbolsBegin = Asm->MMI->getContext().createTempSymbol(),
204           *SymbolsEnd = Asm->MMI->getContext().createTempSymbol();
205  Asm->OutStreamer->AddComment("Symbol subsection for " + Twine(FuncName));
206  Asm->EmitInt32(COFF::DEBUG_SYMBOL_SUBSECTION);
207  EmitLabelDiff(*Asm->OutStreamer, SymbolsBegin, SymbolsEnd);
208  Asm->OutStreamer->EmitLabel(SymbolsBegin);
209  {
210    MCSymbol *ProcSegmentBegin = Asm->MMI->getContext().createTempSymbol(),
211             *ProcSegmentEnd = Asm->MMI->getContext().createTempSymbol();
212    EmitLabelDiff(*Asm->OutStreamer, ProcSegmentBegin, ProcSegmentEnd, 2);
213    Asm->OutStreamer->EmitLabel(ProcSegmentBegin);
214
215    Asm->EmitInt16(COFF::DEBUG_SYMBOL_TYPE_PROC_START);
216    // Some bytes of this segment don't seem to be required for basic debugging,
217    // so just fill them with zeroes.
218    Asm->OutStreamer->EmitFill(12, 0);
219    // This is the important bit that tells the debugger where the function
220    // code is located and what's its size:
221    EmitLabelDiff(*Asm->OutStreamer, Fn, FI.End);
222    Asm->OutStreamer->EmitFill(12, 0);
223    Asm->OutStreamer->EmitCOFFSecRel32(Fn);
224    Asm->OutStreamer->EmitCOFFSectionIndex(Fn);
225    Asm->EmitInt8(0);
226    // Emit the function display name as a null-terminated string.
227    Asm->OutStreamer->EmitBytes(FuncName);
228    Asm->EmitInt8(0);
229    Asm->OutStreamer->EmitLabel(ProcSegmentEnd);
230
231    // We're done with this function.
232    Asm->EmitInt16(0x0002);
233    Asm->EmitInt16(COFF::DEBUG_SYMBOL_TYPE_PROC_END);
234  }
235  Asm->OutStreamer->EmitLabel(SymbolsEnd);
236  // Every subsection must be aligned to a 4-byte boundary.
237  Asm->OutStreamer->EmitFill((-FuncName.size()) % 4, 0);
238
239  // PCs/Instructions are grouped into segments sharing the same filename.
240  // Pre-calculate the lengths (in instructions) of these segments and store
241  // them in a map for convenience.  Each index in the map is the sequential
242  // number of the respective instruction that starts a new segment.
243  DenseMap<size_t, size_t> FilenameSegmentLengths;
244  size_t LastSegmentEnd = 0;
245  StringRef PrevFilename = InstrInfo[FI.Instrs[0]].Filename;
246  for (size_t J = 1, F = FI.Instrs.size(); J != F; ++J) {
247    if (PrevFilename == InstrInfo[FI.Instrs[J]].Filename)
248      continue;
249    FilenameSegmentLengths[LastSegmentEnd] = J - LastSegmentEnd;
250    LastSegmentEnd = J;
251    PrevFilename = InstrInfo[FI.Instrs[J]].Filename;
252  }
253  FilenameSegmentLengths[LastSegmentEnd] = FI.Instrs.size() - LastSegmentEnd;
254
255  // Emit a line table subsection, required to do PC-to-file:line lookup.
256  Asm->OutStreamer->AddComment("Line table subsection for " + Twine(FuncName));
257  Asm->EmitInt32(COFF::DEBUG_LINE_TABLE_SUBSECTION);
258  MCSymbol *LineTableBegin = Asm->MMI->getContext().createTempSymbol(),
259           *LineTableEnd = Asm->MMI->getContext().createTempSymbol();
260  EmitLabelDiff(*Asm->OutStreamer, LineTableBegin, LineTableEnd);
261  Asm->OutStreamer->EmitLabel(LineTableBegin);
262
263  // Identify the function this subsection is for.
264  Asm->OutStreamer->EmitCOFFSecRel32(Fn);
265  Asm->OutStreamer->EmitCOFFSectionIndex(Fn);
266  // Insert flags after a 16-bit section index.
267  Asm->EmitInt16(COFF::DEBUG_LINE_TABLES_HAVE_COLUMN_RECORDS);
268
269  // Length of the function's code, in bytes.
270  EmitLabelDiff(*Asm->OutStreamer, Fn, FI.End);
271
272  // PC-to-linenumber lookup table:
273  MCSymbol *FileSegmentEnd = nullptr;
274
275  // The start of the last segment:
276  size_t LastSegmentStart = 0;
277
278  auto FinishPreviousChunk = [&] {
279    if (!FileSegmentEnd)
280      return;
281    for (size_t ColSegI = LastSegmentStart,
282                ColSegEnd = ColSegI + FilenameSegmentLengths[LastSegmentStart];
283         ColSegI != ColSegEnd; ++ColSegI) {
284      unsigned ColumnNumber = InstrInfo[FI.Instrs[ColSegI]].ColumnNumber;
285      Asm->EmitInt16(ColumnNumber); // Start column
286      Asm->EmitInt16(ColumnNumber); // End column
287    }
288    Asm->OutStreamer->EmitLabel(FileSegmentEnd);
289  };
290
291  for (size_t J = 0, F = FI.Instrs.size(); J != F; ++J) {
292    MCSymbol *Instr = FI.Instrs[J];
293    assert(InstrInfo.count(Instr));
294
295    if (FilenameSegmentLengths.count(J)) {
296      // We came to a beginning of a new filename segment.
297      FinishPreviousChunk();
298      StringRef CurFilename = InstrInfo[FI.Instrs[J]].Filename;
299      assert(FileNameRegistry.Infos.count(CurFilename));
300      size_t IndexInStringTable =
301          FileNameRegistry.Infos[CurFilename].FilenameID;
302      // Each segment starts with the offset of the filename
303      // in the string table.
304      Asm->OutStreamer->AddComment(
305          "Segment for file '" + Twine(CurFilename) + "' begins");
306      MCSymbol *FileSegmentBegin = Asm->MMI->getContext().createTempSymbol();
307      Asm->OutStreamer->EmitLabel(FileSegmentBegin);
308      Asm->EmitInt32(8 * IndexInStringTable);
309
310      // Number of PC records in the lookup table.
311      size_t SegmentLength = FilenameSegmentLengths[J];
312      Asm->EmitInt32(SegmentLength);
313
314      // Full size of the segment for this filename, including the prev two
315      // records.
316      FileSegmentEnd = Asm->MMI->getContext().createTempSymbol();
317      EmitLabelDiff(*Asm->OutStreamer, FileSegmentBegin, FileSegmentEnd);
318      LastSegmentStart = J;
319    }
320
321    // The first PC with the given linenumber and the linenumber itself.
322    EmitLabelDiff(*Asm->OutStreamer, Fn, Instr);
323    Asm->EmitInt32(InstrInfo[Instr].LineNumber);
324  }
325
326  FinishPreviousChunk();
327  Asm->OutStreamer->EmitLabel(LineTableEnd);
328}
329
330void WinCodeViewLineTables::beginFunction(const MachineFunction *MF) {
331  assert(!CurFn && "Can't process two functions at once!");
332
333  if (!Asm || !Asm->MMI->hasDebugInfo())
334    return;
335
336  const Function *GV = MF->getFunction();
337  assert(FnDebugInfo.count(GV) == false);
338  VisitedFunctions.push_back(GV);
339  CurFn = &FnDebugInfo[GV];
340
341  // Find the end of the function prolog.
342  // FIXME: is there a simpler a way to do this? Can we just search
343  // for the first instruction of the function, not the last of the prolog?
344  DebugLoc PrologEndLoc;
345  bool EmptyPrologue = true;
346  for (const auto &MBB : *MF) {
347    if (PrologEndLoc)
348      break;
349    for (const auto &MI : MBB) {
350      if (MI.isDebugValue())
351        continue;
352
353      // First known non-DBG_VALUE and non-frame setup location marks
354      // the beginning of the function body.
355      // FIXME: do we need the first subcondition?
356      if (!MI.getFlag(MachineInstr::FrameSetup) && MI.getDebugLoc()) {
357        PrologEndLoc = MI.getDebugLoc();
358        break;
359      }
360      EmptyPrologue = false;
361    }
362  }
363  // Record beginning of function if we have a non-empty prologue.
364  if (PrologEndLoc && !EmptyPrologue) {
365    DebugLoc FnStartDL = PrologEndLoc.getFnDebugLoc();
366    maybeRecordLocation(FnStartDL, MF);
367  }
368}
369
370void WinCodeViewLineTables::endFunction(const MachineFunction *MF) {
371  if (!Asm || !CurFn)  // We haven't created any debug info for this function.
372    return;
373
374  const Function *GV = MF->getFunction();
375  assert(FnDebugInfo.count(GV));
376  assert(CurFn == &FnDebugInfo[GV]);
377
378  if (CurFn->Instrs.empty()) {
379    FnDebugInfo.erase(GV);
380    VisitedFunctions.pop_back();
381  } else {
382    CurFn->End = Asm->getFunctionEnd();
383  }
384  CurFn = nullptr;
385}
386
387void WinCodeViewLineTables::beginInstruction(const MachineInstr *MI) {
388  // Ignore DBG_VALUE locations and function prologue.
389  if (!Asm || MI->isDebugValue() || MI->getFlag(MachineInstr::FrameSetup))
390    return;
391  DebugLoc DL = MI->getDebugLoc();
392  if (DL == PrevInstLoc || !DL)
393    return;
394  maybeRecordLocation(DL, Asm->MF);
395}
396}
397