1//===-- sancov.cc --------------------------------------------===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file is a command-line tool for reading and analyzing sanitizer
11// coverage.
12//===----------------------------------------------------------------------===//
13#include "llvm/ADT/STLExtras.h"
14#include "llvm/ADT/Twine.h"
15#include "llvm/DebugInfo/Symbolize/Symbolize.h"
16#include "llvm/MC/MCAsmInfo.h"
17#include "llvm/MC/MCContext.h"
18#include "llvm/MC/MCDisassembler/MCDisassembler.h"
19#include "llvm/MC/MCInst.h"
20#include "llvm/MC/MCInstPrinter.h"
21#include "llvm/MC/MCInstrAnalysis.h"
22#include "llvm/MC/MCInstrInfo.h"
23#include "llvm/MC/MCObjectFileInfo.h"
24#include "llvm/MC/MCRegisterInfo.h"
25#include "llvm/MC/MCSubtargetInfo.h"
26#include "llvm/Object/Archive.h"
27#include "llvm/Object/Binary.h"
28#include "llvm/Object/ELFObjectFile.h"
29#include "llvm/Object/ObjectFile.h"
30#include "llvm/Support/Casting.h"
31#include "llvm/Support/CommandLine.h"
32#include "llvm/Support/Errc.h"
33#include "llvm/Support/ErrorOr.h"
34#include "llvm/Support/FileSystem.h"
35#include "llvm/Support/LineIterator.h"
36#include "llvm/Support/MD5.h"
37#include "llvm/Support/ManagedStatic.h"
38#include "llvm/Support/MemoryBuffer.h"
39#include "llvm/Support/Path.h"
40#include "llvm/Support/PrettyStackTrace.h"
41#include "llvm/Support/Regex.h"
42#include "llvm/Support/Signals.h"
43#include "llvm/Support/SpecialCaseList.h"
44#include "llvm/Support/TargetRegistry.h"
45#include "llvm/Support/TargetSelect.h"
46#include "llvm/Support/ToolOutputFile.h"
47#include "llvm/Support/raw_ostream.h"
48
49#include <algorithm>
50#include <set>
51#include <stdio.h>
52#include <string>
53#include <utility>
54#include <vector>
55
56using namespace llvm;
57
58namespace {
59
60// --------- COMMAND LINE FLAGS ---------
61
// The mutually exclusive operations the tool can perform. Each enumerator is
// bound to one command-line flag by the Action option.
enum ActionType {
  PrintAction = 0,               // dump raw covered addresses
  PrintCovPointsAction = 1,      // dump instrumentation point addresses
  CoveredFunctionsAction = 2,    // list covered functions
  NotCoveredFunctionsAction = 3, // list functions that were never hit
  HtmlReportAction = 4,          // emit the HTML report
  StatsAction = 5                // emit summary statistics
};
70
71cl::opt<ActionType> Action(
72    cl::desc("Action (required)"), cl::Required,
73    cl::values(clEnumValN(PrintAction, "print", "Print coverage addresses"),
74               clEnumValN(PrintCovPointsAction, "print-coverage-pcs",
75                          "Print coverage instrumentation points addresses."),
76               clEnumValN(CoveredFunctionsAction, "covered-functions",
77                          "Print all covered funcions."),
78               clEnumValN(NotCoveredFunctionsAction, "not-covered-functions",
79                          "Print all not covered funcions."),
80               clEnumValN(HtmlReportAction, "html-report",
81                          "Print HTML coverage report."),
82               clEnumValN(StatsAction, "print-coverage-stats",
83                          "Print coverage statistics."),
84               clEnumValEnd));
85
// Positional arguments: one or more binaries and/or .sancov files.
static cl::list<std::string>
    ClInputFiles(cl::Positional, cl::OneOrMore,
                 cl::desc("(<binary file>|<.sancov file>)..."));

// Passed to the symbolizer options; enabled by default.
static cl::opt<bool> ClDemangle("demangle", cl::init(true),
                                cl::desc("Print demangled function name."));

// Text removed from file paths in reports; empty (the default) disables
// stripping.
static cl::opt<std::string> ClStripPathPrefix(
    "strip_path_prefix", cl::init(""),
    cl::desc("Strip this prefix from file paths in reports."));

// Path of an optional user-supplied blacklist file; empty means none.
static cl::opt<std::string>
    ClBlacklist("blacklist", cl::init(""),
                cl::desc("Blacklist file (sanitizer blacklist format)."));

// Hidden switch that turns the built-in blacklist (DefaultBlacklistStr) on or
// off; on by default.
static cl::opt<bool> ClUseDefaultBlacklist(
    "use_default_blacklist", cl::init(true), cl::Hidden,
    cl::desc("Controls if default blacklist should be used."));

// Built-in blacklist in sanitizer blacklist format: one function pattern
// ("fun:") and two source path patterns ("src:").
static const char *const DefaultBlacklistStr = "fun:__sanitizer_.*\n"
                                               "src:/usr/include/.*\n"
                                               "src:.*/libc\\+\\+/.*\n";
108
109// --------- FORMAT SPECIFICATION ---------
110
// Values the Bitness header field may take; they select the width of the
// address records that follow the header.
static const uint32_t Bitness32 = 0xFFFFFF32;
static const uint32_t Bitness64 = 0xFFFFFF64;

// Value the Magic header field must hold for a file to be treated as coverage
// data.
static const uint32_t BinCoverageMagic = 0xC0BFFFFF;

// Fixed-size header found at the very start of a coverage file. It is read by
// reinterpreting the first bytes of the file, so the field order matters.
struct FileHeader {
  uint32_t Bitness;
  uint32_t Magic;
};
119
120// --------- ERROR HANDLING ---------
121
122static void Fail(const llvm::Twine &E) {
123  errs() << "Error: " << E << "\n";
124  exit(1);
125}
126
127static void FailIfError(std::error_code Error) {
128  if (!Error)
129    return;
130  errs() << "Error: " << Error.message() << "(" << Error.value() << ")\n";
131  exit(1);
132}
133
134template <typename T> static void FailIfError(const ErrorOr<T> &E) {
135  FailIfError(E.getError());
136}
137
138static void FailIfError(Error Err) {
139  if (Err) {
140    logAllUnhandledErrors(std::move(Err), errs(), "Error: ");
141    exit(1);
142  }
143}
144
145template <typename T> static void FailIfError(Expected<T> &E) {
146  FailIfError(E.takeError());
147}
148
149static void FailIfNotEmpty(const llvm::Twine &E) {
150  if (E.str().empty())
151    return;
152  Fail(E);
153}
154
155template <typename T>
156static void FailIfEmpty(const std::unique_ptr<T> &Ptr,
157                        const std::string &Message) {
158  if (Ptr.get())
159    return;
160  Fail(Message);
161}
162
163// ---------
164
// Buckets the elements of R by the key F returns for each of them.
// The result maps every distinct key to a vector holding copies of the
// elements that produced it, in the order they were encountered in R.
template <class RangeTy, class FuncTy>
static inline auto group_by(const RangeTy &R, FuncTy F)
    -> std::map<typename std::decay<decltype(F(*R.begin()))>::type,
                std::vector<typename std::decay<decltype(*R.begin())>::type>> {
  using KeyTy = typename std::decay<decltype(F(*R.begin()))>::type;
  using ElemTy = typename std::decay<decltype(*R.begin())>::type;

  std::map<KeyTy, std::vector<ElemTy>> Buckets;
  for (auto It = R.begin(), End = R.end(); It != End; ++It)
    Buckets[F(*It)].push_back(*It);
  return Buckets;
}
179
// Decodes the byte range [Start, End) as a packed array of host-endian values
// of type T and inserts each one (widened to uint64_t) into *Ints.
//
// Each element is copied out byte by byte instead of dereferencing a T*, so
// the buffer does not have to be aligned for T. Trailing bytes that do not
// form a complete T are ignored, and an empty or inverted range reads nothing.
template <typename T>
static void readInts(const char *Start, const char *End,
                     std::set<uint64_t> *Ints) {
  if (Start >= End)
    return;

  const size_t Count = static_cast<size_t>(End - Start) / sizeof(T);
  for (size_t I = 0; I < Count; ++I) {
    const char *Src = Start + I * sizeof(T);
    T Value;
    std::copy(Src, Src + sizeof(T), reinterpret_cast<char *>(&Value));
    Ints->insert(Value);
  }
}
187
// A source position: file name plus line number. Ordered by file name first
// and by line second so it can serve as a map/set key.
struct FileLoc {
  std::string FileName;
  uint32_t Line;

  bool operator<(const FileLoc &RHS) const {
    if (FileName != RHS.FileName)
      return FileName < RHS.FileName;
    return Line < RHS.Line;
  }
};
196
// A function identified by the file it lives in and its name. Ordered by file
// name first and by function name second.
struct FileFn {
  std::string FileName;
  std::string FunctionName;

  bool operator<(const FileFn &RHS) const {
    if (FileName != RHS.FileName)
      return FileName < RHS.FileName;
    return FunctionName < RHS.FunctionName;
  }
};
206
207struct FnLoc {
208  bool operator<(const FnLoc &RHS) const {
209    return std::tie(Loc, FunctionName) < std::tie(RHS.Loc, RHS.FunctionName);
210  }
211
212  FileLoc Loc;
213  std::string FunctionName;
214};
215
216std::string stripPathPrefix(std::string Path) {
217  if (ClStripPathPrefix.empty())
218    return Path;
219  size_t Pos = Path.find(ClStripPathPrefix);
220  if (Pos == std::string::npos)
221    return Path;
222  return Path.substr(Pos + ClStripPathPrefix.size());
223}
224
225static std::unique_ptr<symbolize::LLVMSymbolizer> createSymbolizer() {
226  symbolize::LLVMSymbolizer::Options SymbolizerOptions;
227  SymbolizerOptions.Demangle = ClDemangle;
228  SymbolizerOptions.UseSymbolTable = true;
229  return std::unique_ptr<symbolize::LLVMSymbolizer>(
230      new symbolize::LLVMSymbolizer(SymbolizerOptions));
231}
232
233// A DILineInfo with address.
234struct AddrInfo : public DILineInfo {
235  uint64_t Addr;
236
237  AddrInfo(const DILineInfo &DI, uint64_t Addr) : DILineInfo(DI), Addr(Addr) {
238    FileName = normalizeFilename(FileName);
239  }
240
241private:
242  static std::string normalizeFilename(const std::string &FileName) {
243    SmallString<256> S(FileName);
244    sys::path::remove_dots(S, /* remove_dot_dot */ true);
245    return S.str().str();
246  }
247};
248
249class Blacklists {
250public:
251  Blacklists()
252      : DefaultBlacklist(createDefaultBlacklist()),
253        UserBlacklist(createUserBlacklist()) {}
254
255  // AddrInfo contains normalized filename. It is important to check it rather
256  // than DILineInfo.
257  bool isBlacklisted(const AddrInfo &AI) {
258    if (DefaultBlacklist && DefaultBlacklist->inSection("fun", AI.FunctionName))
259      return true;
260    if (DefaultBlacklist && DefaultBlacklist->inSection("src", AI.FileName))
261      return true;
262    if (UserBlacklist && UserBlacklist->inSection("fun", AI.FunctionName))
263      return true;
264    if (UserBlacklist && UserBlacklist->inSection("src", AI.FileName))
265      return true;
266    return false;
267  }
268
269private:
270  static std::unique_ptr<SpecialCaseList> createDefaultBlacklist() {
271    if (!ClUseDefaultBlacklist)
272      return std::unique_ptr<SpecialCaseList>();
273    std::unique_ptr<MemoryBuffer> MB =
274        MemoryBuffer::getMemBuffer(DefaultBlacklistStr);
275    std::string Error;
276    auto Blacklist = SpecialCaseList::create(MB.get(), Error);
277    FailIfNotEmpty(Error);
278    return Blacklist;
279  }
280
281  static std::unique_ptr<SpecialCaseList> createUserBlacklist() {
282    if (ClBlacklist.empty())
283      return std::unique_ptr<SpecialCaseList>();
284
285    return SpecialCaseList::createOrDie({{ClBlacklist}});
286  }
287  std::unique_ptr<SpecialCaseList> DefaultBlacklist;
288  std::unique_ptr<SpecialCaseList> UserBlacklist;
289};
290
291// Collect all debug info for given addresses.
292static std::vector<AddrInfo> getAddrInfo(const std::string &ObjectFile,
293                                         const std::set<uint64_t> &Addrs,
294                                         bool InlinedCode) {
295  std::vector<AddrInfo> Result;
296  auto Symbolizer(createSymbolizer());
297  Blacklists B;
298
299  for (auto Addr : Addrs) {
300    auto LineInfo = Symbolizer->symbolizeCode(ObjectFile, Addr);
301    FailIfError(LineInfo);
302    auto LineAddrInfo = AddrInfo(*LineInfo, Addr);
303    if (B.isBlacklisted(LineAddrInfo))
304      continue;
305    Result.push_back(LineAddrInfo);
306    if (InlinedCode) {
307      auto InliningInfo = Symbolizer->symbolizeInlinedCode(ObjectFile, Addr);
308      FailIfError(InliningInfo);
309      for (uint32_t I = 0; I < InliningInfo->getNumberOfFrames(); ++I) {
310        auto FrameInfo = InliningInfo->getFrame(I);
311        auto FrameAddrInfo = AddrInfo(FrameInfo, Addr);
312        if (B.isBlacklisted(FrameAddrInfo))
313          continue;
314        Result.push_back(FrameAddrInfo);
315      }
316    }
317  }
318
319  return Result;
320}
321
322// Locate __sanitizer_cov* function addresses that are used for coverage
323// reporting.
324static std::set<uint64_t>
325findSanitizerCovFunctions(const object::ObjectFile &O) {
326  std::set<uint64_t> Result;
327
328  for (const object::SymbolRef &Symbol : O.symbols()) {
329    Expected<uint64_t> AddressOrErr = Symbol.getAddress();
330    FailIfError(errorToErrorCode(AddressOrErr.takeError()));
331
332    Expected<StringRef> NameOrErr = Symbol.getName();
333    FailIfError(errorToErrorCode(NameOrErr.takeError()));
334    StringRef Name = NameOrErr.get();
335
336    if (Name == "__sanitizer_cov" || Name == "__sanitizer_cov_with_check" ||
337        Name == "__sanitizer_cov_trace_func_enter") {
338      if (!(Symbol.getFlags() & object::BasicSymbolRef::SF_Undefined))
339        Result.insert(AddressOrErr.get());
340    }
341  }
342
343  return Result;
344}
345
346// Locate addresses of all coverage points in a file. Coverage point
347// is defined as the 'address of instruction following __sanitizer_cov
348// call - 1'.
349static void getObjectCoveragePoints(const object::ObjectFile &O,
350                                    std::set<uint64_t> *Addrs) {
351  Triple TheTriple("unknown-unknown-unknown");
352  TheTriple.setArch(Triple::ArchType(O.getArch()));
353  auto TripleName = TheTriple.getTriple();
354
355  std::string Error;
356  const Target *TheTarget = TargetRegistry::lookupTarget(TripleName, Error);
357  FailIfNotEmpty(Error);
358
359  std::unique_ptr<const MCSubtargetInfo> STI(
360      TheTarget->createMCSubtargetInfo(TripleName, "", ""));
361  FailIfEmpty(STI, "no subtarget info for target " + TripleName);
362
363  std::unique_ptr<const MCRegisterInfo> MRI(
364      TheTarget->createMCRegInfo(TripleName));
365  FailIfEmpty(MRI, "no register info for target " + TripleName);
366
367  std::unique_ptr<const MCAsmInfo> AsmInfo(
368      TheTarget->createMCAsmInfo(*MRI, TripleName));
369  FailIfEmpty(AsmInfo, "no asm info for target " + TripleName);
370
371  std::unique_ptr<const MCObjectFileInfo> MOFI(new MCObjectFileInfo);
372  MCContext Ctx(AsmInfo.get(), MRI.get(), MOFI.get());
373  std::unique_ptr<MCDisassembler> DisAsm(
374      TheTarget->createMCDisassembler(*STI, Ctx));
375  FailIfEmpty(DisAsm, "no disassembler info for target " + TripleName);
376
377  std::unique_ptr<const MCInstrInfo> MII(TheTarget->createMCInstrInfo());
378  FailIfEmpty(MII, "no instruction info for target " + TripleName);
379
380  std::unique_ptr<const MCInstrAnalysis> MIA(
381      TheTarget->createMCInstrAnalysis(MII.get()));
382  FailIfEmpty(MIA, "no instruction analysis info for target " + TripleName);
383
384  auto SanCovAddrs = findSanitizerCovFunctions(O);
385  if (SanCovAddrs.empty())
386    Fail("__sanitizer_cov* functions not found");
387
388  for (object::SectionRef Section : O.sections()) {
389    if (Section.isVirtual() || !Section.isText()) // llvm-objdump does the same.
390      continue;
391    uint64_t SectionAddr = Section.getAddress();
392    uint64_t SectSize = Section.getSize();
393    if (!SectSize)
394      continue;
395
396    StringRef BytesStr;
397    FailIfError(Section.getContents(BytesStr));
398    ArrayRef<uint8_t> Bytes(reinterpret_cast<const uint8_t *>(BytesStr.data()),
399                            BytesStr.size());
400
401    for (uint64_t Index = 0, Size = 0; Index < Section.getSize();
402         Index += Size) {
403      MCInst Inst;
404      if (!DisAsm->getInstruction(Inst, Size, Bytes.slice(Index),
405                                  SectionAddr + Index, nulls(), nulls())) {
406        if (Size == 0)
407          Size = 1;
408        continue;
409      }
410      uint64_t Addr = Index + SectionAddr;
411      // Sanitizer coverage uses the address of the next instruction - 1.
412      uint64_t CovPoint = Addr + Size - 1;
413      uint64_t Target;
414      if (MIA->isCall(Inst) &&
415          MIA->evaluateBranch(Inst, SectionAddr + Index, Size, Target) &&
416          SanCovAddrs.find(Target) != SanCovAddrs.end())
417        Addrs->insert(CovPoint);
418    }
419  }
420}
421
422static void
423visitObjectFiles(const object::Archive &A,
424                 function_ref<void(const object::ObjectFile &)> Fn) {
425  Error Err;
426  for (auto &C : A.children(Err)) {
427    Expected<std::unique_ptr<object::Binary>> ChildOrErr = C.getAsBinary();
428    FailIfError(errorToErrorCode(ChildOrErr.takeError()));
429    if (auto *O = dyn_cast<object::ObjectFile>(&*ChildOrErr.get()))
430      Fn(*O);
431    else
432      FailIfError(object::object_error::invalid_file_type);
433  }
434  FailIfError(std::move(Err));
435}
436
437static void
438visitObjectFiles(const std::string &FileName,
439                 function_ref<void(const object::ObjectFile &)> Fn) {
440  Expected<object::OwningBinary<object::Binary>> BinaryOrErr =
441      object::createBinary(FileName);
442  if (!BinaryOrErr)
443    FailIfError(errorToErrorCode(BinaryOrErr.takeError()));
444
445  object::Binary &Binary = *BinaryOrErr.get().getBinary();
446  if (object::Archive *A = dyn_cast<object::Archive>(&Binary))
447    visitObjectFiles(*A, Fn);
448  else if (object::ObjectFile *O = dyn_cast<object::ObjectFile>(&Binary))
449    Fn(*O);
450  else
451    FailIfError(object::object_error::invalid_file_type);
452}
453
454std::set<uint64_t> findSanitizerCovFunctions(const std::string &FileName) {
455  std::set<uint64_t> Result;
456  visitObjectFiles(FileName, [&](const object::ObjectFile &O) {
457    auto Addrs = findSanitizerCovFunctions(O);
458    Result.insert(Addrs.begin(), Addrs.end());
459  });
460  return Result;
461}
462
463// Locate addresses of all coverage points in a file. Coverage point
464// is defined as the 'address of instruction following __sanitizer_cov
465// call - 1'.
466std::set<uint64_t> getCoveragePoints(const std::string &FileName) {
467  std::set<uint64_t> Result;
468  visitObjectFiles(FileName, [&](const object::ObjectFile &O) {
469    getObjectCoveragePoints(O, &Result);
470  });
471  return Result;
472}
473
474static void printCovPoints(const std::string &ObjFile, raw_ostream &OS) {
475  for (uint64_t Addr : getCoveragePoints(ObjFile)) {
476    OS << "0x";
477    OS.write_hex(Addr);
478    OS << "\n";
479  }
480}
481
// Returns a copy of S in which the five HTML-significant characters
// & ' " < > are replaced by their named entities; all other characters are
// copied through unchanged.
static std::string escapeHtml(const std::string &S) {
  std::string Escaped;
  Escaped.reserve(S.size());
  for (std::string::size_type I = 0, N = S.size(); I != N; ++I) {
    const char C = S[I];
    if (C == '&')
      Escaped += "&amp;";
    else if (C == '\'')
      Escaped += "&apos;";
    else if (C == '"')
      Escaped += "&quot;";
    else if (C == '<')
      Escaped += "&lt;";
    else if (C == '>')
      Escaped += "&gt;";
    else
      Escaped += C;
  }
  return Escaped;
}
509
// Renders a percentage as exactly three digits for the HTML report.
// Values above 100 are clamped to 100. Missing leading digits are filled with
// zeroes wrapped in a <span class='lz'> element so they can be styled
// separately; leading zeroes help locate 000% coverage.
static std::string formatHtmlPct(size_t Pct) {
  if (Pct > 100)
    Pct = 100;

  const std::string Digits = std::to_string(Pct);
  if (Digits.size() >= 3)
    return Digits;

  const std::string Padding(3 - Digits.size(), '0');
  return "<span class='lz'>" + Padding + "</span>" + Digits;
}
522
523static std::string anchorName(const std::string &Anchor) {
524  llvm::MD5 Hasher;
525  llvm::MD5::MD5Result Hash;
526  Hasher.update(Anchor);
527  Hasher.final(Hash);
528
529  SmallString<32> HexString;
530  llvm::MD5::stringifyResult(Hash, HexString);
531  return HexString.str().str();
532}
533
534static ErrorOr<bool> isCoverageFile(const std::string &FileName) {
535  ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr =
536      MemoryBuffer::getFile(FileName);
537  if (!BufOrErr) {
538    errs() << "Warning: " << BufOrErr.getError().message() << "("
539           << BufOrErr.getError().value()
540           << "), filename: " << llvm::sys::path::filename(FileName) << "\n";
541    return BufOrErr.getError();
542  }
543  std::unique_ptr<MemoryBuffer> Buf = std::move(BufOrErr.get());
544  if (Buf->getBufferSize() < 8) {
545    return false;
546  }
547  const FileHeader *Header =
548      reinterpret_cast<const FileHeader *>(Buf->getBufferStart());
549  return Header->Magic == BinCoverageMagic;
550}
551
// Running totals for the statistics report. All counters start at zero and are
// accumulated by callers.
struct CoverageStats {
  CoverageStats() {}

  size_t AllPoints = 0; // printed as "all-edges"
  size_t CovPoints = 0; // printed as "cov-edges"
  size_t AllFns = 0;    // printed as "all-functions"
  size_t CovFns = 0;    // printed as "cov-functions"
};
560
561static raw_ostream &operator<<(raw_ostream &OS, const CoverageStats &Stats) {
562  OS << "all-edges: " << Stats.AllPoints << "\n";
563  OS << "cov-edges: " << Stats.CovPoints << "\n";
564  OS << "all-functions: " << Stats.AllFns << "\n";
565  OS << "cov-functions: " << Stats.CovFns << "\n";
566  return OS;
567}
568
569class CoverageData {
570public:
571  // Read single file coverage data.
572  static ErrorOr<std::unique_ptr<CoverageData>>
573  read(const std::string &FileName) {
574    ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr =
575        MemoryBuffer::getFile(FileName);
576    if (!BufOrErr)
577      return BufOrErr.getError();
578    std::unique_ptr<MemoryBuffer> Buf = std::move(BufOrErr.get());
579    if (Buf->getBufferSize() < 8) {
580      errs() << "File too small (<8): " << Buf->getBufferSize();
581      return make_error_code(errc::illegal_byte_sequence);
582    }
583    const FileHeader *Header =
584        reinterpret_cast<const FileHeader *>(Buf->getBufferStart());
585
586    if (Header->Magic != BinCoverageMagic) {
587      errs() << "Wrong magic: " << Header->Magic;
588      return make_error_code(errc::illegal_byte_sequence);
589    }
590
591    auto Addrs = llvm::make_unique<std::set<uint64_t>>();
592
593    switch (Header->Bitness) {
594    case Bitness64:
595      readInts<uint64_t>(Buf->getBufferStart() + 8, Buf->getBufferEnd(),
596                         Addrs.get());
597      break;
598    case Bitness32:
599      readInts<uint32_t>(Buf->getBufferStart() + 8, Buf->getBufferEnd(),
600                         Addrs.get());
601      break;
602    default:
603      errs() << "Unsupported bitness: " << Header->Bitness;
604      return make_error_code(errc::illegal_byte_sequence);
605    }
606
607    return std::unique_ptr<CoverageData>(new CoverageData(std::move(Addrs)));
608  }
609
610  // Merge multiple coverage data together.
611  static std::unique_ptr<CoverageData>
612  merge(const std::vector<std::unique_ptr<CoverageData>> &Covs) {
613    auto Addrs = llvm::make_unique<std::set<uint64_t>>();
614
615    for (const auto &Cov : Covs)
616      Addrs->insert(Cov->Addrs->begin(), Cov->Addrs->end());
617
618    return std::unique_ptr<CoverageData>(new CoverageData(std::move(Addrs)));
619  }
620
621  // Read list of files and merges their coverage info.
622  static ErrorOr<std::unique_ptr<CoverageData>>
623  readAndMerge(const std::vector<std::string> &FileNames) {
624    std::vector<std::unique_ptr<CoverageData>> Covs;
625    for (const auto &FileName : FileNames) {
626      auto Cov = read(FileName);
627      if (!Cov)
628        return Cov.getError();
629      Covs.push_back(std::move(Cov.get()));
630    }
631    return merge(Covs);
632  }
633
634  // Print coverage addresses.
635  void printAddrs(raw_ostream &OS) {
636    for (auto Addr : *Addrs) {
637      OS << "0x";
638      OS.write_hex(Addr);
639      OS << "\n";
640    }
641  }
642
643protected:
644  explicit CoverageData(std::unique_ptr<std::set<uint64_t>> Addrs)
645      : Addrs(std::move(Addrs)) {}
646
647  friend class CoverageDataWithObjectFile;
648
649  std::unique_ptr<std::set<uint64_t>> Addrs;
650};
651
652// Coverage data translated into source code line-level information.
653// Fetches debug info in constructor and calculates various information per
654// request.
655class SourceCoverageData {
656public:
657  enum LineStatus {
658    // coverage information for the line is not available.
659    // default value in maps.
660    UNKNOWN = 0,
661    // the line is fully covered.
662    COVERED = 1,
663    // the line is fully uncovered.
664    NOT_COVERED = 2,
665    // some points in the line a covered, some are not.
666    MIXED = 3
667  };
668
669  SourceCoverageData(std::string ObjectFile, const std::set<uint64_t> &Addrs)
670      : AllCovPoints(getCoveragePoints(ObjectFile)) {
671    if (!std::includes(AllCovPoints.begin(), AllCovPoints.end(), Addrs.begin(),
672                       Addrs.end())) {
673      Fail("Coverage points in binary and .sancov file do not match.");
674    }
675
676    AllAddrInfo = getAddrInfo(ObjectFile, AllCovPoints, true);
677    CovAddrInfo = getAddrInfo(ObjectFile, Addrs, true);
678  }
679
680  // Compute number of coverage points hit/total in a file.
681  // file_name -> <coverage, all_coverage>
682  std::map<std::string, std::pair<size_t, size_t>> computeFileCoverage() {
683    std::map<std::string, std::pair<size_t, size_t>> FileCoverage;
684    auto AllCovPointsByFile =
685        group_by(AllAddrInfo, [](const AddrInfo &AI) { return AI.FileName; });
686    auto CovPointsByFile =
687        group_by(CovAddrInfo, [](const AddrInfo &AI) { return AI.FileName; });
688
689    for (const auto &P : AllCovPointsByFile) {
690      const std::string &FileName = P.first;
691
692      FileCoverage[FileName] =
693          std::make_pair(CovPointsByFile[FileName].size(),
694                         AllCovPointsByFile[FileName].size());
695    }
696    return FileCoverage;
697  }
698
699  // line_number -> line_status.
700  typedef std::map<int, LineStatus> LineStatusMap;
701  // file_name -> LineStatusMap
702  typedef std::map<std::string, LineStatusMap> FileLineStatusMap;
703
704  // fills in the {file_name -> {line_no -> status}} map.
705  FileLineStatusMap computeLineStatusMap() {
706    FileLineStatusMap StatusMap;
707
708    auto AllLocs = group_by(AllAddrInfo, [](const AddrInfo &AI) {
709      return FileLoc{AI.FileName, AI.Line};
710    });
711    auto CovLocs = group_by(CovAddrInfo, [](const AddrInfo &AI) {
712      return FileLoc{AI.FileName, AI.Line};
713    });
714
715    for (const auto &P : AllLocs) {
716      const FileLoc &Loc = P.first;
717      auto I = CovLocs.find(Loc);
718
719      if (I == CovLocs.end()) {
720        StatusMap[Loc.FileName][Loc.Line] = NOT_COVERED;
721      } else {
722        StatusMap[Loc.FileName][Loc.Line] =
723            (I->second.size() == P.second.size()) ? COVERED : MIXED;
724      }
725    }
726    return StatusMap;
727  }
728
729  std::set<FileFn> computeAllFunctions() const {
730    std::set<FileFn> Fns;
731    for (const auto &AI : AllAddrInfo) {
732      Fns.insert(FileFn{AI.FileName, AI.FunctionName});
733    }
734    return Fns;
735  }
736
737  std::set<FileFn> computeCoveredFunctions() const {
738    std::set<FileFn> Fns;
739    auto CovFns = group_by(CovAddrInfo, [](const AddrInfo &AI) {
740      return FileFn{AI.FileName, AI.FunctionName};
741    });
742
743    for (const auto &P : CovFns) {
744      Fns.insert(P.first);
745    }
746    return Fns;
747  }
748
749  std::set<FileFn> computeNotCoveredFunctions() const {
750    std::set<FileFn> Fns;
751
752    auto AllFns = group_by(AllAddrInfo, [](const AddrInfo &AI) {
753      return FileFn{AI.FileName, AI.FunctionName};
754    });
755    auto CovFns = group_by(CovAddrInfo, [](const AddrInfo &AI) {
756      return FileFn{AI.FileName, AI.FunctionName};
757    });
758
759    for (const auto &P : AllFns) {
760      if (CovFns.find(P.first) == CovFns.end()) {
761        Fns.insert(P.first);
762      }
763    }
764    return Fns;
765  }
766
767  // Compute % coverage for each function.
768  std::map<FileFn, int> computeFunctionsCoverage() const {
769    std::map<FileFn, int> FnCoverage;
770    auto AllFns = group_by(AllAddrInfo, [](const AddrInfo &AI) {
771      return FileFn{AI.FileName, AI.FunctionName};
772    });
773
774    auto CovFns = group_by(CovAddrInfo, [](const AddrInfo &AI) {
775      return FileFn{AI.FileName, AI.FunctionName};
776    });
777
778    for (const auto &P : AllFns) {
779      FileFn F = P.first;
780      FnCoverage[F] = CovFns[F].size() * 100 / P.second.size();
781    }
782
783    return FnCoverage;
784  }
785
786  typedef std::map<FileLoc, std::set<std::string>> FunctionLocs;
787  // finds first line number in a file for each function.
788  FunctionLocs resolveFunctions(const std::set<FileFn> &Fns) const {
789    std::vector<AddrInfo> FnAddrs;
790    for (const auto &AI : AllAddrInfo) {
791      if (Fns.find(FileFn{AI.FileName, AI.FunctionName}) != Fns.end())
792        FnAddrs.push_back(AI);
793    }
794
795    auto GroupedAddrs = group_by(FnAddrs, [](const AddrInfo &AI) {
796      return FnLoc{FileLoc{AI.FileName, AI.Line}, AI.FunctionName};
797    });
798
799    FunctionLocs Result;
800    std::string LastFileName;
801    std::set<std::string> ProcessedFunctions;
802
803    for (const auto &P : GroupedAddrs) {
804      const FnLoc &Loc = P.first;
805      std::string FileName = Loc.Loc.FileName;
806      std::string FunctionName = Loc.FunctionName;
807
808      if (LastFileName != FileName)
809        ProcessedFunctions.clear();
810      LastFileName = FileName;
811
812      if (!ProcessedFunctions.insert(FunctionName).second)
813        continue;
814
815      auto FLoc = FileLoc{FileName, Loc.Loc.Line};
816      Result[FLoc].insert(FunctionName);
817    }
818    return Result;
819  }
820
821  std::set<std::string> files() const {
822    std::set<std::string> Files;
823    for (const auto &AI : AllAddrInfo) {
824      Files.insert(AI.FileName);
825    }
826    return Files;
827  }
828
829  void collectStats(CoverageStats *Stats) const {
830    Stats->AllPoints += AllCovPoints.size();
831    Stats->AllFns += computeAllFunctions().size();
832    Stats->CovFns += computeCoveredFunctions().size();
833  }
834
835private:
836  const std::set<uint64_t> AllCovPoints;
837
838  std::vector<AddrInfo> AllAddrInfo;
839  std::vector<AddrInfo> CovAddrInfo;
840};
841
842static void printFunctionLocs(const SourceCoverageData::FunctionLocs &FnLocs,
843                              raw_ostream &OS) {
844  for (const auto &Fns : FnLocs) {
845    for (const auto &Fn : Fns.second) {
846      OS << stripPathPrefix(Fns.first.FileName) << ":" << Fns.first.Line << " "
847         << Fn << "\n";
848    }
849  }
850}
851
852// Holder for coverage data + filename of corresponding object file.
853class CoverageDataWithObjectFile : public CoverageData {
854public:
855  static ErrorOr<std::unique_ptr<CoverageDataWithObjectFile>>
856  readAndMerge(const std::string &ObjectFile,
857               const std::vector<std::string> &FileNames) {
858    auto MergedDataOrError = CoverageData::readAndMerge(FileNames);
859    if (!MergedDataOrError)
860      return MergedDataOrError.getError();
861    return std::unique_ptr<CoverageDataWithObjectFile>(
862        new CoverageDataWithObjectFile(ObjectFile,
863                                       std::move(MergedDataOrError.get())));
864  }
865
  // Returns a copy of the stored ObjectFile string.
  std::string object_file() const { return ObjectFile; }
867
868  // Print list of covered functions.
869  // Line format: <file_name>:<line> <function_name>
870  void printCoveredFunctions(raw_ostream &OS) const {
871    SourceCoverageData SCovData(ObjectFile, *Addrs);
872    auto CoveredFns = SCovData.computeCoveredFunctions();
873    printFunctionLocs(SCovData.resolveFunctions(CoveredFns), OS);
874  }
875
876  // Print list of not covered functions.
877  // Line format: <file_name>:<line> <function_name>
878  void printNotCoveredFunctions(raw_ostream &OS) const {
879    SourceCoverageData SCovData(ObjectFile, *Addrs);
880    auto NotCoveredFns = SCovData.computeNotCoveredFunctions();
881    printFunctionLocs(SCovData.resolveFunctions(NotCoveredFns), OS);
882  }
883
884  void printReport(raw_ostream &OS) const {
885    SourceCoverageData SCovData(ObjectFile, *Addrs);
886    auto LineStatusMap = SCovData.computeLineStatusMap();
887
888    std::set<FileFn> AllFns = SCovData.computeAllFunctions();
889    // file_loc -> set[function_name]
890    auto AllFnsByLoc = SCovData.resolveFunctions(AllFns);
891    auto FileCoverage = SCovData.computeFileCoverage();
892
893    auto FnCoverage = SCovData.computeFunctionsCoverage();
894    auto FnCoverageByFile =
895        group_by(FnCoverage, [](const std::pair<FileFn, int> &FileFn) {
896          return FileFn.first.FileName;
897        });
898
899    // TOC
900
901    size_t NotCoveredFilesCount = 0;
902    std::set<std::string> Files = SCovData.files();
903
904    // Covered Files.
905    OS << "<details open><summary>Touched Files</summary>\n";
906    OS << "<table>\n";
907    OS << "<tr><th>File</th><th>Coverage %</th>";
908    OS << "<th>Hit (Total) Fns</th></tr>\n";
909    for (const auto &FileName : Files) {
910      std::pair<size_t, size_t> FC = FileCoverage[FileName];
911      if (FC.first == 0) {
912        NotCoveredFilesCount++;
913        continue;
914      }
915      size_t CovPct = FC.second == 0 ? 100 : 100 * FC.first / FC.second;
916
917      OS << "<tr><td><a href=\"#" << anchorName(FileName) << "\">"
918         << stripPathPrefix(FileName) << "</a></td>"
919         << "<td>" << formatHtmlPct(CovPct) << "%</td>"
920         << "<td>" << FC.first << " (" << FC.second << ")"
921         << "</tr>\n";
922    }
923    OS << "</table>\n";
924    OS << "</details>\n";
925
926    // Not covered files.
927    if (NotCoveredFilesCount) {
928      OS << "<details><summary>Not Touched Files</summary>\n";
929      OS << "<table>\n";
930      for (const auto &FileName : Files) {
931        std::pair<size_t, size_t> FC = FileCoverage[FileName];
932        if (FC.first == 0)
933          OS << "<tr><td>" << stripPathPrefix(FileName) << "</td>\n";
934      }
935      OS << "</table>\n";
936      OS << "</details>\n";
937    } else {
938      OS << "<p>Congratulations! All source files are touched.</p>\n";
939    }
940
941    // Source
942    for (const auto &FileName : Files) {
943      std::pair<size_t, size_t> FC = FileCoverage[FileName];
944      if (FC.first == 0)
945        continue;
946      OS << "<a name=\"" << anchorName(FileName) << "\"></a>\n";
947      OS << "<h2>" << stripPathPrefix(FileName) << "</h2>\n";
948      OS << "<details open><summary>Function Coverage</summary>";
949      OS << "<div class='fnlist'>\n";
950
951      auto &FileFnCoverage = FnCoverageByFile[FileName];
952
953      for (const auto &P : FileFnCoverage) {
954        std::string FunctionName = P.first.FunctionName;
955
956        OS << "<div class='fn' style='order: " << P.second << "'>";
957        OS << "<span class='pct'>" << formatHtmlPct(P.second)
958           << "%</span>&nbsp;";
959        OS << "<span class='name'><a href=\"#"
960           << anchorName(FileName + "::" + FunctionName) << "\">";
961        OS << escapeHtml(FunctionName) << "</a></span>";
962        OS << "</div>\n";
963      }
964      OS << "</div></details>\n";
965
966      ErrorOr<std::unique_ptr<MemoryBuffer>> BufOrErr =
967          MemoryBuffer::getFile(FileName);
968      if (!BufOrErr) {
969        OS << "Error reading file: " << FileName << " : "
970           << BufOrErr.getError().message() << "("
971           << BufOrErr.getError().value() << ")\n";
972        continue;
973      }
974
975      OS << "<pre>\n";
976      const auto &LineStatuses = LineStatusMap[FileName];
977      for (line_iterator I = line_iterator(*BufOrErr.get(), false);
978           !I.is_at_eof(); ++I) {
979        uint32_t Line = I.line_number();
980        { // generate anchors (if any);
981          FileLoc Loc = FileLoc{FileName, Line};
982          auto It = AllFnsByLoc.find(Loc);
983          if (It != AllFnsByLoc.end()) {
984            for (const std::string &Fn : It->second) {
985              OS << "<a name=\"" << anchorName(FileName + "::" + Fn)
986                 << "\"></a>";
987            };
988          }
989        }
990
991        OS << "<span ";
992        auto LIT = LineStatuses.find(I.line_number());
993        auto Status = (LIT != LineStatuses.end()) ? LIT->second
994                                                  : SourceCoverageData::UNKNOWN;
995        switch (Status) {
996        case SourceCoverageData::UNKNOWN:
997          OS << "class=unknown";
998          break;
999        case SourceCoverageData::COVERED:
1000          OS << "class=covered";
1001          break;
1002        case SourceCoverageData::NOT_COVERED:
1003          OS << "class=notcovered";
1004          break;
1005        case SourceCoverageData::MIXED:
1006          OS << "class=mixed";
1007          break;
1008        }
1009        OS << ">";
1010        OS << escapeHtml(*I) << "</span>\n";
1011      }
1012      OS << "</pre>\n";
1013    }
1014  }
1015
1016  void collectStats(CoverageStats *Stats) const {
1017    Stats->CovPoints += Addrs->size();
1018
1019    SourceCoverageData SCovData(ObjectFile, *Addrs);
1020    SCovData.collectStats(Stats);
1021  }
1022
1023private:
  // Private constructor: initialises the CoverageData base by moving the
  // address set out of Coverage, and stores the object file name.
  // Coverage->Addrs is left in a moved-from state afterwards.
  CoverageDataWithObjectFile(std::string ObjectFile,
                             std::unique_ptr<CoverageData> Coverage)
      : CoverageData(std::move(Coverage->Addrs)),
        // The parameter shadows the member of the same name, so the argument
        // of std::move here is the parameter, not the member.
        ObjectFile(std::move(ObjectFile)) {}
  // Object file name supplied at construction; passed to SourceCoverageData
  // by the methods above.
  const std::string ObjectFile;
1029};
1030
1031// Multiple coverage files data organized by object file.
1032class CoverageDataSet {
1033public:
1034  static ErrorOr<std::unique_ptr<CoverageDataSet>>
1035  readCmdArguments(std::vector<std::string> FileNames) {
1036    // Short name => file name.
1037    std::map<std::string, std::string> ObjFiles;
1038    std::string FirstObjFile;
1039    std::set<std::string> CovFiles;
1040
1041    // Partition input values into coverage/object files.
1042    for (const auto &FileName : FileNames) {
1043      auto ErrorOrIsCoverage = isCoverageFile(FileName);
1044      if (!ErrorOrIsCoverage)
1045        continue;
1046      if (ErrorOrIsCoverage.get()) {
1047        CovFiles.insert(FileName);
1048      } else {
1049        auto ShortFileName = llvm::sys::path::filename(FileName);
1050        if (ObjFiles.find(ShortFileName) != ObjFiles.end()) {
1051          Fail("Duplicate binary file with a short name: " + ShortFileName);
1052        }
1053
1054        ObjFiles[ShortFileName] = FileName;
1055        if (FirstObjFile.empty())
1056          FirstObjFile = FileName;
1057      }
1058    }
1059
1060    Regex SancovRegex("(.*)\\.[0-9]+\\.sancov");
1061    SmallVector<StringRef, 2> Components;
1062
1063    // Object file => list of corresponding coverage file names.
1064    auto CoverageByObjFile = group_by(CovFiles, [&](std::string FileName) {
1065      auto ShortFileName = llvm::sys::path::filename(FileName);
1066      auto Ok = SancovRegex.match(ShortFileName, &Components);
1067      if (!Ok) {
1068        Fail("Can't match coverage file name against "
1069             "<module_name>.<pid>.sancov pattern: " +
1070             FileName);
1071      }
1072
1073      auto Iter = ObjFiles.find(Components[1]);
1074      if (Iter == ObjFiles.end()) {
1075        Fail("Object file for coverage not found: " + FileName);
1076      }
1077      return Iter->second;
1078    });
1079
1080    // Read coverage.
1081    std::vector<std::unique_ptr<CoverageDataWithObjectFile>> MergedCoverage;
1082    for (const auto &Pair : CoverageByObjFile) {
1083      if (findSanitizerCovFunctions(Pair.first).empty()) {
1084        for (const auto &FileName : Pair.second) {
1085          CovFiles.erase(FileName);
1086        }
1087
1088        errs()
1089            << "Ignoring " << Pair.first
1090            << " and its coverage because  __sanitizer_cov* functions were not "
1091               "found.\n";
1092        continue;
1093      }
1094
1095      auto DataOrError =
1096          CoverageDataWithObjectFile::readAndMerge(Pair.first, Pair.second);
1097      FailIfError(DataOrError);
1098      MergedCoverage.push_back(std::move(DataOrError.get()));
1099    }
1100
1101    return std::unique_ptr<CoverageDataSet>(
1102        new CoverageDataSet(FirstObjFile, &MergedCoverage, CovFiles));
1103  }
1104
1105  void printCoveredFunctions(raw_ostream &OS) const {
1106    for (const auto &Cov : Coverage) {
1107      Cov->printCoveredFunctions(OS);
1108    }
1109  }
1110
1111  void printNotCoveredFunctions(raw_ostream &OS) const {
1112    for (const auto &Cov : Coverage) {
1113      Cov->printNotCoveredFunctions(OS);
1114    }
1115  }
1116
1117  void printStats(raw_ostream &OS) const {
1118    CoverageStats Stats;
1119    for (const auto &Cov : Coverage) {
1120      Cov->collectStats(&Stats);
1121    }
1122    OS << Stats;
1123  }
1124
1125  void printReport(raw_ostream &OS) const {
1126    auto Title =
1127        (llvm::sys::path::filename(MainObjFile) + " Coverage Report").str();
1128
1129    OS << "<html>\n";
1130    OS << "<head>\n";
1131
1132    // Stylesheet
1133    OS << "<style>\n";
1134    OS << ".covered { background: #7F7; }\n";
1135    OS << ".notcovered { background: #F77; }\n";
1136    OS << ".mixed { background: #FF7; }\n";
1137    OS << "summary { font-weight: bold; }\n";
1138    OS << "details > summary + * { margin-left: 1em; }\n";
1139    OS << ".fnlist { display: flex; flex-flow: column nowrap; }\n";
1140    OS << ".fn { display: flex; flex-flow: row nowrap; }\n";
1141    OS << ".pct { width: 3em; text-align: right; margin-right: 1em; }\n";
1142    OS << ".name { flex: 2; }\n";
1143    OS << ".lz { color: lightgray; }\n";
1144    OS << "</style>\n";
1145    OS << "<title>" << Title << "</title>\n";
1146    OS << "</head>\n";
1147    OS << "<body>\n";
1148
1149    // Title
1150    OS << "<h1>" << Title << "</h1>\n";
1151
1152    // Modules TOC.
1153    if (Coverage.size() > 1) {
1154      for (const auto &CovData : Coverage) {
1155        OS << "<li><a href=\"#module_" << anchorName(CovData->object_file())
1156           << "\">" << llvm::sys::path::filename(CovData->object_file())
1157           << "</a></li>\n";
1158      }
1159    }
1160
1161    for (const auto &CovData : Coverage) {
1162      if (Coverage.size() > 1) {
1163        OS << "<h2>" << llvm::sys::path::filename(CovData->object_file())
1164           << "</h2>\n";
1165      }
1166      OS << "<a name=\"module_" << anchorName(CovData->object_file())
1167         << "\"></a>\n";
1168      CovData->printReport(OS);
1169    }
1170
1171    // About
1172    OS << "<details><summary>About</summary>\n";
1173    OS << "Coverage files:<ul>";
1174    for (const auto &InputFile : CoverageFiles) {
1175      llvm::sys::fs::file_status Status;
1176      llvm::sys::fs::status(InputFile, Status);
1177      OS << "<li>" << stripPathPrefix(InputFile) << " ("
1178         << Status.getLastModificationTime().str() << ")</li>\n";
1179    }
1180    OS << "</ul></details>\n";
1181
1182    OS << "</body>\n";
1183    OS << "</html>\n";
1184  }
1185
1186  bool empty() const { return Coverage.empty(); }
1187
1188private:
1189  explicit CoverageDataSet(
1190      const std::string &MainObjFile,
1191      std::vector<std::unique_ptr<CoverageDataWithObjectFile>> *Data,
1192      const std::set<std::string> &CoverageFiles)
1193      : MainObjFile(MainObjFile), CoverageFiles(CoverageFiles) {
1194    Data->swap(this->Coverage);
1195  }
1196
1197  const std::string MainObjFile;
1198  std::vector<std::unique_ptr<CoverageDataWithObjectFile>> Coverage;
1199  const std::set<std::string> CoverageFiles;
1200};
1201
1202} // namespace
1203
1204int main(int argc, char **argv) {
1205  // Print stack trace if we signal out.
1206  sys::PrintStackTraceOnErrorSignal(argv[0]);
1207  PrettyStackTraceProgram X(argc, argv);
1208  llvm_shutdown_obj Y; // Call llvm_shutdown() on exit.
1209
1210  llvm::InitializeAllTargetInfos();
1211  llvm::InitializeAllTargetMCs();
1212  llvm::InitializeAllDisassemblers();
1213
1214  cl::ParseCommandLineOptions(argc, argv, "Sanitizer Coverage Processing Tool");
1215
1216  // -print doesn't need object files.
1217  if (Action == PrintAction) {
1218    auto CovData = CoverageData::readAndMerge(ClInputFiles);
1219    FailIfError(CovData);
1220    CovData.get()->printAddrs(outs());
1221    return 0;
1222  } else if (Action == PrintCovPointsAction) {
1223    // -print-coverage-points doesn't need coverage files.
1224    for (const std::string &ObjFile : ClInputFiles) {
1225      printCovPoints(ObjFile, outs());
1226    }
1227    return 0;
1228  }
1229
1230  auto CovDataSet = CoverageDataSet::readCmdArguments(ClInputFiles);
1231  FailIfError(CovDataSet);
1232
1233  if (CovDataSet.get()->empty()) {
1234    Fail("No coverage files specified.");
1235  }
1236
1237  switch (Action) {
1238  case CoveredFunctionsAction: {
1239    CovDataSet.get()->printCoveredFunctions(outs());
1240    return 0;
1241  }
1242  case NotCoveredFunctionsAction: {
1243    CovDataSet.get()->printNotCoveredFunctions(outs());
1244    return 0;
1245  }
1246  case HtmlReportAction: {
1247    CovDataSet.get()->printReport(outs());
1248    return 0;
1249  }
1250  case StatsAction: {
1251    CovDataSet.get()->printStats(outs());
1252    return 0;
1253  }
1254  case PrintAction:
1255  case PrintCovPointsAction:
1256    llvm_unreachable("unsupported action");
1257  }
1258}
1259