1//===- InstrProfReader.h - Instrumented profiling readers -------*- C++ -*-===//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file contains support for reading profiling data for instrumentation
11// based PGO and coverage.
12//
13//===----------------------------------------------------------------------===//
14
15#ifndef LLVM_PROFILEDATA_INSTRPROFREADER_H
16#define LLVM_PROFILEDATA_INSTRPROFREADER_H
17
18#include "llvm/ADT/ArrayRef.h"
19#include "llvm/ADT/StringRef.h"
20#include "llvm/IR/ProfileSummary.h"
21#include "llvm/ProfileData/InstrProf.h"
22#include "llvm/Support/Endian.h"
23#include "llvm/Support/Error.h"
24#include "llvm/Support/LineIterator.h"
25#include "llvm/Support/MemoryBuffer.h"
26#include "llvm/Support/OnDiskHashTable.h"
27#include "llvm/Support/SwapByteOrder.h"
28#include <algorithm>
29#include <cassert>
30#include <cstddef>
31#include <cstdint>
32#include <iterator>
33#include <memory>
34#include <utility>
35#include <vector>
36
37namespace llvm {
38
// Forward declaration: InstrProfIterator stores a pointer to its reader, and
// the full InstrProfReader definition appears after the iterator.
class InstrProfReader;
40
41/// A file format agnostic iterator over profiling data.
42class InstrProfIterator : public std::iterator<std::input_iterator_tag,
43                                               NamedInstrProfRecord> {
44  InstrProfReader *Reader = nullptr;
45  value_type Record;
46
47  void Increment();
48
49public:
50  InstrProfIterator() = default;
51  InstrProfIterator(InstrProfReader *Reader) : Reader(Reader) { Increment(); }
52
53  InstrProfIterator &operator++() { Increment(); return *this; }
54  bool operator==(const InstrProfIterator &RHS) { return Reader == RHS.Reader; }
55  bool operator!=(const InstrProfIterator &RHS) { return Reader != RHS.Reader; }
56  value_type &operator*() { return Record; }
57  value_type *operator->() { return &Record; }
58};
59
60/// Base class and interface for reading profiling data of any known instrprof
61/// format. Provides an iterator over NamedInstrProfRecords.
62class InstrProfReader {
63  instrprof_error LastError = instrprof_error::success;
64
65public:
66  InstrProfReader() = default;
67  virtual ~InstrProfReader() = default;
68
69  /// Read the header.  Required before reading first record.
70  virtual Error readHeader() = 0;
71
72  /// Read a single record.
73  virtual Error readNextRecord(NamedInstrProfRecord &Record) = 0;
74
75  /// Iterator over profile data.
76  InstrProfIterator begin() { return InstrProfIterator(this); }
77  InstrProfIterator end() { return InstrProfIterator(); }
78
79  virtual bool isIRLevelProfile() const = 0;
80
81  /// Return the PGO symtab. There are three different readers:
82  /// Raw, Text, and Indexed profile readers. The first two types
83  /// of readers are used only by llvm-profdata tool, while the indexed
84  /// profile reader is also used by llvm-cov tool and the compiler (
85  /// backend or frontend). Since creating PGO symtab can create
86  /// significant runtime and memory overhead (as it touches data
87  /// for the whole program), InstrProfSymtab for the indexed profile
88  /// reader should be created on demand and it is recommended to be
89  /// only used for dumping purpose with llvm-proftool, not with the
90  /// compiler.
91  virtual InstrProfSymtab &getSymtab() = 0;
92
93protected:
94  std::unique_ptr<InstrProfSymtab> Symtab;
95
96  /// Set the current error and return same.
97  Error error(instrprof_error Err) {
98    LastError = Err;
99    if (Err == instrprof_error::success)
100      return Error::success();
101    return make_error<InstrProfError>(Err);
102  }
103
104  Error error(Error E) { return error(InstrProfError::take(std::move(E))); }
105
106  /// Clear the current error and return a successful one.
107  Error success() { return error(instrprof_error::success); }
108
109public:
110  /// Return true if the reader has finished reading the profile data.
111  bool isEOF() { return LastError == instrprof_error::eof; }
112
113  /// Return true if the reader encountered an error reading profiling data.
114  bool hasError() { return LastError != instrprof_error::success && !isEOF(); }
115
116  /// Get the current error.
117  Error getError() {
118    if (hasError())
119      return make_error<InstrProfError>(LastError);
120    return Error::success();
121  }
122
123  /// Factory method to create an appropriately typed reader for the given
124  /// instrprof file.
125  static Expected<std::unique_ptr<InstrProfReader>> create(const Twine &Path);
126
127  static Expected<std::unique_ptr<InstrProfReader>>
128  create(std::unique_ptr<MemoryBuffer> Buffer);
129};
130
131/// Reader for the simple text based instrprof format.
132///
133/// This format is a simple text format that's suitable for test data. Records
134/// are separated by one or more blank lines, and record fields are separated by
135/// new lines.
136///
137/// Each record consists of a function name, a function hash, a number of
138/// counters, and then each counter value, in that order.
139class TextInstrProfReader : public InstrProfReader {
140private:
141  /// The profile data file contents.
142  std::unique_ptr<MemoryBuffer> DataBuffer;
143  /// Iterator over the profile data.
144  line_iterator Line;
145  bool IsIRLevelProfile = false;
146
147  Error readValueProfileData(InstrProfRecord &Record);
148
149public:
150  TextInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer_)
151      : DataBuffer(std::move(DataBuffer_)), Line(*DataBuffer, true, '#') {}
152  TextInstrProfReader(const TextInstrProfReader &) = delete;
153  TextInstrProfReader &operator=(const TextInstrProfReader &) = delete;
154
155  /// Return true if the given buffer is in text instrprof format.
156  static bool hasFormat(const MemoryBuffer &Buffer);
157
158  bool isIRLevelProfile() const override { return IsIRLevelProfile; }
159
160  /// Read the header.
161  Error readHeader() override;
162
163  /// Read a single record.
164  Error readNextRecord(NamedInstrProfRecord &Record) override;
165
166  InstrProfSymtab &getSymtab() override {
167    assert(Symtab.get());
168    return *Symtab.get();
169  }
170};
171
172/// Reader for the raw instrprof binary format from runtime.
173///
174/// This format is a raw memory dump of the instrumentation-baed profiling data
175/// from the runtime.  It has no index.
176///
177/// Templated on the unsigned type whose size matches pointers on the platform
178/// that wrote the profile.
179template <class IntPtrT>
180class RawInstrProfReader : public InstrProfReader {
181private:
182  /// The profile data file contents.
183  std::unique_ptr<MemoryBuffer> DataBuffer;
184  bool ShouldSwapBytes;
185  // The value of the version field of the raw profile data header. The lower 56
186  // bits specifies the format version and the most significant 8 bits specify
187  // the variant types of the profile.
188  uint64_t Version;
189  uint64_t CountersDelta;
190  uint64_t NamesDelta;
191  const RawInstrProf::ProfileData<IntPtrT> *Data;
192  const RawInstrProf::ProfileData<IntPtrT> *DataEnd;
193  const uint64_t *CountersStart;
194  const char *NamesStart;
195  uint64_t NamesSize;
196  // After value profile is all read, this pointer points to
197  // the header of next profile data (if exists)
198  const uint8_t *ValueDataStart;
199  uint32_t ValueKindLast;
200  uint32_t CurValueDataSize;
201
202  InstrProfRecord::ValueMapType FunctionPtrToNameMap;
203
204public:
205  RawInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer)
206      : DataBuffer(std::move(DataBuffer)) {}
207  RawInstrProfReader(const RawInstrProfReader &) = delete;
208  RawInstrProfReader &operator=(const RawInstrProfReader &) = delete;
209
210  static bool hasFormat(const MemoryBuffer &DataBuffer);
211  Error readHeader() override;
212  Error readNextRecord(NamedInstrProfRecord &Record) override;
213
214  bool isIRLevelProfile() const override {
215    return (Version & VARIANT_MASK_IR_PROF) != 0;
216  }
217
218  InstrProfSymtab &getSymtab() override {
219    assert(Symtab.get());
220    return *Symtab.get();
221  }
222
223private:
224  Error createSymtab(InstrProfSymtab &Symtab);
225  Error readNextHeader(const char *CurrentPos);
226  Error readHeader(const RawInstrProf::Header &Header);
227
228  template <class IntT> IntT swap(IntT Int) const {
229    return ShouldSwapBytes ? sys::getSwappedBytes(Int) : Int;
230  }
231
232  support::endianness getDataEndianness() const {
233    support::endianness HostEndian = getHostEndianness();
234    if (!ShouldSwapBytes)
235      return HostEndian;
236    if (HostEndian == support::little)
237      return support::big;
238    else
239      return support::little;
240  }
241
242  inline uint8_t getNumPaddingBytes(uint64_t SizeInBytes) {
243    return 7 & (sizeof(uint64_t) - SizeInBytes % sizeof(uint64_t));
244  }
245
246  Error readName(NamedInstrProfRecord &Record);
247  Error readFuncHash(NamedInstrProfRecord &Record);
248  Error readRawCounts(InstrProfRecord &Record);
249  Error readValueProfilingData(InstrProfRecord &Record);
250  bool atEnd() const { return Data == DataEnd; }
251
252  void advanceData() {
253    Data++;
254    ValueDataStart += CurValueDataSize;
255  }
256
257  const char *getNextHeaderPos() const {
258      assert(atEnd());
259      return (const char *)ValueDataStart;
260  }
261
262  const uint64_t *getCounter(IntPtrT CounterPtr) const {
263    ptrdiff_t Offset = (swap(CounterPtr) - CountersDelta) / sizeof(uint64_t);
264    return CountersStart + Offset;
265  }
266
267  StringRef getName(uint64_t NameRef) const {
268    return Symtab->getFuncName(swap(NameRef));
269  }
270};
271
// Raw profile readers instantiated for profiles written with 32-bit and
// 64-bit pointer-sized integers respectively.
using RawInstrProfReader32 = RawInstrProfReader<uint32_t>;
using RawInstrProfReader64 = RawInstrProfReader<uint64_t>;
274
namespace IndexedInstrProf {

// Opaque declaration of the hash-type enum (underlying type uint32_t). Only
// the type name is needed in this header; the enumerators are not visible
// here.
enum class HashT : uint32_t;

} // end namespace IndexedInstrProf
280
281/// Trait for lookups into the on-disk hash table for the binary instrprof
282/// format.
283class InstrProfLookupTrait {
284  std::vector<NamedInstrProfRecord> DataBuffer;
285  IndexedInstrProf::HashT HashType;
286  unsigned FormatVersion;
287  // Endianness of the input value profile data.
288  // It should be LE by default, but can be changed
289  // for testing purpose.
290  support::endianness ValueProfDataEndianness = support::little;
291
292public:
293  InstrProfLookupTrait(IndexedInstrProf::HashT HashType, unsigned FormatVersion)
294      : HashType(HashType), FormatVersion(FormatVersion) {}
295
296  using data_type = ArrayRef<NamedInstrProfRecord>;
297
298  using internal_key_type = StringRef;
299  using external_key_type = StringRef;
300  using hash_value_type = uint64_t;
301  using offset_type = uint64_t;
302
303  static bool EqualKey(StringRef A, StringRef B) { return A == B; }
304  static StringRef GetInternalKey(StringRef K) { return K; }
305  static StringRef GetExternalKey(StringRef K) { return K; }
306
307  hash_value_type ComputeHash(StringRef K);
308
309  static std::pair<offset_type, offset_type>
310  ReadKeyDataLength(const unsigned char *&D) {
311    using namespace support;
312
313    offset_type KeyLen = endian::readNext<offset_type, little, unaligned>(D);
314    offset_type DataLen = endian::readNext<offset_type, little, unaligned>(D);
315    return std::make_pair(KeyLen, DataLen);
316  }
317
318  StringRef ReadKey(const unsigned char *D, offset_type N) {
319    return StringRef((const char *)D, N);
320  }
321
322  bool readValueProfilingData(const unsigned char *&D,
323                              const unsigned char *const End);
324  data_type ReadData(StringRef K, const unsigned char *D, offset_type N);
325
326  // Used for testing purpose only.
327  void setValueProfDataEndianness(support::endianness Endianness) {
328    ValueProfDataEndianness = Endianness;
329  }
330};
331
332struct InstrProfReaderIndexBase {
333  virtual ~InstrProfReaderIndexBase() = default;
334
335  // Read all the profile records with the same key pointed to the current
336  // iterator.
337  virtual Error getRecords(ArrayRef<NamedInstrProfRecord> &Data) = 0;
338
339  // Read all the profile records with the key equal to FuncName
340  virtual Error getRecords(StringRef FuncName,
341                                     ArrayRef<NamedInstrProfRecord> &Data) = 0;
342  virtual void advanceToNextKey() = 0;
343  virtual bool atEnd() const = 0;
344  virtual void setValueProfDataEndianness(support::endianness Endianness) = 0;
345  virtual uint64_t getVersion() const = 0;
346  virtual bool isIRLevelProfile() const = 0;
347  virtual Error populateSymtab(InstrProfSymtab &) = 0;
348};
349
// On-disk hash table type whose key/data decoding is supplied by
// InstrProfLookupTrait.
// NOTE(review): the "V3" suffix presumably names the index format version this
// table layout was introduced with -- confirm.
using OnDiskHashTableImplV3 =
    OnDiskIterableChainedHashTable<InstrProfLookupTrait>;
352
353template <typename HashTableImpl>
354class InstrProfReaderIndex : public InstrProfReaderIndexBase {
355private:
356  std::unique_ptr<HashTableImpl> HashTable;
357  typename HashTableImpl::data_iterator RecordIterator;
358  uint64_t FormatVersion;
359
360public:
361  InstrProfReaderIndex(const unsigned char *Buckets,
362                       const unsigned char *const Payload,
363                       const unsigned char *const Base,
364                       IndexedInstrProf::HashT HashType, uint64_t Version);
365  ~InstrProfReaderIndex() override = default;
366
367  Error getRecords(ArrayRef<NamedInstrProfRecord> &Data) override;
368  Error getRecords(StringRef FuncName,
369                   ArrayRef<NamedInstrProfRecord> &Data) override;
370  void advanceToNextKey() override { RecordIterator++; }
371
372  bool atEnd() const override {
373    return RecordIterator == HashTable->data_end();
374  }
375
376  void setValueProfDataEndianness(support::endianness Endianness) override {
377    HashTable->getInfoObj().setValueProfDataEndianness(Endianness);
378  }
379
380  uint64_t getVersion() const override { return GET_VERSION(FormatVersion); }
381
382  bool isIRLevelProfile() const override {
383    return (FormatVersion & VARIANT_MASK_IR_PROF) != 0;
384  }
385
386  Error populateSymtab(InstrProfSymtab &Symtab) override {
387    return Symtab.create(HashTable->keys());
388  }
389};
390
391/// Reader for the indexed binary instrprof format.
392class IndexedInstrProfReader : public InstrProfReader {
393private:
394  /// The profile data file contents.
395  std::unique_ptr<MemoryBuffer> DataBuffer;
396  /// The index into the profile data.
397  std::unique_ptr<InstrProfReaderIndexBase> Index;
398  /// Profile summary data.
399  std::unique_ptr<ProfileSummary> Summary;
400  // Index to the current record in the record array.
401  unsigned RecordIndex;
402
403  // Read the profile summary. Return a pointer pointing to one byte past the
404  // end of the summary data if it exists or the input \c Cur.
405  const unsigned char *readSummary(IndexedInstrProf::ProfVersion Version,
406                                   const unsigned char *Cur);
407
408public:
409  IndexedInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer)
410      : DataBuffer(std::move(DataBuffer)), RecordIndex(0) {}
411  IndexedInstrProfReader(const IndexedInstrProfReader &) = delete;
412  IndexedInstrProfReader &operator=(const IndexedInstrProfReader &) = delete;
413
414  /// Return the profile version.
415  uint64_t getVersion() const { return Index->getVersion(); }
416  bool isIRLevelProfile() const override { return Index->isIRLevelProfile(); }
417
418  /// Return true if the given buffer is in an indexed instrprof format.
419  static bool hasFormat(const MemoryBuffer &DataBuffer);
420
421  /// Read the file header.
422  Error readHeader() override;
423  /// Read a single record.
424  Error readNextRecord(NamedInstrProfRecord &Record) override;
425
426  /// Return the NamedInstrProfRecord associated with FuncName and FuncHash
427  Expected<InstrProfRecord> getInstrProfRecord(StringRef FuncName,
428                                               uint64_t FuncHash);
429
430  /// Fill Counts with the profile data for the given function name.
431  Error getFunctionCounts(StringRef FuncName, uint64_t FuncHash,
432                          std::vector<uint64_t> &Counts);
433
434  /// Return the maximum of all known function counts.
435  uint64_t getMaximumFunctionCount() { return Summary->getMaxFunctionCount(); }
436
437  /// Factory method to create an indexed reader.
438  static Expected<std::unique_ptr<IndexedInstrProfReader>>
439  create(const Twine &Path);
440
441  static Expected<std::unique_ptr<IndexedInstrProfReader>>
442  create(std::unique_ptr<MemoryBuffer> Buffer);
443
444  // Used for testing purpose only.
445  void setValueProfDataEndianness(support::endianness Endianness) {
446    Index->setValueProfDataEndianness(Endianness);
447  }
448
449  // See description in the base class. This interface is designed
450  // to be used by llvm-profdata (for dumping). Avoid using this when
451  // the client is the compiler.
452  InstrProfSymtab &getSymtab() override;
453  ProfileSummary &getSummary() { return *(Summary.get()); }
454};
455
456} // end namespace llvm
457
458#endif // LLVM_PROFILEDATA_INSTRPROFREADER_H
459