InstrProfReader.cpp revision dce4a407a24b04eebc6a376f8e62b41aaa7b071f
1//=-- InstrProfReader.cpp - Instrumented profiling reader -------------------=//
2//
3//                     The LLVM Compiler Infrastructure
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9//
10// This file contains support for reading profiling data for clang's
11// instrumentation based PGO and coverage.
12//
13//===----------------------------------------------------------------------===//
14
15#include "llvm/ProfileData/InstrProfReader.h"
16#include "llvm/ProfileData/InstrProf.h"
17
18#include "InstrProfIndexed.h"
19
20#include <cassert>
21
22using namespace llvm;
23
24static error_code setupMemoryBuffer(std::string Path,
25                                    std::unique_ptr<MemoryBuffer> &Buffer) {
26  if (error_code EC = MemoryBuffer::getFileOrSTDIN(Path, Buffer))
27    return EC;
28
29  // Sanity check the file.
30  if (Buffer->getBufferSize() > std::numeric_limits<unsigned>::max())
31    return instrprof_error::too_large;
32  return instrprof_error::success;
33}
34
35static error_code initializeReader(InstrProfReader &Reader) {
36  return Reader.readHeader();
37}
38
39error_code InstrProfReader::create(std::string Path,
40                                   std::unique_ptr<InstrProfReader> &Result) {
41  // Set up the buffer to read.
42  std::unique_ptr<MemoryBuffer> Buffer;
43  if (error_code EC = setupMemoryBuffer(Path, Buffer))
44    return EC;
45
46  // Create the reader.
47  if (IndexedInstrProfReader::hasFormat(*Buffer))
48    Result.reset(new IndexedInstrProfReader(std::move(Buffer)));
49  else if (RawInstrProfReader64::hasFormat(*Buffer))
50    Result.reset(new RawInstrProfReader64(std::move(Buffer)));
51  else if (RawInstrProfReader32::hasFormat(*Buffer))
52    Result.reset(new RawInstrProfReader32(std::move(Buffer)));
53  else
54    Result.reset(new TextInstrProfReader(std::move(Buffer)));
55
56  // Initialize the reader and return the result.
57  return initializeReader(*Result);
58}
59
60error_code IndexedInstrProfReader::create(
61    std::string Path, std::unique_ptr<IndexedInstrProfReader> &Result) {
62  // Set up the buffer to read.
63  std::unique_ptr<MemoryBuffer> Buffer;
64  if (error_code EC = setupMemoryBuffer(Path, Buffer))
65    return EC;
66
67  // Create the reader.
68  if (!IndexedInstrProfReader::hasFormat(*Buffer))
69    return instrprof_error::bad_magic;
70  Result.reset(new IndexedInstrProfReader(std::move(Buffer)));
71
72  // Initialize the reader and return the result.
73  return initializeReader(*Result);
74}
75
76void InstrProfIterator::Increment() {
77  if (Reader->readNextRecord(Record))
78    *this = InstrProfIterator();
79}
80
81error_code TextInstrProfReader::readNextRecord(InstrProfRecord &Record) {
82  // Skip empty lines.
83  while (!Line.is_at_end() && Line->empty())
84    ++Line;
85  // If we hit EOF while looking for a name, we're done.
86  if (Line.is_at_end())
87    return error(instrprof_error::eof);
88
89  // Read the function name.
90  Record.Name = *Line++;
91
92  // Read the function hash.
93  if (Line.is_at_end())
94    return error(instrprof_error::truncated);
95  if ((Line++)->getAsInteger(10, Record.Hash))
96    return error(instrprof_error::malformed);
97
98  // Read the number of counters.
99  uint64_t NumCounters;
100  if (Line.is_at_end())
101    return error(instrprof_error::truncated);
102  if ((Line++)->getAsInteger(10, NumCounters))
103    return error(instrprof_error::malformed);
104  if (NumCounters == 0)
105    return error(instrprof_error::malformed);
106
107  // Read each counter and fill our internal storage with the values.
108  Counts.clear();
109  Counts.reserve(NumCounters);
110  for (uint64_t I = 0; I < NumCounters; ++I) {
111    if (Line.is_at_end())
112      return error(instrprof_error::truncated);
113    uint64_t Count;
114    if ((Line++)->getAsInteger(10, Count))
115      return error(instrprof_error::malformed);
116    Counts.push_back(Count);
117  }
118  // Give the record a reference to our internal counter storage.
119  Record.Counts = Counts;
120
121  return success();
122}
123
/// Magic number that opens a raw profile whose pointer-sized fields have type
/// \p IntPtrT. Only the specializations below are defined.
template <class IntPtrT>
static uint64_t getRawMagic();

/// Raw magic for 64-bit pointers: the bytes 255 'l' 'p' 'r' 'o' 'f' 'r' 129,
/// with the first byte ending up in the most significant position.
template <>
uint64_t getRawMagic<uint64_t>() {
  const unsigned char Bytes[] = {255, 'l', 'p', 'r', 'o', 'f', 'r', 129};
  uint64_t Magic = 0;
  for (unsigned char Byte : Bytes)
    Magic = (Magic << 8) | uint64_t(Byte);
  return Magic;
}

/// Raw magic for 32-bit pointers: identical to the 64-bit one except for an
/// upper-case 'R' in the second-lowest byte.
template <>
uint64_t getRawMagic<uint32_t>() {
  const unsigned char Bytes[] = {255, 'l', 'p', 'r', 'o', 'f', 'R', 129};
  uint64_t Magic = 0;
  for (unsigned char Byte : Bytes)
    Magic = (Magic << 8) | uint64_t(Byte);
  return Magic;
}
152
153template <class IntPtrT>
154bool RawInstrProfReader<IntPtrT>::hasFormat(const MemoryBuffer &DataBuffer) {
155  if (DataBuffer.getBufferSize() < sizeof(uint64_t))
156    return false;
157  uint64_t Magic =
158    *reinterpret_cast<const uint64_t *>(DataBuffer.getBufferStart());
159  return getRawMagic<IntPtrT>() == Magic ||
160    sys::SwapByteOrder(getRawMagic<IntPtrT>()) == Magic;
161}
162
163template <class IntPtrT>
164error_code RawInstrProfReader<IntPtrT>::readHeader() {
165  if (!hasFormat(*DataBuffer))
166    return error(instrprof_error::bad_magic);
167  if (DataBuffer->getBufferSize() < sizeof(RawHeader))
168    return error(instrprof_error::bad_header);
169  auto *Header =
170    reinterpret_cast<const RawHeader *>(DataBuffer->getBufferStart());
171  ShouldSwapBytes = Header->Magic != getRawMagic<IntPtrT>();
172  return readHeader(*Header);
173}
174
175template <class IntPtrT>
176error_code RawInstrProfReader<IntPtrT>::readNextHeader(const char *CurrentPos) {
177  const char *End = DataBuffer->getBufferEnd();
178  // Skip zero padding between profiles.
179  while (CurrentPos != End && *CurrentPos == 0)
180    ++CurrentPos;
181  // If there's nothing left, we're done.
182  if (CurrentPos == End)
183    return instrprof_error::eof;
184  // If there isn't enough space for another header, this is probably just
185  // garbage at the end of the file.
186  if (CurrentPos + sizeof(RawHeader) > End)
187    return instrprof_error::malformed;
188  // The magic should have the same byte order as in the previous header.
189  uint64_t Magic = *reinterpret_cast<const uint64_t *>(CurrentPos);
190  if (Magic != swap(getRawMagic<IntPtrT>()))
191    return instrprof_error::bad_magic;
192
193  // There's another profile to read, so we need to process the header.
194  auto *Header = reinterpret_cast<const RawHeader *>(CurrentPos);
195  return readHeader(*Header);
196}
197
/// The raw profile format version that the raw reader's header check accepts.
static uint64_t getRawVersion() {
  const uint64_t SupportedVersion = 1;
  return SupportedVersion;
}
201
202template <class IntPtrT>
203error_code RawInstrProfReader<IntPtrT>::readHeader(const RawHeader &Header) {
204  if (swap(Header.Version) != getRawVersion())
205    return error(instrprof_error::unsupported_version);
206
207  CountersDelta = swap(Header.CountersDelta);
208  NamesDelta = swap(Header.NamesDelta);
209  auto DataSize = swap(Header.DataSize);
210  auto CountersSize = swap(Header.CountersSize);
211  auto NamesSize = swap(Header.NamesSize);
212
213  ptrdiff_t DataOffset = sizeof(RawHeader);
214  ptrdiff_t CountersOffset = DataOffset + sizeof(ProfileData) * DataSize;
215  ptrdiff_t NamesOffset = CountersOffset + sizeof(uint64_t) * CountersSize;
216  size_t ProfileSize = NamesOffset + sizeof(char) * NamesSize;
217
218  auto *Start = reinterpret_cast<const char *>(&Header);
219  if (Start + ProfileSize > DataBuffer->getBufferEnd())
220    return error(instrprof_error::bad_header);
221
222  Data = reinterpret_cast<const ProfileData *>(Start + DataOffset);
223  DataEnd = Data + DataSize;
224  CountersStart = reinterpret_cast<const uint64_t *>(Start + CountersOffset);
225  NamesStart = Start + NamesOffset;
226  ProfileEnd = Start + ProfileSize;
227
228  return success();
229}
230
231template <class IntPtrT>
232error_code
233RawInstrProfReader<IntPtrT>::readNextRecord(InstrProfRecord &Record) {
234  if (Data == DataEnd)
235    if (error_code EC = readNextHeader(ProfileEnd))
236      return EC;
237
238  // Get the raw data.
239  StringRef RawName(getName(Data->NamePtr), swap(Data->NameSize));
240  uint32_t NumCounters = swap(Data->NumCounters);
241  if (NumCounters == 0)
242    return error(instrprof_error::malformed);
243  auto RawCounts = makeArrayRef(getCounter(Data->CounterPtr), NumCounters);
244
245  // Check bounds.
246  auto *NamesStartAsCounter = reinterpret_cast<const uint64_t *>(NamesStart);
247  if (RawName.data() < NamesStart ||
248      RawName.data() + RawName.size() > DataBuffer->getBufferEnd() ||
249      RawCounts.data() < CountersStart ||
250      RawCounts.data() + RawCounts.size() > NamesStartAsCounter)
251    return error(instrprof_error::malformed);
252
253  // Store the data in Record, byte-swapping as necessary.
254  Record.Hash = swap(Data->FuncHash);
255  Record.Name = RawName;
256  if (ShouldSwapBytes) {
257    Counts.clear();
258    Counts.reserve(RawCounts.size());
259    for (uint64_t Count : RawCounts)
260      Counts.push_back(swap(Count));
261    Record.Counts = Counts;
262  } else
263    Record.Counts = RawCounts;
264
265  // Iterate.
266  ++Data;
267  return success();
268}
269
namespace llvm {
// Explicitly instantiate the raw reader for the two IntPtrT types that have a
// getRawMagic specialization, so that the member functions defined in this
// file are emitted here.
template class RawInstrProfReader<uint32_t>;
template class RawInstrProfReader<uint64_t>;
}
274
275InstrProfLookupTrait::hash_value_type
276InstrProfLookupTrait::ComputeHash(StringRef K) {
277  return IndexedInstrProf::ComputeHash(HashType, K);
278}
279
280bool IndexedInstrProfReader::hasFormat(const MemoryBuffer &DataBuffer) {
281  if (DataBuffer.getBufferSize() < 8)
282    return false;
283  using namespace support;
284  uint64_t Magic =
285      endian::read<uint64_t, little, aligned>(DataBuffer.getBufferStart());
286  return Magic == IndexedInstrProf::Magic;
287}
288
289error_code IndexedInstrProfReader::readHeader() {
290  const unsigned char *Start =
291      (const unsigned char *)DataBuffer->getBufferStart();
292  const unsigned char *Cur = Start;
293  if ((const unsigned char *)DataBuffer->getBufferEnd() - Cur < 24)
294    return error(instrprof_error::truncated);
295
296  using namespace support;
297
298  // Check the magic number.
299  uint64_t Magic = endian::readNext<uint64_t, little, unaligned>(Cur);
300  if (Magic != IndexedInstrProf::Magic)
301    return error(instrprof_error::bad_magic);
302
303  // Read the version.
304  uint64_t Version = endian::readNext<uint64_t, little, unaligned>(Cur);
305  if (Version != IndexedInstrProf::Version)
306    return error(instrprof_error::unsupported_version);
307
308  // Read the maximal function count.
309  MaxFunctionCount = endian::readNext<uint64_t, little, unaligned>(Cur);
310
311  // Read the hash type and start offset.
312  IndexedInstrProf::HashT HashType = static_cast<IndexedInstrProf::HashT>(
313      endian::readNext<uint64_t, little, unaligned>(Cur));
314  if (HashType > IndexedInstrProf::HashT::Last)
315    return error(instrprof_error::unsupported_hash_type);
316  uint64_t HashOffset = endian::readNext<uint64_t, little, unaligned>(Cur);
317
318  // The rest of the file is an on disk hash table.
319  Index.reset(InstrProfReaderIndex::Create(Start + HashOffset, Cur, Start,
320                                           InstrProfLookupTrait(HashType)));
321  // Set up our iterator for readNextRecord.
322  RecordIterator = Index->data_begin();
323
324  return success();
325}
326
327error_code IndexedInstrProfReader::getFunctionCounts(
328    StringRef FuncName, uint64_t &FuncHash, std::vector<uint64_t> &Counts) {
329  const auto &Iter = Index->find(FuncName);
330  if (Iter == Index->end())
331    return error(instrprof_error::unknown_function);
332
333  // Found it. Make sure it's valid before giving back a result.
334  const InstrProfRecord &Record = *Iter;
335  if (Record.Name.empty())
336    return error(instrprof_error::malformed);
337  FuncHash = Record.Hash;
338  Counts = Record.Counts;
339  return success();
340}
341
342error_code IndexedInstrProfReader::readNextRecord(InstrProfRecord &Record) {
343  // Are we out of records?
344  if (RecordIterator == Index->data_end())
345    return error(instrprof_error::eof);
346
347  // Read the next one.
348  Record = *RecordIterator;
349  ++RecordIterator;
350  if (Record.Name.empty())
351    return error(instrprof_error::malformed);
352  return success();
353}
354