1//=-- InstrProf.h - Instrumented profiling format support ---------*- C++ -*-=// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// Instrumentation-based profiling data is generated by instrumented 11// binaries through library functions in compiler-rt, and read by the clang 12// frontend to feed PGO. 13// 14//===----------------------------------------------------------------------===// 15 16#ifndef LLVM_PROFILEDATA_INSTRPROF_H_ 17#define LLVM_PROFILEDATA_INSTRPROF_H_ 18 19#include "llvm/ADT/STLExtras.h" 20#include "llvm/ADT/StringRef.h" 21#include "llvm/ADT/StringSet.h" 22#include "llvm/IR/GlobalValue.h" 23#include "llvm/ProfileData/InstrProfData.inc" 24#include "llvm/Support/Endian.h" 25#include "llvm/Support/ErrorHandling.h" 26#include "llvm/Support/ErrorOr.h" 27#include "llvm/Support/MD5.h" 28#include <cstdint> 29#include <list> 30#include <system_error> 31#include <vector> 32 33#define INSTR_PROF_INDEX_VERSION 3 34namespace llvm { 35 36class Function; 37class GlobalVariable; 38class Module; 39 40/// Return the name of data section containing profile counter variables. 41inline StringRef getInstrProfCountersSectionName(bool AddSegment) { 42 return AddSegment ? "__DATA," INSTR_PROF_CNTS_SECT_NAME_STR 43 : INSTR_PROF_CNTS_SECT_NAME_STR; 44} 45 46/// Return the name of data section containing names of instrumented 47/// functions. 48inline StringRef getInstrProfNameSectionName(bool AddSegment) { 49 return AddSegment ? "__DATA," INSTR_PROF_NAME_SECT_NAME_STR 50 : INSTR_PROF_NAME_SECT_NAME_STR; 51} 52 53/// Return the name of the data section containing per-function control 54/// data. 55inline StringRef getInstrProfDataSectionName(bool AddSegment) { 56 return AddSegment ? 
"__DATA," INSTR_PROF_DATA_SECT_NAME_STR 57 : INSTR_PROF_DATA_SECT_NAME_STR; 58} 59 60/// Return the name profile runtime entry point to do value profiling 61/// for a given site. 62inline StringRef getInstrProfValueProfFuncName() { 63 return INSTR_PROF_VALUE_PROF_FUNC_STR; 64} 65 66/// Return the name of the section containing function coverage mapping 67/// data. 68inline StringRef getInstrProfCoverageSectionName(bool AddSegment) { 69 return AddSegment ? "__DATA,__llvm_covmap" : "__llvm_covmap"; 70} 71 72/// Return the name prefix of variables containing instrumented function names. 73inline StringRef getInstrProfNameVarPrefix() { return "__profn_"; } 74 75/// Return the name prefix of variables containing per-function control data. 76inline StringRef getInstrProfDataVarPrefix() { return "__profd_"; } 77 78/// Return the name prefix of profile counter variables. 79inline StringRef getInstrProfCountersVarPrefix() { return "__profc_"; } 80 81/// Return the name prefix of the COMDAT group for instrumentation variables 82/// associated with a COMDAT function. 83inline StringRef getInstrProfComdatPrefix() { return "__profv_"; } 84 85/// Return the name of a covarage mapping variable (internal linkage) 86/// for each instrumented source module. Such variables are allocated 87/// in the __llvm_covmap section. 88inline StringRef getCoverageMappingVarName() { 89 return "__llvm_coverage_mapping"; 90} 91 92/// Return the name of function that registers all the per-function control 93/// data at program startup time by calling __llvm_register_function. This 94/// function has internal linkage and is called by __llvm_profile_init 95/// runtime method. This function is not generated for these platforms: 96/// Darwin, Linux, and FreeBSD. 97inline StringRef getInstrProfRegFuncsName() { 98 return "__llvm_profile_register_functions"; 99} 100 101/// Return the name of the runtime interface that registers per-function control 102/// data for one instrumented function. 
103inline StringRef getInstrProfRegFuncName() { 104 return "__llvm_profile_register_function"; 105} 106 107/// Return the name of the runtime initialization method that is generated by 108/// the compiler. The function calls __llvm_profile_register_functions and 109/// __llvm_profile_override_default_filename functions if needed. This function 110/// has internal linkage and invoked at startup time via init_array. 111inline StringRef getInstrProfInitFuncName() { return "__llvm_profile_init"; } 112 113/// Return the name of the hook variable defined in profile runtime library. 114/// A reference to the variable causes the linker to link in the runtime 115/// initialization module (which defines the hook variable). 116inline StringRef getInstrProfRuntimeHookVarName() { 117 return "__llvm_profile_runtime"; 118} 119 120/// Return the name of the compiler generated function that references the 121/// runtime hook variable. The function is a weak global. 122inline StringRef getInstrProfRuntimeHookVarUseFuncName() { 123 return "__llvm_profile_runtime_user"; 124} 125 126/// Return the name of the profile runtime interface that overrides the default 127/// profile data file name. 128inline StringRef getInstrProfFileOverriderFuncName() { 129 return "__llvm_profile_override_default_filename"; 130} 131 132/// Return the modified name for function \c F suitable to be 133/// used the key for profile lookup. 134std::string getPGOFuncName(const Function &F, 135 uint64_t Version = INSTR_PROF_INDEX_VERSION); 136 137/// Return the modified name for a function suitable to be 138/// used the key for profile lookup. The function's original 139/// name is \c RawFuncName and has linkage of type \c Linkage. 140/// The function is defined in module \c FileName. 
141std::string getPGOFuncName(StringRef RawFuncName, 142 GlobalValue::LinkageTypes Linkage, 143 StringRef FileName, 144 uint64_t Version = INSTR_PROF_INDEX_VERSION); 145 146/// Create and return the global variable for function name used in PGO 147/// instrumentation. \c FuncName is the name of the function returned 148/// by \c getPGOFuncName call. 149GlobalVariable *createPGOFuncNameVar(Function &F, StringRef FuncName); 150 151/// Create and return the global variable for function name used in PGO 152/// instrumentation. /// \c FuncName is the name of the function 153/// returned by \c getPGOFuncName call, \c M is the owning module, 154/// and \c Linkage is the linkage of the instrumented function. 155GlobalVariable *createPGOFuncNameVar(Module &M, 156 GlobalValue::LinkageTypes Linkage, 157 StringRef FuncName); 158 159/// Given a PGO function name, remove the filename prefix and return 160/// the original (static) function name. 161StringRef getFuncNameWithoutPrefix(StringRef PGOFuncName, StringRef FileName); 162 163const std::error_category &instrprof_category(); 164 165enum class instrprof_error { 166 success = 0, 167 eof, 168 unrecognized_format, 169 bad_magic, 170 bad_header, 171 unsupported_version, 172 unsupported_hash_type, 173 too_large, 174 truncated, 175 malformed, 176 unknown_function, 177 hash_mismatch, 178 count_mismatch, 179 counter_overflow, 180 value_site_count_mismatch 181}; 182 183inline std::error_code make_error_code(instrprof_error E) { 184 return std::error_code(static_cast<int>(E), instrprof_category()); 185} 186 187inline instrprof_error MergeResult(instrprof_error &Accumulator, 188 instrprof_error Result) { 189 // Prefer first error encountered as later errors may be secondary effects of 190 // the initial problem. 
191 if (Accumulator == instrprof_error::success && 192 Result != instrprof_error::success) 193 Accumulator = Result; 194 return Accumulator; 195} 196 197enum InstrProfValueKind : uint32_t { 198#define VALUE_PROF_KIND(Enumerator, Value) Enumerator = Value, 199#include "llvm/ProfileData/InstrProfData.inc" 200}; 201 202namespace object { 203class SectionRef; 204} 205 206namespace IndexedInstrProf { 207uint64_t ComputeHash(StringRef K); 208} 209 210/// A symbol table used for function PGO name look-up with keys 211/// (such as pointers, md5hash values) to the function. A function's 212/// PGO name or name's md5hash are used in retrieving the profile 213/// data of the function. See \c getPGOFuncName() method for details 214/// on how PGO name is formed. 215class InstrProfSymtab { 216public: 217 typedef std::vector<std::pair<uint64_t, uint64_t>> AddrHashMap; 218 219private: 220 StringRef Data; 221 uint64_t Address; 222 // A map from MD5 hash keys to function name strings. 223 std::vector<std::pair<uint64_t, std::string>> HashNameMap; 224 // A map from function runtime address to function name MD5 hash. 225 // This map is only populated and used by raw instr profile reader. 226 AddrHashMap AddrToMD5Map; 227 228public: 229 InstrProfSymtab() : Data(), Address(0), HashNameMap(), AddrToMD5Map() {} 230 231 /// Create InstrProfSymtab from an object file section which 232 /// contains function PGO names that are uncompressed. 233 /// This interface is used by CoverageMappingReader. 234 std::error_code create(object::SectionRef &Section); 235 /// This interface is used by reader of CoverageMapping test 236 /// format. 237 inline std::error_code create(StringRef D, uint64_t BaseAddr); 238 /// Create InstrProfSymtab from a set of names iteratable from 239 /// \p IterRange. This interface is used by IndexedProfReader. 
240 template <typename NameIterRange> void create(const NameIterRange &IterRange); 241 // If the symtab is created by a series of calls to \c addFuncName, \c 242 // finalizeSymtab needs to be called before looking up function names. 243 // This is required because the underlying map is a vector (for space 244 // efficiency) which needs to be sorted. 245 inline void finalizeSymtab(); 246 /// Update the symtab by adding \p FuncName to the table. This interface 247 /// is used by the raw and text profile readers. 248 void addFuncName(StringRef FuncName) { 249 HashNameMap.push_back(std::make_pair( 250 IndexedInstrProf::ComputeHash(FuncName), FuncName.str())); 251 } 252 /// Map a function address to its name's MD5 hash. This interface 253 /// is only used by the raw profiler reader. 254 void mapAddress(uint64_t Addr, uint64_t MD5Val) { 255 AddrToMD5Map.push_back(std::make_pair(Addr, MD5Val)); 256 } 257 AddrHashMap &getAddrHashMap() { return AddrToMD5Map; } 258 /// Return function's PGO name from the function name's symabol 259 /// address in the object file. If an error occurs, Return 260 /// an empty string. 261 StringRef getFuncName(uint64_t FuncNameAddress, size_t NameSize); 262 /// Return function's PGO name from the name's md5 hash value. 263 /// If not found, return an empty string. 
264 inline StringRef getFuncName(uint64_t FuncMD5Hash); 265}; 266 267std::error_code InstrProfSymtab::create(StringRef D, uint64_t BaseAddr) { 268 Data = D; 269 Address = BaseAddr; 270 return std::error_code(); 271} 272 273template <typename NameIterRange> 274void InstrProfSymtab::create(const NameIterRange &IterRange) { 275 for (auto Name : IterRange) 276 HashNameMap.push_back( 277 std::make_pair(IndexedInstrProf::ComputeHash(Name), Name.str())); 278 finalizeSymtab(); 279} 280 281void InstrProfSymtab::finalizeSymtab() { 282 std::sort(HashNameMap.begin(), HashNameMap.end(), less_first()); 283 HashNameMap.erase(std::unique(HashNameMap.begin(), HashNameMap.end()), 284 HashNameMap.end()); 285 std::sort(AddrToMD5Map.begin(), AddrToMD5Map.end(), less_first()); 286 AddrToMD5Map.erase(std::unique(AddrToMD5Map.begin(), AddrToMD5Map.end()), 287 AddrToMD5Map.end()); 288} 289 290StringRef InstrProfSymtab::getFuncName(uint64_t FuncMD5Hash) { 291 auto Result = 292 std::lower_bound(HashNameMap.begin(), HashNameMap.end(), FuncMD5Hash, 293 [](const std::pair<uint64_t, std::string> &LHS, 294 uint64_t RHS) { return LHS.first < RHS; }); 295 if (Result != HashNameMap.end()) 296 return Result->second; 297 return StringRef(); 298} 299 300struct InstrProfValueSiteRecord { 301 /// Value profiling data pairs at a given value site. 302 std::list<InstrProfValueData> ValueData; 303 304 InstrProfValueSiteRecord() { ValueData.clear(); } 305 template <class InputIterator> 306 InstrProfValueSiteRecord(InputIterator F, InputIterator L) 307 : ValueData(F, L) {} 308 309 /// Sort ValueData ascending by Value 310 void sortByTargetValues() { 311 ValueData.sort( 312 [](const InstrProfValueData &left, const InstrProfValueData &right) { 313 return left.Value < right.Value; 314 }); 315 } 316 317 /// Merge data from another InstrProfValueSiteRecord 318 /// Optionally scale merged counts by \p Weight. 
319 instrprof_error mergeValueData(InstrProfValueSiteRecord &Input, 320 uint64_t Weight = 1); 321}; 322 323/// Profiling information for a single function. 324struct InstrProfRecord { 325 InstrProfRecord() {} 326 InstrProfRecord(StringRef Name, uint64_t Hash, std::vector<uint64_t> Counts) 327 : Name(Name), Hash(Hash), Counts(std::move(Counts)) {} 328 StringRef Name; 329 uint64_t Hash; 330 std::vector<uint64_t> Counts; 331 332 typedef std::vector<std::pair<uint64_t, uint64_t>> ValueMapType; 333 334 /// Return the number of value profile kinds with non-zero number 335 /// of profile sites. 336 inline uint32_t getNumValueKinds() const; 337 /// Return the number of instrumented sites for ValueKind. 338 inline uint32_t getNumValueSites(uint32_t ValueKind) const; 339 /// Return the total number of ValueData for ValueKind. 340 inline uint32_t getNumValueData(uint32_t ValueKind) const; 341 /// Return the number of value data collected for ValueKind at profiling 342 /// site: Site. 343 inline uint32_t getNumValueDataForSite(uint32_t ValueKind, 344 uint32_t Site) const; 345 /// Return the array of profiled values at \p Site. 346 inline std::unique_ptr<InstrProfValueData[]> 347 getValueForSite(uint32_t ValueKind, uint32_t Site, 348 uint64_t (*ValueMapper)(uint32_t, uint64_t) = 0) const; 349 inline void 350 getValueForSite(InstrProfValueData Dest[], uint32_t ValueKind, uint32_t Site, 351 uint64_t (*ValueMapper)(uint32_t, uint64_t) = 0) const; 352 /// Reserve space for NumValueSites sites. 353 inline void reserveSites(uint32_t ValueKind, uint32_t NumValueSites); 354 /// Add ValueData for ValueKind at value Site. 355 void addValueData(uint32_t ValueKind, uint32_t Site, 356 InstrProfValueData *VData, uint32_t N, 357 ValueMapType *ValueMap); 358 359 /// Merge the counts in \p Other into this one. 360 /// Optionally scale merged counts by \p Weight. 
361 instrprof_error merge(InstrProfRecord &Other, uint64_t Weight = 1); 362 363 /// Clear value data entries 364 void clearValueData() { 365 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) 366 getValueSitesForKind(Kind).clear(); 367 } 368 369private: 370 std::vector<InstrProfValueSiteRecord> IndirectCallSites; 371 const std::vector<InstrProfValueSiteRecord> & 372 getValueSitesForKind(uint32_t ValueKind) const { 373 switch (ValueKind) { 374 case IPVK_IndirectCallTarget: 375 return IndirectCallSites; 376 default: 377 llvm_unreachable("Unknown value kind!"); 378 } 379 return IndirectCallSites; 380 } 381 382 std::vector<InstrProfValueSiteRecord> & 383 getValueSitesForKind(uint32_t ValueKind) { 384 return const_cast<std::vector<InstrProfValueSiteRecord> &>( 385 const_cast<const InstrProfRecord *>(this) 386 ->getValueSitesForKind(ValueKind)); 387 } 388 389 // Map indirect call target name hash to name string. 390 uint64_t remapValue(uint64_t Value, uint32_t ValueKind, 391 ValueMapType *HashKeys); 392 393 // Merge Value Profile data from Src record to this record for ValueKind. 394 // Scale merged value counts by \p Weight. 
395 instrprof_error mergeValueProfData(uint32_t ValueKind, InstrProfRecord &Src, 396 uint64_t Weight); 397}; 398 399uint32_t InstrProfRecord::getNumValueKinds() const { 400 uint32_t NumValueKinds = 0; 401 for (uint32_t Kind = IPVK_First; Kind <= IPVK_Last; ++Kind) 402 NumValueKinds += !(getValueSitesForKind(Kind).empty()); 403 return NumValueKinds; 404} 405 406uint32_t InstrProfRecord::getNumValueData(uint32_t ValueKind) const { 407 uint32_t N = 0; 408 const std::vector<InstrProfValueSiteRecord> &SiteRecords = 409 getValueSitesForKind(ValueKind); 410 for (auto &SR : SiteRecords) { 411 N += SR.ValueData.size(); 412 } 413 return N; 414} 415 416uint32_t InstrProfRecord::getNumValueSites(uint32_t ValueKind) const { 417 return getValueSitesForKind(ValueKind).size(); 418} 419 420uint32_t InstrProfRecord::getNumValueDataForSite(uint32_t ValueKind, 421 uint32_t Site) const { 422 return getValueSitesForKind(ValueKind)[Site].ValueData.size(); 423} 424 425std::unique_ptr<InstrProfValueData[]> InstrProfRecord::getValueForSite( 426 uint32_t ValueKind, uint32_t Site, 427 uint64_t (*ValueMapper)(uint32_t, uint64_t)) const { 428 uint32_t N = getNumValueDataForSite(ValueKind, Site); 429 if (N == 0) 430 return std::unique_ptr<InstrProfValueData[]>(nullptr); 431 432 auto VD = llvm::make_unique<InstrProfValueData[]>(N); 433 getValueForSite(VD.get(), ValueKind, Site, ValueMapper); 434 435 return VD; 436} 437 438void InstrProfRecord::getValueForSite(InstrProfValueData Dest[], 439 uint32_t ValueKind, uint32_t Site, 440 uint64_t (*ValueMapper)(uint32_t, 441 uint64_t)) const { 442 uint32_t I = 0; 443 for (auto V : getValueSitesForKind(ValueKind)[Site].ValueData) { 444 Dest[I].Value = ValueMapper ? 
ValueMapper(ValueKind, V.Value) : V.Value; 445 Dest[I].Count = V.Count; 446 I++; 447 } 448} 449 450void InstrProfRecord::reserveSites(uint32_t ValueKind, uint32_t NumValueSites) { 451 std::vector<InstrProfValueSiteRecord> &ValueSites = 452 getValueSitesForKind(ValueKind); 453 ValueSites.reserve(NumValueSites); 454} 455 456inline support::endianness getHostEndianness() { 457 return sys::IsLittleEndianHost ? support::little : support::big; 458} 459 460// Include definitions for value profile data 461#define INSTR_PROF_VALUE_PROF_DATA 462#include "llvm/ProfileData/InstrProfData.inc" 463 464 /* 465 * Initialize the record for runtime value profile data. 466 * Return 0 if the initialization is successful, otherwise 467 * return 1. 468 */ 469int initializeValueProfRuntimeRecord(ValueProfRuntimeRecord *RuntimeRecord, 470 const uint16_t *NumValueSites, 471 ValueProfNode **Nodes); 472 473/* Release memory allocated for the runtime record. */ 474void finalizeValueProfRuntimeRecord(ValueProfRuntimeRecord *RuntimeRecord); 475 476/* Return the size of ValueProfData structure that can be used to store 477 the value profile data collected at runtime. */ 478uint32_t getValueProfDataSizeRT(const ValueProfRuntimeRecord *Record); 479 480/* Return a ValueProfData instance that stores the data collected at runtime. */ 481ValueProfData * 482serializeValueProfDataFromRT(const ValueProfRuntimeRecord *Record, 483 ValueProfData *Dst); 484 485namespace IndexedInstrProf { 486 487enum class HashT : uint32_t { 488 MD5, 489 490 Last = MD5 491}; 492 493static inline uint64_t MD5Hash(StringRef Str) { 494 MD5 Hash; 495 Hash.update(Str); 496 llvm::MD5::MD5Result Result; 497 Hash.final(Result); 498 // Return the least significant 8 bytes. Our MD5 implementation returns the 499 // result in little endian, so we may need to swap bytes. 
500 using namespace llvm::support; 501 return endian::read<uint64_t, little, unaligned>(Result); 502} 503 504inline uint64_t ComputeHash(HashT Type, StringRef K) { 505 switch (Type) { 506 case HashT::MD5: 507 return IndexedInstrProf::MD5Hash(K); 508 } 509 llvm_unreachable("Unhandled hash type"); 510} 511 512const uint64_t Magic = 0x8169666f72706cff; // "\xfflprofi\x81" 513const uint64_t Version = INSTR_PROF_INDEX_VERSION; 514const HashT HashType = HashT::MD5; 515 516inline uint64_t ComputeHash(StringRef K) { return ComputeHash(HashType, K); } 517 518// This structure defines the file header of the LLVM profile 519// data file in indexed-format. 520struct Header { 521 uint64_t Magic; 522 uint64_t Version; 523 uint64_t MaxFunctionCount; 524 uint64_t HashType; 525 uint64_t HashOffset; 526}; 527 528} // end namespace IndexedInstrProf 529 530namespace RawInstrProf { 531 532const uint64_t Version = INSTR_PROF_RAW_VERSION; 533 534template <class IntPtrT> inline uint64_t getMagic(); 535template <> inline uint64_t getMagic<uint64_t>() { 536 return INSTR_PROF_RAW_MAGIC_64; 537} 538 539template <> inline uint64_t getMagic<uint32_t>() { 540 return INSTR_PROF_RAW_MAGIC_32; 541} 542 543// Per-function profile data header/control structure. 544// The definition should match the structure defined in 545// compiler-rt/lib/profile/InstrProfiling.h. 546// It should also match the synthesized type in 547// Transforms/Instrumentation/InstrProfiling.cpp:getOrCreateRegionCounters. 548template <class IntPtrT> struct LLVM_ALIGNAS(8) ProfileData { 549 #define INSTR_PROF_DATA(Type, LLVMType, Name, Init) Type Name; 550 #include "llvm/ProfileData/InstrProfData.inc" 551}; 552 553// File header structure of the LLVM profile data in raw format. 554// The definition should match the header referenced in 555// compiler-rt/lib/profile/InstrProfilingFile.c and 556// InstrProfilingBuffer.c. 
struct Header {
// Each INSTR_PROF_RAW_HEADER use is expanded into one const member of type
// Type named Name; the Init argument is ignored here. The entries themselves
// presumably come from the included InstrProfData.inc (not visible here).
#define INSTR_PROF_RAW_HEADER(Type, Name, Init) const Type Name;
#include "llvm/ProfileData/InstrProfData.inc"
};

} // end namespace RawInstrProf

namespace coverage {

// Profile coverage map has the following layout:
// [CoverageMapFileHeader]
// [ArrayStart]
//  [CovMapFunctionRecord]
//  [CovMapFunctionRecord]
//  ...
// [ArrayEnd]
// [Encoded Region Mapping Data]
//
// NOTE(review): the LLVM_PACKED_START / LLVM_PACKED_END pair presumably makes
// the record layout packed (no padding); the macro definitions are not visible
// in this file, so confirm against llvm/Support/Compiler.h.
LLVM_PACKED_START
template <class IntPtrT> struct CovMapFunctionRecord {
  // Each COVMAP_FUNC_RECORD use is expanded into one member of type Type
  // named Name; the LLVMType and Init arguments are ignored here.
  #define COVMAP_FUNC_RECORD(Type, LLVMType, Name, Init) Type Name;
  #include "llvm/ProfileData/InstrProfData.inc"
};
LLVM_PACKED_END

} // end namespace coverage

} // end namespace llvm

namespace std {
// Mark llvm::instrprof_error as an error code enum so that it converts
// implicitly to std::error_code (via llvm::make_error_code).
template <>
struct is_error_code_enum<llvm::instrprof_error> : std::true_type {};
} // end namespace std

#endif // LLVM_PROFILEDATA_INSTRPROF_H_