1//===-- llvm/CodeGen/DIEHash.cpp - Dwarf Hashing Framework ----------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file contains support for DWARF4 hashing of DIEs. 11// 12//===----------------------------------------------------------------------===// 13 14#include "ByteStreamer.h" 15#include "DIEHash.h" 16#include "DwarfDebug.h" 17#include "llvm/ADT/ArrayRef.h" 18#include "llvm/ADT/StringRef.h" 19#include "llvm/CodeGen/AsmPrinter.h" 20#include "llvm/CodeGen/DIE.h" 21#include "llvm/Support/Debug.h" 22#include "llvm/Support/Dwarf.h" 23#include "llvm/Support/Endian.h" 24#include "llvm/Support/MD5.h" 25#include "llvm/Support/raw_ostream.h" 26 27using namespace llvm; 28 29#define DEBUG_TYPE "dwarfdebug" 30 31/// \brief Grabs the string in whichever attribute is passed in and returns 32/// a reference to it. 33static StringRef getDIEStringAttr(const DIE &Die, uint16_t Attr) { 34 // Iterate through all the attributes until we find the one we're 35 // looking for, if we can't find it return an empty string. 36 for (const auto &V : Die.values()) 37 if (V.getAttribute() == Attr) 38 return V.getDIEString().getString(); 39 40 return StringRef(""); 41} 42 43/// \brief Adds the string in \p Str to the hash. This also hashes 44/// a trailing NULL with the string. 45void DIEHash::addString(StringRef Str) { 46 DEBUG(dbgs() << "Adding string " << Str << " to hash.\n"); 47 Hash.update(Str); 48 Hash.update(makeArrayRef((uint8_t)'\0')); 49} 50 51// FIXME: The LEB128 routines are copied and only slightly modified out of 52// LEB128.h. 53 54/// \brief Adds the unsigned in \p Value to the hash encoded as a ULEB128. 55void DIEHash::addULEB128(uint64_t Value) { 56 DEBUG(dbgs() << "Adding ULEB128 " << Value << " to hash.\n"); 57 do { 58 uint8_t Byte = Value & 0x7f; 59 Value >>= 7; 60 if (Value != 0) 61 Byte |= 0x80; // Mark this byte to show that more bytes will follow. 62 Hash.update(Byte); 63 } while (Value != 0); 64} 65 66void DIEHash::addSLEB128(int64_t Value) { 67 DEBUG(dbgs() << "Adding ULEB128 " << Value << " to hash.\n"); 68 bool More; 69 do { 70 uint8_t Byte = Value & 0x7f; 71 Value >>= 7; 72 More = !((((Value == 0) && ((Byte & 0x40) == 0)) || 73 ((Value == -1) && ((Byte & 0x40) != 0)))); 74 if (More) 75 Byte |= 0x80; // Mark this byte to show that more bytes will follow. 76 Hash.update(Byte); 77 } while (More); 78} 79 80/// \brief Including \p Parent adds the context of Parent to the hash.. 81void DIEHash::addParentContext(const DIE &Parent) { 82 83 DEBUG(dbgs() << "Adding parent context to hash...\n"); 84 85 // [7.27.2] For each surrounding type or namespace beginning with the 86 // outermost such construct... 87 SmallVector<const DIE *, 1> Parents; 88 const DIE *Cur = &Parent; 89 while (Cur->getParent()) { 90 Parents.push_back(Cur); 91 Cur = Cur->getParent(); 92 } 93 assert(Cur->getTag() == dwarf::DW_TAG_compile_unit || 94 Cur->getTag() == dwarf::DW_TAG_type_unit); 95 96 // Reverse iterate over our list to go from the outermost construct to the 97 // innermost. 98 for (SmallVectorImpl<const DIE *>::reverse_iterator I = Parents.rbegin(), 99 E = Parents.rend(); 100 I != E; ++I) { 101 const DIE &Die = **I; 102 103 // ... Append the letter "C" to the sequence... 104 addULEB128('C'); 105 106 // ... Followed by the DWARF tag of the construct... 107 addULEB128(Die.getTag()); 108 109 // ... Then the name, taken from the DW_AT_name attribute. 110 StringRef Name = getDIEStringAttr(Die, dwarf::DW_AT_name); 111 DEBUG(dbgs() << "... adding context: " << Name << "\n"); 112 if (!Name.empty()) 113 addString(Name); 114 } 115} 116 117// Collect all of the attributes for a particular DIE in single structure. 118void DIEHash::collectAttributes(const DIE &Die, DIEAttrs &Attrs) { 119#define COLLECT_ATTR(NAME) \ 120 case dwarf::NAME: \ 121 Attrs.NAME = V; \ 122 break 123 124 for (const auto &V : Die.values()) { 125 DEBUG(dbgs() << "Attribute: " 126 << dwarf::AttributeString(V.getAttribute()) 127 << " added.\n"); 128 switch (V.getAttribute()) { 129 COLLECT_ATTR(DW_AT_name); 130 COLLECT_ATTR(DW_AT_accessibility); 131 COLLECT_ATTR(DW_AT_address_class); 132 COLLECT_ATTR(DW_AT_allocated); 133 COLLECT_ATTR(DW_AT_artificial); 134 COLLECT_ATTR(DW_AT_associated); 135 COLLECT_ATTR(DW_AT_binary_scale); 136 COLLECT_ATTR(DW_AT_bit_offset); 137 COLLECT_ATTR(DW_AT_bit_size); 138 COLLECT_ATTR(DW_AT_bit_stride); 139 COLLECT_ATTR(DW_AT_byte_size); 140 COLLECT_ATTR(DW_AT_byte_stride); 141 COLLECT_ATTR(DW_AT_const_expr); 142 COLLECT_ATTR(DW_AT_const_value); 143 COLLECT_ATTR(DW_AT_containing_type); 144 COLLECT_ATTR(DW_AT_count); 145 COLLECT_ATTR(DW_AT_data_bit_offset); 146 COLLECT_ATTR(DW_AT_data_location); 147 COLLECT_ATTR(DW_AT_data_member_location); 148 COLLECT_ATTR(DW_AT_decimal_scale); 149 COLLECT_ATTR(DW_AT_decimal_sign); 150 COLLECT_ATTR(DW_AT_default_value); 151 COLLECT_ATTR(DW_AT_digit_count); 152 COLLECT_ATTR(DW_AT_discr); 153 COLLECT_ATTR(DW_AT_discr_list); 154 COLLECT_ATTR(DW_AT_discr_value); 155 COLLECT_ATTR(DW_AT_encoding); 156 COLLECT_ATTR(DW_AT_enum_class); 157 COLLECT_ATTR(DW_AT_endianity); 158 COLLECT_ATTR(DW_AT_explicit); 159 COLLECT_ATTR(DW_AT_is_optional); 160 COLLECT_ATTR(DW_AT_location); 161 COLLECT_ATTR(DW_AT_lower_bound); 162 COLLECT_ATTR(DW_AT_mutable); 163 COLLECT_ATTR(DW_AT_ordering); 164 COLLECT_ATTR(DW_AT_picture_string); 165 COLLECT_ATTR(DW_AT_prototyped); 166 COLLECT_ATTR(DW_AT_small); 167 COLLECT_ATTR(DW_AT_segment); 168 COLLECT_ATTR(DW_AT_string_length); 169 COLLECT_ATTR(DW_AT_threads_scaled); 170 COLLECT_ATTR(DW_AT_upper_bound); 171 COLLECT_ATTR(DW_AT_use_location); 172 COLLECT_ATTR(DW_AT_use_UTF8); 173 COLLECT_ATTR(DW_AT_variable_parameter); 174 COLLECT_ATTR(DW_AT_virtuality); 175 COLLECT_ATTR(DW_AT_visibility); 176 COLLECT_ATTR(DW_AT_vtable_elem_location); 177 COLLECT_ATTR(DW_AT_type); 178 default: 179 break; 180 } 181 } 182} 183 184void DIEHash::hashShallowTypeReference(dwarf::Attribute Attribute, 185 const DIE &Entry, StringRef Name) { 186 // append the letter 'N' 187 addULEB128('N'); 188 189 // the DWARF attribute code (DW_AT_type or DW_AT_friend), 190 addULEB128(Attribute); 191 192 // the context of the tag, 193 if (const DIE *Parent = Entry.getParent()) 194 addParentContext(*Parent); 195 196 // the letter 'E', 197 addULEB128('E'); 198 199 // and the name of the type. 200 addString(Name); 201 202 // Currently DW_TAG_friends are not used by Clang, but if they do become so, 203 // here's the relevant spec text to implement: 204 // 205 // For DW_TAG_friend, if the referenced entry is the DW_TAG_subprogram, 206 // the context is omitted and the name to be used is the ABI-specific name 207 // of the subprogram (e.g., the mangled linker name). 208} 209 210void DIEHash::hashRepeatedTypeReference(dwarf::Attribute Attribute, 211 unsigned DieNumber) { 212 // a) If T is in the list of [previously hashed types], use the letter 213 // 'R' as the marker 214 addULEB128('R'); 215 216 addULEB128(Attribute); 217 218 // and use the unsigned LEB128 encoding of [the index of T in the 219 // list] as the attribute value; 220 addULEB128(DieNumber); 221} 222 223void DIEHash::hashDIEEntry(dwarf::Attribute Attribute, dwarf::Tag Tag, 224 const DIE &Entry) { 225 assert(Tag != dwarf::DW_TAG_friend && "No current LLVM clients emit friend " 226 "tags. Add support here when there's " 227 "a use case"); 228 // Step 5 229 // If the tag in Step 3 is one of [the below tags] 230 if ((Tag == dwarf::DW_TAG_pointer_type || 231 Tag == dwarf::DW_TAG_reference_type || 232 Tag == dwarf::DW_TAG_rvalue_reference_type || 233 Tag == dwarf::DW_TAG_ptr_to_member_type) && 234 // and the referenced type (via the [below attributes]) 235 // FIXME: This seems overly restrictive, and causes hash mismatches 236 // there's a decl/def difference in the containing type of a 237 // ptr_to_member_type, but it's what DWARF says, for some reason. 238 Attribute == dwarf::DW_AT_type) { 239 // ... has a DW_AT_name attribute, 240 StringRef Name = getDIEStringAttr(Entry, dwarf::DW_AT_name); 241 if (!Name.empty()) { 242 hashShallowTypeReference(Attribute, Entry, Name); 243 return; 244 } 245 } 246 247 unsigned &DieNumber = Numbering[&Entry]; 248 if (DieNumber) { 249 hashRepeatedTypeReference(Attribute, DieNumber); 250 return; 251 } 252 253 // otherwise, b) use the letter 'T' as the marker, ... 254 addULEB128('T'); 255 256 addULEB128(Attribute); 257 258 // ... process the type T recursively by performing Steps 2 through 7, and 259 // use the result as the attribute value. 260 DieNumber = Numbering.size(); 261 computeHash(Entry); 262} 263 264// Hash all of the values in a block like set of values. This assumes that 265// all of the data is going to be added as integers. 266void DIEHash::hashBlockData(const DIE::const_value_range &Values) { 267 for (const auto &V : Values) 268 Hash.update((uint64_t)V.getDIEInteger().getValue()); 269} 270 271// Hash the contents of a loclistptr class. 272void DIEHash::hashLocList(const DIELocList &LocList) { 273 HashingByteStreamer Streamer(*this); 274 DwarfDebug &DD = *AP->getDwarfDebug(); 275 const DebugLocStream &Locs = DD.getDebugLocs(); 276 for (const auto &Entry : Locs.getEntries(Locs.getList(LocList.getValue()))) 277 DD.emitDebugLocEntry(Streamer, Entry); 278} 279 280// Hash an individual attribute \param Attr based on the type of attribute and 281// the form. 282void DIEHash::hashAttribute(const DIEValue &Value, dwarf::Tag Tag) { 283 dwarf::Attribute Attribute = Value.getAttribute(); 284 285 // Other attribute values use the letter 'A' as the marker, and the value 286 // consists of the form code (encoded as an unsigned LEB128 value) followed by 287 // the encoding of the value according to the form code. To ensure 288 // reproducibility of the signature, the set of forms used in the signature 289 // computation is limited to the following: DW_FORM_sdata, DW_FORM_flag, 290 // DW_FORM_string, and DW_FORM_block. 291 292 switch (Value.getType()) { 293 case DIEValue::isNone: 294 llvm_unreachable("Expected valid DIEValue"); 295 296 // 7.27 Step 3 297 // ... An attribute that refers to another type entry T is processed as 298 // follows: 299 case DIEValue::isEntry: 300 hashDIEEntry(Attribute, Tag, Value.getDIEEntry().getEntry()); 301 break; 302 case DIEValue::isInteger: { 303 addULEB128('A'); 304 addULEB128(Attribute); 305 switch (Value.getForm()) { 306 case dwarf::DW_FORM_data1: 307 case dwarf::DW_FORM_data2: 308 case dwarf::DW_FORM_data4: 309 case dwarf::DW_FORM_data8: 310 case dwarf::DW_FORM_udata: 311 case dwarf::DW_FORM_sdata: 312 addULEB128(dwarf::DW_FORM_sdata); 313 addSLEB128((int64_t)Value.getDIEInteger().getValue()); 314 break; 315 // DW_FORM_flag_present is just flag with a value of one. We still give it a 316 // value so just use the value. 317 case dwarf::DW_FORM_flag_present: 318 case dwarf::DW_FORM_flag: 319 addULEB128(dwarf::DW_FORM_flag); 320 addULEB128((int64_t)Value.getDIEInteger().getValue()); 321 break; 322 default: 323 llvm_unreachable("Unknown integer form!"); 324 } 325 break; 326 } 327 case DIEValue::isString: 328 addULEB128('A'); 329 addULEB128(Attribute); 330 addULEB128(dwarf::DW_FORM_string); 331 addString(Value.getDIEString().getString()); 332 break; 333 case DIEValue::isBlock: 334 case DIEValue::isLoc: 335 case DIEValue::isLocList: 336 addULEB128('A'); 337 addULEB128(Attribute); 338 addULEB128(dwarf::DW_FORM_block); 339 if (Value.getType() == DIEValue::isBlock) { 340 addULEB128(Value.getDIEBlock().ComputeSize(AP)); 341 hashBlockData(Value.getDIEBlock().values()); 342 } else if (Value.getType() == DIEValue::isLoc) { 343 addULEB128(Value.getDIELoc().ComputeSize(AP)); 344 hashBlockData(Value.getDIELoc().values()); 345 } else { 346 // We could add the block length, but that would take 347 // a bit of work and not add a lot of uniqueness 348 // to the hash in some way we could test. 349 hashLocList(Value.getDIELocList()); 350 } 351 break; 352 // FIXME: It's uncertain whether or not we should handle this at the moment. 353 case DIEValue::isExpr: 354 case DIEValue::isLabel: 355 case DIEValue::isDelta: 356 llvm_unreachable("Add support for additional value types."); 357 } 358} 359 360// Go through the attributes from \param Attrs in the order specified in 7.27.4 361// and hash them. 362void DIEHash::hashAttributes(const DIEAttrs &Attrs, dwarf::Tag Tag) { 363#define ADD_ATTR(ATTR) \ 364 { \ 365 if (ATTR) \ 366 hashAttribute(ATTR, Tag); \ 367 } 368 369 ADD_ATTR(Attrs.DW_AT_name); 370 ADD_ATTR(Attrs.DW_AT_accessibility); 371 ADD_ATTR(Attrs.DW_AT_address_class); 372 ADD_ATTR(Attrs.DW_AT_allocated); 373 ADD_ATTR(Attrs.DW_AT_artificial); 374 ADD_ATTR(Attrs.DW_AT_associated); 375 ADD_ATTR(Attrs.DW_AT_binary_scale); 376 ADD_ATTR(Attrs.DW_AT_bit_offset); 377 ADD_ATTR(Attrs.DW_AT_bit_size); 378 ADD_ATTR(Attrs.DW_AT_bit_stride); 379 ADD_ATTR(Attrs.DW_AT_byte_size); 380 ADD_ATTR(Attrs.DW_AT_byte_stride); 381 ADD_ATTR(Attrs.DW_AT_const_expr); 382 ADD_ATTR(Attrs.DW_AT_const_value); 383 ADD_ATTR(Attrs.DW_AT_containing_type); 384 ADD_ATTR(Attrs.DW_AT_count); 385 ADD_ATTR(Attrs.DW_AT_data_bit_offset); 386 ADD_ATTR(Attrs.DW_AT_data_location); 387 ADD_ATTR(Attrs.DW_AT_data_member_location); 388 ADD_ATTR(Attrs.DW_AT_decimal_scale); 389 ADD_ATTR(Attrs.DW_AT_decimal_sign); 390 ADD_ATTR(Attrs.DW_AT_default_value); 391 ADD_ATTR(Attrs.DW_AT_digit_count); 392 ADD_ATTR(Attrs.DW_AT_discr); 393 ADD_ATTR(Attrs.DW_AT_discr_list); 394 ADD_ATTR(Attrs.DW_AT_discr_value); 395 ADD_ATTR(Attrs.DW_AT_encoding); 396 ADD_ATTR(Attrs.DW_AT_enum_class); 397 ADD_ATTR(Attrs.DW_AT_endianity); 398 ADD_ATTR(Attrs.DW_AT_explicit); 399 ADD_ATTR(Attrs.DW_AT_is_optional); 400 ADD_ATTR(Attrs.DW_AT_location); 401 ADD_ATTR(Attrs.DW_AT_lower_bound); 402 ADD_ATTR(Attrs.DW_AT_mutable); 403 ADD_ATTR(Attrs.DW_AT_ordering); 404 ADD_ATTR(Attrs.DW_AT_picture_string); 405 ADD_ATTR(Attrs.DW_AT_prototyped); 406 ADD_ATTR(Attrs.DW_AT_small); 407 ADD_ATTR(Attrs.DW_AT_segment); 408 ADD_ATTR(Attrs.DW_AT_string_length); 409 ADD_ATTR(Attrs.DW_AT_threads_scaled); 410 ADD_ATTR(Attrs.DW_AT_upper_bound); 411 ADD_ATTR(Attrs.DW_AT_use_location); 412 ADD_ATTR(Attrs.DW_AT_use_UTF8); 413 ADD_ATTR(Attrs.DW_AT_variable_parameter); 414 ADD_ATTR(Attrs.DW_AT_virtuality); 415 ADD_ATTR(Attrs.DW_AT_visibility); 416 ADD_ATTR(Attrs.DW_AT_vtable_elem_location); 417 ADD_ATTR(Attrs.DW_AT_type); 418 419 // FIXME: Add the extended attributes. 420} 421 422// Add all of the attributes for \param Die to the hash. 423void DIEHash::addAttributes(const DIE &Die) { 424 DIEAttrs Attrs = {}; 425 collectAttributes(Die, Attrs); 426 hashAttributes(Attrs, Die.getTag()); 427} 428 429void DIEHash::hashNestedType(const DIE &Die, StringRef Name) { 430 // 7.27 Step 7 431 // ... append the letter 'S', 432 addULEB128('S'); 433 434 // the tag of C, 435 addULEB128(Die.getTag()); 436 437 // and the name. 438 addString(Name); 439} 440 441// Compute the hash of a DIE. This is based on the type signature computation 442// given in section 7.27 of the DWARF4 standard. It is the md5 hash of a 443// flattened description of the DIE. 444void DIEHash::computeHash(const DIE &Die) { 445 // Append the letter 'D', followed by the DWARF tag of the DIE. 446 addULEB128('D'); 447 addULEB128(Die.getTag()); 448 449 // Add each of the attributes of the DIE. 450 addAttributes(Die); 451 452 // Then hash each of the children of the DIE. 453 for (auto &C : Die.children()) { 454 // 7.27 Step 7 455 // If C is a nested type entry or a member function entry, ... 456 if (isType(C.getTag()) || C.getTag() == dwarf::DW_TAG_subprogram) { 457 StringRef Name = getDIEStringAttr(C, dwarf::DW_AT_name); 458 // ... and has a DW_AT_name attribute 459 if (!Name.empty()) { 460 hashNestedType(C, Name); 461 continue; 462 } 463 } 464 computeHash(C); 465 } 466 467 // Following the last (or if there are no children), append a zero byte. 468 Hash.update(makeArrayRef((uint8_t)'\0')); 469} 470 471/// This is based on the type signature computation given in section 7.27 of the 472/// DWARF4 standard. It is an md5 hash of the flattened description of the DIE 473/// with the inclusion of the full CU and all top level CU entities. 474// TODO: Initialize the type chain at 0 instead of 1 for CU signatures. 475uint64_t DIEHash::computeCUSignature(const DIE &Die) { 476 Numbering.clear(); 477 Numbering[&Die] = 1; 478 479 // Hash the DIE. 480 computeHash(Die); 481 482 // Now return the result. 483 MD5::MD5Result Result; 484 Hash.final(Result); 485 486 // ... take the least significant 8 bytes and return those. Our MD5 487 // implementation always returns its results in little endian, swap bytes 488 // appropriately. 489 return support::endian::read64le(Result + 8); 490} 491 492/// This is based on the type signature computation given in section 7.27 of the 493/// DWARF4 standard. It is an md5 hash of the flattened description of the DIE 494/// with the inclusion of additional forms not specifically called out in the 495/// standard. 496uint64_t DIEHash::computeTypeSignature(const DIE &Die) { 497 Numbering.clear(); 498 Numbering[&Die] = 1; 499 500 if (const DIE *Parent = Die.getParent()) 501 addParentContext(*Parent); 502 503 // Hash the DIE. 504 computeHash(Die); 505 506 // Now return the result. 507 MD5::MD5Result Result; 508 Hash.final(Result); 509 510 // ... take the least significant 8 bytes and return those. Our MD5 511 // implementation always returns its results in little endian, swap bytes 512 // appropriately. 513 return support::endian::read64le(Result + 8); 514} 515