DIEHash.cpp revision bd18c8d0903b695bd503a45cf11901d48eea61bd
1//===-- llvm/CodeGen/DIEHash.cpp - Dwarf Hashing Framework ----------------===// 2// 3// The LLVM Compiler Infrastructure 4// 5// This file is distributed under the University of Illinois Open Source 6// License. See LICENSE.TXT for details. 7// 8//===----------------------------------------------------------------------===// 9// 10// This file contains support for DWARF4 hashing of DIEs. 11// 12//===----------------------------------------------------------------------===// 13 14#define DEBUG_TYPE "dwarfdebug" 15 16#include "DIE.h" 17#include "DIEHash.h" 18#include "DwarfCompileUnit.h" 19#include "llvm/ADT/ArrayRef.h" 20#include "llvm/ADT/StringRef.h" 21#include "llvm/Support/Debug.h" 22#include "llvm/Support/Dwarf.h" 23#include "llvm/Support/Endian.h" 24#include "llvm/Support/MD5.h" 25#include "llvm/Support/raw_ostream.h" 26 27using namespace llvm; 28 29/// \brief Grabs the string in whichever attribute is passed in and returns 30/// a reference to it. 31static StringRef getDIEStringAttr(DIE *Die, uint16_t Attr) { 32 const SmallVectorImpl<DIEValue *> &Values = Die->getValues(); 33 const DIEAbbrev &Abbrevs = Die->getAbbrev(); 34 35 // Iterate through all the attributes until we find the one we're 36 // looking for, if we can't find it return an empty string. 37 for (size_t i = 0; i < Values.size(); ++i) { 38 if (Abbrevs.getData()[i].getAttribute() == Attr) { 39 DIEValue *V = Values[i]; 40 assert(isa<DIEString>(V) && "String requested. Not a string."); 41 DIEString *S = cast<DIEString>(V); 42 return S->getString(); 43 } 44 } 45 return StringRef(""); 46} 47 48/// \brief Adds the string in \p Str to the hash. This also hashes 49/// a trailing NULL with the string. 50void DIEHash::addString(StringRef Str) { 51 DEBUG(dbgs() << "Adding string " << Str << " to hash.\n"); 52 Hash.update(Str); 53 Hash.update(makeArrayRef((uint8_t)'\0')); 54} 55 56// FIXME: The LEB128 routines are copied and only slightly modified out of 57// LEB128.h. 58 59/// \brief Adds the unsigned in \p Value to the hash encoded as a ULEB128. 60void DIEHash::addULEB128(uint64_t Value) { 61 DEBUG(dbgs() << "Adding ULEB128 " << Value << " to hash.\n"); 62 do { 63 uint8_t Byte = Value & 0x7f; 64 Value >>= 7; 65 if (Value != 0) 66 Byte |= 0x80; // Mark this byte to show that more bytes will follow. 67 Hash.update(Byte); 68 } while (Value != 0); 69} 70 71/// \brief Including \p Parent adds the context of Parent to the hash.. 72void DIEHash::addParentContext(DIE *Parent) { 73 74 DEBUG(dbgs() << "Adding parent context to hash...\n"); 75 76 // [7.27.2] For each surrounding type or namespace beginning with the 77 // outermost such construct... 78 SmallVector<DIE *, 1> Parents; 79 while (Parent->getTag() != dwarf::DW_TAG_compile_unit) { 80 Parents.push_back(Parent); 81 Parent = Parent->getParent(); 82 } 83 84 // Reverse iterate over our list to go from the outermost construct to the 85 // innermost. 86 for (SmallVectorImpl<DIE *>::reverse_iterator I = Parents.rbegin(), 87 E = Parents.rend(); 88 I != E; ++I) { 89 DIE *Die = *I; 90 91 // ... Append the letter "C" to the sequence... 92 addULEB128('C'); 93 94 // ... Followed by the DWARF tag of the construct... 95 addULEB128(Die->getTag()); 96 97 // ... Then the name, taken from the DW_AT_name attribute. 98 StringRef Name = getDIEStringAttr(Die, dwarf::DW_AT_name); 99 DEBUG(dbgs() << "... adding context: " << Name << "\n"); 100 if (!Name.empty()) 101 addString(Name); 102 } 103} 104 105// Collect all of the attributes for a particular DIE in single structure. 106void DIEHash::collectAttributes(DIE *Die, DIEAttrs &Attrs) { 107 const SmallVectorImpl<DIEValue *> &Values = Die->getValues(); 108 const DIEAbbrev &Abbrevs = Die->getAbbrev(); 109 110#define COLLECT_ATTR(NAME) \ 111 Attrs.NAME.Val = Values[i]; \ 112 Attrs.NAME.Desc = &Abbrevs.getData()[i]; 113 114 for (size_t i = 0, e = Values.size(); i != e; ++i) { 115 DEBUG(dbgs() << "Attribute: " 116 << dwarf::AttributeString(Abbrevs.getData()[i].getAttribute()) 117 << " added.\n"); 118 switch (Abbrevs.getData()[i].getAttribute()) { 119 case dwarf::DW_AT_name: 120 COLLECT_ATTR(DW_AT_name); 121 break; 122 case dwarf::DW_AT_accessibility: 123 COLLECT_ATTR(DW_AT_accessibility) 124 break; 125 case dwarf::DW_AT_address_class: 126 COLLECT_ATTR(DW_AT_address_class) 127 break; 128 case dwarf::DW_AT_allocated: 129 COLLECT_ATTR(DW_AT_allocated) 130 break; 131 case dwarf::DW_AT_artificial: 132 COLLECT_ATTR(DW_AT_artificial) 133 break; 134 case dwarf::DW_AT_associated: 135 COLLECT_ATTR(DW_AT_associated) 136 break; 137 case dwarf::DW_AT_binary_scale: 138 COLLECT_ATTR(DW_AT_binary_scale) 139 break; 140 case dwarf::DW_AT_bit_offset: 141 COLLECT_ATTR(DW_AT_bit_offset) 142 break; 143 case dwarf::DW_AT_bit_size: 144 COLLECT_ATTR(DW_AT_bit_size) 145 break; 146 case dwarf::DW_AT_bit_stride: 147 COLLECT_ATTR(DW_AT_bit_stride) 148 break; 149 case dwarf::DW_AT_byte_size: 150 COLLECT_ATTR(DW_AT_byte_size) 151 break; 152 case dwarf::DW_AT_byte_stride: 153 COLLECT_ATTR(DW_AT_byte_stride) 154 break; 155 case dwarf::DW_AT_const_expr: 156 COLLECT_ATTR(DW_AT_const_expr) 157 break; 158 case dwarf::DW_AT_const_value: 159 COLLECT_ATTR(DW_AT_const_value) 160 break; 161 case dwarf::DW_AT_containing_type: 162 COLLECT_ATTR(DW_AT_containing_type) 163 break; 164 case dwarf::DW_AT_count: 165 COLLECT_ATTR(DW_AT_count) 166 break; 167 case dwarf::DW_AT_data_bit_offset: 168 COLLECT_ATTR(DW_AT_data_bit_offset) 169 break; 170 case dwarf::DW_AT_data_location: 171 COLLECT_ATTR(DW_AT_data_location) 172 break; 173 case dwarf::DW_AT_data_member_location: 174 COLLECT_ATTR(DW_AT_data_member_location) 175 break; 176 case dwarf::DW_AT_decimal_scale: 177 COLLECT_ATTR(DW_AT_decimal_scale) 178 break; 179 case dwarf::DW_AT_decimal_sign: 180 COLLECT_ATTR(DW_AT_decimal_sign) 181 break; 182 case dwarf::DW_AT_default_value: 183 COLLECT_ATTR(DW_AT_default_value) 184 break; 185 case dwarf::DW_AT_digit_count: 186 COLLECT_ATTR(DW_AT_digit_count) 187 break; 188 case dwarf::DW_AT_discr: 189 COLLECT_ATTR(DW_AT_discr) 190 break; 191 case dwarf::DW_AT_discr_list: 192 COLLECT_ATTR(DW_AT_discr_list) 193 break; 194 case dwarf::DW_AT_discr_value: 195 COLLECT_ATTR(DW_AT_discr_value) 196 break; 197 case dwarf::DW_AT_encoding: 198 COLLECT_ATTR(DW_AT_encoding) 199 break; 200 case dwarf::DW_AT_enum_class: 201 COLLECT_ATTR(DW_AT_enum_class) 202 break; 203 case dwarf::DW_AT_endianity: 204 COLLECT_ATTR(DW_AT_endianity) 205 break; 206 case dwarf::DW_AT_explicit: 207 COLLECT_ATTR(DW_AT_explicit) 208 break; 209 case dwarf::DW_AT_is_optional: 210 COLLECT_ATTR(DW_AT_is_optional) 211 break; 212 case dwarf::DW_AT_location: 213 COLLECT_ATTR(DW_AT_location) 214 break; 215 case dwarf::DW_AT_lower_bound: 216 COLLECT_ATTR(DW_AT_lower_bound) 217 break; 218 case dwarf::DW_AT_mutable: 219 COLLECT_ATTR(DW_AT_mutable) 220 break; 221 case dwarf::DW_AT_ordering: 222 COLLECT_ATTR(DW_AT_ordering) 223 break; 224 case dwarf::DW_AT_picture_string: 225 COLLECT_ATTR(DW_AT_picture_string) 226 break; 227 case dwarf::DW_AT_prototyped: 228 COLLECT_ATTR(DW_AT_prototyped) 229 break; 230 case dwarf::DW_AT_small: 231 COLLECT_ATTR(DW_AT_small) 232 break; 233 case dwarf::DW_AT_segment: 234 COLLECT_ATTR(DW_AT_segment) 235 break; 236 case dwarf::DW_AT_string_length: 237 COLLECT_ATTR(DW_AT_string_length) 238 break; 239 case dwarf::DW_AT_threads_scaled: 240 COLLECT_ATTR(DW_AT_threads_scaled) 241 break; 242 case dwarf::DW_AT_upper_bound: 243 COLLECT_ATTR(DW_AT_upper_bound) 244 break; 245 case dwarf::DW_AT_use_location: 246 COLLECT_ATTR(DW_AT_use_location) 247 break; 248 case dwarf::DW_AT_use_UTF8: 249 COLLECT_ATTR(DW_AT_use_UTF8) 250 break; 251 case dwarf::DW_AT_variable_parameter: 252 COLLECT_ATTR(DW_AT_variable_parameter) 253 break; 254 case dwarf::DW_AT_virtuality: 255 COLLECT_ATTR(DW_AT_virtuality) 256 break; 257 case dwarf::DW_AT_visibility: 258 COLLECT_ATTR(DW_AT_visibility) 259 break; 260 case dwarf::DW_AT_vtable_elem_location: 261 COLLECT_ATTR(DW_AT_vtable_elem_location) 262 break; 263 default: 264 break; 265 } 266 } 267} 268 269// Hash an individual attribute \param Attr based on the type of attribute and 270// the form. 271void DIEHash::hashAttribute(AttrEntry Attr) { 272 const DIEValue *Value = Attr.Val; 273 const DIEAbbrevData *Desc = Attr.Desc; 274 275 // TODO: Add support for types. 276 277 // Add the letter A to the hash. 278 addULEB128('A'); 279 280 // Then the attribute code and form. 281 addULEB128(Desc->getAttribute()); 282 addULEB128(Desc->getForm()); 283 284 // TODO: Add support for additional forms. 285 switch (Desc->getForm()) { 286 // TODO: We'll want to add DW_FORM_string here if we start emitting them 287 // again. 288 case dwarf::DW_FORM_strp: 289 addString(cast<DIEString>(Value)->getString()); 290 break; 291 case dwarf::DW_FORM_data1: 292 case dwarf::DW_FORM_data2: 293 case dwarf::DW_FORM_data4: 294 case dwarf::DW_FORM_data8: 295 case dwarf::DW_FORM_udata: 296 addULEB128(cast<DIEInteger>(Value)->getValue()); 297 break; 298 } 299} 300 301// Go through the attributes from \param Attrs in the order specified in 7.27.4 302// and hash them. 303void DIEHash::hashAttributes(const DIEAttrs &Attrs) { 304#define ADD_ATTR(ATTR) \ 305 { \ 306 if (ATTR.Val != 0) \ 307 hashAttribute(ATTR); \ 308 } 309 310 ADD_ATTR(Attrs.DW_AT_name); 311 ADD_ATTR(Attrs.DW_AT_accessibility); 312 ADD_ATTR(Attrs.DW_AT_address_class); 313 ADD_ATTR(Attrs.DW_AT_allocated); 314 ADD_ATTR(Attrs.DW_AT_artificial); 315 ADD_ATTR(Attrs.DW_AT_associated); 316 ADD_ATTR(Attrs.DW_AT_binary_scale); 317 ADD_ATTR(Attrs.DW_AT_bit_offset); 318 ADD_ATTR(Attrs.DW_AT_bit_size); 319 ADD_ATTR(Attrs.DW_AT_bit_stride); 320 ADD_ATTR(Attrs.DW_AT_byte_size); 321 ADD_ATTR(Attrs.DW_AT_byte_stride); 322 ADD_ATTR(Attrs.DW_AT_const_expr); 323 ADD_ATTR(Attrs.DW_AT_const_value); 324 ADD_ATTR(Attrs.DW_AT_containing_type); 325 ADD_ATTR(Attrs.DW_AT_count); 326 ADD_ATTR(Attrs.DW_AT_data_bit_offset); 327 ADD_ATTR(Attrs.DW_AT_data_location); 328 ADD_ATTR(Attrs.DW_AT_data_member_location); 329 ADD_ATTR(Attrs.DW_AT_decimal_scale); 330 ADD_ATTR(Attrs.DW_AT_decimal_sign); 331 ADD_ATTR(Attrs.DW_AT_default_value); 332 ADD_ATTR(Attrs.DW_AT_digit_count); 333 ADD_ATTR(Attrs.DW_AT_discr); 334 ADD_ATTR(Attrs.DW_AT_discr_list); 335 ADD_ATTR(Attrs.DW_AT_discr_value); 336 ADD_ATTR(Attrs.DW_AT_encoding); 337 ADD_ATTR(Attrs.DW_AT_enum_class); 338 ADD_ATTR(Attrs.DW_AT_endianity); 339 ADD_ATTR(Attrs.DW_AT_explicit); 340 ADD_ATTR(Attrs.DW_AT_is_optional); 341 ADD_ATTR(Attrs.DW_AT_location); 342 ADD_ATTR(Attrs.DW_AT_lower_bound); 343 ADD_ATTR(Attrs.DW_AT_mutable); 344 ADD_ATTR(Attrs.DW_AT_ordering); 345 ADD_ATTR(Attrs.DW_AT_picture_string); 346 ADD_ATTR(Attrs.DW_AT_prototyped); 347 ADD_ATTR(Attrs.DW_AT_small); 348 ADD_ATTR(Attrs.DW_AT_segment); 349 ADD_ATTR(Attrs.DW_AT_string_length); 350 ADD_ATTR(Attrs.DW_AT_threads_scaled); 351 ADD_ATTR(Attrs.DW_AT_upper_bound); 352 ADD_ATTR(Attrs.DW_AT_use_location); 353 ADD_ATTR(Attrs.DW_AT_use_UTF8); 354 ADD_ATTR(Attrs.DW_AT_variable_parameter); 355 ADD_ATTR(Attrs.DW_AT_virtuality); 356 ADD_ATTR(Attrs.DW_AT_visibility); 357 ADD_ATTR(Attrs.DW_AT_vtable_elem_location); 358 359 // FIXME: Add the extended attributes. 360} 361 362// Add all of the attributes for \param Die to the hash. 363void DIEHash::addAttributes(DIE *Die) { 364 DIEAttrs Attrs; 365 memset(&Attrs, 0, sizeof(Attrs)); 366 collectAttributes(Die, Attrs); 367 hashAttributes(Attrs); 368} 369 370// Compute the hash of a DIE. This is based on the type signature computation 371// given in section 7.27 of the DWARF4 standard. It is the md5 hash of a 372// flattened description of the DIE. 373void DIEHash::computeHash(DIE *Die) { 374 375 // Append the letter 'D', followed by the DWARF tag of the DIE. 376 addULEB128('D'); 377 addULEB128(Die->getTag()); 378 379 // Add each of the attributes of the DIE. 380 addAttributes(Die); 381 382 // Then hash each of the children of the DIE. 383 for (std::vector<DIE *>::const_iterator I = Die->getChildren().begin(), 384 E = Die->getChildren().end(); 385 I != E; ++I) 386 computeHash(*I); 387} 388 389/// This is based on the type signature computation given in section 7.27 of the 390/// DWARF4 standard. It is the md5 hash of a flattened description of the DIE 391/// with the exception that we are hashing only the context and the name of the 392/// type. 393uint64_t DIEHash::computeDIEODRSignature(DIE *Die) { 394 395 // Add the contexts to the hash. We won't be computing the ODR hash for 396 // function local types so it's safe to use the generic context hashing 397 // algorithm here. 398 // FIXME: If we figure out how to account for linkage in some way we could 399 // actually do this with a slight modification to the parent hash algorithm. 400 DIE *Parent = Die->getParent(); 401 if (Parent) 402 addParentContext(Parent); 403 404 // Add the current DIE information. 405 406 // Add the DWARF tag of the DIE. 407 addULEB128(Die->getTag()); 408 409 // Add the name of the type to the hash. 410 addString(getDIEStringAttr(Die, dwarf::DW_AT_name)); 411 412 // Now get the result. 413 MD5::MD5Result Result; 414 Hash.final(Result); 415 416 // ... take the least significant 8 bytes and return those. Our MD5 417 // implementation always returns its results in little endian, swap bytes 418 // appropriately. 419 return *reinterpret_cast<support::ulittle64_t *>(Result + 8); 420} 421 422/// This is based on the type signature computation given in section 7.27 of the 423/// DWARF4 standard. It is an md5 hash of the flattened description of the DIE 424/// with the inclusion of the full CU and all top level CU entities. 425uint64_t DIEHash::computeCUSignature(DIE *Die) { 426 427 // Hash the DIE. 428 computeHash(Die); 429 430 // Now return the result. 431 MD5::MD5Result Result; 432 Hash.final(Result); 433 434 // ... take the least significant 8 bytes and return those. Our MD5 435 // implementation always returns its results in little endian, swap bytes 436 // appropriately. 437 return *reinterpret_cast<support::ulittle64_t *>(Result + 8); 438} 439