1// Copyright (c) 2006, Google Inc.
2// All rights reserved.
3//
4// Redistribution and use in source and binary forms, with or without
5// modification, are permitted provided that the following conditions are
6// met:
7//
8//     * Redistributions of source code must retain the above copyright
9// notice, this list of conditions and the following disclaimer.
10//     * Redistributions in binary form must reproduce the above
11// copyright notice, this list of conditions and the following disclaimer
12// in the documentation and/or other materials provided with the
13// distribution.
14//     * Neither the name of Google Inc. nor the names of its
15// contributors may be used to endorse or promote products derived from
16// this software without specific prior written permission.
17//
18// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
21// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
22// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
23// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
24// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
25// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
26// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
27// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
28// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
29//
30// file_id.cc: Return a unique identifier for a file
31//
32// See file_id.h for documentation
33//
34
35#include "common/linux/file_id.h"
36
37#include <arpa/inet.h>
38#include <assert.h>
39#include <string.h>
40
41#include <algorithm>
42
43#include "common/linux/elf_gnu_compat.h"
44#include "common/linux/elfutils.h"
45#include "common/linux/linux_libc_support.h"
46#include "common/linux/memory_mapped_file.h"
47#include "third_party/lss/linux_syscall_support.h"
48
49namespace google_breakpad {
50
51FileID::FileID(const char* path) : path_(path) {}
52
// ELF note name and desc fields are padded to 32-bit (4-byte) boundaries.
// NOTE_PADDING(a) rounds |a| up to the next multiple of 4. The argument is
// parenthesized so that expressions containing operators with lower
// precedence than '+' (e.g. '|' or '?:') expand as intended.
#define NOTE_PADDING(a) (((a) + 3) & ~3)
55
56// These functions are also used inside the crashed process, so be safe
57// and use the syscall/libc wrappers instead of direct syscalls or libc.
58
59template<typename ElfClass>
60static bool ElfClassBuildIDNoteIdentifier(const void *section, size_t length,
61                                          uint8_t identifier[kMDGUIDSize]) {
62  typedef typename ElfClass::Nhdr Nhdr;
63
64  const void* section_end = reinterpret_cast<const char*>(section) + length;
65  const Nhdr* note_header = reinterpret_cast<const Nhdr*>(section);
66  while (reinterpret_cast<const void *>(note_header) < section_end) {
67    if (note_header->n_type == NT_GNU_BUILD_ID)
68      break;
69    note_header = reinterpret_cast<const Nhdr*>(
70                  reinterpret_cast<const char*>(note_header) + sizeof(Nhdr) +
71                  NOTE_PADDING(note_header->n_namesz) +
72                  NOTE_PADDING(note_header->n_descsz));
73  }
74  if (reinterpret_cast<const void *>(note_header) >= section_end ||
75      note_header->n_descsz == 0) {
76    return false;
77  }
78
79  const char* build_id = reinterpret_cast<const char*>(note_header) +
80    sizeof(Nhdr) + NOTE_PADDING(note_header->n_namesz);
81  // Copy as many bits of the build ID as will fit
82  // into the GUID space.
83  my_memset(identifier, 0, kMDGUIDSize);
84  memcpy(identifier, build_id,
85         std::min(kMDGUIDSize, (size_t)note_header->n_descsz));
86
87  return true;
88}
89
90// Attempt to locate a .note.gnu.build-id section in an ELF binary
91// and copy as many bytes of it as will fit into |identifier|.
92static bool FindElfBuildIDNote(const void *elf_mapped_base,
93                               uint8_t identifier[kMDGUIDSize]) {
94  void* note_section;
95  size_t note_size;
96  int elfclass;
97  if ((!FindElfSegment(elf_mapped_base, PT_NOTE,
98                       (const void**)&note_section, &note_size, &elfclass) ||
99      note_size == 0)  &&
100      (!FindElfSection(elf_mapped_base, ".note.gnu.build-id", SHT_NOTE,
101                       (const void**)&note_section, &note_size, &elfclass) ||
102      note_size == 0)) {
103    return false;
104  }
105
106  if (elfclass == ELFCLASS32) {
107    return ElfClassBuildIDNoteIdentifier<ElfClass32>(note_section, note_size,
108                                                     identifier);
109  } else if (elfclass == ELFCLASS64) {
110    return ElfClassBuildIDNoteIdentifier<ElfClass64>(note_section, note_size,
111                                                     identifier);
112  }
113
114  return false;
115}
116
117// Attempt to locate the .text section of an ELF binary and generate
118// a simple hash by XORing the first page worth of bytes into |identifier|.
119static bool HashElfTextSection(const void *elf_mapped_base,
120                               uint8_t identifier[kMDGUIDSize]) {
121  void* text_section;
122  size_t text_size;
123  if (!FindElfSection(elf_mapped_base, ".text", SHT_PROGBITS,
124                      (const void**)&text_section, &text_size, NULL) ||
125      text_size == 0) {
126    return false;
127  }
128
129  my_memset(identifier, 0, kMDGUIDSize);
130  const uint8_t* ptr = reinterpret_cast<const uint8_t*>(text_section);
131  const uint8_t* ptr_end = ptr + std::min(text_size, static_cast<size_t>(4096));
132  while (ptr < ptr_end) {
133    for (unsigned i = 0; i < kMDGUIDSize; i++)
134      identifier[i] ^= ptr[i];
135    ptr += kMDGUIDSize;
136  }
137  return true;
138}
139
140// static
141bool FileID::ElfFileIdentifierFromMappedFile(const void* base,
142                                             uint8_t identifier[kMDGUIDSize]) {
143  // Look for a build id note first.
144  if (FindElfBuildIDNote(base, identifier))
145    return true;
146
147  // Fall back on hashing the first page of the text section.
148  return HashElfTextSection(base, identifier);
149}
150
151bool FileID::ElfFileIdentifier(uint8_t identifier[kMDGUIDSize]) {
152  MemoryMappedFile mapped_file(path_.c_str(), 0);
153  if (!mapped_file.data())  // Should probably check if size >= ElfW(Ehdr)?
154    return false;
155
156  return ElfFileIdentifierFromMappedFile(mapped_file.data(), identifier);
157}
158
159// static
160void FileID::ConvertIdentifierToString(const uint8_t identifier[kMDGUIDSize],
161                                       char* buffer, int buffer_length) {
162  uint8_t identifier_swapped[kMDGUIDSize];
163
164  // Endian-ness swap to match dump processor expectation.
165  memcpy(identifier_swapped, identifier, kMDGUIDSize);
166  uint32_t* data1 = reinterpret_cast<uint32_t*>(identifier_swapped);
167  *data1 = htonl(*data1);
168  uint16_t* data2 = reinterpret_cast<uint16_t*>(identifier_swapped + 4);
169  *data2 = htons(*data2);
170  uint16_t* data3 = reinterpret_cast<uint16_t*>(identifier_swapped + 6);
171  *data3 = htons(*data3);
172
173  int buffer_idx = 0;
174  for (unsigned int idx = 0;
175       (buffer_idx < buffer_length) && (idx < kMDGUIDSize);
176       ++idx) {
177    int hi = (identifier_swapped[idx] >> 4) & 0x0F;
178    int lo = (identifier_swapped[idx]) & 0x0F;
179
180    if (idx == 4 || idx == 6 || idx == 8 || idx == 10)
181      buffer[buffer_idx++] = '-';
182
183    buffer[buffer_idx++] = (hi >= 10) ? 'A' + hi - 10 : '0' + hi;
184    buffer[buffer_idx++] = (lo >= 10) ? 'A' + lo - 10 : '0' + lo;
185  }
186
187  // NULL terminate
188  buffer[(buffer_idx < buffer_length) ? buffer_idx : buffer_idx - 1] = 0;
189}
190
191}  // namespace google_breakpad
192