EhFrameReader.cpp revision 37b74a387bb3993387029859c2d9d051c41c724e
1//===- EhFrameReader.cpp --------------------------------------------------===//
2//
3//                     The MCLinker Project
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
#include "mcld/LD/EhFrameReader.h"

#include "mcld/Fragment/NullFragment.h"
#include "mcld/MC/Input.h"
#include "mcld/LD/LDSection.h"
#include "mcld/Support/MsgHandling.h"
#include "mcld/Support/MemoryArea.h"

#include <llvm/ADT/StringRef.h>
#include <llvm/Support/Dwarf.h>
#include <llvm/Support/LEB128.h>

#include <cstring>
20
21namespace mcld {
22
23//===----------------------------------------------------------------------===//
24// Helper Functions
25//===----------------------------------------------------------------------===//
26/// skip_LEB128 - skip the first LEB128 encoded value from *pp, update *pp
27/// to the next character.
28/// @return - false if we ran off the end of the string.
29static bool skip_LEB128(EhFrameReader::ConstAddress* pp,
30                        EhFrameReader::ConstAddress pend) {
31  for (EhFrameReader::ConstAddress p = *pp; p < pend; ++p) {
32    if ((*p & 0x80) == 0x0) {
33      *pp = p + 1;
34      return true;
35    }
36  }
37  return false;
38}
39
40//===----------------------------------------------------------------------===//
41// EhFrameReader
42//===----------------------------------------------------------------------===//
43template <>
44EhFrameReader::Token EhFrameReader::scan<true>(ConstAddress pHandler,
45                                               uint64_t pOffset,
46                                               llvm::StringRef pData) const {
47  Token result;
48  result.file_off = pOffset;
49
50  const uint32_t* data = (const uint32_t*)pHandler;
51  size_t cur_idx = 0;
52
53  // Length Field
54  uint32_t length = data[cur_idx++];
55  if (length == 0x0) {
56    // terminator
57    result.kind = Terminator;
58    result.data_off = 4;
59    result.size = 4;
60    return result;
61  }
62
63  // Extended Field
64  uint64_t extended = 0x0;
65  if (length == 0xFFFFFFFF) {
66    extended = data[cur_idx++];
67    extended <<= 32;
68    extended |= data[cur_idx++];
69    result.size = extended + 12;
70    result.data_off = 16;
71    // 64-bit obj file still uses 32-bit eh_frame.
72    assert(false && "We don't support 64-bit eh_frame.");
73  } else {
74    result.size = length + 4;
75    result.data_off = 8;
76  }
77
78  // ID Field
79  uint32_t ID = data[cur_idx++];
80  if (ID == 0x0)
81    result.kind = CIE;
82  else
83    result.kind = FDE;
84
85  return result;
86}
87
88template <>
89bool EhFrameReader::read<32, true>(Input& pInput, EhFrame& pEhFrame) {
90  // Alphabet:
91  //   {CIE, FDE, CIEt}
92  //
93  // Regular Expression:
94  //   (CIE FDE*)+ CIEt
95  //
96  // Autometa:
97  //   S = {Q0, Q1, Q2}, Start = Q0, Accept = Q2
98  //
99  //              FDE
100  //             +---+
101  //        CIE   \ /   CIEt
102  //   Q0 -------> Q1 -------> Q2
103  //    |         / \           ^
104  //    |        +---+          |
105  //    |         CIE           |
106  //    +-----------------------+
107  //              CIEt
108  const State autometa[NumOfStates][NumOfTokenKinds] = {
109      //     CIE     FDE    Term  Unknown
110      {Q1, Reject, Accept, Reject},  // Q0
111      {Q1, Q1, Accept, Reject},      // Q1
112  };
113
114  const Action transition[NumOfStates][NumOfTokenKinds] = {
115      /*    CIE     FDE     Term Unknown */
116      {addCIE, reject, addTerm, reject},  // Q0
117      {addCIE, addFDE, addTerm, reject},  // Q1
118  };
119
120  LDSection& section = pEhFrame.getSection();
121  if (section.size() == 0x0) {
122    NullFragment* frag = new NullFragment();
123    pEhFrame.addFragment(*frag);
124    return true;
125  }
126
127  // get file offset and address
128  uint64_t file_off = pInput.fileOffset() + section.offset();
129  llvm::StringRef sect_reg =
130      pInput.memArea()->request(file_off, section.size());
131  ConstAddress handler = (ConstAddress)sect_reg.begin();
132
133  State cur_state = Q0;
134  while (Reject != cur_state && Accept != cur_state) {
135    Token token = scan<true>(handler, file_off, sect_reg);
136    llvm::StringRef entry =
137        pInput.memArea()->request(token.file_off, token.size);
138
139    if (!transition[cur_state][token.kind](pEhFrame, entry, token)) {
140      // fail to scan
141      debug(diag::debug_cannot_scan_eh) << pInput.name();
142      return false;
143    }
144
145    file_off += token.size;
146    handler += token.size;
147
148    if (handler == sect_reg.end()) {
149      cur_state = Accept;
150    } else if (handler > sect_reg.end()) {
151      cur_state = Reject;
152    } else {
153      cur_state = autometa[cur_state][token.kind];
154    }
155  }  // end of while
156
157  if (Reject == cur_state) {
158    // fail to parse
159    debug(diag::debug_cannot_parse_eh) << pInput.name();
160    return false;
161  }
162  return true;
163}
164
165bool EhFrameReader::addCIE(EhFrame& pEhFrame,
166                           llvm::StringRef pRegion,
167                           const EhFrameReader::Token& pToken) {
168  // skip Length, Extended Length and CIE ID.
169  ConstAddress handler = pRegion.begin() + pToken.data_off;
170  ConstAddress cie_end = pRegion.end();
171  ConstAddress handler_start = handler;
172  uint64_t pr_ptr_data_offset = pToken.data_off;
173
174  // the version should be 1 or 3
175  uint8_t version = *handler++;
176  if (version != 1 && version != 3) {
177    return false;
178  }
179
180  // Set up the Augumentation String
181  ConstAddress aug_str_front = handler;
182  ConstAddress aug_str_back = static_cast<ConstAddress>(
183      memchr(aug_str_front, '\0', cie_end - aug_str_front));
184  if (aug_str_back == NULL) {
185    return false;
186  }
187
188  // skip the Augumentation String field
189  handler = aug_str_back + 1;
190
191  // skip the Code Alignment Factor
192  if (!skip_LEB128(&handler, cie_end)) {
193    return false;
194  }
195  // skip the Data Alignment Factor
196  if (!skip_LEB128(&handler, cie_end)) {
197    return false;
198  }
199  // skip the Return Address Register
200  if (cie_end - handler < 1) {
201    return false;
202  }
203  ++handler;
204
205  llvm::StringRef augment((const char*)aug_str_front);
206
207  // we discard this CIE if the augumentation string is '\0'
208  if (augment.size() == 0) {
209    EhFrame::CIE* cie = new EhFrame::CIE(pRegion);
210    cie->setFDEEncode(llvm::dwarf::DW_EH_PE_absptr);
211    pEhFrame.addCIE(*cie);
212    pEhFrame.getCIEMap().insert(std::make_pair(pToken.file_off, cie));
213    return true;
214  }
215
216  // the Augmentation String start with 'eh' is a CIE from gcc before 3.0,
217  // in LSB Core Spec 3.0RC1. We do not support it.
218  if (augment.size() > 1 && augment[0] == 'e' && augment[1] == 'h') {
219    return false;
220  }
221
222  // parse the Augmentation String to get the FDE encodeing if 'z' existed
223  uint8_t fde_encoding = llvm::dwarf::DW_EH_PE_absptr;
224  std::string augdata;
225  std::string pr_ptr_data;
226  if (augment[0] == 'z') {
227    unsigned offset;
228    size_t augdata_size = llvm::decodeULEB128((const uint8_t*)handler, &offset);
229    handler += offset;
230    augdata = std::string((const char*)handler, augdata_size);
231
232    // parse the Augmentation String
233    for (size_t i = 1; i < augment.size(); ++i) {
234      switch (augment[i]) {
235        // LDSA encoding (1 byte)
236        case 'L': {
237          if (cie_end - handler < 1) {
238            return false;
239          }
240          ++handler;
241          break;
242        }
243        // Two arguments, the first one represents the encoding of the second
244        // argument (1 byte). The second one is the address of personality
245        // routine.
246        case 'P': {
247          // the first argument
248          if (cie_end - handler < 1) {
249            return false;
250          }
251          uint8_t per_encode = *handler;
252          ++handler;
253          // get the length of the second argument
254          uint32_t per_length = 0;
255          if ((per_encode & 0x60) == 0x60) {
256            return false;
257          }
258          switch (per_encode & 7) {
259            default:
260              return false;
261            case llvm::dwarf::DW_EH_PE_udata2:
262              per_length = 2;
263              break;
264            case llvm::dwarf::DW_EH_PE_udata4:
265              per_length = 4;
266              break;
267            case llvm::dwarf::DW_EH_PE_udata8:
268              per_length = 8;
269              break;
270            case llvm::dwarf::DW_EH_PE_absptr:
271              per_length = 4;  // pPkg.bitclass / 8;
272              break;
273          }
274          // skip the alignment
275          if (llvm::dwarf::DW_EH_PE_aligned == (per_encode & 0xf0)) {
276            uint32_t per_align = handler - cie_end;
277            per_align += per_length - 1;
278            per_align &= ~(per_length - 1);
279            if (static_cast<uint32_t>(cie_end - handler) < per_align) {
280              return false;
281            }
282            handler += per_align;
283          }
284          // skip the second argument
285          if (static_cast<uint32_t>(cie_end - handler) < per_length) {
286            return false;
287          }
288          pr_ptr_data_offset += handler - handler_start;
289          pr_ptr_data = std::string((const char*)handler, per_length);
290          handler += per_length;
291          break;
292        }  // end of case 'P'
293
294        // FDE encoding (1 byte)
295        case 'R': {
296          if (cie_end - handler < 1) {
297            return false;
298          }
299          fde_encoding = *handler;
300          switch (fde_encoding & 7) {
301            case llvm::dwarf::DW_EH_PE_udata2:
302            case llvm::dwarf::DW_EH_PE_udata4:
303            case llvm::dwarf::DW_EH_PE_udata8:
304            case llvm::dwarf::DW_EH_PE_absptr:
305              break;
306            default:
307              return false;
308          }
309          ++handler;
310          break;
311        }
312        default:
313          return false;
314      }  // end switch
315    }    // the rest chars.
316  }      // first char is 'z'
317
318  // create and push back the CIE entry
319  EhFrame::CIE* cie = new EhFrame::CIE(pRegion);
320  cie->setFDEEncode(fde_encoding);
321  cie->setPersonalityOffset(pr_ptr_data_offset);
322  cie->setPersonalityName(pr_ptr_data);
323  cie->setAugmentationData(augdata);
324  pEhFrame.addCIE(*cie);
325  pEhFrame.getCIEMap().insert(std::make_pair(pToken.file_off, cie));
326  return true;
327}
328
329bool EhFrameReader::addFDE(EhFrame& pEhFrame,
330                           llvm::StringRef pRegion,
331                           const EhFrameReader::Token& pToken) {
332  if (pToken.data_off == pRegion.size())
333    return false;
334
335  const int32_t offset =
336      *(const int32_t*)(pRegion.begin() + pToken.data_off - 4);
337  size_t cie_offset =
338      (size_t)((int64_t)(pToken.file_off + 4) - (int32_t)offset);
339
340  EhFrame::CIEMap::iterator iter = pEhFrame.getCIEMap().find(cie_offset);
341  if (iter == pEhFrame.getCIEMap().end())
342    return false;
343
344  // create and push back the FDE entry
345  EhFrame::FDE* fde = new EhFrame::FDE(pRegion, *iter->second);
346  pEhFrame.addFDE(*fde);
347  return true;
348}
349
350bool EhFrameReader::addTerm(EhFrame& pEhFrame,
351                            llvm::StringRef pRegion,
352                            const EhFrameReader::Token& pToken) {
353  return true;
354}
355
356bool EhFrameReader::reject(EhFrame& pEhFrame,
357                           llvm::StringRef pRegion,
358                           const EhFrameReader::Token& pToken) {
359  return true;
360}
361
362}  // namespace mcld
363