1//===- EhFrameReader.cpp --------------------------------------------------===//
2//
3//                     The MCLinker Project
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
#include "mcld/LD/EhFrameReader.h"

#include "mcld/Fragment/NullFragment.h"
#include "mcld/MC/Input.h"
#include "mcld/LD/LDSection.h"
#include "mcld/Support/MsgHandling.h"
#include "mcld/Support/MemoryArea.h"

#include <llvm/ADT/StringRef.h>
#include <llvm/Support/Dwarf.h>
#include <llvm/Support/LEB128.h>

#include <cstring>
20
21namespace mcld {
22
23//===----------------------------------------------------------------------===//
24// Helper Functions
25//===----------------------------------------------------------------------===//
26/// skip_LEB128 - skip the first LEB128 encoded value from *pp, update *pp
27/// to the next character.
28/// @return - false if we ran off the end of the string.
29static bool skip_LEB128(EhFrameReader::ConstAddress* pp,
30                        EhFrameReader::ConstAddress pend) {
31  for (EhFrameReader::ConstAddress p = *pp; p < pend; ++p) {
32    if ((*p & 0x80) == 0x0) {
33      *pp = p + 1;
34      return true;
35    }
36  }
37  return false;
38}
39
40//===----------------------------------------------------------------------===//
41// EhFrameReader
42//===----------------------------------------------------------------------===//
43template <>
44EhFrameReader::Token EhFrameReader::scan<true>(ConstAddress pHandler,
45                                               uint64_t pOffset,
46                                               llvm::StringRef pData) const {
47  Token result;
48  result.file_off = pOffset;
49
50  const uint32_t* data = (const uint32_t*)pHandler;
51  size_t cur_idx = 0;
52
53  // Length Field
54  uint32_t length = data[cur_idx++];
55  if (length == 0x0) {
56    // terminator
57    result.kind = Terminator;
58    result.data_off = 4;
59    result.size = 4;
60    return result;
61  }
62
63  // Extended Field
64  uint64_t extended = 0x0;
65  if (length == 0xFFFFFFFF) {
66    extended = data[cur_idx++];
67    extended <<= 32;
68    extended |= data[cur_idx++];
69    result.size = extended + 12;
70    result.data_off = 16;
71    // 64-bit obj file still uses 32-bit eh_frame.
72    assert(false && "We don't support 64-bit eh_frame.");
73  } else {
74    result.size = length + 4;
75    result.data_off = 8;
76  }
77
78  // ID Field
79  uint32_t ID = data[cur_idx++];
80  if (ID == 0x0)
81    result.kind = CIE;
82  else
83    result.kind = FDE;
84
85  return result;
86}
87
88template <>
89bool EhFrameReader::read<32, true>(Input& pInput, EhFrame& pEhFrame) {
90  // Alphabet:
91  //   {CIE, FDE, CIEt}
92  //
93  // Regular Expression:
94  //   (CIE FDE*)+ CIEt
95  //
96  // Autometa:
97  //   S = {Q0, Q1, Q2}, Start = Q0, Accept = Q2
98  //
99  //              FDE
100  //             +---+
101  //        CIE   \ /   CIEt
102  //   Q0 -------> Q1 -------> Q2
103  //    |         / \           ^
104  //    |        +---+          |
105  //    |         CIE           |
106  //    +-----------------------+
107  //              CIEt
108  const State autometa[NumOfStates][NumOfTokenKinds] = {
109      //     CIE     FDE    Term  Unknown
110      {Q1, Reject, Accept, Reject},  // Q0
111      {Q1, Q1, Accept, Reject},      // Q1
112  };
113
114  const Action transition[NumOfStates][NumOfTokenKinds] = {
115      /*    CIE     FDE     Term Unknown */
116      {addCIE, reject, addTerm, reject},  // Q0
117      {addCIE, addFDE, addTerm, reject},  // Q1
118  };
119
120  LDSection& section = pEhFrame.getSection();
121  if (section.size() == 0x0) {
122    NullFragment* frag = new NullFragment();
123    pEhFrame.addFragment(*frag);
124    return true;
125  }
126
127  // get file offset and address
128  uint64_t file_off = pInput.fileOffset() + section.offset();
129  llvm::StringRef sect_reg =
130      pInput.memArea()->request(file_off, section.size());
131  ConstAddress handler = (ConstAddress)sect_reg.begin();
132
133  State cur_state = Q0;
134  while (Reject != cur_state && Accept != cur_state) {
135    Token token = scan<true>(handler, file_off, sect_reg);
136    llvm::StringRef entry =
137        pInput.memArea()->request(token.file_off, token.size);
138
139    if (!transition[cur_state][token.kind](pEhFrame, entry, token)) {
140      // fail to scan
141      debug(diag::debug_cannot_scan_eh) << pInput.name();
142      return false;
143    }
144
145    file_off += token.size;
146    handler += token.size;
147
148    if (handler == sect_reg.end()) {
149      cur_state = Accept;
150    } else if (handler > sect_reg.end()) {
151      cur_state = Reject;
152    } else {
153      cur_state = autometa[cur_state][token.kind];
154    }
155  }  // end of while
156
157  if (Reject == cur_state) {
158    // fail to parse
159    debug(diag::debug_cannot_parse_eh) << pInput.name();
160    return false;
161  }
162  return true;
163}
164
165bool EhFrameReader::addCIE(EhFrame& pEhFrame,
166                           llvm::StringRef pRegion,
167                           const EhFrameReader::Token& pToken) {
168  // skip Length, Extended Length and CIE ID.
169  ConstAddress handler = pRegion.begin() + pToken.data_off;
170  ConstAddress cie_end = pRegion.end();
171  ConstAddress handler_start = handler;
172  uint64_t pr_ptr_data_offset = pToken.data_off;
173
174  // the version should be 1 or 3
175  uint8_t version = *handler++;
176  if (version != 1 && version != 3) {
177    return false;
178  }
179
180  // Set up the Augumentation String
181  ConstAddress aug_str_front = handler;
182  ConstAddress aug_str_back = static_cast<ConstAddress>(
183      memchr(aug_str_front, '\0', cie_end - aug_str_front));
184  if (aug_str_back == NULL) {
185    return false;
186  }
187
188  // skip the Augumentation String field
189  handler = aug_str_back + 1;
190
191  // skip the Code Alignment Factor
192  if (!skip_LEB128(&handler, cie_end)) {
193    return false;
194  }
195  // skip the Data Alignment Factor
196  if (!skip_LEB128(&handler, cie_end)) {
197    return false;
198  }
199  // skip the Return Address Register
200  if (version == 1) {
201    if (cie_end - handler < 1)
202      return false;
203    ++handler;
204  } else {
205    if (!skip_LEB128(&handler, cie_end))
206      return false;
207  }
208
209  llvm::StringRef augment((const char*)aug_str_front);
210
211  // we discard this CIE if the augumentation string is '\0'
212  if (augment.size() == 0) {
213    EhFrame::CIE* cie = new EhFrame::CIE(pRegion);
214    cie->setFDEEncode(llvm::dwarf::DW_EH_PE_absptr);
215    pEhFrame.addCIE(*cie);
216    pEhFrame.getCIEMap().insert(std::make_pair(pToken.file_off, cie));
217    return true;
218  }
219
220  // the Augmentation String start with 'eh' is a CIE from gcc before 3.0,
221  // in LSB Core Spec 3.0RC1. We do not support it.
222  if (augment.size() > 1 && augment[0] == 'e' && augment[1] == 'h') {
223    return false;
224  }
225
226  // parse the Augmentation String to get the FDE encodeing if 'z' existed
227  uint8_t fde_encoding = llvm::dwarf::DW_EH_PE_absptr;
228  std::string augdata;
229  std::string pr_ptr_data;
230  if (augment[0] == 'z') {
231    unsigned offset;
232    size_t augdata_size = llvm::decodeULEB128((const uint8_t*)handler, &offset);
233    handler += offset;
234    augdata = std::string((const char*)handler, augdata_size);
235
236    // parse the Augmentation String
237    for (size_t i = 1; i < augment.size(); ++i) {
238      switch (augment[i]) {
239        // LDSA encoding (1 byte)
240        case 'L': {
241          if (cie_end - handler < 1) {
242            return false;
243          }
244          ++handler;
245          break;
246        }
247        // Two arguments, the first one represents the encoding of the second
248        // argument (1 byte). The second one is the address of personality
249        // routine.
250        case 'P': {
251          // the first argument
252          if (cie_end - handler < 1) {
253            return false;
254          }
255          uint8_t per_encode = *handler;
256          ++handler;
257          // get the length of the second argument
258          uint32_t per_length = 0;
259          if ((per_encode & 0x60) == 0x60) {
260            return false;
261          }
262          switch (per_encode & 7) {
263            default:
264              return false;
265            case llvm::dwarf::DW_EH_PE_udata2:
266              per_length = 2;
267              break;
268            case llvm::dwarf::DW_EH_PE_udata4:
269              per_length = 4;
270              break;
271            case llvm::dwarf::DW_EH_PE_udata8:
272              per_length = 8;
273              break;
274            case llvm::dwarf::DW_EH_PE_absptr:
275              per_length = 4;  // pPkg.bitclass / 8;
276              break;
277          }
278          // skip the alignment
279          if (llvm::dwarf::DW_EH_PE_aligned == (per_encode & 0xf0)) {
280            uint32_t per_align = handler - cie_end;
281            per_align += per_length - 1;
282            per_align &= ~(per_length - 1);
283            if (static_cast<uint32_t>(cie_end - handler) < per_align) {
284              return false;
285            }
286            handler += per_align;
287          }
288          // skip the second argument
289          if (static_cast<uint32_t>(cie_end - handler) < per_length) {
290            return false;
291          }
292          pr_ptr_data_offset += handler - handler_start;
293          pr_ptr_data = std::string((const char*)handler, per_length);
294          handler += per_length;
295          break;
296        }  // end of case 'P'
297
298        // FDE encoding (1 byte)
299        case 'R': {
300          if (cie_end - handler < 1) {
301            return false;
302          }
303          fde_encoding = *handler;
304          switch (fde_encoding & 7) {
305            case llvm::dwarf::DW_EH_PE_udata2:
306            case llvm::dwarf::DW_EH_PE_udata4:
307            case llvm::dwarf::DW_EH_PE_udata8:
308            case llvm::dwarf::DW_EH_PE_absptr:
309              break;
310            default:
311              return false;
312          }
313          ++handler;
314          break;
315        }
316        default:
317          return false;
318      }  // end switch
319    }    // the rest chars.
320  }      // first char is 'z'
321
322  // create and push back the CIE entry
323  EhFrame::CIE* cie = new EhFrame::CIE(pRegion);
324  cie->setFDEEncode(fde_encoding);
325  cie->setPersonalityOffset(pr_ptr_data_offset);
326  cie->setPersonalityName(pr_ptr_data);
327  cie->setAugmentationData(augdata);
328  pEhFrame.addCIE(*cie);
329  pEhFrame.getCIEMap().insert(std::make_pair(pToken.file_off, cie));
330  return true;
331}
332
333bool EhFrameReader::addFDE(EhFrame& pEhFrame,
334                           llvm::StringRef pRegion,
335                           const EhFrameReader::Token& pToken) {
336  if (pToken.data_off == pRegion.size())
337    return false;
338
339  const int32_t offset =
340      *(const int32_t*)(pRegion.begin() + pToken.data_off - 4);
341  size_t cie_offset =
342      (size_t)((int64_t)(pToken.file_off + 4) - (int32_t)offset);
343
344  EhFrame::CIEMap::iterator iter = pEhFrame.getCIEMap().find(cie_offset);
345  if (iter == pEhFrame.getCIEMap().end())
346    return false;
347
348  // create and push back the FDE entry
349  EhFrame::FDE* fde = new EhFrame::FDE(pRegion, *iter->second);
350  pEhFrame.addFDE(*fde);
351  return true;
352}
353
354bool EhFrameReader::addTerm(EhFrame& pEhFrame,
355                            llvm::StringRef pRegion,
356                            const EhFrameReader::Token& pToken) {
357  return true;
358}
359
360bool EhFrameReader::reject(EhFrame& pEhFrame,
361                           llvm::StringRef pRegion,
362                           const EhFrameReader::Token& pToken) {
363  return true;
364}
365
366}  // namespace mcld
367