1//===- EhFrameReader.cpp --------------------------------------------------===//
2//
3//                     The MCLinker Project
4//
5// This file is distributed under the University of Illinois Open Source
6// License. See LICENSE.TXT for details.
7//
8//===----------------------------------------------------------------------===//
9#include <mcld/LD/EhFrameReader.h>
10
11#include <mcld/Fragment/NullFragment.h>
12#include <mcld/MC/Input.h>
13#include <mcld/LD/LDSection.h>
14#include <mcld/Support/MsgHandling.h>
15#include <mcld/Support/MemoryArea.h>
16
17#include <llvm/ADT/StringRef.h>
18#include <llvm/Support/Dwarf.h>
19#include <llvm/Support/LEB128.h>
20
21using namespace mcld;
22using namespace llvm::dwarf;
23
24//===----------------------------------------------------------------------===//
25// Helper Functions
26//===----------------------------------------------------------------------===//
27/// skip_LEB128 - skip the first LEB128 encoded value from *pp, update *pp
28/// to the next character.
29/// @return - false if we ran off the end of the string.
30/// @ref - GNU gold 1.11, ehframe.h, Eh_frame::skip_leb128.
31static bool
32skip_LEB128(EhFrameReader::ConstAddress* pp, EhFrameReader::ConstAddress pend)
33{
34  for (EhFrameReader::ConstAddress p = *pp; p < pend; ++p) {
35    if (0x0 == (*p & 0x80)) {
36      *pp = p + 1;
37      return true;
38    }
39  }
40  return false;
41}
42
43//===----------------------------------------------------------------------===//
44// EhFrameReader
45//===----------------------------------------------------------------------===//
46template<> EhFrameReader::Token
47EhFrameReader::scan<true>(ConstAddress pHandler,
48                          uint64_t pOffset,
49                          llvm::StringRef pData) const
50{
51  Token result;
52  result.file_off = pOffset;
53
54  const uint32_t* data = (const uint32_t*)pHandler;
55  size_t cur_idx = 0;
56
57  // Length Field
58  uint32_t length = data[cur_idx++];
59  if (0x0 == length) {
60    // terminator
61    result.kind = Terminator;
62    result.data_off = 4;
63    result.size = 4;
64    return result;
65  }
66
67  // Extended Field
68  uint64_t extended = 0x0;
69  if (0xFFFFFFFF == length) {
70    extended = data[cur_idx++];
71    extended <<= 32;
72    extended |= data[cur_idx++];
73    result.size = extended + 12;
74    result.data_off = 16;
75    // 64-bit obj file still uses 32-bit eh_frame.
76    assert (false && "We don't support 64-bit eh_frame.");
77  }
78  else {
79    result.size = length + 4;
80    result.data_off = 8;
81  }
82
83  // ID Field
84  uint32_t ID = data[cur_idx++];
85  if (0x0 == ID)
86    result.kind = CIE;
87  else
88    result.kind = FDE;
89
90  return result;
91}
92
93template<>
94bool EhFrameReader::read<32, true>(Input& pInput, EhFrame& pEhFrame)
95{
96  // Alphabet:
97  //   {CIE, FDE, CIEt}
98  //
99  // Regular Expression:
100  //   (CIE FDE*)+ CIEt
101  //
102  // Autometa:
103  //   S = {Q0, Q1, Q2}, Start = Q0, Accept = Q2
104  //
105  //              FDE
106  //             +---+
107  //        CIE   \ /   CIEt
108  //   Q0 -------> Q1 -------> Q2
109  //    |         / \           ^
110  //    |        +---+          |
111  //    |         CIE           |
112  //    +-----------------------+
113  //              CIEt
114  const State autometa[NumOfStates][NumOfTokenKinds] = {
115  //     CIE     FDE    Term  Unknown
116    {     Q1, Reject, Accept, Reject }, // Q0
117    {     Q1,     Q1, Accept, Reject }, // Q1
118  };
119
120  const Action transition[NumOfStates][NumOfTokenKinds] = {
121   /*    CIE     FDE     Term Unknown */
122    { addCIE, reject, addTerm, reject}, // Q0
123    { addCIE, addFDE, addTerm, reject}, // Q1
124  };
125
126  LDSection& section = pEhFrame.getSection();
127  if (section.size() == 0x0) {
128    NullFragment* frag = new NullFragment();
129    pEhFrame.addFragment(*frag);
130    return true;
131  }
132
133  // get file offset and address
134  uint64_t file_off = pInput.fileOffset() + section.offset();
135  llvm::StringRef sect_reg =
136      pInput.memArea()->request(file_off, section.size());
137  ConstAddress handler = (ConstAddress)sect_reg.begin();
138
139  State cur_state = Q0;
140  while (Reject != cur_state && Accept != cur_state) {
141
142    Token token = scan<true>(handler, file_off, sect_reg);
143    llvm::StringRef entry = pInput.memArea()->request(token.file_off, token.size);
144
145    if (!transition[cur_state][token.kind](pEhFrame, entry, token)) {
146      // fail to scan
147      debug(diag::debug_cannot_scan_eh) << pInput.name();
148      return false;
149    }
150
151    file_off += token.size;
152    handler += token.size;
153
154    if (handler == sect_reg.end())
155      cur_state = Accept;
156    else if (handler > sect_reg.end()) {
157      cur_state = Reject;
158    }
159    else
160      cur_state = autometa[cur_state][token.kind];
161  } // end of while
162
163  if (Reject == cur_state) {
164    // fail to parse
165    debug(diag::debug_cannot_parse_eh) << pInput.name();
166    return false;
167  }
168  return true;
169}
170
171bool EhFrameReader::addCIE(EhFrame& pEhFrame,
172                           llvm::StringRef pRegion,
173                           const EhFrameReader::Token& pToken)
174{
175  // skip Length, Extended Length and CIE ID.
176  ConstAddress handler = pRegion.begin() + pToken.data_off;
177  ConstAddress cie_end = pRegion.end();
178  ConstAddress handler_start = handler;
179  uint64_t pr_ptr_data_offset = pToken.data_off;
180
181  // the version should be 1 or 3
182  uint8_t version = *handler++;
183  if (1 != version && 3 != version) {
184    return false;
185  }
186
187  // Set up the Augumentation String
188  ConstAddress aug_str_front = handler;
189  ConstAddress aug_str_back  = static_cast<ConstAddress>(
190                         memchr(aug_str_front, '\0', cie_end - aug_str_front));
191  if (NULL == aug_str_back) {
192    return false;
193  }
194
195  // skip the Augumentation String field
196  handler = aug_str_back + 1;
197
198  // skip the Code Alignment Factor
199  if (!skip_LEB128(&handler, cie_end)) {
200    return false;
201  }
202  // skip the Data Alignment Factor
203  if (!skip_LEB128(&handler, cie_end)) {
204    return false;
205  }
206  // skip the Return Address Register
207  if (cie_end - handler < 1) {
208    return false;
209  }
210  ++handler;
211
212  llvm::StringRef augment((const char*)aug_str_front);
213
214  // we discard this CIE if the augumentation string is '\0'
215  if (0 == augment.size()) {
216    EhFrame::CIE* cie = new EhFrame::CIE(pRegion);
217    cie->setFDEEncode(llvm::dwarf::DW_EH_PE_absptr);
218    pEhFrame.addCIE(*cie);
219    pEhFrame.getCIEMap().insert(std::make_pair(pToken.file_off, cie));
220    return true;
221  }
222
223  // the Augmentation String start with 'eh' is a CIE from gcc before 3.0,
224  // in LSB Core Spec 3.0RC1. We do not support it.
225  if (augment.size() > 1 && augment[0] == 'e' && augment[1] == 'h') {
226    return false;
227  }
228
229  // parse the Augmentation String to get the FDE encodeing if 'z' existed
230  uint8_t fde_encoding = llvm::dwarf::DW_EH_PE_absptr;
231  std::string augdata;
232  std::string pr_ptr_data;
233  if ('z' == augment[0]) {
234    unsigned offset;
235    size_t augdata_size = llvm::decodeULEB128((const uint8_t*)handler, &offset);
236    handler += offset;
237    augdata = std::string((const char*)handler, augdata_size);
238
239    // parse the Augmentation String
240    for (size_t i = 1; i < augment.size(); ++i) {
241      switch (augment[i]) {
242        // LDSA encoding (1 byte)
243        case 'L': {
244          if (cie_end - handler < 1) {
245            return false;
246          }
247          ++handler;
248          break;
249        }
250        // Two arguments, the first one represents the encoding of the second
251        // argument (1 byte). The second one is the address of personality
252        // routine.
253        case 'P': {
254          // the first argument
255          if (cie_end - handler < 1) {
256            return false;
257          }
258          uint8_t per_encode = *handler;
259          ++handler;
260          // get the length of the second argument
261          uint32_t per_length = 0;
262          if (0x60 == (per_encode & 0x60)) {
263            return false;
264          }
265          switch (per_encode & 7) {
266            default:
267              return false;
268            case llvm::dwarf::DW_EH_PE_udata2:
269              per_length = 2;
270              break;
271            case llvm::dwarf::DW_EH_PE_udata4:
272              per_length = 4;
273              break;
274            case llvm::dwarf::DW_EH_PE_udata8:
275              per_length = 8;
276              break;
277            case llvm::dwarf::DW_EH_PE_absptr:
278              per_length = 4; // pPkg.bitclass / 8;
279              break;
280          }
281          // skip the alignment
282          if (llvm::dwarf::DW_EH_PE_aligned == (per_encode & 0xf0)) {
283            uint32_t per_align = handler - cie_end;
284            per_align += per_length - 1;
285            per_align &= ~(per_length -1);
286            if (static_cast<uint32_t>(cie_end - handler) < per_align) {
287              return false;
288            }
289            handler += per_align;
290          }
291          // skip the second argument
292          if (static_cast<uint32_t>(cie_end - handler) < per_length) {
293            return false;
294          }
295          pr_ptr_data_offset += handler - handler_start;
296          pr_ptr_data = std::string((const char*)handler, per_length);
297          handler += per_length;
298          break;
299        } // end of case 'P'
300
301        // FDE encoding (1 byte)
302        case 'R': {
303          if (cie_end - handler < 1) {
304            return false;
305          }
306          fde_encoding = *handler;
307          switch (fde_encoding & 7) {
308            case llvm::dwarf::DW_EH_PE_udata2:
309            case llvm::dwarf::DW_EH_PE_udata4:
310            case llvm::dwarf::DW_EH_PE_udata8:
311            case llvm::dwarf::DW_EH_PE_absptr:
312              break;
313            default:
314              return false;
315          }
316          ++handler;
317          break;
318        }
319        default:
320          return false;
321      } // end switch
322    } // the rest chars.
323  } // first char is 'z'
324
325  // create and push back the CIE entry
326  EhFrame::CIE* cie = new EhFrame::CIE(pRegion);
327  cie->setFDEEncode(fde_encoding);
328  cie->setPersonalityOffset(pr_ptr_data_offset);
329  cie->setPersonalityName(pr_ptr_data);
330  cie->setAugmentationData(augdata);
331  pEhFrame.addCIE(*cie);
332  pEhFrame.getCIEMap().insert(std::make_pair(pToken.file_off, cie));
333  return true;
334}
335
336bool EhFrameReader::addFDE(EhFrame& pEhFrame,
337                           llvm::StringRef pRegion,
338                           const EhFrameReader::Token& pToken)
339{
340  if (pToken.data_off == pRegion.size())
341    return false;
342
343  const int32_t offset = *(const int32_t*) (pRegion.begin() + pToken.data_off
344                                            - 4);
345  size_t cie_offset = (size_t) ((int64_t) (pToken.file_off + 4) -
346                                (int32_t) offset);
347
348  EhFrame::CIEMap::iterator iter = pEhFrame.getCIEMap().find(cie_offset);
349  if (iter == pEhFrame.getCIEMap().end())
350    return false;
351
352  // create and push back the FDE entry
353  EhFrame::FDE* fde = new EhFrame::FDE(pRegion, *iter->second);
354  pEhFrame.addFDE(*fde);
355  return true;
356}
357
358bool EhFrameReader::addTerm(EhFrame& pEhFrame,
359                            llvm::StringRef pRegion,
360                            const EhFrameReader::Token& pToken)
361{
362  return true;
363}
364
365bool EhFrameReader::reject(EhFrame& pEhFrame,
366                           llvm::StringRef pRegion,
367                           const EhFrameReader::Token& pToken)
368{
369  return true;
370}
371