1/* -*- mode: C; c-basic-offset: 3; -*- */
2
3/*--------------------------------------------------------------------*/
4/*--- Reading of ARM(32) EXIDX unwind information      readexidx.c ---*/
5/*--------------------------------------------------------------------*/
6
7/*
8   This file is part of Valgrind, a dynamic binary instrumentation
9   framework.
10
11   Copyright (C) 2014-2017 Mozilla Foundation
12
13   This program is free software; you can redistribute it and/or
14   modify it under the terms of the GNU General Public License as
15   published by the Free Software Foundation; either version 2 of the
16   License, or (at your option) any later version.
17
18   This program is distributed in the hope that it will be useful, but
19   WITHOUT ANY WARRANTY; without even the implied warranty of
20   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21   General Public License for more details.
22
23   You should have received a copy of the GNU General Public License
24   along with this program; if not, write to the Free Software
25   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26   02111-1307, USA.
27
28   The GNU General Public License is contained in the file COPYING.
29*/
30
31/* libunwind - a platform-independent unwind library
32   Copyright 2011 Linaro Limited
33
34This file is part of libunwind.
35
36Permission is hereby granted, free of charge, to any person obtaining
37a copy of this software and associated documentation files (the
38"Software"), to deal in the Software without restriction, including
39without limitation the rights to use, copy, modify, merge, publish,
40distribute, sublicense, and/or sell copies of the Software, and to
41permit persons to whom the Software is furnished to do so, subject to
42the following conditions:
43
44The above copyright notice and this permission notice shall be
45included in all copies or substantial portions of the Software.
46
47THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
48EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
49MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
50NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
51LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
52OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
53WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.  */
54
55
56// Copyright (c) 2010 Google Inc.
57// All rights reserved.
58//
59// Redistribution and use in source and binary forms, with or without
60// modification, are permitted provided that the following conditions are
61// met:
62//
63//     * Redistributions of source code must retain the above copyright
64// notice, this list of conditions and the following disclaimer.
65//     * Redistributions in binary form must reproduce the above
66// copyright notice, this list of conditions and the following disclaimer
67// in the documentation and/or other materials provided with the
68// distribution.
69//     * Neither the name of Google Inc. nor the names of its
70// contributors may be used to endorse or promote products derived from
71// this software without specific prior written permission.
72//
73// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
74// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
75// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
76// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
77// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
78// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
79// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
80// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
81// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
82// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
83// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
84
85
86// Derived originally from libunwind, with very extensive modifications.
87/* Contributed by Julian Seward <jseward@acm.org> */
88
89
90// This file translates EXIDX unwind information into the same format
91// that Valgrind uses for CFI information.  Hence Valgrind's CFI
92// unwinding abilities also become usable for EXIDX.
93//
94// See: "Exception Handling ABI for the ARM Architecture", ARM IHI 0038A
95// http://infocenter.arm.com/help/topic/com.arm.doc.ihi0038a/IHI0038A_ehabi.pdf
96
97// EXIDX data is presented in two parts:
98//
99// * an index table.  This contains two words per routine,
100//   the first of which identifies the routine, and the second
101//   of which is a reference to the unwind bytecode.  If the
102//   bytecode is very compact -- 3 bytes or less -- it can be
103//   stored directly in the second word.
104//
105// * an area containing the unwind bytecodes.
106//
107// General flow is: ML_(read_exidx) iterates over all
108// of the index table entries (pairs).  For each entry, it:
109//
110// * calls ExtabEntryExtract to copy the bytecode out into
111// an intermediate buffer.
112
// * uses ExtabEntryDecode to parse the intermediate
//   buffer.  Each bytecode instruction is bundled into an
//   ExtabData structure, and handed to ..
116//
117// * .. TranslateCmd, which generates the pseudo-CFI
118//   records that Valgrind stores.
119
120// This file is derived from the following files in the Mozilla tree
121// toolkit/crashreporter/google-breakpad:
122//   src/common/arm_ex_to_module.cc
123//   src/common/arm_ex_reader.cc
124
125
126#if defined(VGA_arm)
127
128#include "pub_core_basics.h"
129#include "pub_core_libcbase.h"
130#include "pub_core_libcprint.h"
131#include "pub_core_libcassert.h"
132#include "pub_core_options.h"
133
134#include "priv_storage.h"
135#include "priv_readexidx.h"
136
137
138static void complain ( const HChar* str )
139{
140   if (!VG_(clo_xml) && VG_(clo_verbosity) > 1)
141      VG_(message)(Vg_UserMsg,
142                   "  Warning: whilst reading EXIDX: %s\n", str);
143}
144
145
146/*------------------------------------------------------------*/
147/*--- MemoryRange                                          ---*/
148/*------------------------------------------------------------*/
149
150typedef  struct { Addr start; SizeT len; }  MemoryRange;
151
152/* Initialise |mr| for [start .. start+len).  Zero ranges are allowed,
153   but wraparounds are not.  Returns True on success. */
154static Bool MemoryRange__init ( /*OUT*/MemoryRange* mr,
155                                const void* startV, SizeT len )
156{
157   VG_(memset)(mr, 0, sizeof(*mr));
158   /* This relies on Addr being unsigned. */
159   Addr start = (Addr)startV;
160   if (len > 0 && start + len - 1 < start) {
161      return False;
162   }
163   mr->start = start;
164   mr->len   = len;
165   return True;
166}
167
168static Bool MemoryRange__covers ( MemoryRange* mr,
169                                  const void* startV, SizeT len )
170{
171   vg_assert(len > 0);
172   if (mr->len == 0) {
173      return False;
174   }
175   Addr start = (Addr)startV;
176   return start >= mr->start && start + len - 1 <= mr->start + mr->len - 1;
177}
178
179
180/*------------------------------------------------------------*/
181/*--- (Pass 1 of 3) The EXIDX extractor                    ---*/
182/*------------------------------------------------------------*/
183
/* Index table data word value marking a function as CANT_UNWIND. */
#define ARM_EXIDX_CANT_UNWIND 0x00000001
/* Bit 31 of a data word: set for the ARM-defined compact model. */
#define ARM_EXIDX_COMPACT     0x80000000
/* The unwind bytecode "FINISH" opcode. */
#define ARM_EXTBL_OP_FINISH   0xb0
#define ARM_EXIDX_TABLE_LIMIT (255*4)
188
189/* These are in the ARM-defined format, so their layout is important. */
190typedef
191   struct { UInt addr; UInt data; }
192   ExidxEntry;
193
194
/* Outcome codes returned by ExtabEntryExtract. */
typedef
   enum {
      ExSuccess        = 1, // success
      ExInBufOverflow  = 2, // out-of-range while reading .exidx
      ExOutBufOverflow = 3, // output buffer is too small
      ExCantUnwind     = 4, // this function is marked CANT_UNWIND
      ExCantRepresent  = 5, // entry valid, but we can't represent it
      ExInvalid        = 6  // entry is invalid
   }
   ExExtractResult;
205
206
207/* Helper function for fishing bits out of the EXIDX representation. */
208static const void* Prel31ToAddr(const void* addr)
209{
210   UInt offset32 = *(const UInt*)addr;
211   // sign extend offset32[30:0] to 64 bits -- copy bit 30 to positions
212   // 63:31 inclusive.
213   ULong offset64 = offset32;
214   if (offset64 & (1ULL << 30))
215      offset64 |= 0xFFFFFFFF80000000ULL;
216   else
217      offset64 &= 0x000000007FFFFFFFULL;
218   return ((const UChar*)addr) + (UWord)offset64;
219}
220
221
222// Extract unwind bytecode for the function denoted by |entry| into |buf|,
223// and return the number of bytes of |buf| written, along with a code
224// indicating the outcome.
225static
226ExExtractResult ExtabEntryExtract ( MemoryRange* mr_exidx,
227                                    MemoryRange* mr_extab,
228                                    const ExidxEntry* entry,
229                                    UChar* buf, SizeT buf_size,
230                                    /*OUT*/SizeT* buf_used)
231{
232   Bool ok;
233   MemoryRange mr_out;
234   ok = MemoryRange__init(&mr_out, buf, buf_size);
235   if (!ok) return ExOutBufOverflow;
236
237   *buf_used = 0;
238
239#  define PUT_BUF_U8(_byte) \
240      do { if (!MemoryRange__covers(&mr_out, &buf[*buf_used], 1)) \
241              return ExOutBufOverflow; \
242           buf[(*buf_used)++] = (_byte); } while (0)
243
244#  define GET_EX_U32(_lval, _addr, _mr) \
245      do { if (!MemoryRange__covers((_mr), (const void*)(_addr), 4)) \
246              return ExInBufOverflow; \
247           (_lval) = *(const UInt*)(_addr); } while (0)
248
249#  define GET_EXIDX_U32(_lval, _addr) \
250      GET_EX_U32(_lval, _addr, mr_exidx)
251
252#  define GET_EXTAB_U32(_lval, _addr) \
253      GET_EX_U32(_lval, _addr, mr_extab)
254
255   UInt data;
256   GET_EXIDX_U32(data, &entry->data);
257
258   // A function can be marked CANT_UNWIND if (eg) it is known to be
259   // at the bottom of the stack.
260   if (data == ARM_EXIDX_CANT_UNWIND)
261      return ExCantUnwind;
262
263   UInt  pers;          // personality number
264   UInt  extra;         // number of extra data words required
265   UInt  extra_allowed; // number of extra data words allowed
266   const UInt* extbl_data;    // the handler entry, if not inlined
267
268   if (data & ARM_EXIDX_COMPACT) {
269      // The handler table entry has been inlined into the index table entry.
270      // In this case it can only be an ARM-defined compact model, since
271      // bit 31 is 1.  Only personalities 0, 1 and 2 are defined for the
272      // ARM compact model, but 1 and 2 are "Long format" and may require
273      // extra data words.  Hence the allowable personalities here are:
274      //   personality 0, in which case 'extra' has no meaning
275      //   personality 1, with zero extra words
276      //   personality 2, with zero extra words
277      extbl_data = NULL;
278      pers  = (data >> 24) & 0x0F;
279      extra = (data >> 16) & 0xFF;
280      extra_allowed = 0;
281   }
282   else {
283      // The index table entry is a pointer to the handler entry.  Note
284      // that Prel31ToAddr will read the given address, but we already
285      // range-checked above.
286      extbl_data = Prel31ToAddr(&entry->data);
287      GET_EXTAB_U32(data, extbl_data);
288      if (!(data & ARM_EXIDX_COMPACT)) {
289         // This denotes a "generic model" handler.  That will involve
290         // executing arbitrary machine code, which is something we
291         // can't represent here; hence reject it.
292         return ExCantRepresent;
293      }
294      // So we have a compact model representation.  Again, 3 possible
295      // personalities, but this time up to 255 allowable extra words.
296      pers  = (data >> 24) & 0x0F;
297      extra = (data >> 16) & 0xFF;
298      extra_allowed = 255;
299      extbl_data++;
300   }
301
302   // Now look at the handler table entry.  The first word is |data|
303   // and subsequent words start at |*extbl_data|.  The number of
304   // extra words to use is |extra|, provided that the personality
305   // allows extra words.  Even if it does, none may be available --
306   // extra_allowed is the maximum number of extra words allowed. */
307   if (pers == 0) {
308      // "Su16" in the documentation -- 3 unwinding insn bytes
309      // |extra| has no meaning here; instead that byte is an unwind-info byte
310      PUT_BUF_U8(data >> 16);
311      PUT_BUF_U8(data >> 8);
312      PUT_BUF_U8(data);
313   }
314   else if ((pers == 1 || pers == 2) && extra <= extra_allowed) {
315      // "Lu16" or "Lu32" respectively -- 2 unwinding insn bytes,
316      // and up to 255 extra words.
317      PUT_BUF_U8(data >> 8);
318      PUT_BUF_U8(data);
319      UInt j;
320      for (j = 0; j < extra; j++) {
321         GET_EXTAB_U32(data, extbl_data);
322         extbl_data++;
323         PUT_BUF_U8(data >> 24);
324         PUT_BUF_U8(data >> 16);
325         PUT_BUF_U8(data >> 8);
326         PUT_BUF_U8(data >> 0);
327      }
328   }
329   else {
330      // The entry is invalid.
331      return ExInvalid;
332   }
333
334   // Make sure the entry is terminated with "FINISH"
335   if (*buf_used > 0 && buf[(*buf_used) - 1] != ARM_EXTBL_OP_FINISH)
336      PUT_BUF_U8(ARM_EXTBL_OP_FINISH);
337
338   return ExSuccess;
339
340#  undef GET_EXTAB_U32
341#  undef GET_EXIDX_U32
342#  undef GET_U32
343#  undef PUT_BUF_U8
344}
345
346
347/*------------------------------------------------------------*/
348/*--- (Pass 2 of 3) The EXIDX decoder                      ---*/
349/*------------------------------------------------------------*/
350
/* Command codes for ExtabData, the intermediate structure used to
   carry information from the decoder (pass 2) to the summariser
   (pass 3).  The values are consecutive, starting at 0x100. */
typedef
   enum {
      ARM_EXIDX_CMD_FINISH       = 0x100,
      ARM_EXIDX_CMD_SUB_FROM_VSP = 0x101,
      ARM_EXIDX_CMD_ADD_TO_VSP   = 0x102,
      ARM_EXIDX_CMD_REG_POP      = 0x103,
      ARM_EXIDX_CMD_REG_TO_SP    = 0x104,
      ARM_EXIDX_CMD_VFP_POP      = 0x105,
      ARM_EXIDX_CMD_WREG_POP     = 0x106,
      ARM_EXIDX_CMD_WCGR_POP     = 0x107,
      ARM_EXIDX_CMD_RESERVED     = 0x108,
      ARM_EXIDX_CMD_REFUSED      = 0x109
   }
   ExtabCmd;
368
369static const HChar* showExtabCmd ( ExtabCmd cmd ) {
370   switch (cmd) {
371      case ARM_EXIDX_CMD_FINISH:       return "FINISH";
372      case ARM_EXIDX_CMD_SUB_FROM_VSP: return "SUB_FROM_VSP";
373      case ARM_EXIDX_CMD_ADD_TO_VSP:   return "ADD_TO_VSP";
374      case ARM_EXIDX_CMD_REG_POP:      return "REG_POP";
375      case ARM_EXIDX_CMD_REG_TO_SP:    return "REG_TO_SP";
376      case ARM_EXIDX_CMD_VFP_POP:      return "VFP_POP";
377      case ARM_EXIDX_CMD_WREG_POP:     return "WREG_POP";
378      case ARM_EXIDX_CMD_WCGR_POP:     return "WCGR_POP";
379      case ARM_EXIDX_CMD_RESERVED:     return "RESERVED";
380      case ARM_EXIDX_CMD_REFUSED:      return "REFUSED";
381      default:                         return "???";
382   }
383}
384
385
386typedef
387   struct { ExtabCmd cmd; UInt data; }
388   ExtabData;
389
390static void ppExtabData ( const ExtabData* etd ) {
391   VG_(printf)("ExtabData{%-12s 0x%08x}", showExtabCmd(etd->cmd), etd->data);
392}
393
394
/* Flag bits OR'd into ExtabData.data for ARM_EXIDX_CMD_VFP_POP.  They
   sit above the low byte, which holds the register start/count
   nibbles. */
enum extab_cmd_flags {
   ARM_EXIDX_VFP_SHIFT_16 = 0x10000, // bit 16
   ARM_EXIDX_VFP_FSTMD    = 0x20000, // bit 17: distinguishes FSTMxxD
                                     // from FSTMxxX
};
399
400
401/* Forwards */
402typedef  struct _SummState  SummState;
403static Int TranslateCmd(/*MOD*/SummState* state, const ExtabData* edata);
404
405
406// Take the unwind information extracted by ExtabEntryExtract
407// and parse it into frame-unwind instructions.  These are as
408// specified in "Table 4, ARM-defined frame-unwinding instructions"
409// in the specification document detailed in comments at the top
410// of this file.
411//
412// This reads from |buf[0, +data_size)|.  It checks for overruns of
413// the input buffer and returns a negative value if that happens, or
414// for any other failure cases.  It returns zero in case of success.
415// Whilst reading the input, it dumps the result in |*state|.
416static
417Int ExtabEntryDecode(/*OUT*/SummState* state, const UChar* buf, SizeT buf_size)
418{
419   if (buf == NULL || buf_size == 0)
420      return -3;
421
422   MemoryRange mr_in;
423   Bool ok = MemoryRange__init(&mr_in, buf, buf_size);
424   if (!ok)
425      return -2;
426
427#  define GET_BUF_U8(_lval) \
428      do { if (!MemoryRange__covers(&mr_in, buf, 1)) \
429              return -4; \
430           (_lval) = *(buf++); } while (0)
431
432   const UChar* end = buf + buf_size;
433
434   while (buf < end) {
435      ExtabData edata;
436      VG_(bzero_inline)(&edata, sizeof(edata));
437
438      UChar op;
439      GET_BUF_U8(op);
440      if ((op & 0xc0) == 0x00) {
441         // vsp = vsp + (xxxxxx << 2) + 4
442         edata.cmd  = ARM_EXIDX_CMD_ADD_TO_VSP;
443         edata.data = (((Int)op & 0x3f) << 2) + 4;
444      }
445      else if ((op & 0xc0) == 0x40) {
446         // vsp = vsp - (xxxxxx << 2) - 4
447         edata.cmd  = ARM_EXIDX_CMD_SUB_FROM_VSP;
448         edata.data = (((Int)op & 0x3f) << 2) + 4;
449      }
450      else if ((op & 0xf0) == 0x80) {
451         UChar op2;
452         GET_BUF_U8(op2);
453         if (op == 0x80 && op2 == 0x00) {
454            // Refuse to unwind
455            edata.cmd = ARM_EXIDX_CMD_REFUSED;
456         } else {
457            // Pop up to 12 integer registers under masks {r15-r12},{r11-r4}
458            edata.cmd  = ARM_EXIDX_CMD_REG_POP;
459            edata.data = ((op & 0xf) << 8) | op2;
460            edata.data = edata.data << 4;
461         }
462      }
463      else if ((op & 0xf0) == 0x90) {
464         if (op == 0x9d || op == 0x9f) {
465            // 9d: Reserved as prefix for ARM register to register moves
466            // 9f: Reserved as prefix for Intel Wireless MMX reg to reg moves
467            edata.cmd = ARM_EXIDX_CMD_RESERVED;
468         } else {
469            // Set vsp = r[nnnn]
470            edata.cmd  = ARM_EXIDX_CMD_REG_TO_SP;
471            edata.data = op & 0x0f;
472         }
473      }
474      else if ((op & 0xf0) == 0xa0) {
475         // Pop r4 to r[4+nnn],          or
476         // Pop r4 to r[4+nnn] and r14
477         Int nnn    = (op & 0x07);
478         edata.data = (1 << (nnn + 1)) - 1;
479         edata.data = edata.data << 4;
480         if (op & 0x08) edata.data |= 1 << 14;
481         edata.cmd = ARM_EXIDX_CMD_REG_POP;
482      }
483      else if (op == ARM_EXTBL_OP_FINISH) {
484         // Finish
485         edata.cmd = ARM_EXIDX_CMD_FINISH;
486         buf = end;
487      }
488      else if (op == 0xb1) {
489         UChar op2;
490         GET_BUF_U8(op2);
491         if (op2 == 0 || (op2 & 0xf0)) {
492            // Spare
493            edata.cmd = ARM_EXIDX_CMD_RESERVED;
494         } else {
495            // Pop integer registers under mask {r3,r2,r1,r0}
496            edata.cmd = ARM_EXIDX_CMD_REG_POP;
497            edata.data = op2 & 0x0f;
498         }
499      }
500      else if (op == 0xb2) {
501         // vsp = vsp + 0x204 + (uleb128 << 2)
502         ULong offset = 0;
503         UChar byte, shift = 0;
504         do {
505            GET_BUF_U8(byte);
506            offset |= (byte & 0x7f) << shift;
507            shift += 7;
508         } while ((byte & 0x80) && buf < end);
509         edata.data = offset * 4 + 0x204;
510         edata.cmd  = ARM_EXIDX_CMD_ADD_TO_VSP;
511      }
512      else if (op == 0xb3 || op == 0xc8 || op == 0xc9) {
513         // b3: Pop VFP regs D[ssss]    to D[ssss+cccc],    FSTMFDX-ishly
514         // c8: Pop VFP regs D[16+ssss] to D[16+ssss+cccc], FSTMFDD-ishly
515         // c9: Pop VFP regs D[ssss]    to D[ssss+cccc],    FSTMFDD-ishly
516         edata.cmd = ARM_EXIDX_CMD_VFP_POP;
517         GET_BUF_U8(edata.data);
518         if (op == 0xc8) edata.data |= ARM_EXIDX_VFP_SHIFT_16;
519         if (op != 0xb3) edata.data |= ARM_EXIDX_VFP_FSTMD;
520      }
521      else if ((op & 0xf8) == 0xb8 || (op & 0xf8) == 0xd0) {
522         // b8: Pop VFP regs D[8] to D[8+nnn], FSTMFDX-ishly
523         // d0: Pop VFP regs D[8] to D[8+nnn], FSTMFDD-ishly
524         edata.cmd  = ARM_EXIDX_CMD_VFP_POP;
525         edata.data = 0x80 | (op & 0x07);
526         if ((op & 0xf8) == 0xd0) edata.data |= ARM_EXIDX_VFP_FSTMD;
527      }
528      else if (op >= 0xc0 && op <= 0xc5) {
529         // Intel Wireless MMX pop wR[10]-wr[10+nnn], nnn != 6,7
530         edata.cmd  = ARM_EXIDX_CMD_WREG_POP;
531         edata.data = 0xa0 | (op & 0x07);
532      }
533      else if (op == 0xc6) {
534         // Intel Wireless MMX pop wR[ssss] to wR[ssss+cccc]
535         edata.cmd = ARM_EXIDX_CMD_WREG_POP;
536         GET_BUF_U8(edata.data);
537      }
538      else if (op == 0xc7) {
539         UChar op2;
540         GET_BUF_U8(op2);
541         if (op2 == 0 || (op2 & 0xf0)) {
542            // Spare
543            edata.cmd = ARM_EXIDX_CMD_RESERVED;
544         } else {
545            // Intel Wireless MMX pop wCGR registers under mask {wCGR3,2,1,0}
546            edata.cmd = ARM_EXIDX_CMD_WCGR_POP;
547            edata.data = op2 & 0x0f;
548         }
549      }
550      else {
551         // Spare
552         edata.cmd = ARM_EXIDX_CMD_RESERVED;
553      }
554
555      if (0)
556         VG_(printf)("  edata:  cmd %08x  data %08x\n",
557                     (UInt)edata.cmd, edata.data);
558
559      Int ret = TranslateCmd ( state, &edata );
560      if (ret < 0) return ret;
561   }
562   return 0;
563
564# undef GET_BUF_U8
565}
566
567
568/*------------------------------------------------------------*/
569/*--- (Pass 3 of 3) The EXIDX summariser                   ---*/
570/*------------------------------------------------------------*/
571
572/* In this translation into DiCfSI_m, we're going to have the CFA play
573   the role of the VSP.  That means that the VSP can be exactly any of
   the CFA expressions, viz: {r7,r11,r12,r13} +/- offset.
575
576   All of this would be a lot simpler if the DiCfSI_m representation
577   was just a bit more expressive and orthogonal.  But it isn't.
578
579   The central difficulty is that, although we can track changes
580   to the offset of VSP (via vsp_off), we can't deal with assignments
581   of an entirely new expression to it, because the existing
582   rules in |cfi| will almost certainly refer to the CFA, and so
583   changing it will make them invalid.  Hence, below:
584
585   * for the case ARM_EXIDX_CMD_REG_TO_SP we simply disallow
586     assignment, and hence give up, if any rule refers to CFA
587
   * for the case ARM_EXIDX_CMD_REG_POP, if the SP (hence, VSP) is
     updated by the pop, we give up.
590
591   This is an ugly hack to work around not having a better (LUL-like)
592   expression representation.  That said, these restrictions don't
593   appear to be a big problem in practice.
594*/
595
596struct _SummState {
597   // The DiCfSI_m under construction
598   DiCfSI_m   cfi;
599   Int        vsp_off;
600   // For generating CFI register expressions, if needed.
601   DebugInfo* di;
602};
603
604
605/* Generate a trivial CfiExpr, for the ARM(32) integer register
606   numbered |gprNo|.  First ensure this DebugInfo has a cfsi_expr
607   array in which to park it.  Returns -1 if |gprNo| cannot be
608   represented, otherwise returns a value >= 0. */
609static
610Int gen_CfiExpr_CfiReg_ARM_GPR ( /*MB_MOD*/DebugInfo* di, UInt gprNo )
611{
612   CfiReg creg = Creg_INVALID;
613   switch (gprNo) {
614      case 13: creg = Creg_ARM_R13; break;
615      case 12: creg = Creg_ARM_R12; break;
616      case 15: creg = Creg_ARM_R15; break;
617      case 14: creg = Creg_ARM_R14; break;
618      case 7:  creg = Creg_ARM_R7;  break;
619      default: break;
620   }
621   if (creg == Creg_INVALID) {
622      return -1;
623   }
624   if (!di->cfsi_exprs) {
625      di->cfsi_exprs = VG_(newXA)( ML_(dinfo_zalloc), "di.gCCAG",
626                                   ML_(dinfo_free), sizeof(CfiExpr) );
627   }
628   Int res = ML_(CfiExpr_CfiReg)( di->cfsi_exprs, creg );
629   vg_assert(res >= 0);
630   return res;
631}
632
633
634/* Given a DiCfSI_m, find the _how/_off pair for the given ARM(32) GPR
635   number inside |cfsi_m|, or return NULL for both if that register
636   number is not represented. */
637static
638void maybeFindExprForRegno( /*OUT*/UChar** howPP, /*OUT*/Int** offPP,
639                            DiCfSI_m* cfsi_m, Int regNo )
640{
641   switch (regNo) {
642      case 15: *howPP = &cfsi_m->ra_how;  *offPP = &cfsi_m->ra_off;  return;
643      case 14: *howPP = &cfsi_m->r14_how; *offPP = &cfsi_m->r14_off; return;
644      case 13: *howPP = &cfsi_m->r13_how; *offPP = &cfsi_m->r13_off; return;
645      case 12: *howPP = &cfsi_m->r12_how; *offPP = &cfsi_m->r12_off; return;
646      case 11: *howPP = &cfsi_m->r11_how; *offPP = &cfsi_m->r11_off; return;
647      case 7:  *howPP = &cfsi_m->r7_how;  *offPP = &cfsi_m->r7_off;  return;
648      default: break;
649   }
650   *howPP = NULL; *offPP = NULL;
651}
652
653
654/* Set cfi.cfa_{how,off} so as to be a copy of the expression denoted
655   by (how,off), if it is possible to do so.  Returns True on
656   success. */
657static
658Bool setCFAfromCFIR( /*MOD*/DiCfSI_m* cfi, XArray*/*CfiExpr*/ cfsi_exprs,
659                     UChar how, Int off )
660{
661   switch (how) {
662      case CFIR_EXPR:
663         if (!cfsi_exprs) return False;
664         CfiExpr* e = (CfiExpr*)VG_(indexXA)(cfsi_exprs, off);
665         if (e->tag != Cex_CfiReg) return False;
666         if (e->Cex.CfiReg.reg == Creg_ARM_R7) {
667            cfi->cfa_how = CFIC_ARM_R7REL;
668            cfi->cfa_off = 0;
669            return True;
670         }
671         ML_(ppCfiExpr)(cfsi_exprs, off);
672         vg_assert(0);
673      default:
674         break;
675   }
676   VG_(printf)("setCFAfromCFIR: FAIL: how %d off %d\n", how, off);
677   vg_assert(0);
678   return False;
679}
680
681
/* Accessors for a "start/count" operand byte: the first register
   number lives in bits 7:4 and the count in bits 3:0.  END is the
   number of the last register, START + COUNT. */
#define ARM_EXBUF_START(x) (((x) >> 4) & 0x0f)
#define ARM_EXBUF_COUNT(x) ((x) & 0x0f)
#define ARM_EXBUF_END(x)   ((((x) >> 4) & 0x0f) + ((x) & 0x0f))
685
686
687static Bool mentionsCFA ( DiCfSI_m* cfi )
688{
689#  define MENTIONS_CFA(_how) ((_how) == CFIR_CFAREL || (_how) == CFIR_MEMCFAREL)
690   if (MENTIONS_CFA(cfi->ra_how))  return True;
691   if (MENTIONS_CFA(cfi->r14_how)) return True;
692   if (MENTIONS_CFA(cfi->r13_how)) return True;
693   if (MENTIONS_CFA(cfi->r12_how)) return True;
694   if (MENTIONS_CFA(cfi->r11_how)) return True;
695   if (MENTIONS_CFA(cfi->r7_how))  return True;
696   return False;
697#  undef MENTIONS_CFA
698}
699
700
701// Translate command from extab_data to command for Module.
702static
703Int TranslateCmd(/*MOD*/SummState* state, const ExtabData* edata)
704{
705   /* Stay sane: check that the CFA has the expected form. */
706   vg_assert(state);
707   switch (state->cfi.cfa_how) {
708      case CFIC_ARM_R13REL: case CFIC_ARM_R12REL:
709      case CFIC_ARM_R11REL: case CFIC_ARM_R7REL: break;
710      default: vg_assert(0);
711   }
712
713   if (0) {
714      VG_(printf)("  TranslateCmd: ");
715      ppExtabData(edata);
716      VG_(printf)("\n");
717   }
718
719   Int ret = 0;
720   switch (edata->cmd) {
721      case ARM_EXIDX_CMD_FINISH:
722         /* Copy LR to PC if there isn't currently a rule for PC in force. */
723         if (state->cfi.ra_how == CFIR_UNKNOWN) {
724            if (state->cfi.r14_how == CFIR_UNKNOWN) {
725               state->cfi.ra_how = CFIR_EXPR;
726               state->cfi.ra_off = gen_CfiExpr_CfiReg_ARM_GPR(state->di, 14);
727               vg_assert(state->cfi.ra_off >= 0);
728            } else {
729               state->cfi.ra_how = state->cfi.r14_how;
730               state->cfi.ra_off = state->cfi.r14_off;
731            }
732         }
733         break;
734      case ARM_EXIDX_CMD_SUB_FROM_VSP:
735         state->vsp_off -= (Int)(edata->data);
736         break;
737      case ARM_EXIDX_CMD_ADD_TO_VSP:
738         state->vsp_off += (Int)(edata->data);
739         break;
740      case ARM_EXIDX_CMD_REG_POP: {
741         UInt i;
742         for (i = 0; i < 16; i++) {
743            if (edata->data & (1 << i)) {
744               // See if we're summarising for int register |i|.  If so,
745               // describe how to pull it off the stack.  The cast of |i| is
746               // a bit of a kludge but works because DW_REG_ARM_Rn has the
747               // value |n|, for 0 <= |n| <= 15 -- that is, for the ARM
748               // general-purpose registers.
749               UChar* rX_howP = NULL;
750               Int*   rX_offP = NULL;
751               maybeFindExprForRegno(&rX_howP, &rX_offP, &state->cfi, i);
752               if (rX_howP) {
753                  vg_assert(rX_offP);
754                  /* rX_howP and rX_offP point at one of the rX fields
755                     in |state->cfi|.  Hence the following assignments
756                     are really updating |state->cfi|. */
757                  *rX_howP = CFIR_MEMCFAREL;
758                  *rX_offP = state->vsp_off;
759               } else {
760                  /* We're not tracking this register, so ignore it. */
761                  vg_assert(!rX_offP);
762               }
763               state->vsp_off += 4;
764            }
765         }
766         /* Set cfa in case the SP got popped. */
767         if (edata->data & (1 << 13)) {
768            //  vsp = curr_rules_.mR13expr;
769            //state->cfi.cfa_how =
770            //state->cfi.cfa_off =
771            //state->vsp_off = 0;
772            // If this happens, it would make the existing CFA references
773            // in the summary invalid.  So give up instead.
774            goto cant_summarise;
775         }
776         break;
777         }
778      case ARM_EXIDX_CMD_REG_TO_SP: {
779         /* We're generating a new value for the CFA/VSP here.  Hence,
780            if the summary already refers to the CFA at all, we can't
781            go any further, and have to abandon summarisation. */
782         if (mentionsCFA(&state->cfi))
783            goto cant_summarise;
784         vg_assert(edata->data < 16);
785         Int reg_no = edata->data;
786         // Same comment as above, re the casting of |reg_no|, applies.
787         UChar* rX_howP = NULL;
788         Int*   rX_offP = NULL;
789         maybeFindExprForRegno(&rX_howP, &rX_offP, &state->cfi, reg_no);
790         if (rX_howP) {
791            vg_assert(rX_offP);
792            if (*rX_howP == CFIR_UNKNOWN) {
793               //curr_rules_.mR13expr = LExpr(LExpr::NODEREF, reg_no, 0);
794               Int expr_ix = gen_CfiExpr_CfiReg_ARM_GPR(state->di, reg_no);
795               if (expr_ix >= 0) {
796                  state->cfi.r13_how = CFIR_EXPR;
797                  state->cfi.r13_off = expr_ix;
798               } else {
799                  goto cant_summarise;
800               }
801            } else {
802               //curr_rules_.mR13expr = *reg_exprP;
803               state->cfi.r13_how = *rX_howP;
804               state->cfi.r13_off = *rX_offP;
805            }
806            //vsp = curr_rules_.mR13expr;
807            Bool ok = setCFAfromCFIR( &state->cfi, state->di->cfsi_exprs,
808                                      state->cfi.r13_how, state->cfi.r13_off );
809            if (!ok) goto cant_summarise;
810            state->vsp_off = 0;
811         } else {
812            vg_assert(!rX_offP);
813         }
814         break;
815      }
816      case ARM_EXIDX_CMD_VFP_POP: {
817         /* Don't recover VFP registers, but be sure to adjust the stack
818            pointer. */
819         UInt i;
820         for (i = ARM_EXBUF_START(edata->data);
821              i <= ARM_EXBUF_END(edata->data); i++) {
822            state->vsp_off += 8;
823         }
824         if (!(edata->data & ARM_EXIDX_VFP_FSTMD)) {
825            state->vsp_off += 4;
826         }
827         break;
828      }
829      case ARM_EXIDX_CMD_WREG_POP: {
830         UInt i;
831         for (i = ARM_EXBUF_START(edata->data);
832              i <= ARM_EXBUF_END(edata->data); i++) {
833            state->vsp_off += 8;
834         }
835         break;
836      }
837      case ARM_EXIDX_CMD_WCGR_POP: {
838         UInt i;
839         // Pop wCGR registers under mask {wCGR3,2,1,0}, hence "i < 4"
840         for (i = 0; i < 4; i++) {
841            if (edata->data & (1 << i)) {
842               state->vsp_off += 4;
843            }
844         }
845         break;
846      }
847      case ARM_EXIDX_CMD_REFUSED:
848      case ARM_EXIDX_CMD_RESERVED:
849         ret = -1;
850         break;
851   }
852   return ret;
853
854 cant_summarise:
855   return -10;
856}
857
858
859/* Initialise the EXIDX summariser, by writing initial values in |state|. */
860static
861void AddStackFrame ( /*OUT*/SummState* state,
862                     DebugInfo* di )
863{
864   VG_(bzero_inline)(state, sizeof(*state));
865   state->vsp_off = 0;
866   state->di      = di;
867   /* Initialise the DiCfSI_m that we are building. */
868   state->cfi.cfa_how = CFIC_ARM_R13REL;
869   state->cfi.cfa_off = 0;
870   state->cfi.ra_how  = CFIR_UNKNOWN;
871   state->cfi.r14_how = CFIR_UNKNOWN;
872   state->cfi.r13_how = CFIR_UNKNOWN;
873   state->cfi.r12_how = CFIR_UNKNOWN;
874   state->cfi.r11_how = CFIR_UNKNOWN;
875   state->cfi.r7_how  = CFIR_UNKNOWN;
876}
877
878static
879void SubmitStackFrame( /*MOD*/DebugInfo* di,
880                       SummState* state, Addr avma, SizeT len )
881{
882   // JRS: I'm really not sure what this means, or if it is necessary
883   // return address always winds up in pc
884   //stack_frame_entry_->initial_rules[ustr__ZDra()] // ".ra"
885   //  = stack_frame_entry_->initial_rules[ustr__pc()];
886   // maybe don't need to do anything here?
887
888   // the final value of vsp is the new value of sp.
889   switch (state->cfi.cfa_how) {
890      case CFIC_ARM_R13REL: case CFIC_ARM_R12REL:
891      case CFIC_ARM_R11REL: case CFIC_ARM_R7REL: break;
892      default: vg_assert(0);
893   }
894   state->cfi.r13_how = CFIR_CFAREL;
895   state->cfi.r13_off = state->vsp_off;
896
897   // Finally, add the completed RuleSet to the SecMap
898   if (len > 0) {
899
900      // Futz with the rules for r4 .. r11 in the same way as happens
901      // with the CFI summariser:
902      /* Mark callee-saved registers (r4 .. r11) as unchanged, if there is
903       no other information about them.  FIXME: do this just once, at
904       the point where the ruleset is committed. */
905      if (state->cfi.r7_how == CFIR_UNKNOWN) {
906         state->cfi.r7_how = CFIR_SAME;
907         state->cfi.r7_off = 0;
908      }
909      if (state->cfi.r11_how == CFIR_UNKNOWN) {
910         state->cfi.r11_how = CFIR_SAME;
911         state->cfi.r11_off = 0;
912      }
913      if (state->cfi.r12_how == CFIR_UNKNOWN) {
914         state->cfi.r12_how = CFIR_SAME;
915         state->cfi.r12_off = 0;
916      }
917      if (state->cfi.r14_how == CFIR_UNKNOWN) {
918         state->cfi.r14_how = CFIR_SAME;
919         state->cfi.r14_off = 0;
920      }
921
922      // And add them
923      ML_(addDiCfSI)(di, avma, len, &state->cfi);
924      if (di->trace_cfi)
925         ML_(ppDiCfSI)(di->cfsi_exprs, avma, len, &state->cfi);
926   }
927}
928
929
930/*------------------------------------------------------------*/
931/*--- Top level                                            ---*/
932/*------------------------------------------------------------*/
933
934void ML_(read_exidx) ( /*MOD*/DebugInfo* di,
935                       UChar*   exidx_img, SizeT exidx_size,
936                       UChar*   extab_img, SizeT extab_size,
937                       Addr     text_last_svma,
938                       PtrdiffT text_bias )
939{
940   if (di->trace_cfi)
941      VG_(printf)("BEGIN ML_(read_exidx) exidx_img=[%p, +%lu) "
942                  "extab_img=[%p, +%lu) text_last_svma=%lx text_bias=%lx\n",
943                  exidx_img, exidx_size, extab_img, extab_size,
944                  text_last_svma, (UWord)text_bias);
945   Bool ok;
946   MemoryRange mr_exidx, mr_extab;
947   ok =       MemoryRange__init(&mr_exidx, exidx_img, exidx_size);
948   ok = ok && MemoryRange__init(&mr_extab, extab_img, extab_size);
949   if (!ok) {
950      complain(".exidx or .extab image area wraparound");
951      return;
952   }
953
954   const ExidxEntry* start_img = (const ExidxEntry*)exidx_img;
955   const ExidxEntry* end_img   = (const ExidxEntry*)(exidx_img + exidx_size);
956
957   if (VG_(clo_verbosity) > 1)
958      VG_(message)(Vg_DebugMsg, "  Reading EXIDX entries: %lu available\n",
959                   exidx_size / sizeof(ExidxEntry) );
960
961   // Iterate over each of the EXIDX entries (pairs of 32-bit words).
962   // These occupy the entire .exidx section.
963   UWord n_attempted = 0, n_successful = 0;
964
965   const ExidxEntry* entry_img;
966   for (entry_img = start_img; entry_img < end_img; ++entry_img) {
967
968      n_attempted++;
969      // Figure out the code address range that this table entry_img is
970      // associated with.
971      Addr avma = (Addr)Prel31ToAddr(&entry_img->addr);
972      if (di->trace_cfi)
973         VG_(printf)("XXX1 entry: entry->addr 0x%x, avma 0x%lx\n",
974                     entry_img->addr, avma);
975
976      Addr next_avma;
977      if (entry_img < end_img - 1) {
978         next_avma = (Addr)Prel31ToAddr(&(entry_img+1)->addr);
979      } else {
980         // This is the last EXIDX entry in the sequence, so we don't
981         // have an address for the start of the next function, to limit
982         // this one.  Instead use the address of the last byte of the
983         // text section associated with this .exidx section, that we
984         // have been given.  So as to avoid junking up the CFI unwind
985         // tables with absurdly large address ranges in the case where
986         // text_last_svma_ is wrong, only use the value if it is nonzero
987         // and within one page of |svma|.  Otherwise assume a length of 1.
988         //
989         // In some cases, gcc has been observed to finish the exidx
990         // section with an entry of length 1 marked CANT_UNWIND,
991         // presumably exactly for the purpose of giving a definite
992         // length for the last real entry, without having to look at
993         // text segment boundaries.
994         Addr text_last_avma = text_last_svma + text_bias;
995
996         Bool plausible;
997         Addr maybe_next_avma = text_last_avma + 1;
998         if (maybe_next_avma > avma && maybe_next_avma - avma <= 4096) {
999            next_avma = maybe_next_avma;
1000            plausible = True;
1001         } else {
1002            next_avma = avma + 1;
1003            plausible = False;
1004         }
1005
1006         if (!plausible && avma != text_last_avma + 1) {
1007            HChar buf[100];
1008            VG_(snprintf)(buf, sizeof(buf),
1009                          "Implausible EXIDX last entry size %lu"
1010                          "; using 1 instead.", text_last_avma - avma);
1011            buf[sizeof(buf)-1] = 0;
1012            complain(buf);
1013         }
1014      }
1015
1016      // Extract the unwind info into |buf|.  This might fail for
1017      // various reasons.  It involves reading both the .exidx and
1018      // .extab sections.  All accesses to those sections are
1019      // bounds-checked.
1020      if (di->trace_cfi)
1021         VG_(printf)("XXX1 entry is for AVMA 0x%lx 0x%lx\n",
1022                     avma, next_avma-1);
1023      UChar buf[ARM_EXIDX_TABLE_LIMIT];
1024      SizeT buf_used = 0;
1025      ExExtractResult res
1026         = ExtabEntryExtract(&mr_exidx, &mr_extab,
1027                             entry_img, buf, sizeof(buf), &buf_used);
1028      if (res != ExSuccess) {
1029         // Couldn't extract the unwind info, for some reason.  Move on.
1030         switch (res) {
1031            case ExInBufOverflow:
1032               complain("ExtabEntryExtract: .exidx/.extab section overrun");
1033               break;
1034            case ExOutBufOverflow:
1035               complain("ExtabEntryExtract: bytecode buffer overflow");
1036               break;
1037            case ExCantUnwind:
1038               // Some functions are marked CantUnwind by the compiler.
1039               // Don't record these as attempted, since that's just
1040               // confusing, and failure to summarise them is not the fault
1041               // of this code.
1042               n_attempted--;
1043               if (0)
1044                  complain("ExtabEntryExtract: function is marked CANT_UNWIND");
1045               break;
1046            case ExCantRepresent:
1047               complain("ExtabEntryExtract: bytecode can't be represented");
1048               break;
1049            case ExInvalid:
1050               complain("ExtabEntryExtract: index table entry is invalid");
1051               break;
1052            default: {
1053               HChar mbuf[100];
1054               VG_(snprintf)(mbuf, sizeof(mbuf),
1055                             "ExtabEntryExtract: unknown error: %d", (Int)res);
1056               buf[sizeof(mbuf)-1] = 0;
1057               complain(mbuf);
1058               break;
1059            }
1060         }
1061         continue;
1062      }
1063
1064      // Finally, work through the unwind instructions in |buf| and
1065      // create CFI entries that Valgrind can use.  This can also fail.
1066      // First, initialise the summariser's running state, into which
1067      // ExtabEntryDecode will write the CFI entries.
1068
1069      SummState state;
1070      AddStackFrame( &state, di );
1071      Int ret = ExtabEntryDecode( &state, buf, buf_used );
1072      if (ret < 0) {
1073         /* Failed summarisation.  Ignore and move on. */
1074         HChar mbuf[100];
1075         VG_(snprintf)(mbuf, sizeof(mbuf),
1076                       "ExtabEntryDecode: failed with error code: %d", ret);
1077         mbuf[sizeof(mbuf)-1] = 0;
1078         complain(mbuf);
1079      } else {
1080         /* Successful summarisation.  Add it to the collection. */
1081         SubmitStackFrame( di, &state, avma, next_avma - avma );
1082         n_successful++;
1083      }
1084
1085   } /* iterating over .exidx */
1086
1087   if (VG_(clo_verbosity) > 1)
1088      VG_(message)(Vg_DebugMsg,
1089                   "  Reading EXIDX entries: %lu attempted, %lu successful\n",
1090                   n_attempted, n_successful);
1091}
1092
1093#endif /* defined(VGA_arm) */
1094
1095/*--------------------------------------------------------------------*/
1096/*--- end                                              readexidx.c ---*/
1097/*--------------------------------------------------------------------*/
1098