m_stacktrace.c revision 30e866ec72bb970337843a36c2e29bc26b213c63
1
2/*--------------------------------------------------------------------*/
3/*--- Take snapshots of client stacks.              m_stacktrace.c ---*/
4/*--------------------------------------------------------------------*/
5
6/*
7   This file is part of Valgrind, a dynamic binary instrumentation
8   framework.
9
10   Copyright (C) 2000-2013 Julian Seward
11      jseward@acm.org
12
13   This program is free software; you can redistribute it and/or
14   modify it under the terms of the GNU General Public License as
15   published by the Free Software Foundation; either version 2 of the
16   License, or (at your option) any later version.
17
18   This program is distributed in the hope that it will be useful, but
19   WITHOUT ANY WARRANTY; without even the implied warranty of
20   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
21   General Public License for more details.
22
23   You should have received a copy of the GNU General Public License
24   along with this program; if not, write to the Free Software
25   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
26   02111-1307, USA.
27
28   The GNU General Public License is contained in the file COPYING.
29*/
30
31#include "pub_core_basics.h"
32#include "pub_core_vki.h"
33#include "pub_core_libcsetjmp.h"    // to keep _threadstate.h happy
34#include "pub_core_threadstate.h"
35#include "pub_core_debuginfo.h"     // XXX: circular dependency
36#include "pub_core_aspacemgr.h"     // For VG_(is_addressable)()
37#include "pub_core_libcbase.h"
38#include "pub_core_libcassert.h"
39#include "pub_core_libcprint.h"
40#include "pub_core_machine.h"
41#include "pub_core_options.h"
42#include "pub_core_stacks.h"        // VG_(stack_limits)
43#include "pub_core_stacktrace.h"
44#include "pub_core_xarray.h"
45#include "pub_core_clientstate.h"   // VG_(client__dl_sysinfo_int80)
46#include "pub_core_trampoline.h"
47
48
49/*------------------------------------------------------------*/
50/*---                                                      ---*/
51/*--- BEGIN platform-dependent unwinder worker functions   ---*/
52/*---                                                      ---*/
53/*------------------------------------------------------------*/
54
55/* Take a snapshot of the client's stack, putting up to 'max_n_ips'
56   IPs into 'ips'.  In order to be thread-safe, we pass in the
57   thread's IP SP, FP if that's meaningful, and LR if that's
58   meaningful.  Returns number of IPs put in 'ips'.
59
60   If you know what the thread ID for this stack is, send that as the
61   first parameter, else send zero.  This helps generate better stack
62   traces on ppc64-linux and has no effect on other platforms.
63*/
64
/* Do frame merging in the _i frames in _ips array of recursive cycles
   of up to _nframes.  The merge is done during stack unwinding
   (i.e. in platform specific unwinders) to collect as many
   "interesting" stack traces as possible.
   On entry, _i is the index one past the newest IP recorded.  If the
   newest IP (_ips[_i-1]) equals the IP 'dist' entries further back,
   for some dist in [1.._nframes], then a recursion cycle of length
   'dist' has been detected and _i is rewound by dist, so the cycle's
   frames are recorded only once.
   Wrapped in do { } while (0) so the macro expands to a single
   statement and is safe after an un-braced 'if'. */
#define RECURSIVE_MERGE(_nframes,_ips,_i) do {                  \
   Int dist;                                                    \
   for (dist = 1; dist <= _nframes && dist < (Int)_i; dist++) { \
      if (_ips[_i-1] == _ips[_i-1-dist]) {                      \
         _i = _i - dist;                                        \
         break;                                                 \
      }                                                         \
   }                                                            \
} while (0)
78
79
80/* ------------------------ x86 ------------------------- */
81
82#if defined(VGP_x86_linux) || defined(VGP_x86_darwin)
83
84#define N_FP_CF_VERIF 1021
85// prime number so that size of fp_CF_verif is just below 4K or 8K
86// Note that this prime nr differs from the one chosen in
87// m_debuginfo/debuginfo.c for the cfsi cache : in case we have
88// a collision here between two IPs, we expect to not (often) have the
89// same collision in the cfsi cache (and vice-versa).
90
91// unwinding with fp chain is ok:
92#define FPUNWIND 0
93// there is no CFI info for this IP:
94#define NOINFO   1
95// Unwind with FP is not ok, must use CF unwind:
96#define CFUNWIND 2
97
98static Addr fp_CF_verif_cache [N_FP_CF_VERIF];
99
100/* An unwind done by following the fp chain technique can be incorrect
101   as not all frames are respecting the standard bp/sp ABI.
102   The CF information is now generated by default by gcc
103   (as part of the dwarf info). However, unwinding using CF information
104   is significantly slower : a slowdown of 20% has been observed
105   on a helgrind test case.
106   So, by default, the unwinding will be done using the fp chain.
107   But before accepting to unwind an IP with fp_chain, the result
108   of the unwind will be checked with the CF information.
109   This check can give 3 results:
110     FPUNWIND (0): there is CF info, and it gives the same result as fp unwind.
111       => it is assumed that future unwind for this IP can be done
112          with the fast fp chain, without further CF checking
113     NOINFO   (1): there is no CF info (so, fp unwind is the only do-able thing)
114     CFUNWIND (2): there is CF info, but unwind result differs.
115       => it is assumed that future unwind for this IP must be done
116       with the CF info.
117   Of course, if each fp unwind implies a check done with a CF unwind,
118   it would just be slower => we cache the check result in an
119   array of checked Addr.
120   The check for an IP will be stored at
121    fp_CF_verif_cache[IP % N_FP_CF_VERIF] as one of:
122                     IP ^ FPUNWIND
123                     IP ^ NOINFO
124                     IP ^ CFUNWIND
125
126   Note: we can re-use the last (ROUNDDOWN (log (N_FP_CF_VERIF))) bits
127   to store the check result, as they are guaranteed to be non significant
128   in the comparison between 2 IPs stored in fp_CF_verif_cache).
129   In other words, if two IPs are only differing on the last 2 bits,
130   then they will not land in the same cache bucket.
131*/
132
133static UInt fp_CF_verif_generation = 0;
134// Our cache has to be maintained in sync with the CFI cache.
135// Each time the CFI cache is changed, its generation will be incremented.
136// We will clear our cache when our saved generation differs from
137// the CFI cache generation.
138
139UInt VG_(get_StackTrace_wrk) ( ThreadId tid_if_known,
140                               /*OUT*/Addr* ips, UInt max_n_ips,
141                               /*OUT*/Addr* sps, /*OUT*/Addr* fps,
142                               const UnwindStartRegs* startRegs,
143                               Addr fp_max_orig )
144{
145   const Bool do_stats = False; // compute and output some stats regularly.
146   static struct {
147      UInt nr; // nr of stacktraces computed
148      UInt nf; // nr of frames computed
149      UInt Ca; // unwind for which cache indicates CFUnwind must be used.
150      UInt FF; // unwind for which cache indicates FPUnwind can be used.
151      UInt Cf; // unwind at end of stack+store CFUNWIND (xip not end of stack).
152      UInt Fw; // unwind at end of stack+store FPUNWIND
153      UInt FO; // unwind + store FPUNWIND
154      UInt CF; // unwind + store CFUNWIND. Details below.
155      UInt xi; UInt xs; UInt xb; // register(s) which caused a 'store CFUNWIND'.
156      UInt Ck; // unwind fp invalid+store FPUNWIND
157      UInt MS; // microsoft unwind
158   } stats;
159
160   const Bool   debug = False;
161   //                 = VG_(debugLog_getLevel) () > 3;
162   //                 = True;
163   //                 = stats.nr >= 123456;
164   const HChar* unwind_case; // used when debug is True.
165   // Debugging this function is not straightforward.
166   // Here is the easiest way I have found:
167   // 1. Change the above to True.
168   // 2. Start your program under Valgrind with --tool=none --vgdb-error=0
169   // 3. Use GDB/vgdb to put a breakpoint where you want to debug the stacktrace
170   // 4. Continue till breakpoint is encountered
171   // 5. From GDB, use 'monitor v.info scheduler' and examine the unwind traces.
172   //    You might have to do twice 'monitor v.info scheduler' to see
173   //    the effect of caching the results of the verification.
174   //    You can also modify the debug dynamically using by using
175   //    'monitor v.set debuglog 4.
176
177   Int   i;
178   Addr  fp_max;
179   UInt  n_found = 0;
180   const Int cmrf = VG_(clo_merge_recursive_frames);
181
182   vg_assert(sizeof(Addr) == sizeof(UWord));
183   vg_assert(sizeof(Addr) == sizeof(void*));
184
185   D3UnwindRegs fpverif_uregs; // result of CF unwind for a check reason.
186   Addr xip_verified = 0; // xip for which we have calculated fpverif_uregs
187   // 0 assigned to silence false positive -Wuninitialized warning
188   // This is a false positive as xip_verified is assigned when
189   // xip_verif > CFUNWIND and only used if xip_verif > CFUNWIND.
190
191   D3UnwindRegs uregs;
192   uregs.xip = (Addr)startRegs->r_pc;
193   uregs.xsp = (Addr)startRegs->r_sp;
194   uregs.xbp = startRegs->misc.X86.r_ebp;
195   Addr fp_min = uregs.xsp;
196
197   /* Snaffle IPs from the client's stack into ips[0 .. max_n_ips-1],
198      stopping when the trail goes cold, which we guess to be
199      when FP is not a reasonable stack location. */
200
201   // JRS 2002-sep-17: hack, to round up fp_max to the end of the
202   // current page, at least.  Dunno if it helps.
203   // NJN 2002-sep-17: seems to -- stack traces look like 1.0.X again
204   fp_max = VG_PGROUNDUP(fp_max_orig);
205   if (fp_max >= sizeof(Addr))
206      fp_max -= sizeof(Addr);
207
208   if (debug)
209      VG_(printf)("max_n_ips=%d fp_min=0x%08lx fp_max_orig=0x08%lx, "
210                  "fp_max=0x%08lx ip=0x%08lx fp=0x%08lx\n",
211                  max_n_ips, fp_min, fp_max_orig, fp_max,
212                  uregs.xip, uregs.xbp);
213
214   /* Assertion broken before main() is reached in pthreaded programs;  the
215    * offending stack traces only have one item.  --njn, 2002-aug-16 */
216   /* vg_assert(fp_min <= fp_max);*/
217   // On Darwin, this kicks in for pthread-related stack traces, so they're
218   // only 1 entry long which is wrong.
219#  if !defined(VGO_darwin)
220   if (fp_min + 512 >= fp_max) {
221      /* If the stack limits look bogus, don't poke around ... but
222         don't bomb out either. */
223      if (sps) sps[0] = uregs.xsp;
224      if (fps) fps[0] = uregs.xbp;
225      ips[0] = uregs.xip;
226      return 1;
227   }
228#  endif
229
230   if (UNLIKELY (fp_CF_verif_generation != VG_(CF_info_generation)())) {
231      fp_CF_verif_generation = VG_(CF_info_generation)();
232      VG_(memset)(&fp_CF_verif_cache, 0, sizeof(fp_CF_verif_cache));
233   }
234
235
236   /* Loop unwinding the stack. Note that the IP value we get on
237    * each pass (whether from CFI info or a stack frame) is a
238    * return address so is actually after the calling instruction
239    * in the calling function.
240    *
241    * Because of this we subtract one from the IP after each pass
242    * of the loop so that we find the right CFI block on the next
243    * pass - otherwise we can find the wrong CFI info if it happens
244    * to change after the calling instruction and that will mean
245    * that we will fail to unwind the next step.
246    *
247    * This most frequently happens at the end of a function when
248    * a tail call occurs and we wind up using the CFI info for the
249    * next function which is completely wrong.
250    */
251   if (sps) sps[0] = uregs.xsp;
252   if (fps) fps[0] = uregs.xbp;
253   ips[0] = uregs.xip;
254   i = 1;
255   if (do_stats) stats.nr++;
256
257   while (True) {
258
259      if (i >= max_n_ips)
260         break;
261
262      UWord hash = uregs.xip % N_FP_CF_VERIF;
263      Addr xip_verif = uregs.xip ^ fp_CF_verif_cache [hash];
264      if (debug)
265         VG_(printf)("     uregs.xip 0x%08lx xip_verif[0x%08lx]"
266                     " xbp 0x%08lx xsp 0x%08lx\n",
267                     uregs.xip, xip_verif,
268                     uregs.xbp, uregs.xsp);
269      // If xip is in cache, then xip_verif will be <= CFUNWIND.
270      // Otherwise, if not in cache, xip_verif will be > CFUNWIND.
271
272      /* Try to derive a new (ip,sp,fp) triple from the current set. */
273
274      /* Do we have to do CFI unwinding ?
275         We do CFI unwinding if one of the following condition holds:
276         a. fp_CF_verif_cache contains xip but indicates CFUNWIND must
277            be done (i.e. fp unwind check failed when we did the first
278            unwind for this IP).
279         b. fp_CF_verif_cache does not contain xip.
280            We will try CFI unwinding in fpverif_uregs and compare with
281            FP unwind result to insert xip in the cache with the correct
282            indicator. */
283      if (UNLIKELY(xip_verif >= CFUNWIND)) {
284         if (xip_verif == CFUNWIND) {
285            /* case a : do "real" cfi unwind */
286            if ( VG_(use_CF_info)( &uregs, fp_min, fp_max ) ) {
287               if (debug) unwind_case = "Ca";
288               if (do_stats) stats.Ca++;
289               goto unwind_done;
290            }
291            /* ??? cache indicates we have to do CFI unwind (so, we
292             previously found CFI info, and failed the fp unwind
293             check). Now, we just failed with CFI.  So, once we
294             succeed, once we fail.  No idea what is going on =>
295             cleanup the cache entry and fallover to fp unwind (this
296             time). */
297            fp_CF_verif_cache [hash] = 0;
298            if (debug) VG_(printf)("     cache reset as CFI ok then nok\n");
299            //??? stats
300            xip_verif = NOINFO;
301         } else {
302            /* case b : do "verif" cfi unwind in fpverif_uregs */
303            fpverif_uregs = uregs;
304            xip_verified = uregs.xip;
305            if ( !VG_(use_CF_info)( &fpverif_uregs, fp_min, fp_max ) ) {
306               fp_CF_verif_cache [hash] = uregs.xip ^ NOINFO;
307               if (debug) VG_(printf)("     cache NOINFO fpverif_uregs\n");
308               xip_verif = NOINFO;
309            }
310         }
311      }
312
313      /* On x86, try the old-fashioned method of following the
314         %ebp-chain.  This can be done if the fp_CF_verif_cache for xip
315         indicate fp unwind is ok. This must be done if the cache indicates
316         there is no info. This is also done to confirm what to put in the cache
317         if xip was not in the cache. */
318      /* This deals with frames resulting from functions which begin "pushl%
319         ebp ; movl %esp, %ebp" which is the ABI-mandated preamble. */
320      if (fp_min <= uregs.xbp &&
321          uregs.xbp <= fp_max - 1 * sizeof(UWord)/*see comment below*/)
322      {
323         /* fp looks sane, so use it. */
324         uregs.xip = (((UWord*)uregs.xbp)[1]);
325         // We stop if we hit a zero (the traditional end-of-stack
326         // marker) or a one -- these correspond to recorded IPs of 0 or -1.
327         // The latter because r8818 (in this file) changes the meaning of
328         // entries [1] and above in a stack trace, by subtracting 1 from
329         // them.  Hence stacks that used to end with a zero value now end in
330         // -1 and so we must detect that too.
331         if (0 == uregs.xip || 1 == uregs.xip) {
332            if (xip_verif > CFUNWIND) {
333               // Check if we obtain the same result with fp unwind.
334               // If same result, then mark xip as fp unwindable
335               if (uregs.xip == fpverif_uregs.xip) {
336                  fp_CF_verif_cache [hash] = xip_verified ^ FPUNWIND;
337                  if (debug) VG_(printf)("     cache FPUNWIND 0\n");
338                  unwind_case = "Fw";
339                  if (do_stats) stats.Fw++;
340                  break;
341               } else {
342                  fp_CF_verif_cache [hash] = xip_verified ^ CFUNWIND;
343                  uregs = fpverif_uregs;
344                  if (debug) VG_(printf)("     cache CFUNWIND 0\n");
345                  unwind_case = "Cf";
346                  if (do_stats) stats.Cf++;
347                  goto unwind_done;
348               }
349            } else {
350               // end of stack => out of the loop.
351               break;
352            }
353         }
354
355         uregs.xsp = uregs.xbp + sizeof(Addr) /*saved %ebp*/
356                               + sizeof(Addr) /*ra*/;
357         uregs.xbp = (((UWord*)uregs.xbp)[0]);
358         if (xip_verif > CFUNWIND) {
359            if (uregs.xip == fpverif_uregs.xip
360                && uregs.xsp == fpverif_uregs.xsp
361                && uregs.xbp == fpverif_uregs.xbp) {
362               fp_CF_verif_cache [hash] = xip_verified ^ FPUNWIND;
363               if (debug) VG_(printf)("     cache FPUNWIND >2\n");
364               if (debug) unwind_case = "FO";
365               if (do_stats) stats.FO++;
366            } else {
367               fp_CF_verif_cache [hash] = xip_verified ^ CFUNWIND;
368               if (debug) VG_(printf)("     cache CFUNWIND >2\n");
369               if (do_stats && uregs.xip != fpverif_uregs.xip) stats.xi++;
370               if (do_stats && uregs.xsp != fpverif_uregs.xsp) stats.xs++;
371               if (do_stats && uregs.xbp != fpverif_uregs.xbp) stats.xb++;
372               uregs = fpverif_uregs;
373               if (debug) unwind_case = "CF";
374               if (do_stats) stats.CF++;
375            }
376         } else {
377            if (debug) unwind_case = "FF";
378            if (do_stats) stats.FF++;
379         }
380         goto unwind_done;
381      } else {
382         // fp unwind has failed.
383         // If we were checking the validity of the cfi unwinding,
384         // we mark in the cache that the fp unwind cannot be done, and that
385         // cfi unwind is desired.
386         if (xip_verif > CFUNWIND) {
387            // We know that fpverif_uregs contains valid information,
388            // as a failed cf unwind would have put NOINFO in xip_verif.
389            fp_CF_verif_cache [hash] = xip_verified ^ CFUNWIND;
390            if (debug) VG_(printf)("     cache CFUNWIND as fp failed\n");
391            uregs = fpverif_uregs;
392            if (debug) unwind_case = "Ck";
393            if (do_stats) stats.Ck++;
394            goto unwind_done;
395         }
396         // xip_verif is FPUNWIND or NOINFO.
397         // We failed the cfi unwind and/or the fp unwind.
398         // => fallback to FPO info.
399      }
400
401      /* And, similarly, try for MSVC FPO unwind info. */
402      if ( VG_(use_FPO_info)( &uregs.xip, &uregs.xsp, &uregs.xbp,
403                              fp_min, fp_max ) ) {
404         if (debug) unwind_case = "MS";
405         if (do_stats) stats.MS++;
406         goto unwind_done;
407      }
408
409      /* No luck.  We have to give up. */
410      break;
411
412   unwind_done:
413      /* Add a frame in ips/sps/fps */
414      /* fp is %ebp.  sp is %esp.  ip is %eip. */
415      if (0 == uregs.xip || 1 == uregs.xip) break;
416      if (sps) sps[i] = uregs.xsp;
417      if (fps) fps[i] = uregs.xbp;
418      ips[i++] = uregs.xip - 1;
419      /* -1: refer to calling insn, not the RA */
420      if (debug)
421         VG_(printf)("     ips%s[%d]=0x%08lx\n", unwind_case, i-1, ips[i-1]);
422      uregs.xip = uregs.xip - 1;
423      /* as per comment at the head of this loop */
424      if (UNLIKELY(cmrf > 0)) {RECURSIVE_MERGE(cmrf,ips,i);};
425   }
426
427   if (do_stats) stats.nf += i;
428   if (do_stats && stats.nr % 10000 == 0) {
429     VG_(printf)("nr %u nf %u "
430                 "Ca %u FF %u "
431                 "Cf %u "
432                 "Fw %u FO %u "
433                 "CF %u (xi %u xs %u xb %u) "
434                 "Ck %u MS %u\n",
435                 stats.nr, stats.nf,
436                 stats.Ca, stats.FF,
437                 stats.Cf,
438                 stats.Fw, stats.FO,
439                 stats.CF, stats.xi, stats.xs, stats.xb,
440                 stats.Ck, stats.MS);
441   }
442   n_found = i;
443   return n_found;
444}
445
446#undef N_FP_CF_VERIF
447#undef FPUNWIND
448#undef NOINFO
449#undef CFUNWIND
450
451#endif
452
453/* ----------------------- amd64 ------------------------ */
454
455#if defined(VGP_amd64_linux) || defined(VGP_amd64_darwin)
456
457UInt VG_(get_StackTrace_wrk) ( ThreadId tid_if_known,
458                               /*OUT*/Addr* ips, UInt max_n_ips,
459                               /*OUT*/Addr* sps, /*OUT*/Addr* fps,
460                               const UnwindStartRegs* startRegs,
461                               Addr fp_max_orig )
462{
463   Bool  debug = False;
464   Int   i;
465   Addr  fp_max;
466   UInt  n_found = 0;
467   const Int cmrf = VG_(clo_merge_recursive_frames);
468
469   vg_assert(sizeof(Addr) == sizeof(UWord));
470   vg_assert(sizeof(Addr) == sizeof(void*));
471
472   D3UnwindRegs uregs;
473   uregs.xip = startRegs->r_pc;
474   uregs.xsp = startRegs->r_sp;
475   uregs.xbp = startRegs->misc.AMD64.r_rbp;
476   Addr fp_min = uregs.xsp;
477
478   /* Snaffle IPs from the client's stack into ips[0 .. max_n_ips-1],
479      stopping when the trail goes cold, which we guess to be
480      when FP is not a reasonable stack location. */
481
482   // JRS 2002-sep-17: hack, to round up fp_max to the end of the
483   // current page, at least.  Dunno if it helps.
484   // NJN 2002-sep-17: seems to -- stack traces look like 1.0.X again
485   fp_max = VG_PGROUNDUP(fp_max_orig);
486   if (fp_max >= sizeof(Addr))
487      fp_max -= sizeof(Addr);
488
489   if (debug)
490      VG_(printf)("max_n_ips=%d fp_min=0x%lx fp_max_orig=0x%lx, "
491                  "fp_max=0x%lx ip=0x%lx fp=0x%lx\n",
492                  max_n_ips, fp_min, fp_max_orig, fp_max,
493                  uregs.xip, uregs.xbp);
494
495   /* Assertion broken before main() is reached in pthreaded programs;  the
496    * offending stack traces only have one item.  --njn, 2002-aug-16 */
497   /* vg_assert(fp_min <= fp_max);*/
498   // On Darwin, this kicks in for pthread-related stack traces, so they're
499   // only 1 entry long which is wrong.
500#  if !defined(VGO_darwin)
501   if (fp_min + 256 >= fp_max) {
502      /* If the stack limits look bogus, don't poke around ... but
503         don't bomb out either. */
504      if (sps) sps[0] = uregs.xsp;
505      if (fps) fps[0] = uregs.xbp;
506      ips[0] = uregs.xip;
507      return 1;
508   }
509#  endif
510
511   /* fp is %rbp.  sp is %rsp.  ip is %rip. */
512
513   ips[0] = uregs.xip;
514   if (sps) sps[0] = uregs.xsp;
515   if (fps) fps[0] = uregs.xbp;
516   i = 1;
517
518   /* Loop unwinding the stack. Note that the IP value we get on
519    * each pass (whether from CFI info or a stack frame) is a
520    * return address so is actually after the calling instruction
521    * in the calling function.
522    *
523    * Because of this we subtract one from the IP after each pass
524    * of the loop so that we find the right CFI block on the next
525    * pass - otherwise we can find the wrong CFI info if it happens
526    * to change after the calling instruction and that will mean
527    * that we will fail to unwind the next step.
528    *
529    * This most frequently happens at the end of a function when
530    * a tail call occurs and we wind up using the CFI info for the
531    * next function which is completely wrong.
532    */
533   while (True) {
534
535      if (i >= max_n_ips)
536         break;
537
538      /* Try to derive a new (ip,sp,fp) triple from the current set. */
539
540      /* First off, see if there is any CFI info to hand which can
541         be used. */
542      if ( VG_(use_CF_info)( &uregs, fp_min, fp_max ) ) {
543         if (0 == uregs.xip || 1 == uregs.xip) break;
544         if (sps) sps[i] = uregs.xsp;
545         if (fps) fps[i] = uregs.xbp;
546         ips[i++] = uregs.xip - 1; /* -1: refer to calling insn, not the RA */
547         if (debug)
548            VG_(printf)("     ipsC[%d]=%#08lx\n", i-1, ips[i-1]);
549         uregs.xip = uregs.xip - 1; /* as per comment at the head of this loop */
550         if (UNLIKELY(cmrf > 0)) {RECURSIVE_MERGE(cmrf,ips,i);};
551         continue;
552      }
553
554      /* If VG_(use_CF_info) fails, it won't modify ip/sp/fp, so
555         we can safely try the old-fashioned method. */
556      /* This bit is supposed to deal with frames resulting from
557         functions which begin "pushq %rbp ; movq %rsp, %rbp".
558         Unfortunately, since we can't (easily) look at the insns at
559         the start of the fn, like GDB does, there's no reliable way
560         to tell.  Hence the hack of first trying out CFI, and if that
561         fails, then use this as a fallback. */
562      /* Note: re "- 1 * sizeof(UWord)", need to take account of the
563         fact that we are prodding at & ((UWord*)fp)[1] and so need to
564         adjust the limit check accordingly.  Omitting this has been
565         observed to cause segfaults on rare occasions. */
566      if (fp_min <= uregs.xbp && uregs.xbp <= fp_max - 1 * sizeof(UWord)) {
567         /* fp looks sane, so use it. */
568         uregs.xip = (((UWord*)uregs.xbp)[1]);
569         if (0 == uregs.xip || 1 == uregs.xip) break;
570         uregs.xsp = uregs.xbp + sizeof(Addr) /*saved %rbp*/
571                               + sizeof(Addr) /*ra*/;
572         uregs.xbp = (((UWord*)uregs.xbp)[0]);
573         if (sps) sps[i] = uregs.xsp;
574         if (fps) fps[i] = uregs.xbp;
575         ips[i++] = uregs.xip - 1; /* -1: refer to calling insn, not the RA */
576         if (debug)
577            VG_(printf)("     ipsF[%d]=%#08lx\n", i-1, ips[i-1]);
578         uregs.xip = uregs.xip - 1; /* as per comment at the head of this loop */
579         if (UNLIKELY(cmrf > 0)) {RECURSIVE_MERGE(cmrf,ips,i);};
580         continue;
581      }
582
583      /* Last-ditch hack (evidently GDB does something similar).  We
584         are in the middle of nowhere and we have a nonsense value for
585         the frame pointer.  If the stack pointer is still valid,
586         assume that what it points at is a return address.  Yes,
587         desperate measures.  Could do better here:
588         - check that the supposed return address is in
589           an executable page
590         - check that the supposed return address is just after a call insn
591         - given those two checks, don't just consider *sp as the return
592           address; instead scan a likely section of stack (eg sp .. sp+256)
593           and use suitable values found there.
594      */
595      if (fp_min <= uregs.xsp && uregs.xsp < fp_max) {
596         uregs.xip = ((UWord*)uregs.xsp)[0];
597         if (0 == uregs.xip || 1 == uregs.xip) break;
598         if (sps) sps[i] = uregs.xsp;
599         if (fps) fps[i] = uregs.xbp;
600         ips[i++] = uregs.xip == 0
601                    ? 0 /* sp[0] == 0 ==> stuck at the bottom of a
602                           thread stack */
603                    : uregs.xip - 1;
604                        /* -1: refer to calling insn, not the RA */
605         if (debug)
606            VG_(printf)("     ipsH[%d]=%#08lx\n", i-1, ips[i-1]);
607         uregs.xip = uregs.xip - 1; /* as per comment at the head of this loop */
608         uregs.xsp += 8;
609         if (UNLIKELY(cmrf > 0)) {RECURSIVE_MERGE(cmrf,ips,i);};
610         continue;
611      }
612
613      /* No luck at all.  We have to give up. */
614      break;
615   }
616
617   n_found = i;
618   return n_found;
619}
620
621#endif
622
623/* -----------------------ppc32/64 ---------------------- */
624
625#if defined(VGP_ppc32_linux) || defined(VGP_ppc64be_linux) \
626    || defined(VGP_ppc64le_linux)
627
628UInt VG_(get_StackTrace_wrk) ( ThreadId tid_if_known,
629                               /*OUT*/Addr* ips, UInt max_n_ips,
630                               /*OUT*/Addr* sps, /*OUT*/Addr* fps,
631                               const UnwindStartRegs* startRegs,
632                               Addr fp_max_orig )
633{
634   Bool  lr_is_first_RA = False;
635#  if defined(VG_PLAT_USES_PPCTOC) || defined(VGP_ppc64le_linux)
636   Word redir_stack_size = 0;
637   Word redirs_used      = 0;
638#  endif
639   const Int cmrf = VG_(clo_merge_recursive_frames);
640
641   Bool  debug = False;
642   Int   i;
643   Addr  fp_max;
644   UInt  n_found = 0;
645
646   vg_assert(sizeof(Addr) == sizeof(UWord));
647   vg_assert(sizeof(Addr) == sizeof(void*));
648
649   Addr ip = (Addr)startRegs->r_pc;
650   Addr sp = (Addr)startRegs->r_sp;
651   Addr fp = sp;
652#  if defined(VGP_ppc32_linux)
653   Addr lr = startRegs->misc.PPC32.r_lr;
654#  elif defined(VGP_ppc64be_linux) || defined(VGP_ppc64le_linux)
655   Addr lr = startRegs->misc.PPC64.r_lr;
656#  endif
657   Addr fp_min = sp;
658
659   /* Snaffle IPs from the client's stack into ips[0 .. max_n_ips-1],
660      stopping when the trail goes cold, which we guess to be
661      when FP is not a reasonable stack location. */
662
663   // JRS 2002-sep-17: hack, to round up fp_max to the end of the
664   // current page, at least.  Dunno if it helps.
665   // NJN 2002-sep-17: seems to -- stack traces look like 1.0.X again
666   fp_max = VG_PGROUNDUP(fp_max_orig);
667   if (fp_max >= sizeof(Addr))
668      fp_max -= sizeof(Addr);
669
670   if (debug)
671      VG_(printf)("max_n_ips=%d fp_min=0x%lx fp_max_orig=0x%lx, "
672                  "fp_max=0x%lx ip=0x%lx fp=0x%lx\n",
673		  max_n_ips, fp_min, fp_max_orig, fp_max, ip, fp);
674
675   /* Assertion broken before main() is reached in pthreaded programs;  the
676    * offending stack traces only have one item.  --njn, 2002-aug-16 */
677   /* vg_assert(fp_min <= fp_max);*/
678   if (fp_min + 512 >= fp_max) {
679      /* If the stack limits look bogus, don't poke around ... but
680         don't bomb out either. */
681      if (sps) sps[0] = sp;
682      if (fps) fps[0] = fp;
683      ips[0] = ip;
684      return 1;
685   }
686
687   /* fp is %r1.  ip is %cia.  Note, ppc uses r1 as both the stack and
688      frame pointers. */
689
690#  if defined(VGP_ppc64be_linux) || defined(VGP_ppc64le_linux)
691   redir_stack_size = VEX_GUEST_PPC64_REDIR_STACK_SIZE;
692   redirs_used      = 0;
693#  endif
694
695#  if defined(VG_PLAT_USES_PPCTOC)
696   /* Deal with bogus LR values caused by function
697      interception/wrapping on ppc-TOC platforms; see comment on
698      similar code a few lines further down. */
699   if (ULong_to_Ptr(lr) == (void*)&VG_(ppctoc_magic_redirect_return_stub)
700       && VG_(is_valid_tid)(tid_if_known)) {
701      Word hsp = VG_(threads)[tid_if_known].arch.vex.guest_REDIR_SP;
702      redirs_used++;
703      if (hsp >= 1 && hsp < redir_stack_size)
704         lr = VG_(threads)[tid_if_known]
705                 .arch.vex.guest_REDIR_STACK[hsp-1];
706   }
707#  endif
708
709   /* We have to determine whether or not LR currently holds this fn
710      (call it F)'s return address.  It might not if F has previously
711      called some other function, hence overwriting LR with a pointer
712      to some part of F.  Hence if LR and IP point to the same
713      function then we conclude LR does not hold this function's
714      return address; instead the LR at entry must have been saved in
715      the stack by F's prologue and so we must get it from there
716      instead.  Note all this guff only applies to the innermost
717      frame. */
718   lr_is_first_RA = False;
719   {
720      const HChar *buf_lr, *buf_ip;
721      /* The following conditional looks grossly inefficient and
722         surely could be majorly improved, with not much effort. */
723      if (VG_(get_fnname_raw) (lr, &buf_lr)) {
724         HChar buf_lr_copy[VG_(strlen)(buf_lr) + 1];
725         VG_(strcpy)(buf_lr_copy, buf_lr);
726         if (VG_(get_fnname_raw) (ip, &buf_ip))
727            if (VG_(strcmp)(buf_lr_copy, buf_ip))
728               lr_is_first_RA = True;
729      }
730   }
731
732   if (sps) sps[0] = fp; /* NB. not sp */
733   if (fps) fps[0] = fp;
734   ips[0] = ip;
735   i = 1;
736
737   if (fp_min <= fp && fp < fp_max-VG_WORDSIZE+1) {
738
739      /* initial FP is sane; keep going */
740      fp = (((UWord*)fp)[0]);
741
742      while (True) {
743
744        /* On ppc64-linux (ppc64-elf, really), the lr save
745           slot is 2 words back from sp, whereas on ppc32-elf(?) it's
746           only one word back. */
747#        if defined(VG_PLAT_USES_PPCTOC) || defined(VGP_ppc64le_linux)
748         const Int lr_offset = 2;
749#        else
750         const Int lr_offset = 1;
751#        endif
752
753         if (i >= max_n_ips)
754            break;
755
756         /* Try to derive a new (ip,fp) pair from the current set. */
757
758         if (fp_min <= fp && fp <= fp_max - lr_offset * sizeof(UWord)) {
759            /* fp looks sane, so use it. */
760
761            if (i == 1 && lr_is_first_RA)
762               ip = lr;
763            else
764               ip = (((UWord*)fp)[lr_offset]);
765
766#           if defined(VG_PLAT_USES_PPCTOC) || defined(VGP_ppc64le_linux)
767            /* Nasty hack to do with function replacement/wrapping on
768               ppc64-linux.  If LR points to our magic return stub,
769               then we are in a wrapped or intercepted function, in
770               which LR has been messed with.  The original LR will
771               have been pushed onto the thread's hidden REDIR stack
772               one down from the top (top element is the saved R2) and
773               so we should restore the value from there instead.
774               Since nested redirections can and do happen, we keep
775               track of the number of nested LRs used by the unwinding
776               so far with 'redirs_used'. */
777            if (ip == (Addr)&VG_(ppctoc_magic_redirect_return_stub)
778                && VG_(is_valid_tid)(tid_if_known)) {
779               Word hsp = VG_(threads)[tid_if_known]
780                             .arch.vex.guest_REDIR_SP;
781               hsp -= 2 * redirs_used;
782               redirs_used ++;
783               if (hsp >= 1 && hsp < redir_stack_size)
784                  ip = VG_(threads)[tid_if_known]
785                          .arch.vex.guest_REDIR_STACK[hsp-1];
786            }
787#           endif
788
789            if (0 == ip || 1 == ip) break;
790            if (sps) sps[i] = fp; /* NB. not sp */
791            if (fps) fps[i] = fp;
792            fp = (((UWord*)fp)[0]);
793            ips[i++] = ip - 1; /* -1: refer to calling insn, not the RA */
794            if (debug)
795               VG_(printf)("     ipsF[%d]=%#08lx\n", i-1, ips[i-1]);
796            ip = ip - 1; /* ip is probably dead at this point, but
797                            play safe, a la x86/amd64 above.  See
798                            extensive comments above. */
799            if (UNLIKELY(cmrf > 0)) {RECURSIVE_MERGE(cmrf,ips,i);};
800            continue;
801         }
802
803         /* No luck there.  We have to give up. */
804         break;
805      }
806   }
807
808   n_found = i;
809   return n_found;
810}
811
812#endif
813
814/* ------------------------ arm ------------------------- */
815
816#if defined(VGP_arm_linux)
817
818static Bool in_same_fn ( Addr a1, Addr a2 )
819{
820   const HChar *buf_a1, *buf_a2;
821   /* The following conditional looks grossly inefficient and
822      surely could be majorly improved, with not much effort. */
823   if (VG_(get_fnname_raw) (a1, &buf_a1)) {
824      HChar buf_a1_copy[VG_(strlen)(buf_a1) + 1];
825      VG_(strcpy)(buf_a1_copy, buf_a1);
826      if (VG_(get_fnname_raw) (a2, &buf_a2))
827         if (VG_(strcmp)(buf_a1_copy, buf_a2))
828            return True;
829   }
830   return False;
831}
832
833static Bool in_same_page ( Addr a1, Addr a2 ) {
834   return (a1 & ~0xFFF) == (a2 & ~0xFFF);
835}
836
837static Addr abs_diff ( Addr a1, Addr a2 ) {
838   return (Addr)(a1 > a2 ? a1 - a2 : a2 - a1);
839}
840
841static Bool has_XT_perms ( Addr a )
842{
843   NSegment const* seg = VG_(am_find_nsegment)(a);
844   return seg && seg->hasX && seg->hasT;
845}
846
847static Bool looks_like_Thumb_call32 ( UShort w0, UShort w1 )
848{
849   if (0)
850      VG_(printf)("isT32call %04x %04x\n", (UInt)w0, (UInt)w1);
851   // BL  simm26
852   if ((w0 & 0xF800) == 0xF000 && (w1 & 0xC000) == 0xC000) return True;
853   // BLX simm26
854   if ((w0 & 0xF800) == 0xF000 && (w1 & 0xC000) == 0xC000) return True;
855   return False;
856}
857
static Bool looks_like_Thumb_call16 ( UShort w0 )
{
   /* Heuristic for a 16-bit Thumb call instruction.  Currently a
      placeholder: no 16-bit encodings are recognised, so this always
      returns False ('w0' is deliberately unused). */
   return False;
}
862
863static Bool looks_like_ARM_call ( UInt a0 )
864{
865   if (0)
866      VG_(printf)("isA32call %08x\n", a0);
867   // Leading E forces unconditional only -- fix
868   if ((a0 & 0xFF000000) == 0xEB000000) return True;
869   return False;
870}
871
872static Bool looks_like_RA ( Addr ra )
873{
874   /* 'ra' is a plausible return address if it points to
875       an instruction after a call insn. */
876   Bool isT = (ra & 1);
877   if (isT) {
878      // returning to Thumb code
879      ra &= ~1;
880      ra -= 4;
881      if (has_XT_perms(ra)) {
882         UShort w0 = *(UShort*)ra;
883         UShort w1 = in_same_page(ra, ra+2) ? *(UShort*)(ra+2) : 0;
884         if (looks_like_Thumb_call16(w1) || looks_like_Thumb_call32(w0,w1))
885            return True;
886      }
887   } else {
888      // ARM
889      ra &= ~3;
890      ra -= 4;
891      if (has_XT_perms(ra)) {
892         UInt a0 = *(UInt*)ra;
893         if (looks_like_ARM_call(a0))
894            return True;
895      }
896   }
897   return False;
898}
899
/* ARM-specific unwinder.  Plan A uses Dwarf CFI; if that trail goes
   cold before --unw-stack-scan-thresh frames are found, Plan B scans
   the stack itself for words that look like return addresses.
   Returns the number of entries written to ips[] (>= 1). */
UInt VG_(get_StackTrace_wrk) ( ThreadId tid_if_known,
                               /*OUT*/Addr* ips, UInt max_n_ips,
                               /*OUT*/Addr* sps, /*OUT*/Addr* fps,
                               const UnwindStartRegs* startRegs,
                               Addr fp_max_orig )
{
   Bool  debug = False;
   Int   i;
   Addr  fp_max;
   UInt  n_found = 0;
   /* Max lookback distance for merging recursive-call frames. */
   const Int cmrf = VG_(clo_merge_recursive_frames);

   vg_assert(sizeof(Addr) == sizeof(UWord));
   vg_assert(sizeof(Addr) == sizeof(void*));

   D3UnwindRegs uregs;
   /* Mask off the Thumb bit: the unwinder wants the plain
      instruction address in r15. */
   uregs.r15 = startRegs->r_pc & 0xFFFFFFFE;
   uregs.r14 = startRegs->misc.ARM.r14;
   uregs.r13 = startRegs->r_sp;
   uregs.r12 = startRegs->misc.ARM.r12;
   uregs.r11 = startRegs->misc.ARM.r11;
   uregs.r7  = startRegs->misc.ARM.r7;
   Addr fp_min = uregs.r13;

   /* Snaffle IPs from the client's stack into ips[0 .. max_n_ips-1],
      stopping when the trail goes cold, which we guess to be
      when FP is not a reasonable stack location. */

   // JRS 2002-sep-17: hack, to round up fp_max to the end of the
   // current page, at least.  Dunno if it helps.
   // NJN 2002-sep-17: seems to -- stack traces look like 1.0.X again
   fp_max = VG_PGROUNDUP(fp_max_orig);
   if (fp_max >= sizeof(Addr))
      fp_max -= sizeof(Addr);

   if (debug)
      VG_(printf)("\nmax_n_ips=%d fp_min=0x%lx fp_max_orig=0x%lx, "
                  "fp_max=0x%lx r15=0x%lx r13=0x%lx\n",
                  max_n_ips, fp_min, fp_max_orig, fp_max,
                  uregs.r15, uregs.r13);

   /* Assertion broken before main() is reached in pthreaded programs;  the
    * offending stack traces only have one item.  --njn, 2002-aug-16 */
   /* vg_assert(fp_min <= fp_max);*/
   // On Darwin, this kicks in for pthread-related stack traces, so they're
   // only 1 entry long which is wrong.
   if (fp_min + 512 >= fp_max) {
      /* If the stack limits look bogus, don't poke around ... but
         don't bomb out either. */
      if (sps) sps[0] = uregs.r13;
      if (fps) fps[0] = 0;
      ips[0] = uregs.r15;
      return 1;
   }

   /* Frame 0 comes straight from the start registers.  No FP value
      is reported on this platform (fps entries are 0). */

   if (sps) sps[0] = uregs.r13;
   if (fps) fps[0] = 0;
   ips[0] = uregs.r15;
   i = 1;

   /* Loop unwinding the stack. */
   Bool do_stack_scan = False;

   /* First try the Official Way, using Dwarf CFI. */
   while (True) {
      if (debug) {
         VG_(printf)("i: %d, r15: 0x%lx, r13: 0x%lx\n",
                     i, uregs.r15, uregs.r13);
      }

      if (i >= max_n_ips)
         break;

      if (VG_(use_CF_info)( &uregs, fp_min, fp_max )) {
         if (sps) sps[i] = uregs.r13;
         if (fps) fps[i] = 0;
         /* -1: record an address within the call insn rather than the
            return address, having first cleared the Thumb bit. */
         ips[i++] = (uregs.r15 & 0xFFFFFFFE) - 1;
         if (debug)
            VG_(printf)("USING CFI: r15: 0x%lx, r13: 0x%lx\n",
                        uregs.r15, uregs.r13);
         uregs.r15 = (uregs.r15 & 0xFFFFFFFE) - 1;
         if (UNLIKELY(cmrf > 0)) {RECURSIVE_MERGE(cmrf,ips,i);};
         continue;
      }

      /* No luck.  We have to give up. */
      do_stack_scan = True;
      break;
   }

   /* Now try Plan B (maybe) -- stack scanning.  This often gives
      pretty bad results, so this has to be enabled explicitly by the
      user. */
   if (do_stack_scan
       && i < max_n_ips && i < (Int)VG_(clo_unw_stack_scan_thresh)) {
      Int  nByStackScan = 0;
      Addr lr = uregs.r14;
      Addr sp = uregs.r13 & ~3;
      Addr pc = uregs.r15;
      // First see if LR contains
      // something that could be a valid return address.
      if (!in_same_fn(lr, pc) && looks_like_RA(lr)) {
         // take it only if 'cand' isn't obviously a duplicate
         // of the last found IP value
         Addr cand = (lr & 0xFFFFFFFE) - 1;
         if (abs_diff(cand, ips[i-1]) > 1) {
            if (sps) sps[i] = 0;
            if (fps) fps[i] = 0;
            ips[i++] = cand;
            if (UNLIKELY(cmrf > 0)) {RECURSIVE_MERGE(cmrf,ips,i);};
            nByStackScan++;
         }
      }
      /* Scan words upwards from SP, but never beyond the page SP
         started on; stop after --unw-stack-scan-frames candidates. */
      while (in_same_page(sp, uregs.r13)) {
         if (i >= max_n_ips)
            break;
         // we're in the same page; fairly safe to keep going
         UWord w = *(UWord*)(sp & ~0x3);
         if (looks_like_RA(w)) {
            Addr cand = (w & 0xFFFFFFFE) - 1;
            // take it only if 'cand' isn't obviously a duplicate
            // of the last found IP value
            if (abs_diff(cand, ips[i-1]) > 1) {
               if (sps) sps[i] = 0;
               if (fps) fps[i] = 0;
               ips[i++] = cand;
               if (UNLIKELY(cmrf > 0)) {RECURSIVE_MERGE(cmrf,ips,i);};
               if (++nByStackScan >= VG_(clo_unw_stack_scan_frames)) break;
            }
         }
         sp += 4;
      }
   }

   n_found = i;
   return n_found;
}
1039
1040#endif
1041
1042/* ------------------------ arm64 ------------------------- */
1043
1044#if defined(VGP_arm64_linux)
1045
1046UInt VG_(get_StackTrace_wrk) ( ThreadId tid_if_known,
1047                               /*OUT*/Addr* ips, UInt max_n_ips,
1048                               /*OUT*/Addr* sps, /*OUT*/Addr* fps,
1049                               const UnwindStartRegs* startRegs,
1050                               Addr fp_max_orig )
1051{
1052   Bool  debug = False;
1053   Int   i;
1054   Addr  fp_max;
1055   UInt  n_found = 0;
1056   const Int cmrf = VG_(clo_merge_recursive_frames);
1057
1058   vg_assert(sizeof(Addr) == sizeof(UWord));
1059   vg_assert(sizeof(Addr) == sizeof(void*));
1060
1061   D3UnwindRegs uregs;
1062   uregs.pc = startRegs->r_pc;
1063   uregs.sp = startRegs->r_sp;
1064   uregs.x30 = startRegs->misc.ARM64.x30;
1065   uregs.x29 = startRegs->misc.ARM64.x29;
1066   Addr fp_min = uregs.sp;
1067
1068   /* Snaffle IPs from the client's stack into ips[0 .. max_n_ips-1],
1069      stopping when the trail goes cold, which we guess to be
1070      when FP is not a reasonable stack location. */
1071
1072   // JRS 2002-sep-17: hack, to round up fp_max to the end of the
1073   // current page, at least.  Dunno if it helps.
1074   // NJN 2002-sep-17: seems to -- stack traces look like 1.0.X again
1075   fp_max = VG_PGROUNDUP(fp_max_orig);
1076   if (fp_max >= sizeof(Addr))
1077      fp_max -= sizeof(Addr);
1078
1079   if (debug)
1080      VG_(printf)("\nmax_n_ips=%d fp_min=0x%lx fp_max_orig=0x%lx, "
1081                  "fp_max=0x%lx PC=0x%lx SP=0x%lx\n",
1082                  max_n_ips, fp_min, fp_max_orig, fp_max,
1083                  uregs.pc, uregs.sp);
1084
1085   /* Assertion broken before main() is reached in pthreaded programs;  the
1086    * offending stack traces only have one item.  --njn, 2002-aug-16 */
1087   /* vg_assert(fp_min <= fp_max);*/
1088   // On Darwin, this kicks in for pthread-related stack traces, so they're
1089   // only 1 entry long which is wrong.
1090   if (fp_min + 512 >= fp_max) {
1091      /* If the stack limits look bogus, don't poke around ... but
1092         don't bomb out either. */
1093      if (sps) sps[0] = uregs.sp;
1094      if (fps) fps[0] = uregs.x29;
1095      ips[0] = uregs.pc;
1096      return 1;
1097   }
1098
1099   /* */
1100
1101   if (sps) sps[0] = uregs.sp;
1102   if (fps) fps[0] = uregs.x29;
1103   ips[0] = uregs.pc;
1104   i = 1;
1105
1106   /* Loop unwinding the stack, using CFI. */
1107   while (True) {
1108      if (debug) {
1109         VG_(printf)("i: %d, pc: 0x%lx, sp: 0x%lx\n",
1110                     i, uregs.pc, uregs.sp);
1111      }
1112
1113      if (i >= max_n_ips)
1114         break;
1115
1116      if (VG_(use_CF_info)( &uregs, fp_min, fp_max )) {
1117         if (sps) sps[i] = uregs.sp;
1118         if (fps) fps[i] = uregs.x29;
1119         ips[i++] = uregs.pc - 1;
1120         if (debug)
1121            VG_(printf)("USING CFI: pc: 0x%lx, sp: 0x%lx\n",
1122                        uregs.pc, uregs.sp);
1123         uregs.pc = uregs.pc - 1;
1124         if (UNLIKELY(cmrf > 0)) {RECURSIVE_MERGE(cmrf,ips,i);};
1125         continue;
1126      }
1127
1128      /* No luck.  We have to give up. */
1129      break;
1130   }
1131
1132   n_found = i;
1133   return n_found;
1134}
1135
1136#endif
1137
1138/* ------------------------ s390x ------------------------- */
1139
1140#if defined(VGP_s390x_linux)
1141
/* s390x-linux unwinder.  Relies almost entirely on Dwarf CFI
   (eh_frame): gcc on s390x does not maintain a backchain by default,
   so there is no frame-pointer fallback.  One special case: if CFI
   fails on the very first step, the link register is tried as the
   caller's address and the walk retried.  Returns the number of
   entries written to ips[] (>= 1). */
UInt VG_(get_StackTrace_wrk) ( ThreadId tid_if_known,
                               /*OUT*/Addr* ips, UInt max_n_ips,
                               /*OUT*/Addr* sps, /*OUT*/Addr* fps,
                               const UnwindStartRegs* startRegs,
                               Addr fp_max_orig )
{
   Bool  debug = False;
   Int   i;
   Addr  fp_max;
   UInt  n_found = 0;
   /* Max lookback distance for merging recursive-call frames. */
   const Int cmrf = VG_(clo_merge_recursive_frames);

   vg_assert(sizeof(Addr) == sizeof(UWord));
   vg_assert(sizeof(Addr) == sizeof(void*));

   D3UnwindRegs uregs;
   uregs.ia = startRegs->r_pc;
   uregs.sp = startRegs->r_sp;
   Addr fp_min = uregs.sp;
   uregs.fp = startRegs->misc.S390X.r_fp;
   uregs.lr = startRegs->misc.S390X.r_lr;

   /* Round the top-of-stack bound up to a page boundary, then step
      back one word so reads at fp_max stay within the stack. */
   fp_max = VG_PGROUNDUP(fp_max_orig);
   if (fp_max >= sizeof(Addr))
      fp_max -= sizeof(Addr);

   if (debug)
      VG_(printf)("max_n_ips=%d fp_min=0x%lx fp_max_orig=0x%lx, "
                  "fp_max=0x%lx IA=0x%lx SP=0x%lx FP=0x%lx\n",
                  max_n_ips, fp_min, fp_max_orig, fp_max,
                  uregs.ia, uregs.sp,uregs.fp);

   /* The first frame is pretty obvious */
   ips[0] = uregs.ia;
   if (sps) sps[0] = uregs.sp;
   if (fps) fps[0] = uregs.fp;
   i = 1;

   /* for everything else we have to rely on the eh_frame. gcc defaults to
      not create a backchain and all the other  tools (like gdb) also have
      to use the CFI. */
   while (True) {
      if (i >= max_n_ips)
         break;

      if (VG_(use_CF_info)( &uregs, fp_min, fp_max )) {
         if (sps) sps[i] = uregs.sp;
         if (fps) fps[i] = uregs.fp;
         /* -1: point within the call insn, not at the return addr. */
         ips[i++] = uregs.ia - 1;
         uregs.ia = uregs.ia - 1;
         /* Note: RECURSIVE_MERGE may reduce 'i'. */
         if (UNLIKELY(cmrf > 0)) {RECURSIVE_MERGE(cmrf,ips,i);};
         continue;
      }
      /* A problem on the first frame? Lets assume it was a bad jump.
         We will use the link register and the current stack and frame
         pointers and see if we can use the CFI in the next round. */
      if (i == 1) {
         if (sps) {
            sps[i] = sps[0];
            uregs.sp = sps[0];
         }
         if (fps) {
            fps[i] = fps[0];
            uregs.fp = fps[0];
         }
         uregs.ia = uregs.lr - 1;
         ips[i++] = uregs.lr - 1;
         if (UNLIKELY(cmrf > 0)) {RECURSIVE_MERGE(cmrf,ips,i);};
         continue;
      }

      /* No luck.  We have to give up. */
      break;
   }

   n_found = i;
   return n_found;
}
1220
1221#endif
1222
1223/* ------------------------ mips 32/64 ------------------------- */
1224#if defined(VGP_mips32_linux) || defined(VGP_mips64_linux)
/* mips32/64-linux unwinder.  Plan A is Dwarf CFI.  Failing that, the
   instructions between the current function's entry point and the
   current PC are scanned for stack-pointer adjustments, from which
   the frame size is reconstructed and RA used as the return address.
   Returns the number of entries written to ips[] (>= 1). */
UInt VG_(get_StackTrace_wrk) ( ThreadId tid_if_known,
                               /*OUT*/Addr* ips, UInt max_n_ips,
                               /*OUT*/Addr* sps, /*OUT*/Addr* fps,
                               const UnwindStartRegs* startRegs,
                               Addr fp_max_orig )
{
   Bool  debug = False;
   Int   i;
   Addr  fp_max;
   UInt  n_found = 0;
   /* Max lookback distance for merging recursive-call frames. */
   const Int cmrf = VG_(clo_merge_recursive_frames);

   vg_assert(sizeof(Addr) == sizeof(UWord));
   vg_assert(sizeof(Addr) == sizeof(void*));

   D3UnwindRegs uregs;
   uregs.pc = startRegs->r_pc;
   uregs.sp = startRegs->r_sp;
   Addr fp_min = uregs.sp;

#if defined(VGP_mips32_linux)
   uregs.fp = startRegs->misc.MIPS32.r30;
   uregs.ra = startRegs->misc.MIPS32.r31;
#elif defined(VGP_mips64_linux)
   uregs.fp = startRegs->misc.MIPS64.r30;
   uregs.ra = startRegs->misc.MIPS64.r31;
#endif

   /* Snaffle IPs from the client's stack into ips[0 .. max_n_ips-1],
      stopping when the trail goes cold, which we guess to be
      when FP is not a reasonable stack location. */

   fp_max = VG_PGROUNDUP(fp_max_orig);
   if (fp_max >= sizeof(Addr))
      fp_max -= sizeof(Addr);

   if (debug)
      VG_(printf)("max_n_ips=%d fp_min=0x%lx fp_max_orig=0x%lx, "
                  "fp_max=0x%lx pc=0x%lx sp=0x%lx fp=0x%lx\n",
                  max_n_ips, fp_min, fp_max_orig, fp_max,
                  uregs.pc, uregs.sp, uregs.fp);

   /* The innermost frame comes directly from the start registers. */
   if (sps) sps[0] = uregs.sp;
   if (fps) fps[0] = uregs.fp;
   ips[0] = uregs.pc;
   i = 1;

   /* Loop unwinding the stack. */

   while (True) {
      if (debug) {
         VG_(printf)("i: %d, pc: 0x%lx, sp: 0x%lx, ra: 0x%lx\n",
                     i, uregs.pc, uregs.sp, uregs.ra);
      }
      if (i >= max_n_ips)
         break;

      /* Keep a copy so the register state can be rolled back if CFI
         produces a junk (0 or 1) PC. */
      D3UnwindRegs uregs_copy = uregs;
      if (VG_(use_CF_info)( &uregs, fp_min, fp_max )) {
         if (debug)
            VG_(printf)("USING CFI: pc: 0x%lx, sp: 0x%lx, ra: 0x%lx\n",
                        uregs.pc, uregs.sp, uregs.ra);
         if (0 != uregs.pc && 1 != uregs.pc) {
            if (sps) sps[i] = uregs.sp;
            if (fps) fps[i] = uregs.fp;
            /* -4: point at the branch/jump insn, not the RA. */
            ips[i++] = uregs.pc - 4;
            uregs.pc = uregs.pc - 4;
            /* Note: RECURSIVE_MERGE may reduce 'i'. */
            if (UNLIKELY(cmrf > 0)) {RECURSIVE_MERGE(cmrf,ips,i);};
            continue;
         } else
            uregs = uregs_copy;
      }

      int seen_sp_adjust = 0;
      long frame_offset = 0;
      PtrdiffT offset;
      /* Plan B: scan from the function's entry point up to the
         current pc for SP-decrementing instructions, accumulating the
         total frame size in frame_offset. */
      if (VG_(get_inst_offset_in_function)(uregs.pc, &offset)) {
         Addr start_pc = uregs.pc - offset;
         Addr limit_pc = uregs.pc;
         Addr cur_pc;
         for (cur_pc = start_pc; cur_pc < limit_pc; cur_pc += 4) {
            unsigned long inst, high_word, low_word;
            unsigned long * cur_inst;
            /* Fetch the instruction.   */
            cur_inst = (unsigned long *)cur_pc;
            inst = *((UInt *) cur_inst);
            if(debug)
               VG_(printf)("cur_pc: 0x%lx, inst: 0x%lx\n", cur_pc, inst);

            /* Save some code by pre-extracting some useful fields.  */
            high_word = (inst >> 16) & 0xffff;
            low_word = inst & 0xffff;

            if (high_word == 0x27bd        /* addiu $sp,$sp,-i */
                || high_word == 0x23bd     /* addi $sp,$sp,-i */
                || high_word == 0x67bd) {  /* daddiu $sp,$sp,-i */
               if (low_word & 0x8000)	/* negative stack adjustment? */
                  frame_offset += 0x10000 - low_word;
               else
                  /* Exit loop if a positive stack adjustment is found, which
                     usually means that the stack cleanup code in the function
                     epilogue is reached.  */
               break;
            /* NB: despite the indentation, the assignment below runs
               only on the negative-adjustment path -- the positive
               path 'break's out of the scan just above. */
            seen_sp_adjust = 1;
            }
         }
         if(debug)
            VG_(printf)("offset: 0x%lx\n", frame_offset);
      }
      if (seen_sp_adjust) {
         if (0 == uregs.pc || 1 == uregs.pc) break;
         /* pc == ra-8 would mean no progress was made: give up. */
         if (uregs.pc == uregs.ra - 8) break;
         if (sps) {
            sps[i] = uregs.sp + frame_offset;
         }
         uregs.sp = uregs.sp + frame_offset;

         if (fps) {
            fps[i] = fps[0];
            uregs.fp = fps[0];
         }
         if (0 == uregs.ra || 1 == uregs.ra) break;
         /* -8: step back over the branch and its delay slot. */
         uregs.pc = uregs.ra - 8;
         ips[i++] = uregs.ra - 8;
         if (UNLIKELY(cmrf > 0)) {RECURSIVE_MERGE(cmrf,ips,i);};
         continue;
      }

      /* Last resort, first frame only: assume RA holds the caller's
         address and retry with unchanged SP/FP. */
      if (i == 1) {
         if (sps) {
            sps[i] = sps[0];
            uregs.sp = sps[0];
         }
         if (fps) {
            fps[i] = fps[0];
            uregs.fp = fps[0];
         }
         if (0 == uregs.ra || 1 == uregs.ra) break;
         uregs.pc = uregs.ra - 8;
         ips[i++] = uregs.ra - 8;
         if (UNLIKELY(cmrf > 0)) {RECURSIVE_MERGE(cmrf,ips,i);};
         continue;
      }
      /* No luck.  We have to give up. */
      break;
   }

   n_found = i;
   return n_found;
}
1375
1376#endif
1377
1378
1379/*------------------------------------------------------------*/
1380/*---                                                      ---*/
1381/*--- END platform-dependent unwinder worker functions     ---*/
1382/*---                                                      ---*/
1383/*------------------------------------------------------------*/
1384
1385/*------------------------------------------------------------*/
1386/*--- Exported functions.                                  ---*/
1387/*------------------------------------------------------------*/
1388
/* Top-level entry point: capture a stack trace for 'tid' into ips[]
   (and optionally sps[]/fps[]), starting from the thread's current
   register state adjusted by first_ip_delta, and delegating the
   actual walk to the platform-specific VG_(get_StackTrace_wrk).
   Returns the number of entries written. */
UInt VG_(get_StackTrace) ( ThreadId tid,
                           /*OUT*/StackTrace ips, UInt max_n_ips,
                           /*OUT*/StackTrace sps,
                           /*OUT*/StackTrace fps,
                           Word first_ip_delta )
{
   /* Get the register values with which to start the unwind. */
   UnwindStartRegs startRegs;
   VG_(memset)( &startRegs, 0, sizeof(startRegs) );
   VG_(get_UnwindStartRegs)( &startRegs, tid );

   /* Initial stack bounds; may be refined by VG_(stack_limits)
      below. */
   Addr stack_highest_byte = VG_(threads)[tid].client_stack_highest_byte;
   Addr stack_lowest_byte  = 0;

#  if defined(VGP_x86_linux)
   /* Nasty little hack to deal with syscalls - if libc is using its
      _dl_sysinfo_int80 function for syscalls (the TLS version does),
      then ip will always appear to be in that function when doing a
      syscall, not the actual libc function doing the syscall.  This
      check sees if IP is within that function, and pops the return
      address off the stack so that ip is placed within the library
      function calling the syscall.  This makes stack backtraces much
      more useful.

      The function is assumed to look like this (from glibc-2.3.6 sources):
         _dl_sysinfo_int80:
            int $0x80
            ret
      That is 3 (2+1) bytes long.  We could be more thorough and check
      the 3 bytes of the function are as expected, but I can't be
      bothered.
   */
   if (VG_(client__dl_sysinfo_int80) != 0 /* we know its address */
       && startRegs.r_pc >= VG_(client__dl_sysinfo_int80)
       && startRegs.r_pc < VG_(client__dl_sysinfo_int80)+3
       && VG_(am_is_valid_for_client)(startRegs.r_pc, sizeof(Addr),
                                      VKI_PROT_READ)) {
      startRegs.r_pc  = (ULong) *(Addr*)(UWord)startRegs.r_sp;
      startRegs.r_sp += (ULong) sizeof(Addr);
   }
#  endif

   /* See if we can get a better idea of the stack limits */
   VG_(stack_limits)( (Addr)startRegs.r_sp,
                      &stack_lowest_byte, &stack_highest_byte );

   /* Take into account the first_ip_delta. */
   startRegs.r_pc += (Long)(Word)first_ip_delta;

   if (0)
      VG_(printf)("tid %d: stack_highest=0x%08lx ip=0x%010llx "
                  "sp=0x%010llx\n",
		  tid, stack_highest_byte,
                  startRegs.r_pc, startRegs.r_sp);

   return VG_(get_StackTrace_wrk)(tid, ips, max_n_ips,
                                       sps, fps,
                                       &startRegs,
                                       stack_highest_byte);
}
1449
1450static void printIpDesc(UInt n, Addr ip, void* uu_opaque)
1451{
1452   InlIPCursor *iipc = VG_(new_IIPC)(ip);
1453
1454   do {
1455      const HChar *buf = VG_(describe_IP)(ip, iipc);
1456      if (VG_(clo_xml)) {
1457         VG_(printf_xml)("    %s\n", buf);
1458      } else {
1459         VG_(message)(Vg_UserMsg, "   %s %s\n",
1460                      ( n == 0 ? "at" : "by" ), buf);
1461      }
1462      n++;
1463      // Increase n to show "at" for only one level.
1464   } while (VG_(next_IIPC)(iipc));
1465   VG_(delete_IIPC)(iipc);
1466}
1467
1468/* Print a StackTrace. */
1469void VG_(pp_StackTrace) ( StackTrace ips, UInt n_ips )
1470{
1471   vg_assert( n_ips > 0 );
1472
1473   if (VG_(clo_xml))
1474      VG_(printf_xml)("  <stack>\n");
1475
1476   VG_(apply_StackTrace)( printIpDesc, NULL, ips, n_ips );
1477
1478   if (VG_(clo_xml))
1479      VG_(printf_xml)("  </stack>\n");
1480}
1481
1482/* Get and immediately print a StackTrace. */
1483void VG_(get_and_pp_StackTrace) ( ThreadId tid, UInt max_n_ips )
1484{
1485   Addr ips[max_n_ips];
1486   UInt n_ips
1487      = VG_(get_StackTrace)(tid, ips, max_n_ips,
1488                            NULL/*array to dump SP values in*/,
1489                            NULL/*array to dump FP values in*/,
1490                            0/*first_ip_delta*/);
1491   VG_(pp_StackTrace)(ips, n_ips);
1492}
1493
1494void VG_(apply_StackTrace)(
1495        void(*action)(UInt n, Addr ip, void* opaque),
1496        void* opaque,
1497        StackTrace ips, UInt n_ips
1498     )
1499{
1500   Bool main_done = False;
1501   Int i = 0;
1502
1503   vg_assert(n_ips > 0);
1504   do {
1505      Addr ip = ips[i];
1506
1507      // Stop after the first appearance of "main" or one of the other names
1508      // (the appearance of which is a pretty good sign that we've gone past
1509      // main without seeing it, for whatever reason)
1510      if ( ! VG_(clo_show_below_main) ) {
1511         Vg_FnNameKind kind = VG_(get_fnname_kind_from_IP)(ip);
1512         if (Vg_FnNameMain == kind || Vg_FnNameBelowMain == kind) {
1513            main_done = True;
1514         }
1515      }
1516
1517      // Act on the ip
1518      action(i, ip, opaque);
1519
1520      i++;
1521   } while (i < n_ips && !main_done);
1522}
1523
1524
1525/*--------------------------------------------------------------------*/
1526/*--- end                                                          ---*/
1527/*--------------------------------------------------------------------*/
1528