storage.c revision b32f58018498ea2225959b0ba11c18f0c433deef
1
2/*--------------------------------------------------------------------*/
3/*--- Format-neutral storage of and querying of info acquired from ---*/
4/*--- ELF/XCOFF stabs/dwarf1/dwarf2/dwarf3 debug info.             ---*/
5/*---                                                    storage.c ---*/
6/*--------------------------------------------------------------------*/
7
8/*
9   This file is part of Valgrind, a dynamic binary instrumentation
10   framework.
11
12   Copyright (C) 2000-2011 Julian Seward
13      jseward@acm.org
14
15   This program is free software; you can redistribute it and/or
16   modify it under the terms of the GNU General Public License as
17   published by the Free Software Foundation; either version 2 of the
18   License, or (at your option) any later version.
19
20   This program is distributed in the hope that it will be useful, but
21   WITHOUT ANY WARRANTY; without even the implied warranty of
22   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
23   General Public License for more details.
24
25   You should have received a copy of the GNU General Public License
26   along with this program; if not, write to the Free Software
27   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
28   02111-1307, USA.
29
30   The GNU General Public License is contained in the file COPYING.
31*/
32
33/* This file manages the data structures built by the debuginfo
34   system.  These are: the top level SegInfo list.  For each SegInfo,
35   there are tables for for address-to-symbol mappings,
36   address-to-src-file/line mappings, and address-to-CFI-info
37   mappings.
38*/
39
40#include "pub_core_basics.h"
41#include "pub_core_options.h"      /* VG_(clo_verbosity) */
42#include "pub_core_debuginfo.h"
43#include "pub_core_libcassert.h"
44#include "pub_core_libcbase.h"
45#include "pub_core_libcprint.h"
46#include "pub_core_xarray.h"
47#include "pub_core_oset.h"
48
49#include "priv_misc.h"         /* dinfo_zalloc/free/strdup */
50#include "priv_d3basics.h"     /* ML_(pp_GX) */
51#include "priv_tytypes.h"
52#include "priv_storage.h"      /* self */
53
54
55/*------------------------------------------------------------*/
56/*--- Misc (printing, errors)                              ---*/
57/*------------------------------------------------------------*/
58
59/* Show a non-fatal debug info reading error.  Use vg_panic if
60   terminal.  'serious' errors are shown regardless of the
61   verbosity setting. */
62void ML_(symerr) ( struct _DebugInfo* di, Bool serious, HChar* msg )
63{
64   /* XML mode hides everything :-( */
65   if (VG_(clo_xml))
66      return;
67
68   if (serious) {
69
70      VG_(message)(Vg_DebugMsg, "WARNING: Serious error when "
71                                "reading debug info\n");
72      if (True || VG_(clo_verbosity) < 2) {
73         /* Need to show what the file name is, at verbosity levels 2
74            or below, since that won't already have been shown */
75         VG_(message)(Vg_DebugMsg,
76                      "When reading debug info from %s:\n",
77                      (di && di->fsm.filename) ? di->fsm.filename
78                                               : (UChar*)"???");
79      }
80      VG_(message)(Vg_DebugMsg, "%s\n", msg);
81
82   } else { /* !serious */
83
84      if (VG_(clo_verbosity) >= 2)
85         VG_(message)(Vg_DebugMsg, "%s\n", msg);
86
87   }
88}
89
90
91/* Print a symbol. */
92void ML_(ppSym) ( Int idx, DiSym* sym )
93{
94   UChar** sec_names = sym->sec_names;
95   vg_assert(sym->pri_name);
96   if (sec_names)
97      vg_assert(sec_names);
98   VG_(printf)( "%5d:  %c%c %#8lx .. %#8lx (%d)      %s%s",
99                idx,
100                sym->isText ? 'T' : '-',
101                sym->isIFunc ? 'I' : '-',
102                sym->addr,
103                sym->addr + sym->size - 1, sym->size,
104                sym->pri_name, sec_names ? " " : "" );
105   if (sec_names) {
106      while (*sec_names) {
107         VG_(printf)("%s%s", *sec_names, *(sec_names+1) ? " " : "");
108         sec_names++;
109      }
110   }
111   VG_(printf)("\n");
112}
113
114/* Print a call-frame-info summary. */
115void ML_(ppDiCfSI) ( XArray* /* of CfiExpr */ exprs, DiCfSI* si )
116{
117#  define SHOW_HOW(_how, _off)                   \
118      do {                                       \
119         if (_how == CFIR_UNKNOWN) {             \
120            VG_(printf)("Unknown");              \
121         } else                                  \
122         if (_how == CFIR_SAME) {                \
123            VG_(printf)("Same");                 \
124         } else                                  \
125         if (_how == CFIR_CFAREL) {              \
126            VG_(printf)("cfa+%d", _off);         \
127         } else                                  \
128         if (_how == CFIR_MEMCFAREL) {           \
129            VG_(printf)("*(cfa+%d)", _off);      \
130         } else                                  \
131         if (_how == CFIR_EXPR) {                \
132            VG_(printf)("{");                    \
133            ML_(ppCfiExpr)(exprs, _off);         \
134            VG_(printf)("}");                    \
135         } else {                                \
136            vg_assert(0+0);                      \
137         }                                       \
138      } while (0)
139
140   VG_(printf)("[%#lx .. %#lx]: ", si->base,
141                               si->base + (UWord)si->len - 1);
142   switch (si->cfa_how) {
143      case CFIC_IA_SPREL:
144         VG_(printf)("let cfa=oldSP+%d", si->cfa_off);
145         break;
146      case CFIC_IA_BPREL:
147         VG_(printf)("let cfa=oldBP+%d", si->cfa_off);
148         break;
149      case CFIC_ARM_R13REL:
150         VG_(printf)("let cfa=oldR13+%d", si->cfa_off);
151         break;
152      case CFIC_ARM_R12REL:
153         VG_(printf)("let cfa=oldR12+%d", si->cfa_off);
154         break;
155      case CFIC_ARM_R11REL:
156         VG_(printf)("let cfa=oldR11+%d", si->cfa_off);
157         break;
158      case CFIR_SAME:
159         VG_(printf)("let cfa=Same");
160         break;
161      case CFIC_ARM_R7REL:
162         VG_(printf)("let cfa=oldR7+%d", si->cfa_off);
163         break;
164      case CFIC_EXPR:
165         VG_(printf)("let cfa={");
166         ML_(ppCfiExpr)(exprs, si->cfa_off);
167         VG_(printf)("}");
168         break;
169      default:
170         vg_assert(0);
171   }
172
173   VG_(printf)(" in RA=");
174   SHOW_HOW(si->ra_how, si->ra_off);
175#  if defined(VGA_x86) || defined(VGA_amd64)
176   VG_(printf)(" SP=");
177   SHOW_HOW(si->sp_how, si->sp_off);
178   VG_(printf)(" BP=");
179   SHOW_HOW(si->bp_how, si->bp_off);
180#  elif defined(VGA_arm)
181   VG_(printf)(" R14=");
182   SHOW_HOW(si->r14_how, si->r14_off);
183   VG_(printf)(" R13=");
184   SHOW_HOW(si->r13_how, si->r13_off);
185   VG_(printf)(" R12=");
186   SHOW_HOW(si->r12_how, si->r12_off);
187   VG_(printf)(" R11=");
188   SHOW_HOW(si->r11_how, si->r11_off);
189   VG_(printf)(" R7=");
190   SHOW_HOW(si->r7_how, si->r7_off);
191#  elif defined(VGA_ppc32) || defined(VGA_ppc64)
192#  elif defined(VGA_s390x)
193   VG_(printf)(" SP=");
194   SHOW_HOW(si->sp_how, si->sp_off);
195   VG_(printf)(" FP=");
196   SHOW_HOW(si->fp_how, si->fp_off);
197#  else
198#    error "Unknown arch"
199#  endif
200   VG_(printf)("\n");
201#  undef SHOW_HOW
202}
203
204
205/*------------------------------------------------------------*/
206/*--- Adding stuff                                         ---*/
207/*------------------------------------------------------------*/
208
209/* Add a str to the string table, including terminating zero, and
210   return pointer to the string in vg_strtab.  Unless it's been seen
211   recently, in which case we find the old pointer and return that.
212   This avoids the most egregious duplications.
213
214   JSGF: changed from returning an index to a pointer, and changed to
215   a chunking memory allocator rather than reallocating, so the
216   pointers are stable.
217*/
218UChar* ML_(addStr) ( struct _DebugInfo* di, UChar* str, Int len )
219{
220   struct strchunk *chunk;
221   Int    space_needed;
222   UChar* p;
223
224   if (len == -1) {
225      len = VG_(strlen)(str);
226   } else {
227      vg_assert(len >= 0);
228   }
229
230   space_needed = 1 + len;
231
232   // Allocate a new strtab chunk if necessary
233   if (di->strchunks == NULL ||
234       (di->strchunks->strtab_used
235        + space_needed) > SEGINFO_STRCHUNKSIZE) {
236      chunk = ML_(dinfo_zalloc)("di.storage.addStr.1", sizeof(*chunk));
237      chunk->strtab_used = 0;
238      chunk->next = di->strchunks;
239      di->strchunks = chunk;
240   }
241   chunk = di->strchunks;
242
243   p = &chunk->strtab[chunk->strtab_used];
244   VG_(memcpy)(p, str, len);
245   chunk->strtab[chunk->strtab_used+len] = '\0';
246   chunk->strtab_used += space_needed;
247
248   return p;
249}
250
251
252/* Add a symbol to the symbol table, by copying *sym.  'sym' may only
253   have one name, so there's no complexities to do with deep vs
254   shallow copying of the sec_name array.  This is checked.
255*/
256void ML_(addSym) ( struct _DebugInfo* di, DiSym* sym )
257{
258   UInt   new_sz, i;
259   DiSym* new_tab;
260
261   vg_assert(sym->pri_name != NULL);
262   vg_assert(sym->sec_names == NULL);
263
264   /* Ignore zero-sized syms. */
265   if (sym->size == 0) return;
266
267   if (di->symtab_used == di->symtab_size) {
268      new_sz = 2 * di->symtab_size;
269      if (new_sz == 0) new_sz = 500;
270      new_tab = ML_(dinfo_zalloc)( "di.storage.addSym.1",
271                                   new_sz * sizeof(DiSym) );
272      if (di->symtab != NULL) {
273         for (i = 0; i < di->symtab_used; i++)
274            new_tab[i] = di->symtab[i];
275         ML_(dinfo_free)(di->symtab);
276      }
277      di->symtab = new_tab;
278      di->symtab_size = new_sz;
279   }
280
281   di->symtab[di->symtab_used++] = *sym;
282   vg_assert(di->symtab_used <= di->symtab_size);
283}
284
285
286/* Add a location to the location table.
287*/
288static void addLoc ( struct _DebugInfo* di, DiLoc* loc )
289{
290   UInt   new_sz, i;
291   DiLoc* new_tab;
292
293   /* Zero-sized locs should have been ignored earlier */
294   vg_assert(loc->size > 0);
295
296   if (di->loctab_used == di->loctab_size) {
297      new_sz = 2 * di->loctab_size;
298      if (new_sz == 0) new_sz = 500;
299      new_tab = ML_(dinfo_zalloc)( "di.storage.addLoc.1",
300                                   new_sz * sizeof(DiLoc) );
301      if (di->loctab != NULL) {
302         for (i = 0; i < di->loctab_used; i++)
303            new_tab[i] = di->loctab[i];
304         ML_(dinfo_free)(di->loctab);
305      }
306      di->loctab = new_tab;
307      di->loctab_size = new_sz;
308   }
309
310   di->loctab[di->loctab_used] = *loc;
311   di->loctab_used++;
312   vg_assert(di->loctab_used <= di->loctab_size);
313}
314
315
316/* Resize the LocTab (line number table) to save memory, by removing
317   (and, potentially, allowing m_mallocfree to unmap) any unused space
318   at the end of the table.
319*/
320static void shrinkLocTab ( struct _DebugInfo* di )
321{
322   DiLoc* new_tab;
323   UWord new_sz = di->loctab_used;
324   if (new_sz == di->loctab_size) return;
325   vg_assert(new_sz < di->loctab_size);
326
327   new_tab = ML_(dinfo_zalloc)( "di.storage.shrinkLocTab",
328                                new_sz * sizeof(DiLoc) );
329   VG_(memcpy)(new_tab, di->loctab, new_sz * sizeof(DiLoc));
330
331   ML_(dinfo_free)(di->loctab);
332   di->loctab = new_tab;
333   di->loctab_size = new_sz;
334}
335
336
337/* Top-level place to call to add a source-location mapping entry.
338*/
339void ML_(addLineInfo) ( struct _DebugInfo* di,
340                        UChar*   filename,
341                        UChar*   dirname, /* NULL == directory is unknown */
342                        Addr     this,
343                        Addr     next,
344                        Int      lineno,
345                        Int      entry /* only needed for debug printing */
346     )
347{
348   static const Bool debug = False;
349   DiLoc loc;
350   Int size = next - this;
351
352   /* Ignore zero-sized locs */
353   if (this == next) return;
354
355   if (debug)
356      VG_(printf)( "  src %s %s line %d %#lx-%#lx\n",
357                   dirname ? dirname : (UChar*)"(unknown)",
358                   filename, lineno, this, next );
359
360   /* Maximum sanity checking.  Some versions of GNU as do a shabby
361    * job with stabs entries; if anything looks suspicious, revert to
362    * a size of 1.  This should catch the instruction of interest
363    * (since if using asm-level debug info, one instruction will
364    * correspond to one line, unlike with C-level debug info where
365    * multiple instructions can map to the one line), but avoid
366    * catching any other instructions bogusly. */
367   if (this > next) {
368       if (VG_(clo_verbosity) > 2) {
369           VG_(message)(Vg_DebugMsg,
370                        "warning: line info addresses out of order "
371                        "at entry %d: 0x%lx 0x%lx\n", entry, this, next);
372       }
373       size = 1;
374   }
375
376   if (size > MAX_LOC_SIZE) {
377       if (0)
378       VG_(message)(Vg_DebugMsg,
379                    "warning: line info address range too large "
380                    "at entry %d: %d\n", entry, size);
381       size = 1;
382   }
383
384   /* Rule out ones which are completely outside the r-x mapped area.
385      See "Comment_Regarding_Text_Range_Checks" elsewhere in this file
386      for background and rationale. */
387   vg_assert(di->fsm.have_rx_map && di->fsm.have_rw_map);
388   if (next-1 < di->fsm.rx_map_avma
389       || this >= di->fsm.rx_map_avma + di->fsm.rx_map_size ) {
390       if (0)
391          VG_(message)(Vg_DebugMsg,
392                       "warning: ignoring line info entry falling "
393                       "outside current DebugInfo: %#lx %#lx %#lx %#lx\n",
394                       di->text_avma,
395                       di->text_avma + di->text_size,
396                       this, next-1);
397       return;
398   }
399
400   vg_assert(lineno >= 0);
401   if (lineno > MAX_LINENO) {
402      static Bool complained = False;
403      if (!complained) {
404         complained = True;
405         VG_(message)(Vg_UserMsg,
406                      "warning: ignoring line info entry with "
407                      "huge line number (%d)\n", lineno);
408         VG_(message)(Vg_UserMsg,
409                      "         Can't handle line numbers "
410                      "greater than %d, sorry\n", MAX_LINENO);
411         VG_(message)(Vg_UserMsg,
412                      "(Nb: this message is only shown once)\n");
413      }
414      return;
415   }
416
417   loc.addr      = this;
418   loc.size      = (UShort)size;
419   loc.lineno    = lineno;
420   loc.filename  = filename;
421   loc.dirname   = dirname;
422
423   if (0) VG_(message)(Vg_DebugMsg,
424		       "addLoc: addr %#lx, size %d, line %d, file %s\n",
425		       this,size,lineno,filename);
426
427   addLoc ( di, &loc );
428}
429
430
431/* Top-level place to call to add a CFI summary record.  The supplied
432   DiCfSI is copied. */
433void ML_(addDiCfSI) ( struct _DebugInfo* di, DiCfSI* cfsi_orig )
434{
435   static const Bool debug = False;
436   UInt    new_sz, i;
437   DiCfSI* new_tab;
438   SSizeT  delta;
439
440   /* copy the original, so we can mess with it */
441   DiCfSI cfsi = *cfsi_orig;
442
443   if (debug) {
444      VG_(printf)("adding DiCfSI: ");
445      ML_(ppDiCfSI)(di->cfsi_exprs, &cfsi);
446   }
447
448   /* sanity */
449   vg_assert(cfsi.len > 0);
450   /* If this fails, the implication is you have a single procedure
451      with more than 5 million bytes of code.  Which is pretty
452      unlikely.  Either that, or the debuginfo reader is somehow
453      broken.  5 million is of course arbitrary; but it's big enough
454      to be bigger than the size of any plausible piece of code that
455      would fall within a single procedure. */
456   vg_assert(cfsi.len < 5000000);
457
458   vg_assert(di->fsm.have_rx_map && di->fsm.have_rw_map);
459   /* If we have an empty r-x mapping (is that possible?) then the
460      DiCfSI can't possibly fall inside it.  In which case skip. */
461   if (di->fsm.rx_map_size == 0)
462      return;
463
464   /* Rule out ones which are completely outside the r-x mapped area.
465      See "Comment_Regarding_Text_Range_Checks" elsewhere in this file
466      for background and rationale. */
467   if (cfsi.base + cfsi.len - 1 < di->fsm.rx_map_avma
468       || cfsi.base >= di->fsm.rx_map_avma + di->fsm.rx_map_size) {
469      static Int complaints = 10;
470      if (VG_(clo_trace_cfi) || complaints > 0) {
471         complaints--;
472         if (VG_(clo_verbosity) > 1) {
473            VG_(message)(
474               Vg_DebugMsg,
475               "warning: DiCfSI %#lx .. %#lx outside segment %#lx .. %#lx\n",
476               cfsi.base,
477               cfsi.base + cfsi.len - 1,
478               di->text_avma,
479               di->text_avma + di->text_size - 1
480            );
481         }
482         if (VG_(clo_trace_cfi))
483            ML_(ppDiCfSI)(di->cfsi_exprs, &cfsi);
484      }
485      return;
486   }
487
488   /* Now we know the range is at least partially inside the r-x
489      mapped area.  That implies that at least one of the ends of the
490      range falls inside the area.  If necessary, clip it so it is
491      completely within the area.  If we don't do this,
492      check_CFSI_related_invariants() in debuginfo.c (invariant #2)
493      will fail.  See
494      "Comment_on_IMPORTANT_CFSI_REPRESENTATIONAL_INVARIANTS" in
495      priv_storage.h for background. */
496   if (cfsi.base < di->fsm.rx_map_avma) {
497      /* Lower end is outside the mapped area.  Hence upper end must
498         be inside it. */
499      if (0) VG_(printf)("XXX truncate lower\n");
500      vg_assert(cfsi.base + cfsi.len - 1 >= di->fsm.rx_map_avma);
501      delta = (SSizeT)(di->fsm.rx_map_avma - cfsi.base);
502      vg_assert(delta > 0);
503      vg_assert(delta < (SSizeT)cfsi.len);
504      cfsi.base += delta;
505      cfsi.len -= delta;
506   }
507   else
508   if (cfsi.base + cfsi.len - 1 > di->fsm.rx_map_avma
509                                  + di->fsm.rx_map_size - 1) {
510      /* Upper end is outside the mapped area.  Hence lower end must be
511         inside it. */
512      if (0) VG_(printf)("XXX truncate upper\n");
513      vg_assert(cfsi.base <= di->fsm.rx_map_avma + di->fsm.rx_map_size - 1);
514      delta = (SSizeT)( (cfsi.base + cfsi.len - 1)
515                        - (di->fsm.rx_map_avma + di->fsm.rx_map_size - 1) );
516      vg_assert(delta > 0); vg_assert(delta < (SSizeT)cfsi.len);
517      cfsi.len -= delta;
518   }
519
520   /* Final checks */
521
522   /* Because: either cfsi was entirely inside the range, in which
523      case we asserted that len > 0 at the start, OR it fell partially
524      inside the range, in which case we reduced it by some size
525      (delta) which is < its original size. */
526   vg_assert(cfsi.len > 0);
527
528   /* Similar logic applies for the next two assertions. */
529   vg_assert(cfsi.base >= di->fsm.rx_map_avma);
530   vg_assert(cfsi.base + cfsi.len - 1
531             <= di->fsm.rx_map_avma + di->fsm.rx_map_size - 1);
532
533   if (di->cfsi_used == di->cfsi_size) {
534      new_sz = 2 * di->cfsi_size;
535      if (new_sz == 0) new_sz = 20;
536      new_tab = ML_(dinfo_zalloc)( "di.storage.addDiCfSI.1",
537                                   new_sz * sizeof(DiCfSI) );
538      if (di->cfsi != NULL) {
539         for (i = 0; i < di->cfsi_used; i++)
540            new_tab[i] = di->cfsi[i];
541         ML_(dinfo_free)(di->cfsi);
542      }
543      di->cfsi = new_tab;
544      di->cfsi_size = new_sz;
545   }
546
547   di->cfsi[di->cfsi_used] = cfsi;
548   di->cfsi_used++;
549   vg_assert(di->cfsi_used <= di->cfsi_size);
550}
551
552
553Int ML_(CfiExpr_Undef)( XArray* dst )
554{
555   CfiExpr e;
556   VG_(memset)( &e, 0, sizeof(e) );
557   e.tag = Cex_Undef;
558   return (Int)VG_(addToXA)( dst, &e );
559}
560Int ML_(CfiExpr_Deref)( XArray* dst, Int ixAddr )
561{
562   CfiExpr e;
563   VG_(memset)( &e, 0, sizeof(e) );
564   e.tag = Cex_Deref;
565   e.Cex.Deref.ixAddr = ixAddr;
566   return (Int)VG_(addToXA)( dst, &e );
567}
568Int ML_(CfiExpr_Const)( XArray* dst, UWord con )
569{
570   CfiExpr e;
571   VG_(memset)( &e, 0, sizeof(e) );
572   e.tag = Cex_Const;
573   e.Cex.Const.con = con;
574   return (Int)VG_(addToXA)( dst, &e );
575}
576Int ML_(CfiExpr_Binop)( XArray* dst, CfiOp op, Int ixL, Int ixR )
577{
578   CfiExpr e;
579   VG_(memset)( &e, 0, sizeof(e) );
580   e.tag = Cex_Binop;
581   e.Cex.Binop.op  = op;
582   e.Cex.Binop.ixL = ixL;
583   e.Cex.Binop.ixR = ixR;
584   return (Int)VG_(addToXA)( dst, &e );
585}
586Int ML_(CfiExpr_CfiReg)( XArray* dst, CfiReg reg )
587{
588   CfiExpr e;
589   VG_(memset)( &e, 0, sizeof(e) );
590   e.tag = Cex_CfiReg;
591   e.Cex.CfiReg.reg = reg;
592   return (Int)VG_(addToXA)( dst, &e );
593}
594Int ML_(CfiExpr_DwReg)( XArray* dst, Int reg )
595{
596   CfiExpr e;
597   VG_(memset)( &e, 0, sizeof(e) );
598   e.tag = Cex_DwReg;
599   e.Cex.DwReg.reg = reg;
600   return (Int)VG_(addToXA)( dst, &e );
601}
602
603static void ppCfiOp ( CfiOp op )
604{
605   switch (op) {
606      case Cop_Add: VG_(printf)("+"); break;
607      case Cop_Sub: VG_(printf)("-"); break;
608      case Cop_And: VG_(printf)("&"); break;
609      case Cop_Mul: VG_(printf)("*"); break;
610      case Cop_Shl: VG_(printf)("<<"); break;
611      case Cop_Shr: VG_(printf)(">>"); break;
612      case Cop_Eq: VG_(printf)("=="); break;
613      case Cop_Ge: VG_(printf)(">="); break;
614      case Cop_Gt: VG_(printf)(">"); break;
615      case Cop_Le: VG_(printf)("<="); break;
616      case Cop_Lt: VG_(printf)("<"); break;
617      case Cop_Ne: VG_(printf)("!="); break;
618      default:      vg_assert(0);
619   }
620}
621
622static void ppCfiReg ( CfiReg reg )
623{
624   switch (reg) {
625      case Creg_IA_SP:   VG_(printf)("xSP"); break;
626      case Creg_IA_BP:   VG_(printf)("xBP"); break;
627      case Creg_IA_IP:   VG_(printf)("xIP"); break;
628      case Creg_ARM_R13: VG_(printf)("R13"); break;
629      case Creg_ARM_R12: VG_(printf)("R12"); break;
630      case Creg_ARM_R15: VG_(printf)("R15"); break;
631      case Creg_ARM_R14: VG_(printf)("R14"); break;
632      default: vg_assert(0);
633   }
634}
635
636void ML_(ppCfiExpr)( XArray* src, Int ix )
637{
638   /* VG_(indexXA) checks for invalid src/ix values, so we can
639      use it indiscriminately. */
640   CfiExpr* e = (CfiExpr*) VG_(indexXA)( src, ix );
641   switch (e->tag) {
642      case Cex_Undef:
643         VG_(printf)("Undef");
644         break;
645      case Cex_Deref:
646         VG_(printf)("*(");
647         ML_(ppCfiExpr)(src, e->Cex.Deref.ixAddr);
648         VG_(printf)(")");
649         break;
650      case Cex_Const:
651         VG_(printf)("0x%lx", e->Cex.Const.con);
652         break;
653      case Cex_Binop:
654         VG_(printf)("(");
655         ML_(ppCfiExpr)(src, e->Cex.Binop.ixL);
656         VG_(printf)(")");
657         ppCfiOp(e->Cex.Binop.op);
658         VG_(printf)("(");
659         ML_(ppCfiExpr)(src, e->Cex.Binop.ixR);
660         VG_(printf)(")");
661         break;
662      case Cex_CfiReg:
663         ppCfiReg(e->Cex.CfiReg.reg);
664         break;
665      case Cex_DwReg:
666         VG_(printf)("dwr%d", e->Cex.DwReg.reg);
667         break;
668      default:
669         VG_(core_panic)("ML_(ppCfiExpr)");
670         /*NOTREACHED*/
671         break;
672   }
673}
674
675
676Word ML_(cmp_for_DiAddrRange_range) ( const void* keyV,
677                                      const void* elemV ) {
678   const Addr* key = (const Addr*)keyV;
679   const DiAddrRange* elem = (const DiAddrRange*)elemV;
680   if (0)
681      VG_(printf)("cmp_for_DiAddrRange_range: %#lx vs %#lx\n",
682                  *key, elem->aMin);
683   if ((*key) < elem->aMin) return -1;
684   if ((*key) > elem->aMax) return 1;
685   return 0;
686}
687
688static
689void show_scope ( OSet* /* of DiAddrRange */ scope, HChar* who )
690{
691   DiAddrRange* range;
692   VG_(printf)("Scope \"%s\" = {\n", who);
693   VG_(OSetGen_ResetIter)( scope );
694   while (True) {
695      range = VG_(OSetGen_Next)( scope );
696      if (!range) break;
697      VG_(printf)("   %#lx .. %#lx: %lu vars\n", range->aMin, range->aMax,
698                  range->vars ? VG_(sizeXA)(range->vars) : 0);
699   }
700   VG_(printf)("}\n");
701}
702
703/* Add the variable 'var' to 'scope' for the address range [aMin,aMax]
704   (inclusive of aMin and aMax).  Split existing ranges as required if
705   aMin or aMax or both don't match existing range boundaries, and add
706   'var' to all required ranges.  Take great care to preserve the
707   invariant that the ranges in 'scope' cover the entire address range
708   exactly once, with no overlaps and no holes. */
709static void add_var_to_arange (
710               /*MOD*/OSet* /* of DiAddrRange */ scope,
711               Addr aMin,
712               Addr aMax,
713               DiVariable* var
714            )
715{
716   DiAddrRange *first, *last, *range;
717   /* These xx variables are for assertion checking only; they don't
718      contribute anything to the actual work of this function. */
719   DiAddrRange *xxRangep, *xxFirst, *xxLast;
720   UWord       xxIters;
721
722   vg_assert(aMin <= aMax);
723
724   if (0) VG_(printf)("add_var_to_arange: %#lx .. %#lx\n", aMin, aMax);
725   if (0) show_scope( scope, "add_var_to_arange(1)" );
726
727   /* See if the lower end of the range (aMin) falls exactly on an
728      existing range boundary.  If not, find the range it does fall
729      into, and split it (copying the variables in the process), so
730      that aMin does exactly fall on a range boundary. */
731   first = VG_(OSetGen_Lookup)( scope, &aMin );
732   /* It must be present, since the presented OSet must cover
733      the entire address range. */
734   vg_assert(first);
735   vg_assert(first->aMin <= first->aMax);
736   vg_assert(first->aMin <= aMin && aMin <= first->aMax);
737
738   /* Fast track common case, which is that the range specified for
739      the variable exactly coincides with one already-existing
740      range. */
741   if (first->aMin == aMin && first->aMax == aMax) {
742      vg_assert(first->vars);
743      VG_(addToXA)( first->vars, var );
744      return;
745   }
746
747   /* We have to get into splitting ranges, which is complex
748      and slow. */
749   if (first->aMin < aMin) {
750      DiAddrRange* nyu;
751      /* Ok.  We'll have to split 'first'. */
752      /* truncate the upper end of 'first' */
753      Addr tmp = first->aMax;
754      first->aMax = aMin-1;
755      vg_assert(first->aMin <= first->aMax);
756      /* create a new range */
757      nyu = VG_(OSetGen_AllocNode)( scope, sizeof(DiAddrRange) );
758      vg_assert(nyu);
759      nyu->aMin = aMin;
760      nyu->aMax = tmp;
761      vg_assert(nyu->aMin <= nyu->aMax);
762      /* copy vars into it */
763      vg_assert(first->vars);
764      nyu->vars = VG_(cloneXA)( "di.storage.avta.1", first->vars );
765      vg_assert(nyu->vars);
766      VG_(OSetGen_Insert)( scope, nyu );
767      first = nyu;
768   }
769
770   vg_assert(first->aMin == aMin);
771
772   /* Now do exactly the same for the upper end (aMax): if it doesn't
773      fall on a boundary, cause it to do so by splitting the range it
774      does currently fall into. */
775   last = VG_(OSetGen_Lookup)( scope, &aMax );
776   vg_assert(last->aMin <= last->aMax);
777   vg_assert(last->aMin <= aMax && aMax <= last->aMax);
778
779   if (aMax < last->aMax) {
780      DiAddrRange* nyu;
781      /* We have to split 'last'. */
782      /* truncate the lower end of 'last' */
783      Addr tmp = last->aMin;
784      last->aMin = aMax+1;
785      vg_assert(last->aMin <= last->aMax);
786      /* create a new range */
787      nyu = VG_(OSetGen_AllocNode)( scope, sizeof(DiAddrRange) );
788      vg_assert(nyu);
789      nyu->aMin = tmp;
790      nyu->aMax = aMax;
791      vg_assert(nyu->aMin <= nyu->aMax);
792      /* copy vars into it */
793      vg_assert(last->vars);
794      nyu->vars = VG_(cloneXA)( "di.storage.avta.2", last->vars );
795      vg_assert(nyu->vars);
796      VG_(OSetGen_Insert)( scope, nyu );
797      last = nyu;
798   }
799
800   vg_assert(aMax == last->aMax);
801
802   xxFirst = (DiAddrRange*)VG_(OSetGen_Lookup)(scope, &aMin);
803   xxLast  = (DiAddrRange*)VG_(OSetGen_Lookup)(scope, &aMax);
804   vg_assert(xxFirst);
805   vg_assert(xxLast);
806   vg_assert(xxFirst->aMin == aMin);
807   vg_assert(xxLast->aMax == aMax);
808   if (xxFirst != xxLast)
809      vg_assert(xxFirst->aMax < xxLast->aMin);
810
811   /* Great.  Now we merely need to iterate over the segments from
812      'first' to 'last' inclusive, and add 'var' to the variable set
813      of each of them. */
814   if (0) {
815      static UWord ctr = 0;
816      ctr++;
817      VG_(printf)("ctr = %lu\n", ctr);
818      if (ctr >= 33263) show_scope( scope, "add_var_to_arange(2)" );
819   }
820
821   xxIters = 0;
822   range = xxRangep = NULL;
823   VG_(OSetGen_ResetIterAt)( scope, &aMin );
824   while (True) {
825      xxRangep = range;
826      range    = VG_(OSetGen_Next)( scope );
827      if (!range) break;
828      if (range->aMin > aMax) break;
829      xxIters++;
830      if (0) VG_(printf)("have range %#lx %#lx\n",
831                         range->aMin, range->aMax);
832
833      /* Sanity checks */
834      if (!xxRangep) {
835         /* This is the first in the range */
836         vg_assert(range->aMin == aMin);
837      } else {
838         vg_assert(xxRangep->aMax + 1 == range->aMin);
839      }
840
841      vg_assert(range->vars);
842      VG_(addToXA)( range->vars, var );
843   }
844   /* Done.  We should have seen at least one range. */
845   vg_assert(xxIters >= 1);
846   if (xxIters == 1) vg_assert(xxFirst == xxLast);
847   if (xxFirst == xxLast) vg_assert(xxIters == 1);
848   vg_assert(xxRangep);
849   vg_assert(xxRangep->aMax == aMax);
850   vg_assert(xxRangep == xxLast);
851}
852
853
854/* Top-level place to call to add a variable description (as extracted
855   from a DWARF3 .debug_info section. */
856void ML_(addVar)( struct _DebugInfo* di,
857                  Int    level,
858                  Addr   aMin,
859                  Addr   aMax,
860                  UChar* name, /* in di's .strchunks */
861                  UWord  typeR, /* a cuOff */
862                  GExpr* gexpr,
863                  GExpr* fbGX,
864                  UChar* fileName, /* where decl'd - may be NULL.
865                                      in di's .strchunks */
866                  Int    lineNo, /* where decl'd - may be zero */
867                  Bool   show )
868{
869   OSet* /* of DiAddrRange */ scope;
870   DiVariable var;
871   Bool       all;
872   TyEnt*     ent;
873   MaybeULong mul;
874   HChar*     badness;
875
876   tl_assert(di && di->admin_tyents);
877
878   if (0) {
879      VG_(printf)("  ML_(addVar): level %d  %#lx-%#lx  %s :: ",
880                  level, aMin, aMax, name );
881      ML_(pp_TyEnt_C_ishly)( di->admin_tyents, typeR );
882      VG_(printf)("\n  Var=");
883      ML_(pp_GX)(gexpr);
884      VG_(printf)("\n");
885      if (fbGX) {
886         VG_(printf)("  FrB=");
887         ML_(pp_GX)( fbGX );
888         VG_(printf)("\n");
889      } else {
890         VG_(printf)("  FrB=none\n");
891      }
892      VG_(printf)("\n");
893   }
894
895   vg_assert(level >= 0);
896   vg_assert(aMin <= aMax);
897   vg_assert(name);
898   vg_assert(gexpr);
899
900   ent = ML_(TyEnts__index_by_cuOff)( di->admin_tyents, NULL, typeR);
901   tl_assert(ent);
902   vg_assert(ML_(TyEnt__is_type)(ent));
903
904   /* "Comment_Regarding_Text_Range_Checks" (is referred to elsewhere)
905      ----------------------------------------------------------------
906      Ignore any variables whose aMin .. aMax (that is, range of text
907      addresses for which they actually exist) falls outside the text
908      segment.  Is this indicative of a bug in the reader?  Maybe.
909      (LATER): instead of restricting strictly to the .text segment,
910      be a bit more relaxed, and accept any variable whose text range
911      falls inside the r-x mapped area.  This is useful because .text
912      is not always the only instruction-carrying segment: others are:
913      .init .plt __libc_freeres_fn and .fini.  This implicitly assumes
914      that those extra sections have the same bias as .text, but that
915      seems a reasonable assumption to me. */
916   /* This is assured us by top level steering logic in debuginfo.c,
917      and it is re-checked at the start of
918      ML_(read_elf_debug_info). */
919   vg_assert(di->fsm.have_rx_map && di->fsm.have_rw_map);
920   if (level > 0
921       && (aMax < di->fsm.rx_map_avma
922           || aMin >= di->fsm.rx_map_avma + di->fsm.rx_map_size)) {
923      if (VG_(clo_verbosity) >= 0) {
924         VG_(message)(Vg_DebugMsg,
925            "warning: addVar: in range %#lx .. %#lx outside "
926            "segment %#lx .. %#lx (%s)\n",
927            aMin, aMax,
928            di->text_avma, di->text_avma + di->text_size -1,
929            name
930         );
931      }
932      return;
933   }
934
935   /* If the type's size is zero (which can mean unknown size), ignore
936      it.  We will never be able to actually relate a data address to
937      a data object with zero size, so there's no point in storing
938      info on it.  On 32-bit platforms, also reject types whose size
939      is 2^32 bytes or large.  (It's amazing what junk shows up ..) */
940   mul = ML_(sizeOfType)(di->admin_tyents, typeR);
941
942   badness = NULL;
943   if (mul.b != True)
944      badness = "unknown size";
945   else if (mul.ul == 0)
946      badness = "zero size   ";
947   else if (sizeof(void*) == 4 && mul.ul >= (1ULL<<32))
948      badness = "implausibly large";
949
950   if (badness) {
951      static Int complaints = 10;
952      if (VG_(clo_verbosity) >= 2 && complaints > 0) {
953         VG_(message)(Vg_DebugMsg, "warning: addVar: %s (%s)\n",
954                                   badness, name );
955         complaints--;
956      }
957      return;
958   }
959
960   if (!di->varinfo) {
961      di->varinfo = VG_(newXA)( ML_(dinfo_zalloc),
962                                "di.storage.addVar.1",
963                                ML_(dinfo_free),
964                                sizeof(OSet*) );
965   }
966
967   vg_assert(level < 256); /* arbitrary; stay sane */
968   /* Expand the top level array enough to map this level */
969   while ( VG_(sizeXA)(di->varinfo) <= level ) {
970      DiAddrRange* nyu;
971      scope = VG_(OSetGen_Create)( offsetof(DiAddrRange,aMin),
972                                   ML_(cmp_for_DiAddrRange_range),
973                                   ML_(dinfo_zalloc), "di.storage.addVar.2",
974                                   ML_(dinfo_free) );
975      vg_assert(scope);
976      if (0) VG_(printf)("create: scope = %p, adding at %ld\n",
977                         scope, VG_(sizeXA)(di->varinfo));
978      VG_(addToXA)( di->varinfo, &scope );
979      /* Add a single range covering the entire address space.  At
980         level 0 we require this doesn't get split.  At levels above 0
981         we require that any additions to it cause it to get split.
982         All of these invariants get checked both add_var_to_arange
983         and after reading is complete, in canonicaliseVarInfo. */
984      nyu = VG_(OSetGen_AllocNode)( scope, sizeof(DiAddrRange) );
985      vg_assert(nyu);
986      nyu->aMin = (Addr)0;
987      nyu->aMax = ~(Addr)0;
988      nyu->vars = VG_(newXA)( ML_(dinfo_zalloc), "di.storage.addVar.3",
989                              ML_(dinfo_free),
990                              sizeof(DiVariable) );
991      vg_assert(nyu->vars);
992      VG_(OSetGen_Insert)( scope, nyu );
993   }
994
995   vg_assert( VG_(sizeXA)(di->varinfo) > level );
996   scope = *(OSet**)VG_(indexXA)( di->varinfo, level );
997   vg_assert(scope);
998
999   var.name     = name;
1000   var.typeR    = typeR;
1001   var.gexpr    = gexpr;
1002   var.fbGX     = fbGX;
1003   var.fileName = fileName;
1004   var.lineNo   = lineNo;
1005
1006   all = aMin == (Addr)0 && aMax == ~(Addr)0;
1007   vg_assert(level == 0 ? all : !all);
1008
1009   add_var_to_arange( /*MOD*/scope, aMin, aMax, &var );
1010}
1011
1012
1013/* This really just checks the constructed data structure, as there is
1014   no canonicalisation to do. */
1015static void canonicaliseVarInfo ( struct _DebugInfo* di )
1016{
1017   Word i, nInThisScope;
1018
1019   if (!di->varinfo)
1020      return;
1021
1022   for (i = 0; i < VG_(sizeXA)(di->varinfo); i++) {
1023
1024      DiAddrRange *range, *rangep;
1025      OSet* scope = *(OSet**)VG_(indexXA)(di->varinfo, i);
1026      if (!scope) continue;
1027
1028      /* Deal with the global-scope case. */
1029      if (i == 0) {
1030         Addr zero = 0;
1031         vg_assert(VG_(OSetGen_Size)( scope ) == 1);
1032         range = VG_(OSetGen_Lookup)( scope, &zero );
1033         vg_assert(range);
1034         vg_assert(range->aMin == (Addr)0);
1035         vg_assert(range->aMax == ~(Addr)0);
1036         continue;
1037      }
1038
1039      /* All the rest of this is for the local-scope case. */
1040      /* iterate over all entries in 'scope' */
1041      nInThisScope = 0;
1042      rangep = NULL;
1043      VG_(OSetGen_ResetIter)(scope);
1044      while (True) {
1045         range = VG_(OSetGen_Next)(scope);
1046         if (!range) {
1047           /* We just saw the last one.  There must have been at
1048              least one entry in the range. */
1049           vg_assert(rangep);
1050           vg_assert(rangep->aMax == ~(Addr)0);
1051           break;
1052         }
1053
1054         vg_assert(range->aMin <= range->aMax);
1055         vg_assert(range->vars);
1056
1057         if (!rangep) {
1058           /* This is the first entry in the range. */
1059           vg_assert(range->aMin == 0);
1060         } else {
1061           vg_assert(rangep->aMax + 1 == range->aMin);
1062         }
1063
1064         rangep = range;
1065         nInThisScope++;
1066      } /* iterating over ranges in a given scope */
1067
1068      /* If there's only one entry in this (local) scope, it must
1069         cover the entire address space (obviously), but it must not
1070         contain any vars. */
1071
1072      vg_assert(nInThisScope > 0);
1073      if (nInThisScope == 1) {
1074         Addr zero = 0;
1075         vg_assert(VG_(OSetGen_Size)( scope ) == 1);
1076         range = VG_(OSetGen_Lookup)( scope, &zero );
1077         vg_assert(range);
1078         vg_assert(range->aMin == (Addr)0);
1079         vg_assert(range->aMax == ~(Addr)0);
1080         vg_assert(range->vars);
1081         vg_assert(VG_(sizeXA)(range->vars) == 0);
1082      }
1083
1084   } /* iterate over scopes */
1085}
1086
1087
1088/*------------------------------------------------------------*/
1089/*--- Canonicalisers                                       ---*/
1090/*------------------------------------------------------------*/
1091
1092/* Sort the symtab by starting address, and emit warnings if any
1093   symbols have overlapping address ranges.  We use that old chestnut,
1094   shellsort.  Mash the table around so as to establish the property
1095   that addresses are in order and the ranges to not overlap.  This
1096   facilitates using binary search to map addresses to symbols when we
1097   come to query the table.
1098*/
1099static Int compare_DiSym ( void* va, void* vb )
1100{
1101   DiSym* a = (DiSym*)va;
1102   DiSym* b = (DiSym*)vb;
1103   if (a->addr < b->addr) return -1;
1104   if (a->addr > b->addr) return  1;
1105   return 0;
1106}
1107
1108
1109/* An address is associated with more than one name.  Which do we
1110   prefer as the "display" name (that we show the user in stack
1111   traces)?  In order:
1112
1113   - Prefer "PMPI_<foo>" over "MPI_<foo>".
1114
1115   - Else, prefer a non-empty name over an empty one.
1116
1117   - Else, prefer a non-whitespace name over an all-whitespace name.
1118
1119   - Else, prefer the shorter symbol name.  If the symbol contains a
1120     version symbol ('@' on Linux, other platforms may differ), which means it
1121     is versioned, then the length up to the version symbol is used for length
1122     comparison purposes (so "foo@GLIBC_2.4.2" is considered shorter than
1123     "foobar").
1124
1125   - Else, if two symbols have the same length, prefer a versioned symbol over
1126     a non-versioned symbol.
1127
1128   - Else, use alphabetical ordering.
1129
1130   - Otherwise, they must be the same;  use the name with the lower address.
1131
1132   Very occasionally this goes wrong (eg. 'memcmp' and 'bcmp' are
1133   aliases in glibc, we choose the 'bcmp' symbol because it's shorter,
1134   so we can misdescribe memcmp() as bcmp()).  This is hard to avoid.
1135   It's mentioned in the FAQ file.
1136
1137   Returned value is True if a_name is preferred, False if b_name is
1138   preferred.
1139 */
1140static
1141Bool preferName ( struct _DebugInfo* di,
1142                  UChar* a_name, UChar* b_name,
1143                  Addr sym_avma/*exposition only*/ )
1144{
1145   Word cmp;
1146   Word vlena, vlenb;		/* length without version */
1147   const UChar *vpa, *vpb;
1148
1149   Bool preferA = False;
1150   Bool preferB = False;
1151
1152   vg_assert(a_name);
1153   vg_assert(b_name);
1154   vg_assert(a_name != b_name);
1155
1156   vlena = VG_(strlen)(a_name);
1157   vlenb = VG_(strlen)(b_name);
1158
1159#  if defined(VGO_linux)
1160#    define VERSION_CHAR '@'
1161#  elif defined(VGO_darwin)
1162#    define VERSION_CHAR '$'
1163#  else
1164#    error Unknown OS
1165#  endif
1166
1167   vpa = VG_(strchr)(a_name, VERSION_CHAR);
1168   vpb = VG_(strchr)(b_name, VERSION_CHAR);
1169
1170#  undef VERSION_CHAR
1171
1172   if (vpa)
1173      vlena = vpa - a_name;
1174   if (vpb)
1175      vlenb = vpb - b_name;
1176
1177   /* MPI hack: prefer PMPI_Foo over MPI_Foo */
1178   if (0==VG_(strncmp)(a_name, "MPI_", 4)
1179       && 0==VG_(strncmp)(b_name, "PMPI_", 5)
1180       && 0==VG_(strcmp)(a_name, 1+b_name)) {
1181      preferB = True; goto out;
1182   }
1183   if (0==VG_(strncmp)(b_name, "MPI_", 4)
1184       && 0==VG_(strncmp)(a_name, "PMPI_", 5)
1185       && 0==VG_(strcmp)(b_name, 1+a_name)) {
1186      preferA = True; goto out;
1187   }
1188
1189   /* Prefer non-empty name. */
1190   if (vlena  &&  !vlenb) {
1191      preferA = True; goto out;
1192   }
1193   if (vlenb  &&  !vlena) {
1194      preferB = True; goto out;
1195   }
1196
1197   /* Prefer non-whitespace name. */
1198   {
1199      Bool blankA = True;
1200      Bool blankB = True;
1201      Char *s;
1202      s = a_name;
1203      while (*s) {
1204         if (!VG_(isspace)(*s++)) {
1205            blankA = False;
1206            break;
1207         }
1208      }
1209      s = b_name;
1210      while (*s) {
1211         if (!VG_(isspace)(*s++)) {
1212            blankB = False;
1213            break;
1214         }
1215      }
1216
1217      if (!blankA  &&  blankB) {
1218         preferA = True; goto out;
1219      }
1220      if (!blankB  &&  blankA) {
1221         preferB = True; goto out;
1222      }
1223   }
1224
1225   /* Select the shortest unversioned name */
1226   if (vlena < vlenb) {
1227      preferA = True; goto out;
1228   }
1229   if (vlenb < vlena) {
1230      preferB = True; goto out;
1231   }
1232
1233   /* Equal lengths; select the versioned name */
1234   if (vpa && !vpb) {
1235      preferA = True; goto out;
1236   }
1237   if (vpb && !vpa) {
1238      preferB = True; goto out;
1239   }
1240
1241   /* Either both versioned or neither is versioned; select them
1242      alphabetically */
1243   cmp = VG_(strcmp)(a_name, b_name);
1244   if (cmp < 0) {
1245      preferA = True; goto out;
1246   }
1247   if (cmp > 0) {
1248      preferB = True; goto out;
1249   }
1250
1251   /* If we get here, they are the same name. */
1252
1253   /* In this case we could choose either (arbitrarily), but might as
1254      well choose the one with the lowest DiSym* address, so as to try
1255      and make the comparison mechanism more stable (a la sorting
1256      parlance).  Also, skip the diagnostic printing in this case. */
1257   return a_name <= b_name  ? True  : False;
1258
1259   /*NOTREACHED*/
1260   vg_assert(0);
1261  out:
1262   if (preferA && !preferB) {
1263      TRACE_SYMTAB("sym at %#lx: prefer '%s' to '%s'\n",
1264                   sym_avma, a_name, b_name );
1265      return True;
1266   }
1267   if (preferB && !preferA) {
1268      TRACE_SYMTAB("sym at %#lx: prefer '%s' to '%s'\n",
1269                   sym_avma, b_name, a_name );
1270      return False;
1271   }
1272   /*NOTREACHED*/
1273   vg_assert(0);
1274}
1275
1276
1277/* Add the names in FROM to the names in TO. */
1278static
1279void add_DiSym_names_to_from ( DebugInfo* di, DiSym* to, DiSym* from )
1280{
1281   vg_assert(to->pri_name);
1282   vg_assert(from->pri_name);
1283   /* Figure out how many names there will be in the new combined
1284      secondary vector. */
1285   UChar** to_sec   = to->sec_names;
1286   UChar** from_sec = from->sec_names;
1287   Word n_new_sec = 1;
1288   if (from_sec) {
1289      while (*from_sec) {
1290         n_new_sec++;
1291         from_sec++;
1292      }
1293   }
1294   if (to_sec) {
1295      while (*to_sec) {
1296         n_new_sec++;
1297         to_sec++;
1298      }
1299   }
1300   if (0)
1301      TRACE_SYMTAB("merge: -> %ld\n", n_new_sec);
1302   /* Create the new sec and copy stuff into it, putting the new
1303      entries at the end. */
1304   UChar** new_sec = ML_(dinfo_zalloc)( "di.storage.aDntf.1",
1305                                        (n_new_sec+1) * sizeof(UChar*) );
1306   from_sec = from->sec_names;
1307   to_sec   = to->sec_names;
1308   Word i = 0;
1309   if (to_sec) {
1310      while (*to_sec) {
1311         new_sec[i++] = *to_sec;
1312         to_sec++;
1313      }
1314   }
1315   new_sec[i++] = from->pri_name;
1316   if (from_sec) {
1317      while (*from_sec) {
1318         new_sec[i++] = *from_sec;
1319         from_sec++;
1320      }
1321   }
1322   vg_assert(i == n_new_sec);
1323   vg_assert(new_sec[i] == NULL);
1324   /* If we're replacing an existing secondary vector, free it. */
1325   if (to->sec_names) {
1326      ML_(dinfo_free)(to->sec_names);
1327   }
1328   to->sec_names = new_sec;
1329}
1330
1331
1332static void canonicaliseSymtab ( struct _DebugInfo* di )
1333{
1334   Word  i, j, n_truncated;
1335   Addr  sta1, sta2, end1, end2, toc1, toc2;
1336   UChar *pri1, *pri2, **sec1, **sec2;
1337   Bool  ist1, ist2, isf1, isf2;
1338
1339#  define SWAP(ty,aa,bb) \
1340      do { ty tt = (aa); (aa) = (bb); (bb) = tt; } while (0)
1341
1342   if (di->symtab_used == 0)
1343      return;
1344
1345   /* Check initial invariants */
1346   for (i = 0; i < di->symtab_used; i++) {
1347      DiSym* sym = &di->symtab[i];
1348      vg_assert(sym->pri_name);
1349      vg_assert(!sym->sec_names);
1350   }
1351
1352   /* Sort by address. */
1353   VG_(ssort)(di->symtab, di->symtab_used,
1354                          sizeof(*di->symtab), compare_DiSym);
1355
1356  cleanup_more:
1357
1358   /* If two symbols have identical address ranges, and agree on
1359      .isText and .isIFunc, merge them into a single entry, but
1360      preserve both names, so we end up knowing all the names for that
1361      particular address range. */
1362   while (1) {
1363      Word r, w, n_merged;
1364      n_merged = 0;
1365      w = 0;
1366      /* A pass merging entries together */
1367      for (r = 1; r < di->symtab_used; r++) {
1368         vg_assert(w < r);
1369         if (   di->symtab[w].addr      == di->symtab[r].addr
1370             && di->symtab[w].size      == di->symtab[r].size
1371             && !!di->symtab[w].isText  == !!di->symtab[r].isText
1372             && !!di->symtab[w].isIFunc == !!di->symtab[r].isIFunc) {
1373            /* merge the two into one */
1374            n_merged++;
1375            add_DiSym_names_to_from(di, &di->symtab[w], &di->symtab[r]);
1376            /* and use ::pri_names to indicate this slot is no longer in use */
1377            di->symtab[r].pri_name = NULL;
1378            if (di->symtab[r].sec_names) {
1379               ML_(dinfo_free)(di->symtab[r].sec_names);
1380               di->symtab[r].sec_names = NULL;
1381            }
1382            /* Completely zap the entry -- paranoia to make it more
1383               likely we'll notice if we inadvertantly use it
1384               again. */
1385            VG_(memset)(&di->symtab[r], 0, sizeof(DiSym));
1386         } else {
1387            w = r;
1388         }
1389      }
1390      TRACE_SYMTAB( "canonicaliseSymtab: %ld symbols merged\n", n_merged);
1391      if (n_merged == 0)
1392         break;
1393      /* Now a pass to squeeze out any unused ones */
1394      w = 0;
1395      for (r = 0; r < di->symtab_used; r++) {
1396         vg_assert(w <= r);
1397         if (di->symtab[r].pri_name == NULL)
1398            continue;
1399         if (w < r) {
1400            di->symtab[w] = di->symtab[r];
1401         }
1402         w++;
1403      }
1404      vg_assert(w + n_merged == di->symtab_used);
1405      di->symtab_used = w;
1406   }
1407
1408   /* Detect and "fix" overlapping address ranges. */
1409   n_truncated = 0;
1410
1411   for (i = 0; i < ((Word)di->symtab_used) -1; i++) {
1412
1413      vg_assert(di->symtab[i].addr <= di->symtab[i+1].addr);
1414
1415      /* Check for common (no overlap) case. */
1416      if (di->symtab[i].addr + di->symtab[i].size
1417          <= di->symtab[i+1].addr)
1418         continue;
1419
1420      /* There's an overlap.  Truncate one or the other. */
1421      if (di->trace_symtab) {
1422         VG_(printf)("overlapping address ranges in symbol table\n\t");
1423         ML_(ppSym)( i, &di->symtab[i] );
1424         VG_(printf)("\t");
1425         ML_(ppSym)( i+1, &di->symtab[i+1] );
1426         VG_(printf)("\n");
1427      }
1428
1429      /* Truncate one or the other. */
1430      sta1 = di->symtab[i].addr;
1431      end1 = sta1 + di->symtab[i].size - 1;
1432      toc1 = di->symtab[i].tocptr;
1433      pri1 = di->symtab[i].pri_name;
1434      sec1 = di->symtab[i].sec_names;
1435      ist1 = di->symtab[i].isText;
1436      isf1 = di->symtab[i].isIFunc;
1437
1438      sta2 = di->symtab[i+1].addr;
1439      end2 = sta2 + di->symtab[i+1].size - 1;
1440      toc2 = di->symtab[i+1].tocptr;
1441      pri2 = di->symtab[i+1].pri_name;
1442      sec2 = di->symtab[i+1].sec_names;
1443      ist2 = di->symtab[i+1].isText;
1444      isf2 = di->symtab[i+1].isIFunc;
1445
1446      if (sta1 < sta2) {
1447         end1 = sta2 - 1;
1448      } else {
1449         vg_assert(sta1 == sta2);
1450         if (end1 > end2) {
1451            sta1 = end2 + 1;
1452            SWAP(Addr,sta1,sta2); SWAP(Addr,end1,end2); SWAP(Addr,toc1,toc2);
1453            SWAP(UChar*,pri1,pri2); SWAP(UChar**,sec1,sec2);
1454            SWAP(Bool,ist1,ist2); SWAP(Bool,isf1,isf2);
1455         } else
1456         if (end1 < end2) {
1457            sta2 = end1 + 1;
1458         } else {
1459	   /* end1 == end2.  Identical addr ranges.  We'll eventually wind
1460              up back at cleanup_more, which will take care of it. */
1461	 }
1462      }
1463      di->symtab[i].addr      = sta1;
1464      di->symtab[i].size      = end1 - sta1 + 1;
1465      di->symtab[i].tocptr    = toc1;
1466      di->symtab[i].pri_name  = pri1;
1467      di->symtab[i].sec_names = sec1;
1468      di->symtab[i].isText    = ist1;
1469      di->symtab[i].isIFunc   = isf1;
1470
1471      di->symtab[i+1].addr      = sta2;
1472      di->symtab[i+1].size      = end2 - sta2 + 1;
1473      di->symtab[i+1].tocptr    = toc2;
1474      di->symtab[i+1].pri_name  = pri2;
1475      di->symtab[i+1].sec_names = sec2;
1476      di->symtab[i+1].isText    = ist2;
1477      di->symtab[i+1].isIFunc   = isf2;
1478
1479      vg_assert(sta1 <= sta2);
1480      vg_assert(di->symtab[i].size > 0);
1481      vg_assert(di->symtab[i+1].size > 0);
1482      /* It may be that the i+1 entry now needs to be moved further
1483         along to maintain the address order requirement. */
1484      j = i+1;
1485      while (j < ((Word)di->symtab_used)-1
1486             && di->symtab[j].addr > di->symtab[j+1].addr) {
1487         SWAP(DiSym,di->symtab[j],di->symtab[j+1]);
1488         j++;
1489      }
1490      n_truncated++;
1491   }
1492
1493   if (n_truncated > 0) goto cleanup_more;
1494
1495   /* Ensure relevant postconditions hold. */
1496   for (i = 0; i < ((Word)di->symtab_used)-1; i++) {
1497      /* No zero-sized symbols. */
1498      vg_assert(di->symtab[i].size > 0);
1499      /* In order. */
1500      vg_assert(di->symtab[i].addr < di->symtab[i+1].addr);
1501      /* No overlaps. */
1502      vg_assert(di->symtab[i].addr + di->symtab[i].size - 1
1503                < di->symtab[i+1].addr);
1504      /* Names are sane(ish) */
1505      vg_assert(di->symtab[i].pri_name);
1506      if (di->symtab[i].sec_names) {
1507         vg_assert(di->symtab[i].sec_names[0]);
1508      }
1509   }
1510
1511   /* For each symbol that has more than one name, use preferName to
1512      select the primary name.  This is a complete kludge in that
1513      doing it properly requires making a total ordering on the
1514      candidate names, whilst what we have to work with is an ad-hoc
1515      binary relation (preferName) that certainly doesn't have the
1516      relevant transitivity etc properties that are needed to induce a
1517      legitimate total order.  Doesn't matter though if it doesn't
1518      always work right since this is only used to generate names to
1519      show the user. */
1520   for (i = 0; i < ((Word)di->symtab_used)-1; i++) {
1521      DiSym*  sym = &di->symtab[i];
1522      UChar** sec = sym->sec_names;
1523      if (!sec)
1524         continue;
1525      /* Slow but simple.  Copy all the cands into a temp array,
1526         choose the primary name, and copy them all back again. */
1527      Word n_tmp = 1;
1528      while (*sec) { n_tmp++; sec++; }
1529      j = 0;
1530      UChar** tmp = ML_(dinfo_zalloc)( "di.storage.cS.1",
1531                                       (n_tmp+1) * sizeof(UChar*) );
1532      tmp[j++] = sym->pri_name;
1533      sec = sym->sec_names;
1534      while (*sec) { tmp[j++] = *sec; sec++; }
1535      vg_assert(j == n_tmp);
1536      vg_assert(tmp[n_tmp] == NULL); /* because of zalloc */
1537      /* Choose the most favoured. */
1538      Word best = 0;
1539      for (j = 1; j < n_tmp; j++) {
1540         if (preferName(di, tmp[best], tmp[j], di->symtab[i].addr)) {
1541            /* best is unchanged */
1542         } else {
1543            best = j;
1544         }
1545      }
1546      vg_assert(best >= 0 && best < n_tmp);
1547      /* Copy back */
1548      sym->pri_name = tmp[best];
1549      UChar** cursor = sym->sec_names;
1550      for (j = 0; j < n_tmp; j++) {
1551         if (j == best)
1552            continue;
1553         *cursor = tmp[j];
1554         cursor++;
1555      }
1556      vg_assert(*cursor == NULL);
1557      ML_(dinfo_free)( tmp );
1558   }
1559
1560#  undef SWAP
1561}
1562
1563
1564/* Sort the location table by starting address.  Mash the table around
1565   so as to establish the property that addresses are in order and the
1566   ranges do not overlap.  This facilitates using binary search to map
1567   addresses to locations when we come to query the table.
1568*/
1569static Int compare_DiLoc ( void* va, void* vb )
1570{
1571   DiLoc* a = (DiLoc*)va;
1572   DiLoc* b = (DiLoc*)vb;
1573   if (a->addr < b->addr) return -1;
1574   if (a->addr > b->addr) return  1;
1575   return 0;
1576}
1577
1578static void canonicaliseLoctab ( struct _DebugInfo* di )
1579{
1580   Word i, j;
1581
1582#  define SWAP(ty,aa,bb) \
1583      do { ty tt = (aa); (aa) = (bb); (bb) = tt; } while (0);
1584
1585   if (di->loctab_used == 0)
1586      return;
1587
1588   /* Sort by start address. */
1589   VG_(ssort)(di->loctab, di->loctab_used,
1590                          sizeof(*di->loctab), compare_DiLoc);
1591
1592   /* If two adjacent entries overlap, truncate the first. */
1593   for (i = 0; i < ((Word)di->loctab_used)-1; i++) {
1594      vg_assert(di->loctab[i].size < 10000);
1595      if (di->loctab[i].addr + di->loctab[i].size > di->loctab[i+1].addr) {
1596         /* Do this in signed int32 because the actual .size fields
1597            are only 12 bits. */
1598         Int new_size = di->loctab[i+1].addr - di->loctab[i].addr;
1599         if (new_size < 0) {
1600            di->loctab[i].size = 0;
1601         } else
1602         if (new_size > MAX_LOC_SIZE) {
1603           di->loctab[i].size = MAX_LOC_SIZE;
1604         } else {
1605           di->loctab[i].size = (UShort)new_size;
1606         }
1607      }
1608   }
1609
1610   /* Zap any zero-sized entries resulting from the truncation
1611      process. */
1612   j = 0;
1613   for (i = 0; i < (Word)di->loctab_used; i++) {
1614      if (di->loctab[i].size > 0) {
1615         if (j != i)
1616            di->loctab[j] = di->loctab[i];
1617         j++;
1618      }
1619   }
1620   di->loctab_used = j;
1621
1622   /* Ensure relevant postconditions hold. */
1623   for (i = 0; i < ((Word)di->loctab_used)-1; i++) {
1624      /*
1625      VG_(printf)("%d   (%d) %d 0x%x\n",
1626                   i, di->loctab[i+1].confident,
1627                   di->loctab[i+1].size, di->loctab[i+1].addr );
1628      */
1629      /* No zero-sized symbols. */
1630      vg_assert(di->loctab[i].size > 0);
1631      /* In order. */
1632      vg_assert(di->loctab[i].addr < di->loctab[i+1].addr);
1633      /* No overlaps. */
1634      vg_assert(di->loctab[i].addr + di->loctab[i].size - 1
1635                < di->loctab[i+1].addr);
1636   }
1637#  undef SWAP
1638
1639   /* Free up unused space at the end of the table. */
1640   shrinkLocTab(di);
1641}
1642
1643
1644/* Sort the call-frame-info table by starting address.  Mash the table
1645   around so as to establish the property that addresses are in order
1646   and the ranges do not overlap.  This facilitates using binary
1647   search to map addresses to locations when we come to query the
1648   table.
1649
1650   Also, set cfisi_minaddr and cfisi_maxaddr to be the min and max of
1651   any of the address ranges contained in cfisi[0 .. cfisi_used-1], so
1652   as to facilitate rapidly skipping this SegInfo when looking for an
1653   address which falls outside that range.
1654*/
1655static Int compare_DiCfSI ( void* va, void* vb )
1656{
1657   DiCfSI* a = (DiCfSI*)va;
1658   DiCfSI* b = (DiCfSI*)vb;
1659   if (a->base < b->base) return -1;
1660   if (a->base > b->base) return  1;
1661   return 0;
1662}
1663
1664void ML_(canonicaliseCFI) ( struct _DebugInfo* di )
1665{
1666   Word  i, j;
1667   const Addr minAvma = 0;
1668   const Addr maxAvma = ~minAvma;
1669
1670   /* Note: take care in here.  di->cfsi can be NULL, in which
1671      case _used and _size fields will be zero. */
1672   if (di->cfsi == NULL) {
1673      vg_assert(di->cfsi_used == 0);
1674      vg_assert(di->cfsi_size == 0);
1675   }
1676
1677   /* Set cfsi_minavma and cfsi_maxavma to summarise the entire
1678      address range contained in cfsi[0 .. cfsi_used-1]. */
1679   di->cfsi_minavma = maxAvma;
1680   di->cfsi_maxavma = minAvma;
1681   for (i = 0; i < (Word)di->cfsi_used; i++) {
1682      Addr here_min = di->cfsi[i].base;
1683      Addr here_max = di->cfsi[i].base + di->cfsi[i].len - 1;
1684      if (here_min < di->cfsi_minavma)
1685         di->cfsi_minavma = here_min;
1686      if (here_max > di->cfsi_maxavma)
1687         di->cfsi_maxavma = here_max;
1688   }
1689
1690   if (di->trace_cfi)
1691      VG_(printf)("canonicaliseCfiSI: %ld entries, %#lx .. %#lx\n",
1692                  di->cfsi_used,
1693	          di->cfsi_minavma, di->cfsi_maxavma);
1694
1695   /* Sort the cfsi array by base address. */
1696   VG_(ssort)(di->cfsi, di->cfsi_used, sizeof(*di->cfsi), compare_DiCfSI);
1697
1698   /* If two adjacent entries overlap, truncate the first. */
1699   for (i = 0; i < (Word)di->cfsi_used-1; i++) {
1700      if (di->cfsi[i].base + di->cfsi[i].len > di->cfsi[i+1].base) {
1701         Word new_len = di->cfsi[i+1].base - di->cfsi[i].base;
1702         /* how could it be otherwise?  The entries are sorted by the
1703            .base field. */
1704         vg_assert(new_len >= 0);
1705	 vg_assert(new_len <= di->cfsi[i].len);
1706         di->cfsi[i].len = new_len;
1707      }
1708   }
1709
1710   /* Zap any zero-sized entries resulting from the truncation
1711      process. */
1712   j = 0;
1713   for (i = 0; i < (Word)di->cfsi_used; i++) {
1714      if (di->cfsi[i].len > 0) {
1715         if (j != i)
1716            di->cfsi[j] = di->cfsi[i];
1717         j++;
1718      }
1719   }
1720   /* VG_(printf)("XXXXXXXXXXXXX %d %d\n", di->cfsi_used, j); */
1721   di->cfsi_used = j;
1722
1723   /* Ensure relevant postconditions hold. */
1724   for (i = 0; i < (Word)di->cfsi_used; i++) {
1725      /* No zero-length ranges. */
1726      vg_assert(di->cfsi[i].len > 0);
1727      /* Makes sense w.r.t. summary address range */
1728      vg_assert(di->cfsi[i].base >= di->cfsi_minavma);
1729      vg_assert(di->cfsi[i].base + di->cfsi[i].len - 1
1730                <= di->cfsi_maxavma);
1731
1732      if (i < di->cfsi_used - 1) {
1733         /*
1734         if (!(di->cfsi[i].base < di->cfsi[i+1].base)) {
1735            VG_(printf)("\nOOO cfsis:\n");
1736            ML_(ppCfiSI)(&di->cfsi[i]);
1737            ML_(ppCfiSI)(&di->cfsi[i+1]);
1738         }
1739         */
1740         /* In order. */
1741         vg_assert(di->cfsi[i].base < di->cfsi[i+1].base);
1742         /* No overlaps. */
1743         vg_assert(di->cfsi[i].base + di->cfsi[i].len - 1
1744                   < di->cfsi[i+1].base);
1745      }
1746   }
1747
1748}
1749
1750
1751/* Canonicalise the tables held by 'di', in preparation for use.  Call
1752   this after finishing adding entries to these tables. */
1753void ML_(canonicaliseTables) ( struct _DebugInfo* di )
1754{
1755   canonicaliseSymtab ( di );
1756   canonicaliseLoctab ( di );
1757   ML_(canonicaliseCFI) ( di );
1758   canonicaliseVarInfo ( di );
1759}
1760
1761
1762/*------------------------------------------------------------*/
1763/*--- Searching the tables                                 ---*/
1764/*------------------------------------------------------------*/
1765
1766/* Find a symbol-table index containing the specified pointer, or -1
1767   if not found.  Binary search.  */
1768
1769Word ML_(search_one_symtab) ( struct _DebugInfo* di, Addr ptr,
1770                              Bool match_anywhere_in_sym,
1771                              Bool findText )
1772{
1773   Addr a_mid_lo, a_mid_hi;
1774   Word mid, size,
1775        lo = 0,
1776        hi = di->symtab_used-1;
1777   while (True) {
1778      /* current unsearched space is from lo to hi, inclusive. */
1779      if (lo > hi) return -1; /* not found */
1780      mid      = (lo + hi) / 2;
1781      a_mid_lo = di->symtab[mid].addr;
1782      size = ( match_anywhere_in_sym
1783             ? di->symtab[mid].size
1784             : 1);
1785      a_mid_hi = ((Addr)di->symtab[mid].addr) + size - 1;
1786
1787      if (ptr < a_mid_lo) { hi = mid-1; continue; }
1788      if (ptr > a_mid_hi) { lo = mid+1; continue; }
1789      vg_assert(ptr >= a_mid_lo && ptr <= a_mid_hi);
1790      /* Found a symbol with the correct address range.  But is it
1791         of the right kind (text vs data) ? */
1792      if (  findText   &&   di->symtab[mid].isText  ) return mid;
1793      if ( (!findText) && (!di->symtab[mid].isText) ) return mid;
1794      return -1;
1795   }
1796}
1797
1798
1799/* Find a location-table index containing the specified pointer, or -1
1800   if not found.  Binary search.  */
1801
1802Word ML_(search_one_loctab) ( struct _DebugInfo* di, Addr ptr )
1803{
1804   Addr a_mid_lo, a_mid_hi;
1805   Word mid,
1806        lo = 0,
1807        hi = di->loctab_used-1;
1808   while (True) {
1809      /* current unsearched space is from lo to hi, inclusive. */
1810      if (lo > hi) return -1; /* not found */
1811      mid      = (lo + hi) / 2;
1812      a_mid_lo = di->loctab[mid].addr;
1813      a_mid_hi = ((Addr)di->loctab[mid].addr) + di->loctab[mid].size - 1;
1814
1815      if (ptr < a_mid_lo) { hi = mid-1; continue; }
1816      if (ptr > a_mid_hi) { lo = mid+1; continue; }
1817      vg_assert(ptr >= a_mid_lo && ptr <= a_mid_hi);
1818      return mid;
1819   }
1820}
1821
1822
1823/* Find a CFI-table index containing the specified pointer, or -1
1824   if not found.  Binary search.  */
1825
1826Word ML_(search_one_cfitab) ( struct _DebugInfo* di, Addr ptr )
1827{
1828   Addr a_mid_lo, a_mid_hi;
1829   Word mid, size,
1830        lo = 0,
1831        hi = di->cfsi_used-1;
1832   while (True) {
1833      /* current unsearched space is from lo to hi, inclusive. */
1834      if (lo > hi) return -1; /* not found */
1835      mid      = (lo + hi) / 2;
1836      a_mid_lo = di->cfsi[mid].base;
1837      size     = di->cfsi[mid].len;
1838      a_mid_hi = a_mid_lo + size - 1;
1839      vg_assert(a_mid_hi >= a_mid_lo);
1840      if (ptr < a_mid_lo) { hi = mid-1; continue; }
1841      if (ptr > a_mid_hi) { lo = mid+1; continue; }
1842      vg_assert(ptr >= a_mid_lo && ptr <= a_mid_hi);
1843      return mid;
1844   }
1845}
1846
1847
1848/* Find a FPO-table index containing the specified pointer, or -1
1849   if not found.  Binary search.  */
1850
1851Word ML_(search_one_fpotab) ( struct _DebugInfo* di, Addr ptr )
1852{
1853   Addr const addr = ptr - di->fsm.rx_map_avma;
1854   Addr a_mid_lo, a_mid_hi;
1855   Word mid, size,
1856        lo = 0,
1857        hi = di->fpo_size-1;
1858   while (True) {
1859      /* current unsearched space is from lo to hi, inclusive. */
1860      if (lo > hi) return -1; /* not found */
1861      mid      = (lo + hi) / 2;
1862      a_mid_lo = di->fpo[mid].ulOffStart;
1863      size     = di->fpo[mid].cbProcSize;
1864      a_mid_hi = a_mid_lo + size - 1;
1865      vg_assert(a_mid_hi >= a_mid_lo);
1866      if (addr < a_mid_lo) { hi = mid-1; continue; }
1867      if (addr > a_mid_hi) { lo = mid+1; continue; }
1868      vg_assert(addr >= a_mid_lo && addr <= a_mid_hi);
1869      return mid;
1870   }
1871}
1872
1873/*--------------------------------------------------------------------*/
1874/*--- end                                                          ---*/
1875/*--------------------------------------------------------------------*/
1876