/*--------------------------------------------------------------------*/
/*--- Ptrcheck: a pointer-use checker.                             ---*/
/*--- This file checks stack and global array accesses.            ---*/
/*---                                                    sg_main.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Ptrcheck, a Valgrind tool for checking pointer
   use in programs.

   Copyright (C) 2008-2013 OpenWorks Ltd
      info@open-works.co.uk

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/

#include "pub_tool_basics.h"
#include "pub_tool_libcbase.h"
#include "pub_tool_libcassert.h"
#include "pub_tool_libcprint.h"
#include "pub_tool_tooliface.h"
#include "pub_tool_wordfm.h"
#include "pub_tool_xarray.h"
#include "pub_tool_threadstate.h"
#include "pub_tool_mallocfree.h"
#include "pub_tool_machine.h"
#include "pub_tool_debuginfo.h"
#include "pub_tool_options.h"

#include "pc_common.h"

#include "sg_main.h"      // self


static
void preen_global_Invars ( Addr a, SizeT len ); /*fwds*/


//////////////////////////////////////////////////////////////
//                                                          //
// Basic Stuff                                              //
//                                                          //
//////////////////////////////////////////////////////////////

static inline Bool is_sane_TId ( ThreadId tid )
{
   return tid >= 0 && tid < VG_N_THREADS
          && tid != VG_INVALID_THREADID;
}

static void* sg_malloc ( const HChar* cc, SizeT n ) {
   void* p;
   tl_assert(n > 0);
   p = VG_(malloc)( cc, n );
   return p;
}

static void sg_free ( void* p ) {
   tl_assert(p);
   VG_(free)(p);
}


/* Compare the intervals [a1,a1+n1) and [a2,a2+n2).  Return -1 if the
   first interval is lower, 1 if the first interval is higher, and 0
   if there is any overlap.  The redundant-looking casts are paranoia,
   following what looked distinctly like a bug in gcc-4.1.2, in which
   some of the comparisons were done signedly instead of
   unsignedly. */
inline
static Word cmp_nonempty_intervals ( Addr a1, SizeT n1,
                                     Addr a2, SizeT n2 ) {
   UWord a1w = (UWord)a1;
   UWord n1w = (UWord)n1;
   UWord a2w = (UWord)a2;
   UWord n2w = (UWord)n2;
   tl_assert(n1w > 0 && n2w > 0);
   if (a1w + n1w <= a2w) return -1L;
   if (a2w + n2w <= a1w) return 1L;
   return 0;
}
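
/* Worked example: cmp_nonempty_intervals(0x1000,0x10, 0x1010,0x20)
   returns -1, since 0x1000 + 0x10 <= 0x1010; swapping the argument
   pairs gives 1; and cmp_nonempty_intervals(0x1000,0x20, 0x1010,0x10)
   returns 0, because [0x1000,0x1020) and [0x1010,0x1020) overlap.
   Note that "overlap" includes strict containment, which is what
   makes the WordFM lookups below behave as interval-tree queries. */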

/* Return true iff [aSmall,aSmall+nSmall) is entirely contained
   within [aBig,aBig+nBig). */
inline
static Bool is_subinterval_of ( Addr aBig, SizeT nBig,
                                Addr aSmall, SizeT nSmall ) {
   tl_assert(nBig > 0 && nSmall > 0);
   return aBig <= aSmall && aSmall + nSmall <= aBig + nBig;
}
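
/* E.g. is_subinterval_of(0x1000,0x100, 0x1080,0x10) is True, since
   [0x1080,0x1090) lies entirely within [0x1000,0x1100). */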

inline
static Addr Addr__min ( Addr a1, Addr a2 ) {
   return a1 < a2 ? a1 : a2;
}

inline
static Addr Addr__max ( Addr a1, Addr a2 ) {
   return a1 < a2 ? a2 : a1;
}


//////////////////////////////////////////////////////////////
//                                                          //
// StackBlocks Persistent Cache                             //
//                                                          //
//////////////////////////////////////////////////////////////

/* We maintain a set of XArray* of StackBlocks.  These are never
   freed.  When a new StackBlock vector is acquired from
   VG_(di_get_stack_blocks_at_ip), we compare it to the existing set.
   If not present, it is added.  If present, the just-acquired one is
   freed and the copy used.

   This simplifies storage management elsewhere.  It allows us to
   assume that a pointer to an XArray* of StackBlock is valid forever.
   It also means there are no duplicates anywhere, which could be
   important from a space point of view for programs that generate a
   lot of translations, or where translations are frequently discarded
   and re-made.

   Note that we normalise the arrays by sorting the elements according
   to an arbitrary total order, so as to avoid the situation that two
   vectors describe the same set of variables but are not structurally
   identical. */

static inline Bool StackBlock__sane ( const StackBlock* fb )
{
   if (fb->name[ sizeof(fb->name)-1 ] != 0)
      return False;
   if (fb->spRel != False && fb->spRel != True)
      return False;
   if (fb->isVec != False && fb->isVec != True)
      return False;
   return True;
}

/* Generate an arbitrary total ordering on StackBlocks. */
static Word StackBlock__cmp ( const StackBlock* fb1, const StackBlock* fb2 )
{
   Word r;
   tl_assert(StackBlock__sane(fb1));
   tl_assert(StackBlock__sane(fb2));
   /* Hopefully the .base test hits most of the time.  For the blocks
      associated with any particular instruction, if the .base values
      are the same then probably it doesn't make sense for the other
      fields to be different.  But this is supposed to be a completely
      general structural total order, so we have to compare everything
      anyway. */
   if (fb1->base < fb2->base) return -1;
   if (fb1->base > fb2->base) return 1;
   /* compare sizes */
   if (fb1->szB < fb2->szB) return -1;
   if (fb1->szB > fb2->szB) return 1;
   /* compare sp/fp flag */
   if (fb1->spRel < fb2->spRel) return -1;
   if (fb1->spRel > fb2->spRel) return 1;
   /* compare is/is-not array-typed flag */
   if (fb1->isVec < fb2->isVec) return -1;
   if (fb1->isVec > fb2->isVec) return 1;
   /* compare the name */
   r = (Word)VG_(strcmp)(fb1->name, fb2->name);
   return r;
}

/* Returns True if all fields except .szB are the same.  szBs may or
   may not be the same; they are simply not consulted. */
static Bool StackBlock__all_fields_except_szB_are_equal (
               StackBlock* fb1,
               StackBlock* fb2
            )
{
   tl_assert(StackBlock__sane(fb1));
   tl_assert(StackBlock__sane(fb2));
   return fb1->base == fb2->base
          && fb1->spRel == fb2->spRel
          && fb1->isVec == fb2->isVec
          && 0 == VG_(strcmp)(fb1->name, fb2->name);
}


/* Generate an arbitrary total ordering on vectors of StackBlocks. */
static Word StackBlocks__cmp ( XArray* fb1s, XArray* fb2s )
{
   Word i, r, n1, n2;
   n1 = VG_(sizeXA)( fb1s );
   n2 = VG_(sizeXA)( fb2s );
   if (n1 < n2) return -1;
   if (n1 > n2) return 1;
   for (i = 0; i < n1; i++) {
      StackBlock *fb1, *fb2;
      fb1 = VG_(indexXA)( fb1s, i );
      fb2 = VG_(indexXA)( fb2s, i );
      r = StackBlock__cmp( fb1, fb2 );
      if (r != 0) return r;
   }
   tl_assert(i == n1 && i == n2);
   return 0;
}

static void pp_StackBlocks ( XArray* sbs )
{
   Word i, n = VG_(sizeXA)( sbs );
   VG_(message)(Vg_DebugMsg, "<<< STACKBLOCKS\n" );
   for (i = 0; i < n; i++) {
      StackBlock* sb = (StackBlock*)VG_(indexXA)( sbs, i );
      VG_(message)(Vg_DebugMsg,
         "   StackBlock{ off %ld szB %lu spRel:%c isVec:%c \"%s\" }\n",
         sb->base, sb->szB, sb->spRel ? 'Y' : 'N',
         sb->isVec ? 'Y' : 'N', &sb->name[0]
      );
   }
   VG_(message)(Vg_DebugMsg, ">>> STACKBLOCKS\n" );
}


/* ---------- The StackBlock vector cache ---------- */

static WordFM* /* XArray* of StackBlock -> nothing */
       frameBlocks_set = NULL;

static void init_StackBlocks_set ( void )
{
   tl_assert(!frameBlocks_set);
   frameBlocks_set
      = VG_(newFM)( sg_malloc, "di.sg_main.iSBs.1", sg_free,
                    (Word(*)(UWord,UWord))StackBlocks__cmp );
   tl_assert(frameBlocks_set);
}

/* Find the given StackBlock-vector in our collection thereof.  If
   found, deallocate the supplied one, and return the address of the
   copy.  If not found, add the supplied one to our collection and
   return its address. */
static XArray* /* of StackBlock */
       StackBlocks__find_and_dealloc__or_add
          ( XArray* /* of StackBlock */ orig )
{
   UWord key, val;

   /* First, normalise, as per comments above. */
   VG_(setCmpFnXA)( orig, (XACmpFn_t)StackBlock__cmp );
   VG_(sortXA)( orig );

   /* Now get rid of any exact duplicates. */
  nuke_dups:
   { Word r, w, nEQ, n = VG_(sizeXA)( orig );
     if (n >= 2) {
        w = 0;
        nEQ = 0;
        for (r = 0; r < n; r++) {
           if (r+1 < n) {
              StackBlock* pR0 = VG_(indexXA)( orig, r+0 );
              StackBlock* pR1 = VG_(indexXA)( orig, r+1 );
              Word c = StackBlock__cmp(pR0,pR1);
              tl_assert(c == -1 || c == 0);
              if (c == 0) { nEQ++; continue; }
           }
           if (w != r) {
              StackBlock* pW = VG_(indexXA)( orig, w );
              StackBlock* pR = VG_(indexXA)( orig, r );
              *pW = *pR;
           }
           w++;
        }
        tl_assert(r == n);
        tl_assert(w + nEQ == n);
        if (w < n) {
           VG_(dropTailXA)( orig, n-w );
        }
        if (0) VG_(printf)("delta %ld\n", n-w);
     }
   }
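
   /* For example, if the sorted array held A A B (where the two As
      compare equal under StackBlock__cmp), the loop above runs with a
      read pointer r and a write pointer w: r==0 sees A==A and skips
      it (nEQ becomes 1), r==1 copies A down to slot 0, r==2 copies B
      to slot 1, and the final dropTailXA removes the now-unused third
      slot, leaving A B. */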

   /* Deal with the following strangeness, where two otherwise
      identical blocks are claimed to have different sizes.  In that
      case we use the larger size. */
   /* StackBlock{ off 16 szB 66 spRel:Y isVec:Y "sz" }
      StackBlock{ off 16 szB 130 spRel:Y isVec:Y "sz" }
      StackBlock{ off 208 szB 16 spRel:Y isVec:Y "ar" }
   */
   { Word i, n = VG_(sizeXA)( orig );
     if (n >= 2) {
        for (i = 0; i < n-1; i++) {
           StackBlock* sb0 = VG_(indexXA)( orig, i+0 );
           StackBlock* sb1 = VG_(indexXA)( orig, i+1 );
           if (StackBlock__all_fields_except_szB_are_equal(sb0, sb1)) {
              /* They can't be identical because the previous tidying
                 pass would have removed the duplicates.  And they
                 can't be > because the earlier sorting pass would
                 have ordered otherwise-identical descriptors
                 according to < on .szB fields.  Hence: */
              tl_assert(sb0->szB < sb1->szB);
              sb0->szB = sb1->szB;
              /* This makes the blocks identical, at the size of the
                 larger one.  Rather than go to all the hassle of
                 sliding the rest down, simply go back to the
                 remove-duplicates stage.  The assertion guarantees
                 that we eventually make progress, since the rm-dups
                 stage will get rid of one of the blocks.  This is
                 expected to happen only exceedingly rarely. */
              tl_assert(StackBlock__cmp(sb0,sb1) == 0);
              goto nuke_dups;
           }
        }
     }
   }

   /* If there are any blocks which overlap and have the same
      spRel-ness, junk the whole descriptor; it's obviously bogus.
      Icc11 certainly generates bogus info from time to time.

      This check is pretty weak; really we ought to have a stronger
      sanity check. */
   { Word i, n = VG_(sizeXA)( orig );
     static Int moans = 3;
     for (i = 0; i < n-1; i++) {
       StackBlock* sb1 = (StackBlock*)VG_(indexXA)( orig, i );
       StackBlock* sb2 = (StackBlock*)VG_(indexXA)( orig, i+1 );
       if (sb1->spRel == sb2->spRel
           && (sb1->base >= sb2->base
               || sb1->base + sb1->szB > sb2->base)) {
          if (moans > 0 && !VG_(clo_xml)) {
             moans--;
             VG_(message)(Vg_UserMsg, "Warning: bogus DWARF3 info: "
                                      "overlapping stack blocks\n");
             if (VG_(clo_verbosity) >= 2)
                pp_StackBlocks(orig);
             if (moans == 0)
                VG_(message)(Vg_UserMsg, "Further instances of this "
                                         "message will not be shown\n" );
          }
          VG_(dropTailXA)( orig, VG_(sizeXA)( orig ));
          break;
       }
     }
   }

   /* Now, do we have it already? */
   if (VG_(lookupFM)( frameBlocks_set, &key, &val, (UWord)orig )) {
      /* yes */
      XArray* res;
      tl_assert(val == 0);
      tl_assert(key != (UWord)orig);
      VG_(deleteXA)(orig);
      res = (XArray*)key;
      return res;
   } else {
      /* no */
      VG_(addToFM)( frameBlocks_set, (UWord)orig, 0 );
      return orig;
   }
}

/* Top level function for getting the StackBlock vector for a given
   instruction.  It is guaranteed that the returned pointer will be
   valid for the entire rest of the run, and also that the addresses
   of the individual elements of the array will not change. */

static XArray* /* of StackBlock */ get_StackBlocks_for_IP ( Addr ip )
{
   XArray* blocks = VG_(di_get_stack_blocks_at_ip)( ip, True/*arrays only*/ );
   tl_assert(blocks);
   return StackBlocks__find_and_dealloc__or_add( blocks );
}
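
/* A hypothetical usage sketch (not code from this file): an
   instrumenter would typically call this once per instruction mark,
   along the lines of

      XArray* blks = get_StackBlocks_for_IP( st->Ist.IMark.addr );

   where 'st' stands for whatever IRStmt is being processed, and then
   bake 'blks' into the helper-call arguments for that instruction,
   relying on the valid-forever guarantee stated above. */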


//////////////////////////////////////////////////////////////
//                                                          //
// GlobalBlocks Persistent Cache                            //
//                                                          //
//////////////////////////////////////////////////////////////

/* Generate an arbitrary total ordering on GlobalBlocks. */
static Word GlobalBlock__cmp ( GlobalBlock* gb1, GlobalBlock* gb2 )
{
   Word r;
   /* compare addrs */
   if (gb1->addr < gb2->addr) return -1;
   if (gb1->addr > gb2->addr) return 1;
   /* compare sizes */
   if (gb1->szB < gb2->szB) return -1;
   if (gb1->szB > gb2->szB) return 1;
   /* compare is/is-not array-typed flag */
   if (gb1->isVec < gb2->isVec) return -1;
   if (gb1->isVec > gb2->isVec) return 1;
   /* compare the name */
   r = (Word)VG_(strcmp)(gb1->name, gb2->name);
   if (r != 0) return r;
   /* compare the soname */
   r = (Word)VG_(strcmp)(gb1->soname, gb2->soname);
   return r;
}

static WordFM* /* GlobalBlock* -> nothing */
       globalBlock_set = NULL;

static void init_GlobalBlock_set ( void )
{
   tl_assert(!globalBlock_set);
   globalBlock_set
      = VG_(newFM)( sg_malloc, "di.sg_main.iGBs.1", sg_free,
                    (Word(*)(UWord,UWord))GlobalBlock__cmp );
   tl_assert(globalBlock_set);
}


/* Top level function for making GlobalBlocks persistent.  Call here
   with a non-persistent version, and the returned one is guaranteed
   to be valid for the entire rest of the run.  The supplied one is
   copied, not stored, so can be freed after the call. */

static GlobalBlock* get_persistent_GlobalBlock ( GlobalBlock* orig )
{
   UWord key, val;
   /* Now, do we have it already? */
   if (VG_(lookupFM)( globalBlock_set, &key, &val, (UWord)orig )) {
      /* yes, return the copy */
      GlobalBlock* res;
      tl_assert(val == 0);
      res = (GlobalBlock*)key;
      tl_assert(res != orig);
      return res;
   } else {
      /* no.  clone it, store the clone and return the clone's
         address. */
      GlobalBlock* clone = sg_malloc( "di.sg_main.gpGB.1",
                                      sizeof(GlobalBlock) );
      tl_assert(clone);
      *clone = *orig;
      VG_(addToFM)( globalBlock_set, (UWord)clone, 0 );
      return clone;
   }
}


//////////////////////////////////////////////////////////////
//                                                          //
// Interval tree of StackTreeBlock                          //
//                                                          //
//////////////////////////////////////////////////////////////

/* A node in a stack interval tree.  Zero length intervals (.szB == 0)
   are not allowed.

   A stack interval tree is a (WordFM StackTreeNode* void).  There is
   one stack interval tree for each thread.
*/
typedef
   struct {
      Addr        addr;
      SizeT       szB;   /* copied from .descr->szB */
      StackBlock* descr; /* it's an instance of this block */
      UWord       depth; /* depth of stack at time block was pushed */
   }
   StackTreeNode;

static void pp_StackTree ( WordFM* sitree, const HChar* who )
{
   UWord keyW, valW;
   VG_(printf)("<<< BEGIN pp_StackTree %s\n", who );
   VG_(initIterFM)( sitree );
   while (VG_(nextIterFM)( sitree, &keyW, &valW )) {
      StackTreeNode* nd = (StackTreeNode*)keyW;
      VG_(printf)("  [%#lx,+%lu) descr=%p %s %lu\n", nd->addr, nd->szB,
                  nd->descr, nd->descr->name, nd->descr->szB);
   }
   VG_(printf)(">>> END   pp_StackTree %s\n", who );
}

/* Interval comparison function for StackTreeNode */
static Word cmp_intervals_StackTreeNode ( StackTreeNode* sn1,
                                          StackTreeNode* sn2 )
{
   return cmp_nonempty_intervals(sn1->addr, sn1->szB,
                                 sn2->addr, sn2->szB);
}

/* Find the node holding 'a', if any. */
static StackTreeNode* find_StackTreeNode ( WordFM* sitree, Addr a )
{
   UWord keyW, valW;
   StackTreeNode key;
   tl_assert(sitree);
   key.addr = a;
   key.szB  = 1;
   if (VG_(lookupFM)( sitree, &keyW, &valW, (UWord)&key )) {
      StackTreeNode* res = (StackTreeNode*)keyW;
      tl_assert(valW == 0);
      tl_assert(res != &key);
      return res;
   } else {
      return NULL;
   }
}
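
/* Note the trick here: the lookup key is the one-byte interval
   [a,a+1), and since cmp_intervals_StackTreeNode reports 0 on any
   overlap, VG_(lookupFM) returns whichever node's interval contains
   'a', if one exists. */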

/* Note that the supplied XArray of StackBlock must have been
   made persistent already. */
__attribute__((noinline))
static void add_blocks_to_StackTree (
               /*MOD*/WordFM* sitree,
               XArray* /* StackBlock */ descrs,
               XArray* /* Addr */ bases,
               UWord depth
            )
{
   Bool debug = (Bool)0;
   Word i, nDescrs, nBases;

   nDescrs = VG_(sizeXA)( descrs );
   nBases = VG_(sizeXA)( bases );
   tl_assert(nDescrs == nBases);

   if (nDescrs == 0) return;

   tl_assert(sitree);
   if (debug) {
      VG_(printf)("\ndepth = %lu\n", depth);
      pp_StackTree( sitree, "add_blocks_to_StackTree-pre" );
      pp_StackBlocks(descrs);
   }

   for (i = 0; i < nDescrs; i++) {
      Bool already_present;
      StackTreeNode* nyu;
      Addr        addr  = *(Addr*)VG_(indexXA)( bases, i );
      StackBlock* descr = (StackBlock*)VG_(indexXA)( descrs, i );
      tl_assert(descr->szB > 0);
      nyu = sg_malloc( "di.sg_main.abtST.1", sizeof(StackTreeNode) );
      nyu->addr  = addr;
      nyu->szB   = descr->szB;
      nyu->descr = descr;
      nyu->depth = depth;
      if (debug) VG_(printf)("ADD %#lx %lu\n", addr, descr->szB);
      already_present = VG_(addToFM)( sitree, (UWord)nyu, 0 );
      /* The interval can't already be there; else we have
         overlapping stack blocks. */
      tl_assert(!already_present);
      if (debug) {
         pp_StackTree( sitree, "add_blocks_to_StackTree-step" );
      }
   }
   if (debug) {
      pp_StackTree( sitree, "add_blocks_to_StackTree-post" );
      VG_(printf)("\n");
   }
}

static void del_blocks_from_StackTree ( /*MOD*/WordFM* sitree,
                                        XArray* /* Addr */ bases )
{
   UWord oldK, oldV;
   Word i, nBases = VG_(sizeXA)( bases );
   for (i = 0; i < nBases; i++) {
      Bool b;
      Addr addr = *(Addr*)VG_(indexXA)( bases, i );
      StackTreeNode* nd = find_StackTreeNode(sitree, addr);
      /* The interval must be there; we added it earlier when
         the associated frame was created. */
      tl_assert(nd);
      b = VG_(delFromFM)( sitree, &oldK, &oldV, (UWord)nd );
      /* we just found the block! */
      tl_assert(b);
      tl_assert(oldV == 0);
      tl_assert(nd == (StackTreeNode*)oldK);
      sg_free(nd);
   }
}


static void delete_StackTree__kFin ( UWord keyW ) {
   StackTreeNode* nd = (StackTreeNode*)keyW;
   tl_assert(nd);
   sg_free(nd);
}
static void delete_StackTree__vFin ( UWord valW ) {
   tl_assert(valW == 0);
}
static void delete_StackTree ( WordFM* sitree )
{
   VG_(deleteFM)( sitree,
                 delete_StackTree__kFin, delete_StackTree__vFin );
}

static WordFM* new_StackTree ( void ) {
   return VG_(newFM)( sg_malloc, "di.sg_main.nST.1", sg_free,
                      (Word(*)(UWord,UWord))cmp_intervals_StackTreeNode );
}


//////////////////////////////////////////////////////////////
//                                                          //
// Interval tree of GlobalTreeBlock                         //
//                                                          //
//////////////////////////////////////////////////////////////

/* A node in a global interval tree.  Zero length intervals
   (.szB == 0) are not allowed.

   A global interval tree is a (WordFM GlobalTreeNode* void).  There
   is one global interval tree for the entire process.
*/
typedef
   struct {
      Addr         addr; /* copied from .descr->addr */
      SizeT        szB; /* copied from .descr->szB */
      GlobalBlock* descr; /* it's this block */
   }
   GlobalTreeNode;

static void GlobalTreeNode__pp ( GlobalTreeNode* nd ) {
   tl_assert(nd->descr);
   VG_(printf)("GTNode [%#lx,+%ld) %s",
               nd->addr, nd->szB, nd->descr->name);
}

static void GlobalTree__pp ( WordFM* /* of (GlobalTreeNode,void) */ gitree,
                             const HChar* who )
{
   UWord keyW, valW;
   GlobalTreeNode* nd;
   VG_(printf)("<<< GlobalBlockTree (%s)\n", who);
   VG_(initIterFM)( gitree );
   while (VG_(nextIterFM)( gitree, &keyW, &valW )) {
      tl_assert(valW == 0);
      nd = (GlobalTreeNode*)keyW;
      VG_(printf)("  ");
      GlobalTreeNode__pp(nd);
      VG_(printf)("\n");
   }
   VG_(doneIterFM)( gitree );
   VG_(printf)(">>>\n");
}

/* Interval comparison function for GlobalTreeNode */
static Word cmp_intervals_GlobalTreeNode ( GlobalTreeNode* gn1,
                                           GlobalTreeNode* gn2 )
{
   return cmp_nonempty_intervals( gn1->addr, gn1->szB,
                                  gn2->addr, gn2->szB );
}

/* Find the node holding 'a', if any. */
static GlobalTreeNode* find_GlobalTreeNode ( WordFM* gitree, Addr a )
{
   UWord keyW, valW;
   GlobalTreeNode key;
   key.addr = a;
   key.szB  = 1;
   if (VG_(lookupFM)( gitree, &keyW, &valW, (UWord)&key )) {
      GlobalTreeNode* res = (GlobalTreeNode*)keyW;
      tl_assert(valW == 0);
      tl_assert(res != &key);
      return res;
   } else {
      return NULL;
   }
}

/* Note that the supplied GlobalBlock must have been made persistent
   already. */
static void add_block_to_GlobalTree (
               /*MOD*/WordFM* gitree,
               GlobalBlock* descr
            )
{
   Bool already_present;
   GlobalTreeNode *nyu, *nd;
   UWord keyW, valW;
   static Int moans = 3;

   tl_assert(descr->szB > 0);
   nyu = sg_malloc( "di.sg_main.abtG.1", sizeof(GlobalTreeNode) );
   nyu->addr  = descr->addr;
   nyu->szB   = descr->szB;
   nyu->descr = descr;
   /* Basically it's an error to add a global block to the tree that
      is already in the tree.  However, detect and ignore attempts to
      insert exact duplicates; they do appear for some reason
      (possibly a bug in m_debuginfo?) */
   already_present = VG_(lookupFM)( gitree, &keyW, &valW, (UWord)nyu );
   if (already_present) {
      tl_assert(valW == 0);
      nd = (GlobalTreeNode*)keyW;
      tl_assert(nd);
      tl_assert(nd != nyu);
      tl_assert(nd->descr);
      tl_assert(nyu->descr);
      if (nd->addr == nyu->addr && nd->szB == nyu->szB
          /* && 0 == VG_(strcmp)(nd->descr->name, nyu->descr->name) */
          /* Although it seems reasonable to demand that duplicate
             blocks have identical names, that is too strict.  For
             example, reading debuginfo from glibc produces two
             otherwise identical blocks with names "tzname" and
             "__tzname".  A constraint of the form "must be identical,
             or one must be a substring of the other" would fix that.
             However, such trickery is scuppered by the fact that we
             truncate all variable names to 15 characters to make
             storage management simpler, hence giving pairs like
             "__EI___pthread_" (truncated) vs "__pthread_keys".  So
             it's simplest just to skip the name comparison
             completely. */
          && 0 == VG_(strcmp)(nd->descr->soname, nyu->descr->soname)) {
         /* exact duplicate; ignore it */
         sg_free(nyu);
         return;
      }
      /* else fall through; the overlap warning below will catch it */
   }

   already_present = VG_(addToFM)( gitree, (UWord)nyu, 0 );
   /* The interval can't already be there; else we have
      overlapping global blocks. */
   /* Unfortunately (25 Jan 09) at least icc11 has been seen to
      generate overlapping block descriptions in the Dwarf3; clearly
      bogus. */
   if (already_present && moans > 0 && !VG_(clo_xml)) {
      moans--;
      VG_(message)(Vg_UserMsg, "Warning: bogus DWARF3 info: "
                               "overlapping global blocks\n");
      if (VG_(clo_verbosity) >= 2) {
         GlobalTree__pp( gitree,
                         "add_block_to_GlobalTree: non-exact duplicate" );
         VG_(printf)("Overlapping block: ");
         GlobalTreeNode__pp(nyu);
         VG_(printf)("\n");
      }
      if (moans == 0)
         VG_(message)(Vg_UserMsg, "Further instances of this "
                                  "message will not be shown\n" );
   }
   /* tl_assert(!already_present); */
}

static Bool del_GlobalTree_range ( /*MOD*/WordFM* gitree,
                                   Addr a, SizeT szB )
{
   /* One easy way to do this: look up [a,a+szB) in the tree.  That
      will either succeed, producing a block which intersects that
      range, in which case we delete it and repeat; or it will fail,
      in which case there are no blocks intersecting the range, and we
      can bring the process to a halt. */
   UWord keyW, valW, oldK, oldV;
   GlobalTreeNode key, *nd;
   Bool b, anyFound;

   tl_assert(szB > 0);

   anyFound = False;

   key.addr = a;
   key.szB  = szB;

   while (VG_(lookupFM)( gitree, &keyW, &valW, (UWord)&key )) {
      anyFound = True;
      nd = (GlobalTreeNode*)keyW;
      tl_assert(valW == 0);
      tl_assert(nd != &key);
      tl_assert(cmp_nonempty_intervals(a, szB, nd->addr, nd->szB) == 0);

      b = VG_(delFromFM)( gitree, &oldK, &oldV, (UWord)&key );
      tl_assert(b);
      tl_assert(oldV == 0);
      tl_assert(oldK == keyW); /* check we deleted the node we just found */
   }

   return anyFound;
}
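
/* Subtlety in the loop above: deletion is done via '&key' rather than
   via 'nd'.  That works because VG_(delFromFM) uses the same
   overlap-means-equal comparator, so it removes some node
   intersecting [a,a+szB); the assertion oldK == keyW then confirms it
   removed exactly the node the preceding lookup returned.  Each
   iteration therefore removes one intersecting block, so the loop
   terminates once none remain. */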


//////////////////////////////////////////////////////////////
//                                                          //
// Invar                                                    //
//                                                          //
//////////////////////////////////////////////////////////////

/* An invariant, as resulting from watching the destination of a
   memory referencing instruction.  Initially is Inv_Unset until the
   instruction makes a first access. */

typedef
   enum {
      Inv_Unset=1,  /* not established yet */
      Inv_Unknown,  /* unknown location */
      Inv_Stack0,   /* array-typed stack block in innermost frame */
      Inv_StackN,   /* array-typed stack block in non-innermost frame */
      Inv_Global,   /* array-typed global block */
   }
   InvarTag;

typedef
   struct {
      InvarTag tag;
      union {
         struct {
         } Unset;
         struct {
         } Unknown;
         struct {
            Addr  addr;
            SizeT szB;
            StackBlock* descr;
         } Stack0; /* innermost stack frame */
         struct {
            /* Pointer to a node in the interval tree for
               this thread. */
            StackTreeNode* nd;
         } StackN; /* non-innermost stack frame */
         struct {
            /* Pointer to a GlobalBlock in the interval tree of
               global blocks. */
            GlobalTreeNode* nd;
         } Global;
      }
      Inv;
   }
   Invar;

/* Partial debugging printing for an Invar. */
static void pp_Invar ( Invar* i )
{
   switch (i->tag) {
      case Inv_Unset:
         VG_(printf)("Unset");
         break;
      case Inv_Unknown:
         VG_(printf)("Unknown");
         break;
      case Inv_Stack0:
         VG_(printf)("Stack0 [%#lx,+%lu)",
                     i->Inv.Stack0.addr, i->Inv.Stack0.szB);
         break;
      case Inv_StackN:
         VG_(printf)("StackN [%#lx,+%lu)",
                     i->Inv.StackN.nd->addr, i->Inv.StackN.nd->szB);
         break;
      case Inv_Global:
         VG_(printf)("Global [%#lx,+%lu)",
                     i->Inv.Global.nd->addr, i->Inv.Global.nd->szB);
         break;
      default:
         tl_assert(0);
   }
}

/* Compare two Invars for equality. */
static Bool eq_Invar ( Invar* i1, Invar* i2 )
{
   if (i1->tag != i2->tag)
      return False;
   switch (i1->tag) {
      case Inv_Unset:
         return True;
      case Inv_Unknown:
         return True;
      case Inv_Stack0:
         return i1->Inv.Stack0.addr == i2->Inv.Stack0.addr
                && i1->Inv.Stack0.szB == i2->Inv.Stack0.szB;
      case Inv_StackN:
         return i1->Inv.StackN.nd == i2->Inv.StackN.nd;
      case Inv_Global:
         return i1->Inv.Global.nd == i2->Inv.Global.nd;
      default:
         tl_assert(0);
   }
   /*NOTREACHED*/
   tl_assert(0);
}

/* Generate a piece of text showing where 'ea' is relative to 'invar',
   if known.  If unknown, generate an empty string.  'buf' must be at
   least 32 bytes in size.  Also return the absolute value of the
   delta, if known, or zero if not known.
*/
static void gen_delta_str ( /*OUT*/HChar* buf,
                            /*OUT*/UWord* absDelta,
                            Invar* inv, Addr ea )
{
   Addr  block = 0;
   SizeT szB   = 0;

   buf[0] = 0;
   *absDelta = 0;

   switch (inv->tag) {
      case Inv_Unknown:
      case Inv_Unset:
         return; /* unknown */
      case Inv_Stack0:
         block = inv->Inv.Stack0.addr;
         szB   = inv->Inv.Stack0.szB;
         break;
      case Inv_StackN:
         block = inv->Inv.StackN.nd->addr;
         szB   = inv->Inv.StackN.nd->szB;
         break;
      case Inv_Global:
         block = inv->Inv.Global.nd->addr;
         szB   = inv->Inv.Global.nd->szB;
         break;
      default:
         tl_assert(0);
   }
   tl_assert(szB > 0);
   if (ea < block) {
      *absDelta = block - ea;
      VG_(sprintf)(buf, "%'lu before", *absDelta);
   }
   else if (ea >= block + szB) {
      *absDelta = ea - (block + szB);
      VG_(sprintf)(buf, "%'lu after", *absDelta);
   }
   else {
      // Leave *absDelta at zero.
      VG_(sprintf)(buf, "%'lu inside", ea - block);
   }
}
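
/* Example outputs: for a block at [0x5000,0x5040), ea == 0x4ff8
   produces "8 before" with *absDelta == 8; ea == 0x5048 produces
   "8 after" with *absDelta == 8; and ea == 0x5010 produces
   "16 inside", leaving *absDelta at zero. */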


/* Print selected parts of an Invar, suitable for use in error
   messages. */
static void show_Invar( HChar* buf, Word nBuf, Invar* inv, Word depth )
{
   const HChar* str;
   tl_assert(nBuf >= 128);
   buf[0] = 0;
   switch (inv->tag) {
      case Inv_Unknown:
         VG_(sprintf)(buf, "%s", "unknown");
         break;
      case Inv_Stack0:
         str = "array";
         VG_(sprintf)(buf, "stack %s \"%s\" of size %'lu in this frame",
                      str, inv->Inv.Stack0.descr->name,
                      inv->Inv.Stack0.szB );
         break;
      case Inv_StackN:
         str = "array";
         VG_(sprintf)(buf, "stack %s \"%s\" of size %'lu in frame %lu back from here",
                      str, inv->Inv.StackN.nd->descr->name,
                           inv->Inv.StackN.nd->descr->szB,
                           depth - inv->Inv.StackN.nd->depth );
         break;
      case Inv_Global:
         str = "array";
         VG_(sprintf)(buf, "global %s \"%s\" of size %'lu in object with soname \"%s\"",
                      str, inv->Inv.Global.nd->descr->name,
                           inv->Inv.Global.nd->descr->szB,
                           inv->Inv.Global.nd->descr->soname );
         break;
      case Inv_Unset:
         VG_(sprintf)(buf, "%s", "Unset!");
         break;
      default:
         tl_assert(0);
   }
}


//////////////////////////////////////////////////////////////
//                                                          //
// our globals                                              //
//                                                          //
//////////////////////////////////////////////////////////////

//////////////////////////////////////////////////////////////
///

#define N_QCACHE 16

/* Powers of two only, else the result will be chaos */
#define QCACHE_ADVANCE_EVERY 16
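
/* QCACHE_ADVANCE_EVERY is used below as a mask, in the expression
   (ctr++ & (QCACHE_ADVANCE_EVERY-1)) == 0; that is why it must be a
   power of two.  With the value 16, a cache hit is promoted one slot
   towards the front of the cache only on every 16th qualifying hit,
   giving a cheap approximation to move-to-front ordering. */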

/* Per-thread query cache.  Note that the invar can only be Inv_StackN
   (but not Inv_Stack0), Inv_Global or Inv_Unknown. */
typedef
   struct {
      Addr  addr;
      SizeT szB;
      Invar inv;
   }
   QCElem;

typedef
   struct {
      Word   nInUse;
      QCElem elems[N_QCACHE];
   }
   QCache;

static void QCache__invalidate ( QCache* qc ) {
   tl_assert(qc->nInUse >= 0);
   qc->nInUse = 0;
}

static void QCache__pp ( QCache* qc, const HChar* who )
{
   Word i;
   VG_(printf)("<<< QCache with %ld elements (%s)\n", qc->nInUse, who);
   for (i = 0; i < qc->nInUse; i++) {
      VG_(printf)("  [%#lx,+%#lx) ", qc->elems[i].addr, qc->elems[i].szB);
      pp_Invar(&qc->elems[i].inv);
      VG_(printf)("\n");
   }
   VG_(printf)(">>>\n");
}

static ULong stats__qcache_queries = 0;
static ULong stats__qcache_misses  = 0;
static ULong stats__qcache_probes  = 0;

///
//////////////////////////////////////////////////////////////

/* Each thread has:
   * a shadow stack of StackFrames, which is a doubly-linked list
   * a stack block interval tree
*/
static  struct _StackFrame**         shadowStacks;

static  WordFM** /* StackTreeNode */ siTrees;

static  QCache*                      qcaches;

/* Additionally, there is one global variable interval tree
   for the entire process.
*/
static WordFM* /* GlobalTreeNode */ giTree;


static void invalidate_all_QCaches ( void )
{
   Word i;
   for (i = 0; i < VG_N_THREADS; i++) {
      QCache__invalidate( &qcaches[i] );
   }
}

static void ourGlobals_init ( void )
{
   Word i;

   shadowStacks = sg_malloc( "di.sg_main.oGi.2",
                             VG_N_THREADS * sizeof shadowStacks[0] );
   siTrees = sg_malloc( "di.sg_main.oGi.3", VG_N_THREADS * sizeof siTrees[0] );
   qcaches = sg_malloc( "di.sg_main.oGi.4", VG_N_THREADS * sizeof qcaches[0] );

   for (i = 0; i < VG_N_THREADS; i++) {
      shadowStacks[i] = NULL;
      siTrees[i] = NULL;
      qcaches[i] = (QCache){};
   }
   invalidate_all_QCaches();
   giTree = VG_(newFM)( sg_malloc, "di.sg_main.oGi.1", sg_free,
                        (Word(*)(UWord,UWord))cmp_intervals_GlobalTreeNode );
}


//////////////////////////////////////////////////////////////
//                                                          //
// Handle global variable load/unload events                //
//                                                          //
//////////////////////////////////////////////////////////////

static void acquire_globals ( ULong di_handle )
{
   Word n, i;
   XArray* /* of GlobalBlock */ gbs;
   if (0) VG_(printf)("ACQUIRE GLOBALS %llu\n", di_handle );
   gbs = VG_(di_get_global_blocks_from_dihandle)
            (di_handle, True/*arrays only*/);
   if (0) VG_(printf)("   GOT %ld globals\n", VG_(sizeXA)( gbs ));

   n = VG_(sizeXA)( gbs );
   for (i = 0; i < n; i++) {
      GlobalBlock* gbp;
      GlobalBlock* gb = VG_(indexXA)( gbs, i );
      if (0) VG_(printf)("   new Global size %2lu at %#lx:  %s %s\n",
                         gb->szB, gb->addr, gb->soname, gb->name );
      tl_assert(gb->szB > 0);
      /* Make a persistent copy of each GlobalBlock, and add it
         to the tree. */
      gbp = get_persistent_GlobalBlock( gb );
      add_block_to_GlobalTree( giTree, gbp );
   }

   VG_(deleteXA)( gbs );
}


/* We only intercept these two because we need to see any di_handles
   that might arise from the mappings/allocations. */
void sg_new_mem_mmap( Addr a, SizeT len,
                      Bool rr, Bool ww, Bool xx, ULong di_handle )
{
   if (di_handle > 0)
      acquire_globals(di_handle);
}
void sg_new_mem_startup( Addr a, SizeT len,
                         Bool rr, Bool ww, Bool xx, ULong di_handle )
{
   if (di_handle > 0)
      acquire_globals(di_handle);
}
void sg_die_mem_munmap ( Addr a, SizeT len )
{
   Bool debug = (Bool)0;
   Bool overlap = False;

   if (debug) VG_(printf)("MUNMAP %#lx %lu\n", a, len );

   if (len == 0)
      return;

   overlap = del_GlobalTree_range(giTree, a, len);

   { /* redundant sanity check */
     UWord keyW, valW;
     VG_(initIterFM)( giTree );
     while (VG_(nextIterFM)( giTree, &keyW, &valW )) {
        GlobalTreeNode* nd = (GlobalTreeNode*)keyW;
        tl_assert(valW == 0);
        tl_assert(nd->szB > 0);
        tl_assert(nd->addr + nd->szB <= a
                  || a + len <= nd->addr);
     }
     VG_(doneIterFM)( giTree );
   }

   if (!overlap)
      return;

   /* Ok, the range contained some blocks.  Therefore we'll need to
      visit all the Invars in all the thread shadow stacks, and
      convert all Inv_Global entries that intersect [a,a+len) to
      Inv_Unknown. */
   tl_assert(len > 0);
   preen_global_Invars( a, len );
   invalidate_all_QCaches();
}


//////////////////////////////////////////////////////////////
//                                                          //
// StackFrame                                               //
//                                                          //
//////////////////////////////////////////////////////////////

static ULong stats__total_accesses   = 0;
static ULong stats__classify_Stack0  = 0;
static ULong stats__classify_StackN  = 0;
static ULong stats__classify_Global  = 0;
static ULong stats__classify_Unknown = 0;
static ULong stats__Invars_preened   = 0;
static ULong stats__Invars_changed   = 0;
static ULong stats__t_i_b_empty      = 0;
static ULong stats__htab_fast        = 0;
static ULong stats__htab_searches    = 0;
static ULong stats__htab_probes      = 0;
static ULong stats__htab_resizes     = 0;


/* A dynamic instance of an instruction */
typedef
   struct {
      /* IMMUTABLE */
      Addr    insn_addr; /* NB! zero means 'not in use' */
      XArray* blocks; /* XArray* of StackBlock, or NULL if none */
      /* MUTABLE */
      Invar invar;
   }
   IInstance;


#define N_HTAB_FIXED 64

typedef
   struct _StackFrame {
      /* The sp when the frame was created, so we know when to get rid
         of it. */
      Addr creation_sp;
      /* The stack frames for a thread are arranged as a doubly linked
         list.  Obviously the outermost frame in the stack has .outer
         as NULL and the innermost in theory has .inner as NULL.
         However, when a function returns, we don't delete the
         just-vacated StackFrame.  Instead, it is retained in the list
         and will be re-used when the next call happens.  This is so
         as to avoid constantly having to dynamically allocate and
         deallocate frames. */
      struct _StackFrame* inner;
      struct _StackFrame* outer;
      Word depth; /* 0 for outermost; increases inwards */
      /* Information for each memory referencing instruction, for this
         instantiation of the function.  The iinstances array is
         operated as a simple linear-probe hash table, which is
         dynamically expanded as necessary.  One critical thing is
         that an IInstance with an .insn_addr of zero is interpreted
         to mean that the hash table slot is unused.  This means we
         can't store an IInstance for address zero. */
      /* Note that htab initially points to htab_fixed.  If htab_fixed
         turns out not to be big enough then htab is made to point to
         dynamically allocated memory.  But it's often the case that
         htab_fixed is big enough, so this optimisation saves a huge
         number of sg_malloc/sg_free call pairs. */
      IInstance* htab;
      UWord      htab_size; /* size of hash table, MAY ONLY BE A POWER OF 2 */
      UWord      htab_used; /* number of hash table slots currently in use */
      /* If this frame is currently making a call, then the following
         are relevant. */
      Addr sp_at_call;
      Addr fp_at_call;
      XArray* /* of Addr */ blocks_added_by_call;
      /* See comment just above */
      IInstance htab_fixed[N_HTAB_FIXED];
   }
   StackFrame;
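
/* Invariants of the hash table above, relied on by the lookup code
   below: htab_size is always a power of two (it starts at
   N_HTAB_FIXED == 64 and only ever doubles), and htab_used never
   exceeds htab_size / 2.  Together these guarantee that linear
   probing terminates, either at the sought entry or at an empty
   slot. */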


/* Move this somewhere else? */
/* Visit all Invars in the entire system, changing any Inv_Global
   Invar that intersects [a,a+len) to Inv_Unknown. */

__attribute__((noinline))
static void preen_global_Invar ( Invar* inv, Addr a, SizeT len )
{
   stats__Invars_preened++;
   tl_assert(len > 0);
   tl_assert(inv);
   switch (inv->tag) {
      case Inv_Global:
         tl_assert(inv->Inv.Global.nd);
         tl_assert(inv->Inv.Global.nd->szB > 0);
         if (0) VG_(printf)("preen_Invar Global %#lx %lu\n",
                            inv->Inv.Global.nd->addr,
                            inv->Inv.Global.nd->szB);
         if (0 == cmp_nonempty_intervals(a, len, inv->Inv.Global.nd->addr,
                                                 inv->Inv.Global.nd->szB)) {
            inv->tag = Inv_Unknown;
            stats__Invars_changed++;
         }
         break;
      case Inv_Stack0:
      case Inv_StackN:
      case Inv_Unknown:
         break;
      case Inv_Unset: /* this should never happen */
         /* fallthrough */
      default:
         tl_assert(0);
   }
}

__attribute__((noinline))
static void preen_global_Invars ( Addr a, SizeT len )
{
   Int         i;
   UWord       u;
   StackFrame* frame;
   tl_assert(len > 0);
   for (i = 0; i < VG_N_THREADS; i++) {
      frame = shadowStacks[i];
      if (!frame)
         continue; /* no frames for this thread */
      /* start from the innermost frame */
      while (frame->inner)
         frame = frame->inner;
      tl_assert(frame->outer);
      /* work through the frames from innermost to outermost.  The
         order isn't important; we just need to ensure we visit each
         frame once (including those which are not actually active,
         more 'inner' than the 'innermost active frame', viz, just
         hanging around waiting to be used, when the current innermost
         active frame makes more calls).  See comments on definition
         of struct _StackFrame. */
      for (; frame; frame = frame->outer) {
         UWord xx = 0; /* sanity check only; count of used htab entries */
         if (!frame->htab)
            continue; /* frame not in use.  See shadowStack_unwind(). */
         for (u = 0; u < frame->htab_size; u++) {
            IInstance* ii = &frame->htab[u];
            if (ii->insn_addr == 0)
               continue; /* not in use */
            if (0) { pp_Invar(&ii->invar); VG_(printf)(" x\n"); }
            preen_global_Invar( &ii->invar, a, len );
            xx++;
         }
         tl_assert(xx == frame->htab_used);
      }
   }
}


/* XXX this should be >> 2 on ppc32/64 since the bottom two bits
   of the ip are guaranteed to be zero */
inline static UWord compute_II_hash ( Addr ip, UWord htab_size ) {
   return (ip >> 0) & (htab_size - 1);
}
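
/* Since htab_size is a power of two, (htab_size - 1) is an all-ones
   bit mask; for example, with htab_size == 64 and ip == 0x4005A7, the
   hash is 0x4005A7 & 0x3F == 0x27, i.e. slot 39. */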

__attribute__((noinline))
static void initialise_II_hash_table ( StackFrame* sf )
{
   UWord i;
   sf->htab_size = N_HTAB_FIXED; /* initial hash table size */
   sf->htab = &sf->htab_fixed[0];
   tl_assert(sf->htab);
   sf->htab_used = 0;
   for (i = 0; i < sf->htab_size; i++)
      sf->htab[i].insn_addr = 0; /* NOT IN USE */
}


__attribute__((noinline))
static void resize_II_hash_table ( StackFrame* sf )
{
   UWord     i, j, ix, old_size, new_size;
   IInstance *old_htab, *new_htab, *old;

   tl_assert(sf && sf->htab);
   old_size = sf->htab_size;
   new_size = 2 * old_size;
   old_htab = sf->htab;
   new_htab = sg_malloc( "di.sg_main.rIht.1",
                         new_size * sizeof(IInstance) );
   for (i = 0; i < new_size; i++) {
      new_htab[i].insn_addr = 0; /* NOT IN USE */
   }
   for (i = 0; i < old_size; i++) {
      old = &old_htab[i];
      if (old->insn_addr == 0 /* NOT IN USE */)
         continue;
      ix = compute_II_hash(old->insn_addr, new_size);
      /* find out where to put this, in the new table */
      j = new_size;
      while (1) {
         if (new_htab[ix].insn_addr == 0)
            break;
         /* This can't ever happen, because it would mean the new
            table is full; that isn't allowed -- even the old table is
            only allowed to become half full. */
         tl_assert(j > 0);
         j--;
         ix++; if (ix == new_size) ix = 0;
      }
      /* copy the old entry to this location */
      tl_assert(ix < new_size);
      tl_assert(new_htab[ix].insn_addr == 0);
      new_htab[ix] = *old;
      tl_assert(new_htab[ix].insn_addr != 0);
   }
   /* all entries copied; free old table. */
   if (old_htab != &sf->htab_fixed[0])
      sg_free(old_htab);
   sf->htab = new_htab;
   sf->htab_size = new_size;
   /* check sf->htab_used is correct.  Optional and a bit expensive
      but anyway: */
   j = 0;
   for (i = 0; i < new_size; i++) {
      if (new_htab[i].insn_addr != 0) {
         j++;
      }
   }
   tl_assert(j == sf->htab_used);
   if (0) VG_(printf)("resized tab for SF %p to %lu\n", sf, new_size);
}


__attribute__((noinline))
static IInstance* find_or_create_IInstance_SLOW (
                     StackFrame* sf,
                     Addr ip,
                     XArray* /* StackBlock */ ip_frameblocks
                  )
{
   UWord i, ix;

   stats__htab_searches++;

   tl_assert(sf);
   tl_assert(sf->htab);

   /* Make sure the table's load factor doesn't get too high. */
   if (UNLIKELY(2 * sf->htab_used >= 1 * sf->htab_size)) {
      stats__htab_resizes++;
      resize_II_hash_table(sf);
   }
   tl_assert(2 * sf->htab_used <= sf->htab_size);

   ix = compute_II_hash(ip, sf->htab_size);
   i = sf->htab_size;
   while (1) {
      stats__htab_probes++;
      /* Note that because of the way the fast-case handler works,
         these two tests are actually redundant in the first iteration
         of this loop.  (Except they aren't redundant if the code just
         above resized the table first. :-) */
      if (sf->htab[ix].insn_addr == ip)
         return &sf->htab[ix];
      if (sf->htab[ix].insn_addr == 0)
         break;
      /* If i ever gets to zero and we have found neither what we're
         looking for nor an empty slot, the table must be full.  Which
         isn't possible -- we monitor the load factor to ensure it
         doesn't get above say 50%; if that ever does happen the table
         is resized. */
      tl_assert(i > 0);
      i--;
      ix++;
      if (ix == sf->htab_size) ix = 0;
   }

   /* So now we've found a free slot at ix, and we can use that. */
   tl_assert(sf->htab[ix].insn_addr == 0);

   /* Add a new record in this slot. */
   tl_assert(ip != 0); /* CAN'T REPRESENT THIS */
   sf->htab[ix].insn_addr = ip;
   sf->htab[ix].blocks    = ip_frameblocks;
   sf->htab[ix].invar.tag = Inv_Unset;
   sf->htab_used++;
   return &sf->htab[ix];
}


inline
static IInstance* find_or_create_IInstance (
                     StackFrame* sf,
                     Addr ip,
                     XArray* /* StackBlock */ ip_frameblocks
                  )
{
   UWord ix = compute_II_hash(ip, sf->htab_size);
   /* Is it in the first slot we come to? */
   if (LIKELY(sf->htab[ix].insn_addr == ip)) {
      stats__htab_fast++;
      return &sf->htab[ix];
   }
   /* If the first slot we come to is empty, bag it. */
   if (LIKELY(sf->htab[ix].insn_addr == 0)) {
      stats__htab_fast++;
      tl_assert(ip != 0);
      sf->htab[ix].insn_addr = ip;
      sf->htab[ix].blocks    = ip_frameblocks;
      sf->htab[ix].invar.tag = Inv_Unset;
      sf->htab_used++;
      return &sf->htab[ix];
   }
   /* Otherwise we hand off to the slow case, which searches other
      slots, and optionally resizes the table if necessary. */
   return find_or_create_IInstance_SLOW( sf, ip, ip_frameblocks );
}


__attribute__((noinline))
static Addr calculate_StackBlock_EA ( StackBlock* descr,
                                      Addr sp, Addr fp ) {
   UWord w1 = (UWord)descr->base;
   UWord w2 = (UWord)(descr->spRel ? sp : fp);
   UWord ea = w1 + w2;
   return ea;
}
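
/* Example: a local at frame-pointer offset -32 has descr->base ==
   (UWord)-32; with descr->spRel == False and fp == 0x7ffc1000, the
   unsigned addition wraps to give ea == 0x7ffc0fe0, the intended
   effective address. */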
1490
1491/* Given an array of StackBlocks, return an array of Addrs, holding
1492   their effective addresses.  Caller deallocates result array. */
1493__attribute__((noinline))
1494static XArray* /* Addr */ calculate_StackBlock_EAs (
1495                             XArray* /* StackBlock */ blocks,
1496                             Addr sp, Addr fp
1497                          )
1498{
1499   XArray* res;
1500   Word i, n = VG_(sizeXA)( blocks );
1501   tl_assert(n > 0);
1502   res = VG_(newXA)( sg_malloc, "di.sg_main.cSBE.1", sg_free, sizeof(Addr) );
1503   for (i = 0; i < n; i++) {
1504      StackBlock* blk = VG_(indexXA)( blocks, i );
1505      Addr ea = calculate_StackBlock_EA( blk, sp, fp );
1506      VG_(addToXA)( res, &ea );
1507   }
1508   return res;
1509}
1510
1511
1512/* Try to classify the block into which a memory access falls, and
1513   write the result in 'inv'.  This writes all relevant fields of
1514   'inv'. */
1515__attribute__((noinline))
1516static void classify_address ( /*OUT*/Invar* inv,
1517                               ThreadId tid,
1518                               Addr ea, Addr sp, Addr fp,
1519                               UWord szB,
1520                               XArray* /* of StackBlock */ thisInstrBlocks )
1521{
1522   tl_assert(szB > 0);
1523   /* First, look in the stack blocks accessible in this instruction's
1524      frame. */
1525   {
1526     Word i, nBlocks = VG_(sizeXA)( thisInstrBlocks );
1527     if (nBlocks == 0) stats__t_i_b_empty++;
1528     for (i = 0; i < nBlocks; i++) {
1529        StackBlock* descr = VG_(indexXA)( thisInstrBlocks, i );
1530        Addr bea = calculate_StackBlock_EA( descr, sp, fp );
1531        if (bea <= ea && ea + szB <= bea + descr->szB) {
1532           /* found it */
1533           inv->tag = Inv_Stack0;
1534           inv->Inv.Stack0.addr  = bea;
1535           inv->Inv.Stack0.szB   = descr->szB;
1536           inv->Inv.Stack0.descr = descr;
1537           stats__classify_Stack0++;
1538           return;
1539        }
1540     }
1541   }
1542   /* Look in this thread's query cache */
   { Word i;
     QCache* cache = &qcaches[tid];
     static UWord ctr = 0;
     stats__qcache_queries++;
     for (i = 0; i < cache->nInUse; i++) {
        if (0) /* expensive in a loop like this */
               tl_assert(cache->elems[i].addr + cache->elems[i].szB != 0);
        stats__qcache_probes++;
        if (is_subinterval_of(cache->elems[i].addr,
                              cache->elems[i].szB, ea, szB)) {
           if (i > 0
               && (ctr++ & (QCACHE_ADVANCE_EVERY-1)) == 0) {
              QCElem tmp;
              tmp = cache->elems[i-1];
              cache->elems[i-1] = cache->elems[i];
              cache->elems[i] = tmp;
              i--;
           }
           *inv = cache->elems[i].inv;
           return;
        }
     }
     stats__qcache_misses++;
   }
   /* Ok, so it's not a block in the top frame.  Perhaps it's a block
      in some calling frame?  Consult this thread's stack-block
      interval tree to find out. */
   { StackTreeNode* nd = find_StackTreeNode( siTrees[tid], ea );
     /* We know that [ea,ea+1) is in the block, but we need to
        restrict to the case where the whole access falls within
        it. */
     if (nd && !is_subinterval_of(nd->addr, nd->szB, ea, szB)) {
        nd = NULL;
     }
     if (nd) {
        /* found it */
        inv->tag = Inv_StackN;
        inv->Inv.StackN.nd = nd;
        stats__classify_StackN++;
        goto out;
     }
   }
   /* Not in a stack block.  Try the global pool. */
   { GlobalTreeNode* nd = find_GlobalTreeNode(giTree, ea);
     /* We know that [ea,ea+1) is in the block, but we need to
        restrict to the case where the whole access falls within
        it. */
     if (nd && !is_subinterval_of(nd->addr, nd->szB, ea, szB)) {
        nd = NULL;
     }
     if (nd) {
        /* found it */
        inv->tag = Inv_Global;
        inv->Inv.Global.nd = nd;
        stats__classify_Global++;
        goto out;
     }
   }
   /* No idea - give up. */
   inv->tag = Inv_Unknown;
   stats__classify_Unknown++;

   /* Update the cache */
  out:
   { Addr    toadd_addr = 0;
     SizeT   toadd_szB  = 0;
     QCache* cache      = &qcaches[tid];

     static UWord ctr = 0;
     Bool show = False;
     if (0 && 0 == (ctr++ & 0x1FFFFF)) show = True;

     if (show) QCache__pp(cache, "before upd");

     switch (inv->tag) {
        case Inv_Global:
           toadd_addr = inv->Inv.Global.nd->addr;
           toadd_szB  = inv->Inv.Global.nd->szB;
           break;
        case Inv_StackN:
           toadd_addr = inv->Inv.StackN.nd->addr;
           toadd_szB  = inv->Inv.StackN.nd->szB;
           break;
        case Inv_Unknown: {
           /* This is more complex.  We need to figure out the
              intersection of the "holes" in the global and stack
              interval trees into which [ea,ea+szB) falls.  This is
              further complicated by the fact that [ea,ea+szB) might
              not fall cleanly into a hole; it may instead fall across
              the boundary of a stack or global block.  In that case
              we just ignore it and don't update the cache, since we
              have no way to represent this situation precisely. */
           StackTreeNode  sNegInf, sPosInf, sKey, *sLB, *sUB;
           GlobalTreeNode gNegInf, gPosInf, gKey, *gLB, *gUB;
           Addr gMin, gMax, sMin, sMax, uMin, uMax;
           Bool sOK, gOK;
           sNegInf.addr = 0;
           sNegInf.szB  = 1;
           sPosInf.addr = ~(UWord)0;
           sPosInf.szB  = 1;
           gNegInf.addr = 0;
           gNegInf.szB  = 1;
           gPosInf.addr = ~(UWord)0;
           gPosInf.szB  = 1;
           sKey.addr = ea;
           sKey.szB  = szB;
           gKey.addr = ea;
           gKey.szB  = szB;
           if (0) VG_(printf)("Tree sizes %ld %ld\n",
                              VG_(sizeFM)(siTrees[tid]), VG_(sizeFM)(giTree));
           sOK = VG_(findBoundsFM)( siTrees[tid],
                                    (UWord*)&sLB,    NULL/*unused*/,
                                    (UWord*)&sUB,    NULL/*unused*/,
                                    (UWord)&sNegInf, 0/*unused*/,
                                    (UWord)&sPosInf, 0/*unused*/,
                                    (UWord)&sKey );
           gOK = VG_(findBoundsFM)( giTree,
                                    (UWord*)&gLB,    NULL/*unused*/,
                                    (UWord*)&gUB,    NULL/*unused*/,
                                    (UWord)&gNegInf, 0/*unused*/,
                                    (UWord)&gPosInf, 0/*unused*/,
                                    (UWord)&gKey );
           if (!(sOK && gOK)) {
              /* If this happens, then [ea,ea+szB) partially overlaps
                 a global or stack block.  We can't represent that, so
                 just forget it (should be very rare).  However, do
                 maximum sanity checks first.  In such a
                 partial overlap case, it can't be the case that both
                 [ea] and [ea+szB-1] overlap the same block, since if
                 that were indeed the case then it wouldn't be a
                 partial overlap; rather it would simply fall inside
                 that block entirely and we shouldn't be inside this
                 conditional at all. */
              if (!sOK) {
                 StackTreeNode *ndFirst, *ndLast;
                 ndFirst = find_StackTreeNode( siTrees[tid], ea );
                 ndLast  = find_StackTreeNode( siTrees[tid], ea+szB-1 );
                 /* if both ends of the range fall inside blocks,
                    those blocks can't be the same one. */
                 if (ndFirst && ndLast)
                    tl_assert(ndFirst != ndLast);
                 /* for each end of the range, if it is in a block,
                    the range as a whole can't be entirely within the
                    block. */
                 if (ndFirst)
                    tl_assert(!is_subinterval_of(ndFirst->addr,
                                                 ndFirst->szB, ea, szB));
                 if (ndLast)
                    tl_assert(!is_subinterval_of(ndLast->addr,
                                                 ndLast->szB, ea, szB));
              }
              if (!gOK) {
                 GlobalTreeNode *ndFirst, *ndLast;
                 ndFirst = find_GlobalTreeNode( giTree, ea );
                 ndLast  = find_GlobalTreeNode( giTree, ea+szB-1 );
                 /* if both ends of the range fall inside blocks,
                    those blocks can't be the same one. */
                 if (ndFirst && ndLast)
                    tl_assert(ndFirst != ndLast);
                 /* for each end of the range, if it is in a block,
                    the range as a whole can't be entirely within the
                    block. */
                 if (ndFirst)
                    tl_assert(!is_subinterval_of(ndFirst->addr,
                                                 ndFirst->szB, ea, szB));
                 if (ndLast)
                    tl_assert(!is_subinterval_of(ndLast->addr,
                                                 ndLast->szB, ea, szB));
              }
              if (0) VG_(printf)("overlapping blocks in cache\n");
              return;
           }
           sMin = sLB == &sNegInf  ? 0         : (sLB->addr + sLB->szB);
           sMax = sUB == &sPosInf  ? ~(UWord)0 : (sUB->addr - 1);
           gMin = gLB == &gNegInf  ? 0         : (gLB->addr + gLB->szB);
           gMax = gUB == &gPosInf  ? ~(UWord)0 : (gUB->addr - 1);
           if (0) VG_(printf)("sMin %lx sMax %lx gMin %lx gMax %lx\n",
                              sMin, sMax, gMin, gMax);
           /* [sMin,sMax] and [gMin,gMax] must both contain
              [ea,ea+szB), so they must overlap at least over
              [ea,ea+szB). */
           tl_assert(sMin <= ea && ea+szB-1 <= sMax);
           tl_assert(gMin <= ea && ea+szB-1 <= gMax);
           /* So now compute their intersection. */
           uMin = Addr__max( sMin, gMin );
           uMax = Addr__min( sMax, gMax );
           if (0) VG_(printf)("uMin %lx uMax %lx\n", uMin, uMax);
           tl_assert(uMin <= uMax);
           tl_assert(uMin <= ea && ea+szB-1 <= uMax);
           /* Finally, we can park [uMin,uMax] in the cache.  However,
              if uMax is ~0, the length uMax - uMin + 1 cannot always
              be represented (it wraps to zero when uMin is 0); hence
              fudge uMax down by one. */
           if (uMin < uMax && uMax == ~(UWord)0)
              uMax--;
           toadd_addr = uMin;
           toadd_szB  = uMax - uMin + 1;
           break;
        }
        default:
           /* We should only be caching info for the above 3 cases */
           tl_assert(0);
     } /* switch (inv->tag) */

     { /* and actually add this to the cache, finally */
       Word i;
       Word ip = cache->nInUse / 2; /* doesn't seem critical */

       if (cache->nInUse < N_QCACHE)
          cache->nInUse++;
       for (i = cache->nInUse-1; i > ip; i--) {
          cache->elems[i] = cache->elems[i-1];
       }

       tl_assert(toadd_szB > 0);
       cache->elems[ip].addr = toadd_addr;
       cache->elems[ip].szB  = toadd_szB;
       cache->elems[ip].inv  = *inv;
     }

     if (show) QCache__pp(cache, "after upd");

   }
}
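
/* A standalone sketch (not live code) of the hole-intersection step
   in the Inv_Unknown case above.  The function name is hypothetical;
   it assumes, as the code above establishes, that the stack hole
   [sMin,sMax] and the global hole [gMin,gMax] both contain the
   access. */
#if 0
static void intersect_holes ( Addr sMin, Addr sMax,
                              Addr gMin, Addr gMax,
                              /*OUT*/Addr* uMin, /*OUT*/Addr* uMax )
{
   /* The cacheable "unknown" region is the overlap of the two holes;
      since both holes contain [ea,ea+szB), it is nonempty. */
   *uMin = Addr__max( sMin, gMin );
   *uMax = Addr__min( sMax, gMax );
   tl_assert(*uMin <= *uMax);
}
#endif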


/* CALLED FROM GENERATED CODE */
static
VG_REGPARM(3)
void helperc__mem_access ( /* Known only at run time: */
                           Addr ea, Addr sp, Addr fp,
                           /* Known at translation time: */
                           Word sszB, Addr ip, XArray* ip_frameBlocks )
{
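   /* 'sszB' is a signed size: instrument_mem_access passes +szB for
      a load and -szB for a store, so only the magnitude matters for
      classification. */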
   UWord szB;
   IInstance* iinstance;
   Invar* inv;
   Invar new_inv;
   ThreadId tid = VG_(get_running_tid)();
   StackFrame* frame;
   HChar bufE[160], bufA[160], bufD[32];

   stats__total_accesses++;

   tl_assert(is_sane_TId(tid));
   frame = shadowStacks[tid];
   tl_assert(frame);

   /* Find the instance info for this instruction. */
   tl_assert(ip_frameBlocks);
   iinstance = find_or_create_IInstance( frame, ip, ip_frameBlocks );
   tl_assert(iinstance);
   tl_assert(iinstance->blocks == ip_frameBlocks);

   szB = (sszB < 0) ? (-sszB) : sszB;
   tl_assert(szB > 0);

   inv = &iinstance->invar;

   /* Deal with first uses of instruction instances. */
   if (inv->tag == Inv_Unset) {
      /* This is the first use of this instance of the instruction, so
         we can't make any check; we merely record what we saw, so we
         can compare it against what happens for 2nd and subsequent
         accesses. */
      classify_address( inv,
                        tid, ea, sp, fp, szB, iinstance->blocks );
      tl_assert(inv->tag != Inv_Unset);
      return;
   }
   /* Otherwise, generate a new Invar and see whether it differs from
      the one we recorded before. */
   classify_address( &new_inv,
                     tid, ea, sp, fp, szB, iinstance->blocks );
   tl_assert(new_inv.tag != Inv_Unset);

   /* Did we see something different from before?  If no, then there's
      no error. */
   tl_assert(inv->tag != Inv_Unset);

   if (LIKELY(eq_Invar(&new_inv, inv)))
      return;

   VG_(memset)(bufE, 0, sizeof(bufE));
   show_Invar( bufE, sizeof(bufE)-1, inv, frame->depth );

   VG_(memset)(bufA, 0, sizeof(bufA));
   show_Invar( bufA, sizeof(bufA)-1, &new_inv, frame->depth );

   VG_(memset)(bufD, 0, sizeof(bufD));
   UWord absDelta;
   gen_delta_str( bufD, &absDelta, inv, ea );

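   /* Only report if the access falls within 1024 bytes of the
      invariant block; presumably this filters out wild addresses
      whose "delta" would be meaninglessly large. */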
   if (absDelta < 1024)
      sg_record_error_SorG( tid, ea, sszB, bufE, bufA, bufD );

   /* And now install the new observation as "standard", so as to
      make future error messages make more sense. */
   *inv = new_inv;
}


////////////////////////////////////////
/* Primary push-a-new-frame routine.  Called indirectly from
   generated code. */

static UWord stats__max_sitree_size = 0;
static UWord stats__max_gitree_size = 0;

static
void shadowStack_new_frame ( ThreadId tid,
                             Addr     sp_at_call_insn,
                             Addr     sp_post_call_insn,
                             Addr     fp_at_call_insn,
                             Addr     ip_post_call_insn,
                             XArray*  descrs_at_call_insn )
{
   StackFrame *callee, *caller;
   tl_assert(is_sane_TId(tid));

   caller = shadowStacks[tid];
   tl_assert(caller);

   if (caller->outer) { /* "this is not the outermost frame" */
      tl_assert(caller->outer->inner == caller);
      tl_assert(caller->outer->depth >= 0);
      tl_assert(1 + caller->outer->depth == caller->depth);
   } else {
      tl_assert(caller->depth == 0);
   }

   caller->sp_at_call = sp_at_call_insn;
   caller->fp_at_call = fp_at_call_insn;

   if (descrs_at_call_insn) {
      tl_assert( VG_(sizeXA)(descrs_at_call_insn) > 0 );
      caller->blocks_added_by_call
         = calculate_StackBlock_EAs( descrs_at_call_insn,
                                     sp_at_call_insn, fp_at_call_insn );
      if (caller->blocks_added_by_call)
         add_blocks_to_StackTree( siTrees[tid],
                                  descrs_at_call_insn,
                                  caller->blocks_added_by_call,
                                  caller->depth /* stack depth at which
                                                   these blocks are
                                                   considered to exist */ );
      if (1) {
         UWord s  = VG_(sizeFM)( siTrees[tid] );
         UWord g  = VG_(sizeFM)( giTree );
         Bool  sb = s > stats__max_sitree_size;
         Bool  gb = g > stats__max_gitree_size;
         if (sb) stats__max_sitree_size = s;
         if (gb) stats__max_gitree_size = g;
         if (0 && (sb || gb))
            VG_(message)(Vg_DebugMsg,
                         "exp-sgcheck: new max tree sizes: "
                         "StackTree %ld, GlobalTree %ld\n",
                         stats__max_sitree_size, stats__max_gitree_size );
      }
   } else {
      caller->blocks_added_by_call = NULL;
   }

   /* caller->blocks_added_by_call is used again (and then freed) when
      this frame is removed from the stack. */

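   /* Obtain a StackFrame for the callee.  Note that unwinding never
      frees StackFrame structures; they stay chained via ->inner, so a
      frame allocated by an earlier call at this depth is reused here
      if one exists. */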
   if (caller->inner) {
      callee = caller->inner;
   } else {
      callee = sg_malloc("di.sg_main.sSnf.1", sizeof(StackFrame));
      VG_(memset)(callee, 0, sizeof(StackFrame));
      callee->outer = caller;
      caller->inner = callee;
      callee->depth = 1 + caller->depth;
      tl_assert(callee->inner == NULL);
   }

   /* This sets up .htab, .htab_size and .htab_used */
   initialise_II_hash_table( callee );

   callee->creation_sp    = sp_post_call_insn;
   callee->sp_at_call     = 0; // not actually required ..
   callee->fp_at_call     = 0; // .. these 3 initialisations are ..
   callee->blocks_added_by_call = NULL; // .. just for cleanness

   /* record the new running stack frame */
   shadowStacks[tid] = callee;

   /* and this thread's query cache is now invalid */
   QCache__invalidate( &qcaches[tid] );

   if (0)
   { Word d = callee->depth;
     const HChar *fnname;
     Bool ok;
     Addr ip = ip_post_call_insn;
     ok = VG_(get_fnname_w_offset)( ip, &fnname );
     while (d > 0) {
        VG_(printf)(" ");
        d--;
     }
     VG_(printf)("> %s %#lx\n", ok ? fnname : "???", ip);
   }
}

/* CALLED FROM GENERATED CODE */
static
VG_REGPARM(3)
void helperc__new_frame ( Addr sp_post_call_insn,
                          Addr fp_at_call_insn,
                          Addr ip_post_call_insn,
                          XArray* blocks_at_call_insn,
                          Word sp_adjust )
{
   ThreadId tid = VG_(get_running_tid)();
   Addr     sp_at_call_insn = sp_post_call_insn + sp_adjust;
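   /* 'sp_adjust' undoes the SP change made by the call instruction
      itself: sg_instrument_final_jump passes the guest word size, so
      (on the assumed x86/amd64 targets) this recovers the SP value
      from just before the return address was pushed. */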
   shadowStack_new_frame( tid,
                          sp_at_call_insn,
                          sp_post_call_insn,
                          fp_at_call_insn,
                          ip_post_call_insn,
                          blocks_at_call_insn );
}


////////////////////////////////////////
/* Primary remove-frame(s) routine.  Called indirectly from
   generated code. */

__attribute__((noinline))
static void shadowStack_unwind ( ThreadId tid, Addr sp_now )
{
   StackFrame *innermost, *innermostOrig;
   tl_assert(is_sane_TId(tid));
   innermost = shadowStacks[tid];
   tl_assert(innermost);
   innermostOrig = innermost;
   //VG_(printf)("UNWIND sp_new = %p\n", sp_now);
   while (1) {
      if (!innermost->outer)
         break;
      if (innermost->inner)
         tl_assert(innermost->inner->outer == innermost);
      tl_assert(innermost->outer->inner == innermost);
      tl_assert(innermost->blocks_added_by_call == NULL);
      if (sp_now <= innermost->creation_sp) break;
      //VG_(printf)("UNWIND     dump %p\n", innermost->creation_sp);
      tl_assert(innermost->htab);
      if (innermost->htab != &innermost->htab_fixed[0])
         sg_free(innermost->htab);
      /* be on the safe side */
      innermost->creation_sp = 0;
      innermost->htab = NULL;
      innermost->htab_size = 0;
      innermost->htab_used = 0;
      innermost->sp_at_call = 0;
      innermost->fp_at_call = 0;
      innermost->blocks_added_by_call = NULL;
      innermost = innermost->outer;

      /* So now we're "back" in the calling frame.  Remove from this
         thread's stack-interval-tree the blocks that were added at
         the time of the call. */

      if (innermost->outer) { /* not at the outermost frame */
         if (innermost->blocks_added_by_call != NULL) {
            del_blocks_from_StackTree( siTrees[tid],
                                       innermost->blocks_added_by_call );
            VG_(deleteXA)( innermost->blocks_added_by_call );
            innermost->blocks_added_by_call = NULL;
         }
      }
      /* That completes the required tidying of the interval tree
         associated with the frame we just removed. */

      if (0) {
         Word d = innermost->depth;
         while (d > 0) {
            VG_(printf)(" ");
            d--;
         }
         VG_(printf)("X\n");
      }

   }

   tl_assert(innermost);

   if (innermost != innermostOrig) {
      shadowStacks[tid] = innermost;
      /* this thread's query cache is now invalid */
      QCache__invalidate( &qcaches[tid] );
   }
}



//////////////////////////////////////////////////////////////
//                                                          //
// Instrumentation                                          //
//                                                          //
//////////////////////////////////////////////////////////////

/* What does instrumentation need to do?

   - at each Call transfer, generate a call to shadowStack_new_frame;
     do this by manually inspecting the IR

   - at each sp change that shrinks the stack (i.e. moves sp up),
     call shadowStack_unwind;
     do this by asking for SP-change analysis

   - for each memory referencing instruction,
     call helperc__mem_access
*/

/* A complication: sg_ instrumentation and h_ instrumentation need to
   be interleaved.  Since the latter is a lot more complex than the
   former, we split the sg_ instrumentation here into four functions
   and let the h_ instrumenter call the four functions as it goes.
   Hence the h_ instrumenter drives the sg_ instrumenter.

   To make this viable, the sg_ instrumenter carries what running
   state it needs in 'struct _SGEnv'.  This is exported only
   abstractly from this file.
*/

struct _SGEnv {
   /* the current insn's IP */
   Addr curr_IP;
   /* whether the above is actually known */
   Bool curr_IP_known;
   /* if we find a mem ref, is it the first for this insn?  Used for
      detecting insns which make more than one memory ref, a situation
      we can't really handle properly; so we ignore all but the first
      ref. */
   Bool firstRef;
   /* READONLY */
   IRTemp (*newIRTemp_cb)(IRType,void*);
   void* newIRTemp_opaque;
};
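
/* The newIRTemp_cb/newIRTemp_opaque pair allows this file to allocate
   IR temporaries in the IRSB owned by the driving (h_) instrumenter
   without depending on its internals; see gen_Get_SP and gen_Get_FP
   below for their use. */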


/* --- Helper fns for instrumentation --- */

static IRTemp gen_Get_SP ( struct _SGEnv*  sge,
                           IRSB*           bbOut,
                           const VexGuestLayout* layout,
                           Int             hWordTy_szB )
{
   IRExpr* sp_expr;
   IRTemp  sp_temp;
   IRType  sp_type;
   /* This in effect forces the host and guest word sizes to be the
      same. */
   tl_assert(hWordTy_szB == layout->sizeof_SP);
   sp_type = layout->sizeof_SP == 8 ? Ity_I64 : Ity_I32;
   sp_expr = IRExpr_Get( layout->offset_SP, sp_type );
   sp_temp = sge->newIRTemp_cb( sp_type, sge->newIRTemp_opaque );
   addStmtToIRSB( bbOut, IRStmt_WrTmp( sp_temp, sp_expr ) );
   return sp_temp;
}

static IRTemp gen_Get_FP ( struct _SGEnv*  sge,
                           IRSB*           bbOut,
                           const VexGuestLayout* layout,
                           Int             hWordTy_szB )
{
   IRExpr* fp_expr;
   IRTemp  fp_temp;
   IRType  fp_type;
   /* This in effect forces the host and guest word sizes to be the
      same. */
   tl_assert(hWordTy_szB == layout->sizeof_SP);
   fp_type = layout->sizeof_FP == 8 ? Ity_I64 : Ity_I32;
   fp_expr = IRExpr_Get( layout->offset_FP, fp_type );
   fp_temp = sge->newIRTemp_cb( fp_type, sge->newIRTemp_opaque );
   addStmtToIRSB( bbOut, IRStmt_WrTmp( fp_temp, fp_expr ) );
   return fp_temp;
}

static void instrument_mem_access ( struct _SGEnv* sge,
                                    IRSB*   bbOut,
                                    IRExpr* addr,
                                    Int     szB,
                                    Bool    isStore,
                                    Int     hWordTy_szB,
                                    Addr    curr_IP,
                                    const VexGuestLayout* layout )
{
   IRType  tyAddr      = Ity_INVALID;
   XArray* frameBlocks = NULL;

   tl_assert(isIRAtom(addr));
   tl_assert(hWordTy_szB == 4 || hWordTy_szB == 8);

   tyAddr = typeOfIRExpr( bbOut->tyenv, addr );
   tl_assert(tyAddr == Ity_I32 || tyAddr == Ity_I64);

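   /* On x86, peek at the instruction bytes around curr_IP and skip
      what look like frame setup/teardown sequences (pop %ebp before
      RET, and push %ebp; mov %esp,%ebp); presumably their stack
      traffic would only generate noise. */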
#if defined(VGA_x86)
   { UChar* p = (UChar*)curr_IP;
     // pop %ebp; RET
     if (p[0] == 0xc3 && p[-1] == 0x5d) return;
     // pop %ebp; RET $imm16
     if (p[0] == 0xc2 && p[-1] == 0x5d) return;
     // PUSH %EBP; mov %esp,%ebp
     if (p[0] == 0x55 && p[1] == 0x89 && p[2] == 0xe5) return;
   }
#endif

   /* First off, find or create the StackBlocks for this instruction. */
   frameBlocks = get_StackBlocks_for_IP( curr_IP );
   tl_assert(frameBlocks);
   //if (VG_(sizeXA)( frameBlocks ) == 0)
   //   frameBlocks = NULL;

   /* Generate a call to "helperc__mem_access", passing:
         addr current_SP current_FP szB curr_IP frameBlocks
   */
   { IRTemp t_SP = gen_Get_SP( sge, bbOut, layout, hWordTy_szB );
     IRTemp t_FP = gen_Get_FP( sge, bbOut, layout, hWordTy_szB );
     IRExpr** args
        = mkIRExprVec_6( addr,
                         IRExpr_RdTmp(t_SP),
                         IRExpr_RdTmp(t_FP),
                         mkIRExpr_HWord( isStore ? (-szB) : szB ),
                         mkIRExpr_HWord( curr_IP ),
                         mkIRExpr_HWord( (HWord)frameBlocks ) );
     IRDirty* di
        = unsafeIRDirty_0_N( 3/*regparms*/,
                             "helperc__mem_access",
                             VG_(fnptr_to_fnentry)( &helperc__mem_access ),
                             args );

     addStmtToIRSB( bbOut, IRStmt_Dirty(di) );
   }
}


/* --- Instrumentation main (4 fns) --- */

struct _SGEnv * sg_instrument_init ( IRTemp (*newIRTemp_cb)(IRType,void*),
                                     void* newIRTemp_opaque )
{
   struct _SGEnv * env = sg_malloc("di.sg_main.sii.1",
                                   sizeof(struct _SGEnv));
   tl_assert(env);
   env->curr_IP          = 0;
   env->curr_IP_known    = False;
   env->firstRef         = True;
   env->newIRTemp_cb     = newIRTemp_cb;
   env->newIRTemp_opaque = newIRTemp_opaque;
   return env;
}

void sg_instrument_fini ( struct _SGEnv * env )
{
   sg_free(env);
}

/* Add instrumentation for 'st' to 'sbOut', and possibly modify 'env'
   as required.  This must be called before 'st' itself is added to
   'sbOut'. */
void sg_instrument_IRStmt ( /*MOD*/struct _SGEnv * env,
                            /*MOD*/IRSB* sbOut,
                            IRStmt* st,
                            const VexGuestLayout* layout,
                            IRType gWordTy, IRType hWordTy )
{
   if (!sg_clo_enable_sg_checks)
      return;

   tl_assert(st);
   tl_assert(isFlatIRStmt(st));
   switch (st->tag) {
      case Ist_NoOp:
      case Ist_AbiHint:
      case Ist_Put:
      case Ist_PutI:
      case Ist_MBE:
         /* None of these can contain any memory references. */
         break;

      case Ist_Exit:
         tl_assert(st->Ist.Exit.jk != Ijk_Call);
         /* else we must deal with a conditional call */
         break;

      case Ist_IMark:
         env->curr_IP_known = True;
         env->curr_IP       = st->Ist.IMark.addr;
         env->firstRef      = True;
         break;

      case Ist_Store:
         tl_assert(env->curr_IP_known);
         if (env->firstRef) {
            instrument_mem_access(
               env, sbOut,
               st->Ist.Store.addr,
               sizeofIRType(typeOfIRExpr(sbOut->tyenv, st->Ist.Store.data)),
               True/*isStore*/,
               sizeofIRType(hWordTy),
               env->curr_IP, layout
            );
            env->firstRef = False;
         }
         break;

      case Ist_WrTmp: {
         IRExpr* data = st->Ist.WrTmp.data;
         if (data->tag == Iex_Load) {
            tl_assert(env->curr_IP_known);
            if (env->firstRef) {
               instrument_mem_access(
                  env, sbOut,
                  data->Iex.Load.addr,
                  sizeofIRType(data->Iex.Load.ty),
                  False/*!isStore*/,
                  sizeofIRType(hWordTy),
                  env->curr_IP, layout
               );
               env->firstRef = False;
            }
         }
         break;
      }

      case Ist_Dirty: {
         Int      dataSize;
         IRDirty* d = st->Ist.Dirty.details;
         if (d->mFx != Ifx_None) {
            /* This dirty helper accesses memory.  Collect the
               details. */
            tl_assert(env->curr_IP_known);
            if (env->firstRef) {
               tl_assert(d->mAddr != NULL);
               tl_assert(d->mSize != 0);
               dataSize = d->mSize;
               if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
                  instrument_mem_access(
                     env, sbOut, d->mAddr, dataSize, False/*!isStore*/,
                     sizeofIRType(hWordTy), env->curr_IP, layout
                  );
               }
               if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
                  instrument_mem_access(
                     env, sbOut, d->mAddr, dataSize, True/*isStore*/,
                     sizeofIRType(hWordTy), env->curr_IP, layout
                  );
               }
               env->firstRef = False;
            }
         } else {
            tl_assert(d->mAddr == NULL);
            tl_assert(d->mSize == 0);
         }
         break;
      }

      case Ist_CAS: {
         /* We treat it as a read and a write of the location.  I
            think that is the same behaviour as it was before IRCAS
            was introduced, since prior to that point, the Vex front
            ends would translate a lock-prefixed instruction into a
            (normal) read followed by a (normal) write. */
         if (env->firstRef) {
            Int    dataSize;
            IRCAS* cas = st->Ist.CAS.details;
            tl_assert(cas->addr != NULL);
            tl_assert(cas->dataLo != NULL);
            dataSize = sizeofIRType(typeOfIRExpr(sbOut->tyenv, cas->dataLo));
            if (cas->dataHi != NULL)
               dataSize *= 2; /* since it's a doubleword-CAS */
            instrument_mem_access(
               env, sbOut, cas->addr, dataSize, False/*!isStore*/,
               sizeofIRType(hWordTy), env->curr_IP, layout
            );
            instrument_mem_access(
               env, sbOut, cas->addr, dataSize, True/*isStore*/,
               sizeofIRType(hWordTy), env->curr_IP, layout
            );
            env->firstRef = False;
         }
         break;
      }

      default:
         tl_assert(0);

   } /* switch (st->tag) */
}


/* Add instrumentation for the final jump of an IRSB 'sbOut', and
   possibly modify 'env' as required.  This must be the last
   instrumentation statement in the block. */
void sg_instrument_final_jump ( /*MOD*/struct _SGEnv * env,
                                /*MOD*/IRSB* sbOut,
                                IRExpr* next,
                                IRJumpKind jumpkind,
                                const VexGuestLayout* layout,
                                IRType gWordTy, IRType hWordTy )
{
   if (!sg_clo_enable_sg_checks)
      return;

   if (jumpkind == Ijk_Call) {
      // Assumes x86 or amd64
      IRTemp   sp_post_call_insn, fp_post_call_insn;
      XArray*  frameBlocks;
      IRExpr** args;
      IRDirty* di;
      sp_post_call_insn
         = gen_Get_SP( env, sbOut, layout, sizeofIRType(hWordTy) );
      fp_post_call_insn
         = gen_Get_FP( env, sbOut, layout, sizeofIRType(hWordTy) );
      tl_assert(env->curr_IP_known);
      frameBlocks = get_StackBlocks_for_IP( env->curr_IP );
      tl_assert(frameBlocks);
      if (VG_(sizeXA)(frameBlocks) == 0)
         frameBlocks = NULL;
      args
         = mkIRExprVec_5(
              IRExpr_RdTmp(sp_post_call_insn),
              IRExpr_RdTmp(fp_post_call_insn),
                         /* assume the call doesn't change FP */
              next,
              mkIRExpr_HWord( (HWord)frameBlocks ),
              mkIRExpr_HWord( sizeofIRType(gWordTy) )
           );
      di = unsafeIRDirty_0_N(
              3/*regparms*/,
              "helperc__new_frame",
              VG_(fnptr_to_fnentry)( &helperc__new_frame ),
              args );
      addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
   }
}
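
/* A minimal sketch (not live code) of how a driving instrumenter is
   expected to use the four entry points above; 'sbIn', 'sbOut', 'cb'
   and 'opaque' are hypothetical locals.  Note that each original
   statement is instrumented before it is itself copied into the
   output block, as required by sg_instrument_IRStmt's contract. */
#if 0
   struct _SGEnv* env = sg_instrument_init( cb, opaque );
   Int i;
   for (i = 0; i < sbIn->stmts_used; i++) {
      sg_instrument_IRStmt( env, sbOut, sbIn->stmts[i],
                            layout, gWordTy, hWordTy );
      addStmtToIRSB( sbOut, sbIn->stmts[i] );
   }
   sg_instrument_final_jump( env, sbOut, sbIn->next, sbIn->jumpkind,
                             layout, gWordTy, hWordTy );
   sg_instrument_fini( env );
#endif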


//////////////////////////////////////////////////////////////
//                                                          //
// end Instrumentation                                      //
//                                                          //
//////////////////////////////////////////////////////////////


//////////////////////////////////////////////////////////////
//                                                          //
// misc                                                     //
//                                                          //
//////////////////////////////////////////////////////////////

/* Make a new empty stack frame that is suitable for being the
   outermost frame in a stack.  It has a creation_sp of effectively
   infinity, so it can never be removed. */
static StackFrame* new_root_StackFrame ( void )
{
   StackFrame* sframe = sg_malloc("di.sg_main.nrS.1", sizeof(StackFrame));
   VG_(memset)( sframe, 0, sizeof(*sframe) );
   sframe->creation_sp = ~0UL;

   /* This sets up .htab, .htab_size and .htab_used */
   initialise_II_hash_table( sframe );

   /* ->depth, ->outer, ->inner are 0, NULL, NULL */

   return sframe;
}

/* Primary routine for setting up the shadow stack for a new thread.
   Note that this is used to create not only child thread stacks, but
   the root thread's stack too.  We create a new stack with
   .creation_sp set to infinity, so that the outermost frame can never
   be removed (by shadowStack_unwind).  The core calls this function
   as soon as a thread is created.  We cannot yet get its SP value,
   since that may not yet be set. */
static void shadowStack_thread_create ( ThreadId parent, ThreadId child )
{
   tl_assert(is_sane_TId(child));
   if (parent == VG_INVALID_THREADID) {
      /* creating the main thread's stack */
   } else {
      tl_assert(is_sane_TId(parent));
      tl_assert(parent != child);
      tl_assert(shadowStacks[parent] != NULL);
      tl_assert(siTrees[parent] != NULL);
   }

   /* Create the child's stack.  Bear in mind we may be re-using
      it. */
   if (shadowStacks[child] == NULL) {
      /* First use of this slot; nothing to tear down.  The initial
         frame is created below. */
      tl_assert(siTrees[child] == NULL);
   } else {
      StackFrame *frame, *frame2;
      /* re-using a stack. */
      /* get rid of the interval tree */
      tl_assert(siTrees[child] != NULL);
      delete_StackTree( siTrees[child] );
      siTrees[child] = NULL;
      /* Throw away all existing frames. */
      frame = shadowStacks[child];
      while (frame->outer)
         frame = frame->outer;
      tl_assert(frame->depth == 0);
      while (frame) {
         frame2 = frame->inner;
         if (frame2) tl_assert(1 + frame->depth == frame2->depth);
         sg_free(frame);
         frame = frame2;
      }
      shadowStacks[child] = NULL;
   }

   tl_assert(shadowStacks[child] == NULL);
   tl_assert(siTrees[child] == NULL);

   /* Set up the initial stack frame. */
   shadowStacks[child] = new_root_StackFrame();

   /* and set up the child's stack block interval tree. */
   siTrees[child] = new_StackTree();
}

/* Once a thread is ready to go, the core calls here.  We take the
   opportunity to push a second frame on its stack, with the
   presumably valid SP value that is going to be used for the thread's
   startup.  Hence we should always wind up with a valid outermost
   frame for the thread. */
static void shadowStack_set_initial_SP ( ThreadId tid )
{
   StackFrame* sf;
   tl_assert(is_sane_TId(tid));
   sf = shadowStacks[tid];
   tl_assert(sf != NULL);
   tl_assert(sf->outer == NULL);
   tl_assert(sf->inner == NULL);
   tl_assert(sf->creation_sp == ~0UL);
   shadowStack_new_frame( tid, 0, VG_(get_SP)(tid),
                               0, VG_(get_IP)(tid), NULL );
}


//////////////////////////////////////////////////////////////
//                                                          //
// main-ish                                                 //
//                                                          //
//////////////////////////////////////////////////////////////

/* CALLED indirectly FROM GENERATED CODE.  Calls here are created by
   sp-change analysis, as requested in pc_pre_clo_init(). */
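/* The dying range is [old_SP, old_SP+len), so old_SP+len is the new,
   higher SP; unwind (remove) all shadow frames created below it. */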
void sg_die_mem_stack ( Addr old_SP, SizeT len ) {
   ThreadId  tid = VG_(get_running_tid)();
   shadowStack_unwind( tid, old_SP+len );
}

void sg_pre_clo_init ( void ) {
   ourGlobals_init();
   init_StackBlocks_set();
   init_GlobalBlock_set();
}

void sg_post_clo_init ( void ) {
}

void sg_pre_thread_ll_create ( ThreadId parent, ThreadId child ) {
   shadowStack_thread_create(parent, child);
}

void sg_pre_thread_first_insn ( ThreadId tid ) {
   shadowStack_set_initial_SP(tid);
}

void sg_fini(Int exitcode)
{
   if (VG_(clo_stats)) {
      VG_(message)(Vg_DebugMsg,
         " sg_:  %'llu total accesses, of which:\n", stats__total_accesses);
      VG_(message)(Vg_DebugMsg,
         " sg_:     stack0: %'12llu classify\n",
         stats__classify_Stack0);
      VG_(message)(Vg_DebugMsg,
         " sg_:     stackN: %'12llu classify\n",
         stats__classify_StackN);
      VG_(message)(Vg_DebugMsg,
         " sg_:     global: %'12llu classify\n",
         stats__classify_Global);
      VG_(message)(Vg_DebugMsg,
         " sg_:    unknown: %'12llu classify\n",
         stats__classify_Unknown);
      VG_(message)(Vg_DebugMsg,
         " sg_:  %'llu Invars preened, of which %'llu changed\n",
         stats__Invars_preened, stats__Invars_changed);
      VG_(message)(Vg_DebugMsg,
         " sg_:   t_i_b_MT: %'12llu\n", stats__t_i_b_empty);
      VG_(message)(Vg_DebugMsg,
         " sg_:     qcache: %'llu searches, %'llu probes, %'llu misses\n",
         stats__qcache_queries, stats__qcache_probes, stats__qcache_misses);
      VG_(message)(Vg_DebugMsg,
         " sg_:  htab-fast: %'llu hits\n",
         stats__htab_fast);
      VG_(message)(Vg_DebugMsg,
         " sg_:  htab-slow: %'llu searches, %'llu probes, %'llu resizes\n",
         stats__htab_searches, stats__htab_probes, stats__htab_resizes);
   }
}


/*--------------------------------------------------------------------*/
/*--- end                                                sg_main.c ---*/
/*--------------------------------------------------------------------*/