/*--------------------------------------------------------------------*/
/*--- Ptrcheck: a pointer-use checker.                             ---*/
/*--- This file checks stack and global array accesses.            ---*/
/*---                                                    sg_main.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Ptrcheck, a Valgrind tool for checking pointer
   use in programs.

   Copyright (C) 2008-2011 OpenWorks Ltd
      info@open-works.co.uk

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.

   Neither the names of the U.S. Department of Energy nor the
   University of California nor the names of its contributors may be
   used to endorse or promote products derived from this software
   without prior written permission.
*/

#include "pub_tool_basics.h"
#include "pub_tool_libcbase.h"
#include "pub_tool_libcassert.h"
#include "pub_tool_libcprint.h"
#include "pub_tool_tooliface.h"
#include "pub_tool_wordfm.h"
#include "pub_tool_xarray.h"
#include "pub_tool_threadstate.h"
#include "pub_tool_mallocfree.h"
#include "pub_tool_machine.h"
#include "pub_tool_debuginfo.h"
#include "pub_tool_options.h"

#include "pc_common.h"

#include "sg_main.h"      // self


static
void preen_global_Invars ( Addr a, SizeT len ); /*fwds*/


//////////////////////////////////////////////////////////////
//                                                          //
// Basic Stuff                                              //
//                                                          //
//////////////////////////////////////////////////////////////

static inline Bool is_sane_TId ( ThreadId tid )
{
   return tid >= 0 && tid < VG_N_THREADS
          && tid != VG_INVALID_THREADID;
}

static void* sg_malloc ( HChar* cc, SizeT n ) {
   void* p;
   tl_assert(n > 0);
   p = VG_(malloc)( cc, n );
   tl_assert(p);
   return p;
}

static void sg_free ( void* p ) {
   tl_assert(p);
   VG_(free)(p);
}


/* Compare the intervals [a1,a1+n1) and [a2,a2+n2).  Return -1 if the
   first interval is lower, 1 if the first interval is higher, and 0
   if there is any overlap.  Redundant paranoia with casting is there
   following what looked distinctly like a bug in gcc-4.1.2, in which
   some of the comparisons were done signedly instead of
   unsignedly. */
inline
static Word cmp_nonempty_intervals ( Addr a1, SizeT n1,
                                     Addr a2, SizeT n2 ) {
   UWord a1w = (UWord)a1;
   UWord n1w = (UWord)n1;
   UWord a2w = (UWord)a2;
   UWord n2w = (UWord)n2;
   tl_assert(n1w > 0 && n2w > 0);
   if (a1w + n1w <= a2w) return -1L;
   if (a2w + n2w <= a1w) return 1L;
   return 0;
}
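
/* For illustration (hypothetical values): with 16-byte intervals,
   cmp_nonempty_intervals(0x1000,16, 0x1010,16) == -1 (disjoint, first
   interval lower), cmp_nonempty_intervals(0x1010,16, 0x1000,16) == 1,
   and cmp_nonempty_intervals(0x1000,32, 0x1010,16) == 0.  Returning
   zero for *any* overlap is what lets the WordFM-based interval trees
   below find a containing node by probing with a tiny key interval. */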

/* Return true iff [aSmall,aSmall+nSmall) is entirely contained
   within [aBig,aBig+nBig). */
inline
static Bool is_subinterval_of ( Addr aBig, SizeT nBig,
                                Addr aSmall, SizeT nSmall ) {
   tl_assert(nBig > 0 && nSmall > 0);
   return aBig <= aSmall && aSmall + nSmall <= aBig + nBig;
}

inline
static Addr Addr__min ( Addr a1, Addr a2 ) {
   return a1 < a2 ? a1 : a2;
}

inline
static Addr Addr__max ( Addr a1, Addr a2 ) {
   return a1 < a2 ? a2 : a1;
}


//////////////////////////////////////////////////////////////
//                                                          //
// StackBlocks Persistent Cache                             //
//                                                          //
//////////////////////////////////////////////////////////////

/* We maintain a set of XArray* of StackBlocks.  These are never
   freed.  When a new StackBlock vector is acquired from
   VG_(di_get_stack_blocks_at_ip), we compare it to the existing set.
   If not present, it is added.  If present, the just-acquired one is
   freed and the copy used.

   This simplifies storage management elsewhere.  It allows us to
   assume that a pointer to an XArray* of StackBlock is valid forever.
   It also means there are no duplicates anywhere, which could be
   important from a space point of view for programs that generate a
   lot of translations, or where translations are frequently discarded
   and re-made.

   Note that we normalise the arrays by sorting the elements according
   to an arbitrary total order, so as to avoid the situation that two
   vectors describe the same set of variables but are not structurally
   identical. */
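
/* Illustrative flow (not part of the logic here; 'ip' is a
   hypothetical instruction address):

      XArray* blocks = VG_(di_get_stack_blocks_at_ip)( ip, True );
      blocks = StackBlocks__find_and_dealloc__or_add( blocks );

   Afterwards 'blocks' is sorted and de-duplicated, and either points
   at a structurally identical vector interned earlier (the newly
   acquired one having been freed), or at the newly interned vector
   itself.  Either way the pointer stays valid for the rest of the
   run. */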

static inline Bool StackBlock__sane ( StackBlock* fb )
{
   if (fb->name[ sizeof(fb->name)-1 ] != 0)
      return False;
   if (fb->spRel != False && fb->spRel != True)
      return False;
   if (fb->isVec != False && fb->isVec != True)
      return False;
   return True;
}

/* Generate an arbitrary total ordering on StackBlocks. */
static Word StackBlock__cmp ( StackBlock* fb1, StackBlock* fb2 )
{
   Word r;
   tl_assert(StackBlock__sane(fb1));
   tl_assert(StackBlock__sane(fb2));
   /* Hopefully the .base test hits most of the time.  For the blocks
      associated with any particular instruction, if the .base values
      are the same then probably it doesn't make sense for the other
      fields to be different.  But this is supposed to be a completely
      general structural total order, so we have to compare everything
      anyway. */
   if (fb1->base < fb2->base) return -1;
   if (fb1->base > fb2->base) return 1;
   /* compare sizes */
   if (fb1->szB < fb2->szB) return -1;
   if (fb1->szB > fb2->szB) return 1;
   /* compare sp/fp flag */
   if (fb1->spRel < fb2->spRel) return -1;
   if (fb1->spRel > fb2->spRel) return 1;
   /* compare is/is-not array-typed flag */
   if (fb1->isVec < fb2->isVec) return -1;
   if (fb1->isVec > fb2->isVec) return 1;
   /* compare the name */
   r = (Word)VG_(strcmp)(fb1->name, fb2->name);
   return r;
}

/* Returns True if all fields except .szB are the same.  szBs may or
   may not be the same; they are simply not consulted. */
static Bool StackBlock__all_fields_except_szB_are_equal (
               StackBlock* fb1,
               StackBlock* fb2
            )
{
   tl_assert(StackBlock__sane(fb1));
   tl_assert(StackBlock__sane(fb2));
   return fb1->base == fb2->base
          && fb1->spRel == fb2->spRel
          && fb1->isVec == fb2->isVec
          && 0 == VG_(strcmp)(fb1->name, fb2->name);
}


/* Generate an arbitrary total ordering on vectors of StackBlocks. */
static Word StackBlocks__cmp ( XArray* fb1s, XArray* fb2s )
{
   Word i, r, n1, n2;
   n1 = VG_(sizeXA)( fb1s );
   n2 = VG_(sizeXA)( fb2s );
   if (n1 < n2) return -1;
   if (n1 > n2) return 1;
   for (i = 0; i < n1; i++) {
      StackBlock *fb1, *fb2;
      fb1 = VG_(indexXA)( fb1s, i );
      fb2 = VG_(indexXA)( fb2s, i );
      r = StackBlock__cmp( fb1, fb2 );
      if (r != 0) return r;
   }
   tl_assert(i == n1 && i == n2);
   return 0;
}

static void pp_StackBlocks ( XArray* sbs )
{
   Word i, n = VG_(sizeXA)( sbs );
   VG_(message)(Vg_DebugMsg, "<<< STACKBLOCKS\n" );
   for (i = 0; i < n; i++) {
      StackBlock* sb = (StackBlock*)VG_(indexXA)( sbs, i );
      VG_(message)(Vg_DebugMsg,
         "   StackBlock{ off %ld szB %lu spRel:%c isVec:%c \"%s\" }\n",
         sb->base, sb->szB, sb->spRel ? 'Y' : 'N',
         sb->isVec ? 'Y' : 'N', &sb->name[0]
      );
   }
   VG_(message)(Vg_DebugMsg, ">>> STACKBLOCKS\n" );
}


/* ---------- The StackBlock vector cache ---------- */

static WordFM* /* XArray* of StackBlock -> nothing */
       frameBlocks_set = NULL;

static void init_StackBlocks_set ( void )
{
   tl_assert(!frameBlocks_set);
   frameBlocks_set
      = VG_(newFM)( sg_malloc, "di.sg_main.iSBs.1", sg_free,
                    (Word(*)(UWord,UWord))StackBlocks__cmp );
   tl_assert(frameBlocks_set);
}

/* Find the given StackBlock-vector in our collection thereof.  If
   found, deallocate the supplied one, and return the address of the
   copy.  If not found, add the supplied one to our collection and
   return its address. */
static XArray* /* of StackBlock */
       StackBlocks__find_and_dealloc__or_add
          ( XArray* /* of StackBlock */ orig )
{
   UWord key, val;

   /* First, normalise, as per comments above. */
   VG_(setCmpFnXA)( orig, (Int(*)(void*,void*))StackBlock__cmp );
   VG_(sortXA)( orig );

   /* Now get rid of any exact duplicates. */
  nuke_dups:
   { Word r, w, nEQ, n = VG_(sizeXA)( orig );
     if (n >= 2) {
        w = 0;
        nEQ = 0;
        for (r = 0; r < n; r++) {
           if (r+1 < n) {
              StackBlock* pR0 = VG_(indexXA)( orig, r+0 );
              StackBlock* pR1 = VG_(indexXA)( orig, r+1 );
              Word c = StackBlock__cmp(pR0,pR1);
              tl_assert(c == -1 || c == 0);
              if (c == 0) { nEQ++; continue; }
           }
           if (w != r) {
              StackBlock* pW = VG_(indexXA)( orig, w );
              StackBlock* pR = VG_(indexXA)( orig, r );
              *pW = *pR;
           }
           w++;
        }
        tl_assert(r == n);
        tl_assert(w + nEQ == n);
        if (w < n) {
           VG_(dropTailXA)( orig, n-w );
        }
        if (0) VG_(printf)("delta %ld\n", n-w);
     }
   }

   /* Deal with the following strangeness, where two otherwise
      identical blocks are claimed to have different sizes.  In that
      case we use the larger size. */
   /* StackBlock{ off 16 szB 66 spRel:Y isVec:Y "sz" }
      StackBlock{ off 16 szB 130 spRel:Y isVec:Y "sz" }
      StackBlock{ off 208 szB 16 spRel:Y isVec:Y "ar" }
   */
   { Word i, n = VG_(sizeXA)( orig );
     if (n >= 2) {
        for (i = 0; i < n-1; i++) {
           StackBlock* sb0 = VG_(indexXA)( orig, i+0 );
           StackBlock* sb1 = VG_(indexXA)( orig, i+1 );
           if (StackBlock__all_fields_except_szB_are_equal(sb0, sb1)) {
              /* They can't be identical because the previous tidying
                 pass would have removed the duplicates.  And they
                 can't be > because the earlier sorting pass would
                 have ordered otherwise-identical descriptors
                 according to < on .szB fields.  Hence: */
              tl_assert(sb0->szB < sb1->szB);
              sb0->szB = sb1->szB;
              /* This makes the blocks identical, at the size of the
                 larger one.  Rather than go to all the hassle of
                 sliding the rest down, simply go back to the
                 remove-duplicates stage.  The assertion guarantees
                 that we eventually make progress, since the rm-dups
                 stage will get rid of one of the blocks.  This is
                 expected to happen only exceedingly rarely. */
              tl_assert(StackBlock__cmp(sb0,sb1) == 0);
              goto nuke_dups;
           }
        }
     }
   }

   /* If there are any blocks which overlap and have the same
      spRel-ness, junk the whole descriptor; it's obviously bogus.
      Icc11 certainly generates bogus info from time to time.

      This check is pretty weak; really we ought to have a stronger
      sanity check. */
   { Word i, n = VG_(sizeXA)( orig );
     static Int moans = 3;
     for (i = 0; i < n-1; i++) {
       StackBlock* sb1 = (StackBlock*)VG_(indexXA)( orig, i );
       StackBlock* sb2 = (StackBlock*)VG_(indexXA)( orig, i+1 );
       if (sb1->spRel == sb2->spRel
           && (sb1->base >= sb2->base
               || sb1->base + sb1->szB > sb2->base)) {
          if (moans > 0 && !VG_(clo_xml)) {
             moans--;
             VG_(message)(Vg_UserMsg, "Warning: bogus DWARF3 info: "
                                      "overlapping stack blocks\n");
             if (VG_(clo_verbosity) >= 2)
                pp_StackBlocks(orig);
             if (moans == 0)
                VG_(message)(Vg_UserMsg, "Further instances of this "
                                         "message will not be shown\n" );
          }
          VG_(dropTailXA)( orig, VG_(sizeXA)( orig ));
          break;
       }
     }
   }

   /* Now, do we have it already? */
   if (VG_(lookupFM)( frameBlocks_set, &key, &val, (UWord)orig )) {
      /* yes */
      XArray* res;
      tl_assert(val == 0);
      tl_assert(key != (UWord)orig);
      VG_(deleteXA)(orig);
      res = (XArray*)key;
      return res;
   } else {
      /* no */
      VG_(addToFM)( frameBlocks_set, (UWord)orig, 0 );
      return orig;
   }
}

/* Top level function for getting the StackBlock vector for a given
   instruction.  It is guaranteed that the returned pointer will be
   valid for the entire rest of the run, and also that the addresses
   of the individual elements of the array will not change. */

static XArray* /* of StackBlock */ get_StackBlocks_for_IP ( Addr ip )
{
   XArray* blocks = VG_(di_get_stack_blocks_at_ip)( ip, True/*arrays only*/ );
   tl_assert(blocks);
   return StackBlocks__find_and_dealloc__or_add( blocks );
}


//////////////////////////////////////////////////////////////
//                                                          //
// GlobalBlocks Persistent Cache                            //
//                                                          //
//////////////////////////////////////////////////////////////

/* Generate an arbitrary total ordering on GlobalBlocks. */
static Word GlobalBlock__cmp ( GlobalBlock* gb1, GlobalBlock* gb2 )
{
   Word r;
   /* compare addrs */
   if (gb1->addr < gb2->addr) return -1;
   if (gb1->addr > gb2->addr) return 1;
   /* compare sizes */
   if (gb1->szB < gb2->szB) return -1;
   if (gb1->szB > gb2->szB) return 1;
   /* compare is/is-not array-typed flag */
   if (gb1->isVec < gb2->isVec) return -1;
   if (gb1->isVec > gb2->isVec) return 1;
   /* compare the name */
   r = (Word)VG_(strcmp)(gb1->name, gb2->name);
   if (r != 0) return r;
   /* compare the soname */
   r = (Word)VG_(strcmp)(gb1->soname, gb2->soname);
   return r;
}

static WordFM* /* GlobalBlock* -> nothing */
       globalBlock_set = NULL;

static void init_GlobalBlock_set ( void )
{
   tl_assert(!globalBlock_set);
   globalBlock_set
      = VG_(newFM)( sg_malloc, "di.sg_main.iGBs.1", sg_free,
                    (Word(*)(UWord,UWord))GlobalBlock__cmp );
   tl_assert(globalBlock_set);
}


/* Top level function for making GlobalBlocks persistent.  Call here
   with a non-persistent version, and the returned one is guaranteed
   to be valid for the entire rest of the run.  The supplied one is
   copied, not stored, so can be freed after the call. */

static GlobalBlock* get_persistent_GlobalBlock ( GlobalBlock* orig )
{
   UWord key, val;
   /* Now, do we have it already? */
   if (VG_(lookupFM)( globalBlock_set, &key, &val, (UWord)orig )) {
      /* yes, return the copy */
      GlobalBlock* res;
      tl_assert(val == 0);
      res = (GlobalBlock*)key;
      tl_assert(res != orig);
      return res;
   } else {
      /* no.  clone it, store the clone and return the clone's
         address. */
      GlobalBlock* clone = sg_malloc( "di.sg_main.gpGB.1",
                                      sizeof(GlobalBlock) );
      tl_assert(clone);
      *clone = *orig;
      VG_(addToFM)( globalBlock_set, (UWord)clone, 0 );
      return clone;
   }
}


//////////////////////////////////////////////////////////////
//                                                          //
// Interval tree of StackTreeBlock                          //
//                                                          //
//////////////////////////////////////////////////////////////

/* A node in a stack interval tree.  Zero length intervals (.szB == 0)
   are not allowed.

   A stack interval tree is a (WordFM StackTreeNode* void).  There is
   one stack interval tree for each thread.
*/
typedef
   struct {
      Addr        addr;
      SizeT       szB;   /* copied from .descr->szB */
      StackBlock* descr; /* it's an instance of this block */
      UWord       depth; /* depth of stack at time block was pushed */
   }
   StackTreeNode;

static void pp_StackTree ( WordFM* sitree, HChar* who )
{
   UWord keyW, valW;
   VG_(printf)("<<< BEGIN pp_StackTree %s\n", who );
   VG_(initIterFM)( sitree );
   while (VG_(nextIterFM)( sitree, &keyW, &valW )) {
      StackTreeNode* nd = (StackTreeNode*)keyW;
      VG_(printf)("  [%#lx,+%lu) descr=%p %s %lu\n", nd->addr, nd->szB,
                  nd->descr, nd->descr->name, nd->descr->szB);
   }
   VG_(printf)(">>> END   pp_StackTree %s\n", who );
}

/* Interval comparison function for StackTreeNode */
static Word cmp_intervals_StackTreeNode ( StackTreeNode* sn1,
                                          StackTreeNode* sn2 )
{
   return cmp_nonempty_intervals(sn1->addr, sn1->szB,
                                 sn2->addr, sn2->szB);
}

/* Find the node holding 'a', if any.  We probe with the one-byte
   interval [a,a+1); since the tree's comparator returns zero for any
   overlap, this locates the unique node whose interval contains 'a'. */
static StackTreeNode* find_StackTreeNode ( WordFM* sitree, Addr a )
{
   UWord keyW, valW;
   StackTreeNode key;
   tl_assert(sitree);
   key.addr = a;
   key.szB  = 1;
   if (VG_(lookupFM)( sitree, &keyW, &valW, (UWord)&key )) {
      StackTreeNode* res = (StackTreeNode*)keyW;
      tl_assert(valW == 0);
      tl_assert(res != &key);
      return res;
   } else {
      return NULL;
   }
}

/* Note that the supplied XArray of StackBlock must have been
   made persistent already. */
__attribute__((noinline))
static void add_blocks_to_StackTree (
               /*MOD*/WordFM* sitree,
               XArray* /* StackBlock */ descrs,
               XArray* /* Addr */ bases,
               UWord depth
            )
{
   Bool debug = (Bool)0;
   Word i, nDescrs, nBases;

   nDescrs = VG_(sizeXA)( descrs );
   nBases  = VG_(sizeXA)( bases );
   tl_assert(nDescrs == nBases);

   if (nDescrs == 0) return;

   tl_assert(sitree);
   if (debug) {
      VG_(printf)("\ndepth = %lu\n", depth);
      pp_StackTree( sitree, "add_blocks_to_StackTree-pre" );
      pp_StackBlocks(descrs);
   }

   for (i = 0; i < nDescrs; i++) {
      Bool already_present;
      StackTreeNode* nyu;
      Addr        addr  = *(Addr*)VG_(indexXA)( bases, i );
      StackBlock* descr = (StackBlock*)VG_(indexXA)( descrs, i );
      tl_assert(descr->szB > 0);
      nyu = sg_malloc( "di.sg_main.abtST.1", sizeof(StackTreeNode) );
      nyu->addr  = addr;
      nyu->szB   = descr->szB;
      nyu->descr = descr;
      nyu->depth = depth;
      if (debug) VG_(printf)("ADD %#lx %lu\n", addr, descr->szB);
      already_present = VG_(addToFM)( sitree, (UWord)nyu, 0 );
      /* The interval can't already be there; else we have
         overlapping stack blocks. */
      tl_assert(!already_present);
      if (debug) {
         pp_StackTree( sitree, "add_blocks_to_StackTree-step" );
      }
   }
   if (debug) {
      pp_StackTree( sitree, "add_blocks_to_StackTree-post" );
      VG_(printf)("\n");
   }
}

static void del_blocks_from_StackTree ( /*MOD*/WordFM* sitree,
                                        XArray* /* Addr */ bases )
{
   UWord oldK, oldV;
   Word i, nBases = VG_(sizeXA)( bases );
   for (i = 0; i < nBases; i++) {
      Bool b;
      Addr addr = *(Addr*)VG_(indexXA)( bases, i );
      StackTreeNode* nd = find_StackTreeNode(sitree, addr);
      /* The interval must be there; we added it earlier when
         the associated frame was created. */
      tl_assert(nd);
      b = VG_(delFromFM)( sitree, &oldK, &oldV, (UWord)nd );
      /* we just found the block! */
      tl_assert(b);
      tl_assert(oldV == 0);
      tl_assert(nd == (StackTreeNode*)oldK);
      sg_free(nd);
   }
}


static void delete_StackTree__kFin ( UWord keyW ) {
   StackTreeNode* nd = (StackTreeNode*)keyW;
   tl_assert(nd);
   sg_free(nd);
}
static void delete_StackTree__vFin ( UWord valW ) {
   tl_assert(valW == 0);
}
static void delete_StackTree ( WordFM* sitree )
{
   VG_(deleteFM)( sitree,
                  delete_StackTree__kFin, delete_StackTree__vFin );
}

static WordFM* new_StackTree ( void ) {
   return VG_(newFM)( sg_malloc, "di.sg_main.nST.1", sg_free,
                      (Word(*)(UWord,UWord))cmp_intervals_StackTreeNode );
}


//////////////////////////////////////////////////////////////
//                                                          //
// Interval tree of GlobalTreeBlock                         //
//                                                          //
//////////////////////////////////////////////////////////////

/* A node in a global interval tree.  Zero length intervals
   (.szB == 0) are not allowed.

   A global interval tree is a (WordFM GlobalTreeNode* void).  There
   is one global interval tree for the entire process.
*/
typedef
   struct {
      Addr         addr; /* copied from .descr->addr */
      SizeT        szB; /* copied from .descr->szB */
      GlobalBlock* descr; /* it's this block */
   }
   GlobalTreeNode;

static void GlobalTreeNode__pp ( GlobalTreeNode* nd ) {
   tl_assert(nd->descr);
   VG_(printf)("GTNode [%#lx,+%ld) %s",
               nd->addr, nd->szB, nd->descr->name);
}

static void GlobalTree__pp ( WordFM* /* of (GlobalTreeNode,void) */ gitree,
                             HChar* who )
{
   UWord keyW, valW;
   GlobalTreeNode* nd;
   VG_(printf)("<<< GlobalBlockTree (%s)\n", who);
   VG_(initIterFM)( gitree );
   while (VG_(nextIterFM)( gitree, &keyW, &valW )) {
      tl_assert(valW == 0);
      nd = (GlobalTreeNode*)keyW;
      VG_(printf)("  ");
      GlobalTreeNode__pp(nd);
      VG_(printf)("\n");
   }
   VG_(doneIterFM)( gitree );
   VG_(printf)(">>>\n");
}

/* Interval comparison function for GlobalTreeNode */
static Word cmp_intervals_GlobalTreeNode ( GlobalTreeNode* gn1,
                                           GlobalTreeNode* gn2 )
{
   return cmp_nonempty_intervals( gn1->addr, gn1->szB,
                                  gn2->addr, gn2->szB );
}

/* Find the node holding 'a', if any. */
static GlobalTreeNode* find_GlobalTreeNode ( WordFM* gitree, Addr a )
{
   UWord keyW, valW;
   GlobalTreeNode key;
   key.addr = a;
   key.szB  = 1;
   if (VG_(lookupFM)( gitree, &keyW, &valW, (UWord)&key )) {
      GlobalTreeNode* res = (GlobalTreeNode*)keyW;
      tl_assert(valW == 0);
      tl_assert(res != &key);
      return res;
   } else {
      return NULL;
   }
}

/* Note that the supplied GlobalBlock must have been made persistent
   already. */
static void add_block_to_GlobalTree (
               /*MOD*/WordFM* gitree,
               GlobalBlock* descr
            )
{
   Bool already_present;
   GlobalTreeNode *nyu, *nd;
   UWord keyW, valW;
   static Int moans = 3;

   tl_assert(descr->szB > 0);
   nyu = sg_malloc( "di.sg_main.abtG.1", sizeof(GlobalTreeNode) );
   nyu->addr  = descr->addr;
   nyu->szB   = descr->szB;
   nyu->descr = descr;

   /* Basically it's an error to add a global block to the tree that
      is already in the tree.  However, detect and ignore attempts to
      insert exact duplicates; they do appear for some reason
      (possibly a bug in m_debuginfo?). */
   already_present = VG_(lookupFM)( gitree, &keyW, &valW, (UWord)nyu );
   if (already_present) {
      tl_assert(valW == 0);
      nd = (GlobalTreeNode*)keyW;
      tl_assert(nd);
      tl_assert(nd != nyu);
      tl_assert(nd->descr);
      tl_assert(nyu->descr);
      if (nd->addr == nyu->addr && nd->szB == nyu->szB
          /* && 0 == VG_(strcmp)(nd->descr->name, nyu->descr->name) */
          /* Although it seems reasonable to demand that duplicate
             blocks have identical names, that is too strict.  For
             example, reading debuginfo from glibc produces two
             otherwise identical blocks with names "tzname" and
             "__tzname".  A constraint of the form "must be identical,
             or one must be a substring of the other" would fix that.
             However, such trickery is scuppered by the fact that we
             truncate all variable names to 15 characters to make
             storage management simpler, hence giving pairs like
             "__EI___pthread_" (truncated) vs "__pthread_keys".  So
             it's simplest just to skip the name comparison
             completely. */
          && 0 == VG_(strcmp)(nd->descr->soname, nyu->descr->soname)) {
         /* exact duplicate; ignore it */
         sg_free(nyu);
         return;
      }
      /* else fall through; the assertion below will catch it */
   }

   already_present = VG_(addToFM)( gitree, (UWord)nyu, 0 );
   /* The interval can't already be there; else we have
      overlapping global blocks. */
   /* Unfortunately (25 Jan 09) at least icc11 has been seen to
      generate overlapping block descriptions in the Dwarf3; clearly
      bogus. */
   if (already_present && moans > 0 && !VG_(clo_xml)) {
      moans--;
      VG_(message)(Vg_UserMsg, "Warning: bogus DWARF3 info: "
                               "overlapping global blocks\n");
      if (VG_(clo_verbosity) >= 2) {
         GlobalTree__pp( gitree,
                         "add_block_to_GlobalTree: non-exact duplicate" );
         VG_(printf)("Overlapping block: ");
         GlobalTreeNode__pp(nyu);
         VG_(printf)("\n");
      }
      if (moans == 0)
         VG_(message)(Vg_UserMsg, "Further instances of this "
                                  "message will not be shown\n" );
   }
   /* tl_assert(!already_present); */
}

static Bool del_GlobalTree_range ( /*MOD*/WordFM* gitree,
                                   Addr a, SizeT szB )
{
   /* One easy way to do this: look up [a,a+szB) in the tree.  That
      will either succeed, producing a block which intersects that
      range, in which case we delete it and repeat; or it will fail,
      in which case there are no blocks intersecting the range, and we
      can bring the process to a halt. */
   UWord keyW, valW, oldK, oldV;
   GlobalTreeNode key, *nd;
   Bool b, anyFound;

   tl_assert(szB > 0);

   anyFound = False;

   key.addr = a;
   key.szB  = szB;

   while (VG_(lookupFM)( gitree, &keyW, &valW, (UWord)&key )) {
      anyFound = True;
      nd = (GlobalTreeNode*)keyW;
      tl_assert(valW == 0);
      tl_assert(nd != &key);
      tl_assert(cmp_nonempty_intervals(a, szB, nd->addr, nd->szB) == 0);

      b = VG_(delFromFM)( gitree, &oldK, &oldV, (UWord)&key );
      tl_assert(b);
      tl_assert(oldV == 0);
      tl_assert(oldK == keyW); /* check we deleted the node we just found */
   }

   return anyFound;
}


//////////////////////////////////////////////////////////////
//                                                          //
// Invar                                                    //
//                                                          //
//////////////////////////////////////////////////////////////

/* An invariant, as resulting from watching the destination of a
   memory referencing instruction.  It is Inv_Unset until the
   instruction makes its first access. */

typedef
   enum {
      Inv_Unset=1,  /* not established yet */
      Inv_Unknown,  /* unknown location */
      Inv_Stack0,   /* array-typed stack block in innermost frame */
      Inv_StackN,   /* array-typed stack block in non-innermost frame */
      Inv_Global,   /* array-typed global block */
   }
   InvarTag;

typedef
   struct {
      InvarTag tag;
      union {
         struct {
         } Unset;
         struct {
         } Unknown;
         struct {
            Addr  addr;
            SizeT szB;
            StackBlock* descr;
         } Stack0; /* innermost stack frame */
         struct {
            /* Pointer to a node in the interval tree for
               this thread. */
            StackTreeNode* nd;
         } StackN; /* non-innermost stack frame */
         struct {
            /* Pointer to a GlobalBlock in the interval tree of
               global blocks. */
            GlobalTreeNode* nd;
         } Global;
      }
      Inv;
   }
   Invar;

/* Partial debugging printing for an Invar. */
static void pp_Invar ( Invar* i )
{
   switch (i->tag) {
      case Inv_Unset:
         VG_(printf)("Unset");
         break;
      case Inv_Unknown:
         VG_(printf)("Unknown");
         break;
      case Inv_Stack0:
         VG_(printf)("Stack0 [%#lx,+%lu)",
                     i->Inv.Stack0.addr, i->Inv.Stack0.szB);
         break;
      case Inv_StackN:
         VG_(printf)("StackN [%#lx,+%lu)",
                     i->Inv.StackN.nd->addr, i->Inv.StackN.nd->szB);
         break;
      case Inv_Global:
         VG_(printf)("Global [%#lx,+%lu)",
                     i->Inv.Global.nd->addr, i->Inv.Global.nd->szB);
         break;
      default:
         tl_assert(0);
   }
}

/* Compare two Invars for equality. */
static Bool eq_Invar ( Invar* i1, Invar* i2 )
{
   tl_assert(i1->tag != Inv_Unset);
   tl_assert(i2->tag != Inv_Unset);
   if (i1->tag != i2->tag)
      return False;
   switch (i1->tag) {
      case Inv_Unknown:
         return True;
      case Inv_Stack0:
         return i1->Inv.Stack0.addr == i2->Inv.Stack0.addr
                && i1->Inv.Stack0.szB == i2->Inv.Stack0.szB;
      case Inv_StackN:
         return i1->Inv.StackN.nd == i2->Inv.StackN.nd;
      case Inv_Global:
         return i1->Inv.Global.nd == i2->Inv.Global.nd;
      default:
         tl_assert(0);
   }
   /*NOTREACHED*/
   tl_assert(0);
}

/* Generate a piece of text showing how 'ea' relates to 'invar', if
   known.  If unknown, generate an empty string.  'buf' must be at
   least 32 bytes in size.  Also return the absolute value of the
   delta, if known, or zero if not known.
*/
static void gen_delta_str ( /*OUT*/HChar* buf,
                            /*OUT*/UWord* absDelta,
                            Invar* inv, Addr ea )
{
   Addr  block = 0;
   SizeT szB   = 0;

   buf[0] = 0;
   *absDelta = 0;

   switch (inv->tag) {
      case Inv_Unknown:
      case Inv_Unset:
         return; /* unknown */
      case Inv_Stack0:
         block = inv->Inv.Stack0.addr;
         szB   = inv->Inv.Stack0.szB;
         break;
      case Inv_StackN:
         block = inv->Inv.StackN.nd->addr;
         szB   = inv->Inv.StackN.nd->szB;
         break;
      case Inv_Global:
         block = inv->Inv.Global.nd->addr;
         szB   = inv->Inv.Global.nd->szB;
         break;
      default:
         tl_assert(0);
   }
   tl_assert(szB > 0);
   if (ea < block) {
      *absDelta = block - ea;
      VG_(sprintf)(buf, "%'lu before", *absDelta);
   }
   else if (ea >= block + szB) {
      *absDelta = ea - (block + szB);
      VG_(sprintf)(buf, "%'lu after", *absDelta);
   }
   else {
      // Leave *absDelta at zero.
      VG_(sprintf)(buf, "%'lu inside", ea - block);
   }
}
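
/* Worked example (hypothetical values): for a block at [0x5000,
   0x5010), ea == 0x4ff8 gives "8 before" with *absDelta == 8;
   ea == 0x5018 gives "8 after" (measured from the end of the block,
   0x5010); and ea == 0x5008 gives "8 inside" with *absDelta left at
   zero. */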


/* Print selected parts of an Invar, suitable for use in error
   messages. */
static void show_Invar( HChar* buf, Word nBuf, Invar* inv, Word depth )
{
   HChar* str;
   tl_assert(nBuf >= 128);
   buf[0] = 0;
   switch (inv->tag) {
      case Inv_Unknown:
         VG_(sprintf)(buf, "%s", "unknown");
         break;
      case Inv_Stack0:
         str = "array";
         VG_(sprintf)(buf, "stack %s \"%s\" of size %'lu in this frame",
                      str, inv->Inv.Stack0.descr->name,
                      inv->Inv.Stack0.szB );
         break;
      case Inv_StackN:
         str = "array";
         VG_(sprintf)(buf, "stack %s \"%s\" of size %'lu in frame %lu back from here",
                      str, inv->Inv.StackN.nd->descr->name,
                           inv->Inv.StackN.nd->descr->szB,
                           depth - inv->Inv.StackN.nd->depth );
         break;
      case Inv_Global:
         str = "array";
         VG_(sprintf)(buf, "global %s \"%s\" of size %'lu in object with soname \"%s\"",
                      str, inv->Inv.Global.nd->descr->name,
                           inv->Inv.Global.nd->descr->szB,
                           inv->Inv.Global.nd->descr->soname );
         break;
      case Inv_Unset:
         VG_(sprintf)(buf, "%s", "Unset!");
         break;
      default:
         tl_assert(0);
   }
}


//////////////////////////////////////////////////////////////
//                                                          //
// our globals                                              //
//                                                          //
//////////////////////////////////////////////////////////////

//////////////////////////////////////////////////////////////
///

#define N_QCACHE 16

/* Powers of two only, else the result will be chaos */
#define QCACHE_ADVANCE_EVERY 16

/* Per-thread query cache.  Note that the invar can only be Inv_StackN
   (but not Inv_Stack0), Inv_Global or Inv_Unknown. */
typedef
   struct {
      Addr  addr;
      SizeT szB;
      Invar inv;
   }
   QCElem;

typedef
   struct {
      Word   nInUse;
      QCElem elems[N_QCACHE];
   }
   QCache;
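
/* Sketch of the cache discipline (implemented in classify_address
   below): a query scans elems[0 .. nInUse-1] in order and takes the
   first entry whose [addr,addr+szB) wholly contains the access.  On
   every QCACHE_ADVANCE_EVERY-th hit, the winning entry is swapped one
   slot towards the front, so frequently hit entries migrate to the
   cheap early probes without the cost of a full move-to-front. */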

static void QCache__invalidate ( QCache* qc ) {
   tl_assert(qc->nInUse >= 0);
   qc->nInUse = 0;
}

static void QCache__pp ( QCache* qc, HChar* who )
{
   Word i;
   VG_(printf)("<<< QCache with %ld elements (%s)\n", qc->nInUse, who);
   for (i = 0; i < qc->nInUse; i++) {
      VG_(printf)("  [%#lx,+%#lx) ", qc->elems[i].addr, qc->elems[i].szB);
      pp_Invar(&qc->elems[i].inv);
      VG_(printf)("\n");
   }
   VG_(printf)(">>>\n");
}

static ULong stats__qcache_queries = 0;
static ULong stats__qcache_misses  = 0;
static ULong stats__qcache_probes  = 0;

///
//////////////////////////////////////////////////////////////

/* Each thread has:
   * a shadow stack of StackFrames, which is a doubly-linked list
   * a stack block interval tree
*/
static  struct _StackFrame*          shadowStacks[VG_N_THREADS];

static  WordFM* /* StackTreeNode */  siTrees[VG_N_THREADS];

static  QCache                       qcaches[VG_N_THREADS];


/* Additionally, there is one global variable interval tree
   for the entire process.
*/
static WordFM* /* GlobalTreeNode */ giTree;


static void invalidate_all_QCaches ( void )
{
   Word i;
   for (i = 0; i < VG_N_THREADS; i++) {
      QCache__invalidate( &qcaches[i] );
   }
}

static void ourGlobals_init ( void )
{
   Word i;
   for (i = 0; i < VG_N_THREADS; i++) {
      shadowStacks[i] = NULL;
      siTrees[i] = NULL;
   }
   invalidate_all_QCaches();
   giTree = VG_(newFM)( sg_malloc, "di.sg_main.oGi.1", sg_free,
                        (Word(*)(UWord,UWord))cmp_intervals_GlobalTreeNode );
}


//////////////////////////////////////////////////////////////
//                                                          //
// Handle global variable load/unload events                //
//                                                          //
//////////////////////////////////////////////////////////////

static void acquire_globals ( ULong di_handle )
{
   Word n, i;
   XArray* /* of GlobalBlock */ gbs;
   if (0) VG_(printf)("ACQUIRE GLOBALS %llu\n", di_handle );
   gbs = VG_(di_get_global_blocks_from_dihandle)
            (di_handle, True/*arrays only*/);
   if (0) VG_(printf)("   GOT %ld globals\n", VG_(sizeXA)( gbs ));

   n = VG_(sizeXA)( gbs );
   for (i = 0; i < n; i++) {
      GlobalBlock* gbp;
      GlobalBlock* gb = VG_(indexXA)( gbs, i );
      if (0) VG_(printf)("   new Global size %2lu at %#lx:  %s %s\n",
                         gb->szB, gb->addr, gb->soname, gb->name );
      tl_assert(gb->szB > 0);
      /* Make a persistent copy of each GlobalBlock, and add it
         to the tree. */
      gbp = get_persistent_GlobalBlock( gb );
      add_block_to_GlobalTree( giTree, gbp );
   }

   VG_(deleteXA)( gbs );
}


/* We only intercept these two because we need to see any di_handles
   that might arise from the mappings/allocations. */
void sg_new_mem_mmap( Addr a, SizeT len,
                      Bool rr, Bool ww, Bool xx, ULong di_handle )
{
   if (di_handle > 0)
      acquire_globals(di_handle);
}
void sg_new_mem_startup( Addr a, SizeT len,
                         Bool rr, Bool ww, Bool xx, ULong di_handle )
{
   if (di_handle > 0)
      acquire_globals(di_handle);
}
void sg_die_mem_munmap ( Addr a, SizeT len )
{
   Bool debug = (Bool)0;
   Bool overlap = False;

   if (debug) VG_(printf)("MUNMAP %#lx %lu\n", a, len );

   if (len == 0)
      return;

   overlap = del_GlobalTree_range(giTree, a, len);

   { /* redundant sanity check */
     UWord keyW, valW;
     VG_(initIterFM)( giTree );
     while (VG_(nextIterFM)( giTree, &keyW, &valW )) {
        GlobalTreeNode* nd = (GlobalTreeNode*)keyW;
        tl_assert(valW == 0);
        tl_assert(nd->szB > 0);
        tl_assert(nd->addr + nd->szB <= a
                  || a + len <= nd->addr);
     }
     VG_(doneIterFM)( giTree );
   }

   if (!overlap)
      return;

   /* Ok, the range contained some blocks.  Therefore we'll need to
      visit all the Invars in all the thread shadow stacks, and
      convert all Inv_Global entries that intersect [a,a+len) to
      Inv_Unknown. */
   tl_assert(len > 0);
   preen_global_Invars( a, len );
   invalidate_all_QCaches();
}


//////////////////////////////////////////////////////////////
//                                                          //
// StackFrame                                               //
//                                                          //
//////////////////////////////////////////////////////////////

static ULong stats__total_accesses   = 0;
static ULong stats__classify_Stack0  = 0;
static ULong stats__classify_StackN  = 0;
static ULong stats__classify_Global  = 0;
static ULong stats__classify_Unknown = 0;
static ULong stats__Invars_preened   = 0;
static ULong stats__Invars_changed   = 0;
static ULong stats__t_i_b_empty      = 0;
static ULong stats__htab_fast        = 0;
static ULong stats__htab_searches    = 0;
static ULong stats__htab_probes      = 0;
static ULong stats__htab_resizes     = 0;


/* A dynamic instance of an instruction */
typedef
   struct {
      /* IMMUTABLE */
      Addr    insn_addr; /* NB! zero means 'not in use' */
      XArray* blocks; /* XArray* of StackBlock, or NULL if none */
      /* MUTABLE */
      Invar invar;
   }
   IInstance;


#define N_HTAB_FIXED 64

typedef
   struct _StackFrame {
      /* The sp when the frame was created, so we know when to get rid
         of it. */
      Addr creation_sp;
      /* The stack frames for a thread are arranged as a doubly linked
         list.  Obviously the outermost frame in the stack has .outer
         as NULL and the innermost in theory has .inner as NULL.
         However, when a function returns, we don't delete the
         just-vacated StackFrame.  Instead, it is retained in the list
         and will be re-used when the next call happens.  This is so
         as to avoid constantly having to dynamically allocate and
         deallocate frames. */
      struct _StackFrame* inner;
      struct _StackFrame* outer;
      Word depth; /* 0 for outermost; increases inwards */
      /* Information for each memory referencing instruction, for this
         instantiation of the function.  The iinstances array is
         operated as a simple linear-probe hash table, which is
         dynamically expanded as necessary.  One critical thing is
         that an IInstance with a .insn_addr of zero is interpreted to
         mean that hash table slot is unused.  This means we can't
         store an IInstance for address zero. */
      /* Note that htab initially points to htab_fixed.  If htab_fixed
         turns out not to be big enough then htab is made to point to
         dynamically allocated memory.  But it's often the case that
         htab_fixed is big enough, so this optimisation saves a huge
         number of sg_malloc/sg_free call pairs. */
      IInstance* htab;
      UWord      htab_size; /* size of hash table, MAY ONLY BE A POWER OF 2 */
      UWord      htab_used; /* number of hash table slots currently in use */
      /* If this frame is currently making a call, then the following
         are relevant. */
      Addr sp_at_call;
      Addr fp_at_call;
      XArray* /* of Addr */ blocks_added_by_call;
      /* See comment just above */
      IInstance htab_fixed[N_HTAB_FIXED];
   }
   StackFrame;


/* Move this somewhere else? */
/* Visit all Invars in the entire system, and change any Inv_Global
   Invars that intersect [a,a+len) to Inv_Unknown. */

__attribute__((noinline))
static void preen_global_Invar ( Invar* inv, Addr a, SizeT len )
{
   stats__Invars_preened++;
   tl_assert(len > 0);
   tl_assert(inv);
   switch (inv->tag) {
      case Inv_Global:
         tl_assert(inv->Inv.Global.nd);
         tl_assert(inv->Inv.Global.nd->szB > 0);
         if (0) VG_(printf)("preen_Invar Global %#lx %lu\n",
                            inv->Inv.Global.nd->addr,
                            inv->Inv.Global.nd->szB);
         if (0 == cmp_nonempty_intervals(a, len, inv->Inv.Global.nd->addr,
                                                 inv->Inv.Global.nd->szB)) {
            inv->tag = Inv_Unknown;
            stats__Invars_changed++;
         }
         break;
      case Inv_Stack0:
      case Inv_StackN:
      case Inv_Unknown:
         break;
      default:
         tl_assert(0);
   }
}

__attribute__((noinline))
static void preen_global_Invars ( Addr a, SizeT len )
{
   Int         i;
   UWord       u;
   StackFrame* frame;
   tl_assert(len > 0);
   for (i = 0; i < VG_N_THREADS; i++) {
      frame = shadowStacks[i];
      if (!frame)
         continue; /* no frames for this thread */
      /* start from the innermost frame */
      while (frame->inner)
         frame = frame->inner;
      tl_assert(frame->outer);
      /* work through the frames from innermost to outermost.  The
         order isn't important; we just need to ensure we visit each
         frame once (including those which are not actually active,
         more 'inner' than the 'innermost active frame', viz, just
         hanging around waiting to be used, when the current innermost
         active frame makes more calls).  See comments on definition
         of struct _StackFrame. */
      for (; frame; frame = frame->outer) {
         UWord xx = 0; /* sanity check only; count of used htab entries */
         if (!frame->htab)
            continue; /* frame not in use.  See shadowStack_unwind(). */
         for (u = 0; u < frame->htab_size; u++) {
            IInstance* ii = &frame->htab[u];
            if (ii->insn_addr == 0)
               continue; /* not in use */
            if (0) { pp_Invar(&ii->invar); VG_(printf)(" x\n"); }
            preen_global_Invar( &ii->invar, a, len );
            xx++;
         }
         tl_assert(xx == frame->htab_used);
      }
   }
}


/* XXX this should be >> 2 on ppc32/64 since the bottom two bits
   of the ip are guaranteed to be zero */
inline static UWord compute_II_hash ( Addr ip, UWord htab_size ) {
   return (ip >> 0) & (htab_size - 1);
}
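
/* Example (hypothetical values): with htab_size == 64 the mask is
   0x3f, so ip == 0x400856 hashes to slot 0x16.  Collisions are
   handled by the linear probing in the find_or_create functions
   below, which scan forward, wrapping at the end of the table. */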

__attribute__((noinline))
static void initialise_II_hash_table ( StackFrame* sf )
{
   UWord i;
   sf->htab_size = N_HTAB_FIXED; /* initial hash table size */
   sf->htab = &sf->htab_fixed[0];
   tl_assert(sf->htab);
   sf->htab_used = 0;
   for (i = 0; i < sf->htab_size; i++)
      sf->htab[i].insn_addr = 0; /* NOT IN USE */
}


__attribute__((noinline))
static void resize_II_hash_table ( StackFrame* sf )
{
   UWord     i, j, ix, old_size, new_size;
   IInstance *old_htab, *new_htab, *old;

   tl_assert(sf && sf->htab);
   old_size = sf->htab_size;
   new_size = 2 * old_size;
   old_htab = sf->htab;
   new_htab = sg_malloc( "di.sg_main.rIht.1",
                         new_size * sizeof(IInstance) );
   for (i = 0; i < new_size; i++) {
      new_htab[i].insn_addr = 0; /* NOT IN USE */
   }
   for (i = 0; i < old_size; i++) {
      old = &old_htab[i];
      if (old->insn_addr == 0 /* NOT IN USE */)
         continue;
      ix = compute_II_hash(old->insn_addr, new_size);
      /* find out where to put this, in the new table */
      j = new_size;
      while (1) {
         if (new_htab[ix].insn_addr == 0)
            break;
         /* This can't ever happen, because it would mean the new
            table is full; that isn't allowed -- even the old table is
            only allowed to become half full. */
         tl_assert(j > 0);
         j--;
         ix++; if (ix == new_size) ix = 0;
      }
      /* copy the old entry to this location */
      tl_assert(ix < new_size);
      tl_assert(new_htab[ix].insn_addr == 0);
      new_htab[ix] = *old;
      tl_assert(new_htab[ix].insn_addr != 0);
   }
   /* all entries copied; free old table. */
   if (old_htab != &sf->htab_fixed[0])
      sg_free(old_htab);
   sf->htab = new_htab;
   sf->htab_size = new_size;
   /* check sf->htab_used is correct.  Optional and a bit expensive
      but anyway: */
   j = 0;
   for (i = 0; i < new_size; i++) {
      if (new_htab[i].insn_addr != 0) {
         j++;
      }
   }
   tl_assert(j == sf->htab_used);
   if (0) VG_(printf)("resized tab for SF %p to %lu\n", sf, new_size);
}


__attribute__((noinline))
static IInstance* find_or_create_IInstance_SLOW (
                     StackFrame* sf,
                     Addr ip,
                     XArray* /* StackBlock */ ip_frameblocks
                  )
{
   UWord i, ix;

   stats__htab_searches++;

   tl_assert(sf);
   tl_assert(sf->htab);

   /* Make sure the table loading doesn't get too high. */
   if (UNLIKELY(2 * sf->htab_used >= 1 * sf->htab_size)) {
      stats__htab_resizes++;
      resize_II_hash_table(sf);
   }
   tl_assert(2 * sf->htab_used <= sf->htab_size);

   ix = compute_II_hash(ip, sf->htab_size);
   i = sf->htab_size;
   while (1) {
      stats__htab_probes++;
      /* Note that because of the way the fast-case handler works,
         these two tests are actually redundant in the first iteration
         of this loop.  (Except they aren't redundant if the code just
         above resized the table first. :-) */
      if (sf->htab[ix].insn_addr == ip)
         return &sf->htab[ix];
      if (sf->htab[ix].insn_addr == 0)
         break;
      /* If i ever gets to zero and we have found neither what we're
         looking for nor an empty slot, the table must be full.  Which
         isn't possible -- we monitor the load factor to ensure it
         doesn't get above say 50%; if that ever does happen the table
         is resized. */
      tl_assert(i > 0);
      i--;
      ix++;
      if (ix == sf->htab_size) ix = 0;
   }

   /* So now we've found a free slot at ix, and we can use that. */
   tl_assert(sf->htab[ix].insn_addr == 0);

   /* Add a new record in this slot. */
   tl_assert(ip != 0); /* CAN'T REPRESENT THIS */
   sf->htab[ix].insn_addr = ip;
   sf->htab[ix].blocks    = ip_frameblocks;
   sf->htab[ix].invar.tag = Inv_Unset;
   sf->htab_used++;
   return &sf->htab[ix];
}
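
/* Note on sizing: find_or_create_IInstance_SLOW resizes as soon as it
   sees the table at least half full (2 * htab_used >= htab_size), so
   linear probes operate at low load.  For example, a frame still on
   the inline htab_fixed array (N_HTAB_FIXED == 64) moves to a
   heap-allocated 128-entry table the first time the slow path runs
   with 32 or more entries in use. */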
1443
1444
1445inline
1446static IInstance* find_or_create_IInstance (
1447                     StackFrame* sf,
1448                     Addr ip,
1449                     XArray* /* StackBlock */ ip_frameblocks
1450                  )
1451{
1452   UWord ix = compute_II_hash(ip, sf->htab_size);
1453   /* Is it in the first slot we come to? */
1454   if (LIKELY(sf->htab[ix].insn_addr == ip)) {
1455      stats__htab_fast++;
1456      return &sf->htab[ix];
1457   }
1458   /* If the first slot we come to is empty, bag it. */
1459   if (LIKELY(sf->htab[ix].insn_addr == 0)) {
1460      stats__htab_fast++;
1461      tl_assert(ip != 0);
1462      sf->htab[ix].insn_addr = ip;
1463      sf->htab[ix].blocks    = ip_frameblocks;
1464      sf->htab[ix].invar.tag = Inv_Unset;
1465      sf->htab_used++;
1466      return &sf->htab[ix];
1467   }
1468   /* Otherwise we hand off to the slow case, which searches other
1469      slots, and optionally resizes the table if necessary. */
1470   return find_or_create_IInstance_SLOW( sf, ip, ip_frameblocks );
1471}
1472
1473
1474__attribute__((noinline))
1475static Addr calculate_StackBlock_EA ( StackBlock* descr,
1476                                      Addr sp, Addr fp ) {
1477   UWord w1 = (UWord)descr->base;
1478   UWord w2 = (UWord)(descr->spRel ? sp : fp);
1479   UWord ea = w1 + w2;
1480   return ea;
1481}
1482
1483/* Given an array of StackBlocks, return an array of Addrs, holding
1484   their effective addresses.  Caller deallocates result array. */
1485__attribute__((noinline))
1486static XArray* /* Addr */ calculate_StackBlock_EAs (
1487                             XArray* /* StackBlock */ blocks,
1488                             Addr sp, Addr fp
1489                          )
1490{
1491   XArray* res;
1492   Word i, n = VG_(sizeXA)( blocks );
1493   tl_assert(n > 0);
1494   res = VG_(newXA)( sg_malloc, "di.sg_main.cSBE.1", sg_free, sizeof(Addr) );
1495   for (i = 0; i < n; i++) {
1496      StackBlock* blk = VG_(indexXA)( blocks, i );
1497      Addr ea = calculate_StackBlock_EA( blk, sp, fp );
1498      VG_(addToXA)( res, &ea );
1499   }
1500   return res;
1501}
1502
1503
1504/* Try to classify the block into which a memory access falls, and
1505   write the result in 'inv'.  This writes all relevant fields of
1506   'inv'. */
1507__attribute__((noinline))
1508static void classify_address ( /*OUT*/Invar* inv,
1509                               ThreadId tid,
1510                               Addr ea, Addr sp, Addr fp,
1511                               UWord szB,
1512                               XArray* /* of StackBlock */ thisInstrBlocks )
1513{
1514   tl_assert(szB > 0);
1515   /* First, look in the stack blocks accessible in this instruction's
1516      frame. */
1517   {
1518     Word i, nBlocks = VG_(sizeXA)( thisInstrBlocks );
1519     if (nBlocks == 0) stats__t_i_b_empty++;
1520     for (i = 0; i < nBlocks; i++) {
1521        StackBlock* descr = VG_(indexXA)( thisInstrBlocks, i );
1522        Addr bea = calculate_StackBlock_EA( descr, sp, fp );
1523        if (bea <= ea && ea + szB <= bea + descr->szB) {
1524           /* found it */
1525           inv->tag = Inv_Stack0;
1526           inv->Inv.Stack0.addr  = bea;
1527           inv->Inv.Stack0.szB   = descr->szB;
1528           inv->Inv.Stack0.descr = descr;
1529           stats__classify_Stack0++;
1530           return;
1531        }
1532     }
1533   }
1534   /* Look in this thread's query cache */
1535   { Word i;
1536     QCache* cache = &qcaches[tid];
1537     static UWord ctr = 0;
1538     stats__qcache_queries++;
1539     for (i = 0; i < cache->nInUse; i++) {
1540        if (0) /* expensive in a loop like this */
1541               tl_assert(cache->elems[i].addr + cache->elems[i].szB != 0);
1542        stats__qcache_probes++;
1543        if (is_subinterval_of(cache->elems[i].addr,
1544                              cache->elems[i].szB, ea, szB)) {
           if (i > 0
               && (ctr++ & (QCACHE_ADVANCE_EVERY-1)) == 0) {
              QCElem tmp;
              tmp = cache->elems[i-1];
              cache->elems[i-1] = cache->elems[i];
              cache->elems[i] = tmp;
              i--;
           }
           *inv = cache->elems[i].inv;
           return;
        }
     }
     stats__qcache_misses++;
   }
   /* Ok, so it's not a block in the top frame.  Perhaps it's a block
      in some calling frame?  Consult this thread's stack-block
      interval tree to find out. */
   { StackTreeNode* nd = find_StackTreeNode( siTrees[tid], ea );
     /* We know that [ea,ea+1) is in the block, but we need to
        restrict to the case where the whole access falls within
        it. */
     if (nd && !is_subinterval_of(nd->addr, nd->szB, ea, szB)) {
        nd = NULL;
     }
     if (nd) {
        /* found it */
        inv->tag = Inv_StackN;
        inv->Inv.StackN.nd = nd;
        stats__classify_StackN++;
        goto out;
     }
   }
   /* Not in a stack block.  Try the global pool. */
   { GlobalTreeNode* nd = find_GlobalTreeNode(giTree, ea);
     /* We know that [ea,ea+1) is in the block, but we need to
        restrict to the case where the whole access falls within
        it. */
     if (nd && !is_subinterval_of(nd->addr, nd->szB, ea, szB)) {
        nd = NULL;
     }
     if (nd) {
        /* found it */
        inv->tag = Inv_Global;
        inv->Inv.Global.nd = nd;
        stats__classify_Global++;
        goto out;
     }
   }
   /* No idea - give up. */
   inv->tag = Inv_Unknown;
   stats__classify_Unknown++;

   /* Update the cache */
  out:
   { Addr    toadd_addr = 0;
     SizeT   toadd_szB  = 0;
     QCache* cache      = &qcaches[tid];

     static UWord ctr = 0;
     Bool show = False;
     if (0 && 0 == (ctr++ & 0x1FFFFF)) show = True;

     if (show) QCache__pp(cache, "before upd");

     switch (inv->tag) {
        case Inv_Global:
           toadd_addr = inv->Inv.Global.nd->addr;
           toadd_szB  = inv->Inv.Global.nd->szB;
           break;
        case Inv_StackN:
           toadd_addr = inv->Inv.StackN.nd->addr;
           toadd_szB  = inv->Inv.StackN.nd->szB;
           break;
        case Inv_Unknown: {
           /* This is more complex.  We need to figure out the
              intersection of the "holes" in the global and stack
              interval trees into which [ea,ea+szB) falls.  This is
              further complicated by the fact that [ea,ea+szB) might
              not fall cleanly into a hole; it may instead fall across
              the boundary of a stack or global block.  In that case
              we just ignore it and don't update the cache, since we
              have no way to represent this situation precisely. */
           StackTreeNode  sNegInf, sPosInf, sKey, *sLB, *sUB;
           GlobalTreeNode gNegInf, gPosInf, gKey, *gLB, *gUB;
           Addr gMin, gMax, sMin, sMax, uMin, uMax;
           Bool sOK, gOK;
           sNegInf.addr = 0;
           sNegInf.szB  = 1;
           sPosInf.addr = ~(UWord)0;
           sPosInf.szB  = 1;
           gNegInf.addr = 0;
           gNegInf.szB  = 1;
           gPosInf.addr = ~(UWord)0;
           gPosInf.szB  = 1;
           sKey.addr = ea;
           sKey.szB  = szB;
           gKey.addr = ea;
           gKey.szB  = szB;
           if (0) VG_(printf)("Tree sizes %ld %ld\n",
                              VG_(sizeFM)(siTrees[tid]), VG_(sizeFM)(giTree));
           sOK = VG_(findBoundsFM)( siTrees[tid],
                                    (UWord*)&sLB,    NULL/*unused*/,
                                    (UWord*)&sUB,    NULL/*unused*/,
                                    (UWord)&sNegInf, 0/*unused*/,
                                    (UWord)&sPosInf, 0/*unused*/,
                                    (UWord)&sKey );
           gOK = VG_(findBoundsFM)( giTree,
                                    (UWord*)&gLB,    NULL/*unused*/,
                                    (UWord*)&gUB,    NULL/*unused*/,
                                    (UWord)&gNegInf, 0/*unused*/,
                                    (UWord)&gPosInf, 0/*unused*/,
                                    (UWord)&gKey );
           if (!(sOK && gOK)) {
              /* If this happens, then [ea,ea+szB) partially overlaps
                 a global or stack block.  We can't represent that, so
                 just forget it (should be very rare).  However, do
                 thorough sanity checks first.  In such a partial
                 overlap case, it can't be the case that both [ea]
                 and [ea+szB-1] overlap the same block, since if that
                 were indeed the case then it wouldn't be a partial
                 overlap; rather it would simply fall inside that
                 block entirely and we shouldn't be inside this
                 conditional at all. */
              if (!sOK) {
                 StackTreeNode *ndFirst, *ndLast;
                 ndFirst = find_StackTreeNode( siTrees[tid], ea );
                 ndLast  = find_StackTreeNode( siTrees[tid], ea+szB-1 );
                 /* if both ends of the range fall inside a block,
                    they can't be in the same block. */
                 if (ndFirst && ndLast)
                    tl_assert(ndFirst != ndLast);
                 /* for each end of the range, if it is in a block,
                    the range as a whole can't be entirely within the
                    block. */
                 if (ndFirst)
                    tl_assert(!is_subinterval_of(ndFirst->addr,
                                                 ndFirst->szB, ea, szB));
                 if (ndLast)
                    tl_assert(!is_subinterval_of(ndLast->addr,
                                                 ndLast->szB, ea, szB));
              }
              if (!gOK) {
                 GlobalTreeNode *ndFirst, *ndLast;
                 ndFirst = find_GlobalTreeNode( giTree, ea );
                 ndLast  = find_GlobalTreeNode( giTree, ea+szB-1 );
                 /* if both ends of the range fall inside a block,
                    they can't be in the same block. */
                 if (ndFirst && ndLast)
                    tl_assert(ndFirst != ndLast);
                 /* for each end of the range, if it is in a block,
                    the range as a whole can't be entirely within the
                    block. */
                 if (ndFirst)
                    tl_assert(!is_subinterval_of(ndFirst->addr,
                                                 ndFirst->szB, ea, szB));
                 if (ndLast)
                    tl_assert(!is_subinterval_of(ndLast->addr,
                                                 ndLast->szB, ea, szB));
              }
              if (0) VG_(printf)("overlapping blocks in cache\n");
              return;
           }
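           /* sLB/sUB (gLB/gUB) now bracket [ea,ea+szB): they are the
              stack (global) blocks immediately below and above it, or
              the faked-up infinity endpoints where no such block
              exists.  The "hole" in each tree is the gap between
              those neighbours. */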
           sMin = sLB == &sNegInf  ? 0         : (sLB->addr + sLB->szB);
           sMax = sUB == &sPosInf  ? ~(UWord)0 : (sUB->addr - 1);
           gMin = gLB == &gNegInf  ? 0         : (gLB->addr + gLB->szB);
           gMax = gUB == &gPosInf  ? ~(UWord)0 : (gUB->addr - 1);
           if (0) VG_(printf)("sMin %lx sMax %lx gMin %lx gMax %lx\n",
                              sMin, sMax, gMin, gMax);
           /* [sMin,sMax] and [gMin,gMax] must both contain
              [ea,ea+szB); hence they must overlap over at least
              [ea,ea+szB). */
           tl_assert(sMin <= ea && ea+szB-1 <= sMax);
           tl_assert(gMin <= ea && ea+szB-1 <= gMax);
           /* So now compute their intersection. */
           uMin = Addr__max( sMin, gMin );
           uMax = Addr__min( sMax, gMax );
           if (0) VG_(printf)("uMin %lx uMax %lx\n", uMin, uMax);
           tl_assert(uMin <= uMax);
           tl_assert(uMin <= ea && ea+szB-1 <= uMax);
           /* Finally, we can park [uMin,uMax] in the cache.  However,
              if uMax is ~0, the size uMax - uMin + 1 wraps around to
              zero and can't be represented; hence fudge uMax. */
           if (uMin < uMax && uMax == ~(UWord)0)
              uMax--;
           toadd_addr = uMin;
           toadd_szB  = uMax - uMin + 1;
           break;
        }
        default:
           /* We should only be caching info for the above 3 cases */
           tl_assert(0);
     } /* switch (inv->tag) */

     { /* and actually add this to the cache, finally */
       Word i;
       Word ip = cache->nInUse / 2; /* doesn't seem critical */

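       /* Insert at 'ip', shifting entries ip..nInUse-2 down one slot;
          if the cache is already full, the last entry is thereby
          evicted. */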
       if (cache->nInUse < N_QCACHE)
          cache->nInUse++;
       for (i = cache->nInUse-1; i > ip; i--) {
          cache->elems[i] = cache->elems[i-1];
       }

       tl_assert(toadd_szB > 0);
       cache->elems[ip].addr = toadd_addr;
       cache->elems[ip].szB  = toadd_szB;
       cache->elems[ip].inv  = *inv;
     }

     if (show) QCache__pp(cache, "after upd");

   }
}


/* CALLED FROM GENERATED CODE */
static
VG_REGPARM(3)
void helperc__mem_access ( /* Known only at run time: */
                           Addr ea, Addr sp, Addr fp,
                           /* Known at translation time: */
                           Word sszB, Addr ip, XArray* ip_frameBlocks )
{
   UWord szB;
   IInstance* iinstance;
   Invar* inv;
   Invar new_inv;
   ThreadId tid = VG_(get_running_tid)();
   StackFrame* frame;
   HChar bufE[160], bufA[160], bufD[32];

   stats__total_accesses++;

   tl_assert(is_sane_TId(tid));
   frame = shadowStacks[tid];
   tl_assert(frame);

   /* Find the instance info for this instruction. */
   tl_assert(ip_frameBlocks);
   iinstance = find_or_create_IInstance( frame, ip, ip_frameBlocks );
   tl_assert(iinstance);
   tl_assert(iinstance->blocks == ip_frameBlocks);

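   /* The sign of sszB encodes the access kind: negative for a store,
      positive for a load (see instrument_mem_access).  Only the
      magnitude matters here for classification. */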
   szB = (sszB < 0) ? (-sszB) : sszB;
   tl_assert(szB > 0);

   inv = &iinstance->invar;

   /* Deal with first uses of instruction instances. */
   if (inv->tag == Inv_Unset) {
      /* This is the first use of this instance of the instruction, so
         we can't make any check; we merely record what we saw, so we
         can compare it against what happens for 2nd and subsequent
         accesses. */
      classify_address( inv,
                        tid, ea, sp, fp, szB, iinstance->blocks );
      tl_assert(inv->tag != Inv_Unset);
      return;
   }

   /* So generate an Invar and see if it's different from what
      we had before. */
   classify_address( &new_inv,
                     tid, ea, sp, fp, szB, iinstance->blocks );
   tl_assert(new_inv.tag != Inv_Unset);

   /* Did we see something different from before?  If no, then there's
      no error. */
   if (LIKELY(eq_Invar(&new_inv, inv)))
      return;

   tl_assert(inv->tag != Inv_Unset);

   VG_(memset)(bufE, 0, sizeof(bufE));
   show_Invar( bufE, sizeof(bufE)-1, inv, frame->depth );

   VG_(memset)(bufA, 0, sizeof(bufA));
   show_Invar( bufA, sizeof(bufA)-1, &new_inv, frame->depth );

   VG_(memset)(bufD, 0, sizeof(bufD));
   UWord absDelta;
   gen_delta_str( bufD, &absDelta, inv, ea );

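   /* Only record an error if the access lies within 1024 bytes of the
      block named by the original invariant; beyond that the access is
      presumably wild, and describing the old block would not help the
      user. */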
   if (absDelta < 1024)
      sg_record_error_SorG( tid, ea, sszB, bufE, bufA, bufD );

   /* And now install the new observation as "standard", so as to
      make future error messages make more sense. */
   *inv = new_inv;
}


////////////////////////////////////////
/* Primary push-a-new-frame routine.  Called indirectly from
   generated code. */

static UWord stats__max_sitree_size = 0;
static UWord stats__max_gitree_size = 0;

static
void shadowStack_new_frame ( ThreadId tid,
                             Addr     sp_at_call_insn,
                             Addr     sp_post_call_insn,
                             Addr     fp_at_call_insn,
                             Addr     ip_post_call_insn,
                             XArray*  descrs_at_call_insn )
{
   StackFrame *callee, *caller;
   tl_assert(is_sane_TId(tid));

   caller = shadowStacks[tid];
   tl_assert(caller);

   if (caller->outer) { /* "this is not the outermost frame" */
      tl_assert(caller->outer->inner == caller);
      tl_assert(caller->outer->depth >= 0);
      tl_assert(1 + caller->outer->depth == caller->depth);
   } else {
      tl_assert(caller->depth == 0);
   }

   caller->sp_at_call = sp_at_call_insn;
   caller->fp_at_call = fp_at_call_insn;

   if (descrs_at_call_insn) {
      tl_assert( VG_(sizeXA)(descrs_at_call_insn) > 0 );
      caller->blocks_added_by_call
         = calculate_StackBlock_EAs( descrs_at_call_insn,
                                     sp_at_call_insn, fp_at_call_insn );
      if (caller->blocks_added_by_call)
         add_blocks_to_StackTree( siTrees[tid],
                                  descrs_at_call_insn,
                                  caller->blocks_added_by_call,
                                  caller->depth /* stack depth at which
                                                   these blocks are
                                                   considered to exist */ );
      if (1) {
         UWord s  = VG_(sizeFM)( siTrees[tid] );
         UWord g  = VG_(sizeFM)( giTree );
         Bool  sb = s > stats__max_sitree_size;
         Bool  gb = g > stats__max_gitree_size;
         if (sb) stats__max_sitree_size = s;
         if (gb) stats__max_gitree_size = g;
         if (0 && (sb || gb))
            VG_(message)(Vg_DebugMsg,
                         "exp-sgcheck: new max tree sizes: "
                         "StackTree %ld, GlobalTree %ld\n",
                         stats__max_sitree_size, stats__max_gitree_size );
      }
   } else {
      caller->blocks_added_by_call = NULL;
   }

   /* caller->blocks_added_by_call is used again (and then freed) when
      this frame is removed from the stack. */

   if (caller->inner) {
      callee = caller->inner;
   } else {
      callee = sg_malloc("di.sg_main.sSnf.1", sizeof(StackFrame));
      VG_(memset)(callee, 0, sizeof(StackFrame));
      callee->outer = caller;
      caller->inner = callee;
      callee->depth = 1 + caller->depth;
      tl_assert(callee->inner == NULL);
   }

   /* This sets up .htab, .htab_size and .htab_used */
   initialise_II_hash_table( callee );

   callee->creation_sp    = sp_post_call_insn;
   callee->sp_at_call     = 0; // not actually required ..
   callee->fp_at_call     = 0; // .. these 3 initialisations are ..
   callee->blocks_added_by_call = NULL; // .. just for cleanness

   /* record the new running stack frame */
   shadowStacks[tid] = callee;

   /* and this thread's query cache is now invalid */
   QCache__invalidate( &qcaches[tid] );

   if (0)
   { Word d = callee->depth;
     HChar fnname[80];
     Bool ok;
     Addr ip = ip_post_call_insn;
     ok = VG_(get_fnname_w_offset)( ip, fnname, sizeof(fnname) );
     while (d > 0) {
        VG_(printf)(" ");
        d--;
     }
     VG_(printf)("> %s %#lx\n", ok ? fnname : "???", ip);
   }
}

/* CALLED FROM GENERATED CODE */
static
VG_REGPARM(3)
void helperc__new_frame ( Addr sp_post_call_insn,
                          Addr fp_at_call_insn,
                          Addr ip_post_call_insn,
                          XArray* blocks_at_call_insn,
                          Word sp_adjust )
{
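   /* 'sp_adjust' is the number of bytes the call insn itself removed
      from SP (one guest word, the pushed return address; assumes x86
      or amd64).  Adding it back recovers SP as it was at the call
      insn. */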
   ThreadId tid = VG_(get_running_tid)();
   Addr     sp_at_call_insn = sp_post_call_insn + sp_adjust;
   shadowStack_new_frame( tid,
                          sp_at_call_insn,
                          sp_post_call_insn,
                          fp_at_call_insn,
                          ip_post_call_insn,
                          blocks_at_call_insn );
}


////////////////////////////////////////
/* Primary remove-frame(s) routine.  Called indirectly from
   generated code. */

__attribute__((noinline))
static void shadowStack_unwind ( ThreadId tid, Addr sp_now )
{
   StackFrame *innermost, *innermostOrig;
   tl_assert(is_sane_TId(tid));
   innermost = shadowStacks[tid];
   tl_assert(innermost);
   innermostOrig = innermost;
   //VG_(printf)("UNWIND sp_new = %p\n", sp_now);
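   /* The stack grows downwards, so a frame whose creation SP is below
      sp_now has by now been popped by the program; discard all such
      frames.  (This assumes a descending stack, as on x86/amd64.) */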
   while (1) {
      if (!innermost->outer)
         break;
      if (innermost->inner)
         tl_assert(innermost->inner->outer == innermost);
      tl_assert(innermost->outer->inner == innermost);
      tl_assert(innermost->blocks_added_by_call == NULL);
      if (sp_now <= innermost->creation_sp) break;
      //VG_(printf)("UNWIND     dump %p\n", innermost->creation_sp);
      tl_assert(innermost->htab);
      if (innermost->htab != &innermost->htab_fixed[0])
         sg_free(innermost->htab);
      /* be on the safe side */
      innermost->creation_sp = 0;
      innermost->htab = NULL;
      innermost->htab_size = 0;
      innermost->htab_used = 0;
      innermost->sp_at_call = 0;
      innermost->fp_at_call = 0;
      innermost->blocks_added_by_call = NULL;
      innermost = innermost->outer;

      /* So now we're "back" in the calling frame.  Remove, from this
         thread's stack-interval tree, the blocks added at the time of
         the call. */

      if (innermost->outer) { /* not at the outermost frame */
         if (innermost->blocks_added_by_call != NULL) {
            del_blocks_from_StackTree( siTrees[tid],
                                       innermost->blocks_added_by_call );
            VG_(deleteXA)( innermost->blocks_added_by_call );
            innermost->blocks_added_by_call = NULL;
         }
      }
      /* That completes the required tidying of the interval tree
         associated with the frame we just removed. */

      if (0) {
         Word d = innermost->depth;
         while (d > 0) {
            VG_(printf)(" ");
            d--;
         }
         VG_(printf)("X\n");
      }

   }

   tl_assert(innermost);

   if (innermost != innermostOrig) {
      shadowStacks[tid] = innermost;
      /* this thread's query cache is now invalid */
      QCache__invalidate( &qcaches[tid] );
   }
}



//////////////////////////////////////////////////////////////
//                                                          //
// Instrumentation                                          //
//                                                          //
//////////////////////////////////////////////////////////////

/* What does instrumentation need to do?

   - at each Call transfer, generate a call to shadowStack_new_frame;
     do this by manually inspecting the IR

   - at each SP change, if the change is negative,
     call shadowStack_unwind;
     do this by asking for SP-change analysis

   - for each memory-referencing instruction,
     generate a call to helperc__mem_access
*/

/* A complication: sg_ instrumentation and h_ instrumentation need to
   be interleaved.  Since the latter is a lot more complex than the
   former, we split the sg_ instrumentation here into four functions
   and let the h_ instrumenter call the four functions as it goes.
   Hence the h_ instrumenter drives the sg_ instrumenter.

   To make this viable, the sg_ instrumenter carries what running
   state it needs in 'struct _SGEnv'.  This is exported only
   abstractly from this file.  (A sketch of the resulting calling
   protocol follows the struct definition below.)
*/

struct _SGEnv {
   /* the current insn's IP */
   Addr64 curr_IP;
   /* whether the above is actually known */
   Bool curr_IP_known;
   /* if we find a mem ref, is it the first for this insn?  Used for
      detecting insns which make more than one memory ref, a situation
      we can't handle properly; so we ignore all but the first ref. */
   Bool firstRef;
   /* READONLY */
   IRTemp (*newIRTemp_cb)(IRType,void*);
   void* newIRTemp_opaque;
};
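
/* A sketch of the driving protocol as seen from the h_ instrumenter's
   side (illustrative only; 'my_newIRTemp_cb', 'my_opaque' and the
   loop are hypothetical, but the call order is the one required by
   the comments on the four functions below):

      struct _SGEnv* env = sg_instrument_init( my_newIRTemp_cb,
                                               &my_opaque );
      for (each flat IRStmt* st in the input superblock, in order) {
         sg_instrument_IRStmt( env, sbOut, st, layout,
                               gWordTy, hWordTy );
         addStmtToIRSB( sbOut, st );  // sg_ instrumentation goes first
      }
      sg_instrument_final_jump( env, sbOut, next, jumpkind,
                                layout, gWordTy, hWordTy );
      sg_instrument_fini( env );
*/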


/* --- Helper fns for instrumentation --- */

static IRTemp gen_Get_SP ( struct _SGEnv*  sge,
                           IRSB*           bbOut,
                           VexGuestLayout* layout,
                           Int             hWordTy_szB )
{
   IRExpr* sp_expr;
   IRTemp  sp_temp;
   IRType  sp_type;
   /* This in effect forces the host and guest word sizes to be the
      same. */
   tl_assert(hWordTy_szB == layout->sizeof_SP);
   sp_type = layout->sizeof_SP == 8 ? Ity_I64 : Ity_I32;
   sp_expr = IRExpr_Get( layout->offset_SP, sp_type );
   sp_temp = sge->newIRTemp_cb( sp_type, sge->newIRTemp_opaque );
   addStmtToIRSB( bbOut, IRStmt_WrTmp( sp_temp, sp_expr ) );
   return sp_temp;
}

static IRTemp gen_Get_FP ( struct _SGEnv*  sge,
                           IRSB*           bbOut,
                           VexGuestLayout* layout,
                           Int             hWordTy_szB )
{
   IRExpr* fp_expr;
   IRTemp  fp_temp;
   IRType  fp_type;
   /* This in effect forces the host and guest word sizes to be the
      same. */
   tl_assert(hWordTy_szB == layout->sizeof_SP);
   fp_type = layout->sizeof_FP == 8 ? Ity_I64 : Ity_I32;
   fp_expr = IRExpr_Get( layout->offset_FP, fp_type );
   fp_temp = sge->newIRTemp_cb( fp_type, sge->newIRTemp_opaque );
   addStmtToIRSB( bbOut, IRStmt_WrTmp( fp_temp, fp_expr ) );
   return fp_temp;
}

static void instrument_mem_access ( struct _SGEnv* sge,
                                    IRSB*   bbOut,
                                    IRExpr* addr,
                                    Int     szB,
                                    Bool    isStore,
                                    Int     hWordTy_szB,
                                    Addr    curr_IP,
                                    VexGuestLayout* layout )
{
   IRType  tyAddr      = Ity_INVALID;
   XArray* frameBlocks = NULL;

   tl_assert(isIRAtom(addr));
   tl_assert(hWordTy_szB == 4 || hWordTy_szB == 8);

   tyAddr = typeOfIRExpr( bbOut->tyenv, addr );
   tl_assert(tyAddr == Ity_I32 || tyAddr == Ity_I64);

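   /* Ignore the stack accesses made by x86 prologue and epilogue
      sequences (push/pop of %ebp, and the return itself).  These
      legitimately touch the enclosing frame and would otherwise,
      presumably, generate spurious reports. */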
#if defined(VGA_x86)
   { UChar* p = (UChar*)curr_IP;
     // pop %ebp; RET
     if (p[0] == 0xc3 && p[-1] == 0x5d) return;
     // pop %ebp; RET $imm16
     if (p[0] == 0xc2 && p[-1] == 0x5d) return;
     // PUSH %EBP; mov %esp,%ebp
     if (p[0] == 0x55 && p[1] == 0x89 && p[2] == 0xe5) return;
   }
#endif

   /* First off, find or create the StackBlocks for this instruction. */
   frameBlocks = get_StackBlocks_for_IP( curr_IP );
   tl_assert(frameBlocks);
   //if (VG_(sizeXA)( frameBlocks ) == 0)
   //   frameBlocks = NULL;

   /* Generate a call to "helperc__mem_access", passing:
         addr current_SP current_FP szB curr_IP frameBlocks
   */
   { IRTemp t_SP = gen_Get_SP( sge, bbOut, layout, hWordTy_szB );
     IRTemp t_FP = gen_Get_FP( sge, bbOut, layout, hWordTy_szB );
     IRExpr** args
        = mkIRExprVec_6( addr,
                         IRExpr_RdTmp(t_SP),
                         IRExpr_RdTmp(t_FP),
                         mkIRExpr_HWord( isStore ? (-szB) : szB ),
                         mkIRExpr_HWord( curr_IP ),
                         mkIRExpr_HWord( (HWord)frameBlocks ) );
     IRDirty* di
        = unsafeIRDirty_0_N( 3/*regparms*/,
                             "helperc__mem_access",
                             VG_(fnptr_to_fnentry)( &helperc__mem_access ),
                             args );

     addStmtToIRSB( bbOut, IRStmt_Dirty(di) );
   }
}
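
/* By way of illustration (made-up temp numbers): for a 4-byte load
   "t3 = LDle:I32(t2)" on a 64-bit guest, the code above would emit,
   ahead of the load itself:

      t17 = GET:I64(<offset_SP>)
      t18 = GET:I64(<offset_FP>)
      DIRTY 3:: helperc__mem_access(t2, t17, t18, 0x4, <IP>, <frameBlocks>)

   with the size argument negated had this been a store. */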


/* --- Instrumentation main (4 fns) --- */

struct _SGEnv * sg_instrument_init ( IRTemp (*newIRTemp_cb)(IRType,void*),
                                     void* newIRTemp_opaque )
{
   struct _SGEnv * env = sg_malloc("di.sg_main.sii.1",
                                   sizeof(struct _SGEnv));
   tl_assert(env);
   env->curr_IP          = 0;
   env->curr_IP_known    = False;
   env->firstRef         = True;
   env->newIRTemp_cb     = newIRTemp_cb;
   env->newIRTemp_opaque = newIRTemp_opaque;
   return env;
}

void sg_instrument_fini ( struct _SGEnv * env )
{
   sg_free(env);
}

/* Add instrumentation for 'st' to 'sbOut', and possibly modify 'env'
   as required.  This must be called before 'st' itself is added to
   'sbOut'. */
void sg_instrument_IRStmt ( /*MOD*/struct _SGEnv * env,
                            /*MOD*/IRSB* sbOut,
                            IRStmt* st,
                            VexGuestLayout* layout,
                            IRType gWordTy, IRType hWordTy )
{
   if (!sg_clo_enable_sg_checks)
      return;

   tl_assert(st);
   tl_assert(isFlatIRStmt(st));
   switch (st->tag) {
      case Ist_NoOp:
      case Ist_AbiHint:
      case Ist_Put:
      case Ist_PutI:
      case Ist_MBE:
         /* None of these can contain any memory references. */
         break;

      case Ist_Exit:
         tl_assert(st->Ist.Exit.jk != Ijk_Call);
         /* else we must deal with a conditional call */
         break;

      case Ist_IMark:
         env->curr_IP_known = True;
         env->curr_IP       = (Addr)st->Ist.IMark.addr;
         env->firstRef      = True;
         break;

      case Ist_Store:
         tl_assert(env->curr_IP_known);
         if (env->firstRef) {
            instrument_mem_access(
               env, sbOut,
               st->Ist.Store.addr,
               sizeofIRType(typeOfIRExpr(sbOut->tyenv, st->Ist.Store.data)),
               True/*isStore*/,
               sizeofIRType(hWordTy),
               env->curr_IP, layout
            );
            env->firstRef = False;
         }
         break;

      case Ist_WrTmp: {
         IRExpr* data = st->Ist.WrTmp.data;
         if (data->tag == Iex_Load) {
            tl_assert(env->curr_IP_known);
            if (env->firstRef) {
               instrument_mem_access(
                  env, sbOut,
                  data->Iex.Load.addr,
                  sizeofIRType(data->Iex.Load.ty),
                  False/*!isStore*/,
                  sizeofIRType(hWordTy),
                  env->curr_IP, layout
               );
               env->firstRef = False;
            }
         }
         break;
      }

      case Ist_Dirty: {
         Int      dataSize;
         IRDirty* d = st->Ist.Dirty.details;
         if (d->mFx != Ifx_None) {
            /* This dirty helper accesses memory.  Collect the
               details. */
            tl_assert(env->curr_IP_known);
            if (env->firstRef) {
               tl_assert(d->mAddr != NULL);
               tl_assert(d->mSize != 0);
               dataSize = d->mSize;
               if (d->mFx == Ifx_Read || d->mFx == Ifx_Modify) {
                  instrument_mem_access(
                     env, sbOut, d->mAddr, dataSize, False/*!isStore*/,
                     sizeofIRType(hWordTy), env->curr_IP, layout
                  );
               }
               if (d->mFx == Ifx_Write || d->mFx == Ifx_Modify) {
                  instrument_mem_access(
                     env, sbOut, d->mAddr, dataSize, True/*isStore*/,
                     sizeofIRType(hWordTy), env->curr_IP, layout
                  );
               }
               env->firstRef = False;
            }
         } else {
            tl_assert(d->mAddr == NULL);
            tl_assert(d->mSize == 0);
         }
         break;
      }

      case Ist_CAS: {
         /* We treat it as a read and a write of the location.  I
            think that is the same behaviour as it was before IRCAS
            was introduced, since prior to that point, the Vex front
            ends would translate a lock-prefixed instruction into a
            (normal) read followed by a (normal) write. */
         if (env->firstRef) {
            Int    dataSize;
            IRCAS* cas = st->Ist.CAS.details;
            tl_assert(cas->addr != NULL);
            tl_assert(cas->dataLo != NULL);
            dataSize = sizeofIRType(typeOfIRExpr(sbOut->tyenv, cas->dataLo));
            if (cas->dataHi != NULL)
               dataSize *= 2; /* since it's a doubleword-CAS */
            instrument_mem_access(
               env, sbOut, cas->addr, dataSize, False/*!isStore*/,
               sizeofIRType(hWordTy), env->curr_IP, layout
            );
            instrument_mem_access(
               env, sbOut, cas->addr, dataSize, True/*isStore*/,
               sizeofIRType(hWordTy), env->curr_IP, layout
            );
            env->firstRef = False;
         }
         break;
      }

      default:
         tl_assert(0);

   } /* switch (st->tag) */
}


/* Add instrumentation for the final jump of an IRSB 'sbOut', and
   possibly modify 'env' as required.  This must be the last
   instrumentation statement in the block. */
void sg_instrument_final_jump ( /*MOD*/struct _SGEnv * env,
                                /*MOD*/IRSB* sbOut,
                                IRExpr* next,
                                IRJumpKind jumpkind,
                                VexGuestLayout* layout,
                                IRType gWordTy, IRType hWordTy )
{
   if (!sg_clo_enable_sg_checks)
      return;

   if (jumpkind == Ijk_Call) {
      // Assumes x86 or amd64
      IRTemp   sp_post_call_insn, fp_post_call_insn;
      XArray*  frameBlocks;
      IRExpr** args;
      IRDirty* di;
      sp_post_call_insn
         = gen_Get_SP( env, sbOut, layout, sizeofIRType(hWordTy) );
      fp_post_call_insn
         = gen_Get_FP( env, sbOut, layout, sizeofIRType(hWordTy) );
      tl_assert(env->curr_IP_known);
      frameBlocks = get_StackBlocks_for_IP( env->curr_IP );
      tl_assert(frameBlocks);
      if (VG_(sizeXA)(frameBlocks) == 0)
         frameBlocks = NULL;
      args
         = mkIRExprVec_5(
              IRExpr_RdTmp(sp_post_call_insn),
              IRExpr_RdTmp(fp_post_call_insn),
                 /* assume the call doesn't change FP */
              next,
              mkIRExpr_HWord( (HWord)frameBlocks ),
              mkIRExpr_HWord( sizeofIRType(gWordTy) )
           );
      di = unsafeIRDirty_0_N(
              3/*regparms*/,
              "helperc__new_frame",
              VG_(fnptr_to_fnentry)( &helperc__new_frame ),
              args );
      addStmtToIRSB( sbOut, IRStmt_Dirty(di) );
   }
}


//////////////////////////////////////////////////////////////
//                                                          //
// end Instrumentation                                      //
//                                                          //
//////////////////////////////////////////////////////////////


//////////////////////////////////////////////////////////////
//                                                          //
// misc                                                     //
//                                                          //
//////////////////////////////////////////////////////////////

/* Make a new empty stack frame that is suitable for being the
   outermost frame in a stack.  It has a creation_sp of effectively
   infinity, so it can never be removed. */
static StackFrame* new_root_StackFrame ( void )
{
   StackFrame* sframe = sg_malloc("di.sg_main.nrS.1", sizeof(StackFrame));
   VG_(memset)( sframe, 0, sizeof(*sframe) );
   sframe->creation_sp = ~0UL;

   /* This sets up .htab, .htab_size and .htab_used */
   initialise_II_hash_table( sframe );

   /* ->depth, ->outer, ->inner are 0, NULL, NULL */

   return sframe;
}

/* Primary routine for setting up the shadow stack for a new thread.
   Note that this is used to create not only child thread stacks, but
   the root thread's stack too.  We create a new stack with
   .creation_sp set to infinity, so that the outermost frame can never
   be removed (by shadowStack_unwind).  The core calls this function
   as soon as a thread is created.  We cannot yet get its SP value,
   since that may not yet be set. */
static void shadowStack_thread_create ( ThreadId parent, ThreadId child )
{
   tl_assert(is_sane_TId(child));
   if (parent == VG_INVALID_THREADID) {
      /* creating the main thread's stack */
   } else {
      tl_assert(is_sane_TId(parent));
      tl_assert(parent != child);
      tl_assert(shadowStacks[parent] != NULL);
      tl_assert(siTrees[parent] != NULL);
   }

   /* Create the child's stack.  Bear in mind we may be re-using
      it. */
   if (shadowStacks[child] == NULL) {
      /* First use of this stack.  Just allocate an initial frame. */
      tl_assert(siTrees[child] == NULL);
   } else {
      StackFrame *frame, *frame2;
      /* re-using a stack. */
      /* get rid of the interval tree */
      tl_assert(siTrees[child] != NULL);
      delete_StackTree( siTrees[child] );
      siTrees[child] = NULL;
      /* Throw away all existing frames. */
      frame = shadowStacks[child];
      while (frame->outer)
         frame = frame->outer;
      tl_assert(frame->depth == 0);
      while (frame) {
         frame2 = frame->inner;
         if (frame2) tl_assert(1 + frame->depth == frame2->depth);
         sg_free(frame);
         frame = frame2;
      }
      shadowStacks[child] = NULL;
   }

   tl_assert(shadowStacks[child] == NULL);
   tl_assert(siTrees[child] == NULL);

   /* Set up the initial stack frame. */
   shadowStacks[child] = new_root_StackFrame();

   /* and set up the child's stack block interval tree. */
   siTrees[child] = new_StackTree();
}

/* Once a thread is ready to go, the core calls here.  We take the
   opportunity to push a second frame on its stack, with the
   presumably valid SP value that is going to be used for the thread's
   startup.  Hence we should always wind up with a valid outermost
   frame for the thread. */
static void shadowStack_set_initial_SP ( ThreadId tid )
{
   StackFrame* sf;
   tl_assert(is_sane_TId(tid));
   sf = shadowStacks[tid];
   tl_assert(sf != NULL);
   tl_assert(sf->outer == NULL);
   tl_assert(sf->inner == NULL);
   tl_assert(sf->creation_sp == ~0UL);
   shadowStack_new_frame( tid, 0, VG_(get_SP)(tid),
                               0, VG_(get_IP)(tid), NULL );
}


//////////////////////////////////////////////////////////////
//                                                          //
// main-ish                                                 //
//                                                          //
//////////////////////////////////////////////////////////////

/* CALLED indirectly FROM GENERATED CODE.  Calls here are created by
   sp-change analysis, as requested in pc_pre_clo_int(). */
void sg_die_mem_stack ( Addr old_SP, SizeT len ) {
   ThreadId  tid = VG_(get_running_tid)();
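   /* The core is telling us that [old_SP, old_SP+len) has died,
      i.e. SP has moved up to old_SP+len; unwind the shadow stack to
      match. */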
   shadowStack_unwind( tid, old_SP+len );
}

void sg_pre_clo_init ( void ) {
   ourGlobals_init();
   init_StackBlocks_set();
   init_GlobalBlock_set();
}

void sg_post_clo_init ( void ) {
}

void sg_pre_thread_ll_create ( ThreadId parent, ThreadId child ) {
   shadowStack_thread_create(parent, child);
}

void sg_pre_thread_first_insn ( ThreadId tid ) {
   shadowStack_set_initial_SP(tid);
}

void sg_fini(Int exitcode)
{
   if (VG_(clo_stats)) {
      VG_(message)(Vg_DebugMsg,
         " sg_:  %'llu total accesses, of which:\n", stats__total_accesses);
      VG_(message)(Vg_DebugMsg,
         " sg_:     stack0: %'12llu classify\n",
         stats__classify_Stack0);
      VG_(message)(Vg_DebugMsg,
         " sg_:     stackN: %'12llu classify\n",
         stats__classify_StackN);
      VG_(message)(Vg_DebugMsg,
         " sg_:     global: %'12llu classify\n",
         stats__classify_Global);
      VG_(message)(Vg_DebugMsg,
         " sg_:    unknown: %'12llu classify\n",
         stats__classify_Unknown);
      VG_(message)(Vg_DebugMsg,
         " sg_:  %'llu Invars preened, of which %'llu changed\n",
         stats__Invars_preened, stats__Invars_changed);
      VG_(message)(Vg_DebugMsg,
         " sg_:   t_i_b_MT: %'12llu\n", stats__t_i_b_empty);
      VG_(message)(Vg_DebugMsg,
         " sg_:     qcache: %'llu searches, %'llu probes, %'llu misses\n",
         stats__qcache_queries, stats__qcache_probes, stats__qcache_misses);
      VG_(message)(Vg_DebugMsg,
         " sg_:  htab-fast: %'llu hits\n",
         stats__htab_fast);
      VG_(message)(Vg_DebugMsg,
         " sg_:  htab-slow: %'llu searches, %'llu probes, %'llu resizes\n",
         stats__htab_searches, stats__htab_probes, stats__htab_resizes);
   }
}


/*--------------------------------------------------------------------*/
/*--- end                                                sg_main.c ---*/
/*--------------------------------------------------------------------*/
