
/*--------------------------------------------------------------------*/
/*--- Management of the translation table and cache.               ---*/
/*---                                                 m_transtab.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2000-2015 Julian Seward
      jseward@acm.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "pub_core_basics.h"
#include "pub_core_debuglog.h"
#include "pub_core_machine.h"    // For VG_(machine_get_VexArchInfo)
#include "pub_core_libcbase.h"
#include "pub_core_vki.h"        // to keep pub_core_libcproc.h happy, sigh
#include "pub_core_libcproc.h"   // VG_(invalidate_icache)
#include "pub_core_libcassert.h"
#include "pub_core_libcprint.h"
#include "pub_core_options.h"
#include "pub_core_tooliface.h"  // For VG_(details).avg_translation_sizeB
#include "pub_core_transtab.h"
#include "pub_core_aspacemgr.h"
#include "pub_core_mallocfree.h" // VG_(out_of_memory_NORETURN)
#include "pub_core_xarray.h"
#include "pub_core_dispatch.h"   // For VG_(disp_cp*) addresses


#define DEBUG_TRANSTAB 0


/*-------------------------------------------------------------*/
/*--- Management of the FIFO-based translation table+cache. ---*/
/*-------------------------------------------------------------*/

/* Nr of sectors provided via command line parameter. */
UInt VG_(clo_num_transtab_sectors) = N_SECTORS_DEFAULT;
/* Nr of sectors.
   Will be set by VG_(init_tt_tc) to VG_(clo_num_transtab_sectors). */
static SECno n_sectors = 0;

/* Average size of a transtab code entry. 0 means to use the tool
   provided default. */
UInt VG_(clo_avg_transtab_entry_size) = 0;

/*------------------ CONSTANTS ------------------*/
/* Number of entries in the hash table of each sector.  This needs to
   be a prime number to work properly, and it must be <= 65535, so that
   a TTE index fits in a UShort, leaving room for 0xFFFF
   (EC2TTE_DELETED, HTT_DELETED) to denote 'deleted' and 0xFFFE
   (HTT_EMPTY) to denote 'empty' in the hash table.
   It is strongly recommended not to change this.
   65521 is the largest prime <= 65535. */
#define N_HTTES_PER_SECTOR /*10007*/ /*30011*/ /*40009*/ 65521

#define EC2TTE_DELETED  0xFFFF /* 16-bit special value */
#define HTT_DELETED     EC2TTE_DELETED
#define HTT_EMPTY       0xFFFE

// HTTno is the Sector->htt hash table index. Must be the same type as TTEno.
typedef UShort HTTno;

/* Because each sector contains a hash table of TTEntries, we need to
   specify the maximum allowable loading, after which the sector is
   deemed full. */
#define SECTOR_TT_LIMIT_PERCENT 65

/* The sector is deemed full when this many entries are in it. */
#define N_TTES_PER_SECTOR \
           ((N_HTTES_PER_SECTOR * SECTOR_TT_LIMIT_PERCENT) / 100)
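
/* Illustrative arithmetic: with the default N_HTTES_PER_SECTOR of
   65521 and a 65% load limit, N_TTES_PER_SECTOR works out to
   (65521 * 65) / 100 = 42588 usable TT entries per sector. */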

/* Equivalence classes for fast address range deletion.  There are 1 +
   2^ECLASS_WIDTH bins.  The highest one, ECLASS_MISC, describes an
   address range which does not fall cleanly within any specific bin.
   Note that ECLASS_SHIFT + ECLASS_WIDTH must be < 32.
   ECLASS_N must fit in an EClassNo. */
#define ECLASS_SHIFT 11
#define ECLASS_WIDTH 8
#define ECLASS_MISC  (1 << ECLASS_WIDTH)
#define ECLASS_N     (1 + ECLASS_MISC)
STATIC_ASSERT(ECLASS_SHIFT + ECLASS_WIDTH < 32);

typedef UShort EClassNo;
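
/* Worked example of the binning scheme (values follow from the macros
   above): with ECLASS_SHIFT == 11 and ECLASS_WIDTH == 8, each bin
   covers one 2048-byte (1 << ECLASS_SHIFT) chunk of address space,
   selected by bits [18:11] of the address.  A range starting at
   0x401800 with length 0x100 has
   ((0x401800 >> 11) & 0xFF) == ((0x4018FF >> 11) & 0xFF) == 0x03,
   so it lands cleanly in eclass 3; a range whose low and high bin
   bits differ is lumped into ECLASS_MISC.  See range_to_eclass()
   below. */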

/*------------------ TYPES ------------------*/

/* In edges ("to-me") in the graph created by chaining. */
typedef
   struct {
      SECno from_sNo;   /* sector number */
      TTEno from_tteNo; /* TTE number in given sector */
      UInt  from_offs: (sizeof(UInt)*8)-1;  /* code offset from TCEntry::tcptr
                                               where the patch is */
      Bool  to_fastEP:1; /* Is the patch to a fast or slow entry point? */
   }
   InEdge;


/* Out edges ("from-me") in the graph created by chaining. */
typedef
   struct {
      SECno to_sNo;    /* sector number */
      TTEno to_tteNo;  /* TTE number in given sector */
      UInt  from_offs; /* code offset in owning translation where patch is */
   }
   OutEdge;


#define N_FIXED_IN_EDGE_ARR 3
typedef
   struct {
      Bool     has_var:1; /* True if var is used (then n_fixed must be 0) */
      UInt     n_fixed: (sizeof(UInt)*8)-1; /* 0 .. N_FIXED_IN_EDGE_ARR */
      union {
         InEdge   fixed[N_FIXED_IN_EDGE_ARR];   /* if !has_var */
         XArray*  var; /* XArray* of InEdge */  /* if  has_var */
      } edges;
   }
   InEdgeArr;

#define N_FIXED_OUT_EDGE_ARR 2
typedef
   struct {
      Bool    has_var:1; /* True if var is used (then n_fixed must be 0) */
      UInt    n_fixed: (sizeof(UInt)*8)-1; /* 0 .. N_FIXED_OUT_EDGE_ARR */
      union {
         OutEdge fixed[N_FIXED_OUT_EDGE_ARR];   /* if !has_var */
         XArray* var; /* XArray* of OutEdge */  /* if  has_var */
      } edges;
   }
   OutEdgeArr;
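
/* A sketch of how the small-array optimisation above behaves (see
   InEdgeArr__add / OutEdgeArr__add below for the real code): the first
   N_FIXED_IN_EDGE_ARR (resp. N_FIXED_OUT_EDGE_ARR) edges live inline
   in .edges.fixed with has_var == False; adding one more spills all of
   them into a heap-allocated XArray, after which has_var == True and
   n_fixed stays 0:

      InEdgeArr iea;
      VG_(memset)(&iea, 0, sizeof(iea));
      // adds 1..3 fill iea.edges.fixed[0..2], n_fixed counts up
      // add 4 copies fixed[0..2] plus the new edge into an XArray
*/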


/* A translation-table entry.  This indicates precisely which areas of
   guest code are included in the translation, and contains all other
   auxiliary info too.  */
typedef
   struct {
      union {
         struct {
            /* Profiling only: the count and weight (arbitrary meaning) for
               this translation.  Weight is a property of the translation
               itself and computed once when the translation is created.
               Count is an entry count for the translation and is
               incremented by 1 every time the translation is used, if we
               are profiling. */
            ULong    count;
            UShort   weight;
         } prof; // if status == InUse
         TTEno next_empty_tte; // if status != InUse
      } usage;

      /* Status of the slot.  Note, we need to be able to do lazy
         deletion, hence the Deleted state. */
      enum { InUse, Deleted, Empty } status;

      /* 64-bit aligned pointer to one or more 64-bit words containing
         the corresponding host code (must be in the same sector!)
         This is a pointer into the sector's tc (code) area. */
      ULong* tcptr;

      /* This is the original guest address that purportedly is the
         entry point of the translation.  You might think that .entry
         should be the same as .vge->base[0], and most of the time it
         is.  However, when doing redirections, that is not the case.
         .vge must always correctly describe the guest code sections
         from which this translation was made.  However, .entry may or
         may not be a lie, depending on whether or not we're doing
         redirection. */
      Addr entry;

      /* This structure describes precisely what ranges of guest code
         the translation covers, so we can decide whether or not to
         delete it when translations of a given address range are
         invalidated. */
      VexGuestExtents vge;

      /* Address range summary info: these are pointers back to
         eclass[] entries in the containing Sector.  Those entries in
         turn point back here -- the two structures are mutually
         redundant but both necessary to make fast deletions work.
         The eclass info is similar to, and derived from, this entry's
         'vge' field, but it is not the same */
      UShort   n_tte2ec;      // # tte2ec pointers (1 to 3)
      EClassNo tte2ec_ec[3];  // for each, the eclass #
      UInt     tte2ec_ix[3];  // and the index within the eclass.
      // for i in 0 .. n_tte2ec-1
      //    sec->ec2tte[ tte2ec_ec[i] ][ tte2ec_ix[i] ]
      // should be the index
      // of this TTEntry in the containing Sector's tt array.

      /* Admin information for chaining.  'in_edges' is a set of the
         patch points which jump to this translation -- hence are
         predecessors in the control flow graph.  'out_edges' points
         to successors in the control flow graph -- translations to
         which this one has a patched jump.  In short these are just
         backwards and forwards edges in the graph of patched-together
         blocks.  The 'in_edges' contain slightly more info, enough
         that we can undo the chaining of each mentioned patch point.
         The 'out_edges' list exists only so that we can visit the
         'in_edges' entries of all blocks we're patched through to, in
         order to remove ourselves from them when we're deleted. */

      /* A translation can disappear for two reasons:
          1. erased (as part of the oldest sector cleanup) when the
             youngest sector is full.
          2. discarded due to calls to VG_(discard_translations).
             VG_(discard_translations) sets the status of the
             translation to 'Deleted'.
             Among others, the gdbserver discards one or more translations
             when a breakpoint is inserted or removed at an Addr,
             or when single-stepping mode is enabled/disabled,
             or when a translation is instrumented for gdbserver
             (all the target jumps of this translation are
              invalidated).

         So, it is possible that the translation A to be patched
         (to obtain a patched jump from A to B) is invalidated
         after B is translated and before A is patched.
         In case a translation is erased or discarded, the patching
         cannot be done.  VG_(tt_tc_do_chaining) and find_TTEntry_from_hcode
         check that the 'from' translation still exists before
         doing the patching.

         Is it safe to erase or discard the current translation E being
         executed?  Amazingly, yes, it is safe.
         Here is the explanation:

         The translation E being executed can only be erased if a new
         translation N is being done.  A new translation is done only
         if the host addr is a not-yet-patched jump to another
         translation.  In such a case, the guest address of N is
         assigned to the PC in the VEX state.  Control is returned
         to the scheduler.  N will be translated.  This can erase the
         translation E (in case of sector full).  VG_(tt_tc_do_chaining)
         will not do the chaining to a not-found translation E.
         The execution will continue at the current guest PC
         (i.e. the translation N).
         => it is safe to erase the current translation being executed.

         The current translation E being executed can also be discarded
         (e.g. by gdbserver).  VG_(discard_translations) will mark
         this translation E as Deleted, but the translation itself
         is not erased.  In particular, its host code can only
         be overwritten or erased when a new translation is done.
         A new translation will only be done if a not-yet-translated
         jump is to be executed.  The execution of the Deleted translation
         E will continue till a non-patched jump is encountered.
         This situation is then similar to the 'erasing' case above:
         the current translation E can be erased or overwritten, as the
         execution will continue at the new translation N.

      */

      /* It is possible, although very unlikely, that a block A has
         more than one patched jump to block B.  This could happen if
         (eg) A finishes "jcond B; jmp B".

         This means in turn that B's in_edges set can list A more than
         once (twice in this example).  However, each such entry must
         have a different from_offs, since a patched jump can only
         jump to one place at once (it's meaningless for it to have
         multiple destinations.)  IOW, the successor and predecessor
         edges in the graph are not uniquely determined by a
         TTEntry --> TTEntry pair, but rather by a
         (TTEntry,offset) --> TTEntry triple.

         If A has multiple edges to B then B will mention A multiple
         times in its in_edges.  To make things simpler, we then
         require that A mentions B exactly the same number of times in
         its out_edges.  Furthermore, a matching out-in pair must have
         the same offset (from_offs).  This facilitates sanity
         checking, and it facilitates establishing the invariant that
         an out_edges set may not have duplicates when using the
         equality defined by (TTEntry,offset).  Hence the out_edges
         and in_edges sets really do both have set semantics.

         eg if  A has been patched to B at offsets 42 and 87 (in A)
         then   A.out_edges = { (B,42), (B,87) }   (in any order)
         and    B.in_edges  = { (A,42), (A,87) }   (in any order)

         Hence for each node pair P->Q in the graph, there's a 1:1
         mapping between P.out_edges and Q.in_edges.
      */
      InEdgeArr  in_edges;
      OutEdgeArr out_edges;
   }
   TTEntry;


/* A structure used for mapping host code addresses back to the
   relevant TTEntry.  Used when doing chaining, for finding the
   TTEntry to which some arbitrary patch address belongs. */
typedef
   struct {
      UChar* start;
      UInt   len;
      TTEno  tteNo;
   }
   HostExtent;

/* Finally, a sector itself.  Each sector contains an array of
   TCEntries, which hold code, and an array of TTEntries, containing
   all required administrative info.  Profiling is supported using the
   TTEntry usage.prof.count and usage.prof.weight fields, if required.

   If the sector is not in use, all three pointers are NULL and
   tt_n_inuse is zero.
*/
typedef
   struct {
      /* The TCEntry area.  Size of this depends on the average
         translation size.  We try and size it so it becomes full
         precisely when this sector's translation table (tt) reaches
         its load limit (SECTOR_TT_LIMIT_PERCENT). */
      ULong* tc;

      /* A hash table, mapping guest address to an index in the tt array.
         htt is a fixed size, always containing
         exactly N_HTTES_PER_SECTOR entries. */
      TTEno* htt;

      /* The TTEntry array.  This is a fixed size, always containing
         exactly N_TTES_PER_SECTOR entries. */
      TTEntry* tt;

      /* This points to the current allocation point in tc. */
      ULong* tc_next;

      /* The count of tt entries with state InUse. */
      Int tt_n_inuse;

      /* A list of Empty/Deleted entries, chained by tte->next_empty_tte */
      TTEno empty_tt_list;

      /* Expandable arrays of tt indices for each of the ECLASS_N
         address range equivalence classes.  These hold indices into
         the containing sector's tt array, which in turn should point
         back here. */
      Int     ec2tte_size[ECLASS_N];
      Int     ec2tte_used[ECLASS_N];
      TTEno*  ec2tte[ECLASS_N];

      /* The host extents.  The [start, +len) ranges are constructed
         in strictly non-overlapping order, so we can binary search
         them at any time. */
      XArray* host_extents; /* XArray* of HostExtent */
   }
   Sector;


/*------------------ DECLS ------------------*/

/* The root data structure is an array of sectors.  The index of the
   youngest sector is recorded, and new translations are put into that
   sector.  When it fills up, we move along to the next sector and
   start to fill that up, wrapping around at the end of the array.
   That way, once all n_sectors have been brought into use for the
   first time, and are full, we then re-use the oldest sector,
   endlessly.

   When running, youngest_sector should be >= 0 and < n_sectors.  The
   initial value INV_SNO indicates the TT/TC system is not yet
   initialised.
*/
static Sector sectors[MAX_N_SECTORS];
static SECno  youngest_sector = INV_SNO;

/* The number of ULongs in each TCEntry area.  This is computed once
   at startup and does not change. */
static Int    tc_sector_szQ = 0;


/* A list of sector numbers, in the order which they should be
   searched to find translations.  This is an optimisation to be used
   when searching for translations and should not affect
   correctness.  INV_SNO denotes "no entry". */
static SECno sector_search_order[MAX_N_SECTORS];


/* Fast helper for the TC.  A direct-mapped cache which holds a set of
   recently used (guest address, host address) pairs.  This array is
   referred to directly from m_dispatch/dispatch-<platform>.S.

   Entries in tt_fast may refer to any valid TC entry, regardless of
   which sector it's in.  Consequently we must be very careful to
   invalidate this cache when TC entries are changed or disappear.

   A special .guest address -- TRANSTAB_BOGUS_GUEST_ADDR -- must be
   pointed at to cause that cache entry to miss.  This relies on the
   assumption that no guest code actually has that address, hence a
   value 0x1 seems good.  m_translate gives the client a synthetic
   segfault if it tries to execute at this address.
*/
/*
typedef
   struct {
      Addr guest;
      Addr host;
   }
   FastCacheEntry;
*/
/*global*/ __attribute__((aligned(16)))
           FastCacheEntry VG_(tt_fast)[VG_TT_FAST_SIZE];
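
/* A minimal C sketch (illustrative only; the real fast path lives in
   hand-written assembly in m_dispatch/dispatch-<platform>.S) of the
   lookup the dispatcher performs on this cache: */
#if 0
static Addr sketch_fast_lookup ( Addr guest_pc )
{
   FastCacheEntry* e = &VG_(tt_fast)[VG_TT_FAST_HASH(guest_pc)];
   if (LIKELY(e->guest == guest_pc))
      return e->host; /* hit: continue execution in the translation */
   return 0;          /* miss: fall back to the full transtab lookup */
}
#endif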

/* Make sure we're not used before initialisation. */
static Bool init_done = False;


/*------------------ STATS DECLS ------------------*/

/* Number of fast-cache updates and flushes done. */
static ULong n_fast_flushes = 0;
static ULong n_fast_updates = 0;

/* Number of full lookups done. */
static ULong n_full_lookups = 0;
static ULong n_lookup_probes = 0;

/* Number/osize/tsize of translations entered; also the number of
   those for which self-checking was requested. */
static ULong n_in_count    = 0;
static ULong n_in_osize    = 0;
static ULong n_in_tsize    = 0;
static ULong n_in_sc_count = 0;

/* Number/osize of translations discarded due to lack of space. */
static ULong n_dump_count = 0;
static ULong n_dump_osize = 0;
static ULong n_sectors_recycled = 0;

/* Number/osize of translations discarded due to requests to do so. */
static ULong n_disc_count = 0;
static ULong n_disc_osize = 0;


/*-------------------------------------------------------------*/
/*--- Misc                                                  ---*/
/*-------------------------------------------------------------*/

static void* ttaux_malloc ( const HChar* tag, SizeT n )
{
   return VG_(arena_malloc)(VG_AR_TTAUX, tag, n);
}

static void ttaux_free ( void* p )
{
   VG_(arena_free)(VG_AR_TTAUX, p);
}


/*-------------------------------------------------------------*/
/*--- Chaining support                                      ---*/
/*-------------------------------------------------------------*/

static inline TTEntry* index_tte ( SECno sNo, TTEno tteNo )
{
   vg_assert(sNo < n_sectors);
   vg_assert(tteNo < N_TTES_PER_SECTOR);
   Sector* s = &sectors[sNo];
   vg_assert(s->tt);
   TTEntry* tte = &s->tt[tteNo];
   vg_assert(tte->status == InUse);
   return tte;
}

static void InEdge__init ( InEdge* ie )
{
   ie->from_sNo   = INV_SNO; /* invalid */
   ie->from_tteNo = 0;
   ie->from_offs  = 0;
   ie->to_fastEP  = False;
}

static void OutEdge__init ( OutEdge* oe )
{
   oe->to_sNo    = INV_SNO; /* invalid */
   oe->to_tteNo  = 0;
   oe->from_offs = 0;
}

static void TTEntry__init ( TTEntry* tte )
{
   VG_(memset)(tte, 0, sizeof(*tte));
}

static UWord InEdgeArr__size ( const InEdgeArr* iea )
{
   if (iea->has_var) {
      vg_assert(iea->n_fixed == 0);
      return VG_(sizeXA)(iea->edges.var);
   } else {
      vg_assert(iea->n_fixed <= N_FIXED_IN_EDGE_ARR);
      return iea->n_fixed;
   }
}

static void InEdgeArr__makeEmpty ( InEdgeArr* iea )
{
   if (iea->has_var) {
      vg_assert(iea->n_fixed == 0);
      VG_(deleteXA)(iea->edges.var);
      iea->edges.var = NULL;
      iea->has_var = False;
   } else {
      vg_assert(iea->n_fixed <= N_FIXED_IN_EDGE_ARR);
      iea->n_fixed = 0;
   }
}

static
InEdge* InEdgeArr__index ( InEdgeArr* iea, UWord i )
{
   if (iea->has_var) {
      vg_assert(iea->n_fixed == 0);
      return (InEdge*)VG_(indexXA)(iea->edges.var, i);
   } else {
      vg_assert(i < iea->n_fixed);
      return &iea->edges.fixed[i];
   }
}

static
void InEdgeArr__deleteIndex ( InEdgeArr* iea, UWord i )
{
   if (iea->has_var) {
      vg_assert(iea->n_fixed == 0);
      VG_(removeIndexXA)(iea->edges.var, i);
   } else {
      vg_assert(i < iea->n_fixed);
      for (; i+1 < iea->n_fixed; i++) {
         iea->edges.fixed[i] = iea->edges.fixed[i+1];
      }
      iea->n_fixed--;
   }
}

static
void InEdgeArr__add ( InEdgeArr* iea, InEdge* ie )
{
   if (iea->has_var) {
      vg_assert(iea->n_fixed == 0);
      VG_(addToXA)(iea->edges.var, ie);
   } else {
      vg_assert(iea->n_fixed <= N_FIXED_IN_EDGE_ARR);
      if (iea->n_fixed == N_FIXED_IN_EDGE_ARR) {
         /* The fixed array is full, so we have to initialise an
            XArray and copy the fixed array into it. */
         XArray *var = VG_(newXA)(ttaux_malloc, "transtab.IEA__add",
                                  ttaux_free,
                                  sizeof(InEdge));
         VG_(hintSizeXA) (var, iea->n_fixed + 1);
         UWord i;
         for (i = 0; i < iea->n_fixed; i++) {
            VG_(addToXA)(var, &iea->edges.fixed[i]);
         }
         VG_(addToXA)(var, ie);
         iea->n_fixed = 0;
         iea->has_var = True;
         iea->edges.var = var;
      } else {
         /* Just add to the fixed array. */
         iea->edges.fixed[iea->n_fixed++] = *ie;
      }
   }
}

static UWord OutEdgeArr__size ( const OutEdgeArr* oea )
{
   if (oea->has_var) {
      vg_assert(oea->n_fixed == 0);
      return VG_(sizeXA)(oea->edges.var);
   } else {
      vg_assert(oea->n_fixed <= N_FIXED_OUT_EDGE_ARR);
      return oea->n_fixed;
   }
}

static void OutEdgeArr__makeEmpty ( OutEdgeArr* oea )
{
   if (oea->has_var) {
      vg_assert(oea->n_fixed == 0);
      VG_(deleteXA)(oea->edges.var);
      oea->edges.var = NULL;
      oea->has_var = False;
   } else {
      vg_assert(oea->n_fixed <= N_FIXED_OUT_EDGE_ARR);
      oea->n_fixed = 0;
   }
}

static
OutEdge* OutEdgeArr__index ( OutEdgeArr* oea, UWord i )
{
   if (oea->has_var) {
      vg_assert(oea->n_fixed == 0);
      return (OutEdge*)VG_(indexXA)(oea->edges.var, i);
   } else {
      vg_assert(i < oea->n_fixed);
      return &oea->edges.fixed[i];
   }
}

static
void OutEdgeArr__deleteIndex ( OutEdgeArr* oea, UWord i )
{
   if (oea->has_var) {
      vg_assert(oea->n_fixed == 0);
      VG_(removeIndexXA)(oea->edges.var, i);
   } else {
      vg_assert(i < oea->n_fixed);
      for (; i+1 < oea->n_fixed; i++) {
         oea->edges.fixed[i] = oea->edges.fixed[i+1];
      }
      oea->n_fixed--;
   }
}

static
void OutEdgeArr__add ( OutEdgeArr* oea, OutEdge* oe )
{
   if (oea->has_var) {
      vg_assert(oea->n_fixed == 0);
      VG_(addToXA)(oea->edges.var, oe);
   } else {
      vg_assert(oea->n_fixed <= N_FIXED_OUT_EDGE_ARR);
      if (oea->n_fixed == N_FIXED_OUT_EDGE_ARR) {
         /* The fixed array is full, so we have to initialise an
            XArray and copy the fixed array into it. */
         XArray *var = VG_(newXA)(ttaux_malloc, "transtab.OEA__add",
                                  ttaux_free,
                                  sizeof(OutEdge));
         VG_(hintSizeXA) (var, oea->n_fixed+1);
         UWord i;
         for (i = 0; i < oea->n_fixed; i++) {
            VG_(addToXA)(var, &oea->edges.fixed[i]);
         }
         VG_(addToXA)(var, oe);
         oea->n_fixed = 0;
         oea->has_var = True;
         oea->edges.var = var;
      } else {
         /* Just add to the fixed array. */
         oea->edges.fixed[oea->n_fixed++] = *oe;
      }
   }
}

static
Int HostExtent__cmpOrd ( const void* v1, const void* v2 )
{
   const HostExtent* hx1 = v1;
   const HostExtent* hx2 = v2;
   if (hx1->start + hx1->len <= hx2->start) return -1;
   if (hx2->start + hx2->len <= hx1->start) return 1;
   return 0; /* partial overlap */
}
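
/* An example of the ordering this comparator induces: [0x100, +0x20)
   sorts before [0x120, +0x10), while any two ranges that overlap at
   all compare equal.  find_TTEntry_from_hcode() below exploits this by
   probing with a length-1 "range" (key.len = 1), so the binary search
   locates whichever extent contains the probe address. */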

/* True if hx is a dead host extent, i.e. corresponds to host code
   of an entry that was invalidated. */
static
Bool HostExtent__is_dead (const HostExtent* hx, const Sector* sec)
{
   const TTEno tteNo = hx->tteNo;
#define LDEBUG(m) if (DEBUG_TRANSTAB)                           \
      VG_(printf) (m                                            \
                   " start 0x%p len %u sector %d ttslot %u"     \
                   " tt.entry 0x%lx tt.tcptr 0x%p\n",           \
                   hx->start, hx->len, (int)(sec - sectors),    \
                   hx->tteNo,                                   \
                   sec->tt[tteNo].entry, sec->tt[tteNo].tcptr)

   /* Entry might have been invalidated and not re-used yet. */
   if (sec->tt[tteNo].status == Deleted) {
      LDEBUG("found deleted entry");
      return True;
   }
   /* Maybe we found this entry via a host_extents entry which was
      inserted for an entry which was changed to Deleted and then
      re-used afterwards.  If this entry was re-used, then its tcptr
      is >= the host_extents start (i.e. the previous tcptr) + len.
      This is the case as there is no re-use of host code: a new
      entry or re-used entry always gets "higher value" host code. */
   if ((UChar*) sec->tt[tteNo].tcptr >= hx->start + hx->len) {
      LDEBUG("found re-used entry");
      return True;
   }

   return False;
#undef LDEBUG
}

static __attribute__((noinline))
Bool find_TTEntry_from_hcode( /*OUT*/SECno* from_sNo,
                              /*OUT*/TTEno* from_tteNo,
                              void* hcode )
{
   SECno i;

   /* Search order logic copied from VG_(search_transtab). */
   for (i = 0; i < n_sectors; i++) {
      SECno sno = sector_search_order[i];
      if (UNLIKELY(sno == INV_SNO))
         return False; /* run out of sectors to search */

      const Sector* sec = &sectors[sno];
      const XArray* /* of HostExtent */ host_extents = sec->host_extents;
      vg_assert(host_extents);

      HostExtent key;
      VG_(memset)(&key, 0, sizeof(key));
      key.start = hcode;
      key.len = 1;
      Word firstW = -1, lastW = -1;
      Bool found  = VG_(lookupXA_UNSAFE)(
                       host_extents, &key, &firstW, &lastW,
                       HostExtent__cmpOrd );
      vg_assert(firstW == lastW); // always true, even if not found
      if (found) {
         HostExtent* hx = VG_(indexXA)(host_extents, firstW);
         TTEno tteNo = hx->tteNo;
         /* Do some additional sanity checks. */
         vg_assert(tteNo < N_TTES_PER_SECTOR);

         /* If this hx entry corresponds to dead host code, we must
            report that this code was not found, as it cannot be patched. */
         if (HostExtent__is_dead (hx, sec))
            return False;

         vg_assert(sec->tt[tteNo].status == InUse);
         /* Can only half check that the found TTEntry contains hcode,
            due to not having a length value for the hcode in the
            TTEntry. */
         vg_assert((UChar*)sec->tt[tteNo].tcptr <= (UChar*)hcode);
         /* Looks plausible */
         *from_sNo   = sno;
         *from_tteNo = tteNo;
         return True;
      }
   }
   return False;
}


/* Figure out whether or not hcode is jitted code present in the main
   code cache (but not in the no-redir cache).  Used for sanity
   checking. */
static Bool is_in_the_main_TC ( const void* hcode )
{
   SECno i, sno;
   for (i = 0; i < n_sectors; i++) {
      sno = sector_search_order[i];
      if (sno == INV_SNO)
         break; /* run out of sectors to search */
      if ((const UChar*)hcode >= (const UChar*)sectors[sno].tc
          && (const UChar*)hcode <= (const UChar*)sectors[sno].tc_next
                              + sizeof(ULong) - 1)
         return True;
   }
   return False;
}


/* Fulfill a chaining request, and record admin info so we
   can undo it later, if required.
*/
void VG_(tt_tc_do_chaining) ( void* from__patch_addr,
                              SECno to_sNo,
                              TTEno to_tteNo,
                              Bool  to_fastEP )
{
   /* Get the CPU info established at startup. */
   VexArch     arch_host = VexArch_INVALID;
   VexArchInfo archinfo_host;
   VG_(bzero_inline)(&archinfo_host, sizeof(archinfo_host));
   VG_(machine_get_VexArchInfo)( &arch_host, &archinfo_host );
   VexEndness endness_host = archinfo_host.endness;

   // host_code is where we're patching to.  So it needs to
   // take into account whether we're jumping to the slow
   // or fast entry point.  By definition, the fast entry point
   // is exactly one event check's worth of code along from
   // the slow (tcptr) entry point.
   TTEntry* to_tte    = index_tte(to_sNo, to_tteNo);
   void*    host_code = ((UChar*)to_tte->tcptr)
                        + (to_fastEP ? LibVEX_evCheckSzB(arch_host) : 0);

   // stay sane -- the patch point (dst) is in this sector's code cache
   vg_assert( (UChar*)host_code >= (UChar*)sectors[to_sNo].tc );
   vg_assert( (UChar*)host_code <= (UChar*)sectors[to_sNo].tc_next
                                   + sizeof(ULong) - 1 );

   /* Find the TTEntry for the from__ code.  This isn't simple since
      we only know the patch address, which is going to be somewhere
      inside the from_ block. */
   SECno from_sNo   = INV_SNO;
   TTEno from_tteNo = INV_TTE;
   Bool from_found
      = find_TTEntry_from_hcode( &from_sNo, &from_tteNo,
                                 from__patch_addr );
   if (!from_found) {
      // The from code might have been discarded due to sector re-use
      // or marked Deleted due to translation invalidation.
      // In such a case, don't do the chaining.
      VG_(debugLog)(1,"transtab",
                    "host code %p not found (discarded? sector recycled?)"
                    " => no chaining done\n",
                    from__patch_addr);
      return;
   }

   TTEntry* from_tte = index_tte(from_sNo, from_tteNo);

   /* Get VEX to do the patching itself.  We have to hand it off
      since it is host-dependent. */
   VexInvalRange vir
      = LibVEX_Chain(
           arch_host, endness_host,
           from__patch_addr,
           VG_(fnptr_to_fnentry)(
              to_fastEP ? &VG_(disp_cp_chain_me_to_fastEP)
                        : &VG_(disp_cp_chain_me_to_slowEP)),
           (void*)host_code
        );
   VG_(invalidate_icache)( (void*)vir.start, vir.len );

   /* Now do the tricky bit -- update the ch_succs and ch_preds info
      for the two translations involved, so we can undo the chaining
      later, which we will have to do if the to_ block gets removed
      for whatever reason. */

   /* This is the new from_ -> to_ link to add. */
   InEdge ie;
   InEdge__init(&ie);
   ie.from_sNo   = from_sNo;
   ie.from_tteNo = from_tteNo;
   ie.to_fastEP  = to_fastEP;
   HWord from_offs = (HWord)( (UChar*)from__patch_addr
                              - (UChar*)from_tte->tcptr );
   vg_assert(from_offs < 100000/* let's say */);
   ie.from_offs  = (UInt)from_offs;

   /* This is the new to_ -> from_ backlink to add. */
   OutEdge oe;
   OutEdge__init(&oe);
   oe.to_sNo    = to_sNo;
   oe.to_tteNo  = to_tteNo;
   oe.from_offs = (UInt)from_offs;

   /* Add .. */
   InEdgeArr__add(&to_tte->in_edges, &ie);
   OutEdgeArr__add(&from_tte->out_edges, &oe);
}


/* Unchain one patch, as described by the specified InEdge.  For
   sanity check purposes only (to check that the patched location is
   as expected) it also requires the fast and slow entry point
   addresses of the destination block (that is, the block that owns
   this InEdge). */
__attribute__((noinline))
static void unchain_one ( VexArch arch_host, VexEndness endness_host,
                          InEdge* ie,
                          void* to_fastEPaddr, void* to_slowEPaddr )
{
   vg_assert(ie);
   TTEntry* tte
      = index_tte(ie->from_sNo, ie->from_tteNo);
   UChar* place_to_patch
      = ((UChar*)tte->tcptr) + ie->from_offs;
   UChar* disp_cp_chain_me
      = VG_(fnptr_to_fnentry)(
           ie->to_fastEP ? &VG_(disp_cp_chain_me_to_fastEP)
                         : &VG_(disp_cp_chain_me_to_slowEP)
        );
   UChar* place_to_jump_to_EXPECTED
      = ie->to_fastEP ? to_fastEPaddr : to_slowEPaddr;

   // stay sane: both src and dst for this unchaining are
   // in the main code cache
   vg_assert( is_in_the_main_TC(place_to_patch) ); // src
   vg_assert( is_in_the_main_TC(place_to_jump_to_EXPECTED) ); // dst
   // dst check is ok because LibVEX_UnChain checks that
   // place_to_jump_to_EXPECTED really is the current dst, and
   // asserts if it isn't.
   VexInvalRange vir
       = LibVEX_UnChain( arch_host, endness_host, place_to_patch,
                         place_to_jump_to_EXPECTED, disp_cp_chain_me );
   VG_(invalidate_icache)( (void*)vir.start, vir.len );
}


/* The specified block is about to be deleted.  Update the preds and
   succs of its associated blocks accordingly.  This includes undoing
   any chained jumps to this block. */
static
void unchain_in_preparation_for_deletion ( VexArch arch_host,
                                           VexEndness endness_host,
                                           SECno here_sNo, TTEno here_tteNo )
{
   if (DEBUG_TRANSTAB)
      VG_(printf)("QQQ unchain_in_prep %u.%u...\n", here_sNo, here_tteNo);
   UWord    i, j, n, m;
   Int      evCheckSzB = LibVEX_evCheckSzB(arch_host);
   TTEntry* here_tte   = index_tte(here_sNo, here_tteNo);
   if (DEBUG_TRANSTAB)
      VG_(printf)("... QQQ tt.entry 0x%lx tt.tcptr 0x%p\n",
                  here_tte->entry, here_tte->tcptr);
   vg_assert(here_tte->status == InUse);

   /* Visit all InEdges owned by here_tte. */
   n = InEdgeArr__size(&here_tte->in_edges);
   for (i = 0; i < n; i++) {
      InEdge* ie = InEdgeArr__index(&here_tte->in_edges, i);
      // Undo the chaining.
      UChar* here_slow_EP = (UChar*)here_tte->tcptr;
      UChar* here_fast_EP = here_slow_EP + evCheckSzB;
      unchain_one(arch_host, endness_host, ie, here_fast_EP, here_slow_EP);
      // Find the corresponding entry in the "from" node's out_edges,
      // and remove it.
      TTEntry* from_tte = index_tte(ie->from_sNo, ie->from_tteNo);
      m = OutEdgeArr__size(&from_tte->out_edges);
      vg_assert(m > 0); // it must have at least one entry
      for (j = 0; j < m; j++) {
         OutEdge* oe = OutEdgeArr__index(&from_tte->out_edges, j);
         if (oe->to_sNo == here_sNo && oe->to_tteNo == here_tteNo
             && oe->from_offs == ie->from_offs)
           break;
      }
      vg_assert(j < m); // "oe must be findable"
      OutEdgeArr__deleteIndex(&from_tte->out_edges, j);
   }

   /* Visit all OutEdges owned by here_tte. */
   n = OutEdgeArr__size(&here_tte->out_edges);
   for (i = 0; i < n; i++) {
      OutEdge* oe = OutEdgeArr__index(&here_tte->out_edges, i);
      // Find the corresponding entry in the "to" node's in_edges,
      // and remove it.
      TTEntry* to_tte = index_tte(oe->to_sNo, oe->to_tteNo);
      m = InEdgeArr__size(&to_tte->in_edges);
      vg_assert(m > 0); // it must have at least one entry
      for (j = 0; j < m; j++) {
         InEdge* ie = InEdgeArr__index(&to_tte->in_edges, j);
         if (ie->from_sNo == here_sNo && ie->from_tteNo == here_tteNo
             && ie->from_offs == oe->from_offs)
           break;
      }
      vg_assert(j < m); // "ie must be findable"
      InEdgeArr__deleteIndex(&to_tte->in_edges, j);
   }

   InEdgeArr__makeEmpty(&here_tte->in_edges);
   OutEdgeArr__makeEmpty(&here_tte->out_edges);
}


/*-------------------------------------------------------------*/
/*--- Address-range equivalence class stuff                 ---*/
/*-------------------------------------------------------------*/

/* Return equivalence class number for a range. */

static EClassNo range_to_eclass ( Addr start, UInt len )
{
   UInt mask   = (1 << ECLASS_WIDTH) - 1;
   UInt lo     = (UInt)start;
   UInt hi     = lo + len - 1;
   UInt loBits = (lo >> ECLASS_SHIFT) & mask;
   UInt hiBits = (hi >> ECLASS_SHIFT) & mask;
   if (loBits == hiBits) {
      vg_assert(loBits < ECLASS_N-1);
      return loBits;
   } else {
      return ECLASS_MISC;
   }
}
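
/* Worked example: the range [0x7FF0, +0x20) straddles the 2048-byte
   boundary at 0x8000, giving loBits == ((0x7FF0 >> 11) & 0xFF) == 15
   but hiBits == ((0x800F >> 11) & 0xFF) == 16; the bits differ, so
   the range is classified as ECLASS_MISC. */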


/* Calculates the equivalence class numbers for any VexGuestExtents.
   These are written in *eclasses, which must be big enough to hold 3
   EClassNos.  The number written, between 1 and 3, is returned.  The
   eclasses are presented in order, and any duplicates are removed.
*/

static
Int vexGuestExtents_to_eclasses ( /*OUT*/EClassNo* eclasses,
                                  const VexGuestExtents* vge )
{

#  define SWAP(_lv1,_lv2) \
      do { Int t = _lv1; _lv1 = _lv2; _lv2 = t; } while (0)

   Int i, j, n_ec;
   EClassNo r;

   vg_assert(vge->n_used >= 1 && vge->n_used <= 3);

   n_ec = 0;
   for (i = 0; i < vge->n_used; i++) {
      r = range_to_eclass( vge->base[i], vge->len[i] );
      if (r == ECLASS_MISC)
         goto bad;
      /* only add if we haven't already seen it */
      for (j = 0; j < n_ec; j++)
         if (eclasses[j] == r)
            break;
      if (j == n_ec)
         eclasses[n_ec++] = r;
   }

   if (n_ec == 1)
      return 1;

   if (n_ec == 2) {
      /* sort */
      if (eclasses[0] > eclasses[1])
         SWAP(eclasses[0], eclasses[1]);
      return 2;
   }

   if (n_ec == 3) {
      /* sort */
      if (eclasses[0] > eclasses[2])
         SWAP(eclasses[0], eclasses[2]);
      if (eclasses[0] > eclasses[1])
         SWAP(eclasses[0], eclasses[1]);
      if (eclasses[1] > eclasses[2])
         SWAP(eclasses[1], eclasses[2]);
      return 3;
   }

   /* NOTREACHED */
   vg_assert(0);

  bad:
   eclasses[0] = ECLASS_MISC;
   return 1;

#  undef SWAP
}


/* Add tteno to the set of entries listed for equivalence class ec in
   this sector.  Returns used location in eclass array. */

static
UInt addEClassNo ( /*MOD*/Sector* sec, EClassNo ec, TTEno tteno )
{
   Int    old_sz, new_sz, i, r;
   TTEno  *old_ar, *new_ar;

   vg_assert(ec >= 0 && ec < ECLASS_N);
   vg_assert(tteno < N_TTES_PER_SECTOR);

   if (DEBUG_TRANSTAB) VG_(printf)("ec %d  gets %d\n", ec, (Int)tteno);

   if (sec->ec2tte_used[ec] >= sec->ec2tte_size[ec]) {

      vg_assert(sec->ec2tte_used[ec] == sec->ec2tte_size[ec]);

      old_sz = sec->ec2tte_size[ec];
      old_ar = sec->ec2tte[ec];
      new_sz = old_sz==0 ? 8 : old_sz<64 ? 2*old_sz : (3*old_sz)/2;
      new_ar = ttaux_malloc("transtab.aECN.1",
                            new_sz * sizeof(TTEno));
      for (i = 0; i < old_sz; i++)
         new_ar[i] = old_ar[i];
      if (old_ar)
         ttaux_free(old_ar);
      sec->ec2tte_size[ec] = new_sz;
      sec->ec2tte[ec] = new_ar;

      if (DEBUG_TRANSTAB) VG_(printf)("expand ec %d to %d\n", ec, new_sz);
   }

   /* Common case */
   r = sec->ec2tte_used[ec]++;
   vg_assert(r >= 0 && r < sec->ec2tte_size[ec]);
   sec->ec2tte[ec][r] = tteno;
   return (UInt)r;
}
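
/* The growth policy above yields the size sequence 8, 16, 32, 64, 96,
   144, 216, ...: doubling while the array is small, then growing by
   50% so that large eclass arrays don't over-allocate. */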


/* 'vge' is being added to 'sec' at TT entry 'tteno'.  Add appropriate
   eclass entries to 'sec'. */

static
void upd_eclasses_after_add ( /*MOD*/Sector* sec, TTEno tteno )
{
   Int i, r;
   EClassNo eclasses[3];
   TTEntry* tte;
   vg_assert(tteno >= 0 && tteno < N_TTES_PER_SECTOR);

   tte = &sec->tt[tteno];
   r = vexGuestExtents_to_eclasses( eclasses, &tte->vge );

   vg_assert(r >= 1 && r <= 3);
   tte->n_tte2ec = r;

   for (i = 0; i < r; i++) {
      tte->tte2ec_ec[i] = eclasses[i];
      tte->tte2ec_ix[i] = addEClassNo( sec, eclasses[i], tteno );
   }
}


/* Check the eclass info in 'sec' to ensure it is consistent.  Returns
   True if OK, False if something's not right.  Expensive. */

static Bool sanity_check_eclasses_in_sector ( const Sector* sec )
{
#  define BAD(_str) do { whassup = (_str); goto bad; } while (0)

   const HChar* whassup = NULL;
   Int      j, k, n, ec_idx;
   EClassNo i;
   EClassNo ec_num;
   TTEntry* tte;
   TTEno    tteno;
   ULong*   tce;

   /* Basic checks on this sector */
   if (sec->tt_n_inuse < 0 || sec->tt_n_inuse > N_TTES_PER_SECTOR)
      BAD("invalid sec->tt_n_inuse");
   tce = sec->tc_next;
   if (tce < &sec->tc[0] || tce > &sec->tc[tc_sector_szQ])
      BAD("sec->tc_next points outside tc");

   /* For each eclass ... */
   for (i = 0; i < ECLASS_N; i++) {
      if (sec->ec2tte_size[i] == 0 && sec->ec2tte[i] != NULL)
         BAD("ec2tte_size/ec2tte mismatch(1)");
      if (sec->ec2tte_size[i] != 0 && sec->ec2tte[i] == NULL)
         BAD("ec2tte_size/ec2tte mismatch(2)");
      if (sec->ec2tte_used[i] < 0
          || sec->ec2tte_used[i] > sec->ec2tte_size[i])
         BAD("implausible ec2tte_used");
      if (sec->ec2tte_used[i] == 0)
         continue;

      /* For each tt reference in each eclass .. ensure the reference
         is to a valid tt entry, and that the entry's address ranges
         really include this eclass. */

      for (j = 0; j < sec->ec2tte_used[i]; j++) {
         tteno = sec->ec2tte[i][j];
         if (tteno == EC2TTE_DELETED)
            continue;
         if (tteno >= N_TTES_PER_SECTOR)
            BAD("implausible tteno");
         tte = &sec->tt[tteno];
         if (tte->status != InUse)
            BAD("tteno points to non-inuse tte");
         if (tte->n_tte2ec < 1 || tte->n_tte2ec > 3)
            BAD("tte->n_tte2ec out of range");
         /* Exactly one of tte->tte2ec_ec[0 .. tte->n_tte2ec-1]
            must equal i.  Inspect tte's eclass info. */
         n = 0;
         for (k = 0; k < tte->n_tte2ec; k++) {
            if (k < tte->n_tte2ec-1
                && tte->tte2ec_ec[k] >= tte->tte2ec_ec[k+1])
               BAD("tte->tte2ec_ec[..] out of order");
            ec_num = tte->tte2ec_ec[k];
            if (ec_num < 0 || ec_num >= ECLASS_N)
               BAD("tte->tte2ec_ec[..] out of range");
            if (ec_num != i)
               continue;
            ec_idx = tte->tte2ec_ix[k];
            if (ec_idx < 0 || ec_idx >= sec->ec2tte_used[i])
               BAD("tte->tte2ec_ix[..] out of range");
            if (ec_idx == j)
               n++;
         }
         if (n != 1)
            BAD("tteno does not point back at eclass");
      }
   }

   /* That establishes that for each forward pointer from TTEntries
      there is a corresponding backward pointer from the eclass[]
      arrays.  However, it doesn't rule out the possibility of other,
      bogus pointers in the eclass[] arrays.  So do those similarly:
      scan through them and check the TTEntries they point at point
      back. */

   for (tteno = 0; tteno < N_TTES_PER_SECTOR; tteno++) {

      tte = &sec->tt[tteno];
      if (tte->status == Empty || tte->status == Deleted) {
         if (tte->n_tte2ec != 0)
            BAD("tte->n_eclasses nonzero for unused tte");
         continue;
      }

      vg_assert(tte->status == InUse);

      if (tte->n_tte2ec < 1 || tte->n_tte2ec > 3)
         BAD("tte->n_eclasses out of range(2)");

      for (j = 0; j < tte->n_tte2ec; j++) {
         ec_num = tte->tte2ec_ec[j];
         if (ec_num < 0 || ec_num >= ECLASS_N)
            BAD("tte->eclass[..] out of range");
         ec_idx = tte->tte2ec_ix[j];
         if (ec_idx < 0 || ec_idx >= sec->ec2tte_used[ec_num])
            BAD("tte->ec_idx[..] out of range(2)");
         if (sec->ec2tte[ec_num][ec_idx] != tteno)
            BAD("ec2tte does not point back to tte");
      }
   }

   return True;

  bad:
   if (whassup)
      VG_(debugLog)(0, "transtab", "eclass sanity fail: %s\n", whassup);

#  if 0
   VG_(printf)("eclass = %d\n", i);
   VG_(printf)("tteno = %d\n", (Int)tteno);
   switch (tte->status) {
      case InUse:   VG_(printf)("InUse\n"); break;
      case Deleted: VG_(printf)("Deleted\n"); break;
      case Empty:   VG_(printf)("Empty\n"); break;
   }
   if (tte->status != Empty) {
      for (k = 0; k < tte->vge.n_used; k++)
         VG_(printf)("0x%lx %u\n", tte->vge.base[k], (UInt)tte->vge.len[k]);
   }
#  endif

   return False;

#  undef BAD
}


/* Sanity check absolutely everything.  True == check passed. */

/* forwards */
static Bool sanity_check_redir_tt_tc ( void );

static Bool sanity_check_sector_search_order ( void )
{
   SECno i, j, nListed;
   /* assert the array is the right size */
   vg_assert(MAX_N_SECTORS == (sizeof(sector_search_order)
                               / sizeof(sector_search_order[0])));
   /* Check it's of the form  valid_sector_numbers ++ [INV_SNO, INV_SNO, ..] */
   for (i = 0; i < n_sectors; i++) {
      if (sector_search_order[i] == INV_SNO
          || sector_search_order[i] >= n_sectors)
         break;
   }
   nListed = i;
   for (/* */; i < n_sectors; i++) {
      if (sector_search_order[i] != INV_SNO)
         break;
   }
   if (i != n_sectors)
      return False;
   /* Check each sector number only appears once */
   for (i = 0; i < n_sectors; i++) {
      if (sector_search_order[i] == INV_SNO)
         continue;
      for (j = i+1; j < n_sectors; j++) {
         if (sector_search_order[j] == sector_search_order[i])
            return False;
      }
   }
   /* Check that the number of listed sectors equals the number
      in use, by counting nListed back down. */
   for (i = 0; i < n_sectors; i++) {
      if (sectors[i].tc != NULL)
         nListed--;
   }
   if (nListed != 0)
      return False;
   return True;
}

static Bool sanity_check_all_sectors ( void )
{
   SECno   sno;
   Bool    sane;
   Sector* sec;
   for (sno = 0; sno < n_sectors; sno++) {
      Int i;
      Int nr_not_dead_hx = 0;
      Int szhxa;
      sec = &sectors[sno];
      if (sec->tc == NULL)
         continue;
      sane = sanity_check_eclasses_in_sector( sec );
      if (!sane)
         return False;
      szhxa = VG_(sizeXA)(sec->host_extents);
      for (i = 0; i < szhxa; i++) {
         const HostExtent* hx = VG_(indexXA)(sec->host_extents, i);
         if (!HostExtent__is_dead (hx, sec))
            nr_not_dead_hx++;
      }
      if (nr_not_dead_hx != sec->tt_n_inuse) {
         VG_(debugLog)(0, "transtab",
                       "nr_not_dead_hx %d sanity fail "
                       "(expected == in use %d)\n",
                       nr_not_dead_hx, sec->tt_n_inuse);
         return False;
      }
   }

   if ( !sanity_check_redir_tt_tc() )
      return False;
   if ( !sanity_check_sector_search_order() )
      return False;
   return True;
}



/*-------------------------------------------------------------*/
/*--- Add/find translations                                 ---*/
/*-------------------------------------------------------------*/

static UInt vge_osize ( const VexGuestExtents* vge )
{
   UInt i, n = 0;
   for (i = 0; i < vge->n_used; i++)
      n += (UInt)vge->len[i];
   return n;
}

static Bool isValidSector ( SECno sector )
{
   if (sector == INV_SNO || sector >= n_sectors)
      return False;
   return True;
}

static inline HTTno HASH_TT ( Addr key )
{
   UInt kHi = sizeof(Addr) == 4 ? 0 : (key >> 32);
   UInt kLo = (UInt)key;
   UInt k32 = kHi ^ kLo;
   UInt ror = 7;
   if (ror > 0)
      k32 = (k32 >> ror) | (k32 << (32-ror));
   return (HTTno)(k32 % N_HTTES_PER_SECTOR);
}

static void setFastCacheEntry ( Addr key, ULong* tcptr )
{
   UInt cno = (UInt)VG_TT_FAST_HASH(key);
   VG_(tt_fast)[cno].guest = key;
   VG_(tt_fast)[cno].host  = (Addr)tcptr;
   n_fast_updates++;
   /* This shouldn't fail.  It should be assured by m_translate,
      which should reject any attempt to make a translation of code
      starting at TRANSTAB_BOGUS_GUEST_ADDR. */
   vg_assert(VG_(tt_fast)[cno].guest != TRANSTAB_BOGUS_GUEST_ADDR);
}

/* Invalidate the fast cache VG_(tt_fast). */
static void invalidateFastCache ( void )
{
   UInt j;
   /* This loop is popular enough to make it worth unrolling a
      bit, at least on ppc32. */
   vg_assert(VG_TT_FAST_SIZE > 0 && (VG_TT_FAST_SIZE % 4) == 0);
   for (j = 0; j < VG_TT_FAST_SIZE; j += 4) {
      VG_(tt_fast)[j+0].guest = TRANSTAB_BOGUS_GUEST_ADDR;
      VG_(tt_fast)[j+1].guest = TRANSTAB_BOGUS_GUEST_ADDR;
      VG_(tt_fast)[j+2].guest = TRANSTAB_BOGUS_GUEST_ADDR;
      VG_(tt_fast)[j+3].guest = TRANSTAB_BOGUS_GUEST_ADDR;
   }

   vg_assert(j == VG_TT_FAST_SIZE);
   n_fast_flushes++;
}


static TTEno get_empty_tt_slot(SECno sNo)
{
   TTEno i;

   i = sectors[sNo].empty_tt_list;
   /* Check the slot number is valid before using it to index tt. */
   vg_assert (i >= 0 && i < N_TTES_PER_SECTOR);
   sectors[sNo].empty_tt_list = sectors[sNo].tt[i].usage.next_empty_tte;

   return i;
}

static void add_in_empty_tt_list (SECno sNo, TTEno tteno)
{
   sectors[sNo].tt[tteno].usage.next_empty_tte = sectors[sNo].empty_tt_list;
   sectors[sNo].empty_tt_list = tteno;
}
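
/* Together, get_empty_tt_slot and add_in_empty_tt_list implement a
   LIFO free list of tt slots, threaded through usage.next_empty_tte
   and headed by sec->empty_tt_list (HTT_EMPTY when exhausted).
   Illustrative pairing:

      TTEno slot = get_empty_tt_slot(sno);  // pop a free slot for a
                                            // new translation
      ...
      add_in_empty_tt_list(sno, slot);      // push it back when the
                                            // entry is deleted
*/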

static void initialiseSector ( SECno sno )
{
   UInt i;
   SysRes  sres;
   Sector* sec;
   vg_assert(isValidSector(sno));

   { Bool sane = sanity_check_sector_search_order();
     vg_assert(sane);
   }
   sec = &sectors[sno];

   if (sec->tc == NULL) {

      /* Sector has never been used before.  Need to allocate tt and
         tc. */
      vg_assert(sec->tt == NULL);
      vg_assert(sec->tc_next == NULL);
      vg_assert(sec->tt_n_inuse == 0);
      for (EClassNo e = 0; e < ECLASS_N; e++) {
         vg_assert(sec->ec2tte_size[e] == 0);
         vg_assert(sec->ec2tte_used[e] == 0);
         vg_assert(sec->ec2tte[e] == NULL);
      }
      vg_assert(sec->host_extents == NULL);

      if (VG_(clo_stats) || VG_(debugLog_getLevel)() >= 1)
         VG_(dmsg)("transtab: " "allocate sector %d\n", sno);

      sres = VG_(am_mmap_anon_float_valgrind)( 8 * tc_sector_szQ );
      if (sr_isError(sres)) {
         VG_(out_of_memory_NORETURN)("initialiseSector(TC)",
                                     8 * tc_sector_szQ );
         /*NOTREACHED*/
      }
      sec->tc = (ULong*)(Addr)sr_Res(sres);

      sres = VG_(am_mmap_anon_float_valgrind)
                ( N_TTES_PER_SECTOR * sizeof(TTEntry) );
      if (sr_isError(sres)) {
         VG_(out_of_memory_NORETURN)("initialiseSector(TT)",
                                     N_TTES_PER_SECTOR * sizeof(TTEntry) );
         /*NOTREACHED*/
      }
      sec->tt = (TTEntry*)(Addr)sr_Res(sres);
      sec->empty_tt_list = HTT_EMPTY;
      for (TTEno ei = 0; ei < N_TTES_PER_SECTOR; ei++) {
         sec->tt[ei].status   = Empty;
         sec->tt[ei].n_tte2ec = 0;
         add_in_empty_tt_list(sno, ei);
      }

      sres = VG_(am_mmap_anon_float_valgrind)
                ( N_HTTES_PER_SECTOR * sizeof(TTEno) );
      if (sr_isError(sres)) {
         VG_(out_of_memory_NORETURN)("initialiseSector(HTT)",
                                     N_HTTES_PER_SECTOR * sizeof(TTEno) );
         /*NOTREACHED*/
      }
      sec->htt = (TTEno*)(Addr)sr_Res(sres);
      for (HTTno hi = 0; hi < N_HTTES_PER_SECTOR; hi++)
         sec->htt[hi] = HTT_EMPTY;

      /* Set up the host_extents array. */
      sec->host_extents
         = VG_(newXA)(ttaux_malloc, "transtab.initialiseSector(host_extents)",
                      ttaux_free,
                      sizeof(HostExtent));

      /* Add an entry in the sector_search_order */
      for (i = 0; i < n_sectors; i++) {
         if (sector_search_order[i] == INV_SNO)
            break;
      }
      vg_assert(i >= 0 && i < n_sectors);
      sector_search_order[i] = sno;

      if (VG_(clo_verbosity) > 2)
         VG_(message)(Vg_DebugMsg, "TT/TC: initialise sector %d\n", sno);
1501
1502   } else {
1503
1504      /* Sector has been used before.  Dump the old contents. */
1505      if (VG_(clo_stats) || VG_(debugLog_getLevel)() >= 1)
1506         VG_(dmsg)("transtab: " "recycle  sector %d\n", sno);
1507      n_sectors_recycled++;
1508
1509      vg_assert(sec->tt != NULL);
1510      vg_assert(sec->tc_next != NULL);
1511      n_dump_count += sec->tt_n_inuse;
1512
1513      VexArch     arch_host = VexArch_INVALID;
1514      VexArchInfo archinfo_host;
1515      VG_(bzero_inline)(&archinfo_host, sizeof(archinfo_host));
1516      VG_(machine_get_VexArchInfo)( &arch_host, &archinfo_host );
1517      VexEndness endness_host = archinfo_host.endness;
1518
1519      /* Visit each just-about-to-be-abandoned translation. */
1520      if (DEBUG_TRANSTAB) VG_(printf)("QQQ unlink-entire-sector: %d START\n",
1521                                      sno);
1522      sec->empty_tt_list = HTT_EMPTY;
1523      for (TTEno ei = 0; ei < N_TTES_PER_SECTOR; ei++) {
1524         if (sec->tt[ei].status == InUse) {
1525            vg_assert(sec->tt[ei].n_tte2ec >= 1);
1526            vg_assert(sec->tt[ei].n_tte2ec <= 3);
1527            n_dump_osize += vge_osize(&sec->tt[ei].vge);
1528            /* Tell the tool too. */
1529            if (VG_(needs).superblock_discards) {
1530               VG_TDICT_CALL( tool_discard_superblock_info,
1531                              sec->tt[ei].entry,
1532                              sec->tt[ei].vge );
1533            }
            unchain_in_preparation_for_deletion(arch_host,
                                                endness_host, sno, ei);
         } else {
            vg_assert(sec->tt[ei].n_tte2ec == 0);
         }
         sec->tt[ei].status   = Empty;
         sec->tt[ei].n_tte2ec = 0;
         add_in_empty_tt_list(sno, ei);
      }
      for (HTTno hi = 0; hi < N_HTTES_PER_SECTOR; hi++)
         sec->htt[hi] = HTT_EMPTY;

      if (DEBUG_TRANSTAB) VG_(printf)("QQQ unlink-entire-sector: %d END\n",
                                      sno);

      /* Free up the eclass structures. */
      for (EClassNo e = 0; e < ECLASS_N; e++) {
         if (sec->ec2tte_size[e] == 0) {
            vg_assert(sec->ec2tte_used[e] == 0);
            vg_assert(sec->ec2tte[e] == NULL);
         } else {
            vg_assert(sec->ec2tte[e] != NULL);
            ttaux_free(sec->ec2tte[e]);
            sec->ec2tte[e] = NULL;
            sec->ec2tte_size[e] = 0;
            sec->ec2tte_used[e] = 0;
         }
      }

      /* Empty out the host extents array. */
      vg_assert(sec->host_extents != NULL);
      VG_(dropTailXA)(sec->host_extents, VG_(sizeXA)(sec->host_extents));
      vg_assert(VG_(sizeXA)(sec->host_extents) == 0);

      /* Sanity check: ensure it is already in
         sector_search_order[]. */
      SECno ix;
      for (ix = 0; ix < n_sectors; ix++) {
         if (sector_search_order[ix] == sno)
            break;
      }
      vg_assert(ix >= 0 && ix < n_sectors);

      if (VG_(clo_verbosity) > 2)
         VG_(message)(Vg_DebugMsg, "TT/TC: recycle sector %d\n", sno);
   }

   sec->tc_next = sec->tc;
   sec->tt_n_inuse = 0;

   invalidateFastCache();

   { Bool sane = sanity_check_sector_search_order();
     vg_assert(sane);
   }
}

/* Add a translation of vge to TT/TC.  The translation is temporarily
   in code[0 .. code_len-1].

   pre: youngest_sector points to a valid (although possibly full)
   sector.
*/
void VG_(add_to_transtab)( const VexGuestExtents* vge,
                           Addr             entry,
                           Addr             code,
                           UInt             code_len,
                           Bool             is_self_checking,
                           Int              offs_profInc,
                           UInt             n_guest_instrs )
{
   Int    tcAvailQ, reqdQ, y;
   ULong  *tcptr, *tcptr2;
   UChar* srcP;
   UChar* dstP;

   vg_assert(init_done);
   vg_assert(vge->n_used >= 1 && vge->n_used <= 3);

   /* 60000: should agree with N_TMPBUF in m_translate.c. */
   vg_assert(code_len > 0 && code_len < 60000);

   /* Generally stay sane */
   vg_assert(n_guest_instrs < 200); /* it can be zero, though */

   if (DEBUG_TRANSTAB)
      VG_(printf)("add_to_transtab(entry = 0x%lx, len = %u) ...\n",
                  entry, code_len);

   n_in_count++;
   n_in_tsize += code_len;
   n_in_osize += vge_osize(vge);
   if (is_self_checking)
      n_in_sc_count++;

   y = youngest_sector;
   vg_assert(isValidSector(y));

   if (sectors[y].tc == NULL)
      initialiseSector(y);

   /* Try putting the translation in this sector. */
   reqdQ = (code_len + 7) >> 3;
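   /* (Worked example: code_len == 61 gives reqdQ == (61+7)>>3 == 8,
      that is, 8 ULong quadwords == 64 bytes, so the round-up wastes
      at most 7 padding bytes per translation.) */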

   /* Will it fit in tc? */
   tcAvailQ = ((ULong*)(&sectors[y].tc[tc_sector_szQ]))
              - ((ULong*)(sectors[y].tc_next));
   vg_assert(tcAvailQ >= 0);
   vg_assert(tcAvailQ <= tc_sector_szQ);

   if (tcAvailQ < reqdQ
       || sectors[y].tt_n_inuse >= N_TTES_PER_SECTOR) {
      /* No.  So move on to the next sector.  Either it's never been
         used before, in which case it will get its tt/tc allocated
         now, or it has been used before, in which case it is set to be
         empty, hence throwing out the oldest sector. */
      vg_assert(tc_sector_szQ > 0);
      Int tt_loading_pct = (100 * sectors[y].tt_n_inuse)
                           / N_HTTES_PER_SECTOR;
      Int tc_loading_pct = (100 * (tc_sector_szQ - tcAvailQ))
                           / tc_sector_szQ;
      if (VG_(clo_stats) || VG_(debugLog_getLevel)() >= 1) {
         VG_(dmsg)("transtab: "
                   "declare  sector %d full "
                   "(TT loading %2d%%, TC loading %2d%%, avg tce size %d)\n",
                   y, tt_loading_pct, tc_loading_pct,
                   8 * (tc_sector_szQ - tcAvailQ)/sectors[y].tt_n_inuse);
      }
      youngest_sector++;
      if (youngest_sector >= n_sectors)
         youngest_sector = 0;
      y = youngest_sector;
      initialiseSector(y);
   }

   /* Be sure ... */
   tcAvailQ = ((ULong*)(&sectors[y].tc[tc_sector_szQ]))
              - ((ULong*)(sectors[y].tc_next));
   vg_assert(tcAvailQ >= 0);
   vg_assert(tcAvailQ <= tc_sector_szQ);
   vg_assert(tcAvailQ >= reqdQ);
   vg_assert(sectors[y].tt_n_inuse < N_TTES_PER_SECTOR);
   vg_assert(sectors[y].tt_n_inuse >= 0);

   /* Copy into tc. */
   tcptr = sectors[y].tc_next;
   vg_assert(tcptr >= &sectors[y].tc[0]);
   vg_assert(tcptr <= &sectors[y].tc[tc_sector_szQ]);

   dstP = (UChar*)tcptr;
   srcP = (UChar*)code;
   VG_(memcpy)(dstP, srcP, code_len);
   sectors[y].tc_next += reqdQ;
   sectors[y].tt_n_inuse++;

   /* more paranoia */
   tcptr2 = sectors[y].tc_next;
   vg_assert(tcptr2 >= &sectors[y].tc[0]);
   vg_assert(tcptr2 <= &sectors[y].tc[tc_sector_szQ]);

   /* Find an empty tt slot, and use it.  There must be such a slot
      since tt is never allowed to get completely full. */
   TTEno tteix = get_empty_tt_slot(y);
   TTEntry__init(&sectors[y].tt[tteix]);
   sectors[y].tt[tteix].status = InUse;
   sectors[y].tt[tteix].tcptr  = tcptr;
   sectors[y].tt[tteix].usage.prof.count  = 0;
   sectors[y].tt[tteix].usage.prof.weight =
      n_guest_instrs == 0 ? 1 : n_guest_instrs;
   sectors[y].tt[tteix].vge    = *vge;
   sectors[y].tt[tteix].entry  = entry;

   // Point an htt entry to the tt slot
   HTTno htti = HASH_TT(entry);
   vg_assert(htti >= 0 && htti < N_HTTES_PER_SECTOR);
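   /* Linear probing: scan forwards, wrapping at the end of the table,
      until a free slot -- either never-used (HTT_EMPTY) or freed by a
      deletion (HTT_DELETED) -- is found.  This must terminate, since
      tt (and hence htt) is never allowed to fill up completely; see
      the tt_n_inuse < N_TTES_PER_SECTOR assertion above. */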
   while (True) {
      if (sectors[y].htt[htti] == HTT_EMPTY
          || sectors[y].htt[htti] == HTT_DELETED)
         break;
      htti++;
      if (htti >= N_HTTES_PER_SECTOR)
         htti = 0;
   }
   sectors[y].htt[htti] = tteix;

   /* Patch in the profile counter location, if necessary. */
   if (offs_profInc != -1) {
      vg_assert(offs_profInc >= 0 && offs_profInc < code_len);
      VexArch     arch_host = VexArch_INVALID;
      VexArchInfo archinfo_host;
      VG_(bzero_inline)(&archinfo_host, sizeof(archinfo_host));
      VG_(machine_get_VexArchInfo)( &arch_host, &archinfo_host );
      VexEndness endness_host = archinfo_host.endness;
      VexInvalRange vir
         = LibVEX_PatchProfInc( arch_host, endness_host,
                                dstP + offs_profInc,
                                &sectors[y].tt[tteix].usage.prof.count );
      VG_(invalidate_icache)( (void*)vir.start, vir.len );
   }

   VG_(invalidate_icache)( dstP, code_len );

   /* Add this entry to the host_extents map, checking that we're
      adding in order. */
   { HostExtent hx;
     hx.start = (UChar*)tcptr;
     hx.len   = code_len;
     hx.tteNo = tteix;
     vg_assert(hx.len > 0); /* bsearch fails w/ zero length entries */
     XArray* hx_array = sectors[y].host_extents;
     vg_assert(hx_array);
     Word n = VG_(sizeXA)(hx_array);
     if (n > 0) {
        HostExtent* hx_prev = (HostExtent*)VG_(indexXA)(hx_array, n-1);
        vg_assert(hx_prev->start + hx_prev->len <= hx.start);
     }
     VG_(addToXA)(hx_array, &hx);
     if (DEBUG_TRANSTAB)
        VG_(printf)("... hx.start 0x%p hx.len %u sector %d ttslot %d\n",
                    hx.start, hx.len, y, tteix);
   }

   /* Update the fast-cache. */
   setFastCacheEntry( entry, tcptr );

   /* Note the eclass numbers for this translation. */
   upd_eclasses_after_add( &sectors[y], tteix );
}


/* Search for the translation of the given guest address.  If
   requested, a successful search can also cause the fast-caches to be
   updated.
*/
Bool VG_(search_transtab) ( /*OUT*/Addr*  res_hcode,
                            /*OUT*/SECno* res_sNo,
                            /*OUT*/TTEno* res_tteNo,
                            Addr          guest_addr,
                            Bool          upd_cache )
{
   SECno i, sno;
   HTTno j, k, kstart;
   TTEno tti;

   vg_assert(init_done);
   /* Find the initial probe point just once.  It will be the same in
      all sectors and avoids multiple expensive % operations. */
   n_full_lookups++;
   kstart = HASH_TT(guest_addr);
   vg_assert(kstart >= 0 && kstart < N_HTTES_PER_SECTOR);

   /* Search in all the sectors, using sector_search_order[] as a
      heuristic guide as to what order to visit the sectors. */
   for (i = 0; i < n_sectors; i++) {

      sno = sector_search_order[i];
      if (UNLIKELY(sno == INV_SNO))
         return False; /* run out of sectors to search */

      k = kstart;
      for (j = 0; j < N_HTTES_PER_SECTOR; j++) {
         n_lookup_probes++;
         tti = sectors[sno].htt[k];
         if (tti < N_TTES_PER_SECTOR
             && sectors[sno].tt[tti].entry == guest_addr) {
            /* found it */
            if (upd_cache)
               setFastCacheEntry(
                  guest_addr, sectors[sno].tt[tti].tcptr );
            if (res_hcode)
               *res_hcode = (Addr)sectors[sno].tt[tti].tcptr;
            if (res_sNo)
               *res_sNo = sno;
            if (res_tteNo)
               *res_tteNo = tti;
            /* pull this one one step closer to the front.  For large
               apps this more or less halves the number of required
               probes. */
            if (i > 0) {
               Int tmp = sector_search_order[i-1];
               sector_search_order[i-1] = sector_search_order[i];
               sector_search_order[i] = tmp;
            }
            return True;
         }
         // tti is HTT_EMPTY or HTT_DELETED, or is not the TTE for
         // guest_addr; only HTT_EMPTY terminates the probe sequence.
         if (sectors[sno].htt[k] == HTT_EMPTY)
            break; /* not found in this sector */
         k++;
         if (k == N_HTTES_PER_SECTOR)
            k = 0;
      }

      /* If we fall off the end, all entries are InUse and not
         matching, or Deleted.  In any case we did not find it in this
         sector. */
   }

   /* Not found in any sector. */
   return False;
}


/*-------------------------------------------------------------*/
/*--- Delete translations.                                  ---*/
/*-------------------------------------------------------------*/

/* forward */
static void unredir_discard_translations( Addr, ULong );

/* Stuff for deleting translations which intersect with a given
   address range.  Unfortunately, to make this run at a reasonable
   speed, it is complex. */

static inline
Bool overlap1 ( Addr s1, ULong r1, Addr s2, ULong r2 )
{
   Addr e1 = s1 + r1 - 1;
   Addr e2 = s2 + r2 - 1;
   if (e1 < s2 || e2 < s1)
      return False;
   return True;
}
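
/* (The end points e1 and e2 are inclusive, so the ranges must be
   non-empty, i.e. r1 >= 1 and r2 >= 1.  For example, with s1 = 0x1000,
   r1 = 0x100 and s2 = 0x10FF, r2 = 0x100, the intervals
   [0x1000,0x10FF] and [0x10FF,0x11FE] overlap in exactly the one
   byte 0x10FF, and overlap1 returns True.) */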

static inline
Bool overlaps ( Addr start, ULong range, const VexGuestExtents* vge )
{
   if (overlap1(start, range, vge->base[0], vge->len[0]))
      return True;
   if (vge->n_used < 2)
      return False;
   if (overlap1(start, range, vge->base[1], vge->len[1]))
      return True;
   if (vge->n_used < 3)
      return False;
   if (overlap1(start, range, vge->base[2], vge->len[2]))
      return True;
   return False;
}


/* Delete a tt entry, and update all the eclass data accordingly. */

static void delete_tte ( /*MOD*/Sector* sec, SECno secNo, TTEno tteno,
                         VexArch arch_host, VexEndness endness_host )
{
   Int      i, ec_idx;
   EClassNo ec_num;
   TTEntry* tte;

   /* sec and secNo are mutually redundant; cross-check. */
   vg_assert(sec == &sectors[secNo]);

   vg_assert(tteno >= 0 && tteno < N_TTES_PER_SECTOR);
   tte = &sec->tt[tteno];
   vg_assert(tte->status == InUse);
   vg_assert(tte->n_tte2ec >= 1 && tte->n_tte2ec <= 3);

   /* Unchain .. */
   unchain_in_preparation_for_deletion(arch_host, endness_host, secNo, tteno);

   /* Deal with the ec-to-tte links first. */
   for (i = 0; i < tte->n_tte2ec; i++) {
      ec_num = tte->tte2ec_ec[i];
      ec_idx = tte->tte2ec_ix[i];
      vg_assert(ec_num >= 0 && ec_num < ECLASS_N);
      vg_assert(ec_idx >= 0);
      vg_assert(ec_idx < sec->ec2tte_used[ec_num]);
      /* Assert that the two links point at each other. */
      vg_assert(sec->ec2tte[ec_num][ec_idx] == tteno);
      /* "delete" the pointer back to here. */
      sec->ec2tte[ec_num][ec_idx] = EC2TTE_DELETED;
   }

   /* Now fix up this TTEntry. */
   /* Mark the entry as deleted in htt.
      Note: we could avoid the below hash table search by
      adding a reference from tte to its hash position in tt. */
   HTTno j;
   HTTno k = HASH_TT(tte->entry);
   vg_assert(k >= 0 && k < N_HTTES_PER_SECTOR);
   for (j = 0; j < N_HTTES_PER_SECTOR; j++) {
      if (sec->htt[k] == tteno)
         break;
      k++;
      if (k == N_HTTES_PER_SECTOR)
         k = 0;
   }
   vg_assert(j < N_HTTES_PER_SECTOR);
   sec->htt[k]   = HTT_DELETED;
   tte->status   = Deleted;
   tte->n_tte2ec = 0;
   add_in_empty_tt_list(secNo, tteno);

   /* Stats .. */
   sec->tt_n_inuse--;
   n_disc_count++;
   n_disc_osize += vge_osize(&tte->vge);

   /* Tell the tool too. */
   if (VG_(needs).superblock_discards) {
      VG_TDICT_CALL( tool_discard_superblock_info,
                     tte->entry,
                     tte->vge );
   }
}


/* Delete translations from sec which intersect specified range, but
   only consider translations in the specified eclass. */

static
Bool delete_translations_in_sector_eclass ( /*MOD*/Sector* sec, SECno secNo,
                                            Addr guest_start, ULong range,
                                            EClassNo ec,
                                            VexArch arch_host,
                                            VexEndness endness_host )
{
   Int      i;
   TTEno    tteno;
   Bool     anyDeld = False;
   TTEntry* tte;

   vg_assert(ec >= 0 && ec < ECLASS_N);

   for (i = 0; i < sec->ec2tte_used[ec]; i++) {

      tteno = sec->ec2tte[ec][i];
      if (tteno == EC2TTE_DELETED) {
         /* already deleted */
         continue;
      }

      vg_assert(tteno < N_TTES_PER_SECTOR);

      tte = &sec->tt[tteno];
      vg_assert(tte->status == InUse);

      if (overlaps( guest_start, range, &tte->vge )) {
         anyDeld = True;
         delete_tte( sec, secNo, tteno, arch_host, endness_host );
      }

   }

   return anyDeld;
}


/* Delete translations from sec which intersect specified range, the
   slow way, by inspecting all translations in sec. */

static
Bool delete_translations_in_sector ( /*MOD*/Sector* sec, SECno secNo,
                                     Addr guest_start, ULong range,
                                     VexArch arch_host,
                                     VexEndness endness_host )
{
   TTEno i;
   Bool  anyDeld = False;

   for (i = 0; i < N_TTES_PER_SECTOR; i++) {
      if (sec->tt[i].status == InUse
          && overlaps( guest_start, range, &sec->tt[i].vge )) {
         anyDeld = True;
         delete_tte( sec, secNo, i, arch_host, endness_host );
      }
   }

   return anyDeld;
}


void VG_(discard_translations) ( Addr guest_start, ULong range,
                                 const HChar* who )
{
   Sector* sec;
   SECno   sno;
   EClassNo ec;
   Bool    anyDeleted = False;

   vg_assert(init_done);

   VG_(debugLog)(2, "transtab",
                    "discard_translations(0x%lx, %llu) req by %s\n",
                    guest_start, range, who );

   /* Pre-deletion sanity check */
   if (VG_(clo_sanity_level) >= 4) {
      Bool sane = sanity_check_all_sectors();
      vg_assert(sane);
   }

   if (range == 0)
      return;

   VexArch     arch_host = VexArch_INVALID;
   VexArchInfo archinfo_host;
   VG_(bzero_inline)(&archinfo_host, sizeof(archinfo_host));
   VG_(machine_get_VexArchInfo)( &arch_host, &archinfo_host );
   VexEndness endness_host = archinfo_host.endness;

   /* There are two different ways to do this.

      If the range fits within a single address-range equivalence
      class, as will be the case for a cache line sized invalidation,
      then we only have to inspect the set of translations listed in
      that equivalence class, and also in the "sin-bin" equivalence
      class ECLASS_MISC.

      Otherwise, the invalidation is of a larger range and probably
      results from munmap.  In this case it's (probably!) faster just
      to inspect all translations, dump those we don't want, and
      regenerate the equivalence class information (since modifying it
      in-situ is even more expensive).
   */
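   /* (So, roughly: a cache-line-sized self-modifying-code flush takes
      the fast path and inspects only two eclass lists per sector,
      whereas unmapping a multi-megabyte shared object scans every
      in-use TT entry in every sector.) */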

   /* First off, figure out if the range falls within a single class,
      and if so which one. */

   ec = ECLASS_MISC;
   if (range < (1ULL << ECLASS_SHIFT))
      ec = range_to_eclass( guest_start, (UInt)range );

   /* if ec is ECLASS_MISC then we aren't looking at just a single
      class, so use the slow scheme.  Else use the fast scheme,
      examining 'ec' and ECLASS_MISC. */

   if (ec != ECLASS_MISC) {

      VG_(debugLog)(2, "transtab",
                       "                    FAST, ec = %d\n", ec);

      /* Fast scheme */
      vg_assert(ec >= 0 && ec < ECLASS_MISC);

      for (sno = 0; sno < n_sectors; sno++) {
         sec = &sectors[sno];
         if (sec->tc == NULL)
            continue;
         anyDeleted |= delete_translations_in_sector_eclass(
                          sec, sno, guest_start, range, ec,
                          arch_host, endness_host
                       );
         anyDeleted |= delete_translations_in_sector_eclass(
                          sec, sno, guest_start, range, ECLASS_MISC,
                          arch_host, endness_host
                       );
      }

   } else {

      /* slow scheme */

      VG_(debugLog)(2, "transtab",
                       "                    SLOW, ec = %d\n", ec);

      for (sno = 0; sno < n_sectors; sno++) {
         sec = &sectors[sno];
         if (sec->tc == NULL)
            continue;
         anyDeleted |= delete_translations_in_sector(
                          sec, sno, guest_start, range,
                          arch_host, endness_host
                       );
      }

   }

   if (anyDeleted)
      invalidateFastCache();

   /* don't forget the no-redir cache */
   unredir_discard_translations( guest_start, range );

   /* Post-deletion sanity check */
   if (VG_(clo_sanity_level) >= 4) {
      TTEno    i;
      TTEntry* tte;
      Bool     sane = sanity_check_all_sectors();
      vg_assert(sane);
      /* But now, also check the requested address range isn't
         present anywhere. */
      for (sno = 0; sno < n_sectors; sno++) {
         sec = &sectors[sno];
         if (sec->tc == NULL)
            continue;
         for (i = 0; i < N_TTES_PER_SECTOR; i++) {
            tte = &sec->tt[i];
            if (tte->status != InUse)
               continue;
            vg_assert(!overlaps( guest_start, range, &tte->vge ));
         }
      }
   }
}

/* Whether or not tools may discard translations. */
Bool  VG_(ok_to_discard_translations) = False;

/* This function is exported to tools which can use it to discard
   translations, provided it is safe to do so. */
void VG_(discard_translations_safely) ( Addr  start, SizeT len,
                                        const HChar* who )
{
   vg_assert(VG_(ok_to_discard_translations));
   VG_(discard_translations)(start, len, who);
}

/*------------------------------------------------------------*/
/*--- AUXILIARY: the unredirected TT/TC                    ---*/
/*------------------------------------------------------------*/

/* A very simple translation cache which holds a small number of
   unredirected translations.  This is completely independent of the
   main tt/tc structures.  When unredir_tc or unredir_tt becomes full,
   both structures are simply dumped and we start over.

   Since these translations are unredirected, the search key is (by
   definition) the first address entry in the .vge field. */

/* Sized to hold 500 translations of average size 1000 bytes. */

#define UNREDIR_SZB   1000

#define N_UNREDIR_TT  500
#define N_UNREDIR_TCQ (N_UNREDIR_TT * UNREDIR_SZB / sizeof(ULong))
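/* (That is, unredir_tc holds 500 * 1000 / 8 = 62500 ULong quadwords,
   500000 bytes in total, given that sizeof(ULong) == 8 is asserted
   in VG_(init_tt_tc) below.) */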

typedef
   struct {
      VexGuestExtents vge;
      Addr            hcode;
      Bool            inUse;
   }
   UTCEntry;

/* We just allocate forwards in _tc, never deleting. */
static ULong    *unredir_tc;
static Int      unredir_tc_used = N_UNREDIR_TCQ;

/* Slots in _tt can come into use and out again (.inUse).
   Nevertheless _tt_highwater is maintained so that invalidations
   don't have to scan all the slots when only a few are in use.
   _tt_highwater holds the index of the highest ever allocated
   slot. */
static UTCEntry unredir_tt[N_UNREDIR_TT];
static Int      unredir_tt_highwater;


static void init_unredir_tt_tc ( void )
{
   Int i;
   if (unredir_tc == NULL) {
      SysRes sres = VG_(am_mmap_anon_float_valgrind)
                       ( N_UNREDIR_TT * UNREDIR_SZB );
      if (sr_isError(sres)) {
         VG_(out_of_memory_NORETURN)("init_unredir_tt_tc",
                                     N_UNREDIR_TT * UNREDIR_SZB);
         /*NOTREACHED*/
      }
      unredir_tc = (ULong *)(Addr)sr_Res(sres);
   }
   unredir_tc_used = 0;
   for (i = 0; i < N_UNREDIR_TT; i++)
      unredir_tt[i].inUse = False;
   unredir_tt_highwater = -1;
}

/* Do a sanity check on the unredirected tt/tc; return False on
   failure. */
static Bool sanity_check_unredir_tt_tc ( void )
{
   Int i;
   if (unredir_tt_highwater < -1) return False;
   if (unredir_tt_highwater >= N_UNREDIR_TT) return False;

   for (i = unredir_tt_highwater+1; i < N_UNREDIR_TT; i++)
      if (unredir_tt[i].inUse)
         return False;

   if (unredir_tc_used < 0) return False;
   if (unredir_tc_used > N_UNREDIR_TCQ) return False;

   return True;
}


/* Add an UNREDIRECTED translation of vge to TT/TC.  The translation
   is temporarily in code[0 .. code_len-1].
*/
void VG_(add_to_unredir_transtab)( const VexGuestExtents* vge,
                                   Addr             entry,
                                   Addr             code,
                                   UInt             code_len )
{
   Int   i, j, code_szQ;
   HChar *srcP, *dstP;

   vg_assert(sanity_check_unredir_tt_tc());

   /* This is the whole point: it's not redirected! */
   vg_assert(entry == vge->base[0]);

   /* How many unredir_tc quadwords are needed */
   code_szQ = (code_len + 7) / 8;
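   /* (Same round-up-to-8-bytes as reqdQ in VG_(add_to_transtab):
      e.g. code_len == 1001 gives code_szQ == 126.) */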

   /* Look for an empty unredir_tt slot */
   for (i = 0; i < N_UNREDIR_TT; i++)
      if (!unredir_tt[i].inUse)
         break;

   if (i >= N_UNREDIR_TT || code_szQ > (N_UNREDIR_TCQ - unredir_tc_used)) {
      /* It's full; dump everything we currently have */
      init_unredir_tt_tc();
      i = 0;
   }

   vg_assert(unredir_tc_used >= 0);
   vg_assert(unredir_tc_used <= N_UNREDIR_TCQ);
   vg_assert(code_szQ > 0);
   vg_assert(code_szQ + unredir_tc_used <= N_UNREDIR_TCQ);
   vg_assert(i >= 0 && i < N_UNREDIR_TT);
   vg_assert(unredir_tt[i].inUse == False);

   if (i > unredir_tt_highwater)
      unredir_tt_highwater = i;

   dstP = (HChar*)&unredir_tc[unredir_tc_used];
   srcP = (HChar*)code;
   for (j = 0; j < code_len; j++)
      dstP[j] = srcP[j];

   VG_(invalidate_icache)( dstP, code_len );

   unredir_tt[i].inUse = True;
   unredir_tt[i].vge   = *vge;
   unredir_tt[i].hcode = (Addr)dstP;

   unredir_tc_used += code_szQ;
   vg_assert(unredir_tc_used >= 0);
   vg_assert(unredir_tc_used <= N_UNREDIR_TCQ);

   vg_assert(&dstP[code_len] <= (HChar*)&unredir_tc[unredir_tc_used]);
}

Bool VG_(search_unredir_transtab) ( /*OUT*/Addr*  result,
                                    Addr          guest_addr )
{
   Int i;
   for (i = 0; i < N_UNREDIR_TT; i++) {
      if (!unredir_tt[i].inUse)
         continue;
      if (unredir_tt[i].vge.base[0] == guest_addr) {
         *result = unredir_tt[i].hcode;
         return True;
      }
   }
   return False;
}

static void unredir_discard_translations( Addr guest_start, ULong range )
{
   Int i;

   vg_assert(sanity_check_unredir_tt_tc());

   for (i = 0; i <= unredir_tt_highwater; i++) {
      if (unredir_tt[i].inUse
          && overlaps( guest_start, range, &unredir_tt[i].vge))
         unredir_tt[i].inUse = False;
   }
}


/*------------------------------------------------------------*/
/*--- Initialisation.                                      ---*/
/*------------------------------------------------------------*/

void VG_(init_tt_tc) ( void )
{
   Int i, avg_codeszQ;

   vg_assert(!init_done);
   init_done = True;

   /* Otherwise lots of things go wrong... */
   vg_assert(sizeof(ULong) == 8);
   vg_assert(sizeof(TTEno) == sizeof(HTTno));
   vg_assert(sizeof(TTEno) == 2);
   vg_assert(N_TTES_PER_SECTOR <= N_HTTES_PER_SECTOR);
   vg_assert(N_HTTES_PER_SECTOR < INV_TTE);
   vg_assert(N_HTTES_PER_SECTOR < EC2TTE_DELETED);
   vg_assert(N_HTTES_PER_SECTOR < HTT_EMPTY);
   /* check fast cache entries really are 2 words long */
   vg_assert(sizeof(Addr) == sizeof(void*));
   vg_assert(sizeof(FastCacheEntry) == 2 * sizeof(Addr));
   /* check fast cache entries are packed back-to-back with no spaces */
   vg_assert(sizeof( VG_(tt_fast) )
             == VG_TT_FAST_SIZE * sizeof(FastCacheEntry));
   /* check fast cache is aligned as we requested.  Not fatal if it
      isn't, but we might as well make sure. */
   vg_assert(VG_IS_16_ALIGNED( (Addr)&VG_(tt_fast)[0] ));

   if (VG_(clo_verbosity) > 2)
      VG_(message)(Vg_DebugMsg,
                   "TT/TC: VG_(init_tt_tc) "
                   "(startup of code management)\n");

   /* Figure out how big each tc area should be.  */
   if (VG_(clo_avg_transtab_entry_size) == 0)
      avg_codeszQ   = (VG_(details).avg_translation_sizeB + 7) / 8;
   else
      avg_codeszQ   = (VG_(clo_avg_transtab_entry_size) + 7) / 8;
   tc_sector_szQ = N_TTES_PER_SECTOR * (1 + avg_codeszQ);
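   /* (For example, an average translation size of 172 bytes gives
      avg_codeszQ == (172+7)/8 == 22, so each sector's tc is sized at
      N_TTES_PER_SECTOR * 23 quadwords, i.e. 184 bytes per potential
      entry.) */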

   /* Ensure the calculated value is not way crazy. */
   vg_assert(tc_sector_szQ >= 2 * N_TTES_PER_SECTOR);
   vg_assert(tc_sector_szQ <= 100 * N_TTES_PER_SECTOR);

   n_sectors = VG_(clo_num_transtab_sectors);
   vg_assert(n_sectors >= MIN_N_SECTORS);
   vg_assert(n_sectors <= MAX_N_SECTORS);

   /* Initialise the sectors, even the ones we aren't going to use.
      Set all fields to zero. */
   youngest_sector = 0;
   for (i = 0; i < MAX_N_SECTORS; i++)
      VG_(memset)(&sectors[i], 0, sizeof(sectors[i]));

   /* Initialise the sector_search_order hint table, including the
      entries we aren't going to use. */
   for (i = 0; i < MAX_N_SECTORS; i++)
      sector_search_order[i] = INV_SNO;

   /* Initialise the fast cache. */
   invalidateFastCache();

   /* and the unredir tt/tc */
   init_unredir_tt_tc();

   if (VG_(clo_verbosity) > 2 || VG_(clo_stats)
       || VG_(debugLog_getLevel) () >= 2) {
      VG_(message)(Vg_DebugMsg,
         "TT/TC: cache: %s--avg-transtab-entry-size=%u, "
         "%stool provided default %u\n",
         VG_(clo_avg_transtab_entry_size) == 0 ? "ignoring " : "using ",
         VG_(clo_avg_transtab_entry_size),
         VG_(clo_avg_transtab_entry_size) == 0 ? "using " : "ignoring ",
         VG_(details).avg_translation_sizeB);
      VG_(message)(Vg_DebugMsg,
         "TT/TC: cache: %d sectors of %d bytes each = %d total TC\n",
          n_sectors, 8 * tc_sector_szQ,
          n_sectors * 8 * tc_sector_szQ );
      VG_(message)(Vg_DebugMsg,
         "TT/TC: table: %d tables[%d]  of %d bytes each = %d total TT\n",
          n_sectors, N_TTES_PER_SECTOR,
          (int)(N_TTES_PER_SECTOR * sizeof(TTEntry)),
          (int)(n_sectors * N_TTES_PER_SECTOR * sizeof(TTEntry)));
      VG_(message)(Vg_DebugMsg,
         "TT/TC: table: %d tt entries each = %d total tt entries\n",
         N_TTES_PER_SECTOR, n_sectors * N_TTES_PER_SECTOR);
      VG_(message)(Vg_DebugMsg,
         "TT/TC: table: %d htt[%d] of %d bytes each = %d total HTT"
                       " (htt[%d] %d%% max occup)\n",
         n_sectors, N_HTTES_PER_SECTOR,
         (int)(N_HTTES_PER_SECTOR * sizeof(TTEno)),
         (int)(n_sectors * N_HTTES_PER_SECTOR * sizeof(TTEno)),
         N_HTTES_PER_SECTOR, SECTOR_TT_LIMIT_PERCENT);
   }
}


/*------------------------------------------------------------*/
/*--- Printing out statistics.                             ---*/
/*------------------------------------------------------------*/

static Double safe_idiv( ULong a, ULong b )
{
   return (b == 0 ? 0 : (Double)a / (Double)b);
}

UInt VG_(get_bbs_translated) ( void )
{
   return n_in_count;
}

void VG_(print_tt_tc_stats) ( void )
{
   VG_(message)(Vg_DebugMsg,
      "    tt/tc: %'llu tt lookups requiring %'llu probes\n",
      n_full_lookups, n_lookup_probes );
   VG_(message)(Vg_DebugMsg,
      "    tt/tc: %'llu fast-cache updates, %'llu flushes\n",
      n_fast_updates, n_fast_flushes );

   VG_(message)(Vg_DebugMsg,
                " transtab: new        %'llu "
                "(%'llu -> %'llu; ratio %3.1f) [%'llu scs] "
                "avg tce size %llu\n",
                n_in_count, n_in_osize, n_in_tsize,
                safe_idiv(n_in_tsize, n_in_osize),
                n_in_sc_count,
                n_in_tsize / (n_in_count ? n_in_count : 1));
   VG_(message)(Vg_DebugMsg,
                " transtab: dumped     %'llu (%'llu -> ?" "?) "
                "(sectors recycled %'llu)\n",
                n_dump_count, n_dump_osize, n_sectors_recycled );
   VG_(message)(Vg_DebugMsg,
                " transtab: discarded  %'llu (%'llu -> ?" "?)\n",
                n_disc_count, n_disc_osize );

   if (DEBUG_TRANSTAB) {
      VG_(printf)("\n");
      for (EClassNo e = 0; e < ECLASS_N; e++) {
         VG_(printf)(" %4d", sectors[0].ec2tte_used[e]);
         if (e % 16 == 15)
            VG_(printf)("\n");
      }
      VG_(printf)("\n\n");
   }
}

/*------------------------------------------------------------*/
/*--- Printing out of profiling results.                   ---*/
/*------------------------------------------------------------*/

static ULong score ( const TTEntry* tte )
{
   return ((ULong)tte->usage.prof.weight) * ((ULong)tte->usage.prof.count);
}
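
/* (Since .weight is the block's guest instruction count -- clamped to
   at least 1 in VG_(add_to_transtab) -- and .count is incremented by
   the patched-in profile counter on each execution, score() roughly
   equals the number of guest instructions executed via this
   translation.) */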

ULong VG_(get_SB_profile) ( SBProfEntry tops[], UInt n_tops )
{
   SECno sno;
   Int   r, s;
   ULong score_total;
   TTEno i;

   /* First, compute the total weighted count, and find the top N
      ttes.  tops contains pointers to the most-used n_tops blocks, in
      descending order (viz, tops[0] is the highest scorer). */
   for (s = 0; s < n_tops; s++) {
      tops[s].addr  = 0;
      tops[s].score = 0;
   }

   score_total = 0;

   for (sno = 0; sno < n_sectors; sno++) {
      if (sectors[sno].tc == NULL)
         continue;
      for (i = 0; i < N_TTES_PER_SECTOR; i++) {
         if (sectors[sno].tt[i].status != InUse)
            continue;
         score_total += score(&sectors[sno].tt[i]);
         /* Find the rank for sectors[sno].tt[i]. */
         r = n_tops-1;
         while (True) {
            if (r == -1)
               break;
            if (tops[r].addr == 0) {
               r--;
               continue;
            }
            if ( score(&sectors[sno].tt[i]) > tops[r].score ) {
               r--;
               continue;
            }
            break;
         }
         r++;
         vg_assert(r >= 0 && r <= n_tops);
         /* This bb should be placed at r, and bbs above it shifted
            upwards one slot. */
         if (r < n_tops) {
            for (s = n_tops-1; s > r; s--)
               tops[s] = tops[s-1];
            tops[r].addr  = sectors[sno].tt[i].entry;
            tops[r].score = score( &sectors[sno].tt[i] );
         }
      }
   }

   /* Now zero out all the counter fields, so that we can make
      multiple calls here and just get the values since the last call,
      each time, rather than values accumulated for the whole run. */
   for (sno = 0; sno < n_sectors; sno++) {
      if (sectors[sno].tc == NULL)
         continue;
      for (i = 0; i < N_TTES_PER_SECTOR; i++) {
         if (sectors[sno].tt[i].status != InUse)
            continue;
         sectors[sno].tt[i].usage.prof.count = 0;
      }
   }

   return score_total;
}

/*--------------------------------------------------------------------*/
/*--- end                                                          ---*/
/*--------------------------------------------------------------------*/
