
/*--------------------------------------------------------------------*/
/*--- Management of the translation table and cache.               ---*/
/*---                                                 m_transtab.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2000-2013 Julian Seward
      jseward@acm.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "pub_core_basics.h"
#include "pub_core_debuglog.h"
#include "pub_core_machine.h"    // For VG_(machine_get_VexArchInfo)
#include "pub_core_libcbase.h"
#include "pub_core_vki.h"        // to keep pub_core_libcproc.h happy, sigh
#include "pub_core_libcproc.h"   // VG_(invalidate_icache)
#include "pub_core_libcassert.h"
#include "pub_core_libcprint.h"
#include "pub_core_options.h"
#include "pub_core_tooliface.h"  // For VG_(details).avg_translation_sizeB
#include "pub_core_transtab.h"
#include "pub_core_aspacemgr.h"
#include "pub_core_mallocfree.h" // VG_(out_of_memory_NORETURN)
#include "pub_core_xarray.h"
#include "pub_core_dispatch.h"   // For VG_(disp_cp*) addresses


#define DEBUG_TRANSTAB 0


/*-------------------------------------------------------------*/
/*--- Management of the FIFO-based translation table+cache. ---*/
/*-------------------------------------------------------------*/

/* Nr of sectors provided via command line parameter. */
UInt VG_(clo_num_transtab_sectors) = N_SECTORS_DEFAULT;
/* Nr of sectors.
   Will be set by VG_(init_tt_tc) to VG_(clo_num_transtab_sectors). */
static UInt n_sectors = 0;

/*------------------ CONSTANTS ------------------*/
/* Number of TC entries in each sector.  This needs to be a prime
   number to work properly; it must be <= 65535 (so that a TT index
   fits in a UShort, leaving room for 0xFFFF (EC2TTE_DELETED) to denote
   'deleted'), and it is strongly recommended not to change this.
   65521 is the largest prime <= 65535. */
#define N_TTES_PER_SECTOR /*10007*/ /*30011*/ /*40009*/ 65521

/* Because each sector contains a hash table of TTEntries, we need to
   specify the maximum allowable loading, after which the sector is
   deemed full. */
#define SECTOR_TT_LIMIT_PERCENT 65

/* The sector is deemed full when this many entries are in it. */
#define N_TTES_PER_SECTOR_USABLE \
           ((N_TTES_PER_SECTOR * SECTOR_TT_LIMIT_PERCENT) / 100)
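/* With N_TTES_PER_SECTOR at its default of 65521, that is
   (65521 * 65) / 100 == 42588 usable entries per sector. */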

/* Equivalence classes for fast address range deletion.  There are 1 +
   2^ECLASS_WIDTH bins.  The highest one, ECLASS_MISC, describes an
   address range which does not fall cleanly within any specific bin.
   Note that ECLASS_SHIFT + ECLASS_WIDTH must be < 32. */
#define ECLASS_SHIFT 11
#define ECLASS_WIDTH 8
#define ECLASS_MISC  (1 << ECLASS_WIDTH)
#define ECLASS_N     (1 + ECLASS_MISC)
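
/* Worked example: with ECLASS_SHIFT == 11, each bin covers one
   2048-byte-aligned 2KB chunk of the address space, repeating every
   2^(ECLASS_SHIFT + ECLASS_WIDTH) == 512KB.  The range
   [0x401000, 0x4017FF] has loBits == hiBits == 2, so it lands in
   bin 2; [0x401000, 0x401FFF] straddles bins 2 and 3 and so is
   classed as ECLASS_MISC.  See range_to_eclass() below for the
   actual computation. */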

#define EC2TTE_DELETED  0xFFFF /* 16-bit special value */


/*------------------ TYPES ------------------*/

/* In edges ("to-me") in the graph created by chaining. */
typedef
   struct {
      UInt from_sNo;   /* sector number */
      UInt from_tteNo; /* TTE number in given sector */
      UInt from_offs;  /* code offset from TCEntry::tcptr where the patch is */
      Bool to_fastEP;  /* Is the patch to a fast or slow entry point? */
   }
   InEdge;


/* Out edges ("from-me") in the graph created by chaining. */
typedef
   struct {
      UInt to_sNo;    /* sector number */
      UInt to_tteNo;  /* TTE number in given sector */
      UInt from_offs; /* code offset in owning translation where patch is */
   }
   OutEdge;


#define N_FIXED_IN_EDGE_ARR 3
typedef
   struct {
      UInt     n_fixed; /* 0 .. N_FIXED_IN_EDGE_ARR */
      InEdge   fixed[N_FIXED_IN_EDGE_ARR];
      XArray*  var; /* XArray* of InEdge */
   }
   InEdgeArr;

#define N_FIXED_OUT_EDGE_ARR 2
typedef
   struct {
      UInt    n_fixed; /* 0 .. N_FIXED_OUT_EDGE_ARR */
      OutEdge fixed[N_FIXED_OUT_EDGE_ARR];
      XArray* var; /* XArray* of OutEdge */
   }
   OutEdgeArr;
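
/* Representation invariant for both edge-array types: while the edges
   still fit in fixed[], var is NULL and n_fixed holds the count; once
   an add would overflow fixed[], all edges move into var (an XArray)
   and n_fixed stays zero while var exists.  The accessors below
   case-split on var accordingly. */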


/* A translation-table entry.  This indicates precisely which areas of
   guest code are included in the translation, and contains all other
   auxiliary info too.  */
typedef
   struct {
      /* Profiling only: the count and weight (arbitrary meaning) for
         this translation.  Weight is a property of the translation
         itself and computed once when the translation is created.
         Count is an entry count for the translation and is
         incremented by 1 every time the translation is used, if we
         are profiling. */
      ULong    count;
      UShort   weight;

      /* Status of the slot.  Note, we need to be able to do lazy
         deletion, hence the Deleted state. */
      enum { InUse, Deleted, Empty } status;

      /* 64-bit aligned pointer to one or more 64-bit words containing
         the corresponding host code (must be in the same sector!)
         This is a pointer into the sector's tc (code) area. */
      ULong* tcptr;

      /* This is the original guest address that purportedly is the
         entry point of the translation.  You might think that .entry
         should be the same as .vge->base[0], and most of the time it
         is.  However, when doing redirections, that is not the case.
         .vge must always correctly describe the guest code sections
         from which this translation was made.  However, .entry may or
         may not be a lie, depending on whether or not we're doing
         redirection. */
      Addr64 entry;

      /* This structure describes precisely what ranges of guest code
         the translation covers, so we can decide whether or not to
         delete it when translations of a given address range are
         invalidated. */
      VexGuestExtents vge;

      /* Address range summary info: these are pointers back to
         eclass[] entries in the containing Sector.  Those entries in
         turn point back here -- the two structures are mutually
         redundant but both necessary to make fast deletions work.
         The eclass info is similar to, and derived from, this entry's
         'vge' field, but it is not the same. */
      UShort n_tte2ec;      // # tte2ec pointers (1 to 3)
      UShort tte2ec_ec[3];  // for each, the eclass #
      UInt   tte2ec_ix[3];  // and the index within the eclass.
      // for i in 0 .. n_tte2ec-1
      //    sec->ec2tte[ tte2ec_ec[i] ][ tte2ec_ix[i] ]
      // should be the index
      // of this TTEntry in the containing Sector's tt array.

      /* Admin information for chaining.  'in_edges' is a set of the
         patch points which jump to this translation -- hence are
         predecessors in the control flow graph.  'out_edges' points
         to successors in the control flow graph -- translations to
         which this one has a patched jump.  In short these are just
         backwards and forwards edges in the graph of patched-together
         blocks.  The 'in_edges' contain slightly more info, enough
         that we can undo the chaining of each mentioned patch point.
         The 'out_edges' list exists only so that we can visit the
         'in_edges' entries of all blocks we're patched through to, in
         order to remove ourselves from them when we're deleted. */

      /* A translation can disappear for two reasons:
          1. erased (as part of the oldest sector cleanup) when the
             youngest sector is full.
          2. discarded due to calls to VG_(discard_translations).
             VG_(discard_translations) sets the status of the
             translation to 'Deleted'.
             Among other cases, the gdbserver discards one or more
             translations when a breakpoint is inserted or removed
             at an Addr, or when single stepping mode is
             enabled/disabled, or when a translation is instrumented
             for gdbserver (all the target jumps of this translation
             are invalidated).

         So, it is possible that the translation A to be patched
         (to obtain a patched jump from A to B) is invalidated
         after B is translated and before A is patched.
         In case a translation is erased or discarded, the patching
         cannot be done.  VG_(tt_tc_do_chaining) and find_TTEntry_from_hcode
         check that the 'from' translation still exists before
         doing the patching.

         Is it safe to erase or discard the current translation E being
         executed?  Amazingly, yes, it is safe.
         Here is the explanation:

         The translation E being executed can only be erased if a new
         translation N is being done.  A new translation is done only
         if the host addr is a not-yet-patched jump to another
         translation.  In such a case, the guest address of N is
         assigned to the PC in the VEX state.  Control is returned
         to the scheduler.  N will be translated.  This can erase the
         translation E (in case of sector full).  VG_(tt_tc_do_chaining)
         will not do the chaining to a translation E that cannot be
         found.  The execution will continue at the current guest PC
         (i.e. the translation N).
         => it is safe to erase the current translation being executed.

         The current translation E being executed can also be discarded
         (e.g. by gdbserver).  VG_(discard_translations) will mark
         this translation E as Deleted, but the translation itself
         is not erased.  In particular, its host code can only
         be overwritten or erased in case a new translation is done.
         A new translation will only be done if a not-yet-translated
         jump is to be executed.  The execution of the Deleted
         translation E will continue until an unpatched jump is
         encountered.  This situation is then similar to the 'erasing'
         case above: the current translation E can be erased or
         overwritten, as the execution will continue at the new
         translation N.

      */

      /* It is possible, although very unlikely, that a block A has
         more than one patched jump to block B.  This could happen if
         (eg) A finishes "jcond B; jmp B".

         This means in turn that B's in_edges set can list A more than
         once (twice in this example).  However, each such entry must
         have a different from_offs, since a patched jump can only
         jump to one place at once (it's meaningless for it to have
         multiple destinations.)  IOW, the successor and predecessor
         edges in the graph are not uniquely determined by a
         TTEntry --> TTEntry pair, but rather by a
         (TTEntry,offset) --> TTEntry triple.

         If A has multiple edges to B then B will mention A multiple
         times in its in_edges.  To make things simpler, we then
         require that A mentions B exactly the same number of times in
         its out_edges.  Furthermore, a matching out-in pair must have
         the same offset (from_offs).  This facilitates sanity
         checking, and it facilitates establishing the invariant that
         an out_edges set may not have duplicates when using the
         equality defined by (TTEntry,offset).  Hence the out_edges
         and in_edges sets really do both have set semantics.

         eg if  A has been patched to B at offsets 42 and 87 (in A)
         then   A.out_edges = { (B,42), (B,87) }   (in any order)
         and    B.in_edges  = { (A,42), (A,87) }   (in any order)

         Hence for each node pair P->Q in the graph, there's a 1:1
         mapping between P.out_edges and Q.in_edges.
      */
      InEdgeArr  in_edges;
      OutEdgeArr out_edges;
   }
   TTEntry;


/* A structure used for mapping host code addresses back to the
   relevant TTEntry.  Used when doing chaining, for finding the
   TTEntry to which some arbitrary patch address belongs. */
typedef
   struct {
      UChar* start;
      UInt   len;
      UInt   tteNo;
   }
   HostExtent;
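
/* For example (hypothetical numbers): if TT entry 5 owns 48 bytes of
   host code at 0x38001000, its HostExtent would be
   { .start = (UChar*)0x38001000, .len = 48, .tteNo = 5 }.
   find_TTEntry_from_hcode() below uses these, via binary search, to
   map any patch address inside [start, start+len) back to entry 5. */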

/* Finally, a sector itself.  Each sector contains an array of
   TCEntries, which hold code, and an array of TTEntries, containing
   all required administrative info.  Profiling is supported using the
   TTEntry .count and .weight fields, if required.

   If the sector is not in use, all three pointers are NULL and
   tt_n_inuse is zero.
*/
typedef
   struct {
      /* The TCEntry area.  Size of this depends on the average
         translation size.  We try and size it so it becomes full
         precisely when this sector's translation table (tt) reaches
         its load limit (SECTOR_TT_LIMIT_PERCENT). */
      ULong* tc;

      /* The TTEntry array.  This is a fixed size, always containing
         exactly N_TTES_PER_SECTOR entries. */
      TTEntry* tt;

      /* This points to the current allocation point in tc. */
      ULong* tc_next;

      /* The count of tt entries with state InUse. */
      Int tt_n_inuse;

      /* Expandable arrays of tt indices for each of the ECLASS_N
         address range equivalence classes.  These hold indices into
         the containing sector's tt array, which in turn should point
         back here. */
      Int     ec2tte_size[ECLASS_N];
      Int     ec2tte_used[ECLASS_N];
      UShort* ec2tte[ECLASS_N];

      /* The host extents.  The [start, +len) ranges are constructed
         in strictly non-overlapping order, so we can binary search
         them at any time. */
      XArray* host_extents; /* XArray* of HostExtent */
   }
   Sector;


/*------------------ DECLS ------------------*/

/* The root data structure is an array of sectors.  The index of the
   youngest sector is recorded, and new translations are put into that
   sector.  When it fills up, we move along to the next sector and
   start to fill that up, wrapping around at the end of the array.
   That way, once all n_sectors have been brought into use for the
   first time, and are full, we then re-use the oldest sector,
   endlessly.

   When running, youngest_sector should be >= 0 and < n_sectors.
   The initial -1 value indicates the TT/TC system is not yet
   initialised.
*/
static Sector sectors[MAX_N_SECTORS];
static Int    youngest_sector = -1;
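
/* A minimal sketch (not compiled) of the advance-and-wrap step
   described above, as it appears later in VG_(add_to_transtab): when
   the youngest sector fills, we step to the next slot, wrapping to 0,
   and (re)initialise it, which recycles the oldest translations. */
#if 0
   youngest_sector++;
   if (youngest_sector >= n_sectors)
      youngest_sector = 0;
   initialiseSector(youngest_sector);
#endif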

/* The number of ULongs in each TCEntry area.  This is computed once
   at startup and does not change. */
static Int    tc_sector_szQ = 0;


/* A list of sector numbers, in the order which they should be
   searched to find translations.  This is an optimisation to be used
   when searching for translations and should not affect
   correctness.  -1 denotes "no entry". */
static Int sector_search_order[MAX_N_SECTORS];


/* Fast helper for the TC.  A direct-mapped cache which holds a set of
   recently used (guest address, host address) pairs.  This array is
   referred to directly from m_dispatch/dispatch-<platform>.S.

   Entries in tt_fast may refer to any valid TC entry, regardless of
   which sector it's in.  Consequently we must be very careful to
   invalidate this cache when TC entries are changed or disappear.

   A special .guest address -- TRANSTAB_BOGUS_GUEST_ADDR -- must be
   stored in an entry to force it to miss.  This relies on the
   assumption that no guest code actually has that address, hence the
   value 0x1 seems good.  m_translate gives the client a synthetic
   segfault if it tries to execute at this address.
*/
/*
typedef
   struct {
      Addr guest;
      Addr host;
   }
   FastCacheEntry;
*/
/*global*/ __attribute__((aligned(16)))
           FastCacheEntry VG_(tt_fast)[VG_TT_FAST_SIZE];
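
/* The dispatcher's probe of this cache is, in C terms, roughly the
   sketch below; the real probe is hand-written assembly in
   dispatch-<platform>.S.  VG_TT_FAST_HASH is the
   guest-address-to-index macro from pub_core_transtab.h.  The zero
   return here is just this sketch's convention for "miss; fall back
   to the full translation-table lookup". */
#if 0
static inline Addr fast_lookup ( Addr guest )
{
   FastCacheEntry* e = &VG_(tt_fast)[ VG_TT_FAST_HASH(guest) ];
   return e->guest == guest ? e->host : 0;  /* 0 => miss */
}
#endif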

/* Make sure we're not used before initialisation. */
static Bool init_done = False;


/*------------------ STATS DECLS ------------------*/

/* Number of fast-cache updates and flushes done. */
static ULong n_fast_flushes = 0;
static ULong n_fast_updates = 0;

/* Number of full lookups done. */
static ULong n_full_lookups = 0;
static ULong n_lookup_probes = 0;

/* Number/osize/tsize of translations entered; also the number of
   those for which self-checking was requested. */
static ULong n_in_count    = 0;
static ULong n_in_osize    = 0;
static ULong n_in_tsize    = 0;
static ULong n_in_sc_count = 0;

/* Number/osize of translations discarded due to lack of space. */
static ULong n_dump_count = 0;
static ULong n_dump_osize = 0;

/* Number/osize of translations discarded due to requests to do so. */
static ULong n_disc_count = 0;
static ULong n_disc_osize = 0;


/*-------------------------------------------------------------*/
/*--- Misc                                                  ---*/
/*-------------------------------------------------------------*/

static void* ttaux_malloc ( const HChar* tag, SizeT n )
{
   return VG_(arena_malloc)(VG_AR_TTAUX, tag, n);
}

static void ttaux_free ( void* p )
{
   VG_(arena_free)(VG_AR_TTAUX, p);
}


/*-------------------------------------------------------------*/
/*--- Chaining support                                      ---*/
/*-------------------------------------------------------------*/

static inline TTEntry* index_tte ( UInt sNo, UInt tteNo )
{
   vg_assert(sNo < n_sectors);
   vg_assert(tteNo < N_TTES_PER_SECTOR);
   Sector* s = &sectors[sNo];
   vg_assert(s->tt);
   TTEntry* tte = &s->tt[tteNo];
   vg_assert(tte->status == InUse);
   return tte;
}

static void InEdge__init ( InEdge* ie )
{
   ie->from_sNo   = -1; /* invalid */
   ie->from_tteNo = 0;
   ie->from_offs  = 0;
   ie->to_fastEP  = False;
}

static void OutEdge__init ( OutEdge* oe )
{
   oe->to_sNo    = -1; /* invalid */
   oe->to_tteNo  = 0;
   oe->from_offs = 0;
}

static void TTEntry__init ( TTEntry* tte )
{
   VG_(memset)(tte, 0, sizeof(*tte));
}

static UWord InEdgeArr__size ( InEdgeArr* iea )
{
   if (iea->var) {
      vg_assert(iea->n_fixed == 0);
      return VG_(sizeXA)(iea->var);
   } else {
      vg_assert(iea->n_fixed <= N_FIXED_IN_EDGE_ARR);
      return iea->n_fixed;
   }
}

static void InEdgeArr__makeEmpty ( InEdgeArr* iea )
{
   if (iea->var) {
      vg_assert(iea->n_fixed == 0);
      VG_(deleteXA)(iea->var);
      iea->var = NULL;
   } else {
      vg_assert(iea->n_fixed <= N_FIXED_IN_EDGE_ARR);
      iea->n_fixed = 0;
   }
}

static
InEdge* InEdgeArr__index ( InEdgeArr* iea, UWord i )
{
   if (iea->var) {
      vg_assert(iea->n_fixed == 0);
      return (InEdge*)VG_(indexXA)(iea->var, i);
   } else {
      vg_assert(i < iea->n_fixed);
      return &iea->fixed[i];
   }
}

static
void InEdgeArr__deleteIndex ( InEdgeArr* iea, UWord i )
{
   if (iea->var) {
      vg_assert(iea->n_fixed == 0);
      VG_(removeIndexXA)(iea->var, i);
   } else {
      vg_assert(i < iea->n_fixed);
      for (; i+1 < iea->n_fixed; i++) {
         iea->fixed[i] = iea->fixed[i+1];
      }
      iea->n_fixed--;
   }
}

static
void InEdgeArr__add ( InEdgeArr* iea, InEdge* ie )
{
   if (iea->var) {
      vg_assert(iea->n_fixed == 0);
      VG_(addToXA)(iea->var, ie);
   } else {
      vg_assert(iea->n_fixed <= N_FIXED_IN_EDGE_ARR);
      if (iea->n_fixed == N_FIXED_IN_EDGE_ARR) {
         /* The fixed array is full, so we have to initialise an
            XArray and copy the fixed array into it. */
         iea->var = VG_(newXA)(ttaux_malloc, "transtab.IEA__add",
                               ttaux_free,
                               sizeof(InEdge));
         UWord i;
         for (i = 0; i < iea->n_fixed; i++) {
            VG_(addToXA)(iea->var, &iea->fixed[i]);
         }
         VG_(addToXA)(iea->var, ie);
         iea->n_fixed = 0;
      } else {
         /* Just add to the fixed array. */
         iea->fixed[iea->n_fixed++] = *ie;
      }
   }
}

static UWord OutEdgeArr__size ( OutEdgeArr* oea )
{
   if (oea->var) {
      vg_assert(oea->n_fixed == 0);
      return VG_(sizeXA)(oea->var);
   } else {
      vg_assert(oea->n_fixed <= N_FIXED_OUT_EDGE_ARR);
      return oea->n_fixed;
   }
}

static void OutEdgeArr__makeEmpty ( OutEdgeArr* oea )
{
   if (oea->var) {
      vg_assert(oea->n_fixed == 0);
      VG_(deleteXA)(oea->var);
      oea->var = NULL;
   } else {
      vg_assert(oea->n_fixed <= N_FIXED_OUT_EDGE_ARR);
      oea->n_fixed = 0;
   }
}

static
OutEdge* OutEdgeArr__index ( OutEdgeArr* oea, UWord i )
{
   if (oea->var) {
      vg_assert(oea->n_fixed == 0);
      return (OutEdge*)VG_(indexXA)(oea->var, i);
   } else {
      vg_assert(i < oea->n_fixed);
      return &oea->fixed[i];
   }
}

static
void OutEdgeArr__deleteIndex ( OutEdgeArr* oea, UWord i )
{
   if (oea->var) {
      vg_assert(oea->n_fixed == 0);
      VG_(removeIndexXA)(oea->var, i);
   } else {
      vg_assert(i < oea->n_fixed);
      for (; i+1 < oea->n_fixed; i++) {
         oea->fixed[i] = oea->fixed[i+1];
      }
      oea->n_fixed--;
   }
}

static
void OutEdgeArr__add ( OutEdgeArr* oea, OutEdge* oe )
{
   if (oea->var) {
      vg_assert(oea->n_fixed == 0);
      VG_(addToXA)(oea->var, oe);
   } else {
      vg_assert(oea->n_fixed <= N_FIXED_OUT_EDGE_ARR);
      if (oea->n_fixed == N_FIXED_OUT_EDGE_ARR) {
         /* The fixed array is full, so we have to initialise an
            XArray and copy the fixed array into it. */
         oea->var = VG_(newXA)(ttaux_malloc, "transtab.OEA__add",
                               ttaux_free,
                               sizeof(OutEdge));
         UWord i;
         for (i = 0; i < oea->n_fixed; i++) {
            VG_(addToXA)(oea->var, &oea->fixed[i]);
         }
         VG_(addToXA)(oea->var, oe);
         oea->n_fixed = 0;
      } else {
         /* Just add to the fixed array. */
         oea->fixed[oea->n_fixed++] = *oe;
      }
   }
}

static
Int HostExtent__cmpOrd ( const void* v1, const void* v2 )
{
   const HostExtent* hx1 = v1;
   const HostExtent* hx2 = v2;
   if (hx1->start + hx1->len <= hx2->start) return -1;
   if (hx2->start + hx2->len <= hx1->start) return 1;
   return 0; /* partial overlap */
}
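
/* Note that a search key of length 1, as used by
   find_TTEntry_from_hcode() below, compares equal to exactly the one
   extent containing it, since extents never overlap. */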

/* True if hx is a dead host extent, i.e. corresponds to host code
   of an entry that was invalidated. */
static
Bool HostExtent__is_dead (const HostExtent* hx, const Sector* sec)
{
   const UInt tteNo = hx->tteNo;
#define LDEBUG(m) if (DEBUG_TRANSTAB)                           \
      VG_(printf) (m                                            \
                   " start 0x%p len %u sector %d ttslot %u"     \
                   " tt.entry 0x%llx tt.tcptr 0x%p\n",          \
                   hx->start, hx->len, (int)(sec - sectors),    \
                   hx->tteNo,                                   \
                   sec->tt[tteNo].entry, sec->tt[tteNo].tcptr)

   /* Entry might have been invalidated and not re-used yet. */
   if (sec->tt[tteNo].status == Deleted) {
      LDEBUG("found deleted entry");
      return True;
   }
   /* Maybe we found this entry via a host_extent which was
      inserted for an entry that was changed to Deleted and then
      re-used afterwards.  If this entry was re-used, then its tcptr
      is >= the host_extent's start (i.e. the previous tcptr) + len.
      This is the case as there is no re-use of host code: a new or
      re-used entry always gets "higher value" host code. */
   if ((UChar*) sec->tt[tteNo].tcptr >= hx->start + hx->len) {
      LDEBUG("found re-used entry");
      return True;
   }

   return False;
#undef LDEBUG
}

static __attribute__((noinline))
Bool find_TTEntry_from_hcode( /*OUT*/UInt* from_sNo,
                              /*OUT*/UInt* from_tteNo,
                              void* hcode )
{
   Int i;

   /* Search order logic copied from VG_(search_transtab). */
   for (i = 0; i < n_sectors; i++) {
      Int sno = sector_search_order[i];
      if (UNLIKELY(sno == -1))
         return False; /* run out of sectors to search */

      Sector* sec = &sectors[sno];
      XArray* /* of HostExtent */ host_extents = sec->host_extents;
      vg_assert(host_extents);

      HostExtent key;
      VG_(memset)(&key, 0, sizeof(key));
      key.start = hcode;
      key.len = 1;
      Word firstW = -1, lastW = -1;
      Bool found  = VG_(lookupXA_UNSAFE)(
                       host_extents, &key, &firstW, &lastW,
                       HostExtent__cmpOrd );
      vg_assert(firstW == lastW); // always true, even if not found
      if (found) {
         HostExtent* hx = VG_(indexXA)(host_extents, firstW);
         UInt tteNo = hx->tteNo;
         /* Do some additional sanity checks. */
         vg_assert(tteNo < N_TTES_PER_SECTOR);

         /* If this hx entry corresponds to dead host code, we must
            report that this code has not been found, as it cannot be
            patched. */
         if (HostExtent__is_dead (hx, sec))
            return False;

         vg_assert(sec->tt[tteNo].status == InUse);
         /* Can only half check that the found TTEntry contains hcode,
            due to not having a length value for the hcode in the
            TTEntry. */
         vg_assert((UChar*)sec->tt[tteNo].tcptr <= (UChar*)hcode);
         /* Looks plausible */
         *from_sNo   = sno;
         *from_tteNo = (UInt)tteNo;
         return True;
      }
   }
   return False;
}


/* Figure out whether or not hcode is jitted code present in the main
   code cache (but not in the no-redir cache).  Used for sanity
   checking. */
static Bool is_in_the_main_TC ( void* hcode )
{
   Int i, sno;
   for (i = 0; i < n_sectors; i++) {
      sno = sector_search_order[i];
      if (sno == -1)
         break; /* run out of sectors to search */
      if ((UChar*)hcode >= (UChar*)sectors[sno].tc
          && (UChar*)hcode <= (UChar*)sectors[sno].tc_next
                              + sizeof(ULong) - 1)
         return True;
   }
   return False;
}


/* Fulfill a chaining request, and record admin info so we
   can undo it later, if required.
*/
void VG_(tt_tc_do_chaining) ( void* from__patch_addr,
                              UInt  to_sNo,
                              UInt  to_tteNo,
                              Bool  to_fastEP )
{
   /* Get the CPU info established at startup. */
   VexArch     arch_host = VexArch_INVALID;
   VexArchInfo archinfo_host;
   VG_(bzero_inline)(&archinfo_host, sizeof(archinfo_host));
   VG_(machine_get_VexArchInfo)( &arch_host, &archinfo_host );
   VexEndness endness_host = archinfo_host.endness;

   // host_code is where we're patching to.  So it needs to
   // take into account whether we're jumping to the slow
   // or fast entry point.  By definition, the fast entry point
   // is exactly one event check's worth of code along from
   // the slow (tcptr) entry point.
   TTEntry* to_tte    = index_tte(to_sNo, to_tteNo);
   void*    host_code = ((UChar*)to_tte->tcptr)
                        + (to_fastEP ? LibVEX_evCheckSzB(arch_host,
                                                         endness_host) : 0);

   // stay sane -- the patch point (dst) is in this sector's code cache
   vg_assert( (UChar*)host_code >= (UChar*)sectors[to_sNo].tc );
   vg_assert( (UChar*)host_code <= (UChar*)sectors[to_sNo].tc_next
                                   + sizeof(ULong) - 1 );

   /* Find the TTEntry for the from__ code.  This isn't simple since
      we only know the patch address, which is going to be somewhere
      inside the from_ block. */
   UInt from_sNo   = (UInt)-1;
   UInt from_tteNo = (UInt)-1;
   Bool from_found
      = find_TTEntry_from_hcode( &from_sNo, &from_tteNo,
                                 from__patch_addr );
   if (!from_found) {
      // The from code might have been discarded due to sector re-use
      // or marked Deleted due to translation invalidation.
      // In such a case, don't do the chaining.
      VG_(debugLog)(1,"transtab",
                    "host code %p not found (discarded? sector recycled?)"
                    " => no chaining done\n",
                    from__patch_addr);
      return;
   }

   TTEntry* from_tte = index_tte(from_sNo, from_tteNo);

   /* Get VEX to do the patching itself.  We have to hand it off
      since it is host-dependent. */
   VexInvalRange vir
      = LibVEX_Chain(
           arch_host, endness_host,
           from__patch_addr,
           VG_(fnptr_to_fnentry)(
              to_fastEP ? &VG_(disp_cp_chain_me_to_fastEP)
                        : &VG_(disp_cp_chain_me_to_slowEP)),
           (void*)host_code
        );
   VG_(invalidate_icache)( (void*)vir.start, vir.len );

   /* Now do the tricky bit -- update the in_edges and out_edges info
      for the two translations involved, so we can undo the chaining
      later, which we will have to do if the to_ block gets removed
      for whatever reason. */

   /* This is the new from_ -> to_ link to add. */
   InEdge ie;
   InEdge__init(&ie);
   ie.from_sNo   = from_sNo;
   ie.from_tteNo = from_tteNo;
   ie.to_fastEP  = to_fastEP;
   HWord from_offs = (HWord)( (UChar*)from__patch_addr
                              - (UChar*)from_tte->tcptr );
   vg_assert(from_offs < 100000/* let's say */);
   ie.from_offs  = (UInt)from_offs;

   /* This is the new to_ -> from_ backlink to add. */
   OutEdge oe;
   OutEdge__init(&oe);
   oe.to_sNo    = to_sNo;
   oe.to_tteNo  = to_tteNo;
   oe.from_offs = (UInt)from_offs;

   /* Add .. */
   InEdgeArr__add(&to_tte->in_edges, &ie);
   OutEdgeArr__add(&from_tte->out_edges, &oe);
}


/* Unchain one patch, as described by the specified InEdge.  For
   sanity check purposes only (to check that the patched location is
   as expected) it also requires the fast and slow entry point
   addresses of the destination block (that is, the block that owns
   this InEdge). */
__attribute__((noinline))
static void unchain_one ( VexArch arch_host, VexEndness endness_host,
                          InEdge* ie,
                          void* to_fastEPaddr, void* to_slowEPaddr )
{
   vg_assert(ie);
   TTEntry* tte
      = index_tte(ie->from_sNo, ie->from_tteNo);
   UChar* place_to_patch
      = ((UChar*)tte->tcptr) + ie->from_offs;
   UChar* disp_cp_chain_me
      = VG_(fnptr_to_fnentry)(
           ie->to_fastEP ? &VG_(disp_cp_chain_me_to_fastEP)
                         : &VG_(disp_cp_chain_me_to_slowEP)
        );
   UChar* place_to_jump_to_EXPECTED
      = ie->to_fastEP ? to_fastEPaddr : to_slowEPaddr;

   // stay sane: both src and dst for this unchaining are
   // in the main code cache
   vg_assert( is_in_the_main_TC(place_to_patch) ); // src
   vg_assert( is_in_the_main_TC(place_to_jump_to_EXPECTED) ); // dst
   // dst check is ok because LibVEX_UnChain checks that
   // place_to_jump_to_EXPECTED really is the current dst, and
   // asserts if it isn't.
   VexInvalRange vir
       = LibVEX_UnChain( arch_host, endness_host, place_to_patch,
                         place_to_jump_to_EXPECTED, disp_cp_chain_me );
   VG_(invalidate_icache)( (void*)vir.start, vir.len );
}


/* The specified block is about to be deleted.  Update the preds and
   succs of its associated blocks accordingly.  This includes undoing
   any chained jumps to this block. */
static
void unchain_in_preparation_for_deletion ( VexArch arch_host,
                                           VexEndness endness_host,
                                           UInt here_sNo, UInt here_tteNo )
{
   if (DEBUG_TRANSTAB)
      VG_(printf)("QQQ unchain_in_prep %u.%u...\n", here_sNo, here_tteNo);
   UWord    i, j, n, m;
   Int      evCheckSzB = LibVEX_evCheckSzB(arch_host, endness_host);
   TTEntry* here_tte   = index_tte(here_sNo, here_tteNo);
   if (DEBUG_TRANSTAB)
      VG_(printf)("... QQQ tt.entry 0x%llx tt.tcptr 0x%p\n",
                  here_tte->entry, here_tte->tcptr);
   vg_assert(here_tte->status == InUse);

   /* Visit all InEdges owned by here_tte. */
   n = InEdgeArr__size(&here_tte->in_edges);
   for (i = 0; i < n; i++) {
      InEdge* ie = InEdgeArr__index(&here_tte->in_edges, i);
      // Undo the chaining.
      UChar* here_slow_EP = (UChar*)here_tte->tcptr;
      UChar* here_fast_EP = here_slow_EP + evCheckSzB;
      unchain_one(arch_host, endness_host, ie, here_fast_EP, here_slow_EP);
      // Find the corresponding entry in the "from" node's out_edges,
      // and remove it.
      TTEntry* from_tte = index_tte(ie->from_sNo, ie->from_tteNo);
      m = OutEdgeArr__size(&from_tte->out_edges);
      vg_assert(m > 0); // it must have at least one entry
      for (j = 0; j < m; j++) {
         OutEdge* oe = OutEdgeArr__index(&from_tte->out_edges, j);
         if (oe->to_sNo == here_sNo && oe->to_tteNo == here_tteNo
             && oe->from_offs == ie->from_offs)
           break;
      }
      vg_assert(j < m); // "oe must be findable"
      OutEdgeArr__deleteIndex(&from_tte->out_edges, j);
   }

   /* Visit all OutEdges owned by here_tte. */
   n = OutEdgeArr__size(&here_tte->out_edges);
   for (i = 0; i < n; i++) {
      OutEdge* oe = OutEdgeArr__index(&here_tte->out_edges, i);
      // Find the corresponding entry in the "to" node's in_edges,
      // and remove it.
      TTEntry* to_tte = index_tte(oe->to_sNo, oe->to_tteNo);
      m = InEdgeArr__size(&to_tte->in_edges);
      vg_assert(m > 0); // it must have at least one entry
      for (j = 0; j < m; j++) {
         InEdge* ie = InEdgeArr__index(&to_tte->in_edges, j);
         if (ie->from_sNo == here_sNo && ie->from_tteNo == here_tteNo
             && ie->from_offs == oe->from_offs)
           break;
      }
      vg_assert(j < m); // "ie must be findable"
      InEdgeArr__deleteIndex(&to_tte->in_edges, j);
   }

   InEdgeArr__makeEmpty(&here_tte->in_edges);
   OutEdgeArr__makeEmpty(&here_tte->out_edges);
}


/*-------------------------------------------------------------*/
/*--- Address-range equivalence class stuff                 ---*/
/*-------------------------------------------------------------*/

/* Return equivalence class number for a range. */

static Int range_to_eclass ( Addr64 start, UInt len )
{
   UInt mask   = (1 << ECLASS_WIDTH) - 1;
   UInt lo     = (UInt)start;
   UInt hi     = lo + len - 1;
   UInt loBits = (lo >> ECLASS_SHIFT) & mask;
   UInt hiBits = (hi >> ECLASS_SHIFT) & mask;
   if (loBits == hiBits) {
      vg_assert(loBits < ECLASS_N-1);
      return loBits;
   } else {
      return ECLASS_MISC;
   }
}


/* Calculates the equivalence class numbers for any VexGuestExtents.
   These are written in *eclasses, which must be big enough to hold 3
   Ints.  The number written, between 1 and 3, is returned.  The
   eclasses are presented in order, and any duplicates are removed.
*/

static
Int vexGuestExtents_to_eclasses ( /*OUT*/Int* eclasses,
                                  VexGuestExtents* vge )
{
#  define SWAP(_lv1,_lv2) \
      do { Int t = _lv1; _lv1 = _lv2; _lv2 = t; } while (0)

   Int i, j, n_ec, r;

   vg_assert(vge->n_used >= 1 && vge->n_used <= 3);

   n_ec = 0;
   for (i = 0; i < vge->n_used; i++) {
      r = range_to_eclass( vge->base[i], (UInt)vge->len[i] );
      if (r == ECLASS_MISC)
         goto bad;
      /* only add if we haven't already seen it */
      for (j = 0; j < n_ec; j++)
         if (eclasses[j] == r)
            break;
      if (j == n_ec)
         eclasses[n_ec++] = r;
   }

   if (n_ec == 1)
      return 1;

   if (n_ec == 2) {
      /* sort */
      if (eclasses[0] > eclasses[1])
         SWAP(eclasses[0], eclasses[1]);
      return 2;
   }

   if (n_ec == 3) {
      /* sort */
      if (eclasses[0] > eclasses[2])
         SWAP(eclasses[0], eclasses[2]);
      if (eclasses[0] > eclasses[1])
         SWAP(eclasses[0], eclasses[1]);
      if (eclasses[1] > eclasses[2])
         SWAP(eclasses[1], eclasses[2]);
      return 3;
   }

   /* NOTREACHED */
   vg_assert(0);

  bad:
   eclasses[0] = ECLASS_MISC;
   return 1;

#  undef SWAP
}
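
/* For example, a vge whose three extents fall into eclass bins 5, 5
   and 3 yields eclasses = { 3, 5 } and a return value of 2; if any
   extent straddles a bin boundary, the result collapses to the single
   entry ECLASS_MISC. */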


/* Add tteno to the set of entries listed for equivalence class ec in
   this sector.  Returns the location used in the eclass array. */

static
UInt addEClassNo ( /*MOD*/Sector* sec, Int ec, UShort tteno )
{
   Int    old_sz, new_sz, i, r;
   UShort *old_ar, *new_ar;

   vg_assert(ec >= 0 && ec < ECLASS_N);
   vg_assert(tteno < N_TTES_PER_SECTOR);

   if (DEBUG_TRANSTAB) VG_(printf)("ec %d  gets %d\n", ec, (Int)tteno);

   if (sec->ec2tte_used[ec] >= sec->ec2tte_size[ec]) {

      vg_assert(sec->ec2tte_used[ec] == sec->ec2tte_size[ec]);

      old_sz = sec->ec2tte_size[ec];
      old_ar = sec->ec2tte[ec];
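      /* Growth sequence: 8, 16, 32, 64, 96, 144, ...: double while
         small, then grow by 1.5x to bound the over-allocation. */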
      new_sz = old_sz==0 ? 8 : old_sz<64 ? 2*old_sz : (3*old_sz)/2;
      new_ar = ttaux_malloc("transtab.aECN.1",
                            new_sz * sizeof(UShort));
      for (i = 0; i < old_sz; i++)
         new_ar[i] = old_ar[i];
      if (old_ar)
         ttaux_free(old_ar);
      sec->ec2tte_size[ec] = new_sz;
      sec->ec2tte[ec] = new_ar;

      if (DEBUG_TRANSTAB) VG_(printf)("expand ec %d to %d\n", ec, new_sz);
   }

   /* Common case */
   r = sec->ec2tte_used[ec]++;
   vg_assert(r >= 0 && r < sec->ec2tte_size[ec]);
   sec->ec2tte[ec][r] = tteno;
   return (UInt)r;
}


/* 'vge' is being added to 'sec' at TT entry 'tteno'.  Add appropriate
   eclass entries to 'sec'. */

static
void upd_eclasses_after_add ( /*MOD*/Sector* sec, Int tteno )
{
   Int i, r, eclasses[3];
   TTEntry* tte;
   vg_assert(tteno >= 0 && tteno < N_TTES_PER_SECTOR);

   tte = &sec->tt[tteno];
   r = vexGuestExtents_to_eclasses( eclasses, &tte->vge );

   vg_assert(r >= 1 && r <= 3);
   tte->n_tte2ec = r;

   for (i = 0; i < r; i++) {
      tte->tte2ec_ec[i] = eclasses[i];
      tte->tte2ec_ix[i] = addEClassNo( sec, eclasses[i], (UShort)tteno );
   }
}


/* Check the eclass info in 'sec' to ensure it is consistent.  Returns
   True if OK, False if something's not right.  Expensive. */

static Bool sanity_check_eclasses_in_sector ( Sector* sec )
{
#  define BAD(_str) do { whassup = (_str); goto bad; } while (0)

   const HChar* whassup = NULL;
   Int      i, j, k, n, ec_num, ec_idx;
   TTEntry* tte;
   UShort   tteno;
   ULong*   tce;

   /* Basic checks on this sector */
   if (sec->tt_n_inuse < 0 || sec->tt_n_inuse > N_TTES_PER_SECTOR_USABLE)
      BAD("invalid sec->tt_n_inuse");
   tce = sec->tc_next;
   if (tce < &sec->tc[0] || tce > &sec->tc[tc_sector_szQ])
      BAD("sec->tc_next points outside tc");

   /* For each eclass ... */
   for (i = 0; i < ECLASS_N; i++) {
      if (sec->ec2tte_size[i] == 0 && sec->ec2tte[i] != NULL)
         BAD("ec2tte_size/ec2tte mismatch(1)");
      if (sec->ec2tte_size[i] != 0 && sec->ec2tte[i] == NULL)
         BAD("ec2tte_size/ec2tte mismatch(2)");
      if (sec->ec2tte_used[i] < 0
          || sec->ec2tte_used[i] > sec->ec2tte_size[i])
         BAD("implausible ec2tte_used");
      if (sec->ec2tte_used[i] == 0)
         continue;

      /* For each tt reference in each eclass .. ensure the reference
         is to a valid tt entry, and that the entry's address ranges
         really include this eclass. */

      for (j = 0; j < sec->ec2tte_used[i]; j++) {
         tteno = sec->ec2tte[i][j];
         if (tteno == EC2TTE_DELETED)
            continue;
         if (tteno >= N_TTES_PER_SECTOR)
            BAD("implausible tteno");
         tte = &sec->tt[tteno];
         if (tte->status != InUse)
            BAD("tteno points to non-inuse tte");
         if (tte->n_tte2ec < 1 || tte->n_tte2ec > 3)
            BAD("tte->n_tte2ec out of range");
         /* Exactly one of tte->tte2ec_ec[0 .. tte->n_tte2ec-1]
            must equal i.  Inspect tte's eclass info. */
         n = 0;
         for (k = 0; k < tte->n_tte2ec; k++) {
            if (k < tte->n_tte2ec-1
                && tte->tte2ec_ec[k] >= tte->tte2ec_ec[k+1])
               BAD("tte->tte2ec_ec[..] out of order");
            ec_num = tte->tte2ec_ec[k];
            if (ec_num < 0 || ec_num >= ECLASS_N)
               BAD("tte->tte2ec_ec[..] out of range");
            if (ec_num != i)
               continue;
            ec_idx = tte->tte2ec_ix[k];
            if (ec_idx < 0 || ec_idx >= sec->ec2tte_used[i])
               BAD("tte->tte2ec_ix[..] out of range");
            if (ec_idx == j)
               n++;
         }
         if (n != 1)
            BAD("tteno does not point back at eclass");
      }
   }

   /* That establishes that for each forward pointer from TTEntries
      there is a corresponding backward pointer from the eclass[]
      arrays.  However, it doesn't rule out the possibility of other,
      bogus pointers in the eclass[] arrays.  So do those similarly:
      scan through them and check that the TTEntries they point at
      point back. */

   for (i = 0; i < N_TTES_PER_SECTOR_USABLE; i++) {

      tte = &sec->tt[i];
      if (tte->status == Empty || tte->status == Deleted) {
         if (tte->n_tte2ec != 0)
            BAD("tte->n_eclasses nonzero for unused tte");
         continue;
      }

      vg_assert(tte->status == InUse);

      if (tte->n_tte2ec < 1 || tte->n_tte2ec > 3)
         BAD("tte->n_eclasses out of range(2)");

      for (j = 0; j < tte->n_tte2ec; j++) {
         ec_num = tte->tte2ec_ec[j];
         if (ec_num < 0 || ec_num >= ECLASS_N)
            BAD("tte->eclass[..] out of range");
         ec_idx = tte->tte2ec_ix[j];
         if (ec_idx < 0 || ec_idx >= sec->ec2tte_used[ec_num])
            BAD("tte->ec_idx[..] out of range(2)");
         if (sec->ec2tte[ec_num][ec_idx] != i)
            BAD("ec2tte does not point back to tte");
      }
   }

   return True;

  bad:
   if (whassup)
      VG_(debugLog)(0, "transtab", "eclass sanity fail: %s\n", whassup);

#  if 0
   VG_(printf)("eclass = %d\n", i);
   VG_(printf)("tteno = %d\n", (Int)tteno);
   switch (tte->status) {
      case InUse:   VG_(printf)("InUse\n"); break;
      case Deleted: VG_(printf)("Deleted\n"); break;
      case Empty:   VG_(printf)("Empty\n"); break;
   }
   if (tte->status != Empty) {
      for (k = 0; k < tte->vge.n_used; k++)
         VG_(printf)("0x%llx %d\n", tte->vge.base[k],
                                    (Int)tte->vge.len[k]);
   }
#  endif

   return False;

#  undef BAD
}


/* Sanity check absolutely everything.  True == check passed. */

/* forwards */
static Bool sanity_check_redir_tt_tc ( void );

static Bool sanity_check_sector_search_order ( void )
{
   Int i, j, nListed;
   /* assert the array is the right size */
   vg_assert(MAX_N_SECTORS == (sizeof(sector_search_order)
                               / sizeof(sector_search_order[0])));
   /* Check it's of the form  valid_sector_numbers ++ [-1, -1, ..] */
   for (i = 0; i < n_sectors; i++) {
      if (sector_search_order[i] < 0 || sector_search_order[i] >= n_sectors)
         break;
   }
   nListed = i;
   for (/* */; i < n_sectors; i++) {
      if (sector_search_order[i] != -1)
         break;
   }
   if (i != n_sectors)
      return False;
   /* Check each sector number only appears once */
   for (i = 0; i < n_sectors; i++) {
      if (sector_search_order[i] == -1)
         continue;
      for (j = i+1; j < n_sectors; j++) {
         if (sector_search_order[j] == sector_search_order[i])
            return False;
      }
   }
   /* Check that the number of listed sectors equals the number
      in use, by counting nListed back down. */
   for (i = 0; i < n_sectors; i++) {
      if (sectors[i].tc != NULL)
         nListed--;
   }
   if (nListed != 0)
      return False;
   return True;
}

static Bool sanity_check_all_sectors ( void )
{
   Int     sno;
   Bool    sane;
   Sector* sec;
   for (sno = 0; sno < n_sectors; sno++) {
      Int i;
      Int nr_not_dead_hx = 0;
      Int szhxa;
      sec = &sectors[sno];
      if (sec->tc == NULL)
         continue;
      sane = sanity_check_eclasses_in_sector( sec );
      if (!sane)
         return False;
      szhxa = VG_(sizeXA)(sec->host_extents);
      for (i = 0; i < szhxa; i++) {
         const HostExtent* hx = VG_(indexXA)(sec->host_extents, i);
         if (!HostExtent__is_dead (hx, sec))
            nr_not_dead_hx++;
      }
      if (nr_not_dead_hx != sec->tt_n_inuse) {
         VG_(debugLog)(0, "transtab",
                       "nr_not_dead_hx %d sanity fail (expected == in use %d)\n",
                       nr_not_dead_hx, sec->tt_n_inuse);
         return False;
      }
   }

   if ( !sanity_check_redir_tt_tc() )
      return False;
   if ( !sanity_check_sector_search_order() )
      return False;
   return True;
}



/*-------------------------------------------------------------*/
/*--- Add/find translations                                 ---*/
/*-------------------------------------------------------------*/

static UInt vge_osize ( VexGuestExtents* vge )
{
   UInt i, n = 0;
   for (i = 0; i < vge->n_used; i++)
      n += (UInt)vge->len[i];
   return n;
}

static Bool isValidSector ( Int sector )
{
   if (sector < 0 || sector >= n_sectors)
      return False;
   return True;
}

static inline UInt HASH_TT ( Addr64 key )
{
   UInt kHi = (UInt)(key >> 32);
   UInt kLo = (UInt)key;
   UInt k32 = kHi ^ kLo;
   UInt ror = 7;
   if (ror > 0)
      k32 = (k32 >> ror) | (k32 << (32-ror));
   return k32 % N_TTES_PER_SECTOR;
}
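
/* Worked example: HASH_TT(0x401000ULL): kHi == 0 and kLo == 0x401000,
   so k32 == 0x401000; rotating right by 7 gives 0x8020 (the low 7
   bits are zero), and 0x8020 == 32800 is already < 65521, so the
   hash is 32800. */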

static void setFastCacheEntry ( Addr64 key, ULong* tcptr )
{
   UInt cno = (UInt)VG_TT_FAST_HASH(key);
   VG_(tt_fast)[cno].guest = (Addr)key;
   VG_(tt_fast)[cno].host  = (Addr)tcptr;
   n_fast_updates++;
   /* This shouldn't fail.  It should be assured by m_translate,
      which should reject any attempt to make a translation of code
      starting at TRANSTAB_BOGUS_GUEST_ADDR. */
   vg_assert(VG_(tt_fast)[cno].guest != TRANSTAB_BOGUS_GUEST_ADDR);
1335}
1336
1337/* Invalidate the fast cache VG_(tt_fast). */
1338static void invalidateFastCache ( void )
1339{
1340   UInt j;
1341   /* This loop is popular enough to make it worth unrolling a
1342      bit, at least on ppc32. */
1343   vg_assert(VG_TT_FAST_SIZE > 0 && (VG_TT_FAST_SIZE % 4) == 0);
1344   for (j = 0; j < VG_TT_FAST_SIZE; j += 4) {
1345      VG_(tt_fast)[j+0].guest = TRANSTAB_BOGUS_GUEST_ADDR;
1346      VG_(tt_fast)[j+1].guest = TRANSTAB_BOGUS_GUEST_ADDR;
1347      VG_(tt_fast)[j+2].guest = TRANSTAB_BOGUS_GUEST_ADDR;
1348      VG_(tt_fast)[j+3].guest = TRANSTAB_BOGUS_GUEST_ADDR;
1349   }
1350
1351   vg_assert(j == VG_TT_FAST_SIZE);
1352   n_fast_flushes++;
1353}
1354
1355static void initialiseSector ( Int sno )
1356{
1357   Int     i;
1358   SysRes  sres;
1359   Sector* sec;
1360   vg_assert(isValidSector(sno));
1361
1362   { Bool sane = sanity_check_sector_search_order();
1363     vg_assert(sane);
1364   }
1365   sec = &sectors[sno];
1366
1367   if (sec->tc == NULL) {
1368
1369      /* Sector has never been used before.  Need to allocate tt and
1370         tc. */
1371      vg_assert(sec->tt == NULL);
1372      vg_assert(sec->tc_next == NULL);
1373      vg_assert(sec->tt_n_inuse == 0);
1374      for (i = 0; i < ECLASS_N; i++) {
1375         vg_assert(sec->ec2tte_size[i] == 0);
1376         vg_assert(sec->ec2tte_used[i] == 0);
1377         vg_assert(sec->ec2tte[i] == NULL);
1378      }
1379      vg_assert(sec->host_extents == NULL);
1380
1381      VG_(debugLog)(1,"transtab", "allocate sector %d\n", sno);
1382      if (VG_(clo_stats))
1383         VG_(dmsg)("transtab: " "allocate sector %d\n", sno);
1384
1385      sres = VG_(am_mmap_anon_float_valgrind)( 8 * tc_sector_szQ );
1386      if (sr_isError(sres)) {
1387         VG_(out_of_memory_NORETURN)("initialiseSector(TC)",
1388                                     8 * tc_sector_szQ );
1389	 /*NOTREACHED*/
1390      }
1391      sec->tc = (ULong*)(AddrH)sr_Res(sres);
1392
1393      sres = VG_(am_mmap_anon_float_valgrind)
1394                ( N_TTES_PER_SECTOR * sizeof(TTEntry) );
1395      if (sr_isError(sres)) {
1396         VG_(out_of_memory_NORETURN)("initialiseSector(TT)",
1397                                     N_TTES_PER_SECTOR * sizeof(TTEntry) );
1398	 /*NOTREACHED*/
1399      }
1400      sec->tt = (TTEntry*)(AddrH)sr_Res(sres);
1401
1402      for (i = 0; i < N_TTES_PER_SECTOR; i++) {
1403         sec->tt[i].status   = Empty;
1404         sec->tt[i].n_tte2ec = 0;
1405      }
1406
1407      /* Set up the host_extents array. */
1408      sec->host_extents
1409         = VG_(newXA)(ttaux_malloc, "transtab.initialiseSector(host_extents)",
1410                      ttaux_free,
1411                      sizeof(HostExtent));
1412
1413      /* Add an entry in the sector_search_order */
1414      for (i = 0; i < n_sectors; i++) {
1415         if (sector_search_order[i] == -1)
1416            break;
1417      }
1418      vg_assert(i >= 0 && i < n_sectors);
1419      sector_search_order[i] = sno;
1420
1421      if (VG_(clo_verbosity) > 2)
1422         VG_(message)(Vg_DebugMsg, "TT/TC: initialise sector %d\n", sno);
1423
1424   } else {
1425
1426      /* Sector has been used before.  Dump the old contents. */
1427      VG_(debugLog)(1,"transtab", "recycle sector %d\n", sno);
1428      if (VG_(clo_stats))
1429         VG_(dmsg)("transtab: " "recycle sector %d\n", sno);
1430
1431      vg_assert(sec->tt != NULL);
1432      vg_assert(sec->tc_next != NULL);
1433      n_dump_count += sec->tt_n_inuse;
1434
1435      VexArch     arch_host = VexArch_INVALID;
1436      VexArchInfo archinfo_host;
1437      VG_(bzero_inline)(&archinfo_host, sizeof(archinfo_host));
1438      VG_(machine_get_VexArchInfo)( &arch_host, &archinfo_host );
1439      VexEndness endness_host = archinfo_host.endness;
1440
1441      /* Visit each just-about-to-be-abandoned translation. */
1442      if (DEBUG_TRANSTAB) VG_(printf)("QQQ unlink-entire-sector: %d START\n",
1443                                      sno);
1444      for (i = 0; i < N_TTES_PER_SECTOR; i++) {
1445         if (sec->tt[i].status == InUse) {
1446            vg_assert(sec->tt[i].n_tte2ec >= 1);
1447            vg_assert(sec->tt[i].n_tte2ec <= 3);
1448            n_dump_osize += vge_osize(&sec->tt[i].vge);
1449            /* Tell the tool too. */
1450            if (VG_(needs).superblock_discards) {
1451               VG_TDICT_CALL( tool_discard_superblock_info,
1452                              sec->tt[i].entry,
1453                              sec->tt[i].vge );
1454            }
1455            unchain_in_preparation_for_deletion(arch_host,
1456                                                endness_host, sno, i);
1457         } else {
1458            vg_assert(sec->tt[i].n_tte2ec == 0);
1459         }
1460         sec->tt[i].status   = Empty;
1461         sec->tt[i].n_tte2ec = 0;
1462      }
1463      if (DEBUG_TRANSTAB) VG_(printf)("QQQ unlink-entire-sector: %d END\n",
1464                                      sno);
1465
1466      /* Free up the eclass structures. */
1467      for (i = 0; i < ECLASS_N; i++) {
1468         if (sec->ec2tte_size[i] == 0) {
1469            vg_assert(sec->ec2tte_used[i] == 0);
1470            vg_assert(sec->ec2tte[i] == NULL);
1471         } else {
1472            vg_assert(sec->ec2tte[i] != NULL);
1473            ttaux_free(sec->ec2tte[i]);
1474            sec->ec2tte[i] = NULL;
1475            sec->ec2tte_size[i] = 0;
1476            sec->ec2tte_used[i] = 0;
1477         }
1478      }
1479
1480      /* Empty out the host extents array. */
1481      vg_assert(sec->host_extents != NULL);
1482      VG_(dropTailXA)(sec->host_extents, VG_(sizeXA)(sec->host_extents));
1483      vg_assert(VG_(sizeXA)(sec->host_extents) == 0);
1484
1485      /* Sanity check: ensure it is already in
1486         sector_search_order[]. */
1487      for (i = 0; i < n_sectors; i++) {
1488         if (sector_search_order[i] == sno)
1489            break;
1490      }
1491      vg_assert(i >= 0 && i < n_sectors);
1492
1493      if (VG_(clo_verbosity) > 2)
1494         VG_(message)(Vg_DebugMsg, "TT/TC: recycle sector %d\n", sno);
1495   }
1496
1497   sec->tc_next = sec->tc;
1498   sec->tt_n_inuse = 0;
1499
1500   invalidateFastCache();
1501
1502   { Bool sane = sanity_check_sector_search_order();
1503     vg_assert(sane);
1504   }
1505}


/* Add a translation of vge to TT/TC.  The translation is temporarily
   in code[0 .. code_len-1].

   pre: youngest_sector points to a valid (although possibly full)
   sector.
*/
void VG_(add_to_transtab)( VexGuestExtents* vge,
                           Addr64           entry,
                           AddrH            code,
                           UInt             code_len,
                           Bool             is_self_checking,
                           Int              offs_profInc,
                           UInt             n_guest_instrs )
{
   Int    tcAvailQ, reqdQ, y, i;
   ULong  *tcptr, *tcptr2;
   UChar* srcP;
   UChar* dstP;

   vg_assert(init_done);
   vg_assert(vge->n_used >= 1 && vge->n_used <= 3);

   /* 60000: should agree with N_TMPBUF in m_translate.c. */
   vg_assert(code_len > 0 && code_len < 60000);

   /* Generally stay sane */
   vg_assert(n_guest_instrs < 200); /* it can be zero, though */

   if (DEBUG_TRANSTAB)
      VG_(printf)("add_to_transtab(entry = 0x%llx, len = %d) ...\n",
                  entry, code_len);

   n_in_count++;
   n_in_tsize += code_len;
   n_in_osize += vge_osize(vge);
   if (is_self_checking)
      n_in_sc_count++;

   y = youngest_sector;
   vg_assert(isValidSector(y));

   if (sectors[y].tc == NULL)
      initialiseSector(y);

   /* Try putting the translation in this sector. */
   reqdQ = (code_len + 7) >> 3;
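   /* reqdQ is code_len rounded up to whole 8-byte (ULong) quanta,
      since tc space is handed out in ULong units: for example,
      code_len == 13 gives reqdQ == 2, reserving 16 bytes. */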

   /* Will it fit in tc? */
   tcAvailQ = ((ULong*)(&sectors[y].tc[tc_sector_szQ]))
              - ((ULong*)(sectors[y].tc_next));
   vg_assert(tcAvailQ >= 0);
   vg_assert(tcAvailQ <= tc_sector_szQ);

   if (tcAvailQ < reqdQ
       || sectors[y].tt_n_inuse >= N_TTES_PER_SECTOR_USABLE) {
      /* No.  So move on to the next sector.  Either it's never been
         used before, in which case it will get its tt/tc allocated
         now, or it has been used before, in which case it is set to be
         empty, hence throwing out the oldest sector. */
      vg_assert(tc_sector_szQ > 0);
      Int tt_loading_pct = (100 * sectors[y].tt_n_inuse)
                           / N_TTES_PER_SECTOR;
      Int tc_loading_pct = (100 * (tc_sector_szQ - tcAvailQ))
                           / tc_sector_szQ;
      VG_(debugLog)(1,"transtab",
                      "declare sector %d full "
                      "(TT loading %2d%%, TC loading %2d%%)\n",
                      y, tt_loading_pct, tc_loading_pct);
      if (VG_(clo_stats)) {
         VG_(dmsg)("transtab: "
                   "declare sector %d full "
                   "(TT loading %2d%%, TC loading %2d%%)\n",
                   y, tt_loading_pct, tc_loading_pct);
      }
      youngest_sector++;
      if (youngest_sector >= n_sectors)
         youngest_sector = 0;
      y = youngest_sector;
      initialiseSector(y);
   }

   /* Be sure ... */
   tcAvailQ = ((ULong*)(&sectors[y].tc[tc_sector_szQ]))
              - ((ULong*)(sectors[y].tc_next));
   vg_assert(tcAvailQ >= 0);
   vg_assert(tcAvailQ <= tc_sector_szQ);
   vg_assert(tcAvailQ >= reqdQ);
   vg_assert(sectors[y].tt_n_inuse < N_TTES_PER_SECTOR_USABLE);
   vg_assert(sectors[y].tt_n_inuse >= 0);

   /* Copy into tc. */
   tcptr = sectors[y].tc_next;
   vg_assert(tcptr >= &sectors[y].tc[0]);
   vg_assert(tcptr <= &sectors[y].tc[tc_sector_szQ]);

   dstP = (UChar*)tcptr;
   srcP = (UChar*)code;
   VG_(memcpy)(dstP, srcP, code_len);
   sectors[y].tc_next += reqdQ;
   sectors[y].tt_n_inuse++;

   /* more paranoia */
   tcptr2 = sectors[y].tc_next;
   vg_assert(tcptr2 >= &sectors[y].tc[0]);
   vg_assert(tcptr2 <= &sectors[y].tc[tc_sector_szQ]);

   /* Find an empty tt slot, and use it.  There must be such a slot
      since tt is never allowed to get completely full. */
   i = HASH_TT(entry);
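   /* Open addressing with linear probing: scan forwards from the
      hash point, wrapping at the end of the table, until an Empty or
      Deleted slot appears.  Termination is guaranteed because the
      SECTOR_TT_LIMIT_PERCENT cap means at least 35% of the slots are
      always free. */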
   vg_assert(i >= 0 && i < N_TTES_PER_SECTOR);
   while (True) {
      if (sectors[y].tt[i].status == Empty
          || sectors[y].tt[i].status == Deleted)
         break;
      i++;
      if (i >= N_TTES_PER_SECTOR)
         i = 0;
   }

   TTEntry__init(&sectors[y].tt[i]);
   sectors[y].tt[i].status = InUse;
   sectors[y].tt[i].tcptr  = tcptr;
   sectors[y].tt[i].count  = 0;
   sectors[y].tt[i].weight = n_guest_instrs == 0 ? 1 : n_guest_instrs;
   sectors[y].tt[i].vge    = *vge;
   sectors[y].tt[i].entry  = entry;

   /* Patch in the profile counter location, if necessary. */
   if (offs_profInc != -1) {
      vg_assert(offs_profInc >= 0 && offs_profInc < code_len);
      VexArch     arch_host = VexArch_INVALID;
      VexArchInfo archinfo_host;
      VG_(bzero_inline)(&archinfo_host, sizeof(archinfo_host));
      VG_(machine_get_VexArchInfo)( &arch_host, &archinfo_host );
      VexEndness endness_host = archinfo_host.endness;
      VexInvalRange vir
         = LibVEX_PatchProfInc( arch_host, endness_host,
                                dstP + offs_profInc,
                                &sectors[y].tt[i].count );
      VG_(invalidate_icache)( (void*)vir.start, vir.len );
   }

   VG_(invalidate_icache)( dstP, code_len );
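   /* The invalidation matters on hosts with incoherent I- and
      D-caches (e.g. ARM, PPC, MIPS): the bytes just written via the
      D-cache must be made visible to instruction fetch before this
      translation can safely run.  On x86/amd64 the call is
      effectively a no-op. */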

   /* Add this entry to the host_extents map, checking that we're
      adding in order. */
   { HostExtent hx;
     hx.start = (UChar*)tcptr;
     hx.len   = code_len;
     hx.tteNo = i;
     vg_assert(hx.len > 0); /* bsearch fails w/ zero length entries */
     XArray* hx_array = sectors[y].host_extents;
     vg_assert(hx_array);
     Word n = VG_(sizeXA)(hx_array);
     if (n > 0) {
        HostExtent* hx_prev = (HostExtent*)VG_(indexXA)(hx_array, n-1);
        vg_assert(hx_prev->start + hx_prev->len <= hx.start);
     }
     VG_(addToXA)(hx_array, &hx);
     if (DEBUG_TRANSTAB)
        VG_(printf)("... hx.start 0x%p hx.len %u sector %d ttslot %d\n",
                    hx.start, hx.len, y, i);
   }

   /* Update the fast-cache. */
   setFastCacheEntry( entry, tcptr );

   /* Note the eclass numbers for this translation. */
   upd_eclasses_after_add( &sectors[y], i );
}
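
/* Illustrative only: the translation pipeline (m_translate.c) is the
   expected caller.  Having filled in a VexGuestExtents and emitted
   host code into a temporary buffer, it would register the result
   roughly as follows (the local names here are hypothetical):

      VexGuestExtents vge;    // filled in by the VEX translation step
      ...
      VG_(add_to_transtab)( &vge,
                            vge.base[0],     // guest entry point
                            (AddrH)tmpbuf,   // temporary host code
                            tmpbuf_used,     // its length in bytes
                            self_checking,   // self-checking transln?
                            offs_profInc,    // -1 if no profile ctr
                            n_guest_instrs );
*/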


/* Search for the translation of the given guest address.  If
   requested, a successful search can also cause the fast-caches to be
   updated.
*/
Bool VG_(search_transtab) ( /*OUT*/AddrH* res_hcode,
                            /*OUT*/UInt*  res_sNo,
                            /*OUT*/UInt*  res_tteNo,
                            Addr64        guest_addr,
                            Bool          upd_cache )
{
   Int i, j, k, kstart, sno;

   vg_assert(init_done);
   /* Find the initial probe point just once.  It is the same in all
      sectors, and computing it once avoids multiple expensive %
      operations. */
   n_full_lookups++;
   k      = -1;
   kstart = HASH_TT(guest_addr);
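   /* HASH_TT reduces (a hash of) the guest address modulo
      N_TTES_PER_SECTOR; since every sector's tt has the same size,
      the same kstart is a valid initial probe index in all of
      them. */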
   vg_assert(kstart >= 0 && kstart < N_TTES_PER_SECTOR);

   /* Search in all the sectors, using sector_search_order[] as a
      heuristic guide as to what order to visit the sectors. */
   for (i = 0; i < n_sectors; i++) {

      sno = sector_search_order[i];
      if (UNLIKELY(sno == -1))
         return False; /* run out of sectors to search */

      k = kstart;
      for (j = 0; j < N_TTES_PER_SECTOR; j++) {
         n_lookup_probes++;
         if (sectors[sno].tt[k].status == InUse
             && sectors[sno].tt[k].entry == guest_addr) {
            /* found it */
            if (upd_cache)
               setFastCacheEntry(
                  guest_addr, sectors[sno].tt[k].tcptr );
            if (res_hcode)
               *res_hcode = (AddrH)sectors[sno].tt[k].tcptr;
            if (res_sNo)
               *res_sNo = sno;
            if (res_tteNo)
               *res_tteNo = k;
            /* pull this one one step closer to the front.  For large
               apps this more or less halves the number of required
               probes. */
            if (i > 0) {
               Int tmp = sector_search_order[i-1];
               sector_search_order[i-1] = sector_search_order[i];
               sector_search_order[i] = tmp;
            }
            return True;
         }
         if (sectors[sno].tt[k].status == Empty)
            break; /* not found in this sector */
         k++;
         if (k == N_TTES_PER_SECTOR)
            k = 0;
      }

      /* If we fall off the end, all entries are InUse and not
         matching, or Deleted.  In any case we did not find it in this
         sector. */
   }

   /* Not found in any sector. */
   return False;
}
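
/* Note that in the common case this function is never reached: the
   dispatcher consults the fast cache VG_(tt_fast) directly, and only
   a miss there falls back to this full probe, which on success
   repopulates the fast cache via setFastCacheEntry. */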


/*-------------------------------------------------------------*/
/*--- Delete translations.                                  ---*/
/*-------------------------------------------------------------*/

/* forward */
static void unredir_discard_translations( Addr64, ULong );

/* Stuff for deleting translations which intersect with a given
   address range.  Unfortunately, to make this run at a reasonable
   speed, it is complex. */

static inline
Bool overlap1 ( Addr64 s1, ULong r1, Addr64 s2, ULong r2 )
{
   Addr64 e1 = s1 + r1 - 1ULL;
   Addr64 e2 = s2 + r2 - 1ULL;
   if (e1 < s2 || e2 < s1)
      return False;
   return True;
}
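
/* overlap1 treats each range as the closed interval [s, s+r-1], so
   ranges that merely touch do count as overlapping: for example,
   overlap1(0x1000, 0x10, 0x100F, 1) is True (both contain the byte
   at 0x100F), whereas overlap1(0x1000, 0x10, 0x1010, 1) is False. */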

static inline
Bool overlaps ( Addr64 start, ULong range, VexGuestExtents* vge )
{
   if (overlap1(start, range, vge->base[0], (UInt)vge->len[0]))
      return True;
   if (vge->n_used < 2)
      return False;
   if (overlap1(start, range, vge->base[1], (UInt)vge->len[1]))
      return True;
   if (vge->n_used < 3)
      return False;
   if (overlap1(start, range, vge->base[2], (UInt)vge->len[2]))
      return True;
   return False;
}


/* Delete a tt entry, and update all the eclass data accordingly. */

static void delete_tte ( /*MOD*/Sector* sec, UInt secNo, Int tteno,
                         VexArch arch_host, VexEndness endness_host )
{
   Int      i, ec_num, ec_idx;
   TTEntry* tte;

   /* sec and secNo are mutually redundant; cross-check. */
   vg_assert(sec == &sectors[secNo]);

   vg_assert(tteno >= 0 && tteno < N_TTES_PER_SECTOR);
   tte = &sec->tt[tteno];
   vg_assert(tte->status == InUse);
   vg_assert(tte->n_tte2ec >= 1 && tte->n_tte2ec <= 3);

   /* Unchain .. */
   unchain_in_preparation_for_deletion(arch_host, endness_host, secNo, tteno);

   /* Deal with the ec-to-tte links first. */
   for (i = 0; i < tte->n_tte2ec; i++) {
      ec_num = (Int)tte->tte2ec_ec[i];
      ec_idx = tte->tte2ec_ix[i];
      vg_assert(ec_num >= 0 && ec_num < ECLASS_N);
      vg_assert(ec_idx >= 0);
      vg_assert(ec_idx < sec->ec2tte_used[ec_num]);
      /* Assert that the two links point at each other. */
      vg_assert(sec->ec2tte[ec_num][ec_idx] == (UShort)tteno);
      /* "delete" the pointer back to here. */
      sec->ec2tte[ec_num][ec_idx] = EC2TTE_DELETED;
   }

   /* Now fix up this TTEntry. */
   tte->status   = Deleted;
   tte->n_tte2ec = 0;

   /* Stats .. */
   sec->tt_n_inuse--;
   n_disc_count++;
   n_disc_osize += vge_osize(&tte->vge);

   /* Tell the tool too. */
   if (VG_(needs).superblock_discards) {
      VG_TDICT_CALL( tool_discard_superblock_info,
                     tte->entry,
                     tte->vge );
   }
}


/* Delete translations from sec which intersect specified range, but
   only consider translations in the specified eclass. */

static
Bool delete_translations_in_sector_eclass ( /*MOD*/Sector* sec, UInt secNo,
                                            Addr64 guest_start, ULong range,
                                            Int ec,
                                            VexArch arch_host,
                                            VexEndness endness_host )
{
   Int      i;
   UShort   tteno;
   Bool     anyDeld = False;
   TTEntry* tte;

   vg_assert(ec >= 0 && ec < ECLASS_N);

   for (i = 0; i < sec->ec2tte_used[ec]; i++) {

      tteno = sec->ec2tte[ec][i];
      if (tteno == EC2TTE_DELETED) {
         /* already deleted */
         continue;
      }

      vg_assert(tteno < N_TTES_PER_SECTOR);

      tte = &sec->tt[tteno];
      vg_assert(tte->status == InUse);

      if (overlaps( guest_start, range, &tte->vge )) {
         anyDeld = True;
         delete_tte( sec, secNo, (Int)tteno, arch_host, endness_host );
      }

   }

   return anyDeld;
}


/* Delete translations from sec which intersect specified range, the
   slow way, by inspecting all translations in sec. */

static
Bool delete_translations_in_sector ( /*MOD*/Sector* sec, UInt secNo,
                                     Addr64 guest_start, ULong range,
                                     VexArch arch_host,
                                     VexEndness endness_host )
{
   Int  i;
   Bool anyDeld = False;

   for (i = 0; i < N_TTES_PER_SECTOR; i++) {
      if (sec->tt[i].status == InUse
          && overlaps( guest_start, range, &sec->tt[i].vge )) {
         anyDeld = True;
         delete_tte( sec, secNo, i, arch_host, endness_host );
      }
   }

   return anyDeld;
}


void VG_(discard_translations) ( Addr64 guest_start, ULong range,
                                 const HChar* who )
{
   Sector* sec;
   Int     sno, ec;
   Bool    anyDeleted = False;

   vg_assert(init_done);

   VG_(debugLog)(2, "transtab",
                    "discard_translations(0x%llx, %llu) req by %s\n",
                    guest_start, range, who );

   /* Pre-deletion sanity check */
   if (VG_(clo_sanity_level) >= 4) {
      Bool sane = sanity_check_all_sectors();
      vg_assert(sane);
   }

   if (range == 0)
      return;

   VexArch     arch_host = VexArch_INVALID;
   VexArchInfo archinfo_host;
   VG_(bzero_inline)(&archinfo_host, sizeof(archinfo_host));
   VG_(machine_get_VexArchInfo)( &arch_host, &archinfo_host );
   VexEndness endness_host = archinfo_host.endness;

   /* There are two different ways to do this.

      If the range fits within a single address-range equivalence
      class, as will be the case for a cache line sized invalidation,
      then we only have to inspect the set of translations listed in
      that equivalence class, and also in the "sin-bin" equivalence
      class ECLASS_MISC.

      Otherwise, the invalidation is of a larger range and probably
      results from munmap.  In this case it's (probably!) faster just
      to inspect all translations, dump those we don't want, and
      regenerate the equivalence class information (since modifying it
      in-situ is even more expensive).
   */

   /* First off, figure out if the range falls within a single class,
      and if so which one. */

   ec = ECLASS_MISC;
   if (range < (1ULL << ECLASS_SHIFT))
      ec = range_to_eclass( guest_start, (UInt)range );
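   /* For instance, a cache-line sized invalidation (range == 32 or
      64) is much smaller than an eclass bin, so unless it happens to
      straddle a bin boundary it maps to one specific bin;
      range_to_eclass returns ECLASS_MISC only for straddling
      requests, which then take the slow path below. */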

   /* if ec is ECLASS_MISC then we aren't looking at just a single
      class, so use the slow scheme.  Else use the fast scheme,
      examining 'ec' and ECLASS_MISC. */

   if (ec != ECLASS_MISC) {

      VG_(debugLog)(2, "transtab",
                       "                    FAST, ec = %d\n", ec);

      /* Fast scheme */
      vg_assert(ec >= 0 && ec < ECLASS_MISC);

      for (sno = 0; sno < n_sectors; sno++) {
         sec = &sectors[sno];
         if (sec->tc == NULL)
            continue;
         anyDeleted |= delete_translations_in_sector_eclass(
                          sec, sno, guest_start, range, ec,
                          arch_host, endness_host
                       );
         anyDeleted |= delete_translations_in_sector_eclass(
                          sec, sno, guest_start, range, ECLASS_MISC,
                          arch_host, endness_host
                       );
      }

   } else {

      /* slow scheme */

      VG_(debugLog)(2, "transtab",
                       "                    SLOW, ec = %d\n", ec);

      for (sno = 0; sno < n_sectors; sno++) {
         sec = &sectors[sno];
         if (sec->tc == NULL)
            continue;
         anyDeleted |= delete_translations_in_sector(
                          sec, sno, guest_start, range,
                          arch_host, endness_host
                       );
      }

   }

   if (anyDeleted)
      invalidateFastCache();

   /* don't forget the no-redir cache */
   unredir_discard_translations( guest_start, range );

   /* Post-deletion sanity check */
   if (VG_(clo_sanity_level) >= 4) {
      Int      i;
      TTEntry* tte;
      Bool     sane = sanity_check_all_sectors();
      vg_assert(sane);
      /* But now, also check the requested address range isn't
         present anywhere. */
      for (sno = 0; sno < n_sectors; sno++) {
         sec = &sectors[sno];
         if (sec->tc == NULL)
            continue;
         for (i = 0; i < N_TTES_PER_SECTOR; i++) {
            tte = &sec->tt[i];
            if (tte->status != InUse)
               continue;
            vg_assert(!overlaps( guest_start, range, &tte->vge ));
         }
      }
   }
}


/*------------------------------------------------------------*/
/*--- AUXILIARY: the unredirected TT/TC                    ---*/
/*------------------------------------------------------------*/

/* A very simple translation cache which holds a small number of
   unredirected translations.  This is completely independent of the
   main tt/tc structures.  When unredir_tc or unredir_tt becomes full,
   both structures are simply dumped and we start over.

   Since these translations are unredirected, the search key is (by
   definition) the first address entry in the .vge field. */

/* Sized to hold 500 translations of average size 1000 bytes. */

#define UNREDIR_SZB   1000

#define N_UNREDIR_TT  500
#define N_UNREDIR_TCQ (N_UNREDIR_TT * UNREDIR_SZB / sizeof(ULong))
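
/* With the defaults above, N_UNREDIR_TCQ is 500 * 1000 / 8 == 62500
   ULongs, i.e. 500000 bytes of code space in unredir_tc. */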

typedef
   struct {
      VexGuestExtents vge;
      Addr            hcode;
      Bool            inUse;
   }
   UTCEntry;

/* We just allocate forwards in _tc, never deleting. */
static ULong    *unredir_tc;
static Int      unredir_tc_used = N_UNREDIR_TCQ;

/* Slots in _tt can come into use and out again (.inUse).
   Nevertheless _tt_highwater is maintained so that invalidations
   don't have to scan all the slots when only a few are in use.
   _tt_highwater holds the index of the highest ever allocated
   slot. */
static UTCEntry unredir_tt[N_UNREDIR_TT];
static Int      unredir_tt_highwater;


static void init_unredir_tt_tc ( void )
{
   Int i;
   if (unredir_tc == NULL) {
      SysRes sres = VG_(am_mmap_anon_float_valgrind)
                       ( N_UNREDIR_TT * UNREDIR_SZB );
      if (sr_isError(sres)) {
         VG_(out_of_memory_NORETURN)("init_unredir_tt_tc",
                                     N_UNREDIR_TT * UNREDIR_SZB);
         /*NOTREACHED*/
      }
      unredir_tc = (ULong *)(AddrH)sr_Res(sres);
   }
   unredir_tc_used = 0;
   for (i = 0; i < N_UNREDIR_TT; i++)
      unredir_tt[i].inUse = False;
   unredir_tt_highwater = -1;
}

/* Do a sanity check on the unredirected tt/tc; return False on
   failure. */
static Bool sanity_check_unredir_tt_tc ( void )
{
   Int i;
   if (unredir_tt_highwater < -1) return False;
   if (unredir_tt_highwater >= N_UNREDIR_TT) return False;

   for (i = unredir_tt_highwater+1; i < N_UNREDIR_TT; i++)
      if (unredir_tt[i].inUse)
         return False;

   if (unredir_tc_used < 0) return False;
   if (unredir_tc_used > N_UNREDIR_TCQ) return False;

   return True;
}


/* Add an UNREDIRECTED translation of vge to TT/TC.  The translation
   is temporarily in code[0 .. code_len-1].
*/
void VG_(add_to_unredir_transtab)( VexGuestExtents* vge,
                                   Addr64           entry,
                                   AddrH            code,
                                   UInt             code_len )
{
   Int   i, j, code_szQ;
   HChar *srcP, *dstP;

   vg_assert(sanity_check_unredir_tt_tc());

   /* This is the whole point: it's not redirected! */
   vg_assert(entry == vge->base[0]);

   /* How many 8-byte quanta of unredir_tc space are needed */
   code_szQ = (code_len + 7) / 8;

   /* Look for an empty unredir_tt slot */
   for (i = 0; i < N_UNREDIR_TT; i++)
      if (!unredir_tt[i].inUse)
         break;

   if (i >= N_UNREDIR_TT || code_szQ > (N_UNREDIR_TCQ - unredir_tc_used)) {
      /* It's full; dump everything we currently have */
      init_unredir_tt_tc();
      i = 0;
   }

   vg_assert(unredir_tc_used >= 0);
   vg_assert(unredir_tc_used <= N_UNREDIR_TCQ);
   vg_assert(code_szQ > 0);
   vg_assert(code_szQ + unredir_tc_used <= N_UNREDIR_TCQ);
   vg_assert(i >= 0 && i < N_UNREDIR_TT);
   vg_assert(unredir_tt[i].inUse == False);

   if (i > unredir_tt_highwater)
      unredir_tt_highwater = i;

   dstP = (HChar*)&unredir_tc[unredir_tc_used];
   srcP = (HChar*)code;
   for (j = 0; j < code_len; j++)
      dstP[j] = srcP[j];

   VG_(invalidate_icache)( dstP, code_len );

   unredir_tt[i].inUse = True;
   unredir_tt[i].vge   = *vge;
   unredir_tt[i].hcode = (Addr)dstP;

   unredir_tc_used += code_szQ;
   vg_assert(unredir_tc_used >= 0);
   vg_assert(unredir_tc_used <= N_UNREDIR_TCQ);

   vg_assert(&dstP[code_len] <= (HChar*)&unredir_tc[unredir_tc_used]);
}

Bool VG_(search_unredir_transtab) ( /*OUT*/AddrH* result,
                                    Addr64        guest_addr )
{
   Int i;
   for (i = 0; i < N_UNREDIR_TT; i++) {
      if (!unredir_tt[i].inUse)
         continue;
      if (unredir_tt[i].vge.base[0] == guest_addr) {
         *result = (AddrH)unredir_tt[i].hcode;
         return True;
      }
   }
   return False;
}

static void unredir_discard_translations( Addr64 guest_start, ULong range )
{
   Int i;

   vg_assert(sanity_check_unredir_tt_tc());

   for (i = 0; i <= unredir_tt_highwater; i++) {
      if (unredir_tt[i].inUse
          && overlaps( guest_start, range, &unredir_tt[i].vge))
         unredir_tt[i].inUse = False;
   }
}


/*------------------------------------------------------------*/
/*--- Initialisation.                                      ---*/
/*------------------------------------------------------------*/

void VG_(init_tt_tc) ( void )
{
   Int i, avg_codeszQ;

   vg_assert(!init_done);
   init_done = True;

   /* Otherwise lots of things go wrong... */
   vg_assert(sizeof(ULong) == 8);
   vg_assert(sizeof(Addr64) == 8);
   /* check fast cache entries really are 2 words long */
   vg_assert(sizeof(Addr) == sizeof(void*));
   vg_assert(sizeof(FastCacheEntry) == 2 * sizeof(Addr));
   /* check fast cache entries are packed back-to-back with no spaces */
   vg_assert(sizeof( VG_(tt_fast) ) == VG_TT_FAST_SIZE * sizeof(FastCacheEntry));
   /* check fast cache is aligned as we requested.  Not fatal if it
      isn't, but we might as well make sure. */
   vg_assert(VG_IS_16_ALIGNED( ((Addr) & VG_(tt_fast)[0]) ));

   if (VG_(clo_verbosity) > 2)
      VG_(message)(Vg_DebugMsg,
                   "TT/TC: VG_(init_tt_tc) "
                   "(startup of code management)\n");

   /* Figure out how big each tc area should be.  */
   avg_codeszQ   = (VG_(details).avg_translation_sizeB + 7) / 8;
   tc_sector_szQ = N_TTES_PER_SECTOR_USABLE * (1 + avg_codeszQ);
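   /* Worked example (with hypothetical numbers): a tool declaring
      avg_translation_sizeB == 100 gives avg_codeszQ == 13, hence
      tc_sector_szQ == 42588 * 14 == 596232 ULongs, roughly 4.5 MB
      of code space per sector. */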

   /* Ensure the calculated value is not way crazy. */
   vg_assert(tc_sector_szQ >= 2 * N_TTES_PER_SECTOR_USABLE);
   vg_assert(tc_sector_szQ <= 100 * N_TTES_PER_SECTOR_USABLE);

   n_sectors = VG_(clo_num_transtab_sectors);
   vg_assert(n_sectors >= MIN_N_SECTORS);
   vg_assert(n_sectors <= MAX_N_SECTORS);

   /* Initialise the sectors, even the ones we aren't going to use.
      Set all fields to zero. */
   youngest_sector = 0;
   for (i = 0; i < MAX_N_SECTORS; i++)
      VG_(memset)(&sectors[i], 0, sizeof(sectors[i]));

   /* Initialise the sector_search_order hint table, including the
      entries we aren't going to use. */
   for (i = 0; i < MAX_N_SECTORS; i++)
      sector_search_order[i] = -1;

   /* Initialise the fast cache. */
   invalidateFastCache();

   /* and the unredir tt/tc */
   init_unredir_tt_tc();

   if (VG_(clo_verbosity) > 2 || VG_(clo_stats)
       || VG_(debugLog_getLevel) () >= 2) {
      VG_(message)(Vg_DebugMsg,
         "TT/TC: cache: %d sectors of %d bytes each = %d total\n",
          n_sectors, 8 * tc_sector_szQ,
          n_sectors * 8 * tc_sector_szQ );
      VG_(message)(Vg_DebugMsg,
         "TT/TC: table: %d tables  of %d bytes each = %d total\n",
          n_sectors, (int)(N_TTES_PER_SECTOR * sizeof(TTEntry)),
          (int)(n_sectors * N_TTES_PER_SECTOR * sizeof(TTEntry)));
      VG_(message)(Vg_DebugMsg,
         "TT/TC: table: %d entries each = %d total entries"
                       " max occupancy %d (%d%%)\n",
         N_TTES_PER_SECTOR,
         n_sectors * N_TTES_PER_SECTOR,
         n_sectors * N_TTES_PER_SECTOR_USABLE,
         SECTOR_TT_LIMIT_PERCENT );
   }
}


/*------------------------------------------------------------*/
/*--- Printing out statistics.                             ---*/
/*------------------------------------------------------------*/

static ULong safe_idiv( ULong a, ULong b )
{
   return (b == 0 ? 0 : a / b);
}

UInt VG_(get_bbs_translated) ( void )
{
   return n_in_count;
}

void VG_(print_tt_tc_stats) ( void )
{
   VG_(message)(Vg_DebugMsg,
      "    tt/tc: %'llu tt lookups requiring %'llu probes\n",
      n_full_lookups, n_lookup_probes );
   VG_(message)(Vg_DebugMsg,
      "    tt/tc: %'llu fast-cache updates, %'llu flushes\n",
      n_fast_updates, n_fast_flushes );

   VG_(message)(Vg_DebugMsg,
                " transtab: new        %'llu "
                "(%'llu -> %'llu; ratio %'llu:10) [%'llu scs]\n",
                n_in_count, n_in_osize, n_in_tsize,
                safe_idiv(10*n_in_tsize, n_in_osize),
                n_in_sc_count);
   VG_(message)(Vg_DebugMsg,
                " transtab: dumped     %'llu (%'llu -> ?" "?)\n",
                n_dump_count, n_dump_osize );
   VG_(message)(Vg_DebugMsg,
                " transtab: discarded  %'llu (%'llu -> ?" "?)\n",
                n_disc_count, n_disc_osize );

   if (DEBUG_TRANSTAB) {
      Int i;
      VG_(printf)("\n");
      for (i = 0; i < ECLASS_N; i++) {
         VG_(printf)(" %4d", sectors[0].ec2tte_used[i]);
         if (i % 16 == 15)
            VG_(printf)("\n");
      }
      VG_(printf)("\n\n");
   }
}

/*------------------------------------------------------------*/
/*--- Printing out of profiling results.                   ---*/
/*------------------------------------------------------------*/

static ULong score ( TTEntry* tte )
{
   return ((ULong)tte->weight) * ((ULong)tte->count);
}
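
/* score approximates "guest instructions executed inside this
   translation": weight is the static guest instruction count noted
   when the translation was added (forced to be at least 1), and
   count is bumped at run time by the profiling increment patched in
   via LibVEX_PatchProfInc. */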

ULong VG_(get_SB_profile) ( SBProfEntry tops[], UInt n_tops )
{
   Int   sno, i, r, s;
   ULong score_total;

   /* First, compute the total weighted count, and find the top N
      ttes.  tops contains pointers to the most-used n_tops blocks, in
      descending order (viz, tops[0] is the highest scorer). */
   for (i = 0; i < n_tops; i++) {
      tops[i].addr  = 0;
      tops[i].score = 0;
   }

   score_total = 0;

   for (sno = 0; sno < n_sectors; sno++) {
      if (sectors[sno].tc == NULL)
         continue;
      for (i = 0; i < N_TTES_PER_SECTOR; i++) {
         if (sectors[sno].tt[i].status != InUse)
            continue;
         score_total += score(&sectors[sno].tt[i]);
         /* Find the rank for sectors[sno].tt[i]. */
         r = n_tops-1;
         while (True) {
            if (r == -1)
               break;
            if (tops[r].addr == 0) {
               r--;
               continue;
            }
            if ( score(&sectors[sno].tt[i]) > tops[r].score ) {
               r--;
               continue;
            }
            break;
         }
         r++;
         vg_assert(r >= 0 && r <= n_tops);
         /* This bb should be placed at r, and bbs above it shifted
            upwards one slot. */
         if (r < n_tops) {
            for (s = n_tops-1; s > r; s--)
               tops[s] = tops[s-1];
            tops[r].addr  = sectors[sno].tt[i].entry;
            tops[r].score = score( &sectors[sno].tt[i] );
         }
      }
   }

   /* Now zero out all the counter fields, so that we can make
      multiple calls here and just get the values since the last call,
      each time, rather than values accumulated for the whole run. */
   for (sno = 0; sno < n_sectors; sno++) {
      if (sectors[sno].tc == NULL)
         continue;
      for (i = 0; i < N_TTES_PER_SECTOR; i++) {
         if (sectors[sno].tt[i].status != InUse)
            continue;
         sectors[sno].tt[i].count = 0;
      }
   }

   return score_total;
}

/*--------------------------------------------------------------------*/
/*--- end                                                          ---*/
/*--------------------------------------------------------------------*/