1
2/*--------------------------------------------------------------------*/
3/*--- Management of the translation table and cache.               ---*/
4/*---                                                 m_transtab.c ---*/
5/*--------------------------------------------------------------------*/
6
7/*
8   This file is part of Valgrind, a dynamic binary instrumentation
9   framework.
10
11   Copyright (C) 2000-2013 Julian Seward
12      jseward@acm.org
13
14   This program is free software; you can redistribute it and/or
15   modify it under the terms of the GNU General Public License as
16   published by the Free Software Foundation; either version 2 of the
17   License, or (at your option) any later version.
18
19   This program is distributed in the hope that it will be useful, but
20   WITHOUT ANY WARRANTY; without even the implied warranty of
21   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22   General Public License for more details.
23
24   You should have received a copy of the GNU General Public License
25   along with this program; if not, write to the Free Software
26   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27   02111-1307, USA.
28
29   The GNU General Public License is contained in the file COPYING.
30*/
31
32#include "pub_core_basics.h"
33#include "pub_core_debuglog.h"
34#include "pub_core_machine.h"    // For VG_(machine_get_VexArchInfo)
35#include "pub_core_libcbase.h"
36#include "pub_core_vki.h"        // to keep pub_core_libproc.h happy, sigh
37#include "pub_core_libcproc.h"   // VG_(invalidate_icache)
38#include "pub_core_libcassert.h"
39#include "pub_core_libcprint.h"
40#include "pub_core_options.h"
41#include "pub_core_tooliface.h"  // For VG_(details).avg_translation_sizeB
42#include "pub_core_transtab.h"
43#include "pub_core_aspacemgr.h"
44#include "pub_core_mallocfree.h" // VG_(out_of_memory_NORETURN)
45#include "pub_core_xarray.h"
46#include "pub_core_dispatch.h"   // For VG_(disp_cp*) addresses
47
48
49#define DEBUG_TRANSTAB 0
50
51
52/*-------------------------------------------------------------*/
53/*--- Management of the FIFO-based translation table+cache. ---*/
54/*-------------------------------------------------------------*/
55
56/* Nr of sectors provided via command line parameter. */
57UInt VG_(clo_num_transtab_sectors) = N_SECTORS_DEFAULT;
58/* Nr of sectors.
59   Will be set by VG_(init_tt_tc) to VG_(clo_num_transtab_sectors). */
60static UInt n_sectors = 0;
61
62/*------------------ CONSTANTS ------------------*/
63/* Number of TC entries in each sector.  This needs to be a prime
64   number to work properly, it must be <= 65535 (so that a TT index
65   fits in a UShort, leaving room for 0xFFFF(EC2TTE_DELETED) to denote
66   'deleted') and it is strongly recommended not to change this.
67   65521 is the largest prime <= 65535. */
68#define N_TTES_PER_SECTOR /*10007*/ /*30011*/ /*40009*/ 65521
69
70/* Because each sector contains a hash table of TTEntries, we need to
71   specify the maximum allowable loading, after which the sector is
72   deemed full. */
73#define SECTOR_TT_LIMIT_PERCENT 65
74
75/* The sector is deemed full when this many entries are in it. */
76#define N_TTES_PER_SECTOR_USABLE \
77           ((N_TTES_PER_SECTOR * SECTOR_TT_LIMIT_PERCENT) / 100)
78
79/* Equivalence classes for fast address range deletion.  There are 1 +
80   2^ECLASS_WIDTH bins.  The highest one, ECLASS_MISC, describes an
81   address range which does not fall cleanly within any specific bin.
82   Note that ECLASS_SHIFT + ECLASS_WIDTH must be < 32. */
83#define ECLASS_SHIFT 11
84#define ECLASS_WIDTH 8
85#define ECLASS_MISC  (1 << ECLASS_WIDTH)
86#define ECLASS_N     (1 + ECLASS_MISC)
87
88#define EC2TTE_DELETED  0xFFFF /* 16-bit special value */
89
90
91/*------------------ TYPES ------------------*/
92
93/* In edges ("to-me") in the graph created by chaining. */
94typedef
95   struct {
96      UInt from_sNo;   /* sector number */
97      UInt from_tteNo; /* TTE number in given sector */
98      UInt from_offs;  /* code offset from TCEntry::tcptr where the patch is */
99      Bool to_fastEP;  /* Is the patch to a fast or slow entry point? */
100   }
101   InEdge;
102
103
104/* Out edges ("from-me") in the graph created by chaining. */
105typedef
106   struct {
107      UInt to_sNo;    /* sector number */
108      UInt to_tteNo;  /* TTE number in given sector */
109      UInt from_offs; /* code offset in owning translation where patch is */
110   }
111   OutEdge;
112
113
114#define N_FIXED_IN_EDGE_ARR 3
115typedef
116   struct {
117      UInt     n_fixed; /* 0 .. N_FIXED_IN_EDGE_ARR */
118      InEdge   fixed[N_FIXED_IN_EDGE_ARR];
119      XArray*  var; /* XArray* of InEdgeArr */
120   }
121   InEdgeArr;
122
123#define N_FIXED_OUT_EDGE_ARR 2
124typedef
125   struct {
126      UInt    n_fixed; /* 0 .. N_FIXED_OUT_EDGE_ARR */
127      OutEdge fixed[N_FIXED_OUT_EDGE_ARR];
128      XArray* var; /* XArray* of OutEdgeArr */
129   }
130   OutEdgeArr;
131
132
133/* A translation-table entry.  This indicates precisely which areas of
134   guest code are included in the translation, and contains all other
135   auxiliary info too.  */
136typedef
137   struct {
138      /* Profiling only: the count and weight (arbitrary meaning) for
139         this translation.  Weight is a property of the translation
140         itself and computed once when the translation is created.
141         Count is an entry count for the translation and is
142         incremented by 1 every time the translation is used, if we
143         are profiling. */
144      ULong    count;
145      UShort   weight;
146
147      /* Status of the slot.  Note, we need to be able to do lazy
148         deletion, hence the Deleted state. */
149      enum { InUse, Deleted, Empty } status;
150
151      /* 64-bit aligned pointer to one or more 64-bit words containing
152         the corresponding host code (must be in the same sector!)
153         This is a pointer into the sector's tc (code) area. */
154      ULong* tcptr;
155
156      /* This is the original guest address that purportedly is the
157         entry point of the translation.  You might think that .entry
158         should be the same as .vge->base[0], and most of the time it
159         is.  However, when doing redirections, that is not the case.
160         .vge must always correctly describe the guest code sections
161         from which this translation was made.  However, .entry may or
162         may not be a lie, depending on whether or not we're doing
163         redirection. */
164      Addr64 entry;
165
166      /* This structure describes precisely what ranges of guest code
167         the translation covers, so we can decide whether or not to
168         delete it when translations of a given address range are
169         invalidated. */
170      VexGuestExtents vge;
171
172      /* Address range summary info: these are pointers back to
173         eclass[] entries in the containing Sector.  Those entries in
174         turn point back here -- the two structures are mutually
175         redundant but both necessary to make fast deletions work.
176         The eclass info is similar to, and derived from, this entry's
177         'vge' field, but it is not the same */
178      UShort n_tte2ec;      // # tte2ec pointers (1 to 3)
179      UShort tte2ec_ec[3];  // for each, the eclass #
180      UInt   tte2ec_ix[3];  // and the index within the eclass.
181      // for i in 0 .. n_tte2ec-1
182      //    sec->ec2tte[ tte2ec_ec[i] ][ tte2ec_ix[i] ]
183      // should be the index
184      // of this TTEntry in the containing Sector's tt array.
185
186      /* Admin information for chaining.  'in_edges' is a set of the
187         patch points which jump to this translation -- hence are
188         predecessors in the control flow graph.  'out_edges' points
189         to successors in the control flow graph -- translations to
190         which this one has a patched jump.  In short these are just
191         backwards and forwards edges in the graph of patched-together
192         blocks.  The 'in_edges' contain slightly more info, enough
193         that we can undo the chaining of each mentioned patch point.
194         The 'out_edges' list exists only so that we can visit the
195         'in_edges' entries of all blocks we're patched through to, in
196         order to remove ourselves from then when we're deleted. */
197
198      /* A translation can disappear for two reasons:
199          1. erased (as part of the oldest sector cleanup) when the
200             youngest sector is full.
201          2. discarded due to calls to VG_(discard_translations).
202             VG_(discard_translations) sets the status of the
203             translation to 'Deleted'.
204             A.o., the gdbserver discards one or more translations
205             when a breakpoint is inserted or removed at an Addr,
206             or when single stepping mode is enabled/disabled
207             or when a translation is instrumented for gdbserver
208             (all the target jumps of this translation are
209              invalidated).
210
211         So, it is possible that the translation A to be patched
212         (to obtain a patched jump from A to B) is invalidated
213         after B is translated and before A is patched.
214         In case a translation is erased or discarded, the patching
215         cannot be done.  VG_(tt_tc_do_chaining) and find_TTEntry_from_hcode
216         are checking the 'from' translation still exists before
217         doing the patching.
218
219         Is it safe to erase or discard the current translation E being
220         executed ? Amazing, but yes, it is safe.
221         Here is the explanation:
222
223         The translation E being executed can only be erased if a new
224         translation N is being done. A new translation is done only
225         if the host addr is a not yet patched jump to another
226         translation. In such a case, the guest address of N is
227         assigned to the PC in the VEX state. Control is returned
228         to the scheduler. N will be translated. This can erase the
229         translation E (in case of sector full). VG_(tt_tc_do_chaining)
230         will not do the chaining to a non found translation E.
231         The execution will continue at the current guest PC
232         (i.e. the translation N).
233         => it is safe to erase the current translation being executed.
234
235         The current translation E being executed can also be discarded
236         (e.g. by gdbserver). VG_(discard_translations) will mark
237         this translation E as Deleted, but the translation itself
238         is not erased. In particular, its host code can only
239         be overwritten or erased in case a new translation is done.
240         A new translation will only be done if a not yet translated
241         jump is to be executed. The execution of the Deleted translation
242         E will continue till a non patched jump is encountered.
243         This situation is then similar to the 'erasing' case above :
244         the current translation E can be erased or overwritten, as the
245         execution will continue at the new translation N.
246
247      */
248
249      /* It is possible, although very unlikely, that a block A has
250         more than one patched jump to block B.  This could happen if
251         (eg) A finishes "jcond B; jmp B".
252
253         This means in turn that B's in_edges set can list A more than
254         once (twice in this example).  However, each such entry must
255         have a different from_offs, since a patched jump can only
256         jump to one place at once (it's meaningless for it to have
257         multiple destinations.)  IOW, the successor and predecessor
258         edges in the graph are not uniquely determined by a
259         TTEntry --> TTEntry pair, but rather by a
260         (TTEntry,offset) --> TTEntry triple.
261
262         If A has multiple edges to B then B will mention A multiple
263         times in its in_edges.  To make things simpler, we then
264         require that A mentions B exactly the same number of times in
265         its out_edges.  Furthermore, a matching out-in pair must have
266         the same offset (from_offs).  This facilitates sanity
267         checking, and it facilitates establishing the invariant that
268         a out_edges set may not have duplicates when using the
269         equality defined by (TTEntry,offset).  Hence the out_edges
270         and in_edges sets really do have both have set semantics.
271
272         eg if  A has been patched to B at offsets 42 and 87 (in A)
273         then   A.out_edges = { (B,42), (B,87) }   (in any order)
274         and    B.in_edges  = { (A,42), (A,87) }   (in any order)
275
276         Hence for each node pair P->Q in the graph, there's a 1:1
277         mapping between P.out_edges and Q.in_edges.
278      */
279      InEdgeArr  in_edges;
280      OutEdgeArr out_edges;
281   }
282   TTEntry;
283
284
285/* A structure used for mapping host code addresses back to the
286   relevant TTEntry.  Used when doing chaining, for finding the
287   TTEntry to which some arbitrary patch address belongs. */
288typedef
289   struct {
290      UChar* start;
291      UInt   len;
292      UInt   tteNo;
293   }
294   HostExtent;
295
296/* Finally, a sector itself.  Each sector contains an array of
297   TCEntries, which hold code, and an array of TTEntries, containing
298   all required administrative info.  Profiling is supported using the
299   TTEntry .count and .weight fields, if required.
300
301   If the sector is not in use, all three pointers are NULL and
302   tt_n_inuse is zero.
303*/
304typedef
305   struct {
306      /* The TCEntry area.  Size of this depends on the average
307         translation size.  We try and size it so it becomes full
308         precisely when this sector's translation table (tt) reaches
309         its load limit (SECTOR_TT_LIMIT_PERCENT). */
310      ULong* tc;
311
312      /* The TTEntry array.  This is a fixed size, always containing
313         exactly N_TTES_PER_SECTOR entries. */
314      TTEntry* tt;
315
316      /* This points to the current allocation point in tc. */
317      ULong* tc_next;
318
319      /* The count of tt entries with state InUse. */
320      Int tt_n_inuse;
321
322      /* Expandable arrays of tt indices for each of the ECLASS_N
323         address range equivalence classes.  These hold indices into
324         the containing sector's tt array, which in turn should point
325         back here. */
326      Int     ec2tte_size[ECLASS_N];
327      Int     ec2tte_used[ECLASS_N];
328      UShort* ec2tte[ECLASS_N];
329
330      /* The host extents.  The [start, +len) ranges are constructed
331         in strictly non-overlapping order, so we can binary search
332         them at any time. */
333      XArray* host_extents; /* XArray* of HostExtent */
334   }
335   Sector;
336
337
338/*------------------ DECLS ------------------*/
339
340/* The root data structure is an array of sectors.  The index of the
341   youngest sector is recorded, and new translations are put into that
342   sector.  When it fills up, we move along to the next sector and
343   start to fill that up, wrapping around at the end of the array.
344   That way, once all N_TC_SECTORS have been bought into use for the
345   first time, and are full, we then re-use the oldest sector,
346   endlessly.
347
348   When running, youngest sector should be between >= 0 and <
349   N_TC_SECTORS.  The initial -1 value indicates the TT/TC system is
350   not yet initialised.
351*/
352static Sector sectors[MAX_N_SECTORS];
353static Int    youngest_sector = -1;
354
355/* The number of ULongs in each TCEntry area.  This is computed once
356   at startup and does not change. */
357static Int    tc_sector_szQ = 0;
358
359
360/* A list of sector numbers, in the order which they should be
361   searched to find translations.  This is an optimisation to be used
362   when searching for translations and should not affect
363   correctness.  -1 denotes "no entry". */
364static Int sector_search_order[MAX_N_SECTORS];
365
366
367/* Fast helper for the TC.  A direct-mapped cache which holds a set of
368   recently used (guest address, host address) pairs.  This array is
369   referred to directly from m_dispatch/dispatch-<platform>.S.
370
371   Entries in tt_fast may refer to any valid TC entry, regardless of
372   which sector it's in.  Consequently we must be very careful to
373   invalidate this cache when TC entries are changed or disappear.
374
375   A special .guest address - TRANSTAB_BOGUS_GUEST_ADDR -- must be
376   pointed at to cause that cache entry to miss.  This relies on the
377   assumption that no guest code actually has that address, hence a
378   value 0x1 seems good.  m_translate gives the client a synthetic
379   segfault if it tries to execute at this address.
380*/
381/*
382typedef
383   struct {
384      Addr guest;
385      Addr host;
386   }
387   FastCacheEntry;
388*/
389/*global*/ __attribute__((aligned(16)))
390           FastCacheEntry VG_(tt_fast)[VG_TT_FAST_SIZE];
391
392/* Make sure we're not used before initialisation. */
393static Bool init_done = False;
394
395
396/*------------------ STATS DECLS ------------------*/
397
398/* Number of fast-cache updates and flushes done. */
399static ULong n_fast_flushes = 0;
400static ULong n_fast_updates = 0;
401
402/* Number of full lookups done. */
403static ULong n_full_lookups = 0;
404static ULong n_lookup_probes = 0;
405
406/* Number/osize/tsize of translations entered; also the number of
407   those for which self-checking was requested. */
408static ULong n_in_count    = 0;
409static ULong n_in_osize    = 0;
410static ULong n_in_tsize    = 0;
411static ULong n_in_sc_count = 0;
412
413/* Number/osize of translations discarded due to lack of space. */
414static ULong n_dump_count = 0;
415static ULong n_dump_osize = 0;
416
417/* Number/osize of translations discarded due to requests to do so. */
418static ULong n_disc_count = 0;
419static ULong n_disc_osize = 0;
420
421
422/*-------------------------------------------------------------*/
423/*--- Misc                                                  ---*/
424/*-------------------------------------------------------------*/
425
426static void* ttaux_malloc ( const HChar* tag, SizeT n )
427{
428   return VG_(arena_malloc)(VG_AR_TTAUX, tag, n);
429}
430
431static void ttaux_free ( void* p )
432{
433   VG_(arena_free)(VG_AR_TTAUX, p);
434}
435
436
437/*-------------------------------------------------------------*/
438/*--- Chaining support                                      ---*/
439/*-------------------------------------------------------------*/
440
441static inline TTEntry* index_tte ( UInt sNo, UInt tteNo )
442{
443   vg_assert(sNo < n_sectors);
444   vg_assert(tteNo < N_TTES_PER_SECTOR);
445   Sector* s = &sectors[sNo];
446   vg_assert(s->tt);
447   TTEntry* tte = &s->tt[tteNo];
448   vg_assert(tte->status == InUse);
449   return tte;
450}
451
452static void InEdge__init ( InEdge* ie )
453{
454   ie->from_sNo   = -1; /* invalid */
455   ie->from_tteNo = 0;
456   ie->from_offs  = 0;
457   ie->to_fastEP  = False;
458}
459
460static void OutEdge__init ( OutEdge* oe )
461{
462   oe->to_sNo    = -1; /* invalid */
463   oe->to_tteNo  = 0;
464   oe->from_offs = 0;
465}
466
467static void TTEntry__init ( TTEntry* tte )
468{
469   VG_(memset)(tte, 0, sizeof(*tte));
470}
471
472static UWord InEdgeArr__size ( InEdgeArr* iea )
473{
474   if (iea->var) {
475      vg_assert(iea->n_fixed == 0);
476      return VG_(sizeXA)(iea->var);
477   } else {
478      vg_assert(iea->n_fixed <= N_FIXED_IN_EDGE_ARR);
479      return iea->n_fixed;
480   }
481}
482
483static void InEdgeArr__makeEmpty ( InEdgeArr* iea )
484{
485   if (iea->var) {
486      vg_assert(iea->n_fixed == 0);
487      VG_(deleteXA)(iea->var);
488      iea->var = NULL;
489   } else {
490      vg_assert(iea->n_fixed <= N_FIXED_IN_EDGE_ARR);
491      iea->n_fixed = 0;
492   }
493}
494
495static
496InEdge* InEdgeArr__index ( InEdgeArr* iea, UWord i )
497{
498   if (iea->var) {
499      vg_assert(iea->n_fixed == 0);
500      return (InEdge*)VG_(indexXA)(iea->var, i);
501   } else {
502      vg_assert(i < iea->n_fixed);
503      return &iea->fixed[i];
504   }
505}
506
507static
508void InEdgeArr__deleteIndex ( InEdgeArr* iea, UWord i )
509{
510   if (iea->var) {
511      vg_assert(iea->n_fixed == 0);
512      VG_(removeIndexXA)(iea->var, i);
513   } else {
514      vg_assert(i < iea->n_fixed);
515      for (; i+1 < iea->n_fixed; i++) {
516         iea->fixed[i] = iea->fixed[i+1];
517      }
518      iea->n_fixed--;
519   }
520}
521
522static
523void InEdgeArr__add ( InEdgeArr* iea, InEdge* ie )
524{
525   if (iea->var) {
526      vg_assert(iea->n_fixed == 0);
527      VG_(addToXA)(iea->var, ie);
528   } else {
529      vg_assert(iea->n_fixed <= N_FIXED_IN_EDGE_ARR);
530      if (iea->n_fixed == N_FIXED_IN_EDGE_ARR) {
531         /* The fixed array is full, so we have to initialise an
532            XArray and copy the fixed array into it. */
533         iea->var = VG_(newXA)(ttaux_malloc, "transtab.IEA__add",
534                               ttaux_free,
535                               sizeof(InEdge));
536         UWord i;
537         for (i = 0; i < iea->n_fixed; i++) {
538            VG_(addToXA)(iea->var, &iea->fixed[i]);
539         }
540         VG_(addToXA)(iea->var, ie);
541         iea->n_fixed = 0;
542      } else {
543         /* Just add to the fixed array. */
544         iea->fixed[iea->n_fixed++] = *ie;
545      }
546   }
547}
548
549static UWord OutEdgeArr__size ( OutEdgeArr* oea )
550{
551   if (oea->var) {
552      vg_assert(oea->n_fixed == 0);
553      return VG_(sizeXA)(oea->var);
554   } else {
555      vg_assert(oea->n_fixed <= N_FIXED_OUT_EDGE_ARR);
556      return oea->n_fixed;
557   }
558}
559
560static void OutEdgeArr__makeEmpty ( OutEdgeArr* oea )
561{
562   if (oea->var) {
563      vg_assert(oea->n_fixed == 0);
564      VG_(deleteXA)(oea->var);
565      oea->var = NULL;
566   } else {
567      vg_assert(oea->n_fixed <= N_FIXED_OUT_EDGE_ARR);
568      oea->n_fixed = 0;
569   }
570}
571
572static
573OutEdge* OutEdgeArr__index ( OutEdgeArr* oea, UWord i )
574{
575   if (oea->var) {
576      vg_assert(oea->n_fixed == 0);
577      return (OutEdge*)VG_(indexXA)(oea->var, i);
578   } else {
579      vg_assert(i < oea->n_fixed);
580      return &oea->fixed[i];
581   }
582}
583
584static
585void OutEdgeArr__deleteIndex ( OutEdgeArr* oea, UWord i )
586{
587   if (oea->var) {
588      vg_assert(oea->n_fixed == 0);
589      VG_(removeIndexXA)(oea->var, i);
590   } else {
591      vg_assert(i < oea->n_fixed);
592      for (; i+1 < oea->n_fixed; i++) {
593         oea->fixed[i] = oea->fixed[i+1];
594      }
595      oea->n_fixed--;
596   }
597}
598
599static
600void OutEdgeArr__add ( OutEdgeArr* oea, OutEdge* oe )
601{
602   if (oea->var) {
603      vg_assert(oea->n_fixed == 0);
604      VG_(addToXA)(oea->var, oe);
605   } else {
606      vg_assert(oea->n_fixed <= N_FIXED_OUT_EDGE_ARR);
607      if (oea->n_fixed == N_FIXED_OUT_EDGE_ARR) {
608         /* The fixed array is full, so we have to initialise an
609            XArray and copy the fixed array into it. */
610         oea->var = VG_(newXA)(ttaux_malloc, "transtab.OEA__add",
611                               ttaux_free,
612                               sizeof(OutEdge));
613         UWord i;
614         for (i = 0; i < oea->n_fixed; i++) {
615            VG_(addToXA)(oea->var, &oea->fixed[i]);
616         }
617         VG_(addToXA)(oea->var, oe);
618         oea->n_fixed = 0;
619      } else {
620         /* Just add to the fixed array. */
621         oea->fixed[oea->n_fixed++] = *oe;
622      }
623   }
624}
625
626static
627Int HostExtent__cmpOrd ( const void* v1, const void* v2 )
628{
629   const HostExtent* hx1 = v1;
630   const HostExtent* hx2 = v2;
631   if (hx1->start + hx1->len <= hx2->start) return -1;
632   if (hx2->start + hx2->len <= hx1->start) return 1;
633   return 0; /* partial overlap */
634}
635
636/* True if hx is a dead host extent, i.e. corresponds to host code
637   of an entry that was invalidated. */
638static
639Bool HostExtent__is_dead (const HostExtent* hx, const Sector* sec)
640{
641   const UInt tteNo = hx->tteNo;
642#define LDEBUG(m) if (DEBUG_TRANSTAB)                           \
643      VG_(printf) (m                                            \
644                   " start 0x%p len %u sector %d ttslot %u"     \
645                   " tt.entry 0x%llu tt.tcptr 0x%p\n",          \
646                   hx->start, hx->len, (int)(sec - sectors),    \
647                   hx->tteNo,                                   \
648                   sec->tt[tteNo].entry, sec->tt[tteNo].tcptr)
649
650   /* Entry might have been invalidated and not re-used yet.*/
651   if (sec->tt[tteNo].status == Deleted) {
652      LDEBUG("found deleted entry");
653      return True;
654   }
655   /* Maybe we found this entry via a host_extents which was
656      inserted for an entry which was changed to Deleted then
657      re-used after. If this entry was re-used, then its tcptr
658      is >= to host_extents start (i.e. the previous tcptr) + len.
659      This is the case as there is no re-use of host code: a new
660      entry or re-used entry always gets "higher value" host code. */
661   if ((UChar*) sec->tt[tteNo].tcptr >= hx->start + hx->len) {
662      LDEBUG("found re-used entry");
663      return True;
664   }
665
666   return False;
667#undef LDEBUG
668}
669
670static __attribute__((noinline))
671Bool find_TTEntry_from_hcode( /*OUT*/UInt* from_sNo,
672                              /*OUT*/UInt* from_tteNo,
673                              void* hcode )
674{
675   Int i;
676
677   /* Search order logic copied from VG_(search_transtab). */
678   for (i = 0; i < n_sectors; i++) {
679      Int sno = sector_search_order[i];
680      if (UNLIKELY(sno == -1))
681         return False; /* run out of sectors to search */
682
683      Sector* sec = &sectors[sno];
684      XArray* /* of HostExtent */ host_extents = sec->host_extents;
685      vg_assert(host_extents);
686
687      HostExtent key;
688      VG_(memset)(&key, 0, sizeof(key));
689      key.start = hcode;
690      key.len = 1;
691      Word firstW = -1, lastW = -1;
692      Bool found  = VG_(lookupXA_UNSAFE)(
693                       host_extents, &key, &firstW, &lastW,
694                       HostExtent__cmpOrd );
695      vg_assert(firstW == lastW); // always true, even if not found
696      if (found) {
697         HostExtent* hx = VG_(indexXA)(host_extents, firstW);
698         UInt tteNo = hx->tteNo;
699         /* Do some additional sanity checks. */
700         vg_assert(tteNo <= N_TTES_PER_SECTOR);
701
702         /* if this hx entry corresponds to dead host code, we must
703            tell this code has not been found, as it cannot be patched. */
704         if (HostExtent__is_dead (hx, sec))
705            return False;
706
707         vg_assert(sec->tt[tteNo].status == InUse);
708         /* Can only half check that the found TTEntry contains hcode,
709            due to not having a length value for the hcode in the
710            TTEntry. */
711         vg_assert((UChar*)sec->tt[tteNo].tcptr <= (UChar*)hcode);
712         /* Looks plausible */
713         *from_sNo   = sno;
714         *from_tteNo = (UInt)tteNo;
715         return True;
716      }
717   }
718   return False;
719}
720
721
722/* Figure out whether or not hcode is jitted code present in the main
723   code cache (but not in the no-redir cache).  Used for sanity
724   checking. */
725static Bool is_in_the_main_TC ( void* hcode )
726{
727   Int i, sno;
728   for (i = 0; i < n_sectors; i++) {
729      sno = sector_search_order[i];
730      if (sno == -1)
731         break; /* run out of sectors to search */
732      if ((UChar*)hcode >= (UChar*)sectors[sno].tc
733          && (UChar*)hcode <= (UChar*)sectors[sno].tc_next
734                              + sizeof(ULong) - 1)
735         return True;
736   }
737   return False;
738}
739
740
741/* Fulfill a chaining request, and record admin info so we
742   can undo it later, if required.
743*/
744void VG_(tt_tc_do_chaining) ( void* from__patch_addr,
745                              UInt  to_sNo,
746                              UInt  to_tteNo,
747                              Bool  to_fastEP )
748{
749   /* Get the CPU info established at startup. */
750   VexArch vex_arch = VexArch_INVALID;
751   VG_(machine_get_VexArchInfo)( &vex_arch, NULL );
752
753   // host_code is where we're patching to.  So it needs to
754   // take into account, whether we're jumping to the slow
755   // or fast entry point.  By definition, the fast entry point
756   // is exactly one event check's worth of code along from
757   // the slow (tcptr) entry point.
758   TTEntry* to_tte    = index_tte(to_sNo, to_tteNo);
759   void*    host_code = ((UChar*)to_tte->tcptr)
760                        + (to_fastEP ? LibVEX_evCheckSzB(vex_arch) : 0);
761
762   // stay sane -- the patch point (dst) is in this sector's code cache
763   vg_assert( (UChar*)host_code >= (UChar*)sectors[to_sNo].tc );
764   vg_assert( (UChar*)host_code <= (UChar*)sectors[to_sNo].tc_next
765                                   + sizeof(ULong) - 1 );
766
767   /* Find the TTEntry for the from__ code.  This isn't simple since
768      we only know the patch address, which is going to be somewhere
769      inside the from_ block. */
770   UInt from_sNo   = (UInt)-1;
771   UInt from_tteNo = (UInt)-1;
772   Bool from_found
773      = find_TTEntry_from_hcode( &from_sNo, &from_tteNo,
774                                 from__patch_addr );
775   if (!from_found) {
776      // The from code might have been discarded due to sector re-use
777      // or marked Deleted due to translation invalidation.
778      // In such a case, don't do the chaining.
779      VG_(debugLog)(1,"transtab",
780                    "host code %p not found (discarded? sector recycled?)"
781                    " => no chaining done\n",
782                    from__patch_addr);
783      return;
784   }
785
786   TTEntry* from_tte = index_tte(from_sNo, from_tteNo);
787
788   /* Get VEX to do the patching itself.  We have to hand it off
789      since it is host-dependent. */
790   VexInvalRange vir
791      = LibVEX_Chain(
792           vex_arch,
793           from__patch_addr,
794           VG_(fnptr_to_fnentry)(
795              to_fastEP ? &VG_(disp_cp_chain_me_to_fastEP)
796                        : &VG_(disp_cp_chain_me_to_slowEP)),
797           (void*)host_code
798        );
799   VG_(invalidate_icache)( (void*)vir.start, vir.len );
800
801   /* Now do the tricky bit -- update the ch_succs and ch_preds info
802      for the two translations involved, so we can undo the chaining
803      later, which we will have to do if the to_ block gets removed
804      for whatever reason. */
805
806   /* This is the new from_ -> to_ link to add. */
807   InEdge ie;
808   InEdge__init(&ie);
809   ie.from_sNo   = from_sNo;
810   ie.from_tteNo = from_tteNo;
811   ie.to_fastEP  = to_fastEP;
812   HWord from_offs = (HWord)( (UChar*)from__patch_addr
813                              - (UChar*)from_tte->tcptr );
814   vg_assert(from_offs < 100000/* let's say */);
815   ie.from_offs  = (UInt)from_offs;
816
817   /* This is the new to_ -> from_ backlink to add. */
818   OutEdge oe;
819   OutEdge__init(&oe);
820   oe.to_sNo    = to_sNo;
821   oe.to_tteNo  = to_tteNo;
822   oe.from_offs = (UInt)from_offs;
823
824   /* Add .. */
825   InEdgeArr__add(&to_tte->in_edges, &ie);
826   OutEdgeArr__add(&from_tte->out_edges, &oe);
827}
828
829
830/* Unchain one patch, as described by the specified InEdge.  For
831   sanity check purposes only (to check that the patched location is
832   as expected) it also requires the fast and slow entry point
833   addresses of the destination block (that is, the block that owns
834   this InEdge). */
835__attribute__((noinline))
836static void unchain_one ( VexArch vex_arch,
837                          InEdge* ie,
838                          void* to_fastEPaddr, void* to_slowEPaddr )
839{
840   vg_assert(ie);
841   TTEntry* tte
842      = index_tte(ie->from_sNo, ie->from_tteNo);
843   UChar* place_to_patch
844      = ((UChar*)tte->tcptr) + ie->from_offs;
845   UChar* disp_cp_chain_me
846      = VG_(fnptr_to_fnentry)(
847           ie->to_fastEP ? &VG_(disp_cp_chain_me_to_fastEP)
848                         : &VG_(disp_cp_chain_me_to_slowEP)
849        );
850   UChar* place_to_jump_to_EXPECTED
851      = ie->to_fastEP ? to_fastEPaddr : to_slowEPaddr;
852
853   // stay sane: both src and dst for this unchaining are
854   // in the main code cache
855   vg_assert( is_in_the_main_TC(place_to_patch) ); // src
856   vg_assert( is_in_the_main_TC(place_to_jump_to_EXPECTED) ); // dst
857   // dst check is ok because LibVEX_UnChain checks that
858   // place_to_jump_to_EXPECTED really is the current dst, and
859   // asserts if it isn't.
860   VexInvalRange vir
861       = LibVEX_UnChain( vex_arch, place_to_patch,
862                         place_to_jump_to_EXPECTED, disp_cp_chain_me );
863   VG_(invalidate_icache)( (void*)vir.start, vir.len );
864}
865
866
867/* The specified block is about to be deleted.  Update the preds and
868   succs of its associated blocks accordingly.  This includes undoing
869   any chained jumps to this block. */
870static
871void unchain_in_preparation_for_deletion ( VexArch vex_arch,
872                                           UInt here_sNo, UInt here_tteNo )
873{
874   if (DEBUG_TRANSTAB)
875      VG_(printf)("QQQ unchain_in_prep %u.%u...\n", here_sNo, here_tteNo);
876   UWord    i, j, n, m;
877   Int      evCheckSzB = LibVEX_evCheckSzB(vex_arch);
878   TTEntry* here_tte   = index_tte(here_sNo, here_tteNo);
879   if (DEBUG_TRANSTAB)
880      VG_(printf)("... QQQ tt.entry 0x%llu tt.tcptr 0x%p\n",
881                  here_tte->entry, here_tte->tcptr);
882   vg_assert(here_tte->status == InUse);
883
884   /* Visit all InEdges owned by here_tte. */
885   n = InEdgeArr__size(&here_tte->in_edges);
886   for (i = 0; i < n; i++) {
887      InEdge* ie = InEdgeArr__index(&here_tte->in_edges, i);
888      // Undo the chaining.
889      UChar* here_slow_EP = (UChar*)here_tte->tcptr;
890      UChar* here_fast_EP = here_slow_EP + evCheckSzB;
891      unchain_one(vex_arch, ie, here_fast_EP, here_slow_EP);
892      // Find the corresponding entry in the "from" node's out_edges,
893      // and remove it.
894      TTEntry* from_tte = index_tte(ie->from_sNo, ie->from_tteNo);
895      m = OutEdgeArr__size(&from_tte->out_edges);
896      vg_assert(m > 0); // it must have at least one entry
897      for (j = 0; j < m; j++) {
898         OutEdge* oe = OutEdgeArr__index(&from_tte->out_edges, j);
899         if (oe->to_sNo == here_sNo && oe->to_tteNo == here_tteNo
900             && oe->from_offs == ie->from_offs)
901           break;
902      }
903      vg_assert(j < m); // "oe must be findable"
904      OutEdgeArr__deleteIndex(&from_tte->out_edges, j);
905   }
906
907   /* Visit all OutEdges owned by here_tte. */
908   n = OutEdgeArr__size(&here_tte->out_edges);
909   for (i = 0; i < n; i++) {
910      OutEdge* oe = OutEdgeArr__index(&here_tte->out_edges, i);
911      // Find the corresponding entry in the "to" node's in_edges,
912      // and remove it.
913      TTEntry* to_tte = index_tte(oe->to_sNo, oe->to_tteNo);
914      m = InEdgeArr__size(&to_tte->in_edges);
915      vg_assert(m > 0); // it must have at least one entry
916      for (j = 0; j < m; j++) {
917         InEdge* ie = InEdgeArr__index(&to_tte->in_edges, j);
918         if (ie->from_sNo == here_sNo && ie->from_tteNo == here_tteNo
919             && ie->from_offs == oe->from_offs)
920           break;
921      }
922      vg_assert(j < m); // "ie must be findable"
923      InEdgeArr__deleteIndex(&to_tte->in_edges, j);
924   }
925
926   InEdgeArr__makeEmpty(&here_tte->in_edges);
927   OutEdgeArr__makeEmpty(&here_tte->out_edges);
928}
929
930
931/*-------------------------------------------------------------*/
932/*--- Address-range equivalence class stuff                 ---*/
933/*-------------------------------------------------------------*/
934
935/* Return equivalence class number for a range. */
936
937static Int range_to_eclass ( Addr64 start, UInt len )
938{
939   UInt mask   = (1 << ECLASS_WIDTH) - 1;
940   UInt lo     = (UInt)start;
941   UInt hi     = lo + len - 1;
942   UInt loBits = (lo >> ECLASS_SHIFT) & mask;
943   UInt hiBits = (hi >> ECLASS_SHIFT) & mask;
944   if (loBits == hiBits) {
945      vg_assert(loBits < ECLASS_N-1);
946      return loBits;
947   } else {
948      return ECLASS_MISC;
949   }
950}
951
952
953/* Calculates the equivalence class numbers for any VexGuestExtent.
954   These are written in *eclasses, which must be big enough to hold 3
955   Ints.  The number written, between 1 and 3, is returned.  The
956   eclasses are presented in order, and any duplicates are removed.
957*/
958
959static
960Int vexGuestExtents_to_eclasses ( /*OUT*/Int* eclasses,
961                                  VexGuestExtents* vge )
962{
963#  define SWAP(_lv1,_lv2) \
964      do { Int t = _lv1; _lv1 = _lv2; _lv2 = t; } while (0)
965
966   Int i, j, n_ec, r;
967
968   vg_assert(vge->n_used >= 1 && vge->n_used <= 3);
969
970   n_ec = 0;
971   for (i = 0; i < vge->n_used; i++) {
972      r = range_to_eclass( vge->base[i], (UInt)vge->len[i] );
973      if (r == ECLASS_MISC)
974         goto bad;
975      /* only add if we haven't already seen it */
976      for (j = 0; j < n_ec; j++)
977         if (eclasses[j] == r)
978            break;
979      if (j == n_ec)
980         eclasses[n_ec++] = r;
981   }
982
983   if (n_ec == 1)
984      return 1;
985
986   if (n_ec == 2) {
987      /* sort */
988      if (eclasses[0] > eclasses[1])
989         SWAP(eclasses[0], eclasses[1]);
990      return 2;
991   }
992
993   if (n_ec == 3) {
994      /* sort */
995      if (eclasses[0] > eclasses[2])
996         SWAP(eclasses[0], eclasses[2]);
997      if (eclasses[0] > eclasses[1])
998         SWAP(eclasses[0], eclasses[1]);
999      if (eclasses[1] > eclasses[2])
1000         SWAP(eclasses[1], eclasses[2]);
1001      return 3;
1002   }
1003
1004   /* NOTREACHED */
1005   vg_assert(0);
1006
1007  bad:
1008   eclasses[0] = ECLASS_MISC;
1009   return 1;
1010
1011#  undef SWAP
1012}
1013
1014
1015/* Add tteno to the set of entries listed for equivalence class ec in
1016   this sector.  Returns used location in eclass array. */
1017
1018static
1019UInt addEClassNo ( /*MOD*/Sector* sec, Int ec, UShort tteno )
1020{
1021   Int    old_sz, new_sz, i, r;
1022   UShort *old_ar, *new_ar;
1023
1024   vg_assert(ec >= 0 && ec < ECLASS_N);
1025   vg_assert(tteno < N_TTES_PER_SECTOR);
1026
1027   if (DEBUG_TRANSTAB) VG_(printf)("ec %d  gets %d\n", ec, (Int)tteno);
1028
1029   if (sec->ec2tte_used[ec] >= sec->ec2tte_size[ec]) {
1030
1031      vg_assert(sec->ec2tte_used[ec] == sec->ec2tte_size[ec]);
1032
1033      old_sz = sec->ec2tte_size[ec];
1034      old_ar = sec->ec2tte[ec];
1035      new_sz = old_sz==0 ? 8 : old_sz<64 ? 2*old_sz : (3*old_sz)/2;
1036      new_ar = ttaux_malloc("transtab.aECN.1",
1037                            new_sz * sizeof(UShort));
1038      for (i = 0; i < old_sz; i++)
1039         new_ar[i] = old_ar[i];
1040      if (old_ar)
1041         ttaux_free(old_ar);
1042      sec->ec2tte_size[ec] = new_sz;
1043      sec->ec2tte[ec] = new_ar;
1044
1045      if (DEBUG_TRANSTAB) VG_(printf)("expand ec %d to %d\n", ec, new_sz);
1046   }
1047
1048   /* Common case */
1049   r = sec->ec2tte_used[ec]++;
1050   vg_assert(r >= 0 && r < sec->ec2tte_size[ec]);
1051   sec->ec2tte[ec][r] = tteno;
1052   return (UInt)r;
1053}
1054
1055
1056/* 'vge' is being added to 'sec' at TT entry 'tteno'.  Add appropriate
1057   eclass entries to 'sec'. */
1058
1059static
1060void upd_eclasses_after_add ( /*MOD*/Sector* sec, Int tteno )
1061{
1062   Int i, r, eclasses[3];
1063   TTEntry* tte;
1064   vg_assert(tteno >= 0 && tteno < N_TTES_PER_SECTOR);
1065
1066   tte = &sec->tt[tteno];
1067   r = vexGuestExtents_to_eclasses( eclasses, &tte->vge );
1068
1069   vg_assert(r >= 1 && r <= 3);
1070   tte->n_tte2ec = r;
1071
1072   for (i = 0; i < r; i++) {
1073      tte->tte2ec_ec[i] = eclasses[i];
1074      tte->tte2ec_ix[i] = addEClassNo( sec, eclasses[i], (UShort)tteno );
1075   }
1076}
1077
1078
1079/* Check the eclass info in 'sec' to ensure it is consistent.  Returns
1080   True if OK, False if something's not right.  Expensive. */
1081
1082static Bool sanity_check_eclasses_in_sector ( Sector* sec )
1083{
1084#  define BAD(_str) do { whassup = (_str); goto bad; } while (0)
1085
1086   const HChar* whassup = NULL;
1087   Int      i, j, k, n, ec_num, ec_idx;
1088   TTEntry* tte;
1089   UShort   tteno;
1090   ULong*   tce;
1091
1092   /* Basic checks on this sector */
1093   if (sec->tt_n_inuse < 0 || sec->tt_n_inuse > N_TTES_PER_SECTOR_USABLE)
1094      BAD("invalid sec->tt_n_inuse");
1095   tce = sec->tc_next;
1096   if (tce < &sec->tc[0] || tce > &sec->tc[tc_sector_szQ])
1097      BAD("sec->tc_next points outside tc");
1098
1099   /* For each eclass ... */
1100   for (i = 0; i < ECLASS_N; i++) {
1101      if (sec->ec2tte_size[i] == 0 && sec->ec2tte[i] != NULL)
1102         BAD("ec2tte_size/ec2tte mismatch(1)");
1103      if (sec->ec2tte_size[i] != 0 && sec->ec2tte[i] == NULL)
1104         BAD("ec2tte_size/ec2tte mismatch(2)");
1105      if (sec->ec2tte_used[i] < 0
1106          || sec->ec2tte_used[i] > sec->ec2tte_size[i])
1107         BAD("implausible ec2tte_used");
1108      if (sec->ec2tte_used[i] == 0)
1109         continue;
1110
1111      /* For each tt reference in each eclass .. ensure the reference
1112         is to a valid tt entry, and that the entry's address ranges
1113         really include this eclass. */
1114
1115      for (j = 0; j < sec->ec2tte_used[i]; j++) {
1116         tteno = sec->ec2tte[i][j];
1117         if (tteno == EC2TTE_DELETED)
1118            continue;
1119         if (tteno >= N_TTES_PER_SECTOR)
1120            BAD("implausible tteno");
1121         tte = &sec->tt[tteno];
1122         if (tte->status != InUse)
1123            BAD("tteno points to non-inuse tte");
1124         if (tte->n_tte2ec < 1 || tte->n_tte2ec > 3)
1125            BAD("tte->n_tte2ec out of range");
1126         /* Exactly least one of tte->eclasses[0 .. tte->n_eclasses-1]
1127            must equal i.  Inspect tte's eclass info. */
1128         n = 0;
1129         for (k = 0; k < tte->n_tte2ec; k++) {
1130            if (k < tte->n_tte2ec-1
1131                && tte->tte2ec_ec[k] >= tte->tte2ec_ec[k+1])
1132               BAD("tte->tte2ec_ec[..] out of order");
1133            ec_num = tte->tte2ec_ec[k];
1134            if (ec_num < 0 || ec_num >= ECLASS_N)
1135               BAD("tte->tte2ec_ec[..] out of range");
1136            if (ec_num != i)
1137               continue;
1138            ec_idx = tte->tte2ec_ix[k];
1139            if (ec_idx < 0 || ec_idx >= sec->ec2tte_used[i])
1140               BAD("tte->tte2ec_ix[..] out of range");
1141            if (ec_idx == j)
1142               n++;
1143         }
1144         if (n != 1)
1145            BAD("tteno does not point back at eclass");
1146      }
1147   }
1148
1149   /* That establishes that for each forward pointer from TTEntrys
1150      there is a corresponding backward pointer from the eclass[]
1151      arrays.  However, it doesn't rule out the possibility of other,
1152      bogus pointers in the eclass[] arrays.  So do those similarly:
1153      scan through them and check the TTEntryies they point at point
1154      back. */
1155
1156   for (i = 0; i < N_TTES_PER_SECTOR_USABLE; i++) {
1157
1158      tte = &sec->tt[i];
1159      if (tte->status == Empty || tte->status == Deleted) {
1160         if (tte->n_tte2ec != 0)
1161            BAD("tte->n_eclasses nonzero for unused tte");
1162         continue;
1163      }
1164
1165      vg_assert(tte->status == InUse);
1166
1167      if (tte->n_tte2ec < 1 || tte->n_tte2ec > 3)
1168         BAD("tte->n_eclasses out of range(2)");
1169
1170      for (j = 0; j < tte->n_tte2ec; j++) {
1171         ec_num = tte->tte2ec_ec[j];
1172         if (ec_num < 0 || ec_num >= ECLASS_N)
1173            BAD("tte->eclass[..] out of range");
1174         ec_idx = tte->tte2ec_ix[j];
1175         if (ec_idx < 0 || ec_idx >= sec->ec2tte_used[ec_num])
1176            BAD("tte->ec_idx[..] out of range(2)");
1177         if (sec->ec2tte[ec_num][ec_idx] != i)
1178            BAD("ec2tte does not point back to tte");
1179      }
1180   }
1181
1182   return True;
1183
1184  bad:
1185   if (whassup)
1186      VG_(debugLog)(0, "transtab", "eclass sanity fail: %s\n", whassup);
1187
1188#  if 0
1189   VG_(printf)("eclass = %d\n", i);
1190   VG_(printf)("tteno = %d\n", (Int)tteno);
1191   switch (tte->status) {
1192      case InUse:   VG_(printf)("InUse\n"); break;
1193      case Deleted: VG_(printf)("Deleted\n"); break;
1194      case Empty:   VG_(printf)("Empty\n"); break;
1195   }
1196   if (tte->status != Empty) {
1197      for (k = 0; k < tte->vge.n_used; k++)
1198         VG_(printf)("0x%llx %d\n", tte->vge.base[k],
1199                                    (Int)tte->vge.len[k]);
1200   }
1201#  endif
1202
1203   return False;
1204
1205#  undef BAD
1206}
1207
1208
1209/* Sanity check absolutely everything.  True == check passed. */
1210
1211/* forwards */
1212static Bool sanity_check_redir_tt_tc ( void );
1213
1214static Bool sanity_check_sector_search_order ( void )
1215{
1216   Int i, j, nListed;
1217   /* assert the array is the right size */
1218   vg_assert(MAX_N_SECTORS == (sizeof(sector_search_order)
1219                               / sizeof(sector_search_order[0])));
1220   /* Check it's of the form  valid_sector_numbers ++ [-1, -1, ..] */
1221   for (i = 0; i < n_sectors; i++) {
1222      if (sector_search_order[i] < 0 || sector_search_order[i] >= n_sectors)
1223         break;
1224   }
1225   nListed = i;
1226   for (/* */; i < n_sectors; i++) {
1227      if (sector_search_order[i] != -1)
1228         break;
1229   }
1230   if (i != n_sectors)
1231      return False;
1232   /* Check each sector number only appears once */
1233   for (i = 0; i < n_sectors; i++) {
1234      if (sector_search_order[i] == -1)
1235         continue;
1236      for (j = i+1; j < n_sectors; j++) {
1237         if (sector_search_order[j] == sector_search_order[i])
1238            return False;
1239      }
1240   }
1241   /* Check that the number of listed sectors equals the number
1242      in use, by counting nListed back down. */
1243   for (i = 0; i < n_sectors; i++) {
1244      if (sectors[i].tc != NULL)
1245         nListed--;
1246   }
1247   if (nListed != 0)
1248      return False;
1249   return True;
1250}
1251
1252static Bool sanity_check_all_sectors ( void )
1253{
1254   Int     sno;
1255   Bool    sane;
1256   Sector* sec;
1257   for (sno = 0; sno < n_sectors; sno++) {
1258      Int i;
1259      Int nr_not_dead_hx = 0;
1260      Int szhxa;
1261      sec = &sectors[sno];
1262      if (sec->tc == NULL)
1263         continue;
1264      sane = sanity_check_eclasses_in_sector( sec );
1265      if (!sane)
1266         return False;
1267      szhxa = VG_(sizeXA)(sec->host_extents);
1268      for (i = 0; i < szhxa; i++) {
1269         const HostExtent* hx = VG_(indexXA)(sec->host_extents, i);
1270         if (!HostExtent__is_dead (hx, sec))
1271            nr_not_dead_hx++;
1272      }
1273      if (nr_not_dead_hx != sec->tt_n_inuse) {
1274         VG_(debugLog)(0, "transtab",
1275                       "nr_not_dead_hx %d sanity fail (expected == in use %d)\n",
1276                       nr_not_dead_hx, sec->tt_n_inuse);
1277         return False;
1278      }
1279   }
1280
1281   if ( !sanity_check_redir_tt_tc() )
1282      return False;
1283   if ( !sanity_check_sector_search_order() )
1284      return False;
1285   return True;
1286}
1287
1288
1289
1290/*-------------------------------------------------------------*/
1291/*--- Add/find translations                                 ---*/
1292/*-------------------------------------------------------------*/
1293
1294static UInt vge_osize ( VexGuestExtents* vge )
1295{
1296   UInt i, n = 0;
1297   for (i = 0; i < vge->n_used; i++)
1298      n += (UInt)vge->len[i];
1299   return n;
1300}
1301
1302static Bool isValidSector ( Int sector )
1303{
1304   if (sector < 0 || sector >= n_sectors)
1305      return False;
1306   return True;
1307}
1308
1309static inline UInt HASH_TT ( Addr64 key )
1310{
1311   UInt kHi = (UInt)(key >> 32);
1312   UInt kLo = (UInt)key;
1313   UInt k32 = kHi ^ kLo;
1314   UInt ror = 7;
1315   if (ror > 0)
1316      k32 = (k32 >> ror) | (k32 << (32-ror));
1317   return k32 % N_TTES_PER_SECTOR;
1318}
1319
1320static void setFastCacheEntry ( Addr64 key, ULong* tcptr )
1321{
1322   UInt cno = (UInt)VG_TT_FAST_HASH(key);
1323   VG_(tt_fast)[cno].guest = (Addr)key;
1324   VG_(tt_fast)[cno].host  = (Addr)tcptr;
1325   n_fast_updates++;
1326   /* This shouldn't fail.  It should be assured by m_translate
1327      which should reject any attempt to make translation of code
1328      starting at TRANSTAB_BOGUS_GUEST_ADDR. */
1329   vg_assert(VG_(tt_fast)[cno].guest != TRANSTAB_BOGUS_GUEST_ADDR);
1330}
1331
1332/* Invalidate the fast cache VG_(tt_fast). */
1333static void invalidateFastCache ( void )
1334{
1335   UInt j;
1336   /* This loop is popular enough to make it worth unrolling a
1337      bit, at least on ppc32. */
1338   vg_assert(VG_TT_FAST_SIZE > 0 && (VG_TT_FAST_SIZE % 4) == 0);
1339   for (j = 0; j < VG_TT_FAST_SIZE; j += 4) {
1340      VG_(tt_fast)[j+0].guest = TRANSTAB_BOGUS_GUEST_ADDR;
1341      VG_(tt_fast)[j+1].guest = TRANSTAB_BOGUS_GUEST_ADDR;
1342      VG_(tt_fast)[j+2].guest = TRANSTAB_BOGUS_GUEST_ADDR;
1343      VG_(tt_fast)[j+3].guest = TRANSTAB_BOGUS_GUEST_ADDR;
1344   }
1345
1346   vg_assert(j == VG_TT_FAST_SIZE);
1347   n_fast_flushes++;
1348}
1349
1350static void initialiseSector ( Int sno )
1351{
1352   Int     i;
1353   SysRes  sres;
1354   Sector* sec;
1355   vg_assert(isValidSector(sno));
1356
1357   { Bool sane = sanity_check_sector_search_order();
1358     vg_assert(sane);
1359   }
1360   sec = &sectors[sno];
1361
1362   if (sec->tc == NULL) {
1363
1364      /* Sector has never been used before.  Need to allocate tt and
1365         tc. */
1366      vg_assert(sec->tt == NULL);
1367      vg_assert(sec->tc_next == NULL);
1368      vg_assert(sec->tt_n_inuse == 0);
1369      for (i = 0; i < ECLASS_N; i++) {
1370         vg_assert(sec->ec2tte_size[i] == 0);
1371         vg_assert(sec->ec2tte_used[i] == 0);
1372         vg_assert(sec->ec2tte[i] == NULL);
1373      }
1374      vg_assert(sec->host_extents == NULL);
1375
1376      VG_(debugLog)(1,"transtab", "allocate sector %d\n", sno);
1377      if (VG_(clo_stats))
1378         VG_(dmsg)("transtab: " "allocate sector %d\n", sno);
1379
1380      sres = VG_(am_mmap_anon_float_valgrind)( 8 * tc_sector_szQ );
1381      if (sr_isError(sres)) {
1382         VG_(out_of_memory_NORETURN)("initialiseSector(TC)",
1383                                     8 * tc_sector_szQ );
1384	 /*NOTREACHED*/
1385      }
1386      sec->tc = (ULong*)(AddrH)sr_Res(sres);
1387
1388      sres = VG_(am_mmap_anon_float_valgrind)
1389                ( N_TTES_PER_SECTOR * sizeof(TTEntry) );
1390      if (sr_isError(sres)) {
1391         VG_(out_of_memory_NORETURN)("initialiseSector(TT)",
1392                                     N_TTES_PER_SECTOR * sizeof(TTEntry) );
1393	 /*NOTREACHED*/
1394      }
1395      sec->tt = (TTEntry*)(AddrH)sr_Res(sres);
1396
1397      for (i = 0; i < N_TTES_PER_SECTOR; i++) {
1398         sec->tt[i].status   = Empty;
1399         sec->tt[i].n_tte2ec = 0;
1400      }
1401
1402      /* Set up the host_extents array. */
1403      sec->host_extents
1404         = VG_(newXA)(ttaux_malloc, "transtab.initialiseSector(host_extents)",
1405                      ttaux_free,
1406                      sizeof(HostExtent));
1407
1408      /* Add an entry in the sector_search_order */
1409      for (i = 0; i < n_sectors; i++) {
1410         if (sector_search_order[i] == -1)
1411            break;
1412      }
1413      vg_assert(i >= 0 && i < n_sectors);
1414      sector_search_order[i] = sno;
1415
1416      if (VG_(clo_verbosity) > 2)
1417         VG_(message)(Vg_DebugMsg, "TT/TC: initialise sector %d\n", sno);
1418
1419   } else {
1420
1421      /* Sector has been used before.  Dump the old contents. */
1422      VG_(debugLog)(1,"transtab", "recycle sector %d\n", sno);
1423      if (VG_(clo_stats))
1424         VG_(dmsg)("transtab: " "recycle sector %d\n", sno);
1425
1426      vg_assert(sec->tt != NULL);
1427      vg_assert(sec->tc_next != NULL);
1428      n_dump_count += sec->tt_n_inuse;
1429
1430      VexArch vex_arch = VexArch_INVALID;
1431      VG_(machine_get_VexArchInfo)( &vex_arch, NULL );
1432
1433      /* Visit each just-about-to-be-abandoned translation. */
1434      if (DEBUG_TRANSTAB) VG_(printf)("QQQ unlink-entire-sector: %d START\n",
1435                                      sno);
1436      for (i = 0; i < N_TTES_PER_SECTOR; i++) {
1437         if (sec->tt[i].status == InUse) {
1438            vg_assert(sec->tt[i].n_tte2ec >= 1);
1439            vg_assert(sec->tt[i].n_tte2ec <= 3);
1440            n_dump_osize += vge_osize(&sec->tt[i].vge);
1441            /* Tell the tool too. */
1442            if (VG_(needs).superblock_discards) {
1443               VG_TDICT_CALL( tool_discard_superblock_info,
1444                              sec->tt[i].entry,
1445                              sec->tt[i].vge );
1446            }
1447            unchain_in_preparation_for_deletion(vex_arch, sno, i);
1448         } else {
1449            vg_assert(sec->tt[i].n_tte2ec == 0);
1450         }
1451         sec->tt[i].status   = Empty;
1452         sec->tt[i].n_tte2ec = 0;
1453      }
1454      if (DEBUG_TRANSTAB) VG_(printf)("QQQ unlink-entire-sector: %d END\n",
1455                                      sno);
1456
1457      /* Free up the eclass structures. */
1458      for (i = 0; i < ECLASS_N; i++) {
1459         if (sec->ec2tte_size[i] == 0) {
1460            vg_assert(sec->ec2tte_used[i] == 0);
1461            vg_assert(sec->ec2tte[i] == NULL);
1462         } else {
1463            vg_assert(sec->ec2tte[i] != NULL);
1464            ttaux_free(sec->ec2tte[i]);
1465            sec->ec2tte[i] = NULL;
1466            sec->ec2tte_size[i] = 0;
1467            sec->ec2tte_used[i] = 0;
1468         }
1469      }
1470
1471      /* Empty out the host extents array. */
1472      vg_assert(sec->host_extents != NULL);
1473      VG_(dropTailXA)(sec->host_extents, VG_(sizeXA)(sec->host_extents));
1474      vg_assert(VG_(sizeXA)(sec->host_extents) == 0);
1475
1476      /* Sanity check: ensure it is already in
1477         sector_search_order[]. */
1478      for (i = 0; i < n_sectors; i++) {
1479         if (sector_search_order[i] == sno)
1480            break;
1481      }
1482      vg_assert(i >= 0 && i < n_sectors);
1483
1484      if (VG_(clo_verbosity) > 2)
1485         VG_(message)(Vg_DebugMsg, "TT/TC: recycle sector %d\n", sno);
1486   }
1487
1488   sec->tc_next = sec->tc;
1489   sec->tt_n_inuse = 0;
1490
1491   invalidateFastCache();
1492
1493   { Bool sane = sanity_check_sector_search_order();
1494     vg_assert(sane);
1495   }
1496}
1497
1498
1499/* Add a translation of vge to TT/TC.  The translation is temporarily
1500   in code[0 .. code_len-1].
1501
1502   pre: youngest_sector points to a valid (although possibly full)
1503   sector.
1504*/
1505void VG_(add_to_transtab)( VexGuestExtents* vge,
1506                           Addr64           entry,
1507                           AddrH            code,
1508                           UInt             code_len,
1509                           Bool             is_self_checking,
1510                           Int              offs_profInc,
1511                           UInt             n_guest_instrs,
1512                           VexArch          arch_host )
1513{
1514   Int    tcAvailQ, reqdQ, y, i;
1515   ULong  *tcptr, *tcptr2;
1516   UChar* srcP;
1517   UChar* dstP;
1518
1519   vg_assert(init_done);
1520   vg_assert(vge->n_used >= 1 && vge->n_used <= 3);
1521
1522   /* 60000: should agree with N_TMPBUF in m_translate.c. */
1523   vg_assert(code_len > 0 && code_len < 60000);
1524
1525   /* Generally stay sane */
1526   vg_assert(n_guest_instrs < 200); /* it can be zero, tho */
1527
1528   if (DEBUG_TRANSTAB)
1529      VG_(printf)("add_to_transtab(entry = 0x%llx, len = %d) ...\n",
1530                  entry, code_len);
1531
1532   n_in_count++;
1533   n_in_tsize += code_len;
1534   n_in_osize += vge_osize(vge);
1535   if (is_self_checking)
1536      n_in_sc_count++;
1537
1538   y = youngest_sector;
1539   vg_assert(isValidSector(y));
1540
1541   if (sectors[y].tc == NULL)
1542      initialiseSector(y);
1543
1544   /* Try putting the translation in this sector. */
1545   reqdQ = (code_len + 7) >> 3;
1546
1547   /* Will it fit in tc? */
1548   tcAvailQ = ((ULong*)(&sectors[y].tc[tc_sector_szQ]))
1549              - ((ULong*)(sectors[y].tc_next));
1550   vg_assert(tcAvailQ >= 0);
1551   vg_assert(tcAvailQ <= tc_sector_szQ);
1552
1553   if (tcAvailQ < reqdQ
1554       || sectors[y].tt_n_inuse >= N_TTES_PER_SECTOR_USABLE) {
1555      /* No.  So move on to the next sector.  Either it's never been
1556         used before, in which case it will get its tt/tc allocated
1557         now, or it has been used before, in which case it is set to be
1558         empty, hence throwing out the oldest sector. */
1559      vg_assert(tc_sector_szQ > 0);
1560      Int tt_loading_pct = (100 * sectors[y].tt_n_inuse)
1561                           / N_TTES_PER_SECTOR;
1562      Int tc_loading_pct = (100 * (tc_sector_szQ - tcAvailQ))
1563                           / tc_sector_szQ;
1564      VG_(debugLog)(1,"transtab",
1565                      "declare sector %d full "
1566                      "(TT loading %2d%%, TC loading %2d%%)\n",
1567                      y, tt_loading_pct, tc_loading_pct);
1568      if (VG_(clo_stats)) {
1569         VG_(dmsg)("transtab: "
1570                   "declare sector %d full "
1571                   "(TT loading %2d%%, TC loading %2d%%)\n",
1572                   y, tt_loading_pct, tc_loading_pct);
1573      }
1574      youngest_sector++;
1575      if (youngest_sector >= n_sectors)
1576         youngest_sector = 0;
1577      y = youngest_sector;
1578      initialiseSector(y);
1579   }
1580
1581   /* Be sure ... */
1582   tcAvailQ = ((ULong*)(&sectors[y].tc[tc_sector_szQ]))
1583              - ((ULong*)(sectors[y].tc_next));
1584   vg_assert(tcAvailQ >= 0);
1585   vg_assert(tcAvailQ <= tc_sector_szQ);
1586   vg_assert(tcAvailQ >= reqdQ);
1587   vg_assert(sectors[y].tt_n_inuse < N_TTES_PER_SECTOR_USABLE);
1588   vg_assert(sectors[y].tt_n_inuse >= 0);
1589
1590   /* Copy into tc. */
1591   tcptr = sectors[y].tc_next;
1592   vg_assert(tcptr >= &sectors[y].tc[0]);
1593   vg_assert(tcptr <= &sectors[y].tc[tc_sector_szQ]);
1594
1595   dstP = (UChar*)tcptr;
1596   srcP = (UChar*)code;
1597   VG_(memcpy)(dstP, srcP, code_len);
1598   sectors[y].tc_next += reqdQ;
1599   sectors[y].tt_n_inuse++;
1600
1601   /* more paranoia */
1602   tcptr2 = sectors[y].tc_next;
1603   vg_assert(tcptr2 >= &sectors[y].tc[0]);
1604   vg_assert(tcptr2 <= &sectors[y].tc[tc_sector_szQ]);
1605
1606   /* Find an empty tt slot, and use it.  There must be such a slot
1607      since tt is never allowed to get completely full. */
1608   i = HASH_TT(entry);
1609   vg_assert(i >= 0 && i < N_TTES_PER_SECTOR);
1610   while (True) {
1611      if (sectors[y].tt[i].status == Empty
1612          || sectors[y].tt[i].status == Deleted)
1613         break;
1614      i++;
1615      if (i >= N_TTES_PER_SECTOR)
1616         i = 0;
1617   }
1618
1619   TTEntry__init(&sectors[y].tt[i]);
1620   sectors[y].tt[i].status = InUse;
1621   sectors[y].tt[i].tcptr  = tcptr;
1622   sectors[y].tt[i].count  = 0;
1623   sectors[y].tt[i].weight = n_guest_instrs == 0 ? 1 : n_guest_instrs;
1624   sectors[y].tt[i].vge    = *vge;
1625   sectors[y].tt[i].entry  = entry;
1626
1627   /* Patch in the profile counter location, if necessary. */
1628   if (offs_profInc != -1) {
1629      vg_assert(offs_profInc >= 0 && offs_profInc < code_len);
1630      VexInvalRange vir
1631         = LibVEX_PatchProfInc( arch_host,
1632                                dstP + offs_profInc,
1633                                &sectors[y].tt[i].count );
1634      VG_(invalidate_icache)( (void*)vir.start, vir.len );
1635   }
1636
1637   VG_(invalidate_icache)( dstP, code_len );
1638
1639   /* Add this entry to the host_extents map, checking that we're
1640      adding in order. */
1641   { HostExtent hx;
1642     hx.start = (UChar*)tcptr;
1643     hx.len   = code_len;
1644     hx.tteNo = i;
1645     vg_assert(hx.len > 0); /* bsearch fails w/ zero length entries */
1646     XArray* hx_array = sectors[y].host_extents;
1647     vg_assert(hx_array);
1648     Word n = VG_(sizeXA)(hx_array);
1649     if (n > 0) {
1650        HostExtent* hx_prev = (HostExtent*)VG_(indexXA)(hx_array, n-1);
1651        vg_assert(hx_prev->start + hx_prev->len <= hx.start);
1652     }
1653     VG_(addToXA)(hx_array, &hx);
1654     if (DEBUG_TRANSTAB)
1655        VG_(printf)("... hx.start 0x%p hx.len %u sector %d ttslot %d\n",
1656                    hx.start, hx.len, y, i);
1657   }
1658
1659   /* Update the fast-cache. */
1660   setFastCacheEntry( entry, tcptr );
1661
1662   /* Note the eclass numbers for this translation. */
1663   upd_eclasses_after_add( &sectors[y], i );
1664}
1665
1666
1667/* Search for the translation of the given guest address.  If
1668   requested, a successful search can also cause the fast-caches to be
1669   updated.
1670*/
1671Bool VG_(search_transtab) ( /*OUT*/AddrH* res_hcode,
1672                            /*OUT*/UInt*  res_sNo,
1673                            /*OUT*/UInt*  res_tteNo,
1674                            Addr64        guest_addr,
1675                            Bool          upd_cache )
1676{
1677   Int i, j, k, kstart, sno;
1678
1679   vg_assert(init_done);
1680   /* Find the initial probe point just once.  It will be the same in
1681      all sectors and avoids multiple expensive % operations. */
1682   n_full_lookups++;
1683   k      = -1;
1684   kstart = HASH_TT(guest_addr);
1685   vg_assert(kstart >= 0 && kstart < N_TTES_PER_SECTOR);
1686
1687   /* Search in all the sectors,using sector_search_order[] as a
1688      heuristic guide as to what order to visit the sectors. */
1689   for (i = 0; i < n_sectors; i++) {
1690
1691      sno = sector_search_order[i];
1692      if (UNLIKELY(sno == -1))
1693         return False; /* run out of sectors to search */
1694
1695      k = kstart;
1696      for (j = 0; j < N_TTES_PER_SECTOR; j++) {
1697         n_lookup_probes++;
1698         if (sectors[sno].tt[k].status == InUse
1699             && sectors[sno].tt[k].entry == guest_addr) {
1700            /* found it */
1701            if (upd_cache)
1702               setFastCacheEntry(
1703                  guest_addr, sectors[sno].tt[k].tcptr );
1704            if (res_hcode)
1705               *res_hcode = (AddrH)sectors[sno].tt[k].tcptr;
1706            if (res_sNo)
1707               *res_sNo = sno;
1708            if (res_tteNo)
1709               *res_tteNo = k;
1710            /* pull this one one step closer to the front.  For large
1711               apps this more or less halves the number of required
1712               probes. */
1713            if (i > 0) {
1714               Int tmp = sector_search_order[i-1];
1715               sector_search_order[i-1] = sector_search_order[i];
1716               sector_search_order[i] = tmp;
1717            }
1718            return True;
1719         }
1720         if (sectors[sno].tt[k].status == Empty)
1721            break; /* not found in this sector */
1722         k++;
1723         if (k == N_TTES_PER_SECTOR)
1724            k = 0;
1725      }
1726
1727      /* If we fall off the end, all entries are InUse and not
1728         matching, or Deleted.  In any case we did not find it in this
1729         sector. */
1730   }
1731
1732   /* Not found in any sector. */
1733   return False;
1734}
1735
1736
1737/*-------------------------------------------------------------*/
1738/*--- Delete translations.                                  ---*/
1739/*-------------------------------------------------------------*/
1740
1741/* forward */
1742static void unredir_discard_translations( Addr64, ULong );
1743
1744/* Stuff for deleting translations which intersect with a given
1745   address range.  Unfortunately, to make this run at a reasonable
1746   speed, it is complex. */
1747
1748static inline
1749Bool overlap1 ( Addr64 s1, ULong r1, Addr64 s2, ULong r2 )
1750{
1751   Addr64 e1 = s1 + r1 - 1ULL;
1752   Addr64 e2 = s2 + r2 - 1ULL;
1753   if (e1 < s2 || e2 < s1)
1754      return False;
1755   return True;
1756}
1757
1758static inline
1759Bool overlaps ( Addr64 start, ULong range, VexGuestExtents* vge )
1760{
1761   if (overlap1(start, range, vge->base[0], (UInt)vge->len[0]))
1762      return True;
1763   if (vge->n_used < 2)
1764      return False;
1765   if (overlap1(start, range, vge->base[1], (UInt)vge->len[1]))
1766      return True;
1767   if (vge->n_used < 3)
1768      return False;
1769   if (overlap1(start, range, vge->base[2], (UInt)vge->len[2]))
1770      return True;
1771   return False;
1772}
1773
1774
1775/* Delete a tt entry, and update all the eclass data accordingly. */
1776
1777static void delete_tte ( /*MOD*/Sector* sec, UInt secNo, Int tteno,
1778                         VexArch vex_arch )
1779{
1780   Int      i, ec_num, ec_idx;
1781   TTEntry* tte;
1782
1783   /* sec and secNo are mutually redundant; cross-check. */
1784   vg_assert(sec == &sectors[secNo]);
1785
1786   vg_assert(tteno >= 0 && tteno < N_TTES_PER_SECTOR);
1787   tte = &sec->tt[tteno];
1788   vg_assert(tte->status == InUse);
1789   vg_assert(tte->n_tte2ec >= 1 && tte->n_tte2ec <= 3);
1790
1791   /* Unchain .. */
1792   unchain_in_preparation_for_deletion(vex_arch, secNo, tteno);
1793
1794   /* Deal with the ec-to-tte links first. */
1795   for (i = 0; i < tte->n_tte2ec; i++) {
1796      ec_num = (Int)tte->tte2ec_ec[i];
1797      ec_idx = tte->tte2ec_ix[i];
1798      vg_assert(ec_num >= 0 && ec_num < ECLASS_N);
1799      vg_assert(ec_idx >= 0);
1800      vg_assert(ec_idx < sec->ec2tte_used[ec_num]);
1801      /* Assert that the two links point at each other. */
1802      vg_assert(sec->ec2tte[ec_num][ec_idx] == (UShort)tteno);
1803      /* "delete" the pointer back to here. */
1804      sec->ec2tte[ec_num][ec_idx] = EC2TTE_DELETED;
1805   }
1806
1807   /* Now fix up this TTEntry. */
1808   tte->status   = Deleted;
1809   tte->n_tte2ec = 0;
1810
1811   /* Stats .. */
1812   sec->tt_n_inuse--;
1813   n_disc_count++;
1814   n_disc_osize += vge_osize(&tte->vge);
1815
1816   /* Tell the tool too. */
1817   if (VG_(needs).superblock_discards) {
1818      VG_TDICT_CALL( tool_discard_superblock_info,
1819                     tte->entry,
1820                     tte->vge );
1821   }
1822}
1823
1824
1825/* Delete translations from sec which intersect specified range, but
1826   only consider translations in the specified eclass. */
1827
1828static
1829Bool delete_translations_in_sector_eclass ( /*MOD*/Sector* sec, UInt secNo,
1830                                            Addr64 guest_start, ULong range,
1831                                            Int ec,
1832                                            VexArch vex_arch )
1833{
1834   Int      i;
1835   UShort   tteno;
1836   Bool     anyDeld = False;
1837   TTEntry* tte;
1838
1839   vg_assert(ec >= 0 && ec < ECLASS_N);
1840
1841   for (i = 0; i < sec->ec2tte_used[ec]; i++) {
1842
1843      tteno = sec->ec2tte[ec][i];
1844      if (tteno == EC2TTE_DELETED) {
1845         /* already deleted */
1846         continue;
1847      }
1848
1849      vg_assert(tteno < N_TTES_PER_SECTOR);
1850
1851      tte = &sec->tt[tteno];
1852      vg_assert(tte->status == InUse);
1853
1854      if (overlaps( guest_start, range, &tte->vge )) {
1855         anyDeld = True;
1856         delete_tte( sec, secNo, (Int)tteno, vex_arch );
1857      }
1858
1859   }
1860
1861   return anyDeld;
1862}
1863
1864
1865/* Delete translations from sec which intersect specified range, the
1866   slow way, by inspecting all translations in sec. */
1867
1868static
1869Bool delete_translations_in_sector ( /*MOD*/Sector* sec, UInt secNo,
1870                                     Addr64 guest_start, ULong range,
1871                                     VexArch vex_arch )
1872{
1873   Int  i;
1874   Bool anyDeld = False;
1875
1876   for (i = 0; i < N_TTES_PER_SECTOR; i++) {
1877      if (sec->tt[i].status == InUse
1878          && overlaps( guest_start, range, &sec->tt[i].vge )) {
1879         anyDeld = True;
1880         delete_tte( sec, secNo, i, vex_arch );
1881      }
1882   }
1883
1884   return anyDeld;
1885}
1886
1887
1888void VG_(discard_translations) ( Addr64 guest_start, ULong range,
1889                                 const HChar* who )
1890{
1891   Sector* sec;
1892   Int     sno, ec;
1893   Bool    anyDeleted = False;
1894
1895   vg_assert(init_done);
1896
1897   VG_(debugLog)(2, "transtab",
1898                    "discard_translations(0x%llx, %lld) req by %s\n",
1899                    guest_start, range, who );
1900
1901   /* Pre-deletion sanity check */
1902   if (VG_(clo_sanity_level >= 4)) {
1903      Bool sane = sanity_check_all_sectors();
1904      vg_assert(sane);
1905   }
1906
1907   if (range == 0)
1908      return;
1909
1910   VexArch vex_arch = VexArch_INVALID;
1911   VG_(machine_get_VexArchInfo)( &vex_arch, NULL );
1912
1913   /* There are two different ways to do this.
1914
1915      If the range fits within a single address-range equivalence
1916      class, as will be the case for a cache line sized invalidation,
1917      then we only have to inspect the set of translations listed in
1918      that equivalence class, and also in the "sin-bin" equivalence
1919      class ECLASS_MISC.
1920
1921      Otherwise, the invalidation is of a larger range and probably
1922      results from munmap.  In this case it's (probably!) faster just
1923      to inspect all translations, dump those we don't want, and
1924      regenerate the equivalence class information (since modifying it
1925      in-situ is even more expensive).
1926   */
1927
1928   /* First off, figure out if the range falls within a single class,
1929      and if so which one. */
1930
1931   ec = ECLASS_MISC;
1932   if (range < (1ULL << ECLASS_SHIFT))
1933      ec = range_to_eclass( guest_start, (UInt)range );
1934
1935   /* if ec is ECLASS_MISC then we aren't looking at just a single
1936      class, so use the slow scheme.  Else use the fast scheme,
1937      examining 'ec' and ECLASS_MISC. */
1938
1939   if (ec != ECLASS_MISC) {
1940
1941      VG_(debugLog)(2, "transtab",
1942                       "                    FAST, ec = %d\n", ec);
1943
1944      /* Fast scheme */
1945      vg_assert(ec >= 0 && ec < ECLASS_MISC);
1946
1947      for (sno = 0; sno < n_sectors; sno++) {
1948         sec = &sectors[sno];
1949         if (sec->tc == NULL)
1950            continue;
1951         anyDeleted |= delete_translations_in_sector_eclass(
1952                          sec, sno, guest_start, range, ec,
1953                          vex_arch
1954                       );
1955         anyDeleted |= delete_translations_in_sector_eclass(
1956                          sec, sno, guest_start, range, ECLASS_MISC,
1957                          vex_arch
1958                       );
1959      }
1960
1961   } else {
1962
1963      /* slow scheme */
1964
1965      VG_(debugLog)(2, "transtab",
1966                       "                    SLOW, ec = %d\n", ec);
1967
1968      for (sno = 0; sno < n_sectors; sno++) {
1969         sec = &sectors[sno];
1970         if (sec->tc == NULL)
1971            continue;
1972         anyDeleted |= delete_translations_in_sector(
1973                          sec, sno, guest_start, range, vex_arch );
1974      }
1975
1976   }
1977
1978   if (anyDeleted)
1979      invalidateFastCache();
1980
1981   /* don't forget the no-redir cache */
1982   unredir_discard_translations( guest_start, range );
1983
1984   /* Post-deletion sanity check */
1985   if (VG_(clo_sanity_level >= 4)) {
1986      Int      i;
1987      TTEntry* tte;
1988      Bool     sane = sanity_check_all_sectors();
1989      vg_assert(sane);
1990      /* But now, also check the requested address range isn't
1991         present anywhere. */
1992      for (sno = 0; sno < n_sectors; sno++) {
1993         sec = &sectors[sno];
1994         if (sec->tc == NULL)
1995            continue;
1996         for (i = 0; i < N_TTES_PER_SECTOR; i++) {
1997            tte = &sec->tt[i];
1998            if (tte->status != InUse)
1999               continue;
2000            vg_assert(!overlaps( guest_start, range, &tte->vge ));
2001         }
2002      }
2003   }
2004}
2005
2006
2007/*------------------------------------------------------------*/
2008/*--- AUXILIARY: the unredirected TT/TC                    ---*/
2009/*------------------------------------------------------------*/
2010
2011/* A very simple translation cache which holds a small number of
2012   unredirected translations.  This is completely independent of the
2013   main tt/tc structures.  When unredir_tc or unredir_tt becomes full,
2014   both structures are simply dumped and we start over.
2015
2016   Since these translations are unredirected, the search key is (by
2017   definition) the first address entry in the .vge field. */
2018
2019/* Sized to hold 500 translations of average size 1000 bytes. */
2020
2021#define UNREDIR_SZB   1000
2022
2023#define N_UNREDIR_TT  500
2024#define N_UNREDIR_TCQ (N_UNREDIR_TT * UNREDIR_SZB / sizeof(ULong))
2025
2026typedef
2027   struct {
2028      VexGuestExtents vge;
2029      Addr            hcode;
2030      Bool            inUse;
2031   }
2032   UTCEntry;
2033
2034/* We just allocate forwards in _tc, never deleting. */
2035static ULong    *unredir_tc;
2036static Int      unredir_tc_used = N_UNREDIR_TCQ;
2037
2038/* Slots in _tt can come into use and out again (.inUse).
2039   Nevertheless _tt_highwater is maintained so that invalidations
2040   don't have to scan all the slots when only a few are in use.
2041   _tt_highwater holds the index of the highest ever allocated
2042   slot. */
2043static UTCEntry unredir_tt[N_UNREDIR_TT];
2044static Int      unredir_tt_highwater;
2045
2046
2047static void init_unredir_tt_tc ( void )
2048{
2049   Int i;
2050   if (unredir_tc == NULL) {
2051      SysRes sres = VG_(am_mmap_anon_float_valgrind)
2052                       ( N_UNREDIR_TT * UNREDIR_SZB );
2053      if (sr_isError(sres)) {
2054         VG_(out_of_memory_NORETURN)("init_unredir_tt_tc",
2055                                     N_UNREDIR_TT * UNREDIR_SZB);
2056         /*NOTREACHED*/
2057      }
2058      unredir_tc = (ULong *)(AddrH)sr_Res(sres);
2059   }
2060   unredir_tc_used = 0;
2061   for (i = 0; i < N_UNREDIR_TT; i++)
2062      unredir_tt[i].inUse = False;
2063   unredir_tt_highwater = -1;
2064}
2065
2066/* Do a sanity check; return False on failure. */
2067static Bool sanity_check_redir_tt_tc ( void )
2068{
2069   Int i;
2070   if (unredir_tt_highwater < -1) return False;
2071   if (unredir_tt_highwater >= N_UNREDIR_TT) return False;
2072
2073   for (i = unredir_tt_highwater+1; i < N_UNREDIR_TT; i++)
2074      if (unredir_tt[i].inUse)
2075         return False;
2076
2077   if (unredir_tc_used < 0) return False;
2078   if (unredir_tc_used > N_UNREDIR_TCQ) return False;
2079
2080   return True;
2081}
2082
2083
2084/* Add an UNREDIRECTED translation of vge to TT/TC.  The translation
2085   is temporarily in code[0 .. code_len-1].
2086*/
2087void VG_(add_to_unredir_transtab)( VexGuestExtents* vge,
2088                                   Addr64           entry,
2089                                   AddrH            code,
2090                                   UInt             code_len )
2091{
2092   Int   i, j, code_szQ;
2093   HChar *srcP, *dstP;
2094
2095   vg_assert(sanity_check_redir_tt_tc());
2096
2097   /* This is the whole point: it's not redirected! */
2098   vg_assert(entry == vge->base[0]);
2099
2100   /* How many unredir_tt slots are needed */
2101   code_szQ = (code_len + 7) / 8;
2102
2103   /* Look for an empty unredir_tc slot */
2104   for (i = 0; i < N_UNREDIR_TT; i++)
2105      if (!unredir_tt[i].inUse)
2106         break;
2107
2108   if (i >= N_UNREDIR_TT || code_szQ > (N_UNREDIR_TCQ - unredir_tc_used)) {
2109      /* It's full; dump everything we currently have */
2110      init_unredir_tt_tc();
2111      i = 0;
2112   }
2113
2114   vg_assert(unredir_tc_used >= 0);
2115   vg_assert(unredir_tc_used <= N_UNREDIR_TCQ);
2116   vg_assert(code_szQ > 0);
2117   vg_assert(code_szQ + unredir_tc_used <= N_UNREDIR_TCQ);
2118   vg_assert(i >= 0 && i < N_UNREDIR_TT);
2119   vg_assert(unredir_tt[i].inUse == False);
2120
2121   if (i > unredir_tt_highwater)
2122      unredir_tt_highwater = i;
2123
2124   dstP = (HChar*)&unredir_tc[unredir_tc_used];
2125   srcP = (HChar*)code;
2126   for (j = 0; j < code_len; j++)
2127      dstP[j] = srcP[j];
2128
2129   VG_(invalidate_icache)( dstP, code_len );
2130
2131   unredir_tt[i].inUse = True;
2132   unredir_tt[i].vge   = *vge;
2133   unredir_tt[i].hcode = (Addr)dstP;
2134
2135   unredir_tc_used += code_szQ;
2136   vg_assert(unredir_tc_used >= 0);
2137   vg_assert(unredir_tc_used <= N_UNREDIR_TCQ);
2138
2139   vg_assert(&dstP[code_len] <= (HChar*)&unredir_tc[unredir_tc_used]);
2140}
2141
2142Bool VG_(search_unredir_transtab) ( /*OUT*/AddrH* result,
2143                                    Addr64        guest_addr )
2144{
2145   Int i;
2146   for (i = 0; i < N_UNREDIR_TT; i++) {
2147      if (!unredir_tt[i].inUse)
2148         continue;
2149      if (unredir_tt[i].vge.base[0] == guest_addr) {
2150         *result = (AddrH)unredir_tt[i].hcode;
2151         return True;
2152      }
2153   }
2154   return False;
2155}
2156
2157static void unredir_discard_translations( Addr64 guest_start, ULong range )
2158{
2159   Int i;
2160
2161   vg_assert(sanity_check_redir_tt_tc());
2162
2163   for (i = 0; i <= unredir_tt_highwater; i++) {
2164      if (unredir_tt[i].inUse
2165          && overlaps( guest_start, range, &unredir_tt[i].vge))
2166         unredir_tt[i].inUse = False;
2167   }
2168}
2169
2170
2171/*------------------------------------------------------------*/
2172/*--- Initialisation.                                      ---*/
2173/*------------------------------------------------------------*/
2174
2175void VG_(init_tt_tc) ( void )
2176{
2177   Int i, avg_codeszQ;
2178
2179   vg_assert(!init_done);
2180   init_done = True;
2181
2182   /* Otherwise lots of things go wrong... */
2183   vg_assert(sizeof(ULong) == 8);
2184   vg_assert(sizeof(Addr64) == 8);
2185   /* check fast cache entries really are 2 words long */
2186   vg_assert(sizeof(Addr) == sizeof(void*));
2187   vg_assert(sizeof(FastCacheEntry) == 2 * sizeof(Addr));
2188   /* check fast cache entries are packed back-to-back with no spaces */
2189   vg_assert(sizeof( VG_(tt_fast) ) == VG_TT_FAST_SIZE * sizeof(FastCacheEntry));
2190   /* check fast cache is aligned as we requested.  Not fatal if it
2191      isn't, but we might as well make sure. */
2192   vg_assert(VG_IS_16_ALIGNED( ((Addr) & VG_(tt_fast)[0]) ));
2193
2194   if (VG_(clo_verbosity) > 2)
2195      VG_(message)(Vg_DebugMsg,
2196                   "TT/TC: VG_(init_tt_tc) "
2197                   "(startup of code management)\n");
2198
2199   /* Figure out how big each tc area should be.  */
2200   avg_codeszQ   = (VG_(details).avg_translation_sizeB + 7) / 8;
2201   tc_sector_szQ = N_TTES_PER_SECTOR_USABLE * (1 + avg_codeszQ);
2202
2203   /* Ensure the calculated value is not way crazy. */
2204   vg_assert(tc_sector_szQ >= 2 * N_TTES_PER_SECTOR_USABLE);
2205   vg_assert(tc_sector_szQ <= 100 * N_TTES_PER_SECTOR_USABLE);
2206
2207   n_sectors = VG_(clo_num_transtab_sectors);
2208   vg_assert(n_sectors >= MIN_N_SECTORS);
2209   vg_assert(n_sectors <= MAX_N_SECTORS);
2210
2211   /* Initialise the sectors, even the ones we aren't going to use.
2212      Set all fields to zero. */
2213   youngest_sector = 0;
2214   for (i = 0; i < MAX_N_SECTORS; i++)
2215      VG_(memset)(&sectors[i], 0, sizeof(sectors[i]));
2216
2217   /* Initialise the sector_search_order hint table, including the
2218      entries we aren't going to use. */
2219   for (i = 0; i < MAX_N_SECTORS; i++)
2220      sector_search_order[i] = -1;
2221
2222   /* Initialise the fast cache. */
2223   invalidateFastCache();
2224
2225   /* and the unredir tt/tc */
2226   init_unredir_tt_tc();
2227
2228   if (VG_(clo_verbosity) > 2 || VG_(clo_stats)
2229       || VG_(debugLog_getLevel) () >= 2) {
2230      VG_(message)(Vg_DebugMsg,
2231         "TT/TC: cache: %d sectors of %d bytes each = %d total\n",
2232          n_sectors, 8 * tc_sector_szQ,
2233          n_sectors * 8 * tc_sector_szQ );
2234      VG_(message)(Vg_DebugMsg,
2235         "TT/TC: table: %d tables  of %d bytes each = %d total\n",
2236          n_sectors, (int)(N_TTES_PER_SECTOR * sizeof(TTEntry)),
2237          (int)(n_sectors * N_TTES_PER_SECTOR * sizeof(TTEntry)));
2238      VG_(message)(Vg_DebugMsg,
2239         "TT/TC: table: %d entries each = %d total entries"
2240                       " max occupancy %d (%d%%)\n",
2241         N_TTES_PER_SECTOR,
2242         n_sectors * N_TTES_PER_SECTOR,
2243         n_sectors * N_TTES_PER_SECTOR_USABLE,
2244         SECTOR_TT_LIMIT_PERCENT );
2245   }
2246}
2247
2248
2249/*------------------------------------------------------------*/
2250/*--- Printing out statistics.                             ---*/
2251/*------------------------------------------------------------*/
2252
2253static ULong safe_idiv( ULong a, ULong b )
2254{
2255   return (b == 0 ? 0 : a / b);
2256}
2257
2258UInt VG_(get_bbs_translated) ( void )
2259{
2260   return n_in_count;
2261}
2262
2263void VG_(print_tt_tc_stats) ( void )
2264{
2265   VG_(message)(Vg_DebugMsg,
2266      "    tt/tc: %'llu tt lookups requiring %'llu probes\n",
2267      n_full_lookups, n_lookup_probes );
2268   VG_(message)(Vg_DebugMsg,
2269      "    tt/tc: %'llu fast-cache updates, %'llu flushes\n",
2270      n_fast_updates, n_fast_flushes );
2271
2272   VG_(message)(Vg_DebugMsg,
2273                " transtab: new        %'lld "
2274                "(%'llu -> %'llu; ratio %'llu:10) [%'llu scs]\n",
2275                n_in_count, n_in_osize, n_in_tsize,
2276                safe_idiv(10*n_in_tsize, n_in_osize),
2277                n_in_sc_count);
2278   VG_(message)(Vg_DebugMsg,
2279                " transtab: dumped     %'llu (%'llu -> ?" "?)\n",
2280                n_dump_count, n_dump_osize );
2281   VG_(message)(Vg_DebugMsg,
2282                " transtab: discarded  %'llu (%'llu -> ?" "?)\n",
2283                n_disc_count, n_disc_osize );
2284
2285   if (DEBUG_TRANSTAB) {
2286      Int i;
2287      VG_(printf)("\n");
2288      for (i = 0; i < ECLASS_N; i++) {
2289         VG_(printf)(" %4d", sectors[0].ec2tte_used[i]);
2290         if (i % 16 == 15)
2291            VG_(printf)("\n");
2292      }
2293      VG_(printf)("\n\n");
2294   }
2295}
2296
2297/*------------------------------------------------------------*/
2298/*--- Printing out of profiling results.                   ---*/
2299/*------------------------------------------------------------*/
2300
2301static ULong score ( TTEntry* tte )
2302{
2303   return ((ULong)tte->weight) * ((ULong)tte->count);
2304}
2305
2306ULong VG_(get_SB_profile) ( SBProfEntry tops[], UInt n_tops )
2307{
2308   Int   sno, i, r, s;
2309   ULong score_total;
2310
2311   /* First, compute the total weighted count, and find the top N
2312      ttes.  tops contains pointers to the most-used n_tops blocks, in
2313      descending order (viz, tops[0] is the highest scorer). */
2314   for (i = 0; i < n_tops; i++) {
2315      tops[i].addr  = 0;
2316      tops[i].score = 0;
2317   }
2318
2319   score_total = 0;
2320
2321   for (sno = 0; sno < n_sectors; sno++) {
2322      if (sectors[sno].tc == NULL)
2323         continue;
2324      for (i = 0; i < N_TTES_PER_SECTOR; i++) {
2325         if (sectors[sno].tt[i].status != InUse)
2326            continue;
2327         score_total += score(&sectors[sno].tt[i]);
2328         /* Find the rank for sectors[sno].tt[i]. */
2329         r = n_tops-1;
2330         while (True) {
2331            if (r == -1)
2332               break;
2333             if (tops[r].addr == 0) {
2334               r--;
2335               continue;
2336             }
2337             if ( score(&sectors[sno].tt[i]) > tops[r].score ) {
2338                r--;
2339                continue;
2340             }
2341             break;
2342         }
2343         r++;
2344         vg_assert(r >= 0 && r <= n_tops);
2345         /* This bb should be placed at r, and bbs above it shifted
2346            upwards one slot. */
2347         if (r < n_tops) {
2348            for (s = n_tops-1; s > r; s--)
2349               tops[s] = tops[s-1];
2350            tops[r].addr  = sectors[sno].tt[i].entry;
2351            tops[r].score = score( &sectors[sno].tt[i] );
2352         }
2353      }
2354   }
2355
2356   /* Now zero out all the counter fields, so that we can make
2357      multiple calls here and just get the values since the last call,
2358      each time, rather than values accumulated for the whole run. */
2359   for (sno = 0; sno < n_sectors; sno++) {
2360      if (sectors[sno].tc == NULL)
2361         continue;
2362      for (i = 0; i < N_TTES_PER_SECTOR; i++) {
2363         if (sectors[sno].tt[i].status != InUse)
2364            continue;
2365         sectors[sno].tt[i].count = 0;
2366      }
2367   }
2368
2369   return score_total;
2370}
2371
2372/*--------------------------------------------------------------------*/
2373/*--- end                                                          ---*/
2374/*--------------------------------------------------------------------*/
2375