m_transtab.c revision b7301c690606964dd1ece81ce2fd2b6fd90ab012
1
2/*--------------------------------------------------------------------*/
3/*--- Management of the translation table and cache.               ---*/
4/*---                                                 m_transtab.c ---*/
5/*--------------------------------------------------------------------*/
6
7/*
8   This file is part of Valgrind, a dynamic binary instrumentation
9   framework.
10
11   Copyright (C) 2000-2011 Julian Seward
12      jseward@acm.org
13
14   This program is free software; you can redistribute it and/or
15   modify it under the terms of the GNU General Public License as
16   published by the Free Software Foundation; either version 2 of the
17   License, or (at your option) any later version.
18
19   This program is distributed in the hope that it will be useful, but
20   WITHOUT ANY WARRANTY; without even the implied warranty of
21   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22   General Public License for more details.
23
24   You should have received a copy of the GNU General Public License
25   along with this program; if not, write to the Free Software
26   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27   02111-1307, USA.
28
29   The GNU General Public License is contained in the file COPYING.
30*/
31
32#include "pub_core_basics.h"
33#include "pub_core_debuglog.h"
34#include "pub_core_machine.h"    // For VG_(machine_get_VexArchInfo)
35#include "pub_core_libcbase.h"
36#include "pub_core_vki.h"        // to keep pub_core_libproc.h happy, sigh
37#include "pub_core_libcproc.h"   // VG_(invalidate_icache)
38#include "pub_core_libcassert.h"
39#include "pub_core_libcprint.h"
40#include "pub_core_options.h"
41#include "pub_core_tooliface.h"  // For VG_(details).avg_translation_sizeB
42#include "pub_core_transtab.h"
43#include "pub_core_aspacemgr.h"
44#include "pub_core_mallocfree.h" // VG_(out_of_memory_NORETURN)
45#include "pub_core_xarray.h"
46#include "pub_core_dispatch.h"   // For VG_(disp_cp*) addresses
47
48
49/* #define DEBUG_TRANSTAB */
50
51
52/*-------------------------------------------------------------*/
53/*--- Management of the FIFO-based translation table+cache. ---*/
54/*-------------------------------------------------------------*/
55
/*------------------ CONSTANTS ------------------*/

/* Number of sectors the TC is divided into.  If you need a larger
   overall translation cache, increase this value. */
#define N_SECTORS 8

/* Number of TC entries in each sector.  This needs to be a prime
   number to work properly (the TT is a hash table probed by stepping
   through indices), it must be <= 65535 (so that a TT index fits in a
   UShort, leaving room for 0xFFFF (EC2TTE_DELETED) to denote
   'deleted') and it is strongly recommended not to change this.
   65521 is the largest prime <= 65535. */
#define N_TTES_PER_SECTOR /*10007*/ /*30011*/ /*40009*/ 65521

/* Because each sector contains a hash table of TTEntries, we need to
   specify the maximum allowable loading, after which the sector is
   deemed full. */
#define SECTOR_TT_LIMIT_PERCENT 65

/* The sector is deemed full when this many entries are in it. */
#define N_TTES_PER_SECTOR_USABLE \
           ((N_TTES_PER_SECTOR * SECTOR_TT_LIMIT_PERCENT) / 100)

/* Equivalence classes for fast address range deletion.  There are 1 +
   2^ECLASS_WIDTH bins.  The highest one, ECLASS_MISC, describes an
   address range which does not fall cleanly within any specific bin.
   Note that ECLASS_SHIFT + ECLASS_WIDTH must be < 32. */
#define ECLASS_SHIFT 11
#define ECLASS_WIDTH 8
#define ECLASS_MISC  (1 << ECLASS_WIDTH)   /* the catch-all bin */
#define ECLASS_N     (1 + ECLASS_MISC)     /* total number of bins */

/* 16-bit special value marking a deleted entry in an ec2tte array. */
#define EC2TTE_DELETED  0xFFFF
89
90/*------------------ TYPES ------------------*/
91
/* In edges ("to-me") in the graph created by chaining: identifies one
   patched jump that lands on the owning translation, with enough info
   to undo that patch later. */
typedef
   struct {
      UInt from_sNo;   /* sector number of the jump's source block */
      UInt from_tteNo; /* TTE number in given sector */
      UInt from_offs;  /* code offset from TCEntry::tcptr where the patch is */
      Bool to_fastEP;  /* Is the patch to a fast or slow entry point? */
   }
   InEdge;
101
102
/* Out edges ("from-me") in the graph created by chaining: identifies
   one patched jump made by the owning translation, so the target's
   matching InEdge can be located when this block is deleted. */
typedef
   struct {
      UInt to_sNo;    /* sector number of the jump's destination block */
      UInt to_tteNo;  /* TTE number in given sector */
      UInt from_offs; /* code offset in owning translation where patch is */
   }
   OutEdge;
111
112
#define N_FIXED_IN_EDGE_ARR 3
/* A small-vector of InEdges.  The first N_FIXED_IN_EDGE_ARR entries
   live inline in 'fixed'; on overflow everything is moved into the
   heap-allocated XArray 'var', after which n_fixed is kept at 0. */
typedef
   struct {
      UInt     n_fixed; /* 0 .. N_FIXED_IN_EDGE_ARR; 0 whenever var != NULL */
      InEdge   fixed[N_FIXED_IN_EDGE_ARR];
      XArray*  var; /* XArray* of InEdge, or NULL if fixed[] suffices */
   }
   InEdgeArr;
121
#define N_FIXED_OUT_EDGE_ARR 2
/* A small-vector of OutEdges; same inline-then-spill scheme as
   InEdgeArr. */
typedef
   struct {
      UInt    n_fixed; /* 0 .. N_FIXED_OUT_EDGE_ARR; 0 whenever var != NULL */
      OutEdge fixed[N_FIXED_OUT_EDGE_ARR];
      XArray* var; /* XArray* of OutEdge, or NULL if fixed[] suffices */
   }
   OutEdgeArr;
130
131
/* A translation-table entry.  This indicates precisely which areas of
   guest code are included in the translation, and contains all other
   auxiliary info too.  */
typedef
   struct {
      /* Profiling only: the count and weight (arbitrary meaning) for
         this translation.  Weight is a property of the translation
         itself and computed once when the translation is created.
         Count is an entry count for the translation and is
         incremented by 1 every time the translation is used, if we
         are profiling. */
      ULong    count;
      UShort   weight;

      /* Status of the slot.  Note, we need to be able to do lazy
         deletion, hence the Deleted state. */
      enum { InUse, Deleted, Empty } status;

      /* 64-bit aligned pointer to one or more 64-bit words containing
         the corresponding host code (must be in the same sector!)
         This is a pointer into the sector's tc (code) area. */
      ULong* tcptr;

      /* This is the original guest address that purportedly is the
         entry point of the translation.  You might think that .entry
         should be the same as .vge->base[0], and most of the time it
         is.  However, when doing redirections, that is not the case.
         .vge must always correctly describe the guest code sections
         from which this translation was made.  However, .entry may or
         may not be a lie, depending on whether or not we're doing
         redirection. */
      Addr64 entry;

      /* This structure describes precisely what ranges of guest code
         the translation covers, so we can decide whether or not to
         delete it when translations of a given address range are
         invalidated. */
      VexGuestExtents vge;

      /* Address range summary info: these are pointers back to
         eclass[] entries in the containing Sector.  Those entries in
         turn point back here -- the two structures are mutually
         redundant but both necessary to make fast deletions work.
         The eclass info is similar to, and derived from, this entry's
         'vge' field, but it is not the same. */
      UShort n_tte2ec;      // # tte2ec pointers (1 to 3)
      UShort tte2ec_ec[3];  // for each, the eclass #
      UInt   tte2ec_ix[3];  // and the index within the eclass.
      // for i in 0 .. n_tte2ec-1
      //    sec->ec2tte[ tte2ec_ec[i] ][ tte2ec_ix[i] ]
      // should be the index
      // of this TTEntry in the containing Sector's tt array.

      /* Admin information for chaining.  'in_edges' is a set of the
         patch points which jump to this translation -- hence are
         predecessors in the control flow graph.  'out_edges' points
         to successors in the control flow graph -- translations to
         which this one has a patched jump.  In short these are just
         backwards and forwards edges in the graph of patched-together
         blocks.  The 'in_edges' contain slightly more info, enough
         that we can undo the chaining of each mentioned patch point.
         The 'out_edges' list exists only so that we can visit the
         'in_edges' entries of all blocks we're patched through to, in
         order to remove ourselves from them when we're deleted. */

      /* A translation can disappear for two reasons:
          1. erased (as part of the oldest sector cleanup) when the
             youngest sector is full.
          2. discarded due to calls to VG_(discard_translations).
             VG_(discard_translations) sets the status of the
             translation to 'Deleted'.
             A.o., the gdbserver discards one or more translations
             when a breakpoint is inserted or removed at an Addr,
             or when single stepping mode is enabled/disabled
             or when a translation is instrumented for gdbserver
             (all the target jumps of this translation are
              invalidated).

         So, it is possible that the translation A to be patched
         (to obtain a patched jump from A to B) is invalidated
         after B is translated and before A is patched.
         In case a translation is erased or discarded, the patching
         cannot be done.  VG_(tt_tc_do_chaining) and find_TTEntry_from_hcode
         check that the 'from' translation still exists before
         doing the patching.

         Is it safe to erase or discard the current translation E being
         executed ?  Amazing, but yes, it is safe.
         Here is the explanation:

         The translation E being executed can only be erased if a new
         translation N is being done.  A new translation is done only
         if the host addr is a not yet patched jump to another
         translation.  In such a case, the guest address of N is
         assigned to the PC in the VEX state.  Control is returned
         to the scheduler.  N will be translated.  This can erase the
         translation E (in case of sector full).  VG_(tt_tc_do_chaining)
         will not do the chaining to a non found translation E.
         The execution will continue at the current guest PC
         (i.e. the translation N).
         => it is safe to erase the current translation being executed.

         The current translation E being executed can also be discarded
         (e.g. by gdbserver).  VG_(discard_translations) will mark
         this translation E as Deleted, but the translation itself
         is not erased.  In particular, its host code can only
         be overwritten or erased in case a new translation is done.
         A new translation will only be done if a not yet translated
         jump is to be executed.  The execution of the Deleted translation
         E will continue till a non patched jump is encountered.
         This situation is then similar to the 'erasing' case above :
         the current translation E can be erased or overwritten, as the
         execution will continue at the new translation N.

      */

      /* It is possible, although very unlikely, that a block A has
         more than one patched jump to block B.  This could happen if
         (eg) A finishes "jcond B; jmp B".

         This means in turn that B's in_edges set can list A more than
         once (twice in this example).  However, each such entry must
         have a different from_offs, since a patched jump can only
         jump to one place at once (it's meaningless for it to have
         multiple destinations.)  IOW, the successor and predecessor
         edges in the graph are not uniquely determined by a
         TTEntry --> TTEntry pair, but rather by a
         (TTEntry,offset) --> TTEntry triple.

         If A has multiple edges to B then B will mention A multiple
         times in its in_edges.  To make things simpler, we then
         require that A mentions B exactly the same number of times in
         its out_edges.  Furthermore, a matching out-in pair must have
         the same offset (from_offs).  This facilitates sanity
         checking, and it facilitates establishing the invariant that
         an out_edges set may not have duplicates when using the
         equality defined by (TTEntry,offset).  Hence the out_edges
         and in_edges sets really do both have set semantics.

         eg if  A has been patched to B at offsets 42 and 87 (in A)
         then   A.out_edges = { (B,42), (B,87) }   (in any order)
         and    B.in_edges  = { (A,42), (A,87) }   (in any order)

         Hence for each node pair P->Q in the graph, there's a 1:1
         mapping between P.out_edges and Q.in_edges.
      */
      InEdgeArr  in_edges;
      OutEdgeArr out_edges;
   }
   TTEntry;
282
283
/* A structure used for mapping host code addresses back to the
   relevant TTEntry.  Used when doing chaining, for finding the
   TTEntry to which some arbitrary patch address belongs. */
typedef
   struct {
      UChar* start;  /* first byte of the translation's host code */
      UInt   len;    /* length in bytes of the [start, start+len) range */
      UInt   tteNo;  /* index of the owning TTEntry in its sector's tt */
   }
   HostExtent;
294
/* Finally, a sector itself.  Each sector contains an array of
   TCEntries, which hold code, and an array of TTEntries, containing
   all required administrative info.  Profiling is supported using the
   TTEntry .count and .weight fields, if required.

   If the sector is not in use, all three pointers are NULL and
   tt_n_inuse is zero.
*/
typedef
   struct {
      /* The TCEntry area.  Size of this depends on the average
         translation size.  We try and size it so it becomes full
         precisely when this sector's translation table (tt) reaches
         its load limit (SECTOR_TT_LIMIT_PERCENT). */
      ULong* tc;

      /* The TTEntry array.  This is a fixed size, always containing
         exactly N_TTES_PER_SECTOR entries. */
      TTEntry* tt;

      /* This points to the current allocation point in tc. */
      ULong* tc_next;

      /* The count of tt entries with state InUse. */
      Int tt_n_inuse;

      /* Expandable arrays of tt indices for each of the ECLASS_N
         address range equivalence classes.  These hold indices into
         the containing sector's tt array, which in turn should point
         back here. */
      Int     ec2tte_size[ECLASS_N];   /* allocated capacity per class */
      Int     ec2tte_used[ECLASS_N];   /* entries in use per class */
      UShort* ec2tte[ECLASS_N];

      /* The host extents.  The [start, +len) ranges are constructed
         in strictly non-overlapping order, so we can binary search
         them at any time. */
      XArray* host_extents; /* XArray* of HostExtent */
   }
   Sector;
335
336
337/*------------------ DECLS ------------------*/
338
/* The root data structure is an array of sectors.  The index of the
   youngest sector is recorded, and new translations are put into that
   sector.  When it fills up, we move along to the next sector and
   start to fill that up, wrapping around at the end of the array.
   That way, once all N_SECTORS have been brought into use for the
   first time, and are full, we then re-use the oldest sector,
   endlessly.

   When running, youngest_sector should be >= 0 and < N_SECTORS.
   The initial -1 value indicates the TT/TC system is not yet
   initialised.
*/
static Sector sectors[N_SECTORS];
static Int    youngest_sector = -1;

/* The number of ULongs in each TCEntry area.  This is computed once
   at startup and does not change. */
static Int    tc_sector_szQ;


/* A list of sector numbers, in the order which they should be
   searched to find translations.  This is an optimisation to be used
   when searching for translations and should not affect
   correctness.  -1 denotes "no entry". */
static Int sector_search_order[N_SECTORS];
364
365
/* Fast helper for the TC.  A direct-mapped cache which holds a set of
   recently used (guest address, host address) pairs.  This array is
   referred to directly from m_dispatch/dispatch-<platform>.S.

   Entries in tt_fast may refer to any valid TC entry, regardless of
   which sector it's in.  Consequently we must be very careful to
   invalidate this cache when TC entries are changed or disappear.

   A special .guest address -- TRANSTAB_BOGUS_GUEST_ADDR -- must be
   pointed at to cause that cache entry to miss.  This relies on the
   assumption that no guest code actually has that address, hence a
   value 0x1 seems good.  m_translate gives the client a synthetic
   segfault if it tries to execute at this address.
*/
/* The real definition of FastCacheEntry lives in a header (presumably
   pub_core_transtab.h); this commented-out copy is kept for reference.
typedef
   struct {
      Addr guest;
      Addr host;
   }
   FastCacheEntry;
*/
/*global*/ __attribute__((aligned(16)))
           FastCacheEntry VG_(tt_fast)[VG_TT_FAST_SIZE];

/* Make sure we're not used before initialisation. */
static Bool init_done = False;
393
394
/*------------------ STATS DECLS ------------------*/
/* Module-local event counters, for diagnostic printing only. */

/* Number of fast-cache updates and flushes done. */
static ULong n_fast_flushes = 0;
static ULong n_fast_updates = 0;

/* Number of full lookups done. */
static ULong n_full_lookups = 0;
static ULong n_lookup_probes = 0;

/* Number/osize/tsize of translations entered; also the number of
   those for which self-checking was requested. */
static ULong n_in_count    = 0;
static ULong n_in_osize    = 0;
static ULong n_in_tsize    = 0;
static ULong n_in_sc_count = 0;

/* Number/osize of translations discarded due to lack of space. */
static ULong n_dump_count = 0;
static ULong n_dump_osize = 0;

/* Number/osize of translations discarded due to requests to do so. */
static ULong n_disc_count = 0;
static ULong n_disc_osize = 0;
419
420
421/*-------------------------------------------------------------*/
422/*--- Misc                                                  ---*/
423/*-------------------------------------------------------------*/
424
425static void* ttaux_malloc ( HChar* tag, SizeT n )
426{
427   return VG_(arena_malloc)(VG_AR_TTAUX, tag, n);
428}
429
/* Return a block previously obtained from ttaux_malloc to the TTAUX
   arena. */
static void ttaux_free ( void* p )
{
   VG_(arena_free)(VG_AR_TTAUX, p);
}
434
435
436/*-------------------------------------------------------------*/
437/*--- Chaining support                                      ---*/
438/*-------------------------------------------------------------*/
439
440static inline TTEntry* index_tte ( UInt sNo, UInt tteNo )
441{
442   vg_assert(sNo < N_SECTORS);
443   vg_assert(tteNo < N_TTES_PER_SECTOR);
444   Sector* s = &sectors[sNo];
445   vg_assert(s->tt);
446   TTEntry* tte = &s->tt[tteNo];
447   vg_assert(tte->status == InUse);
448   return tte;
449}
450
451static void InEdge__init ( InEdge* ie )
452{
453   ie->from_sNo   = -1; /* invalid */
454   ie->from_tteNo = 0;
455   ie->from_offs  = 0;
456   ie->to_fastEP  = False;
457}
458
459static void OutEdge__init ( OutEdge* oe )
460{
461   oe->to_sNo    = -1; /* invalid */
462   oe->to_tteNo  = 0;
463   oe->from_offs = 0;
464}
465
466static void TTEntry__init ( TTEntry* tte )
467{
468   VG_(memset)(tte, 0, sizeof(*tte));
469}
470
471static UWord InEdgeArr__size ( InEdgeArr* iea )
472{
473   if (iea->var) {
474      vg_assert(iea->n_fixed == 0);
475      return VG_(sizeXA)(iea->var);
476   } else {
477      vg_assert(iea->n_fixed <= N_FIXED_IN_EDGE_ARR);
478      return iea->n_fixed;
479   }
480}
481
482static void InEdgeArr__makeEmpty ( InEdgeArr* iea )
483{
484   if (iea->var) {
485      vg_assert(iea->n_fixed == 0);
486      VG_(deleteXA)(iea->var);
487      iea->var = NULL;
488   } else {
489      vg_assert(iea->n_fixed <= N_FIXED_IN_EDGE_ARR);
490      iea->n_fixed = 0;
491   }
492}
493
494static
495InEdge* InEdgeArr__index ( InEdgeArr* iea, UWord i )
496{
497   if (iea->var) {
498      vg_assert(iea->n_fixed == 0);
499      return (InEdge*)VG_(indexXA)(iea->var, i);
500   } else {
501      vg_assert(i < iea->n_fixed);
502      return &iea->fixed[i];
503   }
504}
505
506static
507void InEdgeArr__deleteIndex ( InEdgeArr* iea, UWord i )
508{
509   if (iea->var) {
510      vg_assert(iea->n_fixed == 0);
511      VG_(removeIndexXA)(iea->var, i);
512   } else {
513      vg_assert(i < iea->n_fixed);
514      for (; i+1 < iea->n_fixed; i++) {
515         iea->fixed[i] = iea->fixed[i+1];
516      }
517      iea->n_fixed--;
518   }
519}
520
521static
522void InEdgeArr__add ( InEdgeArr* iea, InEdge* ie )
523{
524   if (iea->var) {
525      vg_assert(iea->n_fixed == 0);
526      VG_(addToXA)(iea->var, ie);
527   } else {
528      vg_assert(iea->n_fixed <= N_FIXED_IN_EDGE_ARR);
529      if (iea->n_fixed == N_FIXED_IN_EDGE_ARR) {
530         /* The fixed array is full, so we have to initialise an
531            XArray and copy the fixed array into it. */
532         iea->var = VG_(newXA)(ttaux_malloc, "transtab.IEA__add",
533                               ttaux_free,
534                               sizeof(InEdge));
535         UWord i;
536         for (i = 0; i < iea->n_fixed; i++) {
537            VG_(addToXA)(iea->var, &iea->fixed[i]);
538         }
539         VG_(addToXA)(iea->var, ie);
540         iea->n_fixed = 0;
541      } else {
542         /* Just add to the fixed array. */
543         iea->fixed[iea->n_fixed++] = *ie;
544      }
545   }
546}
547
548static UWord OutEdgeArr__size ( OutEdgeArr* oea )
549{
550   if (oea->var) {
551      vg_assert(oea->n_fixed == 0);
552      return VG_(sizeXA)(oea->var);
553   } else {
554      vg_assert(oea->n_fixed <= N_FIXED_OUT_EDGE_ARR);
555      return oea->n_fixed;
556   }
557}
558
559static void OutEdgeArr__makeEmpty ( OutEdgeArr* oea )
560{
561   if (oea->var) {
562      vg_assert(oea->n_fixed == 0);
563      VG_(deleteXA)(oea->var);
564      oea->var = NULL;
565   } else {
566      vg_assert(oea->n_fixed <= N_FIXED_OUT_EDGE_ARR);
567      oea->n_fixed = 0;
568   }
569}
570
571static
572OutEdge* OutEdgeArr__index ( OutEdgeArr* oea, UWord i )
573{
574   if (oea->var) {
575      vg_assert(oea->n_fixed == 0);
576      return (OutEdge*)VG_(indexXA)(oea->var, i);
577   } else {
578      vg_assert(i < oea->n_fixed);
579      return &oea->fixed[i];
580   }
581}
582
583static
584void OutEdgeArr__deleteIndex ( OutEdgeArr* oea, UWord i )
585{
586   if (oea->var) {
587      vg_assert(oea->n_fixed == 0);
588      VG_(removeIndexXA)(oea->var, i);
589   } else {
590      vg_assert(i < oea->n_fixed);
591      for (; i+1 < oea->n_fixed; i++) {
592         oea->fixed[i] = oea->fixed[i+1];
593      }
594      oea->n_fixed--;
595   }
596}
597
598static
599void OutEdgeArr__add ( OutEdgeArr* oea, OutEdge* oe )
600{
601   if (oea->var) {
602      vg_assert(oea->n_fixed == 0);
603      VG_(addToXA)(oea->var, oe);
604   } else {
605      vg_assert(oea->n_fixed <= N_FIXED_OUT_EDGE_ARR);
606      if (oea->n_fixed == N_FIXED_OUT_EDGE_ARR) {
607         /* The fixed array is full, so we have to initialise an
608            XArray and copy the fixed array into it. */
609         oea->var = VG_(newXA)(ttaux_malloc, "transtab.OEA__add",
610                               ttaux_free,
611                               sizeof(OutEdge));
612         UWord i;
613         for (i = 0; i < oea->n_fixed; i++) {
614            VG_(addToXA)(oea->var, &oea->fixed[i]);
615         }
616         VG_(addToXA)(oea->var, oe);
617         oea->n_fixed = 0;
618      } else {
619         /* Just add to the fixed array. */
620         oea->fixed[oea->n_fixed++] = *oe;
621      }
622   }
623}
624
625static
626Int HostExtent__cmpOrd ( void* v1, void* v2 )
627{
628   HostExtent* hx1 = (HostExtent*)v1;
629   HostExtent* hx2 = (HostExtent*)v2;
630   if (hx1->start + hx1->len <= hx2->start) return -1;
631   if (hx2->start + hx2->len <= hx1->start) return 1;
632   return 0; /* partial overlap */
633}
634
635static __attribute__((noinline))
636Bool find_TTEntry_from_hcode( /*OUT*/UInt* from_sNo,
637                              /*OUT*/UInt* from_tteNo,
638                              void* hcode )
639{
640   Int i;
641
642   /* Search order logic copied from VG_(search_transtab). */
643   for (i = 0; i < N_SECTORS; i++) {
644      Int sno = sector_search_order[i];
645      if (UNLIKELY(sno == -1))
646         return False; /* run out of sectors to search */
647
648      Sector* sec = &sectors[sno];
649      XArray* /* of HostExtent */ host_extents = sec->host_extents;
650      vg_assert(host_extents);
651
652      HostExtent key;
653      VG_(memset)(&key, 0, sizeof(key));
654      key.start = hcode;
655      key.len = 1;
656      Word firstW = -1, lastW = -1;
657      Bool found  = VG_(lookupXA_UNSAFE)(
658                       host_extents, &key, &firstW, &lastW,
659                       (Int(*)(void*,void*))HostExtent__cmpOrd
660                    );
661      vg_assert(firstW == lastW); // always true, even if not found
662      if (found) {
663         HostExtent* hx = VG_(indexXA)(host_extents, firstW);
664         UInt tteNo = hx->tteNo;
665         /* Do some additional sanity checks. */
666         vg_assert(tteNo <= N_TTES_PER_SECTOR);
667         /* Entry might have been invalidated. Consider this
668            as not found. */
669         if (sec->tt[tteNo].status == Deleted)
670            return False;
671         vg_assert(sec->tt[tteNo].status == InUse);
672         /* Can only half check that the found TTEntry contains hcode,
673            due to not having a length value for the hcode in the
674            TTEntry. */
675         vg_assert((UChar*)sec->tt[tteNo].tcptr <= (UChar*)hcode);
676         /* Looks plausible */
677         *from_sNo   = sno;
678         *from_tteNo = (UInt)tteNo;
679         return True;
680      }
681   }
682   return False;
683}
684
685
686/* Figure out whether or not hcode is jitted code present in the main
687   code cache (but not in the no-redir cache).  Used for sanity
688   checking. */
689static Bool is_in_the_main_TC ( void* hcode )
690{
691   Int i, sno;
692   for (i = 0; i < N_SECTORS; i++) {
693      sno = sector_search_order[i];
694      if (sno == -1)
695         break; /* run out of sectors to search */
696      if ((UChar*)hcode >= (UChar*)sectors[sno].tc
697          && (UChar*)hcode <= (UChar*)sectors[sno].tc_next
698                              + sizeof(ULong) - 1)
699         return True;
700   }
701   return False;
702}
703
704
/* Fulfill a chaining request: patch the jump at from__patch_addr so
   it goes directly to translation (to_sNo, to_tteNo), and record
   admin info (in_edges/out_edges) so we can undo it later, if
   required.  Silently does nothing if the 'from' translation has
   disappeared in the meantime.
*/
void VG_(tt_tc_do_chaining) ( void* from__patch_addr,
                              UInt  to_sNo,
                              UInt  to_tteNo,
                              Bool  to_fastEP )
{
   /* Get the CPU info established at startup. */
   VexArch vex_arch = VexArch_INVALID;
   VG_(machine_get_VexArchInfo)( &vex_arch, NULL );

   // host_code is where we're patching to.  So it needs to
   // take into account, whether we're jumping to the slow
   // or fast entry point.  By definition, the fast entry point
   // is exactly one event check's worth of code along from
   // the slow (tcptr) entry point.
   TTEntry* to_tte    = index_tte(to_sNo, to_tteNo);
   void*    host_code = ((UChar*)to_tte->tcptr)
                        + (to_fastEP ? LibVEX_evCheckSzB(vex_arch) : 0);

   // stay sane -- the patch point (dst) is in this sector's code cache
   vg_assert( (UChar*)host_code >= (UChar*)sectors[to_sNo].tc );
   vg_assert( (UChar*)host_code <= (UChar*)sectors[to_sNo].tc_next
                                   + sizeof(ULong) - 1 );

   /* Find the TTEntry for the from__ code.  This isn't simple since
      we only know the patch address, which is going to be somewhere
      inside the from_ block. */
   UInt from_sNo   = (UInt)-1;
   UInt from_tteNo = (UInt)-1;
   Bool from_found
      = find_TTEntry_from_hcode( &from_sNo, &from_tteNo,
                                 from__patch_addr );
   if (!from_found) {
      // The from code might have been discarded due to sector re-use
      // or marked Deleted due to translation invalidation.
      // In such a case, don't do the chaining.
      VG_(debugLog)(1,"transtab",
                    "host code %p not found (discarded? sector recycled?)"
                    " => no chaining done\n",
                    from__patch_addr);
      return;
   }

   TTEntry* from_tte = index_tte(from_sNo, from_tteNo);

   /* Get VEX to do the patching itself.  We have to hand it off
      since it is host-dependent. */
   VexInvalRange vir
      = LibVEX_Chain(
           vex_arch,
           from__patch_addr,
           VG_(fnptr_to_fnentry)(
              to_fastEP ? &VG_(disp_cp_chain_me_to_fastEP)
                        : &VG_(disp_cp_chain_me_to_slowEP)),
           (void*)host_code
        );
   /* The patch modified host instructions; flush them from the icache. */
   VG_(invalidate_icache)( (void*)vir.start, vir.len );

   /* Now do the tricky bit -- update the ch_succs and ch_preds info
      for the two translations involved, so we can undo the chaining
      later, which we will have to do if the to_ block gets removed
      for whatever reason. */

   /* This is the new from_ -> to_ link to add. */
   InEdge ie;
   InEdge__init(&ie);
   ie.from_sNo   = from_sNo;
   ie.from_tteNo = from_tteNo;
   ie.to_fastEP  = to_fastEP;
   HWord from_offs = (HWord)( (UChar*)from__patch_addr
                              - (UChar*)from_tte->tcptr );
   vg_assert(from_offs < 100000/* let's say */);
   ie.from_offs  = (UInt)from_offs;

   /* This is the new to_ -> from_ backlink to add. */
   OutEdge oe;
   OutEdge__init(&oe);
   oe.to_sNo    = to_sNo;
   oe.to_tteNo  = to_tteNo;
   oe.from_offs = (UInt)from_offs;

   /* Add .. */
   InEdgeArr__add(&to_tte->in_edges, &ie);
   OutEdgeArr__add(&from_tte->out_edges, &oe);
}
792
793
/* Unchain one patch, as described by the specified InEdge: rewrite
   the patched jump so it once again calls the relevant
   disp_cp_chain_me_to_{fast,slow}EP stub.  For sanity check purposes
   only (to check that the patched location is as expected) it also
   requires the fast and slow entry point addresses of the destination
   block (that is, the block that owns this InEdge). */
__attribute__((noinline))
static void unchain_one ( VexArch vex_arch,
                          InEdge* ie,
                          void* to_fastEPaddr, void* to_slowEPaddr )
{
   vg_assert(ie);
   /* Locate the source translation and the exact byte to re-patch. */
   TTEntry* tte
      = index_tte(ie->from_sNo, ie->from_tteNo);
   UChar* place_to_patch
      = ((HChar*)tte->tcptr) + ie->from_offs;
   UChar* disp_cp_chain_me
      = VG_(fnptr_to_fnentry)(
           ie->to_fastEP ? &VG_(disp_cp_chain_me_to_fastEP)
                         : &VG_(disp_cp_chain_me_to_slowEP)
        );
   UChar* place_to_jump_to_EXPECTED
      = ie->to_fastEP ? to_fastEPaddr : to_slowEPaddr;

   // stay sane: both src and dst for this unchaining are
   // in the main code cache
   vg_assert( is_in_the_main_TC(place_to_patch) ); // src
   vg_assert( is_in_the_main_TC(place_to_jump_to_EXPECTED) ); // dst
   // dst check is ok because LibVEX_UnChain checks that
   // place_to_jump_to_EXPECTED really is the current dst, and
   // asserts if it isn't.
   VexInvalRange vir
       = LibVEX_UnChain( vex_arch, place_to_patch,
                         place_to_jump_to_EXPECTED, disp_cp_chain_me );
   /* The unpatch modified host instructions; flush them from the
      icache. */
   VG_(invalidate_icache)( (void*)vir.start, vir.len );
}
829
830
831/* The specified block is about to be deleted.  Update the preds and
832   succs of its associated blocks accordingly.  This includes undoing
833   any chained jumps to this block. */
static
void unchain_in_preparation_for_deletion ( VexArch vex_arch,
                                           UInt here_sNo, UInt here_tteNo )
{
   if (0)
      VG_(printf)("QQQ unchain_in_prep %u.%u\n", here_sNo, here_tteNo);
   UWord    i, j, n, m;
   Int      evCheckSzB = LibVEX_evCheckSzB(vex_arch);
   TTEntry* here_tte   = index_tte(here_sNo, here_tteNo);
   vg_assert(here_tte->status == InUse);

   /* Visit all InEdges owned by here_tte. */
   n = InEdgeArr__size(&here_tte->in_edges);
   for (i = 0; i < n; i++) {
      InEdge* ie = InEdgeArr__index(&here_tte->in_edges, i);
      // Undo the chaining.
      /* The slow EP is at the start of the host code; the fast EP
         follows the event-check preamble, whose size (evCheckSzB)
         is arch-dependent. */
      UChar* here_slow_EP = (UChar*)here_tte->tcptr;
      UChar* here_fast_EP = here_slow_EP + evCheckSzB;
      unchain_one(vex_arch, ie, here_fast_EP, here_slow_EP);
      // Find the corresponding entry in the "from" node's out_edges,
      // and remove it.
      TTEntry* from_tte = index_tte(ie->from_sNo, ie->from_tteNo);
      m = OutEdgeArr__size(&from_tte->out_edges);
      vg_assert(m > 0); // it must have at least one entry
      for (j = 0; j < m; j++) {
         OutEdge* oe = OutEdgeArr__index(&from_tte->out_edges, j);
         // Match on all three fields, since a single from-block may
         // have several distinct edges to this block.
         if (oe->to_sNo == here_sNo && oe->to_tteNo == here_tteNo
             && oe->from_offs == ie->from_offs)
           break;
      }
      vg_assert(j < m); // "oe must be findable"
      OutEdgeArr__deleteIndex(&from_tte->out_edges, j);
   }

   /* Visit all OutEdges owned by here_tte. */
   n = OutEdgeArr__size(&here_tte->out_edges);
   for (i = 0; i < n; i++) {
      OutEdge* oe = OutEdgeArr__index(&here_tte->out_edges, i);
      // Find the corresponding entry in the "to" node's in_edges,
      // and remove it.
      TTEntry* to_tte = index_tte(oe->to_sNo, oe->to_tteNo);
      m = InEdgeArr__size(&to_tte->in_edges);
      vg_assert(m > 0); // it must have at least one entry
      for (j = 0; j < m; j++) {
         InEdge* ie = InEdgeArr__index(&to_tte->in_edges, j);
         if (ie->from_sNo == here_sNo && ie->from_tteNo == here_tteNo
             && ie->from_offs == oe->from_offs)
           break;
      }
      vg_assert(j < m); // "ie must be findable"
      InEdgeArr__deleteIndex(&to_tte->in_edges, j);
   }

   /* Finally, discard this block's own edge lists. */
   InEdgeArr__makeEmpty(&here_tte->in_edges);
   OutEdgeArr__makeEmpty(&here_tte->out_edges);
}
890
891
892/*-------------------------------------------------------------*/
893/*--- Address-range equivalence class stuff                 ---*/
894/*-------------------------------------------------------------*/
895
896/* Return equivalence class number for a range. */
897
898static Int range_to_eclass ( Addr64 start, UInt len )
899{
900   UInt mask   = (1 << ECLASS_WIDTH) - 1;
901   UInt lo     = (UInt)start;
902   UInt hi     = lo + len - 1;
903   UInt loBits = (lo >> ECLASS_SHIFT) & mask;
904   UInt hiBits = (hi >> ECLASS_SHIFT) & mask;
905   if (loBits == hiBits) {
906      vg_assert(loBits < ECLASS_N-1);
907      return loBits;
908   } else {
909      return ECLASS_MISC;
910   }
911}
912
913
914/* Calculates the equivalence class numbers for any VexGuestExtent.
915   These are written in *eclasses, which must be big enough to hold 3
916   Ints.  The number written, between 1 and 3, is returned.  The
917   eclasses are presented in order, and any duplicates are removed.
918*/
919
920static
921Int vexGuestExtents_to_eclasses ( /*OUT*/Int* eclasses,
922                                  VexGuestExtents* vge )
923{
924#  define SWAP(_lv1,_lv2) \
925      do { Int t = _lv1; _lv1 = _lv2; _lv2 = t; } while (0)
926
927   Int i, j, n_ec, r;
928
929   vg_assert(vge->n_used >= 1 && vge->n_used <= 3);
930
931   n_ec = 0;
932   for (i = 0; i < vge->n_used; i++) {
933      r = range_to_eclass( vge->base[i], (UInt)vge->len[i] );
934      if (r == ECLASS_MISC)
935         goto bad;
936      /* only add if we haven't already seen it */
937      for (j = 0; j < n_ec; j++)
938         if (eclasses[j] == r)
939            break;
940      if (j == n_ec)
941         eclasses[n_ec++] = r;
942   }
943
944   if (n_ec == 1)
945      return 1;
946
947   if (n_ec == 2) {
948      /* sort */
949      if (eclasses[0] > eclasses[1])
950         SWAP(eclasses[0], eclasses[1]);
951      return 2;
952   }
953
954   if (n_ec == 3) {
955      /* sort */
956      if (eclasses[0] > eclasses[2])
957         SWAP(eclasses[0], eclasses[2]);
958      if (eclasses[0] > eclasses[1])
959         SWAP(eclasses[0], eclasses[1]);
960      if (eclasses[1] > eclasses[2])
961         SWAP(eclasses[1], eclasses[2]);
962      return 3;
963   }
964
965   /* NOTREACHED */
966   vg_assert(0);
967
968  bad:
969   eclasses[0] = ECLASS_MISC;
970   return 1;
971
972#  undef SWAP
973}
974
975
976/* Add tteno to the set of entries listed for equivalence class ec in
977   this sector.  Returns used location in eclass array. */
978
979static
980UInt addEClassNo ( /*MOD*/Sector* sec, Int ec, UShort tteno )
981{
982   Int    old_sz, new_sz, i, r;
983   UShort *old_ar, *new_ar;
984
985   vg_assert(ec >= 0 && ec < ECLASS_N);
986   vg_assert(tteno < N_TTES_PER_SECTOR);
987
988   if (0) VG_(printf)("ec %d  gets %d\n", ec, (Int)tteno);
989
990   if (sec->ec2tte_used[ec] >= sec->ec2tte_size[ec]) {
991
992      vg_assert(sec->ec2tte_used[ec] == sec->ec2tte_size[ec]);
993
994      old_sz = sec->ec2tte_size[ec];
995      old_ar = sec->ec2tte[ec];
996      new_sz = old_sz==0 ? 8 : old_sz<64 ? 2*old_sz : (3*old_sz)/2;
997      new_ar = ttaux_malloc("transtab.aECN.1",
998                            new_sz * sizeof(UShort));
999      for (i = 0; i < old_sz; i++)
1000         new_ar[i] = old_ar[i];
1001      if (old_ar)
1002         ttaux_free(old_ar);
1003      sec->ec2tte_size[ec] = new_sz;
1004      sec->ec2tte[ec] = new_ar;
1005
1006      if (0) VG_(printf)("expand ec %d to %d\n", ec, new_sz);
1007   }
1008
1009   /* Common case */
1010   r = sec->ec2tte_used[ec]++;
1011   vg_assert(r >= 0 && r < sec->ec2tte_size[ec]);
1012   sec->ec2tte[ec][r] = tteno;
1013   return (UInt)r;
1014}
1015
1016
1017/* 'vge' is being added to 'sec' at TT entry 'tteno'.  Add appropriate
1018   eclass entries to 'sec'. */
1019
1020static
1021void upd_eclasses_after_add ( /*MOD*/Sector* sec, Int tteno )
1022{
1023   Int i, r, eclasses[3];
1024   TTEntry* tte;
1025   vg_assert(tteno >= 0 && tteno < N_TTES_PER_SECTOR);
1026
1027   tte = &sec->tt[tteno];
1028   r = vexGuestExtents_to_eclasses( eclasses, &tte->vge );
1029
1030   vg_assert(r >= 1 && r <= 3);
1031   tte->n_tte2ec = r;
1032
1033   for (i = 0; i < r; i++) {
1034      tte->tte2ec_ec[i] = eclasses[i];
1035      tte->tte2ec_ix[i] = addEClassNo( sec, eclasses[i], (UShort)tteno );
1036   }
1037}
1038
1039
1040/* Check the eclass info in 'sec' to ensure it is consistent.  Returns
1041   True if OK, False if something's not right.  Expensive. */
1042
1043static Bool sanity_check_eclasses_in_sector ( Sector* sec )
1044{
1045#  define BAD(_str) do { whassup = (_str); goto bad; } while (0)
1046
1047   HChar*   whassup = NULL;
1048   Int      i, j, k, n, ec_num, ec_idx;
1049   TTEntry* tte;
1050   UShort   tteno;
1051   ULong*   tce;
1052
1053   /* Basic checks on this sector */
1054   if (sec->tt_n_inuse < 0 || sec->tt_n_inuse > N_TTES_PER_SECTOR_USABLE)
1055      BAD("invalid sec->tt_n_inuse");
1056   tce = sec->tc_next;
1057   if (tce < &sec->tc[0] || tce > &sec->tc[tc_sector_szQ])
1058      BAD("sec->tc_next points outside tc");
1059
1060   /* For each eclass ... */
1061   for (i = 0; i < ECLASS_N; i++) {
1062      if (sec->ec2tte_size[i] == 0 && sec->ec2tte[i] != NULL)
1063         BAD("ec2tte_size/ec2tte mismatch(1)");
1064      if (sec->ec2tte_size[i] != 0 && sec->ec2tte[i] == NULL)
1065         BAD("ec2tte_size/ec2tte mismatch(2)");
1066      if (sec->ec2tte_used[i] < 0
1067          || sec->ec2tte_used[i] > sec->ec2tte_size[i])
1068         BAD("implausible ec2tte_used");
1069      if (sec->ec2tte_used[i] == 0)
1070         continue;
1071
1072      /* For each tt reference in each eclass .. ensure the reference
1073         is to a valid tt entry, and that the entry's address ranges
1074         really include this eclass. */
1075
1076      for (j = 0; j < sec->ec2tte_used[i]; j++) {
1077         tteno = sec->ec2tte[i][j];
1078         if (tteno == EC2TTE_DELETED)
1079            continue;
1080         if (tteno >= N_TTES_PER_SECTOR)
1081            BAD("implausible tteno");
1082         tte = &sec->tt[tteno];
1083         if (tte->status != InUse)
1084            BAD("tteno points to non-inuse tte");
1085         if (tte->n_tte2ec < 1 || tte->n_tte2ec > 3)
1086            BAD("tte->n_tte2ec out of range");
1087         /* Exactly least one of tte->eclasses[0 .. tte->n_eclasses-1]
1088            must equal i.  Inspect tte's eclass info. */
1089         n = 0;
1090         for (k = 0; k < tte->n_tte2ec; k++) {
1091            if (k < tte->n_tte2ec-1
1092                && tte->tte2ec_ec[k] >= tte->tte2ec_ec[k+1])
1093               BAD("tte->tte2ec_ec[..] out of order");
1094            ec_num = tte->tte2ec_ec[k];
1095            if (ec_num < 0 || ec_num >= ECLASS_N)
1096               BAD("tte->tte2ec_ec[..] out of range");
1097            if (ec_num != i)
1098               continue;
1099            ec_idx = tte->tte2ec_ix[k];
1100            if (ec_idx < 0 || ec_idx >= sec->ec2tte_used[i])
1101               BAD("tte->tte2ec_ix[..] out of range");
1102            if (ec_idx == j)
1103               n++;
1104         }
1105         if (n != 1)
1106            BAD("tteno does not point back at eclass");
1107      }
1108   }
1109
1110   /* That establishes that for each forward pointer from TTEntrys
1111      there is a corresponding backward pointer from the eclass[]
1112      arrays.  However, it doesn't rule out the possibility of other,
1113      bogus pointers in the eclass[] arrays.  So do those similarly:
1114      scan through them and check the TTEntryies they point at point
1115      back. */
1116
1117   for (i = 0; i < N_TTES_PER_SECTOR_USABLE; i++) {
1118
1119      tte = &sec->tt[i];
1120      if (tte->status == Empty || tte->status == Deleted) {
1121         if (tte->n_tte2ec != 0)
1122            BAD("tte->n_eclasses nonzero for unused tte");
1123         continue;
1124      }
1125
1126      vg_assert(tte->status == InUse);
1127
1128      if (tte->n_tte2ec < 1 || tte->n_tte2ec > 3)
1129         BAD("tte->n_eclasses out of range(2)");
1130
1131      for (j = 0; j < tte->n_tte2ec; j++) {
1132         ec_num = tte->tte2ec_ec[j];
1133         if (ec_num < 0 || ec_num >= ECLASS_N)
1134            BAD("tte->eclass[..] out of range");
1135         ec_idx = tte->tte2ec_ix[j];
1136         if (ec_idx < 0 || ec_idx >= sec->ec2tte_used[ec_num])
1137            BAD("tte->ec_idx[..] out of range(2)");
1138         if (sec->ec2tte[ec_num][ec_idx] != i)
1139            BAD("ec2tte does not point back to tte");
1140      }
1141   }
1142
1143   return True;
1144
1145  bad:
1146   if (whassup)
1147      VG_(debugLog)(0, "transtab", "eclass sanity fail: %s\n", whassup);
1148
1149#  if 0
1150   VG_(printf)("eclass = %d\n", i);
1151   VG_(printf)("tteno = %d\n", (Int)tteno);
1152   switch (tte->status) {
1153      case InUse:   VG_(printf)("InUse\n"); break;
1154      case Deleted: VG_(printf)("Deleted\n"); break;
1155      case Empty:   VG_(printf)("Empty\n"); break;
1156   }
1157   if (tte->status != Empty) {
1158      for (k = 0; k < tte->vge.n_used; k++)
1159         VG_(printf)("0x%llx %d\n", tte->vge.base[k],
1160                                    (Int)tte->vge.len[k]);
1161   }
1162#  endif
1163
1164   return False;
1165
1166#  undef BAD
1167}
1168
1169
1170/* Sanity check absolutely everything.  True == check passed. */
1171
1172/* forwards */
1173static Bool sanity_check_redir_tt_tc ( void );
1174
1175static Bool sanity_check_sector_search_order ( void )
1176{
1177   Int i, j, nListed;
1178   /* assert the array is the right size */
1179   vg_assert(N_SECTORS == (sizeof(sector_search_order)
1180                           / sizeof(sector_search_order[0])));
1181   /* Check it's of the form  valid_sector_numbers ++ [-1, -1, ..] */
1182   for (i = 0; i < N_SECTORS; i++) {
1183      if (sector_search_order[i] < 0 || sector_search_order[i] >= N_SECTORS)
1184         break;
1185   }
1186   nListed = i;
1187   for (/* */; i < N_SECTORS; i++) {
1188      if (sector_search_order[i] != -1)
1189         break;
1190   }
1191   if (i != N_SECTORS)
1192      return False;
1193   /* Check each sector number only appears once */
1194   for (i = 0; i < N_SECTORS; i++) {
1195      if (sector_search_order[i] == -1)
1196         continue;
1197      for (j = i+1; j < N_SECTORS; j++) {
1198         if (sector_search_order[j] == sector_search_order[i])
1199            return False;
1200      }
1201   }
1202   /* Check that the number of listed sectors equals the number
1203      in use, by counting nListed back down. */
1204   for (i = 0; i < N_SECTORS; i++) {
1205      if (sectors[i].tc != NULL)
1206         nListed--;
1207   }
1208   if (nListed != 0)
1209      return False;
1210   return True;
1211}
1212
1213static Bool sanity_check_all_sectors ( void )
1214{
1215   Int     sno;
1216   Bool    sane;
1217   Sector* sec;
1218   for (sno = 0; sno < N_SECTORS; sno++) {
1219      sec = &sectors[sno];
1220      if (sec->tc == NULL)
1221         continue;
1222      sane = sanity_check_eclasses_in_sector( sec );
1223      if (!sane)
1224         return False;
1225   }
1226   if ( !sanity_check_redir_tt_tc() )
1227      return False;
1228   if ( !sanity_check_sector_search_order() )
1229      return False;
1230   return True;
1231}
1232
1233
1234
1235/*-------------------------------------------------------------*/
1236/*--- Add/find translations                                 ---*/
1237/*-------------------------------------------------------------*/
1238
1239static UInt vge_osize ( VexGuestExtents* vge )
1240{
1241   UInt i, n = 0;
1242   for (i = 0; i < vge->n_used; i++)
1243      n += (UInt)vge->len[i];
1244   return n;
1245}
1246
1247static Bool isValidSector ( Int sector )
1248{
1249   if (sector < 0 || sector >= N_SECTORS)
1250      return False;
1251   return True;
1252}
1253
1254static inline UInt HASH_TT ( Addr64 key )
1255{
1256   UInt kHi = (UInt)(key >> 32);
1257   UInt kLo = (UInt)key;
1258   UInt k32 = kHi ^ kLo;
1259   UInt ror = 7;
1260   if (ror > 0)
1261      k32 = (k32 >> ror) | (k32 << (32-ror));
1262   return k32 % N_TTES_PER_SECTOR;
1263}
1264
/* Install a guest-addr -> host-code mapping in the direct-mapped
   fast cache, overwriting whatever previously occupied that line. */
static void setFastCacheEntry ( Addr64 key, ULong* tcptr )
{
   UInt cno = (UInt)VG_TT_FAST_HASH(key);
   VG_(tt_fast)[cno].guest = (Addr)key;
   VG_(tt_fast)[cno].host  = (Addr)tcptr;
   n_fast_updates++;
   /* This shouldn't fail.  It should be assured by m_translate
      which should reject any attempt to make translation of code
      starting at TRANSTAB_BOGUS_GUEST_ADDR. */
   vg_assert(VG_(tt_fast)[cno].guest != TRANSTAB_BOGUS_GUEST_ADDR);
}
1276
1277/* Invalidate the fast cache VG_(tt_fast). */
static void invalidateFastCache ( void )
{
   UInt j;
   /* This loop is popular enough to make it worth unrolling a
      bit, at least on ppc32. */
   vg_assert(VG_TT_FAST_SIZE > 0 && (VG_TT_FAST_SIZE % 4) == 0);
   for (j = 0; j < VG_TT_FAST_SIZE; j += 4) {
      /* Mark each line with the bogus guest address, which (per
         setFastCacheEntry) can never be a real lookup key, so every
         subsequent probe misses. */
      VG_(tt_fast)[j+0].guest = TRANSTAB_BOGUS_GUEST_ADDR;
      VG_(tt_fast)[j+1].guest = TRANSTAB_BOGUS_GUEST_ADDR;
      VG_(tt_fast)[j+2].guest = TRANSTAB_BOGUS_GUEST_ADDR;
      VG_(tt_fast)[j+3].guest = TRANSTAB_BOGUS_GUEST_ADDR;
   }

   vg_assert(j == VG_TT_FAST_SIZE);
   n_fast_flushes++;
}
1294
/* Make sector 'sno' ready to receive translations: allocate its
   tt/tc on first use, or (if previously used) discard and unchain
   all its existing translations.  Either way, on return the sector
   is empty and listed in sector_search_order[]. */
static void initialiseSector ( Int sno )
{
   Int     i;
   SysRes  sres;
   Sector* sec;
   vg_assert(isValidSector(sno));

   { Bool sane = sanity_check_sector_search_order();
     vg_assert(sane);
   }
   sec = &sectors[sno];

   if (sec->tc == NULL) {

      /* Sector has never been used before.  Need to allocate tt and
         tc. */
      vg_assert(sec->tt == NULL);
      vg_assert(sec->tc_next == NULL);
      vg_assert(sec->tt_n_inuse == 0);
      for (i = 0; i < ECLASS_N; i++) {
         vg_assert(sec->ec2tte_size[i] == 0);
         vg_assert(sec->ec2tte_used[i] == 0);
         vg_assert(sec->ec2tte[i] == NULL);
      }
      vg_assert(sec->host_extents == NULL);

      VG_(debugLog)(1,"transtab", "allocate sector %d\n", sno);

      /* tc_sector_szQ is in 8-byte (ULong) units, hence the *8. */
      sres = VG_(am_mmap_anon_float_valgrind)( 8 * tc_sector_szQ );
      if (sr_isError(sres)) {
         VG_(out_of_memory_NORETURN)("initialiseSector(TC)",
                                     8 * tc_sector_szQ );
	 /*NOTREACHED*/
      }
      sec->tc = (ULong*)(AddrH)sr_Res(sres);

      sres = VG_(am_mmap_anon_float_valgrind)
                ( N_TTES_PER_SECTOR * sizeof(TTEntry) );
      if (sr_isError(sres)) {
         VG_(out_of_memory_NORETURN)("initialiseSector(TT)",
                                     N_TTES_PER_SECTOR * sizeof(TTEntry) );
	 /*NOTREACHED*/
      }
      sec->tt = (TTEntry*)(AddrH)sr_Res(sres);

      for (i = 0; i < N_TTES_PER_SECTOR; i++) {
         sec->tt[i].status   = Empty;
         sec->tt[i].n_tte2ec = 0;
      }

      /* Set up the host_extents array. */
      sec->host_extents
         = VG_(newXA)(ttaux_malloc, "transtab.initialiseSector(host_extents)",
                      ttaux_free,
                      sizeof(HostExtent));

      /* Add an entry in the sector_search_order */
      for (i = 0; i < N_SECTORS; i++) {
         if (sector_search_order[i] == -1)
            break;
      }
      vg_assert(i >= 0 && i < N_SECTORS);
      sector_search_order[i] = sno;

      if (VG_(clo_verbosity) > 2)
         VG_(message)(Vg_DebugMsg, "TT/TC: initialise sector %d\n", sno);

   } else {

      /* Sector has been used before.  Dump the old contents. */
      VG_(debugLog)(1,"transtab", "recycle sector %d\n", sno);
      vg_assert(sec->tt != NULL);
      vg_assert(sec->tc_next != NULL);
      n_dump_count += sec->tt_n_inuse;

      VexArch vex_arch = VexArch_INVALID;
      VG_(machine_get_VexArchInfo)( &vex_arch, NULL );

      /* Visit each just-about-to-be-abandoned translation. */
      if (0) VG_(printf)("QQQ unlink-entire-sector: %d START\n", sno);
      for (i = 0; i < N_TTES_PER_SECTOR; i++) {
         if (sec->tt[i].status == InUse) {
            vg_assert(sec->tt[i].n_tte2ec >= 1);
            vg_assert(sec->tt[i].n_tte2ec <= 3);
            n_dump_osize += vge_osize(&sec->tt[i].vge);
            /* Tell the tool too. */
            if (VG_(needs).superblock_discards) {
               VG_TDICT_CALL( tool_discard_superblock_info,
                              sec->tt[i].entry,
                              sec->tt[i].vge );
            }
            /* Unchain before the entry vanishes, so that blocks in
               OTHER sectors which jump here are re-pointed at the
               dispatcher. */
            unchain_in_preparation_for_deletion(vex_arch, sno, i);
         } else {
            vg_assert(sec->tt[i].n_tte2ec == 0);
         }
         sec->tt[i].status   = Empty;
         sec->tt[i].n_tte2ec = 0;
      }
      if (0) VG_(printf)("QQQ unlink-entire-sector: %d END\n", sno);

      /* Free up the eclass structures. */
      for (i = 0; i < ECLASS_N; i++) {
         if (sec->ec2tte_size[i] == 0) {
            vg_assert(sec->ec2tte_used[i] == 0);
            vg_assert(sec->ec2tte[i] == NULL);
         } else {
            vg_assert(sec->ec2tte[i] != NULL);
            ttaux_free(sec->ec2tte[i]);
            sec->ec2tte[i] = NULL;
            sec->ec2tte_size[i] = 0;
            sec->ec2tte_used[i] = 0;
         }
      }

      /* Empty out the host extents array. */
      vg_assert(sec->host_extents != NULL);
      VG_(dropTailXA)(sec->host_extents, VG_(sizeXA)(sec->host_extents));
      vg_assert(VG_(sizeXA)(sec->host_extents) == 0);

      /* Sanity check: ensure it is already in
         sector_search_order[]. */
      for (i = 0; i < N_SECTORS; i++) {
         if (sector_search_order[i] == sno)
            break;
      }
      vg_assert(i >= 0 && i < N_SECTORS);

      if (VG_(clo_verbosity) > 2)
         VG_(message)(Vg_DebugMsg, "TT/TC: recycle sector %d\n", sno);
   }

   sec->tc_next = sec->tc;
   sec->tt_n_inuse = 0;

   /* The fast cache may hold pointers into the (now dead) old
      contents, so it must be flushed wholesale. */
   invalidateFastCache();

   { Bool sane = sanity_check_sector_search_order();
     vg_assert(sane);
   }
}
1435
1436
1437/* Add a translation of vge to TT/TC.  The translation is temporarily
1438   in code[0 .. code_len-1].
1439
1440   pre: youngest_sector points to a valid (although possibly full)
1441   sector.
1442*/
void VG_(add_to_transtab)( VexGuestExtents* vge,
                           Addr64           entry,
                           AddrH            code,
                           UInt             code_len,
                           Bool             is_self_checking,
                           Int              offs_profInc,
                           UInt             n_guest_instrs,
                           VexArch          arch_host )
{
   Int    tcAvailQ, reqdQ, y, i;
   ULong  *tcptr, *tcptr2;
   UChar* srcP;
   UChar* dstP;

   vg_assert(init_done);
   vg_assert(vge->n_used >= 1 && vge->n_used <= 3);

   /* 60000: should agree with N_TMPBUF in m_translate.c. */
   vg_assert(code_len > 0 && code_len < 60000);

   /* Generally stay sane */
   vg_assert(n_guest_instrs < 200); /* it can be zero, tho */

   if (0)
      VG_(printf)("add_to_transtab(entry = 0x%llx, len = %d)\n",
                  entry, code_len);

   /* Bookkeeping stats. */
   n_in_count++;
   n_in_tsize += code_len;
   n_in_osize += vge_osize(vge);
   if (is_self_checking)
      n_in_sc_count++;

   y = youngest_sector;
   vg_assert(isValidSector(y));

   if (sectors[y].tc == NULL)
      initialiseSector(y);

   /* Try putting the translation in this sector. */
   /* Space needed in tc, rounded up to 8-byte (ULong) units. */
   reqdQ = (code_len + 7) >> 3;

   /* Will it fit in tc? */
   tcAvailQ = ((ULong*)(&sectors[y].tc[tc_sector_szQ]))
              - ((ULong*)(sectors[y].tc_next));
   vg_assert(tcAvailQ >= 0);
   vg_assert(tcAvailQ <= tc_sector_szQ);

   if (tcAvailQ < reqdQ
       || sectors[y].tt_n_inuse >= N_TTES_PER_SECTOR_USABLE) {
      /* No.  So move on to the next sector.  Either it's never been
         used before, in which case it will get its tt/tc allocated
         now, or it has been used before, in which case it is set to be
         empty, hence throwing out the oldest sector. */
      vg_assert(tc_sector_szQ > 0);
      VG_(debugLog)(1,"transtab",
                      "declare sector %d full "
                      "(TT loading %2d%%, TC loading %2d%%)\n",
                      y,
                      (100 * sectors[y].tt_n_inuse)
                         / N_TTES_PER_SECTOR,
                      (100 * (tc_sector_szQ - tcAvailQ))
                         / tc_sector_szQ);
      /* Sectors are used round-robin, so +1 (mod N_SECTORS) is the
         oldest. */
      youngest_sector++;
      if (youngest_sector >= N_SECTORS)
         youngest_sector = 0;
      y = youngest_sector;
      initialiseSector(y);
   }

   /* Be sure ... */
   tcAvailQ = ((ULong*)(&sectors[y].tc[tc_sector_szQ]))
              - ((ULong*)(sectors[y].tc_next));
   vg_assert(tcAvailQ >= 0);
   vg_assert(tcAvailQ <= tc_sector_szQ);
   vg_assert(tcAvailQ >= reqdQ);
   vg_assert(sectors[y].tt_n_inuse < N_TTES_PER_SECTOR_USABLE);
   vg_assert(sectors[y].tt_n_inuse >= 0);

   /* Copy into tc. */
   tcptr = sectors[y].tc_next;
   vg_assert(tcptr >= &sectors[y].tc[0]);
   vg_assert(tcptr <= &sectors[y].tc[tc_sector_szQ]);

   dstP = (UChar*)tcptr;
   srcP = (UChar*)code;
   VG_(memcpy)(dstP, srcP, code_len);
   sectors[y].tc_next += reqdQ;
   sectors[y].tt_n_inuse++;

   /* more paranoia */
   tcptr2 = sectors[y].tc_next;
   vg_assert(tcptr2 >= &sectors[y].tc[0]);
   vg_assert(tcptr2 <= &sectors[y].tc[tc_sector_szQ]);

   /* Find an empty tt slot, and use it.  There must be such a slot
      since tt is never allowed to get completely full. */
   /* Open-addressed hash: probe linearly with wraparound from the
      hash of the guest entry address. */
   i = HASH_TT(entry);
   vg_assert(i >= 0 && i < N_TTES_PER_SECTOR);
   while (True) {
      if (sectors[y].tt[i].status == Empty
          || sectors[y].tt[i].status == Deleted)
         break;
      i++;
      if (i >= N_TTES_PER_SECTOR)
         i = 0;
   }

   TTEntry__init(&sectors[y].tt[i]);
   sectors[y].tt[i].status = InUse;
   sectors[y].tt[i].tcptr  = tcptr;
   sectors[y].tt[i].count  = 0;
   /* weight 0 would make the entry look free to profiling; clamp. */
   sectors[y].tt[i].weight = n_guest_instrs == 0 ? 1 : n_guest_instrs;
   sectors[y].tt[i].vge    = *vge;
   sectors[y].tt[i].entry  = entry;

   /* Patch in the profile counter location, if necessary. */
   if (offs_profInc != -1) {
      vg_assert(offs_profInc >= 0 && offs_profInc < code_len);
      VexInvalRange vir
         = LibVEX_PatchProfInc( arch_host,
                                dstP + offs_profInc,
                                &sectors[y].tt[i].count );
      VG_(invalidate_icache)( (void*)vir.start, vir.len );
   }

   /* The freshly copied code must be i-cache coherent before it can
      be executed. */
   VG_(invalidate_icache)( dstP, code_len );

   /* Add this entry to the host_extents map, checking that we're
      adding in order. */
   { HostExtent hx;
     hx.start = (UChar*)tcptr;
     hx.len   = code_len;
     hx.tteNo = i;
     vg_assert(hx.len > 0); /* bsearch fails w/ zero length entries */
     XArray* hx_array = sectors[y].host_extents;
     vg_assert(hx_array);
     Word n = VG_(sizeXA)(hx_array);
     if (n > 0) {
        HostExtent* hx_prev = (HostExtent*)VG_(indexXA)(hx_array, n-1);
        vg_assert(hx_prev->start + hx_prev->len <= hx.start);
     }
     VG_(addToXA)(hx_array, &hx);
   }

   /* Update the fast-cache. */
   setFastCacheEntry( entry, tcptr );

   /* Note the eclass numbers for this translation. */
   upd_eclasses_after_add( &sectors[y], i );
}
1594
1595
1596/* Search for the translation of the given guest address.  If
1597   requested, a successful search can also cause the fast-caches to be
1598   updated.
1599*/
Bool VG_(search_transtab) ( /*OUT*/AddrH* res_hcode,
                            /*OUT*/UInt*  res_sNo,
                            /*OUT*/UInt*  res_tteNo,
                            Addr64        guest_addr,
                            Bool          upd_cache )
{
   Int i, j, k, kstart, sno;

   vg_assert(init_done);
   /* Find the initial probe point just once.  It will be the same in
      all sectors and avoids multiple expensive % operations. */
   n_full_lookups++;
   k      = -1;
   kstart = HASH_TT(guest_addr);
   vg_assert(kstart >= 0 && kstart < N_TTES_PER_SECTOR);

   /* Search in all the sectors,using sector_search_order[] as a
      heuristic guide as to what order to visit the sectors. */
   for (i = 0; i < N_SECTORS; i++) {

      sno = sector_search_order[i];
      if (UNLIKELY(sno == -1))
         return False; /* run out of sectors to search */

      /* Linear-probe within this sector, starting at the hash
         slot. */
      k = kstart;
      for (j = 0; j < N_TTES_PER_SECTOR; j++) {
         n_lookup_probes++;
         if (sectors[sno].tt[k].status == InUse
             && sectors[sno].tt[k].entry == guest_addr) {
            /* found it */
            if (upd_cache)
               setFastCacheEntry(
                  guest_addr, sectors[sno].tt[k].tcptr );
            /* All three out-params are optional. */
            if (res_hcode)
               *res_hcode = (AddrH)sectors[sno].tt[k].tcptr;
            if (res_sNo)
               *res_sNo = sno;
            if (res_tteNo)
               *res_tteNo = k;
            /* pull this one one step closer to the front.  For large
               apps this more or less halves the number of required
               probes. */
            if (i > 0) {
               Int tmp = sector_search_order[i-1];
               sector_search_order[i-1] = sector_search_order[i];
               sector_search_order[i] = tmp;
            }
            return True;
         }
         /* An Empty slot terminates this sector's probe chain --
            the entry cannot be further along. */
         if (sectors[sno].tt[k].status == Empty)
            break; /* not found in this sector */
         k++;
         if (k == N_TTES_PER_SECTOR)
            k = 0;
      }

      /* If we fall off the end, all entries are InUse and not
         matching, or Deleted.  In any case we did not find it in this
         sector. */
   }

   /* Not found in any sector. */
   return False;
}
1664
1665
1666/*-------------------------------------------------------------*/
1667/*--- Delete translations.                                  ---*/
1668/*-------------------------------------------------------------*/
1669
1670/* forward */
1671static void unredir_discard_translations( Addr64, ULong );
1672
1673/* Stuff for deleting translations which intersect with a given
1674   address range.  Unfortunately, to make this run at a reasonable
1675   speed, it is complex. */
1676
1677static inline
1678Bool overlap1 ( Addr64 s1, ULong r1, Addr64 s2, ULong r2 )
1679{
1680   Addr64 e1 = s1 + r1 - 1ULL;
1681   Addr64 e2 = s2 + r2 - 1ULL;
1682   if (e1 < s2 || e2 < s1)
1683      return False;
1684   return True;
1685}
1686
1687static inline
1688Bool overlaps ( Addr64 start, ULong range, VexGuestExtents* vge )
1689{
1690   if (overlap1(start, range, vge->base[0], (UInt)vge->len[0]))
1691      return True;
1692   if (vge->n_used < 2)
1693      return False;
1694   if (overlap1(start, range, vge->base[1], (UInt)vge->len[1]))
1695      return True;
1696   if (vge->n_used < 3)
1697      return False;
1698   if (overlap1(start, range, vge->base[2], (UInt)vge->len[2]))
1699      return True;
1700   return False;
1701}
1702
1703
1704/* Delete a tt entry, and update all the eclass data accordingly. */
1705
static void delete_tte ( /*MOD*/Sector* sec, UInt secNo, Int tteno,
                         VexArch vex_arch )
{
   Int      i, ec_num, ec_idx;
   TTEntry* tte;

   /* sec and secNo are mutually redundant; cross-check. */
   vg_assert(sec == &sectors[secNo]);

   vg_assert(tteno >= 0 && tteno < N_TTES_PER_SECTOR);
   tte = &sec->tt[tteno];
   vg_assert(tte->status == InUse);
   vg_assert(tte->n_tte2ec >= 1 && tte->n_tte2ec <= 3);

   /* Unchain .. */
   /* Must happen while the entry is still intact, so that blocks
      chained to it can be re-pointed at the dispatcher. */
   unchain_in_preparation_for_deletion(vex_arch, secNo, tteno);

   /* Deal with the ec-to-tte links first. */
   for (i = 0; i < tte->n_tte2ec; i++) {
      ec_num = (Int)tte->tte2ec_ec[i];
      ec_idx = tte->tte2ec_ix[i];
      vg_assert(ec_num >= 0 && ec_num < ECLASS_N);
      vg_assert(ec_idx >= 0);
      vg_assert(ec_idx < sec->ec2tte_used[ec_num]);
      /* Assert that the two links point at each other. */
      vg_assert(sec->ec2tte[ec_num][ec_idx] == (UShort)tteno);
      /* "delete" the pointer back to here.  The slot is tombstoned,
         not compacted, so other tte2ec_ix values stay valid. */
      sec->ec2tte[ec_num][ec_idx] = EC2TTE_DELETED;
   }

   /* Now fix up this TTEntry. */
   tte->status   = Deleted;
   tte->n_tte2ec = 0;

   /* Stats .. */
   sec->tt_n_inuse--;
   n_disc_count++;
   n_disc_osize += vge_osize(&tte->vge);

   /* Tell the tool too. */
   if (VG_(needs).superblock_discards) {
      VG_TDICT_CALL( tool_discard_superblock_info,
                     tte->entry,
                     tte->vge );
   }
}
1752
1753
1754/* Delete translations from sec which intersect specified range, but
1755   only consider translations in the specified eclass. */
1756
1757static
1758Bool delete_translations_in_sector_eclass ( /*MOD*/Sector* sec, UInt secNo,
1759                                            Addr64 guest_start, ULong range,
1760                                            Int ec,
1761                                            VexArch vex_arch )
1762{
1763   Int      i;
1764   UShort   tteno;
1765   Bool     anyDeld = False;
1766   TTEntry* tte;
1767
1768   vg_assert(ec >= 0 && ec < ECLASS_N);
1769
1770   for (i = 0; i < sec->ec2tte_used[ec]; i++) {
1771
1772      tteno = sec->ec2tte[ec][i];
1773      if (tteno == EC2TTE_DELETED) {
1774         /* already deleted */
1775         continue;
1776      }
1777
1778      vg_assert(tteno < N_TTES_PER_SECTOR);
1779
1780      tte = &sec->tt[tteno];
1781      vg_assert(tte->status == InUse);
1782
1783      if (overlaps( guest_start, range, &tte->vge )) {
1784         anyDeld = True;
1785         delete_tte( sec, secNo, (Int)tteno, vex_arch );
1786      }
1787
1788   }
1789
1790   return anyDeld;
1791}
1792
1793
1794/* Delete translations from sec which intersect specified range, the
1795   slow way, by inspecting all translations in sec. */
1796
1797static
1798Bool delete_translations_in_sector ( /*MOD*/Sector* sec, UInt secNo,
1799                                     Addr64 guest_start, ULong range,
1800                                     VexArch vex_arch )
1801{
1802   Int  i;
1803   Bool anyDeld = False;
1804
1805   for (i = 0; i < N_TTES_PER_SECTOR; i++) {
1806      if (sec->tt[i].status == InUse
1807          && overlaps( guest_start, range, &sec->tt[i].vge )) {
1808         anyDeld = True;
1809         delete_tte( sec, secNo, i, vex_arch );
1810      }
1811   }
1812
1813   return anyDeld;
1814}
1815
1816
1817void VG_(discard_translations) ( Addr64 guest_start, ULong range,
1818                                 HChar* who )
1819{
1820   Sector* sec;
1821   Int     sno, ec;
1822   Bool    anyDeleted = False;
1823
1824   vg_assert(init_done);
1825
1826   VG_(debugLog)(2, "transtab",
1827                    "discard_translations(0x%llx, %lld) req by %s\n",
1828                    guest_start, range, who );
1829
1830   /* Pre-deletion sanity check */
1831   if (VG_(clo_sanity_level >= 4)) {
1832      Bool sane = sanity_check_all_sectors();
1833      vg_assert(sane);
1834   }
1835
1836   if (range == 0)
1837      return;
1838
1839   VexArch vex_arch = VexArch_INVALID;
1840   VG_(machine_get_VexArchInfo)( &vex_arch, NULL );
1841
1842   /* There are two different ways to do this.
1843
1844      If the range fits within a single address-range equivalence
1845      class, as will be the case for a cache line sized invalidation,
1846      then we only have to inspect the set of translations listed in
1847      that equivalence class, and also in the "sin-bin" equivalence
1848      class ECLASS_MISC.
1849
1850      Otherwise, the invalidation is of a larger range and probably
1851      results from munmap.  In this case it's (probably!) faster just
1852      to inspect all translations, dump those we don't want, and
1853      regenerate the equivalence class information (since modifying it
1854      in-situ is even more expensive).
1855   */
1856
1857   /* First off, figure out if the range falls within a single class,
1858      and if so which one. */
1859
1860   ec = ECLASS_MISC;
1861   if (range < (1ULL << ECLASS_SHIFT))
1862      ec = range_to_eclass( guest_start, (UInt)range );
1863
1864   /* if ec is ECLASS_MISC then we aren't looking at just a single
1865      class, so use the slow scheme.  Else use the fast scheme,
1866      examining 'ec' and ECLASS_MISC. */
1867
1868   if (ec != ECLASS_MISC) {
1869
1870      VG_(debugLog)(2, "transtab",
1871                       "                    FAST, ec = %d\n", ec);
1872
1873      /* Fast scheme */
1874      vg_assert(ec >= 0 && ec < ECLASS_MISC);
1875
1876      for (sno = 0; sno < N_SECTORS; sno++) {
1877         sec = &sectors[sno];
1878         if (sec->tc == NULL)
1879            continue;
1880         anyDeleted |= delete_translations_in_sector_eclass(
1881                          sec, sno, guest_start, range, ec,
1882                          vex_arch
1883                       );
1884         anyDeleted |= delete_translations_in_sector_eclass(
1885                          sec, sno, guest_start, range, ECLASS_MISC,
1886                          vex_arch
1887                       );
1888      }
1889
1890   } else {
1891
1892      /* slow scheme */
1893
1894      VG_(debugLog)(2, "transtab",
1895                       "                    SLOW, ec = %d\n", ec);
1896
1897      for (sno = 0; sno < N_SECTORS; sno++) {
1898         sec = &sectors[sno];
1899         if (sec->tc == NULL)
1900            continue;
1901         anyDeleted |= delete_translations_in_sector(
1902                          sec, sno, guest_start, range, vex_arch );
1903      }
1904
1905   }
1906
1907   if (anyDeleted)
1908      invalidateFastCache();
1909
1910   /* don't forget the no-redir cache */
1911   unredir_discard_translations( guest_start, range );
1912
1913   /* Post-deletion sanity check */
1914   if (VG_(clo_sanity_level >= 4)) {
1915      Int      i;
1916      TTEntry* tte;
1917      Bool     sane = sanity_check_all_sectors();
1918      vg_assert(sane);
1919      /* But now, also check the requested address range isn't
1920         present anywhere. */
1921      for (sno = 0; sno < N_SECTORS; sno++) {
1922         sec = &sectors[sno];
1923         if (sec->tc == NULL)
1924            continue;
1925         for (i = 0; i < N_TTES_PER_SECTOR; i++) {
1926            tte = &sec->tt[i];
1927            if (tte->status != InUse)
1928               continue;
1929            vg_assert(!overlaps( guest_start, range, &tte->vge ));
1930         }
1931      }
1932   }
1933}
1934
1935
1936/*------------------------------------------------------------*/
1937/*--- AUXILIARY: the unredirected TT/TC                    ---*/
1938/*------------------------------------------------------------*/
1939
1940/* A very simple translation cache which holds a small number of
1941   unredirected translations.  This is completely independent of the
1942   main tt/tc structures.  When unredir_tc or unredir_tt becomes full,
1943   both structures are simply dumped and we start over.
1944
1945   Since these translations are unredirected, the search key is (by
1946   definition) the first address entry in the .vge field. */
1947
1948/* Sized to hold 500 translations of average size 1000 bytes. */
1949
/* Budget of host-code bytes reserved per unredirected translation. */
#define UNREDIR_SZB   1000

/* Max number of unredirected translations held at once. */
#define N_UNREDIR_TT  500
/* Total capacity of the unredir code area, in ULong (8-byte) units. */
#define N_UNREDIR_TCQ (N_UNREDIR_TT * UNREDIR_SZB / sizeof(ULong))

typedef
   struct {
      VexGuestExtents vge;    /* guest extents; .base[0] is the search key */
      Addr            hcode;  /* host code address inside unredir_tc */
      Bool            inUse;  /* slot currently holds a live translation? */
   }
   UTCEntry;

/* We just allocate forwards in _tc, never deleting. */
static ULong    *unredir_tc;
/* Initialised to full capacity, so the cache reads as "full" until
   init_unredir_tt_tc() resets it to 0 -- presumably a guard against
   use before initialisation; confirm. */
static Int      unredir_tc_used = N_UNREDIR_TCQ;

/* Slots in _tt can come into use and out again (.inUse).
   Nevertheless _tt_highwater is maintained so that invalidations
   don't have to scan all the slots when only a few are in use.
   _tt_highwater holds the index of the highest ever allocated
   slot. */
static UTCEntry unredir_tt[N_UNREDIR_TT];
static Int      unredir_tt_highwater;
1974
1975
1976static void init_unredir_tt_tc ( void )
1977{
1978   Int i;
1979   if (unredir_tc == NULL) {
1980      SysRes sres = VG_(am_mmap_anon_float_valgrind)
1981                       ( N_UNREDIR_TT * UNREDIR_SZB );
1982      if (sr_isError(sres)) {
1983         VG_(out_of_memory_NORETURN)("init_unredir_tt_tc",
1984                                     N_UNREDIR_TT * UNREDIR_SZB);
1985         /*NOTREACHED*/
1986      }
1987      unredir_tc = (ULong *)(AddrH)sr_Res(sres);
1988   }
1989   unredir_tc_used = 0;
1990   for (i = 0; i < N_UNREDIR_TT; i++)
1991      unredir_tt[i].inUse = False;
1992   unredir_tt_highwater = -1;
1993}
1994
1995/* Do a sanity check; return False on failure. */
1996static Bool sanity_check_redir_tt_tc ( void )
1997{
1998   Int i;
1999   if (unredir_tt_highwater < -1) return False;
2000   if (unredir_tt_highwater >= N_UNREDIR_TT) return False;
2001
2002   for (i = unredir_tt_highwater+1; i < N_UNREDIR_TT; i++)
2003      if (unredir_tt[i].inUse)
2004         return False;
2005
2006   if (unredir_tc_used < 0) return False;
2007   if (unredir_tc_used > N_UNREDIR_TCQ) return False;
2008
2009   return True;
2010}
2011
2012
2013/* Add an UNREDIRECTED translation of vge to TT/TC.  The translation
2014   is temporarily in code[0 .. code_len-1].
2015*/
2016void VG_(add_to_unredir_transtab)( VexGuestExtents* vge,
2017                                   Addr64           entry,
2018                                   AddrH            code,
2019                                   UInt             code_len )
2020{
2021   Int   i, j, code_szQ;
2022   HChar *srcP, *dstP;
2023
2024   vg_assert(sanity_check_redir_tt_tc());
2025
2026   /* This is the whole point: it's not redirected! */
2027   vg_assert(entry == vge->base[0]);
2028
2029   /* How many unredir_tt slots are needed */
2030   code_szQ = (code_len + 7) / 8;
2031
2032   /* Look for an empty unredir_tc slot */
2033   for (i = 0; i < N_UNREDIR_TT; i++)
2034      if (!unredir_tt[i].inUse)
2035         break;
2036
2037   if (i >= N_UNREDIR_TT || code_szQ > (N_UNREDIR_TCQ - unredir_tc_used)) {
2038      /* It's full; dump everything we currently have */
2039      init_unredir_tt_tc();
2040      i = 0;
2041   }
2042
2043   vg_assert(unredir_tc_used >= 0);
2044   vg_assert(unredir_tc_used <= N_UNREDIR_TCQ);
2045   vg_assert(code_szQ > 0);
2046   vg_assert(code_szQ + unredir_tc_used <= N_UNREDIR_TCQ);
2047   vg_assert(i >= 0 && i < N_UNREDIR_TT);
2048   vg_assert(unredir_tt[i].inUse == False);
2049
2050   if (i > unredir_tt_highwater)
2051      unredir_tt_highwater = i;
2052
2053   dstP = (HChar*)&unredir_tc[unredir_tc_used];
2054   srcP = (HChar*)code;
2055   for (j = 0; j < code_len; j++)
2056      dstP[j] = srcP[j];
2057
2058   VG_(invalidate_icache)( dstP, code_len );
2059
2060   unredir_tt[i].inUse = True;
2061   unredir_tt[i].vge   = *vge;
2062   unredir_tt[i].hcode = (Addr)dstP;
2063
2064   unredir_tc_used += code_szQ;
2065   vg_assert(unredir_tc_used >= 0);
2066   vg_assert(unredir_tc_used <= N_UNREDIR_TCQ);
2067
2068   vg_assert(&dstP[code_len] <= (HChar*)&unredir_tc[unredir_tc_used]);
2069}
2070
2071Bool VG_(search_unredir_transtab) ( /*OUT*/AddrH* result,
2072                                    Addr64        guest_addr )
2073{
2074   Int i;
2075   for (i = 0; i < N_UNREDIR_TT; i++) {
2076      if (!unredir_tt[i].inUse)
2077         continue;
2078      if (unredir_tt[i].vge.base[0] == guest_addr) {
2079         *result = (AddrH)unredir_tt[i].hcode;
2080         return True;
2081      }
2082   }
2083   return False;
2084}
2085
2086static void unredir_discard_translations( Addr64 guest_start, ULong range )
2087{
2088   Int i;
2089
2090   vg_assert(sanity_check_redir_tt_tc());
2091
2092   for (i = 0; i <= unredir_tt_highwater; i++) {
2093      if (unredir_tt[i].inUse
2094          && overlaps( guest_start, range, &unredir_tt[i].vge))
2095         unredir_tt[i].inUse = False;
2096   }
2097}
2098
2099
2100/*------------------------------------------------------------*/
2101/*--- Initialisation.                                      ---*/
2102/*------------------------------------------------------------*/
2103
/* One-time startup initialisation of the whole TT/TC machinery:
   sanity-checks basic type sizes and fast-cache layout, sizes the
   per-sector code areas from the tool's average-translation-size
   hint, zeroes all sector and eclass state, and initialises the fast
   cache and the unredirected TT/TC.  Must be called exactly once. */
void VG_(init_tt_tc) ( void )
{
   Int i, j, avg_codeszQ;

   vg_assert(!init_done);
   init_done = True;

   /* Otherwise lots of things go wrong... */
   vg_assert(sizeof(ULong) == 8);
   vg_assert(sizeof(Addr64) == 8);
   /* check fast cache entries really are 2 words long */
   vg_assert(sizeof(Addr) == sizeof(void*));
   vg_assert(sizeof(FastCacheEntry) == 2 * sizeof(Addr));
   /* check fast cache entries are packed back-to-back with no spaces */
   vg_assert(sizeof( VG_(tt_fast) ) == VG_TT_FAST_SIZE * sizeof(FastCacheEntry));
   /* check fast cache is aligned as we requested.  Not fatal if it
      isn't, but we might as well make sure. */
   vg_assert(VG_IS_16_ALIGNED( ((Addr) & VG_(tt_fast)[0]) ));

   if (VG_(clo_verbosity) > 2)
      VG_(message)(Vg_DebugMsg,
                   "TT/TC: VG_(init_tt_tc) "
                   "(startup of code management)\n");

   /* Figure out how big each tc area should be.  */
   /* One slot of ULong-units per entry for bookkeeping plus the
      average code size, rounded up to whole ULongs. */
   avg_codeszQ   = (VG_(details).avg_translation_sizeB + 7) / 8;
   tc_sector_szQ = N_TTES_PER_SECTOR_USABLE * (1 + avg_codeszQ);

   /* Ensure the calculated value is not way crazy. */
   vg_assert(tc_sector_szQ >= 2 * N_TTES_PER_SECTOR_USABLE);
   vg_assert(tc_sector_szQ <= 100 * N_TTES_PER_SECTOR_USABLE);

   /* Initialise the sectors */
   youngest_sector = 0;
   for (i = 0; i < N_SECTORS; i++) {
      sectors[i].tc = NULL;
      sectors[i].tt = NULL;
      sectors[i].tc_next = NULL;
      sectors[i].tt_n_inuse = 0;
      for (j = 0; j < ECLASS_N; j++) {
         sectors[i].ec2tte_size[j] = 0;
         sectors[i].ec2tte_used[j] = 0;
         sectors[i].ec2tte[j] = NULL;
      }
      sectors[i].host_extents = NULL;
   }

   /* Initialise the sector_search_order hint table. */
   for (i = 0; i < N_SECTORS; i++)
      sector_search_order[i] = -1;

   /* Initialise the fast cache. */
   invalidateFastCache();

   /* and the unredir tt/tc */
   init_unredir_tt_tc();

   if (VG_(clo_verbosity) > 2) {
      VG_(message)(Vg_DebugMsg,
         "TT/TC: cache: %d sectors of %d bytes each = %d total\n",
          N_SECTORS, 8 * tc_sector_szQ,
          N_SECTORS * 8 * tc_sector_szQ );
      VG_(message)(Vg_DebugMsg,
         "TT/TC: table: %d total entries, max occupancy %d (%d%%)\n",
         N_SECTORS * N_TTES_PER_SECTOR,
         N_SECTORS * N_TTES_PER_SECTOR_USABLE,
         SECTOR_TT_LIMIT_PERCENT );
   }

   VG_(debugLog)(2, "transtab",
      "cache: %d sectors of %d bytes each = %d total\n",
       N_SECTORS, 8 * tc_sector_szQ,
       N_SECTORS * 8 * tc_sector_szQ );
   VG_(debugLog)(2, "transtab",
      "table: %d total entries, max occupancy %d (%d%%)\n",
      N_SECTORS * N_TTES_PER_SECTOR,
      N_SECTORS * N_TTES_PER_SECTOR_USABLE,
      SECTOR_TT_LIMIT_PERCENT );
}
2183
2184
2185/*------------------------------------------------------------*/
2186/*--- Printing out statistics.                             ---*/
2187/*------------------------------------------------------------*/
2188
2189static ULong safe_idiv( ULong a, ULong b )
2190{
2191   return (b == 0 ? 0 : a / b);
2192}
2193
/* Return the running count of translations made (the "transtab: new"
   counter reported by VG_(print_tt_tc_stats)).  NOTE(review): the
   counter is wider than UInt and is silently truncated here --
   presumably overflow is not a practical concern; confirm. */
UInt VG_(get_bbs_translated) ( void )
{
   return n_in_count;
}
2198
2199void VG_(print_tt_tc_stats) ( void )
2200{
2201   VG_(message)(Vg_DebugMsg,
2202      "    tt/tc: %'llu tt lookups requiring %'llu probes\n",
2203      n_full_lookups, n_lookup_probes );
2204   VG_(message)(Vg_DebugMsg,
2205      "    tt/tc: %'llu fast-cache updates, %'llu flushes\n",
2206      n_fast_updates, n_fast_flushes );
2207
2208   VG_(message)(Vg_DebugMsg,
2209                " transtab: new        %'lld "
2210                "(%'llu -> %'llu; ratio %'llu:10) [%'llu scs]\n",
2211                n_in_count, n_in_osize, n_in_tsize,
2212                safe_idiv(10*n_in_tsize, n_in_osize),
2213                n_in_sc_count);
2214   VG_(message)(Vg_DebugMsg,
2215                " transtab: dumped     %'llu (%'llu -> ?" "?)\n",
2216                n_dump_count, n_dump_osize );
2217   VG_(message)(Vg_DebugMsg,
2218                " transtab: discarded  %'llu (%'llu -> ?" "?)\n",
2219                n_disc_count, n_disc_osize );
2220
2221   if (0) {
2222      Int i;
2223      VG_(printf)("\n");
2224      for (i = 0; i < ECLASS_N; i++) {
2225         VG_(printf)(" %4d", sectors[0].ec2tte_used[i]);
2226         if (i % 16 == 15)
2227            VG_(printf)("\n");
2228      }
2229      VG_(printf)("\n\n");
2230   }
2231}
2232
2233/*------------------------------------------------------------*/
2234/*--- Printing out of profiling results.                   ---*/
2235/*------------------------------------------------------------*/
2236
2237static ULong score ( TTEntry* tte )
2238{
2239   return ((ULong)tte->weight) * ((ULong)tte->count);
2240}
2241
/* Fill tops[0 .. n_tops-1] with the n_tops highest-scoring in-use
   translations (descending by score; addr==0 marks an unused slot),
   and return the total score over all in-use translations.
   NOTE(review): 'i' and 'r' are Int while n_tops is UInt, so the
   comparisons below are signed/unsigned -- benign for realistic
   n_tops values, but worth confirming. */
ULong VG_(get_BB_profile) ( BBProfEntry tops[], UInt n_tops )
{
   Int   sno, i, r, s;
   ULong score_total;

   /* First, compute the total weighted count, and find the top N
      ttes.  tops contains pointers to the most-used n_tops blocks, in
      descending order (viz, tops[0] is the highest scorer). */
   for (i = 0; i < n_tops; i++) {
      tops[i].addr  = 0;
      tops[i].score = 0;
   }

   score_total = 0;

   for (sno = 0; sno < N_SECTORS; sno++) {
      if (sectors[sno].tc == NULL)
         continue;
      for (i = 0; i < N_TTES_PER_SECTOR; i++) {
         if (sectors[sno].tt[i].status != InUse)
            continue;
         score_total += score(&sectors[sno].tt[i]);
         /* Find the rank for sectors[sno].tt[i]. */
         /* Walk r downwards past empty slots and lower scorers; on
            exit, r+1 is the insertion position. */
         r = n_tops-1;
         while (True) {
            if (r == -1)
               break;
             if (tops[r].addr == 0) {
               r--;
               continue;
             }
             if ( score(&sectors[sno].tt[i]) > tops[r].score ) {
                r--;
                continue;
             }
             break;
         }
         r++;
         vg_assert(r >= 0 && r <= n_tops);
         /* This bb should be placed at r, and bbs above it shifted
            upwards one slot. */
         if (r < n_tops) {
            for (s = n_tops-1; s > r; s--)
               tops[s] = tops[s-1];
            tops[r].addr  = sectors[sno].tt[i].entry;
            tops[r].score = score( &sectors[sno].tt[i] );
         }
      }
   }

   return score_total;
}
2294
2295/*--------------------------------------------------------------------*/
2296/*--- end                                                          ---*/
2297/*--------------------------------------------------------------------*/
2298