m_transtab.c revision e808930793aeddc4cfd3e7a94b665913bec2566c
1
2/*--------------------------------------------------------------------*/
3/*--- Management of the translation table and cache.               ---*/
4/*---                                                 m_transtab.c ---*/
5/*--------------------------------------------------------------------*/
6
7/*
8   This file is part of Valgrind, a dynamic binary instrumentation
9   framework.
10
11   Copyright (C) 2000-2006 Julian Seward
12      jseward@acm.org
13
14   This program is free software; you can redistribute it and/or
15   modify it under the terms of the GNU General Public License as
16   published by the Free Software Foundation; either version 2 of the
17   License, or (at your option) any later version.
18
19   This program is distributed in the hope that it will be useful, but
20   WITHOUT ANY WARRANTY; without even the implied warranty of
21   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22   General Public License for more details.
23
24   You should have received a copy of the GNU General Public License
25   along with this program; if not, write to the Free Software
26   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27   02111-1307, USA.
28
29   The GNU General Public License is contained in the file COPYING.
30*/
31
32#include "pub_core_basics.h"
33#include "pub_core_debuglog.h"
34#include "pub_core_machine.h"    // For VG(machine_get_VexArchInfo)
35#include "pub_core_libcbase.h"
36#include "pub_core_libcassert.h"
37#include "pub_core_libcprint.h"
38#include "pub_core_options.h"
39#include "pub_core_tooliface.h"  // For VG_(details).avg_translation_sizeB
40#include "pub_core_transtab.h"
41#include "pub_core_aspacemgr.h"
42#include "pub_core_mallocfree.h" // VG_(out_of_memory_NORETURN)
43
44/* #define DEBUG_TRANSTAB */
45
46
47/*-------------------------------------------------------------*/
48/*--- Management of the FIFO-based translation table+cache. ---*/
49/*-------------------------------------------------------------*/
50
51/*------------------ CONSTANTS ------------------*/
52
53/* Number of sectors the TC is divided into.  If you need a larger
54   overall translation cache, increase this value. */
55#define N_SECTORS 8
56
57/* Number of TC entries in each sector.  This needs to be a prime
58   number to work properly, it must be <= 65535 (so that a TT index
59   fits in a UShort, leaving room for 0xFFFF(EC2TTE_DELETED) to denote
60   'deleted') and it is strongly recommended not to change this.
61   65521 is the largest prime <= 65535. */
62#define N_TTES_PER_SECTOR /*30011*/ /*40009*/ 65521
63
64/* Because each sector contains a hash table of TTEntries, we need to
65   specify the maximum allowable loading, after which the sector is
66   deemed full. */
67#define SECTOR_TT_LIMIT_PERCENT 80
68
69/* The sector is deemed full when this many entries are in it. */
70#define N_TTES_PER_SECTOR_USABLE \
71           ((N_TTES_PER_SECTOR * SECTOR_TT_LIMIT_PERCENT) / 100)
72
73/* Equivalence classes for fast address range deletion.  There are 1 +
74   2^ECLASS_WIDTH bins.  The highest one, ECLASS_MISC, describes an
75   address range which does not fall cleanly within any specific bin.
76   Note that ECLASS_SHIFT + ECLASS_WIDTH must be < 32. */
77#define ECLASS_SHIFT 11
78#define ECLASS_WIDTH 8
79#define ECLASS_MISC  (1 << ECLASS_WIDTH)
80#define ECLASS_N     (1 + ECLASS_MISC)
81
82#define EC2TTE_DELETED  0xFFFF /* 16-bit special value */
83
84
85/*------------------ TYPES ------------------*/
86
87/* A translation-cache entry is two parts:
88   - The guest address of the first (entry) bb in the translation,
89     as a 64-bit word.
90   - One or more 64-bit words containing the code.
91   It is supposed to be 64-bit aligned.
92*/
93/*
94typedef
95   struct {
96      Addr64 orig_addr;
97      ULong  code[0];
98   }
99   TCEntry;
100*/
101
102/* A translation-table entry.  This indicates precisely which areas of
103   guest code are included in the translation, and contains all other
104   auxiliary info too.  */
105typedef
106   struct {
107      /* Profiling only: the count and weight (arbitrary meaning) for
108         this translation.  Weight is a property of the translation
109         itself and computed once when the translation is created.
110         Count is an entry count for the translation and is
111         incremented by 1 every time the translation is used, if we
112         are profiling. */
113      UInt     count;
114      UShort   weight;
115
116      /* Status of the slot.  Note, we need to be able to do lazy
117         deletion, hence the Deleted state. */
118      enum { InUse, Deleted, Empty } status;
119
120      /* Pointer to the corresponding TCEntry (must be in the same
121         sector!) */
122      ULong* tce;
123
124      /* This is the original guest address that purportedly is the
125         entry point of the translation.  You might think that .entry
126         should be the same as .vge->base[0], and most of the time it
127         is.  However, when doing redirections, that is not the case.
128         .vge must always correctly describe the guest code sections
129         from which this translation was made.  However, .entry may or
130         may not be a lie, depending on whether or not we're doing
131         redirection. */
132      Addr64 entry;
133
134      /* This structure describes precisely what ranges of guest code
135         the translation covers, so we can decide whether or not to
136         delete it when translations of a given address range are
137         invalidated. */
138      VexGuestExtents vge;
139
140      /* Address range summary info: these are pointers back to
141         eclass[] entries in the containing Sector.  Those entries in
142         turn point back here -- the two structures are mutually
143         redundant but both necessary to make fast deletions work.
144         The eclass info is similar to, and derived from, this entry's
145         'vge' field, but it is not the same */
146      UShort n_tte2ec;      // # tte2ec pointers (1 to 3)
147      UShort tte2ec_ec[3];  // for each, the eclass #
148      UInt   tte2ec_ix[3];  // and the index within the eclass.
149      // for i in 0 .. n_tte2ec-1
150      //    sec->ec2tte[ tte2ec_ec[i] ][ tte2ec_ix[i] ]
151      // should be the index
152      // of this TTEntry in the containing Sector's tt array.
153   }
154   TTEntry;
155
156
157/* Finally, a sector itself.  Each sector contains an array of
158   TCEntries, which hold code, and an array of TTEntries, containing
159   all required administrative info.  Profiling is supported using the
160   TTEntry .count and .weight fields, if required.  Each sector is
161   independent in that no cross-sector references are allowed.
162
163   If the sector is not in use, all three pointers are NULL and
164   tt_n_inuse is zero.
165*/
166typedef
167   struct {
168      /* The TCEntry area.  Size of this depends on the average
169         translation size.  We try and size it so it becomes full
170         precisely when this sector's translation table (tt) reaches
171         its load limit (SECTOR_TT_LIMIT_PERCENT). */
172      ULong* tc;
173
174      /* The TTEntry array.  This is a fixed size, always containing
175         exactly N_TTES_PER_SECTOR entries. */
176      TTEntry* tt;
177
178      /* This points to the current allocation point in tc. */
179      ULong* tc_next;
180
181      /* The count of tt entries with state InUse. */
182      Int tt_n_inuse;
183
184      /* Expandable arrays of tt indices for each of the ECLASS_N
185         address range equivalence classes.  These hold indices into
186         the containing sector's tt array, which in turn should point
187         back here. */
188      Int     ec2tte_size[ECLASS_N];
189      Int     ec2tte_used[ECLASS_N];
190      UShort* ec2tte[ECLASS_N];
191   }
192   Sector;
193
194
195/*------------------ DECLS ------------------*/
196
197/* The root data structure is an array of sectors.  The index of the
198   youngest sector is recorded, and new translations are put into that
199   sector.  When it fills up, we move along to the next sector and
200   start to fill that up, wrapping around at the end of the array.
201   That way, once all N_TC_SECTORS have been bought into use for the
202   first time, and are full, we then re-use the oldest sector,
203   endlessly.
204
205   When running, youngest sector should be between >= 0 and <
206   N_TC_SECTORS.  The initial -1 value indicates the TT/TC system is
207   not yet initialised.
208*/
209static Sector sectors[N_SECTORS];
210static Int    youngest_sector = -1;
211
212/* The number of ULongs in each TCEntry area.  This is computed once
213   at startup and does not change. */
214static Int    tc_sector_szQ;
215
216
217/* Fast helper for the TC.  A direct-mapped cache which holds a
218   pointer to a TC entry which may or may not be the correct one, but
219   which we hope usually is.  This array is referred to directly from
220   <arch>/dispatch.S.
221
222   Entries in tt_fast may point to any valid TC entry, regardless of
223   which sector it's in.  Consequently we must be very careful to
224   invalidate this cache when TC entries are changed or disappear.
225
226   A special TCEntry -- bogus_tc_entry -- must be pointed at to cause
227   that cache entry to miss.  This relies on the assumption that no
228   guest code actually has an address of 0x1.
229*/
230/*global*/ ULong* VG_(tt_fast)[VG_TT_FAST_SIZE];
231
232static ULong bogus_tc_entry = (Addr64)1;
233
234
235/* For profiling, we have a parallel array of pointers to .count
236   fields in TT entries.  Again, these pointers must be invalidated
237   when translations disappear.  A NULL pointer suffices to indicate
238   an unused slot.
239
240   tt_fast and tt_fastN change together: if tt_fast[i] points to
241   bogus_tc_entry then the corresponding tt_fastN[i] must be null.  If
242   tt_fast[i] points to some TC entry somewhere, then tt_fastN[i]
243   *must* point to the .count field of the corresponding TT entry.
244
245   tt_fast and tt_fastN are referred to from assembly code
246   (dispatch.S).
247*/
248/*global*/ UInt* VG_(tt_fastN)[VG_TT_FAST_SIZE];
249
250
251/* Make sure we're not used before initialisation. */
252static Bool init_done = False;
253
254
255/*------------------ STATS DECLS ------------------*/
256
257/* Number of fast-cache updates and flushes done. */
258ULong n_fast_flushes = 0;
259ULong n_fast_updates = 0;
260
261/* Number of full lookups done. */
262ULong n_full_lookups = 0;
263ULong n_lookup_probes = 0;
264
265/* Number/osize/tsize of translations entered; also the number of
266   those for which self-checking was requested. */
267ULong n_in_count    = 0;
268ULong n_in_osize    = 0;
269ULong n_in_tsize    = 0;
270ULong n_in_sc_count = 0;
271
272/* Number/osize of translations discarded due to lack of space. */
273ULong n_dump_count = 0;
274ULong n_dump_osize = 0;
275
276/* Number/osize of translations discarded due to requests to do so. */
277ULong n_disc_count = 0;
278ULong n_disc_osize = 0;
279
280
281/*-------------------------------------------------------------*/
282/*--- Address-range equivalence class stuff                 ---*/
283/*-------------------------------------------------------------*/
284
285/* Return equivalence class number for a range. */
286
287static Int range_to_eclass ( Addr64 start, UInt len )
288{
289   UInt mask   = (1 << ECLASS_WIDTH) - 1;
290   UInt lo     = (UInt)start;
291   UInt hi     = lo + len - 1;
292   UInt loBits = (lo >> ECLASS_SHIFT) & mask;
293   UInt hiBits = (hi >> ECLASS_SHIFT) & mask;
294   if (loBits == hiBits) {
295      vg_assert(loBits < ECLASS_N-1);
296      return loBits;
297   } else {
298      return ECLASS_MISC;
299   }
300}
301
302
303/* Calculates the equivalence class numbers for any VexGuestExtent.
304   These are written in *eclasses, which must be big enough to hold 3
305   Ints.  The number written, between 1 and 3, is returned.  The
306   eclasses are presented in order, and any duplicates are removed.
307*/
308
309static
310Int vexGuestExtents_to_eclasses ( /*OUT*/Int* eclasses,
311                                  VexGuestExtents* vge )
312{
313#  define SWAP(_lv1,_lv2) \
314      do { Int t = _lv1; _lv1 = _lv2; _lv2 = t; } while (0)
315
316   Int i, j, n_ec, r;
317
318   vg_assert(vge->n_used >= 1 && vge->n_used <= 3);
319
320   n_ec = 0;
321   for (i = 0; i < vge->n_used; i++) {
322      r = range_to_eclass( vge->base[i], (UInt)vge->len[i] );
323      if (r == ECLASS_MISC)
324         goto bad;
325      /* only add if we haven't already seen it */
326      for (j = 0; j < n_ec; j++)
327         if (eclasses[j] == r)
328            break;
329      if (j == n_ec)
330         eclasses[n_ec++] = r;
331   }
332
333   if (n_ec == 1)
334      return 1;
335
336   if (n_ec == 2) {
337      /* sort */
338      if (eclasses[0] > eclasses[1])
339         SWAP(eclasses[0], eclasses[1]);
340      return 2;
341   }
342
343   if (n_ec == 3) {
344      /* sort */
345      if (eclasses[0] > eclasses[2])
346         SWAP(eclasses[0], eclasses[2]);
347      if (eclasses[0] > eclasses[1])
348         SWAP(eclasses[0], eclasses[1]);
349      if (eclasses[1] > eclasses[2])
350         SWAP(eclasses[1], eclasses[2]);
351      return 3;
352   }
353
354   /* NOTREACHED */
355   vg_assert(0);
356
357  bad:
358   eclasses[0] = ECLASS_MISC;
359   return 1;
360
361#  undef SWAP
362}
363
364
365/* Add tteno to the set of entries listed for equivalence class ec in
366   this sector.  Returns used location in eclass array. */
367
368static
369UInt addEClassNo ( /*MOD*/Sector* sec, Int ec, UShort tteno )
370{
371   Int    old_sz, new_sz, i, r;
372   UShort *old_ar, *new_ar;
373
374   vg_assert(ec >= 0 && ec < ECLASS_N);
375   vg_assert(tteno < N_TTES_PER_SECTOR);
376
377   if (0) VG_(printf)("ec %d  gets %d\n", ec, (Int)tteno);
378
379   if (sec->ec2tte_used[ec] >= sec->ec2tte_size[ec]) {
380
381      vg_assert(sec->ec2tte_used[ec] == sec->ec2tte_size[ec]);
382
383      old_sz = sec->ec2tte_size[ec];
384      old_ar = sec->ec2tte[ec];
385      new_sz = old_sz==0 ? 8 : old_sz<64 ? 2*old_sz : (3*old_sz)/2;
386      new_ar = VG_(arena_malloc)(VG_AR_TTAUX, new_sz * sizeof(UShort));
387      for (i = 0; i < old_sz; i++)
388         new_ar[i] = old_ar[i];
389      if (old_ar)
390         VG_(arena_free)(VG_AR_TTAUX, old_ar);
391      sec->ec2tte_size[ec] = new_sz;
392      sec->ec2tte[ec] = new_ar;
393
394      if (0) VG_(printf)("expand ec %d to %d\n", ec, new_sz);
395   }
396
397   /* Common case */
398   r = sec->ec2tte_used[ec]++;
399   vg_assert(r >= 0 && r < sec->ec2tte_size[ec]);
400   sec->ec2tte[ec][r] = tteno;
401   return (UInt)r;
402}
403
404
405/* 'vge' is being added to 'sec' at TT entry 'tteno'.  Add appropriate
406   eclass entries to 'sec'. */
407
408static
409void upd_eclasses_after_add ( /*MOD*/Sector* sec, Int tteno )
410{
411   Int i, r, eclasses[3];
412   TTEntry* tte;
413   vg_assert(tteno >= 0 && tteno < N_TTES_PER_SECTOR);
414
415   tte = &sec->tt[tteno];
416   r = vexGuestExtents_to_eclasses( eclasses, &tte->vge );
417
418   vg_assert(r >= 1 && r <= 3);
419   tte->n_tte2ec = r;
420
421   for (i = 0; i < r; i++) {
422      tte->tte2ec_ec[i] = eclasses[i];
423      tte->tte2ec_ix[i] = addEClassNo( sec, eclasses[i], (UShort)tteno );
424   }
425}
426
427
428/* Check the eclass info in 'sec' to ensure it is consistent.  Returns
429   True if OK, False if something's not right.  Expensive. */
430
431static Bool sanity_check_eclasses_in_sector ( Sector* sec )
432{
433#  define BAD(_str) do { whassup = (_str); goto bad; } while (0)
434
435   HChar*   whassup = NULL;
436   Int      i, j, k, n, ec_num, ec_idx;
437   TTEntry* tte;
438   UShort   tteno;
439   ULong*   tce;
440
441   /* Basic checks on this sector */
442   if (sec->tt_n_inuse < 0 || sec->tt_n_inuse > N_TTES_PER_SECTOR_USABLE)
443      BAD("invalid sec->tt_n_inuse");
444   tce = sec->tc_next;
445   if (tce < &sec->tc[0] || tce > &sec->tc[tc_sector_szQ])
446      BAD("sec->tc_next points outside tc");
447
448   /* For each eclass ... */
449   for (i = 0; i < ECLASS_N; i++) {
450      if (sec->ec2tte_size[i] == 0 && sec->ec2tte[i] != NULL)
451         BAD("ec2tte_size/ec2tte mismatch(1)");
452      if (sec->ec2tte_size[i] != 0 && sec->ec2tte[i] == NULL)
453         BAD("ec2tte_size/ec2tte mismatch(2)");
454      if (sec->ec2tte_used[i] < 0
455          || sec->ec2tte_used[i] > sec->ec2tte_size[i])
456         BAD("implausible ec2tte_used");
457      if (sec->ec2tte_used[i] == 0)
458         continue;
459
460      /* For each tt reference in each eclass .. ensure the reference
461         is to a valid tt entry, and that the entry's address ranges
462         really include this eclass. */
463
464      for (j = 0; j < sec->ec2tte_used[i]; j++) {
465         tteno = sec->ec2tte[i][j];
466         if (tteno == EC2TTE_DELETED)
467            continue;
468         if (tteno >= N_TTES_PER_SECTOR)
469            BAD("implausible tteno");
470         tte = &sec->tt[tteno];
471         if (tte->status != InUse)
472            BAD("tteno points to non-inuse tte");
473         if (tte->n_tte2ec < 1 || tte->n_tte2ec > 3)
474            BAD("tte->n_tte2ec out of range");
475         /* Exactly least one of tte->eclasses[0 .. tte->n_eclasses-1]
476            must equal i.  Inspect tte's eclass info. */
477         n = 0;
478         for (k = 0; k < tte->n_tte2ec; k++) {
479            if (k < tte->n_tte2ec-1
480                && tte->tte2ec_ec[k] >= tte->tte2ec_ec[k+1])
481               BAD("tte->tte2ec_ec[..] out of order");
482            ec_num = tte->tte2ec_ec[k];
483            if (ec_num < 0 || ec_num >= ECLASS_N)
484               BAD("tte->tte2ec_ec[..] out of range");
485            if (ec_num != i)
486               continue;
487            ec_idx = tte->tte2ec_ix[k];
488            if (ec_idx < 0 || ec_idx >= sec->ec2tte_used[i])
489               BAD("tte->tte2ec_ix[..] out of range");
490            if (ec_idx == j)
491               n++;
492         }
493         if (n != 1)
494            BAD("tteno does not point back at eclass");
495      }
496   }
497
498   /* That establishes that for each forward pointer from TTEntrys
499      there is a corresponding backward pointer from the eclass[]
500      arrays.  However, it doesn't rule out the possibility of other,
501      bogus pointers in the eclass[] arrays.  So do those similarly:
502      scan through them and check the TTEntryies they point at point
503      back. */
504
505   for (i = 0; i < N_TTES_PER_SECTOR_USABLE; i++) {
506
507      tte = &sec->tt[i];
508      if (tte->status == Empty || tte->status == Deleted) {
509         if (tte->n_tte2ec != 0)
510            BAD("tte->n_eclasses nonzero for unused tte");
511         continue;
512      }
513
514      vg_assert(tte->status == InUse);
515
516      if (tte->n_tte2ec < 1 || tte->n_tte2ec > 3)
517         BAD("tte->n_eclasses out of range(2)");
518
519      for (j = 0; j < tte->n_tte2ec; j++) {
520         ec_num = tte->tte2ec_ec[j];
521         if (ec_num < 0 || ec_num >= ECLASS_N)
522            BAD("tte->eclass[..] out of range");
523         ec_idx = tte->tte2ec_ix[j];
524         if (ec_idx < 0 || ec_idx >= sec->ec2tte_used[ec_num])
525            BAD("tte->ec_idx[..] out of range(2)");
526         if (sec->ec2tte[ec_num][ec_idx] != i)
527            BAD("ec2tte does not point back to tte");
528      }
529   }
530
531   return True;
532
533  bad:
534   if (whassup)
535      VG_(debugLog)(0, "transtab", "eclass sanity fail: %s\n", whassup);
536
537#  if 0
538   VG_(printf)("eclass = %d\n", i);
539   VG_(printf)("tteno = %d\n", (Int)tteno);
540   switch (tte->status) {
541      case InUse:   VG_(printf)("InUse\n"); break;
542      case Deleted: VG_(printf)("Deleted\n"); break;
543      case Empty:   VG_(printf)("Empty\n"); break;
544   }
545   if (tte->status != Empty) {
546      for (k = 0; k < tte->vge.n_used; k++)
547         VG_(printf)("0x%llx %d\n", tte->vge.base[k],
548                                    (Int)tte->vge.len[k]);
549   }
550#  endif
551
552   return False;
553
554#  undef BAD
555}
556
557
558/* Sanity check absolutely everything.  True == check passed. */
559
560/* forward */
561static Bool sanity_check_redir_tt_tc ( void );
562
563static Bool sanity_check_all_sectors ( void )
564{
565   Int     sno;
566   Bool    sane;
567   Sector* sec;
568   for (sno = 0; sno < N_SECTORS; sno++) {
569      sec = &sectors[sno];
570      if (sec->tc == NULL)
571         continue;
572      sane = sanity_check_eclasses_in_sector( sec );
573      if (!sane)
574         return False;
575   }
576   if (!sanity_check_redir_tt_tc() )
577      return False;
578   return True;
579}
580
581
582/*-------------------------------------------------------------*/
583/*--- Add/find translations                                 ---*/
584/*-------------------------------------------------------------*/
585
586static UInt vge_osize ( VexGuestExtents* vge )
587{
588   UInt i, n = 0;
589   for (i = 0; i < vge->n_used; i++)
590      n += (UInt)vge->len[i];
591   return n;
592}
593
594static Bool isValidSector ( Int sector )
595{
596   if (sector < 0 || sector >= N_SECTORS)
597      return False;
598   return True;
599}
600
601static inline UInt HASH_TT ( Addr64 key )
602{
603   UInt kHi = (UInt)(key >> 32);
604   UInt kLo = (UInt)key;
605   UInt k32 = kHi ^ kLo;
606   UInt ror = 7;
607   if (ror > 0)
608      k32 = (k32 >> ror) | (k32 << (32-ror));
609   return k32 % N_TTES_PER_SECTOR;
610}
611
612static void setFastCacheEntry ( Addr64 key, ULong* tce, UInt* count )
613{
614   UInt cno = (UInt)VG_TT_FAST_HASH(key);
615   VG_(tt_fast)[cno]  = tce;
616   VG_(tt_fastN)[cno] = count;
617   n_fast_updates++;
618}
619
620static void invalidateFastCache ( void )
621{
622   UInt j;
623   /* This loop is popular enough to make it worth unrolling a
624      bit, at least on ppc32. */
625   vg_assert(VG_TT_FAST_SIZE > 0 && (VG_TT_FAST_SIZE % 4) == 0);
626   for (j = 0; j < VG_TT_FAST_SIZE; j += 4) {
627      VG_(tt_fast)[j+0]  = &bogus_tc_entry;
628      VG_(tt_fast)[j+1]  = &bogus_tc_entry;
629      VG_(tt_fast)[j+2]  = &bogus_tc_entry;
630      VG_(tt_fast)[j+3]  = &bogus_tc_entry;
631      VG_(tt_fastN)[j+0] = NULL;
632      VG_(tt_fastN)[j+1] = NULL;
633      VG_(tt_fastN)[j+2] = NULL;
634      VG_(tt_fastN)[j+3] = NULL;
635   }
636   vg_assert(j == VG_TT_FAST_SIZE);
637   n_fast_flushes++;
638}
639
640static void initialiseSector ( Int sno )
641{
642   Int    i;
643   SysRes sres;
644   Sector* sec;
645   vg_assert(isValidSector(sno));
646
647   sec = &sectors[sno];
648
649   if (sec->tc == NULL) {
650
651      /* Sector has never been used before.  Need to allocate tt and
652         tc. */
653      vg_assert(sec->tt == NULL);
654      vg_assert(sec->tc_next == NULL);
655      vg_assert(sec->tt_n_inuse == 0);
656      for (i = 0; i < ECLASS_N; i++) {
657         vg_assert(sec->ec2tte_size[i] == 0);
658         vg_assert(sec->ec2tte_used[i] == 0);
659         vg_assert(sec->ec2tte[i] == NULL);
660      }
661
662      VG_(debugLog)(1,"transtab", "allocate sector %d\n", sno);
663
664      sres = VG_(am_mmap_anon_float_valgrind)( 8 * tc_sector_szQ );
665      if (sres.isError) {
666         VG_(out_of_memory_NORETURN)("initialiseSector(TC)",
667                                     8 * tc_sector_szQ );
668	 /*NOTREACHED*/
669      }
670      sec->tc = (ULong*)sres.res;
671
672      sres = VG_(am_mmap_anon_float_valgrind)
673                ( N_TTES_PER_SECTOR * sizeof(TTEntry) );
674      if (sres.isError) {
675         VG_(out_of_memory_NORETURN)("initialiseSector(TT)",
676                                     N_TTES_PER_SECTOR * sizeof(TTEntry) );
677	 /*NOTREACHED*/
678      }
679      sec->tt = (TTEntry*)sres.res;
680
681      for (i = 0; i < N_TTES_PER_SECTOR; i++) {
682         sec->tt[i].status   = Empty;
683         sec->tt[i].n_tte2ec = 0;
684      }
685
686      if (VG_(clo_verbosity) > 2)
687         VG_(message)(Vg_DebugMsg, "TT/TC: initialise sector %d", sno);
688
689   } else {
690
691      /* Sector has been used before.  Dump the old contents. */
692      VG_(debugLog)(1,"transtab", "recycle sector %d\n", sno);
693      vg_assert(sec->tt != NULL);
694      vg_assert(sec->tc_next != NULL);
695      n_dump_count += sec->tt_n_inuse;
696
697      /* Visit each just-about-to-be-abandoned translation. */
698      for (i = 0; i < N_TTES_PER_SECTOR; i++) {
699         if (sec->tt[i].status == InUse) {
700            vg_assert(sec->tt[i].n_tte2ec >= 1);
701            vg_assert(sec->tt[i].n_tte2ec <= 3);
702            n_dump_osize += vge_osize(&sec->tt[i].vge);
703            /* Tell the tool too. */
704            if (VG_(needs).basic_block_discards) {
705               VG_TDICT_CALL( tool_discard_basic_block_info,
706                              sec->tt[i].entry,
707                              sec->tt[i].vge );
708            }
709         } else {
710            vg_assert(sec->tt[i].n_tte2ec == 0);
711         }
712         sec->tt[i].status   = Empty;
713         sec->tt[i].n_tte2ec = 0;
714      }
715
716      /* Free up the eclass structures. */
717      for (i = 0; i < ECLASS_N; i++) {
718         if (sec->ec2tte_size[i] == 0) {
719            vg_assert(sec->ec2tte_used[i] == 0);
720            vg_assert(sec->ec2tte[i] == NULL);
721         } else {
722            vg_assert(sec->ec2tte[i] != NULL);
723            VG_(arena_free)(VG_AR_TTAUX, sec->ec2tte[i]);
724            sec->ec2tte[i] = NULL;
725            sec->ec2tte_size[i] = 0;
726            sec->ec2tte_used[i] = 0;
727         }
728      }
729
730      if (VG_(clo_verbosity) > 2)
731         VG_(message)(Vg_DebugMsg, "TT/TC: recycle sector %d", sno);
732   }
733
734   sec->tc_next = sec->tc;
735   sec->tt_n_inuse = 0;
736
737   invalidateFastCache();
738}
739
740static void invalidate_icache ( void *ptr, Int nbytes )
741{
742#  if defined(VGA_ppc32) || defined(VGA_ppc64)
743   Addr startaddr = (Addr) ptr;
744   Addr endaddr   = startaddr + nbytes;
745   Addr cls;
746   Addr addr;
747   VexArchInfo vai;
748
749   VG_(machine_get_VexArchInfo)( NULL, &vai );
750   cls = vai.ppc_cache_line_szB;
751
752   /* Stay sane .. */
753   vg_assert(cls == 32 || cls == 128);
754
755   startaddr &= ~(cls - 1);
756   for (addr = startaddr; addr < endaddr; addr += cls)
757      asm volatile("dcbst 0,%0" : : "r" (addr));
758   asm volatile("sync");
759   for (addr = startaddr; addr < endaddr; addr += cls)
760      asm volatile("icbi 0,%0" : : "r" (addr));
761   asm volatile("sync; isync");
762
763#  elif defined(VGA_x86)
764   /* no need to do anything, hardware provides coherence */
765
766#  elif defined(VGA_amd64)
767   /* no need to do anything, hardware provides coherence */
768
769#  else
770#    error "Unknown ARCH"
771#  endif
772}
773
774
775/* Add a translation of vge to TT/TC.  The translation is temporarily
776   in code[0 .. code_len-1].
777
778   pre: youngest_sector points to a valid (although possibly full)
779   sector.
780*/
781void VG_(add_to_transtab)( VexGuestExtents* vge,
782                           Addr64           entry,
783                           AddrH            code,
784                           UInt             code_len,
785                           Bool             is_self_checking )
786{
787   Int    tcAvailQ, reqdQ, y, i;
788   ULong  *tce, *tce2;
789   UChar* srcP;
790   UChar* dstP;
791
792   vg_assert(init_done);
793   vg_assert(vge->n_used >= 1 && vge->n_used <= 3);
794
795   /* 60000: should agree with N_TMPBUF in m_translate.c. */
796   vg_assert(code_len > 0 && code_len < 60000);
797
798   if (0)
799      VG_(printf)("add_to_transtab(entry = 0x%llx, len = %d)\n",
800                  entry, code_len);
801
802   n_in_count++;
803   n_in_tsize += code_len;
804   n_in_osize += vge_osize(vge);
805   if (is_self_checking)
806      n_in_sc_count++;
807
808   y = youngest_sector;
809   vg_assert(isValidSector(y));
810
811   if (sectors[y].tc == NULL)
812      initialiseSector(y);
813
814   /* Try putting the translation in this sector. */
815   reqdQ = 1 + ((code_len + 7) >> 3);
816
817   /* Will it fit in tc? */
818   tcAvailQ = ((ULong*)(&sectors[y].tc[tc_sector_szQ]))
819              - ((ULong*)(sectors[y].tc_next));
820   vg_assert(tcAvailQ >= 0);
821   vg_assert(tcAvailQ <= tc_sector_szQ);
822
823   if (tcAvailQ < reqdQ
824       || sectors[y].tt_n_inuse >= N_TTES_PER_SECTOR_USABLE) {
825      /* No.  So move on to the next sector.  Either it's never been
826         used before, in which case it will get its tt/tc allocated
827         now, or it has been used before, in which case it is set to be
828         empty, hence throwing out the oldest sector. */
829      vg_assert(tc_sector_szQ > 0);
830      VG_(debugLog)(1,"transtab",
831                      "declare sector %d full "
832                      "(TT loading %2d%%, TC loading %2d%%)\n",
833                      y,
834                      (100 * sectors[y].tt_n_inuse)
835                         / N_TTES_PER_SECTOR,
836                      (100 * (tc_sector_szQ - tcAvailQ))
837                         / tc_sector_szQ);
838      youngest_sector++;
839      if (youngest_sector >= N_SECTORS)
840         youngest_sector = 0;
841      y = youngest_sector;
842      initialiseSector(y);
843   }
844
845   /* Be sure ... */
846   tcAvailQ = ((ULong*)(&sectors[y].tc[tc_sector_szQ]))
847              - ((ULong*)(sectors[y].tc_next));
848   vg_assert(tcAvailQ >= 0);
849   vg_assert(tcAvailQ <= tc_sector_szQ);
850   vg_assert(tcAvailQ >= reqdQ);
851   vg_assert(sectors[y].tt_n_inuse < N_TTES_PER_SECTOR_USABLE);
852   vg_assert(sectors[y].tt_n_inuse >= 0);
853
854   /* Copy into tc. */
855   tce = sectors[y].tc_next;
856   vg_assert(tce >= &sectors[y].tc[0]);
857   vg_assert(tce <= &sectors[y].tc[tc_sector_szQ]);
858
859   tce[0] = entry;
860   dstP = (UChar*)(&tce[1]);
861   srcP = (UChar*)code;
862   for (i = 0; i < code_len; i++)
863      dstP[i] = srcP[i];
864   sectors[y].tc_next += reqdQ;
865   sectors[y].tt_n_inuse++;
866
867   invalidate_icache( dstP, code_len );
868
869   /* more paranoia */
870   tce2 = sectors[y].tc_next;
871   vg_assert(tce2 >= &sectors[y].tc[0]);
872   vg_assert(tce2 <= &sectors[y].tc[tc_sector_szQ]);
873
874   /* Find an empty tt slot, and use it.  There must be such a slot
875      since tt is never allowed to get completely full. */
876   i = HASH_TT(entry);
877   vg_assert(i >= 0 && i < N_TTES_PER_SECTOR);
878   while (True) {
879      if (sectors[y].tt[i].status == Empty
880          || sectors[y].tt[i].status == Deleted)
881         break;
882      i++;
883      if (i >= N_TTES_PER_SECTOR)
884         i = 0;
885   }
886
887   sectors[y].tt[i].status = InUse;
888   sectors[y].tt[i].tce    = tce;
889   sectors[y].tt[i].count  = 0;
890   sectors[y].tt[i].weight = 1;
891   sectors[y].tt[i].vge    = *vge;
892   sectors[y].tt[i].entry  = entry;
893
894   /* Update the fast-cache. */
895   setFastCacheEntry( entry, tce, &sectors[y].tt[i].count );
896
897   /* Note the eclass numbers for this translation. */
898   upd_eclasses_after_add( &sectors[y], i );
899}
900
901
902/* Search for the translation of the given guest address.  If
903   requested, a successful search can also cause the fast-caches to be
904   updated.
905*/
906Bool VG_(search_transtab) ( /*OUT*/AddrH* result,
907                            Addr64        guest_addr,
908                            Bool          upd_cache )
909{
910   Int i, j, k, kstart, sno;
911
912   vg_assert(init_done);
913   /* Find the initial probe point just once.  It will be the same in
914      all sectors and avoids multiple expensive % operations. */
915   n_full_lookups++;
916   k      = -1;
917   kstart = HASH_TT(guest_addr);
918   vg_assert(kstart >= 0 && kstart < N_TTES_PER_SECTOR);
919
920   /* Search in all the sectors.  Although the order should not matter,
921      it might be most efficient to search in the order youngest to
922      oldest. */
923   sno = youngest_sector;
924   for (i = 0; i < N_SECTORS; i++) {
925
926      if (sectors[sno].tc == NULL)
927         goto notfound; /* sector not in use. */
928
929      k = kstart;
930      for (j = 0; j < N_TTES_PER_SECTOR; j++) {
931         n_lookup_probes++;
932         if (sectors[sno].tt[k].status == InUse
933             && sectors[sno].tt[k].entry == guest_addr) {
934            /* found it */
935            if (upd_cache)
936               setFastCacheEntry(
937                  guest_addr, sectors[sno].tt[k].tce,
938                              &sectors[sno].tt[k].count );
939            if (result)
940               *result = sizeof(Addr64) + (AddrH)sectors[sno].tt[k].tce;
941            return True;
942         }
943         if (sectors[sno].tt[k].status == Empty)
944            break; /* not found in this sector */
945         k++;
946         if (k == N_TTES_PER_SECTOR)
947            k = 0;
948      }
949
950      /* If we fall off the end, all entries are InUse and not
951         matching, or Deleted.  In any case we did not find it in this
952         sector. */
953
954     notfound:
955      /* move to the next oldest sector */
956      sno = sno==0 ? (N_SECTORS-1) : (sno-1);
957   }
958
959   /* Not found in any sector. */
960   return False;
961}
962
963
964/*-------------------------------------------------------------*/
965/*--- Delete translations.                                  ---*/
966/*-------------------------------------------------------------*/
967
968/* forward */
969static void unredir_discard_translations( Addr64, ULong );
970
971/* Stuff for deleting translations which intersect with a given
972   address range.  Unfortunately, to make this run at a reasonable
973   speed, it is complex. */
974
975static inline
976Bool overlap1 ( Addr64 s1, ULong r1, Addr64 s2, ULong r2 )
977{
978   Addr64 e1 = s1 + r1 - 1ULL;
979   Addr64 e2 = s2 + r2 - 1ULL;
980   if (e1 < s2 || e2 < s1)
981      return False;
982   return True;
983}
984
985static inline
986Bool overlaps ( Addr64 start, ULong range, VexGuestExtents* vge )
987{
988   if (overlap1(start, range, vge->base[0], (UInt)vge->len[0]))
989      return True;
990   if (vge->n_used < 2)
991      return False;
992   if (overlap1(start, range, vge->base[1], (UInt)vge->len[1]))
993      return True;
994   if (vge->n_used < 3)
995      return False;
996   if (overlap1(start, range, vge->base[2], (UInt)vge->len[2]))
997      return True;
998   return False;
999}
1000
1001
1002/* Delete a tt entry, and update all the eclass data accordingly. */
1003
1004static void delete_tte ( /*MOD*/Sector* sec, Int tteno )
1005{
1006   Int      i, ec_num, ec_idx;
1007   TTEntry* tte;
1008
1009   vg_assert(tteno >= 0 && tteno < N_TTES_PER_SECTOR);
1010   tte = &sec->tt[tteno];
1011   vg_assert(tte->status == InUse);
1012   vg_assert(tte->n_tte2ec >= 1 && tte->n_tte2ec <= 3);
1013
1014   /* Deal with the ec-to-tte links first. */
1015   for (i = 0; i < tte->n_tte2ec; i++) {
1016      ec_num = (Int)tte->tte2ec_ec[i];
1017      ec_idx = tte->tte2ec_ix[i];
1018      vg_assert(ec_num >= 0 && ec_num < ECLASS_N);
1019      vg_assert(ec_idx >= 0);
1020      vg_assert(ec_idx < sec->ec2tte_used[ec_num]);
1021      /* Assert that the two links point at each other. */
1022      vg_assert(sec->ec2tte[ec_num][ec_idx] == (UShort)tteno);
1023      /* "delete" the pointer back to here. */
1024      sec->ec2tte[ec_num][ec_idx] = EC2TTE_DELETED;
1025   }
1026
1027   /* Now fix up this TTEntry. */
1028   tte->status   = Deleted;
1029   tte->n_tte2ec = 0;
1030
1031   /* Stats .. */
1032   sec->tt_n_inuse--;
1033   n_disc_count++;
1034   n_disc_osize += vge_osize(&tte->vge);
1035
1036   /* Tell the tool too. */
1037   if (VG_(needs).basic_block_discards) {
1038      VG_TDICT_CALL( tool_discard_basic_block_info,
1039                     tte->entry,
1040                     tte->vge );
1041   }
1042}
1043
1044
1045/* Delete translations from sec which intersect specified range, but
1046   only consider translations in the specified eclass. */
1047
1048static
1049Bool delete_translations_in_sector_eclass ( /*MOD*/Sector* sec,
1050                                            Addr64 guest_start, ULong range,
1051                                            Int ec )
1052{
1053   Int      i;
1054   UShort   tteno;
1055   Bool     anyDeld = False;
1056   TTEntry* tte;
1057
1058   vg_assert(ec >= 0 && ec < ECLASS_N);
1059
1060   for (i = 0; i < sec->ec2tte_used[ec]; i++) {
1061
1062      tteno = sec->ec2tte[ec][i];
1063      if (tteno == EC2TTE_DELETED) {
1064         /* already deleted */
1065         continue;
1066      }
1067
1068      vg_assert(tteno < N_TTES_PER_SECTOR);
1069
1070      tte = &sec->tt[tteno];
1071      vg_assert(tte->status == InUse);
1072
1073      if (overlaps( guest_start, range, &tte->vge )) {
1074         anyDeld = True;
1075         delete_tte( sec, (Int)tteno );
1076      }
1077
1078   }
1079
1080   return anyDeld;
1081}
1082
1083
1084/* Delete translations from sec which intersect specified range, the
1085   slow way, by inspecting all translations in sec. */
1086
1087static
1088Bool delete_translations_in_sector ( /*MOD*/Sector* sec,
1089                                     Addr64 guest_start, ULong range )
1090{
1091   Int  i;
1092   Bool anyDeld = False;
1093
1094   for (i = 0; i < N_TTES_PER_SECTOR; i++) {
1095      if (sec->tt[i].status == InUse
1096          && overlaps( guest_start, range, &sec->tt[i].vge )) {
1097         anyDeld = True;
1098         delete_tte( sec, i );
1099      }
1100   }
1101
1102   return anyDeld;
1103}
1104
1105
1106void VG_(discard_translations) ( Addr64 guest_start, ULong range,
1107                                 HChar* who )
1108{
1109   Sector* sec;
1110   Int     sno, ec;
1111   Bool    anyDeleted = False;
1112
1113   vg_assert(init_done);
1114
1115   VG_(debugLog)(2, "transtab",
1116                    "discard_translations(0x%llx, %lld) req by %s\n",
1117                    guest_start, range, who );
1118
1119   /* Pre-deletion sanity check */
1120   if (VG_(clo_sanity_level >= 4)) {
1121      Bool sane = sanity_check_all_sectors();
1122      vg_assert(sane);
1123   }
1124
1125   if (range == 0)
1126      return;
1127
1128   /* There are two different ways to do this.
1129
1130      If the range fits within a single address-range equivalence
1131      class, as will be the case for a cache line sized invalidation,
1132      then we only have to inspect the set of translations listed in
1133      that equivalence class, and also in the "sin-bin" equivalence
1134      class ECLASS_MISC.
1135
1136      Otherwise, the invalidation is of a larger range and probably
1137      results from munmap.  In this case it's (probably!) faster just
1138      to inspect all translations, dump those we don't want, and
1139      regenerate the equivalence class information (since modifying it
1140      in-situ is even more expensive).
1141   */
1142
1143   /* First off, figure out if the range falls within a single class,
1144      and if so which one. */
1145
1146   ec = ECLASS_MISC;
1147   if (range < (1ULL << ECLASS_SHIFT))
1148      ec = range_to_eclass( guest_start, (UInt)range );
1149
1150   /* if ec is ECLASS_MISC then we aren't looking at just a single
1151      class, so use the slow scheme.  Else use the fast scheme,
1152      examining 'ec' and ECLASS_MISC. */
1153
1154   if (ec != ECLASS_MISC) {
1155
1156      VG_(debugLog)(2, "transtab",
1157                       "                    FAST, ec = %d\n", ec);
1158
1159      /* Fast scheme */
1160      vg_assert(ec >= 0 && ec < ECLASS_MISC);
1161
1162      for (sno = 0; sno < N_SECTORS; sno++) {
1163         sec = &sectors[sno];
1164         if (sec->tc == NULL)
1165            continue;
1166         anyDeleted |= delete_translations_in_sector_eclass(
1167                         sec, guest_start, range, ec );
1168         anyDeleted |= delete_translations_in_sector_eclass(
1169                         sec, guest_start, range, ECLASS_MISC );
1170      }
1171
1172   } else {
1173
1174      /* slow scheme */
1175
1176      VG_(debugLog)(2, "transtab",
1177                       "                    SLOW, ec = %d\n", ec);
1178
1179      for (sno = 0; sno < N_SECTORS; sno++) {
1180         sec = &sectors[sno];
1181         if (sec->tc == NULL)
1182            continue;
1183         anyDeleted |= delete_translations_in_sector(
1184                         sec, guest_start, range );
1185      }
1186
1187   }
1188
1189   if (anyDeleted)
1190      invalidateFastCache();
1191
1192   /* don't forget the no-redir cache */
1193   unredir_discard_translations( guest_start, range );
1194
1195   /* Post-deletion sanity check */
1196   if (VG_(clo_sanity_level >= 4)) {
1197      Int      i;
1198      TTEntry* tte;
1199      Bool     sane = sanity_check_all_sectors();
1200      vg_assert(sane);
1201      /* But now, also check the requested address range isn't
1202         present anywhere. */
1203      for (sno = 0; sno < N_SECTORS; sno++) {
1204         sec = &sectors[sno];
1205         if (sec->tc == NULL)
1206            continue;
1207         for (i = 0; i < N_TTES_PER_SECTOR; i++) {
1208            tte = &sec->tt[i];
1209            if (tte->status != InUse)
1210               continue;
1211            vg_assert(!overlaps( guest_start, range, &tte->vge ));
1212         }
1213      }
1214   }
1215}
1216
1217
1218/*------------------------------------------------------------*/
1219/*--- AUXILIARY: the unredirected TT/TC                    ---*/
1220/*------------------------------------------------------------*/
1221
1222/* A very simple translation cache which holds a small number of
1223   unredirected translations.  This is completely independent of the
1224   main tt/tc structures.  When unredir_tc or unredir_tt becomes full,
1225   both structures are simply dumped and we start over.
1226
1227   Since these translations are unredirected, the search key is (by
1228   definition) the first address entry in the .vge field. */
1229
1230/* Sized to hold 500 translations of average size 1000 bytes. */
1231
1232#define UNREDIR_SZB   1000
1233
1234#define N_UNREDIR_TT  500
1235#define N_UNREDIR_TCQ (N_UNREDIR_TT * UNREDIR_SZB / sizeof(ULong))
1236
1237typedef
1238   struct {
1239      VexGuestExtents vge;
1240      Addr            hcode;
1241      Bool            inUse;
1242   }
1243   UTCEntry;
1244
1245/* We just allocate forwards in _tc, never deleting. */
1246static ULong    *unredir_tc;
1247static Int      unredir_tc_used = N_UNREDIR_TCQ;
1248
1249/* Slots in _tt can come into use and out again (.inUse).
1250   Nevertheless _tt_highwater is maintained so that invalidations
1251   don't have to scan all the slots when only a few are in use.
1252   _tt_highwater holds the index of the highest ever allocated
1253   slot. */
1254static UTCEntry unredir_tt[N_UNREDIR_TT];
1255static Int      unredir_tt_highwater;
1256
1257
1258static void init_unredir_tt_tc ( void )
1259{
1260   Int i;
1261   if (unredir_tc == NULL) {
1262      SysRes sres = VG_(am_mmap_anon_float_valgrind)( N_UNREDIR_TT * UNREDIR_SZB );
1263      if (sres.isError) {
1264         VG_(out_of_memory_NORETURN)("init_unredir_tt_tc", N_UNREDIR_TT * UNREDIR_SZB);
1265         /*NOTREACHED*/
1266      }
1267      unredir_tc = (ULong *)sres.res;
1268   }
1269   unredir_tc_used = 0;
1270   for (i = 0; i < N_UNREDIR_TT; i++)
1271      unredir_tt[i].inUse = False;
1272   unredir_tt_highwater = -1;
1273}
1274
1275/* Do a sanity check; return False on failure. */
1276static Bool sanity_check_redir_tt_tc ( void )
1277{
1278   Int i;
1279   if (unredir_tt_highwater < -1) return False;
1280   if (unredir_tt_highwater >= N_UNREDIR_TT) return False;
1281
1282   for (i = unredir_tt_highwater+1; i < N_UNREDIR_TT; i++)
1283      if (unredir_tt[i].inUse)
1284         return False;
1285
1286   if (unredir_tc_used < 0) return False;
1287   if (unredir_tc_used > N_UNREDIR_TCQ) return False;
1288
1289   return True;
1290}
1291
1292
1293/* Add an UNREDIRECTED translation of vge to TT/TC.  The translation
1294   is temporarily in code[0 .. code_len-1].
1295*/
1296void VG_(add_to_unredir_transtab)( VexGuestExtents* vge,
1297                                   Addr64           entry,
1298                                   AddrH            code,
1299                                   UInt             code_len,
1300                                   Bool             is_self_checking )
1301{
1302   Int   i, j, code_szQ;
1303   HChar *srcP, *dstP;
1304
1305   vg_assert(sanity_check_redir_tt_tc());
1306
1307   /* This is the whole point: it's not redirected! */
1308   vg_assert(entry == vge->base[0]);
1309
1310   /* How many unredir_tt slots are needed */
1311   code_szQ = (code_len + 7) / 8;
1312
1313   /* Look for an empty unredir_tc slot */
1314   for (i = 0; i < N_UNREDIR_TT; i++)
1315      if (!unredir_tt[i].inUse)
1316         break;
1317
1318   if (i >= N_UNREDIR_TT || code_szQ > (N_UNREDIR_TCQ - unredir_tc_used)) {
1319      /* It's full; dump everything we currently have */
1320      init_unredir_tt_tc();
1321      i = 0;
1322   }
1323
1324   vg_assert(unredir_tc_used >= 0);
1325   vg_assert(unredir_tc_used <= N_UNREDIR_TCQ);
1326   vg_assert(code_szQ > 0);
1327   vg_assert(code_szQ + unredir_tc_used <= N_UNREDIR_TCQ);
1328   vg_assert(i >= 0 && i < N_UNREDIR_TT);
1329   vg_assert(unredir_tt[i].inUse == False);
1330
1331   if (i > unredir_tt_highwater)
1332      unredir_tt_highwater = i;
1333
1334   dstP = (HChar*)&unredir_tc[unredir_tc_used];
1335   srcP = (HChar*)code;
1336   for (j = 0; j < code_len; j++)
1337      dstP[j] = srcP[j];
1338
1339   invalidate_icache( dstP, code_len );
1340
1341   unredir_tt[i].inUse = True;
1342   unredir_tt[i].vge   = *vge;
1343   unredir_tt[i].hcode = (Addr)dstP;
1344
1345   unredir_tc_used += code_szQ;
1346   vg_assert(unredir_tc_used >= 0);
1347   vg_assert(unredir_tc_used <= N_UNREDIR_TCQ);
1348
1349   vg_assert(&dstP[code_len] <= (HChar*)&unredir_tc[unredir_tc_used]);
1350}
1351
1352Bool VG_(search_unredir_transtab) ( /*OUT*/AddrH* result,
1353                                    Addr64        guest_addr )
1354{
1355   Int i;
1356   for (i = 0; i < N_UNREDIR_TT; i++) {
1357      if (!unredir_tt[i].inUse)
1358         continue;
1359      if (unredir_tt[i].vge.base[0] == guest_addr) {
1360         *result = (AddrH)unredir_tt[i].hcode;
1361         return True;
1362      }
1363   }
1364   return False;
1365}
1366
1367static void unredir_discard_translations( Addr64 guest_start, ULong range )
1368{
1369   Int i;
1370
1371   vg_assert(sanity_check_redir_tt_tc());
1372
1373   for (i = 0; i <= unredir_tt_highwater; i++) {
1374      if (unredir_tt[i].inUse
1375          && overlaps( guest_start, range, &unredir_tt[i].vge))
1376         unredir_tt[i].inUse = False;
1377   }
1378}
1379
1380
1381/*------------------------------------------------------------*/
1382/*--- Initialisation.                                      ---*/
1383/*------------------------------------------------------------*/
1384
1385void VG_(init_tt_tc) ( void )
1386{
1387   Int i, j, avg_codeszQ;
1388
1389   vg_assert(!init_done);
1390   init_done = True;
1391
1392   /* Otherwise lots of things go wrong... */
1393   vg_assert(sizeof(ULong) == 8);
1394   vg_assert(sizeof(Addr64) == 8);
1395
1396   if (VG_(clo_verbosity) > 2)
1397      VG_(message)(Vg_DebugMsg,
1398                   "TT/TC: VG_(init_tt_tc) "
1399                   "(startup of code management)");
1400
1401   /* Figure out how big each tc area should be.  */
1402   avg_codeszQ   = (VG_(details).avg_translation_sizeB + 7) / 8;
1403   tc_sector_szQ = N_TTES_PER_SECTOR_USABLE * (1 + avg_codeszQ);
1404
1405   /* Ensure the calculated value is not way crazy. */
1406   vg_assert(tc_sector_szQ >= 2 * N_TTES_PER_SECTOR_USABLE);
1407   vg_assert(tc_sector_szQ <= 80 * N_TTES_PER_SECTOR_USABLE);
1408
1409   /* Initialise the sectors */
1410   youngest_sector = 0;
1411   for (i = 0; i < N_SECTORS; i++) {
1412      sectors[i].tc = NULL;
1413      sectors[i].tt = NULL;
1414      sectors[i].tc_next = NULL;
1415      sectors[i].tt_n_inuse = 0;
1416      for (j = 0; j < ECLASS_N; j++) {
1417         sectors[i].ec2tte_size[j] = 0;
1418         sectors[i].ec2tte_used[j] = 0;
1419         sectors[i].ec2tte[j] = NULL;
1420      }
1421   }
1422
1423   /* and the fast caches. */
1424   invalidateFastCache();
1425
1426   /* and the unredir tt/tc */
1427   init_unredir_tt_tc();
1428
1429   if (VG_(clo_verbosity) > 2) {
1430      VG_(message)(Vg_DebugMsg,
1431         "TT/TC: cache: %d sectors of %d bytes each = %d total",
1432          N_SECTORS, 8 * tc_sector_szQ,
1433          N_SECTORS * 8 * tc_sector_szQ );
1434      VG_(message)(Vg_DebugMsg,
1435         "TT/TC: table: %d total entries, max occupancy %d (%d%%)",
1436         N_SECTORS * N_TTES_PER_SECTOR,
1437         N_SECTORS * N_TTES_PER_SECTOR_USABLE,
1438         SECTOR_TT_LIMIT_PERCENT );
1439   }
1440
1441   VG_(debugLog)(2, "transtab",
1442      "cache: %d sectors of %d bytes each = %d total\n",
1443       N_SECTORS, 8 * tc_sector_szQ,
1444       N_SECTORS * 8 * tc_sector_szQ );
1445   VG_(debugLog)(2, "transtab",
1446      "table: %d total entries, max occupancy %d (%d%%)\n",
1447      N_SECTORS * N_TTES_PER_SECTOR,
1448      N_SECTORS * N_TTES_PER_SECTOR_USABLE,
1449      SECTOR_TT_LIMIT_PERCENT );
1450}
1451
1452
1453/*------------------------------------------------------------*/
1454/*--- Printing out statistics.                             ---*/
1455/*------------------------------------------------------------*/
1456
1457static ULong safe_idiv( ULong a, ULong b )
1458{
1459   return (b == 0 ? 0 : a / b);
1460}
1461
1462UInt VG_(get_bbs_translated) ( void )
1463{
1464   return n_in_count;
1465}
1466
1467void VG_(print_tt_tc_stats) ( void )
1468{
1469   VG_(message)(Vg_DebugMsg,
1470      "    tt/tc: %,llu tt lookups requiring %,llu probes",
1471      n_full_lookups, n_lookup_probes );
1472   VG_(message)(Vg_DebugMsg,
1473      "    tt/tc: %,llu fast-cache updates, %,llu flushes",
1474      n_fast_updates, n_fast_flushes );
1475
1476   VG_(message)(Vg_DebugMsg,
1477                " transtab: new        %,lld "
1478                "(%,llu -> %,llu; ratio %,llu:10) [%,llu scs]",
1479                n_in_count, n_in_osize, n_in_tsize,
1480                safe_idiv(10*n_in_tsize, n_in_osize),
1481                n_in_sc_count);
1482   VG_(message)(Vg_DebugMsg,
1483                " transtab: dumped     %,llu (%,llu -> ?" "?)",
1484                n_dump_count, n_dump_osize );
1485   VG_(message)(Vg_DebugMsg,
1486                " transtab: discarded  %,llu (%,llu -> ?" "?)",
1487                n_disc_count, n_disc_osize );
1488
1489   if (0) {
1490      Int i;
1491      VG_(printf)("\n");
1492      for (i = 0; i < ECLASS_N; i++) {
1493         VG_(printf)(" %4d", sectors[0].ec2tte_used[i]);
1494         if (i % 16 == 15)
1495            VG_(printf)("\n");
1496      }
1497      VG_(printf)("\n\n");
1498   }
1499}
1500
1501/*------------------------------------------------------------*/
1502/*--- Printing out of profiling results.                   ---*/
1503/*------------------------------------------------------------*/
1504
1505static ULong score ( TTEntry* tte )
1506{
1507   return ((ULong)tte->weight) * ((ULong)tte->count);
1508}
1509
1510ULong VG_(get_BB_profile) ( BBProfEntry tops[], UInt n_tops )
1511{
1512   Int   sno, i, r, s;
1513   ULong score_total;
1514
1515   /* First, compute the total weighted count, and find the top N
1516      ttes.  tops contains pointers to the most-used n_tops blocks, in
1517      descending order (viz, tops[0] is the highest scorer). */
1518   for (i = 0; i < n_tops; i++) {
1519      tops[i].addr  = 0;
1520      tops[i].score = 0;
1521   }
1522
1523   score_total = 0;
1524
1525   for (sno = 0; sno < N_SECTORS; sno++) {
1526      if (sectors[sno].tc == NULL)
1527         continue;
1528      for (i = 0; i < N_TTES_PER_SECTOR; i++) {
1529         if (sectors[sno].tt[i].status != InUse)
1530            continue;
1531         score_total += score(&sectors[sno].tt[i]);
1532         /* Find the rank for sectors[sno].tt[i]. */
1533         r = n_tops-1;
1534         while (True) {
1535            if (r == -1)
1536               break;
1537             if (tops[r].addr == 0) {
1538               r--;
1539               continue;
1540             }
1541             if ( score(&sectors[sno].tt[i]) > tops[r].score ) {
1542                r--;
1543                continue;
1544             }
1545             break;
1546         }
1547         r++;
1548         vg_assert(r >= 0 && r <= n_tops);
1549         /* This bb should be placed at r, and bbs above it shifted
1550            upwards one slot. */
1551         if (r < n_tops) {
1552            for (s = n_tops-1; s > r; s--)
1553               tops[s] = tops[s-1];
1554            tops[r].addr  = sectors[sno].tt[i].entry;
1555            tops[r].score = score( &sectors[sno].tt[i] );
1556         }
1557      }
1558   }
1559
1560   return score_total;
1561}
1562
1563/*--------------------------------------------------------------------*/
1564/*--- end                                                          ---*/
1565/*--------------------------------------------------------------------*/
1566