m_transtab.c revision e4b0bf07b0ee0a18eacc5aba91686ab5fc1d327b
1
2/*--------------------------------------------------------------------*/
3/*--- Management of the translation table and cache.               ---*/
4/*---                                                 m_transtab.c ---*/
5/*--------------------------------------------------------------------*/
6
7/*
8   This file is part of Valgrind, a dynamic binary instrumentation
9   framework.
10
11   Copyright (C) 2000-2006 Julian Seward
12      jseward@acm.org
13
14   This program is free software; you can redistribute it and/or
15   modify it under the terms of the GNU General Public License as
16   published by the Free Software Foundation; either version 2 of the
17   License, or (at your option) any later version.
18
19   This program is distributed in the hope that it will be useful, but
20   WITHOUT ANY WARRANTY; without even the implied warranty of
21   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22   General Public License for more details.
23
24   You should have received a copy of the GNU General Public License
25   along with this program; if not, write to the Free Software
26   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27   02111-1307, USA.
28
29   The GNU General Public License is contained in the file COPYING.
30*/
31
32#include "pub_core_basics.h"
33#include "pub_core_debuglog.h"
34#include "pub_core_machine.h"    // For VG(machine_get_VexArchInfo)
35#include "pub_core_libcbase.h"
36#include "pub_core_libcassert.h"
37#include "pub_core_libcprint.h"
38#include "pub_core_options.h"
39#include "pub_core_tooliface.h"  // For VG_(details).avg_translation_sizeB
40#include "pub_core_transtab.h"
41#include "pub_core_aspacemgr.h"
42#include "pub_core_mallocfree.h" // VG_(out_of_memory_NORETURN)
43
44/* #define DEBUG_TRANSTAB */
45
46
47/*-------------------------------------------------------------*/
48/*--- Management of the FIFO-based translation table+cache. ---*/
49/*-------------------------------------------------------------*/
50
51/*------------------ CONSTANTS ------------------*/
52
53/* Number of sectors the TC is divided into.  If you need a larger
54   overall translation cache, increase this value. */
55#define N_SECTORS 8
56
57/* Number of TT entries in each sector.  This needs to be a prime
58   number to work properly; it must be <= 65535 (so that a TT index
59   fits in a UShort, leaving 0xFFFF (EC2TTE_DELETED) free to denote
60   'deleted'); and it is strongly recommended not to change this.
61   65521 is the largest prime <= 65535. */
62#define N_TTES_PER_SECTOR /*30011*/ /*40009*/ 65521
63
64/* Because each sector contains a hash table of TTEntries, we need to
65   specify the maximum allowable loading, after which the sector is
66   deemed full. */
67#define SECTOR_TT_LIMIT_PERCENT 80
68
69/* The sector is deemed full when this many entries are in it. */
70#define N_TTES_PER_SECTOR_USABLE \
71           ((N_TTES_PER_SECTOR * SECTOR_TT_LIMIT_PERCENT) / 100)
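
/* A quick worked example (illustrative only, using the defaults above):
   (65521 * 80) / 100 = 52416, so a sector is declared full once 52416
   of its 65521 TT slots are occupied. */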
72
73/* Equivalence classes for fast address range deletion.  There are 1 +
74   2^ECLASS_WIDTH bins.  The highest one, ECLASS_MISC, describes an
75   address range which does not fall cleanly within any specific bin.
76   Note that ECLASS_SHIFT + ECLASS_WIDTH must be < 32. */
77#define ECLASS_SHIFT 11
78#define ECLASS_WIDTH 8
79#define ECLASS_MISC  (1 << ECLASS_WIDTH)
80#define ECLASS_N     (1 + ECLASS_MISC)
81
82#define EC2TTE_DELETED  0xFFFF /* 16-bit special value */
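
/* An illustrative sketch (not part of the build) of how ranges map to
   eclasses, given the defaults above (ECLASS_SHIFT=11, ECLASS_WIDTH=8).
   The class of an address is bits [11..18], i.e. (addr >> 11) & 0xFF,
   so each class covers 2KB-aligned 2KB chunks, recurring every 512KB:

      start = 0x4005A123, len = 0x40
         lo class = (0x4005A123 >> 11) & 0xFF = 0xB4
         hi class = (0x4005A162 >> 11) & 0xFF = 0xB4    --> eclass 0xB4

      start = 0x4005A7F0, len = 0x40    (crosses a 2KB boundary)
         lo class = 0xB4, hi class = 0xB5               --> ECLASS_MISC

   See range_to_eclass() below for the real computation. */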
83
84
85/*------------------ TYPES ------------------*/
86
87/* A translation-cache entry consists of two parts:
88   - The guest address of the first (entry) bb in the translation,
89     as a 64-bit word.
90   - One or more 64-bit words containing the code.
91   It is supposed to be 64-bit aligned.
92*/
93/*
94typedef
95   struct {
96      Addr64 orig_addr;
97      ULong  code[0];
98   }
99   TCEntry;
100*/
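
/* For example (illustrative arithmetic only): a translation whose host
   code is 35 bytes long occupies 1 + ((35 + 7) >> 3) = 6 ULongs in the
   TC -- one for orig_addr plus ceil(35/8) = 5 for the code -- which is
   exactly the 'reqdQ' value computed in VG_(add_to_transtab)() below. */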
101
102/* A translation-table entry.  This indicates precisely which areas of
103   guest code are included in the translation, and contains all other
104   auxiliary info too.  */
105typedef
106   struct {
107      /* Profiling only: the count and weight (arbitrary meaning) for
108         this translation.  Weight is a property of the translation
109         itself and computed once when the translation is created.
110         Count is an entry count for the translation and is
111         incremented by 1 every time the translation is used, if we
112         are profiling. */
113      UInt     count;
114      UShort   weight;
115
116      /* Status of the slot.  Note, we need to be able to do lazy
117         deletion, hence the Deleted state. */
118      enum { InUse, Deleted, Empty } status;
119
120      /* Pointer to the corresponding TCEntry (must be in the same
121         sector!) */
122      ULong* tce;
123
124      /* This is the original guest address that purportedly is the
125         entry point of the translation.  You might think that .entry
126         should be the same as .vge->base[0], and most of the time it
127         is.  However, when doing redirections, that is not the case.
128         .vge must always correctly describe the guest code sections
129         from which this translation was made.  However, .entry may or
130         may not be a lie, depending on whether or not we're doing
131         redirection. */
132      Addr64 entry;
133
134      /* This structure describes precisely what ranges of guest code
135         the translation covers, so we can decide whether or not to
136         delete it when translations of a given address range are
137         invalidated. */
138      VexGuestExtents vge;
139
140      /* Address range summary info: these are pointers back to
141         eclass[] entries in the containing Sector.  Those entries in
142         turn point back here -- the two structures are mutually
143         redundant but both necessary to make fast deletions work.
144         The eclass info is similar to, and derived from, this entry's
145         'vge' field, but it is not the same */
146      UShort n_tte2ec;      // # tte2ec pointers (1 to 3)
147      UShort tte2ec_ec[3];  // for each, the eclass #
148      UInt   tte2ec_ix[3];  // and the index within the eclass.
149      // for i in 0 .. n_tte2ec-1,
150      //    sec->ec2tte[ tte2ec_ec[i] ][ tte2ec_ix[i] ]
151      // should be the index of this TTEntry in the containing
152      // Sector's tt array.
153   }
154   TTEntry;
155
156
157/* Finally, a sector itself.  Each sector contains an array of
158   TCEntries, which hold code, and an array of TTEntries, containing
159   all required administrative info.  Profiling is supported using the
160   TTEntry .count and .weight fields, if required.  Each sector is
161   independent in that no cross-sector references are allowed.
162
163   If the sector is not in use, all three pointers are NULL and
164   tt_n_inuse is zero.
165*/
166typedef
167   struct {
168      /* The TCEntry area.  Size of this depends on the average
169         translation size.  We try and size it so it becomes full
170         precisely when this sector's translation table (tt) reaches
171         its load limit (SECTOR_TT_LIMIT_PERCENT). */
172      ULong* tc;
173
174      /* The TTEntry array.  This is a fixed size, always containing
175         exactly N_TTES_PER_SECTOR entries. */
176      TTEntry* tt;
177
178      /* This points to the current allocation point in tc. */
179      ULong* tc_next;
180
181      /* The count of tt entries with state InUse. */
182      Int tt_n_inuse;
183
184      /* Expandable arrays of tt indices for each of the ECLASS_N
185         address range equivalence classes.  These hold indices into
186         the containing sector's tt array, which in turn should point
187         back here. */
188      Int     ec2tte_size[ECLASS_N];
189      Int     ec2tte_used[ECLASS_N];
190      UShort* ec2tte[ECLASS_N];
191   }
192   Sector;
193
194
195/*------------------ DECLS ------------------*/
196
197/* The root data structure is an array of sectors.  The index of the
198   youngest sector is recorded, and new translations are put into that
199   sector.  When it fills up, we move along to the next sector and
200   start to fill that up, wrapping around at the end of the array.
201   That way, once all N_SECTORS sectors have been brought into use for
202   the first time, and are full, we then re-use the oldest sector,
203   endlessly.
204
205   When running, youngest_sector should be in the range
206   0 .. N_SECTORS-1.  The initial -1 value indicates the TT/TC system
207   is not yet initialised.
208*/
209static Sector sectors[N_SECTORS];
210static Int    youngest_sector = -1;
211
212/* The number of ULongs in each TCEntry area.  This is computed once
213   at startup and does not change. */
214static Int    tc_sector_szQ;
215
216
217/* Fast helper for the TC.  A direct-mapped cache which holds a
218   pointer to a TC entry which may or may not be the correct one, but
219   which we hope usually is.  This array is referred to directly from
220   <arch>/dispatch.S.
221
222   Entries in tt_fast may point to any valid TC entry, regardless of
223   which sector it's in.  Consequently we must be very careful to
224   invalidate this cache when TC entries are changed or disappear.
225
226   A special TCEntry -- bogus_tc_entry -- must be pointed at to cause
227   that cache entry to miss.  This relies on the assumption that no
228   guest code actually has an address of 0x1.
229*/
230/*global*/ ULong* VG_(tt_fast)[VG_TT_FAST_SIZE];
231
232static ULong bogus_tc_entry = (Addr64)1;
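
/* An illustrative C sketch (not part of the build) of the probe which
   the handwritten dispatcher performs against this cache.  It relies on
   the TC entry layout established in VG_(add_to_transtab)(): word 0
   holds the guest address and the host code starts at word 1.

      ULong* ent = VG_(tt_fast)[ VG_TT_FAST_HASH(guest_addr) ];
      if (ent[0] == guest_addr) {
         ... jump to the host code at &ent[1] ...
      } else {
         ... fall back to VG_(search_transtab), which may refill this
             slot via setFastCacheEntry() ...
      }

   Pointing a slot at bogus_tc_entry forces the comparison to fail,
   since its 'guest address' word is 1. */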
233
234
235/* For profiling, we have a parallel array of pointers to .count
236   fields in TT entries.  Again, these pointers must be invalidated
237   when translations disappear.  A NULL pointer suffices to indicate
238   an unused slot.
239
240   tt_fast and tt_fastN change together: if tt_fast[i] points to
241   bogus_tc_entry then the corresponding tt_fastN[i] must be null.  If
242   tt_fast[i] points to some TC entry somewhere, then tt_fastN[i]
243   *must* point to the .count field of the corresponding TT entry.
244
245   tt_fast and tt_fastN are referred to from assembly code
246   (dispatch.S).
247*/
248/*global*/ UInt* VG_(tt_fastN)[VG_TT_FAST_SIZE];
249
250
251/* Make sure we're not used before initialisation. */
252static Bool init_done = False;
253
254
255/*------------------ STATS DECLS ------------------*/
256
257/* Number of fast-cache updates and flushes done. */
258ULong n_fast_flushes = 0;
259ULong n_fast_updates = 0;
260
261/* Number of full lookups done. */
262ULong n_full_lookups = 0;
263ULong n_lookup_probes = 0;
264
265/* Number/osize/tsize of translations entered; also the number of
266   those for which self-checking was requested. */
267ULong n_in_count    = 0;
268ULong n_in_osize    = 0;
269ULong n_in_tsize    = 0;
270ULong n_in_sc_count = 0;
271
272/* Number/osize of translations discarded due to lack of space. */
273ULong n_dump_count = 0;
274ULong n_dump_osize = 0;
275
276/* Number/osize of translations discarded due to requests to do so. */
277ULong n_disc_count = 0;
278ULong n_disc_osize = 0;
279
280
281/*-------------------------------------------------------------*/
282/*--- Address-range equivalence class stuff                 ---*/
283/*-------------------------------------------------------------*/
284
285/* Return equivalence class number for a range. */
286
287static Int range_to_eclass ( Addr64 start, UInt len )
288{
289   UInt mask   = (1 << ECLASS_WIDTH) - 1;
290   UInt lo     = (UInt)start;
291   UInt hi     = lo + len - 1;
292   UInt loBits = (lo >> ECLASS_SHIFT) & mask;
293   UInt hiBits = (hi >> ECLASS_SHIFT) & mask;
294   if (loBits == hiBits) {
295      vg_assert(loBits < ECLASS_N-1);
296      return loBits;
297   } else {
298      return ECLASS_MISC;
299   }
300}
301
302
303/* Calculates the equivalence class numbers for any VexGuestExtents.
304   These are written in *eclasses, which must be big enough to hold 3
305   Ints.  The number written, between 1 and 3, is returned.  The
306   eclasses are presented in ascending order, with any duplicates removed.
307*/
308
309static
310Int vexGuestExtents_to_eclasses ( /*OUT*/Int* eclasses,
311                                  VexGuestExtents* vge )
312{
313#  define SWAP(_lv1,_lv2) \
314      do { Int t = _lv1; _lv1 = _lv2; _lv2 = t; } while (0)
315
316   Int i, j, n_ec, r;
317
318   vg_assert(vge->n_used >= 1 && vge->n_used <= 3);
319
320   n_ec = 0;
321   for (i = 0; i < vge->n_used; i++) {
322      r = range_to_eclass( vge->base[i], (UInt)vge->len[i] );
323      if (r == ECLASS_MISC)
324         goto bad;
325      /* only add if we haven't already seen it */
326      for (j = 0; j < n_ec; j++)
327         if (eclasses[j] == r)
328            break;
329      if (j == n_ec)
330         eclasses[n_ec++] = r;
331   }
332
333   if (n_ec == 1)
334      return 1;
335
336   if (n_ec == 2) {
337      /* sort */
338      if (eclasses[0] > eclasses[1])
339         SWAP(eclasses[0], eclasses[1]);
340      return 2;
341   }
342
343   if (n_ec == 3) {
344      /* sort */
345      if (eclasses[0] > eclasses[2])
346         SWAP(eclasses[0], eclasses[2]);
347      if (eclasses[0] > eclasses[1])
348         SWAP(eclasses[0], eclasses[1]);
349      if (eclasses[1] > eclasses[2])
350         SWAP(eclasses[1], eclasses[2]);
351      return 3;
352   }
353
354   /* NOTREACHED */
355   vg_assert(0);
356
357  bad:
358   eclasses[0] = ECLASS_MISC;
359   return 1;
360
361#  undef SWAP
362}
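
/* Illustrative example (not part of the build): for a VexGuestExtents
   with n_used == 2, base = { 0x40001000, 0x40009000 } and
   len = { 40, 20 }, the two ranges map to classes 0x02 and 0x12, so the
   function writes eclasses[] = { 0x02, 0x12 } and returns 2.  Had either
   range straddled a class boundary, it would instead have written a
   single ECLASS_MISC entry and returned 1. */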
363
364
365/* Add tteno to the set of entries listed for equivalence class ec in
366   this sector.  Returns the index used within that eclass's array. */
367
368static
369UInt addEClassNo ( /*MOD*/Sector* sec, Int ec, UShort tteno )
370{
371   Int    old_sz, new_sz, i, r;
372   UShort *old_ar, *new_ar;
373
374   vg_assert(ec >= 0 && ec < ECLASS_N);
375   vg_assert(tteno < N_TTES_PER_SECTOR);
376
377   if (0) VG_(printf)("ec %d  gets %d\n", ec, (Int)tteno);
378
379   if (sec->ec2tte_used[ec] >= sec->ec2tte_size[ec]) {
380
381      vg_assert(sec->ec2tte_used[ec] == sec->ec2tte_size[ec]);
382
383      old_sz = sec->ec2tte_size[ec];
384      old_ar = sec->ec2tte[ec];
385      new_sz = old_sz==0 ? 8 : old_sz<64 ? 2*old_sz : (3*old_sz)/2;
386      new_ar = VG_(arena_malloc)(VG_AR_TTAUX, new_sz * sizeof(UShort));
387      for (i = 0; i < old_sz; i++)
388         new_ar[i] = old_ar[i];
389      if (old_ar)
390         VG_(arena_free)(VG_AR_TTAUX, old_ar);
391      sec->ec2tte_size[ec] = new_sz;
392      sec->ec2tte[ec] = new_ar;
393
394      if (0) VG_(printf)("expand ec %d to %d\n", ec, new_sz);
395   }
396
397   /* Common case */
398   r = sec->ec2tte_used[ec]++;
399   vg_assert(r >= 0 && r < sec->ec2tte_size[ec]);
400   sec->ec2tte[ec][r] = tteno;
401   return (UInt)r;
402}
403
404
405/* 'vge' is being added to 'sec' at TT entry 'tteno'.  Add appropriate
406   eclass entries to 'sec'. */
407
408static
409void upd_eclasses_after_add ( /*MOD*/Sector* sec, Int tteno )
410{
411   Int i, r, eclasses[3];
412   TTEntry* tte;
413   vg_assert(tteno >= 0 && tteno < N_TTES_PER_SECTOR);
414
415   tte = &sec->tt[tteno];
416   r = vexGuestExtents_to_eclasses( eclasses, &tte->vge );
417
418   vg_assert(r >= 1 && r <= 3);
419   tte->n_tte2ec = r;
420
421   for (i = 0; i < r; i++) {
422      tte->tte2ec_ec[i] = eclasses[i];
423      tte->tte2ec_ix[i] = addEClassNo( sec, eclasses[i], (UShort)tteno );
424   }
425}
426
427
428/* Check the eclass info in 'sec' to ensure it is consistent.  Returns
429   True if OK, False if something's not right.  Expensive. */
430
431static Bool sanity_check_eclasses_in_sector ( Sector* sec )
432{
433#  define BAD(_str) do { whassup = (_str); goto bad; } while (0)
434
435   HChar*   whassup = NULL;
436   Int      i, j, k, n, ec_num, ec_idx;
437   TTEntry* tte;
438   UShort   tteno;
439   ULong*   tce;
440
441   /* Basic checks on this sector */
442   if (sec->tt_n_inuse < 0 || sec->tt_n_inuse > N_TTES_PER_SECTOR_USABLE)
443      BAD("invalid sec->tt_n_inuse");
444   tce = sec->tc_next;
445   if (tce < &sec->tc[0] || tce > &sec->tc[tc_sector_szQ])
446      BAD("sec->tc_next points outside tc");
447
448   /* For each eclass ... */
449   for (i = 0; i < ECLASS_N; i++) {
450      if (sec->ec2tte_size[i] == 0 && sec->ec2tte[i] != NULL)
451         BAD("ec2tte_size/ec2tte mismatch(1)");
452      if (sec->ec2tte_size[i] != 0 && sec->ec2tte[i] == NULL)
453         BAD("ec2tte_size/ec2tte mismatch(2)");
454      if (sec->ec2tte_used[i] < 0
455          || sec->ec2tte_used[i] > sec->ec2tte_size[i])
456         BAD("implausible ec2tte_used");
457      if (sec->ec2tte_used[i] == 0)
458         continue;
459
460      /* For each tt reference in each eclass .. ensure the reference
461         is to a valid tt entry, and that the entry's address ranges
462         really include this eclass. */
463
464      for (j = 0; j < sec->ec2tte_used[i]; j++) {
465         tteno = sec->ec2tte[i][j];
466         if (tteno == EC2TTE_DELETED)
467            continue;
468         if (tteno >= N_TTES_PER_SECTOR)
469            BAD("implausible tteno");
470         tte = &sec->tt[tteno];
471         if (tte->status != InUse)
472            BAD("tteno points to non-inuse tte");
473         if (tte->n_tte2ec < 1 || tte->n_tte2ec > 3)
474            BAD("tte->n_tte2ec out of range");
475         /* Exactly one of tte's (tte2ec_ec[k], tte2ec_ix[k]) pairs
476            must be (i, j).  Inspect tte's eclass info to check. */
477         n = 0;
478         for (k = 0; k < tte->n_tte2ec; k++) {
479            if (k < tte->n_tte2ec-1
480                && tte->tte2ec_ec[k] >= tte->tte2ec_ec[k+1])
481               BAD("tte->tte2ec_ec[..] out of order");
482            ec_num = tte->tte2ec_ec[k];
483            if (ec_num < 0 || ec_num >= ECLASS_N)
484               BAD("tte->tte2ec_ec[..] out of range");
485            if (ec_num != i)
486               continue;
487            ec_idx = tte->tte2ec_ix[k];
488            if (ec_idx < 0 || ec_idx >= sec->ec2tte_used[i])
489               BAD("tte->tte2ec_ix[..] out of range");
490            if (ec_idx == j)
491               n++;
492         }
493         if (n != 1)
494            BAD("tteno does not point back at eclass");
495      }
496   }
497
498   /* That establishes that for each forward pointer from TTEntries
499      there is a corresponding backward pointer from the eclass[]
500      arrays.  However, it doesn't rule out the possibility of other,
501      bogus pointers in the eclass[] arrays.  So do those similarly:
502      scan through them and check that the TTEntries they point at
503      point back. */
504
505   for (i = 0; i < N_TTES_PER_SECTOR_USABLE; i++) {
506
507      tte = &sec->tt[i];
508      if (tte->status == Empty || tte->status == Deleted) {
509         if (tte->n_tte2ec != 0)
510            BAD("tte->n_eclasses nonzero for unused tte");
511         continue;
512      }
513
514      vg_assert(tte->status == InUse);
515
516      if (tte->n_tte2ec < 1 || tte->n_tte2ec > 3)
517         BAD("tte->n_eclasses out of range(2)");
518
519      for (j = 0; j < tte->n_tte2ec; j++) {
520         ec_num = tte->tte2ec_ec[j];
521         if (ec_num < 0 || ec_num >= ECLASS_N)
522            BAD("tte->eclass[..] out of range");
523         ec_idx = tte->tte2ec_ix[j];
524         if (ec_idx < 0 || ec_idx >= sec->ec2tte_used[ec_num])
525            BAD("tte->ec_idx[..] out of range(2)");
526         if (sec->ec2tte[ec_num][ec_idx] != i)
527            BAD("ec2tte does not point back to tte");
528      }
529   }
530
531   return True;
532
533  bad:
534   if (whassup)
535      VG_(debugLog)(0, "transtab", "eclass sanity fail: %s\n", whassup);
536
537#  if 0
538   VG_(printf)("eclass = %d\n", i);
539   VG_(printf)("tteno = %d\n", (Int)tteno);
540   switch (tte->status) {
541      case InUse:   VG_(printf)("InUse\n"); break;
542      case Deleted: VG_(printf)("Deleted\n"); break;
543      case Empty:   VG_(printf)("Empty\n"); break;
544   }
545   if (tte->status != Empty) {
546      for (k = 0; k < tte->vge.n_used; k++)
547         VG_(printf)("0x%llx %d\n", tte->vge.base[k],
548                                    (Int)tte->vge.len[k]);
549   }
550#  endif
551
552   return False;
553
554#  undef BAD
555}
556
557
558/* Sanity check absolutely everything.  True == check passed. */
559
560/* forward */
561static Bool sanity_check_redir_tt_tc ( void );
562
563static Bool sanity_check_all_sectors ( void )
564{
565   Int     sno;
566   Bool    sane;
567   Sector* sec;
568   for (sno = 0; sno < N_SECTORS; sno++) {
569      sec = &sectors[sno];
570      if (sec->tc == NULL)
571         continue;
572      sane = sanity_check_eclasses_in_sector( sec );
573      if (!sane)
574         return False;
575   }
576   if (!sanity_check_redir_tt_tc() )
577      return False;
578   return True;
579}
580
581
582/*-------------------------------------------------------------*/
583/*--- Add/find translations                                 ---*/
584/*-------------------------------------------------------------*/
585
586static UInt vge_osize ( VexGuestExtents* vge )
587{
588   UInt i, n = 0;
589   for (i = 0; i < vge->n_used; i++)
590      n += (UInt)vge->len[i];
591   return n;
592}
593
594static Bool isValidSector ( Int sector )
595{
596   if (sector < 0 || sector >= N_SECTORS)
597      return False;
598   return True;
599}
600
601static inline UInt HASH_TT ( Addr64 key )
602{
603   UInt kHi = (UInt)(key >> 32);
604   UInt kLo = (UInt)key;
605   UInt k32 = kHi ^ kLo;
606   UInt ror = 7;
607   if (ror > 0)
608      k32 = (k32 >> ror) | (k32 << (32-ror));
609   return k32 % N_TTES_PER_SECTOR;
610}
611
612static void setFastCacheEntry ( Addr64 key, ULong* tce, UInt* count )
613{
614   UInt cno = (UInt)VG_TT_FAST_HASH(key);
615   VG_(tt_fast)[cno]  = tce;
616   VG_(tt_fastN)[cno] = count;
617   n_fast_updates++;
618}
619
620static void invalidateFastCache ( void )
621{
622   UInt j;
623   /* This loop is popular enough to make it worth unrolling a
624      bit, at least on ppc32. */
625   vg_assert(VG_TT_FAST_SIZE > 0 && (VG_TT_FAST_SIZE % 4) == 0);
626   for (j = 0; j < VG_TT_FAST_SIZE; j += 4) {
627      VG_(tt_fast)[j+0]  = &bogus_tc_entry;
628      VG_(tt_fast)[j+1]  = &bogus_tc_entry;
629      VG_(tt_fast)[j+2]  = &bogus_tc_entry;
630      VG_(tt_fast)[j+3]  = &bogus_tc_entry;
631      VG_(tt_fastN)[j+0] = NULL;
632      VG_(tt_fastN)[j+1] = NULL;
633      VG_(tt_fastN)[j+2] = NULL;
634      VG_(tt_fastN)[j+3] = NULL;
635   }
636   vg_assert(j == VG_TT_FAST_SIZE);
637   n_fast_flushes++;
638}
639
640static void initialiseSector ( Int sno )
641{
642   Int    i;
643   SysRes sres;
644   Sector* sec;
645   vg_assert(isValidSector(sno));
646
647   sec = &sectors[sno];
648
649   if (sec->tc == NULL) {
650
651      /* Sector has never been used before.  Need to allocate tt and
652         tc. */
653      vg_assert(sec->tt == NULL);
654      vg_assert(sec->tc_next == NULL);
655      vg_assert(sec->tt_n_inuse == 0);
656      for (i = 0; i < ECLASS_N; i++) {
657         vg_assert(sec->ec2tte_size[i] == 0);
658         vg_assert(sec->ec2tte_used[i] == 0);
659         vg_assert(sec->ec2tte[i] == NULL);
660      }
661
662      VG_(debugLog)(1,"transtab", "allocate sector %d\n", sno);
663
664      sres = VG_(am_mmap_anon_float_valgrind)( 8 * tc_sector_szQ );
665      if (sres.isError) {
666         VG_(out_of_memory_NORETURN)("initialiseSector(TC)",
667                                     8 * tc_sector_szQ );
668         /*NOTREACHED*/
669      }
670      sec->tc = (ULong*)sres.val;
671
672      sres = VG_(am_mmap_anon_float_valgrind)
673                ( N_TTES_PER_SECTOR * sizeof(TTEntry) );
674      if (sres.isError) {
675         VG_(out_of_memory_NORETURN)("initialiseSector(TT)",
676                                     N_TTES_PER_SECTOR * sizeof(TTEntry) );
677         /*NOTREACHED*/
678      }
679      sec->tt = (TTEntry*)sres.val;
680
681      for (i = 0; i < N_TTES_PER_SECTOR; i++) {
682         sec->tt[i].status   = Empty;
683         sec->tt[i].n_tte2ec = 0;
684      }
685
686      if (VG_(clo_verbosity) > 2)
687         VG_(message)(Vg_DebugMsg, "TT/TC: initialise sector %d", sno);
688
689   } else {
690
691      /* Sector has been used before.  Dump the old contents. */
692      VG_(debugLog)(1,"transtab", "recycle sector %d\n", sno);
693      vg_assert(sec->tt != NULL);
694      vg_assert(sec->tc_next != NULL);
695      n_dump_count += sec->tt_n_inuse;
696
697      /* Visit each just-about-to-be-abandoned translation. */
698      for (i = 0; i < N_TTES_PER_SECTOR; i++) {
699         if (sec->tt[i].status == InUse) {
700            vg_assert(sec->tt[i].n_tte2ec >= 1);
701            vg_assert(sec->tt[i].n_tte2ec <= 3);
702            n_dump_osize += vge_osize(&sec->tt[i].vge);
703            /* Tell the tool too. */
704            if (VG_(needs).basic_block_discards) {
705               VG_TDICT_CALL( tool_discard_basic_block_info,
706                              sec->tt[i].entry,
707                              sec->tt[i].vge );
708            }
709         } else {
710            vg_assert(sec->tt[i].n_tte2ec == 0);
711         }
712         sec->tt[i].status   = Empty;
713         sec->tt[i].n_tte2ec = 0;
714      }
715
716      /* Free up the eclass structures. */
717      for (i = 0; i < ECLASS_N; i++) {
718         if (sec->ec2tte_size[i] == 0) {
719            vg_assert(sec->ec2tte_used[i] == 0);
720            vg_assert(sec->ec2tte[i] == NULL);
721         } else {
722            vg_assert(sec->ec2tte[i] != NULL);
723            VG_(arena_free)(VG_AR_TTAUX, sec->ec2tte[i]);
724            sec->ec2tte[i] = NULL;
725            sec->ec2tte_size[i] = 0;
726            sec->ec2tte_used[i] = 0;
727         }
728      }
729
730      if (VG_(clo_verbosity) > 2)
731         VG_(message)(Vg_DebugMsg, "TT/TC: recycle sector %d", sno);
732   }
733
734   sec->tc_next = sec->tc;
735   sec->tt_n_inuse = 0;
736
737   invalidateFastCache();
738}
739
740static void invalidate_icache ( void *ptr, Int nbytes )
741{
742#  if defined(VGA_ppc32) || defined(VGA_ppc64)
743   Addr startaddr = (Addr) ptr;
744   Addr endaddr   = startaddr + nbytes;
745   Addr cls;
746   Addr addr;
747   VexArchInfo vai;
748
749   VG_(machine_get_VexArchInfo)( NULL, &vai );
750   cls = vai.ppc_cache_line_szB;
751
752   /* Stay sane .. */
753   vg_assert(cls == 32 || cls == 128);
754
755   startaddr &= ~(cls - 1);
756   for (addr = startaddr; addr < endaddr; addr += cls)
757      asm volatile("dcbst 0,%0" : : "r" (addr));
758   asm volatile("sync");
759   for (addr = startaddr; addr < endaddr; addr += cls)
760      asm volatile("icbi 0,%0" : : "r" (addr));
761   asm volatile("sync; isync");
762
763#  elif defined(VGA_x86)
764   /* no need to do anything, hardware provides coherence */
765
766#  elif defined(VGA_amd64)
767   /* no need to do anything, hardware provides coherence */
768
769#  else
770#    error "Unknown ARCH"
771#  endif
772}
773
774
775/* Add a translation of vge to TT/TC.  The translation is temporarily
776   in code[0 .. code_len-1].
777
778   pre: youngest_sector points to a valid (although possibly full)
779   sector.
780*/
781void VG_(add_to_transtab)( VexGuestExtents* vge,
782                           Addr64           entry,
783                           AddrH            code,
784                           UInt             code_len,
785                           Bool             is_self_checking )
786{
787   Int    tcAvailQ, reqdQ, y, i;
788   ULong  *tce, *tce2;
789   UChar* srcP;
790   UChar* dstP;
791
792   vg_assert(init_done);
793   vg_assert(vge->n_used >= 1 && vge->n_used <= 3);
794   vg_assert(code_len > 0 && code_len < 20000);
795
796   if (0)
797      VG_(printf)("add_to_transtab(entry = 0x%llx, len = %d)\n",
798                  entry, code_len);
799
800   n_in_count++;
801   n_in_tsize += code_len;
802   n_in_osize += vge_osize(vge);
803   if (is_self_checking)
804      n_in_sc_count++;
805
806   y = youngest_sector;
807   vg_assert(isValidSector(y));
808
809   if (sectors[y].tc == NULL)
810      initialiseSector(y);
811
812   /* Try putting the translation in this sector. */
813   reqdQ = 1 + ((code_len + 7) >> 3);
814
815   /* Will it fit in tc? */
816   tcAvailQ = ((ULong*)(&sectors[y].tc[tc_sector_szQ]))
817              - ((ULong*)(sectors[y].tc_next));
818   vg_assert(tcAvailQ >= 0);
819   vg_assert(tcAvailQ <= tc_sector_szQ);
820
821   if (tcAvailQ < reqdQ
822       || sectors[y].tt_n_inuse >= N_TTES_PER_SECTOR_USABLE) {
823      /* No.  So move on to the next sector.  Either it's never been
824         used before, in which case it will get its tt/tc allocated
825         now, or it has been used before, in which case it is set to be
826         empty, hence throwing out the oldest sector. */
827      vg_assert(tc_sector_szQ > 0);
828      VG_(debugLog)(1,"transtab",
829                      "declare sector %d full "
830                      "(TT loading %2d%%, TC loading %2d%%)\n",
831                      y,
832                      (100 * sectors[y].tt_n_inuse)
833                         / N_TTES_PER_SECTOR,
834                      (100 * (tc_sector_szQ - tcAvailQ))
835                         / tc_sector_szQ);
836      youngest_sector++;
837      if (youngest_sector >= N_SECTORS)
838         youngest_sector = 0;
839      y = youngest_sector;
840      initialiseSector(y);
841   }
842
843   /* Be sure ... */
844   tcAvailQ = ((ULong*)(&sectors[y].tc[tc_sector_szQ]))
845              - ((ULong*)(sectors[y].tc_next));
846   vg_assert(tcAvailQ >= 0);
847   vg_assert(tcAvailQ <= tc_sector_szQ);
848   vg_assert(tcAvailQ >= reqdQ);
849   vg_assert(sectors[y].tt_n_inuse < N_TTES_PER_SECTOR_USABLE);
850   vg_assert(sectors[y].tt_n_inuse >= 0);
851
852   /* Copy into tc. */
853   tce = sectors[y].tc_next;
854   vg_assert(tce >= &sectors[y].tc[0]);
855   vg_assert(tce <= &sectors[y].tc[tc_sector_szQ]);
856
857   tce[0] = entry;
858   dstP = (UChar*)(&tce[1]);
859   srcP = (UChar*)code;
860   for (i = 0; i < code_len; i++)
861      dstP[i] = srcP[i];
862   sectors[y].tc_next += reqdQ;
863   sectors[y].tt_n_inuse++;
864
865   invalidate_icache( dstP, code_len );
866
867   /* more paranoia */
868   tce2 = sectors[y].tc_next;
869   vg_assert(tce2 >= &sectors[y].tc[0]);
870   vg_assert(tce2 <= &sectors[y].tc[tc_sector_szQ]);
871
872   /* Find an empty tt slot, and use it.  There must be such a slot
873      since tt is never allowed to get completely full. */
874   i = HASH_TT(entry);
875   vg_assert(i >= 0 && i < N_TTES_PER_SECTOR);
876   while (True) {
877      if (sectors[y].tt[i].status == Empty
878          || sectors[y].tt[i].status == Deleted)
879         break;
880      i++;
881      if (i >= N_TTES_PER_SECTOR)
882         i = 0;
883   }
884
885   sectors[y].tt[i].status = InUse;
886   sectors[y].tt[i].tce    = tce;
887   sectors[y].tt[i].count  = 0;
888   sectors[y].tt[i].weight = 1;
889   sectors[y].tt[i].vge    = *vge;
890   sectors[y].tt[i].entry  = entry;
891
892   /* Update the fast-cache. */
893   setFastCacheEntry( entry, tce, &sectors[y].tt[i].count );
894
895   /* Note the eclass numbers for this translation. */
896   upd_eclasses_after_add( &sectors[y], i );
897}
898
899
900/* Search for the translation of the given guest address.  If
901   requested, a successful search can also cause the fast-caches to be
902   updated.
903*/
904Bool VG_(search_transtab) ( /*OUT*/AddrH* result,
905                            Addr64        guest_addr,
906                            Bool          upd_cache )
907{
908   Int i, j, k, kstart, sno;
909
910   vg_assert(init_done);
911   /* Find the initial probe point just once.  It will be the same in
912      all sectors and avoids multiple expensive % operations. */
913   n_full_lookups++;
914   k      = -1;
915   kstart = HASH_TT(guest_addr);
916   vg_assert(kstart >= 0 && kstart < N_TTES_PER_SECTOR);
917
918   /* Search in all the sectors.  Although the order should not matter,
919      it might be most efficient to search in the order youngest to
920      oldest. */
921   sno = youngest_sector;
922   for (i = 0; i < N_SECTORS; i++) {
923
924      if (sectors[sno].tc == NULL)
925         goto notfound; /* sector not in use. */
926
927      k = kstart;
928      for (j = 0; j < N_TTES_PER_SECTOR; j++) {
929         n_lookup_probes++;
930         if (sectors[sno].tt[k].status == InUse
931             && sectors[sno].tt[k].entry == guest_addr) {
932            /* found it */
933            if (upd_cache)
934               setFastCacheEntry(
935                  guest_addr, sectors[sno].tt[k].tce,
936                              &sectors[sno].tt[k].count );
937            if (result)
938               *result = sizeof(Addr64) + (AddrH)sectors[sno].tt[k].tce;
939            return True;
940         }
941         if (sectors[sno].tt[k].status == Empty)
942            break; /* not found in this sector */
943         k++;
944         if (k == N_TTES_PER_SECTOR)
945            k = 0;
946      }
947
948      /* If we fall off the end, all entries are InUse and not
949         matching, or Deleted.  In any case we did not find it in this
950         sector. */
951
952     notfound:
953      /* move to the next oldest sector */
954      sno = sno==0 ? (N_SECTORS-1) : (sno-1);
955   }
956
957   /* Not found in any sector. */
958   return False;
959}
960
961
962/*-------------------------------------------------------------*/
963/*--- Delete translations.                                  ---*/
964/*-------------------------------------------------------------*/
965
966/* forward */
967static void unredir_discard_translations( Addr64, ULong );
968
969/* Stuff for deleting translations which intersect with a given
970   address range.  Unfortunately, to make this run at a reasonable
971   speed, it is complex. */
972
973static inline
974Bool overlap1 ( Addr64 s1, ULong r1, Addr64 s2, ULong r2 )
975{
976   Addr64 e1 = s1 + r1 - 1ULL;
977   Addr64 e2 = s2 + r2 - 1ULL;
978   if (e1 < s2 || e2 < s1)
979      return False;
980   return True;
981}
982
983static inline
984Bool overlaps ( Addr64 start, ULong range, VexGuestExtents* vge )
985{
986   if (overlap1(start, range, vge->base[0], (UInt)vge->len[0]))
987      return True;
988   if (vge->n_used < 2)
989      return False;
990   if (overlap1(start, range, vge->base[1], (UInt)vge->len[1]))
991      return True;
992   if (vge->n_used < 3)
993      return False;
994   if (overlap1(start, range, vge->base[2], (UInt)vge->len[2]))
995      return True;
996   return False;
997}
998
999
1000/* Delete a tt entry, and update all the eclass data accordingly. */
1001
1002static void delete_tte ( /*MOD*/Sector* sec, Int tteno )
1003{
1004   Int      i, ec_num, ec_idx;
1005   TTEntry* tte;
1006
1007   vg_assert(tteno >= 0 && tteno < N_TTES_PER_SECTOR);
1008   tte = &sec->tt[tteno];
1009   vg_assert(tte->status == InUse);
1010   vg_assert(tte->n_tte2ec >= 1 && tte->n_tte2ec <= 3);
1011
1012   /* Deal with the ec-to-tte links first. */
1013   for (i = 0; i < tte->n_tte2ec; i++) {
1014      ec_num = (Int)tte->tte2ec_ec[i];
1015      ec_idx = tte->tte2ec_ix[i];
1016      vg_assert(ec_num >= 0 && ec_num < ECLASS_N);
1017      vg_assert(ec_idx >= 0);
1018      vg_assert(ec_idx < sec->ec2tte_used[ec_num]);
1019      /* Assert that the two links point at each other. */
1020      vg_assert(sec->ec2tte[ec_num][ec_idx] == (UShort)tteno);
1021      /* "delete" the pointer back to here. */
1022      sec->ec2tte[ec_num][ec_idx] = EC2TTE_DELETED;
1023   }
1024
1025   /* Now fix up this TTEntry. */
1026   tte->status   = Deleted;
1027   tte->n_tte2ec = 0;
1028
1029   /* Stats .. */
1030   sec->tt_n_inuse--;
1031   n_disc_count++;
1032   n_disc_osize += vge_osize(&tte->vge);
1033
1034   /* Tell the tool too. */
1035   if (VG_(needs).basic_block_discards) {
1036      VG_TDICT_CALL( tool_discard_basic_block_info,
1037                     tte->entry,
1038                     tte->vge );
1039   }
1040}
1041
1042
1043/* Delete translations from sec which intersect specified range, but
1044   only consider translations in the specified eclass. */
1045
1046static
1047Bool delete_translations_in_sector_eclass ( /*MOD*/Sector* sec,
1048                                            Addr64 guest_start, ULong range,
1049                                            Int ec )
1050{
1051   Int      i;
1052   UShort   tteno;
1053   Bool     anyDeld = False;
1054   TTEntry* tte;
1055
1056   vg_assert(ec >= 0 && ec < ECLASS_N);
1057
1058   for (i = 0; i < sec->ec2tte_used[ec]; i++) {
1059
1060      tteno = sec->ec2tte[ec][i];
1061      if (tteno == EC2TTE_DELETED) {
1062         /* already deleted */
1063         continue;
1064      }
1065
1066      vg_assert(tteno < N_TTES_PER_SECTOR);
1067
1068      tte = &sec->tt[tteno];
1069      vg_assert(tte->status == InUse);
1070
1071      if (overlaps( guest_start, range, &tte->vge )) {
1072         anyDeld = True;
1073         delete_tte( sec, (Int)tteno );
1074      }
1075
1076   }
1077
1078   return anyDeld;
1079}
1080
1081
1082/* Delete translations from sec which intersect specified range, the
1083   slow way, by inspecting all translations in sec. */
1084
1085static
1086Bool delete_translations_in_sector ( /*MOD*/Sector* sec,
1087                                     Addr64 guest_start, ULong range )
1088{
1089   Int  i;
1090   Bool anyDeld = False;
1091
1092   for (i = 0; i < N_TTES_PER_SECTOR; i++) {
1093      if (sec->tt[i].status == InUse
1094          && overlaps( guest_start, range, &sec->tt[i].vge )) {
1095         anyDeld = True;
1096         delete_tte( sec, i );
1097      }
1098   }
1099
1100   return anyDeld;
1101}
1102
1103
1104void VG_(discard_translations) ( Addr64 guest_start, ULong range,
1105                                 HChar* who )
1106{
1107   Sector* sec;
1108   Int     sno, ec;
1109   Bool    anyDeleted = False;
1110
1111   vg_assert(init_done);
1112
1113   VG_(debugLog)(2, "transtab",
1114                    "discard_translations(0x%llx, %lld) req by %s\n",
1115                    guest_start, range, who );
1116
1117   /* Pre-deletion sanity check */
1118   if (VG_(clo_sanity_level) >= 4) {
1119      Bool sane = sanity_check_all_sectors();
1120      vg_assert(sane);
1121   }
1122
1123   if (range == 0)
1124      return;
1125
1126   /* There are two different ways to do this.
1127
1128      If the range fits within a single address-range equivalence
1129      class, as will be the case for a cache line sized invalidation,
1130      then we only have to inspect the set of translations listed in
1131      that equivalence class, and also in the "sin-bin" equivalence
1132      class ECLASS_MISC.
1133
1134      Otherwise, the invalidation is of a larger range and probably
1135      results from munmap.  In this case it's (probably!) faster just
1136      to inspect all translations, dump those we don't want, and
1137      regenerate the equivalence class information (since modifying it
1138      in-situ is even more expensive).
1139   */
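
   /* For instance (illustrative only): invalidating one 32-byte icache
      line at 0x10004 falls entirely within eclass
      (0x10004 >> 11) & 0xFF == 0x20, so only sec->ec2tte[0x20] and
      sec->ec2tte[ECLASS_MISC] need scanning; whereas discarding a 1MB
      munmap'd region spans many classes, so every TT entry in every
      sector is inspected. */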
1140
1141   /* First off, figure out if the range falls within a single class,
1142      and if so which one. */
1143
1144   ec = ECLASS_MISC;
1145   if (range < (1ULL << ECLASS_SHIFT))
1146      ec = range_to_eclass( guest_start, (UInt)range );
1147
1148   /* if ec is ECLASS_MISC then we aren't looking at just a single
1149      class, so use the slow scheme.  Else use the fast scheme,
1150      examining 'ec' and ECLASS_MISC. */
1151
1152   if (ec != ECLASS_MISC) {
1153
1154      VG_(debugLog)(2, "transtab",
1155                       "                    FAST, ec = %d\n", ec);
1156
1157      /* Fast scheme */
1158      vg_assert(ec >= 0 && ec < ECLASS_MISC);
1159
1160      for (sno = 0; sno < N_SECTORS; sno++) {
1161         sec = &sectors[sno];
1162         if (sec->tc == NULL)
1163            continue;
1164         anyDeleted |= delete_translations_in_sector_eclass(
1165                         sec, guest_start, range, ec );
1166         anyDeleted |= delete_translations_in_sector_eclass(
1167                         sec, guest_start, range, ECLASS_MISC );
1168      }
1169
1170   } else {
1171
1172      /* slow scheme */
1173
1174      VG_(debugLog)(2, "transtab",
1175                       "                    SLOW, ec = %d\n", ec);
1176
1177      for (sno = 0; sno < N_SECTORS; sno++) {
1178         sec = &sectors[sno];
1179         if (sec->tc == NULL)
1180            continue;
1181         anyDeleted |= delete_translations_in_sector(
1182                         sec, guest_start, range );
1183      }
1184
1185   }
1186
1187   if (anyDeleted)
1188      invalidateFastCache();
1189
1190   /* don't forget the no-redir cache */
1191   unredir_discard_translations( guest_start, range );
1192
1193   /* Post-deletion sanity check */
1194   if (VG_(clo_sanity_level) >= 4) {
1195      Int      i;
1196      TTEntry* tte;
1197      Bool     sane = sanity_check_all_sectors();
1198      vg_assert(sane);
1199      /* But now, also check the requested address range isn't
1200         present anywhere. */
1201      for (sno = 0; sno < N_SECTORS; sno++) {
1202         sec = &sectors[sno];
1203         if (sec->tc == NULL)
1204            continue;
1205         for (i = 0; i < N_TTES_PER_SECTOR; i++) {
1206            tte = &sec->tt[i];
1207            if (tte->status != InUse)
1208               continue;
1209            vg_assert(!overlaps( guest_start, range, &tte->vge ));
1210         }
1211      }
1212   }
1213}
1214
1215
1216/*------------------------------------------------------------*/
1217/*--- AUXILIARY: the unredirected TT/TC                    ---*/
1218/*------------------------------------------------------------*/
1219
1220/* A very simple translation cache which holds a small number of
1221   unredirected translations.  This is completely independent of the
1222   main tt/tc structures.  When unredir_tc or unredir_tt becomes full,
1223   both structures are simply dumped and we start over.
1224
1225   Since these translations are unredirected, the search key is (by
1226   definition) the first address entry in the .vge field. */
1227
1228/* Sized to hold 500 translations of average size 1000 bytes. */
1229
1230#define UNREDIR_SZB   1000
1231
1232#define N_UNREDIR_TT  500
1233#define N_UNREDIR_TCQ (N_UNREDIR_TT * UNREDIR_SZB / sizeof(ULong))
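
/* With the values above (illustrative arithmetic): the unredir TC is
   500 * 1000 = 500000 bytes, i.e. N_UNREDIR_TCQ = 62500 ULongs, all
   mmap'd in one go by init_unredir_tt_tc(). */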
1234
1235typedef
1236   struct {
1237      VexGuestExtents vge;
1238      Addr            hcode;
1239      Bool            inUse;
1240   }
1241   UTCEntry;
1242
1243/* We just allocate forwards in _tc, never deleting. */
1244static ULong    *unredir_tc;
1245static Int      unredir_tc_used = N_UNREDIR_TCQ;
1246
1247/* Slots in _tt can come into use and out again (.inUse).
1248   Nevertheless _tt_highwater is maintained so that invalidations
1249   don't have to scan all the slots when only a few are in use.
1250   _tt_highwater holds the index of the highest ever allocated
1251   slot. */
1252static UTCEntry unredir_tt[N_UNREDIR_TT];
1253static Int      unredir_tt_highwater;
1254
1255
1256static void init_unredir_tt_tc ( void )
1257{
1258   Int i;
1259   if (unredir_tc == NULL) {
1260      SysRes sres = VG_(am_mmap_anon_float_valgrind)( N_UNREDIR_TT * UNREDIR_SZB );
1261      if (sres.isError) {
1262         VG_(out_of_memory_NORETURN)("init_unredir_tt_tc", N_UNREDIR_TT * UNREDIR_SZB);
1263         /*NOTREACHED*/
1264      }
1265      unredir_tc = (ULong *)sres.val;
1266   }
1267   unredir_tc_used = 0;
1268   for (i = 0; i < N_UNREDIR_TT; i++)
1269      unredir_tt[i].inUse = False;
1270   unredir_tt_highwater = -1;
1271}
1272
1273/* Sanity-check the unredirected TT/TC; return False on failure. */
1274static Bool sanity_check_redir_tt_tc ( void )
1275{
1276   Int i;
1277   if (unredir_tt_highwater < -1) return False;
1278   if (unredir_tt_highwater >= N_UNREDIR_TT) return False;
1279
1280   for (i = unredir_tt_highwater+1; i < N_UNREDIR_TT; i++)
1281      if (unredir_tt[i].inUse)
1282         return False;
1283
1284   if (unredir_tc_used < 0) return False;
1285   if (unredir_tc_used > N_UNREDIR_TCQ) return False;
1286
1287   return True;
1288}
1289
1290
1291/* Add an UNREDIRECTED translation of vge to TT/TC.  The translation
1292   is temporarily in code[0 .. code_len-1].
1293*/
1294void VG_(add_to_unredir_transtab)( VexGuestExtents* vge,
1295                                   Addr64           entry,
1296                                   AddrH            code,
1297                                   UInt             code_len,
1298                                   Bool             is_self_checking )
1299{
1300   Int   i, j, code_szQ;
1301   HChar *srcP, *dstP;
1302
1303   vg_assert(sanity_check_redir_tt_tc());
1304
1305   /* This is the whole point: it's not redirected! */
1306   vg_assert(entry == vge->base[0]);
1307
1308   /* How many unredir_tc slots (ULongs) are needed */
1309   code_szQ = (code_len + 7) / 8;
1310
1311   /* Look for an empty unredir_tc slot */
1312   for (i = 0; i < N_UNREDIR_TT; i++)
1313      if (!unredir_tt[i].inUse)
1314         break;
1315
1316   if (i >= N_UNREDIR_TT || code_szQ > (N_UNREDIR_TCQ - unredir_tc_used)) {
1317      /* It's full; dump everything we currently have */
1318      init_unredir_tt_tc();
1319      i = 0;
1320   }
1321
1322   vg_assert(unredir_tc_used >= 0);
1323   vg_assert(unredir_tc_used <= N_UNREDIR_TCQ);
1324   vg_assert(code_szQ > 0);
1325   vg_assert(code_szQ + unredir_tc_used <= N_UNREDIR_TCQ);
1326   vg_assert(i >= 0 && i < N_UNREDIR_TT);
1327   vg_assert(unredir_tt[i].inUse == False);
1328
1329   if (i > unredir_tt_highwater)
1330      unredir_tt_highwater = i;
1331
1332   dstP = (HChar*)&unredir_tc[unredir_tc_used];
1333   srcP = (HChar*)code;
1334   for (j = 0; j < code_len; j++)
1335      dstP[j] = srcP[j];
1336
1337   invalidate_icache( dstP, code_len );
1338
1339   unredir_tt[i].inUse = True;
1340   unredir_tt[i].vge   = *vge;
1341   unredir_tt[i].hcode = (Addr)dstP;
1342
1343   unredir_tc_used += code_szQ;
1344   vg_assert(unredir_tc_used >= 0);
1345   vg_assert(unredir_tc_used <= N_UNREDIR_TCQ);
1346
1347   vg_assert(&dstP[code_len] <= (HChar*)&unredir_tc[unredir_tc_used]);
1348}
1349
1350Bool VG_(search_unredir_transtab) ( /*OUT*/AddrH* result,
1351                                    Addr64        guest_addr )
1352{
1353   Int i;
1354   for (i = 0; i < N_UNREDIR_TT; i++) {
1355      if (!unredir_tt[i].inUse)
1356         continue;
1357      if (unredir_tt[i].vge.base[0] == guest_addr) {
1358         *result = (AddrH)unredir_tt[i].hcode;
1359         return True;
1360      }
1361   }
1362   return False;
1363}
1364
1365static void unredir_discard_translations( Addr64 guest_start, ULong range )
1366{
1367   Int i;
1368
1369   vg_assert(sanity_check_redir_tt_tc());
1370
1371   for (i = 0; i <= unredir_tt_highwater; i++) {
1372      if (unredir_tt[i].inUse
1373          && overlaps( guest_start, range, &unredir_tt[i].vge))
1374         unredir_tt[i].inUse = False;
1375   }
1376}
1377
1378
1379/*------------------------------------------------------------*/
1380/*--- Initialisation.                                      ---*/
1381/*------------------------------------------------------------*/
1382
1383void VG_(init_tt_tc) ( void )
1384{
1385   Int i, j, avg_codeszQ;
1386
1387   vg_assert(!init_done);
1388   init_done = True;
1389
1390   /* Otherwise lots of things go wrong... */
1391   vg_assert(sizeof(ULong) == 8);
1392   vg_assert(sizeof(Addr64) == 8);
1393
1394   if (VG_(clo_verbosity) > 2)
1395      VG_(message)(Vg_DebugMsg,
1396                   "TT/TC: VG_(init_tt_tc) "
1397                   "(startup of code management)");
1398
1399   /* Figure out how big each tc area should be.  */
1400   avg_codeszQ   = (VG_(details).avg_translation_sizeB + 7) / 8;
1401   tc_sector_szQ = N_TTES_PER_SECTOR_USABLE * (1 + avg_codeszQ);
1402
1403   /* Ensure the calculated value is not way crazy. */
1404   vg_assert(tc_sector_szQ >= 2 * N_TTES_PER_SECTOR_USABLE);
1405   vg_assert(tc_sector_szQ <= 50 * N_TTES_PER_SECTOR_USABLE);
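
   /* Worked example (hypothetical tool, illustrative only): a tool
      declaring avg_translation_sizeB = 172 would give
      avg_codeszQ = (172 + 7) / 8 = 22 and
      tc_sector_szQ = 52416 * (1 + 22) = 1205568 ULongs, i.e. roughly
      9.2MB of TC per sector, or about 74MB across all 8 sectors. */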
1406
1407   /* Initialise the sectors */
1408   youngest_sector = 0;
1409   for (i = 0; i < N_SECTORS; i++) {
1410      sectors[i].tc = NULL;
1411      sectors[i].tt = NULL;
1412      sectors[i].tc_next = NULL;
1413      sectors[i].tt_n_inuse = 0;
1414      for (j = 0; j < ECLASS_N; j++) {
1415         sectors[i].ec2tte_size[j] = 0;
1416         sectors[i].ec2tte_used[j] = 0;
1417         sectors[i].ec2tte[j] = NULL;
1418      }
1419   }
1420
1421   /* and the fast caches. */
1422   invalidateFastCache();
1423
1424   /* and the unredir tt/tc */
1425   init_unredir_tt_tc();
1426
1427   if (VG_(clo_verbosity) > 2) {
1428      VG_(message)(Vg_DebugMsg,
1429         "TT/TC: cache: %d sectors of %d bytes each = %d total",
1430          N_SECTORS, 8 * tc_sector_szQ,
1431          N_SECTORS * 8 * tc_sector_szQ );
1432      VG_(message)(Vg_DebugMsg,
1433         "TT/TC: table: %d total entries, max occupancy %d (%d%%)",
1434         N_SECTORS * N_TTES_PER_SECTOR,
1435         N_SECTORS * N_TTES_PER_SECTOR_USABLE,
1436         SECTOR_TT_LIMIT_PERCENT );
1437   }
1438
1439   VG_(debugLog)(2, "transtab",
1440      "cache: %d sectors of %d bytes each = %d total\n",
1441       N_SECTORS, 8 * tc_sector_szQ,
1442       N_SECTORS * 8 * tc_sector_szQ );
1443   VG_(debugLog)(2, "transtab",
1444      "table: %d total entries, max occupancy %d (%d%%)\n",
1445      N_SECTORS * N_TTES_PER_SECTOR,
1446      N_SECTORS * N_TTES_PER_SECTOR_USABLE,
1447      SECTOR_TT_LIMIT_PERCENT );
1448}
1449
1450
1451/*------------------------------------------------------------*/
1452/*--- Printing out statistics.                             ---*/
1453/*------------------------------------------------------------*/
1454
1455static ULong safe_idiv( ULong a, ULong b )
1456{
1457   return (b == 0 ? 0 : a / b);
1458}
1459
1460UInt VG_(get_bbs_translated) ( void )
1461{
1462   return n_in_count;
1463}
1464
1465void VG_(print_tt_tc_stats) ( void )
1466{
1467   VG_(message)(Vg_DebugMsg,
1468      "    tt/tc: %,llu tt lookups requiring %,llu probes",
1469      n_full_lookups, n_lookup_probes );
1470   VG_(message)(Vg_DebugMsg,
1471      "    tt/tc: %,llu fast-cache updates, %,llu flushes",
1472      n_fast_updates, n_fast_flushes );
1473
1474   VG_(message)(Vg_DebugMsg,
1475                " transtab: new        %,lld "
1476                "(%,llu -> %,llu; ratio %,llu:10) [%,llu scs]",
1477                n_in_count, n_in_osize, n_in_tsize,
1478                safe_idiv(10*n_in_tsize, n_in_osize),
1479                n_in_sc_count);
1480   VG_(message)(Vg_DebugMsg,
1481                " transtab: dumped     %,llu (%,llu -> ?" "?)",
1482                n_dump_count, n_dump_osize );
1483   VG_(message)(Vg_DebugMsg,
1484                " transtab: discarded  %,llu (%,llu -> ?" "?)",
1485                n_disc_count, n_disc_osize );
1486
1487   if (0) {
1488      Int i;
1489      VG_(printf)("\n");
1490      for (i = 0; i < ECLASS_N; i++) {
1491         VG_(printf)(" %4d", sectors[0].ec2tte_used[i]);
1492         if (i % 16 == 15)
1493            VG_(printf)("\n");
1494      }
1495      VG_(printf)("\n\n");
1496   }
1497}
1498
1499/*------------------------------------------------------------*/
1500/*--- Printing out of profiling results.                   ---*/
1501/*------------------------------------------------------------*/
1502
1503static ULong score ( TTEntry* tte )
1504{
1505   return ((ULong)tte->weight) * ((ULong)tte->count);
1506}
1507
1508ULong VG_(get_BB_profile) ( BBProfEntry tops[], UInt n_tops )
1509{
1510   Int   sno, i, r, s;
1511   ULong score_total;
1512
1513   /* First, compute the total weighted count, and find the top N
1514      ttes.  tops contains the addresses and scores of the n_tops
1515      most-used blocks, in descending order (tops[0] scores highest). */
1516   for (i = 0; i < n_tops; i++) {
1517      tops[i].addr  = 0;
1518      tops[i].score = 0;
1519   }
1520
1521   score_total = 0;
1522
1523   for (sno = 0; sno < N_SECTORS; sno++) {
1524      if (sectors[sno].tc == NULL)
1525         continue;
1526      for (i = 0; i < N_TTES_PER_SECTOR; i++) {
1527         if (sectors[sno].tt[i].status != InUse)
1528            continue;
1529         score_total += score(&sectors[sno].tt[i]);
1530         /* Find the rank for sectors[sno].tt[i]. */
1531         r = n_tops-1;
1532         while (True) {
1533            if (r == -1)
1534               break;
1535            if (tops[r].addr == 0) {
1536               r--;
1537               continue;
1538            }
1539            if ( score(&sectors[sno].tt[i]) > tops[r].score ) {
1540               r--;
1541               continue;
1542            }
1543            break;
1544         }
1545         r++;
1546         vg_assert(r >= 0 && r <= n_tops);
1547         /* This bb should be placed at r, and bbs above it shifted
1548            upwards one slot. */
1549         if (r < n_tops) {
1550            for (s = n_tops-1; s > r; s--)
1551               tops[s] = tops[s-1];
1552            tops[r].addr  = sectors[sno].tt[i].entry;
1553            tops[r].score = score( &sectors[sno].tt[i] );
1554         }
1555      }
1556   }
1557
1558   return score_total;
1559}
1560
1561/*--------------------------------------------------------------------*/
1562/*--- end                                                          ---*/
1563/*--------------------------------------------------------------------*/
1564