m_transtab.c revision 9f207460d70d38c46c9e81996a3dcdf90961c6db
1
2/*--------------------------------------------------------------------*/
3/*--- Management of the translation table and cache.               ---*/
4/*---                                                 m_transtab.c ---*/
5/*--------------------------------------------------------------------*/
6
7/*
8   This file is part of Valgrind, a dynamic binary instrumentation
9   framework.
10
11   Copyright (C) 2000-2009 Julian Seward
12      jseward@acm.org
13
14   This program is free software; you can redistribute it and/or
15   modify it under the terms of the GNU General Public License as
16   published by the Free Software Foundation; either version 2 of the
17   License, or (at your option) any later version.
18
19   This program is distributed in the hope that it will be useful, but
20   WITHOUT ANY WARRANTY; without even the implied warranty of
21   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22   General Public License for more details.
23
24   You should have received a copy of the GNU General Public License
25   along with this program; if not, write to the Free Software
26   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27   02111-1307, USA.
28
29   The GNU General Public License is contained in the file COPYING.
30*/
31
32#include "pub_core_basics.h"
33#include "pub_core_debuglog.h"
34#include "pub_core_machine.h"    // For VG(machine_get_VexArchInfo)
35#include "pub_core_libcbase.h"
36#include "pub_core_libcassert.h"
37#include "pub_core_libcprint.h"
38#include "pub_core_options.h"
39#include "pub_core_tooliface.h"  // For VG_(details).avg_translation_sizeB
40#include "pub_core_transtab.h"
41#include "pub_core_aspacemgr.h"
42#include "pub_core_mallocfree.h" // VG_(out_of_memory_NORETURN)
43
44/* #define DEBUG_TRANSTAB */
45
46
47/*-------------------------------------------------------------*/
48/*--- Management of the FIFO-based translation table+cache. ---*/
49/*-------------------------------------------------------------*/
50
51/*------------------ CONSTANTS ------------------*/
52
53/* Number of sectors the TC is divided into.  If you need a larger
54   overall translation cache, increase this value. */
55#define N_SECTORS 8
56
57/* Number of TC entries in each sector.  This needs to be a prime
58   number to work properly, it must be <= 65535 (so that a TT index
59   fits in a UShort, leaving room for 0xFFFF(EC2TTE_DELETED) to denote
60   'deleted') and it is strongly recommended not to change this.
61   65521 is the largest prime <= 65535. */
62#define N_TTES_PER_SECTOR /*30011*/ /*40009*/ 65521
63
64/* Because each sector contains a hash table of TTEntries, we need to
65   specify the maximum allowable loading, after which the sector is
66   deemed full. */
67#define SECTOR_TT_LIMIT_PERCENT 80
68
69/* The sector is deemed full when this many entries are in it. */
70#define N_TTES_PER_SECTOR_USABLE \
71           ((N_TTES_PER_SECTOR * SECTOR_TT_LIMIT_PERCENT) / 100)
72
73/* Equivalence classes for fast address range deletion.  There are 1 +
74   2^ECLASS_WIDTH bins.  The highest one, ECLASS_MISC, describes an
75   address range which does not fall cleanly within any specific bin.
76   Note that ECLASS_SHIFT + ECLASS_WIDTH must be < 32. */
77#define ECLASS_SHIFT 11
78#define ECLASS_WIDTH 8
79#define ECLASS_MISC  (1 << ECLASS_WIDTH)
80#define ECLASS_N     (1 + ECLASS_MISC)
81
82#define EC2TTE_DELETED  0xFFFF /* 16-bit special value */
83
84
85/*------------------ TYPES ------------------*/
86
/* A translation-table entry.  This indicates precisely which areas of
   guest code are included in the translation, and contains all other
   auxiliary info too.  */
typedef
   struct {
      /* Profiling only: the count and weight (arbitrary meaning) for
         this translation.  Weight is a property of the translation
         itself and computed once when the translation is created.
         Count is an entry count for the translation and is
         incremented by 1 every time the translation is used, if we
         are profiling. */
      UInt     count;
      UShort   weight;

      /* Status of the slot.  Note, we need to be able to do lazy
         deletion, hence the Deleted state. */
      enum { InUse, Deleted, Empty } status;

      /* 64-bit aligned pointer to one or more 64-bit words containing
         the corresponding host code (must be in the same sector!)
         This is a pointer into the sector's tc (code) area. */
      ULong* tcptr;

      /* This is the original guest address that purportedly is the
         entry point of the translation.  You might think that .entry
         should be the same as .vge->base[0], and most of the time it
         is.  However, when doing redirections, that is not the case.
         .vge must always correctly describe the guest code sections
         from which this translation was made.  However, .entry may or
         may not be a lie, depending on whether or not we're doing
         redirection. */
      Addr64 entry;

      /* This structure describes precisely what ranges of guest code
         the translation covers, so we can decide whether or not to
         delete it when translations of a given address range are
         invalidated. */
      VexGuestExtents vge;

      /* Address range summary info: these are pointers back to
         eclass[] entries in the containing Sector.  Those entries in
         turn point back here -- the two structures are mutually
         redundant but both necessary to make fast deletions work.
         The eclass info is similar to, and derived from, this entry's
         'vge' field, but it is not the same. */
      UShort n_tte2ec;      // # tte2ec pointers (1 to 3)
      UShort tte2ec_ec[3];  // for each, the eclass #
      UInt   tte2ec_ix[3];  // and the index within the eclass.
      // for i in 0 .. n_tte2ec-1
      //    sec->ec2tte[ tte2ec_ec[i] ][ tte2ec_ix[i] ]
      // should be the index
      // of this TTEntry in the containing Sector's tt array.
   }
   TTEntry;
141
142
/* Finally, a sector itself.  Each sector contains an array of
   TCEntries, which hold code, and an array of TTEntries, containing
   all required administrative info.  Profiling is supported using the
   TTEntry .count and .weight fields, if required.  Each sector is
   independent in that no cross-sector references are allowed.

   If the sector is not in use, all three pointers are NULL and
   tt_n_inuse is zero.
*/
typedef
   struct {
      /* The TCEntry area.  Size of this depends on the average
         translation size.  We try and size it so it becomes full
         precisely when this sector's translation table (tt) reaches
         its load limit (SECTOR_TT_LIMIT_PERCENT). */
      ULong* tc;

      /* The TTEntry array.  This is a fixed size, always containing
         exactly N_TTES_PER_SECTOR entries. */
      TTEntry* tt;

      /* This points to the current allocation point in tc. */
      ULong* tc_next;

      /* The count of tt entries with state InUse. */
      Int tt_n_inuse;

      /* Expandable arrays of tt indices for each of the ECLASS_N
         address range equivalence classes.  These hold indices into
         the containing sector's tt array, which in turn should point
         back here.  A slot holding EC2TTE_DELETED marks a lazily
         deleted entry. */
      Int     ec2tte_size[ECLASS_N];
      Int     ec2tte_used[ECLASS_N];
      UShort* ec2tte[ECLASS_N];
   }
   Sector;
179
180
181/*------------------ DECLS ------------------*/
182
/* The root data structure is an array of sectors.  The index of the
   youngest sector is recorded, and new translations are put into that
   sector.  When it fills up, we move along to the next sector and
   start to fill that up, wrapping around at the end of the array.
   That way, once all N_SECTORS have been brought into use for the
   first time, and are full, we then re-use the oldest sector,
   endlessly.

   When running, the youngest sector index is always >= 0 and <
   N_SECTORS.  The initial -1 value indicates the TT/TC system is
   not yet initialised.
*/
195static Sector sectors[N_SECTORS];
196static Int    youngest_sector = -1;
197
198/* The number of ULongs in each TCEntry area.  This is computed once
199   at startup and does not change. */
200static Int    tc_sector_szQ;
201
202
203/* Fast helper for the TC.  A direct-mapped cache which holds a set of
204   recently used (guest address, host address) pairs.  This array is
205   referred to directly from m_dispatch/dispatch-<platform>.S.
206
207   Entries in tt_fast may refer to any valid TC entry, regardless of
208   which sector it's in.  Consequently we must be very careful to
209   invalidate this cache when TC entries are changed or disappear.
210
211   A special .guest address - TRANSTAB_BOGUS_GUEST_ADDR -- must be
212   pointed at to cause that cache entry to miss.  This relies on the
213   assumption that no guest code actually has that address, hence a
214   value 0x1 seems good.  m_translate gives the client a synthetic
215   segfault if it tries to execute at this address.
216*/
217/*
218typedef
219   struct {
220      Addr guest;
221      Addr host;
222   }
223   FastCacheEntry;
224*/
225/*global*/ __attribute__((aligned(16)))
226           FastCacheEntry VG_(tt_fast)[VG_TT_FAST_SIZE];
227/*
228#define TRANSTAB_BOGUS_GUEST_ADDR ((Addr)1)
229*/
230
231/* For profiling, we have a parallel array of pointers to .count
232   fields in TT entries.  Again, these pointers must be invalidated
233   when translations disappear.  A NULL pointer suffices to indicate
234   an unused slot.
235
236   When not profiling (the normal case, VG_(clo_profile_flags) == 0),
237   all tt_fastN entries are set to NULL at startup and never read nor
238   written after that.
239
240   When profiling (VG_(clo_profile_flags) > 0), tt_fast and tt_fastN
241   change together: if tt_fast[i].guest is TRANSTAB_BOGUS_GUEST_ADDR
242   then the corresponding tt_fastN[i] must be null.  If
243   tt_fast[i].guest is any other value, then tt_fastN[i] *must* point
244   to the .count field of the corresponding TT entry.
245
246   tt_fast and tt_fastN are referred to from assembly code
247   (dispatch.S).
248*/
249/*global*/ UInt* VG_(tt_fastN)[VG_TT_FAST_SIZE];
250
251
252/* Make sure we're not used before initialisation. */
253static Bool init_done = False;
254
255
256/*------------------ STATS DECLS ------------------*/
257
258/* Number of fast-cache updates and flushes done. */
259ULong n_fast_flushes = 0;
260ULong n_fast_updates = 0;
261
262/* Number of full lookups done. */
263ULong n_full_lookups = 0;
264ULong n_lookup_probes = 0;
265
266/* Number/osize/tsize of translations entered; also the number of
267   those for which self-checking was requested. */
268ULong n_in_count    = 0;
269ULong n_in_osize    = 0;
270ULong n_in_tsize    = 0;
271ULong n_in_sc_count = 0;
272
273/* Number/osize of translations discarded due to lack of space. */
274ULong n_dump_count = 0;
275ULong n_dump_osize = 0;
276
277/* Number/osize of translations discarded due to requests to do so. */
278ULong n_disc_count = 0;
279ULong n_disc_osize = 0;
280
281
282/*-------------------------------------------------------------*/
283/*--- Address-range equivalence class stuff                 ---*/
284/*-------------------------------------------------------------*/
285
286/* Return equivalence class number for a range. */
287
288static Int range_to_eclass ( Addr64 start, UInt len )
289{
290   UInt mask   = (1 << ECLASS_WIDTH) - 1;
291   UInt lo     = (UInt)start;
292   UInt hi     = lo + len - 1;
293   UInt loBits = (lo >> ECLASS_SHIFT) & mask;
294   UInt hiBits = (hi >> ECLASS_SHIFT) & mask;
295   if (loBits == hiBits) {
296      vg_assert(loBits < ECLASS_N-1);
297      return loBits;
298   } else {
299      return ECLASS_MISC;
300   }
301}
302
303
304/* Calculates the equivalence class numbers for any VexGuestExtent.
305   These are written in *eclasses, which must be big enough to hold 3
306   Ints.  The number written, between 1 and 3, is returned.  The
307   eclasses are presented in order, and any duplicates are removed.
308*/
309
310static
311Int vexGuestExtents_to_eclasses ( /*OUT*/Int* eclasses,
312                                  VexGuestExtents* vge )
313{
314#  define SWAP(_lv1,_lv2) \
315      do { Int t = _lv1; _lv1 = _lv2; _lv2 = t; } while (0)
316
317   Int i, j, n_ec, r;
318
319   vg_assert(vge->n_used >= 1 && vge->n_used <= 3);
320
321   n_ec = 0;
322   for (i = 0; i < vge->n_used; i++) {
323      r = range_to_eclass( vge->base[i], (UInt)vge->len[i] );
324      if (r == ECLASS_MISC)
325         goto bad;
326      /* only add if we haven't already seen it */
327      for (j = 0; j < n_ec; j++)
328         if (eclasses[j] == r)
329            break;
330      if (j == n_ec)
331         eclasses[n_ec++] = r;
332   }
333
334   if (n_ec == 1)
335      return 1;
336
337   if (n_ec == 2) {
338      /* sort */
339      if (eclasses[0] > eclasses[1])
340         SWAP(eclasses[0], eclasses[1]);
341      return 2;
342   }
343
344   if (n_ec == 3) {
345      /* sort */
346      if (eclasses[0] > eclasses[2])
347         SWAP(eclasses[0], eclasses[2]);
348      if (eclasses[0] > eclasses[1])
349         SWAP(eclasses[0], eclasses[1]);
350      if (eclasses[1] > eclasses[2])
351         SWAP(eclasses[1], eclasses[2]);
352      return 3;
353   }
354
355   /* NOTREACHED */
356   vg_assert(0);
357
358  bad:
359   eclasses[0] = ECLASS_MISC;
360   return 1;
361
362#  undef SWAP
363}
364
365
366/* Add tteno to the set of entries listed for equivalence class ec in
367   this sector.  Returns used location in eclass array. */
368
369static
370UInt addEClassNo ( /*MOD*/Sector* sec, Int ec, UShort tteno )
371{
372   Int    old_sz, new_sz, i, r;
373   UShort *old_ar, *new_ar;
374
375   vg_assert(ec >= 0 && ec < ECLASS_N);
376   vg_assert(tteno < N_TTES_PER_SECTOR);
377
378   if (0) VG_(printf)("ec %d  gets %d\n", ec, (Int)tteno);
379
380   if (sec->ec2tte_used[ec] >= sec->ec2tte_size[ec]) {
381
382      vg_assert(sec->ec2tte_used[ec] == sec->ec2tte_size[ec]);
383
384      old_sz = sec->ec2tte_size[ec];
385      old_ar = sec->ec2tte[ec];
386      new_sz = old_sz==0 ? 8 : old_sz<64 ? 2*old_sz : (3*old_sz)/2;
387      new_ar = VG_(arena_malloc)(VG_AR_TTAUX, "transtab.aECN.1",
388                                 new_sz * sizeof(UShort));
389      for (i = 0; i < old_sz; i++)
390         new_ar[i] = old_ar[i];
391      if (old_ar)
392         VG_(arena_free)(VG_AR_TTAUX, old_ar);
393      sec->ec2tte_size[ec] = new_sz;
394      sec->ec2tte[ec] = new_ar;
395
396      if (0) VG_(printf)("expand ec %d to %d\n", ec, new_sz);
397   }
398
399   /* Common case */
400   r = sec->ec2tte_used[ec]++;
401   vg_assert(r >= 0 && r < sec->ec2tte_size[ec]);
402   sec->ec2tte[ec][r] = tteno;
403   return (UInt)r;
404}
405
406
407/* 'vge' is being added to 'sec' at TT entry 'tteno'.  Add appropriate
408   eclass entries to 'sec'. */
409
410static
411void upd_eclasses_after_add ( /*MOD*/Sector* sec, Int tteno )
412{
413   Int i, r, eclasses[3];
414   TTEntry* tte;
415   vg_assert(tteno >= 0 && tteno < N_TTES_PER_SECTOR);
416
417   tte = &sec->tt[tteno];
418   r = vexGuestExtents_to_eclasses( eclasses, &tte->vge );
419
420   vg_assert(r >= 1 && r <= 3);
421   tte->n_tte2ec = r;
422
423   for (i = 0; i < r; i++) {
424      tte->tte2ec_ec[i] = eclasses[i];
425      tte->tte2ec_ix[i] = addEClassNo( sec, eclasses[i], (UShort)tteno );
426   }
427}
428
429
/* Check the eclass info in 'sec' to ensure it is consistent.  Returns
   True if OK, False if something's not right.  Expensive.  Checks run
   in both directions: every eclass back-pointer in the tt entries
   must be matched by a forward pointer in ec2tte[], and vice versa. */

static Bool sanity_check_eclasses_in_sector ( Sector* sec )
{
#  define BAD(_str) do { whassup = (_str); goto bad; } while (0)

   HChar*   whassup = NULL;
   Int      i, j, k, n, ec_num, ec_idx;
   TTEntry* tte;
   UShort   tteno;
   ULong*   tce;

   /* Basic checks on this sector */
   if (sec->tt_n_inuse < 0 || sec->tt_n_inuse > N_TTES_PER_SECTOR_USABLE)
      BAD("invalid sec->tt_n_inuse");
   tce = sec->tc_next;
   if (tce < &sec->tc[0] || tce > &sec->tc[tc_sector_szQ])
      BAD("sec->tc_next points outside tc");

   /* For each eclass ... */
   for (i = 0; i < ECLASS_N; i++) {
      if (sec->ec2tte_size[i] == 0 && sec->ec2tte[i] != NULL)
         BAD("ec2tte_size/ec2tte mismatch(1)");
      if (sec->ec2tte_size[i] != 0 && sec->ec2tte[i] == NULL)
         BAD("ec2tte_size/ec2tte mismatch(2)");
      if (sec->ec2tte_used[i] < 0
          || sec->ec2tte_used[i] > sec->ec2tte_size[i])
         BAD("implausible ec2tte_used");
      if (sec->ec2tte_used[i] == 0)
         continue;

      /* For each tt reference in each eclass .. ensure the reference
         is to a valid tt entry, and that the entry's address ranges
         really include this eclass. */

      for (j = 0; j < sec->ec2tte_used[i]; j++) {
         tteno = sec->ec2tte[i][j];
         if (tteno == EC2TTE_DELETED)
            continue;   /* lazily-deleted slot; nothing to check */
         if (tteno >= N_TTES_PER_SECTOR)
            BAD("implausible tteno");
         tte = &sec->tt[tteno];
         if (tte->status != InUse)
            BAD("tteno points to non-inuse tte");
         if (tte->n_tte2ec < 1 || tte->n_tte2ec > 3)
            BAD("tte->n_tte2ec out of range");
         /* Exactly one of tte->tte2ec_ec[0 .. tte->n_tte2ec-1]
            must equal i, with tte2ec_ix pointing back at slot j.
            Inspect tte's eclass info. */
         n = 0;
         for (k = 0; k < tte->n_tte2ec; k++) {
            if (k < tte->n_tte2ec-1
                && tte->tte2ec_ec[k] >= tte->tte2ec_ec[k+1])
               BAD("tte->tte2ec_ec[..] out of order");
            ec_num = tte->tte2ec_ec[k];
            if (ec_num < 0 || ec_num >= ECLASS_N)
               BAD("tte->tte2ec_ec[..] out of range");
            if (ec_num != i)
               continue;
            ec_idx = tte->tte2ec_ix[k];
            if (ec_idx < 0 || ec_idx >= sec->ec2tte_used[i])
               BAD("tte->tte2ec_ix[..] out of range");
            if (ec_idx == j)
               n++;
         }
         if (n != 1)
            BAD("tteno does not point back at eclass");
      }
   }

   /* That establishes that for each forward pointer from TTEntrys
      there is a corresponding backward pointer from the eclass[]
      arrays.  However, it doesn't rule out the possibility of other,
      bogus pointers in the eclass[] arrays.  So do those similarly:
      scan through them and check the TTEntries they point at point
      back. */

   for (i = 0; i < N_TTES_PER_SECTOR_USABLE; i++) {

      tte = &sec->tt[i];
      if (tte->status == Empty || tte->status == Deleted) {
         if (tte->n_tte2ec != 0)
            BAD("tte->n_eclasses nonzero for unused tte");
         continue;
      }

      vg_assert(tte->status == InUse);

      if (tte->n_tte2ec < 1 || tte->n_tte2ec > 3)
         BAD("tte->n_eclasses out of range(2)");

      for (j = 0; j < tte->n_tte2ec; j++) {
         ec_num = tte->tte2ec_ec[j];
         if (ec_num < 0 || ec_num >= ECLASS_N)
            BAD("tte->eclass[..] out of range");
         ec_idx = tte->tte2ec_ix[j];
         if (ec_idx < 0 || ec_idx >= sec->ec2tte_used[ec_num])
            BAD("tte->ec_idx[..] out of range(2)");
         if (sec->ec2tte[ec_num][ec_idx] != i)
            BAD("ec2tte does not point back to tte");
      }
   }

   return True;

  bad:
   if (whassup)
      VG_(debugLog)(0, "transtab", "eclass sanity fail: %s\n", whassup);

#  if 0
   VG_(printf)("eclass = %d\n", i);
   VG_(printf)("tteno = %d\n", (Int)tteno);
   switch (tte->status) {
      case InUse:   VG_(printf)("InUse\n"); break;
      case Deleted: VG_(printf)("Deleted\n"); break;
      case Empty:   VG_(printf)("Empty\n"); break;
   }
   if (tte->status != Empty) {
      for (k = 0; k < tte->vge.n_used; k++)
         VG_(printf)("0x%llx %d\n", tte->vge.base[k],
                                    (Int)tte->vge.len[k]);
   }
#  endif

   return False;

#  undef BAD
}
558
559
560/* Sanity check absolutely everything.  True == check passed. */
561
562/* forwards */
563static Bool sanity_check_redir_tt_tc ( void );
564static Bool sanity_check_fastcache ( void );
565
566static Bool sanity_check_all_sectors ( void )
567{
568   Int     sno;
569   Bool    sane;
570   Sector* sec;
571   for (sno = 0; sno < N_SECTORS; sno++) {
572      sec = &sectors[sno];
573      if (sec->tc == NULL)
574         continue;
575      sane = sanity_check_eclasses_in_sector( sec );
576      if (!sane)
577         return False;
578   }
579   if ( !sanity_check_redir_tt_tc() )
580      return False;
581   if ( !sanity_check_fastcache() )
582      return False;
583   return True;
584}
585
586
587/*-------------------------------------------------------------*/
588/*--- Add/find translations                                 ---*/
589/*-------------------------------------------------------------*/
590
591static UInt vge_osize ( VexGuestExtents* vge )
592{
593   UInt i, n = 0;
594   for (i = 0; i < vge->n_used; i++)
595      n += (UInt)vge->len[i];
596   return n;
597}
598
599static Bool isValidSector ( Int sector )
600{
601   if (sector < 0 || sector >= N_SECTORS)
602      return False;
603   return True;
604}
605
606static inline UInt HASH_TT ( Addr64 key )
607{
608   UInt kHi = (UInt)(key >> 32);
609   UInt kLo = (UInt)key;
610   UInt k32 = kHi ^ kLo;
611   UInt ror = 7;
612   if (ror > 0)
613      k32 = (k32 >> ror) | (k32 << (32-ror));
614   return k32 % N_TTES_PER_SECTOR;
615}
616
617static void setFastCacheEntry ( Addr64 key, ULong* tcptr, UInt* count )
618{
619   UInt cno = (UInt)VG_TT_FAST_HASH(key);
620   VG_(tt_fast)[cno].guest = (Addr)key;
621   VG_(tt_fast)[cno].host  = (Addr)tcptr;
622   if (VG_(clo_profile_flags) > 0)
623      VG_(tt_fastN)[cno] = count;
624   n_fast_updates++;
625   /* This shouldn't fail.  It should be assured by m_translate
626      which should reject any attempt to make translation of code
627      starting at TRANSTAB_BOGUS_GUEST_ADDR. */
628   vg_assert(VG_(tt_fast)[cno].guest != TRANSTAB_BOGUS_GUEST_ADDR);
629}
630
631/* Invalidate the fast cache's counter array, VG_(tt_fastN). */
632static void invalidateFastNCache ( void )
633{
634   UInt j;
635   vg_assert(VG_TT_FAST_SIZE > 0 && (VG_TT_FAST_SIZE % 4) == 0);
636   for (j = 0; j < VG_TT_FAST_SIZE; j += 4) {
637      VG_(tt_fastN)[j+0] = NULL;
638      VG_(tt_fastN)[j+1] = NULL;
639      VG_(tt_fastN)[j+2] = NULL;
640      VG_(tt_fastN)[j+3] = NULL;
641   }
642   vg_assert(j == VG_TT_FAST_SIZE);
643}
644
645/* Invalidate the fast cache VG_(tt_fast).  If profiling, also
646   invalidate the fast cache's counter array VG_(tt_fastN), otherwise
647   don't touch it. */
648static void invalidateFastCache ( void )
649{
650   UInt j;
651   /* This loop is popular enough to make it worth unrolling a
652      bit, at least on ppc32. */
653   vg_assert(VG_TT_FAST_SIZE > 0 && (VG_TT_FAST_SIZE % 4) == 0);
654   for (j = 0; j < VG_TT_FAST_SIZE; j += 4) {
655      VG_(tt_fast)[j+0].guest = TRANSTAB_BOGUS_GUEST_ADDR;
656      VG_(tt_fast)[j+1].guest = TRANSTAB_BOGUS_GUEST_ADDR;
657      VG_(tt_fast)[j+2].guest = TRANSTAB_BOGUS_GUEST_ADDR;
658      VG_(tt_fast)[j+3].guest = TRANSTAB_BOGUS_GUEST_ADDR;
659   }
660
661   if (VG_(clo_profile_flags) > 0)
662      invalidateFastNCache();
663
664   vg_assert(j == VG_TT_FAST_SIZE);
665   n_fast_flushes++;
666}
667
668static Bool sanity_check_fastcache ( void )
669{
670   UInt j;
671   if (0) VG_(printf)("sanity check fastcache\n");
672   if (VG_(clo_profile_flags) > 0) {
673      /* profiling */
674      for (j = 0; j < VG_TT_FAST_SIZE; j++) {
675         if (VG_(tt_fastN)[j] == NULL
676             && VG_(tt_fast)[j].guest != TRANSTAB_BOGUS_GUEST_ADDR)
677            return False;
678         if (VG_(tt_fastN)[j] != NULL
679             && VG_(tt_fast)[j].guest == TRANSTAB_BOGUS_GUEST_ADDR)
680            return False;
681      }
682   } else {
683      /* not profiling */
684      for (j = 0; j < VG_TT_FAST_SIZE; j++) {
685         if (VG_(tt_fastN)[j] != NULL)
686            return False;
687      }
688   }
689   return True;
690}
691
/* (Re)initialise sector sno so it is empty and ready to receive
   translations.  On first use this mmaps the sector's tc and tt
   arrays; on re-use it discards all existing translations (notifying
   the tool of each) and frees the eclass structures.  Either way the
   fast cache is flushed afterwards, since it may hold pointers into
   this sector. */
static void initialiseSector ( Int sno )
{
   Int    i;
   SysRes sres;
   Sector* sec;
   vg_assert(isValidSector(sno));

   sec = &sectors[sno];

   if (sec->tc == NULL) {

      /* Sector has never been used before.  Need to allocate tt and
         tc. */
      vg_assert(sec->tt == NULL);
      vg_assert(sec->tc_next == NULL);
      vg_assert(sec->tt_n_inuse == 0);
      for (i = 0; i < ECLASS_N; i++) {
         vg_assert(sec->ec2tte_size[i] == 0);
         vg_assert(sec->ec2tte_used[i] == 0);
         vg_assert(sec->ec2tte[i] == NULL);
      }

      VG_(debugLog)(1,"transtab", "allocate sector %d\n", sno);

      /* tc holds tc_sector_szQ 64-bit words, hence * 8 bytes. */
      sres = VG_(am_mmap_anon_float_valgrind)( 8 * tc_sector_szQ );
      if (sres.isError) {
         VG_(out_of_memory_NORETURN)("initialiseSector(TC)",
                                     8 * tc_sector_szQ );
	 /*NOTREACHED*/
      }
      sec->tc = (ULong*)sres.res;

      sres = VG_(am_mmap_anon_float_valgrind)
                ( N_TTES_PER_SECTOR * sizeof(TTEntry) );
      if (sres.isError) {
         VG_(out_of_memory_NORETURN)("initialiseSector(TT)",
                                     N_TTES_PER_SECTOR * sizeof(TTEntry) );
	 /*NOTREACHED*/
      }
      sec->tt = (TTEntry*)sres.res;

      /* Mark every tt slot empty. */
      for (i = 0; i < N_TTES_PER_SECTOR; i++) {
         sec->tt[i].status   = Empty;
         sec->tt[i].n_tte2ec = 0;
      }

      if (VG_(clo_verbosity) > 2)
         VG_(message)(Vg_DebugMsg, "TT/TC: initialise sector %d", sno);

   } else {

      /* Sector has been used before.  Dump the old contents. */
      VG_(debugLog)(1,"transtab", "recycle sector %d\n", sno);
      vg_assert(sec->tt != NULL);
      vg_assert(sec->tc_next != NULL);
      n_dump_count += sec->tt_n_inuse;

      /* Visit each just-about-to-be-abandoned translation. */
      for (i = 0; i < N_TTES_PER_SECTOR; i++) {
         if (sec->tt[i].status == InUse) {
            vg_assert(sec->tt[i].n_tte2ec >= 1);
            vg_assert(sec->tt[i].n_tte2ec <= 3);
            n_dump_osize += vge_osize(&sec->tt[i].vge);
            /* Tell the tool too. */
            if (VG_(needs).superblock_discards) {
               VG_TDICT_CALL( tool_discard_superblock_info,
                              sec->tt[i].entry,
                              sec->tt[i].vge );
            }
         } else {
            vg_assert(sec->tt[i].n_tte2ec == 0);
         }
         sec->tt[i].status   = Empty;
         sec->tt[i].n_tte2ec = 0;
      }

      /* Free up the eclass structures. */
      for (i = 0; i < ECLASS_N; i++) {
         if (sec->ec2tte_size[i] == 0) {
            vg_assert(sec->ec2tte_used[i] == 0);
            vg_assert(sec->ec2tte[i] == NULL);
         } else {
            vg_assert(sec->ec2tte[i] != NULL);
            VG_(arena_free)(VG_AR_TTAUX, sec->ec2tte[i]);
            sec->ec2tte[i] = NULL;
            sec->ec2tte_size[i] = 0;
            sec->ec2tte_used[i] = 0;
         }
      }

      if (VG_(clo_verbosity) > 2)
         VG_(message)(Vg_DebugMsg, "TT/TC: recycle sector %d", sno);
   }

   /* Reset the allocation state, then flush the fast cache, which
      may contain stale pointers into this sector. */
   sec->tc_next = sec->tc;
   sec->tt_n_inuse = 0;

   invalidateFastCache();
}
791
/* Make the data just written at [ptr, ptr+nbytes) visible to the
   host's instruction fetch, so the generated code can be executed.
   ppc needs explicit cache management; x86/amd64 hardware keeps the
   I and D sides coherent by itself. */
static void invalidate_icache ( void *ptr, Int nbytes )
{
#  if defined(VGA_ppc32) || defined(VGA_ppc64)
   Addr startaddr = (Addr) ptr;
   Addr endaddr   = startaddr + nbytes;
   Addr cls;
   Addr addr;
   VexArchInfo vai;

   if (nbytes == 0) return;
   vg_assert(nbytes > 0);

   /* Obtain the host's cache line size. */
   VG_(machine_get_VexArchInfo)( NULL, &vai );
   cls = vai.ppc_cache_line_szB;

   /* Stay sane .. */
   vg_assert(cls == 32 || cls == 64 || cls == 128);

   /* Round the start down to a cache-line boundary, then walk the
      range a line at a time: dcbst pushes each D-cache line out to
      memory, icbi then discards any stale I-cache copy. */
   startaddr &= ~(cls - 1);
   for (addr = startaddr; addr < endaddr; addr += cls)
      asm volatile("dcbst 0,%0" : : "r" (addr));
   asm volatile("sync");
   for (addr = startaddr; addr < endaddr; addr += cls)
      asm volatile("icbi 0,%0" : : "r" (addr));
   asm volatile("sync; isync");

#  elif defined(VGA_x86)
   /* no need to do anything, hardware provides coherence */

#  elif defined(VGA_amd64)
   /* no need to do anything, hardware provides coherence */

#  else
#    error "Unknown ARCH"
#  endif
}
828
829
/* Add a translation of vge to TT/TC.  The translation is temporarily
   in code[0 .. code_len-1].  'entry' is the guest address the
   dispatcher will look the translation up by; 'is_self_checking'
   only affects the statistics kept.

   pre: youngest_sector points to a valid (although possibly full)
   sector.
*/
void VG_(add_to_transtab)( VexGuestExtents* vge,
                           Addr64           entry,
                           AddrH            code,
                           UInt             code_len,
                           Bool             is_self_checking )
{
   Int    tcAvailQ, reqdQ, y, i;
   ULong  *tcptr, *tcptr2;
   UChar* srcP;
   UChar* dstP;

   vg_assert(init_done);
   vg_assert(vge->n_used >= 1 && vge->n_used <= 3);

   /* 60000: should agree with N_TMPBUF in m_translate.c. */
   vg_assert(code_len > 0 && code_len < 60000);

   if (0)
      VG_(printf)("add_to_transtab(entry = 0x%llx, len = %d)\n",
                  entry, code_len);

   /* Update admission statistics. */
   n_in_count++;
   n_in_tsize += code_len;
   n_in_osize += vge_osize(vge);
   if (is_self_checking)
      n_in_sc_count++;

   y = youngest_sector;
   vg_assert(isValidSector(y));

   if (sectors[y].tc == NULL)
      initialiseSector(y);

   /* Try putting the translation in this sector.  reqdQ is the space
      needed in tc, in 64-bit words, rounded up. */
   reqdQ = (code_len + 7) >> 3;

   /* Will it fit in tc? */
   tcAvailQ = ((ULong*)(&sectors[y].tc[tc_sector_szQ]))
              - ((ULong*)(sectors[y].tc_next));
   vg_assert(tcAvailQ >= 0);
   vg_assert(tcAvailQ <= tc_sector_szQ);

   if (tcAvailQ < reqdQ
       || sectors[y].tt_n_inuse >= N_TTES_PER_SECTOR_USABLE) {
      /* No.  So move on to the next sector.  Either it's never been
         used before, in which case it will get its tt/tc allocated
         now, or it has been used before, in which case it is set to be
         empty, hence throwing out the oldest sector. */
      vg_assert(tc_sector_szQ > 0);
      VG_(debugLog)(1,"transtab",
                      "declare sector %d full "
                      "(TT loading %2d%%, TC loading %2d%%)\n",
                      y,
                      (100 * sectors[y].tt_n_inuse)
                         / N_TTES_PER_SECTOR,
                      (100 * (tc_sector_szQ - tcAvailQ))
                         / tc_sector_szQ);
      youngest_sector++;
      if (youngest_sector >= N_SECTORS)
         youngest_sector = 0;
      y = youngest_sector;
      initialiseSector(y);
   }

   /* Be sure ... */
   tcAvailQ = ((ULong*)(&sectors[y].tc[tc_sector_szQ]))
              - ((ULong*)(sectors[y].tc_next));
   vg_assert(tcAvailQ >= 0);
   vg_assert(tcAvailQ <= tc_sector_szQ);
   vg_assert(tcAvailQ >= reqdQ);
   vg_assert(sectors[y].tt_n_inuse < N_TTES_PER_SECTOR_USABLE);
   vg_assert(sectors[y].tt_n_inuse >= 0);

   /* Copy into tc. */
   tcptr = sectors[y].tc_next;
   vg_assert(tcptr >= &sectors[y].tc[0]);
   vg_assert(tcptr <= &sectors[y].tc[tc_sector_szQ]);

   dstP = (UChar*)tcptr;
   srcP = (UChar*)code;
   for (i = 0; i < code_len; i++)
      dstP[i] = srcP[i];
   sectors[y].tc_next += reqdQ;
   sectors[y].tt_n_inuse++;

   /* The host will execute this code, so force I/D coherence. */
   invalidate_icache( dstP, code_len );

   /* more paranoia */
   tcptr2 = sectors[y].tc_next;
   vg_assert(tcptr2 >= &sectors[y].tc[0]);
   vg_assert(tcptr2 <= &sectors[y].tc[tc_sector_szQ]);

   /* Find an empty tt slot, and use it.  There must be such a slot
      since tt is never allowed to get completely full.  Linear
      probing from the hash of the entry address. */
   i = HASH_TT(entry);
   vg_assert(i >= 0 && i < N_TTES_PER_SECTOR);
   while (True) {
      if (sectors[y].tt[i].status == Empty
          || sectors[y].tt[i].status == Deleted)
         break;
      i++;
      if (i >= N_TTES_PER_SECTOR)
         i = 0;
   }

   sectors[y].tt[i].status = InUse;
   sectors[y].tt[i].tcptr  = tcptr;
   sectors[y].tt[i].count  = 0;
   sectors[y].tt[i].weight = 1;
   sectors[y].tt[i].vge    = *vge;
   sectors[y].tt[i].entry  = entry;

   /* Update the fast-cache. */
   setFastCacheEntry( entry, tcptr, &sectors[y].tt[i].count );

   /* Note the eclass numbers for this translation. */
   upd_eclasses_after_add( &sectors[y], i );
}
954
955
956/* Search for the translation of the given guest address.  If
957   requested, a successful search can also cause the fast-caches to be
958   updated.
959*/
960Bool VG_(search_transtab) ( /*OUT*/AddrH* result,
961                            Addr64        guest_addr,
962                            Bool          upd_cache )
963{
964   Int i, j, k, kstart, sno;
965
966   vg_assert(init_done);
967   /* Find the initial probe point just once.  It will be the same in
968      all sectors and avoids multiple expensive % operations. */
969   n_full_lookups++;
970   k      = -1;
971   kstart = HASH_TT(guest_addr);
972   vg_assert(kstart >= 0 && kstart < N_TTES_PER_SECTOR);
973
974   /* Search in all the sectors.  Although the order should not matter,
975      it might be most efficient to search in the order youngest to
976      oldest. */
977   sno = youngest_sector;
978   for (i = 0; i < N_SECTORS; i++) {
979
980      if (sectors[sno].tc == NULL)
981         goto notfound; /* sector not in use. */
982
983      k = kstart;
984      for (j = 0; j < N_TTES_PER_SECTOR; j++) {
985         n_lookup_probes++;
986         if (sectors[sno].tt[k].status == InUse
987             && sectors[sno].tt[k].entry == guest_addr) {
988            /* found it */
989            if (upd_cache)
990               setFastCacheEntry(
991                  guest_addr, sectors[sno].tt[k].tcptr,
992                              &sectors[sno].tt[k].count );
993            if (result)
994               *result = (AddrH)sectors[sno].tt[k].tcptr;
995            return True;
996         }
997         if (sectors[sno].tt[k].status == Empty)
998            break; /* not found in this sector */
999         k++;
1000         if (k == N_TTES_PER_SECTOR)
1001            k = 0;
1002      }
1003
1004      /* If we fall off the end, all entries are InUse and not
1005         matching, or Deleted.  In any case we did not find it in this
1006         sector. */
1007
1008     notfound:
1009      /* move to the next oldest sector */
1010      sno = sno==0 ? (N_SECTORS-1) : (sno-1);
1011   }
1012
1013   /* Not found in any sector. */
1014   return False;
1015}
1016
1017
1018/*-------------------------------------------------------------*/
1019/*--- Delete translations.                                  ---*/
1020/*-------------------------------------------------------------*/
1021
1022/* forward */
1023static void unredir_discard_translations( Addr64, ULong );
1024
1025/* Stuff for deleting translations which intersect with a given
1026   address range.  Unfortunately, to make this run at a reasonable
1027   speed, it is complex. */
1028
1029static inline
1030Bool overlap1 ( Addr64 s1, ULong r1, Addr64 s2, ULong r2 )
1031{
1032   Addr64 e1 = s1 + r1 - 1ULL;
1033   Addr64 e2 = s2 + r2 - 1ULL;
1034   if (e1 < s2 || e2 < s1)
1035      return False;
1036   return True;
1037}
1038
1039static inline
1040Bool overlaps ( Addr64 start, ULong range, VexGuestExtents* vge )
1041{
1042   if (overlap1(start, range, vge->base[0], (UInt)vge->len[0]))
1043      return True;
1044   if (vge->n_used < 2)
1045      return False;
1046   if (overlap1(start, range, vge->base[1], (UInt)vge->len[1]))
1047      return True;
1048   if (vge->n_used < 3)
1049      return False;
1050   if (overlap1(start, range, vge->base[2], (UInt)vge->len[2]))
1051      return True;
1052   return False;
1053}
1054
1055
1056/* Delete a tt entry, and update all the eclass data accordingly. */
1057
/* Delete one translation-table entry: unlink it from every eclass
   list that refers to it, mark the entry Deleted, update the
   discard statistics, and notify the tool if it registered for
   discard notifications.  The host code in the sector's tc is left
   in place. */
static void delete_tte ( /*MOD*/Sector* sec, Int tteno )
{
   Int      i, ec_num, ec_idx;
   TTEntry* tte;

   vg_assert(tteno >= 0 && tteno < N_TTES_PER_SECTOR);
   tte = &sec->tt[tteno];
   vg_assert(tte->status == InUse);
   /* An InUse entry must be listed in between 1 and 3 eclasses. */
   vg_assert(tte->n_tte2ec >= 1 && tte->n_tte2ec <= 3);

   /* Deal with the ec-to-tte links first. */
   for (i = 0; i < tte->n_tte2ec; i++) {
      ec_num = (Int)tte->tte2ec_ec[i];
      ec_idx = tte->tte2ec_ix[i];
      vg_assert(ec_num >= 0 && ec_num < ECLASS_N);
      vg_assert(ec_idx >= 0);
      vg_assert(ec_idx < sec->ec2tte_used[ec_num]);
      /* Assert that the two links point at each other. */
      vg_assert(sec->ec2tte[ec_num][ec_idx] == (UShort)tteno);
      /* "delete" the pointer back to here. */
      sec->ec2tte[ec_num][ec_idx] = EC2TTE_DELETED;
   }

   /* Now fix up this TTEntry. */
   tte->status   = Deleted;
   tte->n_tte2ec = 0;

   /* Stats .. */
   sec->tt_n_inuse--;
   n_disc_count++;
   n_disc_osize += vge_osize(&tte->vge);

   /* Tell the tool too. */
   if (VG_(needs).superblock_discards) {
      VG_TDICT_CALL( tool_discard_superblock_info,
                     tte->entry,
                     tte->vge );
   }
}
1097
1098
1099/* Delete translations from sec which intersect specified range, but
1100   only consider translations in the specified eclass. */
1101
1102static
1103Bool delete_translations_in_sector_eclass ( /*MOD*/Sector* sec,
1104                                            Addr64 guest_start, ULong range,
1105                                            Int ec )
1106{
1107   Int      i;
1108   UShort   tteno;
1109   Bool     anyDeld = False;
1110   TTEntry* tte;
1111
1112   vg_assert(ec >= 0 && ec < ECLASS_N);
1113
1114   for (i = 0; i < sec->ec2tte_used[ec]; i++) {
1115
1116      tteno = sec->ec2tte[ec][i];
1117      if (tteno == EC2TTE_DELETED) {
1118         /* already deleted */
1119         continue;
1120      }
1121
1122      vg_assert(tteno < N_TTES_PER_SECTOR);
1123
1124      tte = &sec->tt[tteno];
1125      vg_assert(tte->status == InUse);
1126
1127      if (overlaps( guest_start, range, &tte->vge )) {
1128         anyDeld = True;
1129         delete_tte( sec, (Int)tteno );
1130      }
1131
1132   }
1133
1134   return anyDeld;
1135}
1136
1137
1138/* Delete translations from sec which intersect specified range, the
1139   slow way, by inspecting all translations in sec. */
1140
1141static
1142Bool delete_translations_in_sector ( /*MOD*/Sector* sec,
1143                                     Addr64 guest_start, ULong range )
1144{
1145   Int  i;
1146   Bool anyDeld = False;
1147
1148   for (i = 0; i < N_TTES_PER_SECTOR; i++) {
1149      if (sec->tt[i].status == InUse
1150          && overlaps( guest_start, range, &sec->tt[i].vge )) {
1151         anyDeld = True;
1152         delete_tte( sec, i );
1153      }
1154   }
1155
1156   return anyDeld;
1157}
1158
1159
1160void VG_(discard_translations) ( Addr64 guest_start, ULong range,
1161                                 HChar* who )
1162{
1163   Sector* sec;
1164   Int     sno, ec;
1165   Bool    anyDeleted = False;
1166
1167   vg_assert(init_done);
1168
1169   VG_(debugLog)(2, "transtab",
1170                    "discard_translations(0x%llx, %lld) req by %s\n",
1171                    guest_start, range, who );
1172
1173   /* Pre-deletion sanity check */
1174   if (VG_(clo_sanity_level >= 4)) {
1175      Bool sane = sanity_check_all_sectors();
1176      vg_assert(sane);
1177   }
1178
1179   if (range == 0)
1180      return;
1181
1182   /* There are two different ways to do this.
1183
1184      If the range fits within a single address-range equivalence
1185      class, as will be the case for a cache line sized invalidation,
1186      then we only have to inspect the set of translations listed in
1187      that equivalence class, and also in the "sin-bin" equivalence
1188      class ECLASS_MISC.
1189
1190      Otherwise, the invalidation is of a larger range and probably
1191      results from munmap.  In this case it's (probably!) faster just
1192      to inspect all translations, dump those we don't want, and
1193      regenerate the equivalence class information (since modifying it
1194      in-situ is even more expensive).
1195   */
1196
1197   /* First off, figure out if the range falls within a single class,
1198      and if so which one. */
1199
1200   ec = ECLASS_MISC;
1201   if (range < (1ULL << ECLASS_SHIFT))
1202      ec = range_to_eclass( guest_start, (UInt)range );
1203
1204   /* if ec is ECLASS_MISC then we aren't looking at just a single
1205      class, so use the slow scheme.  Else use the fast scheme,
1206      examining 'ec' and ECLASS_MISC. */
1207
1208   if (ec != ECLASS_MISC) {
1209
1210      VG_(debugLog)(2, "transtab",
1211                       "                    FAST, ec = %d\n", ec);
1212
1213      /* Fast scheme */
1214      vg_assert(ec >= 0 && ec < ECLASS_MISC);
1215
1216      for (sno = 0; sno < N_SECTORS; sno++) {
1217         sec = &sectors[sno];
1218         if (sec->tc == NULL)
1219            continue;
1220         anyDeleted |= delete_translations_in_sector_eclass(
1221                         sec, guest_start, range, ec );
1222         anyDeleted |= delete_translations_in_sector_eclass(
1223                         sec, guest_start, range, ECLASS_MISC );
1224      }
1225
1226   } else {
1227
1228      /* slow scheme */
1229
1230      VG_(debugLog)(2, "transtab",
1231                       "                    SLOW, ec = %d\n", ec);
1232
1233      for (sno = 0; sno < N_SECTORS; sno++) {
1234         sec = &sectors[sno];
1235         if (sec->tc == NULL)
1236            continue;
1237         anyDeleted |= delete_translations_in_sector(
1238                         sec, guest_start, range );
1239      }
1240
1241   }
1242
1243   if (anyDeleted)
1244      invalidateFastCache();
1245
1246   /* don't forget the no-redir cache */
1247   unredir_discard_translations( guest_start, range );
1248
1249   /* Post-deletion sanity check */
1250   if (VG_(clo_sanity_level >= 4)) {
1251      Int      i;
1252      TTEntry* tte;
1253      Bool     sane = sanity_check_all_sectors();
1254      vg_assert(sane);
1255      /* But now, also check the requested address range isn't
1256         present anywhere. */
1257      for (sno = 0; sno < N_SECTORS; sno++) {
1258         sec = &sectors[sno];
1259         if (sec->tc == NULL)
1260            continue;
1261         for (i = 0; i < N_TTES_PER_SECTOR; i++) {
1262            tte = &sec->tt[i];
1263            if (tte->status != InUse)
1264               continue;
1265            vg_assert(!overlaps( guest_start, range, &tte->vge ));
1266         }
1267      }
1268   }
1269}
1270
1271
1272/*------------------------------------------------------------*/
1273/*--- AUXILIARY: the unredirected TT/TC                    ---*/
1274/*------------------------------------------------------------*/
1275
1276/* A very simple translation cache which holds a small number of
1277   unredirected translations.  This is completely independent of the
1278   main tt/tc structures.  When unredir_tc or unredir_tt becomes full,
1279   both structures are simply dumped and we start over.
1280
1281   Since these translations are unredirected, the search key is (by
1282   definition) the first address entry in the .vge field. */
1283
1284/* Sized to hold 500 translations of average size 1000 bytes. */
1285
1286#define UNREDIR_SZB   1000
1287
1288#define N_UNREDIR_TT  500
1289#define N_UNREDIR_TCQ (N_UNREDIR_TT * UNREDIR_SZB / sizeof(ULong))
1290
typedef
   struct {
      VexGuestExtents vge;   /* guest extents; base[0] is the search key */
      Addr            hcode; /* start of the host code in unredir_tc */
      Bool            inUse; /* does this slot hold a live translation? */
   }
   UTCEntry;
1298
/* We just allocate forwards in _tc, never deleting. */
static ULong    *unredir_tc;
/* Starts at the maximum so that, before init_unredir_tt_tc() has
   run, the capacity check in VG_(add_to_unredir_transtab) finds no
   free space and triggers (re)initialisation. */
static Int      unredir_tc_used = N_UNREDIR_TCQ;

/* Slots in _tt can come into use and out again (.inUse).
   Nevertheless _tt_highwater is maintained so that invalidations
   don't have to scan all the slots when only a few are in use.
   _tt_highwater holds the index of the highest ever allocated
   slot. */
static UTCEntry unredir_tt[N_UNREDIR_TT];
static Int      unredir_tt_highwater;
1310
1311
1312static void init_unredir_tt_tc ( void )
1313{
1314   Int i;
1315   if (unredir_tc == NULL) {
1316      SysRes sres = VG_(am_mmap_anon_float_valgrind)( N_UNREDIR_TT * UNREDIR_SZB );
1317      if (sres.isError) {
1318         VG_(out_of_memory_NORETURN)("init_unredir_tt_tc", N_UNREDIR_TT * UNREDIR_SZB);
1319         /*NOTREACHED*/
1320      }
1321      unredir_tc = (ULong *)sres.res;
1322   }
1323   unredir_tc_used = 0;
1324   for (i = 0; i < N_UNREDIR_TT; i++)
1325      unredir_tt[i].inUse = False;
1326   unredir_tt_highwater = -1;
1327}
1328
1329/* Do a sanity check; return False on failure. */
1330static Bool sanity_check_redir_tt_tc ( void )
1331{
1332   Int i;
1333   if (unredir_tt_highwater < -1) return False;
1334   if (unredir_tt_highwater >= N_UNREDIR_TT) return False;
1335
1336   for (i = unredir_tt_highwater+1; i < N_UNREDIR_TT; i++)
1337      if (unredir_tt[i].inUse)
1338         return False;
1339
1340   if (unredir_tc_used < 0) return False;
1341   if (unredir_tc_used > N_UNREDIR_TCQ) return False;
1342
1343   return True;
1344}
1345
1346
1347/* Add an UNREDIRECTED translation of vge to TT/TC.  The translation
1348   is temporarily in code[0 .. code_len-1].
1349*/
1350void VG_(add_to_unredir_transtab)( VexGuestExtents* vge,
1351                                   Addr64           entry,
1352                                   AddrH            code,
1353                                   UInt             code_len )
1354{
1355   Int   i, j, code_szQ;
1356   HChar *srcP, *dstP;
1357
1358   vg_assert(sanity_check_redir_tt_tc());
1359
1360   /* This is the whole point: it's not redirected! */
1361   vg_assert(entry == vge->base[0]);
1362
1363   /* How many unredir_tt slots are needed */
1364   code_szQ = (code_len + 7) / 8;
1365
1366   /* Look for an empty unredir_tc slot */
1367   for (i = 0; i < N_UNREDIR_TT; i++)
1368      if (!unredir_tt[i].inUse)
1369         break;
1370
1371   if (i >= N_UNREDIR_TT || code_szQ > (N_UNREDIR_TCQ - unredir_tc_used)) {
1372      /* It's full; dump everything we currently have */
1373      init_unredir_tt_tc();
1374      i = 0;
1375   }
1376
1377   vg_assert(unredir_tc_used >= 0);
1378   vg_assert(unredir_tc_used <= N_UNREDIR_TCQ);
1379   vg_assert(code_szQ > 0);
1380   vg_assert(code_szQ + unredir_tc_used <= N_UNREDIR_TCQ);
1381   vg_assert(i >= 0 && i < N_UNREDIR_TT);
1382   vg_assert(unredir_tt[i].inUse == False);
1383
1384   if (i > unredir_tt_highwater)
1385      unredir_tt_highwater = i;
1386
1387   dstP = (HChar*)&unredir_tc[unredir_tc_used];
1388   srcP = (HChar*)code;
1389   for (j = 0; j < code_len; j++)
1390      dstP[j] = srcP[j];
1391
1392   invalidate_icache( dstP, code_len );
1393
1394   unredir_tt[i].inUse = True;
1395   unredir_tt[i].vge   = *vge;
1396   unredir_tt[i].hcode = (Addr)dstP;
1397
1398   unredir_tc_used += code_szQ;
1399   vg_assert(unredir_tc_used >= 0);
1400   vg_assert(unredir_tc_used <= N_UNREDIR_TCQ);
1401
1402   vg_assert(&dstP[code_len] <= (HChar*)&unredir_tc[unredir_tc_used]);
1403}
1404
1405Bool VG_(search_unredir_transtab) ( /*OUT*/AddrH* result,
1406                                    Addr64        guest_addr )
1407{
1408   Int i;
1409   for (i = 0; i < N_UNREDIR_TT; i++) {
1410      if (!unredir_tt[i].inUse)
1411         continue;
1412      if (unredir_tt[i].vge.base[0] == guest_addr) {
1413         *result = (AddrH)unredir_tt[i].hcode;
1414         return True;
1415      }
1416   }
1417   return False;
1418}
1419
1420static void unredir_discard_translations( Addr64 guest_start, ULong range )
1421{
1422   Int i;
1423
1424   vg_assert(sanity_check_redir_tt_tc());
1425
1426   for (i = 0; i <= unredir_tt_highwater; i++) {
1427      if (unredir_tt[i].inUse
1428          && overlaps( guest_start, range, &unredir_tt[i].vge))
1429         unredir_tt[i].inUse = False;
1430   }
1431}
1432
1433
1434/*------------------------------------------------------------*/
1435/*--- Initialisation.                                      ---*/
1436/*------------------------------------------------------------*/
1437
/* One-time initialisation of the whole TT/TC machinery: size the
   per-sector code caches from the tool's average-translation-size
   hint, reset all sectors and eclass maps to empty, invalidate the
   fast caches, and initialise the unredirected TT/TC.  Must be
   called exactly once, before any other function here. */
void VG_(init_tt_tc) ( void )
{
   Int i, j, avg_codeszQ;

   vg_assert(!init_done);
   init_done = True;

   /* Otherwise lots of things go wrong... */
   vg_assert(sizeof(ULong) == 8);
   vg_assert(sizeof(Addr64) == 8);
   /* check fast cache entries really are 2 words long */
   vg_assert(sizeof(Addr) == sizeof(void*));
   vg_assert(sizeof(FastCacheEntry) == 2 * sizeof(Addr));
   /* check fast cache entries are packed back-to-back with no spaces */
   vg_assert(sizeof( VG_(tt_fast) ) == VG_TT_FAST_SIZE * sizeof(FastCacheEntry));
   /* check fast cache is aligned as we requested.  Not fatal if it
      isn't, but we might as well make sure. */
   vg_assert(VG_IS_16_ALIGNED( ((Addr) & VG_(tt_fast)[0]) ));

   if (VG_(clo_verbosity) > 2)
      VG_(message)(Vg_DebugMsg,
                   "TT/TC: VG_(init_tt_tc) "
                   "(startup of code management)");

   /* Figure out how big each tc area should be.  Expressed in
      ULong-sized units; the +1 per entry gives a little slack. */
   avg_codeszQ   = (VG_(details).avg_translation_sizeB + 7) / 8;
   tc_sector_szQ = N_TTES_PER_SECTOR_USABLE * (1 + avg_codeszQ);

   /* Ensure the calculated value is not way crazy. */
   vg_assert(tc_sector_szQ >= 2 * N_TTES_PER_SECTOR_USABLE);
   vg_assert(tc_sector_szQ <= 80 * N_TTES_PER_SECTOR_USABLE);

   /* Initialise the sectors.  tc == NULL marks a sector as not yet
      in use; its storage is allocated lazily elsewhere. */
   youngest_sector = 0;
   for (i = 0; i < N_SECTORS; i++) {
      sectors[i].tc = NULL;
      sectors[i].tt = NULL;
      sectors[i].tc_next = NULL;
      sectors[i].tt_n_inuse = 0;
      for (j = 0; j < ECLASS_N; j++) {
         sectors[i].ec2tte_size[j] = 0;
         sectors[i].ec2tte_used[j] = 0;
         sectors[i].ec2tte[j] = NULL;
      }
   }

   /* Initialise the fast caches.  If not profiling (the usual case),
      we have to explicitly invalidate the fastN cache as
      invalidateFastCache() won't do that for us. */
   invalidateFastCache();
   if (VG_(clo_profile_flags) == 0)
      invalidateFastNCache();

   /* and the unredir tt/tc */
   init_unredir_tt_tc();

   if (VG_(clo_verbosity) > 2) {
      VG_(message)(Vg_DebugMsg,
         "TT/TC: cache: %d sectors of %d bytes each = %d total",
          N_SECTORS, 8 * tc_sector_szQ,
          N_SECTORS * 8 * tc_sector_szQ );
      VG_(message)(Vg_DebugMsg,
         "TT/TC: table: %d total entries, max occupancy %d (%d%%)",
         N_SECTORS * N_TTES_PER_SECTOR,
         N_SECTORS * N_TTES_PER_SECTOR_USABLE,
         SECTOR_TT_LIMIT_PERCENT );
   }

   VG_(debugLog)(2, "transtab",
      "cache: %d sectors of %d bytes each = %d total\n",
       N_SECTORS, 8 * tc_sector_szQ,
       N_SECTORS * 8 * tc_sector_szQ );
   VG_(debugLog)(2, "transtab",
      "table: %d total entries, max occupancy %d (%d%%)\n",
      N_SECTORS * N_TTES_PER_SECTOR,
      N_SECTORS * N_TTES_PER_SECTOR_USABLE,
      SECTOR_TT_LIMIT_PERCENT );
}
1516
1517
1518/*------------------------------------------------------------*/
1519/*--- Printing out statistics.                             ---*/
1520/*------------------------------------------------------------*/
1521
1522static ULong safe_idiv( ULong a, ULong b )
1523{
1524   return (b == 0 ? 0 : a / b);
1525}
1526
/* Return the number of translations added to the main TT/TC so far
   (the n_in_count counter maintained by add_to_transtab). */
UInt VG_(get_bbs_translated) ( void )
{
   return n_in_count;
}
1531
/* Emit the TT/TC statistics counters to the debug log: full-lookup
   and probe counts, fast-cache update/flush counts, and the
   new/dumped/discarded translation totals with guest-to-host size
   figures. */
void VG_(print_tt_tc_stats) ( void )
{
   VG_(message)(Vg_DebugMsg,
      "    tt/tc: %'llu tt lookups requiring %'llu probes",
      n_full_lookups, n_lookup_probes );
   VG_(message)(Vg_DebugMsg,
      "    tt/tc: %'llu fast-cache updates, %'llu flushes",
      n_fast_updates, n_fast_flushes );

   VG_(message)(Vg_DebugMsg,
                " transtab: new        %'lld "
                "(%'llu -> %'llu; ratio %'llu:10) [%'llu scs]",
                n_in_count, n_in_osize, n_in_tsize,
                safe_idiv(10*n_in_tsize, n_in_osize),
                n_in_sc_count);
   VG_(message)(Vg_DebugMsg,
                " transtab: dumped     %'llu (%'llu -> ?" "?)",
                n_dump_count, n_dump_osize );
   VG_(message)(Vg_DebugMsg,
                " transtab: discarded  %'llu (%'llu -> ?" "?)",
                n_disc_count, n_disc_osize );

   /* Disabled debug aid: dump sector 0's per-eclass usage counts. */
   if (0) {
      Int i;
      VG_(printf)("\n");
      for (i = 0; i < ECLASS_N; i++) {
         VG_(printf)(" %4d", sectors[0].ec2tte_used[i]);
         if (i % 16 == 15)
            VG_(printf)("\n");
      }
      VG_(printf)("\n\n");
   }
}
1565
1566/*------------------------------------------------------------*/
1567/*--- Printing out of profiling results.                   ---*/
1568/*------------------------------------------------------------*/
1569
1570static ULong score ( TTEntry* tte )
1571{
1572   return ((ULong)tte->weight) * ((ULong)tte->count);
1573}
1574
1575ULong VG_(get_BB_profile) ( BBProfEntry tops[], UInt n_tops )
1576{
1577   Int   sno, i, r, s;
1578   ULong score_total;
1579
1580   /* First, compute the total weighted count, and find the top N
1581      ttes.  tops contains pointers to the most-used n_tops blocks, in
1582      descending order (viz, tops[0] is the highest scorer). */
1583   for (i = 0; i < n_tops; i++) {
1584      tops[i].addr  = 0;
1585      tops[i].score = 0;
1586   }
1587
1588   score_total = 0;
1589
1590   for (sno = 0; sno < N_SECTORS; sno++) {
1591      if (sectors[sno].tc == NULL)
1592         continue;
1593      for (i = 0; i < N_TTES_PER_SECTOR; i++) {
1594         if (sectors[sno].tt[i].status != InUse)
1595            continue;
1596         score_total += score(&sectors[sno].tt[i]);
1597         /* Find the rank for sectors[sno].tt[i]. */
1598         r = n_tops-1;
1599         while (True) {
1600            if (r == -1)
1601               break;
1602             if (tops[r].addr == 0) {
1603               r--;
1604               continue;
1605             }
1606             if ( score(&sectors[sno].tt[i]) > tops[r].score ) {
1607                r--;
1608                continue;
1609             }
1610             break;
1611         }
1612         r++;
1613         vg_assert(r >= 0 && r <= n_tops);
1614         /* This bb should be placed at r, and bbs above it shifted
1615            upwards one slot. */
1616         if (r < n_tops) {
1617            for (s = n_tops-1; s > r; s--)
1618               tops[s] = tops[s-1];
1619            tops[r].addr  = sectors[sno].tt[i].entry;
1620            tops[r].score = score( &sectors[sno].tt[i] );
1621         }
1622      }
1623   }
1624
1625   return score_total;
1626}
1627
1628/*--------------------------------------------------------------------*/
1629/*--- end                                                          ---*/
1630/*--------------------------------------------------------------------*/
1631