/* m_transtab.c -- at revision 4d474d086188fd1f29fa97dbd84d8ea2e589a9b8 */
1
2/*--------------------------------------------------------------------*/
3/*--- Management of the translation table and cache.               ---*/
4/*---                                                 m_transtab.c ---*/
5/*--------------------------------------------------------------------*/
6
7/*
8   This file is part of Valgrind, a dynamic binary instrumentation
9   framework.
10
11   Copyright (C) 2000-2008 Julian Seward
12      jseward@acm.org
13
14   This program is free software; you can redistribute it and/or
15   modify it under the terms of the GNU General Public License as
16   published by the Free Software Foundation; either version 2 of the
17   License, or (at your option) any later version.
18
19   This program is distributed in the hope that it will be useful, but
20   WITHOUT ANY WARRANTY; without even the implied warranty of
21   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22   General Public License for more details.
23
24   You should have received a copy of the GNU General Public License
25   along with this program; if not, write to the Free Software
26   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27   02111-1307, USA.
28
29   The GNU General Public License is contained in the file COPYING.
30*/
31
32#include "pub_core_basics.h"
33#include "pub_core_debuglog.h"
34#include "pub_core_machine.h"    // For VG(machine_get_VexArchInfo)
35#include "pub_core_libcbase.h"
36#include "pub_core_libcassert.h"
37#include "pub_core_libcprint.h"
38#include "pub_core_options.h"
39#include "pub_core_tooliface.h"  // For VG_(details).avg_translation_sizeB
40#include "pub_core_transtab.h"
41#include "pub_core_aspacemgr.h"
42#include "pub_core_mallocfree.h" // VG_(out_of_memory_NORETURN)
43
44/* #define DEBUG_TRANSTAB */
45
46
47/*-------------------------------------------------------------*/
48/*--- Management of the FIFO-based translation table+cache. ---*/
49/*-------------------------------------------------------------*/
50
51/*------------------ CONSTANTS ------------------*/
52
53/* Number of sectors the TC is divided into.  If you need a larger
54   overall translation cache, increase this value. */
55#define N_SECTORS 8
56
57/* Number of TC entries in each sector.  This needs to be a prime
58   number to work properly, it must be <= 65535 (so that a TT index
59   fits in a UShort, leaving room for 0xFFFF(EC2TTE_DELETED) to denote
60   'deleted') and it is strongly recommended not to change this.
61   65521 is the largest prime <= 65535. */
62#define N_TTES_PER_SECTOR /*30011*/ /*40009*/ 65521
63
64/* Because each sector contains a hash table of TTEntries, we need to
65   specify the maximum allowable loading, after which the sector is
66   deemed full. */
67#define SECTOR_TT_LIMIT_PERCENT 80
68
69/* The sector is deemed full when this many entries are in it. */
70#define N_TTES_PER_SECTOR_USABLE \
71           ((N_TTES_PER_SECTOR * SECTOR_TT_LIMIT_PERCENT) / 100)
72
73/* Equivalence classes for fast address range deletion.  There are 1 +
74   2^ECLASS_WIDTH bins.  The highest one, ECLASS_MISC, describes an
75   address range which does not fall cleanly within any specific bin.
76   Note that ECLASS_SHIFT + ECLASS_WIDTH must be < 32. */
77#define ECLASS_SHIFT 11
78#define ECLASS_WIDTH 8
79#define ECLASS_MISC  (1 << ECLASS_WIDTH)
80#define ECLASS_N     (1 + ECLASS_MISC)
81
82#define EC2TTE_DELETED  0xFFFF /* 16-bit special value */
83
84
85/*------------------ TYPES ------------------*/
86
87/* A translation-table entry.  This indicates precisely which areas of
88   guest code are included in the translation, and contains all other
89   auxiliary info too.  */
typedef
   struct {
      /* Profiling only: the count and weight (arbitrary meaning) for
         this translation.  Weight is a property of the translation
         itself and computed once when the translation is created.
         Count is an entry count for the translation and is
         incremented by 1 every time the translation is used, if we
         are profiling. */
      UInt     count;
      UShort   weight;

      /* Status of the slot.  Note, we need to be able to do lazy
         deletion, hence the Deleted state: the slot is dead but has
         not yet been unhooked from the eclass arrays. */
      enum { InUse, Deleted, Empty } status;

      /* 64-bit aligned pointer to one or more 64-bit words containing
         the corresponding host code (must be in the same sector!)
         This is a pointer into the sector's tc (code) area. */
      ULong* tcptr;

      /* This is the original guest address that purportedly is the
         entry point of the translation.  You might think that .entry
         should be the same as .vge->base[0], and most of the time it
         is.  However, when doing redirections, that is not the case.
         .vge must always correctly describe the guest code sections
         from which this translation was made.  However, .entry may or
         may not be a lie, depending on whether or not we're doing
         redirection. */
      Addr64 entry;

      /* This structure describes precisely what ranges of guest code
         the translation covers, so we can decide whether or not to
         delete it when translations of a given address range are
         invalidated. */
      VexGuestExtents vge;

      /* Address range summary info: these are pointers back to
         eclass[] entries in the containing Sector.  Those entries in
         turn point back here -- the two structures are mutually
         redundant but both necessary to make fast deletions work.
         The eclass info is similar to, and derived from, this entry's
         'vge' field, but it is not the same */
      UShort n_tte2ec;      // # tte2ec pointers (1 to 3)
      UShort tte2ec_ec[3];  // for each, the eclass #
      UInt   tte2ec_ix[3];  // and the index within the eclass.
      // Invariant: for i in 0 .. n_tte2ec-1
      //    sec->ec2tte[ tte2ec_ec[i] ][ tte2ec_ix[i] ]
      // should be the index
      // of this TTEntry in the containing Sector's tt array.
   }
   TTEntry;
141
142
143/* Finally, a sector itself.  Each sector contains an array of
144   TCEntries, which hold code, and an array of TTEntries, containing
145   all required administrative info.  Profiling is supported using the
146   TTEntry .count and .weight fields, if required.  Each sector is
147   independent in that no cross-sector references are allowed.
148
149   If the sector is not in use, all three pointers are NULL and
150   tt_n_inuse is zero.
151*/
typedef
   struct {
      /* The TCEntry area.  Size of this depends on the average
         translation size.  We try and size it so it becomes full
         precisely when this sector's translation table (tt) reaches
         its load limit (SECTOR_TT_LIMIT_PERCENT). */
      ULong* tc;

      /* The TTEntry array.  This is a fixed size, always containing
         exactly N_TTES_PER_SECTOR entries. */
      TTEntry* tt;

      /* This points to the current allocation point in tc. */
      ULong* tc_next;

      /* The count of tt entries with state InUse. */
      Int tt_n_inuse;

      /* Expandable arrays of tt indices for each of the ECLASS_N
         address range equivalence classes.  These hold indices into
         the containing sector's tt array, which in turn should point
         back here (see TTEntry.tte2ec_*).  Entries may hold the
         EC2TTE_DELETED sentinel after a lazy deletion. */
      Int     ec2tte_size[ECLASS_N];
      Int     ec2tte_used[ECLASS_N];
      UShort* ec2tte[ECLASS_N];
   }
   Sector;
179
180
181/*------------------ DECLS ------------------*/
182
183/* The root data structure is an array of sectors.  The index of the
184   youngest sector is recorded, and new translations are put into that
185   sector.  When it fills up, we move along to the next sector and
186   start to fill that up, wrapping around at the end of the array.
187   That way, once all N_TC_SECTORS have been bought into use for the
188   first time, and are full, we then re-use the oldest sector,
189   endlessly.
190
191   When running, youngest sector should be between >= 0 and <
192   N_TC_SECTORS.  The initial -1 value indicates the TT/TC system is
193   not yet initialised.
194*/
static Sector sectors[N_SECTORS];
static Int    youngest_sector = -1;   /* -1 => TT/TC not yet initialised */

/* The number of ULongs in each TCEntry area.  This is computed once
   at startup and does not change. */
static Int    tc_sector_szQ;
201
202
203/* Fast helper for the TC.  A direct-mapped cache which holds a set of
204   recently used (guest address, host address) pairs.  This array is
205   referred to directly from m_dispatch/dispatch-<platform>.S.
206
207   Entries in tt_fast may refer to any valid TC entry, regardless of
208   which sector it's in.  Consequently we must be very careful to
209   invalidate this cache when TC entries are changed or disappear.
210
211   A special .guest address - TRANSTAB_BOGUS_GUEST_ADDR -- must be
212   pointed at to cause that cache entry to miss.  This relies on the
213   assumption that no guest code actually has that address, hence a
214   value 0x1 seems good.  m_translate gives the client a synthetic
215   segfault if it tries to execute at this address.
216*/
217/*
218typedef
219   struct {
220      Addr guest;
221      Addr host;
222   }
223   FastCacheEntry;
224*/
/* 16-byte aligned -- presumably so the assembly fast path (see
   comment above re m_dispatch) can index entries cheaply; confirm
   against dispatch-<platform>.S before changing. */
/*global*/ __attribute__((aligned(16)))
           FastCacheEntry VG_(tt_fast)[VG_TT_FAST_SIZE];
227/*
228#define TRANSTAB_BOGUS_GUEST_ADDR ((Addr)1)
229*/
230
231/* For profiling, we have a parallel array of pointers to .count
232   fields in TT entries.  Again, these pointers must be invalidated
233   when translations disappear.  A NULL pointer suffices to indicate
234   an unused slot.
235
236   When not profiling (the normal case, VG_(clo_profile_flags) == 0),
237   all tt_fastN entries are set to NULL at startup and never read nor
238   written after that.
239
240   When profiling (VG_(clo_profile_flags) > 0), tt_fast and tt_fastN
241   change together: if tt_fast[i].guest is TRANSTAB_BOGUS_GUEST_ADDR
242   then the corresponding tt_fastN[i] must be null.  If
243   tt_fast[i].guest is any other value, then tt_fastN[i] *must* point
244   to the .count field of the corresponding TT entry.
245
246   tt_fast and tt_fastN are referred to from assembly code
247   (dispatch.S).
248*/
/*global*/ UInt* VG_(tt_fastN)[VG_TT_FAST_SIZE];


/* Make sure we're not used before initialisation.  Entry points
   check this via vg_assert(init_done). */
static Bool init_done = False;
254
255
256/*------------------ STATS DECLS ------------------*/
257
/* Number of fast-cache updates and flushes done. */
ULong n_fast_flushes = 0;
ULong n_fast_updates = 0;

/* Number of full lookups done. */
ULong n_full_lookups = 0;
ULong n_lookup_probes = 0;

/* Number/osize/tsize of translations entered; also the number of
   those for which self-checking was requested.  (osize = guest code
   size per the VexGuestExtents; tsize = host code size.) */
ULong n_in_count    = 0;
ULong n_in_osize    = 0;
ULong n_in_tsize    = 0;
ULong n_in_sc_count = 0;

/* Number/osize of translations discarded due to lack of space. */
ULong n_dump_count = 0;
ULong n_dump_osize = 0;

/* Number/osize of translations discarded due to requests to do so. */
ULong n_disc_count = 0;
ULong n_disc_osize = 0;
280
281
282/*-------------------------------------------------------------*/
283/*--- Address-range equivalence class stuff                 ---*/
284/*-------------------------------------------------------------*/
285
286/* Return equivalence class number for a range. */
287
288static Int range_to_eclass ( Addr64 start, UInt len )
289{
290   UInt mask   = (1 << ECLASS_WIDTH) - 1;
291   UInt lo     = (UInt)start;
292   UInt hi     = lo + len - 1;
293   UInt loBits = (lo >> ECLASS_SHIFT) & mask;
294   UInt hiBits = (hi >> ECLASS_SHIFT) & mask;
295   if (loBits == hiBits) {
296      vg_assert(loBits < ECLASS_N-1);
297      return loBits;
298   } else {
299      return ECLASS_MISC;
300   }
301}
302
303
304/* Calculates the equivalence class numbers for any VexGuestExtent.
305   These are written in *eclasses, which must be big enough to hold 3
306   Ints.  The number written, between 1 and 3, is returned.  The
307   eclasses are presented in order, and any duplicates are removed.
308*/
309
310static
311Int vexGuestExtents_to_eclasses ( /*OUT*/Int* eclasses,
312                                  VexGuestExtents* vge )
313{
314#  define SWAP(_lv1,_lv2) \
315      do { Int t = _lv1; _lv1 = _lv2; _lv2 = t; } while (0)
316
317   Int i, j, n_ec, r;
318
319   vg_assert(vge->n_used >= 1 && vge->n_used <= 3);
320
321   n_ec = 0;
322   for (i = 0; i < vge->n_used; i++) {
323      r = range_to_eclass( vge->base[i], (UInt)vge->len[i] );
324      if (r == ECLASS_MISC)
325         goto bad;
326      /* only add if we haven't already seen it */
327      for (j = 0; j < n_ec; j++)
328         if (eclasses[j] == r)
329            break;
330      if (j == n_ec)
331         eclasses[n_ec++] = r;
332   }
333
334   if (n_ec == 1)
335      return 1;
336
337   if (n_ec == 2) {
338      /* sort */
339      if (eclasses[0] > eclasses[1])
340         SWAP(eclasses[0], eclasses[1]);
341      return 2;
342   }
343
344   if (n_ec == 3) {
345      /* sort */
346      if (eclasses[0] > eclasses[2])
347         SWAP(eclasses[0], eclasses[2]);
348      if (eclasses[0] > eclasses[1])
349         SWAP(eclasses[0], eclasses[1]);
350      if (eclasses[1] > eclasses[2])
351         SWAP(eclasses[1], eclasses[2]);
352      return 3;
353   }
354
355   /* NOTREACHED */
356   vg_assert(0);
357
358  bad:
359   eclasses[0] = ECLASS_MISC;
360   return 1;
361
362#  undef SWAP
363}
364
365
366/* Add tteno to the set of entries listed for equivalence class ec in
367   this sector.  Returns used location in eclass array. */
368
369static
370UInt addEClassNo ( /*MOD*/Sector* sec, Int ec, UShort tteno )
371{
372   Int    old_sz, new_sz, i, r;
373   UShort *old_ar, *new_ar;
374
375   vg_assert(ec >= 0 && ec < ECLASS_N);
376   vg_assert(tteno < N_TTES_PER_SECTOR);
377
378   if (0) VG_(printf)("ec %d  gets %d\n", ec, (Int)tteno);
379
380   if (sec->ec2tte_used[ec] >= sec->ec2tte_size[ec]) {
381
382      vg_assert(sec->ec2tte_used[ec] == sec->ec2tte_size[ec]);
383
384      old_sz = sec->ec2tte_size[ec];
385      old_ar = sec->ec2tte[ec];
386      new_sz = old_sz==0 ? 8 : old_sz<64 ? 2*old_sz : (3*old_sz)/2;
387      new_ar = VG_(arena_malloc)(VG_AR_TTAUX, new_sz * sizeof(UShort));
388      for (i = 0; i < old_sz; i++)
389         new_ar[i] = old_ar[i];
390      if (old_ar)
391         VG_(arena_free)(VG_AR_TTAUX, old_ar);
392      sec->ec2tte_size[ec] = new_sz;
393      sec->ec2tte[ec] = new_ar;
394
395      if (0) VG_(printf)("expand ec %d to %d\n", ec, new_sz);
396   }
397
398   /* Common case */
399   r = sec->ec2tte_used[ec]++;
400   vg_assert(r >= 0 && r < sec->ec2tte_size[ec]);
401   sec->ec2tte[ec][r] = tteno;
402   return (UInt)r;
403}
404
405
406/* 'vge' is being added to 'sec' at TT entry 'tteno'.  Add appropriate
407   eclass entries to 'sec'. */
408
409static
410void upd_eclasses_after_add ( /*MOD*/Sector* sec, Int tteno )
411{
412   Int i, r, eclasses[3];
413   TTEntry* tte;
414   vg_assert(tteno >= 0 && tteno < N_TTES_PER_SECTOR);
415
416   tte = &sec->tt[tteno];
417   r = vexGuestExtents_to_eclasses( eclasses, &tte->vge );
418
419   vg_assert(r >= 1 && r <= 3);
420   tte->n_tte2ec = r;
421
422   for (i = 0; i < r; i++) {
423      tte->tte2ec_ec[i] = eclasses[i];
424      tte->tte2ec_ix[i] = addEClassNo( sec, eclasses[i], (UShort)tteno );
425   }
426}
427
428
/* Check the eclass info in 'sec' to ensure it is consistent.  Returns
   True if OK, False if something's not right.  Expensive. */

static Bool sanity_check_eclasses_in_sector ( Sector* sec )
{
#  define BAD(_str) do { whassup = (_str); goto bad; } while (0)

   HChar*   whassup = NULL;
   Int      i, j, k, n, ec_num, ec_idx;
   TTEntry* tte;
   UShort   tteno;
   ULong*   tce;

   /* Basic checks on this sector */
   if (sec->tt_n_inuse < 0 || sec->tt_n_inuse > N_TTES_PER_SECTOR_USABLE)
      BAD("invalid sec->tt_n_inuse");
   tce = sec->tc_next;
   if (tce < &sec->tc[0] || tce > &sec->tc[tc_sector_szQ])
      BAD("sec->tc_next points outside tc");

   /* For each eclass ... */
   for (i = 0; i < ECLASS_N; i++) {
      if (sec->ec2tte_size[i] == 0 && sec->ec2tte[i] != NULL)
         BAD("ec2tte_size/ec2tte mismatch(1)");
      if (sec->ec2tte_size[i] != 0 && sec->ec2tte[i] == NULL)
         BAD("ec2tte_size/ec2tte mismatch(2)");
      if (sec->ec2tte_used[i] < 0
          || sec->ec2tte_used[i] > sec->ec2tte_size[i])
         BAD("implausible ec2tte_used");
      if (sec->ec2tte_used[i] == 0)
         continue;

      /* For each tt reference in each eclass .. ensure the reference
         is to a valid tt entry, and that the entry's address ranges
         really include this eclass. */

      for (j = 0; j < sec->ec2tte_used[i]; j++) {
         tteno = sec->ec2tte[i][j];
         if (tteno == EC2TTE_DELETED)
            continue;   /* lazily-deleted slot; nothing to check */
         if (tteno >= N_TTES_PER_SECTOR)
            BAD("implausible tteno");
         tte = &sec->tt[tteno];
         if (tte->status != InUse)
            BAD("tteno points to non-inuse tte");
         if (tte->n_tte2ec < 1 || tte->n_tte2ec > 3)
            BAD("tte->n_tte2ec out of range");
         /* Exactly one of tte->tte2ec_ec[0 .. tte->n_tte2ec-1]
            must equal i.  Inspect tte's eclass info. */
         n = 0;
         for (k = 0; k < tte->n_tte2ec; k++) {
            if (k < tte->n_tte2ec-1
                && tte->tte2ec_ec[k] >= tte->tte2ec_ec[k+1])
               BAD("tte->tte2ec_ec[..] out of order");
            ec_num = tte->tte2ec_ec[k];
            if (ec_num < 0 || ec_num >= ECLASS_N)
               BAD("tte->tte2ec_ec[..] out of range");
            if (ec_num != i)
               continue;
            ec_idx = tte->tte2ec_ix[k];
            if (ec_idx < 0 || ec_idx >= sec->ec2tte_used[i])
               BAD("tte->tte2ec_ix[..] out of range");
            if (ec_idx == j)
               n++;
         }
         if (n != 1)
            BAD("tteno does not point back at eclass");
      }
   }

   /* That establishes that for each forward pointer from TTEntrys
      there is a corresponding backward pointer from the eclass[]
      arrays.  However, it doesn't rule out the possibility of other,
      bogus pointers in the eclass[] arrays.  So do those similarly:
      scan through them and check the TTEntries they point at point
      back. */

   for (i = 0; i < N_TTES_PER_SECTOR_USABLE; i++) {

      tte = &sec->tt[i];
      if (tte->status == Empty || tte->status == Deleted) {
         if (tte->n_tte2ec != 0)
            BAD("tte->n_eclasses nonzero for unused tte");
         continue;
      }

      vg_assert(tte->status == InUse);

      if (tte->n_tte2ec < 1 || tte->n_tte2ec > 3)
         BAD("tte->n_eclasses out of range(2)");

      for (j = 0; j < tte->n_tte2ec; j++) {
         ec_num = tte->tte2ec_ec[j];
         if (ec_num < 0 || ec_num >= ECLASS_N)
            BAD("tte->eclass[..] out of range");
         ec_idx = tte->tte2ec_ix[j];
         if (ec_idx < 0 || ec_idx >= sec->ec2tte_used[ec_num])
            BAD("tte->ec_idx[..] out of range(2)");
         if (sec->ec2tte[ec_num][ec_idx] != i)
            BAD("ec2tte does not point back to tte");
      }
   }

   return True;

  bad:
   if (whassup)
      VG_(debugLog)(0, "transtab", "eclass sanity fail: %s\n", whassup);

#  if 0
   VG_(printf)("eclass = %d\n", i);
   VG_(printf)("tteno = %d\n", (Int)tteno);
   switch (tte->status) {
      case InUse:   VG_(printf)("InUse\n"); break;
      case Deleted: VG_(printf)("Deleted\n"); break;
      case Empty:   VG_(printf)("Empty\n"); break;
   }
   if (tte->status != Empty) {
      for (k = 0; k < tte->vge.n_used; k++)
         VG_(printf)("0x%llx %d\n", tte->vge.base[k],
                                    (Int)tte->vge.len[k]);
   }
#  endif

   return False;

#  undef BAD
}
557
558
559/* Sanity check absolutely everything.  True == check passed. */
560
561/* forwards */
562static Bool sanity_check_redir_tt_tc ( void );
563static Bool sanity_check_fastcache ( void );
564
565static Bool sanity_check_all_sectors ( void )
566{
567   Int     sno;
568   Bool    sane;
569   Sector* sec;
570   for (sno = 0; sno < N_SECTORS; sno++) {
571      sec = &sectors[sno];
572      if (sec->tc == NULL)
573         continue;
574      sane = sanity_check_eclasses_in_sector( sec );
575      if (!sane)
576         return False;
577   }
578   if ( !sanity_check_redir_tt_tc() )
579      return False;
580   if ( !sanity_check_fastcache() )
581      return False;
582   return True;
583}
584
585
586/*-------------------------------------------------------------*/
587/*--- Add/find translations                                 ---*/
588/*-------------------------------------------------------------*/
589
590static UInt vge_osize ( VexGuestExtents* vge )
591{
592   UInt i, n = 0;
593   for (i = 0; i < vge->n_used; i++)
594      n += (UInt)vge->len[i];
595   return n;
596}
597
598static Bool isValidSector ( Int sector )
599{
600   if (sector < 0 || sector >= N_SECTORS)
601      return False;
602   return True;
603}
604
605static inline UInt HASH_TT ( Addr64 key )
606{
607   UInt kHi = (UInt)(key >> 32);
608   UInt kLo = (UInt)key;
609   UInt k32 = kHi ^ kLo;
610   UInt ror = 7;
611   if (ror > 0)
612      k32 = (k32 >> ror) | (k32 << (32-ror));
613   return k32 % N_TTES_PER_SECTOR;
614}
615
/* Install (key -> tcptr) in the direct-mapped fast cache, and, when
   profiling, point the parallel counter slot at 'count'. */
static void setFastCacheEntry ( Addr64 key, ULong* tcptr, UInt* count )
{
   /* Direct-mapped: the slot is determined purely by the hash. */
   UInt cno = (UInt)VG_TT_FAST_HASH(key);
   VG_(tt_fast)[cno].guest = (Addr)key;
   VG_(tt_fast)[cno].host  = (Addr)tcptr;
   /* Keep tt_fastN in step with tt_fast (see invariant comment above). */
   if (VG_(clo_profile_flags) > 0)
      VG_(tt_fastN)[cno] = count;
   n_fast_updates++;
   /* This shouldn't fail.  It should be assured by m_translate
      which should reject any attempt to make translation of code
      starting at TRANSTAB_BOGUS_GUEST_ADDR. */
   vg_assert(VG_(tt_fast)[cno].guest != TRANSTAB_BOGUS_GUEST_ADDR);
}
629
630/* Invalidate the fast cache's counter array, VG_(tt_fastN). */
631static void invalidateFastNCache ( void )
632{
633   UInt j;
634   vg_assert(VG_TT_FAST_SIZE > 0 && (VG_TT_FAST_SIZE % 4) == 0);
635   for (j = 0; j < VG_TT_FAST_SIZE; j += 4) {
636      VG_(tt_fastN)[j+0] = NULL;
637      VG_(tt_fastN)[j+1] = NULL;
638      VG_(tt_fastN)[j+2] = NULL;
639      VG_(tt_fastN)[j+3] = NULL;
640   }
641   vg_assert(j == VG_TT_FAST_SIZE);
642}
643
644/* Invalidate the fast cache VG_(tt_fast).  If profiling, also
645   invalidate the fast cache's counter array VG_(tt_fastN), otherwise
646   don't touch it. */
647static void invalidateFastCache ( void )
648{
649   UInt j;
650   /* This loop is popular enough to make it worth unrolling a
651      bit, at least on ppc32. */
652   vg_assert(VG_TT_FAST_SIZE > 0 && (VG_TT_FAST_SIZE % 4) == 0);
653   for (j = 0; j < VG_TT_FAST_SIZE; j += 4) {
654      VG_(tt_fast)[j+0].guest = TRANSTAB_BOGUS_GUEST_ADDR;
655      VG_(tt_fast)[j+1].guest = TRANSTAB_BOGUS_GUEST_ADDR;
656      VG_(tt_fast)[j+2].guest = TRANSTAB_BOGUS_GUEST_ADDR;
657      VG_(tt_fast)[j+3].guest = TRANSTAB_BOGUS_GUEST_ADDR;
658   }
659
660   if (VG_(clo_profile_flags) > 0)
661      invalidateFastNCache();
662
663   vg_assert(j == VG_TT_FAST_SIZE);
664   n_fast_flushes++;
665}
666
667static Bool sanity_check_fastcache ( void )
668{
669   UInt j;
670   if (0) VG_(printf)("sanity check fastcache\n");
671   if (VG_(clo_profile_flags) > 0) {
672      /* profiling */
673      for (j = 0; j < VG_TT_FAST_SIZE; j++) {
674         if (VG_(tt_fastN)[j] == NULL
675             && VG_(tt_fast)[j].guest != TRANSTAB_BOGUS_GUEST_ADDR)
676            return False;
677         if (VG_(tt_fastN)[j] != NULL
678             && VG_(tt_fast)[j].guest == TRANSTAB_BOGUS_GUEST_ADDR)
679            return False;
680      }
681   } else {
682      /* not profiling */
683      for (j = 0; j < VG_TT_FAST_SIZE; j++) {
684         if (VG_(tt_fastN)[j] != NULL)
685            return False;
686      }
687   }
688   return True;
689}
690
/* Prepare sector 'sno' for use: allocate its tt/tc on first use, or
   dump its old contents if it is being recycled.  Either way the
   sector ends up empty and the fast cache is flushed. */
static void initialiseSector ( Int sno )
{
   Int    i;
   SysRes sres;
   Sector* sec;
   vg_assert(isValidSector(sno));

   sec = &sectors[sno];

   if (sec->tc == NULL) {

      /* Sector has never been used before.  Need to allocate tt and
         tc. */
      vg_assert(sec->tt == NULL);
      vg_assert(sec->tc_next == NULL);
      vg_assert(sec->tt_n_inuse == 0);
      for (i = 0; i < ECLASS_N; i++) {
         vg_assert(sec->ec2tte_size[i] == 0);
         vg_assert(sec->ec2tte_used[i] == 0);
         vg_assert(sec->ec2tte[i] == NULL);
      }

      VG_(debugLog)(1,"transtab", "allocate sector %d\n", sno);

      /* tc_sector_szQ counts ULongs, hence * 8 for a byte count. */
      sres = VG_(am_mmap_anon_float_valgrind)( 8 * tc_sector_szQ );
      if (sres.isError) {
         VG_(out_of_memory_NORETURN)("initialiseSector(TC)",
                                     8 * tc_sector_szQ );
         /*NOTREACHED*/
      }
      sec->tc = (ULong*)sres.res;

      sres = VG_(am_mmap_anon_float_valgrind)
                ( N_TTES_PER_SECTOR * sizeof(TTEntry) );
      if (sres.isError) {
         VG_(out_of_memory_NORETURN)("initialiseSector(TT)",
                                     N_TTES_PER_SECTOR * sizeof(TTEntry) );
         /*NOTREACHED*/
      }
      sec->tt = (TTEntry*)sres.res;

      /* Mark every TT slot free. */
      for (i = 0; i < N_TTES_PER_SECTOR; i++) {
         sec->tt[i].status   = Empty;
         sec->tt[i].n_tte2ec = 0;
      }

      if (VG_(clo_verbosity) > 2)
         VG_(message)(Vg_DebugMsg, "TT/TC: initialise sector %d", sno);

   } else {

      /* Sector has been used before.  Dump the old contents. */
      VG_(debugLog)(1,"transtab", "recycle sector %d\n", sno);
      vg_assert(sec->tt != NULL);
      vg_assert(sec->tc_next != NULL);
      n_dump_count += sec->tt_n_inuse;

      /* Visit each just-about-to-be-abandoned translation. */
      for (i = 0; i < N_TTES_PER_SECTOR; i++) {
         if (sec->tt[i].status == InUse) {
            vg_assert(sec->tt[i].n_tte2ec >= 1);
            vg_assert(sec->tt[i].n_tte2ec <= 3);
            n_dump_osize += vge_osize(&sec->tt[i].vge);
            /* Tell the tool too. */
            if (VG_(needs).superblock_discards) {
               VG_TDICT_CALL( tool_discard_superblock_info,
                              sec->tt[i].entry,
                              sec->tt[i].vge );
            }
         } else {
            vg_assert(sec->tt[i].n_tte2ec == 0);
         }
         sec->tt[i].status   = Empty;
         sec->tt[i].n_tte2ec = 0;
      }

      /* Free up the eclass structures. */
      for (i = 0; i < ECLASS_N; i++) {
         if (sec->ec2tte_size[i] == 0) {
            vg_assert(sec->ec2tte_used[i] == 0);
            vg_assert(sec->ec2tte[i] == NULL);
         } else {
            vg_assert(sec->ec2tte[i] != NULL);
            VG_(arena_free)(VG_AR_TTAUX, sec->ec2tte[i]);
            sec->ec2tte[i] = NULL;
            sec->ec2tte_size[i] = 0;
            sec->ec2tte_used[i] = 0;
         }
      }

      if (VG_(clo_verbosity) > 2)
         VG_(message)(Vg_DebugMsg, "TT/TC: recycle sector %d", sno);
   }

   /* Either way, the sector is now empty; stale fast-cache entries
      pointing into it must be zapped. */
   sec->tc_next = sec->tc;
   sec->tt_n_inuse = 0;

   invalidateFastCache();
}
790
/* Make the nbytes of freshly-written code at ptr visible to
   instruction fetch, on targets whose I and D caches are not
   hardware-coherent. */
static void invalidate_icache ( void *ptr, Int nbytes )
{
#  if defined(VGA_ppc32) || defined(VGA_ppc64)
   /* ppc: flush D-cache lines to memory (dcbst), then invalidate the
      corresponding I-cache lines (icbi), with syncs ordering the two
      phases. */
   Addr startaddr = (Addr) ptr;
   Addr endaddr   = startaddr + nbytes;
   Addr cls;
   Addr addr;
   VexArchInfo vai;

   if (nbytes == 0) return;
   vg_assert(nbytes > 0);

   VG_(machine_get_VexArchInfo)( NULL, &vai );
   cls = vai.ppc_cache_line_szB;

   /* Stay sane .. */
   vg_assert(cls == 32 || cls == 128);

   /* Round the start down to a cache-line boundary. */
   startaddr &= ~(cls - 1);
   for (addr = startaddr; addr < endaddr; addr += cls)
      asm volatile("dcbst 0,%0" : : "r" (addr));
   asm volatile("sync");
   for (addr = startaddr; addr < endaddr; addr += cls)
      asm volatile("icbi 0,%0" : : "r" (addr));
   asm volatile("sync; isync");

#  elif defined(VGA_x86)
   /* no need to do anything, hardware provides coherence */

#  elif defined(VGA_amd64)
   /* no need to do anything, hardware provides coherence */

#  else
#    error "Unknown ARCH"
#  endif
}
827
828
829/* Add a translation of vge to TT/TC.  The translation is temporarily
830   in code[0 .. code_len-1].
831
832   pre: youngest_sector points to a valid (although possibly full)
833   sector.
834*/
void VG_(add_to_transtab)( VexGuestExtents* vge,
                           Addr64           entry,
                           AddrH            code,
                           UInt             code_len,
                           Bool             is_self_checking )
{
   Int    tcAvailQ, reqdQ, y, i;
   ULong  *tcptr, *tcptr2;
   UChar* srcP;
   UChar* dstP;

   vg_assert(init_done);
   vg_assert(vge->n_used >= 1 && vge->n_used <= 3);

   /* 60000: should agree with N_TMPBUF in m_translate.c. */
   vg_assert(code_len > 0 && code_len < 60000);

   if (0)
      VG_(printf)("add_to_transtab(entry = 0x%llx, len = %d)\n",
                  entry, code_len);

   /* Update intake statistics. */
   n_in_count++;
   n_in_tsize += code_len;
   n_in_osize += vge_osize(vge);
   if (is_self_checking)
      n_in_sc_count++;

   y = youngest_sector;
   vg_assert(isValidSector(y));

   if (sectors[y].tc == NULL)
      initialiseSector(y);

   /* Try putting the translation in this sector.  reqdQ = required
      space, rounded up to whole ULongs. */
   reqdQ = (code_len + 7) >> 3;

   /* Will it fit in tc? */
   tcAvailQ = ((ULong*)(&sectors[y].tc[tc_sector_szQ]))
              - ((ULong*)(sectors[y].tc_next));
   vg_assert(tcAvailQ >= 0);
   vg_assert(tcAvailQ <= tc_sector_szQ);

   if (tcAvailQ < reqdQ
       || sectors[y].tt_n_inuse >= N_TTES_PER_SECTOR_USABLE) {
      /* No.  So move on to the next sector.  Either it's never been
         used before, in which case it will get its tt/tc allocated
         now, or it has been used before, in which case it is set to be
         empty, hence throwing out the oldest sector. */
      vg_assert(tc_sector_szQ > 0);
      VG_(debugLog)(1,"transtab",
                      "declare sector %d full "
                      "(TT loading %2d%%, TC loading %2d%%)\n",
                      y,
                      (100 * sectors[y].tt_n_inuse)
                         / N_TTES_PER_SECTOR,
                      (100 * (tc_sector_szQ - tcAvailQ))
                         / tc_sector_szQ);
      youngest_sector++;
      if (youngest_sector >= N_SECTORS)
         youngest_sector = 0;
      y = youngest_sector;
      initialiseSector(y);
   }

   /* Be sure ... */
   tcAvailQ = ((ULong*)(&sectors[y].tc[tc_sector_szQ]))
              - ((ULong*)(sectors[y].tc_next));
   vg_assert(tcAvailQ >= 0);
   vg_assert(tcAvailQ <= tc_sector_szQ);
   vg_assert(tcAvailQ >= reqdQ);
   vg_assert(sectors[y].tt_n_inuse < N_TTES_PER_SECTOR_USABLE);
   vg_assert(sectors[y].tt_n_inuse >= 0);

   /* Copy into tc. */
   tcptr = sectors[y].tc_next;
   vg_assert(tcptr >= &sectors[y].tc[0]);
   vg_assert(tcptr <= &sectors[y].tc[tc_sector_szQ]);

   dstP = (UChar*)tcptr;
   srcP = (UChar*)code;
   for (i = 0; i < code_len; i++)
      dstP[i] = srcP[i];
   sectors[y].tc_next += reqdQ;
   sectors[y].tt_n_inuse++;

   /* The copied code will be executed, so flush it to the I-cache on
      architectures with incoherent I/D caches. */
   invalidate_icache( dstP, code_len );

   /* more paranoia */
   tcptr2 = sectors[y].tc_next;
   vg_assert(tcptr2 >= &sectors[y].tc[0]);
   vg_assert(tcptr2 <= &sectors[y].tc[tc_sector_szQ]);

   /* Find an empty tt slot, and use it.  There must be such a slot
      since tt is never allowed to get completely full.  Linear
      probing from the hash point, wrapping at the end. */
   i = HASH_TT(entry);
   vg_assert(i >= 0 && i < N_TTES_PER_SECTOR);
   while (True) {
      if (sectors[y].tt[i].status == Empty
          || sectors[y].tt[i].status == Deleted)
         break;
      i++;
      if (i >= N_TTES_PER_SECTOR)
         i = 0;
   }

   sectors[y].tt[i].status = InUse;
   sectors[y].tt[i].tcptr  = tcptr;
   sectors[y].tt[i].count  = 0;
   sectors[y].tt[i].weight = 1;
   sectors[y].tt[i].vge    = *vge;
   sectors[y].tt[i].entry  = entry;

   /* Update the fast-cache. */
   setFastCacheEntry( entry, tcptr, &sectors[y].tt[i].count );

   /* Note the eclass numbers for this translation. */
   upd_eclasses_after_add( &sectors[y], i );
}
953
954
955/* Search for the translation of the given guest address.  If
956   requested, a successful search can also cause the fast-caches to be
957   updated.
958*/
959Bool VG_(search_transtab) ( /*OUT*/AddrH* result,
960                            Addr64        guest_addr,
961                            Bool          upd_cache )
962{
963   Int i, j, k, kstart, sno;
964
965   vg_assert(init_done);
966   /* Find the initial probe point just once.  It will be the same in
967      all sectors and avoids multiple expensive % operations. */
968   n_full_lookups++;
969   k      = -1;
970   kstart = HASH_TT(guest_addr);
971   vg_assert(kstart >= 0 && kstart < N_TTES_PER_SECTOR);
972
973   /* Search in all the sectors.  Although the order should not matter,
974      it might be most efficient to search in the order youngest to
975      oldest. */
976   sno = youngest_sector;
977   for (i = 0; i < N_SECTORS; i++) {
978
979      if (sectors[sno].tc == NULL)
980         goto notfound; /* sector not in use. */
981
982      k = kstart;
983      for (j = 0; j < N_TTES_PER_SECTOR; j++) {
984         n_lookup_probes++;
985         if (sectors[sno].tt[k].status == InUse
986             && sectors[sno].tt[k].entry == guest_addr) {
987            /* found it */
988            if (upd_cache)
989               setFastCacheEntry(
990                  guest_addr, sectors[sno].tt[k].tcptr,
991                              &sectors[sno].tt[k].count );
992            if (result)
993               *result = (AddrH)sectors[sno].tt[k].tcptr;
994            return True;
995         }
996         if (sectors[sno].tt[k].status == Empty)
997            break; /* not found in this sector */
998         k++;
999         if (k == N_TTES_PER_SECTOR)
1000            k = 0;
1001      }
1002
1003      /* If we fall off the end, all entries are InUse and not
1004         matching, or Deleted.  In any case we did not find it in this
1005         sector. */
1006
1007     notfound:
1008      /* move to the next oldest sector */
1009      sno = sno==0 ? (N_SECTORS-1) : (sno-1);
1010   }
1011
1012   /* Not found in any sector. */
1013   return False;
1014}
1015
1016
1017/*-------------------------------------------------------------*/
1018/*--- Delete translations.                                  ---*/
1019/*-------------------------------------------------------------*/
1020
1021/* forward */
1022static void unredir_discard_translations( Addr64, ULong );
1023
1024/* Stuff for deleting translations which intersect with a given
1025   address range.  Unfortunately, to make this run at a reasonable
1026   speed, it is complex. */
1027
1028static inline
1029Bool overlap1 ( Addr64 s1, ULong r1, Addr64 s2, ULong r2 )
1030{
1031   Addr64 e1 = s1 + r1 - 1ULL;
1032   Addr64 e2 = s2 + r2 - 1ULL;
1033   if (e1 < s2 || e2 < s1)
1034      return False;
1035   return True;
1036}
1037
1038static inline
1039Bool overlaps ( Addr64 start, ULong range, VexGuestExtents* vge )
1040{
1041   if (overlap1(start, range, vge->base[0], (UInt)vge->len[0]))
1042      return True;
1043   if (vge->n_used < 2)
1044      return False;
1045   if (overlap1(start, range, vge->base[1], (UInt)vge->len[1]))
1046      return True;
1047   if (vge->n_used < 3)
1048      return False;
1049   if (overlap1(start, range, vge->base[2], (UInt)vge->len[2]))
1050      return True;
1051   return False;
1052}
1053
1054
1055/* Delete a tt entry, and update all the eclass data accordingly. */
1056
1057static void delete_tte ( /*MOD*/Sector* sec, Int tteno )
1058{
1059   Int      i, ec_num, ec_idx;
1060   TTEntry* tte;
1061
1062   vg_assert(tteno >= 0 && tteno < N_TTES_PER_SECTOR);
1063   tte = &sec->tt[tteno];
1064   vg_assert(tte->status == InUse);
1065   vg_assert(tte->n_tte2ec >= 1 && tte->n_tte2ec <= 3);
1066
1067   /* Deal with the ec-to-tte links first. */
1068   for (i = 0; i < tte->n_tte2ec; i++) {
1069      ec_num = (Int)tte->tte2ec_ec[i];
1070      ec_idx = tte->tte2ec_ix[i];
1071      vg_assert(ec_num >= 0 && ec_num < ECLASS_N);
1072      vg_assert(ec_idx >= 0);
1073      vg_assert(ec_idx < sec->ec2tte_used[ec_num]);
1074      /* Assert that the two links point at each other. */
1075      vg_assert(sec->ec2tte[ec_num][ec_idx] == (UShort)tteno);
1076      /* "delete" the pointer back to here. */
1077      sec->ec2tte[ec_num][ec_idx] = EC2TTE_DELETED;
1078   }
1079
1080   /* Now fix up this TTEntry. */
1081   tte->status   = Deleted;
1082   tte->n_tte2ec = 0;
1083
1084   /* Stats .. */
1085   sec->tt_n_inuse--;
1086   n_disc_count++;
1087   n_disc_osize += vge_osize(&tte->vge);
1088
1089   /* Tell the tool too. */
1090   if (VG_(needs).superblock_discards) {
1091      VG_TDICT_CALL( tool_discard_superblock_info,
1092                     tte->entry,
1093                     tte->vge );
1094   }
1095}
1096
1097
1098/* Delete translations from sec which intersect specified range, but
1099   only consider translations in the specified eclass. */
1100
1101static
1102Bool delete_translations_in_sector_eclass ( /*MOD*/Sector* sec,
1103                                            Addr64 guest_start, ULong range,
1104                                            Int ec )
1105{
1106   Int      i;
1107   UShort   tteno;
1108   Bool     anyDeld = False;
1109   TTEntry* tte;
1110
1111   vg_assert(ec >= 0 && ec < ECLASS_N);
1112
1113   for (i = 0; i < sec->ec2tte_used[ec]; i++) {
1114
1115      tteno = sec->ec2tte[ec][i];
1116      if (tteno == EC2TTE_DELETED) {
1117         /* already deleted */
1118         continue;
1119      }
1120
1121      vg_assert(tteno < N_TTES_PER_SECTOR);
1122
1123      tte = &sec->tt[tteno];
1124      vg_assert(tte->status == InUse);
1125
1126      if (overlaps( guest_start, range, &tte->vge )) {
1127         anyDeld = True;
1128         delete_tte( sec, (Int)tteno );
1129      }
1130
1131   }
1132
1133   return anyDeld;
1134}
1135
1136
1137/* Delete translations from sec which intersect specified range, the
1138   slow way, by inspecting all translations in sec. */
1139
1140static
1141Bool delete_translations_in_sector ( /*MOD*/Sector* sec,
1142                                     Addr64 guest_start, ULong range )
1143{
1144   Int  i;
1145   Bool anyDeld = False;
1146
1147   for (i = 0; i < N_TTES_PER_SECTOR; i++) {
1148      if (sec->tt[i].status == InUse
1149          && overlaps( guest_start, range, &sec->tt[i].vge )) {
1150         anyDeld = True;
1151         delete_tte( sec, i );
1152      }
1153   }
1154
1155   return anyDeld;
1156}
1157
1158
1159void VG_(discard_translations) ( Addr64 guest_start, ULong range,
1160                                 HChar* who )
1161{
1162   Sector* sec;
1163   Int     sno, ec;
1164   Bool    anyDeleted = False;
1165
1166   vg_assert(init_done);
1167
1168   VG_(debugLog)(2, "transtab",
1169                    "discard_translations(0x%llx, %lld) req by %s\n",
1170                    guest_start, range, who );
1171
1172   /* Pre-deletion sanity check */
1173   if (VG_(clo_sanity_level >= 4)) {
1174      Bool sane = sanity_check_all_sectors();
1175      vg_assert(sane);
1176   }
1177
1178   if (range == 0)
1179      return;
1180
1181   /* There are two different ways to do this.
1182
1183      If the range fits within a single address-range equivalence
1184      class, as will be the case for a cache line sized invalidation,
1185      then we only have to inspect the set of translations listed in
1186      that equivalence class, and also in the "sin-bin" equivalence
1187      class ECLASS_MISC.
1188
1189      Otherwise, the invalidation is of a larger range and probably
1190      results from munmap.  In this case it's (probably!) faster just
1191      to inspect all translations, dump those we don't want, and
1192      regenerate the equivalence class information (since modifying it
1193      in-situ is even more expensive).
1194   */
1195
1196   /* First off, figure out if the range falls within a single class,
1197      and if so which one. */
1198
1199   ec = ECLASS_MISC;
1200   if (range < (1ULL << ECLASS_SHIFT))
1201      ec = range_to_eclass( guest_start, (UInt)range );
1202
1203   /* if ec is ECLASS_MISC then we aren't looking at just a single
1204      class, so use the slow scheme.  Else use the fast scheme,
1205      examining 'ec' and ECLASS_MISC. */
1206
1207   if (ec != ECLASS_MISC) {
1208
1209      VG_(debugLog)(2, "transtab",
1210                       "                    FAST, ec = %d\n", ec);
1211
1212      /* Fast scheme */
1213      vg_assert(ec >= 0 && ec < ECLASS_MISC);
1214
1215      for (sno = 0; sno < N_SECTORS; sno++) {
1216         sec = &sectors[sno];
1217         if (sec->tc == NULL)
1218            continue;
1219         anyDeleted |= delete_translations_in_sector_eclass(
1220                         sec, guest_start, range, ec );
1221         anyDeleted |= delete_translations_in_sector_eclass(
1222                         sec, guest_start, range, ECLASS_MISC );
1223      }
1224
1225   } else {
1226
1227      /* slow scheme */
1228
1229      VG_(debugLog)(2, "transtab",
1230                       "                    SLOW, ec = %d\n", ec);
1231
1232      for (sno = 0; sno < N_SECTORS; sno++) {
1233         sec = &sectors[sno];
1234         if (sec->tc == NULL)
1235            continue;
1236         anyDeleted |= delete_translations_in_sector(
1237                         sec, guest_start, range );
1238      }
1239
1240   }
1241
1242   if (anyDeleted)
1243      invalidateFastCache();
1244
1245   /* don't forget the no-redir cache */
1246   unredir_discard_translations( guest_start, range );
1247
1248   /* Post-deletion sanity check */
1249   if (VG_(clo_sanity_level >= 4)) {
1250      Int      i;
1251      TTEntry* tte;
1252      Bool     sane = sanity_check_all_sectors();
1253      vg_assert(sane);
1254      /* But now, also check the requested address range isn't
1255         present anywhere. */
1256      for (sno = 0; sno < N_SECTORS; sno++) {
1257         sec = &sectors[sno];
1258         if (sec->tc == NULL)
1259            continue;
1260         for (i = 0; i < N_TTES_PER_SECTOR; i++) {
1261            tte = &sec->tt[i];
1262            if (tte->status != InUse)
1263               continue;
1264            vg_assert(!overlaps( guest_start, range, &tte->vge ));
1265         }
1266      }
1267   }
1268}
1269
1270
1271/*------------------------------------------------------------*/
1272/*--- AUXILIARY: the unredirected TT/TC                    ---*/
1273/*------------------------------------------------------------*/
1274
1275/* A very simple translation cache which holds a small number of
1276   unredirected translations.  This is completely independent of the
1277   main tt/tc structures.  When unredir_tc or unredir_tt becomes full,
1278   both structures are simply dumped and we start over.
1279
1280   Since these translations are unredirected, the search key is (by
1281   definition) the first address entry in the .vge field. */
1282
1283/* Sized to hold 500 translations of average size 1000 bytes. */
1284
1285#define UNREDIR_SZB   1000
1286
1287#define N_UNREDIR_TT  500
1288#define N_UNREDIR_TCQ (N_UNREDIR_TT * UNREDIR_SZB / sizeof(ULong))
1289
/* One slot of the unredirected translation table. */
typedef
   struct {
      VexGuestExtents vge;   // guest extents; vge.base[0] is the search key
      Addr            hcode; // host address of the code, inside unredir_tc
      Bool            inUse; // True when this slot holds a live translation
   }
   UTCEntry;
1297
1298/* We just allocate forwards in _tc, never deleting. */
1299static ULong    *unredir_tc;
1300static Int      unredir_tc_used = N_UNREDIR_TCQ;
1301
1302/* Slots in _tt can come into use and out again (.inUse).
1303   Nevertheless _tt_highwater is maintained so that invalidations
1304   don't have to scan all the slots when only a few are in use.
1305   _tt_highwater holds the index of the highest ever allocated
1306   slot. */
1307static UTCEntry unredir_tt[N_UNREDIR_TT];
1308static Int      unredir_tt_highwater;
1309
1310
1311static void init_unredir_tt_tc ( void )
1312{
1313   Int i;
1314   if (unredir_tc == NULL) {
1315      SysRes sres = VG_(am_mmap_anon_float_valgrind)( N_UNREDIR_TT * UNREDIR_SZB );
1316      if (sres.isError) {
1317         VG_(out_of_memory_NORETURN)("init_unredir_tt_tc", N_UNREDIR_TT * UNREDIR_SZB);
1318         /*NOTREACHED*/
1319      }
1320      unredir_tc = (ULong *)sres.res;
1321   }
1322   unredir_tc_used = 0;
1323   for (i = 0; i < N_UNREDIR_TT; i++)
1324      unredir_tt[i].inUse = False;
1325   unredir_tt_highwater = -1;
1326}
1327
1328/* Do a sanity check; return False on failure. */
1329static Bool sanity_check_redir_tt_tc ( void )
1330{
1331   Int i;
1332   if (unredir_tt_highwater < -1) return False;
1333   if (unredir_tt_highwater >= N_UNREDIR_TT) return False;
1334
1335   for (i = unredir_tt_highwater+1; i < N_UNREDIR_TT; i++)
1336      if (unredir_tt[i].inUse)
1337         return False;
1338
1339   if (unredir_tc_used < 0) return False;
1340   if (unredir_tc_used > N_UNREDIR_TCQ) return False;
1341
1342   return True;
1343}
1344
1345
1346/* Add an UNREDIRECTED translation of vge to TT/TC.  The translation
1347   is temporarily in code[0 .. code_len-1].
1348*/
1349void VG_(add_to_unredir_transtab)( VexGuestExtents* vge,
1350                                   Addr64           entry,
1351                                   AddrH            code,
1352                                   UInt             code_len,
1353                                   Bool             is_self_checking )
1354{
1355   Int   i, j, code_szQ;
1356   HChar *srcP, *dstP;
1357
1358   vg_assert(sanity_check_redir_tt_tc());
1359
1360   /* This is the whole point: it's not redirected! */
1361   vg_assert(entry == vge->base[0]);
1362
1363   /* How many unredir_tt slots are needed */
1364   code_szQ = (code_len + 7) / 8;
1365
1366   /* Look for an empty unredir_tc slot */
1367   for (i = 0; i < N_UNREDIR_TT; i++)
1368      if (!unredir_tt[i].inUse)
1369         break;
1370
1371   if (i >= N_UNREDIR_TT || code_szQ > (N_UNREDIR_TCQ - unredir_tc_used)) {
1372      /* It's full; dump everything we currently have */
1373      init_unredir_tt_tc();
1374      i = 0;
1375   }
1376
1377   vg_assert(unredir_tc_used >= 0);
1378   vg_assert(unredir_tc_used <= N_UNREDIR_TCQ);
1379   vg_assert(code_szQ > 0);
1380   vg_assert(code_szQ + unredir_tc_used <= N_UNREDIR_TCQ);
1381   vg_assert(i >= 0 && i < N_UNREDIR_TT);
1382   vg_assert(unredir_tt[i].inUse == False);
1383
1384   if (i > unredir_tt_highwater)
1385      unredir_tt_highwater = i;
1386
1387   dstP = (HChar*)&unredir_tc[unredir_tc_used];
1388   srcP = (HChar*)code;
1389   for (j = 0; j < code_len; j++)
1390      dstP[j] = srcP[j];
1391
1392   invalidate_icache( dstP, code_len );
1393
1394   unredir_tt[i].inUse = True;
1395   unredir_tt[i].vge   = *vge;
1396   unredir_tt[i].hcode = (Addr)dstP;
1397
1398   unredir_tc_used += code_szQ;
1399   vg_assert(unredir_tc_used >= 0);
1400   vg_assert(unredir_tc_used <= N_UNREDIR_TCQ);
1401
1402   vg_assert(&dstP[code_len] <= (HChar*)&unredir_tc[unredir_tc_used]);
1403}
1404
1405Bool VG_(search_unredir_transtab) ( /*OUT*/AddrH* result,
1406                                    Addr64        guest_addr )
1407{
1408   Int i;
1409   for (i = 0; i < N_UNREDIR_TT; i++) {
1410      if (!unredir_tt[i].inUse)
1411         continue;
1412      if (unredir_tt[i].vge.base[0] == guest_addr) {
1413         *result = (AddrH)unredir_tt[i].hcode;
1414         return True;
1415      }
1416   }
1417   return False;
1418}
1419
1420static void unredir_discard_translations( Addr64 guest_start, ULong range )
1421{
1422   Int i;
1423
1424   vg_assert(sanity_check_redir_tt_tc());
1425
1426   for (i = 0; i <= unredir_tt_highwater; i++) {
1427      if (unredir_tt[i].inUse
1428          && overlaps( guest_start, range, &unredir_tt[i].vge))
1429         unredir_tt[i].inUse = False;
1430   }
1431}
1432
1433
1434/*------------------------------------------------------------*/
1435/*--- Initialisation.                                      ---*/
1436/*------------------------------------------------------------*/
1437
/* One-time initialisation of the main and unredirected TT/TC
   structures.  Must be called exactly once, before any translations
   are added or looked up.  Sector tc/tt areas themselves are not
   allocated here; they are mmap'd lazily by initialiseSector(). */
void VG_(init_tt_tc) ( void )
{
   Int i, j, avg_codeszQ;

   vg_assert(!init_done);
   init_done = True;

   /* Otherwise lots of things go wrong... */
   vg_assert(sizeof(ULong) == 8);
   vg_assert(sizeof(Addr64) == 8);
   /* check fast cache entries really are 2 words long */
   vg_assert(sizeof(Addr) == sizeof(void*));
   vg_assert(sizeof(FastCacheEntry) == 2 * sizeof(Addr));
   /* check fast cache entries are packed back-to-back with no spaces */
   vg_assert(sizeof( VG_(tt_fast) ) == VG_TT_FAST_SIZE * sizeof(FastCacheEntry));
   /* check fast cache is aligned as we requested.  Not fatal if it
      isn't, but we might as well make sure. */
   vg_assert(VG_IS_16_ALIGNED( ((Addr) & VG_(tt_fast)[0]) ));

   if (VG_(clo_verbosity) > 2)
      VG_(message)(Vg_DebugMsg,
                   "TT/TC: VG_(init_tt_tc) "
                   "(startup of code management)");

   /* Figure out how big each tc area should be.  Scaled by the
      tool-reported average translation size, plus one ULong of
      per-translation slack. */
   avg_codeszQ   = (VG_(details).avg_translation_sizeB + 7) / 8;
   tc_sector_szQ = N_TTES_PER_SECTOR_USABLE * (1 + avg_codeszQ);

   /* Ensure the calculated value is not way crazy. */
   vg_assert(tc_sector_szQ >= 2 * N_TTES_PER_SECTOR_USABLE);
   vg_assert(tc_sector_szQ <= 80 * N_TTES_PER_SECTOR_USABLE);

   /* Initialise the sectors.  All start out unallocated (tc == NULL)
      with empty eclass maps. */
   youngest_sector = 0;
   for (i = 0; i < N_SECTORS; i++) {
      sectors[i].tc = NULL;
      sectors[i].tt = NULL;
      sectors[i].tc_next = NULL;
      sectors[i].tt_n_inuse = 0;
      for (j = 0; j < ECLASS_N; j++) {
         sectors[i].ec2tte_size[j] = 0;
         sectors[i].ec2tte_used[j] = 0;
         sectors[i].ec2tte[j] = NULL;
      }
   }

   /* Initialise the fast caches.  If not profiling (the usual case),
      we have to explicitly invalidate the fastN cache as
      invalidateFastCache() won't do that for us. */
   invalidateFastCache();
   if (VG_(clo_profile_flags) == 0)
      invalidateFastNCache();

   /* and the unredir tt/tc */
   init_unredir_tt_tc();

   /* Report sizing, both to the user (at high verbosity) and to the
      debug log. */
   if (VG_(clo_verbosity) > 2) {
      VG_(message)(Vg_DebugMsg,
         "TT/TC: cache: %d sectors of %d bytes each = %d total",
          N_SECTORS, 8 * tc_sector_szQ,
          N_SECTORS * 8 * tc_sector_szQ );
      VG_(message)(Vg_DebugMsg,
         "TT/TC: table: %d total entries, max occupancy %d (%d%%)",
         N_SECTORS * N_TTES_PER_SECTOR,
         N_SECTORS * N_TTES_PER_SECTOR_USABLE,
         SECTOR_TT_LIMIT_PERCENT );
   }

   VG_(debugLog)(2, "transtab",
      "cache: %d sectors of %d bytes each = %d total\n",
       N_SECTORS, 8 * tc_sector_szQ,
       N_SECTORS * 8 * tc_sector_szQ );
   VG_(debugLog)(2, "transtab",
      "table: %d total entries, max occupancy %d (%d%%)\n",
      N_SECTORS * N_TTES_PER_SECTOR,
      N_SECTORS * N_TTES_PER_SECTOR_USABLE,
      SECTOR_TT_LIMIT_PERCENT );
}
1516
1517
1518/*------------------------------------------------------------*/
1519/*--- Printing out statistics.                             ---*/
1520/*------------------------------------------------------------*/
1521
1522static ULong safe_idiv( ULong a, ULong b )
1523{
1524   return (b == 0 ? 0 : a / b);
1525}
1526
/* Return the total number of translations made so far (the n_in_count
   statistic); used for status reporting. */
UInt VG_(get_bbs_translated) ( void )
{
   return n_in_count;
}
1531
/* Print TT/TC statistics: lookup/fast-cache counters and totals for
   new, dumped, and discarded translations.  "osize" counts guest code
   bytes, "tsize" host code bytes; the ratio is tsize:osize scaled by
   10, and "scs" counts self-checking translations. */
void VG_(print_tt_tc_stats) ( void )
{
   VG_(message)(Vg_DebugMsg,
      "    tt/tc: %,llu tt lookups requiring %,llu probes",
      n_full_lookups, n_lookup_probes );
   VG_(message)(Vg_DebugMsg,
      "    tt/tc: %,llu fast-cache updates, %,llu flushes",
      n_fast_updates, n_fast_flushes );

   VG_(message)(Vg_DebugMsg,
                " transtab: new        %,lld "
                "(%,llu -> %,llu; ratio %,llu:10) [%,llu scs]",
                n_in_count, n_in_osize, n_in_tsize,
                safe_idiv(10*n_in_tsize, n_in_osize),
                n_in_sc_count);
   VG_(message)(Vg_DebugMsg,
                " transtab: dumped     %,llu (%,llu -> ?" "?)",
                n_dump_count, n_dump_osize );
   VG_(message)(Vg_DebugMsg,
                " transtab: discarded  %,llu (%,llu -> ?" "?)",
                n_disc_count, n_disc_osize );

   /* Optional dump of sector 0's per-eclass occupancy; disabled by
      default. */
   if (0) {
      Int i;
      VG_(printf)("\n");
      for (i = 0; i < ECLASS_N; i++) {
         VG_(printf)(" %4d", sectors[0].ec2tte_used[i]);
         if (i % 16 == 15)
            VG_(printf)("\n");
      }
      VG_(printf)("\n\n");
   }
}
1565
1566/*------------------------------------------------------------*/
1567/*--- Printing out of profiling results.                   ---*/
1568/*------------------------------------------------------------*/
1569
1570static ULong score ( TTEntry* tte )
1571{
1572   return ((ULong)tte->weight) * ((ULong)tte->count);
1573}
1574
1575ULong VG_(get_BB_profile) ( BBProfEntry tops[], UInt n_tops )
1576{
1577   Int   sno, i, r, s;
1578   ULong score_total;
1579
1580   /* First, compute the total weighted count, and find the top N
1581      ttes.  tops contains pointers to the most-used n_tops blocks, in
1582      descending order (viz, tops[0] is the highest scorer). */
1583   for (i = 0; i < n_tops; i++) {
1584      tops[i].addr  = 0;
1585      tops[i].score = 0;
1586   }
1587
1588   score_total = 0;
1589
1590   for (sno = 0; sno < N_SECTORS; sno++) {
1591      if (sectors[sno].tc == NULL)
1592         continue;
1593      for (i = 0; i < N_TTES_PER_SECTOR; i++) {
1594         if (sectors[sno].tt[i].status != InUse)
1595            continue;
1596         score_total += score(&sectors[sno].tt[i]);
1597         /* Find the rank for sectors[sno].tt[i]. */
1598         r = n_tops-1;
1599         while (True) {
1600            if (r == -1)
1601               break;
1602             if (tops[r].addr == 0) {
1603               r--;
1604               continue;
1605             }
1606             if ( score(&sectors[sno].tt[i]) > tops[r].score ) {
1607                r--;
1608                continue;
1609             }
1610             break;
1611         }
1612         r++;
1613         vg_assert(r >= 0 && r <= n_tops);
1614         /* This bb should be placed at r, and bbs above it shifted
1615            upwards one slot. */
1616         if (r < n_tops) {
1617            for (s = n_tops-1; s > r; s--)
1618               tops[s] = tops[s-1];
1619            tops[r].addr  = sectors[sno].tt[i].entry;
1620            tops[r].score = score( &sectors[sno].tt[i] );
1621         }
1622      }
1623   }
1624
1625   return score_total;
1626}
1627
1628/*--------------------------------------------------------------------*/
1629/*--- end                                                          ---*/
1630/*--------------------------------------------------------------------*/
1631