m_transtab.c revision 5d0d1f3a78d6c5c765360982a08d87a068ce7f1c
1
2/*--------------------------------------------------------------------*/
3/*--- Management of the translation table and cache.               ---*/
4/*---                                                 m_transtab.c ---*/
5/*--------------------------------------------------------------------*/
6
7/*
8   This file is part of Valgrind, a dynamic binary instrumentation
9   framework.
10
11   Copyright (C) 2000-2009 Julian Seward
12      jseward@acm.org
13
14   This program is free software; you can redistribute it and/or
15   modify it under the terms of the GNU General Public License as
16   published by the Free Software Foundation; either version 2 of the
17   License, or (at your option) any later version.
18
19   This program is distributed in the hope that it will be useful, but
20   WITHOUT ANY WARRANTY; without even the implied warranty of
21   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22   General Public License for more details.
23
24   You should have received a copy of the GNU General Public License
25   along with this program; if not, write to the Free Software
26   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27   02111-1307, USA.
28
29   The GNU General Public License is contained in the file COPYING.
30*/
31
32#include "pub_core_basics.h"
33#include "pub_core_debuglog.h"
34#include "pub_core_machine.h"    // For VG(machine_get_VexArchInfo)
35#include "pub_core_libcbase.h"
36#include "pub_core_libcassert.h"
37#include "pub_core_libcprint.h"
38#include "pub_core_options.h"
39#include "pub_core_tooliface.h"  // For VG_(details).avg_translation_sizeB
40#include "pub_core_transtab.h"
41#include "pub_core_aspacemgr.h"
42#include "pub_core_mallocfree.h" // VG_(out_of_memory_NORETURN)
43
44// JRS FIXME get rid of this somehow
45#if defined(VGP_arm_linux)
46# include "pub_core_vkiscnums.h" // __ARM_NR_cacheflush
47# include "pub_core_syscall.h"   // VG_(do_syscallN)
48#endif
49
50
51/* #define DEBUG_TRANSTAB */
52
53
54/*-------------------------------------------------------------*/
55/*--- Management of the FIFO-based translation table+cache. ---*/
56/*-------------------------------------------------------------*/
57
58/*------------------ CONSTANTS ------------------*/
59
60/* Number of sectors the TC is divided into.  If you need a larger
61   overall translation cache, increase this value. */
62#define N_SECTORS 8
63
64/* Number of TT entries in each sector.  This needs to be a prime
65   number to work properly; it must be <= 65535 (so that a TT index
66   fits in a UShort, leaving room for 0xFFFF (EC2TTE_DELETED) to denote
67   'deleted'), and it is strongly recommended not to change this.
68   65521 is the largest prime <= 65535. */
69#define N_TTES_PER_SECTOR /*30011*/ /*40009*/ 65521
70
71/* Because each sector contains a hash table of TTEntries, we need to
72   specify the maximum allowable loading, after which the sector is
73   deemed full. */
74#define SECTOR_TT_LIMIT_PERCENT 65
75
76/* The sector is deemed full when this many entries are in it. */
77#define N_TTES_PER_SECTOR_USABLE \
78           ((N_TTES_PER_SECTOR * SECTOR_TT_LIMIT_PERCENT) / 100)
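/* With the default values above this works out at (65521 * 65) / 100
   = 42588 usable entries per sector. */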
79
80/* Equivalence classes for fast address range deletion.  There are 1 +
81   2^ECLASS_WIDTH bins.  The highest one, ECLASS_MISC, describes an
82   address range which does not fall cleanly within any specific bin.
83   Note that ECLASS_SHIFT + ECLASS_WIDTH must be < 32. */
84#define ECLASS_SHIFT 11
85#define ECLASS_WIDTH 8
86#define ECLASS_MISC  (1 << ECLASS_WIDTH)
87#define ECLASS_N     (1 + ECLASS_MISC)
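/* Worked example, assuming the default ECLASS_SHIFT/ECLASS_WIDTH above:
   each bin covers a 2048-byte window of guest address space, and the
   bin pattern repeats every 2^(11+8) = 512KB.  A 100-byte range starting
   at 0x8048100 has both its first and last byte in window
   (0x8048100 >> 11) & 0xFF = 0x90, so it falls in bin 144; a short range
   straddling a 2KB window boundary gets ECLASS_MISC instead. */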
88
89#define EC2TTE_DELETED  0xFFFF /* 16-bit special value */
90
91
92/*------------------ TYPES ------------------*/
93
94/* A translation-table entry.  This indicates precisely which areas of
95   guest code are included in the translation, and contains all other
96   auxiliary info too.  */
97typedef
98   struct {
99      /* Profiling only: the count and weight (arbitrary meaning) for
100         this translation.  Weight is a property of the translation
101         itself and computed once when the translation is created.
102         Count is an entry count for the translation and is
103         incremented by 1 every time the translation is used, if we
104         are profiling. */
105      UInt     count;
106      UShort   weight;
107
108      /* Status of the slot.  Note, we need to be able to do lazy
109         deletion, hence the Deleted state. */
110      enum { InUse, Deleted, Empty } status;
111
112      /* 64-bit aligned pointer to one or more 64-bit words containing
113         the corresponding host code (must be in the same sector!)
114         This is a pointer into the sector's tc (code) area. */
115      ULong* tcptr;
116
117      /* This is the original guest address that purportedly is the
118         entry point of the translation.  You might think that .entry
119      should be the same as .vge.base[0], and most of the time it
120         is.  However, when doing redirections, that is not the case.
121         .vge must always correctly describe the guest code sections
122         from which this translation was made.  However, .entry may or
123         may not be a lie, depending on whether or not we're doing
124         redirection. */
125      Addr64 entry;
126
127      /* This structure describes precisely what ranges of guest code
128         the translation covers, so we can decide whether or not to
129         delete it when translations of a given address range are
130         invalidated. */
131      VexGuestExtents vge;
132
133      /* Address range summary info: these are pointers back to
134         eclass[] entries in the containing Sector.  Those entries in
135         turn point back here -- the two structures are mutually
136         redundant but both necessary to make fast deletions work.
137         The eclass info is similar to, and derived from, this entry's
138         'vge' field, but it is not the same. */
139      UShort n_tte2ec;      // # tte2ec pointers (1 to 3)
140      UShort tte2ec_ec[3];  // for each, the eclass #
141      UInt   tte2ec_ix[3];  // and the index within the eclass.
142      // for i in 0 .. n_tte2ec-1
143      //    sec->ec2tte[ tte2ec_ec[i] ][ tte2ec_ix[i] ]
144      // should be the index
145      // of this TTEntry in the containing Sector's tt array.
146   }
147   TTEntry;
148
149
150/* Finally, a sector itself.  Each sector contains an array of
151   TCEntries, which hold code, and an array of TTEntries, containing
152   all required administrative info.  Profiling is supported using the
153   TTEntry .count and .weight fields, if required.  Each sector is
154   independent in that no cross-sector references are allowed.
155
156   If the sector is not in use, all three pointers are NULL and
157   tt_n_inuse is zero.
158*/
159typedef
160   struct {
161      /* The TCEntry area.  Size of this depends on the average
162         translation size.  We try and size it so it becomes full
163         precisely when this sector's translation table (tt) reaches
164         its load limit (SECTOR_TT_LIMIT_PERCENT). */
165      ULong* tc;
166
167      /* The TTEntry array.  This is a fixed size, always containing
168         exactly N_TTES_PER_SECTOR entries. */
169      TTEntry* tt;
170
171      /* This points to the current allocation point in tc. */
172      ULong* tc_next;
173
174      /* The count of tt entries with state InUse. */
175      Int tt_n_inuse;
176
177      /* Expandable arrays of tt indices for each of the ECLASS_N
178         address range equivalence classes.  These hold indices into
179         the containing sector's tt array, which in turn should point
180         back here. */
181      Int     ec2tte_size[ECLASS_N];
182      Int     ec2tte_used[ECLASS_N];
183      UShort* ec2tte[ECLASS_N];
184   }
185   Sector;
186
187
188/*------------------ DECLS ------------------*/
189
190/* The root data structure is an array of sectors.  The index of the
191   youngest sector is recorded, and new translations are put into that
192   sector.  When it fills up, we move along to the next sector and
193   start to fill that up, wrapping around at the end of the array.
194   That way, once all N_SECTORS have been brought into use for the
195   first time, and are full, we then re-use the oldest sector,
196   endlessly.
197
198   When running, youngest_sector should be >= 0 and <
199   N_SECTORS.  The initial -1 value indicates the TT/TC system is
200   not yet initialised.
201*/
202static Sector sectors[N_SECTORS];
203static Int    youngest_sector = -1;
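/* For example, with N_SECTORS == 8 translations fill sectors 0..7 in
   turn; once sector 7 is declared full, the next addition wraps
   youngest_sector back to 0 and initialiseSector(0) throws away whatever
   sector 0 previously held (see VG_(add_to_transtab) below). */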
204
205/* The number of ULongs in each TCEntry area.  This is computed once
206   at startup and does not change. */
207static Int    tc_sector_szQ;
208
209
210/* A list of sector numbers, in the order in which they should be
211   searched to find translations.  This is an optimisation to be used
212   when searching for translations and should not affect
213   correctness.  -1 denotes "no entry". */
214static Int sector_search_order[N_SECTORS];
215
216
217/* Fast helper for the TT.  A direct-mapped cache which holds a set of
218   recently used (guest address, host address) pairs.  This array is
219   referred to directly from m_dispatch/dispatch-<platform>.S.
220
221   Entries in tt_fast may refer to any valid TC entry, regardless of
222   which sector it's in.  Consequently we must be very careful to
223   invalidate this cache when TC entries are changed or disappear.
224
225   A special .guest address -- TRANSTAB_BOGUS_GUEST_ADDR -- must be
226   pointed at to cause that cache entry to miss.  This relies on the
227   assumption that no guest code actually has that address, hence a
228   value 0x1 seems good.  m_translate gives the client a synthetic
229   segfault if it tries to execute at this address.
230*/
231/*
232typedef
233   struct {
234      Addr guest;
235      Addr host;
236   }
237   FastCacheEntry;
238*/
239/*global*/ __attribute__((aligned(16)))
240           FastCacheEntry VG_(tt_fast)[VG_TT_FAST_SIZE];
241/*
242#define TRANSTAB_BOGUS_GUEST_ADDR ((Addr)1)
243*/
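/* A minimal sketch (illustration only, not compiled, and not how the
   dispatcher is actually written; the real fast path is in assembly in
   m_dispatch) of what a fast-cache probe amounts to in C terms.
   tt_fast_lookup_sketch is a made-up name used just for this sketch. */
#if 0
static Bool tt_fast_lookup_sketch ( Addr64 guest, /*OUT*/Addr* host )
{
   UInt cno = (UInt)VG_TT_FAST_HASH(guest);
   if (VG_(tt_fast)[cno].guest == (Addr)guest) {
      /* hit: the dispatcher jumps straight to the cached host code */
      *host = VG_(tt_fast)[cno].host;
      return True;
   }
   /* miss: fall back to the full lookup, VG_(search_transtab) */
   return False;
}
#endif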
244
245/* For profiling, we have a parallel array of pointers to .count
246   fields in TT entries.  Again, these pointers must be invalidated
247   when translations disappear.  A NULL pointer suffices to indicate
248   an unused slot.
249
250   When not profiling (the normal case, VG_(clo_profile_flags) == 0),
251   all tt_fastN entries are set to NULL at startup and never read nor
252   written after that.
253
254   When profiling (VG_(clo_profile_flags) > 0), tt_fast and tt_fastN
255   change together: if tt_fast[i].guest is TRANSTAB_BOGUS_GUEST_ADDR
256   then the corresponding tt_fastN[i] must be null.  If
257   tt_fast[i].guest is any other value, then tt_fastN[i] *must* point
258   to the .count field of the corresponding TT entry.
259
260   tt_fast and tt_fastN are referred to from assembly code
261   (dispatch.S).
262*/
263/*global*/ UInt* VG_(tt_fastN)[VG_TT_FAST_SIZE];
264
265
266/* Make sure we're not used before initialisation. */
267static Bool init_done = False;
268
269
270/*------------------ STATS DECLS ------------------*/
271
272/* Number of fast-cache updates and flushes done. */
273ULong n_fast_flushes = 0;
274ULong n_fast_updates = 0;
275
276/* Number of full lookups done. */
277ULong n_full_lookups = 0;
278ULong n_lookup_probes = 0;
279
280/* Number/osize/tsize of translations entered; also the number of
281   those for which self-checking was requested. */
282ULong n_in_count    = 0;
283ULong n_in_osize    = 0;
284ULong n_in_tsize    = 0;
285ULong n_in_sc_count = 0;
286
287/* Number/osize of translations discarded due to lack of space. */
288ULong n_dump_count = 0;
289ULong n_dump_osize = 0;
290
291/* Number/osize of translations discarded due to requests to do so. */
292ULong n_disc_count = 0;
293ULong n_disc_osize = 0;
294
295
296/*-------------------------------------------------------------*/
297/*--- Address-range equivalence class stuff                 ---*/
298/*-------------------------------------------------------------*/
299
300/* Return equivalence class number for a range. */
301
302static Int range_to_eclass ( Addr64 start, UInt len )
303{
304   UInt mask   = (1 << ECLASS_WIDTH) - 1;
305   UInt lo     = (UInt)start;
306   UInt hi     = lo + len - 1;
307   UInt loBits = (lo >> ECLASS_SHIFT) & mask;
308   UInt hiBits = (hi >> ECLASS_SHIFT) & mask;
309   if (loBits == hiBits) {
310      vg_assert(loBits < ECLASS_N-1);
311      return loBits;
312   } else {
313      return ECLASS_MISC;
314   }
315}
316
317
318/* Calculates the equivalence class numbers for any VexGuestExtents.
319   These are written in *eclasses, which must be big enough to hold 3
320   Ints.  The number written, between 1 and 3, is returned.  The
321   eclasses are presented in order, and any duplicates are removed.
322*/
323
324static
325Int vexGuestExtents_to_eclasses ( /*OUT*/Int* eclasses,
326                                  VexGuestExtents* vge )
327{
328#  define SWAP(_lv1,_lv2) \
329      do { Int t = _lv1; _lv1 = _lv2; _lv2 = t; } while (0)
330
331   Int i, j, n_ec, r;
332
333   vg_assert(vge->n_used >= 1 && vge->n_used <= 3);
334
335   n_ec = 0;
336   for (i = 0; i < vge->n_used; i++) {
337      r = range_to_eclass( vge->base[i], (UInt)vge->len[i] );
338      if (r == ECLASS_MISC)
339         goto bad;
340      /* only add if we haven't already seen it */
341      for (j = 0; j < n_ec; j++)
342         if (eclasses[j] == r)
343            break;
344      if (j == n_ec)
345         eclasses[n_ec++] = r;
346   }
347
348   if (n_ec == 1)
349      return 1;
350
351   if (n_ec == 2) {
352      /* sort */
353      if (eclasses[0] > eclasses[1])
354         SWAP(eclasses[0], eclasses[1]);
355      return 2;
356   }
357
358   if (n_ec == 3) {
359      /* sort */
360      if (eclasses[0] > eclasses[2])
361         SWAP(eclasses[0], eclasses[2]);
362      if (eclasses[0] > eclasses[1])
363         SWAP(eclasses[0], eclasses[1]);
364      if (eclasses[1] > eclasses[2])
365         SWAP(eclasses[1], eclasses[2]);
366      return 3;
367   }
368
369   /* NOTREACHED */
370   vg_assert(0);
371
372  bad:
373   eclasses[0] = ECLASS_MISC;
374   return 1;
375
376#  undef SWAP
377}
378
379
380/* Add tteno to the set of entries listed for equivalence class ec in
381   this sector.  Returns used location in eclass array. */
382
383static
384UInt addEClassNo ( /*MOD*/Sector* sec, Int ec, UShort tteno )
385{
386   Int    old_sz, new_sz, i, r;
387   UShort *old_ar, *new_ar;
388
389   vg_assert(ec >= 0 && ec < ECLASS_N);
390   vg_assert(tteno < N_TTES_PER_SECTOR);
391
392   if (0) VG_(printf)("ec %d  gets %d\n", ec, (Int)tteno);
393
394   if (sec->ec2tte_used[ec] >= sec->ec2tte_size[ec]) {
395
396      vg_assert(sec->ec2tte_used[ec] == sec->ec2tte_size[ec]);
397
398      old_sz = sec->ec2tte_size[ec];
399      old_ar = sec->ec2tte[ec];
400      new_sz = old_sz==0 ? 8 : old_sz<64 ? 2*old_sz : (3*old_sz)/2;
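      /* i.e. the capacity grows as 8, 16, 32, 64, 96, 144, 216, ... :
         doubling while small, then by a factor of 1.5. */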
401      new_ar = VG_(arena_malloc)(VG_AR_TTAUX, "transtab.aECN.1",
402                                 new_sz * sizeof(UShort));
403      for (i = 0; i < old_sz; i++)
404         new_ar[i] = old_ar[i];
405      if (old_ar)
406         VG_(arena_free)(VG_AR_TTAUX, old_ar);
407      sec->ec2tte_size[ec] = new_sz;
408      sec->ec2tte[ec] = new_ar;
409
410      if (0) VG_(printf)("expand ec %d to %d\n", ec, new_sz);
411   }
412
413   /* Common case */
414   r = sec->ec2tte_used[ec]++;
415   vg_assert(r >= 0 && r < sec->ec2tte_size[ec]);
416   sec->ec2tte[ec][r] = tteno;
417   return (UInt)r;
418}
419
420
421/* 'vge' is being added to 'sec' at TT entry 'tteno'.  Add appropriate
422   eclass entries to 'sec'. */
423
424static
425void upd_eclasses_after_add ( /*MOD*/Sector* sec, Int tteno )
426{
427   Int i, r, eclasses[3];
428   TTEntry* tte;
429   vg_assert(tteno >= 0 && tteno < N_TTES_PER_SECTOR);
430
431   tte = &sec->tt[tteno];
432   r = vexGuestExtents_to_eclasses( eclasses, &tte->vge );
433
434   vg_assert(r >= 1 && r <= 3);
435   tte->n_tte2ec = r;
436
437   for (i = 0; i < r; i++) {
438      tte->tte2ec_ec[i] = eclasses[i];
439      tte->tte2ec_ix[i] = addEClassNo( sec, eclasses[i], (UShort)tteno );
440   }
441}
442
443
444/* Check the eclass info in 'sec' to ensure it is consistent.  Returns
445   True if OK, False if something's not right.  Expensive. */
446
447static Bool sanity_check_eclasses_in_sector ( Sector* sec )
448{
449#  define BAD(_str) do { whassup = (_str); goto bad; } while (0)
450
451   HChar*   whassup = NULL;
452   Int      i, j, k, n, ec_num, ec_idx;
453   TTEntry* tte;
454   UShort   tteno;
455   ULong*   tce;
456
457   /* Basic checks on this sector */
458   if (sec->tt_n_inuse < 0 || sec->tt_n_inuse > N_TTES_PER_SECTOR_USABLE)
459      BAD("invalid sec->tt_n_inuse");
460   tce = sec->tc_next;
461   if (tce < &sec->tc[0] || tce > &sec->tc[tc_sector_szQ])
462      BAD("sec->tc_next points outside tc");
463
464   /* For each eclass ... */
465   for (i = 0; i < ECLASS_N; i++) {
466      if (sec->ec2tte_size[i] == 0 && sec->ec2tte[i] != NULL)
467         BAD("ec2tte_size/ec2tte mismatch(1)");
468      if (sec->ec2tte_size[i] != 0 && sec->ec2tte[i] == NULL)
469         BAD("ec2tte_size/ec2tte mismatch(2)");
470      if (sec->ec2tte_used[i] < 0
471          || sec->ec2tte_used[i] > sec->ec2tte_size[i])
472         BAD("implausible ec2tte_used");
473      if (sec->ec2tte_used[i] == 0)
474         continue;
475
476      /* For each tt reference in each eclass .. ensure the reference
477         is to a valid tt entry, and that the entry's address ranges
478         really include this eclass. */
479
480      for (j = 0; j < sec->ec2tte_used[i]; j++) {
481         tteno = sec->ec2tte[i][j];
482         if (tteno == EC2TTE_DELETED)
483            continue;
484         if (tteno >= N_TTES_PER_SECTOR)
485            BAD("implausible tteno");
486         tte = &sec->tt[tteno];
487         if (tte->status != InUse)
488            BAD("tteno points to non-inuse tte");
489         if (tte->n_tte2ec < 1 || tte->n_tte2ec > 3)
490            BAD("tte->n_tte2ec out of range");
491         /* Exactly one of tte->tte2ec_ec[0 .. tte->n_tte2ec-1]
492            must equal i.  Inspect tte's eclass info. */
493         n = 0;
494         for (k = 0; k < tte->n_tte2ec; k++) {
495            if (k < tte->n_tte2ec-1
496                && tte->tte2ec_ec[k] >= tte->tte2ec_ec[k+1])
497               BAD("tte->tte2ec_ec[..] out of order");
498            ec_num = tte->tte2ec_ec[k];
499            if (ec_num < 0 || ec_num >= ECLASS_N)
500               BAD("tte->tte2ec_ec[..] out of range");
501            if (ec_num != i)
502               continue;
503            ec_idx = tte->tte2ec_ix[k];
504            if (ec_idx < 0 || ec_idx >= sec->ec2tte_used[i])
505               BAD("tte->tte2ec_ix[..] out of range");
506            if (ec_idx == j)
507               n++;
508         }
509         if (n != 1)
510            BAD("tteno does not point back at eclass");
511      }
512   }
513
514   /* That establishes that for each forward pointer from TTEntries
515      there is a corresponding backward pointer from the eclass[]
516      arrays.  However, it doesn't rule out the possibility of other,
517      bogus pointers in the eclass[] arrays.  So do those similarly:
518      scan through them and check that the TTEntries they point at point
519      back. */
520
521   for (i = 0; i < N_TTES_PER_SECTOR_USABLE; i++) {
522
523      tte = &sec->tt[i];
524      if (tte->status == Empty || tte->status == Deleted) {
525         if (tte->n_tte2ec != 0)
526            BAD("tte->n_eclasses nonzero for unused tte");
527         continue;
528      }
529
530      vg_assert(tte->status == InUse);
531
532      if (tte->n_tte2ec < 1 || tte->n_tte2ec > 3)
533         BAD("tte->n_eclasses out of range(2)");
534
535      for (j = 0; j < tte->n_tte2ec; j++) {
536         ec_num = tte->tte2ec_ec[j];
537         if (ec_num < 0 || ec_num >= ECLASS_N)
538            BAD("tte->eclass[..] out of range");
539         ec_idx = tte->tte2ec_ix[j];
540         if (ec_idx < 0 || ec_idx >= sec->ec2tte_used[ec_num])
541            BAD("tte->ec_idx[..] out of range(2)");
542         if (sec->ec2tte[ec_num][ec_idx] != i)
543            BAD("ec2tte does not point back to tte");
544      }
545   }
546
547   return True;
548
549  bad:
550   if (whassup)
551      VG_(debugLog)(0, "transtab", "eclass sanity fail: %s\n", whassup);
552
553#  if 0
554   VG_(printf)("eclass = %d\n", i);
555   VG_(printf)("tteno = %d\n", (Int)tteno);
556   switch (tte->status) {
557      case InUse:   VG_(printf)("InUse\n"); break;
558      case Deleted: VG_(printf)("Deleted\n"); break;
559      case Empty:   VG_(printf)("Empty\n"); break;
560   }
561   if (tte->status != Empty) {
562      for (k = 0; k < tte->vge.n_used; k++)
563         VG_(printf)("0x%llx %d\n", tte->vge.base[k],
564                                    (Int)tte->vge.len[k]);
565   }
566#  endif
567
568   return False;
569
570#  undef BAD
571}
572
573
574/* Sanity check absolutely everything.  True == check passed. */
575
576/* forwards */
577static Bool sanity_check_redir_tt_tc ( void );
578static Bool sanity_check_fastcache ( void );
579
580static Bool sanity_check_sector_search_order ( void )
581{
582   Int i, j, nListed;
583   /* assert the array is the right size */
584   vg_assert(N_SECTORS == (sizeof(sector_search_order)
585                           / sizeof(sector_search_order[0])));
586   /* Check it's of the form  valid_sector_numbers ++ [-1, -1, ..] */
587   for (i = 0; i < N_SECTORS; i++) {
588      if (sector_search_order[i] < 0 || sector_search_order[i] >= N_SECTORS)
589         break;
590   }
591   nListed = i;
592   for (/* */; i < N_SECTORS; i++) {
593      if (sector_search_order[i] != -1)
594         break;
595   }
596   if (i != N_SECTORS)
597      return False;
598   /* Check each sector number only appears once */
599   for (i = 0; i < N_SECTORS; i++) {
600      if (sector_search_order[i] == -1)
601         continue;
602      for (j = i+1; j < N_SECTORS; j++) {
603         if (sector_search_order[j] == sector_search_order[i])
604            return False;
605      }
606   }
607   /* Check that the number of listed sectors equals the number
608      in use, by counting nListed back down. */
609   for (i = 0; i < N_SECTORS; i++) {
610      if (sectors[i].tc != NULL)
611         nListed--;
612   }
613   if (nListed != 0)
614      return False;
615   return True;
616}
617
618static Bool sanity_check_all_sectors ( void )
619{
620   Int     sno;
621   Bool    sane;
622   Sector* sec;
623   for (sno = 0; sno < N_SECTORS; sno++) {
624      sec = &sectors[sno];
625      if (sec->tc == NULL)
626         continue;
627      sane = sanity_check_eclasses_in_sector( sec );
628      if (!sane)
629         return False;
630   }
631   if ( !sanity_check_redir_tt_tc() )
632      return False;
633   if ( !sanity_check_fastcache() )
634      return False;
635   if ( !sanity_check_sector_search_order() )
636      return False;
637   return True;
638}
639
640
641
642/*-------------------------------------------------------------*/
643/*--- Add/find translations                                 ---*/
644/*-------------------------------------------------------------*/
645
646static UInt vge_osize ( VexGuestExtents* vge )
647{
648   UInt i, n = 0;
649   for (i = 0; i < vge->n_used; i++)
650      n += (UInt)vge->len[i];
651   return n;
652}
653
654static Bool isValidSector ( Int sector )
655{
656   if (sector < 0 || sector >= N_SECTORS)
657      return False;
658   return True;
659}
660
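/* Hash a guest address to a TT index: fold the two 32-bit halves
   together with XOR, rotate right by 7 bits so that addresses differing
   only in their low-order bits spread across the table, then reduce
   modulo the (prime) number of entries per sector. */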
661static inline UInt HASH_TT ( Addr64 key )
662{
663   UInt kHi = (UInt)(key >> 32);
664   UInt kLo = (UInt)key;
665   UInt k32 = kHi ^ kLo;
666   UInt ror = 7;
667   if (ror > 0)
668      k32 = (k32 >> ror) | (k32 << (32-ror));
669   return k32 % N_TTES_PER_SECTOR;
670}
671
672static void setFastCacheEntry ( Addr64 key, ULong* tcptr, UInt* count )
673{
674   UInt cno = (UInt)VG_TT_FAST_HASH(key);
675   VG_(tt_fast)[cno].guest = (Addr)key;
676   VG_(tt_fast)[cno].host  = (Addr)tcptr;
677   if (VG_(clo_profile_flags) > 0)
678      VG_(tt_fastN)[cno] = count;
679   n_fast_updates++;
680   /* This shouldn't fail.  It should be assured by m_translate
681      which should reject any attempt to make a translation of code
682      starting at TRANSTAB_BOGUS_GUEST_ADDR. */
683   vg_assert(VG_(tt_fast)[cno].guest != TRANSTAB_BOGUS_GUEST_ADDR);
684}
685
686/* Invalidate the fast cache's counter array, VG_(tt_fastN). */
687static void invalidateFastNCache ( void )
688{
689   UInt j;
690   vg_assert(VG_TT_FAST_SIZE > 0 && (VG_TT_FAST_SIZE % 4) == 0);
691   for (j = 0; j < VG_TT_FAST_SIZE; j += 4) {
692      VG_(tt_fastN)[j+0] = NULL;
693      VG_(tt_fastN)[j+1] = NULL;
694      VG_(tt_fastN)[j+2] = NULL;
695      VG_(tt_fastN)[j+3] = NULL;
696   }
697   vg_assert(j == VG_TT_FAST_SIZE);
698}
699
700/* Invalidate the fast cache VG_(tt_fast).  If profiling, also
701   invalidate the fast cache's counter array VG_(tt_fastN), otherwise
702   don't touch it. */
703static void invalidateFastCache ( void )
704{
705   UInt j;
706   /* This loop is popular enough to make it worth unrolling a
707      bit, at least on ppc32. */
708   vg_assert(VG_TT_FAST_SIZE > 0 && (VG_TT_FAST_SIZE % 4) == 0);
709   for (j = 0; j < VG_TT_FAST_SIZE; j += 4) {
710      VG_(tt_fast)[j+0].guest = TRANSTAB_BOGUS_GUEST_ADDR;
711      VG_(tt_fast)[j+1].guest = TRANSTAB_BOGUS_GUEST_ADDR;
712      VG_(tt_fast)[j+2].guest = TRANSTAB_BOGUS_GUEST_ADDR;
713      VG_(tt_fast)[j+3].guest = TRANSTAB_BOGUS_GUEST_ADDR;
714   }
715
716   if (VG_(clo_profile_flags) > 0)
717      invalidateFastNCache();
718
719   vg_assert(j == VG_TT_FAST_SIZE);
720   n_fast_flushes++;
721}
722
723static Bool sanity_check_fastcache ( void )
724{
725   UInt j;
726   if (0) VG_(printf)("sanity check fastcache\n");
727   if (VG_(clo_profile_flags) > 0) {
728      /* profiling */
729      for (j = 0; j < VG_TT_FAST_SIZE; j++) {
730         if (VG_(tt_fastN)[j] == NULL
731             && VG_(tt_fast)[j].guest != TRANSTAB_BOGUS_GUEST_ADDR)
732            return False;
733         if (VG_(tt_fastN)[j] != NULL
734             && VG_(tt_fast)[j].guest == TRANSTAB_BOGUS_GUEST_ADDR)
735            return False;
736      }
737   } else {
738      /* not profiling */
739      for (j = 0; j < VG_TT_FAST_SIZE; j++) {
740         if (VG_(tt_fastN)[j] != NULL)
741            return False;
742      }
743   }
744   return True;
745}
746
747static void initialiseSector ( Int sno )
748{
749   Int    i;
750   SysRes sres;
751   Sector* sec;
752   vg_assert(isValidSector(sno));
753
754   { Bool sane = sanity_check_sector_search_order();
755     vg_assert(sane);
756   }
757   sec = &sectors[sno];
758
759   if (sec->tc == NULL) {
760
761      /* Sector has never been used before.  Need to allocate tt and
762         tc. */
763      vg_assert(sec->tt == NULL);
764      vg_assert(sec->tc_next == NULL);
765      vg_assert(sec->tt_n_inuse == 0);
766      for (i = 0; i < ECLASS_N; i++) {
767         vg_assert(sec->ec2tte_size[i] == 0);
768         vg_assert(sec->ec2tte_used[i] == 0);
769         vg_assert(sec->ec2tte[i] == NULL);
770      }
771
772      VG_(debugLog)(1,"transtab", "allocate sector %d\n", sno);
773
774      sres = VG_(am_mmap_anon_float_valgrind)( 8 * tc_sector_szQ );
775      if (sr_isError(sres)) {
776         VG_(out_of_memory_NORETURN)("initialiseSector(TC)",
777                                     8 * tc_sector_szQ );
778	 /*NOTREACHED*/
779      }
780      sec->tc = (ULong*)(AddrH)sr_Res(sres);
781
782      sres = VG_(am_mmap_anon_float_valgrind)
783                ( N_TTES_PER_SECTOR * sizeof(TTEntry) );
784      if (sr_isError(sres)) {
785         VG_(out_of_memory_NORETURN)("initialiseSector(TT)",
786                                     N_TTES_PER_SECTOR * sizeof(TTEntry) );
787	 /*NOTREACHED*/
788      }
789      sec->tt = (TTEntry*)(AddrH)sr_Res(sres);
790
791      for (i = 0; i < N_TTES_PER_SECTOR; i++) {
792         sec->tt[i].status   = Empty;
793         sec->tt[i].n_tte2ec = 0;
794      }
795
796      /* Add an entry in the sector_search_order */
797      for (i = 0; i < N_SECTORS; i++) {
798         if (sector_search_order[i] == -1)
799            break;
800      }
801      vg_assert(i >= 0 && i < N_SECTORS);
802      sector_search_order[i] = sno;
803
804      if (VG_(clo_verbosity) > 2)
805         VG_(message)(Vg_DebugMsg, "TT/TC: initialise sector %d\n", sno);
806
807   } else {
808
809      /* Sector has been used before.  Dump the old contents. */
810      VG_(debugLog)(1,"transtab", "recycle sector %d\n", sno);
811      vg_assert(sec->tt != NULL);
812      vg_assert(sec->tc_next != NULL);
813      n_dump_count += sec->tt_n_inuse;
814
815      /* Visit each just-about-to-be-abandoned translation. */
816      for (i = 0; i < N_TTES_PER_SECTOR; i++) {
817         if (sec->tt[i].status == InUse) {
818            vg_assert(sec->tt[i].n_tte2ec >= 1);
819            vg_assert(sec->tt[i].n_tte2ec <= 3);
820            n_dump_osize += vge_osize(&sec->tt[i].vge);
821            /* Tell the tool too. */
822            if (VG_(needs).superblock_discards) {
823               VG_TDICT_CALL( tool_discard_superblock_info,
824                              sec->tt[i].entry,
825                              sec->tt[i].vge );
826            }
827         } else {
828            vg_assert(sec->tt[i].n_tte2ec == 0);
829         }
830         sec->tt[i].status   = Empty;
831         sec->tt[i].n_tte2ec = 0;
832      }
833
834      /* Free up the eclass structures. */
835      for (i = 0; i < ECLASS_N; i++) {
836         if (sec->ec2tte_size[i] == 0) {
837            vg_assert(sec->ec2tte_used[i] == 0);
838            vg_assert(sec->ec2tte[i] == NULL);
839         } else {
840            vg_assert(sec->ec2tte[i] != NULL);
841            VG_(arena_free)(VG_AR_TTAUX, sec->ec2tte[i]);
842            sec->ec2tte[i] = NULL;
843            sec->ec2tte_size[i] = 0;
844            sec->ec2tte_used[i] = 0;
845         }
846      }
847
848      /* Sanity check: ensure it is already in
849         sector_search_order[]. */
850      for (i = 0; i < N_SECTORS; i++) {
851         if (sector_search_order[i] == sno)
852            break;
853      }
854      vg_assert(i >= 0 && i < N_SECTORS);
855
856      if (VG_(clo_verbosity) > 2)
857         VG_(message)(Vg_DebugMsg, "TT/TC: recycle sector %d\n", sno);
858   }
859
860   sec->tc_next = sec->tc;
861   sec->tt_n_inuse = 0;
862
863   invalidateFastCache();
864
865   { Bool sane = sanity_check_sector_search_order();
866     vg_assert(sane);
867   }
868}
869
870static void invalidate_icache ( void *ptr, Int nbytes )
871{
872#  if defined(VGA_ppc32) || defined(VGA_ppc64)
873   Addr startaddr = (Addr) ptr;
874   Addr endaddr   = startaddr + nbytes;
875   Addr cls;
876   Addr addr;
877   VexArchInfo vai;
878
879   if (nbytes == 0) return;
880   vg_assert(nbytes > 0);
881
882   VG_(machine_get_VexArchInfo)( NULL, &vai );
883   cls = vai.ppc_cache_line_szB;
884
885   /* Stay sane .. */
886   vg_assert(cls == 32 || cls == 64 || cls == 128);
887
888   startaddr &= ~(cls - 1);
889   for (addr = startaddr; addr < endaddr; addr += cls) {
890      __asm__ __volatile__("dcbst 0,%0" : : "r" (addr));
891   }
892   __asm__ __volatile__("sync");
893   for (addr = startaddr; addr < endaddr; addr += cls) {
894      __asm__ __volatile__("icbi 0,%0" : : "r" (addr));
895   }
896   __asm__ __volatile__("sync; isync");
897
898#  elif defined(VGA_x86)
899   /* no need to do anything, hardware provides coherence */
900
901#  elif defined(VGA_amd64)
902   /* no need to do anything, hardware provides coherence */
903
904#  elif defined(VGP_arm_linux)
905   /* ARM cache flushes are privileged, so we must defer to the kernel. */
906   Addr startaddr = (Addr) ptr;
907   Addr endaddr   = startaddr + nbytes;
908   VG_(do_syscall2)(__NR_ARM_cacheflush, startaddr, endaddr);
909
910#  else
911#    error "Unknown ARCH"
912#  endif
913}
914
915
916/* Add a translation of vge to TT/TC.  The translation is temporarily
917   in code[0 .. code_len-1].
918
919   pre: youngest_sector points to a valid (although possibly full)
920   sector.
921*/
922void VG_(add_to_transtab)( VexGuestExtents* vge,
923                           Addr64           entry,
924                           AddrH            code,
925                           UInt             code_len,
926                           Bool             is_self_checking )
927{
928   Int    tcAvailQ, reqdQ, y, i;
929   ULong  *tcptr, *tcptr2;
930   UChar* srcP;
931   UChar* dstP;
932
933   vg_assert(init_done);
934   vg_assert(vge->n_used >= 1 && vge->n_used <= 3);
935
936   /* 60000: should agree with N_TMPBUF in m_translate.c. */
937   vg_assert(code_len > 0 && code_len < 60000);
938
939   if (0)
940      VG_(printf)("add_to_transtab(entry = 0x%llx, len = %d)\n",
941                  entry, code_len);
942
943   n_in_count++;
944   n_in_tsize += code_len;
945   n_in_osize += vge_osize(vge);
946   if (is_self_checking)
947      n_in_sc_count++;
948
949   y = youngest_sector;
950   vg_assert(isValidSector(y));
951
952   if (sectors[y].tc == NULL)
953      initialiseSector(y);
954
955   /* Try putting the translation in this sector. */
956   reqdQ = (code_len + 7) >> 3;
957
958   /* Will it fit in tc? */
959   tcAvailQ = ((ULong*)(&sectors[y].tc[tc_sector_szQ]))
960              - ((ULong*)(sectors[y].tc_next));
961   vg_assert(tcAvailQ >= 0);
962   vg_assert(tcAvailQ <= tc_sector_szQ);
963
964   if (tcAvailQ < reqdQ
965       || sectors[y].tt_n_inuse >= N_TTES_PER_SECTOR_USABLE) {
966      /* No.  So move on to the next sector.  Either it's never been
967         used before, in which case it will get its tt/tc allocated
968         now, or it has been used before, in which case it is set to be
969         empty, hence throwing out the oldest sector. */
970      vg_assert(tc_sector_szQ > 0);
971      VG_(debugLog)(1,"transtab",
972                      "declare sector %d full "
973                      "(TT loading %2d%%, TC loading %2d%%)\n",
974                      y,
975                      (100 * sectors[y].tt_n_inuse)
976                         / N_TTES_PER_SECTOR,
977                      (100 * (tc_sector_szQ - tcAvailQ))
978                         / tc_sector_szQ);
979      youngest_sector++;
980      if (youngest_sector >= N_SECTORS)
981         youngest_sector = 0;
982      y = youngest_sector;
983      initialiseSector(y);
984   }
985
986   /* Be sure ... */
987   tcAvailQ = ((ULong*)(&sectors[y].tc[tc_sector_szQ]))
988              - ((ULong*)(sectors[y].tc_next));
989   vg_assert(tcAvailQ >= 0);
990   vg_assert(tcAvailQ <= tc_sector_szQ);
991   vg_assert(tcAvailQ >= reqdQ);
992   vg_assert(sectors[y].tt_n_inuse < N_TTES_PER_SECTOR_USABLE);
993   vg_assert(sectors[y].tt_n_inuse >= 0);
994
995   /* Copy into tc. */
996   tcptr = sectors[y].tc_next;
997   vg_assert(tcptr >= &sectors[y].tc[0]);
998   vg_assert(tcptr <= &sectors[y].tc[tc_sector_szQ]);
999
1000   dstP = (UChar*)tcptr;
1001   srcP = (UChar*)code;
1002   for (i = 0; i < code_len; i++)
1003      dstP[i] = srcP[i];
1004   sectors[y].tc_next += reqdQ;
1005   sectors[y].tt_n_inuse++;
1006
1007   invalidate_icache( dstP, code_len );
1008
1009   /* more paranoia */
1010   tcptr2 = sectors[y].tc_next;
1011   vg_assert(tcptr2 >= &sectors[y].tc[0]);
1012   vg_assert(tcptr2 <= &sectors[y].tc[tc_sector_szQ]);
1013
1014   /* Find an empty tt slot, and use it.  There must be such a slot
1015      since tt is never allowed to get completely full. */
1016   i = HASH_TT(entry);
1017   vg_assert(i >= 0 && i < N_TTES_PER_SECTOR);
1018   while (True) {
1019      if (sectors[y].tt[i].status == Empty
1020          || sectors[y].tt[i].status == Deleted)
1021         break;
1022      i++;
1023      if (i >= N_TTES_PER_SECTOR)
1024         i = 0;
1025   }
1026
1027   sectors[y].tt[i].status = InUse;
1028   sectors[y].tt[i].tcptr  = tcptr;
1029   sectors[y].tt[i].count  = 0;
1030   sectors[y].tt[i].weight = 1;
1031   sectors[y].tt[i].vge    = *vge;
1032   sectors[y].tt[i].entry  = entry;
1033
1034   /* Update the fast-cache. */
1035   setFastCacheEntry( entry, tcptr, &sectors[y].tt[i].count );
1036
1037   /* Note the eclass numbers for this translation. */
1038   upd_eclasses_after_add( &sectors[y], i );
1039}
1040
1041
1042/* Search for the translation of the given guest address.  If
1043   requested, a successful search can also cause the fast-caches to be
1044   updated.
1045*/
1046Bool VG_(search_transtab) ( /*OUT*/AddrH* result,
1047                            Addr64        guest_addr,
1048                            Bool          upd_cache )
1049{
1050   Int i, j, k, kstart, sno;
1051
1052   vg_assert(init_done);
1053   /* Find the initial probe point just once.  It will be the same in
1054      all sectors and avoids multiple expensive % operations. */
1055   n_full_lookups++;
1056   k      = -1;
1057   kstart = HASH_TT(guest_addr);
1058   vg_assert(kstart >= 0 && kstart < N_TTES_PER_SECTOR);
1059
1060   /* Search in all the sectors, using sector_search_order[] as a
1061      heuristic guide as to what order to visit the sectors. */
1062   for (i = 0; i < N_SECTORS; i++) {
1063
1064      sno = sector_search_order[i];
1065      if (UNLIKELY(sno == -1))
1066         return False; /* run out of sectors to search */
1067
1068      k = kstart;
1069      for (j = 0; j < N_TTES_PER_SECTOR; j++) {
1070         n_lookup_probes++;
1071         if (sectors[sno].tt[k].status == InUse
1072             && sectors[sno].tt[k].entry == guest_addr) {
1073            /* found it */
1074            if (upd_cache)
1075               setFastCacheEntry(
1076                  guest_addr, sectors[sno].tt[k].tcptr,
1077                              &sectors[sno].tt[k].count );
1078            if (result)
1079               *result = (AddrH)sectors[sno].tt[k].tcptr;
1080            /* pull this one one step closer to the front.  For large
1081               apps this more or less halves the number of required
1082               probes. */
1083            if (i > 0) {
1084               Int tmp = sector_search_order[i-1];
1085               sector_search_order[i-1] = sector_search_order[i];
1086               sector_search_order[i] = tmp;
1087            }
1088            return True;
1089         }
1090         if (sectors[sno].tt[k].status == Empty)
1091            break; /* not found in this sector */
1092         k++;
1093         if (k == N_TTES_PER_SECTOR)
1094            k = 0;
1095      }
1096
1097      /* If we fall off the end, all entries are InUse and not
1098         matching, or Deleted.  In any case we did not find it in this
1099         sector. */
1100   }
1101
1102   /* Not found in any sector. */
1103   return False;
1104}
1105
1106
1107/*-------------------------------------------------------------*/
1108/*--- Delete translations.                                  ---*/
1109/*-------------------------------------------------------------*/
1110
1111/* forward */
1112static void unredir_discard_translations( Addr64, ULong );
1113
1114/* Stuff for deleting translations which intersect with a given
1115   address range.  Unfortunately, to make this run at a reasonable
1116   speed, it is complex. */
1117
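/* Do the closed intervals [s1, s1+r1-1] and [s2, s2+r2-1] intersect?
   They are disjoint exactly when one ends before the other begins. */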
1118static inline
1119Bool overlap1 ( Addr64 s1, ULong r1, Addr64 s2, ULong r2 )
1120{
1121   Addr64 e1 = s1 + r1 - 1ULL;
1122   Addr64 e2 = s2 + r2 - 1ULL;
1123   if (e1 < s2 || e2 < s1)
1124      return False;
1125   return True;
1126}
1127
1128static inline
1129Bool overlaps ( Addr64 start, ULong range, VexGuestExtents* vge )
1130{
1131   if (overlap1(start, range, vge->base[0], (UInt)vge->len[0]))
1132      return True;
1133   if (vge->n_used < 2)
1134      return False;
1135   if (overlap1(start, range, vge->base[1], (UInt)vge->len[1]))
1136      return True;
1137   if (vge->n_used < 3)
1138      return False;
1139   if (overlap1(start, range, vge->base[2], (UInt)vge->len[2]))
1140      return True;
1141   return False;
1142}
1143
1144
1145/* Delete a tt entry, and update all the eclass data accordingly. */
1146
1147static void delete_tte ( /*MOD*/Sector* sec, Int tteno )
1148{
1149   Int      i, ec_num, ec_idx;
1150   TTEntry* tte;
1151
1152   vg_assert(tteno >= 0 && tteno < N_TTES_PER_SECTOR);
1153   tte = &sec->tt[tteno];
1154   vg_assert(tte->status == InUse);
1155   vg_assert(tte->n_tte2ec >= 1 && tte->n_tte2ec <= 3);
1156
1157   /* Deal with the ec-to-tte links first. */
1158   for (i = 0; i < tte->n_tte2ec; i++) {
1159      ec_num = (Int)tte->tte2ec_ec[i];
1160      ec_idx = tte->tte2ec_ix[i];
1161      vg_assert(ec_num >= 0 && ec_num < ECLASS_N);
1162      vg_assert(ec_idx >= 0);
1163      vg_assert(ec_idx < sec->ec2tte_used[ec_num]);
1164      /* Assert that the two links point at each other. */
1165      vg_assert(sec->ec2tte[ec_num][ec_idx] == (UShort)tteno);
1166      /* "delete" the pointer back to here. */
1167      sec->ec2tte[ec_num][ec_idx] = EC2TTE_DELETED;
1168   }
1169
1170   /* Now fix up this TTEntry. */
1171   tte->status   = Deleted;
1172   tte->n_tte2ec = 0;
1173
1174   /* Stats .. */
1175   sec->tt_n_inuse--;
1176   n_disc_count++;
1177   n_disc_osize += vge_osize(&tte->vge);
1178
1179   /* Tell the tool too. */
1180   if (VG_(needs).superblock_discards) {
1181      VG_TDICT_CALL( tool_discard_superblock_info,
1182                     tte->entry,
1183                     tte->vge );
1184   }
1185}
1186
1187
1188/* Delete translations from sec which intersect the specified range, but
1189   only consider translations in the specified eclass. */
1190
1191static
1192Bool delete_translations_in_sector_eclass ( /*MOD*/Sector* sec,
1193                                            Addr64 guest_start, ULong range,
1194                                            Int ec )
1195{
1196   Int      i;
1197   UShort   tteno;
1198   Bool     anyDeld = False;
1199   TTEntry* tte;
1200
1201   vg_assert(ec >= 0 && ec < ECLASS_N);
1202
1203   for (i = 0; i < sec->ec2tte_used[ec]; i++) {
1204
1205      tteno = sec->ec2tte[ec][i];
1206      if (tteno == EC2TTE_DELETED) {
1207         /* already deleted */
1208         continue;
1209      }
1210
1211      vg_assert(tteno < N_TTES_PER_SECTOR);
1212
1213      tte = &sec->tt[tteno];
1214      vg_assert(tte->status == InUse);
1215
1216      if (overlaps( guest_start, range, &tte->vge )) {
1217         anyDeld = True;
1218         delete_tte( sec, (Int)tteno );
1219      }
1220
1221   }
1222
1223   return anyDeld;
1224}
1225
1226
1227/* Delete translations from sec which intersect the specified range, the
1228   slow way, by inspecting all translations in sec. */
1229
1230static
1231Bool delete_translations_in_sector ( /*MOD*/Sector* sec,
1232                                     Addr64 guest_start, ULong range )
1233{
1234   Int  i;
1235   Bool anyDeld = False;
1236
1237   for (i = 0; i < N_TTES_PER_SECTOR; i++) {
1238      if (sec->tt[i].status == InUse
1239          && overlaps( guest_start, range, &sec->tt[i].vge )) {
1240         anyDeld = True;
1241         delete_tte( sec, i );
1242      }
1243   }
1244
1245   return anyDeld;
1246}
1247
1248
1249void VG_(discard_translations) ( Addr64 guest_start, ULong range,
1250                                 HChar* who )
1251{
1252   Sector* sec;
1253   Int     sno, ec;
1254   Bool    anyDeleted = False;
1255
1256   vg_assert(init_done);
1257
1258   VG_(debugLog)(2, "transtab",
1259                    "discard_translations(0x%llx, %lld) req by %s\n",
1260                    guest_start, range, who );
1261
1262   /* Pre-deletion sanity check */
1263   if (VG_(clo_sanity_level) >= 4) {
1264      Bool sane = sanity_check_all_sectors();
1265      vg_assert(sane);
1266   }
1267
1268   if (range == 0)
1269      return;
1270
1271   /* There are two different ways to do this.
1272
1273      If the range fits within a single address-range equivalence
1274      class, as will be the case for a cache line sized invalidation,
1275      then we only have to inspect the set of translations listed in
1276      that equivalence class, and also in the "sin-bin" equivalence
1277      class ECLASS_MISC.
1278
1279      Otherwise, the invalidation is of a larger range and probably
1280      results from munmap.  In this case it's (probably!) faster just
1281      to inspect all translations, dump those we don't want, and
1282      regenerate the equivalence class information (since modifying it
1283      in-situ is even more expensive).
1284   */
1285
1286   /* First off, figure out if the range falls within a single class,
1287      and if so which one. */
1288
1289   ec = ECLASS_MISC;
1290   if (range < (1ULL << ECLASS_SHIFT))
1291      ec = range_to_eclass( guest_start, (UInt)range );
1292
1293   /* if ec is ECLASS_MISC then we aren't looking at just a single
1294      class, so use the slow scheme.  Else use the fast scheme,
1295      examining 'ec' and ECLASS_MISC. */
1296
1297   if (ec != ECLASS_MISC) {
1298
1299      VG_(debugLog)(2, "transtab",
1300                       "                    FAST, ec = %d\n", ec);
1301
1302      /* Fast scheme */
1303      vg_assert(ec >= 0 && ec < ECLASS_MISC);
1304
1305      for (sno = 0; sno < N_SECTORS; sno++) {
1306         sec = &sectors[sno];
1307         if (sec->tc == NULL)
1308            continue;
1309         anyDeleted |= delete_translations_in_sector_eclass(
1310                         sec, guest_start, range, ec );
1311         anyDeleted |= delete_translations_in_sector_eclass(
1312                         sec, guest_start, range, ECLASS_MISC );
1313      }
1314
1315   } else {
1316
1317      /* slow scheme */
1318
1319      VG_(debugLog)(2, "transtab",
1320                       "                    SLOW, ec = %d\n", ec);
1321
1322      for (sno = 0; sno < N_SECTORS; sno++) {
1323         sec = &sectors[sno];
1324         if (sec->tc == NULL)
1325            continue;
1326         anyDeleted |= delete_translations_in_sector(
1327                         sec, guest_start, range );
1328      }
1329
1330   }
1331
1332   if (anyDeleted)
1333      invalidateFastCache();
1334
1335   /* don't forget the no-redir cache */
1336   unredir_discard_translations( guest_start, range );
1337
1338   /* Post-deletion sanity check */
1339   if (VG_(clo_sanity_level) >= 4) {
1340      Int      i;
1341      TTEntry* tte;
1342      Bool     sane = sanity_check_all_sectors();
1343      vg_assert(sane);
1344      /* But now, also check the requested address range isn't
1345         present anywhere. */
1346      for (sno = 0; sno < N_SECTORS; sno++) {
1347         sec = &sectors[sno];
1348         if (sec->tc == NULL)
1349            continue;
1350         for (i = 0; i < N_TTES_PER_SECTOR; i++) {
1351            tte = &sec->tt[i];
1352            if (tte->status != InUse)
1353               continue;
1354            vg_assert(!overlaps( guest_start, range, &tte->vge ));
1355         }
1356      }
1357   }
1358}
1359
1360
1361/*------------------------------------------------------------*/
1362/*--- AUXILIARY: the unredirected TT/TC                    ---*/
1363/*------------------------------------------------------------*/
1364
1365/* A very simple translation cache which holds a small number of
1366   unredirected translations.  This is completely independent of the
1367   main tt/tc structures.  When unredir_tc or unredir_tt becomes full,
1368   both structures are simply dumped and we start over.
1369
1370   Since these translations are unredirected, the search key is (by
1371   definition) the first address entry in the .vge field. */
1372
1373/* Sized to hold 500 translations of average size 1000 bytes. */
1374
1375#define UNREDIR_SZB   1000
1376
1377#define N_UNREDIR_TT  500
1378#define N_UNREDIR_TCQ (N_UNREDIR_TT * UNREDIR_SZB / sizeof(ULong))
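/* With the defaults above, that is 500 * 1000 / 8 = 62500 ULongs of
   code space, i.e. 500000 bytes in total. */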
1379
1380typedef
1381   struct {
1382      VexGuestExtents vge;
1383      Addr            hcode;
1384      Bool            inUse;
1385   }
1386   UTCEntry;
1387
1388/* We just allocate forwards in _tc, never deleting. */
1389static ULong    *unredir_tc;
1390static Int      unredir_tc_used = N_UNREDIR_TCQ;
1391
1392/* Slots in _tt can come into use and out again (.inUse).
1393   Nevertheless _tt_highwater is maintained so that invalidations
1394   don't have to scan all the slots when only a few are in use.
1395   _tt_highwater holds the index of the highest ever allocated
1396   slot. */
1397static UTCEntry unredir_tt[N_UNREDIR_TT];
1398static Int      unredir_tt_highwater;
1399
1400
1401static void init_unredir_tt_tc ( void )
1402{
1403   Int i;
1404   if (unredir_tc == NULL) {
1405      SysRes sres = VG_(am_mmap_anon_float_valgrind)
1406                       ( N_UNREDIR_TT * UNREDIR_SZB );
1407      if (sr_isError(sres)) {
1408         VG_(out_of_memory_NORETURN)("init_unredir_tt_tc",
1409                                     N_UNREDIR_TT * UNREDIR_SZB);
1410         /*NOTREACHED*/
1411      }
1412      unredir_tc = (ULong *)(AddrH)sr_Res(sres);
1413   }
1414   unredir_tc_used = 0;
1415   for (i = 0; i < N_UNREDIR_TT; i++)
1416      unredir_tt[i].inUse = False;
1417   unredir_tt_highwater = -1;
1418}
1419
1420/* Do a sanity check; return False on failure. */
1421static Bool sanity_check_redir_tt_tc ( void )
1422{
1423   Int i;
1424   if (unredir_tt_highwater < -1) return False;
1425   if (unredir_tt_highwater >= N_UNREDIR_TT) return False;
1426
1427   for (i = unredir_tt_highwater+1; i < N_UNREDIR_TT; i++)
1428      if (unredir_tt[i].inUse)
1429         return False;
1430
1431   if (unredir_tc_used < 0) return False;
1432   if (unredir_tc_used > N_UNREDIR_TCQ) return False;
1433
1434   return True;
1435}
1436
1437
1438/* Add an UNREDIRECTED translation of vge to TT/TC.  The translation
1439   is temporarily in code[0 .. code_len-1].
1440*/
1441void VG_(add_to_unredir_transtab)( VexGuestExtents* vge,
1442                                   Addr64           entry,
1443                                   AddrH            code,
1444                                   UInt             code_len )
1445{
1446   Int   i, j, code_szQ;
1447   HChar *srcP, *dstP;
1448
1449   vg_assert(sanity_check_redir_tt_tc());
1450
1451   /* This is the whole point: it's not redirected! */
1452   vg_assert(entry == vge->base[0]);
1453
1454   /* How many unredir_tc slots are needed */
1455   code_szQ = (code_len + 7) / 8;
1456
1457   /* Look for an empty unredir_tc slot */
1458   for (i = 0; i < N_UNREDIR_TT; i++)
1459      if (!unredir_tt[i].inUse)
1460         break;
1461
1462   if (i >= N_UNREDIR_TT || code_szQ > (N_UNREDIR_TCQ - unredir_tc_used)) {
1463      /* It's full; dump everything we currently have */
1464      init_unredir_tt_tc();
1465      i = 0;
1466   }
1467
1468   vg_assert(unredir_tc_used >= 0);
1469   vg_assert(unredir_tc_used <= N_UNREDIR_TCQ);
1470   vg_assert(code_szQ > 0);
1471   vg_assert(code_szQ + unredir_tc_used <= N_UNREDIR_TCQ);
1472   vg_assert(i >= 0 && i < N_UNREDIR_TT);
1473   vg_assert(unredir_tt[i].inUse == False);
1474
1475   if (i > unredir_tt_highwater)
1476      unredir_tt_highwater = i;
1477
1478   dstP = (HChar*)&unredir_tc[unredir_tc_used];
1479   srcP = (HChar*)code;
1480   for (j = 0; j < code_len; j++)
1481      dstP[j] = srcP[j];
1482
1483   invalidate_icache( dstP, code_len );
1484
1485   unredir_tt[i].inUse = True;
1486   unredir_tt[i].vge   = *vge;
1487   unredir_tt[i].hcode = (Addr)dstP;
1488
1489   unredir_tc_used += code_szQ;
1490   vg_assert(unredir_tc_used >= 0);
1491   vg_assert(unredir_tc_used <= N_UNREDIR_TCQ);
1492
1493   vg_assert(&dstP[code_len] <= (HChar*)&unredir_tc[unredir_tc_used]);
1494}
1495
1496Bool VG_(search_unredir_transtab) ( /*OUT*/AddrH* result,
1497                                    Addr64        guest_addr )
1498{
1499   Int i;
1500   for (i = 0; i < N_UNREDIR_TT; i++) {
1501      if (!unredir_tt[i].inUse)
1502         continue;
1503      if (unredir_tt[i].vge.base[0] == guest_addr) {
1504         *result = (AddrH)unredir_tt[i].hcode;
1505         return True;
1506      }
1507   }
1508   return False;
1509}
1510
1511static void unredir_discard_translations( Addr64 guest_start, ULong range )
1512{
1513   Int i;
1514
1515   vg_assert(sanity_check_redir_tt_tc());
1516
1517   for (i = 0; i <= unredir_tt_highwater; i++) {
1518      if (unredir_tt[i].inUse
1519          && overlaps( guest_start, range, &unredir_tt[i].vge))
1520         unredir_tt[i].inUse = False;
1521   }
1522}
1523
1524
1525/*------------------------------------------------------------*/
1526/*--- Initialisation.                                      ---*/
1527/*------------------------------------------------------------*/
1528
1529void VG_(init_tt_tc) ( void )
1530{
1531   Int i, j, avg_codeszQ;
1532
1533   vg_assert(!init_done);
1534   init_done = True;
1535
1536   /* Otherwise lots of things go wrong... */
1537   vg_assert(sizeof(ULong) == 8);
1538   vg_assert(sizeof(Addr64) == 8);
1539   /* check fast cache entries really are 2 words long */
1540   vg_assert(sizeof(Addr) == sizeof(void*));
1541   vg_assert(sizeof(FastCacheEntry) == 2 * sizeof(Addr));
1542   /* check fast cache entries are packed back-to-back with no spaces */
1543   vg_assert(sizeof( VG_(tt_fast) ) == VG_TT_FAST_SIZE * sizeof(FastCacheEntry));
1544   /* check fast cache is aligned as we requested.  Not fatal if it
1545      isn't, but we might as well make sure. */
1546   vg_assert(VG_IS_16_ALIGNED( ((Addr) & VG_(tt_fast)[0]) ));
1547
1548   if (VG_(clo_verbosity) > 2)
1549      VG_(message)(Vg_DebugMsg,
1550                   "TT/TC: VG_(init_tt_tc) "
1551                   "(startup of code management)\n");
1552
1553   /* Figure out how big each tc area should be.  */
1554   avg_codeszQ   = (VG_(details).avg_translation_sizeB + 7) / 8;
1555   tc_sector_szQ = N_TTES_PER_SECTOR_USABLE * (1 + avg_codeszQ);
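   /* Worked example with a purely hypothetical figure: if a tool
      reported an average translation size of 200 bytes, avg_codeszQ
      would be (200+7)/8 = 25, giving tc_sector_szQ = 42588 * 26
      = 1107288 ULongs, i.e. just under 9MB of code cache per sector
      and about 71MB across all 8 sectors. */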
1556
1557   /* Ensure the calculated value is not way crazy. */
1558   vg_assert(tc_sector_szQ >= 2 * N_TTES_PER_SECTOR_USABLE);
1559   vg_assert(tc_sector_szQ <= 80 * N_TTES_PER_SECTOR_USABLE);
1560
1561   /* Initialise the sectors */
1562   youngest_sector = 0;
1563   for (i = 0; i < N_SECTORS; i++) {
1564      sectors[i].tc = NULL;
1565      sectors[i].tt = NULL;
1566      sectors[i].tc_next = NULL;
1567      sectors[i].tt_n_inuse = 0;
1568      for (j = 0; j < ECLASS_N; j++) {
1569         sectors[i].ec2tte_size[j] = 0;
1570         sectors[i].ec2tte_used[j] = 0;
1571         sectors[i].ec2tte[j] = NULL;
1572      }
1573   }
1574
1575   /* Initialise the sector_search_order hint table. */
1576   for (i = 0; i < N_SECTORS; i++)
1577      sector_search_order[i] = -1;
1578
1579   /* Initialise the fast caches.  If not profiling (the usual case),
1580      we have to explicitly invalidate the fastN cache as
1581      invalidateFastCache() won't do that for us. */
1582   invalidateFastCache();
1583   if (VG_(clo_profile_flags) == 0)
1584      invalidateFastNCache();
1585
1586   /* and the unredir tt/tc */
1587   init_unredir_tt_tc();
1588
1589   if (VG_(clo_verbosity) > 2) {
1590      VG_(message)(Vg_DebugMsg,
1591         "TT/TC: cache: %d sectors of %d bytes each = %d total\n",
1592          N_SECTORS, 8 * tc_sector_szQ,
1593          N_SECTORS * 8 * tc_sector_szQ );
1594      VG_(message)(Vg_DebugMsg,
1595         "TT/TC: table: %d total entries, max occupancy %d (%d%%)\n",
1596         N_SECTORS * N_TTES_PER_SECTOR,
1597         N_SECTORS * N_TTES_PER_SECTOR_USABLE,
1598         SECTOR_TT_LIMIT_PERCENT );
1599   }
1600
1601   VG_(debugLog)(2, "transtab",
1602      "cache: %d sectors of %d bytes each = %d total\n",
1603       N_SECTORS, 8 * tc_sector_szQ,
1604       N_SECTORS * 8 * tc_sector_szQ );
1605   VG_(debugLog)(2, "transtab",
1606      "table: %d total entries, max occupancy %d (%d%%)\n",
1607      N_SECTORS * N_TTES_PER_SECTOR,
1608      N_SECTORS * N_TTES_PER_SECTOR_USABLE,
1609      SECTOR_TT_LIMIT_PERCENT );
1610}
1611
1612
1613/*------------------------------------------------------------*/
1614/*--- Printing out statistics.                             ---*/
1615/*------------------------------------------------------------*/
1616
1617static ULong safe_idiv( ULong a, ULong b )
1618{
1619   return (b == 0 ? 0 : a / b);
1620}
1621
1622UInt VG_(get_bbs_translated) ( void )
1623{
1624   return n_in_count;
1625}
1626
1627void VG_(print_tt_tc_stats) ( void )
1628{
1629   VG_(message)(Vg_DebugMsg,
1630      "    tt/tc: %'llu tt lookups requiring %'llu probes\n",
1631      n_full_lookups, n_lookup_probes );
1632   VG_(message)(Vg_DebugMsg,
1633      "    tt/tc: %'llu fast-cache updates, %'llu flushes\n",
1634      n_fast_updates, n_fast_flushes );
1635
1636   VG_(message)(Vg_DebugMsg,
1637                " transtab: new        %'lld "
1638                "(%'llu -> %'llu; ratio %'llu:10) [%'llu scs]\n",
1639                n_in_count, n_in_osize, n_in_tsize,
1640                safe_idiv(10*n_in_tsize, n_in_osize),
1641                n_in_sc_count);
1642   VG_(message)(Vg_DebugMsg,
1643                " transtab: dumped     %'llu (%'llu -> ?" "?)\n",
1644                n_dump_count, n_dump_osize );
1645   VG_(message)(Vg_DebugMsg,
1646                " transtab: discarded  %'llu (%'llu -> ?" "?)\n",
1647                n_disc_count, n_disc_osize );
1648
1649   if (0) {
1650      Int i;
1651      VG_(printf)("\n");
1652      for (i = 0; i < ECLASS_N; i++) {
1653         VG_(printf)(" %4d", sectors[0].ec2tte_used[i]);
1654         if (i % 16 == 15)
1655            VG_(printf)("\n");
1656      }
1657      VG_(printf)("\n\n");
1658   }
1659}
1660
1661/*------------------------------------------------------------*/
1662/*--- Printing out of profiling results.                   ---*/
1663/*------------------------------------------------------------*/
1664
1665static ULong score ( TTEntry* tte )
1666{
1667   return ((ULong)tte->weight) * ((ULong)tte->count);
1668}
1669
1670ULong VG_(get_BB_profile) ( BBProfEntry tops[], UInt n_tops )
1671{
1672   Int   sno, i, r, s;
1673   ULong score_total;
1674
1675   /* First, compute the total weighted count, and find the top N
1676      ttes.  tops contains the addr/score pairs of the most-used n_tops blocks, in
1677      descending order (viz, tops[0] is the highest scorer). */
1678   for (i = 0; i < n_tops; i++) {
1679      tops[i].addr  = 0;
1680      tops[i].score = 0;
1681   }
1682
1683   score_total = 0;
1684
1685   for (sno = 0; sno < N_SECTORS; sno++) {
1686      if (sectors[sno].tc == NULL)
1687         continue;
1688      for (i = 0; i < N_TTES_PER_SECTOR; i++) {
1689         if (sectors[sno].tt[i].status != InUse)
1690            continue;
1691         score_total += score(&sectors[sno].tt[i]);
1692         /* Find the rank for sectors[sno].tt[i]. */
1693         r = n_tops-1;
1694         while (True) {
1695            if (r == -1)
1696               break;
1697            if (tops[r].addr == 0) {
1698               r--;
1699               continue;
1700            }
1701            if ( score(&sectors[sno].tt[i]) > tops[r].score ) {
1702               r--;
1703               continue;
1704            }
1705            break;
1706         }
1707         r++;
1708         vg_assert(r >= 0 && r <= n_tops);
1709         /* This bb should be placed at r, and bbs above it shifted
1710            upwards one slot. */
1711         if (r < n_tops) {
1712            for (s = n_tops-1; s > r; s--)
1713               tops[s] = tops[s-1];
1714            tops[r].addr  = sectors[sno].tt[i].entry;
1715            tops[r].score = score( &sectors[sno].tt[i] );
1716         }
1717      }
1718   }
1719
1720   return score_total;
1721}
1722
1723/*--------------------------------------------------------------------*/
1724/*--- end                                                          ---*/
1725/*--------------------------------------------------------------------*/
1726