m_transtab.c revision 45f4e7c91119c7d01a59f5e827c67841632c9314
1
2/*--------------------------------------------------------------------*/
3/*--- Management of the translation table and cache.               ---*/
4/*---                                                 m_transtab.c ---*/
5/*--------------------------------------------------------------------*/
6
7/*
8   This file is part of Valgrind, a dynamic binary instrumentation
9   framework.
10
11   Copyright (C) 2000-2005 Julian Seward
12      jseward@acm.org
13
14   This program is free software; you can redistribute it and/or
15   modify it under the terms of the GNU General Public License as
16   published by the Free Software Foundation; either version 2 of the
17   License, or (at your option) any later version.
18
19   This program is distributed in the hope that it will be useful, but
20   WITHOUT ANY WARRANTY; without even the implied warranty of
21   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
22   General Public License for more details.
23
24   You should have received a copy of the GNU General Public License
25   along with this program; if not, write to the Free Software
26   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
27   02111-1307, USA.
28
29   The GNU General Public License is contained in the file COPYING.
30*/
31
32#include "pub_core_basics.h"
33#include "pub_core_debuglog.h"
34#include "pub_core_machine.h"    // ppc32: VG_(cache_line_size_ppc32)
35#include "pub_core_libcbase.h"
36#include "pub_core_libcassert.h"
37#include "pub_core_libcprint.h"
38#include "pub_core_options.h"
39#include "pub_core_tooliface.h"  // For VG_(details).avg_translation_sizeB
40#include "pub_core_transtab.h"
41#include "pub_core_aspacemgr.h"
42#include "pub_core_mallocfree.h" // VG_(out_of_memory_NORETURN)
43
44/* #define DEBUG_TRANSTAB */
45
46
47/*-------------------------------------------------------------*/
48/*--- Management of the FIFO-based translation table+cache. ---*/
49/*-------------------------------------------------------------*/
50
51/*------------------ CONSTANTS ------------------*/
52
53/* Number of sectors the TC is divided into.  If you need a larger
54   overall translation cache, increase this value. */
55#define N_SECTORS 8
56
57/* Number of TC entries in each sector.  This needs to be a prime
58   number to work properly, and it is strongly recommended not to
59   change this. */
60#define N_TTES_PER_SECTOR /*30011*/ 40009
61
62/* Because each sector contains a hash table of TTEntries, we need to
63   specify the maximum allowable loading, after which the sector is
64   deemed full. */
65#define SECTOR_TT_LIMIT_PERCENT 60
66
67/* The sector is deemed full when this many entries are in it. */
68#define N_TTES_PER_SECTOR_USABLE \
69           ((N_TTES_PER_SECTOR * SECTOR_TT_LIMIT_PERCENT) / 100)
70
71
72/*------------------ TYPES ------------------*/
73
74/* A translation-cache entry is two parts:
75   - The guest address of the first (entry) bb in the translation,
76     as a 64-bit word.
77   - One or more 64-bit words containing the code.
78   It is supposed to be 64-bit aligned.
79*/
80/*
81typedef
82   struct {
83      Addr64 orig_addr;
84      ULong  code[0];
85   }
86   TCEntry;
87*/
88
89/* A translation-table entry.  This indicates precisely which areas of
90   guest code are included in the translation, and contains all other
91   auxiliary info too.  */
92typedef
93   struct {
94      /* Profiling only: the count and weight (arbitrary meaning) for
95         this translation.  Weight is a property of the translation
96         itself and computed once when the translation is created.
97         Count is an entry count for the translation and is
98         incremented by 1 every time the translation is used, if we
99         are profiling. */
100      UInt     count;
101      UShort   weight;
102
103      /* Status of the slot.  Note, we need to be able to do lazy
104         deletion, hence the Deleted state. */
105      enum { InUse, Deleted, Empty } status;
106
107      /* Pointer to the corresponding TCEntry (must be in the same
108         sector!) */
109      ULong* tce;
110
111      /* This is the original guest address that purportedly is the
112         entry point of the translation.  You might think that .entry
113         should be the same as .vge->base[0], and most of the time it
114         is.  However, when doing redirections, that is not the case.
115         .vge must always correctly describe the guest code sections
116         from which this translation was made.  However, .entry may or
117         may not be a lie, depending on whether or not we're doing
118         redirection. */
119      Addr64 entry;
120
121      /* This structure describes precisely what ranges of guest code
122         the translation covers, so we can decide whether or not to
123         delete it when translations of a given address range are
124         invalidated. */
125      VexGuestExtents vge;
126   }
127   TTEntry;
128
129
130/* Finally, a sector itself.  Each sector contains an array of
131   TCEntries, which hold code, and an array of TTEntries, containing
132   all required administrative info.  Profiling is supported using the
133   TTEntry .count and .weight fields, if required.  Each sector is
134   independent in that no cross-sector references are allowed.
135
136   If the sector is not in use, all three pointers are NULL and
137   tt_n_inuse is zero.
138*/
139typedef
140   struct {
141      /* The TCEntry area.  Size of this depends on the average
142         translation size.  We try and size it so it becomes full
143         precisely when this sector's translation table (tt) reaches
144         its load limit (SECTOR_TT_LIMIT_PERCENT). */
145      ULong* tc;
146
147      /* The TTEntry array.  This is a fixed size, always containing
148         exactly N_TTES_PER_SECTOR entries. */
149      TTEntry* tt;
150
151      /* This points to the current allocation point in tc. */
152      ULong* tc_next;
153
154      /* The count of tt entries with state InUse. */
155      Int tt_n_inuse;
156   }
157   Sector;
158
159
160/*------------------ DECLS ------------------*/
161
162/* The root data structure is an array of sectors.  The index of the
163   youngest sector is recorded, and new translations are put into that
164   sector.  When it fills up, we move along to the next sector and
165   start to fill that up, wrapping around at the end of the array.
166   That way, once all N_TC_SECTORS have been bought into use for the
167   first time, and are full, we then re-use the oldest sector,
168   endlessly.
169
170   When running, youngest sector should be between >= 0 and <
171   N_TC_SECTORS.  The initial -1 value indicates the TT/TC system is
172   not yet initialised.
173*/
174static Sector sectors[N_SECTORS];
175static Int    youngest_sector = -1;
176
177/* The number of ULongs in each TCEntry area.  This is computed once
178   at startup and does not change. */
179static Int    tc_sector_szQ;
180
181
182/* Fast helper for the TC.  A direct-mapped cache which holds a
183   pointer to a TC entry which may or may not be the correct one, but
184   which we hope usually is.  This array is referred to directly from
185   <arch>/dispatch.S.
186
187   Entries in tt_fast may point to any valid TC entry, regardless of
188   which sector it's in.  Consequently we must be very careful to
189   invalidate this cache when TC entries are changed or disappear.
190
191   A special TCEntry -- bogus_tc_entry -- must be pointed at to cause
192   that cache entry to miss.  This relies on the assumption that no
193   guest code actually has an address of 0x1.
194*/
195/*global*/ ULong* VG_(tt_fast)[VG_TT_FAST_SIZE];
196
197static ULong bogus_tc_entry = (Addr64)1;
198
199
200/* For profiling, we have a parallel array of pointers to .count
201   fields in TT entries.  Again, these pointers must be invalidated
202   when translations disappear.  A NULL pointer suffices to indicate
203   an unused slot.
204
205   tt_fast and tt_fastN change together: if tt_fast[i] points to
206   bogus_tc_entry then the corresponding tt_fastN[i] must be null.  If
207   tt_fast[i] points to some TC entry somewhere, then tt_fastN[i]
208   *must* point to the .count field of the corresponding TT entry.
209
210   tt_fast and tt_fastN are referred to from assembly code
211   (dispatch.S).
212*/
213/*global*/ UInt* VG_(tt_fastN)[VG_TT_FAST_SIZE];
214
215
216/* Make sure we're not used before initialisation. */
217static Bool init_done = False;
218
219
220/*------------------ STATS DECLS ------------------*/
221
222/* Number of fast-cache updates and flushes done. */
223ULong n_fast_flushes = 0;
224ULong n_fast_updates = 0;
225
226/* Number of full lookups done. */
227ULong n_full_lookups = 0;
228ULong n_lookup_probes = 0;
229
230/* Number/osize/tsize of translations entered; also the number of
231   those for which self-checking was requested. */
232ULong n_in_count    = 0;
233ULong n_in_osize    = 0;
234ULong n_in_tsize    = 0;
235ULong n_in_sc_count = 0;
236
237/* Number/osize of translations discarded due to lack of space. */
238ULong n_dump_count = 0;
239ULong n_dump_osize = 0;
240
241/* Number/osize of translations discarded due to requests to do so. */
242ULong n_disc_count = 0;
243ULong n_disc_osize = 0;
244
245
246
247/*-------------------------------------------------------------*/
248/*--- Add/delete/find translations                          ---*/
249/*-------------------------------------------------------------*/
250
251static UInt vge_osize ( VexGuestExtents* vge )
252{
253   UInt i, n = 0;
254   for (i = 0; i < vge->n_used; i++)
255      n += (UInt)vge->len[i];
256   return n;
257}
258
259static Bool isValidSector ( Int sector )
260{
261   if (sector < 0 || sector >= N_SECTORS)
262      return False;
263   return True;
264}
265
266static inline UInt HASH_TT ( Addr64 key )
267{
268   UInt kHi = (UInt)(key >> 32);
269   UInt kLo = (UInt)key;
270   return (kHi ^ kLo) % N_TTES_PER_SECTOR;
271}
272
273static void setFastCacheEntry ( Addr64 key, ULong* tce, UInt* count )
274{
275   UInt cno = ((UInt)key) & VG_TT_FAST_MASK;
276   VG_(tt_fast)[cno]  = tce;
277   VG_(tt_fastN)[cno] = count;
278   n_fast_updates++;
279}
280
281static void invalidateFastCache ( void )
282{
283   UInt j;
284   for (j = 0; j < VG_TT_FAST_SIZE; j++) {
285      VG_(tt_fast)[j]  = &bogus_tc_entry;
286      VG_(tt_fastN)[j] = NULL;
287   }
288   n_fast_flushes++;
289}
290
291static void initialiseSector ( Int sno )
292{
293   Int    i;
294   SysRes sres;
295   vg_assert(isValidSector(sno));
296
297   if (sectors[sno].tc == NULL) {
298      /* Sector has never been used before.  Need to allocate tt and
299         tc. */
300      vg_assert(sectors[sno].tt == NULL);
301      vg_assert(sectors[sno].tc_next == NULL);
302      vg_assert(sectors[sno].tt_n_inuse == 0);
303
304      VG_(debugLog)(1,"transtab", "allocate sector %d\n", sno);
305
306      sres = VG_(am_mmap_anon_float_valgrind)( 8 * tc_sector_szQ );
307      if (sres.isError) {
308         VG_(out_of_memory_NORETURN)("initialiseSector(TC)",
309                                     8 * tc_sector_szQ );
310	 /*NOTREACHED*/
311      }
312      sectors[sno].tc = (ULong*)sres.val;
313
314      sres = VG_(am_mmap_anon_float_valgrind)
315                ( N_TTES_PER_SECTOR * sizeof(TTEntry) );
316      if (sres.isError) {
317         VG_(out_of_memory_NORETURN)("initialiseSector(TT)",
318                                     N_TTES_PER_SECTOR * sizeof(TTEntry) );
319	 /*NOTREACHED*/
320      }
321      sectors[sno].tt = (TTEntry*)sres.val;
322
323      if (VG_(clo_verbosity) > 2)
324         VG_(message)(Vg_DebugMsg, "TT/TC: initialise sector %d", sno);
325   } else {
326      /* Sector has been used before. */
327      VG_(debugLog)(1,"transtab", "recycle sector %d\n", sno);
328      vg_assert(sectors[sno].tt != NULL);
329      vg_assert(sectors[sno].tc_next != NULL);
330      n_dump_count += sectors[sno].tt_n_inuse;
331      for (i = 0; i < N_TTES_PER_SECTOR; i++) {
332         if (sectors[sno].tt[i].status == InUse) {
333            n_dump_osize += vge_osize(&sectors[sno].tt[i].vge);
334         }
335      }
336      if (VG_(clo_verbosity) > 2)
337         VG_(message)(Vg_DebugMsg, "TT/TC: recycle sector %d", sno);
338   }
339
340   sectors[sno].tc_next = sectors[sno].tc;
341   sectors[sno].tt_n_inuse = 0;
342   for (i = 0; i < N_TTES_PER_SECTOR; i++)
343      sectors[sno].tt[i].status = Empty;
344
345   invalidateFastCache();
346}
347
348static void invalidate_icache ( void *ptr, Int nbytes )
349{
350#  if defined(VGA_ppc32)
351   Addr startaddr = (Addr) ptr;
352   Addr endaddr   = startaddr + nbytes;
353   Addr cls       = VG_(cache_line_size_ppc32);
354   Addr addr;
355
356   /* Stay sane .. */
357   vg_assert(cls == 32 || cls == 128);
358
359   startaddr &= ~(cls - 1);
360   for (addr = startaddr; addr < endaddr; addr += cls)
361      asm volatile("dcbst 0,%0" : : "r" (addr));
362   asm volatile("sync");
363   for (addr = startaddr; addr < endaddr; addr += cls)
364      asm volatile("icbi 0,%0" : : "r" (addr));
365   asm volatile("sync; isync");
366
367#  elif defined(VGA_x86)
368   /* no need to do anything, hardware provides coherence */
369
370#  elif defined(VGA_amd64)
371   /* no need to do anything, hardware provides coherence */
372
373#  else
374#    error "Unknown ARCH"
375#  endif
376}
377
378
379/* Add a translation of vge to TT/TC.  The translation is temporarily
380   in code[0 .. code_len-1].
381
382   pre: youngest_sector points to a valid (although possibly full)
383   sector.
384*/
385void VG_(add_to_transtab)( VexGuestExtents* vge,
386                           Addr64           entry,
387                           AddrH            code,
388                           UInt             code_len,
389                           Bool             is_self_checking )
390{
391   Int    tcAvailQ, reqdQ, y, i;
392   ULong  *tce, *tce2;
393   UChar* srcP;
394   UChar* dstP;
395
396   vg_assert(init_done);
397   vg_assert(vge->n_used >= 1 && vge->n_used <= 3);
398   vg_assert(code_len > 0 && code_len < 20000);
399
400   if (0)
401      VG_(printf)("add_to_transtab(entry = 0x%llx, len = %d)\n",
402                  entry, code_len);
403
404   n_in_count++;
405   n_in_tsize += code_len;
406   n_in_osize += vge_osize(vge);
407   if (is_self_checking)
408      n_in_sc_count++;
409
410   y = youngest_sector;
411   vg_assert(isValidSector(y));
412
413   if (sectors[y].tc == NULL)
414      initialiseSector(y);
415
416   /* Try putting the translation in this sector. */
417   reqdQ = 1 + ((code_len + 7) >> 3);
418
419   /* Will it fit in tc? */
420   tcAvailQ = ((ULong*)(&sectors[y].tc[tc_sector_szQ]))
421              - ((ULong*)(sectors[y].tc_next));
422   vg_assert(tcAvailQ >= 0);
423   vg_assert(tcAvailQ <= tc_sector_szQ);
424
425   if (tcAvailQ < reqdQ
426       || sectors[y].tt_n_inuse >= N_TTES_PER_SECTOR_USABLE) {
427      /* No.  So move on to the next sector.  Either it's never been
428         used before, in which case it will get its tt/tc allocated
429         now, or it has been used before, in which case it is set to be
430         empty, hence throwing out the oldest sector. */
431      youngest_sector++;
432      if (youngest_sector >= N_SECTORS)
433         youngest_sector = 0;
434      y = youngest_sector;
435      initialiseSector(y);
436   }
437
438   /* Be sure ... */
439   tcAvailQ = ((ULong*)(&sectors[y].tc[tc_sector_szQ]))
440              - ((ULong*)(sectors[y].tc_next));
441   vg_assert(tcAvailQ >= 0);
442   vg_assert(tcAvailQ <= tc_sector_szQ);
443   vg_assert(tcAvailQ >= reqdQ);
444   vg_assert(sectors[y].tt_n_inuse < N_TTES_PER_SECTOR_USABLE);
445   vg_assert(sectors[y].tt_n_inuse >= 0);
446
447   /* Copy into tc. */
448   tce = sectors[y].tc_next;
449   vg_assert(tce >= &sectors[y].tc[0]);
450   vg_assert(tce <= &sectors[y].tc[tc_sector_szQ]);
451
452   tce[0] = entry;
453   dstP = (UChar*)(&tce[1]);
454   srcP = (UChar*)code;
455   for (i = 0; i < code_len; i++)
456      dstP[i] = srcP[i];
457   sectors[y].tc_next += reqdQ;
458   sectors[y].tt_n_inuse++;
459
460   invalidate_icache( dstP, code_len );
461
462   /* more paranoia */
463   tce2 = sectors[y].tc_next;
464   vg_assert(tce2 >= &sectors[y].tc[0]);
465   vg_assert(tce2 <= &sectors[y].tc[tc_sector_szQ]);
466
467   /* Find an empty tt slot, and use it.  There must be such a slot
468      since tt is never allowed to get completely full. */
469   i = HASH_TT(entry);
470   vg_assert(i >= 0 && i < N_TTES_PER_SECTOR);
471   while (True) {
472      if (sectors[y].tt[i].status == Empty
473          || sectors[y].tt[i].status == Deleted)
474         break;
475      i++;
476      if (i >= N_TTES_PER_SECTOR)
477         i = 0;
478   }
479
480   sectors[y].tt[i].status = InUse;
481   sectors[y].tt[i].tce    = tce;
482   sectors[y].tt[i].count  = 0;
483   sectors[y].tt[i].weight = 1;
484   sectors[y].tt[i].vge    = *vge;
485   sectors[y].tt[i].entry  = entry;
486
487   setFastCacheEntry( entry, tce, &sectors[y].tt[i].count );
488}
489
490
491/* Search for the translation of the given guest address.  If
492   requested, a successful search can also cause the fast-caches to be
493   updated.
494*/
495Bool VG_(search_transtab) ( /*OUT*/AddrH* result,
496                            Addr64        guest_addr,
497                            Bool          upd_cache )
498{
499   Int i, j, k, kstart, sno;
500
501   vg_assert(init_done);
502   /* Find the initial probe point just once.  It will be the same in
503      all sectors and avoids multiple expensive % operations. */
504   n_full_lookups++;
505   k      = -1;
506   kstart = HASH_TT(guest_addr);
507   vg_assert(kstart >= 0 && kstart < N_TTES_PER_SECTOR);
508
509   /* Search in all the sectors.  Although the order should not matter,
510      it might be most efficient to search in the order youngest to
511      oldest. */
512   sno = youngest_sector;
513   for (i = 0; i < N_SECTORS; i++) {
514
515      if (sectors[sno].tc == NULL)
516         goto notfound; /* sector not in use. */
517
518      k = kstart;
519      for (j = 0; j < N_TTES_PER_SECTOR; j++) {
520         n_lookup_probes++;
521         if (sectors[sno].tt[k].status == InUse
522             && sectors[sno].tt[k].entry == guest_addr) {
523            /* found it */
524            if (upd_cache)
525               setFastCacheEntry(
526                  guest_addr, sectors[sno].tt[k].tce,
527                              &sectors[sno].tt[k].count );
528            if (result)
529               *result = sizeof(Addr64) + (AddrH)sectors[sno].tt[k].tce;
530            return True;
531         }
532         if (sectors[sno].tt[k].status == Empty)
533            break; /* not found in this sector */
534         k++;
535         if (k == N_TTES_PER_SECTOR)
536            k = 0;
537      }
538
539      /* If we fall off the end, all entries are InUse and not
540         matching, or Deleted.  In any case we did not find it in this
541         sector. */
542
543     notfound:
544      /* move to the next oldest sector */
545      sno = sno==0 ? (N_SECTORS-1) : (sno-1);
546   }
547
548   /* Not found in any sector. */
549   return False;
550}
551
552
553/* Delete all translations which intersect with any part of the
554   specified guest address range.  Note, this is SLOW.
555*/
556
557static inline
558Bool overlap1 ( Addr64 s1, ULong r1, Addr64 s2, ULong r2 )
559{
560   Addr64 e1 = s1 + r1 - 1ULL;
561   Addr64 e2 = s2 + r2 - 1ULL;
562   if (e1 < s2 || e2 < s1)
563      return False;
564   return True;
565}
566
567static inline
568Bool overlaps ( Addr64 start, ULong range, VexGuestExtents* vge )
569{
570   if (overlap1(start, range, vge->base[0], (UInt)vge->len[0]))
571      return True;
572   if (vge->n_used < 2)
573      return False;
574   if (overlap1(start, range, vge->base[1], (UInt)vge->len[1]))
575      return True;
576   if (vge->n_used < 3)
577      return False;
578   if (overlap1(start, range, vge->base[2], (UInt)vge->len[2]))
579      return True;
580   return False;
581}
582
583
584void VG_(discard_translations) ( Addr64 guest_start, ULong range,
585                                 HChar* who )
586{
587   Int sno, i;
588   Bool anyDeleted = False;
589
590   vg_assert(init_done);
591
592   VG_(debugLog)(1, "transtab",
593                    "discard_translations(0x%llx, %lld) req by %s\n",
594                    guest_start, range, who );
595
596   for (sno = 0; sno < N_SECTORS; sno++) {
597      if (sectors[sno].tc == NULL)
598         continue;
599      for (i = 0; i < N_TTES_PER_SECTOR; i++) {
600         if (sectors[sno].tt[i].status == InUse
601             && overlaps( guest_start, range, &sectors[sno].tt[i].vge )) {
602            sectors[sno].tt[i].status = Deleted;
603            sectors[sno].tt_n_inuse--;
604              anyDeleted = True;
605            n_disc_count++;
606            n_disc_osize += vge_osize(&sectors[sno].tt[i].vge);
607         }
608      }
609   }
610
611   if (anyDeleted)
612      invalidateFastCache();
613}
614
615
616/*------------------------------------------------------------*/
617/*--- Initialisation.                                      ---*/
618/*------------------------------------------------------------*/
619
620void VG_(init_tt_tc) ( void )
621{
622   Int i, avg_codeszQ;
623
624   vg_assert(!init_done);
625   init_done = True;
626
627   /* Otherwise lots of things go wrong... */
628   vg_assert(sizeof(ULong) == 8);
629   vg_assert(sizeof(Addr64) == 8);
630
631   if (VG_(clo_verbosity) > 2)
632      VG_(message)(Vg_DebugMsg,
633                   "TT/TC: VG_(init_tt_tc) "
634                   "(startup of code management)");
635
636   /* Figure out how big each tc area should be.  */
637   avg_codeszQ   = (VG_(details).avg_translation_sizeB + 7) / 8;
638   tc_sector_szQ = N_TTES_PER_SECTOR_USABLE * (1 + avg_codeszQ);
639
640   /* Ensure the calculated value is not way crazy. */
641   vg_assert(tc_sector_szQ >= 2 * N_TTES_PER_SECTOR_USABLE);
642   vg_assert(tc_sector_szQ <= 50 * N_TTES_PER_SECTOR_USABLE);
643
644   /* Initialise the sectors */
645   youngest_sector = 0;
646   for (i = 0; i < N_SECTORS; i++) {
647      sectors[i].tc = NULL;
648      sectors[i].tt = NULL;
649      sectors[i].tc_next = NULL;
650      sectors[i].tt_n_inuse = 0;
651   }
652
653   /* and the fast caches. */
654   invalidateFastCache();
655
656   if (VG_(clo_verbosity) > 2) {
657      VG_(message)(Vg_DebugMsg,
658         "TT/TC: cache: %d sectors of %d bytes each = %d total",
659          N_SECTORS, 8 * tc_sector_szQ,
660          N_SECTORS * 8 * tc_sector_szQ );
661      VG_(message)(Vg_DebugMsg,
662         "TT/TC: table: %d total entries, max occupancy %d (%d%%)",
663         N_SECTORS * N_TTES_PER_SECTOR,
664         N_SECTORS * N_TTES_PER_SECTOR_USABLE,
665         SECTOR_TT_LIMIT_PERCENT );
666   }
667
668   VG_(debugLog)(2, "transtab",
669      "cache: %d sectors of %d bytes each = %d total\n",
670       N_SECTORS, 8 * tc_sector_szQ,
671       N_SECTORS * 8 * tc_sector_szQ );
672   VG_(debugLog)(2, "transtab",
673      "table: %d total entries, max occupancy %d (%d%%)\n",
674      N_SECTORS * N_TTES_PER_SECTOR,
675      N_SECTORS * N_TTES_PER_SECTOR_USABLE,
676      SECTOR_TT_LIMIT_PERCENT );
677}
678
679
680/*------------------------------------------------------------*/
681/*--- Printing out statistics.                             ---*/
682/*------------------------------------------------------------*/
683
684static ULong safe_idiv( ULong a, ULong b )
685{
686   return (b == 0 ? 0 : a / b);
687}
688
689UInt VG_(get_bbs_translated) ( void )
690{
691   return n_in_count;
692}
693
694void VG_(print_tt_tc_stats) ( void )
695{
696   VG_(message)(Vg_DebugMsg,
697      "    tt/tc: %llu tt lookups requiring %llu probes",
698      n_full_lookups, n_lookup_probes );
699   VG_(message)(Vg_DebugMsg,
700      "    tt/tc: %llu fast-cache updates, %llu flushes",
701      n_fast_updates, n_fast_flushes );
702
703   VG_(message)(Vg_DebugMsg,
704                "translate: new        %lld "
705                "(%lld -> %lld; ratio %lld:10) [%lld scs]",
706                n_in_count, n_in_osize, n_in_tsize,
707                safe_idiv(10*n_in_tsize, n_in_osize),
708                n_in_sc_count);
709   VG_(message)(Vg_DebugMsg,
710                "translate: dumped     %lld (%lld -> ?" "?)",
711                n_dump_count, n_dump_osize );
712   VG_(message)(Vg_DebugMsg,
713                "translate: discarded  %lld (%lld -> ?" "?)",
714                n_disc_count, n_disc_osize );
715}
716
717/*------------------------------------------------------------*/
718/*--- Printing out of profiling results.                   ---*/
719/*------------------------------------------------------------*/
720
721static ULong score ( TTEntry* tte )
722{
723   return ((ULong)tte->weight) * ((ULong)tte->count);
724}
725
726ULong VG_(get_BB_profile) ( BBProfEntry tops[], UInt n_tops )
727{
728   Int   sno, i, r, s;
729   ULong score_total;
730
731   /* First, compute the total weighted count, and find the top N
732      ttes.  tops contains pointers to the most-used n_tops blocks, in
733      descending order (viz, tops[0] is the highest scorer). */
734   for (i = 0; i < n_tops; i++) {
735      tops[i].addr  = 0;
736      tops[i].score = 0;
737   }
738
739   score_total = 0;
740
741   for (sno = 0; sno < N_SECTORS; sno++) {
742      if (sectors[sno].tc == NULL)
743         continue;
744      for (i = 0; i < N_TTES_PER_SECTOR; i++) {
745         if (sectors[sno].tt[i].status != InUse)
746            continue;
747         score_total += score(&sectors[sno].tt[i]);
748         /* Find the rank for sectors[sno].tt[i]. */
749         r = n_tops-1;
750         while (True) {
751            if (r == -1)
752               break;
753             if (tops[r].addr == 0) {
754               r--;
755               continue;
756             }
757             if ( score(&sectors[sno].tt[i]) > tops[r].score ) {
758                r--;
759                continue;
760             }
761             break;
762         }
763         r++;
764         vg_assert(r >= 0 && r <= n_tops);
765         /* This bb should be placed at r, and bbs above it shifted
766            upwards one slot. */
767         if (r < n_tops) {
768            for (s = n_tops-1; s > r; s--)
769               tops[s] = tops[s-1];
770            tops[r].addr  = sectors[sno].tt[i].entry;
771            tops[r].score = score( &sectors[sno].tt[i] );
772         }
773      }
774   }
775
776   return score_total;
777}
778
779/*--------------------------------------------------------------------*/
780/*--- end                                                          ---*/
781/*--------------------------------------------------------------------*/
782