/* m_transtab.c revision 45f4e7c91119c7d01a59f5e827c67841632c9314 */
1 2/*--------------------------------------------------------------------*/ 3/*--- Management of the translation table and cache. ---*/ 4/*--- m_transtab.c ---*/ 5/*--------------------------------------------------------------------*/ 6 7/* 8 This file is part of Valgrind, a dynamic binary instrumentation 9 framework. 10 11 Copyright (C) 2000-2005 Julian Seward 12 jseward@acm.org 13 14 This program is free software; you can redistribute it and/or 15 modify it under the terms of the GNU General Public License as 16 published by the Free Software Foundation; either version 2 of the 17 License, or (at your option) any later version. 18 19 This program is distributed in the hope that it will be useful, but 20 WITHOUT ANY WARRANTY; without even the implied warranty of 21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 22 General Public License for more details. 23 24 You should have received a copy of the GNU General Public License 25 along with this program; if not, write to the Free Software 26 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 27 02111-1307, USA. 28 29 The GNU General Public License is contained in the file COPYING. 30*/ 31 32#include "pub_core_basics.h" 33#include "pub_core_debuglog.h" 34#include "pub_core_machine.h" // ppc32: VG_(cache_line_size_ppc32) 35#include "pub_core_libcbase.h" 36#include "pub_core_libcassert.h" 37#include "pub_core_libcprint.h" 38#include "pub_core_options.h" 39#include "pub_core_tooliface.h" // For VG_(details).avg_translation_sizeB 40#include "pub_core_transtab.h" 41#include "pub_core_aspacemgr.h" 42#include "pub_core_mallocfree.h" // VG_(out_of_memory_NORETURN) 43 44/* #define DEBUG_TRANSTAB */ 45 46 47/*-------------------------------------------------------------*/ 48/*--- Management of the FIFO-based translation table+cache. 
---*/ 49/*-------------------------------------------------------------*/ 50 51/*------------------ CONSTANTS ------------------*/ 52 53/* Number of sectors the TC is divided into. If you need a larger 54 overall translation cache, increase this value. */ 55#define N_SECTORS 8 56 57/* Number of TC entries in each sector. This needs to be a prime 58 number to work properly, and it is strongly recommended not to 59 change this. */ 60#define N_TTES_PER_SECTOR /*30011*/ 40009 61 62/* Because each sector contains a hash table of TTEntries, we need to 63 specify the maximum allowable loading, after which the sector is 64 deemed full. */ 65#define SECTOR_TT_LIMIT_PERCENT 60 66 67/* The sector is deemed full when this many entries are in it. */ 68#define N_TTES_PER_SECTOR_USABLE \ 69 ((N_TTES_PER_SECTOR * SECTOR_TT_LIMIT_PERCENT) / 100) 70 71 72/*------------------ TYPES ------------------*/ 73 74/* A translation-cache entry is two parts: 75 - The guest address of the first (entry) bb in the translation, 76 as a 64-bit word. 77 - One or more 64-bit words containing the code. 78 It is supposed to be 64-bit aligned. 79*/ 80/* 81typedef 82 struct { 83 Addr64 orig_addr; 84 ULong code[0]; 85 } 86 TCEntry; 87*/ 88 89/* A translation-table entry. This indicates precisely which areas of 90 guest code are included in the translation, and contains all other 91 auxiliary info too. */ 92typedef 93 struct { 94 /* Profiling only: the count and weight (arbitrary meaning) for 95 this translation. Weight is a property of the translation 96 itself and computed once when the translation is created. 97 Count is an entry count for the translation and is 98 incremented by 1 every time the translation is used, if we 99 are profiling. */ 100 UInt count; 101 UShort weight; 102 103 /* Status of the slot. Note, we need to be able to do lazy 104 deletion, hence the Deleted state. 
*/ 105 enum { InUse, Deleted, Empty } status; 106 107 /* Pointer to the corresponding TCEntry (must be in the same 108 sector!) */ 109 ULong* tce; 110 111 /* This is the original guest address that purportedly is the 112 entry point of the translation. You might think that .entry 113 should be the same as .vge->base[0], and most of the time it 114 is. However, when doing redirections, that is not the case. 115 .vge must always correctly describe the guest code sections 116 from which this translation was made. However, .entry may or 117 may not be a lie, depending on whether or not we're doing 118 redirection. */ 119 Addr64 entry; 120 121 /* This structure describes precisely what ranges of guest code 122 the translation covers, so we can decide whether or not to 123 delete it when translations of a given address range are 124 invalidated. */ 125 VexGuestExtents vge; 126 } 127 TTEntry; 128 129 130/* Finally, a sector itself. Each sector contains an array of 131 TCEntries, which hold code, and an array of TTEntries, containing 132 all required administrative info. Profiling is supported using the 133 TTEntry .count and .weight fields, if required. Each sector is 134 independent in that no cross-sector references are allowed. 135 136 If the sector is not in use, all three pointers are NULL and 137 tt_n_inuse is zero. 138*/ 139typedef 140 struct { 141 /* The TCEntry area. Size of this depends on the average 142 translation size. We try and size it so it becomes full 143 precisely when this sector's translation table (tt) reaches 144 its load limit (SECTOR_TT_LIMIT_PERCENT). */ 145 ULong* tc; 146 147 /* The TTEntry array. This is a fixed size, always containing 148 exactly N_TTES_PER_SECTOR entries. */ 149 TTEntry* tt; 150 151 /* This points to the current allocation point in tc. */ 152 ULong* tc_next; 153 154 /* The count of tt entries with state InUse. 
*/ 155 Int tt_n_inuse; 156 } 157 Sector; 158 159 160/*------------------ DECLS ------------------*/ 161 162/* The root data structure is an array of sectors. The index of the 163 youngest sector is recorded, and new translations are put into that 164 sector. When it fills up, we move along to the next sector and 165 start to fill that up, wrapping around at the end of the array. 166 That way, once all N_TC_SECTORS have been bought into use for the 167 first time, and are full, we then re-use the oldest sector, 168 endlessly. 169 170 When running, youngest sector should be between >= 0 and < 171 N_TC_SECTORS. The initial -1 value indicates the TT/TC system is 172 not yet initialised. 173*/ 174static Sector sectors[N_SECTORS]; 175static Int youngest_sector = -1; 176 177/* The number of ULongs in each TCEntry area. This is computed once 178 at startup and does not change. */ 179static Int tc_sector_szQ; 180 181 182/* Fast helper for the TC. A direct-mapped cache which holds a 183 pointer to a TC entry which may or may not be the correct one, but 184 which we hope usually is. This array is referred to directly from 185 <arch>/dispatch.S. 186 187 Entries in tt_fast may point to any valid TC entry, regardless of 188 which sector it's in. Consequently we must be very careful to 189 invalidate this cache when TC entries are changed or disappear. 190 191 A special TCEntry -- bogus_tc_entry -- must be pointed at to cause 192 that cache entry to miss. This relies on the assumption that no 193 guest code actually has an address of 0x1. 194*/ 195/*global*/ ULong* VG_(tt_fast)[VG_TT_FAST_SIZE]; 196 197static ULong bogus_tc_entry = (Addr64)1; 198 199 200/* For profiling, we have a parallel array of pointers to .count 201 fields in TT entries. Again, these pointers must be invalidated 202 when translations disappear. A NULL pointer suffices to indicate 203 an unused slot. 
204 205 tt_fast and tt_fastN change together: if tt_fast[i] points to 206 bogus_tc_entry then the corresponding tt_fastN[i] must be null. If 207 tt_fast[i] points to some TC entry somewhere, then tt_fastN[i] 208 *must* point to the .count field of the corresponding TT entry. 209 210 tt_fast and tt_fastN are referred to from assembly code 211 (dispatch.S). 212*/ 213/*global*/ UInt* VG_(tt_fastN)[VG_TT_FAST_SIZE]; 214 215 216/* Make sure we're not used before initialisation. */ 217static Bool init_done = False; 218 219 220/*------------------ STATS DECLS ------------------*/ 221 222/* Number of fast-cache updates and flushes done. */ 223ULong n_fast_flushes = 0; 224ULong n_fast_updates = 0; 225 226/* Number of full lookups done. */ 227ULong n_full_lookups = 0; 228ULong n_lookup_probes = 0; 229 230/* Number/osize/tsize of translations entered; also the number of 231 those for which self-checking was requested. */ 232ULong n_in_count = 0; 233ULong n_in_osize = 0; 234ULong n_in_tsize = 0; 235ULong n_in_sc_count = 0; 236 237/* Number/osize of translations discarded due to lack of space. */ 238ULong n_dump_count = 0; 239ULong n_dump_osize = 0; 240 241/* Number/osize of translations discarded due to requests to do so. 
*/ 242ULong n_disc_count = 0; 243ULong n_disc_osize = 0; 244 245 246 247/*-------------------------------------------------------------*/ 248/*--- Add/delete/find translations ---*/ 249/*-------------------------------------------------------------*/ 250 251static UInt vge_osize ( VexGuestExtents* vge ) 252{ 253 UInt i, n = 0; 254 for (i = 0; i < vge->n_used; i++) 255 n += (UInt)vge->len[i]; 256 return n; 257} 258 259static Bool isValidSector ( Int sector ) 260{ 261 if (sector < 0 || sector >= N_SECTORS) 262 return False; 263 return True; 264} 265 266static inline UInt HASH_TT ( Addr64 key ) 267{ 268 UInt kHi = (UInt)(key >> 32); 269 UInt kLo = (UInt)key; 270 return (kHi ^ kLo) % N_TTES_PER_SECTOR; 271} 272 273static void setFastCacheEntry ( Addr64 key, ULong* tce, UInt* count ) 274{ 275 UInt cno = ((UInt)key) & VG_TT_FAST_MASK; 276 VG_(tt_fast)[cno] = tce; 277 VG_(tt_fastN)[cno] = count; 278 n_fast_updates++; 279} 280 281static void invalidateFastCache ( void ) 282{ 283 UInt j; 284 for (j = 0; j < VG_TT_FAST_SIZE; j++) { 285 VG_(tt_fast)[j] = &bogus_tc_entry; 286 VG_(tt_fastN)[j] = NULL; 287 } 288 n_fast_flushes++; 289} 290 291static void initialiseSector ( Int sno ) 292{ 293 Int i; 294 SysRes sres; 295 vg_assert(isValidSector(sno)); 296 297 if (sectors[sno].tc == NULL) { 298 /* Sector has never been used before. Need to allocate tt and 299 tc. 
*/ 300 vg_assert(sectors[sno].tt == NULL); 301 vg_assert(sectors[sno].tc_next == NULL); 302 vg_assert(sectors[sno].tt_n_inuse == 0); 303 304 VG_(debugLog)(1,"transtab", "allocate sector %d\n", sno); 305 306 sres = VG_(am_mmap_anon_float_valgrind)( 8 * tc_sector_szQ ); 307 if (sres.isError) { 308 VG_(out_of_memory_NORETURN)("initialiseSector(TC)", 309 8 * tc_sector_szQ ); 310 /*NOTREACHED*/ 311 } 312 sectors[sno].tc = (ULong*)sres.val; 313 314 sres = VG_(am_mmap_anon_float_valgrind) 315 ( N_TTES_PER_SECTOR * sizeof(TTEntry) ); 316 if (sres.isError) { 317 VG_(out_of_memory_NORETURN)("initialiseSector(TT)", 318 N_TTES_PER_SECTOR * sizeof(TTEntry) ); 319 /*NOTREACHED*/ 320 } 321 sectors[sno].tt = (TTEntry*)sres.val; 322 323 if (VG_(clo_verbosity) > 2) 324 VG_(message)(Vg_DebugMsg, "TT/TC: initialise sector %d", sno); 325 } else { 326 /* Sector has been used before. */ 327 VG_(debugLog)(1,"transtab", "recycle sector %d\n", sno); 328 vg_assert(sectors[sno].tt != NULL); 329 vg_assert(sectors[sno].tc_next != NULL); 330 n_dump_count += sectors[sno].tt_n_inuse; 331 for (i = 0; i < N_TTES_PER_SECTOR; i++) { 332 if (sectors[sno].tt[i].status == InUse) { 333 n_dump_osize += vge_osize(§ors[sno].tt[i].vge); 334 } 335 } 336 if (VG_(clo_verbosity) > 2) 337 VG_(message)(Vg_DebugMsg, "TT/TC: recycle sector %d", sno); 338 } 339 340 sectors[sno].tc_next = sectors[sno].tc; 341 sectors[sno].tt_n_inuse = 0; 342 for (i = 0; i < N_TTES_PER_SECTOR; i++) 343 sectors[sno].tt[i].status = Empty; 344 345 invalidateFastCache(); 346} 347 348static void invalidate_icache ( void *ptr, Int nbytes ) 349{ 350# if defined(VGA_ppc32) 351 Addr startaddr = (Addr) ptr; 352 Addr endaddr = startaddr + nbytes; 353 Addr cls = VG_(cache_line_size_ppc32); 354 Addr addr; 355 356 /* Stay sane .. 
*/ 357 vg_assert(cls == 32 || cls == 128); 358 359 startaddr &= ~(cls - 1); 360 for (addr = startaddr; addr < endaddr; addr += cls) 361 asm volatile("dcbst 0,%0" : : "r" (addr)); 362 asm volatile("sync"); 363 for (addr = startaddr; addr < endaddr; addr += cls) 364 asm volatile("icbi 0,%0" : : "r" (addr)); 365 asm volatile("sync; isync"); 366 367# elif defined(VGA_x86) 368 /* no need to do anything, hardware provides coherence */ 369 370# elif defined(VGA_amd64) 371 /* no need to do anything, hardware provides coherence */ 372 373# else 374# error "Unknown ARCH" 375# endif 376} 377 378 379/* Add a translation of vge to TT/TC. The translation is temporarily 380 in code[0 .. code_len-1]. 381 382 pre: youngest_sector points to a valid (although possibly full) 383 sector. 384*/ 385void VG_(add_to_transtab)( VexGuestExtents* vge, 386 Addr64 entry, 387 AddrH code, 388 UInt code_len, 389 Bool is_self_checking ) 390{ 391 Int tcAvailQ, reqdQ, y, i; 392 ULong *tce, *tce2; 393 UChar* srcP; 394 UChar* dstP; 395 396 vg_assert(init_done); 397 vg_assert(vge->n_used >= 1 && vge->n_used <= 3); 398 vg_assert(code_len > 0 && code_len < 20000); 399 400 if (0) 401 VG_(printf)("add_to_transtab(entry = 0x%llx, len = %d)\n", 402 entry, code_len); 403 404 n_in_count++; 405 n_in_tsize += code_len; 406 n_in_osize += vge_osize(vge); 407 if (is_self_checking) 408 n_in_sc_count++; 409 410 y = youngest_sector; 411 vg_assert(isValidSector(y)); 412 413 if (sectors[y].tc == NULL) 414 initialiseSector(y); 415 416 /* Try putting the translation in this sector. */ 417 reqdQ = 1 + ((code_len + 7) >> 3); 418 419 /* Will it fit in tc? */ 420 tcAvailQ = ((ULong*)(§ors[y].tc[tc_sector_szQ])) 421 - ((ULong*)(sectors[y].tc_next)); 422 vg_assert(tcAvailQ >= 0); 423 vg_assert(tcAvailQ <= tc_sector_szQ); 424 425 if (tcAvailQ < reqdQ 426 || sectors[y].tt_n_inuse >= N_TTES_PER_SECTOR_USABLE) { 427 /* No. So move on to the next sector. 
Either it's never been 428 used before, in which case it will get its tt/tc allocated 429 now, or it has been used before, in which case it is set to be 430 empty, hence throwing out the oldest sector. */ 431 youngest_sector++; 432 if (youngest_sector >= N_SECTORS) 433 youngest_sector = 0; 434 y = youngest_sector; 435 initialiseSector(y); 436 } 437 438 /* Be sure ... */ 439 tcAvailQ = ((ULong*)(§ors[y].tc[tc_sector_szQ])) 440 - ((ULong*)(sectors[y].tc_next)); 441 vg_assert(tcAvailQ >= 0); 442 vg_assert(tcAvailQ <= tc_sector_szQ); 443 vg_assert(tcAvailQ >= reqdQ); 444 vg_assert(sectors[y].tt_n_inuse < N_TTES_PER_SECTOR_USABLE); 445 vg_assert(sectors[y].tt_n_inuse >= 0); 446 447 /* Copy into tc. */ 448 tce = sectors[y].tc_next; 449 vg_assert(tce >= §ors[y].tc[0]); 450 vg_assert(tce <= §ors[y].tc[tc_sector_szQ]); 451 452 tce[0] = entry; 453 dstP = (UChar*)(&tce[1]); 454 srcP = (UChar*)code; 455 for (i = 0; i < code_len; i++) 456 dstP[i] = srcP[i]; 457 sectors[y].tc_next += reqdQ; 458 sectors[y].tt_n_inuse++; 459 460 invalidate_icache( dstP, code_len ); 461 462 /* more paranoia */ 463 tce2 = sectors[y].tc_next; 464 vg_assert(tce2 >= §ors[y].tc[0]); 465 vg_assert(tce2 <= §ors[y].tc[tc_sector_szQ]); 466 467 /* Find an empty tt slot, and use it. There must be such a slot 468 since tt is never allowed to get completely full. */ 469 i = HASH_TT(entry); 470 vg_assert(i >= 0 && i < N_TTES_PER_SECTOR); 471 while (True) { 472 if (sectors[y].tt[i].status == Empty 473 || sectors[y].tt[i].status == Deleted) 474 break; 475 i++; 476 if (i >= N_TTES_PER_SECTOR) 477 i = 0; 478 } 479 480 sectors[y].tt[i].status = InUse; 481 sectors[y].tt[i].tce = tce; 482 sectors[y].tt[i].count = 0; 483 sectors[y].tt[i].weight = 1; 484 sectors[y].tt[i].vge = *vge; 485 sectors[y].tt[i].entry = entry; 486 487 setFastCacheEntry( entry, tce, §ors[y].tt[i].count ); 488} 489 490 491/* Search for the translation of the given guest address. 
If 492 requested, a successful search can also cause the fast-caches to be 493 updated. 494*/ 495Bool VG_(search_transtab) ( /*OUT*/AddrH* result, 496 Addr64 guest_addr, 497 Bool upd_cache ) 498{ 499 Int i, j, k, kstart, sno; 500 501 vg_assert(init_done); 502 /* Find the initial probe point just once. It will be the same in 503 all sectors and avoids multiple expensive % operations. */ 504 n_full_lookups++; 505 k = -1; 506 kstart = HASH_TT(guest_addr); 507 vg_assert(kstart >= 0 && kstart < N_TTES_PER_SECTOR); 508 509 /* Search in all the sectors. Although the order should not matter, 510 it might be most efficient to search in the order youngest to 511 oldest. */ 512 sno = youngest_sector; 513 for (i = 0; i < N_SECTORS; i++) { 514 515 if (sectors[sno].tc == NULL) 516 goto notfound; /* sector not in use. */ 517 518 k = kstart; 519 for (j = 0; j < N_TTES_PER_SECTOR; j++) { 520 n_lookup_probes++; 521 if (sectors[sno].tt[k].status == InUse 522 && sectors[sno].tt[k].entry == guest_addr) { 523 /* found it */ 524 if (upd_cache) 525 setFastCacheEntry( 526 guest_addr, sectors[sno].tt[k].tce, 527 §ors[sno].tt[k].count ); 528 if (result) 529 *result = sizeof(Addr64) + (AddrH)sectors[sno].tt[k].tce; 530 return True; 531 } 532 if (sectors[sno].tt[k].status == Empty) 533 break; /* not found in this sector */ 534 k++; 535 if (k == N_TTES_PER_SECTOR) 536 k = 0; 537 } 538 539 /* If we fall off the end, all entries are InUse and not 540 matching, or Deleted. In any case we did not find it in this 541 sector. */ 542 543 notfound: 544 /* move to the next oldest sector */ 545 sno = sno==0 ? (N_SECTORS-1) : (sno-1); 546 } 547 548 /* Not found in any sector. */ 549 return False; 550} 551 552 553/* Delete all translations which intersect with any part of the 554 specified guest address range. Note, this is SLOW. 
555*/ 556 557static inline 558Bool overlap1 ( Addr64 s1, ULong r1, Addr64 s2, ULong r2 ) 559{ 560 Addr64 e1 = s1 + r1 - 1ULL; 561 Addr64 e2 = s2 + r2 - 1ULL; 562 if (e1 < s2 || e2 < s1) 563 return False; 564 return True; 565} 566 567static inline 568Bool overlaps ( Addr64 start, ULong range, VexGuestExtents* vge ) 569{ 570 if (overlap1(start, range, vge->base[0], (UInt)vge->len[0])) 571 return True; 572 if (vge->n_used < 2) 573 return False; 574 if (overlap1(start, range, vge->base[1], (UInt)vge->len[1])) 575 return True; 576 if (vge->n_used < 3) 577 return False; 578 if (overlap1(start, range, vge->base[2], (UInt)vge->len[2])) 579 return True; 580 return False; 581} 582 583 584void VG_(discard_translations) ( Addr64 guest_start, ULong range, 585 HChar* who ) 586{ 587 Int sno, i; 588 Bool anyDeleted = False; 589 590 vg_assert(init_done); 591 592 VG_(debugLog)(1, "transtab", 593 "discard_translations(0x%llx, %lld) req by %s\n", 594 guest_start, range, who ); 595 596 for (sno = 0; sno < N_SECTORS; sno++) { 597 if (sectors[sno].tc == NULL) 598 continue; 599 for (i = 0; i < N_TTES_PER_SECTOR; i++) { 600 if (sectors[sno].tt[i].status == InUse 601 && overlaps( guest_start, range, §ors[sno].tt[i].vge )) { 602 sectors[sno].tt[i].status = Deleted; 603 sectors[sno].tt_n_inuse--; 604 anyDeleted = True; 605 n_disc_count++; 606 n_disc_osize += vge_osize(§ors[sno].tt[i].vge); 607 } 608 } 609 } 610 611 if (anyDeleted) 612 invalidateFastCache(); 613} 614 615 616/*------------------------------------------------------------*/ 617/*--- Initialisation. ---*/ 618/*------------------------------------------------------------*/ 619 620void VG_(init_tt_tc) ( void ) 621{ 622 Int i, avg_codeszQ; 623 624 vg_assert(!init_done); 625 init_done = True; 626 627 /* Otherwise lots of things go wrong... 
*/ 628 vg_assert(sizeof(ULong) == 8); 629 vg_assert(sizeof(Addr64) == 8); 630 631 if (VG_(clo_verbosity) > 2) 632 VG_(message)(Vg_DebugMsg, 633 "TT/TC: VG_(init_tt_tc) " 634 "(startup of code management)"); 635 636 /* Figure out how big each tc area should be. */ 637 avg_codeszQ = (VG_(details).avg_translation_sizeB + 7) / 8; 638 tc_sector_szQ = N_TTES_PER_SECTOR_USABLE * (1 + avg_codeszQ); 639 640 /* Ensure the calculated value is not way crazy. */ 641 vg_assert(tc_sector_szQ >= 2 * N_TTES_PER_SECTOR_USABLE); 642 vg_assert(tc_sector_szQ <= 50 * N_TTES_PER_SECTOR_USABLE); 643 644 /* Initialise the sectors */ 645 youngest_sector = 0; 646 for (i = 0; i < N_SECTORS; i++) { 647 sectors[i].tc = NULL; 648 sectors[i].tt = NULL; 649 sectors[i].tc_next = NULL; 650 sectors[i].tt_n_inuse = 0; 651 } 652 653 /* and the fast caches. */ 654 invalidateFastCache(); 655 656 if (VG_(clo_verbosity) > 2) { 657 VG_(message)(Vg_DebugMsg, 658 "TT/TC: cache: %d sectors of %d bytes each = %d total", 659 N_SECTORS, 8 * tc_sector_szQ, 660 N_SECTORS * 8 * tc_sector_szQ ); 661 VG_(message)(Vg_DebugMsg, 662 "TT/TC: table: %d total entries, max occupancy %d (%d%%)", 663 N_SECTORS * N_TTES_PER_SECTOR, 664 N_SECTORS * N_TTES_PER_SECTOR_USABLE, 665 SECTOR_TT_LIMIT_PERCENT ); 666 } 667 668 VG_(debugLog)(2, "transtab", 669 "cache: %d sectors of %d bytes each = %d total\n", 670 N_SECTORS, 8 * tc_sector_szQ, 671 N_SECTORS * 8 * tc_sector_szQ ); 672 VG_(debugLog)(2, "transtab", 673 "table: %d total entries, max occupancy %d (%d%%)\n", 674 N_SECTORS * N_TTES_PER_SECTOR, 675 N_SECTORS * N_TTES_PER_SECTOR_USABLE, 676 SECTOR_TT_LIMIT_PERCENT ); 677} 678 679 680/*------------------------------------------------------------*/ 681/*--- Printing out statistics. ---*/ 682/*------------------------------------------------------------*/ 683 684static ULong safe_idiv( ULong a, ULong b ) 685{ 686 return (b == 0 ? 
0 : a / b); 687} 688 689UInt VG_(get_bbs_translated) ( void ) 690{ 691 return n_in_count; 692} 693 694void VG_(print_tt_tc_stats) ( void ) 695{ 696 VG_(message)(Vg_DebugMsg, 697 " tt/tc: %llu tt lookups requiring %llu probes", 698 n_full_lookups, n_lookup_probes ); 699 VG_(message)(Vg_DebugMsg, 700 " tt/tc: %llu fast-cache updates, %llu flushes", 701 n_fast_updates, n_fast_flushes ); 702 703 VG_(message)(Vg_DebugMsg, 704 "translate: new %lld " 705 "(%lld -> %lld; ratio %lld:10) [%lld scs]", 706 n_in_count, n_in_osize, n_in_tsize, 707 safe_idiv(10*n_in_tsize, n_in_osize), 708 n_in_sc_count); 709 VG_(message)(Vg_DebugMsg, 710 "translate: dumped %lld (%lld -> ?" "?)", 711 n_dump_count, n_dump_osize ); 712 VG_(message)(Vg_DebugMsg, 713 "translate: discarded %lld (%lld -> ?" "?)", 714 n_disc_count, n_disc_osize ); 715} 716 717/*------------------------------------------------------------*/ 718/*--- Printing out of profiling results. ---*/ 719/*------------------------------------------------------------*/ 720 721static ULong score ( TTEntry* tte ) 722{ 723 return ((ULong)tte->weight) * ((ULong)tte->count); 724} 725 726ULong VG_(get_BB_profile) ( BBProfEntry tops[], UInt n_tops ) 727{ 728 Int sno, i, r, s; 729 ULong score_total; 730 731 /* First, compute the total weighted count, and find the top N 732 ttes. tops contains pointers to the most-used n_tops blocks, in 733 descending order (viz, tops[0] is the highest scorer). */ 734 for (i = 0; i < n_tops; i++) { 735 tops[i].addr = 0; 736 tops[i].score = 0; 737 } 738 739 score_total = 0; 740 741 for (sno = 0; sno < N_SECTORS; sno++) { 742 if (sectors[sno].tc == NULL) 743 continue; 744 for (i = 0; i < N_TTES_PER_SECTOR; i++) { 745 if (sectors[sno].tt[i].status != InUse) 746 continue; 747 score_total += score(§ors[sno].tt[i]); 748 /* Find the rank for sectors[sno].tt[i]. 
*/ 749 r = n_tops-1; 750 while (True) { 751 if (r == -1) 752 break; 753 if (tops[r].addr == 0) { 754 r--; 755 continue; 756 } 757 if ( score(§ors[sno].tt[i]) > tops[r].score ) { 758 r--; 759 continue; 760 } 761 break; 762 } 763 r++; 764 vg_assert(r >= 0 && r <= n_tops); 765 /* This bb should be placed at r, and bbs above it shifted 766 upwards one slot. */ 767 if (r < n_tops) { 768 for (s = n_tops-1; s > r; s--) 769 tops[s] = tops[s-1]; 770 tops[r].addr = sectors[sno].tt[i].entry; 771 tops[r].score = score( §ors[sno].tt[i] ); 772 } 773 } 774 } 775 776 return score_total; 777} 778 779/*--------------------------------------------------------------------*/ 780/*--- end ---*/ 781/*--------------------------------------------------------------------*/ 782