/* m_transtab.c, revision e4b0bf07b0ee0a18eacc5aba91686ab5fc1d327b */
1 2/*--------------------------------------------------------------------*/ 3/*--- Management of the translation table and cache. ---*/ 4/*--- m_transtab.c ---*/ 5/*--------------------------------------------------------------------*/ 6 7/* 8 This file is part of Valgrind, a dynamic binary instrumentation 9 framework. 10 11 Copyright (C) 2000-2006 Julian Seward 12 jseward@acm.org 13 14 This program is free software; you can redistribute it and/or 15 modify it under the terms of the GNU General Public License as 16 published by the Free Software Foundation; either version 2 of the 17 License, or (at your option) any later version. 18 19 This program is distributed in the hope that it will be useful, but 20 WITHOUT ANY WARRANTY; without even the implied warranty of 21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 22 General Public License for more details. 23 24 You should have received a copy of the GNU General Public License 25 along with this program; if not, write to the Free Software 26 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 27 02111-1307, USA. 28 29 The GNU General Public License is contained in the file COPYING. 30*/ 31 32#include "pub_core_basics.h" 33#include "pub_core_debuglog.h" 34#include "pub_core_machine.h" // For VG(machine_get_VexArchInfo) 35#include "pub_core_libcbase.h" 36#include "pub_core_libcassert.h" 37#include "pub_core_libcprint.h" 38#include "pub_core_options.h" 39#include "pub_core_tooliface.h" // For VG_(details).avg_translation_sizeB 40#include "pub_core_transtab.h" 41#include "pub_core_aspacemgr.h" 42#include "pub_core_mallocfree.h" // VG_(out_of_memory_NORETURN) 43 44/* #define DEBUG_TRANSTAB */ 45 46 47/*-------------------------------------------------------------*/ 48/*--- Management of the FIFO-based translation table+cache. 
---*/ 49/*-------------------------------------------------------------*/ 50 51/*------------------ CONSTANTS ------------------*/ 52 53/* Number of sectors the TC is divided into. If you need a larger 54 overall translation cache, increase this value. */ 55#define N_SECTORS 8 56 57/* Number of TC entries in each sector. This needs to be a prime 58 number to work properly, it must be <= 65535 (so that a TT index 59 fits in a UShort, leaving room for 0xFFFF(EC2TTE_DELETED) to denote 60 'deleted') and it is strongly recommended not to change this. 61 65521 is the largest prime <= 65535. */ 62#define N_TTES_PER_SECTOR /*30011*/ /*40009*/ 65521 63 64/* Because each sector contains a hash table of TTEntries, we need to 65 specify the maximum allowable loading, after which the sector is 66 deemed full. */ 67#define SECTOR_TT_LIMIT_PERCENT 80 68 69/* The sector is deemed full when this many entries are in it. */ 70#define N_TTES_PER_SECTOR_USABLE \ 71 ((N_TTES_PER_SECTOR * SECTOR_TT_LIMIT_PERCENT) / 100) 72 73/* Equivalence classes for fast address range deletion. There are 1 + 74 2^ECLASS_WIDTH bins. The highest one, ECLASS_MISC, describes an 75 address range which does not fall cleanly within any specific bin. 76 Note that ECLASS_SHIFT + ECLASS_WIDTH must be < 32. */ 77#define ECLASS_SHIFT 11 78#define ECLASS_WIDTH 8 79#define ECLASS_MISC (1 << ECLASS_WIDTH) 80#define ECLASS_N (1 + ECLASS_MISC) 81 82#define EC2TTE_DELETED 0xFFFF /* 16-bit special value */ 83 84 85/*------------------ TYPES ------------------*/ 86 87/* A translation-cache entry is two parts: 88 - The guest address of the first (entry) bb in the translation, 89 as a 64-bit word. 90 - One or more 64-bit words containing the code. 91 It is supposed to be 64-bit aligned. 92*/ 93/* 94typedef 95 struct { 96 Addr64 orig_addr; 97 ULong code[0]; 98 } 99 TCEntry; 100*/ 101 102/* A translation-table entry. 
This indicates precisely which areas of 103 guest code are included in the translation, and contains all other 104 auxiliary info too. */ 105typedef 106 struct { 107 /* Profiling only: the count and weight (arbitrary meaning) for 108 this translation. Weight is a property of the translation 109 itself and computed once when the translation is created. 110 Count is an entry count for the translation and is 111 incremented by 1 every time the translation is used, if we 112 are profiling. */ 113 UInt count; 114 UShort weight; 115 116 /* Status of the slot. Note, we need to be able to do lazy 117 deletion, hence the Deleted state. */ 118 enum { InUse, Deleted, Empty } status; 119 120 /* Pointer to the corresponding TCEntry (must be in the same 121 sector!) */ 122 ULong* tce; 123 124 /* This is the original guest address that purportedly is the 125 entry point of the translation. You might think that .entry 126 should be the same as .vge->base[0], and most of the time it 127 is. However, when doing redirections, that is not the case. 128 .vge must always correctly describe the guest code sections 129 from which this translation was made. However, .entry may or 130 may not be a lie, depending on whether or not we're doing 131 redirection. */ 132 Addr64 entry; 133 134 /* This structure describes precisely what ranges of guest code 135 the translation covers, so we can decide whether or not to 136 delete it when translations of a given address range are 137 invalidated. */ 138 VexGuestExtents vge; 139 140 /* Address range summary info: these are pointers back to 141 eclass[] entries in the containing Sector. Those entries in 142 turn point back here -- the two structures are mutually 143 redundant but both necessary to make fast deletions work. 
144 The eclass info is similar to, and derived from, this entry's 145 'vge' field, but it is not the same */ 146 UShort n_tte2ec; // # tte2ec pointers (1 to 3) 147 UShort tte2ec_ec[3]; // for each, the eclass # 148 UInt tte2ec_ix[3]; // and the index within the eclass. 149 // for i in 0 .. n_tte2ec-1 150 // sec->ec2tte[ tte2ec_ec[i] ][ tte2ec_ix[i] ] 151 // should be the index 152 // of this TTEntry in the containing Sector's tt array. 153 } 154 TTEntry; 155 156 157/* Finally, a sector itself. Each sector contains an array of 158 TCEntries, which hold code, and an array of TTEntries, containing 159 all required administrative info. Profiling is supported using the 160 TTEntry .count and .weight fields, if required. Each sector is 161 independent in that no cross-sector references are allowed. 162 163 If the sector is not in use, all three pointers are NULL and 164 tt_n_inuse is zero. 165*/ 166typedef 167 struct { 168 /* The TCEntry area. Size of this depends on the average 169 translation size. We try and size it so it becomes full 170 precisely when this sector's translation table (tt) reaches 171 its load limit (SECTOR_TT_LIMIT_PERCENT). */ 172 ULong* tc; 173 174 /* The TTEntry array. This is a fixed size, always containing 175 exactly N_TTES_PER_SECTOR entries. */ 176 TTEntry* tt; 177 178 /* This points to the current allocation point in tc. */ 179 ULong* tc_next; 180 181 /* The count of tt entries with state InUse. */ 182 Int tt_n_inuse; 183 184 /* Expandable arrays of tt indices for each of the ECLASS_N 185 address range equivalence classes. These hold indices into 186 the containing sector's tt array, which in turn should point 187 back here. */ 188 Int ec2tte_size[ECLASS_N]; 189 Int ec2tte_used[ECLASS_N]; 190 UShort* ec2tte[ECLASS_N]; 191 } 192 Sector; 193 194 195/*------------------ DECLS ------------------*/ 196 197/* The root data structure is an array of sectors. 
The index of the 198 youngest sector is recorded, and new translations are put into that 199 sector. When it fills up, we move along to the next sector and 200 start to fill that up, wrapping around at the end of the array. 201 That way, once all N_TC_SECTORS have been bought into use for the 202 first time, and are full, we then re-use the oldest sector, 203 endlessly. 204 205 When running, youngest sector should be between >= 0 and < 206 N_TC_SECTORS. The initial -1 value indicates the TT/TC system is 207 not yet initialised. 208*/ 209static Sector sectors[N_SECTORS]; 210static Int youngest_sector = -1; 211 212/* The number of ULongs in each TCEntry area. This is computed once 213 at startup and does not change. */ 214static Int tc_sector_szQ; 215 216 217/* Fast helper for the TC. A direct-mapped cache which holds a 218 pointer to a TC entry which may or may not be the correct one, but 219 which we hope usually is. This array is referred to directly from 220 <arch>/dispatch.S. 221 222 Entries in tt_fast may point to any valid TC entry, regardless of 223 which sector it's in. Consequently we must be very careful to 224 invalidate this cache when TC entries are changed or disappear. 225 226 A special TCEntry -- bogus_tc_entry -- must be pointed at to cause 227 that cache entry to miss. This relies on the assumption that no 228 guest code actually has an address of 0x1. 229*/ 230/*global*/ ULong* VG_(tt_fast)[VG_TT_FAST_SIZE]; 231 232static ULong bogus_tc_entry = (Addr64)1; 233 234 235/* For profiling, we have a parallel array of pointers to .count 236 fields in TT entries. Again, these pointers must be invalidated 237 when translations disappear. A NULL pointer suffices to indicate 238 an unused slot. 239 240 tt_fast and tt_fastN change together: if tt_fast[i] points to 241 bogus_tc_entry then the corresponding tt_fastN[i] must be null. 
If 242 tt_fast[i] points to some TC entry somewhere, then tt_fastN[i] 243 *must* point to the .count field of the corresponding TT entry. 244 245 tt_fast and tt_fastN are referred to from assembly code 246 (dispatch.S). 247*/ 248/*global*/ UInt* VG_(tt_fastN)[VG_TT_FAST_SIZE]; 249 250 251/* Make sure we're not used before initialisation. */ 252static Bool init_done = False; 253 254 255/*------------------ STATS DECLS ------------------*/ 256 257/* Number of fast-cache updates and flushes done. */ 258ULong n_fast_flushes = 0; 259ULong n_fast_updates = 0; 260 261/* Number of full lookups done. */ 262ULong n_full_lookups = 0; 263ULong n_lookup_probes = 0; 264 265/* Number/osize/tsize of translations entered; also the number of 266 those for which self-checking was requested. */ 267ULong n_in_count = 0; 268ULong n_in_osize = 0; 269ULong n_in_tsize = 0; 270ULong n_in_sc_count = 0; 271 272/* Number/osize of translations discarded due to lack of space. */ 273ULong n_dump_count = 0; 274ULong n_dump_osize = 0; 275 276/* Number/osize of translations discarded due to requests to do so. */ 277ULong n_disc_count = 0; 278ULong n_disc_osize = 0; 279 280 281/*-------------------------------------------------------------*/ 282/*--- Address-range equivalence class stuff ---*/ 283/*-------------------------------------------------------------*/ 284 285/* Return equivalence class number for a range. */ 286 287static Int range_to_eclass ( Addr64 start, UInt len ) 288{ 289 UInt mask = (1 << ECLASS_WIDTH) - 1; 290 UInt lo = (UInt)start; 291 UInt hi = lo + len - 1; 292 UInt loBits = (lo >> ECLASS_SHIFT) & mask; 293 UInt hiBits = (hi >> ECLASS_SHIFT) & mask; 294 if (loBits == hiBits) { 295 vg_assert(loBits < ECLASS_N-1); 296 return loBits; 297 } else { 298 return ECLASS_MISC; 299 } 300} 301 302 303/* Calculates the equivalence class numbers for any VexGuestExtent. 304 These are written in *eclasses, which must be big enough to hold 3 305 Ints. 
The number written, between 1 and 3, is returned. The 306 eclasses are presented in order, and any duplicates are removed. 307*/ 308 309static 310Int vexGuestExtents_to_eclasses ( /*OUT*/Int* eclasses, 311 VexGuestExtents* vge ) 312{ 313# define SWAP(_lv1,_lv2) \ 314 do { Int t = _lv1; _lv1 = _lv2; _lv2 = t; } while (0) 315 316 Int i, j, n_ec, r; 317 318 vg_assert(vge->n_used >= 1 && vge->n_used <= 3); 319 320 n_ec = 0; 321 for (i = 0; i < vge->n_used; i++) { 322 r = range_to_eclass( vge->base[i], (UInt)vge->len[i] ); 323 if (r == ECLASS_MISC) 324 goto bad; 325 /* only add if we haven't already seen it */ 326 for (j = 0; j < n_ec; j++) 327 if (eclasses[j] == r) 328 break; 329 if (j == n_ec) 330 eclasses[n_ec++] = r; 331 } 332 333 if (n_ec == 1) 334 return 1; 335 336 if (n_ec == 2) { 337 /* sort */ 338 if (eclasses[0] > eclasses[1]) 339 SWAP(eclasses[0], eclasses[1]); 340 return 2; 341 } 342 343 if (n_ec == 3) { 344 /* sort */ 345 if (eclasses[0] > eclasses[2]) 346 SWAP(eclasses[0], eclasses[2]); 347 if (eclasses[0] > eclasses[1]) 348 SWAP(eclasses[0], eclasses[1]); 349 if (eclasses[1] > eclasses[2]) 350 SWAP(eclasses[1], eclasses[2]); 351 return 3; 352 } 353 354 /* NOTREACHED */ 355 vg_assert(0); 356 357 bad: 358 eclasses[0] = ECLASS_MISC; 359 return 1; 360 361# undef SWAP 362} 363 364 365/* Add tteno to the set of entries listed for equivalence class ec in 366 this sector. Returns used location in eclass array. */ 367 368static 369UInt addEClassNo ( /*MOD*/Sector* sec, Int ec, UShort tteno ) 370{ 371 Int old_sz, new_sz, i, r; 372 UShort *old_ar, *new_ar; 373 374 vg_assert(ec >= 0 && ec < ECLASS_N); 375 vg_assert(tteno < N_TTES_PER_SECTOR); 376 377 if (0) VG_(printf)("ec %d gets %d\n", ec, (Int)tteno); 378 379 if (sec->ec2tte_used[ec] >= sec->ec2tte_size[ec]) { 380 381 vg_assert(sec->ec2tte_used[ec] == sec->ec2tte_size[ec]); 382 383 old_sz = sec->ec2tte_size[ec]; 384 old_ar = sec->ec2tte[ec]; 385 new_sz = old_sz==0 ? 8 : old_sz<64 ? 
2*old_sz : (3*old_sz)/2; 386 new_ar = VG_(arena_malloc)(VG_AR_TTAUX, new_sz * sizeof(UShort)); 387 for (i = 0; i < old_sz; i++) 388 new_ar[i] = old_ar[i]; 389 if (old_ar) 390 VG_(arena_free)(VG_AR_TTAUX, old_ar); 391 sec->ec2tte_size[ec] = new_sz; 392 sec->ec2tte[ec] = new_ar; 393 394 if (0) VG_(printf)("expand ec %d to %d\n", ec, new_sz); 395 } 396 397 /* Common case */ 398 r = sec->ec2tte_used[ec]++; 399 vg_assert(r >= 0 && r < sec->ec2tte_size[ec]); 400 sec->ec2tte[ec][r] = tteno; 401 return (UInt)r; 402} 403 404 405/* 'vge' is being added to 'sec' at TT entry 'tteno'. Add appropriate 406 eclass entries to 'sec'. */ 407 408static 409void upd_eclasses_after_add ( /*MOD*/Sector* sec, Int tteno ) 410{ 411 Int i, r, eclasses[3]; 412 TTEntry* tte; 413 vg_assert(tteno >= 0 && tteno < N_TTES_PER_SECTOR); 414 415 tte = &sec->tt[tteno]; 416 r = vexGuestExtents_to_eclasses( eclasses, &tte->vge ); 417 418 vg_assert(r >= 1 && r <= 3); 419 tte->n_tte2ec = r; 420 421 for (i = 0; i < r; i++) { 422 tte->tte2ec_ec[i] = eclasses[i]; 423 tte->tte2ec_ix[i] = addEClassNo( sec, eclasses[i], (UShort)tteno ); 424 } 425} 426 427 428/* Check the eclass info in 'sec' to ensure it is consistent. Returns 429 True if OK, False if something's not right. Expensive. */ 430 431static Bool sanity_check_eclasses_in_sector ( Sector* sec ) 432{ 433# define BAD(_str) do { whassup = (_str); goto bad; } while (0) 434 435 HChar* whassup = NULL; 436 Int i, j, k, n, ec_num, ec_idx; 437 TTEntry* tte; 438 UShort tteno; 439 ULong* tce; 440 441 /* Basic checks on this sector */ 442 if (sec->tt_n_inuse < 0 || sec->tt_n_inuse > N_TTES_PER_SECTOR_USABLE) 443 BAD("invalid sec->tt_n_inuse"); 444 tce = sec->tc_next; 445 if (tce < &sec->tc[0] || tce > &sec->tc[tc_sector_szQ]) 446 BAD("sec->tc_next points outside tc"); 447 448 /* For each eclass ... 
*/ 449 for (i = 0; i < ECLASS_N; i++) { 450 if (sec->ec2tte_size[i] == 0 && sec->ec2tte[i] != NULL) 451 BAD("ec2tte_size/ec2tte mismatch(1)"); 452 if (sec->ec2tte_size[i] != 0 && sec->ec2tte[i] == NULL) 453 BAD("ec2tte_size/ec2tte mismatch(2)"); 454 if (sec->ec2tte_used[i] < 0 455 || sec->ec2tte_used[i] > sec->ec2tte_size[i]) 456 BAD("implausible ec2tte_used"); 457 if (sec->ec2tte_used[i] == 0) 458 continue; 459 460 /* For each tt reference in each eclass .. ensure the reference 461 is to a valid tt entry, and that the entry's address ranges 462 really include this eclass. */ 463 464 for (j = 0; j < sec->ec2tte_used[i]; j++) { 465 tteno = sec->ec2tte[i][j]; 466 if (tteno == EC2TTE_DELETED) 467 continue; 468 if (tteno >= N_TTES_PER_SECTOR) 469 BAD("implausible tteno"); 470 tte = &sec->tt[tteno]; 471 if (tte->status != InUse) 472 BAD("tteno points to non-inuse tte"); 473 if (tte->n_tte2ec < 1 || tte->n_tte2ec > 3) 474 BAD("tte->n_tte2ec out of range"); 475 /* Exactly least one of tte->eclasses[0 .. tte->n_eclasses-1] 476 must equal i. Inspect tte's eclass info. */ 477 n = 0; 478 for (k = 0; k < tte->n_tte2ec; k++) { 479 if (k < tte->n_tte2ec-1 480 && tte->tte2ec_ec[k] >= tte->tte2ec_ec[k+1]) 481 BAD("tte->tte2ec_ec[..] out of order"); 482 ec_num = tte->tte2ec_ec[k]; 483 if (ec_num < 0 || ec_num >= ECLASS_N) 484 BAD("tte->tte2ec_ec[..] out of range"); 485 if (ec_num != i) 486 continue; 487 ec_idx = tte->tte2ec_ix[k]; 488 if (ec_idx < 0 || ec_idx >= sec->ec2tte_used[i]) 489 BAD("tte->tte2ec_ix[..] out of range"); 490 if (ec_idx == j) 491 n++; 492 } 493 if (n != 1) 494 BAD("tteno does not point back at eclass"); 495 } 496 } 497 498 /* That establishes that for each forward pointer from TTEntrys 499 there is a corresponding backward pointer from the eclass[] 500 arrays. However, it doesn't rule out the possibility of other, 501 bogus pointers in the eclass[] arrays. So do those similarly: 502 scan through them and check the TTEntryies they point at point 503 back. 
*/ 504 505 for (i = 0; i < N_TTES_PER_SECTOR_USABLE; i++) { 506 507 tte = &sec->tt[i]; 508 if (tte->status == Empty || tte->status == Deleted) { 509 if (tte->n_tte2ec != 0) 510 BAD("tte->n_eclasses nonzero for unused tte"); 511 continue; 512 } 513 514 vg_assert(tte->status == InUse); 515 516 if (tte->n_tte2ec < 1 || tte->n_tte2ec > 3) 517 BAD("tte->n_eclasses out of range(2)"); 518 519 for (j = 0; j < tte->n_tte2ec; j++) { 520 ec_num = tte->tte2ec_ec[j]; 521 if (ec_num < 0 || ec_num >= ECLASS_N) 522 BAD("tte->eclass[..] out of range"); 523 ec_idx = tte->tte2ec_ix[j]; 524 if (ec_idx < 0 || ec_idx >= sec->ec2tte_used[ec_num]) 525 BAD("tte->ec_idx[..] out of range(2)"); 526 if (sec->ec2tte[ec_num][ec_idx] != i) 527 BAD("ec2tte does not point back to tte"); 528 } 529 } 530 531 return True; 532 533 bad: 534 if (whassup) 535 VG_(debugLog)(0, "transtab", "eclass sanity fail: %s\n", whassup); 536 537# if 0 538 VG_(printf)("eclass = %d\n", i); 539 VG_(printf)("tteno = %d\n", (Int)tteno); 540 switch (tte->status) { 541 case InUse: VG_(printf)("InUse\n"); break; 542 case Deleted: VG_(printf)("Deleted\n"); break; 543 case Empty: VG_(printf)("Empty\n"); break; 544 } 545 if (tte->status != Empty) { 546 for (k = 0; k < tte->vge.n_used; k++) 547 VG_(printf)("0x%llx %d\n", tte->vge.base[k], 548 (Int)tte->vge.len[k]); 549 } 550# endif 551 552 return False; 553 554# undef BAD 555} 556 557 558/* Sanity check absolutely everything. True == check passed. 
*/ 559 560/* forward */ 561static Bool sanity_check_redir_tt_tc ( void ); 562 563static Bool sanity_check_all_sectors ( void ) 564{ 565 Int sno; 566 Bool sane; 567 Sector* sec; 568 for (sno = 0; sno < N_SECTORS; sno++) { 569 sec = §ors[sno]; 570 if (sec->tc == NULL) 571 continue; 572 sane = sanity_check_eclasses_in_sector( sec ); 573 if (!sane) 574 return False; 575 } 576 if (!sanity_check_redir_tt_tc() ) 577 return False; 578 return True; 579} 580 581 582/*-------------------------------------------------------------*/ 583/*--- Add/find translations ---*/ 584/*-------------------------------------------------------------*/ 585 586static UInt vge_osize ( VexGuestExtents* vge ) 587{ 588 UInt i, n = 0; 589 for (i = 0; i < vge->n_used; i++) 590 n += (UInt)vge->len[i]; 591 return n; 592} 593 594static Bool isValidSector ( Int sector ) 595{ 596 if (sector < 0 || sector >= N_SECTORS) 597 return False; 598 return True; 599} 600 601static inline UInt HASH_TT ( Addr64 key ) 602{ 603 UInt kHi = (UInt)(key >> 32); 604 UInt kLo = (UInt)key; 605 UInt k32 = kHi ^ kLo; 606 UInt ror = 7; 607 if (ror > 0) 608 k32 = (k32 >> ror) | (k32 << (32-ror)); 609 return k32 % N_TTES_PER_SECTOR; 610} 611 612static void setFastCacheEntry ( Addr64 key, ULong* tce, UInt* count ) 613{ 614 UInt cno = (UInt)VG_TT_FAST_HASH(key); 615 VG_(tt_fast)[cno] = tce; 616 VG_(tt_fastN)[cno] = count; 617 n_fast_updates++; 618} 619 620static void invalidateFastCache ( void ) 621{ 622 UInt j; 623 /* This loop is popular enough to make it worth unrolling a 624 bit, at least on ppc32. 
*/ 625 vg_assert(VG_TT_FAST_SIZE > 0 && (VG_TT_FAST_SIZE % 4) == 0); 626 for (j = 0; j < VG_TT_FAST_SIZE; j += 4) { 627 VG_(tt_fast)[j+0] = &bogus_tc_entry; 628 VG_(tt_fast)[j+1] = &bogus_tc_entry; 629 VG_(tt_fast)[j+2] = &bogus_tc_entry; 630 VG_(tt_fast)[j+3] = &bogus_tc_entry; 631 VG_(tt_fastN)[j+0] = NULL; 632 VG_(tt_fastN)[j+1] = NULL; 633 VG_(tt_fastN)[j+2] = NULL; 634 VG_(tt_fastN)[j+3] = NULL; 635 } 636 vg_assert(j == VG_TT_FAST_SIZE); 637 n_fast_flushes++; 638} 639 640static void initialiseSector ( Int sno ) 641{ 642 Int i; 643 SysRes sres; 644 Sector* sec; 645 vg_assert(isValidSector(sno)); 646 647 sec = §ors[sno]; 648 649 if (sec->tc == NULL) { 650 651 /* Sector has never been used before. Need to allocate tt and 652 tc. */ 653 vg_assert(sec->tt == NULL); 654 vg_assert(sec->tc_next == NULL); 655 vg_assert(sec->tt_n_inuse == 0); 656 for (i = 0; i < ECLASS_N; i++) { 657 vg_assert(sec->ec2tte_size[i] == 0); 658 vg_assert(sec->ec2tte_used[i] == 0); 659 vg_assert(sec->ec2tte[i] == NULL); 660 } 661 662 VG_(debugLog)(1,"transtab", "allocate sector %d\n", sno); 663 664 sres = VG_(am_mmap_anon_float_valgrind)( 8 * tc_sector_szQ ); 665 if (sres.isError) { 666 VG_(out_of_memory_NORETURN)("initialiseSector(TC)", 667 8 * tc_sector_szQ ); 668 /*NOTREACHED*/ 669 } 670 sec->tc = (ULong*)sres.val; 671 672 sres = VG_(am_mmap_anon_float_valgrind) 673 ( N_TTES_PER_SECTOR * sizeof(TTEntry) ); 674 if (sres.isError) { 675 VG_(out_of_memory_NORETURN)("initialiseSector(TT)", 676 N_TTES_PER_SECTOR * sizeof(TTEntry) ); 677 /*NOTREACHED*/ 678 } 679 sec->tt = (TTEntry*)sres.val; 680 681 for (i = 0; i < N_TTES_PER_SECTOR; i++) { 682 sec->tt[i].status = Empty; 683 sec->tt[i].n_tte2ec = 0; 684 } 685 686 if (VG_(clo_verbosity) > 2) 687 VG_(message)(Vg_DebugMsg, "TT/TC: initialise sector %d", sno); 688 689 } else { 690 691 /* Sector has been used before. Dump the old contents. 
*/ 692 VG_(debugLog)(1,"transtab", "recycle sector %d\n", sno); 693 vg_assert(sec->tt != NULL); 694 vg_assert(sec->tc_next != NULL); 695 n_dump_count += sec->tt_n_inuse; 696 697 /* Visit each just-about-to-be-abandoned translation. */ 698 for (i = 0; i < N_TTES_PER_SECTOR; i++) { 699 if (sec->tt[i].status == InUse) { 700 vg_assert(sec->tt[i].n_tte2ec >= 1); 701 vg_assert(sec->tt[i].n_tte2ec <= 3); 702 n_dump_osize += vge_osize(&sec->tt[i].vge); 703 /* Tell the tool too. */ 704 if (VG_(needs).basic_block_discards) { 705 VG_TDICT_CALL( tool_discard_basic_block_info, 706 sec->tt[i].entry, 707 sec->tt[i].vge ); 708 } 709 } else { 710 vg_assert(sec->tt[i].n_tte2ec == 0); 711 } 712 sec->tt[i].status = Empty; 713 sec->tt[i].n_tte2ec = 0; 714 } 715 716 /* Free up the eclass structures. */ 717 for (i = 0; i < ECLASS_N; i++) { 718 if (sec->ec2tte_size[i] == 0) { 719 vg_assert(sec->ec2tte_used[i] == 0); 720 vg_assert(sec->ec2tte[i] == NULL); 721 } else { 722 vg_assert(sec->ec2tte[i] != NULL); 723 VG_(arena_free)(VG_AR_TTAUX, sec->ec2tte[i]); 724 sec->ec2tte[i] = NULL; 725 sec->ec2tte_size[i] = 0; 726 sec->ec2tte_used[i] = 0; 727 } 728 } 729 730 if (VG_(clo_verbosity) > 2) 731 VG_(message)(Vg_DebugMsg, "TT/TC: recycle sector %d", sno); 732 } 733 734 sec->tc_next = sec->tc; 735 sec->tt_n_inuse = 0; 736 737 invalidateFastCache(); 738} 739 740static void invalidate_icache ( void *ptr, Int nbytes ) 741{ 742# if defined(VGA_ppc32) || defined(VGA_ppc64) 743 Addr startaddr = (Addr) ptr; 744 Addr endaddr = startaddr + nbytes; 745 Addr cls; 746 Addr addr; 747 VexArchInfo vai; 748 749 VG_(machine_get_VexArchInfo)( NULL, &vai ); 750 cls = vai.ppc_cache_line_szB; 751 752 /* Stay sane .. 
*/ 753 vg_assert(cls == 32 || cls == 128); 754 755 startaddr &= ~(cls - 1); 756 for (addr = startaddr; addr < endaddr; addr += cls) 757 asm volatile("dcbst 0,%0" : : "r" (addr)); 758 asm volatile("sync"); 759 for (addr = startaddr; addr < endaddr; addr += cls) 760 asm volatile("icbi 0,%0" : : "r" (addr)); 761 asm volatile("sync; isync"); 762 763# elif defined(VGA_x86) 764 /* no need to do anything, hardware provides coherence */ 765 766# elif defined(VGA_amd64) 767 /* no need to do anything, hardware provides coherence */ 768 769# else 770# error "Unknown ARCH" 771# endif 772} 773 774 775/* Add a translation of vge to TT/TC. The translation is temporarily 776 in code[0 .. code_len-1]. 777 778 pre: youngest_sector points to a valid (although possibly full) 779 sector. 780*/ 781void VG_(add_to_transtab)( VexGuestExtents* vge, 782 Addr64 entry, 783 AddrH code, 784 UInt code_len, 785 Bool is_self_checking ) 786{ 787 Int tcAvailQ, reqdQ, y, i; 788 ULong *tce, *tce2; 789 UChar* srcP; 790 UChar* dstP; 791 792 vg_assert(init_done); 793 vg_assert(vge->n_used >= 1 && vge->n_used <= 3); 794 vg_assert(code_len > 0 && code_len < 20000); 795 796 if (0) 797 VG_(printf)("add_to_transtab(entry = 0x%llx, len = %d)\n", 798 entry, code_len); 799 800 n_in_count++; 801 n_in_tsize += code_len; 802 n_in_osize += vge_osize(vge); 803 if (is_self_checking) 804 n_in_sc_count++; 805 806 y = youngest_sector; 807 vg_assert(isValidSector(y)); 808 809 if (sectors[y].tc == NULL) 810 initialiseSector(y); 811 812 /* Try putting the translation in this sector. */ 813 reqdQ = 1 + ((code_len + 7) >> 3); 814 815 /* Will it fit in tc? */ 816 tcAvailQ = ((ULong*)(§ors[y].tc[tc_sector_szQ])) 817 - ((ULong*)(sectors[y].tc_next)); 818 vg_assert(tcAvailQ >= 0); 819 vg_assert(tcAvailQ <= tc_sector_szQ); 820 821 if (tcAvailQ < reqdQ 822 || sectors[y].tt_n_inuse >= N_TTES_PER_SECTOR_USABLE) { 823 /* No. So move on to the next sector. 
Either it's never been 824 used before, in which case it will get its tt/tc allocated 825 now, or it has been used before, in which case it is set to be 826 empty, hence throwing out the oldest sector. */ 827 vg_assert(tc_sector_szQ > 0); 828 VG_(debugLog)(1,"transtab", 829 "declare sector %d full " 830 "(TT loading %2d%%, TC loading %2d%%)\n", 831 y, 832 (100 * sectors[y].tt_n_inuse) 833 / N_TTES_PER_SECTOR, 834 (100 * (tc_sector_szQ - tcAvailQ)) 835 / tc_sector_szQ); 836 youngest_sector++; 837 if (youngest_sector >= N_SECTORS) 838 youngest_sector = 0; 839 y = youngest_sector; 840 initialiseSector(y); 841 } 842 843 /* Be sure ... */ 844 tcAvailQ = ((ULong*)(§ors[y].tc[tc_sector_szQ])) 845 - ((ULong*)(sectors[y].tc_next)); 846 vg_assert(tcAvailQ >= 0); 847 vg_assert(tcAvailQ <= tc_sector_szQ); 848 vg_assert(tcAvailQ >= reqdQ); 849 vg_assert(sectors[y].tt_n_inuse < N_TTES_PER_SECTOR_USABLE); 850 vg_assert(sectors[y].tt_n_inuse >= 0); 851 852 /* Copy into tc. */ 853 tce = sectors[y].tc_next; 854 vg_assert(tce >= §ors[y].tc[0]); 855 vg_assert(tce <= §ors[y].tc[tc_sector_szQ]); 856 857 tce[0] = entry; 858 dstP = (UChar*)(&tce[1]); 859 srcP = (UChar*)code; 860 for (i = 0; i < code_len; i++) 861 dstP[i] = srcP[i]; 862 sectors[y].tc_next += reqdQ; 863 sectors[y].tt_n_inuse++; 864 865 invalidate_icache( dstP, code_len ); 866 867 /* more paranoia */ 868 tce2 = sectors[y].tc_next; 869 vg_assert(tce2 >= §ors[y].tc[0]); 870 vg_assert(tce2 <= §ors[y].tc[tc_sector_szQ]); 871 872 /* Find an empty tt slot, and use it. There must be such a slot 873 since tt is never allowed to get completely full. 
*/ 874 i = HASH_TT(entry); 875 vg_assert(i >= 0 && i < N_TTES_PER_SECTOR); 876 while (True) { 877 if (sectors[y].tt[i].status == Empty 878 || sectors[y].tt[i].status == Deleted) 879 break; 880 i++; 881 if (i >= N_TTES_PER_SECTOR) 882 i = 0; 883 } 884 885 sectors[y].tt[i].status = InUse; 886 sectors[y].tt[i].tce = tce; 887 sectors[y].tt[i].count = 0; 888 sectors[y].tt[i].weight = 1; 889 sectors[y].tt[i].vge = *vge; 890 sectors[y].tt[i].entry = entry; 891 892 /* Update the fast-cache. */ 893 setFastCacheEntry( entry, tce, §ors[y].tt[i].count ); 894 895 /* Note the eclass numbers for this translation. */ 896 upd_eclasses_after_add( §ors[y], i ); 897} 898 899 900/* Search for the translation of the given guest address. If 901 requested, a successful search can also cause the fast-caches to be 902 updated. 903*/ 904Bool VG_(search_transtab) ( /*OUT*/AddrH* result, 905 Addr64 guest_addr, 906 Bool upd_cache ) 907{ 908 Int i, j, k, kstart, sno; 909 910 vg_assert(init_done); 911 /* Find the initial probe point just once. It will be the same in 912 all sectors and avoids multiple expensive % operations. */ 913 n_full_lookups++; 914 k = -1; 915 kstart = HASH_TT(guest_addr); 916 vg_assert(kstart >= 0 && kstart < N_TTES_PER_SECTOR); 917 918 /* Search in all the sectors. Although the order should not matter, 919 it might be most efficient to search in the order youngest to 920 oldest. */ 921 sno = youngest_sector; 922 for (i = 0; i < N_SECTORS; i++) { 923 924 if (sectors[sno].tc == NULL) 925 goto notfound; /* sector not in use. 
*/ 926 927 k = kstart; 928 for (j = 0; j < N_TTES_PER_SECTOR; j++) { 929 n_lookup_probes++; 930 if (sectors[sno].tt[k].status == InUse 931 && sectors[sno].tt[k].entry == guest_addr) { 932 /* found it */ 933 if (upd_cache) 934 setFastCacheEntry( 935 guest_addr, sectors[sno].tt[k].tce, 936 §ors[sno].tt[k].count ); 937 if (result) 938 *result = sizeof(Addr64) + (AddrH)sectors[sno].tt[k].tce; 939 return True; 940 } 941 if (sectors[sno].tt[k].status == Empty) 942 break; /* not found in this sector */ 943 k++; 944 if (k == N_TTES_PER_SECTOR) 945 k = 0; 946 } 947 948 /* If we fall off the end, all entries are InUse and not 949 matching, or Deleted. In any case we did not find it in this 950 sector. */ 951 952 notfound: 953 /* move to the next oldest sector */ 954 sno = sno==0 ? (N_SECTORS-1) : (sno-1); 955 } 956 957 /* Not found in any sector. */ 958 return False; 959} 960 961 962/*-------------------------------------------------------------*/ 963/*--- Delete translations. ---*/ 964/*-------------------------------------------------------------*/ 965 966/* forward */ 967static void unredir_discard_translations( Addr64, ULong ); 968 969/* Stuff for deleting translations which intersect with a given 970 address range. Unfortunately, to make this run at a reasonable 971 speed, it is complex. 
*/ 972 973static inline 974Bool overlap1 ( Addr64 s1, ULong r1, Addr64 s2, ULong r2 ) 975{ 976 Addr64 e1 = s1 + r1 - 1ULL; 977 Addr64 e2 = s2 + r2 - 1ULL; 978 if (e1 < s2 || e2 < s1) 979 return False; 980 return True; 981} 982 983static inline 984Bool overlaps ( Addr64 start, ULong range, VexGuestExtents* vge ) 985{ 986 if (overlap1(start, range, vge->base[0], (UInt)vge->len[0])) 987 return True; 988 if (vge->n_used < 2) 989 return False; 990 if (overlap1(start, range, vge->base[1], (UInt)vge->len[1])) 991 return True; 992 if (vge->n_used < 3) 993 return False; 994 if (overlap1(start, range, vge->base[2], (UInt)vge->len[2])) 995 return True; 996 return False; 997} 998 999 1000/* Delete a tt entry, and update all the eclass data accordingly. */ 1001 1002static void delete_tte ( /*MOD*/Sector* sec, Int tteno ) 1003{ 1004 Int i, ec_num, ec_idx; 1005 TTEntry* tte; 1006 1007 vg_assert(tteno >= 0 && tteno < N_TTES_PER_SECTOR); 1008 tte = &sec->tt[tteno]; 1009 vg_assert(tte->status == InUse); 1010 vg_assert(tte->n_tte2ec >= 1 && tte->n_tte2ec <= 3); 1011 1012 /* Deal with the ec-to-tte links first. */ 1013 for (i = 0; i < tte->n_tte2ec; i++) { 1014 ec_num = (Int)tte->tte2ec_ec[i]; 1015 ec_idx = tte->tte2ec_ix[i]; 1016 vg_assert(ec_num >= 0 && ec_num < ECLASS_N); 1017 vg_assert(ec_idx >= 0); 1018 vg_assert(ec_idx < sec->ec2tte_used[ec_num]); 1019 /* Assert that the two links point at each other. */ 1020 vg_assert(sec->ec2tte[ec_num][ec_idx] == (UShort)tteno); 1021 /* "delete" the pointer back to here. */ 1022 sec->ec2tte[ec_num][ec_idx] = EC2TTE_DELETED; 1023 } 1024 1025 /* Now fix up this TTEntry. */ 1026 tte->status = Deleted; 1027 tte->n_tte2ec = 0; 1028 1029 /* Stats .. */ 1030 sec->tt_n_inuse--; 1031 n_disc_count++; 1032 n_disc_osize += vge_osize(&tte->vge); 1033 1034 /* Tell the tool too. 
*/ 1035 if (VG_(needs).basic_block_discards) { 1036 VG_TDICT_CALL( tool_discard_basic_block_info, 1037 tte->entry, 1038 tte->vge ); 1039 } 1040} 1041 1042 1043/* Delete translations from sec which intersect specified range, but 1044 only consider translations in the specified eclass. */ 1045 1046static 1047Bool delete_translations_in_sector_eclass ( /*MOD*/Sector* sec, 1048 Addr64 guest_start, ULong range, 1049 Int ec ) 1050{ 1051 Int i; 1052 UShort tteno; 1053 Bool anyDeld = False; 1054 TTEntry* tte; 1055 1056 vg_assert(ec >= 0 && ec < ECLASS_N); 1057 1058 for (i = 0; i < sec->ec2tte_used[ec]; i++) { 1059 1060 tteno = sec->ec2tte[ec][i]; 1061 if (tteno == EC2TTE_DELETED) { 1062 /* already deleted */ 1063 continue; 1064 } 1065 1066 vg_assert(tteno < N_TTES_PER_SECTOR); 1067 1068 tte = &sec->tt[tteno]; 1069 vg_assert(tte->status == InUse); 1070 1071 if (overlaps( guest_start, range, &tte->vge )) { 1072 anyDeld = True; 1073 delete_tte( sec, (Int)tteno ); 1074 } 1075 1076 } 1077 1078 return anyDeld; 1079} 1080 1081 1082/* Delete translations from sec which intersect specified range, the 1083 slow way, by inspecting all translations in sec. 
*/ 1084 1085static 1086Bool delete_translations_in_sector ( /*MOD*/Sector* sec, 1087 Addr64 guest_start, ULong range ) 1088{ 1089 Int i; 1090 Bool anyDeld = False; 1091 1092 for (i = 0; i < N_TTES_PER_SECTOR; i++) { 1093 if (sec->tt[i].status == InUse 1094 && overlaps( guest_start, range, &sec->tt[i].vge )) { 1095 anyDeld = True; 1096 delete_tte( sec, i ); 1097 } 1098 } 1099 1100 return anyDeld; 1101} 1102 1103 1104void VG_(discard_translations) ( Addr64 guest_start, ULong range, 1105 HChar* who ) 1106{ 1107 Sector* sec; 1108 Int sno, ec; 1109 Bool anyDeleted = False; 1110 1111 vg_assert(init_done); 1112 1113 VG_(debugLog)(2, "transtab", 1114 "discard_translations(0x%llx, %lld) req by %s\n", 1115 guest_start, range, who ); 1116 1117 /* Pre-deletion sanity check */ 1118 if (VG_(clo_sanity_level >= 4)) { 1119 Bool sane = sanity_check_all_sectors(); 1120 vg_assert(sane); 1121 } 1122 1123 if (range == 0) 1124 return; 1125 1126 /* There are two different ways to do this. 1127 1128 If the range fits within a single address-range equivalence 1129 class, as will be the case for a cache line sized invalidation, 1130 then we only have to inspect the set of translations listed in 1131 that equivalence class, and also in the "sin-bin" equivalence 1132 class ECLASS_MISC. 1133 1134 Otherwise, the invalidation is of a larger range and probably 1135 results from munmap. In this case it's (probably!) faster just 1136 to inspect all translations, dump those we don't want, and 1137 regenerate the equivalence class information (since modifying it 1138 in-situ is even more expensive). 1139 */ 1140 1141 /* First off, figure out if the range falls within a single class, 1142 and if so which one. */ 1143 1144 ec = ECLASS_MISC; 1145 if (range < (1ULL << ECLASS_SHIFT)) 1146 ec = range_to_eclass( guest_start, (UInt)range ); 1147 1148 /* if ec is ECLASS_MISC then we aren't looking at just a single 1149 class, so use the slow scheme. 
Else use the fast scheme, 1150 examining 'ec' and ECLASS_MISC. */ 1151 1152 if (ec != ECLASS_MISC) { 1153 1154 VG_(debugLog)(2, "transtab", 1155 " FAST, ec = %d\n", ec); 1156 1157 /* Fast scheme */ 1158 vg_assert(ec >= 0 && ec < ECLASS_MISC); 1159 1160 for (sno = 0; sno < N_SECTORS; sno++) { 1161 sec = §ors[sno]; 1162 if (sec->tc == NULL) 1163 continue; 1164 anyDeleted |= delete_translations_in_sector_eclass( 1165 sec, guest_start, range, ec ); 1166 anyDeleted |= delete_translations_in_sector_eclass( 1167 sec, guest_start, range, ECLASS_MISC ); 1168 } 1169 1170 } else { 1171 1172 /* slow scheme */ 1173 1174 VG_(debugLog)(2, "transtab", 1175 " SLOW, ec = %d\n", ec); 1176 1177 for (sno = 0; sno < N_SECTORS; sno++) { 1178 sec = §ors[sno]; 1179 if (sec->tc == NULL) 1180 continue; 1181 anyDeleted |= delete_translations_in_sector( 1182 sec, guest_start, range ); 1183 } 1184 1185 } 1186 1187 if (anyDeleted) 1188 invalidateFastCache(); 1189 1190 /* don't forget the no-redir cache */ 1191 unredir_discard_translations( guest_start, range ); 1192 1193 /* Post-deletion sanity check */ 1194 if (VG_(clo_sanity_level >= 4)) { 1195 Int i; 1196 TTEntry* tte; 1197 Bool sane = sanity_check_all_sectors(); 1198 vg_assert(sane); 1199 /* But now, also check the requested address range isn't 1200 present anywhere. */ 1201 for (sno = 0; sno < N_SECTORS; sno++) { 1202 sec = §ors[sno]; 1203 if (sec->tc == NULL) 1204 continue; 1205 for (i = 0; i < N_TTES_PER_SECTOR; i++) { 1206 tte = &sec->tt[i]; 1207 if (tte->status != InUse) 1208 continue; 1209 vg_assert(!overlaps( guest_start, range, &tte->vge )); 1210 } 1211 } 1212 } 1213} 1214 1215 1216/*------------------------------------------------------------*/ 1217/*--- AUXILIARY: the unredirected TT/TC ---*/ 1218/*------------------------------------------------------------*/ 1219 1220/* A very simple translation cache which holds a small number of 1221 unredirected translations. This is completely independent of the 1222 main tt/tc structures. 
When unredir_tc or unredir_tt becomes full, 1223 both structures are simply dumped and we start over. 1224 1225 Since these translations are unredirected, the search key is (by 1226 definition) the first address entry in the .vge field. */ 1227 1228/* Sized to hold 500 translations of average size 1000 bytes. */ 1229 1230#define UNREDIR_SZB 1000 1231 1232#define N_UNREDIR_TT 500 1233#define N_UNREDIR_TCQ (N_UNREDIR_TT * UNREDIR_SZB / sizeof(ULong)) 1234 1235typedef 1236 struct { 1237 VexGuestExtents vge; 1238 Addr hcode; 1239 Bool inUse; 1240 } 1241 UTCEntry; 1242 1243/* We just allocate forwards in _tc, never deleting. */ 1244static ULong *unredir_tc; 1245static Int unredir_tc_used = N_UNREDIR_TCQ; 1246 1247/* Slots in _tt can come into use and out again (.inUse). 1248 Nevertheless _tt_highwater is maintained so that invalidations 1249 don't have to scan all the slots when only a few are in use. 1250 _tt_highwater holds the index of the highest ever allocated 1251 slot. */ 1252static UTCEntry unredir_tt[N_UNREDIR_TT]; 1253static Int unredir_tt_highwater; 1254 1255 1256static void init_unredir_tt_tc ( void ) 1257{ 1258 Int i; 1259 if (unredir_tc == NULL) { 1260 SysRes sres = VG_(am_mmap_anon_float_valgrind)( N_UNREDIR_TT * UNREDIR_SZB ); 1261 if (sres.isError) { 1262 VG_(out_of_memory_NORETURN)("init_unredir_tt_tc", N_UNREDIR_TT * UNREDIR_SZB); 1263 /*NOTREACHED*/ 1264 } 1265 unredir_tc = (ULong *)sres.val; 1266 } 1267 unredir_tc_used = 0; 1268 for (i = 0; i < N_UNREDIR_TT; i++) 1269 unredir_tt[i].inUse = False; 1270 unredir_tt_highwater = -1; 1271} 1272 1273/* Do a sanity check; return False on failure. 
*/ 1274static Bool sanity_check_redir_tt_tc ( void ) 1275{ 1276 Int i; 1277 if (unredir_tt_highwater < -1) return False; 1278 if (unredir_tt_highwater >= N_UNREDIR_TT) return False; 1279 1280 for (i = unredir_tt_highwater+1; i < N_UNREDIR_TT; i++) 1281 if (unredir_tt[i].inUse) 1282 return False; 1283 1284 if (unredir_tc_used < 0) return False; 1285 if (unredir_tc_used > N_UNREDIR_TCQ) return False; 1286 1287 return True; 1288} 1289 1290 1291/* Add an UNREDIRECTED translation of vge to TT/TC. The translation 1292 is temporarily in code[0 .. code_len-1]. 1293*/ 1294void VG_(add_to_unredir_transtab)( VexGuestExtents* vge, 1295 Addr64 entry, 1296 AddrH code, 1297 UInt code_len, 1298 Bool is_self_checking ) 1299{ 1300 Int i, j, code_szQ; 1301 HChar *srcP, *dstP; 1302 1303 vg_assert(sanity_check_redir_tt_tc()); 1304 1305 /* This is the whole point: it's not redirected! */ 1306 vg_assert(entry == vge->base[0]); 1307 1308 /* How many unredir_tt slots are needed */ 1309 code_szQ = (code_len + 7) / 8; 1310 1311 /* Look for an empty unredir_tc slot */ 1312 for (i = 0; i < N_UNREDIR_TT; i++) 1313 if (!unredir_tt[i].inUse) 1314 break; 1315 1316 if (i >= N_UNREDIR_TT || code_szQ > (N_UNREDIR_TCQ - unredir_tc_used)) { 1317 /* It's full; dump everything we currently have */ 1318 init_unredir_tt_tc(); 1319 i = 0; 1320 } 1321 1322 vg_assert(unredir_tc_used >= 0); 1323 vg_assert(unredir_tc_used <= N_UNREDIR_TCQ); 1324 vg_assert(code_szQ > 0); 1325 vg_assert(code_szQ + unredir_tc_used <= N_UNREDIR_TCQ); 1326 vg_assert(i >= 0 && i < N_UNREDIR_TT); 1327 vg_assert(unredir_tt[i].inUse == False); 1328 1329 if (i > unredir_tt_highwater) 1330 unredir_tt_highwater = i; 1331 1332 dstP = (HChar*)&unredir_tc[unredir_tc_used]; 1333 srcP = (HChar*)code; 1334 for (j = 0; j < code_len; j++) 1335 dstP[j] = srcP[j]; 1336 1337 invalidate_icache( dstP, code_len ); 1338 1339 unredir_tt[i].inUse = True; 1340 unredir_tt[i].vge = *vge; 1341 unredir_tt[i].hcode = (Addr)dstP; 1342 1343 unredir_tc_used += 
code_szQ; 1344 vg_assert(unredir_tc_used >= 0); 1345 vg_assert(unredir_tc_used <= N_UNREDIR_TCQ); 1346 1347 vg_assert(&dstP[code_len] <= (HChar*)&unredir_tc[unredir_tc_used]); 1348} 1349 1350Bool VG_(search_unredir_transtab) ( /*OUT*/AddrH* result, 1351 Addr64 guest_addr ) 1352{ 1353 Int i; 1354 for (i = 0; i < N_UNREDIR_TT; i++) { 1355 if (!unredir_tt[i].inUse) 1356 continue; 1357 if (unredir_tt[i].vge.base[0] == guest_addr) { 1358 *result = (AddrH)unredir_tt[i].hcode; 1359 return True; 1360 } 1361 } 1362 return False; 1363} 1364 1365static void unredir_discard_translations( Addr64 guest_start, ULong range ) 1366{ 1367 Int i; 1368 1369 vg_assert(sanity_check_redir_tt_tc()); 1370 1371 for (i = 0; i <= unredir_tt_highwater; i++) { 1372 if (unredir_tt[i].inUse 1373 && overlaps( guest_start, range, &unredir_tt[i].vge)) 1374 unredir_tt[i].inUse = False; 1375 } 1376} 1377 1378 1379/*------------------------------------------------------------*/ 1380/*--- Initialisation. ---*/ 1381/*------------------------------------------------------------*/ 1382 1383void VG_(init_tt_tc) ( void ) 1384{ 1385 Int i, j, avg_codeszQ; 1386 1387 vg_assert(!init_done); 1388 init_done = True; 1389 1390 /* Otherwise lots of things go wrong... */ 1391 vg_assert(sizeof(ULong) == 8); 1392 vg_assert(sizeof(Addr64) == 8); 1393 1394 if (VG_(clo_verbosity) > 2) 1395 VG_(message)(Vg_DebugMsg, 1396 "TT/TC: VG_(init_tt_tc) " 1397 "(startup of code management)"); 1398 1399 /* Figure out how big each tc area should be. */ 1400 avg_codeszQ = (VG_(details).avg_translation_sizeB + 7) / 8; 1401 tc_sector_szQ = N_TTES_PER_SECTOR_USABLE * (1 + avg_codeszQ); 1402 1403 /* Ensure the calculated value is not way crazy. 
*/ 1404 vg_assert(tc_sector_szQ >= 2 * N_TTES_PER_SECTOR_USABLE); 1405 vg_assert(tc_sector_szQ <= 50 * N_TTES_PER_SECTOR_USABLE); 1406 1407 /* Initialise the sectors */ 1408 youngest_sector = 0; 1409 for (i = 0; i < N_SECTORS; i++) { 1410 sectors[i].tc = NULL; 1411 sectors[i].tt = NULL; 1412 sectors[i].tc_next = NULL; 1413 sectors[i].tt_n_inuse = 0; 1414 for (j = 0; j < ECLASS_N; j++) { 1415 sectors[i].ec2tte_size[j] = 0; 1416 sectors[i].ec2tte_used[j] = 0; 1417 sectors[i].ec2tte[j] = NULL; 1418 } 1419 } 1420 1421 /* and the fast caches. */ 1422 invalidateFastCache(); 1423 1424 /* and the unredir tt/tc */ 1425 init_unredir_tt_tc(); 1426 1427 if (VG_(clo_verbosity) > 2) { 1428 VG_(message)(Vg_DebugMsg, 1429 "TT/TC: cache: %d sectors of %d bytes each = %d total", 1430 N_SECTORS, 8 * tc_sector_szQ, 1431 N_SECTORS * 8 * tc_sector_szQ ); 1432 VG_(message)(Vg_DebugMsg, 1433 "TT/TC: table: %d total entries, max occupancy %d (%d%%)", 1434 N_SECTORS * N_TTES_PER_SECTOR, 1435 N_SECTORS * N_TTES_PER_SECTOR_USABLE, 1436 SECTOR_TT_LIMIT_PERCENT ); 1437 } 1438 1439 VG_(debugLog)(2, "transtab", 1440 "cache: %d sectors of %d bytes each = %d total\n", 1441 N_SECTORS, 8 * tc_sector_szQ, 1442 N_SECTORS * 8 * tc_sector_szQ ); 1443 VG_(debugLog)(2, "transtab", 1444 "table: %d total entries, max occupancy %d (%d%%)\n", 1445 N_SECTORS * N_TTES_PER_SECTOR, 1446 N_SECTORS * N_TTES_PER_SECTOR_USABLE, 1447 SECTOR_TT_LIMIT_PERCENT ); 1448} 1449 1450 1451/*------------------------------------------------------------*/ 1452/*--- Printing out statistics. ---*/ 1453/*------------------------------------------------------------*/ 1454 1455static ULong safe_idiv( ULong a, ULong b ) 1456{ 1457 return (b == 0 ? 
0 : a / b); 1458} 1459 1460UInt VG_(get_bbs_translated) ( void ) 1461{ 1462 return n_in_count; 1463} 1464 1465void VG_(print_tt_tc_stats) ( void ) 1466{ 1467 VG_(message)(Vg_DebugMsg, 1468 " tt/tc: %,llu tt lookups requiring %,llu probes", 1469 n_full_lookups, n_lookup_probes ); 1470 VG_(message)(Vg_DebugMsg, 1471 " tt/tc: %,llu fast-cache updates, %,llu flushes", 1472 n_fast_updates, n_fast_flushes ); 1473 1474 VG_(message)(Vg_DebugMsg, 1475 " transtab: new %,lld " 1476 "(%,llu -> %,llu; ratio %,llu:10) [%,llu scs]", 1477 n_in_count, n_in_osize, n_in_tsize, 1478 safe_idiv(10*n_in_tsize, n_in_osize), 1479 n_in_sc_count); 1480 VG_(message)(Vg_DebugMsg, 1481 " transtab: dumped %,llu (%,llu -> ?" "?)", 1482 n_dump_count, n_dump_osize ); 1483 VG_(message)(Vg_DebugMsg, 1484 " transtab: discarded %,llu (%,llu -> ?" "?)", 1485 n_disc_count, n_disc_osize ); 1486 1487 if (0) { 1488 Int i; 1489 VG_(printf)("\n"); 1490 for (i = 0; i < ECLASS_N; i++) { 1491 VG_(printf)(" %4d", sectors[0].ec2tte_used[i]); 1492 if (i % 16 == 15) 1493 VG_(printf)("\n"); 1494 } 1495 VG_(printf)("\n\n"); 1496 } 1497} 1498 1499/*------------------------------------------------------------*/ 1500/*--- Printing out of profiling results. ---*/ 1501/*------------------------------------------------------------*/ 1502 1503static ULong score ( TTEntry* tte ) 1504{ 1505 return ((ULong)tte->weight) * ((ULong)tte->count); 1506} 1507 1508ULong VG_(get_BB_profile) ( BBProfEntry tops[], UInt n_tops ) 1509{ 1510 Int sno, i, r, s; 1511 ULong score_total; 1512 1513 /* First, compute the total weighted count, and find the top N 1514 ttes. tops contains pointers to the most-used n_tops blocks, in 1515 descending order (viz, tops[0] is the highest scorer). 
*/ 1516 for (i = 0; i < n_tops; i++) { 1517 tops[i].addr = 0; 1518 tops[i].score = 0; 1519 } 1520 1521 score_total = 0; 1522 1523 for (sno = 0; sno < N_SECTORS; sno++) { 1524 if (sectors[sno].tc == NULL) 1525 continue; 1526 for (i = 0; i < N_TTES_PER_SECTOR; i++) { 1527 if (sectors[sno].tt[i].status != InUse) 1528 continue; 1529 score_total += score(§ors[sno].tt[i]); 1530 /* Find the rank for sectors[sno].tt[i]. */ 1531 r = n_tops-1; 1532 while (True) { 1533 if (r == -1) 1534 break; 1535 if (tops[r].addr == 0) { 1536 r--; 1537 continue; 1538 } 1539 if ( score(§ors[sno].tt[i]) > tops[r].score ) { 1540 r--; 1541 continue; 1542 } 1543 break; 1544 } 1545 r++; 1546 vg_assert(r >= 0 && r <= n_tops); 1547 /* This bb should be placed at r, and bbs above it shifted 1548 upwards one slot. */ 1549 if (r < n_tops) { 1550 for (s = n_tops-1; s > r; s--) 1551 tops[s] = tops[s-1]; 1552 tops[r].addr = sectors[sno].tt[i].entry; 1553 tops[r].score = score( §ors[sno].tt[i] ); 1554 } 1555 } 1556 } 1557 1558 return score_total; 1559} 1560 1561/*--------------------------------------------------------------------*/ 1562/*--- end ---*/ 1563/*--------------------------------------------------------------------*/ 1564