/* m_transtab.c, revision e808930793aeddc4cfd3e7a94b665913bec2566c */

/*--------------------------------------------------------------------*/
/*--- Management of the translation table and cache.               ---*/
/*---                                                 m_transtab.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2000-2006 Julian Seward
      jseward@acm.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "pub_core_basics.h"
#include "pub_core_debuglog.h"
#include "pub_core_machine.h"    // For VG(machine_get_VexArchInfo)
#include "pub_core_libcbase.h"
#include "pub_core_libcassert.h"
#include "pub_core_libcprint.h"
#include "pub_core_options.h"
#include "pub_core_tooliface.h"  // For VG_(details).avg_translation_sizeB
#include "pub_core_transtab.h"
#include "pub_core_aspacemgr.h"
#include "pub_core_mallocfree.h" // VG_(out_of_memory_NORETURN)

/* #define DEBUG_TRANSTAB */


/*-------------------------------------------------------------*/
/*--- Management of the FIFO-based translation table+cache.
                                                            ---*/
/*-------------------------------------------------------------*/

/*------------------ CONSTANTS ------------------*/

/* Number of sectors the TC is divided into.  If you need a larger
   overall translation cache, increase this value. */
#define N_SECTORS 8

/* Number of TC entries in each sector.  This needs to be a prime
   number to work properly, it must be <= 65535 (so that a TT index
   fits in a UShort, leaving room for 0xFFFF(EC2TTE_DELETED) to denote
   'deleted') and it is strongly recommended not to change this.
   65521 is the largest prime <= 65535. */
#define N_TTES_PER_SECTOR /*30011*/ /*40009*/ 65521

/* Because each sector contains a hash table of TTEntries, we need to
   specify the maximum allowable loading, after which the sector is
   deemed full. */
#define SECTOR_TT_LIMIT_PERCENT 80

/* The sector is deemed full when this many entries are in it. */
#define N_TTES_PER_SECTOR_USABLE \
           ((N_TTES_PER_SECTOR * SECTOR_TT_LIMIT_PERCENT) / 100)

/* Equivalence classes for fast address range deletion.  There are 1 +
   2^ECLASS_WIDTH bins.  The highest one, ECLASS_MISC, describes an
   address range which does not fall cleanly within any specific bin.
   Note that ECLASS_SHIFT + ECLASS_WIDTH must be < 32. */
#define ECLASS_SHIFT 11
#define ECLASS_WIDTH 8
#define ECLASS_MISC  (1 << ECLASS_WIDTH)
#define ECLASS_N     (1 + ECLASS_MISC)

/* Marker used in the ec2tte arrays for a lazily-deleted entry; must
   not collide with any valid TT index, hence the <= 65535 limit on
   N_TTES_PER_SECTOR above. */
#define EC2TTE_DELETED  0xFFFF /* 16-bit special value */


/*------------------ TYPES ------------------*/

/* A translation-cache entry is two parts:
   - The guest address of the first (entry) bb in the translation,
     as a 64-bit word.
   - One or more 64-bit words containing the code.
   It is supposed to be 64-bit aligned.
*/
/*
typedef
   struct {
      Addr64 orig_addr;
      ULong  code[0];
   }
   TCEntry;
*/

/* A translation-table entry.  This indicates precisely which areas of
   guest code are included in the translation, and contains all other
   auxiliary info too.  */
typedef
   struct {
      /* Profiling only: the count and weight (arbitrary meaning) for
         this translation.  Weight is a property of the translation
         itself and computed once when the translation is created.
         Count is an entry count for the translation and is
         incremented by 1 every time the translation is used, if we
         are profiling. */
      UInt     count;
      UShort   weight;

      /* Status of the slot.  Note, we need to be able to do lazy
         deletion, hence the Deleted state. */
      enum { InUse, Deleted, Empty } status;

      /* Pointer to the corresponding TCEntry (must be in the same
         sector!) */
      ULong* tce;

      /* This is the original guest address that purportedly is the
         entry point of the translation.  You might think that .entry
         should be the same as .vge->base[0], and most of the time it
         is.  However, when doing redirections, that is not the case.
         .vge must always correctly describe the guest code sections
         from which this translation was made.  However, .entry may or
         may not be a lie, depending on whether or not we're doing
         redirection. */
      Addr64 entry;

      /* This structure describes precisely what ranges of guest code
         the translation covers, so we can decide whether or not to
         delete it when translations of a given address range are
         invalidated. */
      VexGuestExtents vge;

      /* Address range summary info: these are pointers back to
         eclass[] entries in the containing Sector.  Those entries in
         turn point back here -- the two structures are mutually
         redundant but both necessary to make fast deletions work.
         The eclass info is similar to, and derived from, this entry's
         'vge' field, but it is not the same */
      UShort n_tte2ec;      // # tte2ec pointers (1 to 3)
      UShort tte2ec_ec[3];  // for each, the eclass #
      UInt   tte2ec_ix[3];  // and the index within the eclass.
      // for i in 0 .. n_tte2ec-1
      //    sec->ec2tte[ tte2ec_ec[i] ][ tte2ec_ix[i] ]
      // should be the index
      // of this TTEntry in the containing Sector's tt array.
   }
   TTEntry;


/* Finally, a sector itself.  Each sector contains an array of
   TCEntries, which hold code, and an array of TTEntries, containing
   all required administrative info.  Profiling is supported using the
   TTEntry .count and .weight fields, if required.  Each sector is
   independent in that no cross-sector references are allowed.

   If the sector is not in use, all three pointers are NULL and
   tt_n_inuse is zero.
*/
typedef
   struct {
      /* The TCEntry area.  Size of this depends on the average
         translation size.  We try and size it so it becomes full
         precisely when this sector's translation table (tt) reaches
         its load limit (SECTOR_TT_LIMIT_PERCENT). */
      ULong* tc;

      /* The TTEntry array.  This is a fixed size, always containing
         exactly N_TTES_PER_SECTOR entries. */
      TTEntry* tt;

      /* This points to the current allocation point in tc. */
      ULong* tc_next;

      /* The count of tt entries with state InUse. */
      Int tt_n_inuse;

      /* Expandable arrays of tt indices for each of the ECLASS_N
         address range equivalence classes.  These hold indices into
         the containing sector's tt array, which in turn should point
         back here. */
      Int     ec2tte_size[ECLASS_N];
      Int     ec2tte_used[ECLASS_N];
      UShort* ec2tte[ECLASS_N];
   }
   Sector;


/*------------------ DECLS ------------------*/

/* The root data structure is an array of sectors.
   The index of the
   youngest sector is recorded, and new translations are put into that
   sector.  When it fills up, we move along to the next sector and
   start to fill that up, wrapping around at the end of the array.
   That way, once all N_TC_SECTORS have been brought into use for the
   first time, and are full, we then re-use the oldest sector,
   endlessly.

   When running, youngest sector should be between >= 0 and <
   N_TC_SECTORS.  The initial -1 value indicates the TT/TC system is
   not yet initialised.
*/
static Sector sectors[N_SECTORS];
static Int    youngest_sector = -1;

/* The number of ULongs in each TCEntry area.  This is computed once
   at startup and does not change. */
static Int    tc_sector_szQ;


/* Fast helper for the TC.  A direct-mapped cache which holds a
   pointer to a TC entry which may or may not be the correct one, but
   which we hope usually is.  This array is referred to directly from
   <arch>/dispatch.S.

   Entries in tt_fast may point to any valid TC entry, regardless of
   which sector it's in.  Consequently we must be very careful to
   invalidate this cache when TC entries are changed or disappear.

   A special TCEntry -- bogus_tc_entry -- must be pointed at to cause
   that cache entry to miss.  This relies on the assumption that no
   guest code actually has an address of 0x1.
*/
/*global*/ ULong* VG_(tt_fast)[VG_TT_FAST_SIZE];

static ULong bogus_tc_entry = (Addr64)1;


/* For profiling, we have a parallel array of pointers to .count
   fields in TT entries.  Again, these pointers must be invalidated
   when translations disappear.  A NULL pointer suffices to indicate
   an unused slot.

   tt_fast and tt_fastN change together: if tt_fast[i] points to
   bogus_tc_entry then the corresponding tt_fastN[i] must be null.  If
   tt_fast[i] points to some TC entry somewhere, then tt_fastN[i]
   *must* point to the .count field of the corresponding TT entry.

   tt_fast and tt_fastN are referred to from assembly code
   (dispatch.S).
*/
/*global*/ UInt* VG_(tt_fastN)[VG_TT_FAST_SIZE];


/* Make sure we're not used before initialisation. */
static Bool init_done = False;


/*------------------ STATS DECLS ------------------*/

/* Number of fast-cache updates and flushes done. */
ULong n_fast_flushes = 0;
ULong n_fast_updates = 0;

/* Number of full lookups done. */
ULong n_full_lookups = 0;
ULong n_lookup_probes = 0;

/* Number/osize/tsize of translations entered; also the number of
   those for which self-checking was requested. */
ULong n_in_count    = 0;
ULong n_in_osize    = 0;
ULong n_in_tsize    = 0;
ULong n_in_sc_count = 0;

/* Number/osize of translations discarded due to lack of space. */
ULong n_dump_count = 0;
ULong n_dump_osize = 0;

/* Number/osize of translations discarded due to requests to do so. */
ULong n_disc_count = 0;
ULong n_disc_osize = 0;


/*-------------------------------------------------------------*/
/*--- Address-range equivalence class stuff                 ---*/
/*-------------------------------------------------------------*/

/* Return equivalence class number for a range.  Only the low 32 bits
   of 'start' participate; a range whose low and high ends hash to
   different bins is lumped into ECLASS_MISC. */

static Int range_to_eclass ( Addr64 start, UInt len )
{
   UInt mask   = (1 << ECLASS_WIDTH) - 1;
   UInt lo     = (UInt)start;
   UInt hi     = lo + len - 1;
   UInt loBits = (lo >> ECLASS_SHIFT) & mask;
   UInt hiBits = (hi >> ECLASS_SHIFT) & mask;
   if (loBits == hiBits) {
      vg_assert(loBits < ECLASS_N-1);
      return loBits;
   } else {
      return ECLASS_MISC;
   }
}


/* Calculates the equivalence class numbers for any VexGuestExtent.
   These are written in *eclasses, which must be big enough to hold 3
   Ints.
   The number written, between 1 and 3, is returned.  The
   eclasses are presented in order, and any duplicates are removed.
*/

static
Int vexGuestExtents_to_eclasses ( /*OUT*/Int* eclasses,
                                  VexGuestExtents* vge )
{
#  define SWAP(_lv1,_lv2) \
      do { Int t = _lv1; _lv1 = _lv2; _lv2 = t; } while (0)

   Int i, j, n_ec, r;

   vg_assert(vge->n_used >= 1 && vge->n_used <= 3);

   n_ec = 0;
   for (i = 0; i < vge->n_used; i++) {
      r = range_to_eclass( vge->base[i], (UInt)vge->len[i] );
      if (r == ECLASS_MISC)
         goto bad;
      /* only add if we haven't already seen it */
      for (j = 0; j < n_ec; j++)
         if (eclasses[j] == r)
            break;
      if (j == n_ec)
         eclasses[n_ec++] = r;
   }

   if (n_ec == 1)
      return 1;

   if (n_ec == 2) {
      /* sort */
      if (eclasses[0] > eclasses[1])
         SWAP(eclasses[0], eclasses[1]);
      return 2;
   }

   if (n_ec == 3) {
      /* sort (3-element sorting network) */
      if (eclasses[0] > eclasses[2])
         SWAP(eclasses[0], eclasses[2]);
      if (eclasses[0] > eclasses[1])
         SWAP(eclasses[0], eclasses[1]);
      if (eclasses[1] > eclasses[2])
         SWAP(eclasses[1], eclasses[2]);
      return 3;
   }

   /* NOTREACHED */
   vg_assert(0);

  bad:
   /* Some extent did not fit a single bin: the whole translation goes
      in the catch-all class. */
   eclasses[0] = ECLASS_MISC;
   return 1;

#  undef SWAP
}


/* Add tteno to the set of entries listed for equivalence class ec in
   this sector.  Returns used location in eclass array.  The array is
   grown geometrically on demand (8, then x2 up to 64, then x1.5). */

static
UInt addEClassNo ( /*MOD*/Sector* sec, Int ec, UShort tteno )
{
   Int    old_sz, new_sz, i, r;
   UShort *old_ar, *new_ar;

   vg_assert(ec >= 0 && ec < ECLASS_N);
   vg_assert(tteno < N_TTES_PER_SECTOR);

   if (0) VG_(printf)("ec %d  gets %d\n", ec, (Int)tteno);

   if (sec->ec2tte_used[ec] >= sec->ec2tte_size[ec]) {

      vg_assert(sec->ec2tte_used[ec] == sec->ec2tte_size[ec]);

      old_sz = sec->ec2tte_size[ec];
      old_ar = sec->ec2tte[ec];
      new_sz = old_sz==0 ? 8 : old_sz<64 ? 2*old_sz : (3*old_sz)/2;
      new_ar = VG_(arena_malloc)(VG_AR_TTAUX, new_sz * sizeof(UShort));
      for (i = 0; i < old_sz; i++)
         new_ar[i] = old_ar[i];
      if (old_ar)
         VG_(arena_free)(VG_AR_TTAUX, old_ar);
      sec->ec2tte_size[ec] = new_sz;
      sec->ec2tte[ec] = new_ar;

      if (0) VG_(printf)("expand ec %d to %d\n", ec, new_sz);
   }

   /* Common case */
   r = sec->ec2tte_used[ec]++;
   vg_assert(r >= 0 && r < sec->ec2tte_size[ec]);
   sec->ec2tte[ec][r] = tteno;
   return (UInt)r;
}


/* 'vge' is being added to 'sec' at TT entry 'tteno'.  Add appropriate
   eclass entries to 'sec', and record the back-links in the TTEntry. */

static
void upd_eclasses_after_add ( /*MOD*/Sector* sec, Int tteno )
{
   Int i, r, eclasses[3];
   TTEntry* tte;
   vg_assert(tteno >= 0 && tteno < N_TTES_PER_SECTOR);

   tte = &sec->tt[tteno];
   r = vexGuestExtents_to_eclasses( eclasses, &tte->vge );

   vg_assert(r >= 1 && r <= 3);
   tte->n_tte2ec = r;

   for (i = 0; i < r; i++) {
      tte->tte2ec_ec[i] = eclasses[i];
      tte->tte2ec_ix[i] = addEClassNo( sec, eclasses[i], (UShort)tteno );
   }
}


/* Check the eclass info in 'sec' to ensure it is consistent.  Returns
   True if OK, False if something's not right.  Expensive. */

static Bool sanity_check_eclasses_in_sector ( Sector* sec )
{
#  define BAD(_str) do { whassup = (_str); goto bad; } while (0)

   HChar*   whassup = NULL;
   Int      i, j, k, n, ec_num, ec_idx;
   TTEntry* tte;
   UShort   tteno;
   ULong*   tce;

   /* Basic checks on this sector */
   if (sec->tt_n_inuse < 0 || sec->tt_n_inuse > N_TTES_PER_SECTOR_USABLE)
      BAD("invalid sec->tt_n_inuse");
   tce = sec->tc_next;
   if (tce < &sec->tc[0] || tce > &sec->tc[tc_sector_szQ])
      BAD("sec->tc_next points outside tc");

   /* For each eclass ... */
   for (i = 0; i < ECLASS_N; i++) {
      if (sec->ec2tte_size[i] == 0 && sec->ec2tte[i] != NULL)
         BAD("ec2tte_size/ec2tte mismatch(1)");
      if (sec->ec2tte_size[i] != 0 && sec->ec2tte[i] == NULL)
         BAD("ec2tte_size/ec2tte mismatch(2)");
      if (sec->ec2tte_used[i] < 0
          || sec->ec2tte_used[i] > sec->ec2tte_size[i])
         BAD("implausible ec2tte_used");
      if (sec->ec2tte_used[i] == 0)
         continue;

      /* For each tt reference in each eclass .. ensure the reference
         is to a valid tt entry, and that the entry's address ranges
         really include this eclass. */

      for (j = 0; j < sec->ec2tte_used[i]; j++) {
         tteno = sec->ec2tte[i][j];
         if (tteno == EC2TTE_DELETED)
            continue;
         if (tteno >= N_TTES_PER_SECTOR)
            BAD("implausible tteno");
         tte = &sec->tt[tteno];
         if (tte->status != InUse)
            BAD("tteno points to non-inuse tte");
         if (tte->n_tte2ec < 1 || tte->n_tte2ec > 3)
            BAD("tte->n_tte2ec out of range");
         /* Exactly one of tte->tte2ec_ec[0 .. tte->n_tte2ec-1]
            must equal i.  Inspect tte's eclass info. */
         n = 0;
         for (k = 0; k < tte->n_tte2ec; k++) {
            if (k < tte->n_tte2ec-1
                && tte->tte2ec_ec[k] >= tte->tte2ec_ec[k+1])
               BAD("tte->tte2ec_ec[..] out of order");
            ec_num = tte->tte2ec_ec[k];
            if (ec_num < 0 || ec_num >= ECLASS_N)
               BAD("tte->tte2ec_ec[..] out of range");
            if (ec_num != i)
               continue;
            ec_idx = tte->tte2ec_ix[k];
            if (ec_idx < 0 || ec_idx >= sec->ec2tte_used[i])
               BAD("tte->tte2ec_ix[..] out of range");
            if (ec_idx == j)
               n++;
         }
         if (n != 1)
            BAD("tteno does not point back at eclass");
      }
   }

   /* That establishes that for each forward pointer from TTEntrys
      there is a corresponding backward pointer from the eclass[]
      arrays.  However, it doesn't rule out the possibility of other,
      bogus pointers in the eclass[] arrays.  So do those similarly:
      scan through them and check the TTEntries they point at point
      back. */

   for (i = 0; i < N_TTES_PER_SECTOR_USABLE; i++) {

      tte = &sec->tt[i];
      if (tte->status == Empty || tte->status == Deleted) {
         if (tte->n_tte2ec != 0)
            BAD("tte->n_eclasses nonzero for unused tte");
         continue;
      }

      vg_assert(tte->status == InUse);

      if (tte->n_tte2ec < 1 || tte->n_tte2ec > 3)
         BAD("tte->n_eclasses out of range(2)");

      for (j = 0; j < tte->n_tte2ec; j++) {
         ec_num = tte->tte2ec_ec[j];
         if (ec_num < 0 || ec_num >= ECLASS_N)
            BAD("tte->eclass[..] out of range");
         ec_idx = tte->tte2ec_ix[j];
         if (ec_idx < 0 || ec_idx >= sec->ec2tte_used[ec_num])
            BAD("tte->ec_idx[..] out of range(2)");
         if (sec->ec2tte[ec_num][ec_idx] != i)
            BAD("ec2tte does not point back to tte");
      }
   }

   return True;

  bad:
   if (whassup)
      VG_(debugLog)(0, "transtab", "eclass sanity fail: %s\n", whassup);

#  if 0
   VG_(printf)("eclass = %d\n", i);
   VG_(printf)("tteno = %d\n", (Int)tteno);
   switch (tte->status) {
      case InUse:   VG_(printf)("InUse\n"); break;
      case Deleted: VG_(printf)("Deleted\n"); break;
      case Empty:   VG_(printf)("Empty\n"); break;
   }
   if (tte->status != Empty) {
      for (k = 0; k < tte->vge.n_used; k++)
         VG_(printf)("0x%llx %d\n", tte->vge.base[k],
                                    (Int)tte->vge.len[k]);
   }
#  endif

   return False;

#  undef BAD
}


/* Sanity check absolutely everything.  True == check passed.
*/

/* forward */
static Bool sanity_check_redir_tt_tc ( void );

static Bool sanity_check_all_sectors ( void )
{
   Int     sno;
   Bool    sane;
   Sector* sec;
   for (sno = 0; sno < N_SECTORS; sno++) {
      /* NOTE: the original paste had '&sectors' mangled to the HTML
         entity form; restored here. */
      sec = &sectors[sno];
      if (sec->tc == NULL)
         continue;  /* sector never brought into use */
      sane = sanity_check_eclasses_in_sector( sec );
      if (!sane)
         return False;
   }
   if (!sanity_check_redir_tt_tc() )
      return False;
   return True;
}


/*-------------------------------------------------------------*/
/*--- Add/find translations                                 ---*/
/*-------------------------------------------------------------*/

/* Total guest-code size, in bytes, described by a VexGuestExtents. */
static UInt vge_osize ( VexGuestExtents* vge )
{
   UInt i, n = 0;
   for (i = 0; i < vge->n_used; i++)
      n += (UInt)vge->len[i];
   return n;
}

static Bool isValidSector ( Int sector )
{
   if (sector < 0 || sector >= N_SECTORS)
      return False;
   return True;
}

/* Hash a guest address to an initial TT probe index: fold the two
   32-bit halves together, rotate right by 7, reduce mod table size. */
static inline UInt HASH_TT ( Addr64 key )
{
   UInt kHi = (UInt)(key >> 32);
   UInt kLo = (UInt)key;
   UInt k32 = kHi ^ kLo;
   UInt ror = 7;
   if (ror > 0)
      k32 = (k32 >> ror) | (k32 << (32-ror));
   return k32 % N_TTES_PER_SECTOR;
}

/* Install (key -> tce, count) in the direct-mapped fast cache. */
static void setFastCacheEntry ( Addr64 key, ULong* tce, UInt* count )
{
   UInt cno = (UInt)VG_TT_FAST_HASH(key);
   VG_(tt_fast)[cno]  = tce;
   VG_(tt_fastN)[cno] = count;
   n_fast_updates++;
}

static void invalidateFastCache ( void )
{
   UInt j;
   /* This loop is popular enough to make it worth unrolling a
      bit, at least on ppc32. */
   vg_assert(VG_TT_FAST_SIZE > 0 && (VG_TT_FAST_SIZE % 4) == 0);
   for (j = 0; j < VG_TT_FAST_SIZE; j += 4) {
      VG_(tt_fast)[j+0] = &bogus_tc_entry;
      VG_(tt_fast)[j+1] = &bogus_tc_entry;
      VG_(tt_fast)[j+2] = &bogus_tc_entry;
      VG_(tt_fast)[j+3] = &bogus_tc_entry;
      VG_(tt_fastN)[j+0] = NULL;
      VG_(tt_fastN)[j+1] = NULL;
      VG_(tt_fastN)[j+2] = NULL;
      VG_(tt_fastN)[j+3] = NULL;
   }
   vg_assert(j == VG_TT_FAST_SIZE);
   n_fast_flushes++;
}

/* Bring sector sno into a pristine, empty state: allocate its tt/tc
   on first use, or discard all existing translations (notifying the
   tool) on recycle.  Always flushes the fast cache afterwards. */
static void initialiseSector ( Int sno )
{
   Int     i;
   SysRes  sres;
   Sector* sec;
   vg_assert(isValidSector(sno));

   sec = &sectors[sno];

   if (sec->tc == NULL) {

      /* Sector has never been used before.  Need to allocate tt and
         tc. */
      vg_assert(sec->tt == NULL);
      vg_assert(sec->tc_next == NULL);
      vg_assert(sec->tt_n_inuse == 0);
      for (i = 0; i < ECLASS_N; i++) {
         vg_assert(sec->ec2tte_size[i] == 0);
         vg_assert(sec->ec2tte_used[i] == 0);
         vg_assert(sec->ec2tte[i] == NULL);
      }

      VG_(debugLog)(1,"transtab", "allocate sector %d\n", sno);

      /* 8 == sizeof(ULong); tc is tc_sector_szQ ULongs long. */
      sres = VG_(am_mmap_anon_float_valgrind)( 8 * tc_sector_szQ );
      if (sres.isError) {
         VG_(out_of_memory_NORETURN)("initialiseSector(TC)",
                                     8 * tc_sector_szQ );
         /*NOTREACHED*/
      }
      sec->tc = (ULong*)sres.res;

      sres = VG_(am_mmap_anon_float_valgrind)
                ( N_TTES_PER_SECTOR * sizeof(TTEntry) );
      if (sres.isError) {
         VG_(out_of_memory_NORETURN)("initialiseSector(TT)",
                                     N_TTES_PER_SECTOR * sizeof(TTEntry) );
         /*NOTREACHED*/
      }
      sec->tt = (TTEntry*)sres.res;

      for (i = 0; i < N_TTES_PER_SECTOR; i++) {
         sec->tt[i].status   = Empty;
         sec->tt[i].n_tte2ec = 0;
      }

      if (VG_(clo_verbosity) > 2)
         VG_(message)(Vg_DebugMsg, "TT/TC: initialise sector %d", sno);

   } else {

      /* Sector has been used before.  Dump the old contents. */
      VG_(debugLog)(1,"transtab", "recycle sector %d\n", sno);
      vg_assert(sec->tt != NULL);
      vg_assert(sec->tc_next != NULL);
      n_dump_count += sec->tt_n_inuse;

      /* Visit each just-about-to-be-abandoned translation. */
      for (i = 0; i < N_TTES_PER_SECTOR; i++) {
         if (sec->tt[i].status == InUse) {
            vg_assert(sec->tt[i].n_tte2ec >= 1);
            vg_assert(sec->tt[i].n_tte2ec <= 3);
            n_dump_osize += vge_osize(&sec->tt[i].vge);
            /* Tell the tool too. */
            if (VG_(needs).basic_block_discards) {
               VG_TDICT_CALL( tool_discard_basic_block_info,
                              sec->tt[i].entry,
                              sec->tt[i].vge );
            }
         } else {
            vg_assert(sec->tt[i].n_tte2ec == 0);
         }
         sec->tt[i].status   = Empty;
         sec->tt[i].n_tte2ec = 0;
      }

      /* Free up the eclass structures. */
      for (i = 0; i < ECLASS_N; i++) {
         if (sec->ec2tte_size[i] == 0) {
            vg_assert(sec->ec2tte_used[i] == 0);
            vg_assert(sec->ec2tte[i] == NULL);
         } else {
            vg_assert(sec->ec2tte[i] != NULL);
            VG_(arena_free)(VG_AR_TTAUX, sec->ec2tte[i]);
            sec->ec2tte[i] = NULL;
            sec->ec2tte_size[i] = 0;
            sec->ec2tte_used[i] = 0;
         }
      }

      if (VG_(clo_verbosity) > 2)
         VG_(message)(Vg_DebugMsg, "TT/TC: recycle sector %d", sno);
   }

   sec->tc_next = sec->tc;
   sec->tt_n_inuse = 0;

   invalidateFastCache();
}

/* Make the I-cache coherent with freshly-written code, on targets
   whose hardware does not do this automatically (ppc). */
static void invalidate_icache ( void *ptr, Int nbytes )
{
#  if defined(VGA_ppc32) || defined(VGA_ppc64)
   Addr startaddr = (Addr) ptr;
   Addr endaddr   = startaddr + nbytes;
   Addr cls;
   Addr addr;
   VexArchInfo vai;

   VG_(machine_get_VexArchInfo)( NULL, &vai );
   cls = vai.ppc_cache_line_szB;

   /* Stay sane ..
   */
   vg_assert(cls == 32 || cls == 128);

   startaddr &= ~(cls - 1);
   for (addr = startaddr; addr < endaddr; addr += cls)
      asm volatile("dcbst 0,%0" : : "r" (addr));
   asm volatile("sync");
   for (addr = startaddr; addr < endaddr; addr += cls)
      asm volatile("icbi 0,%0" : : "r" (addr));
   asm volatile("sync; isync");

#  elif defined(VGA_x86)
   /* no need to do anything, hardware provides coherence */

#  elif defined(VGA_amd64)
   /* no need to do anything, hardware provides coherence */

#  else
#    error "Unknown ARCH"
#  endif
}


/* Add a translation of vge to TT/TC.  The translation is temporarily
   in code[0 .. code_len-1].

   pre: youngest_sector points to a valid (although possibly full)
   sector.
*/
void VG_(add_to_transtab)( VexGuestExtents* vge,
                           Addr64           entry,
                           AddrH            code,
                           UInt             code_len,
                           Bool             is_self_checking )
{
   Int    tcAvailQ, reqdQ, y, i;
   ULong  *tce, *tce2;
   UChar* srcP;
   UChar* dstP;

   vg_assert(init_done);
   vg_assert(vge->n_used >= 1 && vge->n_used <= 3);

   /* 60000: should agree with N_TMPBUF in m_translate.c. */
   vg_assert(code_len > 0 && code_len < 60000);

   if (0)
      VG_(printf)("add_to_transtab(entry = 0x%llx, len = %d)\n",
                  entry, code_len);

   n_in_count++;
   n_in_tsize += code_len;
   n_in_osize += vge_osize(vge);
   if (is_self_checking)
      n_in_sc_count++;

   y = youngest_sector;
   vg_assert(isValidSector(y));

   if (sectors[y].tc == NULL)
      initialiseSector(y);

   /* Try putting the translation in this sector.  One ULong for the
      entry address, plus the code rounded up to ULongs. */
   reqdQ = 1 + ((code_len + 7) >> 3);

   /* Will it fit in tc? */
   tcAvailQ = ((ULong*)(&sectors[y].tc[tc_sector_szQ]))
              - ((ULong*)(sectors[y].tc_next));
   vg_assert(tcAvailQ >= 0);
   vg_assert(tcAvailQ <= tc_sector_szQ);

   if (tcAvailQ < reqdQ
       || sectors[y].tt_n_inuse >= N_TTES_PER_SECTOR_USABLE) {
      /* No.  So move on to the next sector.  Either it's never been
         used before, in which case it will get its tt/tc allocated
         now, or it has been used before, in which case it is set to be
         empty, hence throwing out the oldest sector. */
      vg_assert(tc_sector_szQ > 0);
      VG_(debugLog)(1,"transtab",
                      "declare sector %d full "
                      "(TT loading %2d%%, TC loading %2d%%)\n",
                      y,
                      (100 * sectors[y].tt_n_inuse)
                         / N_TTES_PER_SECTOR,
                      (100 * (tc_sector_szQ - tcAvailQ))
                         / tc_sector_szQ);
      youngest_sector++;
      if (youngest_sector >= N_SECTORS)
         youngest_sector = 0;
      y = youngest_sector;
      initialiseSector(y);
   }

   /* Be sure ... */
   tcAvailQ = ((ULong*)(&sectors[y].tc[tc_sector_szQ]))
              - ((ULong*)(sectors[y].tc_next));
   vg_assert(tcAvailQ >= 0);
   vg_assert(tcAvailQ <= tc_sector_szQ);
   vg_assert(tcAvailQ >= reqdQ);
   vg_assert(sectors[y].tt_n_inuse < N_TTES_PER_SECTOR_USABLE);
   vg_assert(sectors[y].tt_n_inuse >= 0);

   /* Copy into tc. */
   tce = sectors[y].tc_next;
   vg_assert(tce >= &sectors[y].tc[0]);
   vg_assert(tce <= &sectors[y].tc[tc_sector_szQ]);

   tce[0] = entry;
   dstP = (UChar*)(&tce[1]);
   srcP = (UChar*)code;
   for (i = 0; i < code_len; i++)
      dstP[i] = srcP[i];
   sectors[y].tc_next += reqdQ;
   sectors[y].tt_n_inuse++;

   invalidate_icache( dstP, code_len );

   /* more paranoia */
   tce2 = sectors[y].tc_next;
   vg_assert(tce2 >= &sectors[y].tc[0]);
   vg_assert(tce2 <= &sectors[y].tc[tc_sector_szQ]);

   /* Find an empty tt slot, and use it.  There must be such a slot
      since tt is never allowed to get completely full.  Linear-probe
      from the hash of the entry address. */
   i = HASH_TT(entry);
   vg_assert(i >= 0 && i < N_TTES_PER_SECTOR);
   while (True) {
      if (sectors[y].tt[i].status == Empty
          || sectors[y].tt[i].status == Deleted)
         break;
      i++;
      if (i >= N_TTES_PER_SECTOR)
         i = 0;
   }

   sectors[y].tt[i].status = InUse;
   sectors[y].tt[i].tce    = tce;
   sectors[y].tt[i].count  = 0;
   sectors[y].tt[i].weight = 1;
   sectors[y].tt[i].vge    = *vge;
   sectors[y].tt[i].entry  = entry;

   /* Update the fast-cache. */
   setFastCacheEntry( entry, tce, &sectors[y].tt[i].count );

   /* Note the eclass numbers for this translation. */
   upd_eclasses_after_add( &sectors[y], i );
}


/* Search for the translation of the given guest address.  If
   requested, a successful search can also cause the fast-caches to be
   updated.
*/
Bool VG_(search_transtab) ( /*OUT*/AddrH* result,
                            Addr64        guest_addr,
                            Bool          upd_cache )
{
   Int i, j, k, kstart, sno;

   vg_assert(init_done);
   /* Find the initial probe point just once.  It will be the same in
      all sectors and avoids multiple expensive % operations. */
   n_full_lookups++;
   k      = -1;
   kstart = HASH_TT(guest_addr);
   vg_assert(kstart >= 0 && kstart < N_TTES_PER_SECTOR);

   /* Search in all the sectors.  Although the order should not matter,
      it might be most efficient to search in the order youngest to
      oldest. */
   sno = youngest_sector;
   for (i = 0; i < N_SECTORS; i++) {

      if (sectors[sno].tc == NULL)
         goto notfound; /* sector not in use.
                        */

      k = kstart;
      for (j = 0; j < N_TTES_PER_SECTOR; j++) {
         n_lookup_probes++;
         if (sectors[sno].tt[k].status == InUse
             && sectors[sno].tt[k].entry == guest_addr) {
            /* found it */
            if (upd_cache)
               setFastCacheEntry(
                  guest_addr, sectors[sno].tt[k].tce,
                  &sectors[sno].tt[k].count );
            if (result)
               /* +sizeof(Addr64): skip the entry-address word and
                  point at the code itself. */
               *result = sizeof(Addr64) + (AddrH)sectors[sno].tt[k].tce;
            return True;
         }
         if (sectors[sno].tt[k].status == Empty)
            break; /* not found in this sector */
         k++;
         if (k == N_TTES_PER_SECTOR)
            k = 0;
      }

      /* If we fall off the end, all entries are InUse and not
         matching, or Deleted.  In any case we did not find it in this
         sector. */

     notfound:
      /* move to the next oldest sector */
      sno = sno==0 ? (N_SECTORS-1) : (sno-1);
   }

   /* Not found in any sector. */
   return False;
}


/*-------------------------------------------------------------*/
/*--- Delete translations.                                  ---*/
/*-------------------------------------------------------------*/

/* forward */
static void unredir_discard_translations( Addr64, ULong );

/* Stuff for deleting translations which intersect with a given
   address range.  Unfortunately, to make this run at a reasonable
   speed, it is complex. */

/* Do the half-open intervals [s1, s1+r1) and [s2, s2+r2) overlap?
   Computed via inclusive end points, so r1/r2 must be >= 1. */
static inline
Bool overlap1 ( Addr64 s1, ULong r1, Addr64 s2, ULong r2 )
{
   Addr64 e1 = s1 + r1 - 1ULL;
   Addr64 e2 = s2 + r2 - 1ULL;
   if (e1 < s2 || e2 < s1)
      return False;
   return True;
}

/* Does [start, start+range) intersect any extent of vge? */
static inline
Bool overlaps ( Addr64 start, ULong range, VexGuestExtents* vge )
{
   if (overlap1(start, range, vge->base[0], (UInt)vge->len[0]))
      return True;
   if (vge->n_used < 2)
      return False;
   if (overlap1(start, range, vge->base[1], (UInt)vge->len[1]))
      return True;
   if (vge->n_used < 3)
      return False;
   if (overlap1(start, range, vge->base[2], (UInt)vge->len[2]))
      return True;
   return False;
}


/* Delete a tt entry, and update all the eclass data accordingly. */

static void delete_tte ( /*MOD*/Sector* sec, Int tteno )
{
   Int      i, ec_num, ec_idx;
   TTEntry* tte;

   vg_assert(tteno >= 0 && tteno < N_TTES_PER_SECTOR);
   tte = &sec->tt[tteno];
   vg_assert(tte->status == InUse);
   vg_assert(tte->n_tte2ec >= 1 && tte->n_tte2ec <= 3);

   /* Deal with the ec-to-tte links first. */
   for (i = 0; i < tte->n_tte2ec; i++) {
      ec_num = (Int)tte->tte2ec_ec[i];
      ec_idx = tte->tte2ec_ix[i];
      vg_assert(ec_num >= 0 && ec_num < ECLASS_N);
      vg_assert(ec_idx >= 0);
      vg_assert(ec_idx < sec->ec2tte_used[ec_num]);
      /* Assert that the two links point at each other. */
      vg_assert(sec->ec2tte[ec_num][ec_idx] == (UShort)tteno);
      /* "delete" the pointer back to here. */
      sec->ec2tte[ec_num][ec_idx] = EC2TTE_DELETED;
   }

   /* Now fix up this TTEntry. */
   tte->status   = Deleted;
   tte->n_tte2ec = 0;

   /* Stats .. */
   sec->tt_n_inuse--;
   n_disc_count++;
   n_disc_osize += vge_osize(&tte->vge);

   /* Tell the tool too. */
   if (VG_(needs).basic_block_discards) {
      VG_TDICT_CALL( tool_discard_basic_block_info,
                     tte->entry,
                     tte->vge );
   }
}


/* Delete translations from sec which intersect specified range, but
   only consider translations in the specified eclass. */

static
Bool delete_translations_in_sector_eclass ( /*MOD*/Sector* sec,
                                            Addr64 guest_start, ULong range,
                                            Int ec )
{
   Int      i;
   UShort   tteno;
   Bool     anyDeld = False;
   TTEntry* tte;

   vg_assert(ec >= 0 && ec < ECLASS_N);

   for (i = 0; i < sec->ec2tte_used[ec]; i++) {

      tteno = sec->ec2tte[ec][i];
      if (tteno == EC2TTE_DELETED) {
         /* already deleted */
         continue;
      }

      vg_assert(tteno < N_TTES_PER_SECTOR);

      tte = &sec->tt[tteno];
      vg_assert(tte->status == InUse);

      if (overlaps( guest_start, range, &tte->vge )) {
         anyDeld = True;
         delete_tte( sec, (Int)tteno );
      }

   }

   return anyDeld;
}


/* Delete translations from sec which intersect specified range, the
   slow way, by inspecting all translations in sec.
*/ 1086 1087static 1088Bool delete_translations_in_sector ( /*MOD*/Sector* sec, 1089 Addr64 guest_start, ULong range ) 1090{ 1091 Int i; 1092 Bool anyDeld = False; 1093 1094 for (i = 0; i < N_TTES_PER_SECTOR; i++) { 1095 if (sec->tt[i].status == InUse 1096 && overlaps( guest_start, range, &sec->tt[i].vge )) { 1097 anyDeld = True; 1098 delete_tte( sec, i ); 1099 } 1100 } 1101 1102 return anyDeld; 1103} 1104 1105 1106void VG_(discard_translations) ( Addr64 guest_start, ULong range, 1107 HChar* who ) 1108{ 1109 Sector* sec; 1110 Int sno, ec; 1111 Bool anyDeleted = False; 1112 1113 vg_assert(init_done); 1114 1115 VG_(debugLog)(2, "transtab", 1116 "discard_translations(0x%llx, %lld) req by %s\n", 1117 guest_start, range, who ); 1118 1119 /* Pre-deletion sanity check */ 1120 if (VG_(clo_sanity_level >= 4)) { 1121 Bool sane = sanity_check_all_sectors(); 1122 vg_assert(sane); 1123 } 1124 1125 if (range == 0) 1126 return; 1127 1128 /* There are two different ways to do this. 1129 1130 If the range fits within a single address-range equivalence 1131 class, as will be the case for a cache line sized invalidation, 1132 then we only have to inspect the set of translations listed in 1133 that equivalence class, and also in the "sin-bin" equivalence 1134 class ECLASS_MISC. 1135 1136 Otherwise, the invalidation is of a larger range and probably 1137 results from munmap. In this case it's (probably!) faster just 1138 to inspect all translations, dump those we don't want, and 1139 regenerate the equivalence class information (since modifying it 1140 in-situ is even more expensive). 1141 */ 1142 1143 /* First off, figure out if the range falls within a single class, 1144 and if so which one. */ 1145 1146 ec = ECLASS_MISC; 1147 if (range < (1ULL << ECLASS_SHIFT)) 1148 ec = range_to_eclass( guest_start, (UInt)range ); 1149 1150 /* if ec is ECLASS_MISC then we aren't looking at just a single 1151 class, so use the slow scheme. 
Else use the fast scheme, 1152 examining 'ec' and ECLASS_MISC. */ 1153 1154 if (ec != ECLASS_MISC) { 1155 1156 VG_(debugLog)(2, "transtab", 1157 " FAST, ec = %d\n", ec); 1158 1159 /* Fast scheme */ 1160 vg_assert(ec >= 0 && ec < ECLASS_MISC); 1161 1162 for (sno = 0; sno < N_SECTORS; sno++) { 1163 sec = §ors[sno]; 1164 if (sec->tc == NULL) 1165 continue; 1166 anyDeleted |= delete_translations_in_sector_eclass( 1167 sec, guest_start, range, ec ); 1168 anyDeleted |= delete_translations_in_sector_eclass( 1169 sec, guest_start, range, ECLASS_MISC ); 1170 } 1171 1172 } else { 1173 1174 /* slow scheme */ 1175 1176 VG_(debugLog)(2, "transtab", 1177 " SLOW, ec = %d\n", ec); 1178 1179 for (sno = 0; sno < N_SECTORS; sno++) { 1180 sec = §ors[sno]; 1181 if (sec->tc == NULL) 1182 continue; 1183 anyDeleted |= delete_translations_in_sector( 1184 sec, guest_start, range ); 1185 } 1186 1187 } 1188 1189 if (anyDeleted) 1190 invalidateFastCache(); 1191 1192 /* don't forget the no-redir cache */ 1193 unredir_discard_translations( guest_start, range ); 1194 1195 /* Post-deletion sanity check */ 1196 if (VG_(clo_sanity_level >= 4)) { 1197 Int i; 1198 TTEntry* tte; 1199 Bool sane = sanity_check_all_sectors(); 1200 vg_assert(sane); 1201 /* But now, also check the requested address range isn't 1202 present anywhere. */ 1203 for (sno = 0; sno < N_SECTORS; sno++) { 1204 sec = §ors[sno]; 1205 if (sec->tc == NULL) 1206 continue; 1207 for (i = 0; i < N_TTES_PER_SECTOR; i++) { 1208 tte = &sec->tt[i]; 1209 if (tte->status != InUse) 1210 continue; 1211 vg_assert(!overlaps( guest_start, range, &tte->vge )); 1212 } 1213 } 1214 } 1215} 1216 1217 1218/*------------------------------------------------------------*/ 1219/*--- AUXILIARY: the unredirected TT/TC ---*/ 1220/*------------------------------------------------------------*/ 1221 1222/* A very simple translation cache which holds a small number of 1223 unredirected translations. This is completely independent of the 1224 main tt/tc structures. 
When unredir_tc or unredir_tt becomes full, 1225 both structures are simply dumped and we start over. 1226 1227 Since these translations are unredirected, the search key is (by 1228 definition) the first address entry in the .vge field. */ 1229 1230/* Sized to hold 500 translations of average size 1000 bytes. */ 1231 1232#define UNREDIR_SZB 1000 1233 1234#define N_UNREDIR_TT 500 1235#define N_UNREDIR_TCQ (N_UNREDIR_TT * UNREDIR_SZB / sizeof(ULong)) 1236 1237typedef 1238 struct { 1239 VexGuestExtents vge; 1240 Addr hcode; 1241 Bool inUse; 1242 } 1243 UTCEntry; 1244 1245/* We just allocate forwards in _tc, never deleting. */ 1246static ULong *unredir_tc; 1247static Int unredir_tc_used = N_UNREDIR_TCQ; 1248 1249/* Slots in _tt can come into use and out again (.inUse). 1250 Nevertheless _tt_highwater is maintained so that invalidations 1251 don't have to scan all the slots when only a few are in use. 1252 _tt_highwater holds the index of the highest ever allocated 1253 slot. */ 1254static UTCEntry unredir_tt[N_UNREDIR_TT]; 1255static Int unredir_tt_highwater; 1256 1257 1258static void init_unredir_tt_tc ( void ) 1259{ 1260 Int i; 1261 if (unredir_tc == NULL) { 1262 SysRes sres = VG_(am_mmap_anon_float_valgrind)( N_UNREDIR_TT * UNREDIR_SZB ); 1263 if (sres.isError) { 1264 VG_(out_of_memory_NORETURN)("init_unredir_tt_tc", N_UNREDIR_TT * UNREDIR_SZB); 1265 /*NOTREACHED*/ 1266 } 1267 unredir_tc = (ULong *)sres.res; 1268 } 1269 unredir_tc_used = 0; 1270 for (i = 0; i < N_UNREDIR_TT; i++) 1271 unredir_tt[i].inUse = False; 1272 unredir_tt_highwater = -1; 1273} 1274 1275/* Do a sanity check; return False on failure. 
*/ 1276static Bool sanity_check_redir_tt_tc ( void ) 1277{ 1278 Int i; 1279 if (unredir_tt_highwater < -1) return False; 1280 if (unredir_tt_highwater >= N_UNREDIR_TT) return False; 1281 1282 for (i = unredir_tt_highwater+1; i < N_UNREDIR_TT; i++) 1283 if (unredir_tt[i].inUse) 1284 return False; 1285 1286 if (unredir_tc_used < 0) return False; 1287 if (unredir_tc_used > N_UNREDIR_TCQ) return False; 1288 1289 return True; 1290} 1291 1292 1293/* Add an UNREDIRECTED translation of vge to TT/TC. The translation 1294 is temporarily in code[0 .. code_len-1]. 1295*/ 1296void VG_(add_to_unredir_transtab)( VexGuestExtents* vge, 1297 Addr64 entry, 1298 AddrH code, 1299 UInt code_len, 1300 Bool is_self_checking ) 1301{ 1302 Int i, j, code_szQ; 1303 HChar *srcP, *dstP; 1304 1305 vg_assert(sanity_check_redir_tt_tc()); 1306 1307 /* This is the whole point: it's not redirected! */ 1308 vg_assert(entry == vge->base[0]); 1309 1310 /* How many unredir_tt slots are needed */ 1311 code_szQ = (code_len + 7) / 8; 1312 1313 /* Look for an empty unredir_tc slot */ 1314 for (i = 0; i < N_UNREDIR_TT; i++) 1315 if (!unredir_tt[i].inUse) 1316 break; 1317 1318 if (i >= N_UNREDIR_TT || code_szQ > (N_UNREDIR_TCQ - unredir_tc_used)) { 1319 /* It's full; dump everything we currently have */ 1320 init_unredir_tt_tc(); 1321 i = 0; 1322 } 1323 1324 vg_assert(unredir_tc_used >= 0); 1325 vg_assert(unredir_tc_used <= N_UNREDIR_TCQ); 1326 vg_assert(code_szQ > 0); 1327 vg_assert(code_szQ + unredir_tc_used <= N_UNREDIR_TCQ); 1328 vg_assert(i >= 0 && i < N_UNREDIR_TT); 1329 vg_assert(unredir_tt[i].inUse == False); 1330 1331 if (i > unredir_tt_highwater) 1332 unredir_tt_highwater = i; 1333 1334 dstP = (HChar*)&unredir_tc[unredir_tc_used]; 1335 srcP = (HChar*)code; 1336 for (j = 0; j < code_len; j++) 1337 dstP[j] = srcP[j]; 1338 1339 invalidate_icache( dstP, code_len ); 1340 1341 unredir_tt[i].inUse = True; 1342 unredir_tt[i].vge = *vge; 1343 unredir_tt[i].hcode = (Addr)dstP; 1344 1345 unredir_tc_used += 
code_szQ; 1346 vg_assert(unredir_tc_used >= 0); 1347 vg_assert(unredir_tc_used <= N_UNREDIR_TCQ); 1348 1349 vg_assert(&dstP[code_len] <= (HChar*)&unredir_tc[unredir_tc_used]); 1350} 1351 1352Bool VG_(search_unredir_transtab) ( /*OUT*/AddrH* result, 1353 Addr64 guest_addr ) 1354{ 1355 Int i; 1356 for (i = 0; i < N_UNREDIR_TT; i++) { 1357 if (!unredir_tt[i].inUse) 1358 continue; 1359 if (unredir_tt[i].vge.base[0] == guest_addr) { 1360 *result = (AddrH)unredir_tt[i].hcode; 1361 return True; 1362 } 1363 } 1364 return False; 1365} 1366 1367static void unredir_discard_translations( Addr64 guest_start, ULong range ) 1368{ 1369 Int i; 1370 1371 vg_assert(sanity_check_redir_tt_tc()); 1372 1373 for (i = 0; i <= unredir_tt_highwater; i++) { 1374 if (unredir_tt[i].inUse 1375 && overlaps( guest_start, range, &unredir_tt[i].vge)) 1376 unredir_tt[i].inUse = False; 1377 } 1378} 1379 1380 1381/*------------------------------------------------------------*/ 1382/*--- Initialisation. ---*/ 1383/*------------------------------------------------------------*/ 1384 1385void VG_(init_tt_tc) ( void ) 1386{ 1387 Int i, j, avg_codeszQ; 1388 1389 vg_assert(!init_done); 1390 init_done = True; 1391 1392 /* Otherwise lots of things go wrong... */ 1393 vg_assert(sizeof(ULong) == 8); 1394 vg_assert(sizeof(Addr64) == 8); 1395 1396 if (VG_(clo_verbosity) > 2) 1397 VG_(message)(Vg_DebugMsg, 1398 "TT/TC: VG_(init_tt_tc) " 1399 "(startup of code management)"); 1400 1401 /* Figure out how big each tc area should be. */ 1402 avg_codeszQ = (VG_(details).avg_translation_sizeB + 7) / 8; 1403 tc_sector_szQ = N_TTES_PER_SECTOR_USABLE * (1 + avg_codeszQ); 1404 1405 /* Ensure the calculated value is not way crazy. 
*/ 1406 vg_assert(tc_sector_szQ >= 2 * N_TTES_PER_SECTOR_USABLE); 1407 vg_assert(tc_sector_szQ <= 80 * N_TTES_PER_SECTOR_USABLE); 1408 1409 /* Initialise the sectors */ 1410 youngest_sector = 0; 1411 for (i = 0; i < N_SECTORS; i++) { 1412 sectors[i].tc = NULL; 1413 sectors[i].tt = NULL; 1414 sectors[i].tc_next = NULL; 1415 sectors[i].tt_n_inuse = 0; 1416 for (j = 0; j < ECLASS_N; j++) { 1417 sectors[i].ec2tte_size[j] = 0; 1418 sectors[i].ec2tte_used[j] = 0; 1419 sectors[i].ec2tte[j] = NULL; 1420 } 1421 } 1422 1423 /* and the fast caches. */ 1424 invalidateFastCache(); 1425 1426 /* and the unredir tt/tc */ 1427 init_unredir_tt_tc(); 1428 1429 if (VG_(clo_verbosity) > 2) { 1430 VG_(message)(Vg_DebugMsg, 1431 "TT/TC: cache: %d sectors of %d bytes each = %d total", 1432 N_SECTORS, 8 * tc_sector_szQ, 1433 N_SECTORS * 8 * tc_sector_szQ ); 1434 VG_(message)(Vg_DebugMsg, 1435 "TT/TC: table: %d total entries, max occupancy %d (%d%%)", 1436 N_SECTORS * N_TTES_PER_SECTOR, 1437 N_SECTORS * N_TTES_PER_SECTOR_USABLE, 1438 SECTOR_TT_LIMIT_PERCENT ); 1439 } 1440 1441 VG_(debugLog)(2, "transtab", 1442 "cache: %d sectors of %d bytes each = %d total\n", 1443 N_SECTORS, 8 * tc_sector_szQ, 1444 N_SECTORS * 8 * tc_sector_szQ ); 1445 VG_(debugLog)(2, "transtab", 1446 "table: %d total entries, max occupancy %d (%d%%)\n", 1447 N_SECTORS * N_TTES_PER_SECTOR, 1448 N_SECTORS * N_TTES_PER_SECTOR_USABLE, 1449 SECTOR_TT_LIMIT_PERCENT ); 1450} 1451 1452 1453/*------------------------------------------------------------*/ 1454/*--- Printing out statistics. ---*/ 1455/*------------------------------------------------------------*/ 1456 1457static ULong safe_idiv( ULong a, ULong b ) 1458{ 1459 return (b == 0 ? 
0 : a / b); 1460} 1461 1462UInt VG_(get_bbs_translated) ( void ) 1463{ 1464 return n_in_count; 1465} 1466 1467void VG_(print_tt_tc_stats) ( void ) 1468{ 1469 VG_(message)(Vg_DebugMsg, 1470 " tt/tc: %,llu tt lookups requiring %,llu probes", 1471 n_full_lookups, n_lookup_probes ); 1472 VG_(message)(Vg_DebugMsg, 1473 " tt/tc: %,llu fast-cache updates, %,llu flushes", 1474 n_fast_updates, n_fast_flushes ); 1475 1476 VG_(message)(Vg_DebugMsg, 1477 " transtab: new %,lld " 1478 "(%,llu -> %,llu; ratio %,llu:10) [%,llu scs]", 1479 n_in_count, n_in_osize, n_in_tsize, 1480 safe_idiv(10*n_in_tsize, n_in_osize), 1481 n_in_sc_count); 1482 VG_(message)(Vg_DebugMsg, 1483 " transtab: dumped %,llu (%,llu -> ?" "?)", 1484 n_dump_count, n_dump_osize ); 1485 VG_(message)(Vg_DebugMsg, 1486 " transtab: discarded %,llu (%,llu -> ?" "?)", 1487 n_disc_count, n_disc_osize ); 1488 1489 if (0) { 1490 Int i; 1491 VG_(printf)("\n"); 1492 for (i = 0; i < ECLASS_N; i++) { 1493 VG_(printf)(" %4d", sectors[0].ec2tte_used[i]); 1494 if (i % 16 == 15) 1495 VG_(printf)("\n"); 1496 } 1497 VG_(printf)("\n\n"); 1498 } 1499} 1500 1501/*------------------------------------------------------------*/ 1502/*--- Printing out of profiling results. ---*/ 1503/*------------------------------------------------------------*/ 1504 1505static ULong score ( TTEntry* tte ) 1506{ 1507 return ((ULong)tte->weight) * ((ULong)tte->count); 1508} 1509 1510ULong VG_(get_BB_profile) ( BBProfEntry tops[], UInt n_tops ) 1511{ 1512 Int sno, i, r, s; 1513 ULong score_total; 1514 1515 /* First, compute the total weighted count, and find the top N 1516 ttes. tops contains pointers to the most-used n_tops blocks, in 1517 descending order (viz, tops[0] is the highest scorer). 
*/ 1518 for (i = 0; i < n_tops; i++) { 1519 tops[i].addr = 0; 1520 tops[i].score = 0; 1521 } 1522 1523 score_total = 0; 1524 1525 for (sno = 0; sno < N_SECTORS; sno++) { 1526 if (sectors[sno].tc == NULL) 1527 continue; 1528 for (i = 0; i < N_TTES_PER_SECTOR; i++) { 1529 if (sectors[sno].tt[i].status != InUse) 1530 continue; 1531 score_total += score(§ors[sno].tt[i]); 1532 /* Find the rank for sectors[sno].tt[i]. */ 1533 r = n_tops-1; 1534 while (True) { 1535 if (r == -1) 1536 break; 1537 if (tops[r].addr == 0) { 1538 r--; 1539 continue; 1540 } 1541 if ( score(§ors[sno].tt[i]) > tops[r].score ) { 1542 r--; 1543 continue; 1544 } 1545 break; 1546 } 1547 r++; 1548 vg_assert(r >= 0 && r <= n_tops); 1549 /* This bb should be placed at r, and bbs above it shifted 1550 upwards one slot. */ 1551 if (r < n_tops) { 1552 for (s = n_tops-1; s > r; s--) 1553 tops[s] = tops[s-1]; 1554 tops[r].addr = sectors[sno].tt[i].entry; 1555 tops[r].score = score( §ors[sno].tt[i] ); 1556 } 1557 } 1558 } 1559 1560 return score_total; 1561} 1562 1563/*--------------------------------------------------------------------*/ 1564/*--- end ---*/ 1565/*--------------------------------------------------------------------*/ 1566