m_transtab.c revision 4d474d086188fd1f29fa97dbd84d8ea2e589a9b8
1 2/*--------------------------------------------------------------------*/ 3/*--- Management of the translation table and cache. ---*/ 4/*--- m_transtab.c ---*/ 5/*--------------------------------------------------------------------*/ 6 7/* 8 This file is part of Valgrind, a dynamic binary instrumentation 9 framework. 10 11 Copyright (C) 2000-2008 Julian Seward 12 jseward@acm.org 13 14 This program is free software; you can redistribute it and/or 15 modify it under the terms of the GNU General Public License as 16 published by the Free Software Foundation; either version 2 of the 17 License, or (at your option) any later version. 18 19 This program is distributed in the hope that it will be useful, but 20 WITHOUT ANY WARRANTY; without even the implied warranty of 21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 22 General Public License for more details. 23 24 You should have received a copy of the GNU General Public License 25 along with this program; if not, write to the Free Software 26 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 27 02111-1307, USA. 28 29 The GNU General Public License is contained in the file COPYING. 30*/ 31 32#include "pub_core_basics.h" 33#include "pub_core_debuglog.h" 34#include "pub_core_machine.h" // For VG(machine_get_VexArchInfo) 35#include "pub_core_libcbase.h" 36#include "pub_core_libcassert.h" 37#include "pub_core_libcprint.h" 38#include "pub_core_options.h" 39#include "pub_core_tooliface.h" // For VG_(details).avg_translation_sizeB 40#include "pub_core_transtab.h" 41#include "pub_core_aspacemgr.h" 42#include "pub_core_mallocfree.h" // VG_(out_of_memory_NORETURN) 43 44/* #define DEBUG_TRANSTAB */ 45 46 47/*-------------------------------------------------------------*/ 48/*--- Management of the FIFO-based translation table+cache. 
---*/ 49/*-------------------------------------------------------------*/ 50 51/*------------------ CONSTANTS ------------------*/ 52 53/* Number of sectors the TC is divided into. If you need a larger 54 overall translation cache, increase this value. */ 55#define N_SECTORS 8 56 57/* Number of TC entries in each sector. This needs to be a prime 58 number to work properly, it must be <= 65535 (so that a TT index 59 fits in a UShort, leaving room for 0xFFFF(EC2TTE_DELETED) to denote 60 'deleted') and it is strongly recommended not to change this. 61 65521 is the largest prime <= 65535. */ 62#define N_TTES_PER_SECTOR /*30011*/ /*40009*/ 65521 63 64/* Because each sector contains a hash table of TTEntries, we need to 65 specify the maximum allowable loading, after which the sector is 66 deemed full. */ 67#define SECTOR_TT_LIMIT_PERCENT 80 68 69/* The sector is deemed full when this many entries are in it. */ 70#define N_TTES_PER_SECTOR_USABLE \ 71 ((N_TTES_PER_SECTOR * SECTOR_TT_LIMIT_PERCENT) / 100) 72 73/* Equivalence classes for fast address range deletion. There are 1 + 74 2^ECLASS_WIDTH bins. The highest one, ECLASS_MISC, describes an 75 address range which does not fall cleanly within any specific bin. 76 Note that ECLASS_SHIFT + ECLASS_WIDTH must be < 32. */ 77#define ECLASS_SHIFT 11 78#define ECLASS_WIDTH 8 79#define ECLASS_MISC (1 << ECLASS_WIDTH) 80#define ECLASS_N (1 + ECLASS_MISC) 81 82#define EC2TTE_DELETED 0xFFFF /* 16-bit special value */ 83 84 85/*------------------ TYPES ------------------*/ 86 87/* A translation-table entry. This indicates precisely which areas of 88 guest code are included in the translation, and contains all other 89 auxiliary info too. */ 90typedef 91 struct { 92 /* Profiling only: the count and weight (arbitrary meaning) for 93 this translation. Weight is a property of the translation 94 itself and computed once when the translation is created. 
95 Count is an entry count for the translation and is 96 incremented by 1 every time the translation is used, if we 97 are profiling. */ 98 UInt count; 99 UShort weight; 100 101 /* Status of the slot. Note, we need to be able to do lazy 102 deletion, hence the Deleted state. */ 103 enum { InUse, Deleted, Empty } status; 104 105 /* 64-bit aligned pointer to one or more 64-bit words containing 106 the corresponding host code (must be in the same sector!) 107 This is a pointer into the sector's tc (code) area. */ 108 ULong* tcptr; 109 110 /* This is the original guest address that purportedly is the 111 entry point of the translation. You might think that .entry 112 should be the same as .vge->base[0], and most of the time it 113 is. However, when doing redirections, that is not the case. 114 .vge must always correctly describe the guest code sections 115 from which this translation was made. However, .entry may or 116 may not be a lie, depending on whether or not we're doing 117 redirection. */ 118 Addr64 entry; 119 120 /* This structure describes precisely what ranges of guest code 121 the translation covers, so we can decide whether or not to 122 delete it when translations of a given address range are 123 invalidated. */ 124 VexGuestExtents vge; 125 126 /* Address range summary info: these are pointers back to 127 eclass[] entries in the containing Sector. Those entries in 128 turn point back here -- the two structures are mutually 129 redundant but both necessary to make fast deletions work. 130 The eclass info is similar to, and derived from, this entry's 131 'vge' field, but it is not the same */ 132 UShort n_tte2ec; // # tte2ec pointers (1 to 3) 133 UShort tte2ec_ec[3]; // for each, the eclass # 134 UInt tte2ec_ix[3]; // and the index within the eclass. 135 // for i in 0 .. n_tte2ec-1 136 // sec->ec2tte[ tte2ec_ec[i] ][ tte2ec_ix[i] ] 137 // should be the index 138 // of this TTEntry in the containing Sector's tt array. 
139 } 140 TTEntry; 141 142 143/* Finally, a sector itself. Each sector contains an array of 144 TCEntries, which hold code, and an array of TTEntries, containing 145 all required administrative info. Profiling is supported using the 146 TTEntry .count and .weight fields, if required. Each sector is 147 independent in that no cross-sector references are allowed. 148 149 If the sector is not in use, all three pointers are NULL and 150 tt_n_inuse is zero. 151*/ 152typedef 153 struct { 154 /* The TCEntry area. Size of this depends on the average 155 translation size. We try and size it so it becomes full 156 precisely when this sector's translation table (tt) reaches 157 its load limit (SECTOR_TT_LIMIT_PERCENT). */ 158 ULong* tc; 159 160 /* The TTEntry array. This is a fixed size, always containing 161 exactly N_TTES_PER_SECTOR entries. */ 162 TTEntry* tt; 163 164 /* This points to the current allocation point in tc. */ 165 ULong* tc_next; 166 167 /* The count of tt entries with state InUse. */ 168 Int tt_n_inuse; 169 170 /* Expandable arrays of tt indices for each of the ECLASS_N 171 address range equivalence classes. These hold indices into 172 the containing sector's tt array, which in turn should point 173 back here. */ 174 Int ec2tte_size[ECLASS_N]; 175 Int ec2tte_used[ECLASS_N]; 176 UShort* ec2tte[ECLASS_N]; 177 } 178 Sector; 179 180 181/*------------------ DECLS ------------------*/ 182 183/* The root data structure is an array of sectors. The index of the 184 youngest sector is recorded, and new translations are put into that 185 sector. When it fills up, we move along to the next sector and 186 start to fill that up, wrapping around at the end of the array. 187 That way, once all N_TC_SECTORS have been bought into use for the 188 first time, and are full, we then re-use the oldest sector, 189 endlessly. 190 191 When running, youngest sector should be between >= 0 and < 192 N_TC_SECTORS. 
The initial -1 value indicates the TT/TC system is 193 not yet initialised. 194*/ 195static Sector sectors[N_SECTORS]; 196static Int youngest_sector = -1; 197 198/* The number of ULongs in each TCEntry area. This is computed once 199 at startup and does not change. */ 200static Int tc_sector_szQ; 201 202 203/* Fast helper for the TC. A direct-mapped cache which holds a set of 204 recently used (guest address, host address) pairs. This array is 205 referred to directly from m_dispatch/dispatch-<platform>.S. 206 207 Entries in tt_fast may refer to any valid TC entry, regardless of 208 which sector it's in. Consequently we must be very careful to 209 invalidate this cache when TC entries are changed or disappear. 210 211 A special .guest address - TRANSTAB_BOGUS_GUEST_ADDR -- must be 212 pointed at to cause that cache entry to miss. This relies on the 213 assumption that no guest code actually has that address, hence a 214 value 0x1 seems good. m_translate gives the client a synthetic 215 segfault if it tries to execute at this address. 216*/ 217/* 218typedef 219 struct { 220 Addr guest; 221 Addr host; 222 } 223 FastCacheEntry; 224*/ 225/*global*/ __attribute__((aligned(16))) 226 FastCacheEntry VG_(tt_fast)[VG_TT_FAST_SIZE]; 227/* 228#define TRANSTAB_BOGUS_GUEST_ADDR ((Addr)1) 229*/ 230 231/* For profiling, we have a parallel array of pointers to .count 232 fields in TT entries. Again, these pointers must be invalidated 233 when translations disappear. A NULL pointer suffices to indicate 234 an unused slot. 235 236 When not profiling (the normal case, VG_(clo_profile_flags) == 0), 237 all tt_fastN entries are set to NULL at startup and never read nor 238 written after that. 239 240 When profiling (VG_(clo_profile_flags) > 0), tt_fast and tt_fastN 241 change together: if tt_fast[i].guest is TRANSTAB_BOGUS_GUEST_ADDR 242 then the corresponding tt_fastN[i] must be null. 
If 243 tt_fast[i].guest is any other value, then tt_fastN[i] *must* point 244 to the .count field of the corresponding TT entry. 245 246 tt_fast and tt_fastN are referred to from assembly code 247 (dispatch.S). 248*/ 249/*global*/ UInt* VG_(tt_fastN)[VG_TT_FAST_SIZE]; 250 251 252/* Make sure we're not used before initialisation. */ 253static Bool init_done = False; 254 255 256/*------------------ STATS DECLS ------------------*/ 257 258/* Number of fast-cache updates and flushes done. */ 259ULong n_fast_flushes = 0; 260ULong n_fast_updates = 0; 261 262/* Number of full lookups done. */ 263ULong n_full_lookups = 0; 264ULong n_lookup_probes = 0; 265 266/* Number/osize/tsize of translations entered; also the number of 267 those for which self-checking was requested. */ 268ULong n_in_count = 0; 269ULong n_in_osize = 0; 270ULong n_in_tsize = 0; 271ULong n_in_sc_count = 0; 272 273/* Number/osize of translations discarded due to lack of space. */ 274ULong n_dump_count = 0; 275ULong n_dump_osize = 0; 276 277/* Number/osize of translations discarded due to requests to do so. */ 278ULong n_disc_count = 0; 279ULong n_disc_osize = 0; 280 281 282/*-------------------------------------------------------------*/ 283/*--- Address-range equivalence class stuff ---*/ 284/*-------------------------------------------------------------*/ 285 286/* Return equivalence class number for a range. */ 287 288static Int range_to_eclass ( Addr64 start, UInt len ) 289{ 290 UInt mask = (1 << ECLASS_WIDTH) - 1; 291 UInt lo = (UInt)start; 292 UInt hi = lo + len - 1; 293 UInt loBits = (lo >> ECLASS_SHIFT) & mask; 294 UInt hiBits = (hi >> ECLASS_SHIFT) & mask; 295 if (loBits == hiBits) { 296 vg_assert(loBits < ECLASS_N-1); 297 return loBits; 298 } else { 299 return ECLASS_MISC; 300 } 301} 302 303 304/* Calculates the equivalence class numbers for any VexGuestExtent. 305 These are written in *eclasses, which must be big enough to hold 3 306 Ints. The number written, between 1 and 3, is returned. 
The 307 eclasses are presented in order, and any duplicates are removed. 308*/ 309 310static 311Int vexGuestExtents_to_eclasses ( /*OUT*/Int* eclasses, 312 VexGuestExtents* vge ) 313{ 314# define SWAP(_lv1,_lv2) \ 315 do { Int t = _lv1; _lv1 = _lv2; _lv2 = t; } while (0) 316 317 Int i, j, n_ec, r; 318 319 vg_assert(vge->n_used >= 1 && vge->n_used <= 3); 320 321 n_ec = 0; 322 for (i = 0; i < vge->n_used; i++) { 323 r = range_to_eclass( vge->base[i], (UInt)vge->len[i] ); 324 if (r == ECLASS_MISC) 325 goto bad; 326 /* only add if we haven't already seen it */ 327 for (j = 0; j < n_ec; j++) 328 if (eclasses[j] == r) 329 break; 330 if (j == n_ec) 331 eclasses[n_ec++] = r; 332 } 333 334 if (n_ec == 1) 335 return 1; 336 337 if (n_ec == 2) { 338 /* sort */ 339 if (eclasses[0] > eclasses[1]) 340 SWAP(eclasses[0], eclasses[1]); 341 return 2; 342 } 343 344 if (n_ec == 3) { 345 /* sort */ 346 if (eclasses[0] > eclasses[2]) 347 SWAP(eclasses[0], eclasses[2]); 348 if (eclasses[0] > eclasses[1]) 349 SWAP(eclasses[0], eclasses[1]); 350 if (eclasses[1] > eclasses[2]) 351 SWAP(eclasses[1], eclasses[2]); 352 return 3; 353 } 354 355 /* NOTREACHED */ 356 vg_assert(0); 357 358 bad: 359 eclasses[0] = ECLASS_MISC; 360 return 1; 361 362# undef SWAP 363} 364 365 366/* Add tteno to the set of entries listed for equivalence class ec in 367 this sector. Returns used location in eclass array. */ 368 369static 370UInt addEClassNo ( /*MOD*/Sector* sec, Int ec, UShort tteno ) 371{ 372 Int old_sz, new_sz, i, r; 373 UShort *old_ar, *new_ar; 374 375 vg_assert(ec >= 0 && ec < ECLASS_N); 376 vg_assert(tteno < N_TTES_PER_SECTOR); 377 378 if (0) VG_(printf)("ec %d gets %d\n", ec, (Int)tteno); 379 380 if (sec->ec2tte_used[ec] >= sec->ec2tte_size[ec]) { 381 382 vg_assert(sec->ec2tte_used[ec] == sec->ec2tte_size[ec]); 383 384 old_sz = sec->ec2tte_size[ec]; 385 old_ar = sec->ec2tte[ec]; 386 new_sz = old_sz==0 ? 8 : old_sz<64 ? 
2*old_sz : (3*old_sz)/2; 387 new_ar = VG_(arena_malloc)(VG_AR_TTAUX, new_sz * sizeof(UShort)); 388 for (i = 0; i < old_sz; i++) 389 new_ar[i] = old_ar[i]; 390 if (old_ar) 391 VG_(arena_free)(VG_AR_TTAUX, old_ar); 392 sec->ec2tte_size[ec] = new_sz; 393 sec->ec2tte[ec] = new_ar; 394 395 if (0) VG_(printf)("expand ec %d to %d\n", ec, new_sz); 396 } 397 398 /* Common case */ 399 r = sec->ec2tte_used[ec]++; 400 vg_assert(r >= 0 && r < sec->ec2tte_size[ec]); 401 sec->ec2tte[ec][r] = tteno; 402 return (UInt)r; 403} 404 405 406/* 'vge' is being added to 'sec' at TT entry 'tteno'. Add appropriate 407 eclass entries to 'sec'. */ 408 409static 410void upd_eclasses_after_add ( /*MOD*/Sector* sec, Int tteno ) 411{ 412 Int i, r, eclasses[3]; 413 TTEntry* tte; 414 vg_assert(tteno >= 0 && tteno < N_TTES_PER_SECTOR); 415 416 tte = &sec->tt[tteno]; 417 r = vexGuestExtents_to_eclasses( eclasses, &tte->vge ); 418 419 vg_assert(r >= 1 && r <= 3); 420 tte->n_tte2ec = r; 421 422 for (i = 0; i < r; i++) { 423 tte->tte2ec_ec[i] = eclasses[i]; 424 tte->tte2ec_ix[i] = addEClassNo( sec, eclasses[i], (UShort)tteno ); 425 } 426} 427 428 429/* Check the eclass info in 'sec' to ensure it is consistent. Returns 430 True if OK, False if something's not right. Expensive. */ 431 432static Bool sanity_check_eclasses_in_sector ( Sector* sec ) 433{ 434# define BAD(_str) do { whassup = (_str); goto bad; } while (0) 435 436 HChar* whassup = NULL; 437 Int i, j, k, n, ec_num, ec_idx; 438 TTEntry* tte; 439 UShort tteno; 440 ULong* tce; 441 442 /* Basic checks on this sector */ 443 if (sec->tt_n_inuse < 0 || sec->tt_n_inuse > N_TTES_PER_SECTOR_USABLE) 444 BAD("invalid sec->tt_n_inuse"); 445 tce = sec->tc_next; 446 if (tce < &sec->tc[0] || tce > &sec->tc[tc_sector_szQ]) 447 BAD("sec->tc_next points outside tc"); 448 449 /* For each eclass ... 
*/ 450 for (i = 0; i < ECLASS_N; i++) { 451 if (sec->ec2tte_size[i] == 0 && sec->ec2tte[i] != NULL) 452 BAD("ec2tte_size/ec2tte mismatch(1)"); 453 if (sec->ec2tte_size[i] != 0 && sec->ec2tte[i] == NULL) 454 BAD("ec2tte_size/ec2tte mismatch(2)"); 455 if (sec->ec2tte_used[i] < 0 456 || sec->ec2tte_used[i] > sec->ec2tte_size[i]) 457 BAD("implausible ec2tte_used"); 458 if (sec->ec2tte_used[i] == 0) 459 continue; 460 461 /* For each tt reference in each eclass .. ensure the reference 462 is to a valid tt entry, and that the entry's address ranges 463 really include this eclass. */ 464 465 for (j = 0; j < sec->ec2tte_used[i]; j++) { 466 tteno = sec->ec2tte[i][j]; 467 if (tteno == EC2TTE_DELETED) 468 continue; 469 if (tteno >= N_TTES_PER_SECTOR) 470 BAD("implausible tteno"); 471 tte = &sec->tt[tteno]; 472 if (tte->status != InUse) 473 BAD("tteno points to non-inuse tte"); 474 if (tte->n_tte2ec < 1 || tte->n_tte2ec > 3) 475 BAD("tte->n_tte2ec out of range"); 476 /* Exactly least one of tte->eclasses[0 .. tte->n_eclasses-1] 477 must equal i. Inspect tte's eclass info. */ 478 n = 0; 479 for (k = 0; k < tte->n_tte2ec; k++) { 480 if (k < tte->n_tte2ec-1 481 && tte->tte2ec_ec[k] >= tte->tte2ec_ec[k+1]) 482 BAD("tte->tte2ec_ec[..] out of order"); 483 ec_num = tte->tte2ec_ec[k]; 484 if (ec_num < 0 || ec_num >= ECLASS_N) 485 BAD("tte->tte2ec_ec[..] out of range"); 486 if (ec_num != i) 487 continue; 488 ec_idx = tte->tte2ec_ix[k]; 489 if (ec_idx < 0 || ec_idx >= sec->ec2tte_used[i]) 490 BAD("tte->tte2ec_ix[..] out of range"); 491 if (ec_idx == j) 492 n++; 493 } 494 if (n != 1) 495 BAD("tteno does not point back at eclass"); 496 } 497 } 498 499 /* That establishes that for each forward pointer from TTEntrys 500 there is a corresponding backward pointer from the eclass[] 501 arrays. However, it doesn't rule out the possibility of other, 502 bogus pointers in the eclass[] arrays. So do those similarly: 503 scan through them and check the TTEntryies they point at point 504 back. 
*/ 505 506 for (i = 0; i < N_TTES_PER_SECTOR_USABLE; i++) { 507 508 tte = &sec->tt[i]; 509 if (tte->status == Empty || tte->status == Deleted) { 510 if (tte->n_tte2ec != 0) 511 BAD("tte->n_eclasses nonzero for unused tte"); 512 continue; 513 } 514 515 vg_assert(tte->status == InUse); 516 517 if (tte->n_tte2ec < 1 || tte->n_tte2ec > 3) 518 BAD("tte->n_eclasses out of range(2)"); 519 520 for (j = 0; j < tte->n_tte2ec; j++) { 521 ec_num = tte->tte2ec_ec[j]; 522 if (ec_num < 0 || ec_num >= ECLASS_N) 523 BAD("tte->eclass[..] out of range"); 524 ec_idx = tte->tte2ec_ix[j]; 525 if (ec_idx < 0 || ec_idx >= sec->ec2tte_used[ec_num]) 526 BAD("tte->ec_idx[..] out of range(2)"); 527 if (sec->ec2tte[ec_num][ec_idx] != i) 528 BAD("ec2tte does not point back to tte"); 529 } 530 } 531 532 return True; 533 534 bad: 535 if (whassup) 536 VG_(debugLog)(0, "transtab", "eclass sanity fail: %s\n", whassup); 537 538# if 0 539 VG_(printf)("eclass = %d\n", i); 540 VG_(printf)("tteno = %d\n", (Int)tteno); 541 switch (tte->status) { 542 case InUse: VG_(printf)("InUse\n"); break; 543 case Deleted: VG_(printf)("Deleted\n"); break; 544 case Empty: VG_(printf)("Empty\n"); break; 545 } 546 if (tte->status != Empty) { 547 for (k = 0; k < tte->vge.n_used; k++) 548 VG_(printf)("0x%llx %d\n", tte->vge.base[k], 549 (Int)tte->vge.len[k]); 550 } 551# endif 552 553 return False; 554 555# undef BAD 556} 557 558 559/* Sanity check absolutely everything. True == check passed. 
*/ 560 561/* forwards */ 562static Bool sanity_check_redir_tt_tc ( void ); 563static Bool sanity_check_fastcache ( void ); 564 565static Bool sanity_check_all_sectors ( void ) 566{ 567 Int sno; 568 Bool sane; 569 Sector* sec; 570 for (sno = 0; sno < N_SECTORS; sno++) { 571 sec = §ors[sno]; 572 if (sec->tc == NULL) 573 continue; 574 sane = sanity_check_eclasses_in_sector( sec ); 575 if (!sane) 576 return False; 577 } 578 if ( !sanity_check_redir_tt_tc() ) 579 return False; 580 if ( !sanity_check_fastcache() ) 581 return False; 582 return True; 583} 584 585 586/*-------------------------------------------------------------*/ 587/*--- Add/find translations ---*/ 588/*-------------------------------------------------------------*/ 589 590static UInt vge_osize ( VexGuestExtents* vge ) 591{ 592 UInt i, n = 0; 593 for (i = 0; i < vge->n_used; i++) 594 n += (UInt)vge->len[i]; 595 return n; 596} 597 598static Bool isValidSector ( Int sector ) 599{ 600 if (sector < 0 || sector >= N_SECTORS) 601 return False; 602 return True; 603} 604 605static inline UInt HASH_TT ( Addr64 key ) 606{ 607 UInt kHi = (UInt)(key >> 32); 608 UInt kLo = (UInt)key; 609 UInt k32 = kHi ^ kLo; 610 UInt ror = 7; 611 if (ror > 0) 612 k32 = (k32 >> ror) | (k32 << (32-ror)); 613 return k32 % N_TTES_PER_SECTOR; 614} 615 616static void setFastCacheEntry ( Addr64 key, ULong* tcptr, UInt* count ) 617{ 618 UInt cno = (UInt)VG_TT_FAST_HASH(key); 619 VG_(tt_fast)[cno].guest = (Addr)key; 620 VG_(tt_fast)[cno].host = (Addr)tcptr; 621 if (VG_(clo_profile_flags) > 0) 622 VG_(tt_fastN)[cno] = count; 623 n_fast_updates++; 624 /* This shouldn't fail. It should be assured by m_translate 625 which should reject any attempt to make translation of code 626 starting at TRANSTAB_BOGUS_GUEST_ADDR. */ 627 vg_assert(VG_(tt_fast)[cno].guest != TRANSTAB_BOGUS_GUEST_ADDR); 628} 629 630/* Invalidate the fast cache's counter array, VG_(tt_fastN). 
*/ 631static void invalidateFastNCache ( void ) 632{ 633 UInt j; 634 vg_assert(VG_TT_FAST_SIZE > 0 && (VG_TT_FAST_SIZE % 4) == 0); 635 for (j = 0; j < VG_TT_FAST_SIZE; j += 4) { 636 VG_(tt_fastN)[j+0] = NULL; 637 VG_(tt_fastN)[j+1] = NULL; 638 VG_(tt_fastN)[j+2] = NULL; 639 VG_(tt_fastN)[j+3] = NULL; 640 } 641 vg_assert(j == VG_TT_FAST_SIZE); 642} 643 644/* Invalidate the fast cache VG_(tt_fast). If profiling, also 645 invalidate the fast cache's counter array VG_(tt_fastN), otherwise 646 don't touch it. */ 647static void invalidateFastCache ( void ) 648{ 649 UInt j; 650 /* This loop is popular enough to make it worth unrolling a 651 bit, at least on ppc32. */ 652 vg_assert(VG_TT_FAST_SIZE > 0 && (VG_TT_FAST_SIZE % 4) == 0); 653 for (j = 0; j < VG_TT_FAST_SIZE; j += 4) { 654 VG_(tt_fast)[j+0].guest = TRANSTAB_BOGUS_GUEST_ADDR; 655 VG_(tt_fast)[j+1].guest = TRANSTAB_BOGUS_GUEST_ADDR; 656 VG_(tt_fast)[j+2].guest = TRANSTAB_BOGUS_GUEST_ADDR; 657 VG_(tt_fast)[j+3].guest = TRANSTAB_BOGUS_GUEST_ADDR; 658 } 659 660 if (VG_(clo_profile_flags) > 0) 661 invalidateFastNCache(); 662 663 vg_assert(j == VG_TT_FAST_SIZE); 664 n_fast_flushes++; 665} 666 667static Bool sanity_check_fastcache ( void ) 668{ 669 UInt j; 670 if (0) VG_(printf)("sanity check fastcache\n"); 671 if (VG_(clo_profile_flags) > 0) { 672 /* profiling */ 673 for (j = 0; j < VG_TT_FAST_SIZE; j++) { 674 if (VG_(tt_fastN)[j] == NULL 675 && VG_(tt_fast)[j].guest != TRANSTAB_BOGUS_GUEST_ADDR) 676 return False; 677 if (VG_(tt_fastN)[j] != NULL 678 && VG_(tt_fast)[j].guest == TRANSTAB_BOGUS_GUEST_ADDR) 679 return False; 680 } 681 } else { 682 /* not profiling */ 683 for (j = 0; j < VG_TT_FAST_SIZE; j++) { 684 if (VG_(tt_fastN)[j] != NULL) 685 return False; 686 } 687 } 688 return True; 689} 690 691static void initialiseSector ( Int sno ) 692{ 693 Int i; 694 SysRes sres; 695 Sector* sec; 696 vg_assert(isValidSector(sno)); 697 698 sec = §ors[sno]; 699 700 if (sec->tc == NULL) { 701 702 /* Sector has never been used 
before. Need to allocate tt and 703 tc. */ 704 vg_assert(sec->tt == NULL); 705 vg_assert(sec->tc_next == NULL); 706 vg_assert(sec->tt_n_inuse == 0); 707 for (i = 0; i < ECLASS_N; i++) { 708 vg_assert(sec->ec2tte_size[i] == 0); 709 vg_assert(sec->ec2tte_used[i] == 0); 710 vg_assert(sec->ec2tte[i] == NULL); 711 } 712 713 VG_(debugLog)(1,"transtab", "allocate sector %d\n", sno); 714 715 sres = VG_(am_mmap_anon_float_valgrind)( 8 * tc_sector_szQ ); 716 if (sres.isError) { 717 VG_(out_of_memory_NORETURN)("initialiseSector(TC)", 718 8 * tc_sector_szQ ); 719 /*NOTREACHED*/ 720 } 721 sec->tc = (ULong*)sres.res; 722 723 sres = VG_(am_mmap_anon_float_valgrind) 724 ( N_TTES_PER_SECTOR * sizeof(TTEntry) ); 725 if (sres.isError) { 726 VG_(out_of_memory_NORETURN)("initialiseSector(TT)", 727 N_TTES_PER_SECTOR * sizeof(TTEntry) ); 728 /*NOTREACHED*/ 729 } 730 sec->tt = (TTEntry*)sres.res; 731 732 for (i = 0; i < N_TTES_PER_SECTOR; i++) { 733 sec->tt[i].status = Empty; 734 sec->tt[i].n_tte2ec = 0; 735 } 736 737 if (VG_(clo_verbosity) > 2) 738 VG_(message)(Vg_DebugMsg, "TT/TC: initialise sector %d", sno); 739 740 } else { 741 742 /* Sector has been used before. Dump the old contents. */ 743 VG_(debugLog)(1,"transtab", "recycle sector %d\n", sno); 744 vg_assert(sec->tt != NULL); 745 vg_assert(sec->tc_next != NULL); 746 n_dump_count += sec->tt_n_inuse; 747 748 /* Visit each just-about-to-be-abandoned translation. */ 749 for (i = 0; i < N_TTES_PER_SECTOR; i++) { 750 if (sec->tt[i].status == InUse) { 751 vg_assert(sec->tt[i].n_tte2ec >= 1); 752 vg_assert(sec->tt[i].n_tte2ec <= 3); 753 n_dump_osize += vge_osize(&sec->tt[i].vge); 754 /* Tell the tool too. */ 755 if (VG_(needs).superblock_discards) { 756 VG_TDICT_CALL( tool_discard_superblock_info, 757 sec->tt[i].entry, 758 sec->tt[i].vge ); 759 } 760 } else { 761 vg_assert(sec->tt[i].n_tte2ec == 0); 762 } 763 sec->tt[i].status = Empty; 764 sec->tt[i].n_tte2ec = 0; 765 } 766 767 /* Free up the eclass structures. 
*/ 768 for (i = 0; i < ECLASS_N; i++) { 769 if (sec->ec2tte_size[i] == 0) { 770 vg_assert(sec->ec2tte_used[i] == 0); 771 vg_assert(sec->ec2tte[i] == NULL); 772 } else { 773 vg_assert(sec->ec2tte[i] != NULL); 774 VG_(arena_free)(VG_AR_TTAUX, sec->ec2tte[i]); 775 sec->ec2tte[i] = NULL; 776 sec->ec2tte_size[i] = 0; 777 sec->ec2tte_used[i] = 0; 778 } 779 } 780 781 if (VG_(clo_verbosity) > 2) 782 VG_(message)(Vg_DebugMsg, "TT/TC: recycle sector %d", sno); 783 } 784 785 sec->tc_next = sec->tc; 786 sec->tt_n_inuse = 0; 787 788 invalidateFastCache(); 789} 790 791static void invalidate_icache ( void *ptr, Int nbytes ) 792{ 793# if defined(VGA_ppc32) || defined(VGA_ppc64) 794 Addr startaddr = (Addr) ptr; 795 Addr endaddr = startaddr + nbytes; 796 Addr cls; 797 Addr addr; 798 VexArchInfo vai; 799 800 if (nbytes == 0) return; 801 vg_assert(nbytes > 0); 802 803 VG_(machine_get_VexArchInfo)( NULL, &vai ); 804 cls = vai.ppc_cache_line_szB; 805 806 /* Stay sane .. */ 807 vg_assert(cls == 32 || cls == 128); 808 809 startaddr &= ~(cls - 1); 810 for (addr = startaddr; addr < endaddr; addr += cls) 811 asm volatile("dcbst 0,%0" : : "r" (addr)); 812 asm volatile("sync"); 813 for (addr = startaddr; addr < endaddr; addr += cls) 814 asm volatile("icbi 0,%0" : : "r" (addr)); 815 asm volatile("sync; isync"); 816 817# elif defined(VGA_x86) 818 /* no need to do anything, hardware provides coherence */ 819 820# elif defined(VGA_amd64) 821 /* no need to do anything, hardware provides coherence */ 822 823# else 824# error "Unknown ARCH" 825# endif 826} 827 828 829/* Add a translation of vge to TT/TC. The translation is temporarily 830 in code[0 .. code_len-1]. 831 832 pre: youngest_sector points to a valid (although possibly full) 833 sector. 
834*/ 835void VG_(add_to_transtab)( VexGuestExtents* vge, 836 Addr64 entry, 837 AddrH code, 838 UInt code_len, 839 Bool is_self_checking ) 840{ 841 Int tcAvailQ, reqdQ, y, i; 842 ULong *tcptr, *tcptr2; 843 UChar* srcP; 844 UChar* dstP; 845 846 vg_assert(init_done); 847 vg_assert(vge->n_used >= 1 && vge->n_used <= 3); 848 849 /* 60000: should agree with N_TMPBUF in m_translate.c. */ 850 vg_assert(code_len > 0 && code_len < 60000); 851 852 if (0) 853 VG_(printf)("add_to_transtab(entry = 0x%llx, len = %d)\n", 854 entry, code_len); 855 856 n_in_count++; 857 n_in_tsize += code_len; 858 n_in_osize += vge_osize(vge); 859 if (is_self_checking) 860 n_in_sc_count++; 861 862 y = youngest_sector; 863 vg_assert(isValidSector(y)); 864 865 if (sectors[y].tc == NULL) 866 initialiseSector(y); 867 868 /* Try putting the translation in this sector. */ 869 reqdQ = (code_len + 7) >> 3; 870 871 /* Will it fit in tc? */ 872 tcAvailQ = ((ULong*)(§ors[y].tc[tc_sector_szQ])) 873 - ((ULong*)(sectors[y].tc_next)); 874 vg_assert(tcAvailQ >= 0); 875 vg_assert(tcAvailQ <= tc_sector_szQ); 876 877 if (tcAvailQ < reqdQ 878 || sectors[y].tt_n_inuse >= N_TTES_PER_SECTOR_USABLE) { 879 /* No. So move on to the next sector. Either it's never been 880 used before, in which case it will get its tt/tc allocated 881 now, or it has been used before, in which case it is set to be 882 empty, hence throwing out the oldest sector. */ 883 vg_assert(tc_sector_szQ > 0); 884 VG_(debugLog)(1,"transtab", 885 "declare sector %d full " 886 "(TT loading %2d%%, TC loading %2d%%)\n", 887 y, 888 (100 * sectors[y].tt_n_inuse) 889 / N_TTES_PER_SECTOR, 890 (100 * (tc_sector_szQ - tcAvailQ)) 891 / tc_sector_szQ); 892 youngest_sector++; 893 if (youngest_sector >= N_SECTORS) 894 youngest_sector = 0; 895 y = youngest_sector; 896 initialiseSector(y); 897 } 898 899 /* Be sure ... 
*/ 900 tcAvailQ = ((ULong*)(§ors[y].tc[tc_sector_szQ])) 901 - ((ULong*)(sectors[y].tc_next)); 902 vg_assert(tcAvailQ >= 0); 903 vg_assert(tcAvailQ <= tc_sector_szQ); 904 vg_assert(tcAvailQ >= reqdQ); 905 vg_assert(sectors[y].tt_n_inuse < N_TTES_PER_SECTOR_USABLE); 906 vg_assert(sectors[y].tt_n_inuse >= 0); 907 908 /* Copy into tc. */ 909 tcptr = sectors[y].tc_next; 910 vg_assert(tcptr >= §ors[y].tc[0]); 911 vg_assert(tcptr <= §ors[y].tc[tc_sector_szQ]); 912 913 dstP = (UChar*)tcptr; 914 srcP = (UChar*)code; 915 for (i = 0; i < code_len; i++) 916 dstP[i] = srcP[i]; 917 sectors[y].tc_next += reqdQ; 918 sectors[y].tt_n_inuse++; 919 920 invalidate_icache( dstP, code_len ); 921 922 /* more paranoia */ 923 tcptr2 = sectors[y].tc_next; 924 vg_assert(tcptr2 >= §ors[y].tc[0]); 925 vg_assert(tcptr2 <= §ors[y].tc[tc_sector_szQ]); 926 927 /* Find an empty tt slot, and use it. There must be such a slot 928 since tt is never allowed to get completely full. */ 929 i = HASH_TT(entry); 930 vg_assert(i >= 0 && i < N_TTES_PER_SECTOR); 931 while (True) { 932 if (sectors[y].tt[i].status == Empty 933 || sectors[y].tt[i].status == Deleted) 934 break; 935 i++; 936 if (i >= N_TTES_PER_SECTOR) 937 i = 0; 938 } 939 940 sectors[y].tt[i].status = InUse; 941 sectors[y].tt[i].tcptr = tcptr; 942 sectors[y].tt[i].count = 0; 943 sectors[y].tt[i].weight = 1; 944 sectors[y].tt[i].vge = *vge; 945 sectors[y].tt[i].entry = entry; 946 947 /* Update the fast-cache. */ 948 setFastCacheEntry( entry, tcptr, §ors[y].tt[i].count ); 949 950 /* Note the eclass numbers for this translation. */ 951 upd_eclasses_after_add( §ors[y], i ); 952} 953 954 955/* Search for the translation of the given guest address. If 956 requested, a successful search can also cause the fast-caches to be 957 updated. 958*/ 959Bool VG_(search_transtab) ( /*OUT*/AddrH* result, 960 Addr64 guest_addr, 961 Bool upd_cache ) 962{ 963 Int i, j, k, kstart, sno; 964 965 vg_assert(init_done); 966 /* Find the initial probe point just once. 
It will be the same in 967 all sectors and avoids multiple expensive % operations. */ 968 n_full_lookups++; 969 k = -1; 970 kstart = HASH_TT(guest_addr); 971 vg_assert(kstart >= 0 && kstart < N_TTES_PER_SECTOR); 972 973 /* Search in all the sectors. Although the order should not matter, 974 it might be most efficient to search in the order youngest to 975 oldest. */ 976 sno = youngest_sector; 977 for (i = 0; i < N_SECTORS; i++) { 978 979 if (sectors[sno].tc == NULL) 980 goto notfound; /* sector not in use. */ 981 982 k = kstart; 983 for (j = 0; j < N_TTES_PER_SECTOR; j++) { 984 n_lookup_probes++; 985 if (sectors[sno].tt[k].status == InUse 986 && sectors[sno].tt[k].entry == guest_addr) { 987 /* found it */ 988 if (upd_cache) 989 setFastCacheEntry( 990 guest_addr, sectors[sno].tt[k].tcptr, 991 §ors[sno].tt[k].count ); 992 if (result) 993 *result = (AddrH)sectors[sno].tt[k].tcptr; 994 return True; 995 } 996 if (sectors[sno].tt[k].status == Empty) 997 break; /* not found in this sector */ 998 k++; 999 if (k == N_TTES_PER_SECTOR) 1000 k = 0; 1001 } 1002 1003 /* If we fall off the end, all entries are InUse and not 1004 matching, or Deleted. In any case we did not find it in this 1005 sector. */ 1006 1007 notfound: 1008 /* move to the next oldest sector */ 1009 sno = sno==0 ? (N_SECTORS-1) : (sno-1); 1010 } 1011 1012 /* Not found in any sector. */ 1013 return False; 1014} 1015 1016 1017/*-------------------------------------------------------------*/ 1018/*--- Delete translations. ---*/ 1019/*-------------------------------------------------------------*/ 1020 1021/* forward */ 1022static void unredir_discard_translations( Addr64, ULong ); 1023 1024/* Stuff for deleting translations which intersect with a given 1025 address range. Unfortunately, to make this run at a reasonable 1026 speed, it is complex. 
*/

/* Do the ranges [s1, s1+r1) and [s2, s2+r2) overlap?  Computed via
   inclusive end points, so r1 and r2 must be nonzero.
   NOTE(review): assumes s + r - 1 does not wrap the 64-bit address
   space -- confirm callers never pass such ranges. */
static inline
Bool overlap1 ( Addr64 s1, ULong r1, Addr64 s2, ULong r2 )
{
   Addr64 e1 = s1 + r1 - 1ULL;
   Addr64 e2 = s2 + r2 - 1ULL;
   if (e1 < s2 || e2 < s1)
      return False;
   return True;
}

/* Does [start, start+range) overlap any of the (up to three) guest
   address extents described by vge? */
static inline
Bool overlaps ( Addr64 start, ULong range, VexGuestExtents* vge )
{
   if (overlap1(start, range, vge->base[0], (UInt)vge->len[0]))
      return True;
   if (vge->n_used < 2)
      return False;
   if (overlap1(start, range, vge->base[1], (UInt)vge->len[1]))
      return True;
   if (vge->n_used < 3)
      return False;
   if (overlap1(start, range, vge->base[2], (UInt)vge->len[2]))
      return True;
   return False;
}


/* Delete a tt entry, and update all the eclass data accordingly. */

static void delete_tte ( /*MOD*/Sector* sec, Int tteno )
{
   Int      i, ec_num, ec_idx;
   TTEntry* tte;

   vg_assert(tteno >= 0 && tteno < N_TTES_PER_SECTOR);
   tte = &sec->tt[tteno];
   vg_assert(tte->status == InUse);
   vg_assert(tte->n_tte2ec >= 1 && tte->n_tte2ec <= 3);

   /* Deal with the ec-to-tte links first: for each eclass this entry
      is listed in, overwrite the back-pointer with EC2TTE_DELETED
      rather than compacting the list. */
   for (i = 0; i < tte->n_tte2ec; i++) {
      ec_num = (Int)tte->tte2ec_ec[i];
      ec_idx = tte->tte2ec_ix[i];
      vg_assert(ec_num >= 0 && ec_num < ECLASS_N);
      vg_assert(ec_idx >= 0);
      vg_assert(ec_idx < sec->ec2tte_used[ec_num]);
      /* Assert that the two links point at each other. */
      vg_assert(sec->ec2tte[ec_num][ec_idx] == (UShort)tteno);
      /* "delete" the pointer back to here. */
      sec->ec2tte[ec_num][ec_idx] = EC2TTE_DELETED;
   }

   /* Now fix up this TTEntry.  Mark it Deleted (not Empty) so that
      hash-probe chains passing through this slot keep working. */
   tte->status   = Deleted;
   tte->n_tte2ec = 0;

   /* Stats .. */
   sec->tt_n_inuse--;
   n_disc_count++;
   n_disc_osize += vge_osize(&tte->vge);

   /* Tell the tool too. */
   if (VG_(needs).superblock_discards) {
      VG_TDICT_CALL( tool_discard_superblock_info,
                     tte->entry,
                     tte->vge );
   }
}


/* Delete translations from sec which intersect specified range, but
   only consider translations in the specified eclass.  Returns True
   if anything was deleted. */

static
Bool delete_translations_in_sector_eclass ( /*MOD*/Sector* sec,
                                            Addr64 guest_start, ULong range,
                                            Int ec )
{
   Int      i;
   UShort   tteno;
   Bool     anyDeld = False;
   TTEntry* tte;

   vg_assert(ec >= 0 && ec < ECLASS_N);

   for (i = 0; i < sec->ec2tte_used[ec]; i++) {

      tteno = sec->ec2tte[ec][i];
      if (tteno == EC2TTE_DELETED) {
         /* already deleted */
         continue;
      }

      vg_assert(tteno < N_TTES_PER_SECTOR);

      tte = &sec->tt[tteno];
      vg_assert(tte->status == InUse);

      if (overlaps( guest_start, range, &tte->vge )) {
         anyDeld = True;
         delete_tte( sec, (Int)tteno );
      }

   }

   return anyDeld;
}


/* Delete translations from sec which intersect specified range, the
   slow way, by inspecting all translations in sec.
*/ 1139 1140static 1141Bool delete_translations_in_sector ( /*MOD*/Sector* sec, 1142 Addr64 guest_start, ULong range ) 1143{ 1144 Int i; 1145 Bool anyDeld = False; 1146 1147 for (i = 0; i < N_TTES_PER_SECTOR; i++) { 1148 if (sec->tt[i].status == InUse 1149 && overlaps( guest_start, range, &sec->tt[i].vge )) { 1150 anyDeld = True; 1151 delete_tte( sec, i ); 1152 } 1153 } 1154 1155 return anyDeld; 1156} 1157 1158 1159void VG_(discard_translations) ( Addr64 guest_start, ULong range, 1160 HChar* who ) 1161{ 1162 Sector* sec; 1163 Int sno, ec; 1164 Bool anyDeleted = False; 1165 1166 vg_assert(init_done); 1167 1168 VG_(debugLog)(2, "transtab", 1169 "discard_translations(0x%llx, %lld) req by %s\n", 1170 guest_start, range, who ); 1171 1172 /* Pre-deletion sanity check */ 1173 if (VG_(clo_sanity_level >= 4)) { 1174 Bool sane = sanity_check_all_sectors(); 1175 vg_assert(sane); 1176 } 1177 1178 if (range == 0) 1179 return; 1180 1181 /* There are two different ways to do this. 1182 1183 If the range fits within a single address-range equivalence 1184 class, as will be the case for a cache line sized invalidation, 1185 then we only have to inspect the set of translations listed in 1186 that equivalence class, and also in the "sin-bin" equivalence 1187 class ECLASS_MISC. 1188 1189 Otherwise, the invalidation is of a larger range and probably 1190 results from munmap. In this case it's (probably!) faster just 1191 to inspect all translations, dump those we don't want, and 1192 regenerate the equivalence class information (since modifying it 1193 in-situ is even more expensive). 1194 */ 1195 1196 /* First off, figure out if the range falls within a single class, 1197 and if so which one. */ 1198 1199 ec = ECLASS_MISC; 1200 if (range < (1ULL << ECLASS_SHIFT)) 1201 ec = range_to_eclass( guest_start, (UInt)range ); 1202 1203 /* if ec is ECLASS_MISC then we aren't looking at just a single 1204 class, so use the slow scheme. 
Else use the fast scheme, 1205 examining 'ec' and ECLASS_MISC. */ 1206 1207 if (ec != ECLASS_MISC) { 1208 1209 VG_(debugLog)(2, "transtab", 1210 " FAST, ec = %d\n", ec); 1211 1212 /* Fast scheme */ 1213 vg_assert(ec >= 0 && ec < ECLASS_MISC); 1214 1215 for (sno = 0; sno < N_SECTORS; sno++) { 1216 sec = §ors[sno]; 1217 if (sec->tc == NULL) 1218 continue; 1219 anyDeleted |= delete_translations_in_sector_eclass( 1220 sec, guest_start, range, ec ); 1221 anyDeleted |= delete_translations_in_sector_eclass( 1222 sec, guest_start, range, ECLASS_MISC ); 1223 } 1224 1225 } else { 1226 1227 /* slow scheme */ 1228 1229 VG_(debugLog)(2, "transtab", 1230 " SLOW, ec = %d\n", ec); 1231 1232 for (sno = 0; sno < N_SECTORS; sno++) { 1233 sec = §ors[sno]; 1234 if (sec->tc == NULL) 1235 continue; 1236 anyDeleted |= delete_translations_in_sector( 1237 sec, guest_start, range ); 1238 } 1239 1240 } 1241 1242 if (anyDeleted) 1243 invalidateFastCache(); 1244 1245 /* don't forget the no-redir cache */ 1246 unredir_discard_translations( guest_start, range ); 1247 1248 /* Post-deletion sanity check */ 1249 if (VG_(clo_sanity_level >= 4)) { 1250 Int i; 1251 TTEntry* tte; 1252 Bool sane = sanity_check_all_sectors(); 1253 vg_assert(sane); 1254 /* But now, also check the requested address range isn't 1255 present anywhere. */ 1256 for (sno = 0; sno < N_SECTORS; sno++) { 1257 sec = §ors[sno]; 1258 if (sec->tc == NULL) 1259 continue; 1260 for (i = 0; i < N_TTES_PER_SECTOR; i++) { 1261 tte = &sec->tt[i]; 1262 if (tte->status != InUse) 1263 continue; 1264 vg_assert(!overlaps( guest_start, range, &tte->vge )); 1265 } 1266 } 1267 } 1268} 1269 1270 1271/*------------------------------------------------------------*/ 1272/*--- AUXILIARY: the unredirected TT/TC ---*/ 1273/*------------------------------------------------------------*/ 1274 1275/* A very simple translation cache which holds a small number of 1276 unredirected translations. This is completely independent of the 1277 main tt/tc structures. 
When unredir_tc or unredir_tt becomes full,
   both structures are simply dumped and we start over.

   Since these translations are unredirected, the search key is (by
   definition) the first address entry in the .vge field. */

/* Sized to hold 500 translations of average size 1000 bytes. */

#define UNREDIR_SZB   1000

#define N_UNREDIR_TT  500
#define N_UNREDIR_TCQ (N_UNREDIR_TT * UNREDIR_SZB / sizeof(ULong))

/* One unredirected translation: its guest extents, the address of the
   copied host code in unredir_tc, and an in-use flag. */
typedef
   struct {
      VexGuestExtents vge;
      Addr            hcode;
      Bool            inUse;
   }
   UTCEntry;

/* We just allocate forwards in _tc, never deleting.  _tc_used starts
   out at "completely full", presumably so that the full-test in
   VG_(add_to_unredir_transtab) forces a (re)init before anything can
   be added -- TODO confirm against callers. */
static ULong *unredir_tc;
static Int    unredir_tc_used = N_UNREDIR_TCQ;

/* Slots in _tt can come into use and out again (.inUse).
   Nevertheless _tt_highwater is maintained so that invalidations
   don't have to scan all the slots when only a few are in use.
   _tt_highwater holds the index of the highest ever allocated
   slot. */
static UTCEntry unredir_tt[N_UNREDIR_TT];
static Int      unredir_tt_highwater;


/* (Re-)initialise the unredirected tt/tc: mmap the code cache on
   first use (aborting the process on failure), then mark every slot
   empty and reset the allocation point and high-water mark. */
static void init_unredir_tt_tc ( void )
{
   Int i;
   if (unredir_tc == NULL) {
      SysRes sres = VG_(am_mmap_anon_float_valgrind)( N_UNREDIR_TT * UNREDIR_SZB );
      if (sres.isError) {
         VG_(out_of_memory_NORETURN)("init_unredir_tt_tc", N_UNREDIR_TT * UNREDIR_SZB);
         /*NOTREACHED*/
      }
      unredir_tc = (ULong *)sres.res;
   }
   unredir_tc_used = 0;
   for (i = 0; i < N_UNREDIR_TT; i++)
      unredir_tt[i].inUse = False;
   unredir_tt_highwater = -1;
}

/* Do a sanity check; return False on failure.  (Despite the name,
   this checks the *un*redirected tt/tc structures.) */
static Bool sanity_check_redir_tt_tc ( void )
{
   Int i;
   if (unredir_tt_highwater < -1) return False;
   if (unredir_tt_highwater >= N_UNREDIR_TT) return False;

   /* No slot above the high-water mark may be in use. */
   for (i = unredir_tt_highwater+1; i < N_UNREDIR_TT; i++)
      if (unredir_tt[i].inUse)
         return False;

   if (unredir_tc_used < 0) return False;
   if (unredir_tc_used > N_UNREDIR_TCQ) return False;

   return True;
}


/* Add an UNREDIRECTED translation of vge to TT/TC.  The translation
   is temporarily in code[0 .. code_len-1].
   NOTE(review): is_self_checking is not used in this function.
*/
void VG_(add_to_unredir_transtab)( VexGuestExtents* vge,
                                   Addr64           entry,
                                   AddrH            code,
                                   UInt             code_len,
                                   Bool             is_self_checking )
{
   Int   i, j, code_szQ;
   HChar *srcP, *dstP;

   vg_assert(sanity_check_redir_tt_tc());

   /* This is the whole point: it's not redirected! */
   vg_assert(entry == vge->base[0]);

   /* How many ULongs of tc space are needed (8-byte units, rounded
      up). */
   code_szQ = (code_len + 7) / 8;

   /* Look for an empty unredir_tt slot. */
   for (i = 0; i < N_UNREDIR_TT; i++)
      if (!unredir_tt[i].inUse)
         break;

   if (i >= N_UNREDIR_TT || code_szQ > (N_UNREDIR_TCQ - unredir_tc_used)) {
      /* It's full; dump everything we currently have */
      init_unredir_tt_tc();
      i = 0;
   }

   vg_assert(unredir_tc_used >= 0);
   vg_assert(unredir_tc_used <= N_UNREDIR_TCQ);
   vg_assert(code_szQ > 0);
   vg_assert(code_szQ + unredir_tc_used <= N_UNREDIR_TCQ);
   vg_assert(i >= 0 && i < N_UNREDIR_TT);
   vg_assert(unredir_tt[i].inUse == False);

   if (i > unredir_tt_highwater)
      unredir_tt_highwater = i;

   /* Copy the code into the tc and flush the icache for it. */
   dstP = (HChar*)&unredir_tc[unredir_tc_used];
   srcP = (HChar*)code;
   for (j = 0; j < code_len; j++)
      dstP[j] = srcP[j];

   invalidate_icache( dstP, code_len );

   unredir_tt[i].inUse = True;
   unredir_tt[i].vge   = *vge;
   unredir_tt[i].hcode = (Addr)dstP;

   unredir_tc_used += code_szQ;
   vg_assert(unredir_tc_used >= 0);
   vg_assert(unredir_tc_used <= N_UNREDIR_TCQ);

   vg_assert(&dstP[code_len] <= (HChar*)&unredir_tc[unredir_tc_used]);
}

/* Linear search of the unredirected TT for guest_addr.  On success,
   writes the host code address to *result and returns True. */
Bool VG_(search_unredir_transtab) ( /*OUT*/AddrH* result,
                                    Addr64        guest_addr )
{
   Int i;
   for (i = 0; i < N_UNREDIR_TT; i++) {
      if (!unredir_tt[i].inUse)
         continue;
      if (unredir_tt[i].vge.base[0] == guest_addr) {
         *result = (AddrH)unredir_tt[i].hcode;
         return True;
      }
   }
   return False;
}

/* Mark dead any unredirected translations overlapping [guest_start,
   guest_start+range).  Only slots up to the high-water mark need
   inspecting; the tc space is not reclaimed. */
static void unredir_discard_translations( Addr64 guest_start, ULong range )
{
   Int i;

   vg_assert(sanity_check_redir_tt_tc());

   for (i = 0; i <= unredir_tt_highwater; i++) {
      if (unredir_tt[i].inUse
          && overlaps( guest_start, range, &unredir_tt[i].vge))
         unredir_tt[i].inUse = False;
   }
}


/*------------------------------------------------------------*/
/*--- Initialisation.                                      ---*/
/*------------------------------------------------------------*/

/* One-time startup of the whole translation table/cache subsystem:
   sanity-check type sizes and fast-cache layout, size the per-sector
   code caches from the tool's declared average translation size, zero
   the sector array, and initialise the fast caches and the
   unredirected tt/tc. */
void VG_(init_tt_tc) ( void )
{
   Int i, j, avg_codeszQ;

   vg_assert(!init_done);
   init_done = True;

   /* Otherwise lots of things go wrong... */
   vg_assert(sizeof(ULong) == 8);
   vg_assert(sizeof(Addr64) == 8);
   /* check fast cache entries really are 2 words long */
   vg_assert(sizeof(Addr) == sizeof(void*));
   vg_assert(sizeof(FastCacheEntry) == 2 * sizeof(Addr));
   /* check fast cache entries are packed back-to-back with no spaces */
   vg_assert(sizeof( VG_(tt_fast) ) == VG_TT_FAST_SIZE * sizeof(FastCacheEntry));
   /* check fast cache is aligned as we requested.  Not fatal if it
      isn't, but we might as well make sure. */
   vg_assert(VG_IS_16_ALIGNED( ((Addr) & VG_(tt_fast)[0]) ));

   if (VG_(clo_verbosity) > 2)
      VG_(message)(Vg_DebugMsg,
                   "TT/TC: VG_(init_tt_tc) "
                   "(startup of code management)");

   /* Figure out how big each tc area should be: room for the usable
      number of entries, each of (1 + average) ULong-sized units. */
   avg_codeszQ   = (VG_(details).avg_translation_sizeB + 7) / 8;
   tc_sector_szQ = N_TTES_PER_SECTOR_USABLE * (1 + avg_codeszQ);

   /* Ensure the calculated value is not way crazy. */
   vg_assert(tc_sector_szQ >= 2 * N_TTES_PER_SECTOR_USABLE);
   vg_assert(tc_sector_szQ <= 80 * N_TTES_PER_SECTOR_USABLE);

   /* Initialise the sectors: all empty, with no eclass storage
      allocated yet. */
   youngest_sector = 0;
   for (i = 0; i < N_SECTORS; i++) {
      sectors[i].tc = NULL;
      sectors[i].tt = NULL;
      sectors[i].tc_next = NULL;
      sectors[i].tt_n_inuse = 0;
      for (j = 0; j < ECLASS_N; j++) {
         sectors[i].ec2tte_size[j] = 0;
         sectors[i].ec2tte_used[j] = 0;
         sectors[i].ec2tte[j]      = NULL;
      }
   }

   /* Initialise the fast caches.  If not profiling (the usual case),
      we have to explicitly invalidate the fastN cache as
      invalidateFastCache() won't do that for us. */
   invalidateFastCache();
   if (VG_(clo_profile_flags) == 0)
      invalidateFastNCache();

   /* and the unredir tt/tc */
   init_unredir_tt_tc();

   if (VG_(clo_verbosity) > 2) {
      VG_(message)(Vg_DebugMsg,
         "TT/TC: cache: %d sectors of %d bytes each = %d total",
          N_SECTORS, 8 * tc_sector_szQ,
          N_SECTORS * 8 * tc_sector_szQ );
      VG_(message)(Vg_DebugMsg,
         "TT/TC: table: %d total entries, max occupancy %d (%d%%)",
         N_SECTORS * N_TTES_PER_SECTOR,
         N_SECTORS * N_TTES_PER_SECTOR_USABLE,
         SECTOR_TT_LIMIT_PERCENT );
   }

   VG_(debugLog)(2, "transtab",
      "cache: %d sectors of %d bytes each = %d total\n",
       N_SECTORS, 8 * tc_sector_szQ,
       N_SECTORS * 8 * tc_sector_szQ );
   VG_(debugLog)(2, "transtab",
      "table: %d total entries, max occupancy %d (%d%%)\n",
      N_SECTORS * N_TTES_PER_SECTOR,
      N_SECTORS * N_TTES_PER_SECTOR_USABLE,
      SECTOR_TT_LIMIT_PERCENT );
}


/*------------------------------------------------------------*/
/*--- Printing out statistics.                             ---*/
/*------------------------------------------------------------*/

/* Integer division that yields 0 rather than trapping when b is 0. */
static ULong safe_idiv( ULong a, ULong b )
{
   return (b == 0 ? 0 : a / b);
}

/* Number of superblocks translated so far (for stats reporting). */
UInt VG_(get_bbs_translated) ( void )
{
   return n_in_count;
}

/* Dump the subsystem's counters via VG_(message). */
void VG_(print_tt_tc_stats) ( void )
{
   VG_(message)(Vg_DebugMsg,
      " tt/tc: %,llu tt lookups requiring %,llu probes",
      n_full_lookups, n_lookup_probes );
   VG_(message)(Vg_DebugMsg,
      " tt/tc: %,llu fast-cache updates, %,llu flushes",
      n_fast_updates, n_fast_flushes );

   VG_(message)(Vg_DebugMsg,
                " transtab: new %,lld "
                "(%,llu -> %,llu; ratio %,llu:10) [%,llu scs]",
                n_in_count, n_in_osize, n_in_tsize,
                safe_idiv(10*n_in_tsize, n_in_osize),
                n_in_sc_count);
   /* The "?" "?" split avoids the "??)" trigraph. */
   VG_(message)(Vg_DebugMsg,
                " transtab: dumped %,llu (%,llu -> ?" "?)",
                n_dump_count, n_dump_osize );
   VG_(message)(Vg_DebugMsg,
                " transtab: discarded %,llu (%,llu -> ?" "?)",
                n_disc_count, n_disc_osize );

   /* Disabled debug dump of sector 0's per-eclass occupancy. */
   if (0) {
      Int i;
      VG_(printf)("\n");
      for (i = 0; i < ECLASS_N; i++) {
         VG_(printf)(" %4d", sectors[0].ec2tte_used[i]);
         if (i % 16 == 15)
            VG_(printf)("\n");
      }
      VG_(printf)("\n\n");
   }
}

/*------------------------------------------------------------*/
/*--- Printing out of profiling results.                   ---*/
/*------------------------------------------------------------*/

/* Profiling score of a translation: its static weight times its
   execution count. */
static ULong score ( TTEntry* tte )
{
   return ((ULong)tte->weight) * ((ULong)tte->count);
}

/* Fill tops[0 .. n_tops-1] with the highest-scoring translations, in
   descending score order (tops[0] highest).  Returns the total score
   summed over all InUse translations.  Placement is by linear
   insertion, shifting lower scorers up one slot. */
ULong VG_(get_BB_profile) ( BBProfEntry tops[], UInt n_tops )
{
   Int   sno, i, r, s;
   ULong score_total;

   /* First, compute the total weighted count, and find the top N
      ttes.  tops contains pointers to the most-used n_tops blocks, in
      descending order (viz, tops[0] is the highest scorer). */
   for (i = 0; i < n_tops; i++) {
      tops[i].addr  = 0;
      tops[i].score = 0;
   }

   score_total = 0;

   for (sno = 0; sno < N_SECTORS; sno++) {
      if (sectors[sno].tc == NULL)
         continue;
      for (i = 0; i < N_TTES_PER_SECTOR; i++) {
         if (sectors[sno].tt[i].status != InUse)
            continue;
         score_total += score(&sectors[sno].tt[i]);
         /* Find the rank for sectors[sno].tt[i].  Walk down from the
            bottom of tops past empty slots and lower scorers. */
         r = n_tops-1;
         while (True) {
            if (r == -1)
               break;
            if (tops[r].addr == 0) {
               r--;
               continue;
            }
            if ( score(&sectors[sno].tt[i]) > tops[r].score ) {
               r--;
               continue;
            }
            break;
         }
         r++;
         vg_assert(r >= 0 && r <= n_tops);
         /* This bb should be placed at r, and bbs above it shifted
            upwards one slot. */
         if (r < n_tops) {
            for (s = n_tops-1; s > r; s--)
               tops[s] = tops[s-1];
            tops[r].addr  = sectors[sno].tt[i].entry;
            tops[r].score = score( &sectors[sno].tt[i] );
         }
      }
   }

   return score_total;
}

/*--------------------------------------------------------------------*/
/*--- end                                                          ---*/
/*--------------------------------------------------------------------*/