m_transtab.c revision 9f207460d70d38c46c9e81996a3dcdf90961c6db
1 2/*--------------------------------------------------------------------*/ 3/*--- Management of the translation table and cache. ---*/ 4/*--- m_transtab.c ---*/ 5/*--------------------------------------------------------------------*/ 6 7/* 8 This file is part of Valgrind, a dynamic binary instrumentation 9 framework. 10 11 Copyright (C) 2000-2009 Julian Seward 12 jseward@acm.org 13 14 This program is free software; you can redistribute it and/or 15 modify it under the terms of the GNU General Public License as 16 published by the Free Software Foundation; either version 2 of the 17 License, or (at your option) any later version. 18 19 This program is distributed in the hope that it will be useful, but 20 WITHOUT ANY WARRANTY; without even the implied warranty of 21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 22 General Public License for more details. 23 24 You should have received a copy of the GNU General Public License 25 along with this program; if not, write to the Free Software 26 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 27 02111-1307, USA. 28 29 The GNU General Public License is contained in the file COPYING. 30*/ 31 32#include "pub_core_basics.h" 33#include "pub_core_debuglog.h" 34#include "pub_core_machine.h" // For VG(machine_get_VexArchInfo) 35#include "pub_core_libcbase.h" 36#include "pub_core_libcassert.h" 37#include "pub_core_libcprint.h" 38#include "pub_core_options.h" 39#include "pub_core_tooliface.h" // For VG_(details).avg_translation_sizeB 40#include "pub_core_transtab.h" 41#include "pub_core_aspacemgr.h" 42#include "pub_core_mallocfree.h" // VG_(out_of_memory_NORETURN) 43 44/* #define DEBUG_TRANSTAB */ 45 46 47/*-------------------------------------------------------------*/ 48/*--- Management of the FIFO-based translation table+cache. ---*/ 49/*-------------------------------------------------------------*/ 50 51/*------------------ CONSTANTS ------------------*/ 52 53/* Number of sectors the TC is divided into. If you need a larger 54 overall translation cache, increase this value. */ 55#define N_SECTORS 8 56 57/* Number of TC entries in each sector. This needs to be a prime 58 number to work properly, it must be <= 65535 (so that a TT index 59 fits in a UShort, leaving room for 0xFFFF(EC2TTE_DELETED) to denote 60 'deleted') and it is strongly recommended not to change this. 61 65521 is the largest prime <= 65535. */ 62#define N_TTES_PER_SECTOR /*30011*/ /*40009*/ 65521 63 64/* Because each sector contains a hash table of TTEntries, we need to 65 specify the maximum allowable loading, after which the sector is 66 deemed full. */ 67#define SECTOR_TT_LIMIT_PERCENT 80 68 69/* The sector is deemed full when this many entries are in it. */ 70#define N_TTES_PER_SECTOR_USABLE \ 71 ((N_TTES_PER_SECTOR * SECTOR_TT_LIMIT_PERCENT) / 100) 72 73/* Equivalence classes for fast address range deletion. There are 1 + 74 2^ECLASS_WIDTH bins. The highest one, ECLASS_MISC, describes an 75 address range which does not fall cleanly within any specific bin. 76 Note that ECLASS_SHIFT + ECLASS_WIDTH must be < 32. */ 77#define ECLASS_SHIFT 11 78#define ECLASS_WIDTH 8 79#define ECLASS_MISC (1 << ECLASS_WIDTH) 80#define ECLASS_N (1 + ECLASS_MISC) 81 82#define EC2TTE_DELETED 0xFFFF /* 16-bit special value */ 83 84 85/*------------------ TYPES ------------------*/ 86 87/* A translation-table entry. This indicates precisely which areas of 88 guest code are included in the translation, and contains all other 89 auxiliary info too. */ 90typedef 91 struct { 92 /* Profiling only: the count and weight (arbitrary meaning) for 93 this translation. Weight is a property of the translation 94 itself and computed once when the translation is created. 95 Count is an entry count for the translation and is 96 incremented by 1 every time the translation is used, if we 97 are profiling. */ 98 UInt count; 99 UShort weight; 100 101 /* Status of the slot. Note, we need to be able to do lazy 102 deletion, hence the Deleted state. */ 103 enum { InUse, Deleted, Empty } status; 104 105 /* 64-bit aligned pointer to one or more 64-bit words containing 106 the corresponding host code (must be in the same sector!) 107 This is a pointer into the sector's tc (code) area. */ 108 ULong* tcptr; 109 110 /* This is the original guest address that purportedly is the 111 entry point of the translation. You might think that .entry 112 should be the same as .vge->base[0], and most of the time it 113 is. However, when doing redirections, that is not the case. 114 .vge must always correctly describe the guest code sections 115 from which this translation was made. However, .entry may or 116 may not be a lie, depending on whether or not we're doing 117 redirection. */ 118 Addr64 entry; 119 120 /* This structure describes precisely what ranges of guest code 121 the translation covers, so we can decide whether or not to 122 delete it when translations of a given address range are 123 invalidated. */ 124 VexGuestExtents vge; 125 126 /* Address range summary info: these are pointers back to 127 eclass[] entries in the containing Sector. Those entries in 128 turn point back here -- the two structures are mutually 129 redundant but both necessary to make fast deletions work. 130 The eclass info is similar to, and derived from, this entry's 131 'vge' field, but it is not the same */ 132 UShort n_tte2ec; // # tte2ec pointers (1 to 3) 133 UShort tte2ec_ec[3]; // for each, the eclass # 134 UInt tte2ec_ix[3]; // and the index within the eclass. 135 // for i in 0 .. n_tte2ec-1 136 // sec->ec2tte[ tte2ec_ec[i] ][ tte2ec_ix[i] ] 137 // should be the index 138 // of this TTEntry in the containing Sector's tt array. 139 } 140 TTEntry; 141 142 143/* Finally, a sector itself. Each sector contains an array of 144 TCEntries, which hold code, and an array of TTEntries, containing 145 all required administrative info. Profiling is supported using the 146 TTEntry .count and .weight fields, if required. Each sector is 147 independent in that no cross-sector references are allowed. 148 149 If the sector is not in use, all three pointers are NULL and 150 tt_n_inuse is zero. 151*/ 152typedef 153 struct { 154 /* The TCEntry area. Size of this depends on the average 155 translation size. We try and size it so it becomes full 156 precisely when this sector's translation table (tt) reaches 157 its load limit (SECTOR_TT_LIMIT_PERCENT). */ 158 ULong* tc; 159 160 /* The TTEntry array. This is a fixed size, always containing 161 exactly N_TTES_PER_SECTOR entries. */ 162 TTEntry* tt; 163 164 /* This points to the current allocation point in tc. */ 165 ULong* tc_next; 166 167 /* The count of tt entries with state InUse. */ 168 Int tt_n_inuse; 169 170 /* Expandable arrays of tt indices for each of the ECLASS_N 171 address range equivalence classes. These hold indices into 172 the containing sector's tt array, which in turn should point 173 back here. */ 174 Int ec2tte_size[ECLASS_N]; 175 Int ec2tte_used[ECLASS_N]; 176 UShort* ec2tte[ECLASS_N]; 177 } 178 Sector; 179 180 181/*------------------ DECLS ------------------*/ 182 183/* The root data structure is an array of sectors. The index of the 184 youngest sector is recorded, and new translations are put into that 185 sector. When it fills up, we move along to the next sector and 186 start to fill that up, wrapping around at the end of the array. 187 That way, once all N_TC_SECTORS have been bought into use for the 188 first time, and are full, we then re-use the oldest sector, 189 endlessly. 190 191 When running, youngest sector should be between >= 0 and < 192 N_TC_SECTORS. The initial -1 value indicates the TT/TC system is 193 not yet initialised. 194*/ 195static Sector sectors[N_SECTORS]; 196static Int youngest_sector = -1; 197 198/* The number of ULongs in each TCEntry area. This is computed once 199 at startup and does not change. */ 200static Int tc_sector_szQ; 201 202 203/* Fast helper for the TC. A direct-mapped cache which holds a set of 204 recently used (guest address, host address) pairs. This array is 205 referred to directly from m_dispatch/dispatch-<platform>.S. 206 207 Entries in tt_fast may refer to any valid TC entry, regardless of 208 which sector it's in. Consequently we must be very careful to 209 invalidate this cache when TC entries are changed or disappear. 210 211 A special .guest address - TRANSTAB_BOGUS_GUEST_ADDR -- must be 212 pointed at to cause that cache entry to miss. This relies on the 213 assumption that no guest code actually has that address, hence a 214 value 0x1 seems good. m_translate gives the client a synthetic 215 segfault if it tries to execute at this address. 216*/ 217/* 218typedef 219 struct { 220 Addr guest; 221 Addr host; 222 } 223 FastCacheEntry; 224*/ 225/*global*/ __attribute__((aligned(16))) 226 FastCacheEntry VG_(tt_fast)[VG_TT_FAST_SIZE]; 227/* 228#define TRANSTAB_BOGUS_GUEST_ADDR ((Addr)1) 229*/ 230 231/* For profiling, we have a parallel array of pointers to .count 232 fields in TT entries. Again, these pointers must be invalidated 233 when translations disappear. A NULL pointer suffices to indicate 234 an unused slot. 235 236 When not profiling (the normal case, VG_(clo_profile_flags) == 0), 237 all tt_fastN entries are set to NULL at startup and never read nor 238 written after that. 239 240 When profiling (VG_(clo_profile_flags) > 0), tt_fast and tt_fastN 241 change together: if tt_fast[i].guest is TRANSTAB_BOGUS_GUEST_ADDR 242 then the corresponding tt_fastN[i] must be null. If 243 tt_fast[i].guest is any other value, then tt_fastN[i] *must* point 244 to the .count field of the corresponding TT entry. 245 246 tt_fast and tt_fastN are referred to from assembly code 247 (dispatch.S). 248*/ 249/*global*/ UInt* VG_(tt_fastN)[VG_TT_FAST_SIZE]; 250 251 252/* Make sure we're not used before initialisation. */ 253static Bool init_done = False; 254 255 256/*------------------ STATS DECLS ------------------*/ 257 258/* Number of fast-cache updates and flushes done. */ 259ULong n_fast_flushes = 0; 260ULong n_fast_updates = 0; 261 262/* Number of full lookups done. */ 263ULong n_full_lookups = 0; 264ULong n_lookup_probes = 0; 265 266/* Number/osize/tsize of translations entered; also the number of 267 those for which self-checking was requested. */ 268ULong n_in_count = 0; 269ULong n_in_osize = 0; 270ULong n_in_tsize = 0; 271ULong n_in_sc_count = 0; 272 273/* Number/osize of translations discarded due to lack of space. */ 274ULong n_dump_count = 0; 275ULong n_dump_osize = 0; 276 277/* Number/osize of translations discarded due to requests to do so. */ 278ULong n_disc_count = 0; 279ULong n_disc_osize = 0; 280 281 282/*-------------------------------------------------------------*/ 283/*--- Address-range equivalence class stuff ---*/ 284/*-------------------------------------------------------------*/ 285 286/* Return equivalence class number for a range. */ 287 288static Int range_to_eclass ( Addr64 start, UInt len ) 289{ 290 UInt mask = (1 << ECLASS_WIDTH) - 1; 291 UInt lo = (UInt)start; 292 UInt hi = lo + len - 1; 293 UInt loBits = (lo >> ECLASS_SHIFT) & mask; 294 UInt hiBits = (hi >> ECLASS_SHIFT) & mask; 295 if (loBits == hiBits) { 296 vg_assert(loBits < ECLASS_N-1); 297 return loBits; 298 } else { 299 return ECLASS_MISC; 300 } 301} 302 303 304/* Calculates the equivalence class numbers for any VexGuestExtent. 305 These are written in *eclasses, which must be big enough to hold 3 306 Ints. The number written, between 1 and 3, is returned. The 307 eclasses are presented in order, and any duplicates are removed. 308*/ 309 310static 311Int vexGuestExtents_to_eclasses ( /*OUT*/Int* eclasses, 312 VexGuestExtents* vge ) 313{ 314# define SWAP(_lv1,_lv2) \ 315 do { Int t = _lv1; _lv1 = _lv2; _lv2 = t; } while (0) 316 317 Int i, j, n_ec, r; 318 319 vg_assert(vge->n_used >= 1 && vge->n_used <= 3); 320 321 n_ec = 0; 322 for (i = 0; i < vge->n_used; i++) { 323 r = range_to_eclass( vge->base[i], (UInt)vge->len[i] ); 324 if (r == ECLASS_MISC) 325 goto bad; 326 /* only add if we haven't already seen it */ 327 for (j = 0; j < n_ec; j++) 328 if (eclasses[j] == r) 329 break; 330 if (j == n_ec) 331 eclasses[n_ec++] = r; 332 } 333 334 if (n_ec == 1) 335 return 1; 336 337 if (n_ec == 2) { 338 /* sort */ 339 if (eclasses[0] > eclasses[1]) 340 SWAP(eclasses[0], eclasses[1]); 341 return 2; 342 } 343 344 if (n_ec == 3) { 345 /* sort */ 346 if (eclasses[0] > eclasses[2]) 347 SWAP(eclasses[0], eclasses[2]); 348 if (eclasses[0] > eclasses[1]) 349 SWAP(eclasses[0], eclasses[1]); 350 if (eclasses[1] > eclasses[2]) 351 SWAP(eclasses[1], eclasses[2]); 352 return 3; 353 } 354 355 /* NOTREACHED */ 356 vg_assert(0); 357 358 bad: 359 eclasses[0] = ECLASS_MISC; 360 return 1; 361 362# undef SWAP 363} 364 365 366/* Add tteno to the set of entries listed for equivalence class ec in 367 this sector. Returns used location in eclass array. */ 368 369static 370UInt addEClassNo ( /*MOD*/Sector* sec, Int ec, UShort tteno ) 371{ 372 Int old_sz, new_sz, i, r; 373 UShort *old_ar, *new_ar; 374 375 vg_assert(ec >= 0 && ec < ECLASS_N); 376 vg_assert(tteno < N_TTES_PER_SECTOR); 377 378 if (0) VG_(printf)("ec %d gets %d\n", ec, (Int)tteno); 379 380 if (sec->ec2tte_used[ec] >= sec->ec2tte_size[ec]) { 381 382 vg_assert(sec->ec2tte_used[ec] == sec->ec2tte_size[ec]); 383 384 old_sz = sec->ec2tte_size[ec]; 385 old_ar = sec->ec2tte[ec]; 386 new_sz = old_sz==0 ? 8 : old_sz<64 ? 2*old_sz : (3*old_sz)/2; 387 new_ar = VG_(arena_malloc)(VG_AR_TTAUX, "transtab.aECN.1", 388 new_sz * sizeof(UShort)); 389 for (i = 0; i < old_sz; i++) 390 new_ar[i] = old_ar[i]; 391 if (old_ar) 392 VG_(arena_free)(VG_AR_TTAUX, old_ar); 393 sec->ec2tte_size[ec] = new_sz; 394 sec->ec2tte[ec] = new_ar; 395 396 if (0) VG_(printf)("expand ec %d to %d\n", ec, new_sz); 397 } 398 399 /* Common case */ 400 r = sec->ec2tte_used[ec]++; 401 vg_assert(r >= 0 && r < sec->ec2tte_size[ec]); 402 sec->ec2tte[ec][r] = tteno; 403 return (UInt)r; 404} 405 406 407/* 'vge' is being added to 'sec' at TT entry 'tteno'. Add appropriate 408 eclass entries to 'sec'. */ 409 410static 411void upd_eclasses_after_add ( /*MOD*/Sector* sec, Int tteno ) 412{ 413 Int i, r, eclasses[3]; 414 TTEntry* tte; 415 vg_assert(tteno >= 0 && tteno < N_TTES_PER_SECTOR); 416 417 tte = &sec->tt[tteno]; 418 r = vexGuestExtents_to_eclasses( eclasses, &tte->vge ); 419 420 vg_assert(r >= 1 && r <= 3); 421 tte->n_tte2ec = r; 422 423 for (i = 0; i < r; i++) { 424 tte->tte2ec_ec[i] = eclasses[i]; 425 tte->tte2ec_ix[i] = addEClassNo( sec, eclasses[i], (UShort)tteno ); 426 } 427} 428 429 430/* Check the eclass info in 'sec' to ensure it is consistent. Returns 431 True if OK, False if something's not right. Expensive. */ 432 433static Bool sanity_check_eclasses_in_sector ( Sector* sec ) 434{ 435# define BAD(_str) do { whassup = (_str); goto bad; } while (0) 436 437 HChar* whassup = NULL; 438 Int i, j, k, n, ec_num, ec_idx; 439 TTEntry* tte; 440 UShort tteno; 441 ULong* tce; 442 443 /* Basic checks on this sector */ 444 if (sec->tt_n_inuse < 0 || sec->tt_n_inuse > N_TTES_PER_SECTOR_USABLE) 445 BAD("invalid sec->tt_n_inuse"); 446 tce = sec->tc_next; 447 if (tce < &sec->tc[0] || tce > &sec->tc[tc_sector_szQ]) 448 BAD("sec->tc_next points outside tc"); 449 450 /* For each eclass ... */ 451 for (i = 0; i < ECLASS_N; i++) { 452 if (sec->ec2tte_size[i] == 0 && sec->ec2tte[i] != NULL) 453 BAD("ec2tte_size/ec2tte mismatch(1)"); 454 if (sec->ec2tte_size[i] != 0 && sec->ec2tte[i] == NULL) 455 BAD("ec2tte_size/ec2tte mismatch(2)"); 456 if (sec->ec2tte_used[i] < 0 457 || sec->ec2tte_used[i] > sec->ec2tte_size[i]) 458 BAD("implausible ec2tte_used"); 459 if (sec->ec2tte_used[i] == 0) 460 continue; 461 462 /* For each tt reference in each eclass .. ensure the reference 463 is to a valid tt entry, and that the entry's address ranges 464 really include this eclass. */ 465 466 for (j = 0; j < sec->ec2tte_used[i]; j++) { 467 tteno = sec->ec2tte[i][j]; 468 if (tteno == EC2TTE_DELETED) 469 continue; 470 if (tteno >= N_TTES_PER_SECTOR) 471 BAD("implausible tteno"); 472 tte = &sec->tt[tteno]; 473 if (tte->status != InUse) 474 BAD("tteno points to non-inuse tte"); 475 if (tte->n_tte2ec < 1 || tte->n_tte2ec > 3) 476 BAD("tte->n_tte2ec out of range"); 477 /* Exactly least one of tte->eclasses[0 .. tte->n_eclasses-1] 478 must equal i. Inspect tte's eclass info. */ 479 n = 0; 480 for (k = 0; k < tte->n_tte2ec; k++) { 481 if (k < tte->n_tte2ec-1 482 && tte->tte2ec_ec[k] >= tte->tte2ec_ec[k+1]) 483 BAD("tte->tte2ec_ec[..] out of order"); 484 ec_num = tte->tte2ec_ec[k]; 485 if (ec_num < 0 || ec_num >= ECLASS_N) 486 BAD("tte->tte2ec_ec[..] out of range"); 487 if (ec_num != i) 488 continue; 489 ec_idx = tte->tte2ec_ix[k]; 490 if (ec_idx < 0 || ec_idx >= sec->ec2tte_used[i]) 491 BAD("tte->tte2ec_ix[..] out of range"); 492 if (ec_idx == j) 493 n++; 494 } 495 if (n != 1) 496 BAD("tteno does not point back at eclass"); 497 } 498 } 499 500 /* That establishes that for each forward pointer from TTEntrys 501 there is a corresponding backward pointer from the eclass[] 502 arrays. However, it doesn't rule out the possibility of other, 503 bogus pointers in the eclass[] arrays. So do those similarly: 504 scan through them and check the TTEntryies they point at point 505 back. */ 506 507 for (i = 0; i < N_TTES_PER_SECTOR_USABLE; i++) { 508 509 tte = &sec->tt[i]; 510 if (tte->status == Empty || tte->status == Deleted) { 511 if (tte->n_tte2ec != 0) 512 BAD("tte->n_eclasses nonzero for unused tte"); 513 continue; 514 } 515 516 vg_assert(tte->status == InUse); 517 518 if (tte->n_tte2ec < 1 || tte->n_tte2ec > 3) 519 BAD("tte->n_eclasses out of range(2)"); 520 521 for (j = 0; j < tte->n_tte2ec; j++) { 522 ec_num = tte->tte2ec_ec[j]; 523 if (ec_num < 0 || ec_num >= ECLASS_N) 524 BAD("tte->eclass[..] out of range"); 525 ec_idx = tte->tte2ec_ix[j]; 526 if (ec_idx < 0 || ec_idx >= sec->ec2tte_used[ec_num]) 527 BAD("tte->ec_idx[..] out of range(2)"); 528 if (sec->ec2tte[ec_num][ec_idx] != i) 529 BAD("ec2tte does not point back to tte"); 530 } 531 } 532 533 return True; 534 535 bad: 536 if (whassup) 537 VG_(debugLog)(0, "transtab", "eclass sanity fail: %s\n", whassup); 538 539# if 0 540 VG_(printf)("eclass = %d\n", i); 541 VG_(printf)("tteno = %d\n", (Int)tteno); 542 switch (tte->status) { 543 case InUse: VG_(printf)("InUse\n"); break; 544 case Deleted: VG_(printf)("Deleted\n"); break; 545 case Empty: VG_(printf)("Empty\n"); break; 546 } 547 if (tte->status != Empty) { 548 for (k = 0; k < tte->vge.n_used; k++) 549 VG_(printf)("0x%llx %d\n", tte->vge.base[k], 550 (Int)tte->vge.len[k]); 551 } 552# endif 553 554 return False; 555 556# undef BAD 557} 558 559 560/* Sanity check absolutely everything. True == check passed. */ 561 562/* forwards */ 563static Bool sanity_check_redir_tt_tc ( void ); 564static Bool sanity_check_fastcache ( void ); 565 566static Bool sanity_check_all_sectors ( void ) 567{ 568 Int sno; 569 Bool sane; 570 Sector* sec; 571 for (sno = 0; sno < N_SECTORS; sno++) { 572 sec = §ors[sno]; 573 if (sec->tc == NULL) 574 continue; 575 sane = sanity_check_eclasses_in_sector( sec ); 576 if (!sane) 577 return False; 578 } 579 if ( !sanity_check_redir_tt_tc() ) 580 return False; 581 if ( !sanity_check_fastcache() ) 582 return False; 583 return True; 584} 585 586 587/*-------------------------------------------------------------*/ 588/*--- Add/find translations ---*/ 589/*-------------------------------------------------------------*/ 590 591static UInt vge_osize ( VexGuestExtents* vge ) 592{ 593 UInt i, n = 0; 594 for (i = 0; i < vge->n_used; i++) 595 n += (UInt)vge->len[i]; 596 return n; 597} 598 599static Bool isValidSector ( Int sector ) 600{ 601 if (sector < 0 || sector >= N_SECTORS) 602 return False; 603 return True; 604} 605 606static inline UInt HASH_TT ( Addr64 key ) 607{ 608 UInt kHi = (UInt)(key >> 32); 609 UInt kLo = (UInt)key; 610 UInt k32 = kHi ^ kLo; 611 UInt ror = 7; 612 if (ror > 0) 613 k32 = (k32 >> ror) | (k32 << (32-ror)); 614 return k32 % N_TTES_PER_SECTOR; 615} 616 617static void setFastCacheEntry ( Addr64 key, ULong* tcptr, UInt* count ) 618{ 619 UInt cno = (UInt)VG_TT_FAST_HASH(key); 620 VG_(tt_fast)[cno].guest = (Addr)key; 621 VG_(tt_fast)[cno].host = (Addr)tcptr; 622 if (VG_(clo_profile_flags) > 0) 623 VG_(tt_fastN)[cno] = count; 624 n_fast_updates++; 625 /* This shouldn't fail. It should be assured by m_translate 626 which should reject any attempt to make translation of code 627 starting at TRANSTAB_BOGUS_GUEST_ADDR. */ 628 vg_assert(VG_(tt_fast)[cno].guest != TRANSTAB_BOGUS_GUEST_ADDR); 629} 630 631/* Invalidate the fast cache's counter array, VG_(tt_fastN). */ 632static void invalidateFastNCache ( void ) 633{ 634 UInt j; 635 vg_assert(VG_TT_FAST_SIZE > 0 && (VG_TT_FAST_SIZE % 4) == 0); 636 for (j = 0; j < VG_TT_FAST_SIZE; j += 4) { 637 VG_(tt_fastN)[j+0] = NULL; 638 VG_(tt_fastN)[j+1] = NULL; 639 VG_(tt_fastN)[j+2] = NULL; 640 VG_(tt_fastN)[j+3] = NULL; 641 } 642 vg_assert(j == VG_TT_FAST_SIZE); 643} 644 645/* Invalidate the fast cache VG_(tt_fast). If profiling, also 646 invalidate the fast cache's counter array VG_(tt_fastN), otherwise 647 don't touch it. */ 648static void invalidateFastCache ( void ) 649{ 650 UInt j; 651 /* This loop is popular enough to make it worth unrolling a 652 bit, at least on ppc32. */ 653 vg_assert(VG_TT_FAST_SIZE > 0 && (VG_TT_FAST_SIZE % 4) == 0); 654 for (j = 0; j < VG_TT_FAST_SIZE; j += 4) { 655 VG_(tt_fast)[j+0].guest = TRANSTAB_BOGUS_GUEST_ADDR; 656 VG_(tt_fast)[j+1].guest = TRANSTAB_BOGUS_GUEST_ADDR; 657 VG_(tt_fast)[j+2].guest = TRANSTAB_BOGUS_GUEST_ADDR; 658 VG_(tt_fast)[j+3].guest = TRANSTAB_BOGUS_GUEST_ADDR; 659 } 660 661 if (VG_(clo_profile_flags) > 0) 662 invalidateFastNCache(); 663 664 vg_assert(j == VG_TT_FAST_SIZE); 665 n_fast_flushes++; 666} 667 668static Bool sanity_check_fastcache ( void ) 669{ 670 UInt j; 671 if (0) VG_(printf)("sanity check fastcache\n"); 672 if (VG_(clo_profile_flags) > 0) { 673 /* profiling */ 674 for (j = 0; j < VG_TT_FAST_SIZE; j++) { 675 if (VG_(tt_fastN)[j] == NULL 676 && VG_(tt_fast)[j].guest != TRANSTAB_BOGUS_GUEST_ADDR) 677 return False; 678 if (VG_(tt_fastN)[j] != NULL 679 && VG_(tt_fast)[j].guest == TRANSTAB_BOGUS_GUEST_ADDR) 680 return False; 681 } 682 } else { 683 /* not profiling */ 684 for (j = 0; j < VG_TT_FAST_SIZE; j++) { 685 if (VG_(tt_fastN)[j] != NULL) 686 return False; 687 } 688 } 689 return True; 690} 691 692static void initialiseSector ( Int sno ) 693{ 694 Int i; 695 SysRes sres; 696 Sector* sec; 697 vg_assert(isValidSector(sno)); 698 699 sec = §ors[sno]; 700 701 if (sec->tc == NULL) { 702 703 /* Sector has never been used before. Need to allocate tt and 704 tc. */ 705 vg_assert(sec->tt == NULL); 706 vg_assert(sec->tc_next == NULL); 707 vg_assert(sec->tt_n_inuse == 0); 708 for (i = 0; i < ECLASS_N; i++) { 709 vg_assert(sec->ec2tte_size[i] == 0); 710 vg_assert(sec->ec2tte_used[i] == 0); 711 vg_assert(sec->ec2tte[i] == NULL); 712 } 713 714 VG_(debugLog)(1,"transtab", "allocate sector %d\n", sno); 715 716 sres = VG_(am_mmap_anon_float_valgrind)( 8 * tc_sector_szQ ); 717 if (sres.isError) { 718 VG_(out_of_memory_NORETURN)("initialiseSector(TC)", 719 8 * tc_sector_szQ ); 720 /*NOTREACHED*/ 721 } 722 sec->tc = (ULong*)sres.res; 723 724 sres = VG_(am_mmap_anon_float_valgrind) 725 ( N_TTES_PER_SECTOR * sizeof(TTEntry) ); 726 if (sres.isError) { 727 VG_(out_of_memory_NORETURN)("initialiseSector(TT)", 728 N_TTES_PER_SECTOR * sizeof(TTEntry) ); 729 /*NOTREACHED*/ 730 } 731 sec->tt = (TTEntry*)sres.res; 732 733 for (i = 0; i < N_TTES_PER_SECTOR; i++) { 734 sec->tt[i].status = Empty; 735 sec->tt[i].n_tte2ec = 0; 736 } 737 738 if (VG_(clo_verbosity) > 2) 739 VG_(message)(Vg_DebugMsg, "TT/TC: initialise sector %d", sno); 740 741 } else { 742 743 /* Sector has been used before. Dump the old contents. */ 744 VG_(debugLog)(1,"transtab", "recycle sector %d\n", sno); 745 vg_assert(sec->tt != NULL); 746 vg_assert(sec->tc_next != NULL); 747 n_dump_count += sec->tt_n_inuse; 748 749 /* Visit each just-about-to-be-abandoned translation. */ 750 for (i = 0; i < N_TTES_PER_SECTOR; i++) { 751 if (sec->tt[i].status == InUse) { 752 vg_assert(sec->tt[i].n_tte2ec >= 1); 753 vg_assert(sec->tt[i].n_tte2ec <= 3); 754 n_dump_osize += vge_osize(&sec->tt[i].vge); 755 /* Tell the tool too. */ 756 if (VG_(needs).superblock_discards) { 757 VG_TDICT_CALL( tool_discard_superblock_info, 758 sec->tt[i].entry, 759 sec->tt[i].vge ); 760 } 761 } else { 762 vg_assert(sec->tt[i].n_tte2ec == 0); 763 } 764 sec->tt[i].status = Empty; 765 sec->tt[i].n_tte2ec = 0; 766 } 767 768 /* Free up the eclass structures. */ 769 for (i = 0; i < ECLASS_N; i++) { 770 if (sec->ec2tte_size[i] == 0) { 771 vg_assert(sec->ec2tte_used[i] == 0); 772 vg_assert(sec->ec2tte[i] == NULL); 773 } else { 774 vg_assert(sec->ec2tte[i] != NULL); 775 VG_(arena_free)(VG_AR_TTAUX, sec->ec2tte[i]); 776 sec->ec2tte[i] = NULL; 777 sec->ec2tte_size[i] = 0; 778 sec->ec2tte_used[i] = 0; 779 } 780 } 781 782 if (VG_(clo_verbosity) > 2) 783 VG_(message)(Vg_DebugMsg, "TT/TC: recycle sector %d", sno); 784 } 785 786 sec->tc_next = sec->tc; 787 sec->tt_n_inuse = 0; 788 789 invalidateFastCache(); 790} 791 792static void invalidate_icache ( void *ptr, Int nbytes ) 793{ 794# if defined(VGA_ppc32) || defined(VGA_ppc64) 795 Addr startaddr = (Addr) ptr; 796 Addr endaddr = startaddr + nbytes; 797 Addr cls; 798 Addr addr; 799 VexArchInfo vai; 800 801 if (nbytes == 0) return; 802 vg_assert(nbytes > 0); 803 804 VG_(machine_get_VexArchInfo)( NULL, &vai ); 805 cls = vai.ppc_cache_line_szB; 806 807 /* Stay sane .. */ 808 vg_assert(cls == 32 || cls == 64 || cls == 128); 809 810 startaddr &= ~(cls - 1); 811 for (addr = startaddr; addr < endaddr; addr += cls) 812 asm volatile("dcbst 0,%0" : : "r" (addr)); 813 asm volatile("sync"); 814 for (addr = startaddr; addr < endaddr; addr += cls) 815 asm volatile("icbi 0,%0" : : "r" (addr)); 816 asm volatile("sync; isync"); 817 818# elif defined(VGA_x86) 819 /* no need to do anything, hardware provides coherence */ 820 821# elif defined(VGA_amd64) 822 /* no need to do anything, hardware provides coherence */ 823 824# else 825# error "Unknown ARCH" 826# endif 827} 828 829 830/* Add a translation of vge to TT/TC. The translation is temporarily 831 in code[0 .. code_len-1]. 832 833 pre: youngest_sector points to a valid (although possibly full) 834 sector. 835*/ 836void VG_(add_to_transtab)( VexGuestExtents* vge, 837 Addr64 entry, 838 AddrH code, 839 UInt code_len, 840 Bool is_self_checking ) 841{ 842 Int tcAvailQ, reqdQ, y, i; 843 ULong *tcptr, *tcptr2; 844 UChar* srcP; 845 UChar* dstP; 846 847 vg_assert(init_done); 848 vg_assert(vge->n_used >= 1 && vge->n_used <= 3); 849 850 /* 60000: should agree with N_TMPBUF in m_translate.c. */ 851 vg_assert(code_len > 0 && code_len < 60000); 852 853 if (0) 854 VG_(printf)("add_to_transtab(entry = 0x%llx, len = %d)\n", 855 entry, code_len); 856 857 n_in_count++; 858 n_in_tsize += code_len; 859 n_in_osize += vge_osize(vge); 860 if (is_self_checking) 861 n_in_sc_count++; 862 863 y = youngest_sector; 864 vg_assert(isValidSector(y)); 865 866 if (sectors[y].tc == NULL) 867 initialiseSector(y); 868 869 /* Try putting the translation in this sector. */ 870 reqdQ = (code_len + 7) >> 3; 871 872 /* Will it fit in tc? */ 873 tcAvailQ = ((ULong*)(§ors[y].tc[tc_sector_szQ])) 874 - ((ULong*)(sectors[y].tc_next)); 875 vg_assert(tcAvailQ >= 0); 876 vg_assert(tcAvailQ <= tc_sector_szQ); 877 878 if (tcAvailQ < reqdQ 879 || sectors[y].tt_n_inuse >= N_TTES_PER_SECTOR_USABLE) { 880 /* No. So move on to the next sector. Either it's never been 881 used before, in which case it will get its tt/tc allocated 882 now, or it has been used before, in which case it is set to be 883 empty, hence throwing out the oldest sector. */ 884 vg_assert(tc_sector_szQ > 0); 885 VG_(debugLog)(1,"transtab", 886 "declare sector %d full " 887 "(TT loading %2d%%, TC loading %2d%%)\n", 888 y, 889 (100 * sectors[y].tt_n_inuse) 890 / N_TTES_PER_SECTOR, 891 (100 * (tc_sector_szQ - tcAvailQ)) 892 / tc_sector_szQ); 893 youngest_sector++; 894 if (youngest_sector >= N_SECTORS) 895 youngest_sector = 0; 896 y = youngest_sector; 897 initialiseSector(y); 898 } 899 900 /* Be sure ... */ 901 tcAvailQ = ((ULong*)(§ors[y].tc[tc_sector_szQ])) 902 - ((ULong*)(sectors[y].tc_next)); 903 vg_assert(tcAvailQ >= 0); 904 vg_assert(tcAvailQ <= tc_sector_szQ); 905 vg_assert(tcAvailQ >= reqdQ); 906 vg_assert(sectors[y].tt_n_inuse < N_TTES_PER_SECTOR_USABLE); 907 vg_assert(sectors[y].tt_n_inuse >= 0); 908 909 /* Copy into tc. */ 910 tcptr = sectors[y].tc_next; 911 vg_assert(tcptr >= §ors[y].tc[0]); 912 vg_assert(tcptr <= §ors[y].tc[tc_sector_szQ]); 913 914 dstP = (UChar*)tcptr; 915 srcP = (UChar*)code; 916 for (i = 0; i < code_len; i++) 917 dstP[i] = srcP[i]; 918 sectors[y].tc_next += reqdQ; 919 sectors[y].tt_n_inuse++; 920 921 invalidate_icache( dstP, code_len ); 922 923 /* more paranoia */ 924 tcptr2 = sectors[y].tc_next; 925 vg_assert(tcptr2 >= §ors[y].tc[0]); 926 vg_assert(tcptr2 <= §ors[y].tc[tc_sector_szQ]); 927 928 /* Find an empty tt slot, and use it. There must be such a slot 929 since tt is never allowed to get completely full. */ 930 i = HASH_TT(entry); 931 vg_assert(i >= 0 && i < N_TTES_PER_SECTOR); 932 while (True) { 933 if (sectors[y].tt[i].status == Empty 934 || sectors[y].tt[i].status == Deleted) 935 break; 936 i++; 937 if (i >= N_TTES_PER_SECTOR) 938 i = 0; 939 } 940 941 sectors[y].tt[i].status = InUse; 942 sectors[y].tt[i].tcptr = tcptr; 943 sectors[y].tt[i].count = 0; 944 sectors[y].tt[i].weight = 1; 945 sectors[y].tt[i].vge = *vge; 946 sectors[y].tt[i].entry = entry; 947 948 /* Update the fast-cache. */ 949 setFastCacheEntry( entry, tcptr, §ors[y].tt[i].count ); 950 951 /* Note the eclass numbers for this translation. */ 952 upd_eclasses_after_add( §ors[y], i ); 953} 954 955 956/* Search for the translation of the given guest address. If 957 requested, a successful search can also cause the fast-caches to be 958 updated. 959*/ 960Bool VG_(search_transtab) ( /*OUT*/AddrH* result, 961 Addr64 guest_addr, 962 Bool upd_cache ) 963{ 964 Int i, j, k, kstart, sno; 965 966 vg_assert(init_done); 967 /* Find the initial probe point just once. It will be the same in 968 all sectors and avoids multiple expensive % operations. */ 969 n_full_lookups++; 970 k = -1; 971 kstart = HASH_TT(guest_addr); 972 vg_assert(kstart >= 0 && kstart < N_TTES_PER_SECTOR); 973 974 /* Search in all the sectors. Although the order should not matter, 975 it might be most efficient to search in the order youngest to 976 oldest. */ 977 sno = youngest_sector; 978 for (i = 0; i < N_SECTORS; i++) { 979 980 if (sectors[sno].tc == NULL) 981 goto notfound; /* sector not in use. */ 982 983 k = kstart; 984 for (j = 0; j < N_TTES_PER_SECTOR; j++) { 985 n_lookup_probes++; 986 if (sectors[sno].tt[k].status == InUse 987 && sectors[sno].tt[k].entry == guest_addr) { 988 /* found it */ 989 if (upd_cache) 990 setFastCacheEntry( 991 guest_addr, sectors[sno].tt[k].tcptr, 992 §ors[sno].tt[k].count ); 993 if (result) 994 *result = (AddrH)sectors[sno].tt[k].tcptr; 995 return True; 996 } 997 if (sectors[sno].tt[k].status == Empty) 998 break; /* not found in this sector */ 999 k++; 1000 if (k == N_TTES_PER_SECTOR) 1001 k = 0; 1002 } 1003 1004 /* If we fall off the end, all entries are InUse and not 1005 matching, or Deleted. In any case we did not find it in this 1006 sector. */ 1007 1008 notfound: 1009 /* move to the next oldest sector */ 1010 sno = sno==0 ? (N_SECTORS-1) : (sno-1); 1011 } 1012 1013 /* Not found in any sector. */ 1014 return False; 1015} 1016 1017 1018/*-------------------------------------------------------------*/ 1019/*--- Delete translations. ---*/ 1020/*-------------------------------------------------------------*/ 1021 1022/* forward */ 1023static void unredir_discard_translations( Addr64, ULong ); 1024 1025/* Stuff for deleting translations which intersect with a given 1026 address range. Unfortunately, to make this run at a reasonable 1027 speed, it is complex. */ 1028 1029static inline 1030Bool overlap1 ( Addr64 s1, ULong r1, Addr64 s2, ULong r2 ) 1031{ 1032 Addr64 e1 = s1 + r1 - 1ULL; 1033 Addr64 e2 = s2 + r2 - 1ULL; 1034 if (e1 < s2 || e2 < s1) 1035 return False; 1036 return True; 1037} 1038 1039static inline 1040Bool overlaps ( Addr64 start, ULong range, VexGuestExtents* vge ) 1041{ 1042 if (overlap1(start, range, vge->base[0], (UInt)vge->len[0])) 1043 return True; 1044 if (vge->n_used < 2) 1045 return False; 1046 if (overlap1(start, range, vge->base[1], (UInt)vge->len[1])) 1047 return True; 1048 if (vge->n_used < 3) 1049 return False; 1050 if (overlap1(start, range, vge->base[2], (UInt)vge->len[2])) 1051 return True; 1052 return False; 1053} 1054 1055 1056/* Delete a tt entry, and update all the eclass data accordingly. */ 1057 1058static void delete_tte ( /*MOD*/Sector* sec, Int tteno ) 1059{ 1060 Int i, ec_num, ec_idx; 1061 TTEntry* tte; 1062 1063 vg_assert(tteno >= 0 && tteno < N_TTES_PER_SECTOR); 1064 tte = &sec->tt[tteno]; 1065 vg_assert(tte->status == InUse); 1066 vg_assert(tte->n_tte2ec >= 1 && tte->n_tte2ec <= 3); 1067 1068 /* Deal with the ec-to-tte links first. */ 1069 for (i = 0; i < tte->n_tte2ec; i++) { 1070 ec_num = (Int)tte->tte2ec_ec[i]; 1071 ec_idx = tte->tte2ec_ix[i]; 1072 vg_assert(ec_num >= 0 && ec_num < ECLASS_N); 1073 vg_assert(ec_idx >= 0); 1074 vg_assert(ec_idx < sec->ec2tte_used[ec_num]); 1075 /* Assert that the two links point at each other. */ 1076 vg_assert(sec->ec2tte[ec_num][ec_idx] == (UShort)tteno); 1077 /* "delete" the pointer back to here. */ 1078 sec->ec2tte[ec_num][ec_idx] = EC2TTE_DELETED; 1079 } 1080 1081 /* Now fix up this TTEntry. */ 1082 tte->status = Deleted; 1083 tte->n_tte2ec = 0; 1084 1085 /* Stats .. */ 1086 sec->tt_n_inuse--; 1087 n_disc_count++; 1088 n_disc_osize += vge_osize(&tte->vge); 1089 1090 /* Tell the tool too. */ 1091 if (VG_(needs).superblock_discards) { 1092 VG_TDICT_CALL( tool_discard_superblock_info, 1093 tte->entry, 1094 tte->vge ); 1095 } 1096} 1097 1098 1099/* Delete translations from sec which intersect specified range, but 1100 only consider translations in the specified eclass. */ 1101 1102static 1103Bool delete_translations_in_sector_eclass ( /*MOD*/Sector* sec, 1104 Addr64 guest_start, ULong range, 1105 Int ec ) 1106{ 1107 Int i; 1108 UShort tteno; 1109 Bool anyDeld = False; 1110 TTEntry* tte; 1111 1112 vg_assert(ec >= 0 && ec < ECLASS_N); 1113 1114 for (i = 0; i < sec->ec2tte_used[ec]; i++) { 1115 1116 tteno = sec->ec2tte[ec][i]; 1117 if (tteno == EC2TTE_DELETED) { 1118 /* already deleted */ 1119 continue; 1120 } 1121 1122 vg_assert(tteno < N_TTES_PER_SECTOR); 1123 1124 tte = &sec->tt[tteno]; 1125 vg_assert(tte->status == InUse); 1126 1127 if (overlaps( guest_start, range, &tte->vge )) { 1128 anyDeld = True; 1129 delete_tte( sec, (Int)tteno ); 1130 } 1131 1132 } 1133 1134 return anyDeld; 1135} 1136 1137 1138/* Delete translations from sec which intersect specified range, the 1139 slow way, by inspecting all translations in sec. */ 1140 1141static 1142Bool delete_translations_in_sector ( /*MOD*/Sector* sec, 1143 Addr64 guest_start, ULong range ) 1144{ 1145 Int i; 1146 Bool anyDeld = False; 1147 1148 for (i = 0; i < N_TTES_PER_SECTOR; i++) { 1149 if (sec->tt[i].status == InUse 1150 && overlaps( guest_start, range, &sec->tt[i].vge )) { 1151 anyDeld = True; 1152 delete_tte( sec, i ); 1153 } 1154 } 1155 1156 return anyDeld; 1157} 1158 1159 1160void VG_(discard_translations) ( Addr64 guest_start, ULong range, 1161 HChar* who ) 1162{ 1163 Sector* sec; 1164 Int sno, ec; 1165 Bool anyDeleted = False; 1166 1167 vg_assert(init_done); 1168 1169 VG_(debugLog)(2, "transtab", 1170 "discard_translations(0x%llx, %lld) req by %s\n", 1171 guest_start, range, who ); 1172 1173 /* Pre-deletion sanity check */ 1174 if (VG_(clo_sanity_level >= 4)) { 1175 Bool sane = sanity_check_all_sectors(); 1176 vg_assert(sane); 1177 } 1178 1179 if (range == 0) 1180 return; 1181 1182 /* There are two different ways to do this. 1183 1184 If the range fits within a single address-range equivalence 1185 class, as will be the case for a cache line sized invalidation, 1186 then we only have to inspect the set of translations listed in 1187 that equivalence class, and also in the "sin-bin" equivalence 1188 class ECLASS_MISC. 1189 1190 Otherwise, the invalidation is of a larger range and probably 1191 results from munmap. In this case it's (probably!) faster just 1192 to inspect all translations, dump those we don't want, and 1193 regenerate the equivalence class information (since modifying it 1194 in-situ is even more expensive). 1195 */ 1196 1197 /* First off, figure out if the range falls within a single class, 1198 and if so which one. */ 1199 1200 ec = ECLASS_MISC; 1201 if (range < (1ULL << ECLASS_SHIFT)) 1202 ec = range_to_eclass( guest_start, (UInt)range ); 1203 1204 /* if ec is ECLASS_MISC then we aren't looking at just a single 1205 class, so use the slow scheme. Else use the fast scheme, 1206 examining 'ec' and ECLASS_MISC. */ 1207 1208 if (ec != ECLASS_MISC) { 1209 1210 VG_(debugLog)(2, "transtab", 1211 " FAST, ec = %d\n", ec); 1212 1213 /* Fast scheme */ 1214 vg_assert(ec >= 0 && ec < ECLASS_MISC); 1215 1216 for (sno = 0; sno < N_SECTORS; sno++) { 1217 sec = §ors[sno]; 1218 if (sec->tc == NULL) 1219 continue; 1220 anyDeleted |= delete_translations_in_sector_eclass( 1221 sec, guest_start, range, ec ); 1222 anyDeleted |= delete_translations_in_sector_eclass( 1223 sec, guest_start, range, ECLASS_MISC ); 1224 } 1225 1226 } else { 1227 1228 /* slow scheme */ 1229 1230 VG_(debugLog)(2, "transtab", 1231 " SLOW, ec = %d\n", ec); 1232 1233 for (sno = 0; sno < N_SECTORS; sno++) { 1234 sec = §ors[sno]; 1235 if (sec->tc == NULL) 1236 continue; 1237 anyDeleted |= delete_translations_in_sector( 1238 sec, guest_start, range ); 1239 } 1240 1241 } 1242 1243 if (anyDeleted) 1244 invalidateFastCache(); 1245 1246 /* don't forget the no-redir cache */ 1247 unredir_discard_translations( guest_start, range ); 1248 1249 /* Post-deletion sanity check */ 1250 if (VG_(clo_sanity_level >= 4)) { 1251 Int i; 1252 TTEntry* tte; 1253 Bool sane = sanity_check_all_sectors(); 1254 vg_assert(sane); 1255 /* But now, also check the requested address range isn't 1256 present anywhere. */ 1257 for (sno = 0; sno < N_SECTORS; sno++) { 1258 sec = §ors[sno]; 1259 if (sec->tc == NULL) 1260 continue; 1261 for (i = 0; i < N_TTES_PER_SECTOR; i++) { 1262 tte = &sec->tt[i]; 1263 if (tte->status != InUse) 1264 continue; 1265 vg_assert(!overlaps( guest_start, range, &tte->vge )); 1266 } 1267 } 1268 } 1269} 1270 1271 1272/*------------------------------------------------------------*/ 1273/*--- AUXILIARY: the unredirected TT/TC ---*/ 1274/*------------------------------------------------------------*/ 1275 1276/* A very simple translation cache which holds a small number of 1277 unredirected translations. This is completely independent of the 1278 main tt/tc structures. When unredir_tc or unredir_tt becomes full, 1279 both structures are simply dumped and we start over. 1280 1281 Since these translations are unredirected, the search key is (by 1282 definition) the first address entry in the .vge field. */ 1283 1284/* Sized to hold 500 translations of average size 1000 bytes. */ 1285 1286#define UNREDIR_SZB 1000 1287 1288#define N_UNREDIR_TT 500 1289#define N_UNREDIR_TCQ (N_UNREDIR_TT * UNREDIR_SZB / sizeof(ULong)) 1290 1291typedef 1292 struct { 1293 VexGuestExtents vge; 1294 Addr hcode; 1295 Bool inUse; 1296 } 1297 UTCEntry; 1298 1299/* We just allocate forwards in _tc, never deleting. */ 1300static ULong *unredir_tc; 1301static Int unredir_tc_used = N_UNREDIR_TCQ; 1302 1303/* Slots in _tt can come into use and out again (.inUse). 1304 Nevertheless _tt_highwater is maintained so that invalidations 1305 don't have to scan all the slots when only a few are in use. 1306 _tt_highwater holds the index of the highest ever allocated 1307 slot. */ 1308static UTCEntry unredir_tt[N_UNREDIR_TT]; 1309static Int unredir_tt_highwater; 1310 1311 1312static void init_unredir_tt_tc ( void ) 1313{ 1314 Int i; 1315 if (unredir_tc == NULL) { 1316 SysRes sres = VG_(am_mmap_anon_float_valgrind)( N_UNREDIR_TT * UNREDIR_SZB ); 1317 if (sres.isError) { 1318 VG_(out_of_memory_NORETURN)("init_unredir_tt_tc", N_UNREDIR_TT * UNREDIR_SZB); 1319 /*NOTREACHED*/ 1320 } 1321 unredir_tc = (ULong *)sres.res; 1322 } 1323 unredir_tc_used = 0; 1324 for (i = 0; i < N_UNREDIR_TT; i++) 1325 unredir_tt[i].inUse = False; 1326 unredir_tt_highwater = -1; 1327} 1328 1329/* Do a sanity check; return False on failure. */ 1330static Bool sanity_check_redir_tt_tc ( void ) 1331{ 1332 Int i; 1333 if (unredir_tt_highwater < -1) return False; 1334 if (unredir_tt_highwater >= N_UNREDIR_TT) return False; 1335 1336 for (i = unredir_tt_highwater+1; i < N_UNREDIR_TT; i++) 1337 if (unredir_tt[i].inUse) 1338 return False; 1339 1340 if (unredir_tc_used < 0) return False; 1341 if (unredir_tc_used > N_UNREDIR_TCQ) return False; 1342 1343 return True; 1344} 1345 1346 1347/* Add an UNREDIRECTED translation of vge to TT/TC. The translation 1348 is temporarily in code[0 .. code_len-1]. 1349*/ 1350void VG_(add_to_unredir_transtab)( VexGuestExtents* vge, 1351 Addr64 entry, 1352 AddrH code, 1353 UInt code_len ) 1354{ 1355 Int i, j, code_szQ; 1356 HChar *srcP, *dstP; 1357 1358 vg_assert(sanity_check_redir_tt_tc()); 1359 1360 /* This is the whole point: it's not redirected! */ 1361 vg_assert(entry == vge->base[0]); 1362 1363 /* How many unredir_tt slots are needed */ 1364 code_szQ = (code_len + 7) / 8; 1365 1366 /* Look for an empty unredir_tc slot */ 1367 for (i = 0; i < N_UNREDIR_TT; i++) 1368 if (!unredir_tt[i].inUse) 1369 break; 1370 1371 if (i >= N_UNREDIR_TT || code_szQ > (N_UNREDIR_TCQ - unredir_tc_used)) { 1372 /* It's full; dump everything we currently have */ 1373 init_unredir_tt_tc(); 1374 i = 0; 1375 } 1376 1377 vg_assert(unredir_tc_used >= 0); 1378 vg_assert(unredir_tc_used <= N_UNREDIR_TCQ); 1379 vg_assert(code_szQ > 0); 1380 vg_assert(code_szQ + unredir_tc_used <= N_UNREDIR_TCQ); 1381 vg_assert(i >= 0 && i < N_UNREDIR_TT); 1382 vg_assert(unredir_tt[i].inUse == False); 1383 1384 if (i > unredir_tt_highwater) 1385 unredir_tt_highwater = i; 1386 1387 dstP = (HChar*)&unredir_tc[unredir_tc_used]; 1388 srcP = (HChar*)code; 1389 for (j = 0; j < code_len; j++) 1390 dstP[j] = srcP[j]; 1391 1392 invalidate_icache( dstP, code_len ); 1393 1394 unredir_tt[i].inUse = True; 1395 unredir_tt[i].vge = *vge; 1396 unredir_tt[i].hcode = (Addr)dstP; 1397 1398 unredir_tc_used += code_szQ; 1399 vg_assert(unredir_tc_used >= 0); 1400 vg_assert(unredir_tc_used <= N_UNREDIR_TCQ); 1401 1402 vg_assert(&dstP[code_len] <= (HChar*)&unredir_tc[unredir_tc_used]); 1403} 1404 1405Bool VG_(search_unredir_transtab) ( /*OUT*/AddrH* result, 1406 Addr64 guest_addr ) 1407{ 1408 Int i; 1409 for (i = 0; i < N_UNREDIR_TT; i++) { 1410 if (!unredir_tt[i].inUse) 1411 continue; 1412 if (unredir_tt[i].vge.base[0] == guest_addr) { 1413 *result = (AddrH)unredir_tt[i].hcode; 1414 return True; 1415 } 1416 } 1417 return False; 1418} 1419 1420static void unredir_discard_translations( Addr64 guest_start, ULong range ) 1421{ 1422 Int i; 1423 1424 vg_assert(sanity_check_redir_tt_tc()); 1425 1426 for (i = 0; i <= unredir_tt_highwater; i++) { 1427 if (unredir_tt[i].inUse 1428 && overlaps( guest_start, range, &unredir_tt[i].vge)) 1429 unredir_tt[i].inUse = False; 1430 } 1431} 1432 1433 1434/*------------------------------------------------------------*/ 1435/*--- Initialisation. ---*/ 1436/*------------------------------------------------------------*/ 1437 1438void VG_(init_tt_tc) ( void ) 1439{ 1440 Int i, j, avg_codeszQ; 1441 1442 vg_assert(!init_done); 1443 init_done = True; 1444 1445 /* Otherwise lots of things go wrong... */ 1446 vg_assert(sizeof(ULong) == 8); 1447 vg_assert(sizeof(Addr64) == 8); 1448 /* check fast cache entries really are 2 words long */ 1449 vg_assert(sizeof(Addr) == sizeof(void*)); 1450 vg_assert(sizeof(FastCacheEntry) == 2 * sizeof(Addr)); 1451 /* check fast cache entries are packed back-to-back with no spaces */ 1452 vg_assert(sizeof( VG_(tt_fast) ) == VG_TT_FAST_SIZE * sizeof(FastCacheEntry)); 1453 /* check fast cache is aligned as we requested. Not fatal if it 1454 isn't, but we might as well make sure. */ 1455 vg_assert(VG_IS_16_ALIGNED( ((Addr) & VG_(tt_fast)[0]) )); 1456 1457 if (VG_(clo_verbosity) > 2) 1458 VG_(message)(Vg_DebugMsg, 1459 "TT/TC: VG_(init_tt_tc) " 1460 "(startup of code management)"); 1461 1462 /* Figure out how big each tc area should be. */ 1463 avg_codeszQ = (VG_(details).avg_translation_sizeB + 7) / 8; 1464 tc_sector_szQ = N_TTES_PER_SECTOR_USABLE * (1 + avg_codeszQ); 1465 1466 /* Ensure the calculated value is not way crazy. */ 1467 vg_assert(tc_sector_szQ >= 2 * N_TTES_PER_SECTOR_USABLE); 1468 vg_assert(tc_sector_szQ <= 80 * N_TTES_PER_SECTOR_USABLE); 1469 1470 /* Initialise the sectors */ 1471 youngest_sector = 0; 1472 for (i = 0; i < N_SECTORS; i++) { 1473 sectors[i].tc = NULL; 1474 sectors[i].tt = NULL; 1475 sectors[i].tc_next = NULL; 1476 sectors[i].tt_n_inuse = 0; 1477 for (j = 0; j < ECLASS_N; j++) { 1478 sectors[i].ec2tte_size[j] = 0; 1479 sectors[i].ec2tte_used[j] = 0; 1480 sectors[i].ec2tte[j] = NULL; 1481 } 1482 } 1483 1484 /* Initialise the fast caches. If not profiling (the usual case), 1485 we have to explicitly invalidate the fastN cache as 1486 invalidateFastCache() won't do that for us. */ 1487 invalidateFastCache(); 1488 if (VG_(clo_profile_flags) == 0) 1489 invalidateFastNCache(); 1490 1491 /* and the unredir tt/tc */ 1492 init_unredir_tt_tc(); 1493 1494 if (VG_(clo_verbosity) > 2) { 1495 VG_(message)(Vg_DebugMsg, 1496 "TT/TC: cache: %d sectors of %d bytes each = %d total", 1497 N_SECTORS, 8 * tc_sector_szQ, 1498 N_SECTORS * 8 * tc_sector_szQ ); 1499 VG_(message)(Vg_DebugMsg, 1500 "TT/TC: table: %d total entries, max occupancy %d (%d%%)", 1501 N_SECTORS * N_TTES_PER_SECTOR, 1502 N_SECTORS * N_TTES_PER_SECTOR_USABLE, 1503 SECTOR_TT_LIMIT_PERCENT ); 1504 } 1505 1506 VG_(debugLog)(2, "transtab", 1507 "cache: %d sectors of %d bytes each = %d total\n", 1508 N_SECTORS, 8 * tc_sector_szQ, 1509 N_SECTORS * 8 * tc_sector_szQ ); 1510 VG_(debugLog)(2, "transtab", 1511 "table: %d total entries, max occupancy %d (%d%%)\n", 1512 N_SECTORS * N_TTES_PER_SECTOR, 1513 N_SECTORS * N_TTES_PER_SECTOR_USABLE, 1514 SECTOR_TT_LIMIT_PERCENT ); 1515} 1516 1517 1518/*------------------------------------------------------------*/ 1519/*--- Printing out statistics. ---*/ 1520/*------------------------------------------------------------*/ 1521 1522static ULong safe_idiv( ULong a, ULong b ) 1523{ 1524 return (b == 0 ? 0 : a / b); 1525} 1526 1527UInt VG_(get_bbs_translated) ( void ) 1528{ 1529 return n_in_count; 1530} 1531 1532void VG_(print_tt_tc_stats) ( void ) 1533{ 1534 VG_(message)(Vg_DebugMsg, 1535 " tt/tc: %'llu tt lookups requiring %'llu probes", 1536 n_full_lookups, n_lookup_probes ); 1537 VG_(message)(Vg_DebugMsg, 1538 " tt/tc: %'llu fast-cache updates, %'llu flushes", 1539 n_fast_updates, n_fast_flushes ); 1540 1541 VG_(message)(Vg_DebugMsg, 1542 " transtab: new %'lld " 1543 "(%'llu -> %'llu; ratio %'llu:10) [%'llu scs]", 1544 n_in_count, n_in_osize, n_in_tsize, 1545 safe_idiv(10*n_in_tsize, n_in_osize), 1546 n_in_sc_count); 1547 VG_(message)(Vg_DebugMsg, 1548 " transtab: dumped %'llu (%'llu -> ?" "?)", 1549 n_dump_count, n_dump_osize ); 1550 VG_(message)(Vg_DebugMsg, 1551 " transtab: discarded %'llu (%'llu -> ?" "?)", 1552 n_disc_count, n_disc_osize ); 1553 1554 if (0) { 1555 Int i; 1556 VG_(printf)("\n"); 1557 for (i = 0; i < ECLASS_N; i++) { 1558 VG_(printf)(" %4d", sectors[0].ec2tte_used[i]); 1559 if (i % 16 == 15) 1560 VG_(printf)("\n"); 1561 } 1562 VG_(printf)("\n\n"); 1563 } 1564} 1565 1566/*------------------------------------------------------------*/ 1567/*--- Printing out of profiling results. ---*/ 1568/*------------------------------------------------------------*/ 1569 1570static ULong score ( TTEntry* tte ) 1571{ 1572 return ((ULong)tte->weight) * ((ULong)tte->count); 1573} 1574 1575ULong VG_(get_BB_profile) ( BBProfEntry tops[], UInt n_tops ) 1576{ 1577 Int sno, i, r, s; 1578 ULong score_total; 1579 1580 /* First, compute the total weighted count, and find the top N 1581 ttes. tops contains pointers to the most-used n_tops blocks, in 1582 descending order (viz, tops[0] is the highest scorer). */ 1583 for (i = 0; i < n_tops; i++) { 1584 tops[i].addr = 0; 1585 tops[i].score = 0; 1586 } 1587 1588 score_total = 0; 1589 1590 for (sno = 0; sno < N_SECTORS; sno++) { 1591 if (sectors[sno].tc == NULL) 1592 continue; 1593 for (i = 0; i < N_TTES_PER_SECTOR; i++) { 1594 if (sectors[sno].tt[i].status != InUse) 1595 continue; 1596 score_total += score(§ors[sno].tt[i]); 1597 /* Find the rank for sectors[sno].tt[i]. */ 1598 r = n_tops-1; 1599 while (True) { 1600 if (r == -1) 1601 break; 1602 if (tops[r].addr == 0) { 1603 r--; 1604 continue; 1605 } 1606 if ( score(§ors[sno].tt[i]) > tops[r].score ) { 1607 r--; 1608 continue; 1609 } 1610 break; 1611 } 1612 r++; 1613 vg_assert(r >= 0 && r <= n_tops); 1614 /* This bb should be placed at r, and bbs above it shifted 1615 upwards one slot. */ 1616 if (r < n_tops) { 1617 for (s = n_tops-1; s > r; s--) 1618 tops[s] = tops[s-1]; 1619 tops[r].addr = sectors[sno].tt[i].entry; 1620 tops[r].score = score( §ors[sno].tt[i] ); 1621 } 1622 } 1623 } 1624 1625 return score_total; 1626} 1627 1628/*--------------------------------------------------------------------*/ 1629/*--- end ---*/ 1630/*--------------------------------------------------------------------*/ 1631