m_transtab.c revision 597314210494248b4fbefd45525a748439629218

/*--------------------------------------------------------------------*/
/*--- Management of the translation table and cache.               ---*/
/*---                                                 m_transtab.c ---*/
/*--------------------------------------------------------------------*/

/*
   This file is part of Valgrind, a dynamic binary instrumentation
   framework.

   Copyright (C) 2000-2013 Julian Seward
      jseward@acm.org

   This program is free software; you can redistribute it and/or
   modify it under the terms of the GNU General Public License as
   published by the Free Software Foundation; either version 2 of the
   License, or (at your option) any later version.

   This program is distributed in the hope that it will be useful, but
   WITHOUT ANY WARRANTY; without even the implied warranty of
   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
   General Public License for more details.

   You should have received a copy of the GNU General Public License
   along with this program; if not, write to the Free Software
   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA
   02111-1307, USA.

   The GNU General Public License is contained in the file COPYING.
*/

#include "pub_core_basics.h"
#include "pub_core_debuglog.h"
#include "pub_core_machine.h"    // For VG_(machine_get_VexArchInfo)
#include "pub_core_libcbase.h"
#include "pub_core_vki.h"        // to keep pub_core_libcproc.h happy, sigh
#include "pub_core_libcproc.h"   // VG_(invalidate_icache)
#include "pub_core_libcassert.h"
#include "pub_core_libcprint.h"
#include "pub_core_options.h"
#include "pub_core_tooliface.h"  // For VG_(details).avg_translation_sizeB
#include "pub_core_transtab.h"
#include "pub_core_aspacemgr.h"
#include "pub_core_mallocfree.h" // VG_(out_of_memory_NORETURN)
#include "pub_core_xarray.h"
#include "pub_core_dispatch.h"   // For VG_(disp_cp*) addresses


#define DEBUG_TRANSTAB 0


/*-------------------------------------------------------------*/
/*--- Management of the FIFO-based translation table+cache. ---*/
/*-------------------------------------------------------------*/

/* Nr of sectors provided via command line parameter. */
UInt VG_(clo_num_transtab_sectors) = N_SECTORS_DEFAULT;
/* Nr of sectors.
   Will be set by VG_(init_tt_tc) to VG_(clo_num_transtab_sectors). */
static UInt n_sectors = 0;

/*------------------ CONSTANTS ------------------*/
/* Number of TC entries in each sector.  This needs to be a prime
   number to work properly, it must be <= 65535 (so that a TT index
   fits in a UShort, leaving room for 0xFFFF (EC2TTE_DELETED) to denote
   'deleted') and it is strongly recommended not to change this.
   65521 is the largest prime <= 65535. */
#define N_TTES_PER_SECTOR /*10007*/ /*30011*/ /*40009*/ 65521

/* Because each sector contains a hash table of TTEntries, we need to
   specify the maximum allowable loading, after which the sector is
   deemed full. */
#define SECTOR_TT_LIMIT_PERCENT 65

/* The sector is deemed full when this many entries are in it. */
#define N_TTES_PER_SECTOR_USABLE \
           ((N_TTES_PER_SECTOR * SECTOR_TT_LIMIT_PERCENT) / 100)

/* Equivalence classes for fast address range deletion.  There are 1 +
   2^ECLASS_WIDTH bins.  The highest one, ECLASS_MISC, describes an
   address range which does not fall cleanly within any specific bin.
   Note that ECLASS_SHIFT + ECLASS_WIDTH must be < 32.
*/
#define ECLASS_SHIFT 11
#define ECLASS_WIDTH 8
#define ECLASS_MISC  (1 << ECLASS_WIDTH)
#define ECLASS_N     (1 + ECLASS_MISC)
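
/* Illustrative reading of these constants (an editorial example, not
   from the original code): with ECLASS_SHIFT == 11 and
   ECLASS_WIDTH == 8, a small range that stays inside one 2KB-aligned
   chunk maps to the bin given by bits [18:11] of its address; a range
   that crosses such a chunk boundary goes in the catch-all bin
   ECLASS_MISC.  Hence ECLASS_N == 257 bins in total. */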

#define EC2TTE_DELETED  0xFFFF /* 16-bit special value */


/*------------------ TYPES ------------------*/

/* In edges ("to-me") in the graph created by chaining. */
typedef
   struct {
      UInt from_sNo;   /* sector number */
      UInt from_tteNo; /* TTE number in given sector */
      UInt from_offs;  /* code offset from TCEntry::tcptr where the patch is */
      Bool to_fastEP;  /* Is the patch to a fast or slow entry point? */
   }
   InEdge;


/* Out edges ("from-me") in the graph created by chaining. */
typedef
   struct {
      UInt to_sNo;    /* sector number */
      UInt to_tteNo;  /* TTE number in given sector */
      UInt from_offs; /* code offset in owning translation where patch is */
   }
   OutEdge;

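
/* Editorial note (an inference, not from the original code): the two
   edge-array types below keep a few edges in a fixed inline array and
   spill to a heap-allocated XArray only on overflow -- presumably
   because most translations have just a handful of in/out edges.  The
   spill logic lives in InEdgeArr__add / OutEdgeArr__add further down. */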

#define N_FIXED_IN_EDGE_ARR 3
typedef
   struct {
      UInt     n_fixed; /* 0 .. N_FIXED_IN_EDGE_ARR */
      InEdge   fixed[N_FIXED_IN_EDGE_ARR];
      XArray*  var; /* XArray* of InEdgeArr */
   }
   InEdgeArr;

#define N_FIXED_OUT_EDGE_ARR 2
typedef
   struct {
      UInt      n_fixed; /* 0 .. N_FIXED_OUT_EDGE_ARR */
      OutEdge   fixed[N_FIXED_OUT_EDGE_ARR];
      XArray*   var; /* XArray* of OutEdgeArr */
   }
   OutEdgeArr;


/* A translation-table entry.  This indicates precisely which areas of
   guest code are included in the translation, and contains all other
   auxiliary info too. */
typedef
   struct {
      /* Profiling only: the count and weight (arbitrary meaning) for
         this translation.  Weight is a property of the translation
         itself and computed once when the translation is created.
         Count is an entry count for the translation and is
         incremented by 1 every time the translation is used, if we
         are profiling. */
      ULong    count;
      UShort   weight;

      /* Status of the slot.  Note, we need to be able to do lazy
         deletion, hence the Deleted state. */
      enum { InUse, Deleted, Empty } status;

      /* 64-bit aligned pointer to one or more 64-bit words containing
         the corresponding host code (must be in the same sector!)
         This is a pointer into the sector's tc (code) area. */
      ULong* tcptr;

      /* This is the original guest address that purportedly is the
         entry point of the translation.  You might think that .entry
         should be the same as .vge->base[0], and most of the time it
         is.  However, when doing redirections, that is not the case.
         .vge must always correctly describe the guest code sections
         from which this translation was made.  However, .entry may or
         may not be a lie, depending on whether or not we're doing
         redirection. */
      Addr64 entry;

      /* This structure describes precisely what ranges of guest code
         the translation covers, so we can decide whether or not to
         delete it when translations of a given address range are
         invalidated. */
      VexGuestExtents vge;

      /* Address range summary info: these are pointers back to
         eclass[] entries in the containing Sector.  Those entries in
         turn point back here -- the two structures are mutually
         redundant but both necessary to make fast deletions work.
         The eclass info is similar to, and derived from, this entry's
         'vge' field, but it is not the same. */
      UShort n_tte2ec;      // # tte2ec pointers (1 to 3)
      UShort tte2ec_ec[3];  // for each, the eclass #
      UInt   tte2ec_ix[3];  // and the index within the eclass.
      // for i in 0 .. n_tte2ec-1
      //    sec->ec2tte[ tte2ec_ec[i] ][ tte2ec_ix[i] ]
      // should be the index of this TTEntry in the containing
      // Sector's tt array.

      /* Admin information for chaining.  'in_edges' is a set of the
         patch points which jump to this translation -- hence are
         predecessors in the control flow graph.  'out_edges' points
         to successors in the control flow graph -- translations to
         which this one has a patched jump.  In short these are just
         backwards and forwards edges in the graph of patched-together
         blocks.  The 'in_edges' contain slightly more info, enough
         that we can undo the chaining of each mentioned patch point.
         The 'out_edges' list exists only so that we can visit the
         'in_edges' entries of all blocks we're patched through to, in
         order to remove ourselves from them when we're deleted. */

      /* A translation can disappear for two reasons:
         1. erased (as part of the oldest sector cleanup) when the
            youngest sector is full.
         2. discarded due to calls to VG_(discard_translations).
            VG_(discard_translations) sets the status of the
            translation to 'Deleted'.
            Among others, the gdbserver discards one or more translations
            when a breakpoint is inserted or removed at an Addr,
            or when single stepping mode is enabled/disabled,
            or when a translation is instrumented for gdbserver
            (all the target jumps of this translation are
            invalidated).

         So, it is possible that the translation A to be patched
         (to obtain a patched jump from A to B) is invalidated
         after B is translated and before A is patched.
         In case a translation is erased or discarded, the patching
         cannot be done.  VG_(tt_tc_do_chaining) and
         find_TTEntry_from_hcode check that the 'from' translation
         still exists before doing the patching.

         Is it safe to erase or discard the current translation E being
         executed?  Amazingly, yes, it is safe.
         Here is the explanation:

         The translation E being executed can only be erased if a new
         translation N is being done.  A new translation is done only
         if the host addr is a not yet patched jump to another
         translation.  In such a case, the guest address of N is
         assigned to the PC in the VEX state.  Control is returned
         to the scheduler.  N will be translated.  This can erase the
         translation E (in case of sector full).  VG_(tt_tc_do_chaining)
         will not do the chaining to a translation E that can no longer
         be found.  The execution will continue at the current guest PC
         (i.e. the translation N).
         => it is safe to erase the current translation being executed.

         The current translation E being executed can also be discarded
         (e.g. by gdbserver).  VG_(discard_translations) will mark
         this translation E as Deleted, but the translation itself
         is not erased.  In particular, its host code can only
         be overwritten or erased in case a new translation is done.
         A new translation will only be done if a not yet translated
         jump is to be executed.  The execution of the Deleted translation
         E will continue until a not-yet-patched jump is encountered.
         This situation is then similar to the 'erasing' case above:
         the current translation E can be erased or overwritten, as the
         execution will continue at the new translation N.
      */

      /* It is possible, although very unlikely, that a block A has
         more than one patched jump to block B.  This could happen if
         (eg) A finishes "jcond B; jmp B".

         This means in turn that B's in_edges set can list A more than
         once (twice in this example).  However, each such entry must
         have a different from_offs, since a patched jump can only
         jump to one place at once (it's meaningless for it to have
         multiple destinations.)  IOW, the successor and predecessor
         edges in the graph are not uniquely determined by a
         TTEntry --> TTEntry pair, but rather by a
         (TTEntry,offset) --> TTEntry triple.

         If A has multiple edges to B then B will mention A multiple
         times in its in_edges.  To make things simpler, we then
         require that A mentions B exactly the same number of times in
         its out_edges.  Furthermore, a matching out-in pair must have
         the same offset (from_offs).  This facilitates sanity
         checking, and it facilitates establishing the invariant that
         an out_edges set may not have duplicates when using the
         equality defined by (TTEntry,offset).  Hence the out_edges
         and in_edges sets really do both have set semantics.

         eg if A has been patched to B at offsets 42 and 87 (in A)
         then   A.out_edges = { (B,42), (B,87) }   (in any order)
         and    B.in_edges  = { (A,42), (A,87) }   (in any order)

         Hence for each node pair P->Q in the graph, there's a 1:1
         mapping between P.out_edges and Q.in_edges.
      */
      InEdgeArr  in_edges;
      OutEdgeArr out_edges;
   }
   TTEntry;


/* A structure used for mapping host code addresses back to the
   relevant TTEntry.  Used when doing chaining, for finding the
   TTEntry to which some arbitrary patch address belongs. */
typedef
   struct {
      UChar* start;
      UInt   len;
      UInt   tteNo;
   }
   HostExtent;
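
/* Usage note (illustrative): a sector's host_extents XArray stores
   these records sorted by .start, so a host patch address can be
   mapped back to its owning TTEntry by binary search --
   find_TTEntry_from_hcode() below does exactly that, via
   VG_(lookupXA_UNSAFE) with HostExtent__cmpOrd. */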

/* Finally, a sector itself.  Each sector contains an array of
   TCEntries, which hold code, and an array of TTEntries, containing
   all required administrative info.  Profiling is supported using the
   TTEntry .count and .weight fields, if required.

   If the sector is not in use, all three pointers are NULL and
   tt_n_inuse is zero.
*/
typedef
   struct {
      /* The TCEntry area.  Size of this depends on the average
         translation size.  We try and size it so it becomes full
         precisely when this sector's translation table (tt) reaches
         its load limit (SECTOR_TT_LIMIT_PERCENT). */
      ULong* tc;

      /* The TTEntry array.  This is a fixed size, always containing
         exactly N_TTES_PER_SECTOR entries. */
      TTEntry* tt;

      /* This points to the current allocation point in tc. */
      ULong* tc_next;

      /* The count of tt entries with state InUse. */
      Int tt_n_inuse;

      /* Expandable arrays of tt indices for each of the ECLASS_N
         address range equivalence classes.  These hold indices into
         the containing sector's tt array, which in turn should point
         back here. */
      Int     ec2tte_size[ECLASS_N];
      Int     ec2tte_used[ECLASS_N];
      UShort* ec2tte[ECLASS_N];

      /* The host extents.  The [start, +len) ranges are constructed
         in strictly non-overlapping order, so we can binary search
         them at any time. */
      XArray* host_extents; /* XArray* of HostExtent */
   }
   Sector;
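
/* Sizing sketch (an assumption based on the comments above, since
   VG_(init_tt_tc) is not shown in this section): to make tc and tt
   fill up at about the same time, tc_sector_szQ wants to be roughly

      N_TTES_PER_SECTOR_USABLE * (average translation size in ULongs)

   where the per-tool average presumably comes from
   VG_(details).avg_translation_sizeB (see the include list above). */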

/*------------------ DECLS ------------------*/

/* The root data structure is an array of sectors.  The index of the
   youngest sector is recorded, and new translations are put into that
   sector.  When it fills up, we move along to the next sector and
   start to fill that up, wrapping around at the end of the array.
   That way, once all N_TC_SECTORS have been brought into use for the
   first time, and are full, we then re-use the oldest sector,
   endlessly.

   When running, the youngest sector should be >= 0 and <
   N_TC_SECTORS.  The initial -1 value indicates the TT/TC system is
   not yet initialised.
*/
static Sector sectors[MAX_N_SECTORS];
static Int    youngest_sector = -1;

/* The number of ULongs in each TCEntry area.  This is computed once
   at startup and does not change. */
static Int tc_sector_szQ = 0;


/* A list of sector numbers, in the order which they should be
   searched to find translations.  This is an optimisation to be used
   when searching for translations and should not affect
   correctness.  -1 denotes "no entry". */
static Int sector_search_order[MAX_N_SECTORS];


/* Fast helper for the TC.  A direct-mapped cache which holds a set of
   recently used (guest address, host address) pairs.  This array is
   referred to directly from m_dispatch/dispatch-<platform>.S.

   Entries in tt_fast may refer to any valid TC entry, regardless of
   which sector it's in.  Consequently we must be very careful to
   invalidate this cache when TC entries are changed or disappear.

   A special .guest address -- TRANSTAB_BOGUS_GUEST_ADDR -- must be
   pointed at to cause that cache entry to miss.  This relies on the
   assumption that no guest code actually has that address, hence a
   value 0x1 seems good.  m_translate gives the client a synthetic
   segfault if it tries to execute at this address.
*/
/*
typedef
   struct {
      Addr guest;
      Addr host;
   }
   FastCacheEntry;
*/
/*global*/ __attribute__((aligned(16)))
           FastCacheEntry VG_(tt_fast)[VG_TT_FAST_SIZE];
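
/* Illustrative probe sequence (a sketch of what the assembly
   dispatcher does, not code from this file):

      slot = VG_TT_FAST_HASH(guest_addr);
      if (VG_(tt_fast)[slot].guest == guest_addr)
         jump to VG_(tt_fast)[slot].host          -- hit: run host code
      else
         fall back to the full VG_(search_transtab) lookup   -- miss

   A flushed entry holds TRANSTAB_BOGUS_GUEST_ADDR (0x1) in .guest,
   which never matches a real guest address, so it always misses. */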

/* Make sure we're not used before initialisation. */
static Bool init_done = False;


/*------------------ STATS DECLS ------------------*/

/* Number of fast-cache updates and flushes done. */
static ULong n_fast_flushes = 0;
static ULong n_fast_updates = 0;

/* Number of full lookups done. */
static ULong n_full_lookups  = 0;
static ULong n_lookup_probes = 0;

/* Number/osize/tsize of translations entered; also the number of
   those for which self-checking was requested. */
static ULong n_in_count    = 0;
static ULong n_in_osize    = 0;
static ULong n_in_tsize    = 0;
static ULong n_in_sc_count = 0;

/* Number/osize of translations discarded due to lack of space. */
static ULong n_dump_count = 0;
static ULong n_dump_osize = 0;

/* Number/osize of translations discarded due to requests to do so. */
static ULong n_disc_count = 0;
static ULong n_disc_osize = 0;


/*-------------------------------------------------------------*/
/*--- Misc                                                  ---*/
/*-------------------------------------------------------------*/

static void* ttaux_malloc ( const HChar* tag, SizeT n )
{
   return VG_(arena_malloc)(VG_AR_TTAUX, tag, n);
}

static void ttaux_free ( void* p )
{
   VG_(arena_free)(VG_AR_TTAUX, p);
}


/*-------------------------------------------------------------*/
/*--- Chaining support                                      ---*/
/*-------------------------------------------------------------*/

static inline TTEntry* index_tte ( UInt sNo, UInt tteNo )
{
   vg_assert(sNo < n_sectors);
   vg_assert(tteNo < N_TTES_PER_SECTOR);
   Sector* s = &sectors[sNo];
   vg_assert(s->tt);
   TTEntry* tte = &s->tt[tteNo];
   vg_assert(tte->status == InUse);
   return tte;
}

static void InEdge__init ( InEdge* ie )
{
   ie->from_sNo   = -1; /* invalid */
   ie->from_tteNo = 0;
   ie->from_offs  = 0;
   ie->to_fastEP  = False;
}

static void OutEdge__init ( OutEdge* oe )
{
   oe->to_sNo    = -1; /* invalid */
   oe->to_tteNo  = 0;
   oe->from_offs = 0;
}

static void TTEntry__init ( TTEntry* tte )
{
   VG_(memset)(tte, 0, sizeof(*tte));
}

static UWord InEdgeArr__size ( InEdgeArr* iea )
{
   if (iea->var) {
      vg_assert(iea->n_fixed == 0);
      return VG_(sizeXA)(iea->var);
   } else {
      vg_assert(iea->n_fixed <= N_FIXED_IN_EDGE_ARR);
      return iea->n_fixed;
   }
}

static void InEdgeArr__makeEmpty ( InEdgeArr* iea )
{
   if (iea->var) {
      vg_assert(iea->n_fixed == 0);
      VG_(deleteXA)(iea->var);
      iea->var = NULL;
   } else {
      vg_assert(iea->n_fixed <= N_FIXED_IN_EDGE_ARR);
      iea->n_fixed = 0;
   }
}

static
InEdge* InEdgeArr__index ( InEdgeArr* iea, UWord i )
{
   if (iea->var) {
      vg_assert(iea->n_fixed == 0);
      return (InEdge*)VG_(indexXA)(iea->var, i);
   } else {
      vg_assert(i < iea->n_fixed);
      return &iea->fixed[i];
   }
}

static
void InEdgeArr__deleteIndex ( InEdgeArr* iea, UWord i )
{
   if (iea->var) {
      vg_assert(iea->n_fixed == 0);
      VG_(removeIndexXA)(iea->var, i);
   } else {
      vg_assert(i < iea->n_fixed);
      for (; i+1 < iea->n_fixed; i++) {
         iea->fixed[i] = iea->fixed[i+1];
      }
      iea->n_fixed--;
   }
}

static
void InEdgeArr__add ( InEdgeArr* iea, InEdge* ie )
{
   if (iea->var) {
      vg_assert(iea->n_fixed == 0);
      VG_(addToXA)(iea->var, ie);
   } else {
      vg_assert(iea->n_fixed <= N_FIXED_IN_EDGE_ARR);
      if (iea->n_fixed == N_FIXED_IN_EDGE_ARR) {
         /* The fixed array is full, so we have to initialise an
            XArray and copy the fixed array into it. */
         iea->var = VG_(newXA)(ttaux_malloc, "transtab.IEA__add",
                               ttaux_free,
                               sizeof(InEdge));
         UWord i;
         for (i = 0; i < iea->n_fixed; i++) {
            VG_(addToXA)(iea->var, &iea->fixed[i]);
         }
         VG_(addToXA)(iea->var, ie);
         iea->n_fixed = 0;
      } else {
         /* Just add to the fixed array. */
         iea->fixed[iea->n_fixed++] = *ie;
      }
   }
}
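
/* Behavioural note (illustrative, derived from the code above): adding
   a fourth InEdge to a full fixed[] part copies the three fixed
   entries into a fresh XArray and resets n_fixed to 0; from then on
   the XArray is the sole store.  This is why every helper in this
   family asserts that (var != NULL) implies (n_fixed == 0). */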

static UWord OutEdgeArr__size ( OutEdgeArr* oea )
{
   if (oea->var) {
      vg_assert(oea->n_fixed == 0);
      return VG_(sizeXA)(oea->var);
   } else {
      vg_assert(oea->n_fixed <= N_FIXED_OUT_EDGE_ARR);
      return oea->n_fixed;
   }
}

static void OutEdgeArr__makeEmpty ( OutEdgeArr* oea )
{
   if (oea->var) {
      vg_assert(oea->n_fixed == 0);
      VG_(deleteXA)(oea->var);
      oea->var = NULL;
   } else {
      vg_assert(oea->n_fixed <= N_FIXED_OUT_EDGE_ARR);
      oea->n_fixed = 0;
   }
}

static
OutEdge* OutEdgeArr__index ( OutEdgeArr* oea, UWord i )
{
   if (oea->var) {
      vg_assert(oea->n_fixed == 0);
      return (OutEdge*)VG_(indexXA)(oea->var, i);
   } else {
      vg_assert(i < oea->n_fixed);
      return &oea->fixed[i];
   }
}

static
void OutEdgeArr__deleteIndex ( OutEdgeArr* oea, UWord i )
{
   if (oea->var) {
      vg_assert(oea->n_fixed == 0);
      VG_(removeIndexXA)(oea->var, i);
   } else {
      vg_assert(i < oea->n_fixed);
      for (; i+1 < oea->n_fixed; i++) {
         oea->fixed[i] = oea->fixed[i+1];
      }
      oea->n_fixed--;
   }
}

static
void OutEdgeArr__add ( OutEdgeArr* oea, OutEdge* oe )
{
   if (oea->var) {
      vg_assert(oea->n_fixed == 0);
      VG_(addToXA)(oea->var, oe);
   } else {
      vg_assert(oea->n_fixed <= N_FIXED_OUT_EDGE_ARR);
      if (oea->n_fixed == N_FIXED_OUT_EDGE_ARR) {
         /* The fixed array is full, so we have to initialise an
            XArray and copy the fixed array into it. */
         oea->var = VG_(newXA)(ttaux_malloc, "transtab.OEA__add",
                               ttaux_free,
                               sizeof(OutEdge));
         UWord i;
         for (i = 0; i < oea->n_fixed; i++) {
            VG_(addToXA)(oea->var, &oea->fixed[i]);
         }
         VG_(addToXA)(oea->var, oe);
         oea->n_fixed = 0;
      } else {
         /* Just add to the fixed array. */
         oea->fixed[oea->n_fixed++] = *oe;
      }
   }
}

static
Int HostExtent__cmpOrd ( const void* v1, const void* v2 )
{
   const HostExtent* hx1 = v1;
   const HostExtent* hx2 = v2;
   if (hx1->start + hx1->len <= hx2->start) return -1;
   if (hx2->start + hx2->len <= hx1->start) return 1;
   return 0; /* partial overlap */
}
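
/* Note (illustrative): the comparator above treats any two overlapping
   extents as "equal", so an XArray lookup with a 1-byte key finds
   whichever recorded extent contains that address -- exactly what
   find_TTEntry_from_hcode() below relies on. */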

/* True if hx is a dead host extent, i.e. corresponds to host code
   of an entry that was invalidated. */
static
Bool HostExtent__is_dead (const HostExtent* hx, const Sector* sec)
{
   const UInt tteNo = hx->tteNo;
#define LDEBUG(m) if (DEBUG_TRANSTAB)                           \
      VG_(printf) (m                                            \
                   " start 0x%p len %u sector %d ttslot %u"     \
                   " tt.entry 0x%llu tt.tcptr 0x%p\n",          \
                   hx->start, hx->len, (int)(sec - sectors),    \
                   hx->tteNo,                                   \
                   sec->tt[tteNo].entry, sec->tt[tteNo].tcptr)

   /* Entry might have been invalidated and not re-used yet. */
   if (sec->tt[tteNo].status == Deleted) {
      LDEBUG("found deleted entry");
      return True;
   }
   /* Maybe we found this entry via a host_extents which was
      inserted for an entry which was changed to Deleted then
      re-used after.  If this entry was re-used, then its tcptr
      is >= to host_extents start (i.e. the previous tcptr) + len.
      This is the case as there is no re-use of host code: a new
      entry or re-used entry always gets "higher value" host code. */
   if ((UChar*) sec->tt[tteNo].tcptr >= hx->start + hx->len) {
      LDEBUG("found re-used entry");
      return True;
   }

   return False;
#undef LDEBUG
}

static __attribute__((noinline))
Bool find_TTEntry_from_hcode( /*OUT*/UInt* from_sNo,
                              /*OUT*/UInt* from_tteNo,
                              void* hcode )
{
   Int i;

   /* Search order logic copied from VG_(search_transtab). */
   for (i = 0; i < n_sectors; i++) {
      Int sno = sector_search_order[i];
      if (UNLIKELY(sno == -1))
         return False; /* run out of sectors to search */

      Sector* sec = &sectors[sno];
      XArray* /* of HostExtent */ host_extents = sec->host_extents;
      vg_assert(host_extents);

      HostExtent key;
      VG_(memset)(&key, 0, sizeof(key));
      key.start = hcode;
      key.len = 1;
      Word firstW = -1, lastW = -1;
      Bool found  = VG_(lookupXA_UNSAFE)(
                       host_extents, &key, &firstW, &lastW,
                       HostExtent__cmpOrd );
      vg_assert(firstW == lastW); // always true, even if not found
      if (found) {
         HostExtent* hx = VG_(indexXA)(host_extents, firstW);
         UInt tteNo = hx->tteNo;
         /* Do some additional sanity checks. */
         vg_assert(tteNo <= N_TTES_PER_SECTOR);

         /* If this hx entry corresponds to dead host code, we must
            report that this code has not been found, as it cannot be
            patched. */
         if (HostExtent__is_dead (hx, sec))
            return False;

         vg_assert(sec->tt[tteNo].status == InUse);
         /* Can only half check that the found TTEntry contains hcode,
            due to not having a length value for the hcode in the
            TTEntry. */
         vg_assert((UChar*)sec->tt[tteNo].tcptr <= (UChar*)hcode);
         /* Looks plausible */
         *from_sNo   = sno;
         *from_tteNo = (UInt)tteNo;
         return True;
      }
   }
   return False;
}


/* Figure out whether or not hcode is jitted code present in the main
   code cache (but not in the no-redir cache).  Used for sanity
   checking. */
static Bool is_in_the_main_TC ( void* hcode )
{
   Int i, sno;
   for (i = 0; i < n_sectors; i++) {
      sno = sector_search_order[i];
      if (sno == -1)
         break; /* run out of sectors to search */
      if ((UChar*)hcode >= (UChar*)sectors[sno].tc
          && (UChar*)hcode <= (UChar*)sectors[sno].tc_next
                              + sizeof(ULong) - 1)
         return True;
   }
   return False;
}


/* Fulfill a chaining request, and record admin info so we
   can undo it later, if required.
*/
void VG_(tt_tc_do_chaining) ( void* from__patch_addr,
                              UInt  to_sNo,
                              UInt  to_tteNo,
                              Bool  to_fastEP )
{
   /* Get the CPU info established at startup. */
   VexArch arch_host = VexArch_INVALID;
   VexArchInfo archinfo_host;
   VG_(bzero_inline)(&archinfo_host, sizeof(archinfo_host));
   VG_(machine_get_VexArchInfo)( &arch_host, &archinfo_host );
   VexEndness endness_host = archinfo_host.endness;

   // host_code is where we're patching to.  So it needs to
   // take into account whether we're jumping to the slow
   // or fast entry point.  By definition, the fast entry point
   // is exactly one event check's worth of code along from
   // the slow (tcptr) entry point.
   TTEntry* to_tte    = index_tte(to_sNo, to_tteNo);
   void*    host_code = ((UChar*)to_tte->tcptr)
                        + (to_fastEP ? LibVEX_evCheckSzB(arch_host,
                                                         endness_host)
                                     : 0);

   // stay sane -- the patch point (dst) is in this sector's code cache
   vg_assert( (UChar*)host_code >= (UChar*)sectors[to_sNo].tc );
   vg_assert( (UChar*)host_code <= (UChar*)sectors[to_sNo].tc_next
                                   + sizeof(ULong) - 1 );

   /* Find the TTEntry for the from__ code.  This isn't simple since
      we only know the patch address, which is going to be somewhere
      inside the from_ block. */
   UInt from_sNo   = (UInt)-1;
   UInt from_tteNo = (UInt)-1;
   Bool from_found
      = find_TTEntry_from_hcode( &from_sNo, &from_tteNo,
                                 from__patch_addr );
   if (!from_found) {
      // The from code might have been discarded due to sector re-use
      // or marked Deleted due to translation invalidation.
      // In such a case, don't do the chaining.
      VG_(debugLog)(1,"transtab",
                    "host code %p not found (discarded? sector recycled?)"
                    " => no chaining done\n",
                    from__patch_addr);
      return;
   }

   TTEntry* from_tte = index_tte(from_sNo, from_tteNo);

   /* Get VEX to do the patching itself.  We have to hand it off
      since it is host-dependent. */
   VexInvalRange vir
      = LibVEX_Chain(
           arch_host, endness_host,
           from__patch_addr,
           VG_(fnptr_to_fnentry)(
              to_fastEP ? &VG_(disp_cp_chain_me_to_fastEP)
                        : &VG_(disp_cp_chain_me_to_slowEP)),
           (void*)host_code
        );
   VG_(invalidate_icache)( (void*)vir.start, vir.len );

   /* Now do the tricky bit -- update the ch_succs and ch_preds info
      for the two translations involved, so we can undo the chaining
      later, which we will have to do if the to_ block gets removed
      for whatever reason. */

   /* This is the new from_ -> to_ link to add. */
   InEdge ie;
   InEdge__init(&ie);
   ie.from_sNo   = from_sNo;
   ie.from_tteNo = from_tteNo;
   ie.to_fastEP  = to_fastEP;
   HWord from_offs = (HWord)( (UChar*)from__patch_addr
                              - (UChar*)from_tte->tcptr );
   vg_assert(from_offs < 100000/* let's say */);
   ie.from_offs  = (UInt)from_offs;

   /* This is the new to_ -> from_ backlink to add. */
   OutEdge oe;
   OutEdge__init(&oe);
   oe.to_sNo    = to_sNo;
   oe.to_tteNo  = to_tteNo;
   oe.from_offs = (UInt)from_offs;

   /* Add .. */
   InEdgeArr__add(&to_tte->in_edges, &ie);
   OutEdgeArr__add(&from_tte->out_edges, &oe);
}
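
/* Illustrative call flow (an assumption pieced together from this
   file, not a verbatim description): translation A initially exits
   through a stub that transfers to VG_(disp_cp_chain_me_to_slowEP) or
   VG_(disp_cp_chain_me_to_fastEP).  When that stub first fires, the
   scheduler translates or looks up the target B and then calls

      VG_(tt_tc_do_chaining)( patch_addr_in_A,
                              B_sector, B_tteNo, want_fastEP );

   which rewrites the stub into a direct jump to B's host code.  The
   InEdge/OutEdge pair recorded above is what later allows
   unchain_in_preparation_for_deletion() to undo the patch when B is
   thrown out. */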

/* Unchain one patch, as described by the specified InEdge.  For
   sanity check purposes only (to check that the patched location is
   as expected) it also requires the fast and slow entry point
   addresses of the destination block (that is, the block that owns
   this InEdge). */
__attribute__((noinline))
static void unchain_one ( VexArch arch_host, VexEndness endness_host,
                          InEdge* ie,
                          void* to_fastEPaddr, void* to_slowEPaddr )
{
   vg_assert(ie);
   TTEntry* tte
      = index_tte(ie->from_sNo, ie->from_tteNo);
   UChar* place_to_patch
      = ((UChar*)tte->tcptr) + ie->from_offs;
   UChar* disp_cp_chain_me
      = VG_(fnptr_to_fnentry)(
           ie->to_fastEP ? &VG_(disp_cp_chain_me_to_fastEP)
                         : &VG_(disp_cp_chain_me_to_slowEP)
        );
   UChar* place_to_jump_to_EXPECTED
      = ie->to_fastEP ? to_fastEPaddr : to_slowEPaddr;

   // stay sane: both src and dst for this unchaining are
   // in the main code cache
   vg_assert( is_in_the_main_TC(place_to_patch) ); // src
   vg_assert( is_in_the_main_TC(place_to_jump_to_EXPECTED) ); // dst
   // dst check is ok because LibVEX_UnChain checks that
   // place_to_jump_to_EXPECTED really is the current dst, and
   // asserts if it isn't.
   VexInvalRange vir
      = LibVEX_UnChain( arch_host, endness_host, place_to_patch,
                        place_to_jump_to_EXPECTED, disp_cp_chain_me );
   VG_(invalidate_icache)( (void*)vir.start, vir.len );
}
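
/* Geometry note (illustrative): a block's slow entry point is tcptr
   itself, and its fast entry point is tcptr + LibVEX_evCheckSzB(..),
   i.e. just past the event check.  InEdge.to_fastEP records which of
   the two a patched jump targets, so the EXPECTED address handed to
   LibVEX_UnChain above can be reconstructed exactly. */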

/* The specified block is about to be deleted.  Update the preds and
   succs of its associated blocks accordingly.  This includes undoing
   any chained jumps to this block. */
static
void unchain_in_preparation_for_deletion ( VexArch arch_host,
                                           VexEndness endness_host,
                                           UInt here_sNo, UInt here_tteNo )
{
   if (DEBUG_TRANSTAB)
      VG_(printf)("QQQ unchain_in_prep %u.%u...\n", here_sNo, here_tteNo);
   UWord i, j, n, m;
   Int evCheckSzB = LibVEX_evCheckSzB(arch_host, endness_host);
   TTEntry* here_tte = index_tte(here_sNo, here_tteNo);
   if (DEBUG_TRANSTAB)
      VG_(printf)("... QQQ tt.entry 0x%llu tt.tcptr 0x%p\n",
                  here_tte->entry, here_tte->tcptr);
   vg_assert(here_tte->status == InUse);

   /* Visit all InEdges owned by here_tte. */
   n = InEdgeArr__size(&here_tte->in_edges);
   for (i = 0; i < n; i++) {
      InEdge* ie = InEdgeArr__index(&here_tte->in_edges, i);
      // Undo the chaining.
      UChar* here_slow_EP = (UChar*)here_tte->tcptr;
      UChar* here_fast_EP = here_slow_EP + evCheckSzB;
      unchain_one(arch_host, endness_host, ie, here_fast_EP, here_slow_EP);
      // Find the corresponding entry in the "from" node's out_edges,
      // and remove it.
      TTEntry* from_tte = index_tte(ie->from_sNo, ie->from_tteNo);
      m = OutEdgeArr__size(&from_tte->out_edges);
      vg_assert(m > 0); // it must have at least one entry
      for (j = 0; j < m; j++) {
         OutEdge* oe = OutEdgeArr__index(&from_tte->out_edges, j);
         if (oe->to_sNo == here_sNo && oe->to_tteNo == here_tteNo
             && oe->from_offs == ie->from_offs)
            break;
      }
      vg_assert(j < m); // "oe must be findable"
      OutEdgeArr__deleteIndex(&from_tte->out_edges, j);
   }

   /* Visit all OutEdges owned by here_tte. */
   n = OutEdgeArr__size(&here_tte->out_edges);
   for (i = 0; i < n; i++) {
      OutEdge* oe = OutEdgeArr__index(&here_tte->out_edges, i);
      // Find the corresponding entry in the "to" node's in_edges,
      // and remove it.
      TTEntry* to_tte = index_tte(oe->to_sNo, oe->to_tteNo);
      m = InEdgeArr__size(&to_tte->in_edges);
      vg_assert(m > 0); // it must have at least one entry
      for (j = 0; j < m; j++) {
         InEdge* ie = InEdgeArr__index(&to_tte->in_edges, j);
         if (ie->from_sNo == here_sNo && ie->from_tteNo == here_tteNo
             && ie->from_offs == oe->from_offs)
            break;
      }
      vg_assert(j < m); // "ie must be findable"
      InEdgeArr__deleteIndex(&to_tte->in_edges, j);
   }

   InEdgeArr__makeEmpty(&here_tte->in_edges);
   OutEdgeArr__makeEmpty(&here_tte->out_edges);
}


/*-------------------------------------------------------------*/
/*--- Address-range equivalence class stuff                 ---*/
/*-------------------------------------------------------------*/

/* Return equivalence class number for a range. */

static Int range_to_eclass ( Addr64 start, UInt len )
{
   UInt mask   = (1 << ECLASS_WIDTH) - 1;
   UInt lo     = (UInt)start;
   UInt hi     = lo + len - 1;
   UInt loBits = (lo >> ECLASS_SHIFT) & mask;
   UInt hiBits = (hi >> ECLASS_SHIFT) & mask;
   if (loBits == hiBits) {
      vg_assert(loBits < ECLASS_N-1);
      return loBits;
   } else {
      return ECLASS_MISC;
   }
}
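
/* Worked example (illustrative): range_to_eclass(0x4000, 0x800) covers
   0x4000..0x47FF, so loBits == hiBits == 8 and the result is eclass 8.
   range_to_eclass(0x47F0, 0x20) covers 0x47F0..0x480F, which straddles
   a 2KB bin boundary (loBits == 8, hiBits == 9), so it falls into
   ECLASS_MISC. */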

/* Calculates the equivalence class numbers for any VexGuestExtent.
   These are written in *eclasses, which must be big enough to hold 3
   Ints.  The number written, between 1 and 3, is returned.  The
   eclasses are presented in order, and any duplicates are removed.
*/
static
Int vexGuestExtents_to_eclasses ( /*OUT*/Int* eclasses,
                                  VexGuestExtents* vge )
{
#  define SWAP(_lv1,_lv2) \
      do { Int t = _lv1; _lv1 = _lv2; _lv2 = t; } while (0)

   Int i, j, n_ec, r;

   vg_assert(vge->n_used >= 1 && vge->n_used <= 3);

   n_ec = 0;
   for (i = 0; i < vge->n_used; i++) {
      r = range_to_eclass( vge->base[i], (UInt)vge->len[i] );
      if (r == ECLASS_MISC)
         goto bad;
      /* only add if we haven't already seen it */
      for (j = 0; j < n_ec; j++)
         if (eclasses[j] == r)
            break;
      if (j == n_ec)
         eclasses[n_ec++] = r;
   }

   if (n_ec == 1)
      return 1;

   if (n_ec == 2) {
      /* sort */
      if (eclasses[0] > eclasses[1])
         SWAP(eclasses[0], eclasses[1]);
      return 2;
   }

   if (n_ec == 3) {
      /* sort */
      if (eclasses[0] > eclasses[2])
         SWAP(eclasses[0], eclasses[2]);
      if (eclasses[0] > eclasses[1])
         SWAP(eclasses[0], eclasses[1]);
      if (eclasses[1] > eclasses[2])
         SWAP(eclasses[1], eclasses[2]);
      return 3;
   }

   /* NOTREACHED */
   vg_assert(0);

  bad:
   eclasses[0] = ECLASS_MISC;
   return 1;

#  undef SWAP
}


/* Add tteno to the set of entries listed for equivalence class ec in
   this sector.  Returns used location in eclass array. */

static
UInt addEClassNo ( /*MOD*/Sector* sec, Int ec, UShort tteno )
{
   Int    old_sz, new_sz, i, r;
   UShort *old_ar, *new_ar;

   vg_assert(ec >= 0 && ec < ECLASS_N);
   vg_assert(tteno < N_TTES_PER_SECTOR);

   if (DEBUG_TRANSTAB) VG_(printf)("ec %d gets %d\n", ec, (Int)tteno);

   if (sec->ec2tte_used[ec] >= sec->ec2tte_size[ec]) {

      vg_assert(sec->ec2tte_used[ec] == sec->ec2tte_size[ec]);

      old_sz = sec->ec2tte_size[ec];
      old_ar = sec->ec2tte[ec];
      new_sz = old_sz==0 ? 8 : old_sz<64 ? 2*old_sz : (3*old_sz)/2;
      new_ar = ttaux_malloc("transtab.aECN.1",
                            new_sz * sizeof(UShort));
      for (i = 0; i < old_sz; i++)
         new_ar[i] = old_ar[i];
      if (old_ar)
         ttaux_free(old_ar);
      sec->ec2tte_size[ec] = new_sz;
      sec->ec2tte[ec] = new_ar;

      if (DEBUG_TRANSTAB) VG_(printf)("expand ec %d to %d\n", ec, new_sz);
   }

   /* Common case */
   r = sec->ec2tte_used[ec]++;
   vg_assert(r >= 0 && r < sec->ec2tte_size[ec]);
   sec->ec2tte[ec][r] = tteno;
   return (UInt)r;
}
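
/* Growth policy note (illustrative): an ec2tte array starts at 8
   entries, doubles while smaller than 64, then grows by a factor of
   1.5.  That keeps reallocation cost low for the few heavily-used
   eclasses without over-allocating for the many that stay small. */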

/* 'vge' is being added to 'sec' at TT entry 'tteno'.  Add appropriate
   eclass entries to 'sec'. */

static
void upd_eclasses_after_add ( /*MOD*/Sector* sec, Int tteno )
{
   Int i, r, eclasses[3];
   TTEntry* tte;
   vg_assert(tteno >= 0 && tteno < N_TTES_PER_SECTOR);

   tte = &sec->tt[tteno];
   r = vexGuestExtents_to_eclasses( eclasses, &tte->vge );

   vg_assert(r >= 1 && r <= 3);
   tte->n_tte2ec = r;

   for (i = 0; i < r; i++) {
      tte->tte2ec_ec[i] = eclasses[i];
      tte->tte2ec_ix[i] = addEClassNo( sec, eclasses[i], (UShort)tteno );
   }
}


/* Check the eclass info in 'sec' to ensure it is consistent.  Returns
   True if OK, False if something's not right.  Expensive. */

static Bool sanity_check_eclasses_in_sector ( Sector* sec )
{
#  define BAD(_str) do { whassup = (_str); goto bad; } while (0)

   const HChar* whassup = NULL;
   Int      i, j, k, n, ec_num, ec_idx;
   TTEntry* tte;
   UShort   tteno;
   ULong*   tce;

   /* Basic checks on this sector */
   if (sec->tt_n_inuse < 0 || sec->tt_n_inuse > N_TTES_PER_SECTOR_USABLE)
      BAD("invalid sec->tt_n_inuse");
   tce = sec->tc_next;
   if (tce < &sec->tc[0] || tce > &sec->tc[tc_sector_szQ])
      BAD("sec->tc_next points outside tc");

   /* For each eclass ... */
   for (i = 0; i < ECLASS_N; i++) {
      if (sec->ec2tte_size[i] == 0 && sec->ec2tte[i] != NULL)
         BAD("ec2tte_size/ec2tte mismatch(1)");
      if (sec->ec2tte_size[i] != 0 && sec->ec2tte[i] == NULL)
         BAD("ec2tte_size/ec2tte mismatch(2)");
      if (sec->ec2tte_used[i] < 0
          || sec->ec2tte_used[i] > sec->ec2tte_size[i])
         BAD("implausible ec2tte_used");
      if (sec->ec2tte_used[i] == 0)
         continue;

      /* For each tt reference in each eclass .. ensure the reference
         is to a valid tt entry, and that the entry's address ranges
         really include this eclass. */

      for (j = 0; j < sec->ec2tte_used[i]; j++) {
         tteno = sec->ec2tte[i][j];
         if (tteno == EC2TTE_DELETED)
            continue;
         if (tteno >= N_TTES_PER_SECTOR)
            BAD("implausible tteno");
         tte = &sec->tt[tteno];
         if (tte->status != InUse)
            BAD("tteno points to non-inuse tte");
         if (tte->n_tte2ec < 1 || tte->n_tte2ec > 3)
            BAD("tte->n_tte2ec out of range");
         /* Exactly one of tte->eclasses[0 .. tte->n_eclasses-1]
            must equal i.  Inspect tte's eclass info. */
         n = 0;
         for (k = 0; k < tte->n_tte2ec; k++) {
            if (k < tte->n_tte2ec-1
                && tte->tte2ec_ec[k] >= tte->tte2ec_ec[k+1])
               BAD("tte->tte2ec_ec[..] out of order");
            ec_num = tte->tte2ec_ec[k];
            if (ec_num < 0 || ec_num >= ECLASS_N)
               BAD("tte->tte2ec_ec[..] out of range");
            if (ec_num != i)
               continue;
            ec_idx = tte->tte2ec_ix[k];
            if (ec_idx < 0 || ec_idx >= sec->ec2tte_used[i])
               BAD("tte->tte2ec_ix[..] out of range");
            if (ec_idx == j)
               n++;
         }
         if (n != 1)
            BAD("tteno does not point back at eclass");
      }
   }

   /* That establishes that for each forward pointer from TTEntrys
      there is a corresponding backward pointer from the eclass[]
      arrays.  However, it doesn't rule out the possibility of other,
      bogus pointers in the eclass[] arrays.  So do those similarly:
      scan through them and check the TTEntries they point at point
      back. */

   for (i = 0; i < N_TTES_PER_SECTOR_USABLE; i++) {

      tte = &sec->tt[i];
      if (tte->status == Empty || tte->status == Deleted) {
         if (tte->n_tte2ec != 0)
            BAD("tte->n_eclasses nonzero for unused tte");
         continue;
      }

      vg_assert(tte->status == InUse);

      if (tte->n_tte2ec < 1 || tte->n_tte2ec > 3)
         BAD("tte->n_eclasses out of range(2)");

      for (j = 0; j < tte->n_tte2ec; j++) {
         ec_num = tte->tte2ec_ec[j];
         if (ec_num < 0 || ec_num >= ECLASS_N)
            BAD("tte->eclass[..] out of range");
         ec_idx = tte->tte2ec_ix[j];
         if (ec_idx < 0 || ec_idx >= sec->ec2tte_used[ec_num])
            BAD("tte->ec_idx[..] out of range(2)");
         if (sec->ec2tte[ec_num][ec_idx] != i)
            BAD("ec2tte does not point back to tte");
      }
   }

   return True;

  bad:
   if (whassup)
      VG_(debugLog)(0, "transtab", "eclass sanity fail: %s\n", whassup);

#  if 0
   VG_(printf)("eclass = %d\n", i);
   VG_(printf)("tteno = %d\n", (Int)tteno);
   switch (tte->status) {
      case InUse:   VG_(printf)("InUse\n"); break;
      case Deleted: VG_(printf)("Deleted\n"); break;
      case Empty:   VG_(printf)("Empty\n"); break;
   }
   if (tte->status != Empty) {
      for (k = 0; k < tte->vge.n_used; k++)
         VG_(printf)("0x%llx %d\n", tte->vge.base[k],
                                    (Int)tte->vge.len[k]);
   }
#  endif

   return False;

#  undef BAD
}


/* Sanity check absolutely everything.  True == check passed. */

/* forwards */
static Bool sanity_check_redir_tt_tc ( void );

static Bool sanity_check_sector_search_order ( void )
{
   Int i, j, nListed;
   /* assert the array is the right size */
   vg_assert(MAX_N_SECTORS == (sizeof(sector_search_order)
                               / sizeof(sector_search_order[0])));
   /* Check it's of the form  valid_sector_numbers ++ [-1, -1, ..] */
   for (i = 0; i < n_sectors; i++) {
      if (sector_search_order[i] < 0 || sector_search_order[i] >= n_sectors)
         break;
   }
   nListed = i;
   for (/* */; i < n_sectors; i++) {
      if (sector_search_order[i] != -1)
         break;
   }
   if (i != n_sectors)
      return False;
   /* Check each sector number only appears once */
   for (i = 0; i < n_sectors; i++) {
      if (sector_search_order[i] == -1)
         continue;
      for (j = i+1; j < n_sectors; j++) {
         if (sector_search_order[j] == sector_search_order[i])
            return False;
      }
   }
   /* Check that the number of listed sectors equals the number
      in use, by counting nListed back down. */

   for (i = 0; i < n_sectors; i++) {
      if (sectors[i].tc != NULL)
         nListed--;
   }
   if (nListed != 0)
      return False;
   return True;
}

static Bool sanity_check_all_sectors ( void )
{
   Int     sno;
   Bool    sane;
   Sector* sec;
   for (sno = 0; sno < n_sectors; sno++) {
      Int i;
      Int nr_not_dead_hx = 0;
      Int szhxa;
      sec = &sectors[sno];
      if (sec->tc == NULL)
         continue;
      sane = sanity_check_eclasses_in_sector( sec );
      if (!sane)
         return False;
      szhxa = VG_(sizeXA)(sec->host_extents);
      for (i = 0; i < szhxa; i++) {
         const HostExtent* hx = VG_(indexXA)(sec->host_extents, i);
         if (!HostExtent__is_dead (hx, sec))
            nr_not_dead_hx++;
      }
      if (nr_not_dead_hx != sec->tt_n_inuse) {
         VG_(debugLog)(0, "transtab",
                       "nr_not_dead_hx %d sanity fail (expected == in use %d)\n",
                       nr_not_dead_hx, sec->tt_n_inuse);
         return False;
      }
   }

   if ( !sanity_check_redir_tt_tc() )
      return False;
   if ( !sanity_check_sector_search_order() )
      return False;
   return True;
}



/*-------------------------------------------------------------*/
/*--- Add/find translations                                 ---*/
/*-------------------------------------------------------------*/

static UInt vge_osize ( VexGuestExtents* vge )
{
   UInt i, n = 0;
   for (i = 0; i < vge->n_used; i++)
      n += (UInt)vge->len[i];
   return n;
}

static Bool isValidSector ( Int sector )
{
   if (sector < 0 || sector >= n_sectors)
      return False;
   return True;
}

static inline UInt HASH_TT ( Addr64 key )
{
   UInt kHi = (UInt)(key >> 32);
   UInt kLo = (UInt)key;
   UInt k32 = kHi ^ kLo;
   UInt ror = 7;
   if (ror > 0)
      k32 = (k32 >> ror) | (k32 << (32-ror));
   return k32 % N_TTES_PER_SECTOR;
}
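
/* Worked example (illustrative): for key 0x100004567ULL we get
   kHi == 0x1 and kLo == 0x4567, so k32 == 0x4566; rotating right by 7
   gives 0xCC00008A, and the initial probe slot is 0xCC00008A % 65521.
   Folding in the high word and rotating spreads entropy from both
   halves of the address before the mod. */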

static void setFastCacheEntry ( Addr64 key, ULong* tcptr )
{
   UInt cno = (UInt)VG_TT_FAST_HASH(key);
   VG_(tt_fast)[cno].guest = (Addr)key;
   VG_(tt_fast)[cno].host  = (Addr)tcptr;
   n_fast_updates++;
   /* This shouldn't fail.  It should be assured by m_translate
      which should reject any attempt to make translation of code
      starting at TRANSTAB_BOGUS_GUEST_ADDR. */
   vg_assert(VG_(tt_fast)[cno].guest != TRANSTAB_BOGUS_GUEST_ADDR);
}

/* Invalidate the fast cache VG_(tt_fast). */
static void invalidateFastCache ( void )
{
   UInt j;
   /* This loop is popular enough to make it worth unrolling a
      bit, at least on ppc32. */
   vg_assert(VG_TT_FAST_SIZE > 0 && (VG_TT_FAST_SIZE % 4) == 0);
   for (j = 0; j < VG_TT_FAST_SIZE; j += 4) {
      VG_(tt_fast)[j+0].guest = TRANSTAB_BOGUS_GUEST_ADDR;
      VG_(tt_fast)[j+1].guest = TRANSTAB_BOGUS_GUEST_ADDR;
      VG_(tt_fast)[j+2].guest = TRANSTAB_BOGUS_GUEST_ADDR;
      VG_(tt_fast)[j+3].guest = TRANSTAB_BOGUS_GUEST_ADDR;
   }

   vg_assert(j == VG_TT_FAST_SIZE);
   n_fast_flushes++;
}

static void initialiseSector ( Int sno )
{
   Int     i;
   SysRes  sres;
   Sector* sec;
   vg_assert(isValidSector(sno));

   { Bool sane = sanity_check_sector_search_order();
     vg_assert(sane);
   }
   sec = &sectors[sno];

   if (sec->tc == NULL) {

      /* Sector has never been used before.  Need to allocate tt and
         tc. */
      vg_assert(sec->tt == NULL);
      vg_assert(sec->tc_next == NULL);
      vg_assert(sec->tt_n_inuse == 0);
      for (i = 0; i < ECLASS_N; i++) {
         vg_assert(sec->ec2tte_size[i] == 0);
         vg_assert(sec->ec2tte_used[i] == 0);
         vg_assert(sec->ec2tte[i] == NULL);
      }
      vg_assert(sec->host_extents == NULL);

      VG_(debugLog)(1,"transtab", "allocate sector %d\n", sno);
      if (VG_(clo_stats))
         VG_(dmsg)("transtab: " "allocate sector %d\n", sno);

      sres = VG_(am_mmap_anon_float_valgrind)( 8 * tc_sector_szQ );
      if (sr_isError(sres)) {
         VG_(out_of_memory_NORETURN)("initialiseSector(TC)",
                                     8 * tc_sector_szQ );
         /*NOTREACHED*/
      }
      sec->tc = (ULong*)(AddrH)sr_Res(sres);

      sres = VG_(am_mmap_anon_float_valgrind)
                ( N_TTES_PER_SECTOR * sizeof(TTEntry) );
      if (sr_isError(sres)) {
         VG_(out_of_memory_NORETURN)("initialiseSector(TT)",
                                     N_TTES_PER_SECTOR * sizeof(TTEntry) );
         /*NOTREACHED*/
      }
      sec->tt = (TTEntry*)(AddrH)sr_Res(sres);

      for (i = 0; i < N_TTES_PER_SECTOR; i++) {
         sec->tt[i].status   = Empty;
         sec->tt[i].n_tte2ec = 0;
      }

      /* Set up the host_extents array. */
      sec->host_extents
         = VG_(newXA)(ttaux_malloc, "transtab.initialiseSector(host_extents)",
                      ttaux_free,
                      sizeof(HostExtent));

      /* Add an entry in the sector_search_order */
      for (i = 0; i < n_sectors; i++) {
         if (sector_search_order[i] == -1)
            break;
      }
      vg_assert(i >= 0 && i < n_sectors);
      sector_search_order[i] = sno;

      if (VG_(clo_verbosity) > 2)
         VG_(message)(Vg_DebugMsg, "TT/TC: initialise sector %d\n", sno);

   } else {

      /* Sector has been used before.  Dump the old contents. */
      VG_(debugLog)(1,"transtab", "recycle sector %d\n", sno);
      if (VG_(clo_stats))
         VG_(dmsg)("transtab: " "recycle sector %d\n", sno);

      vg_assert(sec->tt != NULL);
      vg_assert(sec->tc_next != NULL);
      n_dump_count += sec->tt_n_inuse;

      VexArch arch_host = VexArch_INVALID;
      VexArchInfo archinfo_host;
      VG_(bzero_inline)(&archinfo_host, sizeof(archinfo_host));
      VG_(machine_get_VexArchInfo)( &arch_host, &archinfo_host );
      VexEndness endness_host = archinfo_host.endness;

      /* Visit each just-about-to-be-abandoned translation. */
      if (DEBUG_TRANSTAB) VG_(printf)("QQQ unlink-entire-sector: %d START\n",
                                      sno);
      for (i = 0; i < N_TTES_PER_SECTOR; i++) {
         if (sec->tt[i].status == InUse) {
            vg_assert(sec->tt[i].n_tte2ec >= 1);
            vg_assert(sec->tt[i].n_tte2ec <= 3);
            n_dump_osize += vge_osize(&sec->tt[i].vge);
            /* Tell the tool too. */
            if (VG_(needs).superblock_discards) {
               VG_TDICT_CALL( tool_discard_superblock_info,
                              sec->tt[i].entry,
                              sec->tt[i].vge );
            }
            unchain_in_preparation_for_deletion(arch_host,
                                                endness_host, sno, i);
         } else {
            vg_assert(sec->tt[i].n_tte2ec == 0);
         }
         sec->tt[i].status   = Empty;
         sec->tt[i].n_tte2ec = 0;
      }
      if (DEBUG_TRANSTAB) VG_(printf)("QQQ unlink-entire-sector: %d END\n",
                                      sno);

      /* Free up the eclass structures. */
      for (i = 0; i < ECLASS_N; i++) {
         if (sec->ec2tte_size[i] == 0) {
            vg_assert(sec->ec2tte_used[i] == 0);
            vg_assert(sec->ec2tte[i] == NULL);
         } else {
            vg_assert(sec->ec2tte[i] != NULL);
            ttaux_free(sec->ec2tte[i]);
            sec->ec2tte[i] = NULL;
            sec->ec2tte_size[i] = 0;
            sec->ec2tte_used[i] = 0;
         }
      }

      /* Empty out the host extents array. */
      vg_assert(sec->host_extents != NULL);
      VG_(dropTailXA)(sec->host_extents, VG_(sizeXA)(sec->host_extents));
      vg_assert(VG_(sizeXA)(sec->host_extents) == 0);

      /* Sanity check: ensure it is already in
         sector_search_order[]. */
      for (i = 0; i < n_sectors; i++) {
         if (sector_search_order[i] == sno)
            break;
      }
      vg_assert(i >= 0 && i < n_sectors);

      if (VG_(clo_verbosity) > 2)
         VG_(message)(Vg_DebugMsg, "TT/TC: recycle sector %d\n", sno);
   }

   sec->tc_next = sec->tc;
   sec->tt_n_inuse = 0;

   invalidateFastCache();

   { Bool sane = sanity_check_sector_search_order();
     vg_assert(sane);
   }
}
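
/* Lifecycle summary (illustrative, drawn from the code above): a
   sector leaves initialiseSector() in the same logical state by
   either path --

      never used:  tc and tt mmap'd, host_extents created, and the
                   sector appended to sector_search_order[];
      recycled:    every InUse entry unchained and reported to the
                   tool, eclass arrays freed, host_extents emptied --

   with tc_next == tc, tt_n_inuse == 0 and the fast cache flushed,
   since recycling may have destroyed code the cache pointed at. */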

/* Add a translation of vge to TT/TC.  The translation is temporarily
   in code[0 .. code_len-1].

   pre: youngest_sector points to a valid (although possibly full)
   sector.
*/
void VG_(add_to_transtab)( VexGuestExtents* vge,
                           Addr64           entry,
                           AddrH            code,
                           UInt             code_len,
                           Bool             is_self_checking,
                           Int              offs_profInc,
                           UInt             n_guest_instrs )
{
   Int    tcAvailQ, reqdQ, y, i;
   ULong  *tcptr, *tcptr2;
   UChar* srcP;
   UChar* dstP;

   vg_assert(init_done);
   vg_assert(vge->n_used >= 1 && vge->n_used <= 3);

   /* 60000: should agree with N_TMPBUF in m_translate.c. */
   vg_assert(code_len > 0 && code_len < 60000);

   /* Generally stay sane */
   vg_assert(n_guest_instrs < 200); /* it can be zero, tho */

   if (DEBUG_TRANSTAB)
      VG_(printf)("add_to_transtab(entry = 0x%llx, len = %d) ...\n",
                  entry, code_len);

   n_in_count++;
   n_in_tsize += code_len;
   n_in_osize += vge_osize(vge);
   if (is_self_checking)
      n_in_sc_count++;

   y = youngest_sector;
   vg_assert(isValidSector(y));

   if (sectors[y].tc == NULL)
      initialiseSector(y);

   /* Try putting the translation in this sector. */
   reqdQ = (code_len + 7) >> 3;

   /* Will it fit in tc? */
   tcAvailQ = ((ULong*)(&sectors[y].tc[tc_sector_szQ]))
              - ((ULong*)(sectors[y].tc_next));
   vg_assert(tcAvailQ >= 0);
   vg_assert(tcAvailQ <= tc_sector_szQ);

   if (tcAvailQ < reqdQ
       || sectors[y].tt_n_inuse >= N_TTES_PER_SECTOR_USABLE) {
      /* No.  So move on to the next sector.  Either it's never been
         used before, in which case it will get its tt/tc allocated
         now, or it has been used before, in which case it is set to be
         empty, hence throwing out the oldest sector. */
      vg_assert(tc_sector_szQ > 0);
      Int tt_loading_pct = (100 * sectors[y].tt_n_inuse)
                           / N_TTES_PER_SECTOR;
      Int tc_loading_pct = (100 * (tc_sector_szQ - tcAvailQ))
                           / tc_sector_szQ;
      VG_(debugLog)(1,"transtab",
                    "declare sector %d full "
                    "(TT loading %2d%%, TC loading %2d%%)\n",
                    y, tt_loading_pct, tc_loading_pct);
      if (VG_(clo_stats)) {
         VG_(dmsg)("transtab: "
                   "declare sector %d full "
                   "(TT loading %2d%%, TC loading %2d%%)\n",
                   y, tt_loading_pct, tc_loading_pct);
      }
      youngest_sector++;
      if (youngest_sector >= n_sectors)
         youngest_sector = 0;
      y = youngest_sector;
      initialiseSector(y);
   }

   /* Be sure ... */
   tcAvailQ = ((ULong*)(&sectors[y].tc[tc_sector_szQ]))
              - ((ULong*)(sectors[y].tc_next));
   vg_assert(tcAvailQ >= 0);
   vg_assert(tcAvailQ <= tc_sector_szQ);
   vg_assert(tcAvailQ >= reqdQ);
   vg_assert(sectors[y].tt_n_inuse < N_TTES_PER_SECTOR_USABLE);
   vg_assert(sectors[y].tt_n_inuse >= 0);

   /* Copy into tc. */
   tcptr = sectors[y].tc_next;
   vg_assert(tcptr >= &sectors[y].tc[0]);
   vg_assert(tcptr <= &sectors[y].tc[tc_sector_szQ]);

   dstP = (UChar*)tcptr;
   srcP = (UChar*)code;
   VG_(memcpy)(dstP, srcP, code_len);
   sectors[y].tc_next += reqdQ;
   sectors[y].tt_n_inuse++;

   /* more paranoia */
   tcptr2 = sectors[y].tc_next;
   vg_assert(tcptr2 >= &sectors[y].tc[0]);
   vg_assert(tcptr2 <= &sectors[y].tc[tc_sector_szQ]);

   /* Find an empty tt slot, and use it.  There must be such a slot
      since tt is never allowed to get completely full. */
   i = HASH_TT(entry);
   vg_assert(i >= 0 && i < N_TTES_PER_SECTOR);
   while (True) {
      if (sectors[y].tt[i].status == Empty
          || sectors[y].tt[i].status == Deleted)
         break;
      i++;
      if (i >= N_TTES_PER_SECTOR)
         i = 0;
   }

   TTEntry__init(&sectors[y].tt[i]);
   sectors[y].tt[i].status = InUse;
   sectors[y].tt[i].tcptr  = tcptr;
   sectors[y].tt[i].count  = 0;
   sectors[y].tt[i].weight = n_guest_instrs == 0 ? 1 : n_guest_instrs;
   sectors[y].tt[i].vge    = *vge;
   sectors[y].tt[i].entry  = entry;

   /* Patch in the profile counter location, if necessary. */
   if (offs_profInc != -1) {
      vg_assert(offs_profInc >= 0 && offs_profInc < code_len);
      VexArch arch_host = VexArch_INVALID;
      VexArchInfo archinfo_host;
      VG_(bzero_inline)(&archinfo_host, sizeof(archinfo_host));
      VG_(machine_get_VexArchInfo)( &arch_host, &archinfo_host );
      VexEndness endness_host = archinfo_host.endness;
      VexInvalRange vir
         = LibVEX_PatchProfInc( arch_host, endness_host,
                                dstP + offs_profInc,
                                &sectors[y].tt[i].count );
      VG_(invalidate_icache)( (void*)vir.start, vir.len );
   }

   VG_(invalidate_icache)( dstP, code_len );

   /* Add this entry to the host_extents map, checking that we're
      adding in order. */
   { HostExtent hx;
     hx.start = (UChar*)tcptr;
     hx.len   = code_len;
     hx.tteNo = i;
     vg_assert(hx.len > 0); /* bsearch fails w/ zero length entries */
     XArray* hx_array = sectors[y].host_extents;
     vg_assert(hx_array);
     Word n = VG_(sizeXA)(hx_array);
     if (n > 0) {
        HostExtent* hx_prev = (HostExtent*)VG_(indexXA)(hx_array, n-1);
        vg_assert(hx_prev->start + hx_prev->len <= hx.start);
     }
     VG_(addToXA)(hx_array, &hx);
     if (DEBUG_TRANSTAB)
        VG_(printf)("... hx.start 0x%p hx.len %u sector %d ttslot %d\n",
                    hx.start, hx.len, y, i);
   }

   /* Update the fast-cache. */
   setFastCacheEntry( entry, tcptr );

   /* Note the eclass numbers for this translation. */
   upd_eclasses_after_add( &sectors[y], i );
}
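
/* Usage sketch (illustrative; the local variable names are invented):
   the translator side deposits a finished translation with

      VG_(add_to_transtab)( &vge, orig_addr, (AddrH)tmpbuf, tmpbuf_used,
                            needs_self_check, offs_profInc,
                            n_guest_instrs );

   and the scheduler side later recovers it with

      AddrH hcode = 0;
      if (VG_(search_transtab)( &hcode, NULL, NULL,
                                orig_addr, True/*upd_cache*/ ))
         ... run the host code at hcode ...

   where passing True also refreshes the VG_(tt_fast) entry. */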

/* Search for the translation of the given guest address.  If
   requested, a successful search can also cause the fast-caches to be
   updated.
*/
Bool VG_(search_transtab) ( /*OUT*/AddrH* res_hcode,
                            /*OUT*/UInt*  res_sNo,
                            /*OUT*/UInt*  res_tteNo,
                            Addr64        guest_addr,
                            Bool          upd_cache )
{
   Int i, j, k, kstart, sno;

   vg_assert(init_done);
   /* Find the initial probe point just once.  It will be the same in
      all sectors and avoids multiple expensive % operations. */
   n_full_lookups++;
   k      = -1;
   kstart = HASH_TT(guest_addr);
   vg_assert(kstart >= 0 && kstart < N_TTES_PER_SECTOR);

   /* Search in all the sectors, using sector_search_order[] as a
      heuristic guide as to what order to visit the sectors. */
   for (i = 0; i < n_sectors; i++) {

      sno = sector_search_order[i];
      if (UNLIKELY(sno == -1))
         return False; /* run out of sectors to search */

      k = kstart;
      for (j = 0; j < N_TTES_PER_SECTOR; j++) {
         n_lookup_probes++;
         if (sectors[sno].tt[k].status == InUse
             && sectors[sno].tt[k].entry == guest_addr) {
            /* found it */
            if (upd_cache)
               setFastCacheEntry(
                  guest_addr, sectors[sno].tt[k].tcptr );
            if (res_hcode)
               *res_hcode = (AddrH)sectors[sno].tt[k].tcptr;
            if (res_sNo)
               *res_sNo = sno;
            if (res_tteNo)
               *res_tteNo = k;
            /* pull this one one step closer to the front.  For large
               apps this more or less halves the number of required
               probes. */
            if (i > 0) {
               Int tmp = sector_search_order[i-1];
               sector_search_order[i-1] = sector_search_order[i];
               sector_search_order[i] = tmp;
            }
            return True;
         }
         if (sectors[sno].tt[k].status == Empty)
            break; /* not found in this sector */
         k++;
         if (k == N_TTES_PER_SECTOR)
            k = 0;
      }

      /* If we fall off the end, all entries are InUse and not
         matching, or Deleted.  In any case we did not find it in this
         sector. */
   }

   /* Not found in any sector. */
   return False;
}

/*-------------------------------------------------------------*/
/*--- Delete translations.                                  ---*/
/*-------------------------------------------------------------*/

/* forward */
static void unredir_discard_translations( Addr64, ULong );

/* Stuff for deleting translations which intersect with a given
   address range.  Unfortunately, to make this run at a reasonable
   speed, it is complex. */

static inline
Bool overlap1 ( Addr64 s1, ULong r1, Addr64 s2, ULong r2 )
{
   Addr64 e1 = s1 + r1 - 1ULL;
   Addr64 e2 = s2 + r2 - 1ULL;
   if (e1 < s2 || e2 < s1)
      return False;
   return True;
}

static inline
Bool overlaps ( Addr64 start, ULong range, VexGuestExtents* vge )
{
   if (overlap1(start, range, vge->base[0], (UInt)vge->len[0]))
      return True;
   if (vge->n_used < 2)
      return False;
   if (overlap1(start, range, vge->base[1], (UInt)vge->len[1]))
      return True;
   if (vge->n_used < 3)
      return False;
   if (overlap1(start, range, vge->base[2], (UInt)vge->len[2]))
      return True;
   return False;
}
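
/* Worked example of the inclusive-endpoint test above; hypothetical
   numbers, not part of the build. */
#if 0
static void overlap1_examples ( void )
{
   /* [0x1000, 0x100F] and [0x100F, 0x1012] share byte 0x100F:
      e1 == 0x100F is not < s2 == 0x100F, and e2 is not < s1. */
   vg_assert( overlap1(0x1000ULL, 0x10ULL, 0x100FULL, 0x4ULL) );
   /* [0x1000, 0x100F] and [0x1010, 0x1013] merely abut: e1 == 0x100F
      < s2 == 0x1010, so no overlap.  Both endpoints are inclusive. */
   vg_assert( !overlap1(0x1000ULL, 0x10ULL, 0x1010ULL, 0x4ULL) );
}
#endif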

/* Delete a tt entry, and update all the eclass data accordingly. */

static void delete_tte ( /*MOD*/Sector* sec, UInt secNo, Int tteno,
                         VexArch arch_host, VexEndness endness_host )
{
   Int      i, ec_num, ec_idx;
   TTEntry* tte;

   /* sec and secNo are mutually redundant; cross-check. */
   vg_assert(sec == &sectors[secNo]);

   vg_assert(tteno >= 0 && tteno < N_TTES_PER_SECTOR);
   tte = &sec->tt[tteno];
   vg_assert(tte->status == InUse);
   vg_assert(tte->n_tte2ec >= 1 && tte->n_tte2ec <= 3);

   /* Unchain .. */
   unchain_in_preparation_for_deletion(arch_host, endness_host, secNo, tteno);

   /* Deal with the ec-to-tte links first. */
   for (i = 0; i < tte->n_tte2ec; i++) {
      ec_num = (Int)tte->tte2ec_ec[i];
      ec_idx = tte->tte2ec_ix[i];
      vg_assert(ec_num >= 0 && ec_num < ECLASS_N);
      vg_assert(ec_idx >= 0);
      vg_assert(ec_idx < sec->ec2tte_used[ec_num]);
      /* Assert that the two links point at each other. */
      vg_assert(sec->ec2tte[ec_num][ec_idx] == (UShort)tteno);
      /* "delete" the pointer back to here. */
      sec->ec2tte[ec_num][ec_idx] = EC2TTE_DELETED;
   }

   /* Now fix up this TTEntry. */
   tte->status   = Deleted;
   tte->n_tte2ec = 0;

   /* Stats .. */
   sec->tt_n_inuse--;
   n_disc_count++;
   n_disc_osize += vge_osize(&tte->vge);

   /* Tell the tool too. */
   if (VG_(needs).superblock_discards) {
      VG_TDICT_CALL( tool_discard_superblock_info,
                     tte->entry,
                     tte->vge );
   }
}


/* Delete translations from sec which intersect specified range, but
   only consider translations in the specified eclass. */

static
Bool delete_translations_in_sector_eclass ( /*MOD*/Sector* sec, UInt secNo,
                                            Addr64 guest_start, ULong range,
                                            Int ec,
                                            VexArch arch_host,
                                            VexEndness endness_host )
{
   Int      i;
   UShort   tteno;
   Bool     anyDeld = False;
   TTEntry* tte;

   vg_assert(ec >= 0 && ec < ECLASS_N);

   for (i = 0; i < sec->ec2tte_used[ec]; i++) {

      tteno = sec->ec2tte[ec][i];
      if (tteno == EC2TTE_DELETED) {
         /* already deleted */
         continue;
      }

      vg_assert(tteno < N_TTES_PER_SECTOR);

      tte = &sec->tt[tteno];
      vg_assert(tte->status == InUse);

      if (overlaps( guest_start, range, &tte->vge )) {
         anyDeld = True;
         delete_tte( sec, secNo, (Int)tteno, arch_host, endness_host );
      }

   }

   return anyDeld;
}


/* Delete translations from sec which intersect specified range, the
   slow way, by inspecting all translations in sec. */

static
Bool delete_translations_in_sector ( /*MOD*/Sector* sec, UInt secNo,
                                     Addr64 guest_start, ULong range,
                                     VexArch arch_host,
                                     VexEndness endness_host )
{
   Int  i;
   Bool anyDeld = False;

   for (i = 0; i < N_TTES_PER_SECTOR; i++) {
      if (sec->tt[i].status == InUse
          && overlaps( guest_start, range, &sec->tt[i].vge )) {
         anyDeld = True;
         delete_tte( sec, secNo, i, arch_host, endness_host );
      }
   }

   return anyDeld;
}


void VG_(discard_translations) ( Addr64 guest_start, ULong range,
                                 const HChar* who )
{
   Sector* sec;
   Int     sno, ec;
   Bool    anyDeleted = False;

   vg_assert(init_done);

   VG_(debugLog)(2, "transtab",
                    "discard_translations(0x%llx, %llu) req by %s\n",
                    guest_start, range, who );

   /* Pre-deletion sanity check */
   if (VG_(clo_sanity_level) >= 4) {
      Bool sane = sanity_check_all_sectors();
      vg_assert(sane);
   }

   if (range == 0)
      return;

   VexArch     arch_host = VexArch_INVALID;
   VexArchInfo archinfo_host;
   VG_(bzero_inline)(&archinfo_host, sizeof(archinfo_host));
   VG_(machine_get_VexArchInfo)( &arch_host, &archinfo_host );
   VexEndness endness_host = archinfo_host.endness;

   /* There are two different ways to do this.

      If the range fits within a single address-range equivalence
      class, as will be the case for a cache line sized invalidation,
      then we only have to inspect the set of translations listed in
      that equivalence class, and also in the "sin-bin" equivalence
      class ECLASS_MISC.

      Otherwise, the invalidation is of a larger range and probably
      results from munmap.  In this case it's (probably!) faster just
      to inspect all translations, dump those we don't want, and
      regenerate the equivalence class information (since modifying it
      in-situ is even more expensive).
   */

   /* First off, figure out if the range falls within a single class,
      and if so which one. */

   ec = ECLASS_MISC;
   if (range < (1ULL << ECLASS_SHIFT))
      ec = range_to_eclass( guest_start, (UInt)range );

   /* if ec is ECLASS_MISC then we aren't looking at just a single
      class, so use the slow scheme.  Else use the fast scheme,
      examining 'ec' and ECLASS_MISC. */
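
   /* Worked example of the decision just made; hypothetical numbers,
      not part of the build.  With ECLASS_SHIFT == 11 a class covers
      2^11 == 2048 bytes of guest address space, so a 64-byte
      icache-line flush at 0x4000A40 has range < 2048 and, since it
      does not straddle a 2048-byte boundary, lands in the single
      class (0x4000A40 >> 11) & 0xFF == 0x01.  A 16 KB munmap fails
      the range test, leaving ec == ECLASS_MISC and forcing the slow
      full scan below. */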

   if (ec != ECLASS_MISC) {

      VG_(debugLog)(2, "transtab",
                       " FAST, ec = %d\n", ec);

      /* Fast scheme */
      vg_assert(ec >= 0 && ec < ECLASS_MISC);

      for (sno = 0; sno < n_sectors; sno++) {
         sec = &sectors[sno];
         if (sec->tc == NULL)
            continue;
         anyDeleted |= delete_translations_in_sector_eclass(
                          sec, sno, guest_start, range, ec,
                          arch_host, endness_host
                       );
         anyDeleted |= delete_translations_in_sector_eclass(
                          sec, sno, guest_start, range, ECLASS_MISC,
                          arch_host, endness_host
                       );
      }

   } else {

      /* slow scheme */

      VG_(debugLog)(2, "transtab",
                       " SLOW, ec = %d\n", ec);

      for (sno = 0; sno < n_sectors; sno++) {
         sec = &sectors[sno];
         if (sec->tc == NULL)
            continue;
         anyDeleted |= delete_translations_in_sector(
                          sec, sno, guest_start, range,
                          arch_host, endness_host
                       );
      }

   }

   if (anyDeleted)
      invalidateFastCache();

   /* don't forget the no-redir cache */
   unredir_discard_translations( guest_start, range );

   /* Post-deletion sanity check */
   if (VG_(clo_sanity_level) >= 4) {
      Int      i;
      TTEntry* tte;
      Bool     sane = sanity_check_all_sectors();
      vg_assert(sane);
      /* But now, also check the requested address range isn't
         present anywhere. */
      for (sno = 0; sno < n_sectors; sno++) {
         sec = &sectors[sno];
         if (sec->tc == NULL)
            continue;
         for (i = 0; i < N_TTES_PER_SECTOR; i++) {
            tte = &sec->tt[i];
            if (tte->status != InUse)
               continue;
            vg_assert(!overlaps( guest_start, range, &tte->vge ));
         }
      }
   }
}


/*------------------------------------------------------------*/
/*--- AUXILIARY: the unredirected TT/TC                    ---*/
/*------------------------------------------------------------*/

/* A very simple translation cache which holds a small number of
   unredirected translations.  This is completely independent of the
   main tt/tc structures.  When unredir_tc or unredir_tt becomes full,
   both structures are simply dumped and we start over.

   Since these translations are unredirected, the search key is (by
   definition) the first address entry in the .vge field. */

/* Sized to hold 500 translations of average size 1000 bytes. */

#define UNREDIR_SZB   1000

#define N_UNREDIR_TT  500
#define N_UNREDIR_TCQ (N_UNREDIR_TT * UNREDIR_SZB / sizeof(ULong))
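
/* Sizing arithmetic, spelled out (this follows directly from the two
   defines above): the backing store is N_UNREDIR_TT * UNREDIR_SZB
   == 500 * 1000 == 500000 bytes, so N_UNREDIR_TCQ == 500000 / 8
   == 62500 ULong words of code space, shared by all 500 slots. */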

typedef
   struct {
      VexGuestExtents vge;
      Addr            hcode;
      Bool            inUse;
   }
   UTCEntry;

/* We just allocate forwards in _tc, never deleting. */
static ULong *unredir_tc;
static Int    unredir_tc_used = N_UNREDIR_TCQ;

/* Slots in _tt can come into use and out again (.inUse).
   Nevertheless _tt_highwater is maintained so that invalidations
   don't have to scan all the slots when only a few are in use.
   _tt_highwater holds the index of the highest ever allocated
   slot. */
static UTCEntry unredir_tt[N_UNREDIR_TT];
static Int      unredir_tt_highwater;


static void init_unredir_tt_tc ( void )
{
   Int i;
   if (unredir_tc == NULL) {
      SysRes sres = VG_(am_mmap_anon_float_valgrind)
                       ( N_UNREDIR_TT * UNREDIR_SZB );
      if (sr_isError(sres)) {
         VG_(out_of_memory_NORETURN)("init_unredir_tt_tc",
                                     N_UNREDIR_TT * UNREDIR_SZB);
         /*NOTREACHED*/
      }
      unredir_tc = (ULong *)(AddrH)sr_Res(sres);
   }
   unredir_tc_used = 0;
   for (i = 0; i < N_UNREDIR_TT; i++)
      unredir_tt[i].inUse = False;
   unredir_tt_highwater = -1;
}

/* Do a sanity check; return False on failure. */
static Bool sanity_check_redir_tt_tc ( void )
{
   Int i;
   if (unredir_tt_highwater < -1) return False;
   if (unredir_tt_highwater >= N_UNREDIR_TT) return False;

   for (i = unredir_tt_highwater+1; i < N_UNREDIR_TT; i++)
      if (unredir_tt[i].inUse)
         return False;

   if (unredir_tc_used < 0) return False;
   if (unredir_tc_used > N_UNREDIR_TCQ) return False;

   return True;
}


/* Add an UNREDIRECTED translation of vge to TT/TC.  The translation
   is temporarily in code[0 .. code_len-1].
*/
void VG_(add_to_unredir_transtab)( VexGuestExtents* vge,
                                   Addr64           entry,
                                   AddrH            code,
                                   UInt             code_len )
{
   Int   i, j, code_szQ;
   HChar *srcP, *dstP;

   vg_assert(sanity_check_redir_tt_tc());

   /* This is the whole point: it's not redirected! */
   vg_assert(entry == vge->base[0]);

   /* How many unredir_tc slots (ULong words) are needed */
   code_szQ = (code_len + 7) / 8;

   /* Look for an empty unredir_tt slot */
   for (i = 0; i < N_UNREDIR_TT; i++)
      if (!unredir_tt[i].inUse)
         break;

   if (i >= N_UNREDIR_TT || code_szQ > (N_UNREDIR_TCQ - unredir_tc_used)) {
      /* It's full; dump everything we currently have */
      init_unredir_tt_tc();
      i = 0;
   }

   vg_assert(unredir_tc_used >= 0);
   vg_assert(unredir_tc_used <= N_UNREDIR_TCQ);
   vg_assert(code_szQ > 0);
   vg_assert(code_szQ + unredir_tc_used <= N_UNREDIR_TCQ);
   vg_assert(i >= 0 && i < N_UNREDIR_TT);
   vg_assert(unredir_tt[i].inUse == False);

   if (i > unredir_tt_highwater)
      unredir_tt_highwater = i;

   dstP = (HChar*)&unredir_tc[unredir_tc_used];
   srcP = (HChar*)code;
   for (j = 0; j < code_len; j++)
      dstP[j] = srcP[j];

   VG_(invalidate_icache)( dstP, code_len );

   unredir_tt[i].inUse = True;
   unredir_tt[i].vge   = *vge;
   unredir_tt[i].hcode = (Addr)dstP;

   unredir_tc_used += code_szQ;
   vg_assert(unredir_tc_used >= 0);
   vg_assert(unredir_tc_used <= N_UNREDIR_TCQ);

   vg_assert(&dstP[code_len] <= (HChar*)&unredir_tc[unredir_tc_used]);
}

Bool VG_(search_unredir_transtab) ( /*OUT*/AddrH* result,
                                    Addr64        guest_addr )
{
   Int i;
   for (i = 0; i < N_UNREDIR_TT; i++) {
      if (!unredir_tt[i].inUse)
         continue;
      if (unredir_tt[i].vge.base[0] == guest_addr) {
         *result = (AddrH)unredir_tt[i].hcode;
         return True;
      }
   }
   return False;
}

static void unredir_discard_translations( Addr64 guest_start, ULong range )
{
   Int i;

   vg_assert(sanity_check_redir_tt_tc());

   for (i = 0; i <= unredir_tt_highwater; i++) {
      if (unredir_tt[i].inUse
          && overlaps( guest_start, range, &unredir_tt[i].vge))
         unredir_tt[i].inUse = False;
   }
}
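
/* Usage sketch for the unredir TT/TC above; hypothetical values, not
   part of the build.  Note that a 13-byte translation rounds up to
   (13 + 7) / 8 == 2 ULong words of unredir_tc space, and that when
   either structure fills up, everything is dumped and retranslated
   on demand, so correctness is unaffected. */
#if 0
static void unredir_usage_example ( void )
{
   UChar           buf[13];   /* stand-in for freshly generated code */
   VexGuestExtents vge;
   AddrH           hcode = 0;
   vge.n_used  = 1;
   vge.base[0] = 0x8048000ULL;   /* unredirected, so also the key */
   vge.len[0]  = 13;
   VG_(add_to_unredir_transtab)( &vge, vge.base[0],
                                 (AddrH)&buf[0], 13 );
   if (VG_(search_unredir_transtab)( &hcode, vge.base[0] ))
      vg_assert(hcode != 0);   /* points at the copied host code */
}
#endif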
2191/*------------------------------------------------------------*/ 2192/*--- Initialisation. ---*/ 2193/*------------------------------------------------------------*/ 2194 2195void VG_(init_tt_tc) ( void ) 2196{ 2197 Int i, avg_codeszQ; 2198 2199 vg_assert(!init_done); 2200 init_done = True; 2201 2202 /* Otherwise lots of things go wrong... */ 2203 vg_assert(sizeof(ULong) == 8); 2204 vg_assert(sizeof(Addr64) == 8); 2205 /* check fast cache entries really are 2 words long */ 2206 vg_assert(sizeof(Addr) == sizeof(void*)); 2207 vg_assert(sizeof(FastCacheEntry) == 2 * sizeof(Addr)); 2208 /* check fast cache entries are packed back-to-back with no spaces */ 2209 vg_assert(sizeof( VG_(tt_fast) ) == VG_TT_FAST_SIZE * sizeof(FastCacheEntry)); 2210 /* check fast cache is aligned as we requested. Not fatal if it 2211 isn't, but we might as well make sure. */ 2212 vg_assert(VG_IS_16_ALIGNED( ((Addr) & VG_(tt_fast)[0]) )); 2213 2214 if (VG_(clo_verbosity) > 2) 2215 VG_(message)(Vg_DebugMsg, 2216 "TT/TC: VG_(init_tt_tc) " 2217 "(startup of code management)\n"); 2218 2219 /* Figure out how big each tc area should be. */ 2220 avg_codeszQ = (VG_(details).avg_translation_sizeB + 7) / 8; 2221 tc_sector_szQ = N_TTES_PER_SECTOR_USABLE * (1 + avg_codeszQ); 2222 2223 /* Ensure the calculated value is not way crazy. */ 2224 vg_assert(tc_sector_szQ >= 2 * N_TTES_PER_SECTOR_USABLE); 2225 vg_assert(tc_sector_szQ <= 100 * N_TTES_PER_SECTOR_USABLE); 2226 2227 n_sectors = VG_(clo_num_transtab_sectors); 2228 vg_assert(n_sectors >= MIN_N_SECTORS); 2229 vg_assert(n_sectors <= MAX_N_SECTORS); 2230 2231 /* Initialise the sectors, even the ones we aren't going to use. 2232 Set all fields to zero. */ 2233 youngest_sector = 0; 2234 for (i = 0; i < MAX_N_SECTORS; i++) 2235 VG_(memset)(§ors[i], 0, sizeof(sectors[i])); 2236 2237 /* Initialise the sector_search_order hint table, including the 2238 entries we aren't going to use. */ 2239 for (i = 0; i < MAX_N_SECTORS; i++) 2240 sector_search_order[i] = -1; 2241 2242 /* Initialise the fast cache. */ 2243 invalidateFastCache(); 2244 2245 /* and the unredir tt/tc */ 2246 init_unredir_tt_tc(); 2247 2248 if (VG_(clo_verbosity) > 2 || VG_(clo_stats) 2249 || VG_(debugLog_getLevel) () >= 2) { 2250 VG_(message)(Vg_DebugMsg, 2251 "TT/TC: cache: %d sectors of %d bytes each = %d total\n", 2252 n_sectors, 8 * tc_sector_szQ, 2253 n_sectors * 8 * tc_sector_szQ ); 2254 VG_(message)(Vg_DebugMsg, 2255 "TT/TC: table: %d tables of %d bytes each = %d total\n", 2256 n_sectors, (int)(N_TTES_PER_SECTOR * sizeof(TTEntry)), 2257 (int)(n_sectors * N_TTES_PER_SECTOR * sizeof(TTEntry))); 2258 VG_(message)(Vg_DebugMsg, 2259 "TT/TC: table: %d entries each = %d total entries" 2260 " max occupancy %d (%d%%)\n", 2261 N_TTES_PER_SECTOR, 2262 n_sectors * N_TTES_PER_SECTOR, 2263 n_sectors * N_TTES_PER_SECTOR_USABLE, 2264 SECTOR_TT_LIMIT_PERCENT ); 2265 } 2266} 2267 2268 2269/*------------------------------------------------------------*/ 2270/*--- Printing out statistics. ---*/ 2271/*------------------------------------------------------------*/ 2272 2273static ULong safe_idiv( ULong a, ULong b ) 2274{ 2275 return (b == 0 ? 

/*------------------------------------------------------------*/
/*--- Printing out statistics.                             ---*/
/*------------------------------------------------------------*/

static ULong safe_idiv( ULong a, ULong b )
{
   return (b == 0 ? 0 : a / b);
}

UInt VG_(get_bbs_translated) ( void )
{
   return n_in_count;
}

void VG_(print_tt_tc_stats) ( void )
{
   VG_(message)(Vg_DebugMsg,
      " tt/tc: %'llu tt lookups requiring %'llu probes\n",
      n_full_lookups, n_lookup_probes );
   VG_(message)(Vg_DebugMsg,
      " tt/tc: %'llu fast-cache updates, %'llu flushes\n",
      n_fast_updates, n_fast_flushes );

   VG_(message)(Vg_DebugMsg,
                " transtab: new        %'llu "
                "(%'llu -> %'llu; ratio %'llu:10) [%'llu scs]\n",
                n_in_count, n_in_osize, n_in_tsize,
                safe_idiv(10*n_in_tsize, n_in_osize),
                n_in_sc_count);
   VG_(message)(Vg_DebugMsg,
                " transtab: dumped     %'llu (%'llu -> ?" "?)\n",
                n_dump_count, n_dump_osize );
   VG_(message)(Vg_DebugMsg,
                " transtab: discarded  %'llu (%'llu -> ?" "?)\n",
                n_disc_count, n_disc_osize );

   if (DEBUG_TRANSTAB) {
      Int i;
      VG_(printf)("\n");
      for (i = 0; i < ECLASS_N; i++) {
         VG_(printf)(" %4d", sectors[0].ec2tte_used[i]);
         if (i % 16 == 15)
            VG_(printf)("\n");
      }
      VG_(printf)("\n\n");
   }
}

/*------------------------------------------------------------*/
/*--- Printing out of profiling results.                   ---*/
/*------------------------------------------------------------*/

static ULong score ( TTEntry* tte )
{
   return ((ULong)tte->weight) * ((ULong)tte->count);
}

ULong VG_(get_SB_profile) ( SBProfEntry tops[], UInt n_tops )
{
   Int   sno, i, r, s;
   ULong score_total;

   /* First, compute the total weighted count, and find the top N
      ttes.  tops contains pointers to the most-used n_tops blocks, in
      descending order (viz, tops[0] is the highest scorer). */
   for (i = 0; i < n_tops; i++) {
      tops[i].addr  = 0;
      tops[i].score = 0;
   }

   score_total = 0;

   for (sno = 0; sno < n_sectors; sno++) {
      if (sectors[sno].tc == NULL)
         continue;
      for (i = 0; i < N_TTES_PER_SECTOR; i++) {
         if (sectors[sno].tt[i].status != InUse)
            continue;
         score_total += score(&sectors[sno].tt[i]);
         /* Find the rank for sectors[sno].tt[i]. */
         r = n_tops-1;
         while (True) {
            if (r == -1)
               break;
            if (tops[r].addr == 0) {
               r--;
               continue;
            }
            if ( score(&sectors[sno].tt[i]) > tops[r].score ) {
               r--;
               continue;
            }
            break;
         }
         r++;
         vg_assert(r >= 0 && r <= n_tops);
         /* This bb should be placed at r, and bbs above it shifted
            upwards one slot. */
         if (r < n_tops) {
            for (s = n_tops-1; s > r; s--)
               tops[s] = tops[s-1];
            tops[r].addr  = sectors[sno].tt[i].entry;
            tops[r].score = score( &sectors[sno].tt[i] );
         }
      }
   }

   /* Now zero out all the counter fields, so that we can make
      multiple calls here and just get the values since the last call,
      each time, rather than values accumulated for the whole run. */
   for (sno = 0; sno < n_sectors; sno++) {
      if (sectors[sno].tc == NULL)
         continue;
      for (i = 0; i < N_TTES_PER_SECTOR; i++) {
         if (sectors[sno].tt[i].status != InUse)
            continue;
         sectors[sno].tt[i].count = 0;
      }
   }

   return score_total;
}

/*--------------------------------------------------------------------*/
/*--- end                                                          ---*/
/*--------------------------------------------------------------------*/