m_transtab.c revision 523b5b8ca67d2063afd02342d0138b0dc0ed6706
1 2/*--------------------------------------------------------------------*/ 3/*--- Management of the translation table and cache. ---*/ 4/*--- m_transtab.c ---*/ 5/*--------------------------------------------------------------------*/ 6 7/* 8 This file is part of Valgrind, a dynamic binary instrumentation 9 framework. 10 11 Copyright (C) 2000-2013 Julian Seward 12 jseward@acm.org 13 14 This program is free software; you can redistribute it and/or 15 modify it under the terms of the GNU General Public License as 16 published by the Free Software Foundation; either version 2 of the 17 License, or (at your option) any later version. 18 19 This program is distributed in the hope that it will be useful, but 20 WITHOUT ANY WARRANTY; without even the implied warranty of 21 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU 22 General Public License for more details. 23 24 You should have received a copy of the GNU General Public License 25 along with this program; if not, write to the Free Software 26 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 27 02111-1307, USA. 28 29 The GNU General Public License is contained in the file COPYING. 30*/ 31 32#include "pub_core_basics.h" 33#include "pub_core_debuglog.h" 34#include "pub_core_machine.h" // For VG_(machine_get_VexArchInfo) 35#include "pub_core_libcbase.h" 36#include "pub_core_vki.h" // to keep pub_core_libproc.h happy, sigh 37#include "pub_core_libcproc.h" // VG_(invalidate_icache) 38#include "pub_core_libcassert.h" 39#include "pub_core_libcprint.h" 40#include "pub_core_options.h" 41#include "pub_core_tooliface.h" // For VG_(details).avg_translation_sizeB 42#include "pub_core_transtab.h" 43#include "pub_core_aspacemgr.h" 44#include "pub_core_mallocfree.h" // VG_(out_of_memory_NORETURN) 45#include "pub_core_xarray.h" 46#include "pub_core_dispatch.h" // For VG_(disp_cp*) addresses 47 48 49#define DEBUG_TRANSTAB 0 50 51 52/*-------------------------------------------------------------*/ 53/*--- Management of the FIFO-based translation table+cache. ---*/ 54/*-------------------------------------------------------------*/ 55 56/* Nr of sectors provided via command line parameter. */ 57UInt VG_(clo_num_transtab_sectors) = N_SECTORS_DEFAULT; 58/* Nr of sectors. 59 Will be set by VG_(init_tt_tc) to VG_(clo_num_transtab_sectors). */ 60static SECno n_sectors = 0; 61 62/* Average size of a transtab code entry. 0 means to use the tool 63 provided default. */ 64UInt VG_(clo_avg_transtab_entry_size) = 0; 65 66/*------------------ CONSTANTS ------------------*/ 67/* Number of entries in hash table of each sector. This needs to be a prime 68 number to work properly, it must be <= 65535 (so that a TTE index 69 fits in a UShort, leaving room for 0xFFFF(EC2TTE_DELETED, HTT_DELETED) 70 to denote 'deleted') and 0xFFFE (HTT_EMPTY) to denote 'Empty' in the 71 hash table. 72 It is strongly recommended not to change this. 73 65521 is the largest prime <= 65535. */ 74#define N_HTTES_PER_SECTOR /*10007*/ /*30011*/ /*40009*/ 65521 75 76#define EC2TTE_DELETED 0xFFFF /* 16-bit special value */ 77#define HTT_DELETED EC2TTE_DELETED 78#define HTT_EMPTY 0XFFFE 79 80// HTTno is the Sector->htt hash table index. Must be the same type as TTEno. 81typedef UShort HTTno; 82 83/* Because each sector contains a hash table of TTEntries, we need to 84 specify the maximum allowable loading, after which the sector is 85 deemed full. */ 86#define SECTOR_TT_LIMIT_PERCENT 65 87 88/* The sector is deemed full when this many entries are in it. 
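   (For illustration: with N_HTTES_PER_SECTOR = 65521 and
   SECTOR_TT_LIMIT_PERCENT = 65, this works out to
   (65521 * 65) / 100 = 42588 TT entries per sector, so at least
   35% of the hash table slots always remain free for probing.)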
*/ 89#define N_TTES_PER_SECTOR \ 90 ((N_HTTES_PER_SECTOR * SECTOR_TT_LIMIT_PERCENT) / 100) 91 92/* Equivalence classes for fast address range deletion. There are 1 + 93 2^ECLASS_WIDTH bins. The highest one, ECLASS_MISC, describes an 94 address range which does not fall cleanly within any specific bin. 95 Note that ECLASS_SHIFT + ECLASS_WIDTH must be < 32. */ 96#define ECLASS_SHIFT 11 97#define ECLASS_WIDTH 8 98#define ECLASS_MISC (1 << ECLASS_WIDTH) 99#define ECLASS_N (1 + ECLASS_MISC) 100 101 102 103/*------------------ TYPES ------------------*/ 104 105/* In edges ("to-me") in the graph created by chaining. */ 106typedef 107 struct { 108 SECno from_sNo; /* sector number */ 109 TTEno from_tteNo; /* TTE number in given sector */ 110 UInt from_offs; /* code offset from TCEntry::tcptr where the patch is */ 111 Bool to_fastEP; /* Is the patch to a fast or slow entry point? */ 112 } 113 InEdge; 114 115 116/* Out edges ("from-me") in the graph created by chaining. */ 117typedef 118 struct { 119 SECno to_sNo; /* sector number */ 120 TTEno to_tteNo; /* TTE number in given sector */ 121 UInt from_offs; /* code offset in owning translation where patch is */ 122 } 123 OutEdge; 124 125 126#define N_FIXED_IN_EDGE_ARR 3 127typedef 128 struct { 129 UInt n_fixed; /* 0 .. N_FIXED_IN_EDGE_ARR */ 130 InEdge fixed[N_FIXED_IN_EDGE_ARR]; 131 XArray* var; /* XArray* of InEdgeArr */ 132 } 133 InEdgeArr; 134 135#define N_FIXED_OUT_EDGE_ARR 2 136typedef 137 struct { 138 UInt n_fixed; /* 0 .. N_FIXED_OUT_EDGE_ARR */ 139 OutEdge fixed[N_FIXED_OUT_EDGE_ARR]; 140 XArray* var; /* XArray* of OutEdgeArr */ 141 } 142 OutEdgeArr; 143 144 145/* A translation-table entry. This indicates precisely which areas of 146 guest code are included in the translation, and contains all other 147 auxiliary info too. */ 148typedef 149 struct { 150 union { 151 struct { 152 /* Profiling only: the count and weight (arbitrary meaning) for 153 this translation. Weight is a property of the translation 154 itself and computed once when the translation is created. 155 Count is an entry count for the translation and is 156 incremented by 1 every time the translation is used, if we 157 are profiling. */ 158 ULong count; 159 UShort weight; 160 } prof; // if status == InUse 161 TTEno next_empty_tte; // if status != InUse 162 } usage; 163 164 /* Status of the slot. Note, we need to be able to do lazy 165 deletion, hence the Deleted state. */ 166 enum { InUse, Deleted, Empty } status; 167 168 /* 64-bit aligned pointer to one or more 64-bit words containing 169 the corresponding host code (must be in the same sector!) 170 This is a pointer into the sector's tc (code) area. */ 171 ULong* tcptr; 172 173 /* This is the original guest address that purportedly is the 174 entry point of the translation. You might think that .entry 175 should be the same as .vge->base[0], and most of the time it 176 is. However, when doing redirections, that is not the case. 177 .vge must always correctly describe the guest code sections 178 from which this translation was made. However, .entry may or 179 may not be a lie, depending on whether or not we're doing 180 redirection. */ 181 Addr entry; 182 183 /* This structure describes precisely what ranges of guest code 184 the translation covers, so we can decide whether or not to 185 delete it when translations of a given address range are 186 invalidated. */ 187 VexGuestExtents vge; 188 189 /* Address range summary info: these are pointers back to 190 eclass[] entries in the containing Sector. 
Those entries in
         turn point back here -- the two structures are mutually
         redundant but both necessary to make fast deletions work.
         The eclass info is similar to, and derived from, this entry's
         'vge' field, but it is not the same. */
      UShort n_tte2ec;      // # tte2ec pointers (1 to 3)
      UShort tte2ec_ec[3];  // for each, the eclass #
      UInt   tte2ec_ix[3];  // and the index within the eclass.
      // for i in 0 .. n_tte2ec-1
      //    sec->ec2tte[ tte2ec_ec[i] ][ tte2ec_ix[i] ]
      // should be the index
      // of this TTEntry in the containing Sector's tt array.

      /* Admin information for chaining.  'in_edges' is a set of the
         patch points which jump to this translation -- hence are
         predecessors in the control flow graph.  'out_edges' points
         to successors in the control flow graph -- translations to
         which this one has a patched jump.  In short these are just
         backwards and forwards edges in the graph of patched-together
         blocks.  The 'in_edges' contain slightly more info, enough
         that we can undo the chaining of each mentioned patch point.
         The 'out_edges' list exists only so that we can visit the
         'in_edges' entries of all blocks we're patched through to, in
         order to remove ourselves from them when we're deleted. */

      /* A translation can disappear for two reasons:
         1. erased (as part of the oldest sector cleanup) when the
            youngest sector is full.
         2. discarded due to calls to VG_(discard_translations).
            VG_(discard_translations) sets the status of the
            translation to 'Deleted'.
            Among others, the gdbserver discards one or more translations
            when a breakpoint is inserted or removed at an Addr,
            or when single-stepping mode is enabled/disabled,
            or when a translation is instrumented for gdbserver
            (all the target jumps of this translation are
            invalidated).

         So, it is possible that the translation A to be patched
         (to obtain a patched jump from A to B) is invalidated
         after B is translated and before A is patched.
         In case a translation is erased or discarded, the patching
         cannot be done.  VG_(tt_tc_do_chaining) and
         find_TTEntry_from_hcode check that the 'from' translation
         still exists before doing the patching.

         Is it safe to erase or discard the current translation E being
         executed?  Amazingly, yes, it is safe.
         Here is the explanation:

         The translation E being executed can only be erased if a new
         translation N is being done.  A new translation is done only
         if the host addr is a not-yet-patched jump to another
         translation.  In such a case, the guest address of N is
         assigned to the PC in the VEX state.  Control is returned
         to the scheduler.  N will be translated.  This can erase the
         translation E (in case of sector full).  VG_(tt_tc_do_chaining)
         will not do the chaining to a translation E that can no longer
         be found.  The execution will continue at the current guest PC
         (i.e. the translation N).
         => it is safe to erase the current translation being executed.

         The current translation E being executed can also be discarded
         (e.g. by gdbserver).  VG_(discard_translations) will mark
         this translation E as Deleted, but the translation itself
         is not erased.  In particular, its host code can only
         be overwritten or erased in case a new translation is done.
         A new translation will only be done if a not-yet-translated
         jump is to be executed.  The execution of the Deleted
         translation E will continue till a not-yet-patched jump is
         encountered.  This situation is then similar to the 'erasing'
         case above: the current translation E can be erased or
         overwritten, as the execution will continue at the new
         translation N.
      */

      /* It is possible, although very unlikely, that a block A has
         more than one patched jump to block B.  This could happen if
         (eg) A finishes "jcond B; jmp B".

         This means in turn that B's in_edges set can list A more than
         once (twice in this example).  However, each such entry must
         have a different from_offs, since a patched jump can only
         jump to one place at once (it's meaningless for it to have
         multiple destinations.)  IOW, the successor and predecessor
         edges in the graph are not uniquely determined by a
         TTEntry --> TTEntry pair, but rather by a
         (TTEntry,offset) --> TTEntry triple.

         If A has multiple edges to B then B will mention A multiple
         times in its in_edges.  To make things simpler, we then
         require that A mentions B exactly the same number of times in
         its out_edges.  Furthermore, a matching out-in pair must have
         the same offset (from_offs).  This facilitates sanity
         checking, and it facilitates establishing the invariant that
         an out_edges set may not have duplicates when using the
         equality defined by (TTEntry,offset).  Hence the out_edges
         and in_edges sets really do both have set semantics.

         eg if A has been patched to B at offsets 42 and 87 (in A)
         then A.out_edges = { (B,42), (B,87) }   (in any order)
         and  B.in_edges  = { (A,42), (A,87) }   (in any order)

         Hence for each node pair P->Q in the graph, there's a 1:1
         mapping between P.out_edges and Q.in_edges.
      */
      InEdgeArr  in_edges;
      OutEdgeArr out_edges;
   }
   TTEntry;


/* A structure used for mapping host code addresses back to the
   relevant TTEntry.  Used when doing chaining, for finding the
   TTEntry to which some arbitrary patch address belongs. */
typedef
   struct {
      UChar* start;
      UInt   len;
      TTEno  tteNo;
   }
   HostExtent;

/* Finally, a sector itself.  Each sector contains an array of
   TCEntries, which hold code, and an array of TTEntries, containing
   all required administrative info.  Profiling is supported using the
   TTEntry usage.prof.count and usage.prof.weight fields, if required.

   If the sector is not in use, all three pointers are NULL and
   tt_n_inuse is zero.
*/
typedef
   struct {
      /* The TCEntry area.  Size of this depends on the average
         translation size.  We try and size it so it becomes full
         precisely when this sector's translation table (tt) reaches
         its load limit (SECTOR_TT_LIMIT_PERCENT). */
      ULong* tc;

      /* A hash table, mapping guest address to an index in the tt
         array.  htt is of fixed size, always containing
         exactly N_HTTES_PER_SECTOR entries. */
      TTEno* htt;

      /* The TTEntry array.  This is of fixed size, always containing
         exactly N_TTES_PER_SECTOR entries. */
      TTEntry* tt;

      /* This points to the current allocation point in tc. */
      ULong* tc_next;

      /* The count of tt entries with state InUse. */
      Int tt_n_inuse;

      /* A list of Empty/Deleted entries, chained by tte->next_empty_tte */
      TTEno empty_tt_list;

      /* Expandable arrays of tt indices for each of the ECLASS_N
         address range equivalence classes.
These hold indices into 349 the containing sector's tt array, which in turn should point 350 back here. */ 351 Int ec2tte_size[ECLASS_N]; 352 Int ec2tte_used[ECLASS_N]; 353 TTEno* ec2tte[ECLASS_N]; 354 355 /* The host extents. The [start, +len) ranges are constructed 356 in strictly non-overlapping order, so we can binary search 357 them at any time. */ 358 XArray* host_extents; /* XArray* of HostExtent */ 359 } 360 Sector; 361 362 363/*------------------ DECLS ------------------*/ 364 365/* The root data structure is an array of sectors. The index of the 366 youngest sector is recorded, and new translations are put into that 367 sector. When it fills up, we move along to the next sector and 368 start to fill that up, wrapping around at the end of the array. 369 That way, once all N_TC_SECTORS have been bought into use for the 370 first time, and are full, we then re-use the oldest sector, 371 endlessly. 372 373 When running, youngest sector should be between >= 0 and < 374 N_TC_SECTORS. The initial value indicates the TT/TC system is 375 not yet initialised. 376*/ 377static Sector sectors[MAX_N_SECTORS]; 378static Int youngest_sector = INV_SNO; 379 380/* The number of ULongs in each TCEntry area. This is computed once 381 at startup and does not change. */ 382static Int tc_sector_szQ = 0; 383 384 385/* A list of sector numbers, in the order which they should be 386 searched to find translations. This is an optimisation to be used 387 when searching for translations and should not affect 388 correctness. INV_SNO denotes "no entry". */ 389static SECno sector_search_order[MAX_N_SECTORS]; 390 391 392/* Fast helper for the TC. A direct-mapped cache which holds a set of 393 recently used (guest address, host address) pairs. This array is 394 referred to directly from m_dispatch/dispatch-<platform>.S. 395 396 Entries in tt_fast may refer to any valid TC entry, regardless of 397 which sector it's in. Consequently we must be very careful to 398 invalidate this cache when TC entries are changed or disappear. 399 400 A special .guest address - TRANSTAB_BOGUS_GUEST_ADDR -- must be 401 pointed at to cause that cache entry to miss. This relies on the 402 assumption that no guest code actually has that address, hence a 403 value 0x1 seems good. m_translate gives the client a synthetic 404 segfault if it tries to execute at this address. 405*/ 406/* 407typedef 408 struct { 409 Addr guest; 410 Addr host; 411 } 412 FastCacheEntry; 413*/ 414/*global*/ __attribute__((aligned(16))) 415 FastCacheEntry VG_(tt_fast)[VG_TT_FAST_SIZE]; 416 417/* Make sure we're not used before initialisation. */ 418static Bool init_done = False; 419 420 421/*------------------ STATS DECLS ------------------*/ 422 423/* Number of fast-cache updates and flushes done. */ 424static ULong n_fast_flushes = 0; 425static ULong n_fast_updates = 0; 426 427/* Number of full lookups done. */ 428static ULong n_full_lookups = 0; 429static ULong n_lookup_probes = 0; 430 431/* Number/osize/tsize of translations entered; also the number of 432 those for which self-checking was requested. */ 433static ULong n_in_count = 0; 434static ULong n_in_osize = 0; 435static ULong n_in_tsize = 0; 436static ULong n_in_sc_count = 0; 437 438/* Number/osize of translations discarded due to lack of space. */ 439static ULong n_dump_count = 0; 440static ULong n_dump_osize = 0; 441static ULong n_sectors_recycled = 0; 442 443/* Number/osize of translations discarded due to requests to do so. 
*/ 444static ULong n_disc_count = 0; 445static ULong n_disc_osize = 0; 446 447 448/*-------------------------------------------------------------*/ 449/*--- Misc ---*/ 450/*-------------------------------------------------------------*/ 451 452static void* ttaux_malloc ( const HChar* tag, SizeT n ) 453{ 454 return VG_(arena_malloc)(VG_AR_TTAUX, tag, n); 455} 456 457static void ttaux_free ( void* p ) 458{ 459 VG_(arena_free)(VG_AR_TTAUX, p); 460} 461 462 463/*-------------------------------------------------------------*/ 464/*--- Chaining support ---*/ 465/*-------------------------------------------------------------*/ 466 467static inline TTEntry* index_tte ( SECno sNo, TTEno tteNo ) 468{ 469 vg_assert(sNo < n_sectors); 470 vg_assert(tteNo < N_TTES_PER_SECTOR); 471 Sector* s = §ors[sNo]; 472 vg_assert(s->tt); 473 TTEntry* tte = &s->tt[tteNo]; 474 vg_assert(tte->status == InUse); 475 return tte; 476} 477 478static void InEdge__init ( InEdge* ie ) 479{ 480 ie->from_sNo = INV_SNO; /* invalid */ 481 ie->from_tteNo = 0; 482 ie->from_offs = 0; 483 ie->to_fastEP = False; 484} 485 486static void OutEdge__init ( OutEdge* oe ) 487{ 488 oe->to_sNo = INV_SNO; /* invalid */ 489 oe->to_tteNo = 0; 490 oe->from_offs = 0; 491} 492 493static void TTEntry__init ( TTEntry* tte ) 494{ 495 VG_(memset)(tte, 0, sizeof(*tte)); 496} 497 498static UWord InEdgeArr__size ( const InEdgeArr* iea ) 499{ 500 if (iea->var) { 501 vg_assert(iea->n_fixed == 0); 502 return VG_(sizeXA)(iea->var); 503 } else { 504 vg_assert(iea->n_fixed <= N_FIXED_IN_EDGE_ARR); 505 return iea->n_fixed; 506 } 507} 508 509static void InEdgeArr__makeEmpty ( InEdgeArr* iea ) 510{ 511 if (iea->var) { 512 vg_assert(iea->n_fixed == 0); 513 VG_(deleteXA)(iea->var); 514 iea->var = NULL; 515 } else { 516 vg_assert(iea->n_fixed <= N_FIXED_IN_EDGE_ARR); 517 iea->n_fixed = 0; 518 } 519} 520 521static 522InEdge* InEdgeArr__index ( InEdgeArr* iea, UWord i ) 523{ 524 if (iea->var) { 525 vg_assert(iea->n_fixed == 0); 526 return (InEdge*)VG_(indexXA)(iea->var, i); 527 } else { 528 vg_assert(i < iea->n_fixed); 529 return &iea->fixed[i]; 530 } 531} 532 533static 534void InEdgeArr__deleteIndex ( InEdgeArr* iea, UWord i ) 535{ 536 if (iea->var) { 537 vg_assert(iea->n_fixed == 0); 538 VG_(removeIndexXA)(iea->var, i); 539 } else { 540 vg_assert(i < iea->n_fixed); 541 for (; i+1 < iea->n_fixed; i++) { 542 iea->fixed[i] = iea->fixed[i+1]; 543 } 544 iea->n_fixed--; 545 } 546} 547 548static 549void InEdgeArr__add ( InEdgeArr* iea, InEdge* ie ) 550{ 551 if (iea->var) { 552 vg_assert(iea->n_fixed == 0); 553 VG_(addToXA)(iea->var, ie); 554 } else { 555 vg_assert(iea->n_fixed <= N_FIXED_IN_EDGE_ARR); 556 if (iea->n_fixed == N_FIXED_IN_EDGE_ARR) { 557 /* The fixed array is full, so we have to initialise an 558 XArray and copy the fixed array into it. */ 559 iea->var = VG_(newXA)(ttaux_malloc, "transtab.IEA__add", 560 ttaux_free, 561 sizeof(InEdge)); 562 UWord i; 563 for (i = 0; i < iea->n_fixed; i++) { 564 VG_(addToXA)(iea->var, &iea->fixed[i]); 565 } 566 VG_(addToXA)(iea->var, ie); 567 iea->n_fixed = 0; 568 } else { 569 /* Just add to the fixed array. 
*/ 570 iea->fixed[iea->n_fixed++] = *ie; 571 } 572 } 573} 574 575static UWord OutEdgeArr__size ( const OutEdgeArr* oea ) 576{ 577 if (oea->var) { 578 vg_assert(oea->n_fixed == 0); 579 return VG_(sizeXA)(oea->var); 580 } else { 581 vg_assert(oea->n_fixed <= N_FIXED_OUT_EDGE_ARR); 582 return oea->n_fixed; 583 } 584} 585 586static void OutEdgeArr__makeEmpty ( OutEdgeArr* oea ) 587{ 588 if (oea->var) { 589 vg_assert(oea->n_fixed == 0); 590 VG_(deleteXA)(oea->var); 591 oea->var = NULL; 592 } else { 593 vg_assert(oea->n_fixed <= N_FIXED_OUT_EDGE_ARR); 594 oea->n_fixed = 0; 595 } 596} 597 598static 599OutEdge* OutEdgeArr__index ( OutEdgeArr* oea, UWord i ) 600{ 601 if (oea->var) { 602 vg_assert(oea->n_fixed == 0); 603 return (OutEdge*)VG_(indexXA)(oea->var, i); 604 } else { 605 vg_assert(i < oea->n_fixed); 606 return &oea->fixed[i]; 607 } 608} 609 610static 611void OutEdgeArr__deleteIndex ( OutEdgeArr* oea, UWord i ) 612{ 613 if (oea->var) { 614 vg_assert(oea->n_fixed == 0); 615 VG_(removeIndexXA)(oea->var, i); 616 } else { 617 vg_assert(i < oea->n_fixed); 618 for (; i+1 < oea->n_fixed; i++) { 619 oea->fixed[i] = oea->fixed[i+1]; 620 } 621 oea->n_fixed--; 622 } 623} 624 625static 626void OutEdgeArr__add ( OutEdgeArr* oea, OutEdge* oe ) 627{ 628 if (oea->var) { 629 vg_assert(oea->n_fixed == 0); 630 VG_(addToXA)(oea->var, oe); 631 } else { 632 vg_assert(oea->n_fixed <= N_FIXED_OUT_EDGE_ARR); 633 if (oea->n_fixed == N_FIXED_OUT_EDGE_ARR) { 634 /* The fixed array is full, so we have to initialise an 635 XArray and copy the fixed array into it. */ 636 oea->var = VG_(newXA)(ttaux_malloc, "transtab.OEA__add", 637 ttaux_free, 638 sizeof(OutEdge)); 639 UWord i; 640 for (i = 0; i < oea->n_fixed; i++) { 641 VG_(addToXA)(oea->var, &oea->fixed[i]); 642 } 643 VG_(addToXA)(oea->var, oe); 644 oea->n_fixed = 0; 645 } else { 646 /* Just add to the fixed array. */ 647 oea->fixed[oea->n_fixed++] = *oe; 648 } 649 } 650} 651 652static 653Int HostExtent__cmpOrd ( const void* v1, const void* v2 ) 654{ 655 const HostExtent* hx1 = v1; 656 const HostExtent* hx2 = v2; 657 if (hx1->start + hx1->len <= hx2->start) return -1; 658 if (hx2->start + hx2->len <= hx1->start) return 1; 659 return 0; /* partial overlap */ 660} 661 662/* True if hx is a dead host extent, i.e. corresponds to host code 663 of an entry that was invalidated. */ 664static 665Bool HostExtent__is_dead (const HostExtent* hx, const Sector* sec) 666{ 667 const TTEno tteNo = hx->tteNo; 668#define LDEBUG(m) if (DEBUG_TRANSTAB) \ 669 VG_(printf) (m \ 670 " start 0x%p len %u sector %d ttslot %u" \ 671 " tt.entry 0x%lu tt.tcptr 0x%p\n", \ 672 hx->start, hx->len, (int)(sec - sectors), \ 673 hx->tteNo, \ 674 sec->tt[tteNo].entry, sec->tt[tteNo].tcptr) 675 676 /* Entry might have been invalidated and not re-used yet.*/ 677 if (sec->tt[tteNo].status == Deleted) { 678 LDEBUG("found deleted entry"); 679 return True; 680 } 681 /* Maybe we found this entry via a host_extents which was 682 inserted for an entry which was changed to Deleted then 683 re-used after. If this entry was re-used, then its tcptr 684 is >= to host_extents start (i.e. the previous tcptr) + len. 685 This is the case as there is no re-use of host code: a new 686 entry or re-used entry always gets "higher value" host code. 
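      As an illustrative example: if the stale extent covered
      [0x1000, 0x1080) then any later translation in this sector is
      carved out of tc at or beyond tc_next, which had already
      advanced past 0x1080 when the stale code was installed; so a
      re-used entry's tcptr is necessarily >= the stale extent's end.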
*/
   if ((UChar*) sec->tt[tteNo].tcptr >= hx->start + hx->len) {
      LDEBUG("found re-used entry");
      return True;
   }

   return False;
#undef LDEBUG
}

static __attribute__((noinline))
Bool find_TTEntry_from_hcode( /*OUT*/SECno* from_sNo,
                              /*OUT*/TTEno* from_tteNo,
                              void* hcode )
{
   SECno i;

   /* Search order logic copied from VG_(search_transtab). */
   for (i = 0; i < n_sectors; i++) {
      SECno sno = sector_search_order[i];
      if (UNLIKELY(sno == INV_SNO))
         return False; /* run out of sectors to search */

      const Sector* sec = &sectors[sno];
      const XArray* /* of HostExtent */ host_extents = sec->host_extents;
      vg_assert(host_extents);

      HostExtent key;
      VG_(memset)(&key, 0, sizeof(key));
      key.start = hcode;
      key.len = 1;
      Word firstW = -1, lastW = -1;
      Bool found = VG_(lookupXA_UNSAFE)(
                      host_extents, &key, &firstW, &lastW,
                      HostExtent__cmpOrd );
      vg_assert(firstW == lastW); // always true, even if not found
      if (found) {
         HostExtent* hx = VG_(indexXA)(host_extents, firstW);
         TTEno tteNo = hx->tteNo;
         /* Do some additional sanity checks. */
         vg_assert(tteNo < N_TTES_PER_SECTOR);

         /* If this hx entry corresponds to dead host code, we must
            report that this code has not been found, as it cannot
            be patched. */
         if (HostExtent__is_dead (hx, sec))
            return False;

         vg_assert(sec->tt[tteNo].status == InUse);
         /* Can only half check that the found TTEntry contains hcode,
            due to not having a length value for the hcode in the
            TTEntry. */
         vg_assert((UChar*)sec->tt[tteNo].tcptr <= (UChar*)hcode);
         /* Looks plausible */
         *from_sNo   = sno;
         *from_tteNo = tteNo;
         return True;
      }
   }
   return False;
}


/* Figure out whether or not hcode is jitted code present in the main
   code cache (but not in the no-redir cache).  Used for sanity
   checking. */
static Bool is_in_the_main_TC ( const void* hcode )
{
   SECno i, sno;
   for (i = 0; i < n_sectors; i++) {
      sno = sector_search_order[i];
      if (sno == INV_SNO)
         break; /* run out of sectors to search */
      if ((const UChar*)hcode >= (const UChar*)sectors[sno].tc
          && (const UChar*)hcode <= (const UChar*)sectors[sno].tc_next
                                    + sizeof(ULong) - 1)
         return True;
   }
   return False;
}


/* Fulfill a chaining request, and record admin info so we
   can undo it later, if required.
*/
void VG_(tt_tc_do_chaining) ( void* from__patch_addr,
                              SECno to_sNo,
                              TTEno to_tteNo,
                              Bool  to_fastEP )
{
   /* Get the CPU info established at startup. */
   VexArch arch_host = VexArch_INVALID;
   VexArchInfo archinfo_host;
   VG_(bzero_inline)(&archinfo_host, sizeof(archinfo_host));
   VG_(machine_get_VexArchInfo)( &arch_host, &archinfo_host );
   VexEndness endness_host = archinfo_host.endness;

   // host_code is where we're patching to.  So it needs to
   // take into account whether we're jumping to the slow
   // or fast entry point.  By definition, the fast entry point
   // is exactly one event check's worth of code along from
   // the slow (tcptr) entry point.
   TTEntry* to_tte = index_tte(to_sNo, to_tteNo);
   void* host_code = ((UChar*)to_tte->tcptr)
                     + (to_fastEP ?
LibVEX_evCheckSzB(arch_host) : 0); 790 791 // stay sane -- the patch point (dst) is in this sector's code cache 792 vg_assert( (UChar*)host_code >= (UChar*)sectors[to_sNo].tc ); 793 vg_assert( (UChar*)host_code <= (UChar*)sectors[to_sNo].tc_next 794 + sizeof(ULong) - 1 ); 795 796 /* Find the TTEntry for the from__ code. This isn't simple since 797 we only know the patch address, which is going to be somewhere 798 inside the from_ block. */ 799 SECno from_sNo = INV_SNO; 800 TTEno from_tteNo = INV_TTE; 801 Bool from_found 802 = find_TTEntry_from_hcode( &from_sNo, &from_tteNo, 803 from__patch_addr ); 804 if (!from_found) { 805 // The from code might have been discarded due to sector re-use 806 // or marked Deleted due to translation invalidation. 807 // In such a case, don't do the chaining. 808 VG_(debugLog)(1,"transtab", 809 "host code %p not found (discarded? sector recycled?)" 810 " => no chaining done\n", 811 from__patch_addr); 812 return; 813 } 814 815 TTEntry* from_tte = index_tte(from_sNo, from_tteNo); 816 817 /* Get VEX to do the patching itself. We have to hand it off 818 since it is host-dependent. */ 819 VexInvalRange vir 820 = LibVEX_Chain( 821 arch_host, endness_host, 822 from__patch_addr, 823 VG_(fnptr_to_fnentry)( 824 to_fastEP ? &VG_(disp_cp_chain_me_to_fastEP) 825 : &VG_(disp_cp_chain_me_to_slowEP)), 826 (void*)host_code 827 ); 828 VG_(invalidate_icache)( (void*)vir.start, vir.len ); 829 830 /* Now do the tricky bit -- update the ch_succs and ch_preds info 831 for the two translations involved, so we can undo the chaining 832 later, which we will have to do if the to_ block gets removed 833 for whatever reason. */ 834 835 /* This is the new from_ -> to_ link to add. */ 836 InEdge ie; 837 InEdge__init(&ie); 838 ie.from_sNo = from_sNo; 839 ie.from_tteNo = from_tteNo; 840 ie.to_fastEP = to_fastEP; 841 HWord from_offs = (HWord)( (UChar*)from__patch_addr 842 - (UChar*)from_tte->tcptr ); 843 vg_assert(from_offs < 100000/* let's say */); 844 ie.from_offs = (UInt)from_offs; 845 846 /* This is the new to_ -> from_ backlink to add. */ 847 OutEdge oe; 848 OutEdge__init(&oe); 849 oe.to_sNo = to_sNo; 850 oe.to_tteNo = to_tteNo; 851 oe.from_offs = (UInt)from_offs; 852 853 /* Add .. */ 854 InEdgeArr__add(&to_tte->in_edges, &ie); 855 OutEdgeArr__add(&from_tte->out_edges, &oe); 856} 857 858 859/* Unchain one patch, as described by the specified InEdge. For 860 sanity check purposes only (to check that the patched location is 861 as expected) it also requires the fast and slow entry point 862 addresses of the destination block (that is, the block that owns 863 this InEdge). */ 864__attribute__((noinline)) 865static void unchain_one ( VexArch arch_host, VexEndness endness_host, 866 InEdge* ie, 867 void* to_fastEPaddr, void* to_slowEPaddr ) 868{ 869 vg_assert(ie); 870 TTEntry* tte 871 = index_tte(ie->from_sNo, ie->from_tteNo); 872 UChar* place_to_patch 873 = ((UChar*)tte->tcptr) + ie->from_offs; 874 UChar* disp_cp_chain_me 875 = VG_(fnptr_to_fnentry)( 876 ie->to_fastEP ? &VG_(disp_cp_chain_me_to_fastEP) 877 : &VG_(disp_cp_chain_me_to_slowEP) 878 ); 879 UChar* place_to_jump_to_EXPECTED 880 = ie->to_fastEP ? 
to_fastEPaddr : to_slowEPaddr; 881 882 // stay sane: both src and dst for this unchaining are 883 // in the main code cache 884 vg_assert( is_in_the_main_TC(place_to_patch) ); // src 885 vg_assert( is_in_the_main_TC(place_to_jump_to_EXPECTED) ); // dst 886 // dst check is ok because LibVEX_UnChain checks that 887 // place_to_jump_to_EXPECTED really is the current dst, and 888 // asserts if it isn't. 889 VexInvalRange vir 890 = LibVEX_UnChain( arch_host, endness_host, place_to_patch, 891 place_to_jump_to_EXPECTED, disp_cp_chain_me ); 892 VG_(invalidate_icache)( (void*)vir.start, vir.len ); 893} 894 895 896/* The specified block is about to be deleted. Update the preds and 897 succs of its associated blocks accordingly. This includes undoing 898 any chained jumps to this block. */ 899static 900void unchain_in_preparation_for_deletion ( VexArch arch_host, 901 VexEndness endness_host, 902 SECno here_sNo, TTEno here_tteNo ) 903{ 904 if (DEBUG_TRANSTAB) 905 VG_(printf)("QQQ unchain_in_prep %u.%u...\n", here_sNo, here_tteNo); 906 UWord i, j, n, m; 907 Int evCheckSzB = LibVEX_evCheckSzB(arch_host); 908 TTEntry* here_tte = index_tte(here_sNo, here_tteNo); 909 if (DEBUG_TRANSTAB) 910 VG_(printf)("... QQQ tt.entry 0x%lu tt.tcptr 0x%p\n", 911 here_tte->entry, here_tte->tcptr); 912 vg_assert(here_tte->status == InUse); 913 914 /* Visit all InEdges owned by here_tte. */ 915 n = InEdgeArr__size(&here_tte->in_edges); 916 for (i = 0; i < n; i++) { 917 InEdge* ie = InEdgeArr__index(&here_tte->in_edges, i); 918 // Undo the chaining. 919 UChar* here_slow_EP = (UChar*)here_tte->tcptr; 920 UChar* here_fast_EP = here_slow_EP + evCheckSzB; 921 unchain_one(arch_host, endness_host, ie, here_fast_EP, here_slow_EP); 922 // Find the corresponding entry in the "from" node's out_edges, 923 // and remove it. 924 TTEntry* from_tte = index_tte(ie->from_sNo, ie->from_tteNo); 925 m = OutEdgeArr__size(&from_tte->out_edges); 926 vg_assert(m > 0); // it must have at least one entry 927 for (j = 0; j < m; j++) { 928 OutEdge* oe = OutEdgeArr__index(&from_tte->out_edges, j); 929 if (oe->to_sNo == here_sNo && oe->to_tteNo == here_tteNo 930 && oe->from_offs == ie->from_offs) 931 break; 932 } 933 vg_assert(j < m); // "oe must be findable" 934 OutEdgeArr__deleteIndex(&from_tte->out_edges, j); 935 } 936 937 /* Visit all OutEdges owned by here_tte. */ 938 n = OutEdgeArr__size(&here_tte->out_edges); 939 for (i = 0; i < n; i++) { 940 OutEdge* oe = OutEdgeArr__index(&here_tte->out_edges, i); 941 // Find the corresponding entry in the "to" node's in_edges, 942 // and remove it. 943 TTEntry* to_tte = index_tte(oe->to_sNo, oe->to_tteNo); 944 m = InEdgeArr__size(&to_tte->in_edges); 945 vg_assert(m > 0); // it must have at least one entry 946 for (j = 0; j < m; j++) { 947 InEdge* ie = InEdgeArr__index(&to_tte->in_edges, j); 948 if (ie->from_sNo == here_sNo && ie->from_tteNo == here_tteNo 949 && ie->from_offs == oe->from_offs) 950 break; 951 } 952 vg_assert(j < m); // "ie must be findable" 953 InEdgeArr__deleteIndex(&to_tte->in_edges, j); 954 } 955 956 InEdgeArr__makeEmpty(&here_tte->in_edges); 957 OutEdgeArr__makeEmpty(&here_tte->out_edges); 958} 959 960 961/*-------------------------------------------------------------*/ 962/*--- Address-range equivalence class stuff ---*/ 963/*-------------------------------------------------------------*/ 964 965/* Return equivalence class number for a range. 
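   As an illustrative example, using ECLASS_SHIFT = 11 and
   ECLASS_WIDTH = 8 from above: bins are 2048-byte slices of the
   address space taken modulo 256, so a 4-byte range at 0x4005000
   has (0x4005000 >> 11) & 0xFF == 10 for both endpoints and lands
   in bin 10, whereas a 4-byte range at 0x4005FFE straddles a
   2048-byte boundary (low bits 11, high bits 12) and is classed
   as ECLASS_MISC.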
*/ 966 967static Int range_to_eclass ( Addr start, UInt len ) 968{ 969 UInt mask = (1 << ECLASS_WIDTH) - 1; 970 UInt lo = (UInt)start; 971 UInt hi = lo + len - 1; 972 UInt loBits = (lo >> ECLASS_SHIFT) & mask; 973 UInt hiBits = (hi >> ECLASS_SHIFT) & mask; 974 if (loBits == hiBits) { 975 vg_assert(loBits < ECLASS_N-1); 976 return loBits; 977 } else { 978 return ECLASS_MISC; 979 } 980} 981 982 983/* Calculates the equivalence class numbers for any VexGuestExtent. 984 These are written in *eclasses, which must be big enough to hold 3 985 Ints. The number written, between 1 and 3, is returned. The 986 eclasses are presented in order, and any duplicates are removed. 987*/ 988 989static 990Int vexGuestExtents_to_eclasses ( /*OUT*/Int* eclasses, 991 const VexGuestExtents* vge ) 992{ 993 994# define SWAP(_lv1,_lv2) \ 995 do { Int t = _lv1; _lv1 = _lv2; _lv2 = t; } while (0) 996 997 Int i, j, n_ec, r; 998 999 vg_assert(vge->n_used >= 1 && vge->n_used <= 3); 1000 1001 n_ec = 0; 1002 for (i = 0; i < vge->n_used; i++) { 1003 r = range_to_eclass( vge->base[i], vge->len[i] ); 1004 if (r == ECLASS_MISC) 1005 goto bad; 1006 /* only add if we haven't already seen it */ 1007 for (j = 0; j < n_ec; j++) 1008 if (eclasses[j] == r) 1009 break; 1010 if (j == n_ec) 1011 eclasses[n_ec++] = r; 1012 } 1013 1014 if (n_ec == 1) 1015 return 1; 1016 1017 if (n_ec == 2) { 1018 /* sort */ 1019 if (eclasses[0] > eclasses[1]) 1020 SWAP(eclasses[0], eclasses[1]); 1021 return 2; 1022 } 1023 1024 if (n_ec == 3) { 1025 /* sort */ 1026 if (eclasses[0] > eclasses[2]) 1027 SWAP(eclasses[0], eclasses[2]); 1028 if (eclasses[0] > eclasses[1]) 1029 SWAP(eclasses[0], eclasses[1]); 1030 if (eclasses[1] > eclasses[2]) 1031 SWAP(eclasses[1], eclasses[2]); 1032 return 3; 1033 } 1034 1035 /* NOTREACHED */ 1036 vg_assert(0); 1037 1038 bad: 1039 eclasses[0] = ECLASS_MISC; 1040 return 1; 1041 1042# undef SWAP 1043} 1044 1045 1046/* Add tteno to the set of entries listed for equivalence class ec in 1047 this sector. Returns used location in eclass array. */ 1048 1049static 1050UInt addEClassNo ( /*MOD*/Sector* sec, Int ec, TTEno tteno ) 1051{ 1052 Int old_sz, new_sz, i, r; 1053 TTEno *old_ar, *new_ar; 1054 1055 vg_assert(ec >= 0 && ec < ECLASS_N); 1056 vg_assert(tteno < N_TTES_PER_SECTOR); 1057 1058 if (DEBUG_TRANSTAB) VG_(printf)("ec %d gets %d\n", ec, (Int)tteno); 1059 1060 if (sec->ec2tte_used[ec] >= sec->ec2tte_size[ec]) { 1061 1062 vg_assert(sec->ec2tte_used[ec] == sec->ec2tte_size[ec]); 1063 1064 old_sz = sec->ec2tte_size[ec]; 1065 old_ar = sec->ec2tte[ec]; 1066 new_sz = old_sz==0 ? 8 : old_sz<64 ? 2*old_sz : (3*old_sz)/2; 1067 new_ar = ttaux_malloc("transtab.aECN.1", 1068 new_sz * sizeof(TTEno)); 1069 for (i = 0; i < old_sz; i++) 1070 new_ar[i] = old_ar[i]; 1071 if (old_ar) 1072 ttaux_free(old_ar); 1073 sec->ec2tte_size[ec] = new_sz; 1074 sec->ec2tte[ec] = new_ar; 1075 1076 if (DEBUG_TRANSTAB) VG_(printf)("expand ec %d to %d\n", ec, new_sz); 1077 } 1078 1079 /* Common case */ 1080 r = sec->ec2tte_used[ec]++; 1081 vg_assert(r >= 0 && r < sec->ec2tte_size[ec]); 1082 sec->ec2tte[ec][r] = tteno; 1083 return (UInt)r; 1084} 1085 1086 1087/* 'vge' is being added to 'sec' at TT entry 'tteno'. Add appropriate 1088 eclass entries to 'sec'. 
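   (Illustrative example: if the vge's ranges fall into eclasses 7
   and 3, vexGuestExtents_to_eclasses yields the sorted set {3, 7},
   n_tte2ec is set to 2, and tte2ec_ix[] records this entry's slot
   within ec2tte[3] and ec2tte[7], so the mapping can be walked in
   either direction when the entry is later deleted.)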
*/ 1089 1090static 1091void upd_eclasses_after_add ( /*MOD*/Sector* sec, TTEno tteno ) 1092{ 1093 Int i, r, eclasses[3]; 1094 TTEntry* tte; 1095 vg_assert(tteno >= 0 && tteno < N_TTES_PER_SECTOR); 1096 1097 tte = &sec->tt[tteno]; 1098 r = vexGuestExtents_to_eclasses( eclasses, &tte->vge ); 1099 1100 vg_assert(r >= 1 && r <= 3); 1101 tte->n_tte2ec = r; 1102 1103 for (i = 0; i < r; i++) { 1104 tte->tte2ec_ec[i] = eclasses[i]; 1105 tte->tte2ec_ix[i] = addEClassNo( sec, eclasses[i], tteno ); 1106 } 1107} 1108 1109 1110/* Check the eclass info in 'sec' to ensure it is consistent. Returns 1111 True if OK, False if something's not right. Expensive. */ 1112 1113static Bool sanity_check_eclasses_in_sector ( const Sector* sec ) 1114{ 1115# define BAD(_str) do { whassup = (_str); goto bad; } while (0) 1116 1117 const HChar* whassup = NULL; 1118 Int i, j, k, n, ec_num, ec_idx; 1119 TTEntry* tte; 1120 TTEno tteno; 1121 ULong* tce; 1122 1123 /* Basic checks on this sector */ 1124 if (sec->tt_n_inuse < 0 || sec->tt_n_inuse > N_TTES_PER_SECTOR) 1125 BAD("invalid sec->tt_n_inuse"); 1126 tce = sec->tc_next; 1127 if (tce < &sec->tc[0] || tce > &sec->tc[tc_sector_szQ]) 1128 BAD("sec->tc_next points outside tc"); 1129 1130 /* For each eclass ... */ 1131 for (i = 0; i < ECLASS_N; i++) { 1132 if (sec->ec2tte_size[i] == 0 && sec->ec2tte[i] != NULL) 1133 BAD("ec2tte_size/ec2tte mismatch(1)"); 1134 if (sec->ec2tte_size[i] != 0 && sec->ec2tte[i] == NULL) 1135 BAD("ec2tte_size/ec2tte mismatch(2)"); 1136 if (sec->ec2tte_used[i] < 0 1137 || sec->ec2tte_used[i] > sec->ec2tte_size[i]) 1138 BAD("implausible ec2tte_used"); 1139 if (sec->ec2tte_used[i] == 0) 1140 continue; 1141 1142 /* For each tt reference in each eclass .. ensure the reference 1143 is to a valid tt entry, and that the entry's address ranges 1144 really include this eclass. */ 1145 1146 for (j = 0; j < sec->ec2tte_used[i]; j++) { 1147 tteno = sec->ec2tte[i][j]; 1148 if (tteno == EC2TTE_DELETED) 1149 continue; 1150 if (tteno >= N_TTES_PER_SECTOR) 1151 BAD("implausible tteno"); 1152 tte = &sec->tt[tteno]; 1153 if (tte->status != InUse) 1154 BAD("tteno points to non-inuse tte"); 1155 if (tte->n_tte2ec < 1 || tte->n_tte2ec > 3) 1156 BAD("tte->n_tte2ec out of range"); 1157 /* Exactly least one of tte->eclasses[0 .. tte->n_eclasses-1] 1158 must equal i. Inspect tte's eclass info. */ 1159 n = 0; 1160 for (k = 0; k < tte->n_tte2ec; k++) { 1161 if (k < tte->n_tte2ec-1 1162 && tte->tte2ec_ec[k] >= tte->tte2ec_ec[k+1]) 1163 BAD("tte->tte2ec_ec[..] out of order"); 1164 ec_num = tte->tte2ec_ec[k]; 1165 if (ec_num < 0 || ec_num >= ECLASS_N) 1166 BAD("tte->tte2ec_ec[..] out of range"); 1167 if (ec_num != i) 1168 continue; 1169 ec_idx = tte->tte2ec_ix[k]; 1170 if (ec_idx < 0 || ec_idx >= sec->ec2tte_used[i]) 1171 BAD("tte->tte2ec_ix[..] out of range"); 1172 if (ec_idx == j) 1173 n++; 1174 } 1175 if (n != 1) 1176 BAD("tteno does not point back at eclass"); 1177 } 1178 } 1179 1180 /* That establishes that for each forward pointer from TTEntrys 1181 there is a corresponding backward pointer from the eclass[] 1182 arrays. However, it doesn't rule out the possibility of other, 1183 bogus pointers in the eclass[] arrays. So do those similarly: 1184 scan through them and check the TTEntryies they point at point 1185 back. 
*/ 1186 1187 for (tteno = 0; tteno < N_TTES_PER_SECTOR; tteno++) { 1188 1189 tte = &sec->tt[tteno]; 1190 if (tte->status == Empty || tte->status == Deleted) { 1191 if (tte->n_tte2ec != 0) 1192 BAD("tte->n_eclasses nonzero for unused tte"); 1193 continue; 1194 } 1195 1196 vg_assert(tte->status == InUse); 1197 1198 if (tte->n_tte2ec < 1 || tte->n_tte2ec > 3) 1199 BAD("tte->n_eclasses out of range(2)"); 1200 1201 for (j = 0; j < tte->n_tte2ec; j++) { 1202 ec_num = tte->tte2ec_ec[j]; 1203 if (ec_num < 0 || ec_num >= ECLASS_N) 1204 BAD("tte->eclass[..] out of range"); 1205 ec_idx = tte->tte2ec_ix[j]; 1206 if (ec_idx < 0 || ec_idx >= sec->ec2tte_used[ec_num]) 1207 BAD("tte->ec_idx[..] out of range(2)"); 1208 if (sec->ec2tte[ec_num][ec_idx] != tteno) 1209 BAD("ec2tte does not point back to tte"); 1210 } 1211 } 1212 1213 return True; 1214 1215 bad: 1216 if (whassup) 1217 VG_(debugLog)(0, "transtab", "eclass sanity fail: %s\n", whassup); 1218 1219# if 0 1220 VG_(printf)("eclass = %d\n", i); 1221 VG_(printf)("tteno = %d\n", (Int)tteno); 1222 switch (tte->status) { 1223 case InUse: VG_(printf)("InUse\n"); break; 1224 case Deleted: VG_(printf)("Deleted\n"); break; 1225 case Empty: VG_(printf)("Empty\n"); break; 1226 } 1227 if (tte->status != Empty) { 1228 for (k = 0; k < tte->vge.n_used; k++) 1229 VG_(printf)("0x%lx %u\n", tte->vge.base[k], (UInt)tte->vge.len[k]); 1230 } 1231# endif 1232 1233 return False; 1234 1235# undef BAD 1236} 1237 1238 1239/* Sanity check absolutely everything. True == check passed. */ 1240 1241/* forwards */ 1242static Bool sanity_check_redir_tt_tc ( void ); 1243 1244static Bool sanity_check_sector_search_order ( void ) 1245{ 1246 SECno i, j, nListed; 1247 /* assert the array is the right size */ 1248 vg_assert(MAX_N_SECTORS == (sizeof(sector_search_order) 1249 / sizeof(sector_search_order[0]))); 1250 /* Check it's of the form valid_sector_numbers ++ [INV_SNO, INV_SNO, ..] */ 1251 for (i = 0; i < n_sectors; i++) { 1252 if (sector_search_order[i] == INV_SNO 1253 || sector_search_order[i] >= n_sectors) 1254 break; 1255 } 1256 nListed = i; 1257 for (/* */; i < n_sectors; i++) { 1258 if (sector_search_order[i] != INV_SNO) 1259 break; 1260 } 1261 if (i != n_sectors) 1262 return False; 1263 /* Check each sector number only appears once */ 1264 for (i = 0; i < n_sectors; i++) { 1265 if (sector_search_order[i] == INV_SNO) 1266 continue; 1267 for (j = i+1; j < n_sectors; j++) { 1268 if (sector_search_order[j] == sector_search_order[i]) 1269 return False; 1270 } 1271 } 1272 /* Check that the number of listed sectors equals the number 1273 in use, by counting nListed back down. 
*/ 1274 for (i = 0; i < n_sectors; i++) { 1275 if (sectors[i].tc != NULL) 1276 nListed--; 1277 } 1278 if (nListed != 0) 1279 return False; 1280 return True; 1281} 1282 1283static Bool sanity_check_all_sectors ( void ) 1284{ 1285 SECno sno; 1286 Bool sane; 1287 Sector* sec; 1288 for (sno = 0; sno < n_sectors; sno++) { 1289 Int i; 1290 Int nr_not_dead_hx = 0; 1291 Int szhxa; 1292 sec = §ors[sno]; 1293 if (sec->tc == NULL) 1294 continue; 1295 sane = sanity_check_eclasses_in_sector( sec ); 1296 if (!sane) 1297 return False; 1298 szhxa = VG_(sizeXA)(sec->host_extents); 1299 for (i = 0; i < szhxa; i++) { 1300 const HostExtent* hx = VG_(indexXA)(sec->host_extents, i); 1301 if (!HostExtent__is_dead (hx, sec)) 1302 nr_not_dead_hx++; 1303 } 1304 if (nr_not_dead_hx != sec->tt_n_inuse) { 1305 VG_(debugLog)(0, "transtab", 1306 "nr_not_dead_hx %d sanity fail " 1307 "(expected == in use %d)\n", 1308 nr_not_dead_hx, sec->tt_n_inuse); 1309 return False; 1310 } 1311 } 1312 1313 if ( !sanity_check_redir_tt_tc() ) 1314 return False; 1315 if ( !sanity_check_sector_search_order() ) 1316 return False; 1317 return True; 1318} 1319 1320 1321 1322/*-------------------------------------------------------------*/ 1323/*--- Add/find translations ---*/ 1324/*-------------------------------------------------------------*/ 1325 1326static UInt vge_osize ( const VexGuestExtents* vge ) 1327{ 1328 UInt i, n = 0; 1329 for (i = 0; i < vge->n_used; i++) 1330 n += (UInt)vge->len[i]; 1331 return n; 1332} 1333 1334static Bool isValidSector ( SECno sector ) 1335{ 1336 if (sector == INV_SNO || sector >= n_sectors) 1337 return False; 1338 return True; 1339} 1340 1341static inline HTTno HASH_TT ( Addr key ) 1342{ 1343 UInt kHi = sizeof(Addr) == 4 ? 0 : (key >> 32); 1344 UInt kLo = (UInt)key; 1345 UInt k32 = kHi ^ kLo; 1346 UInt ror = 7; 1347 if (ror > 0) 1348 k32 = (k32 >> ror) | (k32 << (32-ror)); 1349 return (HTTno)(k32 % N_HTTES_PER_SECTOR); 1350} 1351 1352static void setFastCacheEntry ( Addr key, ULong* tcptr ) 1353{ 1354 UInt cno = (UInt)VG_TT_FAST_HASH(key); 1355 VG_(tt_fast)[cno].guest = key; 1356 VG_(tt_fast)[cno].host = (Addr)tcptr; 1357 n_fast_updates++; 1358 /* This shouldn't fail. It should be assured by m_translate 1359 which should reject any attempt to make translation of code 1360 starting at TRANSTAB_BOGUS_GUEST_ADDR. */ 1361 vg_assert(VG_(tt_fast)[cno].guest != TRANSTAB_BOGUS_GUEST_ADDR); 1362} 1363 1364/* Invalidate the fast cache VG_(tt_fast). */ 1365static void invalidateFastCache ( void ) 1366{ 1367 UInt j; 1368 /* This loop is popular enough to make it worth unrolling a 1369 bit, at least on ppc32. 
*/ 1370 vg_assert(VG_TT_FAST_SIZE > 0 && (VG_TT_FAST_SIZE % 4) == 0); 1371 for (j = 0; j < VG_TT_FAST_SIZE; j += 4) { 1372 VG_(tt_fast)[j+0].guest = TRANSTAB_BOGUS_GUEST_ADDR; 1373 VG_(tt_fast)[j+1].guest = TRANSTAB_BOGUS_GUEST_ADDR; 1374 VG_(tt_fast)[j+2].guest = TRANSTAB_BOGUS_GUEST_ADDR; 1375 VG_(tt_fast)[j+3].guest = TRANSTAB_BOGUS_GUEST_ADDR; 1376 } 1377 1378 vg_assert(j == VG_TT_FAST_SIZE); 1379 n_fast_flushes++; 1380} 1381 1382 1383static TTEno get_empty_tt_slot(SECno sNo) 1384{ 1385 TTEno i; 1386 1387 i = sectors[sNo].empty_tt_list; 1388 sectors[sNo].empty_tt_list = sectors[sNo].tt[i].usage.next_empty_tte; 1389 1390 vg_assert (i >= 0 && i < N_TTES_PER_SECTOR); 1391 1392 return i; 1393} 1394 1395static void add_in_empty_tt_list (SECno sNo, TTEno tteno) 1396{ 1397 sectors[sNo].tt[tteno].usage.next_empty_tte = sectors[sNo].empty_tt_list; 1398 sectors[sNo].empty_tt_list = tteno; 1399} 1400 1401static void initialiseSector ( SECno sno ) 1402{ 1403 UInt i; 1404 SysRes sres; 1405 Sector* sec; 1406 vg_assert(isValidSector(sno)); 1407 1408 { Bool sane = sanity_check_sector_search_order(); 1409 vg_assert(sane); 1410 } 1411 sec = §ors[sno]; 1412 1413 if (sec->tc == NULL) { 1414 1415 /* Sector has never been used before. Need to allocate tt and 1416 tc. */ 1417 vg_assert(sec->tt == NULL); 1418 vg_assert(sec->tc_next == NULL); 1419 vg_assert(sec->tt_n_inuse == 0); 1420 for (i = 0; i < ECLASS_N; i++) { 1421 vg_assert(sec->ec2tte_size[i] == 0); 1422 vg_assert(sec->ec2tte_used[i] == 0); 1423 vg_assert(sec->ec2tte[i] == NULL); 1424 } 1425 vg_assert(sec->host_extents == NULL); 1426 1427 if (VG_(clo_stats) || VG_(debugLog_getLevel)() >= 1) 1428 VG_(dmsg)("transtab: " "allocate sector %d\n", sno); 1429 1430 sres = VG_(am_mmap_anon_float_valgrind)( 8 * tc_sector_szQ ); 1431 if (sr_isError(sres)) { 1432 VG_(out_of_memory_NORETURN)("initialiseSector(TC)", 1433 8 * tc_sector_szQ ); 1434 /*NOTREACHED*/ 1435 } 1436 sec->tc = (ULong*)(Addr)sr_Res(sres); 1437 1438 sres = VG_(am_mmap_anon_float_valgrind) 1439 ( N_TTES_PER_SECTOR * sizeof(TTEntry) ); 1440 if (sr_isError(sres)) { 1441 VG_(out_of_memory_NORETURN)("initialiseSector(TT)", 1442 N_TTES_PER_SECTOR * sizeof(TTEntry) ); 1443 /*NOTREACHED*/ 1444 } 1445 sec->tt = (TTEntry*)(Addr)sr_Res(sres); 1446 sec->empty_tt_list = HTT_EMPTY; 1447 for (TTEno ei = 0; ei < N_TTES_PER_SECTOR; ei++) { 1448 sec->tt[ei].status = Empty; 1449 sec->tt[ei].n_tte2ec = 0; 1450 add_in_empty_tt_list(sno, ei); 1451 } 1452 1453 sres = VG_(am_mmap_anon_float_valgrind) 1454 ( N_HTTES_PER_SECTOR * sizeof(TTEno) ); 1455 if (sr_isError(sres)) { 1456 VG_(out_of_memory_NORETURN)("initialiseSector(HTT)", 1457 N_HTTES_PER_SECTOR * sizeof(TTEno) ); 1458 /*NOTREACHED*/ 1459 } 1460 sec->htt = (TTEno*)(Addr)sr_Res(sres); 1461 for (HTTno hi = 0; hi < N_HTTES_PER_SECTOR; hi++) 1462 sec->htt[hi] = HTT_EMPTY; 1463 1464 /* Set up the host_extents array. */ 1465 sec->host_extents 1466 = VG_(newXA)(ttaux_malloc, "transtab.initialiseSector(host_extents)", 1467 ttaux_free, 1468 sizeof(HostExtent)); 1469 1470 /* Add an entry in the sector_search_order */ 1471 for (i = 0; i < n_sectors; i++) { 1472 if (sector_search_order[i] == INV_SNO) 1473 break; 1474 } 1475 vg_assert(i >= 0 && i < n_sectors); 1476 sector_search_order[i] = sno; 1477 1478 if (VG_(clo_verbosity) > 2) 1479 VG_(message)(Vg_DebugMsg, "TT/TC: initialise sector %d\n", sno); 1480 1481 } else { 1482 1483 /* Sector has been used before. Dump the old contents. 
*/ 1484 if (VG_(clo_stats) || VG_(debugLog_getLevel)() >= 1) 1485 VG_(dmsg)("transtab: " "recycle sector %d\n", sno); 1486 n_sectors_recycled++; 1487 1488 vg_assert(sec->tt != NULL); 1489 vg_assert(sec->tc_next != NULL); 1490 n_dump_count += sec->tt_n_inuse; 1491 1492 VexArch arch_host = VexArch_INVALID; 1493 VexArchInfo archinfo_host; 1494 VG_(bzero_inline)(&archinfo_host, sizeof(archinfo_host)); 1495 VG_(machine_get_VexArchInfo)( &arch_host, &archinfo_host ); 1496 VexEndness endness_host = archinfo_host.endness; 1497 1498 /* Visit each just-about-to-be-abandoned translation. */ 1499 if (DEBUG_TRANSTAB) VG_(printf)("QQQ unlink-entire-sector: %d START\n", 1500 sno); 1501 sec->empty_tt_list = HTT_EMPTY; 1502 for (TTEno ei = 0; ei < N_TTES_PER_SECTOR; ei++) { 1503 if (sec->tt[ei].status == InUse) { 1504 vg_assert(sec->tt[ei].n_tte2ec >= 1); 1505 vg_assert(sec->tt[ei].n_tte2ec <= 3); 1506 n_dump_osize += vge_osize(&sec->tt[ei].vge); 1507 /* Tell the tool too. */ 1508 if (VG_(needs).superblock_discards) { 1509 VG_TDICT_CALL( tool_discard_superblock_info, 1510 sec->tt[ei].entry, 1511 sec->tt[ei].vge ); 1512 } 1513 unchain_in_preparation_for_deletion(arch_host, 1514 endness_host, sno, ei); 1515 } else { 1516 vg_assert(sec->tt[ei].n_tte2ec == 0); 1517 } 1518 sec->tt[ei].status = Empty; 1519 sec->tt[ei].n_tte2ec = 0; 1520 add_in_empty_tt_list(sno, ei); 1521 } 1522 for (HTTno hi = 0; hi < N_HTTES_PER_SECTOR; hi++) 1523 sec->htt[hi] = HTT_EMPTY; 1524 1525 if (DEBUG_TRANSTAB) VG_(printf)("QQQ unlink-entire-sector: %d END\n", 1526 sno); 1527 1528 /* Free up the eclass structures. */ 1529 for (i = 0; i < ECLASS_N; i++) { 1530 if (sec->ec2tte_size[i] == 0) { 1531 vg_assert(sec->ec2tte_used[i] == 0); 1532 vg_assert(sec->ec2tte[i] == NULL); 1533 } else { 1534 vg_assert(sec->ec2tte[i] != NULL); 1535 ttaux_free(sec->ec2tte[i]); 1536 sec->ec2tte[i] = NULL; 1537 sec->ec2tte_size[i] = 0; 1538 sec->ec2tte_used[i] = 0; 1539 } 1540 } 1541 1542 /* Empty out the host extents array. */ 1543 vg_assert(sec->host_extents != NULL); 1544 VG_(dropTailXA)(sec->host_extents, VG_(sizeXA)(sec->host_extents)); 1545 vg_assert(VG_(sizeXA)(sec->host_extents) == 0); 1546 1547 /* Sanity check: ensure it is already in 1548 sector_search_order[]. */ 1549 SECno ix; 1550 for (ix = 0; ix < n_sectors; ix++) { 1551 if (sector_search_order[ix] == sno) 1552 break; 1553 } 1554 vg_assert(ix >= 0 && ix < n_sectors); 1555 1556 if (VG_(clo_verbosity) > 2) 1557 VG_(message)(Vg_DebugMsg, "TT/TC: recycle sector %d\n", sno); 1558 } 1559 1560 sec->tc_next = sec->tc; 1561 sec->tt_n_inuse = 0; 1562 1563 invalidateFastCache(); 1564 1565 { Bool sane = sanity_check_sector_search_order(); 1566 vg_assert(sane); 1567 } 1568} 1569 1570/* Add a translation of vge to TT/TC. The translation is temporarily 1571 in code[0 .. code_len-1]. 1572 1573 pre: youngest_sector points to a valid (although possibly full) 1574 sector. 1575*/ 1576void VG_(add_to_transtab)( const VexGuestExtents* vge, 1577 Addr entry, 1578 Addr code, 1579 UInt code_len, 1580 Bool is_self_checking, 1581 Int offs_profInc, 1582 UInt n_guest_instrs ) 1583{ 1584 Int tcAvailQ, reqdQ, y; 1585 ULong *tcptr, *tcptr2; 1586 UChar* srcP; 1587 UChar* dstP; 1588 1589 vg_assert(init_done); 1590 vg_assert(vge->n_used >= 1 && vge->n_used <= 3); 1591 1592 /* 60000: should agree with N_TMPBUF in m_translate.c. 
*/
   vg_assert(code_len > 0 && code_len < 60000);

   /* Generally stay sane */
   vg_assert(n_guest_instrs < 200); /* it can be zero, though */

   if (DEBUG_TRANSTAB)
      VG_(printf)("add_to_transtab(entry = 0x%lx, len = %u) ...\n",
                  entry, code_len);

   n_in_count++;
   n_in_tsize += code_len;
   n_in_osize += vge_osize(vge);
   if (is_self_checking)
      n_in_sc_count++;

   y = youngest_sector;
   vg_assert(isValidSector(y));

   if (sectors[y].tc == NULL)
      initialiseSector(y);

   /* Try putting the translation in this sector. */
   reqdQ = (code_len + 7) >> 3;

   /* Will it fit in tc? */
   tcAvailQ = ((ULong*)(&sectors[y].tc[tc_sector_szQ]))
              - ((ULong*)(sectors[y].tc_next));
   vg_assert(tcAvailQ >= 0);
   vg_assert(tcAvailQ <= tc_sector_szQ);

   if (tcAvailQ < reqdQ
       || sectors[y].tt_n_inuse >= N_TTES_PER_SECTOR) {
      /* No.  So move on to the next sector.  Either it's never been
         used before, in which case it will get its tt/tc allocated
         now, or it has been used before, in which case it is set to be
         empty, hence throwing out the oldest sector. */
      vg_assert(tc_sector_szQ > 0);
      Int tt_loading_pct = (100 * sectors[y].tt_n_inuse)
                           / N_HTTES_PER_SECTOR;
      Int tc_loading_pct = (100 * (tc_sector_szQ - tcAvailQ))
                           / tc_sector_szQ;
      if (VG_(clo_stats) || VG_(debugLog_getLevel)() >= 1) {
         VG_(dmsg)("transtab: "
                   "declare sector %d full "
                   "(TT loading %2d%%, TC loading %2d%%, avg tce size %d)\n",
                   y, tt_loading_pct, tc_loading_pct,
                   8 * (tc_sector_szQ - tcAvailQ)/sectors[y].tt_n_inuse);
      }
      youngest_sector++;
      if (youngest_sector >= n_sectors)
         youngest_sector = 0;
      y = youngest_sector;
      initialiseSector(y);
   }

   /* Be sure ... */
   tcAvailQ = ((ULong*)(&sectors[y].tc[tc_sector_szQ]))
              - ((ULong*)(sectors[y].tc_next));
   vg_assert(tcAvailQ >= 0);
   vg_assert(tcAvailQ <= tc_sector_szQ);
   vg_assert(tcAvailQ >= reqdQ);
   vg_assert(sectors[y].tt_n_inuse < N_TTES_PER_SECTOR);
   vg_assert(sectors[y].tt_n_inuse >= 0);

   /* Copy into tc. */
   tcptr = sectors[y].tc_next;
   vg_assert(tcptr >= &sectors[y].tc[0]);
   vg_assert(tcptr <= &sectors[y].tc[tc_sector_szQ]);

   dstP = (UChar*)tcptr;
   srcP = (UChar*)code;
   VG_(memcpy)(dstP, srcP, code_len);
   sectors[y].tc_next += reqdQ;
   sectors[y].tt_n_inuse++;

   /* more paranoia */
   tcptr2 = sectors[y].tc_next;
   vg_assert(tcptr2 >= &sectors[y].tc[0]);
   vg_assert(tcptr2 <= &sectors[y].tc[tc_sector_szQ]);

   /* Find an empty tt slot, and use it.  There must be such a slot
      since tt is never allowed to get completely full. */
   TTEno tteix = get_empty_tt_slot(y);
   TTEntry__init(&sectors[y].tt[tteix]);
   sectors[y].tt[tteix].status = InUse;
   sectors[y].tt[tteix].tcptr  = tcptr;
   sectors[y].tt[tteix].usage.prof.count  = 0;
   sectors[y].tt[tteix].usage.prof.weight =
      n_guest_instrs == 0 ?
1 : n_guest_instrs; 1682 sectors[y].tt[tteix].vge = *vge; 1683 sectors[y].tt[tteix].entry = entry; 1684 1685 // Point an htt entry to the tt slot 1686 HTTno htti = HASH_TT(entry); 1687 vg_assert(htti >= 0 && htti < N_HTTES_PER_SECTOR); 1688 while (True) { 1689 if (sectors[y].htt[htti] == HTT_EMPTY 1690 || sectors[y].htt[htti] == HTT_DELETED) 1691 break; 1692 htti++; 1693 if (htti >= N_HTTES_PER_SECTOR) 1694 htti = 0; 1695 } 1696 sectors[y].htt[htti] = tteix; 1697 1698 /* Patch in the profile counter location, if necessary. */ 1699 if (offs_profInc != -1) { 1700 vg_assert(offs_profInc >= 0 && offs_profInc < code_len); 1701 VexArch arch_host = VexArch_INVALID; 1702 VexArchInfo archinfo_host; 1703 VG_(bzero_inline)(&archinfo_host, sizeof(archinfo_host)); 1704 VG_(machine_get_VexArchInfo)( &arch_host, &archinfo_host ); 1705 VexEndness endness_host = archinfo_host.endness; 1706 VexInvalRange vir 1707 = LibVEX_PatchProfInc( arch_host, endness_host, 1708 dstP + offs_profInc, 1709 §ors[y].tt[tteix].usage.prof.count ); 1710 VG_(invalidate_icache)( (void*)vir.start, vir.len ); 1711 } 1712 1713 VG_(invalidate_icache)( dstP, code_len ); 1714 1715 /* Add this entry to the host_extents map, checking that we're 1716 adding in order. */ 1717 { HostExtent hx; 1718 hx.start = (UChar*)tcptr; 1719 hx.len = code_len; 1720 hx.tteNo = tteix; 1721 vg_assert(hx.len > 0); /* bsearch fails w/ zero length entries */ 1722 XArray* hx_array = sectors[y].host_extents; 1723 vg_assert(hx_array); 1724 Word n = VG_(sizeXA)(hx_array); 1725 if (n > 0) { 1726 HostExtent* hx_prev = (HostExtent*)VG_(indexXA)(hx_array, n-1); 1727 vg_assert(hx_prev->start + hx_prev->len <= hx.start); 1728 } 1729 VG_(addToXA)(hx_array, &hx); 1730 if (DEBUG_TRANSTAB) 1731 VG_(printf)("... hx.start 0x%p hx.len %u sector %d ttslot %d\n", 1732 hx.start, hx.len, y, tteix); 1733 } 1734 1735 /* Update the fast-cache. */ 1736 setFastCacheEntry( entry, tcptr ); 1737 1738 /* Note the eclass numbers for this translation. */ 1739 upd_eclasses_after_add( §ors[y], tteix ); 1740} 1741 1742 1743/* Search for the translation of the given guest address. If 1744 requested, a successful search can also cause the fast-caches to be 1745 updated. 1746*/ 1747Bool VG_(search_transtab) ( /*OUT*/Addr* res_hcode, 1748 /*OUT*/SECno* res_sNo, 1749 /*OUT*/TTEno* res_tteNo, 1750 Addr guest_addr, 1751 Bool upd_cache ) 1752{ 1753 SECno i, sno; 1754 HTTno j, k, kstart; 1755 TTEno tti; 1756 1757 vg_assert(init_done); 1758 /* Find the initial probe point just once. It will be the same in 1759 all sectors and avoids multiple expensive % operations. */ 1760 n_full_lookups++; 1761 kstart = HASH_TT(guest_addr); 1762 vg_assert(kstart >= 0 && kstart < N_HTTES_PER_SECTOR); 1763 1764 /* Search in all the sectors,using sector_search_order[] as a 1765 heuristic guide as to what order to visit the sectors. */ 1766 for (i = 0; i < n_sectors; i++) { 1767 1768 sno = sector_search_order[i]; 1769 if (UNLIKELY(sno == INV_SNO)) 1770 return False; /* run out of sectors to search */ 1771 1772 k = kstart; 1773 for (j = 0; j < N_HTTES_PER_SECTOR; j++) { 1774 n_lookup_probes++; 1775 tti = sectors[sno].htt[k]; 1776 if (tti < N_TTES_PER_SECTOR 1777 && sectors[sno].tt[tti].entry == guest_addr) { 1778 /* found it */ 1779 if (upd_cache) 1780 setFastCacheEntry( 1781 guest_addr, sectors[sno].tt[tti].tcptr ); 1782 if (res_hcode) 1783 *res_hcode = (Addr)sectors[sno].tt[tti].tcptr; 1784 if (res_sNo) 1785 *res_sNo = sno; 1786 if (res_tteNo) 1787 *res_tteNo = tti; 1788 /* pull this one one step closer to the front. 


/* Search for the translation of the given guest address.  If
   requested, a successful search can also cause the fast-caches to be
   updated.
*/
Bool VG_(search_transtab) ( /*OUT*/Addr*  res_hcode,
                            /*OUT*/SECno* res_sNo,
                            /*OUT*/TTEno* res_tteNo,
                            Addr          guest_addr,
                            Bool          upd_cache )
{
   SECno i, sno;
   HTTno j, k, kstart;
   TTEno tti;

   vg_assert(init_done);
   /* Find the initial probe point just once.  It will be the same in
      all sectors, and this avoids multiple expensive % operations. */
   n_full_lookups++;
   kstart = HASH_TT(guest_addr);
   vg_assert(kstart >= 0 && kstart < N_HTTES_PER_SECTOR);

   /* Search in all the sectors, using sector_search_order[] as a
      heuristic guide as to what order to visit the sectors. */
   for (i = 0; i < n_sectors; i++) {

      sno = sector_search_order[i];
      if (UNLIKELY(sno == INV_SNO))
         return False; /* run out of sectors to search */

      k = kstart;
      for (j = 0; j < N_HTTES_PER_SECTOR; j++) {
         n_lookup_probes++;
         tti = sectors[sno].htt[k];
         if (tti < N_TTES_PER_SECTOR
             && sectors[sno].tt[tti].entry == guest_addr) {
            /* found it */
            if (upd_cache)
               setFastCacheEntry(
                  guest_addr, sectors[sno].tt[tti].tcptr );
            if (res_hcode)
               *res_hcode = (Addr)sectors[sno].tt[tti].tcptr;
            if (res_sNo)
               *res_sNo = sno;
            if (res_tteNo)
               *res_tteNo = tti;
            /* pull this one one step closer to the front.  For large
               apps this more or less halves the number of required
               probes. */
            if (i > 0) {
               Int tmp = sector_search_order[i-1];
               sector_search_order[i-1] = sector_search_order[i];
               sector_search_order[i] = tmp;
            }
            return True;
         }
         // tti is HTT_EMPTY or HTT_DELETED or not the entry of guest_addr
         if (sectors[sno].htt[k] == HTT_EMPTY)
            break; /* not found in this sector */
         k++;
         if (k == N_HTTES_PER_SECTOR)
            k = 0;
      }

      /* If we fall off the end, all entries are InUse and not
         matching, or Deleted.  In any case we did not find it in this
         sector. */
   }

   /* Not found in any sector. */
   return False;
}
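
/* Worked example of the probing above: suppose HASH_TT(guest_addr)
   is 5, htt[5] holds HTT_DELETED and htt[6] holds the index of the
   matching TTEntry.  The inner loop then makes two probes: slot 5 is
   skipped (a tombstone never ends a search) and slot 6 matches, so
   the entry is returned.  Only an HTT_EMPTY slot ends the scan of a
   sector early.  The swap with sector_search_order[i-1] means a
   sector that keeps producing hits migrates, one place per hit,
   towards the front of the search order. */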


/*-------------------------------------------------------------*/
/*--- Delete translations.                                  ---*/
/*-------------------------------------------------------------*/

/* forward */
static void unredir_discard_translations( Addr, ULong );

/* Stuff for deleting translations which intersect with a given
   address range.  Unfortunately, to make this run at a reasonable
   speed, it is complex. */

static inline
Bool overlap1 ( Addr s1, ULong r1, Addr s2, ULong r2 )
{
   Addr e1 = s1 + r1 - 1;
   Addr e2 = s2 + r2 - 1;
   if (e1 < s2 || e2 < s1)
      return False;
   return True;
}

static inline
Bool overlaps ( Addr start, ULong range, const VexGuestExtents* vge )
{
   if (overlap1(start, range, vge->base[0], vge->len[0]))
      return True;
   if (vge->n_used < 2)
      return False;
   if (overlap1(start, range, vge->base[1], vge->len[1]))
      return True;
   if (vge->n_used < 3)
      return False;
   if (overlap1(start, range, vge->base[2], vge->len[2]))
      return True;
   return False;
}
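
/* For example, overlap1(0x4000, 0x100, 0x40FF, 0x10) computes
   e1 == 0x40FF and e2 == 0x410E; neither end precedes the other
   range's start, so the ranges overlap (they share the single byte
   at 0x40FF).  Ranges are treated as closed intervals [s, s+r-1],
   so a zero-length range would wrap around; VG_(discard_translations)
   returns early when its range is 0. */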


/* Delete a tt entry, and update all the eclass data accordingly. */

static void delete_tte ( /*MOD*/Sector* sec, SECno secNo, TTEno tteno,
                         VexArch arch_host, VexEndness endness_host )
{
   Int      i, ec_num, ec_idx;
   TTEntry* tte;

   /* sec and secNo are mutually redundant; cross-check. */
   vg_assert(sec == &sectors[secNo]);

   vg_assert(tteno >= 0 && tteno < N_TTES_PER_SECTOR);
   tte = &sec->tt[tteno];
   vg_assert(tte->status == InUse);
   vg_assert(tte->n_tte2ec >= 1 && tte->n_tte2ec <= 3);

   /* Unchain .. */
   unchain_in_preparation_for_deletion(arch_host, endness_host, secNo, tteno);

   /* Deal with the ec-to-tte links first. */
   for (i = 0; i < tte->n_tte2ec; i++) {
      ec_num = (Int)tte->tte2ec_ec[i];
      ec_idx = tte->tte2ec_ix[i];
      vg_assert(ec_num >= 0 && ec_num < ECLASS_N);
      vg_assert(ec_idx >= 0);
      vg_assert(ec_idx < sec->ec2tte_used[ec_num]);
      /* Assert that the two links point at each other. */
      vg_assert(sec->ec2tte[ec_num][ec_idx] == tteno);
      /* "delete" the pointer back to here. */
      sec->ec2tte[ec_num][ec_idx] = EC2TTE_DELETED;
   }

   /* Now fix up this TTEntry. */
   /* Mark the entry as deleted in htt.
      Note: we could avoid the hash table search below by adding a
      reference from the tte to its hash position in htt. */
   HTTno j;
   HTTno k = HASH_TT(tte->entry);
   vg_assert(k >= 0 && k < N_HTTES_PER_SECTOR);
   for (j = 0; j < N_HTTES_PER_SECTOR; j++) {
      if (sec->htt[k] == tteno)
         break;
      k++;
      if (k == N_HTTES_PER_SECTOR)
         k = 0;
   }
   vg_assert(j < N_HTTES_PER_SECTOR);
   sec->htt[k] = HTT_DELETED;
   tte->status = Deleted;
   tte->n_tte2ec = 0;
   add_in_empty_tt_list(secNo, tteno);

   /* Stats .. */
   sec->tt_n_inuse--;
   n_disc_count++;
   n_disc_osize += vge_osize(&tte->vge);

   /* Tell the tool too. */
   if (VG_(needs).superblock_discards) {
      VG_TDICT_CALL( tool_discard_superblock_info,
                     tte->entry,
                     tte->vge );
   }
}
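
/* Note on the HTT_DELETED marker used above: the slot cannot simply
   be reset to HTT_EMPTY, because an empty slot terminates the linear
   probing done by VG_(search_transtab) and by add_to_transtab, which
   would make entries further along the probe chain unreachable.
   Leaving a tombstone keeps those chains intact, and the slot itself
   can still be reused, since the insertion loop stops at either
   HTT_EMPTY or HTT_DELETED. */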


/* Delete translations from sec which intersect specified range, but
   only consider translations in the specified eclass. */

static
Bool delete_translations_in_sector_eclass ( /*MOD*/Sector* sec, SECno secNo,
                                            Addr guest_start, ULong range,
                                            Int ec,
                                            VexArch arch_host,
                                            VexEndness endness_host )
{
   Int      i;
   TTEno    tteno;
   Bool     anyDeld = False;
   TTEntry* tte;

   vg_assert(ec >= 0 && ec < ECLASS_N);

   for (i = 0; i < sec->ec2tte_used[ec]; i++) {

      tteno = sec->ec2tte[ec][i];
      if (tteno == EC2TTE_DELETED) {
         /* already deleted */
         continue;
      }

      vg_assert(tteno < N_TTES_PER_SECTOR);

      tte = &sec->tt[tteno];
      vg_assert(tte->status == InUse);

      if (overlaps( guest_start, range, &tte->vge )) {
         anyDeld = True;
         delete_tte( sec, secNo, tteno, arch_host, endness_host );
      }

   }

   return anyDeld;
}


/* Delete translations from sec which intersect specified range, the
   slow way, by inspecting all translations in sec. */

static
Bool delete_translations_in_sector ( /*MOD*/Sector* sec, SECno secNo,
                                     Addr guest_start, ULong range,
                                     VexArch arch_host,
                                     VexEndness endness_host )
{
   TTEno i;
   Bool  anyDeld = False;

   for (i = 0; i < N_TTES_PER_SECTOR; i++) {
      if (sec->tt[i].status == InUse
          && overlaps( guest_start, range, &sec->tt[i].vge )) {
         anyDeld = True;
         delete_tte( sec, secNo, i, arch_host, endness_host );
      }
   }

   return anyDeld;
}


void VG_(discard_translations) ( Addr guest_start, ULong range,
                                 const HChar* who )
{
   Sector* sec;
   SECno   sno;
   Int     ec;
   Bool    anyDeleted = False;

   vg_assert(init_done);

   VG_(debugLog)(2, "transtab",
                 "discard_translations(0x%lx, %llu) req by %s\n",
                 guest_start, range, who );

   /* Pre-deletion sanity check */
   if (VG_(clo_sanity_level) >= 4) {
      Bool sane = sanity_check_all_sectors();
      vg_assert(sane);
   }

   if (range == 0)
      return;

   VexArch     arch_host = VexArch_INVALID;
   VexArchInfo archinfo_host;
   VG_(bzero_inline)(&archinfo_host, sizeof(archinfo_host));
   VG_(machine_get_VexArchInfo)( &arch_host, &archinfo_host );
   VexEndness endness_host = archinfo_host.endness;

   /* There are two different ways to do this.

      If the range fits within a single address-range equivalence
      class, as will be the case for a cache-line-sized invalidation,
      then we only have to inspect the set of translations listed in
      that equivalence class, and also in the "sin-bin" equivalence
      class ECLASS_MISC.

      Otherwise, the invalidation is of a larger range and probably
      results from munmap.  In this case it's (probably!) faster just
      to inspect all translations, dump those we don't want, and
      regenerate the equivalence class information (since modifying it
      in-situ is even more expensive).
   */

   /* First off, figure out if the range falls within a single class,
      and if so which one. */

   ec = ECLASS_MISC;
   if (range < (1ULL << ECLASS_SHIFT))
      ec = range_to_eclass( guest_start, (UInt)range );

   /* if ec is ECLASS_MISC then we aren't looking at just a single
      class, so use the slow scheme.  Else use the fast scheme,
      examining 'ec' and ECLASS_MISC. */

   if (ec != ECLASS_MISC) {

      VG_(debugLog)(2, "transtab",
                    " FAST, ec = %d\n", ec);

      /* Fast scheme */
      vg_assert(ec >= 0 && ec < ECLASS_MISC);

      for (sno = 0; sno < n_sectors; sno++) {
         sec = &sectors[sno];
         if (sec->tc == NULL)
            continue;
         anyDeleted |= delete_translations_in_sector_eclass(
                          sec, sno, guest_start, range, ec,
                          arch_host, endness_host
                       );
         anyDeleted |= delete_translations_in_sector_eclass(
                          sec, sno, guest_start, range, ECLASS_MISC,
                          arch_host, endness_host
                       );
      }

   } else {

      /* slow scheme */

      VG_(debugLog)(2, "transtab",
                    " SLOW, ec = %d\n", ec);

      for (sno = 0; sno < n_sectors; sno++) {
         sec = &sectors[sno];
         if (sec->tc == NULL)
            continue;
         anyDeleted |= delete_translations_in_sector(
                          sec, sno, guest_start, range,
                          arch_host, endness_host
                       );
      }

   }

   if (anyDeleted)
      invalidateFastCache();

   /* don't forget the no-redir cache */
   unredir_discard_translations( guest_start, range );

   /* Post-deletion sanity check */
   if (VG_(clo_sanity_level) >= 4) {
      TTEno    i;
      TTEntry* tte;
      Bool     sane = sanity_check_all_sectors();
      vg_assert(sane);
      /* But now, also check the requested address range isn't
         present anywhere. */
      for (sno = 0; sno < n_sectors; sno++) {
         sec = &sectors[sno];
         if (sec->tc == NULL)
            continue;
         for (i = 0; i < N_TTES_PER_SECTOR; i++) {
            tte = &sec->tt[i];
            if (tte->status != InUse)
               continue;
            vg_assert(!overlaps( guest_start, range, &tte->vge ));
         }
      }
   }
}

/* Whether or not tools may discard translations. */
Bool VG_(ok_to_discard_translations) = False;

/* This function is exported to tools which can use it to discard
   translations, provided it is safe to do so. */
void VG_(discard_translations_safely) ( Addr start, SizeT len,
                                        const HChar* who )
{
   vg_assert(VG_(ok_to_discard_translations));
   VG_(discard_translations)(start, len, who);
}
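
/* Illustrative, hypothetical tool-side use of the above: a tool that
   notices guest code being overwritten might, at a point where the
   core has set VG_(ok_to_discard_translations), request

      VG_(discard_translations_safely)( a_start, a_len, "mytool.overwrite" );

   where a_start/a_len (names made up here) describe the overwritten
   guest range and the final argument is only used for debug logging. */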

/*------------------------------------------------------------*/
/*--- AUXILIARY: the unredirected TT/TC                    ---*/
/*------------------------------------------------------------*/

/* A very simple translation cache which holds a small number of
   unredirected translations.  This is completely independent of the
   main tt/tc structures.  When unredir_tc or unredir_tt becomes full,
   both structures are simply dumped and we start over.

   Since these translations are unredirected, the search key is (by
   definition) the first address entry in the .vge field. */

/* Sized to hold 500 translations of average size 1000 bytes. */

#define UNREDIR_SZB   1000

#define N_UNREDIR_TT  500
#define N_UNREDIR_TCQ (N_UNREDIR_TT * UNREDIR_SZB / sizeof(ULong))
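
/* With the constants above, the unredirected cache covers
   N_UNREDIR_TT * UNREDIR_SZB == 500 * 1000 == 500000 bytes of host
   code, that is N_UNREDIR_TCQ == 62500 ULong-sized quanta. */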

typedef
   struct {
      VexGuestExtents vge;
      Addr            hcode;
      Bool            inUse;
   }
   UTCEntry;

/* We just allocate forwards in _tc, never deleting. */
static ULong *unredir_tc;
static Int    unredir_tc_used = N_UNREDIR_TCQ;

/* Slots in _tt can come into use and out again (.inUse).
   Nevertheless _tt_highwater is maintained so that invalidations
   don't have to scan all the slots when only a few are in use.
   _tt_highwater holds the index of the highest ever allocated
   slot. */
static UTCEntry unredir_tt[N_UNREDIR_TT];
static Int      unredir_tt_highwater;


static void init_unredir_tt_tc ( void )
{
   Int i;
   if (unredir_tc == NULL) {
      SysRes sres = VG_(am_mmap_anon_float_valgrind)
                       ( N_UNREDIR_TT * UNREDIR_SZB );
      if (sr_isError(sres)) {
         VG_(out_of_memory_NORETURN)("init_unredir_tt_tc",
                                     N_UNREDIR_TT * UNREDIR_SZB);
         /*NOTREACHED*/
      }
      unredir_tc = (ULong *)(Addr)sr_Res(sres);
   }
   unredir_tc_used = 0;
   for (i = 0; i < N_UNREDIR_TT; i++)
      unredir_tt[i].inUse = False;
   unredir_tt_highwater = -1;
}

/* Do a sanity check; return False on failure. */
static Bool sanity_check_redir_tt_tc ( void )
{
   Int i;
   if (unredir_tt_highwater < -1) return False;
   if (unredir_tt_highwater >= N_UNREDIR_TT) return False;

   for (i = unredir_tt_highwater+1; i < N_UNREDIR_TT; i++)
      if (unredir_tt[i].inUse)
         return False;

   if (unredir_tc_used < 0) return False;
   if (unredir_tc_used > N_UNREDIR_TCQ) return False;

   return True;
}


/* Add an UNREDIRECTED translation of vge to TT/TC.  The translation
   is temporarily in code[0 .. code_len-1].
*/
void VG_(add_to_unredir_transtab)( const VexGuestExtents* vge,
                                   Addr entry,
                                   Addr code,
                                   UInt code_len )
{
   Int   i, j, code_szQ;
   HChar *srcP, *dstP;

   vg_assert(sanity_check_redir_tt_tc());

   /* This is the whole point: it's not redirected! */
   vg_assert(entry == vge->base[0]);

   /* How many unredir_tc slots (ULong quanta) are needed */
   code_szQ = (code_len + 7) / 8;

   /* Look for an empty unredir_tt slot */
   for (i = 0; i < N_UNREDIR_TT; i++)
      if (!unredir_tt[i].inUse)
         break;

   if (i >= N_UNREDIR_TT || code_szQ > (N_UNREDIR_TCQ - unredir_tc_used)) {
      /* It's full; dump everything we currently have */
      init_unredir_tt_tc();
      i = 0;
   }

   vg_assert(unredir_tc_used >= 0);
   vg_assert(unredir_tc_used <= N_UNREDIR_TCQ);
   vg_assert(code_szQ > 0);
   vg_assert(code_szQ + unredir_tc_used <= N_UNREDIR_TCQ);
   vg_assert(i >= 0 && i < N_UNREDIR_TT);
   vg_assert(unredir_tt[i].inUse == False);

   if (i > unredir_tt_highwater)
      unredir_tt_highwater = i;

   dstP = (HChar*)&unredir_tc[unredir_tc_used];
   srcP = (HChar*)code;
   for (j = 0; j < code_len; j++)
      dstP[j] = srcP[j];

   VG_(invalidate_icache)( dstP, code_len );

   unredir_tt[i].inUse = True;
   unredir_tt[i].vge   = *vge;
   unredir_tt[i].hcode = (Addr)dstP;

   unredir_tc_used += code_szQ;
   vg_assert(unredir_tc_used >= 0);
   vg_assert(unredir_tc_used <= N_UNREDIR_TCQ);

   vg_assert(&dstP[code_len] <= (HChar*)&unredir_tc[unredir_tc_used]);
}

Bool VG_(search_unredir_transtab) ( /*OUT*/Addr* result,
                                    Addr guest_addr )
{
   Int i;
   for (i = 0; i < N_UNREDIR_TT; i++) {
      if (!unredir_tt[i].inUse)
         continue;
      if (unredir_tt[i].vge.base[0] == guest_addr) {
         *result = unredir_tt[i].hcode;
         return True;
      }
   }
   return False;
}

static void unredir_discard_translations( Addr guest_start, ULong range )
{
   Int i;

   vg_assert(sanity_check_redir_tt_tc());

   for (i = 0; i <= unredir_tt_highwater; i++) {
      if (unredir_tt[i].inUse
          && overlaps( guest_start, range, &unredir_tt[i].vge))
         unredir_tt[i].inUse = False;
   }
}


/*------------------------------------------------------------*/
/*--- Initialisation.                                      ---*/
/*------------------------------------------------------------*/

void VG_(init_tt_tc) ( void )
{
   Int i, avg_codeszQ;

   vg_assert(!init_done);
   init_done = True;

   /* Otherwise lots of things go wrong... */
   vg_assert(sizeof(ULong) == 8);
   vg_assert(sizeof(TTEno) == sizeof(HTTno));
   vg_assert(sizeof(TTEno) == 2);
   vg_assert(N_TTES_PER_SECTOR <= N_HTTES_PER_SECTOR);
   vg_assert(N_HTTES_PER_SECTOR < INV_TTE);
   vg_assert(N_HTTES_PER_SECTOR < EC2TTE_DELETED);
   vg_assert(N_HTTES_PER_SECTOR < HTT_EMPTY);
   /* check fast cache entries really are 2 words long */
   vg_assert(sizeof(Addr) == sizeof(void*));
   vg_assert(sizeof(FastCacheEntry) == 2 * sizeof(Addr));
   /* check fast cache entries are packed back-to-back with no spaces */
   vg_assert(sizeof( VG_(tt_fast) )
             == VG_TT_FAST_SIZE * sizeof(FastCacheEntry));
   /* check fast cache is aligned as we requested.  Not fatal if it
      isn't, but we might as well make sure. */
   vg_assert(VG_IS_16_ALIGNED( ((Addr) & VG_(tt_fast)[0]) ));

   if (VG_(clo_verbosity) > 2)
      VG_(message)(Vg_DebugMsg,
                   "TT/TC: VG_(init_tt_tc) "
                   "(startup of code management)\n");

   /* Figure out how big each tc area should be. */
   if (VG_(clo_avg_transtab_entry_size) == 0)
      avg_codeszQ = (VG_(details).avg_translation_sizeB + 7) / 8;
   else
      avg_codeszQ = (VG_(clo_avg_transtab_entry_size) + 7) / 8;
   tc_sector_szQ = N_TTES_PER_SECTOR * (1 + avg_codeszQ);

   /* Ensure the calculated value is not way crazy. */
   vg_assert(tc_sector_szQ >= 2 * N_TTES_PER_SECTOR);
   vg_assert(tc_sector_szQ <= 100 * N_TTES_PER_SECTOR);
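
   /* For example, if the effective average entry size is 256 bytes
      (a made-up figure, purely for illustration), then avg_codeszQ is
      (256 + 7) / 8 == 32, and each sector's tc gets
      N_TTES_PER_SECTOR * 33 quanta, i.e. 264 bytes of code cache per
      translation-table entry. */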
" 2416 "(sectors recycled %'llu)\n", 2417 n_dump_count, n_dump_osize, n_sectors_recycled ); 2418 VG_(message)(Vg_DebugMsg, 2419 " transtab: discarded %'llu (%'llu -> ?" "?)\n", 2420 n_disc_count, n_disc_osize ); 2421 2422 if (DEBUG_TRANSTAB) { 2423 Int i; 2424 VG_(printf)("\n"); 2425 for (i = 0; i < ECLASS_N; i++) { 2426 VG_(printf)(" %4d", sectors[0].ec2tte_used[i]); 2427 if (i % 16 == 15) 2428 VG_(printf)("\n"); 2429 } 2430 VG_(printf)("\n\n"); 2431 } 2432} 2433 2434/*------------------------------------------------------------*/ 2435/*--- Printing out of profiling results. ---*/ 2436/*------------------------------------------------------------*/ 2437 2438static ULong score ( const TTEntry* tte ) 2439{ 2440 return ((ULong)tte->usage.prof.weight) * ((ULong)tte->usage.prof.count); 2441} 2442 2443ULong VG_(get_SB_profile) ( SBProfEntry tops[], UInt n_tops ) 2444{ 2445 SECno sno; 2446 Int r, s; 2447 ULong score_total; 2448 TTEno i; 2449 2450 /* First, compute the total weighted count, and find the top N 2451 ttes. tops contains pointers to the most-used n_tops blocks, in 2452 descending order (viz, tops[0] is the highest scorer). */ 2453 for (s = 0; s < n_tops; s++) { 2454 tops[s].addr = 0; 2455 tops[s].score = 0; 2456 } 2457 2458 score_total = 0; 2459 2460 for (sno = 0; sno < n_sectors; sno++) { 2461 if (sectors[sno].tc == NULL) 2462 continue; 2463 for (i = 0; i < N_TTES_PER_SECTOR; i++) { 2464 if (sectors[sno].tt[i].status != InUse) 2465 continue; 2466 score_total += score(§ors[sno].tt[i]); 2467 /* Find the rank for sectors[sno].tt[i]. */ 2468 r = n_tops-1; 2469 while (True) { 2470 if (r == -1) 2471 break; 2472 if (tops[r].addr == 0) { 2473 r--; 2474 continue; 2475 } 2476 if ( score(§ors[sno].tt[i]) > tops[r].score ) { 2477 r--; 2478 continue; 2479 } 2480 break; 2481 } 2482 r++; 2483 vg_assert(r >= 0 && r <= n_tops); 2484 /* This bb should be placed at r, and bbs above it shifted 2485 upwards one slot. */ 2486 if (r < n_tops) { 2487 for (s = n_tops-1; s > r; s--) 2488 tops[s] = tops[s-1]; 2489 tops[r].addr = sectors[sno].tt[i].entry; 2490 tops[r].score = score( §ors[sno].tt[i] ); 2491 } 2492 } 2493 } 2494 2495 /* Now zero out all the counter fields, so that we can make 2496 multiple calls here and just get the values since the last call, 2497 each time, rather than values accumulated for the whole run. */ 2498 for (sno = 0; sno < n_sectors; sno++) { 2499 if (sectors[sno].tc == NULL) 2500 continue; 2501 for (i = 0; i < N_TTES_PER_SECTOR; i++) { 2502 if (sectors[sno].tt[i].status != InUse) 2503 continue; 2504 sectors[sno].tt[i].usage.prof.count = 0; 2505 } 2506 } 2507 2508 return score_total; 2509} 2510 2511/*--------------------------------------------------------------------*/ 2512/*--- end ---*/ 2513/*--------------------------------------------------------------------*/ 2514