1/* 2 * Copyright (C) 2009 Nicolai Haehnle. 3 * Copyright 2011 Tom Stellard <tstellar@gmail.com> 4 * 5 * All Rights Reserved. 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining 8 * a copy of this software and associated documentation files (the 9 * "Software"), to deal in the Software without restriction, including 10 * without limitation the rights to use, copy, modify, merge, publish, 11 * distribute, sublicense, and/or sell copies of the Software, and to 12 * permit persons to whom the Software is furnished to do so, subject to 13 * the following conditions: 14 * 15 * The above copyright notice and this permission notice (including the 16 * next paragraph) shall be included in all copies or substantial 17 * portions of the Software. 18 * 19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 20 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 22 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE 23 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 24 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 25 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 26 * 27 */ 28 29#include "radeon_program_pair.h" 30 31#include <stdio.h> 32 33#include "main/glheader.h" 34#include "util/register_allocate.h" 35#include "util/u_memory.h" 36#include "util/ralloc.h" 37 38#include "r300_fragprog_swizzle.h" 39#include "radeon_compiler.h" 40#include "radeon_compiler_util.h" 41#include "radeon_dataflow.h" 42#include "radeon_list.h" 43#include "radeon_regalloc.h" 44#include "radeon_variable.h" 45 46#define VERBOSE 0 47 48#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0) 49 50 51 52struct register_info { 53 struct live_intervals Live[4]; 54 55 unsigned int Used:1; 56 unsigned int Allocated:1; 57 unsigned int File:3; 58 unsigned int Index:RC_REGISTER_INDEX_BITS; 59 unsigned int Writemask; 60}; 61 62struct regalloc_state { 63 struct radeon_compiler * C; 64 65 struct register_info * Input; 66 unsigned int NumInputs; 67 68 struct register_info * Temporary; 69 unsigned int NumTemporaries; 70 71 unsigned int Simple; 72 int LoopEnd; 73}; 74 75struct rc_class { 76 enum rc_reg_class ID; 77 78 unsigned int WritemaskCount; 79 80 /** List of writemasks that belong to this class */ 81 unsigned int Writemasks[3]; 82 83 84}; 85 86static const struct rc_class rc_class_list [] = { 87 {RC_REG_CLASS_SINGLE, 3, 88 {RC_MASK_X, 89 RC_MASK_Y, 90 RC_MASK_Z}}, 91 {RC_REG_CLASS_DOUBLE, 3, 92 {RC_MASK_X | RC_MASK_Y, 93 RC_MASK_X | RC_MASK_Z, 94 RC_MASK_Y | RC_MASK_Z}}, 95 {RC_REG_CLASS_TRIPLE, 1, 96 {RC_MASK_X | RC_MASK_Y | RC_MASK_Z, 97 RC_MASK_NONE, 98 RC_MASK_NONE}}, 99 {RC_REG_CLASS_ALPHA, 1, 100 {RC_MASK_W, 101 RC_MASK_NONE, 102 RC_MASK_NONE}}, 103 {RC_REG_CLASS_SINGLE_PLUS_ALPHA, 3, 104 {RC_MASK_X | RC_MASK_W, 105 RC_MASK_Y | RC_MASK_W, 106 RC_MASK_Z | RC_MASK_W}}, 107 {RC_REG_CLASS_DOUBLE_PLUS_ALPHA, 3, 108 {RC_MASK_X | RC_MASK_Y | RC_MASK_W, 109 RC_MASK_X | RC_MASK_Z | RC_MASK_W, 110 RC_MASK_Y | RC_MASK_Z | RC_MASK_W}}, 111 {RC_REG_CLASS_TRIPLE_PLUS_ALPHA, 1, 112 {RC_MASK_X | RC_MASK_Y | RC_MASK_Z | RC_MASK_W, 113 RC_MASK_NONE, 114 RC_MASK_NONE}}, 115 {RC_REG_CLASS_X, 1, 116 {RC_MASK_X, 117 RC_MASK_NONE, 118 RC_MASK_NONE}}, 119 {RC_REG_CLASS_Y, 1, 120 {RC_MASK_Y, 121 RC_MASK_NONE, 122 RC_MASK_NONE}}, 123 {RC_REG_CLASS_Z, 1, 124 {RC_MASK_Z, 125 RC_MASK_NONE, 126 RC_MASK_NONE}}, 127 {RC_REG_CLASS_XY, 1, 128 {RC_MASK_X | RC_MASK_Y, 129 RC_MASK_NONE, 130 RC_MASK_NONE}}, 131 {RC_REG_CLASS_YZ, 1, 132 {RC_MASK_Y | RC_MASK_Z, 133 RC_MASK_NONE, 134 RC_MASK_NONE}}, 135 {RC_REG_CLASS_XZ, 1, 136 {RC_MASK_X | RC_MASK_Z, 137 RC_MASK_NONE, 138 RC_MASK_NONE}}, 139 {RC_REG_CLASS_XW, 1, 140 {RC_MASK_X | RC_MASK_W, 141 RC_MASK_NONE, 142 RC_MASK_NONE}}, 143 {RC_REG_CLASS_YW, 1, 144 {RC_MASK_Y | RC_MASK_W, 145 RC_MASK_NONE, 146 RC_MASK_NONE}}, 147 {RC_REG_CLASS_ZW, 1, 148 {RC_MASK_Z | RC_MASK_W, 149 RC_MASK_NONE, 150 RC_MASK_NONE}}, 151 {RC_REG_CLASS_XYW, 1, 152 {RC_MASK_X | RC_MASK_Y | RC_MASK_W, 153 RC_MASK_NONE, 154 RC_MASK_NONE}}, 155 {RC_REG_CLASS_YZW, 1, 156 {RC_MASK_Y | RC_MASK_Z | RC_MASK_W, 157 RC_MASK_NONE, 158 RC_MASK_NONE}}, 159 {RC_REG_CLASS_XZW, 1, 160 {RC_MASK_X | RC_MASK_Z | RC_MASK_W, 161 RC_MASK_NONE, 162 RC_MASK_NONE}} 163}; 164 165static void print_live_intervals(struct live_intervals * src) 166{ 167 if (!src || !src->Used) { 168 DBG("(null)"); 169 return; 170 } 171 172 DBG("(%i,%i)", src->Start, src->End); 173} 174 175static int overlap_live_intervals(struct live_intervals * a, struct live_intervals * b) 176{ 177 if (VERBOSE) { 178 DBG("overlap_live_intervals: "); 179 print_live_intervals(a); 180 DBG(" to "); 181 print_live_intervals(b); 182 DBG("\n"); 183 } 184 185 if (!a->Used || !b->Used) { 186 DBG(" unused interval\n"); 187 return 0; 188 } 189 190 if (a->Start > b->Start) { 191 if (a->Start < b->End) { 192 DBG(" overlap\n"); 193 return 1; 194 } 195 } else if (b->Start > a->Start) { 196 if (b->Start < a->End) { 197 DBG(" overlap\n"); 198 return 1; 199 } 200 } else { /* a->Start == b->Start */ 201 if (a->Start != a->End && b->Start != b->End) { 202 DBG(" overlap\n"); 203 return 1; 204 } 205 } 206 207 DBG(" no overlap\n"); 208 209 return 0; 210} 211 212static void scan_read_callback(void * data, struct rc_instruction * inst, 213 rc_register_file file, unsigned int index, unsigned int mask) 214{ 215 struct regalloc_state * s = data; 216 struct register_info * reg; 217 unsigned int i; 218 219 if (file != RC_FILE_INPUT) 220 return; 221 222 s->Input[index].Used = 1; 223 reg = &s->Input[index]; 224 225 for (i = 0; i < 4; i++) { 226 if (!((mask >> i) & 0x1)) { 227 continue; 228 } 229 reg->Live[i].Used = 1; 230 reg->Live[i].Start = 0; 231 reg->Live[i].End = 232 s->LoopEnd > inst->IP ? s->LoopEnd : inst->IP; 233 } 234} 235 236static void remap_register(void * data, struct rc_instruction * inst, 237 rc_register_file * file, unsigned int * index) 238{ 239 struct regalloc_state * s = data; 240 const struct register_info * reg; 241 242 if (*file == RC_FILE_TEMPORARY && s->Simple) 243 reg = &s->Temporary[*index]; 244 else if (*file == RC_FILE_INPUT) 245 reg = &s->Input[*index]; 246 else 247 return; 248 249 if (reg->Allocated) { 250 *index = reg->Index; 251 } 252} 253 254static void alloc_input_simple(void * data, unsigned int input, 255 unsigned int hwreg) 256{ 257 struct regalloc_state * s = data; 258 259 if (input >= s->NumInputs) 260 return; 261 262 s->Input[input].Allocated = 1; 263 s->Input[input].File = RC_FILE_TEMPORARY; 264 s->Input[input].Index = hwreg; 265} 266 267/* This functions offsets the temporary register indices by the number 268 * of input registers, because input registers are actually temporaries and 269 * should not occupy the same space. 270 * 271 * This pass is supposed to be used to maintain correct allocation of inputs 272 * if the standard register allocation is disabled. */ 273static void do_regalloc_inputs_only(struct regalloc_state * s) 274{ 275 for (unsigned i = 0; i < s->NumTemporaries; i++) { 276 s->Temporary[i].Allocated = 1; 277 s->Temporary[i].File = RC_FILE_TEMPORARY; 278 s->Temporary[i].Index = i + s->NumInputs; 279 } 280} 281 282static unsigned int is_derivative(rc_opcode op) 283{ 284 return (op == RC_OPCODE_DDX || op == RC_OPCODE_DDY); 285} 286 287static int find_class( 288 const struct rc_class * classes, 289 unsigned int writemask, 290 unsigned int max_writemask_count) 291{ 292 unsigned int i; 293 for (i = 0; i < RC_REG_CLASS_COUNT; i++) { 294 unsigned int j; 295 if (classes[i].WritemaskCount > max_writemask_count) { 296 continue; 297 } 298 for (j = 0; j < 3; j++) { 299 if (classes[i].Writemasks[j] == writemask) { 300 return i; 301 } 302 } 303 } 304 return -1; 305} 306 307struct variable_get_class_cb_data { 308 unsigned int * can_change_writemask; 309 unsigned int conversion_swizzle; 310}; 311 312static void variable_get_class_read_cb( 313 void * userdata, 314 struct rc_instruction * inst, 315 struct rc_pair_instruction_arg * arg, 316 struct rc_pair_instruction_source * src) 317{ 318 struct variable_get_class_cb_data * d = userdata; 319 unsigned int new_swizzle = rc_adjust_channels(arg->Swizzle, 320 d->conversion_swizzle); 321 if (!r300_swizzle_is_native_basic(new_swizzle)) { 322 *d->can_change_writemask = 0; 323 } 324} 325 326static enum rc_reg_class variable_get_class( 327 struct rc_variable * variable, 328 const struct rc_class * classes) 329{ 330 unsigned int i; 331 unsigned int can_change_writemask= 1; 332 unsigned int writemask = rc_variable_writemask_sum(variable); 333 struct rc_list * readers = rc_variable_readers_union(variable); 334 int class_index; 335 336 if (!variable->C->is_r500) { 337 struct rc_class c; 338 struct rc_variable * var_ptr; 339 /* The assumption here is that if an instruction has type 340 * RC_INSTRUCTION_NORMAL then it is a TEX instruction. 341 * r300 and r400 can't swizzle the result of a TEX lookup. */ 342 for (var_ptr = variable; var_ptr; var_ptr = var_ptr->Friend) { 343 if (var_ptr->Inst->Type == RC_INSTRUCTION_NORMAL) { 344 writemask = RC_MASK_XYZW; 345 } 346 } 347 348 /* Check if it is possible to do swizzle packing for r300/r400 349 * without creating non-native swizzles. */ 350 class_index = find_class(classes, writemask, 3); 351 if (class_index < 0) { 352 goto error; 353 } 354 c = classes[class_index]; 355 if (c.WritemaskCount == 1) { 356 goto done; 357 } 358 for (i = 0; i < c.WritemaskCount; i++) { 359 struct rc_variable * var_ptr; 360 for (var_ptr = variable; var_ptr; 361 var_ptr = var_ptr->Friend) { 362 int j; 363 unsigned int conversion_swizzle = 364 rc_make_conversion_swizzle( 365 writemask, c.Writemasks[i]); 366 struct variable_get_class_cb_data d; 367 d.can_change_writemask = &can_change_writemask; 368 d.conversion_swizzle = conversion_swizzle; 369 /* If we get this far var_ptr->Inst has to 370 * be a pair instruction. If variable or any 371 * of its friends are normal instructions, 372 * then the writemask will be set to RC_MASK_XYZW 373 * and the function will return before it gets 374 * here. */ 375 rc_pair_for_all_reads_arg(var_ptr->Inst, 376 variable_get_class_read_cb, &d); 377 378 for (j = 0; j < var_ptr->ReaderCount; j++) { 379 unsigned int old_swizzle; 380 unsigned int new_swizzle; 381 struct rc_reader r = var_ptr->Readers[j]; 382 if (r.Inst->Type == 383 RC_INSTRUCTION_PAIR ) { 384 old_swizzle = r.U.P.Arg->Swizzle; 385 } else { 386 /* Source operands of TEX 387 * instructions can't be 388 * swizzle on r300/r400 GPUs. 389 */ 390 can_change_writemask = 0; 391 break; 392 } 393 new_swizzle = rc_adjust_channels( 394 old_swizzle, conversion_swizzle); 395 if (!r300_swizzle_is_native_basic( 396 new_swizzle)) { 397 can_change_writemask = 0; 398 break; 399 } 400 } 401 if (!can_change_writemask) { 402 break; 403 } 404 } 405 if (!can_change_writemask) { 406 break; 407 } 408 } 409 } 410 411 if (variable->Inst->Type == RC_INSTRUCTION_PAIR) { 412 /* DDX/DDY seem to always fail when their writemasks are 413 * changed.*/ 414 if (is_derivative(variable->Inst->U.P.RGB.Opcode) 415 || is_derivative(variable->Inst->U.P.Alpha.Opcode)) { 416 can_change_writemask = 0; 417 } 418 } 419 for ( ; readers; readers = readers->Next) { 420 struct rc_reader * r = readers->Item; 421 if (r->Inst->Type == RC_INSTRUCTION_PAIR) { 422 if (r->U.P.Arg->Source == RC_PAIR_PRESUB_SRC) { 423 can_change_writemask = 0; 424 break; 425 } 426 /* DDX/DDY also fail when their swizzles are changed. */ 427 if (is_derivative(r->Inst->U.P.RGB.Opcode) 428 || is_derivative(r->Inst->U.P.Alpha.Opcode)) { 429 can_change_writemask = 0; 430 break; 431 } 432 } 433 } 434 435 class_index = find_class(classes, writemask, 436 can_change_writemask ? 3 : 1); 437done: 438 if (class_index > -1) { 439 return classes[class_index].ID; 440 } else { 441error: 442 rc_error(variable->C, 443 "Could not find class for index=%u mask=%u\n", 444 variable->Dst.Index, writemask); 445 return 0; 446 } 447} 448 449static unsigned int overlap_live_intervals_array( 450 struct live_intervals * a, 451 struct live_intervals * b) 452{ 453 unsigned int a_chan, b_chan; 454 for (a_chan = 0; a_chan < 4; a_chan++) { 455 for (b_chan = 0; b_chan < 4; b_chan++) { 456 if (overlap_live_intervals(&a[a_chan], &b[b_chan])) { 457 return 1; 458 } 459 } 460 } 461 return 0; 462} 463 464static unsigned int reg_get_index(int reg) 465{ 466 return reg / RC_MASK_XYZW; 467} 468 469static unsigned int reg_get_writemask(int reg) 470{ 471 return (reg % RC_MASK_XYZW) + 1; 472} 473 474static int get_reg_id(unsigned int index, unsigned int writemask) 475{ 476 assert(writemask); 477 if (writemask == 0) { 478 return 0; 479 } 480 return (index * RC_MASK_XYZW) + (writemask - 1); 481} 482 483#if VERBOSE 484static void print_reg(int reg) 485{ 486 unsigned int index = reg_get_index(reg); 487 unsigned int mask = reg_get_writemask(reg); 488 fprintf(stderr, "Temp[%u].%c%c%c%c", index, 489 mask & RC_MASK_X ? 'x' : '_', 490 mask & RC_MASK_Y ? 'y' : '_', 491 mask & RC_MASK_Z ? 'z' : '_', 492 mask & RC_MASK_W ? 'w' : '_'); 493} 494#endif 495 496static void add_register_conflicts( 497 struct ra_regs * regs, 498 unsigned int max_temp_regs) 499{ 500 unsigned int index, a_mask, b_mask; 501 for (index = 0; index < max_temp_regs; index++) { 502 for(a_mask = 1; a_mask <= RC_MASK_XYZW; a_mask++) { 503 for (b_mask = a_mask + 1; b_mask <= RC_MASK_XYZW; 504 b_mask++) { 505 if (a_mask & b_mask) { 506 ra_add_reg_conflict(regs, 507 get_reg_id(index, a_mask), 508 get_reg_id(index, b_mask)); 509 } 510 } 511 } 512 } 513} 514 515static void do_advanced_regalloc(struct regalloc_state * s) 516{ 517 518 unsigned int i, input_node, node_count, node_index; 519 unsigned int * node_classes; 520 struct rc_instruction * inst; 521 struct rc_list * var_ptr; 522 struct rc_list * variables; 523 struct ra_graph * graph; 524 const struct rc_regalloc_state *ra_state = s->C->regalloc_state; 525 526 /* Get list of program variables */ 527 variables = rc_get_variables(s->C); 528 node_count = rc_list_count(variables); 529 node_classes = memory_pool_malloc(&s->C->Pool, 530 node_count * sizeof(unsigned int)); 531 532 for (var_ptr = variables, node_index = 0; var_ptr; 533 var_ptr = var_ptr->Next, node_index++) { 534 unsigned int class_index; 535 /* Compute the live intervals */ 536 rc_variable_compute_live_intervals(var_ptr->Item); 537 538 class_index = variable_get_class(var_ptr->Item, rc_class_list); 539 node_classes[node_index] = ra_state->class_ids[class_index]; 540 } 541 542 543 /* Calculate live intervals for input registers */ 544 for (inst = s->C->Program.Instructions.Next; 545 inst != &s->C->Program.Instructions; 546 inst = inst->Next) { 547 rc_opcode op = rc_get_flow_control_inst(inst); 548 if (op == RC_OPCODE_BGNLOOP) { 549 struct rc_instruction * endloop = 550 rc_match_bgnloop(inst); 551 if (endloop->IP > s->LoopEnd) { 552 s->LoopEnd = endloop->IP; 553 } 554 } 555 rc_for_all_reads_mask(inst, scan_read_callback, s); 556 } 557 558 /* Compute the writemask for inputs. */ 559 for (i = 0; i < s->NumInputs; i++) { 560 unsigned int chan, writemask = 0; 561 for (chan = 0; chan < 4; chan++) { 562 if (s->Input[i].Live[chan].Used) { 563 writemask |= (1 << chan); 564 } 565 } 566 s->Input[i].Writemask = writemask; 567 } 568 569 graph = ra_alloc_interference_graph(ra_state->regs, 570 node_count + s->NumInputs); 571 572 for (node_index = 0; node_index < node_count; node_index++) { 573 ra_set_node_class(graph, node_index, node_classes[node_index]); 574 } 575 576 /* Build the interference graph */ 577 for (var_ptr = variables, node_index = 0; var_ptr; 578 var_ptr = var_ptr->Next,node_index++) { 579 struct rc_list * a, * b; 580 unsigned int b_index; 581 582 for (a = var_ptr, b = var_ptr->Next, b_index = node_index + 1; 583 b; b = b->Next, b_index++) { 584 struct rc_variable * var_a = a->Item; 585 while (var_a) { 586 struct rc_variable * var_b = b->Item; 587 while (var_b) { 588 if (overlap_live_intervals_array(var_a->Live, var_b->Live)) { 589 ra_add_node_interference(graph, 590 node_index, b_index); 591 } 592 var_b = var_b->Friend; 593 } 594 var_a = var_a->Friend; 595 } 596 } 597 } 598 599 /* Add input registers to the interference graph */ 600 for (i = 0, input_node = 0; i< s->NumInputs; i++) { 601 if (!s->Input[i].Writemask) { 602 continue; 603 } 604 for (var_ptr = variables, node_index = 0; 605 var_ptr; var_ptr = var_ptr->Next, node_index++) { 606 struct rc_variable * var = var_ptr->Item; 607 if (overlap_live_intervals_array(s->Input[i].Live, 608 var->Live)) { 609 ra_add_node_interference(graph, node_index, 610 node_count + input_node); 611 } 612 } 613 /* Manually allocate a register for this input */ 614 ra_set_node_reg(graph, node_count + input_node, get_reg_id( 615 s->Input[i].Index, s->Input[i].Writemask)); 616 input_node++; 617 } 618 619 if (!ra_allocate(graph)) { 620 rc_error(s->C, "Ran out of hardware temporaries\n"); 621 return; 622 } 623 624 /* Rewrite the registers */ 625 for (var_ptr = variables, node_index = 0; var_ptr; 626 var_ptr = var_ptr->Next, node_index++) { 627 int reg = ra_get_node_reg(graph, node_index); 628 unsigned int writemask = reg_get_writemask(reg); 629 unsigned int index = reg_get_index(reg); 630 struct rc_variable * var = var_ptr->Item; 631 632 if (!s->C->is_r500 && var->Inst->Type == RC_INSTRUCTION_NORMAL) { 633 writemask = rc_variable_writemask_sum(var); 634 } 635 636 if (var->Dst.File == RC_FILE_INPUT) { 637 continue; 638 } 639 rc_variable_change_dst(var, index, writemask); 640 } 641 642 ralloc_free(graph); 643} 644 645void rc_init_regalloc_state(struct rc_regalloc_state *s) 646{ 647 unsigned i, j, index; 648 unsigned **ra_q_values; 649 650 /* Pre-computed q values. This array describes the maximum number of 651 * a class's [row] registers that are in conflict with a single 652 * register from another class [column]. 653 * 654 * For example: 655 * q_values[0][2] is 3, because a register from class 2 656 * (RC_REG_CLASS_TRIPLE) may conflict with at most 3 registers from 657 * class 0 (RC_REG_CLASS_SINGLE) e.g. T0.xyz conflicts with T0.x, T0.y, 658 * and T0.z. 659 * 660 * q_values[2][0] is 1, because a register from class 0 661 * (RC_REG_CLASS_SINGLE) may conflict with at most 1 register from 662 * class 2 (RC_REG_CLASS_TRIPLE) e.g. T0.x conflicts with T0.xyz 663 * 664 * The q values for each register class [row] will never be greater 665 * than the maximum number of writemask combinations for that class. 666 * 667 * For example: 668 * 669 * Class 2 (RC_REG_CLASS_TRIPLE) only has 1 writemask combination, 670 * so no value in q_values[2][0..RC_REG_CLASS_COUNT] will be greater 671 * than 1. 672 */ 673 const unsigned q_values[RC_REG_CLASS_COUNT][RC_REG_CLASS_COUNT] = { 674 {1, 2, 3, 0, 1, 2, 3, 1, 1, 1, 2, 2, 2, 1, 1, 1, 2, 2, 2}, 675 {2, 3, 3, 0, 2, 3, 3, 2, 2, 2, 3, 3, 3, 2, 2, 2, 3, 3, 3}, 676 {1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, 677 {0, 0, 0, 1, 1, 1, 1, 0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1}, 678 {1, 2, 3, 3, 3, 3, 3, 1, 1, 1, 2, 2, 2, 3, 3, 3, 3, 3, 3}, 679 {2, 3, 3, 3, 3, 3, 3, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3}, 680 {1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, 681 {1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1}, 682 {1, 1, 1, 0, 1, 1, 1, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1, 0}, 683 {1, 1, 1, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0, 0, 1, 0, 1, 1}, 684 {1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1}, 685 {1, 1, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1}, 686 {1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1}, 687 {1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1}, 688 {1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1}, 689 {1, 1, 1, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1}, 690 {1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1}, 691 {1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1}, 692 {1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1} 693 }; 694 695 /* Allocate the main ra data structure */ 696 s->regs = ra_alloc_reg_set(NULL, R500_PFS_NUM_TEMP_REGS * RC_MASK_XYZW, 697 true); 698 699 /* Create the register classes */ 700 for (i = 0; i < RC_REG_CLASS_COUNT; i++) { 701 const struct rc_class *class = &rc_class_list[i]; 702 s->class_ids[class->ID] = ra_alloc_reg_class(s->regs); 703 704 /* Assign registers to the classes */ 705 for (index = 0; index < R500_PFS_NUM_TEMP_REGS; index++) { 706 for (j = 0; j < class->WritemaskCount; j++) { 707 int reg_id = get_reg_id(index, 708 class->Writemasks[j]); 709 ra_class_add_reg(s->regs, 710 s->class_ids[class->ID], reg_id); 711 } 712 } 713 } 714 715 /* Set the q values. The q_values array is indexed based on 716 * the rc_reg_class ID (RC_REG_CLASS_*) which might be 717 * different than the ID assigned to that class by ra. 718 * This why we need to manually construct this list. 719 */ 720 ra_q_values = MALLOC(RC_REG_CLASS_COUNT * sizeof(unsigned *)); 721 722 for (i = 0; i < RC_REG_CLASS_COUNT; i++) { 723 ra_q_values[i] = MALLOC(RC_REG_CLASS_COUNT * sizeof(unsigned)); 724 for (j = 0; j < RC_REG_CLASS_COUNT; j++) { 725 ra_q_values[s->class_ids[i]][s->class_ids[j]] = 726 q_values[i][j]; 727 } 728 } 729 730 /* Add register conflicts */ 731 add_register_conflicts(s->regs, R500_PFS_NUM_TEMP_REGS); 732 733 ra_set_finalize(s->regs, ra_q_values); 734 735 for (i = 0; i < RC_REG_CLASS_COUNT; i++) { 736 FREE(ra_q_values[i]); 737 } 738 FREE(ra_q_values); 739} 740 741void rc_destroy_regalloc_state(struct rc_regalloc_state *s) 742{ 743 ralloc_free(s->regs); 744} 745 746/** 747 * @param user This parameter should be a pointer to an integer value. If this 748 * integer value is zero, then a simple register allocator will be used that 749 * only allocates space for input registers (\sa do_regalloc_inputs_only). If 750 * user is non-zero, then the regular register allocator will be used 751 * (\sa do_regalloc). 752 */ 753void rc_pair_regalloc(struct radeon_compiler *cc, void *user) 754{ 755 struct r300_fragment_program_compiler *c = 756 (struct r300_fragment_program_compiler*)cc; 757 struct regalloc_state s; 758 int * do_full_regalloc = (int*)user; 759 760 memset(&s, 0, sizeof(s)); 761 s.C = cc; 762 s.NumInputs = rc_get_max_index(cc, RC_FILE_INPUT) + 1; 763 s.Input = memory_pool_malloc(&cc->Pool, 764 s.NumInputs * sizeof(struct register_info)); 765 memset(s.Input, 0, s.NumInputs * sizeof(struct register_info)); 766 767 s.NumTemporaries = rc_get_max_index(cc, RC_FILE_TEMPORARY) + 1; 768 s.Temporary = memory_pool_malloc(&cc->Pool, 769 s.NumTemporaries * sizeof(struct register_info)); 770 memset(s.Temporary, 0, s.NumTemporaries * sizeof(struct register_info)); 771 772 rc_recompute_ips(s.C); 773 774 c->AllocateHwInputs(c, &alloc_input_simple, &s); 775 if (*do_full_regalloc) { 776 do_advanced_regalloc(&s); 777 } else { 778 s.Simple = 1; 779 do_regalloc_inputs_only(&s); 780 } 781 782 /* Rewrite inputs and if we are doing the simple allocation, rewrite 783 * temporaries too. */ 784 for (struct rc_instruction *inst = s.C->Program.Instructions.Next; 785 inst != &s.C->Program.Instructions; 786 inst = inst->Next) { 787 rc_remap_registers(inst, &remap_register, &s); 788 } 789} 790