1/* 2 * Copyright (C) 2009 Nicolai Haehnle. 3 * Copyright 2011 Tom Stellard <tstellar@gmail.com> 4 * 5 * All Rights Reserved. 6 * 7 * Permission is hereby granted, free of charge, to any person obtaining 8 * a copy of this software and associated documentation files (the 9 * "Software"), to deal in the Software without restriction, including 10 * without limitation the rights to use, copy, modify, merge, publish, 11 * distribute, sublicense, and/or sell copies of the Software, and to 12 * permit persons to whom the Software is furnished to do so, subject to 13 * the following conditions: 14 * 15 * The above copyright notice and this permission notice (including the 16 * next paragraph) shall be included in all copies or substantial 17 * portions of the Software. 18 * 19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 20 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 21 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 22 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE 23 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 24 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 25 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 26 * 27 */ 28 29#include "radeon_program_pair.h" 30 31#include <stdio.h> 32 33#include "main/glheader.h" 34#include "program/register_allocate.h" 35#include "ralloc.h" 36 37#include "r300_fragprog_swizzle.h" 38#include "radeon_compiler.h" 39#include "radeon_compiler_util.h" 40#include "radeon_dataflow.h" 41#include "radeon_list.h" 42#include "radeon_variable.h" 43 44#define VERBOSE 0 45 46#define DBG(...) do { if (VERBOSE) fprintf(stderr, __VA_ARGS__); } while(0) 47 48 49 50struct register_info { 51 struct live_intervals Live[4]; 52 53 unsigned int Used:1; 54 unsigned int Allocated:1; 55 unsigned int File:3; 56 unsigned int Index:RC_REGISTER_INDEX_BITS; 57 unsigned int Writemask; 58}; 59 60struct regalloc_state { 61 struct radeon_compiler * C; 62 63 struct register_info * Input; 64 unsigned int NumInputs; 65 66 struct register_info * Temporary; 67 unsigned int NumTemporaries; 68 69 unsigned int Simple; 70 int LoopEnd; 71}; 72 73enum rc_reg_class { 74 RC_REG_CLASS_SINGLE, 75 RC_REG_CLASS_DOUBLE, 76 RC_REG_CLASS_TRIPLE, 77 RC_REG_CLASS_ALPHA, 78 RC_REG_CLASS_SINGLE_PLUS_ALPHA, 79 RC_REG_CLASS_DOUBLE_PLUS_ALPHA, 80 RC_REG_CLASS_TRIPLE_PLUS_ALPHA, 81 RC_REG_CLASS_X, 82 RC_REG_CLASS_Y, 83 RC_REG_CLASS_Z, 84 RC_REG_CLASS_XY, 85 RC_REG_CLASS_YZ, 86 RC_REG_CLASS_XZ, 87 RC_REG_CLASS_XW, 88 RC_REG_CLASS_YW, 89 RC_REG_CLASS_ZW, 90 RC_REG_CLASS_XYW, 91 RC_REG_CLASS_YZW, 92 RC_REG_CLASS_XZW, 93 RC_REG_CLASS_COUNT 94}; 95 96struct rc_class { 97 enum rc_reg_class Class; 98 99 unsigned int WritemaskCount; 100 101 /** This is 1 if this class is being used by the register allocator 102 * and 0 otherwise */ 103 unsigned int Used; 104 105 /** This is the ID number assigned to this class by ra. */ 106 unsigned int Id; 107 108 /** List of writemasks that belong to this class */ 109 unsigned int Writemasks[3]; 110 111 112}; 113 114static void print_live_intervals(struct live_intervals * src) 115{ 116 if (!src || !src->Used) { 117 DBG("(null)"); 118 return; 119 } 120 121 DBG("(%i,%i)", src->Start, src->End); 122} 123 124static int overlap_live_intervals(struct live_intervals * a, struct live_intervals * b) 125{ 126 if (VERBOSE) { 127 DBG("overlap_live_intervals: "); 128 print_live_intervals(a); 129 DBG(" to "); 130 print_live_intervals(b); 131 DBG("\n"); 132 } 133 134 if (!a->Used || !b->Used) { 135 DBG(" unused interval\n"); 136 return 0; 137 } 138 139 if (a->Start > b->Start) { 140 if (a->Start < b->End) { 141 DBG(" overlap\n"); 142 return 1; 143 } 144 } else if (b->Start > a->Start) { 145 if (b->Start < a->End) { 146 DBG(" overlap\n"); 147 return 1; 148 } 149 } else { /* a->Start == b->Start */ 150 if (a->Start != a->End && b->Start != b->End) { 151 DBG(" overlap\n"); 152 return 1; 153 } 154 } 155 156 DBG(" no overlap\n"); 157 158 return 0; 159} 160 161static void scan_read_callback(void * data, struct rc_instruction * inst, 162 rc_register_file file, unsigned int index, unsigned int mask) 163{ 164 struct regalloc_state * s = data; 165 struct register_info * reg; 166 unsigned int i; 167 168 if (file != RC_FILE_INPUT) 169 return; 170 171 s->Input[index].Used = 1; 172 reg = &s->Input[index]; 173 174 for (i = 0; i < 4; i++) { 175 if (!((mask >> i) & 0x1)) { 176 continue; 177 } 178 reg->Live[i].Used = 1; 179 reg->Live[i].Start = 0; 180 reg->Live[i].End = 181 s->LoopEnd > inst->IP ? s->LoopEnd : inst->IP; 182 } 183} 184 185static void remap_register(void * data, struct rc_instruction * inst, 186 rc_register_file * file, unsigned int * index) 187{ 188 struct regalloc_state * s = data; 189 const struct register_info * reg; 190 191 if (*file == RC_FILE_TEMPORARY && s->Simple) 192 reg = &s->Temporary[*index]; 193 else if (*file == RC_FILE_INPUT) 194 reg = &s->Input[*index]; 195 else 196 return; 197 198 if (reg->Allocated) { 199 *index = reg->Index; 200 } 201} 202 203static void alloc_input_simple(void * data, unsigned int input, 204 unsigned int hwreg) 205{ 206 struct regalloc_state * s = data; 207 208 if (input >= s->NumInputs) 209 return; 210 211 s->Input[input].Allocated = 1; 212 s->Input[input].File = RC_FILE_TEMPORARY; 213 s->Input[input].Index = hwreg; 214} 215 216/* This functions offsets the temporary register indices by the number 217 * of input registers, because input registers are actually temporaries and 218 * should not occupy the same space. 219 * 220 * This pass is supposed to be used to maintain correct allocation of inputs 221 * if the standard register allocation is disabled. */ 222static void do_regalloc_inputs_only(struct regalloc_state * s) 223{ 224 for (unsigned i = 0; i < s->NumTemporaries; i++) { 225 s->Temporary[i].Allocated = 1; 226 s->Temporary[i].File = RC_FILE_TEMPORARY; 227 s->Temporary[i].Index = i + s->NumInputs; 228 } 229} 230 231static unsigned int is_derivative(rc_opcode op) 232{ 233 return (op == RC_OPCODE_DDX || op == RC_OPCODE_DDY); 234} 235 236static int find_class( 237 struct rc_class * classes, 238 unsigned int writemask, 239 unsigned int max_writemask_count) 240{ 241 unsigned int i; 242 for (i = 0; i < RC_REG_CLASS_COUNT; i++) { 243 unsigned int j; 244 if (classes[i].WritemaskCount > max_writemask_count) { 245 continue; 246 } 247 for (j = 0; j < 3; j++) { 248 if (classes[i].Writemasks[j] == writemask) { 249 return i; 250 } 251 } 252 } 253 return -1; 254} 255 256struct variable_get_class_cb_data { 257 unsigned int * can_change_writemask; 258 unsigned int conversion_swizzle; 259}; 260 261static void variable_get_class_read_cb( 262 void * userdata, 263 struct rc_instruction * inst, 264 struct rc_pair_instruction_arg * arg, 265 struct rc_pair_instruction_source * src) 266{ 267 struct variable_get_class_cb_data * d = userdata; 268 unsigned int new_swizzle = rc_adjust_channels(arg->Swizzle, 269 d->conversion_swizzle); 270 if (!r300_swizzle_is_native_basic(new_swizzle)) { 271 *d->can_change_writemask = 0; 272 } 273} 274 275static enum rc_reg_class variable_get_class( 276 struct rc_variable * variable, 277 struct rc_class * classes) 278{ 279 unsigned int i; 280 unsigned int can_change_writemask= 1; 281 unsigned int writemask = rc_variable_writemask_sum(variable); 282 struct rc_list * readers = rc_variable_readers_union(variable); 283 int class_index; 284 285 if (!variable->C->is_r500) { 286 struct rc_class c; 287 struct rc_variable * var_ptr; 288 /* The assumption here is that if an instruction has type 289 * RC_INSTRUCTION_NORMAL then it is a TEX instruction. 290 * r300 and r400 can't swizzle the result of a TEX lookup. */ 291 for (var_ptr = variable; var_ptr; var_ptr = var_ptr->Friend) { 292 if (var_ptr->Inst->Type == RC_INSTRUCTION_NORMAL) { 293 writemask = RC_MASK_XYZW; 294 } 295 } 296 297 /* Check if it is possible to do swizzle packing for r300/r400 298 * without creating non-native swizzles. */ 299 class_index = find_class(classes, writemask, 3); 300 if (class_index < 0) { 301 goto error; 302 } 303 c = classes[class_index]; 304 if (c.WritemaskCount == 1) { 305 goto done; 306 } 307 for (i = 0; i < c.WritemaskCount; i++) { 308 struct rc_variable * var_ptr; 309 for (var_ptr = variable; var_ptr; 310 var_ptr = var_ptr->Friend) { 311 int j; 312 unsigned int conversion_swizzle = 313 rc_make_conversion_swizzle( 314 writemask, c.Writemasks[i]); 315 struct variable_get_class_cb_data d; 316 d.can_change_writemask = &can_change_writemask; 317 d.conversion_swizzle = conversion_swizzle; 318 /* If we get this far var_ptr->Inst has to 319 * be a pair instruction. If variable or any 320 * of its friends are normal instructions, 321 * then the writemask will be set to RC_MASK_XYZW 322 * and the function will return before it gets 323 * here. */ 324 rc_pair_for_all_reads_arg(var_ptr->Inst, 325 variable_get_class_read_cb, &d); 326 327 for (j = 0; j < var_ptr->ReaderCount; j++) { 328 unsigned int old_swizzle; 329 unsigned int new_swizzle; 330 struct rc_reader r = var_ptr->Readers[j]; 331 if (r.Inst->Type == 332 RC_INSTRUCTION_PAIR ) { 333 old_swizzle = r.U.P.Arg->Swizzle; 334 } else { 335 old_swizzle = r.U.I.Src->Swizzle; 336 } 337 new_swizzle = rc_adjust_channels( 338 old_swizzle, conversion_swizzle); 339 if (!r300_swizzle_is_native_basic( 340 new_swizzle)) { 341 can_change_writemask = 0; 342 break; 343 } 344 } 345 if (!can_change_writemask) { 346 break; 347 } 348 } 349 if (!can_change_writemask) { 350 break; 351 } 352 } 353 } 354 355 if (variable->Inst->Type == RC_INSTRUCTION_PAIR) { 356 /* DDX/DDY seem to always fail when their writemasks are 357 * changed.*/ 358 if (is_derivative(variable->Inst->U.P.RGB.Opcode) 359 || is_derivative(variable->Inst->U.P.Alpha.Opcode)) { 360 can_change_writemask = 0; 361 } 362 } 363 for ( ; readers; readers = readers->Next) { 364 struct rc_reader * r = readers->Item; 365 if (r->Inst->Type == RC_INSTRUCTION_PAIR) { 366 if (r->U.P.Arg->Source == RC_PAIR_PRESUB_SRC) { 367 can_change_writemask = 0; 368 break; 369 } 370 /* DDX/DDY also fail when their swizzles are changed. */ 371 if (is_derivative(r->Inst->U.P.RGB.Opcode) 372 || is_derivative(r->Inst->U.P.Alpha.Opcode)) { 373 can_change_writemask = 0; 374 break; 375 } 376 } 377 } 378 379 class_index = find_class(classes, writemask, 380 can_change_writemask ? 3 : 1); 381done: 382 if (class_index > -1) { 383 return classes[class_index].Class; 384 } else { 385error: 386 rc_error(variable->C, 387 "Could not find class for index=%u mask=%u\n", 388 variable->Dst.Index, writemask); 389 return 0; 390 } 391} 392 393static unsigned int overlap_live_intervals_array( 394 struct live_intervals * a, 395 struct live_intervals * b) 396{ 397 unsigned int a_chan, b_chan; 398 for (a_chan = 0; a_chan < 4; a_chan++) { 399 for (b_chan = 0; b_chan < 4; b_chan++) { 400 if (overlap_live_intervals(&a[a_chan], &b[b_chan])) { 401 return 1; 402 } 403 } 404 } 405 return 0; 406} 407 408static unsigned int reg_get_index(int reg) 409{ 410 return reg / RC_MASK_XYZW; 411} 412 413static unsigned int reg_get_writemask(int reg) 414{ 415 return (reg % RC_MASK_XYZW) + 1; 416} 417 418static int get_reg_id(unsigned int index, unsigned int writemask) 419{ 420 assert(writemask); 421 if (writemask == 0) { 422 return 0; 423 } 424 return (index * RC_MASK_XYZW) + (writemask - 1); 425} 426 427#if VERBOSE 428static void print_reg(int reg) 429{ 430 unsigned int index = reg_get_index(reg); 431 unsigned int mask = reg_get_writemask(reg); 432 fprintf(stderr, "Temp[%u].%c%c%c%c", index, 433 mask & RC_MASK_X ? 'x' : '_', 434 mask & RC_MASK_Y ? 'y' : '_', 435 mask & RC_MASK_Z ? 'z' : '_', 436 mask & RC_MASK_W ? 'w' : '_'); 437} 438#endif 439 440static void add_register_conflicts( 441 struct ra_regs * regs, 442 unsigned int max_temp_regs) 443{ 444 unsigned int index, a_mask, b_mask; 445 for (index = 0; index < max_temp_regs; index++) { 446 for(a_mask = 1; a_mask <= RC_MASK_XYZW; a_mask++) { 447 for (b_mask = a_mask + 1; b_mask <= RC_MASK_XYZW; 448 b_mask++) { 449 if (a_mask & b_mask) { 450 ra_add_reg_conflict(regs, 451 get_reg_id(index, a_mask), 452 get_reg_id(index, b_mask)); 453 } 454 } 455 } 456 } 457} 458 459static void do_advanced_regalloc(struct regalloc_state * s) 460{ 461 struct rc_class rc_class_list [] = { 462 {RC_REG_CLASS_SINGLE, 3, 0, 0, 463 {RC_MASK_X, 464 RC_MASK_Y, 465 RC_MASK_Z}}, 466 {RC_REG_CLASS_DOUBLE, 3, 0, 0, 467 {RC_MASK_X | RC_MASK_Y, 468 RC_MASK_X | RC_MASK_Z, 469 RC_MASK_Y | RC_MASK_Z}}, 470 {RC_REG_CLASS_TRIPLE, 1, 0, 0, 471 {RC_MASK_X | RC_MASK_Y | RC_MASK_Z, 472 RC_MASK_NONE, 473 RC_MASK_NONE}}, 474 {RC_REG_CLASS_ALPHA, 1, 0, 0, 475 {RC_MASK_W, 476 RC_MASK_NONE, 477 RC_MASK_NONE}}, 478 {RC_REG_CLASS_SINGLE_PLUS_ALPHA, 3, 0, 0, 479 {RC_MASK_X | RC_MASK_W, 480 RC_MASK_Y | RC_MASK_W, 481 RC_MASK_Z | RC_MASK_W}}, 482 {RC_REG_CLASS_DOUBLE_PLUS_ALPHA, 3, 0, 0, 483 {RC_MASK_X | RC_MASK_Y | RC_MASK_W, 484 RC_MASK_X | RC_MASK_Z | RC_MASK_W, 485 RC_MASK_Y | RC_MASK_Z | RC_MASK_W}}, 486 {RC_REG_CLASS_TRIPLE_PLUS_ALPHA, 1, 0, 0, 487 {RC_MASK_X | RC_MASK_Y | RC_MASK_Z | RC_MASK_W, 488 RC_MASK_NONE, 489 RC_MASK_NONE}}, 490 {RC_REG_CLASS_X, 1, 0, 0, 491 {RC_MASK_X, 492 RC_MASK_NONE, 493 RC_MASK_NONE}}, 494 {RC_REG_CLASS_Y, 1, 0, 0, 495 {RC_MASK_Y, 496 RC_MASK_NONE, 497 RC_MASK_NONE}}, 498 {RC_REG_CLASS_Z, 1, 0, 0, 499 {RC_MASK_Z, 500 RC_MASK_NONE, 501 RC_MASK_NONE}}, 502 {RC_REG_CLASS_XY, 1, 0, 0, 503 {RC_MASK_X | RC_MASK_Y, 504 RC_MASK_NONE, 505 RC_MASK_NONE}}, 506 {RC_REG_CLASS_YZ, 1, 0, 0, 507 {RC_MASK_Y | RC_MASK_Z, 508 RC_MASK_NONE, 509 RC_MASK_NONE}}, 510 {RC_REG_CLASS_XZ, 1, 0, 0, 511 {RC_MASK_X | RC_MASK_Z, 512 RC_MASK_NONE, 513 RC_MASK_NONE}}, 514 {RC_REG_CLASS_XW, 1, 0, 0, 515 {RC_MASK_X | RC_MASK_W, 516 RC_MASK_NONE, 517 RC_MASK_NONE}}, 518 {RC_REG_CLASS_YW, 1, 0, 0, 519 {RC_MASK_Y | RC_MASK_W, 520 RC_MASK_NONE, 521 RC_MASK_NONE}}, 522 {RC_REG_CLASS_ZW, 1, 0, 0, 523 {RC_MASK_Z | RC_MASK_W, 524 RC_MASK_NONE, 525 RC_MASK_NONE}}, 526 {RC_REG_CLASS_XYW, 1, 0, 0, 527 {RC_MASK_X | RC_MASK_Y | RC_MASK_W, 528 RC_MASK_NONE, 529 RC_MASK_NONE}}, 530 {RC_REG_CLASS_YZW, 1, 0, 0, 531 {RC_MASK_Y | RC_MASK_Z | RC_MASK_W, 532 RC_MASK_NONE, 533 RC_MASK_NONE}}, 534 {RC_REG_CLASS_XZW, 1, 0, 0, 535 {RC_MASK_X | RC_MASK_Z | RC_MASK_W, 536 RC_MASK_NONE, 537 RC_MASK_NONE}} 538 }; 539 540 unsigned int i, j, index, input_node, node_count, node_index; 541 unsigned int * node_classes; 542 unsigned int * input_classes; 543 struct rc_instruction * inst; 544 struct rc_list * var_ptr; 545 struct rc_list * variables; 546 struct ra_regs * regs; 547 struct ra_graph * graph; 548 549 /* Allocate the main ra data structure */ 550 regs = ra_alloc_reg_set(NULL, s->C->max_temp_regs * RC_MASK_XYZW); 551 552 /* Get list of program variables */ 553 variables = rc_get_variables(s->C); 554 node_count = rc_list_count(variables); 555 node_classes = memory_pool_malloc(&s->C->Pool, 556 node_count * sizeof(unsigned int)); 557 input_classes = memory_pool_malloc(&s->C->Pool, 558 s->NumInputs * sizeof(unsigned int)); 559 560 for (var_ptr = variables, node_index = 0; var_ptr; 561 var_ptr = var_ptr->Next, node_index++) { 562 unsigned int class_index; 563 /* Compute the live intervals */ 564 rc_variable_compute_live_intervals(var_ptr->Item); 565 566 class_index = variable_get_class(var_ptr->Item, rc_class_list); 567 568 /* If we haven't used this register class yet, mark it 569 * as used and allocate space for it. */ 570 if (!rc_class_list[class_index].Used) { 571 rc_class_list[class_index].Used = 1; 572 rc_class_list[class_index].Id = ra_alloc_reg_class(regs); 573 } 574 575 node_classes[node_index] = rc_class_list[class_index].Id; 576 } 577 578 579 /* Assign registers to the classes */ 580 for (i = 0; i < RC_REG_CLASS_COUNT; i++) { 581 struct rc_class class = rc_class_list[i]; 582 if (!class.Used) { 583 continue; 584 } 585 586 for (index = 0; index < s->C->max_temp_regs; index++) { 587 for (j = 0; j < class.WritemaskCount; j++) { 588 int reg_id = get_reg_id(index, 589 class.Writemasks[j]); 590 ra_class_add_reg(regs, class.Id, reg_id); 591 } 592 } 593 } 594 595 /* Add register conflicts */ 596 add_register_conflicts(regs, s->C->max_temp_regs); 597 598 /* Calculate live intervals for input registers */ 599 for (inst = s->C->Program.Instructions.Next; 600 inst != &s->C->Program.Instructions; 601 inst = inst->Next) { 602 rc_opcode op = rc_get_flow_control_inst(inst); 603 if (op == RC_OPCODE_BGNLOOP) { 604 struct rc_instruction * endloop = 605 rc_match_bgnloop(inst); 606 if (endloop->IP > s->LoopEnd) { 607 s->LoopEnd = endloop->IP; 608 } 609 } 610 rc_for_all_reads_mask(inst, scan_read_callback, s); 611 } 612 613 /* Create classes for input registers */ 614 for (i = 0; i < s->NumInputs; i++) { 615 unsigned int chan, class_id, writemask = 0; 616 for (chan = 0; chan < 4; chan++) { 617 if (s->Input[i].Live[chan].Used) { 618 writemask |= (1 << chan); 619 } 620 } 621 s->Input[i].Writemask = writemask; 622 if (!writemask) { 623 continue; 624 } 625 626 class_id = ra_alloc_reg_class(regs); 627 input_classes[i] = class_id; 628 ra_class_add_reg(regs, class_id, 629 get_reg_id(s->Input[i].Index, writemask)); 630 } 631 632 ra_set_finalize(regs); 633 634 graph = ra_alloc_interference_graph(regs, node_count + s->NumInputs); 635 636 /* Build the interference graph */ 637 for (var_ptr = variables, node_index = 0; var_ptr; 638 var_ptr = var_ptr->Next,node_index++) { 639 struct rc_list * a, * b; 640 unsigned int b_index; 641 642 ra_set_node_class(graph, node_index, node_classes[node_index]); 643 644 for (a = var_ptr, b = var_ptr->Next, b_index = node_index + 1; 645 b; b = b->Next, b_index++) { 646 struct rc_variable * var_a = a->Item; 647 while (var_a) { 648 struct rc_variable * var_b = b->Item; 649 while (var_b) { 650 if (overlap_live_intervals_array(var_a->Live, var_b->Live)) { 651 ra_add_node_interference(graph, 652 node_index, b_index); 653 } 654 var_b = var_b->Friend; 655 } 656 var_a = var_a->Friend; 657 } 658 } 659 } 660 661 /* Add input registers to the interference graph */ 662 for (i = 0, input_node = 0; i< s->NumInputs; i++) { 663 if (!s->Input[i].Writemask) { 664 continue; 665 } 666 ra_set_node_class(graph, node_count + input_node, 667 input_classes[i]); 668 for (var_ptr = variables, node_index = 0; 669 var_ptr; var_ptr = var_ptr->Next, node_index++) { 670 struct rc_variable * var = var_ptr->Item; 671 if (overlap_live_intervals_array(s->Input[i].Live, 672 var->Live)) { 673 ra_add_node_interference(graph, node_index, 674 node_count + input_node); 675 } 676 } 677 /* Manually allocate a register for this input */ 678 ra_set_node_reg(graph, node_count + input_node, get_reg_id( 679 s->Input[i].Index, s->Input[i].Writemask)); 680 input_node++; 681 } 682 683 if (!ra_allocate_no_spills(graph)) { 684 rc_error(s->C, "Ran out of hardware temporaries\n"); 685 return; 686 } 687 688 /* Rewrite the registers */ 689 for (var_ptr = variables, node_index = 0; var_ptr; 690 var_ptr = var_ptr->Next, node_index++) { 691 int reg = ra_get_node_reg(graph, node_index); 692 unsigned int writemask = reg_get_writemask(reg); 693 unsigned int index = reg_get_index(reg); 694 struct rc_variable * var = var_ptr->Item; 695 696 if (!s->C->is_r500 && var->Inst->Type == RC_INSTRUCTION_NORMAL) { 697 writemask = rc_variable_writemask_sum(var); 698 } 699 700 if (var->Dst.File == RC_FILE_INPUT) { 701 continue; 702 } 703 rc_variable_change_dst(var, index, writemask); 704 } 705 706 ralloc_free(graph); 707 ralloc_free(regs); 708} 709 710/** 711 * @param user This parameter should be a pointer to an integer value. If this 712 * integer value is zero, then a simple register allocator will be used that 713 * only allocates space for input registers (\sa do_regalloc_inputs_only). If 714 * user is non-zero, then the regular register allocator will be used 715 * (\sa do_regalloc). 716 */ 717void rc_pair_regalloc(struct radeon_compiler *cc, void *user) 718{ 719 struct r300_fragment_program_compiler *c = 720 (struct r300_fragment_program_compiler*)cc; 721 struct regalloc_state s; 722 int * do_full_regalloc = (int*)user; 723 724 memset(&s, 0, sizeof(s)); 725 s.C = cc; 726 s.NumInputs = rc_get_max_index(cc, RC_FILE_INPUT) + 1; 727 s.Input = memory_pool_malloc(&cc->Pool, 728 s.NumInputs * sizeof(struct register_info)); 729 memset(s.Input, 0, s.NumInputs * sizeof(struct register_info)); 730 731 s.NumTemporaries = rc_get_max_index(cc, RC_FILE_TEMPORARY) + 1; 732 s.Temporary = memory_pool_malloc(&cc->Pool, 733 s.NumTemporaries * sizeof(struct register_info)); 734 memset(s.Temporary, 0, s.NumTemporaries * sizeof(struct register_info)); 735 736 rc_recompute_ips(s.C); 737 738 c->AllocateHwInputs(c, &alloc_input_simple, &s); 739 if (*do_full_regalloc) { 740 do_advanced_regalloc(&s); 741 } else { 742 s.Simple = 1; 743 do_regalloc_inputs_only(&s); 744 } 745 746 /* Rewrite inputs and if we are doing the simple allocation, rewrite 747 * temporaries too. */ 748 for (struct rc_instruction *inst = s.C->Program.Instructions.Next; 749 inst != &s.C->Program.Instructions; 750 inst = inst->Next) { 751 rc_remap_registers(inst, &remap_register, &s); 752 } 753} 754