1/* 2 * Copyright 2009 Nicolai Hähnle <nhaehnle@gmail.com> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * on the rights to use, copy, modify, merge, publish, distribute, sub 8 * license, and/or sell copies of the Software, and to permit persons to whom 9 * the Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. */ 22 23#include "radeon_compiler.h" 24 25#include <stdarg.h> 26#include <stdio.h> 27#include <stdlib.h> 28 29#include "radeon_dataflow.h" 30#include "radeon_program.h" 31#include "radeon_program_pair.h" 32#include "radeon_regalloc.h" 33#include "radeon_compiler_util.h" 34 35 36void rc_init(struct radeon_compiler * c, const struct rc_regalloc_state *rs) 37{ 38 memset(c, 0, sizeof(*c)); 39 40 memory_pool_init(&c->Pool); 41 c->Program.Instructions.Prev = &c->Program.Instructions; 42 c->Program.Instructions.Next = &c->Program.Instructions; 43 c->Program.Instructions.U.I.Opcode = RC_OPCODE_ILLEGAL_OPCODE; 44 c->regalloc_state = rs; 45} 46 47void rc_destroy(struct radeon_compiler * c) 48{ 49 rc_constants_destroy(&c->Program.Constants); 50 memory_pool_destroy(&c->Pool); 51 free(c->ErrorMsg); 52} 53 54void rc_debug(struct radeon_compiler * c, const char * fmt, ...) 55{ 56 va_list ap; 57 58 if (!(c->Debug & RC_DBG_LOG)) 59 return; 60 61 va_start(ap, fmt); 62 vfprintf(stderr, fmt, ap); 63 va_end(ap); 64} 65 66void rc_error(struct radeon_compiler * c, const char * fmt, ...) 67{ 68 va_list ap; 69 70 c->Error = 1; 71 72 if (!c->ErrorMsg) { 73 /* Only remember the first error */ 74 char buf[1024]; 75 int written; 76 77 va_start(ap, fmt); 78 written = vsnprintf(buf, sizeof(buf), fmt, ap); 79 va_end(ap); 80 81 if (written < sizeof(buf)) { 82 c->ErrorMsg = strdup(buf); 83 } else { 84 c->ErrorMsg = malloc(written + 1); 85 86 va_start(ap, fmt); 87 vsnprintf(c->ErrorMsg, written + 1, fmt, ap); 88 va_end(ap); 89 } 90 } 91 92 if (c->Debug & RC_DBG_LOG) { 93 fprintf(stderr, "r300compiler error: "); 94 95 va_start(ap, fmt); 96 vfprintf(stderr, fmt, ap); 97 va_end(ap); 98 } 99} 100 101int rc_if_fail_helper(struct radeon_compiler * c, const char * file, int line, const char * assertion) 102{ 103 rc_error(c, "ICE at %s:%i: assertion failed: %s\n", file, line, assertion); 104 return 1; 105} 106 107/** 108 * Recompute c->Program.InputsRead and c->Program.OutputsWritten 109 * based on which inputs and outputs are actually referenced 110 * in program instructions. 111 */ 112void rc_calculate_inputs_outputs(struct radeon_compiler * c) 113{ 114 struct rc_instruction *inst; 115 116 c->Program.InputsRead = 0; 117 c->Program.OutputsWritten = 0; 118 119 for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) 120 { 121 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); 122 int i; 123 124 for (i = 0; i < opcode->NumSrcRegs; ++i) { 125 if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT) 126 c->Program.InputsRead |= 1 << inst->U.I.SrcReg[i].Index; 127 } 128 129 if (opcode->HasDstReg) { 130 if (inst->U.I.DstReg.File == RC_FILE_OUTPUT) 131 c->Program.OutputsWritten |= 1 << inst->U.I.DstReg.Index; 132 } 133 } 134} 135 136/** 137 * Rewrite the program such that everything that source the given input 138 * register will source new_input instead. 139 */ 140void rc_move_input(struct radeon_compiler * c, unsigned input, struct rc_src_register new_input) 141{ 142 struct rc_instruction * inst; 143 144 c->Program.InputsRead &= ~(1 << input); 145 146 for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) { 147 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); 148 unsigned i; 149 150 for(i = 0; i < opcode->NumSrcRegs; ++i) { 151 if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT && inst->U.I.SrcReg[i].Index == input) { 152 inst->U.I.SrcReg[i].File = new_input.File; 153 inst->U.I.SrcReg[i].Index = new_input.Index; 154 inst->U.I.SrcReg[i].Swizzle = combine_swizzles(new_input.Swizzle, inst->U.I.SrcReg[i].Swizzle); 155 if (!inst->U.I.SrcReg[i].Abs) { 156 inst->U.I.SrcReg[i].Negate ^= new_input.Negate; 157 inst->U.I.SrcReg[i].Abs = new_input.Abs; 158 } 159 160 c->Program.InputsRead |= 1 << new_input.Index; 161 } 162 } 163 } 164} 165 166 167/** 168 * Rewrite the program such that everything that writes into the given 169 * output register will instead write to new_output. The new_output 170 * writemask is honoured. 171 */ 172void rc_move_output(struct radeon_compiler * c, unsigned output, unsigned new_output, unsigned writemask) 173{ 174 struct rc_instruction * inst; 175 176 c->Program.OutputsWritten &= ~(1 << output); 177 178 for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) { 179 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); 180 181 if (opcode->HasDstReg) { 182 if (inst->U.I.DstReg.File == RC_FILE_OUTPUT && inst->U.I.DstReg.Index == output) { 183 inst->U.I.DstReg.Index = new_output; 184 inst->U.I.DstReg.WriteMask &= writemask; 185 186 c->Program.OutputsWritten |= 1 << new_output; 187 } 188 } 189 } 190} 191 192 193/** 194 * Rewrite the program such that a given output is duplicated. 195 */ 196void rc_copy_output(struct radeon_compiler * c, unsigned output, unsigned dup_output) 197{ 198 unsigned tempreg = rc_find_free_temporary(c); 199 struct rc_instruction * inst; 200 201 for(inst = c->Program.Instructions.Next; inst != &c->Program.Instructions; inst = inst->Next) { 202 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); 203 204 if (opcode->HasDstReg) { 205 if (inst->U.I.DstReg.File == RC_FILE_OUTPUT && inst->U.I.DstReg.Index == output) { 206 inst->U.I.DstReg.File = RC_FILE_TEMPORARY; 207 inst->U.I.DstReg.Index = tempreg; 208 } 209 } 210 } 211 212 inst = rc_insert_new_instruction(c, c->Program.Instructions.Prev); 213 inst->U.I.Opcode = RC_OPCODE_MOV; 214 inst->U.I.DstReg.File = RC_FILE_OUTPUT; 215 inst->U.I.DstReg.Index = output; 216 217 inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; 218 inst->U.I.SrcReg[0].Index = tempreg; 219 inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW; 220 221 inst = rc_insert_new_instruction(c, c->Program.Instructions.Prev); 222 inst->U.I.Opcode = RC_OPCODE_MOV; 223 inst->U.I.DstReg.File = RC_FILE_OUTPUT; 224 inst->U.I.DstReg.Index = dup_output; 225 226 inst->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; 227 inst->U.I.SrcReg[0].Index = tempreg; 228 inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZW; 229 230 c->Program.OutputsWritten |= 1 << dup_output; 231} 232 233 234/** 235 * Introduce standard code fragment to deal with fragment.position. 236 */ 237void rc_transform_fragment_wpos(struct radeon_compiler * c, unsigned wpos, unsigned new_input, 238 int full_vtransform) 239{ 240 unsigned tempregi = rc_find_free_temporary(c); 241 struct rc_instruction * inst_rcp; 242 struct rc_instruction * inst_mul; 243 struct rc_instruction * inst_mad; 244 struct rc_instruction * inst; 245 246 c->Program.InputsRead &= ~(1 << wpos); 247 c->Program.InputsRead |= 1 << new_input; 248 249 /* perspective divide */ 250 inst_rcp = rc_insert_new_instruction(c, &c->Program.Instructions); 251 inst_rcp->U.I.Opcode = RC_OPCODE_RCP; 252 253 inst_rcp->U.I.DstReg.File = RC_FILE_TEMPORARY; 254 inst_rcp->U.I.DstReg.Index = tempregi; 255 inst_rcp->U.I.DstReg.WriteMask = RC_MASK_W; 256 257 inst_rcp->U.I.SrcReg[0].File = RC_FILE_INPUT; 258 inst_rcp->U.I.SrcReg[0].Index = new_input; 259 inst_rcp->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_WWWW; 260 261 inst_mul = rc_insert_new_instruction(c, inst_rcp); 262 inst_mul->U.I.Opcode = RC_OPCODE_MUL; 263 264 inst_mul->U.I.DstReg.File = RC_FILE_TEMPORARY; 265 inst_mul->U.I.DstReg.Index = tempregi; 266 inst_mul->U.I.DstReg.WriteMask = RC_MASK_XYZ; 267 268 inst_mul->U.I.SrcReg[0].File = RC_FILE_INPUT; 269 inst_mul->U.I.SrcReg[0].Index = new_input; 270 271 inst_mul->U.I.SrcReg[1].File = RC_FILE_TEMPORARY; 272 inst_mul->U.I.SrcReg[1].Index = tempregi; 273 inst_mul->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_WWWW; 274 275 /* viewport transformation */ 276 inst_mad = rc_insert_new_instruction(c, inst_mul); 277 inst_mad->U.I.Opcode = RC_OPCODE_MAD; 278 279 inst_mad->U.I.DstReg.File = RC_FILE_TEMPORARY; 280 inst_mad->U.I.DstReg.Index = tempregi; 281 inst_mad->U.I.DstReg.WriteMask = RC_MASK_XYZ; 282 283 inst_mad->U.I.SrcReg[0].File = RC_FILE_TEMPORARY; 284 inst_mad->U.I.SrcReg[0].Index = tempregi; 285 inst_mad->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_XYZ0; 286 287 inst_mad->U.I.SrcReg[1].File = RC_FILE_CONSTANT; 288 inst_mad->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XYZ0; 289 290 inst_mad->U.I.SrcReg[2].File = RC_FILE_CONSTANT; 291 inst_mad->U.I.SrcReg[2].Swizzle = RC_SWIZZLE_XYZ0; 292 293 if (full_vtransform) { 294 inst_mad->U.I.SrcReg[1].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_VIEWPORT_SCALE, 0); 295 inst_mad->U.I.SrcReg[2].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_VIEWPORT_OFFSET, 0); 296 } else { 297 inst_mad->U.I.SrcReg[1].Index = 298 inst_mad->U.I.SrcReg[2].Index = rc_constants_add_state(&c->Program.Constants, RC_STATE_R300_WINDOW_DIMENSION, 0); 299 } 300 301 for (inst = inst_mad->Next; inst != &c->Program.Instructions; inst = inst->Next) { 302 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); 303 unsigned i; 304 305 for(i = 0; i < opcode->NumSrcRegs; i++) { 306 if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT && 307 inst->U.I.SrcReg[i].Index == wpos) { 308 inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY; 309 inst->U.I.SrcReg[i].Index = tempregi; 310 } 311 } 312 } 313} 314 315 316/** 317 * The FACE input in hardware contains 1 if it's a back face, 0 otherwise. 318 * Gallium and OpenGL define it the other way around. 319 * 320 * So let's just negate FACE at the beginning of the shader and rewrite the rest 321 * of the shader to read from the newly allocated temporary. 322 */ 323void rc_transform_fragment_face(struct radeon_compiler *c, unsigned face) 324{ 325 unsigned tempregi = rc_find_free_temporary(c); 326 struct rc_instruction *inst_add; 327 struct rc_instruction *inst; 328 329 /* perspective divide */ 330 inst_add = rc_insert_new_instruction(c, &c->Program.Instructions); 331 inst_add->U.I.Opcode = RC_OPCODE_ADD; 332 333 inst_add->U.I.DstReg.File = RC_FILE_TEMPORARY; 334 inst_add->U.I.DstReg.Index = tempregi; 335 inst_add->U.I.DstReg.WriteMask = RC_MASK_X; 336 337 inst_add->U.I.SrcReg[0].File = RC_FILE_NONE; 338 inst_add->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_1111; 339 340 inst_add->U.I.SrcReg[1].File = RC_FILE_INPUT; 341 inst_add->U.I.SrcReg[1].Index = face; 342 inst_add->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_XXXX; 343 inst_add->U.I.SrcReg[1].Negate = RC_MASK_XYZW; 344 345 for (inst = inst_add->Next; inst != &c->Program.Instructions; inst = inst->Next) { 346 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); 347 unsigned i; 348 349 for(i = 0; i < opcode->NumSrcRegs; i++) { 350 if (inst->U.I.SrcReg[i].File == RC_FILE_INPUT && 351 inst->U.I.SrcReg[i].Index == face) { 352 inst->U.I.SrcReg[i].File = RC_FILE_TEMPORARY; 353 inst->U.I.SrcReg[i].Index = tempregi; 354 } 355 } 356 } 357} 358 359static void reg_count_callback(void * userdata, struct rc_instruction * inst, 360 rc_register_file file, unsigned int index, unsigned int mask) 361{ 362 struct rc_program_stats *s = userdata; 363 if (file == RC_FILE_TEMPORARY) 364 (int)index > s->num_temp_regs ? s->num_temp_regs = index : 0; 365 if (file == RC_FILE_INLINE) 366 s->num_inline_literals++; 367} 368 369void rc_get_stats(struct radeon_compiler *c, struct rc_program_stats *s) 370{ 371 struct rc_instruction * tmp; 372 memset(s, 0, sizeof(*s)); 373 374 for(tmp = c->Program.Instructions.Next; tmp != &c->Program.Instructions; 375 tmp = tmp->Next){ 376 const struct rc_opcode_info * info; 377 rc_for_all_reads_mask(tmp, reg_count_callback, s); 378 if (tmp->Type == RC_INSTRUCTION_NORMAL) { 379 info = rc_get_opcode_info(tmp->U.I.Opcode); 380 if (info->Opcode == RC_OPCODE_BEGIN_TEX) 381 continue; 382 if (tmp->U.I.PreSub.Opcode != RC_PRESUB_NONE) 383 s->num_presub_ops++; 384 } else { 385 if (tmp->U.P.RGB.Src[RC_PAIR_PRESUB_SRC].Used) 386 s->num_presub_ops++; 387 if (tmp->U.P.Alpha.Src[RC_PAIR_PRESUB_SRC].Used) 388 s->num_presub_ops++; 389 /* Assuming alpha will never be a flow control or 390 * a tex instruction. */ 391 if (tmp->U.P.Alpha.Opcode != RC_OPCODE_NOP) 392 s->num_alpha_insts++; 393 if (tmp->U.P.RGB.Opcode != RC_OPCODE_NOP) 394 s->num_rgb_insts++; 395 if (tmp->U.P.RGB.Omod != RC_OMOD_MUL_1 && 396 tmp->U.P.RGB.Omod != RC_OMOD_DISABLE) { 397 s->num_omod_ops++; 398 } 399 if (tmp->U.P.Alpha.Omod != RC_OMOD_MUL_1 && 400 tmp->U.P.Alpha.Omod != RC_OMOD_DISABLE) { 401 s->num_omod_ops++; 402 } 403 info = rc_get_opcode_info(tmp->U.P.RGB.Opcode); 404 } 405 if (info->IsFlowControl) 406 s->num_fc_insts++; 407 if (info->HasTexture) 408 s->num_tex_insts++; 409 s->num_insts++; 410 } 411 /* Increment here because the reg_count_callback store the max 412 * temporary reg index in s->nun_temp_regs. */ 413 s->num_temp_regs++; 414} 415 416static void print_stats(struct radeon_compiler * c) 417{ 418 struct rc_program_stats s; 419 420 if (c->initial_num_insts <= 5) 421 return; 422 423 rc_get_stats(c, &s); 424 425 switch (c->type) { 426 case RC_VERTEX_PROGRAM: 427 fprintf(stderr,"~~~~~~~~~ VERTEX PROGRAM ~~~~~~~~\n" 428 "~%4u Instructions\n" 429 "~%4u Flow Control Instructions\n" 430 "~%4u Temporary Registers\n" 431 "~~~~~~~~~~~~~~ END ~~~~~~~~~~~~~~\n", 432 s.num_insts, s.num_fc_insts, s.num_temp_regs); 433 break; 434 435 case RC_FRAGMENT_PROGRAM: 436 fprintf(stderr,"~~~~~~~~ FRAGMENT PROGRAM ~~~~~~~\n" 437 "~%4u Instructions\n" 438 "~%4u Vector Instructions (RGB)\n" 439 "~%4u Scalar Instructions (Alpha)\n" 440 "~%4u Flow Control Instructions\n" 441 "~%4u Texture Instructions\n" 442 "~%4u Presub Operations\n" 443 "~%4u OMOD Operations\n" 444 "~%4u Temporary Registers\n" 445 "~%4u Inline Literals\n" 446 "~~~~~~~~~~~~~~ END ~~~~~~~~~~~~~~\n", 447 s.num_insts, s.num_rgb_insts, s.num_alpha_insts, 448 s.num_fc_insts, s.num_tex_insts, s.num_presub_ops, 449 s.num_omod_ops, s.num_temp_regs, s.num_inline_literals); 450 break; 451 default: 452 assert(0); 453 } 454} 455 456static const char *shader_name[RC_NUM_PROGRAM_TYPES] = { 457 "Vertex Program", 458 "Fragment Program" 459}; 460 461void rc_run_compiler_passes(struct radeon_compiler *c, struct radeon_compiler_pass *list) 462{ 463 for (unsigned i = 0; list[i].name; i++) { 464 if (list[i].predicate) { 465 list[i].run(c, list[i].user); 466 467 if (c->Error) 468 return; 469 470 if ((c->Debug & RC_DBG_LOG) && list[i].dump) { 471 fprintf(stderr, "%s: after '%s'\n", shader_name[c->type], list[i].name); 472 rc_print_program(&c->Program); 473 } 474 } 475 } 476} 477 478/* Executes a list of compiler passes given in the parameter 'list'. */ 479void rc_run_compiler(struct radeon_compiler *c, struct radeon_compiler_pass *list) 480{ 481 struct rc_program_stats s; 482 483 rc_get_stats(c, &s); 484 c->initial_num_insts = s.num_insts; 485 486 if (c->Debug & RC_DBG_LOG) { 487 fprintf(stderr, "%s: before compilation\n", shader_name[c->type]); 488 rc_print_program(&c->Program); 489 } 490 491 rc_run_compiler_passes(c, list); 492 493 if (c->Debug & RC_DBG_STATS) 494 print_stats(c); 495} 496 497void rc_validate_final_shader(struct radeon_compiler *c, void *user) 498{ 499 /* Check the number of constants. */ 500 if (c->Program.Constants.Count > c->max_constants) { 501 rc_error(c, "Too many constants. Max: %i, Got: %i\n", 502 c->max_constants, c->Program.Constants.Count); 503 } 504} 505