1/* 2 * Copyright (C) 2009 Nicolai Haehnle. 3 * 4 * All Rights Reserved. 5 * 6 * Permission is hereby granted, free of charge, to any person obtaining 7 * a copy of this software and associated documentation files (the 8 * "Software"), to deal in the Software without restriction, including 9 * without limitation the rights to use, copy, modify, merge, publish, 10 * distribute, sublicense, and/or sell copies of the Software, and to 11 * permit persons to whom the Software is furnished to do so, subject to 12 * the following conditions: 13 * 14 * The above copyright notice and this permission notice (including the 15 * next paragraph) shall be included in all copies or substantial 16 * portions of the Software. 17 * 18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 19 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 20 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 21 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE 22 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 23 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 24 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 25 * 26 */ 27 28#include "radeon_program_pair.h" 29 30#include "radeon_compiler.h" 31#include "radeon_compiler_util.h" 32 33 34/** 35 * Finally rewrite ADD, MOV, MUL as the appropriate native instruction 36 * and reverse the order of arguments for CMP. 37 */ 38static void final_rewrite(struct rc_sub_instruction *inst) 39{ 40 struct rc_src_register tmp; 41 42 switch(inst->Opcode) { 43 case RC_OPCODE_ADD: 44 inst->SrcReg[2] = inst->SrcReg[1]; 45 inst->SrcReg[1].File = RC_FILE_NONE; 46 inst->SrcReg[1].Swizzle = RC_SWIZZLE_1111; 47 inst->SrcReg[1].Negate = RC_MASK_NONE; 48 inst->Opcode = RC_OPCODE_MAD; 49 break; 50 case RC_OPCODE_CMP: 51 tmp = inst->SrcReg[2]; 52 inst->SrcReg[2] = inst->SrcReg[0]; 53 inst->SrcReg[0] = tmp; 54 break; 55 case RC_OPCODE_MOV: 56 /* AMD say we should use CMP. 57 * However, when we transform 58 * KIL -r0; 59 * into 60 * CMP tmp, -r0, -r0, 0; 61 * KIL tmp; 62 * we get incorrect behaviour on R500 when r0 == 0.0. 63 * It appears that the R500 KIL hardware treats -0.0 as less 64 * than zero. 65 */ 66 inst->SrcReg[1].File = RC_FILE_NONE; 67 inst->SrcReg[1].Swizzle = RC_SWIZZLE_1111; 68 inst->SrcReg[2].File = RC_FILE_NONE; 69 inst->SrcReg[2].Swizzle = RC_SWIZZLE_0000; 70 inst->Opcode = RC_OPCODE_MAD; 71 break; 72 case RC_OPCODE_MUL: 73 inst->SrcReg[2].File = RC_FILE_NONE; 74 inst->SrcReg[2].Swizzle = RC_SWIZZLE_0000; 75 inst->Opcode = RC_OPCODE_MAD; 76 break; 77 default: 78 /* nothing to do */ 79 break; 80 } 81} 82 83 84/** 85 * Classify an instruction according to which ALUs etc. it needs 86 */ 87static void classify_instruction(struct rc_sub_instruction * inst, 88 int * needrgb, int * needalpha, int * istranscendent) 89{ 90 *needrgb = (inst->DstReg.WriteMask & RC_MASK_XYZ) ? 1 : 0; 91 *needalpha = (inst->DstReg.WriteMask & RC_MASK_W) ? 1 : 0; 92 *istranscendent = 0; 93 94 if (inst->WriteALUResult == RC_ALURESULT_X) 95 *needrgb = 1; 96 else if (inst->WriteALUResult == RC_ALURESULT_W) 97 *needalpha = 1; 98 99 switch(inst->Opcode) { 100 case RC_OPCODE_ADD: 101 case RC_OPCODE_CMP: 102 case RC_OPCODE_CND: 103 case RC_OPCODE_DDX: 104 case RC_OPCODE_DDY: 105 case RC_OPCODE_FRC: 106 case RC_OPCODE_MAD: 107 case RC_OPCODE_MAX: 108 case RC_OPCODE_MIN: 109 case RC_OPCODE_MOV: 110 case RC_OPCODE_MUL: 111 break; 112 case RC_OPCODE_COS: 113 case RC_OPCODE_EX2: 114 case RC_OPCODE_LG2: 115 case RC_OPCODE_RCP: 116 case RC_OPCODE_RSQ: 117 case RC_OPCODE_SIN: 118 *istranscendent = 1; 119 *needalpha = 1; 120 break; 121 case RC_OPCODE_DP4: 122 *needalpha = 1; 123 /* fall through */ 124 case RC_OPCODE_DP3: 125 *needrgb = 1; 126 break; 127 default: 128 break; 129 } 130} 131 132static void src_uses(struct rc_src_register src, unsigned int * rgb, 133 unsigned int * alpha) 134{ 135 int j; 136 for(j = 0; j < 4; ++j) { 137 unsigned int swz = GET_SWZ(src.Swizzle, j); 138 if (swz < 3) 139 *rgb = 1; 140 else if (swz < 4) 141 *alpha = 1; 142 } 143} 144 145/** 146 * Fill the given ALU instruction's opcodes and source operands into the given pair, 147 * if possible. 148 */ 149static void set_pair_instruction(struct r300_fragment_program_compiler *c, 150 struct rc_pair_instruction * pair, 151 struct rc_sub_instruction * inst) 152{ 153 int needrgb, needalpha, istranscendent; 154 const struct rc_opcode_info * opcode; 155 int i; 156 157 memset(pair, 0, sizeof(struct rc_pair_instruction)); 158 159 classify_instruction(inst, &needrgb, &needalpha, &istranscendent); 160 161 if (needrgb) { 162 if (istranscendent) 163 pair->RGB.Opcode = RC_OPCODE_REPL_ALPHA; 164 else 165 pair->RGB.Opcode = inst->Opcode; 166 if (inst->SaturateMode == RC_SATURATE_ZERO_ONE) 167 pair->RGB.Saturate = 1; 168 } 169 if (needalpha) { 170 pair->Alpha.Opcode = inst->Opcode; 171 if (inst->SaturateMode == RC_SATURATE_ZERO_ONE) 172 pair->Alpha.Saturate = 1; 173 } 174 175 opcode = rc_get_opcode_info(inst->Opcode); 176 177 /* Presubtract handling: 178 * We need to make sure that the values used by the presubtract 179 * operation end up in src0 or src1. */ 180 if(inst->PreSub.Opcode != RC_PRESUB_NONE) { 181 /* rc_pair_alloc_source() will fill in data for 182 * pair->{RGB,ALPHA}.Src[RC_PAIR_PRESUB_SRC] */ 183 int j; 184 for(j = 0; j < 3; j++) { 185 int src_regs; 186 if(inst->SrcReg[j].File != RC_FILE_PRESUB) 187 continue; 188 189 src_regs = rc_presubtract_src_reg_count( 190 inst->PreSub.Opcode); 191 for(i = 0; i < src_regs; i++) { 192 unsigned int rgb = 0; 193 unsigned int alpha = 0; 194 src_uses(inst->SrcReg[j], &rgb, &alpha); 195 if(rgb) { 196 pair->RGB.Src[i].File = 197 inst->PreSub.SrcReg[i].File; 198 pair->RGB.Src[i].Index = 199 inst->PreSub.SrcReg[i].Index; 200 pair->RGB.Src[i].Used = 1; 201 } 202 if(alpha) { 203 pair->Alpha.Src[i].File = 204 inst->PreSub.SrcReg[i].File; 205 pair->Alpha.Src[i].Index = 206 inst->PreSub.SrcReg[i].Index; 207 pair->Alpha.Src[i].Used = 1; 208 } 209 } 210 } 211 } 212 213 for(i = 0; i < opcode->NumSrcRegs; ++i) { 214 int source; 215 if (needrgb && !istranscendent) { 216 unsigned int srcrgb = 0; 217 unsigned int srcalpha = 0; 218 unsigned int srcmask = 0; 219 int j; 220 /* We don't care about the alpha channel here. We only 221 * want the part of the swizzle that writes to rgb, 222 * since we are creating an rgb instruction. */ 223 for(j = 0; j < 3; ++j) { 224 unsigned int swz = GET_SWZ(inst->SrcReg[i].Swizzle, j); 225 226 if (swz < RC_SWIZZLE_W) 227 srcrgb = 1; 228 else if (swz == RC_SWIZZLE_W) 229 srcalpha = 1; 230 231 if (swz < RC_SWIZZLE_UNUSED) 232 srcmask |= 1 << j; 233 } 234 source = rc_pair_alloc_source(pair, srcrgb, srcalpha, 235 inst->SrcReg[i].File, inst->SrcReg[i].Index); 236 if (source < 0) { 237 rc_error(&c->Base, "Failed to translate " 238 "rgb instruction.\n"); 239 return; 240 } 241 pair->RGB.Arg[i].Source = source; 242 pair->RGB.Arg[i].Swizzle = 243 rc_init_swizzle(inst->SrcReg[i].Swizzle, 3); 244 pair->RGB.Arg[i].Abs = inst->SrcReg[i].Abs; 245 pair->RGB.Arg[i].Negate = !!(srcmask & inst->SrcReg[i].Negate & (RC_MASK_X | RC_MASK_Y | RC_MASK_Z)); 246 } 247 if (needalpha) { 248 unsigned int srcrgb = 0; 249 unsigned int srcalpha = 0; 250 unsigned int swz; 251 if (istranscendent) { 252 swz = rc_get_scalar_src_swz(inst->SrcReg[i].Swizzle); 253 } else { 254 swz = GET_SWZ(inst->SrcReg[i].Swizzle, 3); 255 } 256 257 if (swz < 3) 258 srcrgb = 1; 259 else if (swz < 4) 260 srcalpha = 1; 261 source = rc_pair_alloc_source(pair, srcrgb, srcalpha, 262 inst->SrcReg[i].File, inst->SrcReg[i].Index); 263 if (source < 0) { 264 rc_error(&c->Base, "Failed to translate " 265 "alpha instruction.\n"); 266 return; 267 } 268 pair->Alpha.Arg[i].Source = source; 269 pair->Alpha.Arg[i].Swizzle = rc_init_swizzle(swz, 1); 270 pair->Alpha.Arg[i].Abs = inst->SrcReg[i].Abs; 271 272 if (istranscendent) { 273 pair->Alpha.Arg[i].Negate = 274 !!(inst->SrcReg[i].Negate & 275 inst->DstReg.WriteMask); 276 } else { 277 pair->Alpha.Arg[i].Negate = 278 !!(inst->SrcReg[i].Negate & RC_MASK_W); 279 } 280 } 281 } 282 283 /* Destination handling */ 284 if (inst->DstReg.File == RC_FILE_OUTPUT) { 285 if (inst->DstReg.Index == c->OutputDepth) { 286 pair->Alpha.DepthWriteMask |= GET_BIT(inst->DstReg.WriteMask, 3); 287 } else { 288 for (i = 0; i < 4; i++) { 289 if (inst->DstReg.Index == c->OutputColor[i]) { 290 pair->RGB.Target = i; 291 pair->Alpha.Target = i; 292 pair->RGB.OutputWriteMask |= 293 inst->DstReg.WriteMask & RC_MASK_XYZ; 294 pair->Alpha.OutputWriteMask |= 295 GET_BIT(inst->DstReg.WriteMask, 3); 296 break; 297 } 298 } 299 } 300 } else { 301 if (needrgb) { 302 pair->RGB.DestIndex = inst->DstReg.Index; 303 pair->RGB.WriteMask |= inst->DstReg.WriteMask & RC_MASK_XYZ; 304 } 305 306 if (needalpha) { 307 pair->Alpha.WriteMask |= (GET_BIT(inst->DstReg.WriteMask, 3) << 3); 308 if (pair->Alpha.WriteMask) { 309 pair->Alpha.DestIndex = inst->DstReg.Index; 310 } 311 } 312 } 313 314 if (needrgb) { 315 pair->RGB.Omod = inst->Omod; 316 } 317 if (needalpha) { 318 pair->Alpha.Omod = inst->Omod; 319 } 320 321 if (inst->WriteALUResult) { 322 pair->WriteALUResult = inst->WriteALUResult; 323 pair->ALUResultCompare = inst->ALUResultCompare; 324 } 325} 326 327 328static void check_opcode_support(struct r300_fragment_program_compiler *c, 329 struct rc_sub_instruction *inst) 330{ 331 const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->Opcode); 332 333 if (opcode->HasDstReg) { 334 if (inst->SaturateMode == RC_SATURATE_MINUS_PLUS_ONE) { 335 rc_error(&c->Base, "Fragment program does not support signed Saturate.\n"); 336 return; 337 } 338 } 339 340 for (unsigned i = 0; i < opcode->NumSrcRegs; i++) { 341 if (inst->SrcReg[i].RelAddr) { 342 rc_error(&c->Base, "Fragment program does not support relative addressing " 343 " of source operands.\n"); 344 return; 345 } 346 } 347} 348 349 350/** 351 * Translate all ALU instructions into corresponding pair instructions, 352 * performing no other changes. 353 */ 354void rc_pair_translate(struct radeon_compiler *cc, void *user) 355{ 356 struct r300_fragment_program_compiler *c = (struct r300_fragment_program_compiler*)cc; 357 358 for(struct rc_instruction * inst = c->Base.Program.Instructions.Next; 359 inst != &c->Base.Program.Instructions; 360 inst = inst->Next) { 361 const struct rc_opcode_info * opcode; 362 struct rc_sub_instruction copy; 363 364 if (inst->Type != RC_INSTRUCTION_NORMAL) 365 continue; 366 367 opcode = rc_get_opcode_info(inst->U.I.Opcode); 368 369 if (opcode->HasTexture || opcode->IsFlowControl || opcode->Opcode == RC_OPCODE_KIL) 370 continue; 371 372 copy = inst->U.I; 373 374 check_opcode_support(c, ©); 375 376 final_rewrite(©); 377 inst->Type = RC_INSTRUCTION_PAIR; 378 set_pair_instruction(c, &inst->U.P, ©); 379 } 380} 381