radeon_program_alu.c revision 3d32e589879806297258e36ea80aae5044293ca3
/*
 * Copyright (C) 2008 Nicolai Haehnle.
 *
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sublicense, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial
 * portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
 * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
 * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
 * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
 * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 */

/**
 * @file
 *
 * Shareable transformations that transform "special" ALU instructions
 * into ALU instructions that are supported by hardware.
331c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * 341c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák */ 351c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 361c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák#include "radeon_program_alu.h" 371c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 381c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák#include "radeon_compiler.h" 391c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák#include "radeon_compiler_util.h" 401c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 411c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 421c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic struct rc_instruction *emit1( 431c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct radeon_compiler * c, struct rc_instruction * after, 441c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák rc_opcode Opcode, rc_saturate_mode Saturate, struct rc_dst_register DstReg, 451c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_src_register SrcReg) 461c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{ 471c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_instruction *fpi = rc_insert_new_instruction(c, after); 481c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 491c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák fpi->U.I.Opcode = Opcode; 501c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák fpi->U.I.SaturateMode = Saturate; 511c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák fpi->U.I.DstReg = DstReg; 521c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák fpi->U.I.SrcReg[0] = SrcReg; 531c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return fpi; 541c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák} 551c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 561c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic struct rc_instruction *emit2( 571c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct radeon_compiler * c, struct rc_instruction * after, 581c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 
rc_opcode Opcode, rc_saturate_mode Saturate, struct rc_dst_register DstReg, 591c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_src_register SrcReg0, struct rc_src_register SrcReg1) 601c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{ 611c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_instruction *fpi = rc_insert_new_instruction(c, after); 621c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 631c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák fpi->U.I.Opcode = Opcode; 641c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák fpi->U.I.SaturateMode = Saturate; 651c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák fpi->U.I.DstReg = DstReg; 661c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák fpi->U.I.SrcReg[0] = SrcReg0; 671c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák fpi->U.I.SrcReg[1] = SrcReg1; 681c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return fpi; 691c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák} 701c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 711c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic struct rc_instruction *emit3( 721c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct radeon_compiler * c, struct rc_instruction * after, 731c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák rc_opcode Opcode, rc_saturate_mode Saturate, struct rc_dst_register DstReg, 741c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_src_register SrcReg0, struct rc_src_register SrcReg1, 751c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_src_register SrcReg2) 761c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{ 771c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_instruction *fpi = rc_insert_new_instruction(c, after); 781c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 791c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák fpi->U.I.Opcode = Opcode; 801c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák fpi->U.I.SaturateMode = Saturate; 811c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek 
Olšák fpi->U.I.DstReg = DstReg; 821c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák fpi->U.I.SrcReg[0] = SrcReg0; 831c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák fpi->U.I.SrcReg[1] = SrcReg1; 841c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák fpi->U.I.SrcReg[2] = SrcReg2; 851c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return fpi; 861c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák} 871c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 881c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic struct rc_dst_register dstregtmpmask(int index, int mask) 891c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{ 9032faaea743ca74f4ba29184ef44ebf2c0e962a46Brian Paul struct rc_dst_register dst = {0, 0, 0}; 911c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák dst.File = RC_FILE_TEMPORARY; 921c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák dst.Index = index; 931c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák dst.WriteMask = mask; 941c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return dst; 951c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák} 961c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 971c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic const struct rc_src_register builtin_zero = { 981c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák .File = RC_FILE_NONE, 991c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák .Index = 0, 1001c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák .Swizzle = RC_SWIZZLE_0000 1011c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák}; 1021c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic const struct rc_src_register builtin_one = { 1031c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák .File = RC_FILE_NONE, 1041c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák .Index = 0, 1051c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák .Swizzle = RC_SWIZZLE_1111 1061c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák}; 1073d32e589879806297258e36ea80aae5044293ca3Tom Stellard 
1083d32e589879806297258e36ea80aae5044293ca3Tom Stellardstatic const struct rc_src_register builtin_half = { 1093d32e589879806297258e36ea80aae5044293ca3Tom Stellard .File = RC_FILE_NONE, 1103d32e589879806297258e36ea80aae5044293ca3Tom Stellard .Index = 0, 1113d32e589879806297258e36ea80aae5044293ca3Tom Stellard .Swizzle = RC_SWIZZLE_HHHH 1123d32e589879806297258e36ea80aae5044293ca3Tom Stellard}; 1133d32e589879806297258e36ea80aae5044293ca3Tom Stellard 1141c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic const struct rc_src_register srcreg_undefined = { 1151c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák .File = RC_FILE_NONE, 1161c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák .Index = 0, 1171c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák .Swizzle = RC_SWIZZLE_XYZW 1181c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák}; 1191c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 1201c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic struct rc_src_register srcreg(int file, int index) 1211c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{ 1221c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_src_register src = srcreg_undefined; 1231c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák src.File = file; 1241c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák src.Index = index; 1251c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return src; 1261c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák} 1271c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 1281c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic struct rc_src_register srcregswz(int file, int index, int swz) 1291c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{ 1301c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_src_register src = srcreg_undefined; 1311c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák src.File = file; 1321c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák src.Index = index; 1331c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák src.Swizzle = swz; 
1341c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return src; 1351c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák} 1361c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 1371c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic struct rc_src_register absolute(struct rc_src_register reg) 1381c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{ 1391c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_src_register newreg = reg; 1401c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák newreg.Abs = 1; 1411c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák newreg.Negate = RC_MASK_NONE; 1421c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return newreg; 1431c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák} 1441c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 1451c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic struct rc_src_register negate(struct rc_src_register reg) 1461c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{ 1471c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_src_register newreg = reg; 1481c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák newreg.Negate = newreg.Negate ^ RC_MASK_XYZW; 1491c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return newreg; 1501c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák} 1511c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 1521c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic struct rc_src_register swizzle(struct rc_src_register reg, 1531c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák rc_swizzle x, rc_swizzle y, rc_swizzle z, rc_swizzle w) 1541c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{ 1551c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_src_register swizzled = reg; 1561c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák swizzled.Swizzle = combine_swizzles4(reg.Swizzle, x, y, z, w); 1571c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return swizzled; 1581c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák} 
1591c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 1601c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic struct rc_src_register swizzle_smear(struct rc_src_register reg, 1611c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák rc_swizzle x) 1621c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{ 1631c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return swizzle(reg, x, x, x, x); 1641c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák} 1651c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 1661c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic struct rc_src_register swizzle_xxxx(struct rc_src_register reg) 1671c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{ 1681c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return swizzle_smear(reg, RC_SWIZZLE_X); 1691c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák} 1701c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 1711c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic struct rc_src_register swizzle_yyyy(struct rc_src_register reg) 1721c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{ 1731c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return swizzle_smear(reg, RC_SWIZZLE_Y); 1741c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák} 1751c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 1761c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic struct rc_src_register swizzle_zzzz(struct rc_src_register reg) 1771c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{ 1781c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return swizzle_smear(reg, RC_SWIZZLE_Z); 1791c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák} 1801c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 1811c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic struct rc_src_register swizzle_wwww(struct rc_src_register reg) 1821c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{ 1831c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return swizzle_smear(reg, RC_SWIZZLE_W); 1841c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek 
Olšák} 1851c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 1861c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic int is_dst_safe_to_reuse(struct rc_instruction *inst) 1871c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{ 1881c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák const struct rc_opcode_info *info = rc_get_opcode_info(inst->U.I.Opcode); 1891c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák unsigned i; 1901c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 1911c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák assert(info->HasDstReg); 1921c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 1931c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (inst->U.I.DstReg.File != RC_FILE_TEMPORARY) 1941c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return 0; 1951c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 1961c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák for (i = 0; i < info->NumSrcRegs; i++) { 1971c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY && 1981c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.SrcReg[i].Index == inst->U.I.DstReg.Index) 1991c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return 0; 2001c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 2011c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 2021c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return 1; 2031c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák} 2041c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 2051c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic struct rc_dst_register try_to_reuse_dst(struct radeon_compiler *c, 2061c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_instruction *inst) 2071c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{ 2081c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák unsigned tmp; 2091c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 2101c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (is_dst_safe_to_reuse(inst)) 
2111c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák tmp = inst->U.I.DstReg.Index; 2121c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák else 2131c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák tmp = rc_find_free_temporary(c); 2141c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 2151c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return dstregtmpmask(tmp, inst->U.I.DstReg.WriteMask); 2161c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák} 2171c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 2181c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic void transform_ABS(struct radeon_compiler* c, 2191c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_instruction* inst) 2201c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{ 2211c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_src_register src = inst->U.I.SrcReg[0]; 2221c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák src.Abs = 1; 2231c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák src.Negate = RC_MASK_NONE; 2241c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák emit1(c, inst->Prev, RC_OPCODE_MOV, inst->U.I.SaturateMode, inst->U.I.DstReg, src); 2251c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák rc_remove_instruction(inst); 2261c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák} 2271c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 2281c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic void transform_CEIL(struct radeon_compiler* c, 2291c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_instruction* inst) 2301c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{ 2311c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák /* Assuming: 2321c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * ceil(x) = -floor(-x) 2331c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * 2341c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * After inlining floor: 2351c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * ceil(x) = -(-x-frac(-x)) 
2361c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * 2371c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * After simplification: 2381c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * ceil(x) = x+frac(-x) 2391c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák */ 2401c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 2411c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_dst_register dst = try_to_reuse_dst(c, inst); 2421c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dst, negate(inst->U.I.SrcReg[0])); 2431c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák emit2(c, inst->Prev, RC_OPCODE_ADD, inst->U.I.SaturateMode, inst->U.I.DstReg, 2441c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.SrcReg[0], srcreg(RC_FILE_TEMPORARY, dst.Index)); 2451c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák rc_remove_instruction(inst); 2461c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák} 2471c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 2481c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic void transform_CLAMP(struct radeon_compiler *c, 2491c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_instruction *inst) 2501c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{ 2511c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák /* CLAMP dst, src, min, max 2521c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * into: 2531c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * MIN tmp, src, max 2541c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * MAX dst, tmp, min 2551c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák */ 2561c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_dst_register dst = try_to_reuse_dst(c, inst); 2571c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák emit2(c, inst->Prev, RC_OPCODE_MIN, 0, dst, 2581c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.SrcReg[0], inst->U.I.SrcReg[2]); 2591c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák emit2(c, inst->Prev, 
RC_OPCODE_MAX, inst->U.I.SaturateMode, inst->U.I.DstReg, 2601c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák srcreg(RC_FILE_TEMPORARY, dst.Index), inst->U.I.SrcReg[1]); 2611c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák rc_remove_instruction(inst); 2621c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák} 2631c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 2641c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic void transform_DP2(struct radeon_compiler* c, 2651c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_instruction* inst) 2661c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{ 2671c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_src_register src0 = inst->U.I.SrcReg[0]; 2681c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_src_register src1 = inst->U.I.SrcReg[1]; 2691c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák src0.Negate &= ~(RC_MASK_Z | RC_MASK_W); 2701c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák src0.Swizzle &= ~(63 << (3 * 2)); 2711c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák src0.Swizzle |= (RC_SWIZZLE_ZERO << (3 * 2)) | (RC_SWIZZLE_ZERO << (3 * 3)); 2721c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák src1.Negate &= ~(RC_MASK_Z | RC_MASK_W); 2731c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák src1.Swizzle &= ~(63 << (3 * 2)); 2741c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák src1.Swizzle |= (RC_SWIZZLE_ZERO << (3 * 2)) | (RC_SWIZZLE_ZERO << (3 * 3)); 2751c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák emit2(c, inst->Prev, RC_OPCODE_DP3, inst->U.I.SaturateMode, inst->U.I.DstReg, src0, src1); 2761c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák rc_remove_instruction(inst); 2771c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák} 2781c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 2791c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic void transform_DPH(struct radeon_compiler* c, 2801c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_instruction* inst) 
2811c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{ 2821c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_src_register src0 = inst->U.I.SrcReg[0]; 2831c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák src0.Negate &= ~RC_MASK_W; 2841c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák src0.Swizzle &= ~(7 << (3 * 3)); 2851c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák src0.Swizzle |= RC_SWIZZLE_ONE << (3 * 3); 2861c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák emit2(c, inst->Prev, RC_OPCODE_DP4, inst->U.I.SaturateMode, inst->U.I.DstReg, src0, inst->U.I.SrcReg[1]); 2871c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák rc_remove_instruction(inst); 2881c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák} 2891c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 2901c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák/** 2911c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * [1, src0.y*src1.y, src0.z, src1.w] 2921c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * So basically MUL with lotsa swizzling. 
2931c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák */ 2941c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic void transform_DST(struct radeon_compiler* c, 2951c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_instruction* inst) 2961c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{ 2971c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák emit2(c, inst->Prev, RC_OPCODE_MUL, inst->U.I.SaturateMode, inst->U.I.DstReg, 2981c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_ONE, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_ONE), 2991c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_ONE, RC_SWIZZLE_Y, RC_SWIZZLE_ONE, RC_SWIZZLE_W)); 3001c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák rc_remove_instruction(inst); 3011c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák} 3021c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 3031c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic void transform_FLR(struct radeon_compiler* c, 3041c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_instruction* inst) 3051c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{ 3061c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_dst_register dst = try_to_reuse_dst(c, inst); 3071c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dst, inst->U.I.SrcReg[0]); 3081c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák emit2(c, inst->Prev, RC_OPCODE_ADD, inst->U.I.SaturateMode, inst->U.I.DstReg, 3091c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.SrcReg[0], negate(srcreg(RC_FILE_TEMPORARY, dst.Index))); 3101c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák rc_remove_instruction(inst); 3111c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák} 3121c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 3131c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák/** 3141c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * Definition of LIT (from 
ARB_fragment_program): 3151c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * 3161c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * tmp = VectorLoad(op0); 3171c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * if (tmp.x < 0) tmp.x = 0; 3181c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * if (tmp.y < 0) tmp.y = 0; 3191c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon); 3201c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon; 3211c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * result.x = 1.0; 3221c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * result.y = tmp.x; 3231c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * result.z = (tmp.x > 0) ? RoughApproxPower(tmp.y, tmp.w) : 0.0; 3241c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * result.w = 1.0; 3251c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * 3261c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * The longest path of computation is the one leading to result.z, 3271c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * consisting of 5 operations. This implementation of LIT takes 3281c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * 5 slots, if the subsequent optimization passes are clever enough 3291c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * to pair instructions correctly. 
3301c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák */ 3311c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic void transform_LIT(struct radeon_compiler* c, 3321c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_instruction* inst) 3331c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{ 3341c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák unsigned int constant; 3351c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák unsigned int constant_swizzle; 3361c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák unsigned int temp; 3371c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_src_register srctemp; 3381c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 3391c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák constant = rc_constants_add_immediate_scalar(&c->Program.Constants, -127.999999, &constant_swizzle); 3401c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 3411c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (inst->U.I.DstReg.WriteMask != RC_MASK_XYZW || inst->U.I.DstReg.File != RC_FILE_TEMPORARY) { 3421c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_instruction * inst_mov; 3431c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 3441c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst_mov = emit1(c, inst, 3451c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák RC_OPCODE_MOV, 0, inst->U.I.DstReg, 3461c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák srcreg(RC_FILE_TEMPORARY, rc_find_free_temporary(c))); 3471c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 3481c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.DstReg.File = RC_FILE_TEMPORARY; 3491c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.DstReg.Index = inst_mov->U.I.SrcReg[0].Index; 3501c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.DstReg.WriteMask = RC_MASK_XYZW; 3511c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 3521c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 3531c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek 
Olšák temp = inst->U.I.DstReg.Index; 3541c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák srctemp = srcreg(RC_FILE_TEMPORARY, temp); 3551c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 3561c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák /* tmp.x = max(0.0, Src.x); */ 3571c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák /* tmp.y = max(0.0, Src.y); */ 3581c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák /* tmp.w = clamp(Src.z, -128+eps, 128-eps); */ 3591c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák emit2(c, inst->Prev, RC_OPCODE_MAX, 0, 3601c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák dstregtmpmask(temp, RC_MASK_XYW), 3611c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.SrcReg[0], 3621c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák swizzle(srcreg(RC_FILE_CONSTANT, constant), 3631c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák RC_SWIZZLE_ZERO, RC_SWIZZLE_ZERO, RC_SWIZZLE_ZERO, constant_swizzle&3)); 3641c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák emit2(c, inst->Prev, RC_OPCODE_MIN, 0, 3651c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák dstregtmpmask(temp, RC_MASK_Z), 3661c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák swizzle_wwww(srctemp), 3671c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák negate(srcregswz(RC_FILE_CONSTANT, constant, constant_swizzle))); 3681c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 3691c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák /* tmp.w = Pow(tmp.y, tmp.w) */ 3701c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák emit1(c, inst->Prev, RC_OPCODE_LG2, 0, 3711c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák dstregtmpmask(temp, RC_MASK_W), 3721c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák swizzle_yyyy(srctemp)); 3731c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák emit2(c, inst->Prev, RC_OPCODE_MUL, 0, 3741c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák dstregtmpmask(temp, RC_MASK_W), 3751c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák swizzle_wwww(srctemp), 
3761c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák swizzle_zzzz(srctemp)); 3771c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák emit1(c, inst->Prev, RC_OPCODE_EX2, 0, 3781c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák dstregtmpmask(temp, RC_MASK_W), 3791c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák swizzle_wwww(srctemp)); 3801c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 3811c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák /* tmp.z = (tmp.x > 0) ? tmp.w : 0.0 */ 3821c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, 3831c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák dstregtmpmask(temp, RC_MASK_Z), 3841c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák negate(swizzle_xxxx(srctemp)), 3851c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák swizzle_wwww(srctemp), 3861c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák builtin_zero); 3871c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 3881c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák /* tmp.x, tmp.y, tmp.w = 1.0, tmp.x, 1.0 */ 3891c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák emit1(c, inst->Prev, RC_OPCODE_MOV, inst->U.I.SaturateMode, 3901c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák dstregtmpmask(temp, RC_MASK_XYW), 3911c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák swizzle(srctemp, RC_SWIZZLE_ONE, RC_SWIZZLE_X, RC_SWIZZLE_ONE, RC_SWIZZLE_ONE)); 3921c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 3931c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák rc_remove_instruction(inst); 3941c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák} 3951c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 3961c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic void transform_LRP(struct radeon_compiler* c, 3971c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_instruction* inst) 3981c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{ 3991c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_dst_register dst = 
try_to_reuse_dst(c, inst); 4001c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 4011c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák emit2(c, inst->Prev, RC_OPCODE_ADD, 0, 4021c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák dst, 4031c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.SrcReg[1], negate(inst->U.I.SrcReg[2])); 4041c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák emit3(c, inst->Prev, RC_OPCODE_MAD, inst->U.I.SaturateMode, 4051c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.DstReg, 4061c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.SrcReg[0], srcreg(RC_FILE_TEMPORARY, dst.Index), inst->U.I.SrcReg[2]); 4071c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 4081c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák rc_remove_instruction(inst); 4091c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák} 4101c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 4111c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic void transform_POW(struct radeon_compiler* c, 4121c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_instruction* inst) 4131c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{ 4141c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_dst_register tempdst = try_to_reuse_dst(c, inst); 4151c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_src_register tempsrc = srcreg(RC_FILE_TEMPORARY, tempdst.Index); 4161c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák tempdst.WriteMask = RC_MASK_W; 4171c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák tempsrc.Swizzle = RC_SWIZZLE_WWWW; 4181c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 4191c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák emit1(c, inst->Prev, RC_OPCODE_LG2, 0, tempdst, swizzle_xxxx(inst->U.I.SrcReg[0])); 4201c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák emit2(c, inst->Prev, RC_OPCODE_MUL, 0, tempdst, tempsrc, swizzle_xxxx(inst->U.I.SrcReg[1])); 4211c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák emit1(c, 
inst->Prev, RC_OPCODE_EX2, inst->U.I.SaturateMode, inst->U.I.DstReg, tempsrc); 4221c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 4231c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák rc_remove_instruction(inst); 4241c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák} 4251c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 4263d32e589879806297258e36ea80aae5044293ca3Tom Stellard/* dst = ROUND(src) : 4273d32e589879806297258e36ea80aae5044293ca3Tom Stellard * add = src + .5 4283d32e589879806297258e36ea80aae5044293ca3Tom Stellard * frac = FRC(add) 4293d32e589879806297258e36ea80aae5044293ca3Tom Stellard * dst = add - frac 4303d32e589879806297258e36ea80aae5044293ca3Tom Stellard * 4313d32e589879806297258e36ea80aae5044293ca3Tom Stellard * According to the GLSL spec, the implementor can decide which way to round 4323d32e589879806297258e36ea80aae5044293ca3Tom Stellard * when the fraction is .5. We round down for .5. 4333d32e589879806297258e36ea80aae5044293ca3Tom Stellard * 4343d32e589879806297258e36ea80aae5044293ca3Tom Stellard */ 4353d32e589879806297258e36ea80aae5044293ca3Tom Stellardstatic void transform_ROUND(struct radeon_compiler* c, 4363d32e589879806297258e36ea80aae5044293ca3Tom Stellard struct rc_instruction* inst) 4373d32e589879806297258e36ea80aae5044293ca3Tom Stellard{ 4383d32e589879806297258e36ea80aae5044293ca3Tom Stellard unsigned int mask = inst->U.I.DstReg.WriteMask; 4393d32e589879806297258e36ea80aae5044293ca3Tom Stellard unsigned int frac_index, add_index; 4403d32e589879806297258e36ea80aae5044293ca3Tom Stellard struct rc_dst_register frac_dst, add_dst; 4413d32e589879806297258e36ea80aae5044293ca3Tom Stellard struct rc_src_register frac_src, add_src; 4423d32e589879806297258e36ea80aae5044293ca3Tom Stellard 4433d32e589879806297258e36ea80aae5044293ca3Tom Stellard /* add = src + .5 */ 4443d32e589879806297258e36ea80aae5044293ca3Tom Stellard add_index = rc_find_free_temporary(c); 4453d32e589879806297258e36ea80aae5044293ca3Tom Stellard add_dst = 
dstregtmpmask(add_index, mask); 4463d32e589879806297258e36ea80aae5044293ca3Tom Stellard emit2(c, inst->Prev, RC_OPCODE_ADD, 0, add_dst, inst->U.I.SrcReg[0], 4473d32e589879806297258e36ea80aae5044293ca3Tom Stellard builtin_half); 4483d32e589879806297258e36ea80aae5044293ca3Tom Stellard add_src = srcreg(RC_FILE_TEMPORARY, add_dst.Index); 4493d32e589879806297258e36ea80aae5044293ca3Tom Stellard 4503d32e589879806297258e36ea80aae5044293ca3Tom Stellard 4513d32e589879806297258e36ea80aae5044293ca3Tom Stellard /* frac = FRC(add) */ 4523d32e589879806297258e36ea80aae5044293ca3Tom Stellard frac_index = rc_find_free_temporary(c); 4533d32e589879806297258e36ea80aae5044293ca3Tom Stellard frac_dst = dstregtmpmask(frac_index, mask); 4543d32e589879806297258e36ea80aae5044293ca3Tom Stellard emit1(c, inst->Prev, RC_OPCODE_FRC, 0, frac_dst, add_src); 4553d32e589879806297258e36ea80aae5044293ca3Tom Stellard frac_src = srcreg(RC_FILE_TEMPORARY, frac_dst.Index); 4563d32e589879806297258e36ea80aae5044293ca3Tom Stellard 4573d32e589879806297258e36ea80aae5044293ca3Tom Stellard /* dst = add - frac */ 4583d32e589879806297258e36ea80aae5044293ca3Tom Stellard emit2(c, inst->Prev, RC_OPCODE_ADD, 0, inst->U.I.DstReg, 4593d32e589879806297258e36ea80aae5044293ca3Tom Stellard add_src, negate(frac_src)); 4603d32e589879806297258e36ea80aae5044293ca3Tom Stellard rc_remove_instruction(inst); 4613d32e589879806297258e36ea80aae5044293ca3Tom Stellard} 4623d32e589879806297258e36ea80aae5044293ca3Tom Stellard 4631c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic void transform_RSQ(struct radeon_compiler* c, 4641c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_instruction* inst) 4651c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{ 4661c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.SrcReg[0] = absolute(inst->U.I.SrcReg[0]); 4671c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák} 4681c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 4691c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek 
Olšákstatic void transform_SEQ(struct radeon_compiler* c, 4701c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_instruction* inst) 4711c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{ 4721c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_dst_register dst = try_to_reuse_dst(c, inst); 4731c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 4741c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1])); 4751c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg, 4761c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák negate(absolute(srcreg(RC_FILE_TEMPORARY, dst.Index))), builtin_zero, builtin_one); 4771c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 4781c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák rc_remove_instruction(inst); 4791c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák} 4801c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 4811c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic void transform_SFL(struct radeon_compiler* c, 4821c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_instruction* inst) 4831c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{ 4841c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák emit1(c, inst->Prev, RC_OPCODE_MOV, inst->U.I.SaturateMode, inst->U.I.DstReg, builtin_zero); 4851c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák rc_remove_instruction(inst); 4861c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák} 4871c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 4881c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic void transform_SGE(struct radeon_compiler* c, 4891c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_instruction* inst) 4901c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{ 4911c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_dst_register dst = try_to_reuse_dst(c, inst); 
4921c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 4931c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1])); 4941c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg, 4951c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_zero, builtin_one); 4961c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 4971c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák rc_remove_instruction(inst); 4981c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák} 4991c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 5001c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic void transform_SGT(struct radeon_compiler* c, 5011c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_instruction* inst) 5021c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{ 5031c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_dst_register dst = try_to_reuse_dst(c, inst); 5041c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 5051c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, negate(inst->U.I.SrcReg[0]), inst->U.I.SrcReg[1]); 5061c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg, 5071c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_one, builtin_zero); 5081c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 5091c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák rc_remove_instruction(inst); 5101c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák} 5111c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 5121c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic void transform_SLE(struct radeon_compiler* c, 5131c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_instruction* inst) 
5141c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{ 5151c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_dst_register dst = try_to_reuse_dst(c, inst); 5161c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 5171c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, negate(inst->U.I.SrcReg[0]), inst->U.I.SrcReg[1]); 5181c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg, 5191c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_zero, builtin_one); 5201c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 5211c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák rc_remove_instruction(inst); 5221c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák} 5231c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 5241c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic void transform_SLT(struct radeon_compiler* c, 5251c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_instruction* inst) 5261c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{ 5271c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_dst_register dst = try_to_reuse_dst(c, inst); 5281c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 5291c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1])); 5301c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg, 5311c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_one, builtin_zero); 5321c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 5331c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák rc_remove_instruction(inst); 5341c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák} 5351c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 5361c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek 
Olšákstatic void transform_SNE(struct radeon_compiler* c, 5371c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_instruction* inst) 5381c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{ 5391c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_dst_register dst = try_to_reuse_dst(c, inst); 5401c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 5411c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1])); 5421c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg, 5431c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák negate(absolute(srcreg(RC_FILE_TEMPORARY, dst.Index))), builtin_one, builtin_zero); 5441c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 5451c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák rc_remove_instruction(inst); 5461c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák} 5471c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 5481c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic void transform_SSG(struct radeon_compiler* c, 5491c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_instruction* inst) 5501c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{ 5511c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák /* result = sign(x) 5521c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * 5531c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * CMP tmp0, -x, 1, 0 5541c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * CMP tmp1, x, 1, 0 5551c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * ADD result, tmp0, -tmp1; 5561c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák */ 5571c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_dst_register dst0; 5581c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák unsigned tmp1; 5591c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 5601c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák /* 0 < x */ 
5611c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák dst0 = try_to_reuse_dst(c, inst); 5621c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák emit3(c, inst->Prev, RC_OPCODE_CMP, 0, 5631c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák dst0, 5641c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák negate(inst->U.I.SrcReg[0]), 5651c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák builtin_one, 5661c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák builtin_zero); 5671c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 5681c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák /* x < 0 */ 5691c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák tmp1 = rc_find_free_temporary(c); 5701c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák emit3(c, inst->Prev, RC_OPCODE_CMP, 0, 5711c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák dstregtmpmask(tmp1, inst->U.I.DstReg.WriteMask), 5721c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.SrcReg[0], 5731c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák builtin_one, 5741c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák builtin_zero); 5751c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 5761c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák /* Either both are zero, or one of them is one and the other is zero. 
*/ 5771c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák /* result = tmp0 - tmp1 */ 5781c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák emit2(c, inst->Prev, RC_OPCODE_ADD, 0, 5791c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.DstReg, 5801c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák srcreg(RC_FILE_TEMPORARY, dst0.Index), 5811c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák negate(srcreg(RC_FILE_TEMPORARY, tmp1))); 5821c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 5831c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák rc_remove_instruction(inst); 5841c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák} 5851c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 5861c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic void transform_SUB(struct radeon_compiler* c, 5871c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_instruction* inst) 5881c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{ 5891c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.Opcode = RC_OPCODE_ADD; 5901c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.SrcReg[1] = negate(inst->U.I.SrcReg[1]); 5911c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák} 5921c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 5931c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic void transform_SWZ(struct radeon_compiler* c, 5941c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_instruction* inst) 5951c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{ 5961c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.Opcode = RC_OPCODE_MOV; 5971c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák} 5981c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 5991c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic void transform_XPD(struct radeon_compiler* c, 6001c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_instruction* inst) 6011c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{ 6021c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 
struct rc_dst_register dst = try_to_reuse_dst(c, inst); 6031c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 6041c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák emit2(c, inst->Prev, RC_OPCODE_MUL, 0, dst, 6051c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_W), 6061c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_W)); 6071c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák emit3(c, inst->Prev, RC_OPCODE_MAD, inst->U.I.SaturateMode, inst->U.I.DstReg, 6081c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_W), 6091c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_W), 6101c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák negate(srcreg(RC_FILE_TEMPORARY, dst.Index))); 6111c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 6121c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák rc_remove_instruction(inst); 6131c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák} 6141c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 6151c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 6161c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák/** 6171c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * Can be used as a transformation for @ref radeonClauseLocalTransform, 6181c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * no userData necessary. 
6191c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * 6201c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * Eliminates the following ALU instructions: 6211c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * ABS, CEIL, DPH, DST, FLR, LIT, LRP, POW, SEQ, SFL, SGE, SGT, SLE, SLT, SNE, SUB, SWZ, XPD 6221c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * using: 6231c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * MOV, ADD, MUL, MAD, FRC, DP3, LG2, EX2, CMP 6241c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * 6251c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * Transforms RSQ to Radeon's native RSQ by explicitly setting 6261c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * absolute value. 6271c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * 6281c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * @note should be applicable to R300 and R500 fragment programs. 6291c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák */ 6301c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákint radeonTransformALU( 6311c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct radeon_compiler * c, 6321c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_instruction* inst, 6331c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák void* unused) 6341c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{ 6351c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák switch(inst->U.I.Opcode) { 6361c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák case RC_OPCODE_ABS: transform_ABS(c, inst); return 1; 6371c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák case RC_OPCODE_CEIL: transform_CEIL(c, inst); return 1; 6381c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák case RC_OPCODE_CLAMP: transform_CLAMP(c, inst); return 1; 6391c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák case RC_OPCODE_DP2: transform_DP2(c, inst); return 1; 6401c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák case RC_OPCODE_DPH: transform_DPH(c, inst); return 1; 
6411c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák case RC_OPCODE_DST: transform_DST(c, inst); return 1; 6421c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák case RC_OPCODE_FLR: transform_FLR(c, inst); return 1; 6431c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák case RC_OPCODE_LIT: transform_LIT(c, inst); return 1; 6441c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák case RC_OPCODE_LRP: transform_LRP(c, inst); return 1; 6451c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák case RC_OPCODE_POW: transform_POW(c, inst); return 1; 6463d32e589879806297258e36ea80aae5044293ca3Tom Stellard case RC_OPCODE_ROUND: transform_ROUND(c, inst); return 1; 6471c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák case RC_OPCODE_RSQ: transform_RSQ(c, inst); return 1; 6481c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák case RC_OPCODE_SEQ: transform_SEQ(c, inst); return 1; 6491c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák case RC_OPCODE_SFL: transform_SFL(c, inst); return 1; 6501c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák case RC_OPCODE_SGE: transform_SGE(c, inst); return 1; 6511c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák case RC_OPCODE_SGT: transform_SGT(c, inst); return 1; 6521c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák case RC_OPCODE_SLE: transform_SLE(c, inst); return 1; 6531c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák case RC_OPCODE_SLT: transform_SLT(c, inst); return 1; 6541c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák case RC_OPCODE_SNE: transform_SNE(c, inst); return 1; 6551c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák case RC_OPCODE_SSG: transform_SSG(c, inst); return 1; 6561c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák case RC_OPCODE_SUB: transform_SUB(c, inst); return 1; 6571c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák case RC_OPCODE_SWZ: transform_SWZ(c, inst); return 1; 6581c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák case RC_OPCODE_XPD: transform_XPD(c, inst); return 1; 
6591c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák default: 6601c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return 0; 6611c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 6621c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák} 6631c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 6641c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 6651c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic void transform_r300_vertex_ABS(struct radeon_compiler* c, 6661c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_instruction* inst) 6671c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{ 6681c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák /* Note: r500 can take absolute values, but r300 cannot. */ 6691c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.Opcode = RC_OPCODE_MAX; 6701c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.SrcReg[1] = inst->U.I.SrcReg[0]; 6711c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW; 6721c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák} 6731c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 6741c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic void transform_r300_vertex_CMP(struct radeon_compiler* c, 6751c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_instruction* inst) 6761c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{ 6771c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák /* There is no decent CMP available, so let's rig one up. 6781c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * CMP is defined as dst = src0 < 0.0 ? 
src1 : src2 6791c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * The following sequence consumes zero to two temps and two extra slots 6801c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * (the second temp and the second slot is consumed by transform_LRP), 6811c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * but should be equivalent: 6821c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * 6831c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * SLT tmp0, src0, 0.0 6841c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * LRP dst, tmp0, src1, src2 6851c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * 6861c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * Yes, I know, I'm a mad scientist. ~ C. & M. */ 6871c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_dst_register dst = try_to_reuse_dst(c, inst); 6881c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 6891c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák /* SLT tmp0, src0, 0.0 */ 6901c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák emit2(c, inst->Prev, RC_OPCODE_SLT, 0, 6911c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák dst, 6921c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.SrcReg[0], builtin_zero); 6931c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 6941c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák /* LRP dst, tmp0, src1, src2 */ 6951c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák transform_LRP(c, 6961c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák emit3(c, inst->Prev, RC_OPCODE_LRP, 0, 6971c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.DstReg, 6981c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák srcreg(RC_FILE_TEMPORARY, dst.Index), inst->U.I.SrcReg[1], inst->U.I.SrcReg[2])); 6991c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 7001c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák rc_remove_instruction(inst); 7011c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák} 7021c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 
7031c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic void transform_r300_vertex_DP2(struct radeon_compiler* c, 7041c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_instruction* inst) 7051c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{ 7061c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_instruction *next_inst = inst->Next; 7071c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák transform_DP2(c, inst); 7081c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák next_inst->Prev->U.I.Opcode = RC_OPCODE_DP4; 7091c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák} 7101c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 7111c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic void transform_r300_vertex_DP3(struct radeon_compiler* c, 7121c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_instruction* inst) 7131c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{ 7141c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_src_register src0 = inst->U.I.SrcReg[0]; 7151c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_src_register src1 = inst->U.I.SrcReg[1]; 7161c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák src0.Negate &= ~RC_MASK_W; 7171c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák src0.Swizzle &= ~(7 << (3 * 3)); 7181c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák src0.Swizzle |= RC_SWIZZLE_ZERO << (3 * 3); 7191c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák src1.Negate &= ~RC_MASK_W; 7201c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák src1.Swizzle &= ~(7 << (3 * 3)); 7211c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák src1.Swizzle |= RC_SWIZZLE_ZERO << (3 * 3); 7221c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák emit2(c, inst->Prev, RC_OPCODE_DP4, inst->U.I.SaturateMode, inst->U.I.DstReg, src0, src1); 7231c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák rc_remove_instruction(inst); 7241c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák} 7251c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek 
Olšák 7261c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic void transform_r300_vertex_fix_LIT(struct radeon_compiler* c, 7271c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_instruction* inst) 7281c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{ 7291c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_dst_register dst = try_to_reuse_dst(c, inst); 7301c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák unsigned constant_swizzle; 7311c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák int constant = rc_constants_add_immediate_scalar(&c->Program.Constants, 7321c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 0.0000000000000000001, 7331c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák &constant_swizzle); 7341c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 7351c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák /* MOV dst, src */ 7361c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák dst.WriteMask = RC_MASK_XYZW; 7371c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák emit1(c, inst->Prev, RC_OPCODE_MOV, 0, 7381c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák dst, 7391c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.SrcReg[0]); 7401c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 7411c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák /* MAX dst.y, src, 0.00...001 */ 7421c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák emit2(c, inst->Prev, RC_OPCODE_MAX, 0, 7431c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák dstregtmpmask(dst.Index, RC_MASK_Y), 7441c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák srcreg(RC_FILE_TEMPORARY, dst.Index), 7451c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák srcregswz(RC_FILE_CONSTANT, constant, constant_swizzle)); 7461c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 7471c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.SrcReg[0] = srcreg(RC_FILE_TEMPORARY, dst.Index); 7481c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák} 7491c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek 
Olšák 7501c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic void transform_r300_vertex_SEQ(struct radeon_compiler *c, 7511c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_instruction *inst) 7521c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{ 7531c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák /* x = y <==> x >= y && y >= x */ 7541c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák int tmp = rc_find_free_temporary(c); 7551c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 7561c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák /* x <= y */ 7571c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák emit2(c, inst->Prev, RC_OPCODE_SGE, 0, 7581c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák dstregtmpmask(tmp, inst->U.I.DstReg.WriteMask), 7591c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.SrcReg[0], 7601c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.SrcReg[1]); 7611c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 7621c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák /* y <= x */ 7631c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák emit2(c, inst->Prev, RC_OPCODE_SGE, 0, 7641c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.DstReg, 7651c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.SrcReg[1], 7661c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.SrcReg[0]); 7671c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 7681c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák /* x && y = x * y */ 7691c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák emit2(c, inst->Prev, RC_OPCODE_MUL, 0, 7701c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.DstReg, 7711c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák srcreg(RC_FILE_TEMPORARY, tmp), 7721c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák srcreg(inst->U.I.DstReg.File, inst->U.I.DstReg.Index)); 7731c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 7741c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák rc_remove_instruction(inst); 
7751c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák} 7761c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 7771c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic void transform_r300_vertex_SNE(struct radeon_compiler *c, 7781c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_instruction *inst) 7791c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{ 7801c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák /* x != y <==> x < y || y < x */ 7811c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák int tmp = rc_find_free_temporary(c); 7821c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 7831c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák /* x < y */ 7841c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák emit2(c, inst->Prev, RC_OPCODE_SLT, 0, 7851c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák dstregtmpmask(tmp, inst->U.I.DstReg.WriteMask), 7861c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.SrcReg[0], 7871c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.SrcReg[1]); 7881c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 7891c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák /* y < x */ 7901c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák emit2(c, inst->Prev, RC_OPCODE_SLT, 0, 7911c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.DstReg, 7921c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.SrcReg[1], 7931c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.SrcReg[0]); 7941c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 7951c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák /* x || y = max(x, y) */ 7961c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák emit2(c, inst->Prev, RC_OPCODE_MAX, 0, 7971c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.DstReg, 7981c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák srcreg(RC_FILE_TEMPORARY, tmp), 7991c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák srcreg(inst->U.I.DstReg.File, inst->U.I.DstReg.Index)); 
8001c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 8011c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák rc_remove_instruction(inst); 8021c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák} 8031c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 8041c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic void transform_r300_vertex_SGT(struct radeon_compiler* c, 8051c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_instruction* inst) 8061c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{ 8071c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák /* x > y <==> -x < -y */ 8081c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.Opcode = RC_OPCODE_SLT; 8091c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW; 8101c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW; 8111c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák} 8121c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 8131c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic void transform_r300_vertex_SLE(struct radeon_compiler* c, 8141c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_instruction* inst) 8151c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{ 8161c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák /* x <= y <==> -x >= -y */ 8171c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.Opcode = RC_OPCODE_SGE; 8181c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW; 8191c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW; 8201c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák} 8211c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 8221c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic void transform_r300_vertex_SSG(struct radeon_compiler* c, 8231c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_instruction* inst) 8241c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{ 
8251c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák /* result = sign(x) 8261c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * 8271c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * SLT tmp0, 0, x; 8281c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * SLT tmp1, x, 0; 8291c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * ADD result, tmp0, -tmp1; 8301c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák */ 8311c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_dst_register dst0 = try_to_reuse_dst(c, inst); 8321c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák unsigned tmp1; 8331c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 8341c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák /* 0 < x */ 8351c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák dst0 = try_to_reuse_dst(c, inst); 8361c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák emit2(c, inst->Prev, RC_OPCODE_SLT, 0, 8371c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák dst0, 8381c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák builtin_zero, 8391c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.SrcReg[0]); 8401c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 8411c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák /* x < 0 */ 8421c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák tmp1 = rc_find_free_temporary(c); 8431c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák emit2(c, inst->Prev, RC_OPCODE_SLT, 0, 8441c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák dstregtmpmask(tmp1, inst->U.I.DstReg.WriteMask), 8451c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.SrcReg[0], 8461c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák builtin_zero); 8471c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 8481c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák /* Either both are zero, or one of them is one and the other is zero. 
*/ 8491c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák /* result = tmp0 - tmp1 */ 8501c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák emit2(c, inst->Prev, RC_OPCODE_ADD, 0, 8511c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.DstReg, 8521c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák srcreg(RC_FILE_TEMPORARY, dst0.Index), 8531c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák negate(srcreg(RC_FILE_TEMPORARY, tmp1))); 8541c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 8551c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák rc_remove_instruction(inst); 8561c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák} 8571c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 8581c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák/** 8591c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * For use with rc_local_transform, this transforms non-native ALU 8601c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * instructions of the r300 up to r500 vertex engine. 8611c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák */ 8621c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákint r300_transform_vertex_alu( 8631c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct radeon_compiler * c, 8641c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_instruction* inst, 8651c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák void* unused) 8661c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{ 8671c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák switch(inst->U.I.Opcode) { 8681c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák case RC_OPCODE_ABS: transform_r300_vertex_ABS(c, inst); return 1; 8691c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák case RC_OPCODE_CEIL: transform_CEIL(c, inst); return 1; 8701c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák case RC_OPCODE_CLAMP: transform_CLAMP(c, inst); return 1; 8711c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák case RC_OPCODE_CMP: transform_r300_vertex_CMP(c, inst); return 1; 
8721c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák case RC_OPCODE_DP2: transform_r300_vertex_DP2(c, inst); return 1; 8731c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák case RC_OPCODE_DP3: transform_r300_vertex_DP3(c, inst); return 1; 8741c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák case RC_OPCODE_DPH: transform_DPH(c, inst); return 1; 8751c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák case RC_OPCODE_FLR: transform_FLR(c, inst); return 1; 8761c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák case RC_OPCODE_LIT: transform_r300_vertex_fix_LIT(c, inst); return 1; 8771c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák case RC_OPCODE_LRP: transform_LRP(c, inst); return 1; 8781c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák case RC_OPCODE_SEQ: 8791c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (!c->is_r500) { 8801c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák transform_r300_vertex_SEQ(c, inst); 8811c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return 1; 8821c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 8831c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return 0; 8841c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák case RC_OPCODE_SFL: transform_SFL(c, inst); return 1; 8851c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák case RC_OPCODE_SGT: transform_r300_vertex_SGT(c, inst); return 1; 8861c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák case RC_OPCODE_SLE: transform_r300_vertex_SLE(c, inst); return 1; 8871c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák case RC_OPCODE_SNE: 8881c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (!c->is_r500) { 8891c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák transform_r300_vertex_SNE(c, inst); 8901c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return 1; 8911c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 8921c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return 0; 8931c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák case RC_OPCODE_SSG: 
transform_r300_vertex_SSG(c, inst); return 1; 8941c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák case RC_OPCODE_SUB: transform_SUB(c, inst); return 1; 8951c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák case RC_OPCODE_SWZ: transform_SWZ(c, inst); return 1; 8961c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák case RC_OPCODE_XPD: transform_XPD(c, inst); return 1; 8971c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák default: 8981c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return 0; 8991c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 9001c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák} 9011c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 9021c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic void sincos_constants(struct radeon_compiler* c, unsigned int *constants) 9031c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{ 9041c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák static const float SinCosConsts[2][4] = { 9051c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák { 9061c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 1.273239545, /* 4/PI */ 9071c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák -0.405284735, /* -4/(PI*PI) */ 9081c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 3.141592654, /* PI */ 9091c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 0.2225 /* weight */ 9101c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák }, 9111c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák { 9121c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 0.75, 9131c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 0.5, 9141c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 0.159154943, /* 1/(2*PI) */ 9151c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 6.283185307 /* 2*PI */ 9161c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 9171c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák }; 9181c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák int i; 9191c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 
9201c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák for(i = 0; i < 2; ++i) 9211c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák constants[i] = rc_constants_add_immediate_vec4(&c->Program.Constants, SinCosConsts[i]); 9221c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák} 9231c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 9241c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák/** 9251c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * Approximate sin(x), where x is clamped to (-pi/2, pi/2). 9261c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * 9271c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * MUL tmp.xy, src, { 4/PI, -4/(PI^2) } 9281c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * MAD tmp.x, tmp.y, |src|, tmp.x 9291c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * MAD tmp.y, tmp.x, |tmp.x|, -tmp.x 9301c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * MAD dest, tmp.y, weight, tmp.x 9311c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák */ 9321c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic void sin_approx( 9331c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct radeon_compiler* c, struct rc_instruction * inst, 9341c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_dst_register dst, struct rc_src_register src, const unsigned int* constants) 9351c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{ 9361c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák unsigned int tempreg = rc_find_free_temporary(c); 9371c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 9381c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák emit2(c, inst->Prev, RC_OPCODE_MUL, 0, dstregtmpmask(tempreg, RC_MASK_XY), 9391c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák swizzle_xxxx(src), 9401c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák srcreg(RC_FILE_CONSTANT, constants[0])); 9411c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_X), 
9421c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák swizzle_yyyy(srcreg(RC_FILE_TEMPORARY, tempreg)), 9431c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák absolute(swizzle_xxxx(src)), 9441c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg))); 9451c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_Y), 9461c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg)), 9471c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák absolute(swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg))), 9481c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák negate(swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg)))); 9491c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dst, 9501c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák swizzle_yyyy(srcreg(RC_FILE_TEMPORARY, tempreg)), 9511c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák swizzle_wwww(srcreg(RC_FILE_CONSTANT, constants[0])), 9521c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg))); 9531c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák} 9541c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 9551c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák/** 9561c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * Translate the trigonometric functions COS, SIN, and SCS 9571c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * using only the basic instructions 9581c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * MOV, ADD, MUL, MAD, FRC 9591c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák */ 9601c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákint r300_transform_trig_simple(struct radeon_compiler* c, 9611c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_instruction* inst, 9621c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák void* unused) 
9631c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{ 9641c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák unsigned int constants[2]; 9651c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák unsigned int tempreg; 9661c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 9671c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (inst->U.I.Opcode != RC_OPCODE_COS && 9681c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.Opcode != RC_OPCODE_SIN && 9691c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.Opcode != RC_OPCODE_SCS) 9701c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return 0; 9711c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 9721c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák tempreg = rc_find_free_temporary(c); 9731c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 9741c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák sincos_constants(c, constants); 9751c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 9761c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (inst->U.I.Opcode == RC_OPCODE_COS) { 9771c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák /* MAD tmp.x, src, 1/(2*PI), 0.75 */ 9781c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák /* FRC tmp.x, tmp.x */ 9791c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák /* MAD tmp.z, tmp.x, 2*PI, -PI */ 9801c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W), 9811c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák swizzle_xxxx(inst->U.I.SrcReg[0]), 9821c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[1])), 9831c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák swizzle_xxxx(srcreg(RC_FILE_CONSTANT, constants[1]))); 9841c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(tempreg, RC_MASK_W), 9851c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg))); 
9861c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W), 9871c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)), 9881c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák swizzle_wwww(srcreg(RC_FILE_CONSTANT, constants[1])), 9891c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák negate(swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[0])))); 9901c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 9911c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák sin_approx(c, inst, inst->U.I.DstReg, 9921c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)), 9931c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák constants); 9941c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } else if (inst->U.I.Opcode == RC_OPCODE_SIN) { 9951c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W), 9961c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák swizzle_xxxx(inst->U.I.SrcReg[0]), 9971c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[1])), 9981c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák swizzle_yyyy(srcreg(RC_FILE_CONSTANT, constants[1]))); 9991c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(tempreg, RC_MASK_W), 10001c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg))); 10011c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W), 10021c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)), 10031c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák swizzle_wwww(srcreg(RC_FILE_CONSTANT, constants[1])), 10041c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 
negate(swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[0])))); 10051c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 10061c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák sin_approx(c, inst, inst->U.I.DstReg, 10071c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)), 10081c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák constants); 10091c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } else { 10101c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_dst_register dst; 10111c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 10121c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_XY), 10131c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák swizzle_xxxx(inst->U.I.SrcReg[0]), 10141c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[1])), 10151c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák swizzle(srcreg(RC_FILE_CONSTANT, constants[1]), RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_W)); 10161c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(tempreg, RC_MASK_XY), 10171c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák srcreg(RC_FILE_TEMPORARY, tempreg)); 10181c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_XY), 10191c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák srcreg(RC_FILE_TEMPORARY, tempreg), 10201c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák swizzle_wwww(srcreg(RC_FILE_CONSTANT, constants[1])), 10211c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák negate(swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[0])))); 10221c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 10231c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák dst = inst->U.I.DstReg; 10241c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 
10251c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák dst.WriteMask = inst->U.I.DstReg.WriteMask & RC_MASK_X; 10261c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák sin_approx(c, inst, dst, 10271c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg)), 10281c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák constants); 10291c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 10301c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák dst.WriteMask = inst->U.I.DstReg.WriteMask & RC_MASK_Y; 10311c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák sin_approx(c, inst, dst, 10321c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák swizzle_yyyy(srcreg(RC_FILE_TEMPORARY, tempreg)), 10331c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák constants); 10341c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 10351c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 10361c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák rc_remove_instruction(inst); 10371c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 10381c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return 1; 10391c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák} 10401c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 10411c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic void r300_transform_SIN_COS_SCS(struct radeon_compiler *c, 10421c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_instruction *inst, 10431c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák unsigned srctmp) 10441c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{ 10451c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (inst->U.I.Opcode == RC_OPCODE_COS) { 10461c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák emit1(c, inst->Prev, RC_OPCODE_COS, inst->U.I.SaturateMode, inst->U.I.DstReg, 10471c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW)); 10481c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } else if (inst->U.I.Opcode == 
RC_OPCODE_SIN) { 10491c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák emit1(c, inst->Prev, RC_OPCODE_SIN, inst->U.I.SaturateMode, 10501c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.DstReg, srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW)); 10511c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } else if (inst->U.I.Opcode == RC_OPCODE_SCS) { 10521c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_dst_register moddst = inst->U.I.DstReg; 10531c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 10541c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (inst->U.I.DstReg.WriteMask & RC_MASK_X) { 10551c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák moddst.WriteMask = RC_MASK_X; 10561c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák emit1(c, inst->Prev, RC_OPCODE_COS, inst->U.I.SaturateMode, moddst, 10571c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW)); 10581c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 10591c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (inst->U.I.DstReg.WriteMask & RC_MASK_Y) { 10601c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák moddst.WriteMask = RC_MASK_Y; 10611c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák emit1(c, inst->Prev, RC_OPCODE_SIN, inst->U.I.SaturateMode, moddst, 10621c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW)); 10631c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 10641c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 10651c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 10661c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák rc_remove_instruction(inst); 10671c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák} 10681c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 10691c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 10701c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák/** 10711c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * Transform the 
trigonometric functions COS, SIN, and SCS 10721c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * to include pre-scaling by 1/(2*PI) and taking the fractional 10731c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * part, so that the input to COS and SIN is always in the range [0,1). 10741c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * SCS is replaced by one COS and one SIN instruction. 10751c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * 10761c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * @warning This transformation implicitly changes the semantics of SIN and COS! 10771c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák */ 10781c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákint radeonTransformTrigScale(struct radeon_compiler* c, 10791c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_instruction* inst, 10801c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák void* unused) 10811c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{ 10821c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák static const float RCP_2PI = 0.15915494309189535; 10831c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák unsigned int temp; 10841c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák unsigned int constant; 10851c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák unsigned int constant_swizzle; 10861c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 10871c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (inst->U.I.Opcode != RC_OPCODE_COS && 10881c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.Opcode != RC_OPCODE_SIN && 10891c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.Opcode != RC_OPCODE_SCS) 10901c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return 0; 10911c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 10921c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák temp = rc_find_free_temporary(c); 10931c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák constant = rc_constants_add_immediate_scalar(&c->Program.Constants, 
RCP_2PI, &constant_swizzle); 10941c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 10951c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák emit2(c, inst->Prev, RC_OPCODE_MUL, 0, dstregtmpmask(temp, RC_MASK_W), 10961c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák swizzle_xxxx(inst->U.I.SrcReg[0]), 10971c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák srcregswz(RC_FILE_CONSTANT, constant, constant_swizzle)); 10981c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(temp, RC_MASK_W), 10991c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák srcreg(RC_FILE_TEMPORARY, temp)); 11001c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 11011c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák r300_transform_SIN_COS_SCS(c, inst, temp); 11021c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return 1; 11031c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák} 11041c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 11051c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák/** 11061c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * Transform the trigonometric functions COS, SIN, and SCS 11071c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * so that the input to COS and SIN is always in the range [-PI, PI]. 11081c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * SCS is replaced by one COS and one SIN instruction. 
11091c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák */ 11101c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákint r300_transform_trig_scale_vertex(struct radeon_compiler *c, 11111c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_instruction *inst, 11121c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák void *unused) 11131c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{ 11141c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák static const float cons[4] = {0.15915494309189535, 0.5, 6.28318530717959, -3.14159265358979}; 11151c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák unsigned int temp; 11161c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák unsigned int constant; 11171c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 11181c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (inst->U.I.Opcode != RC_OPCODE_COS && 11191c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.Opcode != RC_OPCODE_SIN && 11201c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.Opcode != RC_OPCODE_SCS) 11211c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return 0; 11221c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 11231c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák /* Repeat x in the range [-PI, PI]: 11241c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * 11251c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * repeat(x) = frac(x / 2PI + 0.5) * 2PI - PI 11261c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák */ 11271c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 11281c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák temp = rc_find_free_temporary(c); 11291c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák constant = rc_constants_add_immediate_vec4(&c->Program.Constants, cons); 11301c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 11311c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(temp, RC_MASK_W), 11321c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 
swizzle_xxxx(inst->U.I.SrcReg[0]), 11331c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák srcregswz(RC_FILE_CONSTANT, constant, RC_SWIZZLE_XXXX), 11341c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák srcregswz(RC_FILE_CONSTANT, constant, RC_SWIZZLE_YYYY)); 11351c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(temp, RC_MASK_W), 11361c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák srcreg(RC_FILE_TEMPORARY, temp)); 11371c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(temp, RC_MASK_W), 11381c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák srcreg(RC_FILE_TEMPORARY, temp), 11391c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák srcregswz(RC_FILE_CONSTANT, constant, RC_SWIZZLE_ZZZZ), 11401c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák srcregswz(RC_FILE_CONSTANT, constant, RC_SWIZZLE_WWWW)); 11411c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 11421c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák r300_transform_SIN_COS_SCS(c, inst, temp); 11431c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return 1; 11441c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák} 11451c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 11461c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák/** 11471c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * Rewrite DDX/DDY instructions to properly work with r5xx shaders. 11481c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * The r5xx MDH/MDV instruction provides per-quad partial derivatives. 11491c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * It takes the form A*B+C. A and C are set by setting src0. B should be -1. 11501c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * 11511c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * @warning This explicitly changes the form of DDX and DDY! 
11521c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák */ 11531c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 11541c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákint radeonTransformDeriv(struct radeon_compiler* c, 11551c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_instruction* inst, 11561c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák void* unused) 11571c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{ 11581c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (inst->U.I.Opcode != RC_OPCODE_DDX && inst->U.I.Opcode != RC_OPCODE_DDY) 11591c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return 0; 11601c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 11611c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_1111; 11621c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.SrcReg[1].Negate = RC_MASK_XYZW; 11631c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 11641c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return 1; 11651c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák} 11661c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 11671c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák/** 11681c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * IF Temp[0].x -\ 11691c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * KILP - > KIL -abs(Temp[0].x) 11701c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * ENDIF -/ 11711c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * 11721c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * This needs to be done in its own pass, because it modifies the instructions 11731c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * before and after KILP. 
11741c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák */ 11751c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákvoid rc_transform_KILP(struct radeon_compiler * c, void *user) 11761c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{ 11771c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_instruction * inst; 11781c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák for (inst = c->Program.Instructions.Next; 11791c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst != &c->Program.Instructions; inst = inst->Next) { 11801c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 11811c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (inst->U.I.Opcode != RC_OPCODE_KILP) 11821c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák continue; 11831c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 11841c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.Opcode = RC_OPCODE_KIL; 11851c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 11861c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (inst->Prev->U.I.Opcode != RC_OPCODE_IF 11871c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák || inst->Next->U.I.Opcode != RC_OPCODE_ENDIF) { 11881c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.SrcReg[0] = negate(builtin_one); 11891c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } else { 11901c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 11911c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.SrcReg[0] = 11921c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák negate(absolute(inst->Prev->U.I.SrcReg[0])); 11931c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák /* Remove IF */ 11941c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák rc_remove_instruction(inst->Prev); 11951c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák /* Remove ENDIF */ 11961c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák rc_remove_instruction(inst->Next); 11971c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 11981c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 
11991c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák} 1200