radeon_program_alu.c revision 3d32e589879806297258e36ea80aae5044293ca3
11c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák/*
21c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * Copyright (C) 2008 Nicolai Haehnle.
31c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák *
41c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * All Rights Reserved.
51c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák *
61c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * Permission is hereby granted, free of charge, to any person obtaining
71c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * a copy of this software and associated documentation files (the
81c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * "Software"), to deal in the Software without restriction, including
91c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * without limitation the rights to use, copy, modify, merge, publish,
101c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * distribute, sublicense, and/or sell copies of the Software, and to
111c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * permit persons to whom the Software is furnished to do so, subject to
121c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * the following conditions:
131c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák *
141c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * The above copyright notice and this permission notice (including the
151c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * next paragraph) shall be included in all copies or substantial
161c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * portions of the Software.
171c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák *
181c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
191c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
201c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
211c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
221c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
231c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
241c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
251c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák *
261c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák */
271c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
281c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák/**
291c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * @file
301c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák *
311c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * Shareable transformations that transform "special" ALU instructions
321c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * into ALU instructions that are supported by hardware.
331c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák *
341c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák */
351c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
361c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák#include "radeon_program_alu.h"
371c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
381c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák#include "radeon_compiler.h"
391c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák#include "radeon_compiler_util.h"
401c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
411c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
421c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic struct rc_instruction *emit1(
431c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct radeon_compiler * c, struct rc_instruction * after,
441c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	rc_opcode Opcode, rc_saturate_mode Saturate, struct rc_dst_register DstReg,
451c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct rc_src_register SrcReg)
461c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{
471c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct rc_instruction *fpi = rc_insert_new_instruction(c, after);
481c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
491c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	fpi->U.I.Opcode = Opcode;
501c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	fpi->U.I.SaturateMode = Saturate;
511c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	fpi->U.I.DstReg = DstReg;
521c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	fpi->U.I.SrcReg[0] = SrcReg;
531c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	return fpi;
541c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák}
551c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
561c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic struct rc_instruction *emit2(
571c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct radeon_compiler * c, struct rc_instruction * after,
581c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	rc_opcode Opcode, rc_saturate_mode Saturate, struct rc_dst_register DstReg,
591c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct rc_src_register SrcReg0, struct rc_src_register SrcReg1)
601c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{
611c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct rc_instruction *fpi = rc_insert_new_instruction(c, after);
621c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
631c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	fpi->U.I.Opcode = Opcode;
641c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	fpi->U.I.SaturateMode = Saturate;
651c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	fpi->U.I.DstReg = DstReg;
661c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	fpi->U.I.SrcReg[0] = SrcReg0;
671c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	fpi->U.I.SrcReg[1] = SrcReg1;
681c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	return fpi;
691c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák}
701c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
711c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic struct rc_instruction *emit3(
721c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct radeon_compiler * c, struct rc_instruction * after,
731c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	rc_opcode Opcode, rc_saturate_mode Saturate, struct rc_dst_register DstReg,
741c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct rc_src_register SrcReg0, struct rc_src_register SrcReg1,
751c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct rc_src_register SrcReg2)
761c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{
771c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct rc_instruction *fpi = rc_insert_new_instruction(c, after);
781c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
791c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	fpi->U.I.Opcode = Opcode;
801c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	fpi->U.I.SaturateMode = Saturate;
811c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	fpi->U.I.DstReg = DstReg;
821c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	fpi->U.I.SrcReg[0] = SrcReg0;
831c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	fpi->U.I.SrcReg[1] = SrcReg1;
841c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	fpi->U.I.SrcReg[2] = SrcReg2;
851c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	return fpi;
861c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák}
871c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
881c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic struct rc_dst_register dstregtmpmask(int index, int mask)
891c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{
9032faaea743ca74f4ba29184ef44ebf2c0e962a46Brian Paul	struct rc_dst_register dst = {0, 0, 0};
911c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	dst.File = RC_FILE_TEMPORARY;
921c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	dst.Index = index;
931c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	dst.WriteMask = mask;
941c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	return dst;
951c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák}
961c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
971c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic const struct rc_src_register builtin_zero = {
981c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	.File = RC_FILE_NONE,
991c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	.Index = 0,
1001c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	.Swizzle = RC_SWIZZLE_0000
1011c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák};
1021c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic const struct rc_src_register builtin_one = {
1031c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	.File = RC_FILE_NONE,
1041c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	.Index = 0,
1051c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	.Swizzle = RC_SWIZZLE_1111
1061c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák};
1073d32e589879806297258e36ea80aae5044293ca3Tom Stellard
1083d32e589879806297258e36ea80aae5044293ca3Tom Stellardstatic const struct rc_src_register builtin_half = {
1093d32e589879806297258e36ea80aae5044293ca3Tom Stellard	.File = RC_FILE_NONE,
1103d32e589879806297258e36ea80aae5044293ca3Tom Stellard	.Index = 0,
1113d32e589879806297258e36ea80aae5044293ca3Tom Stellard	.Swizzle = RC_SWIZZLE_HHHH
1123d32e589879806297258e36ea80aae5044293ca3Tom Stellard};
1133d32e589879806297258e36ea80aae5044293ca3Tom Stellard
1141c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic const struct rc_src_register srcreg_undefined = {
1151c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	.File = RC_FILE_NONE,
1161c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	.Index = 0,
1171c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	.Swizzle = RC_SWIZZLE_XYZW
1181c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák};
1191c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
1201c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic struct rc_src_register srcreg(int file, int index)
1211c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{
1221c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct rc_src_register src = srcreg_undefined;
1231c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	src.File = file;
1241c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	src.Index = index;
1251c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	return src;
1261c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák}
1271c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
1281c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic struct rc_src_register srcregswz(int file, int index, int swz)
1291c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{
1301c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct rc_src_register src = srcreg_undefined;
1311c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	src.File = file;
1321c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	src.Index = index;
1331c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	src.Swizzle = swz;
1341c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	return src;
1351c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák}
1361c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
1371c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic struct rc_src_register absolute(struct rc_src_register reg)
1381c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{
1391c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct rc_src_register newreg = reg;
1401c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	newreg.Abs = 1;
1411c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	newreg.Negate = RC_MASK_NONE;
1421c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	return newreg;
1431c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák}
1441c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
1451c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic struct rc_src_register negate(struct rc_src_register reg)
1461c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{
1471c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct rc_src_register newreg = reg;
1481c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	newreg.Negate = newreg.Negate ^ RC_MASK_XYZW;
1491c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	return newreg;
1501c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák}
1511c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
1521c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic struct rc_src_register swizzle(struct rc_src_register reg,
1531c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		rc_swizzle x, rc_swizzle y, rc_swizzle z, rc_swizzle w)
1541c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{
1551c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct rc_src_register swizzled = reg;
1561c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	swizzled.Swizzle = combine_swizzles4(reg.Swizzle, x, y, z, w);
1571c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	return swizzled;
1581c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák}
1591c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
1601c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic struct rc_src_register swizzle_smear(struct rc_src_register reg,
1611c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		rc_swizzle x)
1621c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{
1631c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	return swizzle(reg, x, x, x, x);
1641c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák}
1651c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
1661c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic struct rc_src_register swizzle_xxxx(struct rc_src_register reg)
1671c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{
1681c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	return swizzle_smear(reg, RC_SWIZZLE_X);
1691c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák}
1701c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
1711c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic struct rc_src_register swizzle_yyyy(struct rc_src_register reg)
1721c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{
1731c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	return swizzle_smear(reg, RC_SWIZZLE_Y);
1741c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák}
1751c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
1761c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic struct rc_src_register swizzle_zzzz(struct rc_src_register reg)
1771c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{
1781c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	return swizzle_smear(reg, RC_SWIZZLE_Z);
1791c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák}
1801c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
1811c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic struct rc_src_register swizzle_wwww(struct rc_src_register reg)
1821c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{
1831c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	return swizzle_smear(reg, RC_SWIZZLE_W);
1841c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák}
1851c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
1861c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic int is_dst_safe_to_reuse(struct rc_instruction *inst)
1871c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{
1881c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	const struct rc_opcode_info *info = rc_get_opcode_info(inst->U.I.Opcode);
1891c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	unsigned i;
1901c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
1911c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	assert(info->HasDstReg);
1921c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
1931c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	if (inst->U.I.DstReg.File != RC_FILE_TEMPORARY)
1941c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		return 0;
1951c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
1961c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	for (i = 0; i < info->NumSrcRegs; i++) {
1971c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		if (inst->U.I.SrcReg[i].File == RC_FILE_TEMPORARY &&
1981c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		    inst->U.I.SrcReg[i].Index == inst->U.I.DstReg.Index)
1991c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			return 0;
2001c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	}
2011c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
2021c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	return 1;
2031c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák}
2041c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
2051c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic struct rc_dst_register try_to_reuse_dst(struct radeon_compiler *c,
2061c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák					       struct rc_instruction *inst)
2071c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{
2081c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	unsigned tmp;
2091c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
2101c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	if (is_dst_safe_to_reuse(inst))
2111c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		tmp = inst->U.I.DstReg.Index;
2121c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	else
2131c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		tmp = rc_find_free_temporary(c);
2141c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
2151c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	return dstregtmpmask(tmp, inst->U.I.DstReg.WriteMask);
2161c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák}
2171c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
2181c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic void transform_ABS(struct radeon_compiler* c,
2191c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct rc_instruction* inst)
2201c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{
2211c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct rc_src_register src = inst->U.I.SrcReg[0];
2221c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	src.Abs = 1;
2231c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	src.Negate = RC_MASK_NONE;
2241c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	emit1(c, inst->Prev, RC_OPCODE_MOV, inst->U.I.SaturateMode, inst->U.I.DstReg, src);
2251c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	rc_remove_instruction(inst);
2261c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák}
2271c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
2281c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic void transform_CEIL(struct radeon_compiler* c,
2291c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct rc_instruction* inst)
2301c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{
2311c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	/* Assuming:
2321c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	 *     ceil(x) = -floor(-x)
2331c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	 *
2341c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	 * After inlining floor:
2351c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	 *     ceil(x) = -(-x-frac(-x))
2361c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	 *
2371c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	 * After simplification:
2381c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	 *     ceil(x) = x+frac(-x)
2391c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	 */
2401c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
2411c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct rc_dst_register dst = try_to_reuse_dst(c, inst);
2421c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dst, negate(inst->U.I.SrcReg[0]));
2431c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	emit2(c, inst->Prev, RC_OPCODE_ADD, inst->U.I.SaturateMode, inst->U.I.DstReg,
2441c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		inst->U.I.SrcReg[0], srcreg(RC_FILE_TEMPORARY, dst.Index));
2451c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	rc_remove_instruction(inst);
2461c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák}
2471c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
2481c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic void transform_CLAMP(struct radeon_compiler *c,
2491c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct rc_instruction *inst)
2501c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{
2511c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	/* CLAMP dst, src, min, max
2521c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	 *    into:
2531c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	 * MIN tmp, src, max
2541c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	 * MAX dst, tmp, min
2551c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	 */
2561c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct rc_dst_register dst = try_to_reuse_dst(c, inst);
2571c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	emit2(c, inst->Prev, RC_OPCODE_MIN, 0, dst,
2581c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		inst->U.I.SrcReg[0], inst->U.I.SrcReg[2]);
2591c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	emit2(c, inst->Prev, RC_OPCODE_MAX, inst->U.I.SaturateMode, inst->U.I.DstReg,
2601c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		srcreg(RC_FILE_TEMPORARY, dst.Index), inst->U.I.SrcReg[1]);
2611c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	rc_remove_instruction(inst);
2621c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák}
2631c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
2641c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic void transform_DP2(struct radeon_compiler* c,
2651c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct rc_instruction* inst)
2661c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{
2671c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct rc_src_register src0 = inst->U.I.SrcReg[0];
2681c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct rc_src_register src1 = inst->U.I.SrcReg[1];
2691c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	src0.Negate &= ~(RC_MASK_Z | RC_MASK_W);
2701c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	src0.Swizzle &= ~(63 << (3 * 2));
2711c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	src0.Swizzle |= (RC_SWIZZLE_ZERO << (3 * 2)) | (RC_SWIZZLE_ZERO << (3 * 3));
2721c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	src1.Negate &= ~(RC_MASK_Z | RC_MASK_W);
2731c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	src1.Swizzle &= ~(63 << (3 * 2));
2741c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	src1.Swizzle |= (RC_SWIZZLE_ZERO << (3 * 2)) | (RC_SWIZZLE_ZERO << (3 * 3));
2751c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	emit2(c, inst->Prev, RC_OPCODE_DP3, inst->U.I.SaturateMode, inst->U.I.DstReg, src0, src1);
2761c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	rc_remove_instruction(inst);
2771c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák}
2781c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
2791c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic void transform_DPH(struct radeon_compiler* c,
2801c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct rc_instruction* inst)
2811c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{
2821c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct rc_src_register src0 = inst->U.I.SrcReg[0];
2831c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	src0.Negate &= ~RC_MASK_W;
2841c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	src0.Swizzle &= ~(7 << (3 * 3));
2851c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	src0.Swizzle |= RC_SWIZZLE_ONE << (3 * 3);
2861c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	emit2(c, inst->Prev, RC_OPCODE_DP4, inst->U.I.SaturateMode, inst->U.I.DstReg, src0, inst->U.I.SrcReg[1]);
2871c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	rc_remove_instruction(inst);
2881c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák}
2891c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
2901c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák/**
2911c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * [1, src0.y*src1.y, src0.z, src1.w]
2921c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * So basically MUL with lotsa swizzling.
2931c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák */
2941c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic void transform_DST(struct radeon_compiler* c,
2951c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct rc_instruction* inst)
2961c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{
2971c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	emit2(c, inst->Prev, RC_OPCODE_MUL, inst->U.I.SaturateMode, inst->U.I.DstReg,
2981c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_ONE, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_ONE),
2991c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_ONE, RC_SWIZZLE_Y, RC_SWIZZLE_ONE, RC_SWIZZLE_W));
3001c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	rc_remove_instruction(inst);
3011c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák}
3021c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
3031c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic void transform_FLR(struct radeon_compiler* c,
3041c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct rc_instruction* inst)
3051c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{
3061c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct rc_dst_register dst = try_to_reuse_dst(c, inst);
3071c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dst, inst->U.I.SrcReg[0]);
3081c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	emit2(c, inst->Prev, RC_OPCODE_ADD, inst->U.I.SaturateMode, inst->U.I.DstReg,
3091c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		inst->U.I.SrcReg[0], negate(srcreg(RC_FILE_TEMPORARY, dst.Index)));
3101c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	rc_remove_instruction(inst);
3111c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák}
3121c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
3131c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák/**
3141c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * Definition of LIT (from ARB_fragment_program):
3151c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák *
3161c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák *  tmp = VectorLoad(op0);
3171c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák *  if (tmp.x < 0) tmp.x = 0;
3181c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák *  if (tmp.y < 0) tmp.y = 0;
3191c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák *  if (tmp.w < -(128.0-epsilon)) tmp.w = -(128.0-epsilon);
3201c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák *  else if (tmp.w > 128-epsilon) tmp.w = 128-epsilon;
3211c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák *  result.x = 1.0;
3221c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák *  result.y = tmp.x;
3231c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák *  result.z = (tmp.x > 0) ? RoughApproxPower(tmp.y, tmp.w) : 0.0;
3241c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák *  result.w = 1.0;
3251c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák *
3261c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * The longest path of computation is the one leading to result.z,
3271c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * consisting of 5 operations. This implementation of LIT takes
3281c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * 5 slots, if the subsequent optimization passes are clever enough
3291c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * to pair instructions correctly.
3301c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák */
3311c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic void transform_LIT(struct radeon_compiler* c,
3321c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct rc_instruction* inst)
3331c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{
3341c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	unsigned int constant;
3351c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	unsigned int constant_swizzle;
3361c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	unsigned int temp;
3371c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct rc_src_register srctemp;
3381c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
3391c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	constant = rc_constants_add_immediate_scalar(&c->Program.Constants, -127.999999, &constant_swizzle);
3401c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
3411c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	if (inst->U.I.DstReg.WriteMask != RC_MASK_XYZW || inst->U.I.DstReg.File != RC_FILE_TEMPORARY) {
3421c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		struct rc_instruction * inst_mov;
3431c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
3441c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		inst_mov = emit1(c, inst,
3451c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			RC_OPCODE_MOV, 0, inst->U.I.DstReg,
3461c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			srcreg(RC_FILE_TEMPORARY, rc_find_free_temporary(c)));
3471c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
3481c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		inst->U.I.DstReg.File = RC_FILE_TEMPORARY;
3491c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		inst->U.I.DstReg.Index = inst_mov->U.I.SrcReg[0].Index;
3501c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		inst->U.I.DstReg.WriteMask = RC_MASK_XYZW;
3511c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	}
3521c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
3531c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	temp = inst->U.I.DstReg.Index;
3541c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	srctemp = srcreg(RC_FILE_TEMPORARY, temp);
3551c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
3561c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	/* tmp.x = max(0.0, Src.x); */
3571c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	/* tmp.y = max(0.0, Src.y); */
3581c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	/* tmp.w = clamp(Src.z, -128+eps, 128-eps); */
3591c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	emit2(c, inst->Prev, RC_OPCODE_MAX, 0,
3601c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		dstregtmpmask(temp, RC_MASK_XYW),
3611c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		inst->U.I.SrcReg[0],
3621c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		swizzle(srcreg(RC_FILE_CONSTANT, constant),
3631c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			RC_SWIZZLE_ZERO, RC_SWIZZLE_ZERO, RC_SWIZZLE_ZERO, constant_swizzle&3));
3641c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	emit2(c, inst->Prev, RC_OPCODE_MIN, 0,
3651c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		dstregtmpmask(temp, RC_MASK_Z),
3661c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		swizzle_wwww(srctemp),
3671c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		negate(srcregswz(RC_FILE_CONSTANT, constant, constant_swizzle)));
3681c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
3691c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	/* tmp.w = Pow(tmp.y, tmp.w) */
3701c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	emit1(c, inst->Prev, RC_OPCODE_LG2, 0,
3711c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		dstregtmpmask(temp, RC_MASK_W),
3721c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		swizzle_yyyy(srctemp));
3731c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	emit2(c, inst->Prev, RC_OPCODE_MUL, 0,
3741c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		dstregtmpmask(temp, RC_MASK_W),
3751c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		swizzle_wwww(srctemp),
3761c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		swizzle_zzzz(srctemp));
3771c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	emit1(c, inst->Prev, RC_OPCODE_EX2, 0,
3781c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		dstregtmpmask(temp, RC_MASK_W),
3791c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		swizzle_wwww(srctemp));
3801c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
3811c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	/* tmp.z = (tmp.x > 0) ? tmp.w : 0.0 */
3821c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode,
3831c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		dstregtmpmask(temp, RC_MASK_Z),
3841c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		negate(swizzle_xxxx(srctemp)),
3851c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		swizzle_wwww(srctemp),
3861c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		builtin_zero);
3871c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
3881c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	/* tmp.x, tmp.y, tmp.w = 1.0, tmp.x, 1.0 */
3891c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	emit1(c, inst->Prev, RC_OPCODE_MOV, inst->U.I.SaturateMode,
3901c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		dstregtmpmask(temp, RC_MASK_XYW),
3911c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		swizzle(srctemp, RC_SWIZZLE_ONE, RC_SWIZZLE_X, RC_SWIZZLE_ONE, RC_SWIZZLE_ONE));
3921c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
3931c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	rc_remove_instruction(inst);
3941c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák}
3951c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
3961c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic void transform_LRP(struct radeon_compiler* c,
3971c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct rc_instruction* inst)
3981c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{
3991c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct rc_dst_register dst = try_to_reuse_dst(c, inst);
4001c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
4011c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	emit2(c, inst->Prev, RC_OPCODE_ADD, 0,
4021c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		dst,
4031c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		inst->U.I.SrcReg[1], negate(inst->U.I.SrcReg[2]));
4041c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	emit3(c, inst->Prev, RC_OPCODE_MAD, inst->U.I.SaturateMode,
4051c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		inst->U.I.DstReg,
4061c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		inst->U.I.SrcReg[0], srcreg(RC_FILE_TEMPORARY, dst.Index), inst->U.I.SrcReg[2]);
4071c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
4081c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	rc_remove_instruction(inst);
4091c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák}
4101c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
4111c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic void transform_POW(struct radeon_compiler* c,
4121c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct rc_instruction* inst)
4131c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{
4141c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct rc_dst_register tempdst = try_to_reuse_dst(c, inst);
4151c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct rc_src_register tempsrc = srcreg(RC_FILE_TEMPORARY, tempdst.Index);
4161c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	tempdst.WriteMask = RC_MASK_W;
4171c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	tempsrc.Swizzle = RC_SWIZZLE_WWWW;
4181c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
4191c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	emit1(c, inst->Prev, RC_OPCODE_LG2, 0, tempdst, swizzle_xxxx(inst->U.I.SrcReg[0]));
4201c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	emit2(c, inst->Prev, RC_OPCODE_MUL, 0, tempdst, tempsrc, swizzle_xxxx(inst->U.I.SrcReg[1]));
4211c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	emit1(c, inst->Prev, RC_OPCODE_EX2, inst->U.I.SaturateMode, inst->U.I.DstReg, tempsrc);
4221c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
4231c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	rc_remove_instruction(inst);
4241c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák}
4251c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
4263d32e589879806297258e36ea80aae5044293ca3Tom Stellard/* dst = ROUND(src) :
4273d32e589879806297258e36ea80aae5044293ca3Tom Stellard *   add = src + .5
4283d32e589879806297258e36ea80aae5044293ca3Tom Stellard *   frac = FRC(add)
4293d32e589879806297258e36ea80aae5044293ca3Tom Stellard *   dst = add - frac
4303d32e589879806297258e36ea80aae5044293ca3Tom Stellard *
4313d32e589879806297258e36ea80aae5044293ca3Tom Stellard * According to the GLSL spec, the implementor can decide which way to round
4323d32e589879806297258e36ea80aae5044293ca3Tom Stellard * when the fraction is .5.  We round down for .5.
4333d32e589879806297258e36ea80aae5044293ca3Tom Stellard *
4343d32e589879806297258e36ea80aae5044293ca3Tom Stellard */
4353d32e589879806297258e36ea80aae5044293ca3Tom Stellardstatic void transform_ROUND(struct radeon_compiler* c,
4363d32e589879806297258e36ea80aae5044293ca3Tom Stellard	struct rc_instruction* inst)
4373d32e589879806297258e36ea80aae5044293ca3Tom Stellard{
4383d32e589879806297258e36ea80aae5044293ca3Tom Stellard	unsigned int mask = inst->U.I.DstReg.WriteMask;
4393d32e589879806297258e36ea80aae5044293ca3Tom Stellard	unsigned int frac_index, add_index;
4403d32e589879806297258e36ea80aae5044293ca3Tom Stellard	struct rc_dst_register frac_dst, add_dst;
4413d32e589879806297258e36ea80aae5044293ca3Tom Stellard	struct rc_src_register frac_src, add_src;
4423d32e589879806297258e36ea80aae5044293ca3Tom Stellard
4433d32e589879806297258e36ea80aae5044293ca3Tom Stellard	/* add = src + .5 */
4443d32e589879806297258e36ea80aae5044293ca3Tom Stellard	add_index = rc_find_free_temporary(c);
4453d32e589879806297258e36ea80aae5044293ca3Tom Stellard	add_dst = dstregtmpmask(add_index, mask);
4463d32e589879806297258e36ea80aae5044293ca3Tom Stellard	emit2(c, inst->Prev, RC_OPCODE_ADD, 0, add_dst, inst->U.I.SrcReg[0],
4473d32e589879806297258e36ea80aae5044293ca3Tom Stellard								builtin_half);
4483d32e589879806297258e36ea80aae5044293ca3Tom Stellard	add_src = srcreg(RC_FILE_TEMPORARY, add_dst.Index);
4493d32e589879806297258e36ea80aae5044293ca3Tom Stellard
4503d32e589879806297258e36ea80aae5044293ca3Tom Stellard
4513d32e589879806297258e36ea80aae5044293ca3Tom Stellard	/* frac = FRC(add) */
4523d32e589879806297258e36ea80aae5044293ca3Tom Stellard	frac_index = rc_find_free_temporary(c);
4533d32e589879806297258e36ea80aae5044293ca3Tom Stellard	frac_dst = dstregtmpmask(frac_index, mask);
4543d32e589879806297258e36ea80aae5044293ca3Tom Stellard	emit1(c, inst->Prev, RC_OPCODE_FRC, 0, frac_dst, add_src);
4553d32e589879806297258e36ea80aae5044293ca3Tom Stellard	frac_src = srcreg(RC_FILE_TEMPORARY, frac_dst.Index);
4563d32e589879806297258e36ea80aae5044293ca3Tom Stellard
4573d32e589879806297258e36ea80aae5044293ca3Tom Stellard	/* dst = add - frac */
4583d32e589879806297258e36ea80aae5044293ca3Tom Stellard	emit2(c, inst->Prev, RC_OPCODE_ADD, 0, inst->U.I.DstReg,
4593d32e589879806297258e36ea80aae5044293ca3Tom Stellard						add_src, negate(frac_src));
4603d32e589879806297258e36ea80aae5044293ca3Tom Stellard	rc_remove_instruction(inst);
4613d32e589879806297258e36ea80aae5044293ca3Tom Stellard}
4623d32e589879806297258e36ea80aae5044293ca3Tom Stellard
4631c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic void transform_RSQ(struct radeon_compiler* c,
4641c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct rc_instruction* inst)
4651c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{
4661c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	inst->U.I.SrcReg[0] = absolute(inst->U.I.SrcReg[0]);
4671c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák}
4681c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
4691c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic void transform_SEQ(struct radeon_compiler* c,
4701c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct rc_instruction* inst)
4711c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{
4721c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct rc_dst_register dst = try_to_reuse_dst(c, inst);
4731c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
4741c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1]));
4751c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
4761c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		negate(absolute(srcreg(RC_FILE_TEMPORARY, dst.Index))), builtin_zero, builtin_one);
4771c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
4781c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	rc_remove_instruction(inst);
4791c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák}
4801c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
4811c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic void transform_SFL(struct radeon_compiler* c,
4821c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct rc_instruction* inst)
4831c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{
4841c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	emit1(c, inst->Prev, RC_OPCODE_MOV, inst->U.I.SaturateMode, inst->U.I.DstReg, builtin_zero);
4851c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	rc_remove_instruction(inst);
4861c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák}
4871c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
4881c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic void transform_SGE(struct radeon_compiler* c,
4891c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct rc_instruction* inst)
4901c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{
4911c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct rc_dst_register dst = try_to_reuse_dst(c, inst);
4921c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
4931c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1]));
4941c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
4951c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_zero, builtin_one);
4961c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
4971c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	rc_remove_instruction(inst);
4981c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák}
4991c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
5001c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic void transform_SGT(struct radeon_compiler* c,
5011c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct rc_instruction* inst)
5021c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{
5031c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct rc_dst_register dst = try_to_reuse_dst(c, inst);
5041c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
5051c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, negate(inst->U.I.SrcReg[0]), inst->U.I.SrcReg[1]);
5061c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
5071c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_one, builtin_zero);
5081c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
5091c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	rc_remove_instruction(inst);
5101c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák}
5111c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
5121c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic void transform_SLE(struct radeon_compiler* c,
5131c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct rc_instruction* inst)
5141c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{
5151c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct rc_dst_register dst = try_to_reuse_dst(c, inst);
5161c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
5171c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, negate(inst->U.I.SrcReg[0]), inst->U.I.SrcReg[1]);
5181c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
5191c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_zero, builtin_one);
5201c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
5211c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	rc_remove_instruction(inst);
5221c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák}
5231c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
5241c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic void transform_SLT(struct radeon_compiler* c,
5251c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct rc_instruction* inst)
5261c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{
5271c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct rc_dst_register dst = try_to_reuse_dst(c, inst);
5281c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
5291c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1]));
5301c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
5311c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		srcreg(RC_FILE_TEMPORARY, dst.Index), builtin_one, builtin_zero);
5321c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
5331c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	rc_remove_instruction(inst);
5341c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák}
5351c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
5361c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic void transform_SNE(struct radeon_compiler* c,
5371c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct rc_instruction* inst)
5381c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{
5391c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct rc_dst_register dst = try_to_reuse_dst(c, inst);
5401c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
5411c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	emit2(c, inst->Prev, RC_OPCODE_ADD, 0, dst, inst->U.I.SrcReg[0], negate(inst->U.I.SrcReg[1]));
5421c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	emit3(c, inst->Prev, RC_OPCODE_CMP, inst->U.I.SaturateMode, inst->U.I.DstReg,
5431c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		negate(absolute(srcreg(RC_FILE_TEMPORARY, dst.Index))), builtin_one, builtin_zero);
5441c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
5451c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	rc_remove_instruction(inst);
5461c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák}
5471c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
5481c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic void transform_SSG(struct radeon_compiler* c,
5491c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct rc_instruction* inst)
5501c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{
5511c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	/* result = sign(x)
5521c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	 *
5531c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	 *   CMP tmp0, -x, 1, 0
5541c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	 *   CMP tmp1, x, 1, 0
5551c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	 *   ADD result, tmp0, -tmp1;
5561c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	 */
5571c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct rc_dst_register dst0;
5581c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	unsigned tmp1;
5591c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
5601c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	/* 0 < x */
5611c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	dst0 = try_to_reuse_dst(c, inst);
5621c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	emit3(c, inst->Prev, RC_OPCODE_CMP, 0,
5631c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	      dst0,
5641c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	      negate(inst->U.I.SrcReg[0]),
5651c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	      builtin_one,
5661c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	      builtin_zero);
5671c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
5681c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	/* x < 0 */
5691c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	tmp1 = rc_find_free_temporary(c);
5701c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	emit3(c, inst->Prev, RC_OPCODE_CMP, 0,
5711c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	      dstregtmpmask(tmp1, inst->U.I.DstReg.WriteMask),
5721c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	      inst->U.I.SrcReg[0],
5731c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	      builtin_one,
5741c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	      builtin_zero);
5751c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
5761c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	/* Either both are zero, or one of them is one and the other is zero. */
5771c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	/* result = tmp0 - tmp1 */
5781c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	emit2(c, inst->Prev, RC_OPCODE_ADD, 0,
5791c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	      inst->U.I.DstReg,
5801c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	      srcreg(RC_FILE_TEMPORARY, dst0.Index),
5811c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	      negate(srcreg(RC_FILE_TEMPORARY, tmp1)));
5821c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
5831c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	rc_remove_instruction(inst);
5841c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák}
5851c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
5861c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic void transform_SUB(struct radeon_compiler* c,
5871c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct rc_instruction* inst)
5881c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{
5891c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	inst->U.I.Opcode = RC_OPCODE_ADD;
5901c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	inst->U.I.SrcReg[1] = negate(inst->U.I.SrcReg[1]);
5911c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák}
5921c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
5931c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic void transform_SWZ(struct radeon_compiler* c,
5941c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct rc_instruction* inst)
5951c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{
5961c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	inst->U.I.Opcode = RC_OPCODE_MOV;
5971c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák}
5981c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
5991c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic void transform_XPD(struct radeon_compiler* c,
6001c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct rc_instruction* inst)
6011c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{
6021c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct rc_dst_register dst = try_to_reuse_dst(c, inst);
6031c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
6041c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	emit2(c, inst->Prev, RC_OPCODE_MUL, 0, dst,
6051c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_W),
6061c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_W));
6071c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	emit3(c, inst->Prev, RC_OPCODE_MAD, inst->U.I.SaturateMode, inst->U.I.DstReg,
6081c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		swizzle(inst->U.I.SrcReg[0], RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_W),
6091c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		swizzle(inst->U.I.SrcReg[1], RC_SWIZZLE_Z, RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_W),
6101c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		negate(srcreg(RC_FILE_TEMPORARY, dst.Index)));
6111c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
6121c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	rc_remove_instruction(inst);
6131c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák}
6141c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
6151c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
6161c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák/**
6171c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * Can be used as a transformation for @ref radeonClauseLocalTransform,
6181c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * no userData necessary.
6191c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák *
6201c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * Eliminates the following ALU instructions:
6211c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák *  ABS, CEIL, DPH, DST, FLR, LIT, LRP, POW, SEQ, SFL, SGE, SGT, SLE, SLT, SNE, SUB, SWZ, XPD
6221c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * using:
6231c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák *  MOV, ADD, MUL, MAD, FRC, DP3, LG2, EX2, CMP
6241c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák *
6251c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * Transforms RSQ to Radeon's native RSQ by explicitly setting
6261c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * absolute value.
6271c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák *
6281c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * @note should be applicable to R300 and R500 fragment programs.
6291c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák */
6301c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákint radeonTransformALU(
6311c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct radeon_compiler * c,
6321c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct rc_instruction* inst,
6331c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	void* unused)
6341c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{
6351c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	switch(inst->U.I.Opcode) {
6361c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	case RC_OPCODE_ABS: transform_ABS(c, inst); return 1;
6371c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	case RC_OPCODE_CEIL: transform_CEIL(c, inst); return 1;
6381c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	case RC_OPCODE_CLAMP: transform_CLAMP(c, inst); return 1;
6391c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	case RC_OPCODE_DP2: transform_DP2(c, inst); return 1;
6401c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	case RC_OPCODE_DPH: transform_DPH(c, inst); return 1;
6411c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	case RC_OPCODE_DST: transform_DST(c, inst); return 1;
6421c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	case RC_OPCODE_FLR: transform_FLR(c, inst); return 1;
6431c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	case RC_OPCODE_LIT: transform_LIT(c, inst); return 1;
6441c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	case RC_OPCODE_LRP: transform_LRP(c, inst); return 1;
6451c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	case RC_OPCODE_POW: transform_POW(c, inst); return 1;
6463d32e589879806297258e36ea80aae5044293ca3Tom Stellard	case RC_OPCODE_ROUND: transform_ROUND(c, inst); return 1;
6471c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	case RC_OPCODE_RSQ: transform_RSQ(c, inst); return 1;
6481c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	case RC_OPCODE_SEQ: transform_SEQ(c, inst); return 1;
6491c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	case RC_OPCODE_SFL: transform_SFL(c, inst); return 1;
6501c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	case RC_OPCODE_SGE: transform_SGE(c, inst); return 1;
6511c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	case RC_OPCODE_SGT: transform_SGT(c, inst); return 1;
6521c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	case RC_OPCODE_SLE: transform_SLE(c, inst); return 1;
6531c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	case RC_OPCODE_SLT: transform_SLT(c, inst); return 1;
6541c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	case RC_OPCODE_SNE: transform_SNE(c, inst); return 1;
6551c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	case RC_OPCODE_SSG: transform_SSG(c, inst); return 1;
6561c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	case RC_OPCODE_SUB: transform_SUB(c, inst); return 1;
6571c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	case RC_OPCODE_SWZ: transform_SWZ(c, inst); return 1;
6581c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	case RC_OPCODE_XPD: transform_XPD(c, inst); return 1;
6591c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	default:
6601c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		return 0;
6611c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	}
6621c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák}
6631c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
6641c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
6651c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic void transform_r300_vertex_ABS(struct radeon_compiler* c,
6661c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct rc_instruction* inst)
6671c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{
6681c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	/* Note: r500 can take absolute values, but r300 cannot. */
6691c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	inst->U.I.Opcode = RC_OPCODE_MAX;
6701c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	inst->U.I.SrcReg[1] = inst->U.I.SrcReg[0];
6711c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW;
6721c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák}
6731c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
6741c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic void transform_r300_vertex_CMP(struct radeon_compiler* c,
6751c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct rc_instruction* inst)
6761c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{
6771c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	/* There is no decent CMP available, so let's rig one up.
6781c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	 * CMP is defined as dst = src0 < 0.0 ? src1 : src2
6791c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	 * The following sequence consumes zero to two temps and two extra slots
6801c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	 * (the second temp and the second slot is consumed by transform_LRP),
6811c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	 * but should be equivalent:
6821c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	 *
6831c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	 * SLT tmp0, src0, 0.0
6841c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	 * LRP dst, tmp0, src1, src2
6851c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	 *
6861c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	 * Yes, I know, I'm a mad scientist. ~ C. & M. */
6871c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct rc_dst_register dst = try_to_reuse_dst(c, inst);
6881c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
6891c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	/* SLT tmp0, src0, 0.0 */
6901c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	emit2(c, inst->Prev, RC_OPCODE_SLT, 0,
6911c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		dst,
6921c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		inst->U.I.SrcReg[0], builtin_zero);
6931c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
6941c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	/* LRP dst, tmp0, src1, src2 */
6951c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	transform_LRP(c,
6961c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		emit3(c, inst->Prev, RC_OPCODE_LRP, 0,
6971c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		      inst->U.I.DstReg,
6981c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		      srcreg(RC_FILE_TEMPORARY, dst.Index), inst->U.I.SrcReg[1],  inst->U.I.SrcReg[2]));
6991c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
7001c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	rc_remove_instruction(inst);
7011c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák}
7021c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
7031c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic void transform_r300_vertex_DP2(struct radeon_compiler* c,
7041c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct rc_instruction* inst)
7051c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{
7061c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct rc_instruction *next_inst = inst->Next;
7071c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	transform_DP2(c, inst);
7081c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	next_inst->Prev->U.I.Opcode = RC_OPCODE_DP4;
7091c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák}
7101c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
7111c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic void transform_r300_vertex_DP3(struct radeon_compiler* c,
7121c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct rc_instruction* inst)
7131c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{
7141c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct rc_src_register src0 = inst->U.I.SrcReg[0];
7151c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct rc_src_register src1 = inst->U.I.SrcReg[1];
7161c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	src0.Negate &= ~RC_MASK_W;
7171c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	src0.Swizzle &= ~(7 << (3 * 3));
7181c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	src0.Swizzle |= RC_SWIZZLE_ZERO << (3 * 3);
7191c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	src1.Negate &= ~RC_MASK_W;
7201c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	src1.Swizzle &= ~(7 << (3 * 3));
7211c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	src1.Swizzle |= RC_SWIZZLE_ZERO << (3 * 3);
7221c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	emit2(c, inst->Prev, RC_OPCODE_DP4, inst->U.I.SaturateMode, inst->U.I.DstReg, src0, src1);
7231c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	rc_remove_instruction(inst);
7241c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák}
7251c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
7261c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic void transform_r300_vertex_fix_LIT(struct radeon_compiler* c,
7271c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct rc_instruction* inst)
7281c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{
7291c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct rc_dst_register dst = try_to_reuse_dst(c, inst);
7301c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	unsigned constant_swizzle;
7311c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	int constant = rc_constants_add_immediate_scalar(&c->Program.Constants,
7321c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák							 0.0000000000000000001,
7331c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák							 &constant_swizzle);
7341c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
7351c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	/* MOV dst, src */
7361c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	dst.WriteMask = RC_MASK_XYZW;
7371c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	emit1(c, inst->Prev, RC_OPCODE_MOV, 0,
7381c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		dst,
7391c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		inst->U.I.SrcReg[0]);
7401c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
7411c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	/* MAX dst.y, src, 0.00...001 */
7421c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	emit2(c, inst->Prev, RC_OPCODE_MAX, 0,
7431c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		dstregtmpmask(dst.Index, RC_MASK_Y),
7441c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		srcreg(RC_FILE_TEMPORARY, dst.Index),
7451c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		srcregswz(RC_FILE_CONSTANT, constant, constant_swizzle));
7461c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
7471c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	inst->U.I.SrcReg[0] = srcreg(RC_FILE_TEMPORARY, dst.Index);
7481c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák}
7491c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
7501c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic void transform_r300_vertex_SEQ(struct radeon_compiler *c,
7511c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct rc_instruction *inst)
7521c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{
7531c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	/* x = y  <==>  x >= y && y >= x */
7541c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	int tmp = rc_find_free_temporary(c);
7551c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
7561c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	/* x <= y */
7571c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	emit2(c, inst->Prev, RC_OPCODE_SGE, 0,
7581c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	      dstregtmpmask(tmp, inst->U.I.DstReg.WriteMask),
7591c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	      inst->U.I.SrcReg[0],
7601c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	      inst->U.I.SrcReg[1]);
7611c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
7621c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	/* y <= x */
7631c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	emit2(c, inst->Prev, RC_OPCODE_SGE, 0,
7641c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	      inst->U.I.DstReg,
7651c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	      inst->U.I.SrcReg[1],
7661c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	      inst->U.I.SrcReg[0]);
7671c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
7681c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	/* x && y  =  x * y */
7691c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	emit2(c, inst->Prev, RC_OPCODE_MUL, 0,
7701c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	      inst->U.I.DstReg,
7711c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	      srcreg(RC_FILE_TEMPORARY, tmp),
7721c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	      srcreg(inst->U.I.DstReg.File, inst->U.I.DstReg.Index));
7731c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
7741c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	rc_remove_instruction(inst);
7751c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák}
7761c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
7771c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic void transform_r300_vertex_SNE(struct radeon_compiler *c,
7781c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct rc_instruction *inst)
7791c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{
7801c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	/* x != y  <==>  x < y || y < x */
7811c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	int tmp = rc_find_free_temporary(c);
7821c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
7831c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	/* x < y */
7841c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	emit2(c, inst->Prev, RC_OPCODE_SLT, 0,
7851c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	      dstregtmpmask(tmp, inst->U.I.DstReg.WriteMask),
7861c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	      inst->U.I.SrcReg[0],
7871c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	      inst->U.I.SrcReg[1]);
7881c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
7891c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	/* y < x */
7901c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	emit2(c, inst->Prev, RC_OPCODE_SLT, 0,
7911c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	      inst->U.I.DstReg,
7921c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	      inst->U.I.SrcReg[1],
7931c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	      inst->U.I.SrcReg[0]);
7941c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
7951c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	/* x || y  =  max(x, y) */
7961c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	emit2(c, inst->Prev, RC_OPCODE_MAX, 0,
7971c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	      inst->U.I.DstReg,
7981c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	      srcreg(RC_FILE_TEMPORARY, tmp),
7991c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	      srcreg(inst->U.I.DstReg.File, inst->U.I.DstReg.Index));
8001c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
8011c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	rc_remove_instruction(inst);
8021c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák}
8031c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
8041c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic void transform_r300_vertex_SGT(struct radeon_compiler* c,
8051c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct rc_instruction* inst)
8061c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{
8071c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	/* x > y  <==>  -x < -y */
8081c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	inst->U.I.Opcode = RC_OPCODE_SLT;
8091c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
8101c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW;
8111c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák}
8121c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
8131c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic void transform_r300_vertex_SLE(struct radeon_compiler* c,
8141c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct rc_instruction* inst)
8151c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{
8161c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	/* x <= y  <==>  -x >= -y */
8171c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	inst->U.I.Opcode = RC_OPCODE_SGE;
8181c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW;
8191c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW;
8201c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák}
8211c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
8221c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic void transform_r300_vertex_SSG(struct radeon_compiler* c,
8231c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct rc_instruction* inst)
8241c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{
8251c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	/* result = sign(x)
8261c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	 *
8271c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	 *   SLT tmp0, 0, x;
8281c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	 *   SLT tmp1, x, 0;
8291c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	 *   ADD result, tmp0, -tmp1;
8301c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	 */
8311c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct rc_dst_register dst0 = try_to_reuse_dst(c, inst);
8321c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	unsigned tmp1;
8331c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
8341c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	/* 0 < x */
8351c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	dst0 = try_to_reuse_dst(c, inst);
8361c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	emit2(c, inst->Prev, RC_OPCODE_SLT, 0,
8371c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	      dst0,
8381c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	      builtin_zero,
8391c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	      inst->U.I.SrcReg[0]);
8401c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
8411c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	/* x < 0 */
8421c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	tmp1 = rc_find_free_temporary(c);
8431c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	emit2(c, inst->Prev, RC_OPCODE_SLT, 0,
8441c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	      dstregtmpmask(tmp1, inst->U.I.DstReg.WriteMask),
8451c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	      inst->U.I.SrcReg[0],
8461c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	      builtin_zero);
8471c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
8481c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	/* Either both are zero, or one of them is one and the other is zero. */
8491c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	/* result = tmp0 - tmp1 */
8501c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	emit2(c, inst->Prev, RC_OPCODE_ADD, 0,
8511c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	      inst->U.I.DstReg,
8521c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	      srcreg(RC_FILE_TEMPORARY, dst0.Index),
8531c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	      negate(srcreg(RC_FILE_TEMPORARY, tmp1)));
8541c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
8551c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	rc_remove_instruction(inst);
8561c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák}
8571c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
8581c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák/**
8591c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * For use with rc_local_transform, this transforms non-native ALU
8601c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * instructions of the r300 up to r500 vertex engine.
8611c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák */
8621c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákint r300_transform_vertex_alu(
8631c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct radeon_compiler * c,
8641c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct rc_instruction* inst,
8651c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	void* unused)
8661c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{
8671c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	switch(inst->U.I.Opcode) {
8681c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	case RC_OPCODE_ABS: transform_r300_vertex_ABS(c, inst); return 1;
8691c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	case RC_OPCODE_CEIL: transform_CEIL(c, inst); return 1;
8701c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	case RC_OPCODE_CLAMP: transform_CLAMP(c, inst); return 1;
8711c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	case RC_OPCODE_CMP: transform_r300_vertex_CMP(c, inst); return 1;
8721c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	case RC_OPCODE_DP2: transform_r300_vertex_DP2(c, inst); return 1;
8731c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	case RC_OPCODE_DP3: transform_r300_vertex_DP3(c, inst); return 1;
8741c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	case RC_OPCODE_DPH: transform_DPH(c, inst); return 1;
8751c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	case RC_OPCODE_FLR: transform_FLR(c, inst); return 1;
8761c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	case RC_OPCODE_LIT: transform_r300_vertex_fix_LIT(c, inst); return 1;
8771c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	case RC_OPCODE_LRP: transform_LRP(c, inst); return 1;
8781c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	case RC_OPCODE_SEQ:
8791c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		if (!c->is_r500) {
8801c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			transform_r300_vertex_SEQ(c, inst);
8811c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			return 1;
8821c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		}
8831c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		return 0;
8841c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	case RC_OPCODE_SFL: transform_SFL(c, inst); return 1;
8851c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	case RC_OPCODE_SGT: transform_r300_vertex_SGT(c, inst); return 1;
8861c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	case RC_OPCODE_SLE: transform_r300_vertex_SLE(c, inst); return 1;
8871c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	case RC_OPCODE_SNE:
8881c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		if (!c->is_r500) {
8891c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			transform_r300_vertex_SNE(c, inst);
8901c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			return 1;
8911c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		}
8921c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		return 0;
8931c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	case RC_OPCODE_SSG: transform_r300_vertex_SSG(c, inst); return 1;
8941c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	case RC_OPCODE_SUB: transform_SUB(c, inst); return 1;
8951c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	case RC_OPCODE_SWZ: transform_SWZ(c, inst); return 1;
8961c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	case RC_OPCODE_XPD: transform_XPD(c, inst); return 1;
8971c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	default:
8981c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		return 0;
8991c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	}
9001c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák}
9011c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
9021c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic void sincos_constants(struct radeon_compiler* c, unsigned int *constants)
9031c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{
9041c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	static const float SinCosConsts[2][4] = {
9051c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		{
9061c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			1.273239545,		/* 4/PI */
9071c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			-0.405284735,		/* -4/(PI*PI) */
9081c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			3.141592654,		/* PI */
9091c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			0.2225			/* weight */
9101c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		},
9111c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		{
9121c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			0.75,
9131c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			0.5,
9141c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			0.159154943,		/* 1/(2*PI) */
9151c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			6.283185307		/* 2*PI */
9161c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		}
9171c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	};
9181c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	int i;
9191c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
9201c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	for(i = 0; i < 2; ++i)
9211c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		constants[i] = rc_constants_add_immediate_vec4(&c->Program.Constants, SinCosConsts[i]);
9221c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák}
9231c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
9241c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák/**
9251c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * Approximate sin(x), where x is clamped to (-pi/2, pi/2).
9261c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák *
9271c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * MUL tmp.xy, src, { 4/PI, -4/(PI^2) }
9281c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * MAD tmp.x, tmp.y, |src|, tmp.x
9291c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * MAD tmp.y, tmp.x, |tmp.x|, -tmp.x
9301c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * MAD dest, tmp.y, weight, tmp.x
9311c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák */
9321c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic void sin_approx(
9331c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct radeon_compiler* c, struct rc_instruction * inst,
9341c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct rc_dst_register dst, struct rc_src_register src, const unsigned int* constants)
9351c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{
9361c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	unsigned int tempreg = rc_find_free_temporary(c);
9371c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
9381c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	emit2(c, inst->Prev, RC_OPCODE_MUL, 0, dstregtmpmask(tempreg, RC_MASK_XY),
9391c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		swizzle_xxxx(src),
9401c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		srcreg(RC_FILE_CONSTANT, constants[0]));
9411c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_X),
9421c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		swizzle_yyyy(srcreg(RC_FILE_TEMPORARY, tempreg)),
9431c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		absolute(swizzle_xxxx(src)),
9441c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg)));
9451c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_Y),
9461c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg)),
9471c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		absolute(swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg))),
9481c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		negate(swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg))));
9491c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dst,
9501c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		swizzle_yyyy(srcreg(RC_FILE_TEMPORARY, tempreg)),
9511c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		swizzle_wwww(srcreg(RC_FILE_CONSTANT, constants[0])),
9521c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg)));
9531c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák}
9541c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
9551c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák/**
9561c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * Translate the trigonometric functions COS, SIN, and SCS
9571c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * using only the basic instructions
9581c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák *  MOV, ADD, MUL, MAD, FRC
9591c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák */
9601c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákint r300_transform_trig_simple(struct radeon_compiler* c,
9611c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct rc_instruction* inst,
9621c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	void* unused)
9631c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{
9641c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	unsigned int constants[2];
9651c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	unsigned int tempreg;
9661c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
9671c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	if (inst->U.I.Opcode != RC_OPCODE_COS &&
9681c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	    inst->U.I.Opcode != RC_OPCODE_SIN &&
9691c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	    inst->U.I.Opcode != RC_OPCODE_SCS)
9701c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		return 0;
9711c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
9721c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	tempreg = rc_find_free_temporary(c);
9731c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
9741c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	sincos_constants(c, constants);
9751c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
9761c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	if (inst->U.I.Opcode == RC_OPCODE_COS) {
9771c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		/* MAD tmp.x, src, 1/(2*PI), 0.75 */
9781c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		/* FRC tmp.x, tmp.x */
9791c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		/* MAD tmp.z, tmp.x, 2*PI, -PI */
9801c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W),
9811c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			swizzle_xxxx(inst->U.I.SrcReg[0]),
9821c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[1])),
9831c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			swizzle_xxxx(srcreg(RC_FILE_CONSTANT, constants[1])));
9841c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(tempreg, RC_MASK_W),
9851c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)));
9861c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W),
9871c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)),
9881c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			swizzle_wwww(srcreg(RC_FILE_CONSTANT, constants[1])),
9891c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			negate(swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[0]))));
9901c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
9911c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		sin_approx(c, inst, inst->U.I.DstReg,
9921c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)),
9931c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			constants);
9941c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	} else if (inst->U.I.Opcode == RC_OPCODE_SIN) {
9951c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W),
9961c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			swizzle_xxxx(inst->U.I.SrcReg[0]),
9971c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[1])),
9981c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			swizzle_yyyy(srcreg(RC_FILE_CONSTANT, constants[1])));
9991c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(tempreg, RC_MASK_W),
10001c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)));
10011c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_W),
10021c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)),
10031c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			swizzle_wwww(srcreg(RC_FILE_CONSTANT, constants[1])),
10041c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			negate(swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[0]))));
10051c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
10061c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		sin_approx(c, inst, inst->U.I.DstReg,
10071c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			swizzle_wwww(srcreg(RC_FILE_TEMPORARY, tempreg)),
10081c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			constants);
10091c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	} else {
10101c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		struct rc_dst_register dst;
10111c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
10121c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_XY),
10131c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			swizzle_xxxx(inst->U.I.SrcReg[0]),
10141c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[1])),
10151c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			swizzle(srcreg(RC_FILE_CONSTANT, constants[1]), RC_SWIZZLE_X, RC_SWIZZLE_Y, RC_SWIZZLE_Z, RC_SWIZZLE_W));
10161c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(tempreg, RC_MASK_XY),
10171c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			srcreg(RC_FILE_TEMPORARY, tempreg));
10181c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(tempreg, RC_MASK_XY),
10191c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			srcreg(RC_FILE_TEMPORARY, tempreg),
10201c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			swizzle_wwww(srcreg(RC_FILE_CONSTANT, constants[1])),
10211c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			negate(swizzle_zzzz(srcreg(RC_FILE_CONSTANT, constants[0]))));
10221c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
10231c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		dst = inst->U.I.DstReg;
10241c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
10251c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		dst.WriteMask = inst->U.I.DstReg.WriteMask & RC_MASK_X;
10261c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		sin_approx(c, inst, dst,
10271c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			swizzle_xxxx(srcreg(RC_FILE_TEMPORARY, tempreg)),
10281c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			constants);
10291c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
10301c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		dst.WriteMask = inst->U.I.DstReg.WriteMask & RC_MASK_Y;
10311c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		sin_approx(c, inst, dst,
10321c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			swizzle_yyyy(srcreg(RC_FILE_TEMPORARY, tempreg)),
10331c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			constants);
10341c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	}
10351c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
10361c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	rc_remove_instruction(inst);
10371c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
10381c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	return 1;
10391c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák}
10401c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
10411c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic void r300_transform_SIN_COS_SCS(struct radeon_compiler *c,
10421c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct rc_instruction *inst,
10431c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	unsigned srctmp)
10441c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{
10451c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	if (inst->U.I.Opcode == RC_OPCODE_COS) {
10461c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		emit1(c, inst->Prev, RC_OPCODE_COS, inst->U.I.SaturateMode, inst->U.I.DstReg,
10471c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW));
10481c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	} else if (inst->U.I.Opcode == RC_OPCODE_SIN) {
10491c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		emit1(c, inst->Prev, RC_OPCODE_SIN, inst->U.I.SaturateMode,
10501c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			inst->U.I.DstReg, srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW));
10511c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	} else if (inst->U.I.Opcode == RC_OPCODE_SCS) {
10521c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		struct rc_dst_register moddst = inst->U.I.DstReg;
10531c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
10541c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		if (inst->U.I.DstReg.WriteMask & RC_MASK_X) {
10551c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			moddst.WriteMask = RC_MASK_X;
10561c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			emit1(c, inst->Prev, RC_OPCODE_COS, inst->U.I.SaturateMode, moddst,
10571c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák				srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW));
10581c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		}
10591c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		if (inst->U.I.DstReg.WriteMask & RC_MASK_Y) {
10601c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			moddst.WriteMask = RC_MASK_Y;
10611c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			emit1(c, inst->Prev, RC_OPCODE_SIN, inst->U.I.SaturateMode, moddst,
10621c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák				srcregswz(RC_FILE_TEMPORARY, srctmp, RC_SWIZZLE_WWWW));
10631c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		}
10641c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	}
10651c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
10661c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	rc_remove_instruction(inst);
10671c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák}
10681c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
10691c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
10701c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák/**
10711c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * Transform the trigonometric functions COS, SIN, and SCS
10721c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * to include pre-scaling by 1/(2*PI) and taking the fractional
10731c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * part, so that the input to COS and SIN is always in the range [0,1).
10741c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * SCS is replaced by one COS and one SIN instruction.
10751c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák *
10761c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * @warning This transformation implicitly changes the semantics of SIN and COS!
10771c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák */
10781c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákint radeonTransformTrigScale(struct radeon_compiler* c,
10791c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct rc_instruction* inst,
10801c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	void* unused)
10811c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{
10821c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	static const float RCP_2PI = 0.15915494309189535;
10831c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	unsigned int temp;
10841c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	unsigned int constant;
10851c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	unsigned int constant_swizzle;
10861c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
10871c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	if (inst->U.I.Opcode != RC_OPCODE_COS &&
10881c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	    inst->U.I.Opcode != RC_OPCODE_SIN &&
10891c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	    inst->U.I.Opcode != RC_OPCODE_SCS)
10901c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		return 0;
10911c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
10921c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	temp = rc_find_free_temporary(c);
10931c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	constant = rc_constants_add_immediate_scalar(&c->Program.Constants, RCP_2PI, &constant_swizzle);
10941c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
10951c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	emit2(c, inst->Prev, RC_OPCODE_MUL, 0, dstregtmpmask(temp, RC_MASK_W),
10961c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		swizzle_xxxx(inst->U.I.SrcReg[0]),
10971c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		srcregswz(RC_FILE_CONSTANT, constant, constant_swizzle));
10981c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(temp, RC_MASK_W),
10991c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		srcreg(RC_FILE_TEMPORARY, temp));
11001c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
11011c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	r300_transform_SIN_COS_SCS(c, inst, temp);
11021c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	return 1;
11031c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák}
11041c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
11051c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák/**
11061c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * Transform the trigonometric functions COS, SIN, and SCS
11071c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * so that the input to COS and SIN is always in the range [-PI, PI].
11081c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * SCS is replaced by one COS and one SIN instruction.
11091c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák */
11101c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákint r300_transform_trig_scale_vertex(struct radeon_compiler *c,
11111c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct rc_instruction *inst,
11121c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	void *unused)
11131c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{
11141c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	static const float cons[4] = {0.15915494309189535, 0.5, 6.28318530717959, -3.14159265358979};
11151c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	unsigned int temp;
11161c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	unsigned int constant;
11171c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
11181c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	if (inst->U.I.Opcode != RC_OPCODE_COS &&
11191c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	    inst->U.I.Opcode != RC_OPCODE_SIN &&
11201c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	    inst->U.I.Opcode != RC_OPCODE_SCS)
11211c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		return 0;
11221c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
11231c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	/* Repeat x in the range [-PI, PI]:
11241c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	 *
11251c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	 *   repeat(x) = frac(x / 2PI + 0.5) * 2PI - PI
11261c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	 */
11271c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
11281c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	temp = rc_find_free_temporary(c);
11291c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	constant = rc_constants_add_immediate_vec4(&c->Program.Constants, cons);
11301c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
11311c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(temp, RC_MASK_W),
11321c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		swizzle_xxxx(inst->U.I.SrcReg[0]),
11331c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		srcregswz(RC_FILE_CONSTANT, constant, RC_SWIZZLE_XXXX),
11341c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		srcregswz(RC_FILE_CONSTANT, constant, RC_SWIZZLE_YYYY));
11351c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	emit1(c, inst->Prev, RC_OPCODE_FRC, 0, dstregtmpmask(temp, RC_MASK_W),
11361c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		srcreg(RC_FILE_TEMPORARY, temp));
11371c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	emit3(c, inst->Prev, RC_OPCODE_MAD, 0, dstregtmpmask(temp, RC_MASK_W),
11381c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		srcreg(RC_FILE_TEMPORARY, temp),
11391c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		srcregswz(RC_FILE_CONSTANT, constant, RC_SWIZZLE_ZZZZ),
11401c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		srcregswz(RC_FILE_CONSTANT, constant, RC_SWIZZLE_WWWW));
11411c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
11421c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	r300_transform_SIN_COS_SCS(c, inst, temp);
11431c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	return 1;
11441c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák}
11451c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
11461c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák/**
11471c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * Rewrite DDX/DDY instructions to properly work with r5xx shaders.
11481c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * The r5xx MDH/MDV instruction provides per-quad partial derivatives.
11491c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * It takes the form A*B+C. A and C are set by setting src0. B should be -1.
11501c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák *
11511c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * @warning This explicitly changes the form of DDX and DDY!
11521c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák */
11531c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
11541c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákint radeonTransformDeriv(struct radeon_compiler* c,
11551c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct rc_instruction* inst,
11561c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	void* unused)
11571c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{
11581c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	if (inst->U.I.Opcode != RC_OPCODE_DDX && inst->U.I.Opcode != RC_OPCODE_DDY)
11591c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		return 0;
11601c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
11611c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	inst->U.I.SrcReg[1].Swizzle = RC_SWIZZLE_1111;
11621c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	inst->U.I.SrcReg[1].Negate = RC_MASK_XYZW;
11631c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
11641c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	return 1;
11651c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák}
11661c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
11671c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák/**
11681c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * IF Temp[0].x -\
11691c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * KILP         - > KIL -abs(Temp[0].x)
11701c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * ENDIF        -/
11711c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák *
11721c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * This needs to be done in its own pass, because it modifies the instructions
11731c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * before and after KILP.
11741c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák */
11751c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákvoid rc_transform_KILP(struct radeon_compiler * c, void *user)
11761c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{
11771c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	struct rc_instruction * inst;
11781c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	for (inst = c->Program.Instructions.Next;
11791c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			inst != &c->Program.Instructions; inst = inst->Next) {
11801c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
11811c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		if (inst->U.I.Opcode != RC_OPCODE_KILP)
11821c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			continue;
11831c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
11841c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		inst->U.I.Opcode = RC_OPCODE_KIL;
11851c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
11861c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		if (inst->Prev->U.I.Opcode != RC_OPCODE_IF
11871c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák				|| inst->Next->U.I.Opcode != RC_OPCODE_ENDIF) {
11881c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			inst->U.I.SrcReg[0] = negate(builtin_one);
11891c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		} else {
11901c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák
11911c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			inst->U.I.SrcReg[0] =
11921c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák				negate(absolute(inst->Prev->U.I.SrcReg[0]));
11931c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			/* Remove IF */
11941c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			rc_remove_instruction(inst->Prev);
11951c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			/* Remove ENDIF */
11961c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák			rc_remove_instruction(inst->Next);
11971c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák		}
11981c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák	}
11991c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák}
1200