11c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák/* 21c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * Copyright (C) 2009 Nicolai Haehnle. 31c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * Copyright 2010 Tom Stellard <tstellar@gmail.com> 41c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * 51c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * All Rights Reserved. 61c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * 71c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * Permission is hereby granted, free of charge, to any person obtaining 81c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * a copy of this software and associated documentation files (the 91c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * "Software"), to deal in the Software without restriction, including 101c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * without limitation the rights to use, copy, modify, merge, publish, 111c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * distribute, sublicense, and/or sell copies of the Software, and to 121c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * permit persons to whom the Software is furnished to do so, subject to 131c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * the following conditions: 141c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * 151c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * The above copyright notice and this permission notice (including the 161c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * next paragraph) shall be included in all copies or substantial 171c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * portions of the Software. 181c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * 191c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 201c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 211c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 221c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE 231c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 241c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 251c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 261c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * 271c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák */ 281c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 291c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák#include "radeon_dataflow.h" 301c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 311c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák#include "radeon_compiler.h" 321c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák#include "radeon_compiler_util.h" 33e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard#include "radeon_list.h" 341c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák#include "radeon_swizzle.h" 35e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard#include "radeon_variable.h" 361c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 371c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstruct src_clobbered_reads_cb_data { 381c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák rc_register_file File; 391c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák unsigned int Index; 401c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák unsigned int Mask; 411c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_reader_data * ReaderData; 421c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák}; 431c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 441c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšáktypedef void (*rc_presub_replace_fn)(struct rc_instruction *, 451c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_instruction *, 461c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák unsigned int); 471c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 481c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic struct rc_src_register chain_srcregs(struct rc_src_register outer, struct rc_src_register inner) 491c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{ 501c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_src_register combine; 511c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák combine.File = inner.File; 521c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák combine.Index = inner.Index; 531c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák combine.RelAddr = inner.RelAddr; 541c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (outer.Abs) { 551c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák combine.Abs = 1; 561c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák combine.Negate = outer.Negate; 571c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } else { 581c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák combine.Abs = inner.Abs; 591c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák combine.Negate = swizzle_mask(outer.Swizzle, inner.Negate); 601c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák combine.Negate ^= outer.Negate; 611c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 621c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák combine.Swizzle = combine_swizzles(inner.Swizzle, outer.Swizzle); 631c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return combine; 641c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák} 651c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 661c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic void copy_propagate_scan_read(void * data, struct rc_instruction * inst, 671c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_src_register * src) 681c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{ 691c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák rc_register_file file = src->File; 701c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_reader_data * reader_data = data; 711c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 721c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if(!rc_inst_can_use_presub(inst, 731c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák reader_data->Writer->U.I.PreSub.Opcode, 741c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák rc_swizzle_to_writemask(src->Swizzle), 751c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák src, 761c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák &reader_data->Writer->U.I.PreSub.SrcReg[0], 771c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák &reader_data->Writer->U.I.PreSub.SrcReg[1])) { 781c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák reader_data->Abort = 1; 791c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return; 801c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 811c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 821c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák /* XXX This could probably be handled better. */ 831c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (file == RC_FILE_ADDRESS) { 841c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák reader_data->Abort = 1; 851c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return; 861c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 871c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 881c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák /* These instructions cannot read from the constants file. 891c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * see radeonTransformTEX() 901c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák */ 911c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if(reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_TEMPORARY && 921c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_INPUT && 931c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák (inst->U.I.Opcode == RC_OPCODE_TEX || 941c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.Opcode == RC_OPCODE_TXB || 951c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.Opcode == RC_OPCODE_TXP || 961c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.Opcode == RC_OPCODE_TXD || 971c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.Opcode == RC_OPCODE_TXL || 981c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.Opcode == RC_OPCODE_KIL)){ 991c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák reader_data->Abort = 1; 1001c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return; 1011c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 1021c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák} 1031c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 1041c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic void src_clobbered_reads_cb( 1051c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák void * data, 1061c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_instruction * inst, 1071c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_src_register * src) 1081c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{ 1091c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct src_clobbered_reads_cb_data * sc_data = data; 1101c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 1111c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (src->File == sc_data->File 1121c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák && src->Index == sc_data->Index 1131c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák && (rc_swizzle_to_writemask(src->Swizzle) & sc_data->Mask)) { 1141c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 1151c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW; 1161c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 1171c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 1181c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (src->RelAddr && sc_data->File == RC_FILE_ADDRESS) { 1191c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW; 1201c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 1211c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák} 1221c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 1231c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic void is_src_clobbered_scan_write( 1241c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák void * data, 1251c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_instruction * inst, 1261c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák rc_register_file file, 1271c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák unsigned int index, 1281c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák unsigned int mask) 1291c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{ 1301c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct src_clobbered_reads_cb_data sc_data; 1311c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_reader_data * reader_data = data; 1321c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák sc_data.File = file; 1331c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák sc_data.Index = index; 1341c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák sc_data.Mask = mask; 1351c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák sc_data.ReaderData = reader_data; 1361c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák rc_for_all_reads_src(reader_data->Writer, 1371c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák src_clobbered_reads_cb, &sc_data); 1381c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák} 1391c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 1401c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic void copy_propagate(struct radeon_compiler * c, struct rc_instruction * inst_mov) 1411c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{ 1421c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_reader_data reader_data; 1431c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák unsigned int i; 1441c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 1451c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (inst_mov->U.I.DstReg.File != RC_FILE_TEMPORARY || 1461c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst_mov->U.I.WriteALUResult || 1471c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst_mov->U.I.SaturateMode) 1481c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return; 1491c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 1501c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák /* Get a list of all the readers of this MOV instruction. */ 1511c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák reader_data.ExitOnAbort = 1; 1521c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák rc_get_readers(c, inst_mov, &reader_data, 1531c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák copy_propagate_scan_read, NULL, 1541c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák is_src_clobbered_scan_write); 1551c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 1561c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (reader_data.Abort || reader_data.ReaderCount == 0) 1571c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return; 1581c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 1591c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák /* Propagate the MOV instruction. */ 1601c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák for (i = 0; i < reader_data.ReaderCount; i++) { 1611c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_instruction * inst = reader_data.Readers[i].Inst; 1621c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák *reader_data.Readers[i].U.I.Src = chain_srcregs(*reader_data.Readers[i].U.I.Src, inst_mov->U.I.SrcReg[0]); 1631c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 1641c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (inst_mov->U.I.SrcReg[0].File == RC_FILE_PRESUB) 1651c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.PreSub = inst_mov->U.I.PreSub; 1661c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 1671c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 1681c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák /* Finally, remove the original MOV instruction */ 1691c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák rc_remove_instruction(inst_mov); 1701c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák} 1711c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 1721c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák/** 1731c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * Check if a source register is actually always the same 1741c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * swizzle constant. 1751c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák */ 1761c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic int is_src_uniform_constant(struct rc_src_register src, 1771c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák rc_swizzle * pswz, unsigned int * pnegate) 1781c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{ 1791c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák int have_used = 0; 1801c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 1811c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (src.File != RC_FILE_NONE) { 1821c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák *pswz = 0; 1831c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return 0; 1841c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 1851c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 1861c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák for(unsigned int chan = 0; chan < 4; ++chan) { 1871c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák unsigned int swz = GET_SWZ(src.Swizzle, chan); 1881c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (swz < 4) { 1891c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák *pswz = 0; 1901c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return 0; 1911c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 1921c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (swz == RC_SWIZZLE_UNUSED) 1931c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák continue; 1941c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 1951c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (!have_used) { 1961c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák *pswz = swz; 1971c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák *pnegate = GET_BIT(src.Negate, chan); 1981c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák have_used = 1; 1991c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } else { 2001c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (swz != *pswz || *pnegate != GET_BIT(src.Negate, chan)) { 2011c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák *pswz = 0; 2021c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return 0; 2031c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 2041c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 2051c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 2061c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 2071c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return 1; 2081c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák} 2091c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 2101c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic void constant_folding_mad(struct rc_instruction * inst) 2111c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{ 2121c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák rc_swizzle swz = 0; 2131c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák unsigned int negate= 0; 2141c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 2151c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (is_src_uniform_constant(inst->U.I.SrcReg[2], &swz, &negate)) { 2161c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (swz == RC_SWIZZLE_ZERO) { 2171c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.Opcode = RC_OPCODE_MUL; 2181c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return; 2191c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 2201c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 2211c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 2221c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) { 2231c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (swz == RC_SWIZZLE_ONE) { 2241c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.Opcode = RC_OPCODE_ADD; 2251c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (negate) 2261c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW; 2271c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.SrcReg[1] = inst->U.I.SrcReg[2]; 2281c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return; 2291c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } else if (swz == RC_SWIZZLE_ZERO) { 2301c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.Opcode = RC_OPCODE_MOV; 2311c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2]; 2321c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return; 2331c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 2341c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 2351c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 2361c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) { 2371c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (swz == RC_SWIZZLE_ONE) { 2381c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.Opcode = RC_OPCODE_ADD; 2391c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (negate) 2401c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW; 2411c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2]; 2421c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return; 2431c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } else if (swz == RC_SWIZZLE_ZERO) { 2441c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.Opcode = RC_OPCODE_MOV; 2451c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2]; 2461c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return; 2471c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 2481c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 2491c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák} 2501c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 2511c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic void constant_folding_mul(struct rc_instruction * inst) 2521c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{ 2531c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák rc_swizzle swz = 0; 2541c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák unsigned int negate = 0; 2551c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 2561c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) { 2571c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (swz == RC_SWIZZLE_ONE) { 2581c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.Opcode = RC_OPCODE_MOV; 2591c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1]; 2601c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (negate) 2611c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW; 2621c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return; 2631c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } else if (swz == RC_SWIZZLE_ZERO) { 2641c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.Opcode = RC_OPCODE_MOV; 2651c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000; 2661c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return; 2671c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 2681c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 2691c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 2701c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) { 2711c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (swz == RC_SWIZZLE_ONE) { 2721c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.Opcode = RC_OPCODE_MOV; 2731c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (negate) 2741c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW; 2751c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return; 2761c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } else if (swz == RC_SWIZZLE_ZERO) { 2771c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.Opcode = RC_OPCODE_MOV; 2781c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000; 2791c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return; 2801c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 2811c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 2821c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák} 2831c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 2841c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic void constant_folding_add(struct rc_instruction * inst) 2851c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{ 2861c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák rc_swizzle swz = 0; 2871c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák unsigned int negate = 0; 2881c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 2891c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) { 2901c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (swz == RC_SWIZZLE_ZERO) { 2911c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.Opcode = RC_OPCODE_MOV; 2921c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1]; 2931c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return; 2941c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 2951c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 2961c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 2971c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) { 2981c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (swz == RC_SWIZZLE_ZERO) { 2991c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.Opcode = RC_OPCODE_MOV; 3001c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return; 3011c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 3021c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 3031c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák} 3041c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 3051c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák/** 3061c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * Replace 0.0, 1.0 and 0.5 immediate constants by their 3071c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * respective swizzles. Simplify instructions like ADD dst, src, 0; 3081c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák */ 3091c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic void constant_folding(struct radeon_compiler * c, struct rc_instruction * inst) 3101c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{ 3111c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); 3121c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák unsigned int i; 3131c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 3141c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák /* Replace 0.0, 1.0 and 0.5 immediates by their explicit swizzles */ 3151c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) { 3161c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_constant * constant; 3171c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_src_register newsrc; 3181c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák int have_real_reference; 3191c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák unsigned int chan; 3201c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 3211c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák /* If there are only 0, 0.5, 1, or _ swizzles, mark the source as a constant. */ 3221c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák for (chan = 0; chan < 4; ++chan) 3231c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan) <= 3) 3241c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák break; 3251c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (chan == 4) { 3261c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.SrcReg[src].File = RC_FILE_NONE; 3271c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák continue; 3281c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 3291c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 3301c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák /* Convert immediates to swizzles. */ 3311c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (inst->U.I.SrcReg[src].File != RC_FILE_CONSTANT || 3321c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.SrcReg[src].RelAddr || 3331c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.SrcReg[src].Index >= c->Program.Constants.Count) 3341c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák continue; 3351c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 3361c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák constant = 3371c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák &c->Program.Constants.Constants[inst->U.I.SrcReg[src].Index]; 3381c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 3391c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (constant->Type != RC_CONSTANT_IMMEDIATE) 3401c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák continue; 3411c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 3421c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák newsrc = inst->U.I.SrcReg[src]; 3431c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák have_real_reference = 0; 3441c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák for (chan = 0; chan < 4; ++chan) { 3451c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák unsigned int swz = GET_SWZ(newsrc.Swizzle, chan); 3461c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák unsigned int newswz; 3471c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák float imm; 3481c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák float baseimm; 3491c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 3501c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (swz >= 4) 3511c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák continue; 3521c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 3531c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák imm = constant->u.Immediate[swz]; 3541c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák baseimm = imm; 3551c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (imm < 0.0) 3561c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák baseimm = -baseimm; 3571c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 3581c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (baseimm == 0.0) { 3591c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák newswz = RC_SWIZZLE_ZERO; 3601c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } else if (baseimm == 1.0) { 3611c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák newswz = RC_SWIZZLE_ONE; 3621c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } else if (baseimm == 0.5 && c->has_half_swizzles) { 3631c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák newswz = RC_SWIZZLE_HALF; 3641c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } else { 3651c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák have_real_reference = 1; 3661c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák continue; 3671c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 3681c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 3691c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák SET_SWZ(newsrc.Swizzle, chan, newswz); 3701c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (imm < 0.0 && !newsrc.Abs) 3711c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák newsrc.Negate ^= 1 << chan; 3721c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 3731c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 3741c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (!have_real_reference) { 3751c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák newsrc.File = RC_FILE_NONE; 3761c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák newsrc.Index = 0; 3771c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 3781c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 3791c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák /* don't make the swizzle worse */ 3801c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, newsrc) && 3811c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák c->SwizzleCaps->IsNative(inst->U.I.Opcode, inst->U.I.SrcReg[src])) 3821c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák continue; 3831c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 3841c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.SrcReg[src] = newsrc; 3851c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 3861c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 3871c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák /* Simplify instructions based on constants */ 3881c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (inst->U.I.Opcode == RC_OPCODE_MAD) 3891c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák constant_folding_mad(inst); 3901c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 3911c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák /* note: MAD can simplify to MUL or ADD */ 3921c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (inst->U.I.Opcode == RC_OPCODE_MUL) 3931c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák constant_folding_mul(inst); 3941c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák else if (inst->U.I.Opcode == RC_OPCODE_ADD) 3951c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák constant_folding_add(inst); 3961c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 3971c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák /* In case this instruction has been converted, make sure all of the 3981c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * registers that are no longer used are empty. */ 3991c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák opcode = rc_get_opcode_info(inst->U.I.Opcode); 4001c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák for(i = opcode->NumSrcRegs; i < 3; i++) { 4011c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák memset(&inst->U.I.SrcReg[i], 0, sizeof(struct rc_src_register)); 4021c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 4031c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák} 4041c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 4051c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák/** 4061c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * If src and dst use the same register, this function returns a writemask that 4071c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * indicates wich components are read by src. Otherwise zero is returned. 4081c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák */ 4091c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic unsigned int src_reads_dst_mask(struct rc_src_register src, 4101c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_dst_register dst) 4111c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{ 4121c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (dst.File != src.File || dst.Index != src.Index) { 4131c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return 0; 4141c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 4151c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return rc_swizzle_to_writemask(src.Swizzle); 4161c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák} 4171c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 4181c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák/* Return 1 if the source registers has a constant swizzle (e.g. 0, 0.5, 1.0) 4191c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * in any of its channels. Return 0 otherwise. */ 4201c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic int src_has_const_swz(struct rc_src_register src) { 4211c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák int chan; 4221c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák for(chan = 0; chan < 4; chan++) { 4231c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák unsigned int swz = GET_SWZ(src.Swizzle, chan); 4241c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (swz == RC_SWIZZLE_ZERO || swz == RC_SWIZZLE_HALF 4251c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák || swz == RC_SWIZZLE_ONE) { 4261c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return 1; 4271c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 4281c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 4291c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return 0; 4301c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák} 4311c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 4321c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic void presub_scan_read( 4331c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák void * data, 4341c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_instruction * inst, 4351c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_src_register * src) 4361c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{ 4371c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_reader_data * reader_data = data; 4381c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák rc_presubtract_op * presub_opcode = reader_data->CbData; 4391c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 4401c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (!rc_inst_can_use_presub(inst, *presub_opcode, 4411c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák reader_data->Writer->U.I.DstReg.WriteMask, 4421c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák src, 4431c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák &reader_data->Writer->U.I.SrcReg[0], 4441c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák &reader_data->Writer->U.I.SrcReg[1])) { 4451c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák reader_data->Abort = 1; 4461c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return; 4471c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 4481c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák} 4491c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 4501c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic int presub_helper( 4511c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct radeon_compiler * c, 4521c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_instruction * inst_add, 4531c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák rc_presubtract_op presub_opcode, 4541c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák rc_presub_replace_fn presub_replace) 4551c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{ 4561c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_reader_data reader_data; 4571c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák unsigned int i; 4581c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák rc_presubtract_op cb_op = presub_opcode; 4591c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 4601c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák reader_data.CbData = &cb_op; 4611c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák reader_data.ExitOnAbort = 1; 4621c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák rc_get_readers(c, inst_add, &reader_data, presub_scan_read, NULL, 4631c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák is_src_clobbered_scan_write); 4641c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 4651c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (reader_data.Abort || reader_data.ReaderCount == 0) 4661c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return 0; 4671c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 4681c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák for(i = 0; i < reader_data.ReaderCount; i++) { 4691c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák unsigned int src_index; 4701c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_reader reader = reader_data.Readers[i]; 4711c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák const struct rc_opcode_info * info = 4721c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák rc_get_opcode_info(reader.Inst->U.I.Opcode); 4731c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 4741c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák for (src_index = 0; src_index < info->NumSrcRegs; src_index++) { 4751c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (&reader.Inst->U.I.SrcReg[src_index] == reader.U.I.Src) 4761c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák presub_replace(inst_add, reader.Inst, src_index); 4771c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 4781c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 4791c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return 1; 4801c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák} 4811c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 4821c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák/* This function assumes that inst_add->U.I.SrcReg[0] and 4831c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * inst_add->U.I.SrcReg[1] aren't both negative. */ 4841c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic void presub_replace_add( 4851c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_instruction * inst_add, 4861c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_instruction * inst_reader, 4871c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák unsigned int src_index) 4881c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{ 4891c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák rc_presubtract_op presub_opcode; 4901c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (inst_add->U.I.SrcReg[1].Negate || inst_add->U.I.SrcReg[0].Negate) 4911c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák presub_opcode = RC_PRESUB_SUB; 4921c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák else 4931c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák presub_opcode = RC_PRESUB_ADD; 4941c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 4951c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (inst_add->U.I.SrcReg[1].Negate) { 4961c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1]; 4971c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[0]; 4981c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } else { 4991c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[0]; 5001c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[1]; 5011c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 5021c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst_reader->U.I.PreSub.SrcReg[0].Negate = 0; 5031c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst_reader->U.I.PreSub.SrcReg[1].Negate = 0; 5041c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst_reader->U.I.PreSub.Opcode = presub_opcode; 5051c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst_reader->U.I.SrcReg[src_index] = 5061c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák chain_srcregs(inst_reader->U.I.SrcReg[src_index], 5071c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst_reader->U.I.PreSub.SrcReg[0]); 5081c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB; 5091c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst_reader->U.I.SrcReg[src_index].Index = presub_opcode; 5101c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák} 5111c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 5121c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic int is_presub_candidate( 5131c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct radeon_compiler * c, 5141c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_instruction * inst) 5151c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{ 5161c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode); 5171c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák unsigned int i; 5181c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák unsigned int is_constant[2] = {0, 0}; 5191c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 5201c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák assert(inst->U.I.Opcode == RC_OPCODE_ADD); 5211c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 5221c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE 5231c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák || inst->U.I.SaturateMode 524e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard || inst->U.I.WriteALUResult 525e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard || inst->U.I.Omod) { 5261c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return 0; 5271c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 5281c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 5291c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák /* If both sources use a constant swizzle, then we can't convert it to 5301c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * a presubtract operation. In fact for the ADD and SUB presubtract 5311c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * operations neither source can contain a constant swizzle. This 5321c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * specific case is checked in peephole_add_presub_add() when 5331c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * we make sure the swizzles for both sources are equal, so we 5341c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * don't need to worry about it here. */ 5351c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák for (i = 0; i < 2; i++) { 5361c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák int chan; 5371c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák for (chan = 0; chan < 4; chan++) { 5381c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák rc_swizzle swz = 5391c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák get_swz(inst->U.I.SrcReg[i].Swizzle, chan); 5401c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (swz == RC_SWIZZLE_ONE 5411c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák || swz == RC_SWIZZLE_ZERO 5421c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák || swz == RC_SWIZZLE_HALF) { 5431c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák is_constant[i] = 1; 5441c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 5451c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 5461c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 5471c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (is_constant[0] && is_constant[1]) 5481c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return 0; 5491c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 5501c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák for(i = 0; i < info->NumSrcRegs; i++) { 5511c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_src_register src = inst->U.I.SrcReg[i]; 5521c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (src_reads_dst_mask(src, inst->U.I.DstReg)) 5531c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return 0; 5541c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 5551c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák src.File = RC_FILE_PRESUB; 5561c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, src)) 5571c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return 0; 5581c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 5591c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return 1; 5601c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák} 5611c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 5621c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic int peephole_add_presub_add( 5631c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct radeon_compiler * c, 5641c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_instruction * inst_add) 5651c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{ 5661c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák unsigned dstmask = inst_add->U.I.DstReg.WriteMask; 5671c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák unsigned src0_neg = inst_add->U.I.SrcReg[0].Negate & dstmask; 5681c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák unsigned src1_neg = inst_add->U.I.SrcReg[1].Negate & dstmask; 5691c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 5701c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (inst_add->U.I.SrcReg[0].Swizzle != inst_add->U.I.SrcReg[1].Swizzle) 5711c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return 0; 5721c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 5731c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák /* src0 and src1 can't have absolute values */ 5741c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (inst_add->U.I.SrcReg[0].Abs || inst_add->U.I.SrcReg[1].Abs) 5751c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return 0; 5761c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 5771c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák /* presub_replace_add() assumes only one is negative */ 5781c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (inst_add->U.I.SrcReg[0].Negate && inst_add->U.I.SrcReg[1].Negate) 5791c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return 0; 5801c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 5811c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák /* if src0 is negative, at least all bits of dstmask have to be set */ 5821c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (inst_add->U.I.SrcReg[0].Negate && src0_neg != dstmask) 5831c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return 0; 5841c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 5851c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák /* if src1 is negative, at least all bits of dstmask have to be set */ 5861c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (inst_add->U.I.SrcReg[1].Negate && src1_neg != dstmask) 5871c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return 0; 5881c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 5891c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (!is_presub_candidate(c, inst_add)) 5901c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return 0; 5911c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 5921c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (presub_helper(c, inst_add, RC_PRESUB_ADD, presub_replace_add)) { 5931c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák rc_remove_instruction(inst_add); 5941c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return 1; 5951c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 5961c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return 0; 5971c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák} 5981c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 5991c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic void presub_replace_inv( 6001c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_instruction * inst_add, 6011c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_instruction * inst_reader, 6021c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák unsigned int src_index) 6031c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{ 6041c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák /* We must be careful not to modify inst_add, since it 6051c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * is possible it will remain part of the program.*/ 6061c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1]; 6071c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst_reader->U.I.PreSub.SrcReg[0].Negate = 0; 6081c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst_reader->U.I.PreSub.Opcode = RC_PRESUB_INV; 6091c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst_reader->U.I.SrcReg[src_index] = chain_srcregs(inst_reader->U.I.SrcReg[src_index], 6101c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst_reader->U.I.PreSub.SrcReg[0]); 6111c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 6121c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB; 6131c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst_reader->U.I.SrcReg[src_index].Index = RC_PRESUB_INV; 6141c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák} 6151c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 6161c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák/** 6171c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * PRESUB_INV: ADD TEMP[0], none.1, -TEMP[1] 6181c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * Use the presubtract 1 - src0 for all readers of TEMP[0]. The first source 6191c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * of the add instruction must have the constatnt 1 swizzle. This function 6201c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * does not check const registers to see if their value is 1.0, so it should 6211c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * be called after the constant_folding optimization. 6221c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * @return 6231c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * 0 if the ADD instruction is still part of the program. 6241c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * 1 if the ADD instruction is no longer part of the program. 6251c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák */ 6261c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic int peephole_add_presub_inv( 6271c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct radeon_compiler * c, 6281c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_instruction * inst_add) 6291c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{ 6301c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák unsigned int i, swz; 6311c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 6321c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (!is_presub_candidate(c, inst_add)) 6331c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return 0; 6341c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 6351c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák /* Check if src0 is 1. */ 6361c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák /* XXX It would be nice to use is_src_uniform_constant here, but that 6371c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * function only works if the register's file is RC_FILE_NONE */ 6381c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák for(i = 0; i < 4; i++ ) { 6391c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák swz = GET_SWZ(inst_add->U.I.SrcReg[0].Swizzle, i); 6401c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if(((1 << i) & inst_add->U.I.DstReg.WriteMask) 6411c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák && swz != RC_SWIZZLE_ONE) { 6421c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return 0; 6431c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 6441c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 6451c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 6461c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák /* Check src1. */ 6471c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if ((inst_add->U.I.SrcReg[1].Negate & inst_add->U.I.DstReg.WriteMask) != 6481c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst_add->U.I.DstReg.WriteMask 6491c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák || inst_add->U.I.SrcReg[1].Abs 6501c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák || (inst_add->U.I.SrcReg[1].File != RC_FILE_TEMPORARY 6511c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák && inst_add->U.I.SrcReg[1].File != RC_FILE_CONSTANT) 6521c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák || src_has_const_swz(inst_add->U.I.SrcReg[1])) { 6531c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 6541c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return 0; 6551c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 6561c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 6571c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (presub_helper(c, inst_add, RC_PRESUB_INV, presub_replace_inv)) { 6581c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák rc_remove_instruction(inst_add); 6591c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return 1; 6601c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 6611c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return 0; 6621c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák} 6631c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 664e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellardstruct peephole_mul_cb_data { 665e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard struct rc_dst_register * Writer; 666e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard unsigned int Clobbered; 667e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard}; 668e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard 669e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellardstatic void omod_filter_reader_cb( 670e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard void * userdata, 671e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard struct rc_instruction * inst, 672e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard rc_register_file file, 673e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard unsigned int index, 674e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard unsigned int mask) 675e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard{ 676e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard struct peephole_mul_cb_data * d = userdata; 677e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard if (rc_src_reads_dst_mask(file, mask, index, 678e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard d->Writer->File, d->Writer->Index, d->Writer->WriteMask)) { 679e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard 680e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard d->Clobbered = 1; 681e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard } 682e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard} 683e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard 684d64c6d2ffc086bde7a025269b80c0980f7d908f1Tom Stellardstatic void omod_filter_writer_cb( 685d64c6d2ffc086bde7a025269b80c0980f7d908f1Tom Stellard void * userdata, 686d64c6d2ffc086bde7a025269b80c0980f7d908f1Tom Stellard struct rc_instruction * inst, 687d64c6d2ffc086bde7a025269b80c0980f7d908f1Tom Stellard rc_register_file file, 688d64c6d2ffc086bde7a025269b80c0980f7d908f1Tom Stellard unsigned int index, 689d64c6d2ffc086bde7a025269b80c0980f7d908f1Tom Stellard unsigned int mask) 690d64c6d2ffc086bde7a025269b80c0980f7d908f1Tom Stellard{ 691d64c6d2ffc086bde7a025269b80c0980f7d908f1Tom Stellard struct peephole_mul_cb_data * d = userdata; 692d64c6d2ffc086bde7a025269b80c0980f7d908f1Tom Stellard if (file == d->Writer->File && index == d->Writer->Index && 693d64c6d2ffc086bde7a025269b80c0980f7d908f1Tom Stellard (mask & d->Writer->WriteMask)) { 694d64c6d2ffc086bde7a025269b80c0980f7d908f1Tom Stellard d->Clobbered = 1; 695d64c6d2ffc086bde7a025269b80c0980f7d908f1Tom Stellard } 696d64c6d2ffc086bde7a025269b80c0980f7d908f1Tom Stellard} 697d64c6d2ffc086bde7a025269b80c0980f7d908f1Tom Stellard 698e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellardstatic int peephole_mul_omod( 699e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard struct radeon_compiler * c, 700e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard struct rc_instruction * inst_mul, 701e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard struct rc_list * var_list) 702e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard{ 703dab5f98404677bbbdbe3d5a82b607bfd4689b48eBrian Paul unsigned int chan = 0, swz, i; 704e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard int const_index = -1; 705e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard int temp_index = -1; 706e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard float const_value; 707e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard rc_omod_op omod_op = RC_OMOD_DISABLE; 708e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard struct rc_list * writer_list; 709e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard struct rc_variable * var; 710e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard struct peephole_mul_cb_data cb_data; 711e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard 712e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard for (i = 0; i < 2; i++) { 713e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard unsigned int j; 714e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard if (inst_mul->U.I.SrcReg[i].File != RC_FILE_CONSTANT 715e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard && inst_mul->U.I.SrcReg[i].File != RC_FILE_TEMPORARY) { 716e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard return 0; 717e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard } 718e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard if (inst_mul->U.I.SrcReg[i].File == RC_FILE_TEMPORARY) { 719e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard if (temp_index != -1) { 720e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard /* The instruction has two temp sources */ 721e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard return 0; 722e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard } else { 723e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard temp_index = i; 724e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard continue; 725e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard } 726e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard } 727e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard /* If we get this far Src[i] must be a constant src */ 728e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard if (inst_mul->U.I.SrcReg[i].Negate) { 729e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard return 0; 730e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard } 731e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard /* The constant src needs to read from the same swizzle */ 732e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard swz = RC_SWIZZLE_UNUSED; 733e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard chan = 0; 734e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard for (j = 0; j < 4; j++) { 735e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard unsigned int j_swz = 736e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard GET_SWZ(inst_mul->U.I.SrcReg[i].Swizzle, j); 737e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard if (j_swz == RC_SWIZZLE_UNUSED) { 738e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard continue; 739e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard } 740e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard if (swz == RC_SWIZZLE_UNUSED) { 741e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard swz = j_swz; 742e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard chan = j; 743e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard } else if (j_swz != swz) { 744e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard return 0; 745e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard } 746e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard } 747e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard 748e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard if (const_index != -1) { 749e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard /* The instruction has two constant sources */ 750e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard return 0; 751e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard } else { 752e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard const_index = i; 753e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard } 754e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard } 755e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard 756e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard if (!rc_src_reg_is_immediate(c, inst_mul->U.I.SrcReg[const_index].File, 757e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard inst_mul->U.I.SrcReg[const_index].Index)) { 758e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard return 0; 759e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard } 760e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard const_value = rc_get_constant_value(c, 761e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard inst_mul->U.I.SrcReg[const_index].Index, 762e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard inst_mul->U.I.SrcReg[const_index].Swizzle, 763e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard inst_mul->U.I.SrcReg[const_index].Negate, 764e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard chan); 765e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard 766e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard if (const_value == 2.0f) { 767e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard omod_op = RC_OMOD_MUL_2; 768e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard } else if (const_value == 4.0f) { 769e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard omod_op = RC_OMOD_MUL_4; 770e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard } else if (const_value == 8.0f) { 771e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard omod_op = RC_OMOD_MUL_8; 772e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard } else if (const_value == (1.0f / 2.0f)) { 773e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard omod_op = RC_OMOD_DIV_2; 774e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard } else if (const_value == (1.0f / 4.0f)) { 775e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard omod_op = RC_OMOD_DIV_4; 776e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard } else if (const_value == (1.0f / 8.0f)) { 777e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard omod_op = RC_OMOD_DIV_8; 778e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard } else { 779e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard return 0; 780e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard } 781e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard 782e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard writer_list = rc_variable_list_get_writers_one_reader(var_list, 783e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard RC_INSTRUCTION_NORMAL, &inst_mul->U.I.SrcReg[temp_index]); 784e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard 785e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard if (!writer_list) { 786e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard return 0; 787e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard } 788e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard 789e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard cb_data.Clobbered = 0; 790e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard cb_data.Writer = &inst_mul->U.I.DstReg; 791e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard for (var = writer_list->Item; var; var = var->Friend) { 792e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard struct rc_instruction * inst; 793e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard const struct rc_opcode_info * info = rc_get_opcode_info( 794e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard var->Inst->U.I.Opcode); 795e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard if (info->HasTexture) { 796e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard return 0; 797e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard } 798e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard if (var->Inst->U.I.SaturateMode != RC_SATURATE_NONE) { 799e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard return 0; 800e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard } 801e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard for (inst = inst_mul->Prev; inst != var->Inst; 802e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard inst = inst->Prev) { 803e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard rc_for_all_reads_mask(inst, omod_filter_reader_cb, 804e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard &cb_data); 805d64c6d2ffc086bde7a025269b80c0980f7d908f1Tom Stellard rc_for_all_writes_mask(inst, omod_filter_writer_cb, 806d64c6d2ffc086bde7a025269b80c0980f7d908f1Tom Stellard &cb_data); 807e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard if (cb_data.Clobbered) { 808e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard break; 809e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard } 810e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard } 811e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard } 812e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard 813e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard if (cb_data.Clobbered) { 814e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard return 0; 815e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard } 816e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard 817e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard /* Rewrite the instructions */ 818e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard for (var = writer_list->Item; var; var = var->Friend) { 819e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard struct rc_variable * writer = writer_list->Item; 82013814b0103812adbb5aba0dba4664249e8566290Tom Stellard unsigned conversion_swizzle = rc_make_conversion_swizzle( 82113814b0103812adbb5aba0dba4664249e8566290Tom Stellard writer->Inst->U.I.DstReg.WriteMask, 82213814b0103812adbb5aba0dba4664249e8566290Tom Stellard inst_mul->U.I.DstReg.WriteMask); 823e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard writer->Inst->U.I.Omod = omod_op; 82413814b0103812adbb5aba0dba4664249e8566290Tom Stellard writer->Inst->U.I.DstReg.File = inst_mul->U.I.DstReg.File; 82513814b0103812adbb5aba0dba4664249e8566290Tom Stellard writer->Inst->U.I.DstReg.Index = inst_mul->U.I.DstReg.Index; 82613814b0103812adbb5aba0dba4664249e8566290Tom Stellard rc_normal_rewrite_writemask(writer->Inst, conversion_swizzle); 827e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard writer->Inst->U.I.SaturateMode = inst_mul->U.I.SaturateMode; 828e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard } 829e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard 830e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard rc_remove_instruction(inst_mul); 831e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard 832e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard return 1; 833e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard} 834e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard 8351c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák/** 8361c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * @return 8371c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * 0 if inst is still part of the program. 8381c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * 1 if inst is no longer part of the program. 8391c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák */ 8401c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic int peephole(struct radeon_compiler * c, struct rc_instruction * inst) 8411c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{ 8421c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák switch(inst->U.I.Opcode){ 8431c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák case RC_OPCODE_ADD: 8441c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (c->has_presub) { 8451c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if(peephole_add_presub_inv(c, inst)) 8461c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return 1; 8471c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if(peephole_add_presub_add(c, inst)) 8481c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return 1; 8491c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 8501c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák break; 8511c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák default: 8521c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák break; 8531c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 8541c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return 0; 8551c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák} 8561c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 8571c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákvoid rc_optimize(struct radeon_compiler * c, void *user) 8581c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{ 8591c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_instruction * inst = c->Program.Instructions.Next; 860e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard struct rc_list * var_list; 8611c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák while(inst != &c->Program.Instructions) { 8621c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_instruction * cur = inst; 8631c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst = inst->Next; 8641c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 8651c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák constant_folding(c, cur); 8661c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 8671c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if(peephole(c, cur)) 8681c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák continue; 8691c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 8701c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (cur->U.I.Opcode == RC_OPCODE_MOV) { 8711c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák copy_propagate(c, cur); 8721c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák /* cur may no longer be part of the program */ 8731c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 8741c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 875e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard 876e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard if (!c->has_omod) { 877e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard return; 878e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard } 879e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard 880e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard inst = c->Program.Instructions.Next; 881e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard while(inst != &c->Program.Instructions) { 882e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard struct rc_instruction * cur = inst; 883e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard inst = inst->Next; 884e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard if (cur->U.I.Opcode == RC_OPCODE_MUL) { 885e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard var_list = rc_get_variables(c); 886e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard peephole_mul_omod(c, cur, var_list); 887e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard } 888e945fb04d04c33da5e77d22d739c5740a522a61eTom Stellard } 8891c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák} 890