/* radeon_optimize.c revision 1c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6 */
11c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák/* 21c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * Copyright (C) 2009 Nicolai Haehnle. 31c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * Copyright 2010 Tom Stellard <tstellar@gmail.com> 41c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * 51c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * All Rights Reserved. 61c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * 71c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * Permission is hereby granted, free of charge, to any person obtaining 81c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * a copy of this software and associated documentation files (the 91c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * "Software"), to deal in the Software without restriction, including 101c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * without limitation the rights to use, copy, modify, merge, publish, 111c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * distribute, sublicense, and/or sell copies of the Software, and to 121c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * permit persons to whom the Software is furnished to do so, subject to 131c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * the following conditions: 141c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * 151c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * The above copyright notice and this permission notice (including the 161c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * next paragraph) shall be included in all copies or substantial 171c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * portions of the Software. 
181c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * 191c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 201c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 211c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 221c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE 231c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION 241c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION 251c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 261c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * 271c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák */ 281c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 291c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák#include "radeon_dataflow.h" 301c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 311c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák#include "radeon_compiler.h" 321c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák#include "radeon_compiler_util.h" 331c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák#include "radeon_swizzle.h" 341c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 351c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstruct src_clobbered_reads_cb_data { 361c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák rc_register_file File; 371c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák unsigned int Index; 381c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák unsigned int Mask; 391c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_reader_data * ReaderData; 401c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák}; 
411c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 421c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšáktypedef void (*rc_presub_replace_fn)(struct rc_instruction *, 431c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_instruction *, 441c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák unsigned int); 451c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 461c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic struct rc_src_register chain_srcregs(struct rc_src_register outer, struct rc_src_register inner) 471c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{ 481c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_src_register combine; 491c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák combine.File = inner.File; 501c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák combine.Index = inner.Index; 511c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák combine.RelAddr = inner.RelAddr; 521c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (outer.Abs) { 531c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák combine.Abs = 1; 541c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák combine.Negate = outer.Negate; 551c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } else { 561c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák combine.Abs = inner.Abs; 571c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák combine.Negate = swizzle_mask(outer.Swizzle, inner.Negate); 581c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák combine.Negate ^= outer.Negate; 591c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 601c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák combine.Swizzle = combine_swizzles(inner.Swizzle, outer.Swizzle); 611c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return combine; 621c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák} 631c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 641c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic void copy_propagate_scan_read(void * data, struct rc_instruction * inst, 
651c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_src_register * src) 661c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{ 671c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák rc_register_file file = src->File; 681c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_reader_data * reader_data = data; 691c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 701c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if(!rc_inst_can_use_presub(inst, 711c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák reader_data->Writer->U.I.PreSub.Opcode, 721c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák rc_swizzle_to_writemask(src->Swizzle), 731c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák src, 741c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák &reader_data->Writer->U.I.PreSub.SrcReg[0], 751c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák &reader_data->Writer->U.I.PreSub.SrcReg[1])) { 761c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák reader_data->Abort = 1; 771c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return; 781c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 791c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 801c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák /* XXX This could probably be handled better. */ 811c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (file == RC_FILE_ADDRESS) { 821c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák reader_data->Abort = 1; 831c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return; 841c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 851c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 861c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák /* These instructions cannot read from the constants file. 
871c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * see radeonTransformTEX() 881c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák */ 891c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if(reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_TEMPORARY && 901c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák reader_data->Writer->U.I.SrcReg[0].File != RC_FILE_INPUT && 911c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák (inst->U.I.Opcode == RC_OPCODE_TEX || 921c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.Opcode == RC_OPCODE_TXB || 931c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.Opcode == RC_OPCODE_TXP || 941c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.Opcode == RC_OPCODE_TXD || 951c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.Opcode == RC_OPCODE_TXL || 961c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.Opcode == RC_OPCODE_KIL)){ 971c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák reader_data->Abort = 1; 981c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return; 991c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 1001c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák} 1011c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 1021c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic void src_clobbered_reads_cb( 1031c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák void * data, 1041c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_instruction * inst, 1051c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_src_register * src) 1061c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{ 1071c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct src_clobbered_reads_cb_data * sc_data = data; 1081c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 1091c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (src->File == sc_data->File 1101c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák && src->Index == sc_data->Index 1111c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek 
Olšák && (rc_swizzle_to_writemask(src->Swizzle) & sc_data->Mask)) { 1121c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 1131c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW; 1141c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 1151c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 1161c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (src->RelAddr && sc_data->File == RC_FILE_ADDRESS) { 1171c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák sc_data->ReaderData->AbortOnRead = RC_MASK_XYZW; 1181c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 1191c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák} 1201c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 1211c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic void is_src_clobbered_scan_write( 1221c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák void * data, 1231c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_instruction * inst, 1241c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák rc_register_file file, 1251c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák unsigned int index, 1261c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák unsigned int mask) 1271c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{ 1281c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct src_clobbered_reads_cb_data sc_data; 1291c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_reader_data * reader_data = data; 1301c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák sc_data.File = file; 1311c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák sc_data.Index = index; 1321c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák sc_data.Mask = mask; 1331c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák sc_data.ReaderData = reader_data; 1341c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák rc_for_all_reads_src(reader_data->Writer, 1351c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák src_clobbered_reads_cb, &sc_data); 
1361c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák} 1371c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 1381c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic void copy_propagate(struct radeon_compiler * c, struct rc_instruction * inst_mov) 1391c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{ 1401c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_reader_data reader_data; 1411c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák unsigned int i; 1421c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 1431c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (inst_mov->U.I.DstReg.File != RC_FILE_TEMPORARY || 1441c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst_mov->U.I.WriteALUResult || 1451c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst_mov->U.I.SaturateMode) 1461c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return; 1471c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 1481c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák /* Get a list of all the readers of this MOV instruction. */ 1491c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák reader_data.ExitOnAbort = 1; 1501c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák rc_get_readers(c, inst_mov, &reader_data, 1511c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák copy_propagate_scan_read, NULL, 1521c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák is_src_clobbered_scan_write); 1531c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 1541c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (reader_data.Abort || reader_data.ReaderCount == 0) 1551c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return; 1561c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 1571c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák /* Propagate the MOV instruction. 
*/ 1581c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák for (i = 0; i < reader_data.ReaderCount; i++) { 1591c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_instruction * inst = reader_data.Readers[i].Inst; 1601c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák *reader_data.Readers[i].U.I.Src = chain_srcregs(*reader_data.Readers[i].U.I.Src, inst_mov->U.I.SrcReg[0]); 1611c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 1621c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (inst_mov->U.I.SrcReg[0].File == RC_FILE_PRESUB) 1631c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.PreSub = inst_mov->U.I.PreSub; 1641c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 1651c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 1661c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák /* Finally, remove the original MOV instruction */ 1671c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák rc_remove_instruction(inst_mov); 1681c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák} 1691c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 1701c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák/** 1711c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * Check if a source register is actually always the same 1721c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * swizzle constant. 
1731c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák */ 1741c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic int is_src_uniform_constant(struct rc_src_register src, 1751c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák rc_swizzle * pswz, unsigned int * pnegate) 1761c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{ 1771c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák int have_used = 0; 1781c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 1791c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (src.File != RC_FILE_NONE) { 1801c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák *pswz = 0; 1811c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return 0; 1821c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 1831c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 1841c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák for(unsigned int chan = 0; chan < 4; ++chan) { 1851c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák unsigned int swz = GET_SWZ(src.Swizzle, chan); 1861c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (swz < 4) { 1871c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák *pswz = 0; 1881c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return 0; 1891c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 1901c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (swz == RC_SWIZZLE_UNUSED) 1911c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák continue; 1921c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 1931c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (!have_used) { 1941c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák *pswz = swz; 1951c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák *pnegate = GET_BIT(src.Negate, chan); 1961c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák have_used = 1; 1971c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } else { 1981c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (swz != *pswz || *pnegate != GET_BIT(src.Negate, chan)) { 1991c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák *pswz 
= 0; 2001c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return 0; 2011c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 2021c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 2031c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 2041c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 2051c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return 1; 2061c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák} 2071c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 2081c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic void constant_folding_mad(struct rc_instruction * inst) 2091c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{ 2101c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák rc_swizzle swz = 0; 2111c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák unsigned int negate= 0; 2121c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 2131c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (is_src_uniform_constant(inst->U.I.SrcReg[2], &swz, &negate)) { 2141c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (swz == RC_SWIZZLE_ZERO) { 2151c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.Opcode = RC_OPCODE_MUL; 2161c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return; 2171c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 2181c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 2191c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 2201c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) { 2211c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (swz == RC_SWIZZLE_ONE) { 2221c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.Opcode = RC_OPCODE_ADD; 2231c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (negate) 2241c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW; 2251c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.SrcReg[1] = inst->U.I.SrcReg[2]; 2261c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 
return; 2271c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } else if (swz == RC_SWIZZLE_ZERO) { 2281c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.Opcode = RC_OPCODE_MOV; 2291c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2]; 2301c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return; 2311c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 2321c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 2331c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 2341c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) { 2351c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (swz == RC_SWIZZLE_ONE) { 2361c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.Opcode = RC_OPCODE_ADD; 2371c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (negate) 2381c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.SrcReg[1].Negate ^= RC_MASK_XYZW; 2391c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2]; 2401c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return; 2411c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } else if (swz == RC_SWIZZLE_ZERO) { 2421c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.Opcode = RC_OPCODE_MOV; 2431c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.SrcReg[0] = inst->U.I.SrcReg[2]; 2441c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return; 2451c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 2461c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 2471c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák} 2481c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 2491c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic void constant_folding_mul(struct rc_instruction * inst) 2501c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{ 2511c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák rc_swizzle swz = 0; 
2521c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák unsigned int negate = 0; 2531c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 2541c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) { 2551c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (swz == RC_SWIZZLE_ONE) { 2561c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.Opcode = RC_OPCODE_MOV; 2571c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1]; 2581c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (negate) 2591c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW; 2601c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return; 2611c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } else if (swz == RC_SWIZZLE_ZERO) { 2621c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.Opcode = RC_OPCODE_MOV; 2631c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000; 2641c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return; 2651c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 2661c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 2671c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 2681c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) { 2691c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (swz == RC_SWIZZLE_ONE) { 2701c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.Opcode = RC_OPCODE_MOV; 2711c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (negate) 2721c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.SrcReg[0].Negate ^= RC_MASK_XYZW; 2731c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return; 2741c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } else if (swz == RC_SWIZZLE_ZERO) { 2751c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.Opcode = RC_OPCODE_MOV; 
2761c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.SrcReg[0].Swizzle = RC_SWIZZLE_0000; 2771c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return; 2781c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 2791c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 2801c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák} 2811c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 2821c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic void constant_folding_add(struct rc_instruction * inst) 2831c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{ 2841c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák rc_swizzle swz = 0; 2851c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák unsigned int negate = 0; 2861c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 2871c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (is_src_uniform_constant(inst->U.I.SrcReg[0], &swz, &negate)) { 2881c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (swz == RC_SWIZZLE_ZERO) { 2891c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.Opcode = RC_OPCODE_MOV; 2901c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.SrcReg[0] = inst->U.I.SrcReg[1]; 2911c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return; 2921c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 2931c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 2941c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 2951c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (is_src_uniform_constant(inst->U.I.SrcReg[1], &swz, &negate)) { 2961c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (swz == RC_SWIZZLE_ZERO) { 2971c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.Opcode = RC_OPCODE_MOV; 2981c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return; 2991c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 3001c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 3011c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák} 3021c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 
3031c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák/** 3041c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * Replace 0.0, 1.0 and 0.5 immediate constants by their 3051c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * respective swizzles. Simplify instructions like ADD dst, src, 0; 3061c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák */ 3071c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic void constant_folding(struct radeon_compiler * c, struct rc_instruction * inst) 3081c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{ 3091c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák const struct rc_opcode_info * opcode = rc_get_opcode_info(inst->U.I.Opcode); 3101c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák unsigned int i; 3111c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 3121c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák /* Replace 0.0, 1.0 and 0.5 immediates by their explicit swizzles */ 3131c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák for(unsigned int src = 0; src < opcode->NumSrcRegs; ++src) { 3141c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_constant * constant; 3151c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_src_register newsrc; 3161c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák int have_real_reference; 3171c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák unsigned int chan; 3181c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 3191c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák /* If there are only 0, 0.5, 1, or _ swizzles, mark the source as a constant. 
*/ 3201c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák for (chan = 0; chan < 4; ++chan) 3211c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (GET_SWZ(inst->U.I.SrcReg[src].Swizzle, chan) <= 3) 3221c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák break; 3231c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (chan == 4) { 3241c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.SrcReg[src].File = RC_FILE_NONE; 3251c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák continue; 3261c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 3271c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 3281c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák /* Convert immediates to swizzles. */ 3291c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (inst->U.I.SrcReg[src].File != RC_FILE_CONSTANT || 3301c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.SrcReg[src].RelAddr || 3311c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.SrcReg[src].Index >= c->Program.Constants.Count) 3321c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák continue; 3331c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 3341c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák constant = 3351c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák &c->Program.Constants.Constants[inst->U.I.SrcReg[src].Index]; 3361c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 3371c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (constant->Type != RC_CONSTANT_IMMEDIATE) 3381c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák continue; 3391c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 3401c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák newsrc = inst->U.I.SrcReg[src]; 3411c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák have_real_reference = 0; 3421c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák for (chan = 0; chan < 4; ++chan) { 3431c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák unsigned int swz = GET_SWZ(newsrc.Swizzle, chan); 
3441c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák unsigned int newswz; 3451c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák float imm; 3461c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák float baseimm; 3471c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 3481c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (swz >= 4) 3491c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák continue; 3501c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 3511c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák imm = constant->u.Immediate[swz]; 3521c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák baseimm = imm; 3531c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (imm < 0.0) 3541c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák baseimm = -baseimm; 3551c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 3561c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (baseimm == 0.0) { 3571c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák newswz = RC_SWIZZLE_ZERO; 3581c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } else if (baseimm == 1.0) { 3591c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák newswz = RC_SWIZZLE_ONE; 3601c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } else if (baseimm == 0.5 && c->has_half_swizzles) { 3611c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák newswz = RC_SWIZZLE_HALF; 3621c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } else { 3631c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák have_real_reference = 1; 3641c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák continue; 3651c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 3661c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 3671c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák SET_SWZ(newsrc.Swizzle, chan, newswz); 3681c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (imm < 0.0 && !newsrc.Abs) 3691c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák newsrc.Negate ^= 1 << chan; 3701c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 
3711c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 3721c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (!have_real_reference) { 3731c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák newsrc.File = RC_FILE_NONE; 3741c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák newsrc.Index = 0; 3751c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 3761c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 3771c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák /* don't make the swizzle worse */ 3781c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, newsrc) && 3791c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák c->SwizzleCaps->IsNative(inst->U.I.Opcode, inst->U.I.SrcReg[src])) 3801c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák continue; 3811c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 3821c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst->U.I.SrcReg[src] = newsrc; 3831c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 3841c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 3851c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák /* Simplify instructions based on constants */ 3861c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (inst->U.I.Opcode == RC_OPCODE_MAD) 3871c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák constant_folding_mad(inst); 3881c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 3891c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák /* note: MAD can simplify to MUL or ADD */ 3901c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (inst->U.I.Opcode == RC_OPCODE_MUL) 3911c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák constant_folding_mul(inst); 3921c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák else if (inst->U.I.Opcode == RC_OPCODE_ADD) 3931c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák constant_folding_add(inst); 3941c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 3951c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák /* In case this instruction has been converted, 
make sure all of the 3961c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * registers that are no longer used are empty. */ 3971c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák opcode = rc_get_opcode_info(inst->U.I.Opcode); 3981c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák for(i = opcode->NumSrcRegs; i < 3; i++) { 3991c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák memset(&inst->U.I.SrcReg[i], 0, sizeof(struct rc_src_register)); 4001c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 4011c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák} 4021c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 4031c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák/** 4041c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * If src and dst use the same register, this function returns a writemask that 4051c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * indicates wich components are read by src. Otherwise zero is returned. 4061c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák */ 4071c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic unsigned int src_reads_dst_mask(struct rc_src_register src, 4081c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_dst_register dst) 4091c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{ 4101c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (dst.File != src.File || dst.Index != src.Index) { 4111c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return 0; 4121c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 4131c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return rc_swizzle_to_writemask(src.Swizzle); 4141c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák} 4151c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 4161c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák/* Return 1 if the source registers has a constant swizzle (e.g. 0, 0.5, 1.0) 4171c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * in any of its channels. Return 0 otherwise. 
*/ 4181c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic int src_has_const_swz(struct rc_src_register src) { 4191c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák int chan; 4201c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák for(chan = 0; chan < 4; chan++) { 4211c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák unsigned int swz = GET_SWZ(src.Swizzle, chan); 4221c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (swz == RC_SWIZZLE_ZERO || swz == RC_SWIZZLE_HALF 4231c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák || swz == RC_SWIZZLE_ONE) { 4241c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return 1; 4251c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 4261c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 4271c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return 0; 4281c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák} 4291c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 4301c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic void presub_scan_read( 4311c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák void * data, 4321c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_instruction * inst, 4331c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_src_register * src) 4341c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{ 4351c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_reader_data * reader_data = data; 4361c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák rc_presubtract_op * presub_opcode = reader_data->CbData; 4371c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 4381c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (!rc_inst_can_use_presub(inst, *presub_opcode, 4391c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák reader_data->Writer->U.I.DstReg.WriteMask, 4401c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák src, 4411c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák &reader_data->Writer->U.I.SrcReg[0], 4421c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 
&reader_data->Writer->U.I.SrcReg[1])) { 4431c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák reader_data->Abort = 1; 4441c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return; 4451c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 4461c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák} 4471c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 4481c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic int presub_helper( 4491c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct radeon_compiler * c, 4501c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_instruction * inst_add, 4511c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák rc_presubtract_op presub_opcode, 4521c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák rc_presub_replace_fn presub_replace) 4531c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{ 4541c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_reader_data reader_data; 4551c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák unsigned int i; 4561c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák rc_presubtract_op cb_op = presub_opcode; 4571c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 4581c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák reader_data.CbData = &cb_op; 4591c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák reader_data.ExitOnAbort = 1; 4601c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák rc_get_readers(c, inst_add, &reader_data, presub_scan_read, NULL, 4611c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák is_src_clobbered_scan_write); 4621c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 4631c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (reader_data.Abort || reader_data.ReaderCount == 0) 4641c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return 0; 4651c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 4661c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák for(i = 0; i < reader_data.ReaderCount; i++) { 4671c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák unsigned int src_index; 
4681c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_reader reader = reader_data.Readers[i]; 4691c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák const struct rc_opcode_info * info = 4701c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák rc_get_opcode_info(reader.Inst->U.I.Opcode); 4711c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 4721c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák for (src_index = 0; src_index < info->NumSrcRegs; src_index++) { 4731c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (&reader.Inst->U.I.SrcReg[src_index] == reader.U.I.Src) 4741c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák presub_replace(inst_add, reader.Inst, src_index); 4751c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 4761c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 4771c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return 1; 4781c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák} 4791c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 4801c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák/* This function assumes that inst_add->U.I.SrcReg[0] and 4811c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * inst_add->U.I.SrcReg[1] aren't both negative. 
*/ 4821c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic void presub_replace_add( 4831c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_instruction * inst_add, 4841c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_instruction * inst_reader, 4851c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák unsigned int src_index) 4861c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{ 4871c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák rc_presubtract_op presub_opcode; 4881c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (inst_add->U.I.SrcReg[1].Negate || inst_add->U.I.SrcReg[0].Negate) 4891c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák presub_opcode = RC_PRESUB_SUB; 4901c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák else 4911c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák presub_opcode = RC_PRESUB_ADD; 4921c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 4931c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (inst_add->U.I.SrcReg[1].Negate) { 4941c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1]; 4951c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[0]; 4961c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } else { 4971c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[0]; 4981c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst_reader->U.I.PreSub.SrcReg[1] = inst_add->U.I.SrcReg[1]; 4991c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 5001c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst_reader->U.I.PreSub.SrcReg[0].Negate = 0; 5011c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst_reader->U.I.PreSub.SrcReg[1].Negate = 0; 5021c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst_reader->U.I.PreSub.Opcode = presub_opcode; 5031c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst_reader->U.I.SrcReg[src_index] = 
5041c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák chain_srcregs(inst_reader->U.I.SrcReg[src_index], 5051c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst_reader->U.I.PreSub.SrcReg[0]); 5061c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB; 5071c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst_reader->U.I.SrcReg[src_index].Index = presub_opcode; 5081c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák} 5091c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 5101c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic int is_presub_candidate( 5111c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct radeon_compiler * c, 5121c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_instruction * inst) 5131c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{ 5141c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák const struct rc_opcode_info * info = rc_get_opcode_info(inst->U.I.Opcode); 5151c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák unsigned int i; 5161c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák unsigned int is_constant[2] = {0, 0}; 5171c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 5181c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák assert(inst->U.I.Opcode == RC_OPCODE_ADD); 5191c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 5201c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (inst->U.I.PreSub.Opcode != RC_PRESUB_NONE 5211c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák || inst->U.I.SaturateMode 5221c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák || inst->U.I.WriteALUResult) { 5231c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return 0; 5241c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 5251c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 5261c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák /* If both sources use a constant swizzle, then we can't convert it to 5271c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * a presubtract operation. 
In fact for the ADD and SUB presubtract 5281c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * operations neither source can contain a constant swizzle. This 5291c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * specific case is checked in peephole_add_presub_add() when 5301c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * we make sure the swizzles for both sources are equal, so we 5311c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * don't need to worry about it here. */ 5321c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák for (i = 0; i < 2; i++) { 5331c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák int chan; 5341c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák for (chan = 0; chan < 4; chan++) { 5351c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák rc_swizzle swz = 5361c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák get_swz(inst->U.I.SrcReg[i].Swizzle, chan); 5371c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (swz == RC_SWIZZLE_ONE 5381c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák || swz == RC_SWIZZLE_ZERO 5391c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák || swz == RC_SWIZZLE_HALF) { 5401c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák is_constant[i] = 1; 5411c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 5421c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 5431c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 5441c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (is_constant[0] && is_constant[1]) 5451c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return 0; 5461c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 5471c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák for(i = 0; i < info->NumSrcRegs; i++) { 5481c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_src_register src = inst->U.I.SrcReg[i]; 5491c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (src_reads_dst_mask(src, inst->U.I.DstReg)) 5501c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return 0; 
5511c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 5521c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák src.File = RC_FILE_PRESUB; 5531c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (!c->SwizzleCaps->IsNative(inst->U.I.Opcode, src)) 5541c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return 0; 5551c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 5561c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return 1; 5571c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák} 5581c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 5591c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic int peephole_add_presub_add( 5601c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct radeon_compiler * c, 5611c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_instruction * inst_add) 5621c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{ 5631c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák unsigned dstmask = inst_add->U.I.DstReg.WriteMask; 5641c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák unsigned src0_neg = inst_add->U.I.SrcReg[0].Negate & dstmask; 5651c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák unsigned src1_neg = inst_add->U.I.SrcReg[1].Negate & dstmask; 5661c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 5671c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (inst_add->U.I.SrcReg[0].Swizzle != inst_add->U.I.SrcReg[1].Swizzle) 5681c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return 0; 5691c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 5701c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák /* src0 and src1 can't have absolute values */ 5711c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (inst_add->U.I.SrcReg[0].Abs || inst_add->U.I.SrcReg[1].Abs) 5721c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return 0; 5731c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 5741c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák /* presub_replace_add() assumes only one is negative */ 
5751c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (inst_add->U.I.SrcReg[0].Negate && inst_add->U.I.SrcReg[1].Negate) 5761c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return 0; 5771c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 5781c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák /* if src0 is negative, at least all bits of dstmask have to be set */ 5791c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (inst_add->U.I.SrcReg[0].Negate && src0_neg != dstmask) 5801c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return 0; 5811c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 5821c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák /* if src1 is negative, at least all bits of dstmask have to be set */ 5831c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (inst_add->U.I.SrcReg[1].Negate && src1_neg != dstmask) 5841c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return 0; 5851c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 5861c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (!is_presub_candidate(c, inst_add)) 5871c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return 0; 5881c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 5891c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (presub_helper(c, inst_add, RC_PRESUB_ADD, presub_replace_add)) { 5901c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák rc_remove_instruction(inst_add); 5911c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return 1; 5921c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 5931c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return 0; 5941c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák} 5951c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 5961c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic void presub_replace_inv( 5971c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_instruction * inst_add, 5981c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_instruction * inst_reader, 
5991c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák unsigned int src_index) 6001c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{ 6011c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák /* We must be careful not to modify inst_add, since it 6021c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * is possible it will remain part of the program.*/ 6031c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst_reader->U.I.PreSub.SrcReg[0] = inst_add->U.I.SrcReg[1]; 6041c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst_reader->U.I.PreSub.SrcReg[0].Negate = 0; 6051c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst_reader->U.I.PreSub.Opcode = RC_PRESUB_INV; 6061c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst_reader->U.I.SrcReg[src_index] = chain_srcregs(inst_reader->U.I.SrcReg[src_index], 6071c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst_reader->U.I.PreSub.SrcReg[0]); 6081c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 6091c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst_reader->U.I.SrcReg[src_index].File = RC_FILE_PRESUB; 6101c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst_reader->U.I.SrcReg[src_index].Index = RC_PRESUB_INV; 6111c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák} 6121c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 6131c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák/** 6141c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * PRESUB_INV: ADD TEMP[0], none.1, -TEMP[1] 6151c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * Use the presubtract 1 - src0 for all readers of TEMP[0]. The first source 6161c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * of the add instruction must have the constatnt 1 swizzle. This function 6171c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * does not check const registers to see if their value is 1.0, so it should 6181c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * be called after the constant_folding optimization. 
6191c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * @return 6201c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * 0 if the ADD instruction is still part of the program. 6211c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * 1 if the ADD instruction is no longer part of the program. 6221c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák */ 6231c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic int peephole_add_presub_inv( 6241c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct radeon_compiler * c, 6251c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_instruction * inst_add) 6261c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{ 6271c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák unsigned int i, swz; 6281c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 6291c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (!is_presub_candidate(c, inst_add)) 6301c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return 0; 6311c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 6321c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák /* Check if src0 is 1. 
*/ 6331c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák /* XXX It would be nice to use is_src_uniform_constant here, but that 6341c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * function only works if the register's file is RC_FILE_NONE */ 6351c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák for(i = 0; i < 4; i++ ) { 6361c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák swz = GET_SWZ(inst_add->U.I.SrcReg[0].Swizzle, i); 6371c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if(((1 << i) & inst_add->U.I.DstReg.WriteMask) 6381c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák && swz != RC_SWIZZLE_ONE) { 6391c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return 0; 6401c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 6411c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 6421c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 6431c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák /* Check src1. */ 6441c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if ((inst_add->U.I.SrcReg[1].Negate & inst_add->U.I.DstReg.WriteMask) != 6451c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst_add->U.I.DstReg.WriteMask 6461c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák || inst_add->U.I.SrcReg[1].Abs 6471c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák || (inst_add->U.I.SrcReg[1].File != RC_FILE_TEMPORARY 6481c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák && inst_add->U.I.SrcReg[1].File != RC_FILE_CONSTANT) 6491c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák || src_has_const_swz(inst_add->U.I.SrcReg[1])) { 6501c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 6511c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return 0; 6521c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 6531c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 6541c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (presub_helper(c, inst_add, RC_PRESUB_INV, presub_replace_inv)) { 6551c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák rc_remove_instruction(inst_add); 
6561c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return 1; 6571c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 6581c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return 0; 6591c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák} 6601c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 6611c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák/** 6621c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * @return 6631c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * 0 if inst is still part of the program. 6641c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák * 1 if inst is no longer part of the program. 6651c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák */ 6661c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákstatic int peephole(struct radeon_compiler * c, struct rc_instruction * inst) 6671c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{ 6681c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák switch(inst->U.I.Opcode){ 6691c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák case RC_OPCODE_ADD: 6701c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (c->has_presub) { 6711c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if(peephole_add_presub_inv(c, inst)) 6721c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return 1; 6731c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if(peephole_add_presub_add(c, inst)) 6741c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return 1; 6751c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 6761c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák break; 6771c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák default: 6781c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák break; 6791c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 6801c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák return 0; 6811c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák} 6821c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 6831c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšákvoid rc_optimize(struct radeon_compiler * c, void *user) 
6841c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák{ 6851c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_instruction * inst = c->Program.Instructions.Next; 6861c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák while(inst != &c->Program.Instructions) { 6871c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák struct rc_instruction * cur = inst; 6881c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák inst = inst->Next; 6891c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 6901c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák constant_folding(c, cur); 6911c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 6921c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if(peephole(c, cur)) 6931c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák continue; 6941c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák 6951c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák if (cur->U.I.Opcode == RC_OPCODE_MOV) { 6961c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák copy_propagate(c, cur); 6971c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák /* cur may no longer be part of the program */ 6981c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 6991c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák } 7001c2c4ddbd1e97bfd13430521e5c09cb5ce8e36e6Marek Olšák} 701